OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [sw/] [example/] [floating_point_test/] [neorv32_zfinx_extension_intrinsics.h] - Blame information for rev 56

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 55 zero_gravi
// #################################################################################################
2
// # << NEORV32 - Intrinsics + Emulation Functions for the RISC-V "Zfinx" CPU extension >>         #
3
// # ********************************************************************************************* #
4
// # The intrinsics provided by this library allow to use the hardware floating-point unit of the  #
5
// # RISC-V Zfinx CPU extension without the need for Zfinx support by the compiler / toolchain.    #
6
// # ********************************************************************************************* #
7
// # BSD 3-Clause License                                                                          #
8
// #                                                                                               #
9
// # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
10
// #                                                                                               #
11
// # Redistribution and use in source and binary forms, with or without modification, are          #
12
// # permitted provided that the following conditions are met:                                     #
13
// #                                                                                               #
14
// # 1. Redistributions of source code must retain the above copyright notice, this list of        #
15
// #    conditions and the following disclaimer.                                                   #
16
// #                                                                                               #
17
// # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
18
// #    conditions and the following disclaimer in the documentation and/or other materials        #
19
// #    provided with the distribution.                                                            #
20
// #                                                                                               #
21
// # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
22
// #    endorse or promote products derived from this software without specific prior written      #
23
// #    permission.                                                                                #
24
// #                                                                                               #
25
// # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
26
// # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
27
// # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
28
// # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
29
// # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
30
// # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
31
// # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
32
// # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
33
// # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
34
// # ********************************************************************************************* #
35
// # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
36
// #################################################################################################
37
 
38
 
39
/**********************************************************************//**
40
 * @file floating_point_test/neorv32_zfinx_extension_intrinsics.h
41
 * @author Stephan Nolting
42
 *
43
 * @brief "Intrinsic" library for the NEORV32 single-precision floating-point in x registers (Zfinx) extension
44
 * @brief Also provides emulation functions for all intrinsics (functionality re-built in pure software). The functionality of the emulation
45
 * @brief functions is based on the RISC-V floating-point spec.
46
 *
47
 * @note All operations from this library use the default GCC "round to nearest, ties to even" rounding mode.
48
 *
49
 * @warning This library is just a temporary fall-back until the Zfinx extensions are supported by the upstream RISC-V GCC port.
50
 **************************************************************************/
51
 
52
#ifndef neorv32_zfinx_extension_intrinsics_h
53
#define neorv32_zfinx_extension_intrinsics_h
54
 
55
#define __USE_GNU
56
 
57
#include <fenv.h>
58
//#pragma STDC FENV_ACCESS ON
59
 
60
#define _GNU_SOURCE
61
 
62
#include <float.h>
63
#include <math.h>
64
 
65
 
66
/**********************************************************************//**
67
 * Sanity check
68
 **************************************************************************/
69
#if defined __riscv_f || (__riscv_flen == 32)
70
  #error Application programs using the Zfinx intrinsic library have to be compiled WITHOUT the <F> MARCH ISA attribute!
71
#endif
72
 
73
 
74
/**********************************************************************//**
75
 * Custom data type to access floating-point values as native floats and in binary representation
76
 **************************************************************************/
77
typedef union
78
{
79
  uint32_t binary_value; /**< Access in binary representation */
80
  float    float_value;  /**< Access as native float */
81
} float_conv_t;
82
 
83
 
84
// ################################################################################################
85
// Helper functions
86
// ################################################################################################
87
 
88
/**********************************************************************//**
 * Replace a subnormal (denormal) number by a zero of the same sign.
 *
 * @warning Subnormal numbers are not supported yet, so they are flushed to zero.
 *
 * @param[in] tmp Source operand.
 * @return Signed zero if @p tmp is subnormal, @p tmp itself otherwise.
 **************************************************************************/
float subnormal_flush(float tmp) {

  // anything that is not subnormal passes through untouched
  if (fpclassify(tmp) != FP_SUBNORMAL) {
    return tmp;
  }

  // flushed value keeps the sign of the operand
  return (signbit(tmp) != 0) ? -0.0f : +0.0f;
}
111
 
112
 
113
// ################################################################################################
114
// Exception access
115
// ################################################################################################
116
 
117
/**********************************************************************//**
118
 * Get exception flags from fflags CSR (floating-point hardware).
119
 *
120
 * @return Floating point exception status word.
121
 **************************************************************************/
122
uint32_t get_hw_exceptions(void) {
123
 
124
  uint32_t res = neorv32_cpu_csr_read(CSR_FFLAGS);
125
 
126
  neorv32_cpu_csr_write(CSR_FFLAGS, 0); // clear status word
127
 
128
  return res;
129
}
130
 
131
 
132
/**********************************************************************//**
133
 * Get exception flags from C runtime (floating-point emulation).
134
 *
135
 * @warning WORK-IN-PROGRESS!
136
 *
137
 * @return Floating point exception status word.
138
 **************************************************************************/
139
uint32_t get_sw_exceptions(void) {
140
 
141
  const uint32_t FP_EXC_NV_C = 1 << 0; // invalid operation
142
  const uint32_t FP_EXC_DZ_C = 1 << 1; // divide by zero
143
  const uint32_t FP_EXC_OF_C = 1 << 2; // overflow
144
  const uint32_t FP_EXC_UF_C = 1 << 3; // underflow
145
  const uint32_t FP_EXC_NX_C = 1 << 4; // inexact
146
 
147
  int fpeRaised = fetestexcept(FE_ALL_EXCEPT);
148
 
149
  uint32_t res = 0;
150
 
151
  if (fpeRaised & FE_INVALID)   { res |= FP_EXC_NV_C; }
152
  if (fpeRaised & FE_DIVBYZERO) { res |= FP_EXC_DZ_C; }
153
  if (fpeRaised & FE_OVERFLOW)  { res |= FP_EXC_OF_C; }
154
  if (fpeRaised & FE_UNDERFLOW) { res |= FP_EXC_UF_C; }
155
  if (fpeRaised & FE_INEXACT)   { res |= FP_EXC_NX_C; }
156
 
157
  feclearexcept(FE_ALL_EXCEPT);
158
 
159
  return res;
160
}
161
 
162
 
163
// ################################################################################################
164
// "Intrinsics"
165
// ################################################################################################
166
 
167
/**********************************************************************//**
168
 * Single-precision floating-point addition
169
 *
170
 * @param[in] rs1 Source operand 1 (a0).
171
 * @param[in] rs2 Source operand 2 (a1).
172
 * @return Result.
173
 **************************************************************************/
174 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fadds(float rs1, float rs2) {
175 55 zero_gravi
 
176
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
177
  opa.float_value = rs1;
178
  opb.float_value = rs2;
179
 
180
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
181
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
182
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
183
 
184
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
185
 
186
 
187
  // fadd.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0000000, a1, a0, 0b000, a0, 0b1010011);
188
 
189
 
190 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
191
 
192
 
193 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
194
  return res.float_value;
195
}
196
 
197
 
198
/**********************************************************************//**
199
 * Single-precision floating-point subtraction
200
 *
201
 * @param[in] rs1 Source operand 1 (a0).
202
 * @param[in] rs2 Source operand 2 (a1).
203
 * @return Result.
204
 **************************************************************************/
205 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fsubs(float rs1, float rs2) {
206 55 zero_gravi
 
207
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
208
  opa.float_value = rs1;
209
  opb.float_value = rs2;
210
 
211
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
212
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
213
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
214
 
215
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
216
 
217
 
218
  // fsub.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0000100, a1, a0, 0b000, a0, 0b1010011);
219
 
220
 
221 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
222
 
223
 
224 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
225
  return res.float_value;
226
}
227
 
228
 
229
/**********************************************************************//**
230
 * Single-precision floating-point multiplication
231
 *
232
 * @param[in] rs1 Source operand 1 (a0).
233
 * @param[in] rs2 Source operand 2 (a1).
234
 * @return Result.
235
 **************************************************************************/
236 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fmuls(float rs1, float rs2) {
237 55 zero_gravi
 
238
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
239
  opa.float_value = rs1;
240
  opb.float_value = rs2;
241
 
242
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
243
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
244
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
245
 
246
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
247
 
248
 
249
  // fmul.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0001000, a1, a0, 0b000, a0, 0b1010011);
250
 
251
 
252 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
253
 
254
 
255 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
256
  return res.float_value;
257
}
258
 
259
 
260
/**********************************************************************//**
261
 * Single-precision floating-point minimum
262
 *
263
 * @param[in] rs1 Source operand 1 (a0).
264
 * @param[in] rs2 Source operand 2 (a1).
265
 * @return Result.
266
 **************************************************************************/
267 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fmins(float rs1, float rs2) {
268 55 zero_gravi
 
269
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
270
  opa.float_value = rs1;
271
  opb.float_value = rs2;
272
 
273
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
274
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
275
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
276
 
277
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
278
 
279
 
280
  // fmin.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0010100, a1, a0, 0b000, a0, 0b1010011);
281
 
282
 
283 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
284
 
285
 
286 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
287
  return res.float_value;
288
}
289
 
290
 
291
/**********************************************************************//**
292
 * Single-precision floating-point maximum
293
 *
294
 * @param[in] rs1 Source operand 1 (a0).
295
 * @param[in] rs2 Source operand 2 (a1).
296
 * @return Result.
297
 **************************************************************************/
298 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fmaxs(float rs1, float rs2) {
299 55 zero_gravi
 
300
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
301
  opa.float_value = rs1;
302
  opb.float_value = rs2;
303
 
304
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
305
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
306
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
307
 
308
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
309
 
310
 
311
  // fmax.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0010100, a1, a0, 0b001, a0, 0b1010011);
312
 
313
 
314 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
315
 
316
 
317 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
318
  return res.float_value;
319
}
320
 
321
 
322
/**********************************************************************//**
323
 * Single-precision floating-point convert float to unsigned integer
324
 *
325
 * @param[in] rs1 Source operand 1 (a0).
326
 * @return Result.
327
 **************************************************************************/
328 56 zero_gravi
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_fcvt_wus(float rs1) {
329 55 zero_gravi
 
330
  // view the float operand as a raw 32-bit word through the float_conv_t union
  float_conv_t opa;
331
  opa.float_value = rs1;
332
 
333
  // 'result' and 'tmp_a' are both bound to a0 (the register named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
334
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
335
 
336
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a only appears as an asm input)
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
337
 
338
 
339
  // fcvt.wu.s a0, a0
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1100000, x1, a0, 0b000, a0, 0b1010011);
340
 
341
 
342 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
343
 
344
 
345 55 zero_gravi
  // the value found in a0 is returned as-is
  return result;
346
}
347
 
348
 
349
/**********************************************************************//**
350
 * Single-precision floating-point convert float to signed integer
351
 *
352
 * @param[in] rs1 Source operand 1 (a0).
353
 * @return Result.
354
 **************************************************************************/
355 56 zero_gravi
inline int32_t __attribute__ ((always_inline)) riscv_intrinsic_fcvt_ws(float rs1) {
356 55 zero_gravi
 
357
  // view the float operand as a raw 32-bit word through the float_conv_t union
  float_conv_t opa;
358
  opa.float_value = rs1;
359
 
360
  // 'result' and 'tmp_a' are both bound to a0 (the register named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
361
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
362
 
363
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a only appears as an asm input)
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
364
 
365
 
366
  // fcvt.w.s a0, a0
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1100000, x0, a0, 0b000, a0, 0b1010011);
367
 
368
 
369 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
370
 
371
 
372 55 zero_gravi
  // the value found in a0 is reinterpreted as a signed integer
  return (int32_t)result;
373
}
374
 
375
 
376
/**********************************************************************//**
377
 * Single-precision floating-point convert unsigned integer to float
378
 *
379
 * @param[in] rs1 Source operand 1 (a0).
380
 * @return Result.
381
 **************************************************************************/
382 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fcvt_swu(uint32_t rs1) {
383 55 zero_gravi
 
384
  // 'res' is used to hand the raw result bits back as a float
  float_conv_t res;
385
 
386
  // 'result' and 'tmp_a' are both bound to a0 (the register named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
387
  register uint32_t tmp_a  __asm__ ("a0") = rs1;
388
 
389
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a only appears as an asm input)
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
390
 
391
 
392
  // fcvt.s.wu a0, a0
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1101000, x1, a0, 0b000, a0, 0b1010011);
393
 
394
 
395 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
396
 
397
 
398 55 zero_gravi
  // reinterpret the raw result bits as a float
  res.binary_value = result;
399
  return res.float_value;
400
}
401
 
402
 
403
/**********************************************************************//**
404
 * Single-precision floating-point convert signed integer to float
405
 *
406
 * @param[in] rs1 Source operand 1 (a0).
407
 * @return Result.
408
 **************************************************************************/
409 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fcvt_sw(int32_t rs1) {
410 55 zero_gravi
 
411
  // 'res' is used to hand the raw result bits back as a float
  float_conv_t res;
412
 
413
  // 'result' and 'tmp_a' are both bound to a0 (the register named in the instruction comment below);
  // the signed operand is passed on with its bit pattern unchanged
  register uint32_t result __asm__ ("a0");
414
  register uint32_t tmp_a  __asm__ ("a0") = (uint32_t)rs1;
415
 
416
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a only appears as an asm input)
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
417
 
418
 
419
  // fcvt.s.w a0, a0
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1101000, x0, a0, 0b000, a0, 0b1010011);
420
 
421
 
422 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
423
 
424
 
425 55 zero_gravi
  // reinterpret the raw result bits as a float
  res.binary_value = result;
426
  return res.float_value;
427
}
428
 
429
 
430
/**********************************************************************//**
431
 * Single-precision floating-point equal comparison
432
 *
433
 * @param[in] rs1 Source operand 1 (a0).
434
 * @param[in] rs2 Source operand 2 (a1).
435
 * @return Result.
436
 **************************************************************************/
437 56 zero_gravi
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_feqs(float rs1, float rs2) {
438 55 zero_gravi
 
439
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb;
440
  opa.float_value = rs1;
441
  opb.float_value = rs2;
442
 
443
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
444
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
445
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
446
 
447
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
448
 
449
 
450
  // feq.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1010000, a1, a0, 0b010, a0, 0b1010011);
451
 
452
 
453 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
454
 
455
 
456 55 zero_gravi
  // the value found in a0 is returned as-is
  return result;
457
}
458
 
459
 
460
/**********************************************************************//**
461
 * Single-precision floating-point less-than comparison
462
 *
463
 * @param[in] rs1 Source operand 1 (a0).
464
 * @param[in] rs2 Source operand 2 (a1).
465
 * @return Result.
466
 **************************************************************************/
467 56 zero_gravi
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_flts(float rs1, float rs2) {
468 55 zero_gravi
 
469
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb;
470
  opa.float_value = rs1;
471
  opb.float_value = rs2;
472
 
473
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
474
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
475
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
476
 
477
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
478
 
479
 
480
  // flt.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1010000, a1, a0, 0b001, a0, 0b1010011);
481
 
482
 
483 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
484
 
485
 
486 55 zero_gravi
  // the value found in a0 is returned as-is
  return result;
487
}
488
 
489
 
490
/**********************************************************************//**
491
 * Single-precision floating-point less-than-or-equal comparison
492
 *
493
 * @param[in] rs1 Source operand 1 (a0).
494
 * @param[in] rs2 Source operand 2 (a1).
495
 * @return Result.
496
 **************************************************************************/
497 56 zero_gravi
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_fles(float rs1, float rs2) {
498 55 zero_gravi
 
499
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb;
500
  opa.float_value = rs1;
501
  opb.float_value = rs2;
502
 
503
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
504
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
505
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
506
 
507
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
508
 
509
 
510
  // fle.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1010000, a1, a0, 0b000, a0, 0b1010011);
511
 
512
 
513 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
514
 
515
 
516 55 zero_gravi
  // the value found in a0 is returned as-is
  return result;
517
}
518
 
519
 
520
/**********************************************************************//**
521
 * Single-precision floating-point sign-injection
522
 *
523
 * @param[in] rs1 Source operand 1 (a0).
524
 * @param[in] rs2 Source operand 2 (a1).
525
 * @return Result.
526
 **************************************************************************/
527 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fsgnjs(float rs1, float rs2) {
528 55 zero_gravi
 
529
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
530
  opa.float_value = rs1;
531
  opb.float_value = rs2;
532
 
533
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
534
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
535
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
536
 
537
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
538
 
539
 
540
  // fsgnj.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b000, a0, 0b1010011);
541
 
542
 
543 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
544
 
545
 
546 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
547
  return res.float_value;
548
}
549
 
550
 
551
/**********************************************************************//**
552
 * Single-precision floating-point sign-injection NOT
553
 *
554
 * @param[in] rs1 Source operand 1 (a0).
555
 * @param[in] rs2 Source operand 2 (a1).
556
 * @return Result.
557
 **************************************************************************/
558 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fsgnjns(float rs1, float rs2) {
559 55 zero_gravi
 
560
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
561
  opa.float_value = rs1;
562
  opb.float_value = rs2;
563
 
564
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
565
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
566
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
567
 
568
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
569
 
570
 
571
  // fsgnjn.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b001, a0, 0b1010011);
572
 
573
 
574 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
575
 
576
 
577 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
578
  return res.float_value;
579
}
580
 
581
 
582
/**********************************************************************//**
583
 * Single-precision floating-point sign-injection XOR
584
 *
585
 * @param[in] rs1 Source operand 1 (a0).
586
 * @param[in] rs2 Source operand 2 (a1).
587
 * @return Result.
588
 **************************************************************************/
589 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fsgnjxs(float rs1, float rs2) {
590 55 zero_gravi
 
591
  // view the float operands as raw 32-bit words through the float_conv_t union
  float_conv_t opa, opb, res;
592
  opa.float_value = rs1;
593
  opb.float_value = rs2;
594
 
595
  // 'result' and 'tmp_a' are both bound to a0, 'tmp_b' to a1 (the registers named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
596
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
597
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
598
 
599
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a/tmp_b only appear as asm inputs)
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
600
 
601
 
602
  // fsgnjx.s a0, a0, a1
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b010, a0, 0b1010011);
603
 
604
 
605 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
606
 
607
 
608 55 zero_gravi
  // hand the raw result bits back as a float
  res.binary_value = result;
609
  return res.float_value;
610
}
611
 
612
 
613
/**********************************************************************//**
614
 * Single-precision floating-point number classification
615
 *
616
 * @param[in] rs1 Source operand 1 (a0).
617
 * @return Result.
618
 **************************************************************************/
619 56 zero_gravi
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_fclasss(float rs1) {
620 55 zero_gravi
 
621
  // view the float operand as a raw 32-bit word through the float_conv_t union
  float_conv_t opa;
622
  opa.float_value = rs1;
623
 
624
  // 'result' and 'tmp_a' are both bound to a0 (the register named in the instruction comment below)
  register uint32_t result __asm__ ("a0");
625
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
626
 
627
  // dummy instruction to prevent GCC "constprop" optimization
  // (destination is x0, so nothing is written; tmp_a only appears as an asm input)
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
628
 
629
 
630
  // fclass.s a0, a0
  // NOTE(review): CUSTOM_INSTR_R2_TYPE is defined outside the visible source; presumably it emits this instruction word - confirm
  CUSTOM_INSTR_R2_TYPE(0b1110000, x0, a0, 0b001, a0, 0b1010011);
631
 
632
 
633 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
  // (a0 is added to x0 and written back to a0; 'result' becomes an asm output)
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
634
 
635
 
636 55 zero_gravi
  // the value found in a0 is returned as-is
  return result;
637
}
638
 
639
 
640
// ################################################################################################
641
// !!! UNSUPPORTED instructions !!!
642
// ################################################################################################
643
 
644
/**********************************************************************//**
645
 * Single-precision floating-point division
646
 *
647
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
648
 *
649
 * @param[in] rs1 Source operand 1 (a0).
650
 * @param[in] rs2 Source operand 2 (a1).
651
 * @return Result.
652
 **************************************************************************/
653 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fdivs(float rs1, float rs2) {
654 55 zero_gravi
 
655
  float_conv_t opa, opb, res;
656
  opa.float_value = rs1;
657
  opb.float_value = rs2;
658
 
659
  register uint32_t result __asm__ ("a0");
660
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
661
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
662
 
663
  // dummy instruction to prevent GCC "constprop" optimization
664
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
665
 
666
  // fdiv.s a0, a0, x1
667
  CUSTOM_INSTR_R2_TYPE(0b0001100, a1, a0, 0b000, a0, 0b1010011);
668
 
669 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
670
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
671
 
672 55 zero_gravi
  res.binary_value = result;
673
  return res.float_value;
674
}
675
 
676
 
677
/**********************************************************************//**
678
 * Single-precision floating-point square root
679
 *
680
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
681
 *
682
 * @param[in] rs1 Source operand 1 (a0).
683
 * @return Result.
684
 **************************************************************************/
685 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fsqrts(float rs1) {
686 55 zero_gravi
 
687
  float_conv_t opa, res;
688
  opa.float_value = rs1;
689
 
690
  register uint32_t result __asm__ ("a0");
691
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
692
 
693
  // dummy instruction to prevent GCC "constprop" optimization
694
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
695
 
696
  // fsqrt.s a0, a0, a1
697
  CUSTOM_INSTR_R2_TYPE(0b0101100, a1, a0, 0b000, a0, 0b1010011);
698
 
699 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
700
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
701
 
702 55 zero_gravi
  res.binary_value = result;
703
  return res.float_value;
704
}
705
 
706
 
707
/**********************************************************************//**
708
 * Single-precision floating-point fused multiply-add
709
 *
710
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
711
 *
712
 * @param[in] rs1 Source operand 1 (a0)
713
 * @param[in] rs2 Source operand 2 (a1)
714
 * @param[in] rs3 Source operand 3 (a2)
715
 * @return Result.
716
 **************************************************************************/
717 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fmadds(float rs1, float rs2, float rs3) {
718 55 zero_gravi
 
719
  float_conv_t opa, opb, opc, res;
720
  opa.float_value = rs1;
721
  opb.float_value = rs2;
722
  opc.float_value = rs3;
723
 
724
  register uint32_t result __asm__ ("a0");
725
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
726
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
727
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
728
 
729
  // dummy instruction to prevent GCC "constprop" optimization
730
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
731
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
732
 
733
  // fmadd.s a0, a0, a1, a2
734
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1000011);
735
 
736 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
737
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
738
 
739 55 zero_gravi
  res.binary_value = result;
740
  return res.float_value;
741
}
742
 
743
 
744
/**********************************************************************//**
745
 * Single-precision floating-point fused multiply-sub
746
 *
747
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
748
 *
749
 * @param[in] rs1 Source operand 1 (a0)
750
 * @param[in] rs2 Source operand 2 (a1)
751
 * @param[in] rs3 Source operand 3 (a2)
752
 * @return Result.
753
 **************************************************************************/
754 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fmsubs(float rs1, float rs2, float rs3) {
755 55 zero_gravi
 
756
  float_conv_t opa, opb, opc, res;
757
  opa.float_value = rs1;
758
  opb.float_value = rs2;
759
  opc.float_value = rs3;
760
 
761
  register uint32_t result __asm__ ("a0");
762
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
763
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
764
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
765
 
766
  // dummy instruction to prevent GCC "constprop" optimization
767
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
768
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
769
 
770
  // fmsub.s a0, a0, a1, a2
771
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1000111);
772
 
773 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
774
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
775
 
776 55 zero_gravi
  res.binary_value = result;
777
  return res.float_value;
778
}
779
 
780
 
781
/**********************************************************************//**
782
 * Single-precision floating-point fused negated multiply-sub
783
 *
784
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
785
 *
786
 * @param[in] rs1 Source operand 1 (a0)
787
 * @param[in] rs2 Source operand 2 (a1)
788
 * @param[in] rs3 Source operand 3 (a2)
789
 * @return Result.
790
 **************************************************************************/
791 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fnmsubs(float rs1, float rs2, float rs3) {
792 55 zero_gravi
 
793
  float_conv_t opa, opb, opc, res;
794
  opa.float_value = rs1;
795
  opb.float_value = rs2;
796
  opc.float_value = rs3;
797
 
798
  register uint32_t result __asm__ ("a0");
799
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
800
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
801
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
802
 
803
  // dummy instruction to prevent GCC "constprop" optimization
804
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
805
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
806
 
807
  // fnmsub.s a0, a0, a1, a2
808
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1001011);
809
 
810 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
811
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
812
 
813 55 zero_gravi
  res.binary_value = result;
814
  return res.float_value;
815
}
816
 
817
 
818
/**********************************************************************//**
819
 * Single-precision floating-point fused negated multiply-add
820
 *
821
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
822
 *
823
 * @param[in] rs1 Source operand 1 (a0)
824
 * @param[in] rs2 Source operand 2 (a1)
825
 * @param[in] rs3 Source operand 3 (a2)
826
 * @return Result.
827
 **************************************************************************/
828 56 zero_gravi
inline float __attribute__ ((always_inline)) riscv_intrinsic_fnmadds(float rs1, float rs2, float rs3) {
829 55 zero_gravi
 
830
  float_conv_t opa, opb, opc, res;
831
  opa.float_value = rs1;
832
  opb.float_value = rs2;
833
  opc.float_value = rs3;
834
 
835
  register uint32_t result __asm__ ("a0");
836
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
837
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
838
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
839
 
840
  // dummy instruction to prevent GCC "constprop" optimization
841
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
842
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
843
 
844
  // fnmadd.s a0, a0, a1, a2
845
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1001111);
846
 
847 56 zero_gravi
  // dummy instruction to prevent GCC "constprop" optimization
848
  asm volatile ("add %[res], %[input], x0" : [res] "=r" (result) : [input] "r" (result) );
849
 
850 55 zero_gravi
  res.binary_value = result;
851
  return res.float_value;
852
}
853
 
854
 
855
// ################################################################################################
856
// Emulation functions
857
// ################################################################################################
858
 
859
/**********************************************************************//**
 * Single-precision floating-point addition (software reference)
 *
 * Both operands and the sum are passed through subnormal_flush() (defined
 * elsewhere in this file; presumably flushes subnormal values to zero).
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fadds(float rs1, float rs2) {

  const float opa = subnormal_flush(rs1);
  const float opb = subnormal_flush(rs2);

  return subnormal_flush(opa + opb);
}
874
 
875
 
876
/**********************************************************************//**
 * Single-precision floating-point subtraction (software reference)
 *
 * Both operands and the difference are passed through subnormal_flush()
 * (defined elsewhere in this file; presumably flushes subnormal values to zero).
 *
 * @param[in] rs1 Source operand 1 (minuend).
 * @param[in] rs2 Source operand 2 (subtrahend).
 * @return Result (rs1 - rs2).
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fsubs(float rs1, float rs2) {

  const float opa = subnormal_flush(rs1);
  const float opb = subnormal_flush(rs2);

  return subnormal_flush(opa - opb);
}
891
 
892
 
893
/**********************************************************************//**
 * Single-precision floating-point multiplication (software reference)
 *
 * Both operands and the product are passed through subnormal_flush()
 * (defined elsewhere in this file; presumably flushes subnormal values to zero).
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fmuls(float rs1, float rs2) {

  const float opa = subnormal_flush(rs1);
  const float opb = subnormal_flush(rs2);

  return subnormal_flush(opa * opb);
}
908
 
909
 
910
/**********************************************************************//**
 * Single-precision floating-point minimum (software reference)
 *
 * Operands are passed through subnormal_flush() first (defined elsewhere in
 * this file). NaN handling: if both operands are NaN the result is nanf("");
 * if exactly one is NaN the other operand is returned. A (+0, -0) pair in
 * either order yields -0.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fmins(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  union {
    uint32_t binary_value; /**< Access in binary representation */
    float    float_value;  /**< Access as native float */
  } tmp_a, tmp_b;

  const int a_is_nan = (fpclassify(opa) == FP_NAN);
  const int b_is_nan = (fpclassify(opb) == FP_NAN);

  if (a_is_nan && b_is_nan) {
    return nanf("");
  }

  if (a_is_nan) {
    return opb;
  }

  if (b_is_nan) {
    return opa;
  }

  // RISC-V spec: -0 < +0
  // (compared on the bit patterns because -0.0f == +0.0f as floats)
  tmp_a.float_value = opa;
  tmp_b.float_value = opb;
  if (((tmp_a.binary_value == 0x80000000) && (tmp_b.binary_value == 0x00000000)) ||
      ((tmp_a.binary_value == 0x00000000) && (tmp_b.binary_value == 0x80000000))) {
    return -0.0f;
  }

  // single-precision variant; no detour through double
  return fminf(opa, opb);
}
949
 
950
 
951
/**********************************************************************//**
 * Single-precision floating-point maximum (software reference)
 *
 * Operands are passed through subnormal_flush() first (defined elsewhere in
 * this file). NaN handling: if both operands are NaN the result is nanf("");
 * if exactly one is NaN the other operand is returned. A (+0, -0) pair in
 * either order yields +0.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fmaxs(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  union {
    uint32_t binary_value; /**< Access in binary representation */
    float    float_value;  /**< Access as native float */
  } tmp_a, tmp_b;

  const int a_is_nan = (fpclassify(opa) == FP_NAN);
  const int b_is_nan = (fpclassify(opb) == FP_NAN);

  if (a_is_nan && b_is_nan) {
    return nanf("");
  }

  if (a_is_nan) {
    return opb;
  }

  if (b_is_nan) {
    return opa;
  }

  // RISC-V spec: -0 < +0
  // (compared on the bit patterns because -0.0f == +0.0f as floats)
  tmp_a.float_value = opa;
  tmp_b.float_value = opb;
  if (((tmp_a.binary_value == 0x80000000) && (tmp_b.binary_value == 0x00000000)) ||
      ((tmp_a.binary_value == 0x00000000) && (tmp_b.binary_value == 0x80000000))) {
    return +0.0f;
  }

  // single-precision variant; no detour through double
  return fmaxf(opa, opb);
}
991
 
992
 
993
/**********************************************************************//**
 * Single-precision floating-point float to unsigned integer (software reference)
 *
 * The operand is passed through subnormal_flush() (defined elsewhere in this
 * file) and rounded with roundf() (halfway cases away from zero).
 * Values that do not fit into uint32_t saturate instead of invoking the
 * undefined behaviour of an out-of-range float-to-integer cast:
 * NaN and positive overflow give UINT32_MAX, negative results give 0
 * (RISC-V FCVT.WU.S invalid-input results).
 *
 * NOTE(review): roundf() rounds ties away from zero; confirm this matches the
 * rounding mode used by the hardware instruction under test.
 *
 * @param[in] rs1 Source operand 1.
 * @return Result.
 **************************************************************************/
uint32_t __attribute__ ((noinline)) riscv_emulate_fcvt_wus(float rs1) {

  float opa = subnormal_flush(rs1);
  float rounded = roundf(opa);

  // NaN or too large (>= 2^32, includes +inf)
  if (isnan(rounded) || (rounded >= 4294967296.0f)) {
    return UINT32_MAX;
  }

  // negative (includes -inf); -0.0f is not "< 0" and converts to 0 below
  if (rounded < 0.0f) {
    return 0;
  }

  return (uint32_t)rounded;
}
1005
 
1006
 
1007
/**********************************************************************//**
 * Single-precision floating-point float to signed integer (software reference)
 *
 * The operand is passed through subnormal_flush() (defined elsewhere in this
 * file) and rounded with roundf() (halfway cases away from zero).
 * Values that do not fit into int32_t saturate instead of invoking the
 * undefined behaviour of an out-of-range float-to-integer cast:
 * NaN and positive overflow give INT32_MAX, negative overflow gives INT32_MIN
 * (RISC-V FCVT.W.S invalid-input results).
 *
 * NOTE(review): roundf() rounds ties away from zero; confirm this matches the
 * rounding mode used by the hardware instruction under test.
 *
 * @param[in] rs1 Source operand 1.
 * @return Result.
 **************************************************************************/
int32_t __attribute__ ((noinline)) riscv_emulate_fcvt_ws(float rs1) {

  float opa = subnormal_flush(rs1);
  float rounded = roundf(opa);

  // NaN or too large (>= 2^31, includes +inf)
  if (isnan(rounded) || (rounded >= 2147483648.0f)) {
    return INT32_MAX;
  }

  // too small (< -2^31, includes -inf); -2^31 itself is representable
  if (rounded < -2147483648.0f) {
    return INT32_MIN;
  }

  return (int32_t)rounded;
}
1019
 
1020
 
1021
/**********************************************************************//**
 * Single-precision floating-point unsigned integer to float (software reference)
 *
 * Plain C conversion; values that are not exactly representable as float are
 * rounded by the compiler/runtime's integer-to-float conversion.
 *
 * @param[in] rs1 Source operand 1.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fcvt_swu(uint32_t rs1) {

  return (float)rs1;
}
1031
 
1032
 
1033
/**********************************************************************//**
 * Single-precision floating-point signed integer to float (software reference)
 *
 * Plain C conversion; values that are not exactly representable as float are
 * rounded by the compiler/runtime's integer-to-float conversion.
 *
 * @param[in] rs1 Source operand 1.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fcvt_sw(int32_t rs1) {

  return (float)rs1;
}
1043
 
1044
 
1045
/**********************************************************************//**
 * Single-precision floating-point equal comparison (software reference)
 *
 * Operands are passed through subnormal_flush() first (defined elsewhere in
 * this file). Any NaN operand makes the comparison false.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return 1 if the operands compare equal, 0 otherwise.
 **************************************************************************/
uint32_t __attribute__ ((noinline)) riscv_emulate_feqs(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  // equal means neither less nor greater; the macro calls are wrapped in
  // parentheses so the condition does not depend on how libc expands them
  if (isless(opa, opb) || isgreater(opa, opb)) {
    return 0;
  }

  return 1;
}
1071
 
1072
 
1073
/**********************************************************************//**
 * Single-precision floating-point less-than comparison (software reference)
 *
 * Operands are passed through subnormal_flush() first (defined elsewhere in
 * this file). Any NaN operand makes the comparison false.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return 1 if rs1 < rs2, 0 otherwise.
 **************************************************************************/
uint32_t __attribute__ ((noinline)) riscv_emulate_flts(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  // macro call wrapped in parentheses so the condition does not depend on
  // how libc expands it
  if (isless(opa, opb)) {
    return 1;
  }

  return 0;
}
1096
 
1097
 
1098
/**********************************************************************//**
 * Single-precision floating-point less-than-or-equal comparison (software reference)
 *
 * Operands are passed through subnormal_flush() first (defined elsewhere in
 * this file). Any NaN operand makes the comparison false.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return 1 if rs1 <= rs2, 0 otherwise.
 **************************************************************************/
uint32_t __attribute__ ((noinline)) riscv_emulate_fles(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  // macro call wrapped in parentheses so the condition does not depend on
  // how libc expands it
  if (islessequal(opa, opb)) {
    return 1;
  }

  return 0;
}
1121
 
1122
 
1123
/**********************************************************************//**
 * Single-precision floating-point sign-injection (software reference)
 *
 * Returns operand 1 carrying the sign of operand 2. Both operands are passed
 * through subnormal_flush() first (defined elsewhere in this file).
 *
 * @param[in] rs1 Source operand 1 (magnitude).
 * @param[in] rs2 Source operand 2 (sign source).
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fsgnjs(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  // signbit() only guarantees "non-zero if negative", so normalise to 0/1
  const int a_negative = (signbit(opa) != 0);
  const int b_negative = (signbit(opb) != 0);

  // operand 1 already has the requested sign when both signs agree;
  // otherwise flip it
  return (a_negative != b_negative) ? -opa : opa;
}
1158
 
1159
 
1160
/**********************************************************************//**
 * Single-precision floating-point sign-injection NOT (software reference)
 *
 * Returns operand 1 carrying the inverted sign of operand 2. Both operands
 * are passed through subnormal_flush() first (defined elsewhere in this file).
 *
 * @param[in] rs1 Source operand 1 (magnitude).
 * @param[in] rs2 Source operand 2 (sign source, inverted).
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fsgnjns(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  // signbit() only guarantees "non-zero if negative", so normalise to 0/1
  const int a_negative = (signbit(opa) != 0);
  const int b_negative = (signbit(opb) != 0);

  // the requested sign is the opposite of operand 2's sign, so operand 1
  // has to be flipped exactly when both signs currently agree
  return (a_negative == b_negative) ? -opa : opa;
}
1195
 
1196
 
1197
/**********************************************************************//**
 * Single-precision floating-point sign-injection XOR (software reference)
 *
 * Returns operand 1 with its sign replaced by (sign of operand 1) XOR
 * (sign of operand 2). Both operands are passed through subnormal_flush()
 * first (defined elsewhere in this file).
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fsgnjxs(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  // XOR-ing operand 1's sign with operand 2's sign flips operand 1 exactly
  // when operand 2 is negative, independent of operand 1's own sign
  const int b_negative = (signbit(opb) != 0);

  return b_negative ? -opa : opa;
}
1232
 
1233
 
1234
/**********************************************************************//**
 * Single-precision floating-point number classification (software reference)
 *
 * The operand is passed through subnormal_flush() first (defined elsewhere
 * in this file), then classified with fpclassify()/signbit().
 *
 * @param[in] rs1 Source operand 1.
 * @return One-hot class mask (bit positions as listed in the function body);
 *         0 if fpclassify() reports none of the handled categories.
 **************************************************************************/
uint32_t __attribute__ ((noinline)) riscv_emulate_fclasss(float rs1) {

  float opa = subnormal_flush(rs1);

  union {
    uint32_t binary_value; /**< Access in binary representation */
    float    float_value;  /**< Access as native float */
  } aux;

  // RISC-V classify result layout
  const uint32_t CLASS_NEG_INF    = 1u << 0; // negative infinity
  const uint32_t CLASS_NEG_NORM   = 1u << 1; // negative normal number
  const uint32_t CLASS_NEG_DENORM = 1u << 2; // negative subnormal number
  const uint32_t CLASS_NEG_ZERO   = 1u << 3; // negative zero
  const uint32_t CLASS_POS_ZERO   = 1u << 4; // positive zero
  const uint32_t CLASS_POS_DENORM = 1u << 5; // positive subnormal number
  const uint32_t CLASS_POS_NORM   = 1u << 6; // positive normal number
  const uint32_t CLASS_POS_INF    = 1u << 7; // positive infinity
  const uint32_t CLASS_SNAN       = 1u << 8; // signaling NaN (sNaN)
  const uint32_t CLASS_QNAN       = 1u << 9; // quiet NaN (qNaN)

  const int negative = (signbit(opa) != 0);

  // fpclassify() yields exactly one category, so at most one case applies
  switch (fpclassify(opa)) {

    case FP_INFINITE:
      return negative ? CLASS_NEG_INF : CLASS_POS_INF;

    case FP_ZERO:
      return negative ? CLASS_NEG_ZERO : CLASS_POS_ZERO;

    case FP_NORMAL:
      return negative ? CLASS_NEG_NORM : CLASS_POS_NORM;

    case FP_SUBNORMAL:
      return negative ? CLASS_NEG_DENORM : CLASS_POS_DENORM;

    case FP_NAN:
      aux.float_value = opa;
      // bit 22 (mantissa's MSB) set -> quiet NaN, cleared -> signaling NaN
      return ((aux.binary_value >> 22) & 0b1) ? CLASS_QNAN : CLASS_SNAN;

    default:
      return 0;
  }
}
1303
 
1304
 
1305
/**********************************************************************//**
 * Single-precision floating-point division (software reference)
 *
 * Both operands and the quotient are passed through subnormal_flush()
 * (defined elsewhere in this file; presumably flushes subnormal values to zero).
 *
 * @param[in] rs1 Source operand 1 (dividend).
 * @param[in] rs2 Source operand 2 (divisor).
 * @return Result (rs1 / rs2).
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fdivs(float rs1, float rs2) {

  const float opa = subnormal_flush(rs1);
  const float opb = subnormal_flush(rs2);

  return subnormal_flush(opa / opb);
}
1320
 
1321
 
1322
/**********************************************************************//**
 * Single-precision floating-point square root (software reference)
 *
 * The operand and the sqrtf() result are passed through subnormal_flush()
 * (defined elsewhere in this file; presumably flushes subnormal values to zero).
 *
 * @param[in] rs1 Source operand 1.
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fsqrts(float rs1) {

  const float opa = subnormal_flush(rs1);

  return subnormal_flush(sqrtf(opa));
}
1335
 
1336
 
1337
/**********************************************************************//**
 * Single-precision floating-point fused multiply-add (software reference)
 *
 * Computes (rs1 * rs2) + rs3 on operands passed through subnormal_flush()
 * (defined elsewhere in this file); the result is passed through it as well.
 *
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
 *
 * @note Written as a separate C multiply and add: unless the compiler
 * contracts the expression, the product is rounded before the addition.
 *
 * @warning This instruction is not supported!
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fmadds(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  float res = (opa * opb) + opc;
  return subnormal_flush(res);
}
1358
 
1359
 
1360
/**********************************************************************//**
 * Single-precision floating-point fused multiply-sub (software reference)
 *
 * Computes (rs1 * rs2) - rs3 on operands passed through subnormal_flush()
 * (defined elsewhere in this file); the result is passed through it as well.
 *
 * @note Written as a separate C multiply and subtract: unless the compiler
 * contracts the expression, the product is rounded before the subtraction.
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fmsubs(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  float res = (opa * opb) - opc;
  return subnormal_flush(res);
}
1377
 
1378
 
1379
/**********************************************************************//**
 * Single-precision floating-point fused negated multiply-sub (software reference)
 *
 * Computes -(rs1 * rs2) + rs3 on operands passed through subnormal_flush()
 * (defined elsewhere in this file); the result is passed through it as well.
 *
 * @note Written as a separate C multiply and add: unless the compiler
 * contracts the expression, the product is rounded before the addition.
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fnmsubs(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  float res = -(opa * opb) + opc;
  return subnormal_flush(res);
}
1396
 
1397
 
1398
/**********************************************************************//**
 * Single-precision floating-point fused negated multiply-add (software reference)
 *
 * Computes -(rs1 * rs2) - rs3 on operands passed through subnormal_flush()
 * (defined elsewhere in this file); the result is passed through it as well.
 *
 * @note Written as a separate C multiply and subtract: unless the compiler
 * contracts the expression, the product is rounded before the subtraction.
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float __attribute__ ((noinline)) riscv_emulate_fnmadds(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  float res = -(opa * opb) - opc;
  return subnormal_flush(res);
}
1415
 
1416
 
1417
#endif // neorv32_zfinx_extension_intrinsics_h
1418
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.