OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

[/] [neorv32/] [trunk/] [sw/] [example/] [floating_point_test/] [neorv32_zfinx_extension_intrinsics.h] - Blame information for rev 55

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 55 zero_gravi
// #################################################################################################
2
// # << NEORV32 - Intrinsics + Emulation Functions for the RISC-V "Zfinx" CPU extension >>         #
3
// # ********************************************************************************************* #
4
// # The intrinsics provided by this library allow to use the hardware floating-point unit of the  #
5
// # RISC-V Zfinx CPU extension without the need for Zfinx support by the compiler / toolchain.    #
6
// # ********************************************************************************************* #
7
// # BSD 3-Clause License                                                                          #
8
// #                                                                                               #
9
// # Copyright (c) 2021, Stephan Nolting. All rights reserved.                                     #
10
// #                                                                                               #
11
// # Redistribution and use in source and binary forms, with or without modification, are          #
12
// # permitted provided that the following conditions are met:                                     #
13
// #                                                                                               #
14
// # 1. Redistributions of source code must retain the above copyright notice, this list of        #
15
// #    conditions and the following disclaimer.                                                   #
16
// #                                                                                               #
17
// # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #
18
// #    conditions and the following disclaimer in the documentation and/or other materials        #
19
// #    provided with the distribution.                                                            #
20
// #                                                                                               #
21
// # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #
22
// #    endorse or promote products derived from this software without specific prior written      #
23
// #    permission.                                                                                #
24
// #                                                                                               #
25
// # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #
26
// # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #
27
// # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #
28
// # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
29
// # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
30
// # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #
31
// # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #
32
// # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #
33
// # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #
34
// # ********************************************************************************************* #
35
// # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #
36
// #################################################################################################
37
 
38
 
39
/**********************************************************************//**
40
 * @file floating_point_test/neorv32_zfinx_extension_intrinsics.h
41
 * @author Stephan Nolting
42
 *
43
 * @brief "Intrinsic" library for the NEORV32 single-precision floating-point in x registers (Zfinx) extension
44
 * @brief Also provides emulation functions for all intrinsics (functionality re-built in pure software). The functionality of the emulation
45
 * @brief functions is based on the RISC-V floating-point spec.
46
 *
47
 * @note All operations from this library use the default GCC "round to nearest, ties to even" rounding mode.
48
 *
49
 * @warning This library is just a temporary fall-back until the Zfinx extensions are supported by the upstream RISC-V GCC port.
50
 **************************************************************************/
51
 
52
#ifndef neorv32_zfinx_extension_intrinsics_h
53
#define neorv32_zfinx_extension_intrinsics_h
54
 
55
#define __USE_GNU
56
 
57
#include <fenv.h>
58
//#pragma STDC FENV_ACCESS ON
59
 
60
#define _GNU_SOURCE
61
 
62
#include <float.h>
63
#include <math.h>
64
 
65
 
66
/**********************************************************************//**
67
 * Sanity check
68
 **************************************************************************/
69
#if defined __riscv_f || (__riscv_flen == 32)
70
  #error Application programs using the Zfinx intrinsic library have to be compiled WITHOUT the <F> MARCH ISA attribute!
71
#endif
72
 
73
 
74
/**********************************************************************//**
 * Custom data type to access a single-precision value either as a native
 * float or as its raw 32-bit binary representation (both members share
 * the same storage).
 **************************************************************************/
typedef union
{
  uint32_t binary_value; /**< Access in binary representation */
  float    float_value;  /**< Access as native float */
} float_conv_t;
82
 
83
 
84
// ################################################################################################
85
// Helper functions
86
// ################################################################################################
87
 
88
/**********************************************************************//**
 * Flush to zero if denormal number.
 *
 * Any value that is not classified as subnormal is returned unchanged.
 * A subnormal value is replaced by a zero that carries the same sign.
 *
 * @warning Subnormal numbers are not supported yet! Flush them to zero.
 *
 * @param[in] tmp Source operand 1.
 * @return Result.
 **************************************************************************/
float subnormal_flush(float tmp) {

  // everything except subnormals passes through untouched
  if (fpclassify(tmp) != FP_SUBNORMAL) {
    return tmp;
  }

  // keep the sign of the flushed value
  return (signbit(tmp) != 0) ? -0.0f : +0.0f;
}
111
 
112
 
113
// ################################################################################################
114
// Exception access
115
// ################################################################################################
116
 
117
/**********************************************************************//**
118
 * Get exception flags from fflags CSR (floating-point hardware).
119
 *
120
 * @return Floating point exception status word.
121
 **************************************************************************/
122
uint32_t get_hw_exceptions(void) {
123
 
124
  uint32_t res = neorv32_cpu_csr_read(CSR_FFLAGS);
125
 
126
  neorv32_cpu_csr_write(CSR_FFLAGS, 0); // clear status word
127
 
128
  return res;
129
}
130
 
131
 
132
/**********************************************************************//**
133
 * Get exception flags from C runtime (floating-point emulation).
134
 *
135
 * @warning WORK-IN-PROGRESS!
136
 *
137
 * @return Floating point exception status word.
138
 **************************************************************************/
139
uint32_t get_sw_exceptions(void) {
140
 
141
  const uint32_t FP_EXC_NV_C = 1 << 0; // invalid operation
142
  const uint32_t FP_EXC_DZ_C = 1 << 1; // divide by zero
143
  const uint32_t FP_EXC_OF_C = 1 << 2; // overflow
144
  const uint32_t FP_EXC_UF_C = 1 << 3; // underflow
145
  const uint32_t FP_EXC_NX_C = 1 << 4; // inexact
146
 
147
  int fpeRaised = fetestexcept(FE_ALL_EXCEPT);
148
 
149
  uint32_t res = 0;
150
 
151
  if (fpeRaised & FE_INVALID)   { res |= FP_EXC_NV_C; }
152
  if (fpeRaised & FE_DIVBYZERO) { res |= FP_EXC_DZ_C; }
153
  if (fpeRaised & FE_OVERFLOW)  { res |= FP_EXC_OF_C; }
154
  if (fpeRaised & FE_UNDERFLOW) { res |= FP_EXC_UF_C; }
155
  if (fpeRaised & FE_INEXACT)   { res |= FP_EXC_NX_C; }
156
 
157
  feclearexcept(FE_ALL_EXCEPT);
158
 
159
  return res;
160
}
161
 
162
 
163
// ################################################################################################
164
// "Intrinsics"
165
// ################################################################################################
166
 
167
/**********************************************************************//**
168
 * Single-precision floating-point addition
169
 *
170
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
171
 *
172
 * @param[in] rs1 Source operand 1 (a0).
173
 * @param[in] rs2 Source operand 2 (a1).
174
 * @return Result.
175
 **************************************************************************/
176
float __attribute__ ((noinline)) riscv_intrinsic_fadds(float rs1, float rs2) {
177
 
178
  float_conv_t opa, opb, res;
179
  opa.float_value = rs1;
180
  opb.float_value = rs2;
181
 
182
  register uint32_t result __asm__ ("a0");
183
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
184
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
185
 
186
  // dummy instruction to prevent GCC "constprop" optimization
187
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
188
 
189
  // fadd.s a0, a0, a1
190
  CUSTOM_INSTR_R2_TYPE(0b0000000, a1, a0, 0b000, a0, 0b1010011);
191
 
192
  res.binary_value = result;
193
  return res.float_value;
194
}
195
 
196
 
197
/**********************************************************************//**
198
 * Single-precision floating-point subtraction
199
 *
200
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
201
 *
202
 * @param[in] rs1 Source operand 1 (a0).
203
 * @param[in] rs2 Source operand 2 (a1).
204
 * @return Result.
205
 **************************************************************************/
206
float __attribute__ ((noinline)) riscv_intrinsic_fsubs(float rs1, float rs2) {
207
 
208
  float_conv_t opa, opb, res;
209
  opa.float_value = rs1;
210
  opb.float_value = rs2;
211
 
212
  register uint32_t result __asm__ ("a0");
213
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
214
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
215
 
216
  // dummy instruction to prevent GCC "constprop" optimization
217
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
218
 
219
  // fsub.s a0, a0, a1
220
  CUSTOM_INSTR_R2_TYPE(0b0000100, a1, a0, 0b000, a0, 0b1010011);
221
 
222
  res.binary_value = result;
223
  return res.float_value;
224
}
225
 
226
 
227
/**********************************************************************//**
228
 * Single-precision floating-point multiplication
229
 *
230
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
231
 *
232
 * @param[in] rs1 Source operand 1 (a0).
233
 * @param[in] rs2 Source operand 2 (a1).
234
 * @return Result.
235
 **************************************************************************/
236
float __attribute__ ((noinline)) riscv_intrinsic_fmuls(float rs1, float rs2) {
237
 
238
  float_conv_t opa, opb, res;
239
  opa.float_value = rs1;
240
  opb.float_value = rs2;
241
 
242
  register uint32_t result __asm__ ("a0");
243
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
244
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
245
 
246
  // dummy instruction to prevent GCC "constprop" optimization
247
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
248
 
249
  // fmul.s a0, a0, a1
250
  CUSTOM_INSTR_R2_TYPE(0b0001000, a1, a0, 0b000, a0, 0b1010011);
251
 
252
  res.binary_value = result;
253
  return res.float_value;
254
}
255
 
256
 
257
/**********************************************************************//**
258
 * Single-precision floating-point minimum
259
 *
260
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
261
 *
262
 * @param[in] rs1 Source operand 1 (a0).
263
 * @param[in] rs2 Source operand 2 (a1).
264
 * @return Result.
265
 **************************************************************************/
266
float __attribute__ ((noinline)) riscv_intrinsic_fmins(float rs1, float rs2) {
267
 
268
  float_conv_t opa, opb, res;
269
  opa.float_value = rs1;
270
  opb.float_value = rs2;
271
 
272
  register uint32_t result __asm__ ("a0");
273
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
274
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
275
 
276
  // dummy instruction to prevent GCC "constprop" optimization
277
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
278
 
279
  // fmin.s a0, a0, a1
280
  CUSTOM_INSTR_R2_TYPE(0b0010100, a1, a0, 0b000, a0, 0b1010011);
281
 
282
  res.binary_value = result;
283
  return res.float_value;
284
}
285
 
286
 
287
/**********************************************************************//**
288
 * Single-precision floating-point maximum
289
 *
290
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
291
 *
292
 * @param[in] rs1 Source operand 1 (a0).
293
 * @param[in] rs2 Source operand 2 (a1).
294
 * @return Result.
295
 **************************************************************************/
296
float __attribute__ ((noinline)) riscv_intrinsic_fmaxs(float rs1, float rs2) {
297
 
298
  float_conv_t opa, opb, res;
299
  opa.float_value = rs1;
300
  opb.float_value = rs2;
301
 
302
  register uint32_t result __asm__ ("a0");
303
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
304
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
305
 
306
  // dummy instruction to prevent GCC "constprop" optimization
307
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
308
 
309
  // fmax.s a0, a0, a1
310
  CUSTOM_INSTR_R2_TYPE(0b0010100, a1, a0, 0b001, a0, 0b1010011);
311
 
312
  res.binary_value = result;
313
  return res.float_value;
314
}
315
 
316
 
317
/**********************************************************************//**
318
 * Single-precision floating-point convert float to unsigned integer
319
 *
320
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
321
 *
322
 * @param[in] rs1 Source operand 1 (a0).
323
 * @return Result.
324
 **************************************************************************/
325
uint32_t __attribute__ ((noinline)) riscv_intrinsic_fcvt_wus(float rs1) {
326
 
327
  float_conv_t opa;
328
  opa.float_value = rs1;
329
 
330
  register uint32_t result __asm__ ("a0");
331
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
332
 
333
  // dummy instruction to prevent GCC "constprop" optimization
334
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
335
 
336
  // fcvt.wu.s a0, a0
337
  CUSTOM_INSTR_R2_TYPE(0b1100000, x1, a0, 0b000, a0, 0b1010011);
338
 
339
  return result;
340
}
341
 
342
 
343
/**********************************************************************//**
344
 * Single-precision floating-point convert float to signed integer
345
 *
346
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
347
 *
348
 * @param[in] rs1 Source operand 1 (a0).
349
 * @return Result.
350
 **************************************************************************/
351
int32_t __attribute__ ((noinline)) riscv_intrinsic_fcvt_ws(float rs1) {
352
 
353
  float_conv_t opa;
354
  opa.float_value = rs1;
355
 
356
  register uint32_t result __asm__ ("a0");
357
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
358
 
359
  // dummy instruction to prevent GCC "constprop" optimization
360
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
361
 
362
  // fcvt.w.s a0, a0
363
  CUSTOM_INSTR_R2_TYPE(0b1100000, x0, a0, 0b000, a0, 0b1010011);
364
 
365
  return (int32_t)result;
366
}
367
 
368
 
369
/**********************************************************************//**
370
 * Single-precision floating-point convert unsigned integer to float
371
 *
372
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
373
 *
374
 * @param[in] rs1 Source operand 1 (a0).
375
 * @return Result.
376
 **************************************************************************/
377
float __attribute__ ((noinline)) riscv_intrinsic_fcvt_swu(uint32_t rs1) {
378
 
379
  float_conv_t res;
380
 
381
  register uint32_t result __asm__ ("a0");
382
  register uint32_t tmp_a  __asm__ ("a0") = rs1;
383
 
384
  // dummy instruction to prevent GCC "constprop" optimization
385
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
386
 
387
  // fcvt.s.wu a0, a0
388
  CUSTOM_INSTR_R2_TYPE(0b1101000, x1, a0, 0b000, a0, 0b1010011);
389
 
390
  res.binary_value = result;
391
  return res.float_value;
392
}
393
 
394
 
395
/**********************************************************************//**
396
 * Single-precision floating-point convert signed integer to float
397
 *
398
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
399
 *
400
 * @param[in] rs1 Source operand 1 (a0).
401
 * @return Result.
402
 **************************************************************************/
403
float __attribute__ ((noinline)) riscv_intrinsic_fcvt_sw(int32_t rs1) {
404
 
405
  float_conv_t res;
406
 
407
  register uint32_t result __asm__ ("a0");
408
  register uint32_t tmp_a  __asm__ ("a0") = (uint32_t)rs1;
409
 
410
  // dummy instruction to prevent GCC "constprop" optimization
411
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
412
 
413
  // fcvt.s.w a0, a0
414
  CUSTOM_INSTR_R2_TYPE(0b1101000, x0, a0, 0b000, a0, 0b1010011);
415
 
416
  res.binary_value = result;
417
  return res.float_value;
418
}
419
 
420
 
421
/**********************************************************************//**
422
 * Single-precision floating-point equal comparison
423
 *
424
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
425
 *
426
 * @param[in] rs1 Source operand 1 (a0).
427
 * @param[in] rs2 Source operand 2 (a1).
428
 * @return Result.
429
 **************************************************************************/
430
uint32_t __attribute__ ((noinline)) riscv_intrinsic_feqs(float rs1, float rs2) {
431
 
432
  float_conv_t opa, opb;
433
  opa.float_value = rs1;
434
  opb.float_value = rs2;
435
 
436
  register uint32_t result __asm__ ("a0");
437
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
438
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
439
 
440
  // dummy instruction to prevent GCC "constprop" optimization
441
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
442
 
443
  // feq.s a0, a0, a1
444
  CUSTOM_INSTR_R2_TYPE(0b1010000, a1, a0, 0b010, a0, 0b1010011);
445
 
446
  return result;
447
}
448
 
449
 
450
/**********************************************************************//**
451
 * Single-precision floating-point less-than comparison
452
 *
453
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
454
 *
455
 * @param[in] rs1 Source operand 1 (a0).
456
 * @param[in] rs2 Source operand 2 (a1).
457
 * @return Result.
458
 **************************************************************************/
459
uint32_t __attribute__ ((noinline)) riscv_intrinsic_flts(float rs1, float rs2) {
460
 
461
  float_conv_t opa, opb;
462
  opa.float_value = rs1;
463
  opb.float_value = rs2;
464
 
465
  register uint32_t result __asm__ ("a0");
466
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
467
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
468
 
469
  // dummy instruction to prevent GCC "constprop" optimization
470
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
471
 
472
  // flt.s a0, a0, a1
473
  CUSTOM_INSTR_R2_TYPE(0b1010000, a1, a0, 0b001, a0, 0b1010011);
474
 
475
  return result;
476
}
477
 
478
 
479
/**********************************************************************//**
480
 * Single-precision floating-point less-than-or-equal comparison
481
 *
482
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
483
 *
484
 * @param[in] rs1 Source operand 1 (a0).
485
 * @param[in] rs2 Source operand 2 (a1).
486
 * @return Result.
487
 **************************************************************************/
488
uint32_t __attribute__ ((noinline)) riscv_intrinsic_fles(float rs1, float rs2) {
489
 
490
  float_conv_t opa, opb;
491
  opa.float_value = rs1;
492
  opb.float_value = rs2;
493
 
494
  register uint32_t result __asm__ ("a0");
495
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
496
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
497
 
498
  // dummy instruction to prevent GCC "constprop" optimization
499
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
500
 
501
  // fle.s a0, a0, a1
502
  CUSTOM_INSTR_R2_TYPE(0b1010000, a1, a0, 0b000, a0, 0b1010011);
503
 
504
  return result;
505
}
506
 
507
 
508
/**********************************************************************//**
509
 * Single-precision floating-point sign-injection
510
 *
511
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
512
 *
513
 * @param[in] rs1 Source operand 1 (a0).
514
 * @param[in] rs2 Source operand 2 (a1).
515
 * @return Result.
516
 **************************************************************************/
517
float __attribute__ ((noinline)) riscv_intrinsic_fsgnjs(float rs1, float rs2) {
518
 
519
  float_conv_t opa, opb, res;
520
  opa.float_value = rs1;
521
  opb.float_value = rs2;
522
 
523
  register uint32_t result __asm__ ("a0");
524
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
525
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
526
 
527
  // dummy instruction to prevent GCC "constprop" optimization
528
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
529
 
530
  // fsgnj.s a0, a0, a1
531
  CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b000, a0, 0b1010011);
532
 
533
  res.binary_value = result;
534
  return res.float_value;
535
}
536
 
537
 
538
/**********************************************************************//**
539
 * Single-precision floating-point sign-injection NOT
540
 *
541
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
542
 *
543
 * @param[in] rs1 Source operand 1 (a0).
544
 * @param[in] rs2 Source operand 2 (a1).
545
 * @return Result.
546
 **************************************************************************/
547
float __attribute__ ((noinline)) riscv_intrinsic_fsgnjns(float rs1, float rs2) {
548
 
549
  float_conv_t opa, opb, res;
550
  opa.float_value = rs1;
551
  opb.float_value = rs2;
552
 
553
  register uint32_t result __asm__ ("a0");
554
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
555
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
556
 
557
  // dummy instruction to prevent GCC "constprop" optimization
558
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
559
 
560
  // fsgnjn.s a0, a0, a1
561
  CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b001, a0, 0b1010011);
562
 
563
  res.binary_value = result;
564
  return res.float_value;
565
}
566
 
567
 
568
/**********************************************************************//**
569
 * Single-precision floating-point sign-injection XOR
570
 *
571
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
572
 *
573
 * @param[in] rs1 Source operand 1 (a0).
574
 * @param[in] rs2 Source operand 2 (a1).
575
 * @return Result.
576
 **************************************************************************/
577
float __attribute__ ((noinline)) riscv_intrinsic_fsgnjxs(float rs1, float rs2) {
578
 
579
  float_conv_t opa, opb, res;
580
  opa.float_value = rs1;
581
  opb.float_value = rs2;
582
 
583
  register uint32_t result __asm__ ("a0");
584
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
585
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
586
 
587
  // dummy instruction to prevent GCC "constprop" optimization
588
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
589
 
590
  // fsgnjx.s a0, a0, a1
591
  CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b010, a0, 0b1010011);
592
 
593
  res.binary_value = result;
594
  return res.float_value;
595
}
596
 
597
 
598
/**********************************************************************//**
599
 * Single-precision floating-point number classification
600
 *
601
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
602
 *
603
 * @param[in] rs1 Source operand 1 (a0).
604
 * @return Result.
605
 **************************************************************************/
606
uint32_t __attribute__ ((noinline)) riscv_intrinsic_fclasss(float rs1) {
607
 
608
  float_conv_t opa;
609
  opa.float_value = rs1;
610
 
611
  register uint32_t result __asm__ ("a0");
612
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
613
 
614
  // dummy instruction to prevent GCC "constprop" optimization
615
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
616
 
617
  // fclass.s a0, a0
618
  CUSTOM_INSTR_R2_TYPE(0b1110000, x0, a0, 0b001, a0, 0b1010011);
619
 
620
  return result;
621
}
622
 
623
 
624
// ################################################################################################
625
// !!! UNSUPPORTED instructions !!!
626
// ################################################################################################
627
 
628
/**********************************************************************//**
629
 * Single-precision floating-point division
630
 *
631
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
632
 *
633
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
634
 *
635
 * @param[in] rs1 Source operand 1 (a0).
636
 * @param[in] rs2 Source operand 2 (a1).
637
 * @return Result.
638
 **************************************************************************/
639
float __attribute__ ((noinline)) riscv_intrinsic_fdivs(float rs1, float rs2) {
640
 
641
  float_conv_t opa, opb, res;
642
  opa.float_value = rs1;
643
  opb.float_value = rs2;
644
 
645
  register uint32_t result __asm__ ("a0");
646
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
647
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
648
 
649
  // dummy instruction to prevent GCC "constprop" optimization
650
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
651
 
652
  // fdiv.s a0, a0, x1
653
  CUSTOM_INSTR_R2_TYPE(0b0001100, a1, a0, 0b000, a0, 0b1010011);
654
 
655
  res.binary_value = result;
656
  return res.float_value;
657
}
658
 
659
 
660
/**********************************************************************//**
661
 * Single-precision floating-point square root
662
 *
663
 * @note "noinline" attributed to make sure arguments/return values are in a0 and a1.
664
 *
665
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
666
 *
667
 * @param[in] rs1 Source operand 1 (a0).
668
 * @return Result.
669
 **************************************************************************/
670
float __attribute__ ((noinline)) riscv_intrinsic_fsqrts(float rs1) {
671
 
672
  float_conv_t opa, res;
673
  opa.float_value = rs1;
674
 
675
  register uint32_t result __asm__ ("a0");
676
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
677
 
678
  // dummy instruction to prevent GCC "constprop" optimization
679
  asm volatile ("add x0, %[input_i], x0" : : [input_i] "r" (tmp_a));
680
 
681
  // fsqrt.s a0, a0, a1
682
  CUSTOM_INSTR_R2_TYPE(0b0101100, a1, a0, 0b000, a0, 0b1010011);
683
 
684
  res.binary_value = result;
685
  return res.float_value;
686
}
687
 
688
 
689
/**********************************************************************//**
690
 * Single-precision floating-point fused multiply-add
691
 *
692
 * @note "noinline" attributed to make sure arguments/return values are in a0, a1 and a2.
693
 *
694
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
695
 *
696
 * @param[in] rs1 Source operand 1 (a0)
697
 * @param[in] rs2 Source operand 2 (a1)
698
 * @param[in] rs3 Source operand 3 (a2)
699
 * @return Result.
700
 **************************************************************************/
701
float __attribute__ ((noinline)) riscv_intrinsic_fmadds(float rs1, float rs2, float rs3) {
702
 
703
  float_conv_t opa, opb, opc, res;
704
  opa.float_value = rs1;
705
  opb.float_value = rs2;
706
  opc.float_value = rs3;
707
 
708
  register uint32_t result __asm__ ("a0");
709
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
710
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
711
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
712
 
713
  // dummy instruction to prevent GCC "constprop" optimization
714
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
715
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
716
 
717
  // fmadd.s a0, a0, a1, a2
718
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1000011);
719
 
720
  res.binary_value = result;
721
  return res.float_value;
722
}
723
 
724
 
725
/**********************************************************************//**
726
 * Single-precision floating-point fused multiply-sub
727
 *
728
 * @note "noinline" attributed to make sure arguments/return values are in a0, a1 and a2.
729
 *
730
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
731
 *
732
 * @param[in] rs1 Source operand 1 (a0)
733
 * @param[in] rs2 Source operand 2 (a1)
734
 * @param[in] rs3 Source operand 3 (a2)
735
 * @return Result.
736
 **************************************************************************/
737
float __attribute__ ((noinline)) riscv_intrinsic_fmsubs(float rs1, float rs2, float rs3) {
738
 
739
  float_conv_t opa, opb, opc, res;
740
  opa.float_value = rs1;
741
  opb.float_value = rs2;
742
  opc.float_value = rs3;
743
 
744
  register uint32_t result __asm__ ("a0");
745
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
746
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
747
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
748
 
749
  // dummy instruction to prevent GCC "constprop" optimization
750
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
751
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
752
 
753
  // fmsub.s a0, a0, a1, a2
754
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1000111);
755
 
756
  res.binary_value = result;
757
  return res.float_value;
758
}
759
 
760
 
761
/**********************************************************************//**
762
 * Single-precision floating-point fused negated multiply-sub
763
 *
764
 * @note "noinline" attributed to make sure arguments/return values are in a0, a1 and a2.
765
 *
766
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
767
 *
768
 * @param[in] rs1 Source operand 1 (a0)
769
 * @param[in] rs2 Source operand 2 (a1)
770
 * @param[in] rs3 Source operand 3 (a2)
771
 * @return Result.
772
 **************************************************************************/
773
float __attribute__ ((noinline)) riscv_intrinsic_fnmsubs(float rs1, float rs2, float rs3) {
774
 
775
  float_conv_t opa, opb, opc, res;
776
  opa.float_value = rs1;
777
  opb.float_value = rs2;
778
  opc.float_value = rs3;
779
 
780
  register uint32_t result __asm__ ("a0");
781
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
782
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
783
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
784
 
785
  // dummy instruction to prevent GCC "constprop" optimization
786
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
787
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
788
 
789
  // fnmsub.s a0, a0, a1, a2
790
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1001011);
791
 
792
  res.binary_value = result;
793
  return res.float_value;
794
}
795
 
796
 
797
/**********************************************************************//**
798
 * Single-precision floating-point fused negated multiply-add
799
 *
800
 * @note "noinline" attributed to make sure arguments/return values are in a0, a1 and a2.
801
 *
802
 * @warning This instruction is not supported and should raise an illegal instruction exception when executed.
803
 *
804
 * @param[in] rs1 Source operand 1 (a0)
805
 * @param[in] rs2 Source operand 2 (a1)
806
 * @param[in] rs3 Source operand 3 (a2)
807
 * @return Result.
808
 **************************************************************************/
809
float __attribute__ ((noinline)) riscv_intrinsic_fnmadds(float rs1, float rs2, float rs3) {
810
 
811
  float_conv_t opa, opb, opc, res;
812
  opa.float_value = rs1;
813
  opb.float_value = rs2;
814
  opc.float_value = rs3;
815
 
816
  register uint32_t result __asm__ ("a0");
817
  register uint32_t tmp_a  __asm__ ("a0") = opa.binary_value;
818
  register uint32_t tmp_b  __asm__ ("a1") = opb.binary_value;
819
  register uint32_t tmp_c  __asm__ ("a2") = opc.binary_value;
820
 
821
  // dummy instruction to prevent GCC "constprop" optimization
822
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b));
823
  asm volatile ("add x0, %[input_i], %[input_j]" : : [input_i] "r" (tmp_b), [input_j] "r" (tmp_c));
824
 
825
  // fnmadd.s a0, a0, a1, a2
826
  CUSTOM_INSTR_R3_TYPE(a2, a1, a0, 0b000, a0, 0b1001111);
827
 
828
  res.binary_value = result;
829
  return res.float_value;
830
}
831
 
832
 
833
// ################################################################################################
834
// Emulation functions
835
// ################################################################################################
836
 
837
/**********************************************************************//**
 * Single-precision floating-point addition (emulation)
 *
 * Both operands and the sum are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return rs1 + rs2.
 **************************************************************************/
float riscv_emulate_fadds(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);

  return subnormal_flush(lhs + rhs);
}
852
 
853
 
854
/**********************************************************************//**
 * Single-precision floating-point subtraction (emulation)
 *
 * Both operands and the difference are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1 (minuend).
 * @param[in] rs2 Source operand 2 (subtrahend).
 * @return rs1 - rs2.
 **************************************************************************/
float riscv_emulate_fsubs(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);

  return subnormal_flush(lhs - rhs);
}
869
 
870
 
871
/**********************************************************************//**
 * Single-precision floating-point multiplication (emulation)
 *
 * Both operands and the product are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return rs1 * rs2.
 **************************************************************************/
float riscv_emulate_fmuls(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);

  return subnormal_flush(lhs * rhs);
}
886
 
887
 
888
/**********************************************************************//**
 * Single-precision floating-point minimum (emulation)
 *
 * Both operands are passed through subnormal_flush() first. If both are NaN
 * the result of nanf("") is returned; if exactly one is NaN the other operand
 * is returned. Zeros of opposite sign yield -0.0.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Minimum of the two operands.
 **************************************************************************/
float riscv_emulate_fmins(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);

  const int lhs_is_nan = (fpclassify(lhs) == FP_NAN);
  const int rhs_is_nan = (fpclassify(rhs) == FP_NAN);

  if (lhs_is_nan) {
    return rhs_is_nan ? nanf("") : rhs;
  }
  if (rhs_is_nan) {
    return lhs;
  }

  // RISC-V spec: -0 < +0
  if ((lhs == 0.0f) && (rhs == 0.0f) && (!signbit(lhs) != !signbit(rhs))) {
    return -0.0f;
  }

  return fminf(lhs, rhs);
}
927
 
928
 
929
/**********************************************************************//**
 * Single-precision floating-point maximum (emulation)
 *
 * Both operands are passed through subnormal_flush() first. If both are NaN
 * the result of nanf("") is returned; if exactly one is NaN the other operand
 * is returned. Zeros of opposite sign yield +0.0.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Maximum of the two operands.
 **************************************************************************/
float riscv_emulate_fmaxs(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);

  const int lhs_is_nan = (fpclassify(lhs) == FP_NAN);
  const int rhs_is_nan = (fpclassify(rhs) == FP_NAN);

  if (lhs_is_nan) {
    return rhs_is_nan ? nanf("") : rhs;
  }
  if (rhs_is_nan) {
    return lhs;
  }

  // RISC-V spec: -0 < +0
  if ((lhs == 0.0f) && (rhs == 0.0f) && (!signbit(lhs) != !signbit(rhs))) {
    return +0.0f;
  }

  return fmaxf(lhs, rhs);
}
969
 
970
 
971
/**********************************************************************//**
 * Single-precision floating-point float to unsigned integer (emulation)
 *
 * The operand is passed through subnormal_flush() and rounded with roundf().
 * Inputs that do not fit into 32 unsigned bits are saturated instead of being
 * cast directly (an out-of-range float-to-integer cast is undefined in C):
 * negative values give 0, values >= 2^32 and NaN give UINT32_MAX.
 *
 * NOTE(review): roundf() rounds ties away from zero; confirm this matches the
 * rounding mode the hardware is expected to use.
 *
 * @param[in] rs1 Source operand 1.
 * @return Converted (saturated) unsigned integer.
 **************************************************************************/
uint32_t riscv_emulate_fcvt_wus(float rs1) {

  float opa = subnormal_flush(rs1);

  // NaN has no integer value -> largest unsigned value
  if (fpclassify(opa) == FP_NAN) {
    return UINT32_MAX;
  }

  float rounded = roundf(opa);

  // saturate at the lower bound (also covers -infinity)
  if (rounded < 0.0f) {
    return 0;
  }

  // saturate at the upper bound: 2^32 (also covers +infinity)
  if (rounded >= 4294967296.0f) {
    return UINT32_MAX;
  }

  return (uint32_t)rounded;
}
983
 
984
 
985
/**********************************************************************//**
 * Single-precision floating-point float to signed integer (emulation)
 *
 * The operand is passed through subnormal_flush() and rounded with roundf().
 * Inputs that do not fit into 32 signed bits are saturated instead of being
 * cast directly (an out-of-range float-to-integer cast is undefined in C):
 * values < -2^31 give INT32_MIN, values >= 2^31 and NaN give INT32_MAX.
 *
 * NOTE(review): roundf() rounds ties away from zero; confirm this matches the
 * rounding mode the hardware is expected to use.
 *
 * @param[in] rs1 Source operand 1.
 * @return Converted (saturated) signed integer.
 **************************************************************************/
int32_t riscv_emulate_fcvt_ws(float rs1) {

  float opa = subnormal_flush(rs1);

  // NaN has no integer value -> largest signed value
  if (fpclassify(opa) == FP_NAN) {
    return INT32_MAX;
  }

  float rounded = roundf(opa);

  // saturate at the upper bound: 2^31 (also covers +infinity)
  if (rounded >= 2147483648.0f) {
    return INT32_MAX;
  }

  // saturate at the lower bound: -2^31 itself is representable (also covers -infinity)
  if (rounded < -2147483648.0f) {
    return INT32_MIN;
  }

  return (int32_t)rounded;
}
997
 
998
 
999
/**********************************************************************//**
 * Single-precision floating-point unsigned integer to float (emulation)
 *
 * Plain C conversion of the unsigned 32-bit operand to float.
 *
 * @param[in] rs1 Source operand 1.
 * @return rs1 converted to float.
 **************************************************************************/
float riscv_emulate_fcvt_swu(uint32_t rs1) {

  const float converted = (float)rs1;
  return converted;
}
1009
 
1010
 
1011
/**********************************************************************//**
 * Single-precision floating-point signed integer to float (emulation)
 *
 * Plain C conversion of the signed 32-bit operand to float.
 *
 * @param[in] rs1 Source operand 1.
 * @return rs1 converted to float.
 **************************************************************************/
float riscv_emulate_fcvt_sw(int32_t rs1) {

  const float converted = (float)rs1;
  return converted;
}
1021
 
1022
 
1023
/**********************************************************************//**
 * Single-precision floating-point equal comparison (emulation)
 *
 * Both operands are passed through subnormal_flush() first. Any NaN operand
 * makes the comparison false.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return 1 if the operands compare equal, 0 otherwise.
 **************************************************************************/
uint32_t riscv_emulate_feqs(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  // conditions are explicitly parenthesized: the comparison macros are not
  // guaranteed to expand to a parenthesized expression
  if (isless(opa, opb) || isgreater(opa, opb)) {
    return 0;
  }

  // neither less nor greater (and no NaN involved) -> equal
  return 1;
}
1049
 
1050
 
1051
/**********************************************************************//**
 * Single-precision floating-point less-than comparison (emulation)
 *
 * Both operands are passed through subnormal_flush() first. Any NaN operand
 * makes the comparison false.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return 1 if rs1 < rs2, 0 otherwise.
 **************************************************************************/
uint32_t riscv_emulate_flts(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  // condition is explicitly parenthesized: isless() is not guaranteed to
  // expand to a parenthesized expression
  if (isless(opa, opb)) {
    return 1;
  }

  return 0;
}
1074
 
1075
 
1076
/**********************************************************************//**
 * Single-precision floating-point less-than-or-equal comparison (emulation)
 *
 * Both operands are passed through subnormal_flush() first. Any NaN operand
 * makes the comparison false.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return 1 if rs1 <= rs2, 0 otherwise.
 **************************************************************************/
uint32_t riscv_emulate_fles(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  // condition is explicitly parenthesized: islessequal() is not guaranteed to
  // expand to a parenthesized expression
  if (islessequal(opa, opb)) {
    return 1;
  }

  return 0;
}
1099
 
1100
 
1101
/**********************************************************************//**
 * Single-precision floating-point sign-injection (emulation)
 *
 * Returns rs1 carrying the sign of rs2. Both operands are passed through
 * subnormal_flush() first.
 *
 * @param[in] rs1 Source operand 1 (value).
 * @param[in] rs2 Source operand 2 (sign source).
 * @return rs1 with the sign of rs2.
 **************************************************************************/
float riscv_emulate_fsgnjs(float rs1, float rs2) {

  const float value    = subnormal_flush(rs1);
  const float sign_src = subnormal_flush(rs2);

  const int value_negative  = (signbit(value) != 0);
  const int wanted_negative = (signbit(sign_src) != 0);

  // flip the sign only if it is not already the requested one
  return (value_negative != wanted_negative) ? -value : value;
}
1136
 
1137
 
1138
/**********************************************************************//**
 * Single-precision floating-point sign-injection NOT (emulation)
 *
 * Returns rs1 carrying the inverted sign of rs2. Both operands are passed
 * through subnormal_flush() first.
 *
 * @param[in] rs1 Source operand 1 (value).
 * @param[in] rs2 Source operand 2 (sign source).
 * @return rs1 with the opposite sign of rs2.
 **************************************************************************/
float riscv_emulate_fsgnjns(float rs1, float rs2) {

  const float value    = subnormal_flush(rs1);
  const float sign_src = subnormal_flush(rs2);

  const int value_negative  = (signbit(value) != 0);
  const int wanted_negative = (signbit(sign_src) == 0); // inverted sign of rs2

  // flip the sign only if it is not already the requested one
  return (value_negative != wanted_negative) ? -value : value;
}
1173
 
1174
 
1175
/**********************************************************************//**
 * Single-precision floating-point sign-injection XOR (emulation)
 *
 * Returns rs1 whose sign is the XOR of the signs of rs1 and rs2, i.e. the
 * sign of rs1 is flipped exactly when rs2 is negative. Both operands are
 * passed through subnormal_flush() first.
 *
 * @param[in] rs1 Source operand 1 (value).
 * @param[in] rs2 Source operand 2 (sign source).
 * @return rs1 with sign(rs1) XOR sign(rs2).
 **************************************************************************/
float riscv_emulate_fsgnjxs(float rs1, float rs2) {

  const float value    = subnormal_flush(rs1);
  const float sign_src = subnormal_flush(rs2);

  // XOR with a set sign bit inverts the sign, XOR with a clear one keeps it
  const int flip = (signbit(sign_src) != 0);

  return flip ? -value : value;
}
1210
 
1211
 
1212
/**********************************************************************//**
 * Single-precision floating-point number classification (emulation)
 *
 * The operand is passed through subnormal_flush() and then mapped to the
 * one-hot RISC-V classify result. NaNs are split into quiet/signaling by
 * bit 22 (the mantissa's MSB) of the binary representation.
 *
 * @param[in] rs1 Source operand 1.
 * @return One-hot class mask (0 if the class is not recognized).
 **************************************************************************/
uint32_t riscv_emulate_fclasss(float rs1) {

  // RISC-V classify result layout
  enum {
    CLASS_NEG_INF    = 1 << 0, // negative infinity
    CLASS_NEG_NORM   = 1 << 1, // negative normal number
    CLASS_NEG_DENORM = 1 << 2, // negative subnormal number
    CLASS_NEG_ZERO   = 1 << 3, // negative zero
    CLASS_POS_ZERO   = 1 << 4, // positive zero
    CLASS_POS_DENORM = 1 << 5, // positive subnormal number
    CLASS_POS_NORM   = 1 << 6, // positive normal number
    CLASS_POS_INF    = 1 << 7, // positive infinity
    CLASS_SNAN       = 1 << 8, // signaling NaN (sNaN)
    CLASS_QNAN       = 1 << 9  // quiet NaN (qNaN)
  };

  const float value    = subnormal_flush(rs1);
  const int   negative = (signbit(value) != 0);

  switch (fpclassify(value)) {

    case FP_INFINITE:
      return negative ? CLASS_NEG_INF : CLASS_POS_INF;

    case FP_ZERO:
      return negative ? CLASS_NEG_ZERO : CLASS_POS_ZERO;

    case FP_NORMAL:
      return negative ? CLASS_NEG_NORM : CLASS_POS_NORM;

    case FP_SUBNORMAL:
      return negative ? CLASS_NEG_DENORM : CLASS_POS_DENORM;

    case FP_NAN: {
      union {
        uint32_t binary_value; /**< Access in binary representation */
        float    float_value;  /**< Access as native float */
      } bits;
      bits.float_value = value;
      // bit 22 (mantissa's MSB) is set -> quiet NaN, otherwise signaling NaN
      return ((bits.binary_value >> 22) & 1u) ? CLASS_QNAN : CLASS_SNAN;
    }

    default:
      return 0;
  }
}
1281
 
1282
 
1283
/**********************************************************************//**
 * Single-precision floating-point division (emulation)
 *
 * Both operands and the quotient are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1 (dividend).
 * @param[in] rs2 Source operand 2 (divisor).
 * @return rs1 / rs2.
 **************************************************************************/
float riscv_emulate_fdivs(float rs1, float rs2) {

  const float dividend = subnormal_flush(rs1);
  const float divisor  = subnormal_flush(rs2);

  return subnormal_flush(dividend / divisor);
}
1298
 
1299
 
1300
/**********************************************************************//**
 * Single-precision floating-point square root (emulation)
 *
 * The operand and the root are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1.
 * @return Square root of rs1 as computed by sqrtf().
 **************************************************************************/
float riscv_emulate_fsqrts(float rs1) {

  const float radicand = subnormal_flush(rs1);

  return subnormal_flush(sqrtf(radicand));
}
1313
 
1314
 
1315
/**********************************************************************//**
 * Single-precision floating-point fused multiply-add (emulation)
 *
 * Computes (rs1 * rs2) + rs3 with a single rounding via fmaf(). All three
 * operands and the result are passed through subnormal_flush().
 *
 * @warning This instruction is not supported!
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float riscv_emulate_fmadds(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  // fmaf() rounds once; a separate multiply and add would round twice
  float res = fmaf(opa, opb, opc);
  return subnormal_flush(res);
}
1336
 
1337
 
1338
/**********************************************************************//**
 * Single-precision floating-point fused multiply-sub (emulation)
 *
 * Computes (rs1 * rs2) - rs3 with a single rounding via fmaf(). All three
 * operands and the result are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float riscv_emulate_fmsubs(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  // fmaf() rounds once; a separate multiply and subtract would round twice
  float res = fmaf(opa, opb, -opc);
  return subnormal_flush(res);
}
1355
 
1356
 
1357
/**********************************************************************//**
 * Single-precision floating-point fused negated multiply-sub (emulation)
 *
 * Computes -(rs1 * rs2) + rs3 with a single rounding via fmaf(). All three
 * operands and the result are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float riscv_emulate_fnmsubs(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  // negating one factor negates the product; fmaf() then rounds only once
  float res = fmaf(-opa, opb, opc);
  return subnormal_flush(res);
}
1374
 
1375
 
1376
/**********************************************************************//**
 * Single-precision floating-point fused negated multiply-add (emulation)
 *
 * Computes -(rs1 * rs2) - rs3 with a single rounding via fmaf(). All three
 * operands and the result are passed through subnormal_flush().
 *
 * @param[in] rs1 Source operand 1
 * @param[in] rs2 Source operand 2
 * @param[in] rs3 Source operand 3
 * @return Result.
 **************************************************************************/
float riscv_emulate_fnmadds(float rs1, float rs2, float rs3) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);
  float opc = subnormal_flush(rs3);

  // negating one factor negates the product; fmaf() then rounds only once
  float res = fmaf(-opa, opb, -opc);
  return subnormal_flush(res);
}
1393
 
1394
 
1395
#endif // neorv32_zfinx_extension_intrinsics_h
1396
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.