OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [bintools/] [emulator3.cpp] - Blame information for rev 163

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 54 Agner
/****************************  emulator3.cpp  ********************************
2
* Author:        Agner Fog
3
* date created:  2018-02-18
4
* Last modified: 2021-06-29
5
* Version:       1.11
6
* Project:       Binary tools for ForwardCom instruction set
7
* Description:
8
* Emulator: Execution functions for multiformat instructions
9
*
10
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses
11
*****************************************************************************/
12
 
13
#include "stdafx.h"
14
 
15
// get intrinsic functions for _mm_getcsr and _mm_setcsr to control floating point rounding and exceptions
16
#if defined(_M_X64) || defined(__x86_64__) || defined(__amd64) || defined(__SSE2__)
17
#if defined(__FMA__) || defined(__AVX2__)
18
#define FMA_AVAILABLE 1
19
#else 
20
#define FMA_AVAILABLE 0
21
#endif
22
#if defined(_MSC_VER) && !FMA_AVAILABLE
23
#include <xmmintrin.h>
24
#else
25
#include <immintrin.h>
26
#endif
27
#define MCSCR_AVAILABLE 1
28
#else
29
#define MCSCR_AVAILABLE 0
30
#endif
31
 
32
 
33
//////////////////////////////////////////////////////////////////////////////////////////////////////
34
// functions for detecting exceptions and controlling rounding mode on the CPU that runs the emulator
35
// Note: these functions are only available in x86 systems with SSE2 or x64 enabled
36
//////////////////////////////////////////////////////////////////////////////////////////////////////
37
 
38
// Error message if MXCSR not available
39
void errorFpControlMissing() {
40
    static int repeated = 0;
41
    if (!repeated) {
42
        fprintf(stderr, "Error: Cannot control floating point exceptions and rounding mode on this platform");
43
        repeated = 1;
44
    }
45
}
46
 
47
void setRoundingMode(uint8_t r) {
48
    // change rounding mode
49
#if MCSCR_AVAILABLE
50
    uint32_t e = _mm_getcsr();
51
    e = (e & 0x9FFF) | (r & 3) << 13;
52
    _mm_setcsr(e);
53
#else
54
    errorFpControlMissing();
55
#endif
56
}
57
 
58
void clearExceptionFlags() {
59
    // clear exception flags before detecting exceptions
60
#if MCSCR_AVAILABLE
61
    uint32_t e = _mm_getcsr();
62
    _mm_setcsr(e & 0xFFC0);
63
#else
64
    errorFpControlMissing();
65
#endif
66
}
67
 
68
uint32_t getExceptionFlags() {
69
    // read exception flags after instructions that may cause exceptions
70
    // 1: invalid operation
71
    // 2: denormal
72
    // 4: divide by zero
73
    // 8: overflow
74
    // 0x10: underflow
75
    // 0x20: precision
76
#if MCSCR_AVAILABLE
77
    return _mm_getcsr() & 0x3F;
78
#else
79
    errorFpControlMissing();
80
    return 0;
81
#endif
82
}
83
 
84
void enableSubnormals(uint32_t e) {
85
    // enable or disable subnormal numbers
86
#if MCSCR_AVAILABLE
87
    uint32_t x = _mm_getcsr();
88
    if (e != 0) {
89
        _mm_setcsr(x & ~0x8040);
90
    }
91
    else {
92
        _mm_setcsr(x | 0x8040);
93
    }
94
#else
95
    errorFpControlMissing();
96
#endif
97
}
98
 
99
 
100
/////////////////////////////
101
// Multi-format instructions
102
/////////////////////////////
103
 
104
uint64_t f_nop(CThread * t) {
105
    // No operation
106
    t->running = 2;                                        // don't save RD
107
    t->returnType = 0;                                     // debug return output
108
    return 0;
109
}
110
 
111
static uint64_t f_store(CThread * t) {
112
    // Store value RD to memory
113
    uint8_t rd = t->operands[0];
114
    uint64_t value = t->registers[rd];
115
    if (t->vect) {
116
        value = t->readVectorElement(rd, t->vectorOffset);
117
    }
118
    // check mask
119
    if (t->parm[3].b & 1) {
120
        uint64_t address = t->memAddress;                     // memory address
121
        if (t->vect) address += t->vectorOffset;
122
        t->writeMemoryOperand(value, address);
123
    }
124
    else {  // mask is 0. This instruction has no fallback. Don't write
125
        /*
126
        uint8_t fallback = t->operands[2];                 // mask is 0. get fallback
127
        if (fallback == 0x1F) value = 0;
128
        else if (t->vect) value = t->readVectorElement(fallback, t->vectorOffset);
129
        else value = t->registers[fallback];*/
130
    }
131
    t->returnType = (t->returnType & ~0x10) | 0x20;        // return type is memory
132
    t->running = 2;                                        // don't save RD
133
    return 0;
134
}
135
 
136
static uint64_t f_move(CThread * t) {
137
    // copy value
138
    return t->parm[2].q;
139
}
140
 
141
static uint64_t f_prefetch(CThread * t) {
142
    // prefetch from address. not emulated
143
    return f_nop(t);
144
}
145
 
146
static uint64_t f_sign_extend(CThread * t) {
147
    // sign-extend integer to 64 bits
148
    int64_t value = 0;
149
    switch (t->operandType) {
150
    case 0:
151
        value = (int64_t)(int8_t)t->parm[2].b;
152
        break;
153
    case 1:
154
        value = (int64_t)(int16_t)t->parm[2].s;
155
        break;
156
    case 2:
157
        value = (int64_t)(int32_t)t->parm[2].i;
158
        break;
159
    case 3:
160
        value = (int64_t)t->parm[2].q;
161
        break;
162
    default:
163
        t->interrupt(INT_WRONG_PARAMETERS);
164
        value = 0;
165
    }
166
    t->operandType = 3;  // change operand size of result    
167
    if (t->vect) {
168
        t->vectorLength[t->operands[0]] = t->vectorLengthR = 8; // change vector length of result and stop vector loop
169
    }
170
    t->returnType = (t->returnType & ~7) | 3;              // debug return output
171
    return (uint64_t)value;
172
}
173
 
174
static uint64_t f_sign_extend_add(CThread * t) {
175
    // sign-extend integer to 64 bits and add 64-bit register
176
    int64_t value = 0;
177
    uint8_t options = 0;
178
    if (t->fInstr->tmplate == 0xE) options = t->pInstr->a.im3;
179
 
180
    switch (t->operandType) {
181
    case 0:
182
        value = (int64_t)(int8_t)t->parm[2].b;
183
        break;
184
    case 1:
185
        value = (int64_t)(int16_t)t->parm[2].s;
186
        break;
187
    case 2:
188
        value = (int64_t)(int32_t)t->parm[2].i;
189
        break;
190
    case 3:
191
        value = (int64_t)t->parm[2].q;
192
        break;
193
    default:
194
        t->interrupt(INT_WRONG_PARAMETERS);
195
        value = 0;
196
    }
197
    value <<= options;
198
 
199
    uint8_t r1 = t->operands[4];                           // first operand. g.p. register
200
    value += t->registers[r1];                             // read register with full size
201
    t->operandType = 3;                                    // change operand size of result    
202
    t->returnType = (t->returnType & ~7) | 3;              // debug return output
203
    if (t->vect) t->interrupt(INT_WRONG_PARAMETERS);
204
    return (uint64_t)value;
205
}
206
 
207
static uint64_t f_compare(CThread * t) {
208
    // compare two source operands and generate a boolean result
209
    // get condition code
210
    uint8_t cond = 0;
211
    uint32_t mask = t->parm[3].i;                          // mask register value or NUMCONTR
212
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) {
213
        cond = t->pInstr->a.im3;                           // E template. get condition from IM3
214
    }
215
    // get operands
216
    SNum a = t->parm[1];
217
    SNum b = t->parm[2];
218
    if ((t->fInstr->imm2 & 4) && t->operandType < 5) {
219
        b = t->parm[4];                                    // avoid immediate operand shifted by imm3
220
    }
221
    uint64_t result = 0;
222
    uint8_t cond1 = cond >> 1 & 3;                         // bit 1 - 2 of condition
223
    bool isnan = false;
224
    // select operand type
225
    if (t->operandType < 5) {  // integer types
226
        uint64_t sizeMask = dataSizeMask[t->operandType];  // mask for data size
227
        uint64_t signBit = (sizeMask >> 1) + 1;            // sign bit
228
        a.q &= sizeMask;  b.q &= sizeMask;                 // mask to desired size
229
        if (cond1 != 3 && !(cond & 8)) {                   // signed 
230
            a.q ^= signBit;  b.q ^= signBit;               // flip sign bit to use unsigned compare
231
        }
232
        switch (cond1) {  // select condition
233
        case 0:   // a == b
234
            result = a.q == b.q;
235
            break;
236
        case 1:   // a < b
237
            result = a.q < b.q;
238
            break;
239
        case 2:   // a > b
240
            result = a.q > b.q;
241
            break;
242
        case 3:   // abs(a) < abs(b). Not officially supported in version 1.11
243
            if (a.q & signBit) a.q = (~a.q + 1) & sizeMask;  // change sign. overflow allowed
244
            if (b.q & signBit) b.q = (~b.q + 1) & sizeMask;  // change sign. overflow allowed
245
            result = a.q < b.q;
246
            break;
247
        }
248
    }
249
    else if (t->operandType == 5) {    // float
250
        isnan = isnan_f(a.i) || isnan_f(b.i);              // check for NAN
251
        if (!isnan) {
252
            switch (cond1) {  // select condition
253
            case 0:   // a == b
254
                result = a.f == b.f;
255
                break;
256
            case 1:   // a < b
257
                result = a.f < b.f;
258
                break;
259
            case 2:   // a > b
260
                result = a.f > b.f;
261
                break;
262
            case 3:   // abs(a) < abs(b)
263
                result = fabsf(a.f) < fabsf(b.f);
264
                break;
265
            }
266
        }
267
    }
268
    else if (t->operandType == 6) {   // double
269
        isnan = isnan_d(a.q) || isnan_d(b.q);
270
        if (!isnan) {
271
            switch (cond1) {  // select condition
272
            case 0:   // a == b
273
                result = a.d == b.d;
274
                break;
275
            case 1:   // a < b
276
                result = a.d < b.d;
277
                break;
278
            case 2:   // a > b
279
                result = a.d > b.d;
280
                break;
281
            case 3:   // abs(a) < abs(b)
282
                result = fabs(a.d) < fabs(b.d);
283
                break;
284
            }
285
        }
286
    }
287
    else t->interrupt(INT_WRONG_PARAMETERS);                   // unsupported type
288
    // invert result
289
    if (cond & 1) result ^= 1;
290
 
291
    // check for NAN
292
    if (isnan) {
293
        result = (cond >> 3) & 1;                          // bit 3 tells what to get if unordered
294
        //if (t->parm[3].i & MSK_FLOAT_NAN_LOSS) t->interrupt(INT_FLOAT_NAN_LOSS); // mask bit 29: trap if NAN loss
295
    }
296
 
297
    // mask and fallback
298
    uint8_t fallbackreg = t->operands[2];
299
    uint64_t fallback = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg) : 0;
300
    switch (cond >> 4) {
301
    case 0:    // normal fallback
302
        if (!(mask & 1)) result = fallback;
303
        break;
304
    case 1:    // mask & result & fallback
305
        result &= mask & fallback;
306
        break;
307
    case 2:    // mask & (result | fallback)
308
        result = mask & (result | fallback);
309
        break;
310
    case 3:    // mask & (result ^ fallback)
311
        result = mask & (result ^ fallback);
312
        break;
313
    }
314
    if ((t->returnType & 7) >= 5) t->returnType -= 3;  // debug return output must be integer
315
 
316
    result &= 1;       // use only bit 0 of result
317
    if ((t->operands[1] & 0x1F) < 7) {
318
        // There is a mask. get remaining bits from mask
319
        result |= (t->parm[3].q & ~(uint64_t)1);
320
    }
321
    t->parm[3].b = 1;    // prevent normal mask operation
322
    return result;
323
}
324
 
325
uint64_t f_add(CThread * t) {
326
    // add two numbers
327
    SNum a = t->parm[1];
328
    SNum b = t->parm[2];
329
    uint32_t mask = t->parm[3].i;
330
    SNum result;
331
    bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0;  // non-standard rounding mode
332
    bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0;  // make NAN if exceptions
333
    uint8_t operandType = t->operandType;
334
 
335
    if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) {
336
        // subnormal status changed
337
        enableSubnormals (mask & (1<<MSK_SUBNORMAL));
338
        t->lastMask = mask;
339
    }
340
    // operand type
341
    if (operandType < 4) {  // integer
342
        // uint64_t sizeMask = dataSizeMask[t->operandType]; // mask for data size        
343
        result.q = a.q + b.q;
344
    }
345
    else if (operandType == 5) {                           // float
346
        bool nana = isnan_f(a.i);                          // check for NAN input
347
        bool nanb = isnan_f(b.i);
348
        if (nana && nanb) {                                // both are NAN
349
            return (a.i << 1) > (b.i << 1) ? a.i : b.i;    // return the biggest payload
350
        }
351
        else if (nana) return a.q;
352
        else if (nanb) return b.q;
353
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
354
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
355
        result.f = a.f + b.f;                              // this is the actual addition
356
        if (isnan_f(result.i)) {
357
            // the result is NAN but neither input is NAN. This must be INF-INF
358
            result.q = t->makeNan(nan_invalid_sub, operandType);
359
        }
360
        if (detectExceptions) {
361
            uint32_t x = getExceptionFlags();              // read exceptions
362
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType);
363
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
364
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
365
        }
366
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
367
    }
368
 
369
    else if (operandType == 6) {                           // double
370
        bool nana = isnan_d(a.q);                          // check for NAN input
371
        bool nanb = isnan_d(b.q);
372
        if (nana && nanb) {                                // both are NAN
373
            return (a.q << 1) > (b.q << 1) ? a.q : b.q;    // return the biggest payload
374
        }
375
        else if (nana) return a.q;
376
        else if (nanb) return b.q;
377
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
378
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
379
        result.d = a.d + b.d;                              // this is the actual addition
380
        if (isnan_d(result.q)) {
381
            // the result is NAN but neither input is NAN. This must be INF-INF
382
            result.q = t->makeNan(nan_invalid_sub, operandType);
383
        }
384
        if (detectExceptions) {
385
            uint32_t x = getExceptionFlags();              // read exceptions
386
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType);
387
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
388
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
389
        }
390
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
391
    }
392
    else {
393
        // unsupported operand type
394
        t->interrupt(INT_WRONG_PARAMETERS);
395
        result.i = 0;
396
    }
397
    return result.q;
398
}
399
 
400
uint64_t f_sub(CThread * t) {
401
    // subtract two numbers
402
    SNum a = t->parm[1];
403
    SNum b = t->parm[2];
404
    uint32_t mask = t->parm[3].i;
405
    SNum result;
406
    bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0;  // non-standard rounding mode
407
    bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0;  // make NAN if exceptions
408
    uint8_t operandType = t->operandType;
409
    if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) {
410
        // subnormal status changed
411
        enableSubnormals (mask & (1<<MSK_SUBNORMAL));
412
        t->lastMask = mask;
413
    }
414
    if (operandType < 4) {      // integer
415
        result.q = a.q - b.q;   // subtract
416
    }
417
    else if (operandType == 5) { // float
418
        bool nana = isnan_f(a.i);                          // check for NAN input
419
        bool nanb = isnan_f(b.i);
420
        if (nana && nanb) {                                // both are NAN
421
            return (a.i << 1) > (b.i << 1) ? a.i : b.i;    // return the biggest payload
422
        }
423
        else if (nana) return a.q;
424
        else if (nanb) return b.q;
425
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
426
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
427
        result.f = a.f - b.f;                              // this is the actual subtraction
428
        if (isnan_f(result.i)) {
429
            // the result is NAN but neither input is NAN. This must be INF-INF
430
            result.q = t->makeNan(nan_invalid_sub, operandType);
431
        }
432
        if (detectExceptions) {
433
            uint32_t x = getExceptionFlags();              // read exceptions
434
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType);
435
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
436
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
437
        }
438
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
439
    }
440
    else if (operandType == 6) {// double
441
        bool nana = isnan_d(a.q);                          // check for NAN input
442
        bool nanb = isnan_d(b.q);
443
        if (nana && nanb) {                                // both are NAN
444
            return (a.q << 1) > (b.q << 1) ? a.q : b.q;    // return the biggest payload
445
        }
446
        else if (nana) return a.q;
447
        else if (nanb) return b.q;
448
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
449
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
450
        result.d = a.d - b.d;                              // this is the actual subtraction
451
        if (isnan_d(result.q)) {
452
            // the result is NAN but neither input is NAN. This must be INF-INF
453
            result.q = t->makeNan(nan_invalid_sub, operandType);
454
        }
455
        if (detectExceptions) {
456
            uint32_t x = getExceptionFlags();              // read exceptions
457
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType);
458
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
459
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
460
        }
461
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
462
    }
463
    else {
464
        // unsupported operand type
465
        t->interrupt(INT_WRONG_PARAMETERS);
466
        result.i = 0;
467
    }
468
    return result.q;
469
}
470
 
471
extern uint64_t f_sub_rev(CThread * t) {
472
    // subtract two numbers, b-a
473
    uint64_t temp = t->parm[2].q;  // swap operands
474
    t->parm[2].q = t->parm[1].q;
475
    t->parm[1].q = temp;
476
    uint64_t retval = f_sub(t);
477
    t->parm[2].q = temp;           // restore parm[2] in case it is a constant
478
    return retval;
479
}
480
 
481
uint64_t f_mul(CThread * t) {
482
    // multiply two numbers
483
    SNum a = t->parm[1];
484
    SNum b = t->parm[2];
485
    uint32_t mask = t->parm[3].i;
486
    SNum result;
487
    bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0;  // non-standard rounding mode
488
    bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0;  // make NAN if exceptions
489
    uint8_t operandType = t->operandType;
490
    if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) {
491
        // subnormal status changed
492
        enableSubnormals (mask & (1<<MSK_SUBNORMAL));
493
        t->lastMask = mask;
494
    }
495
    if (operandType < 4) {
496
        // integer
497
        result.q = a.q * b.q;
498
    }
499
    else if (operandType == 5) {                           // float
500
        bool nana = isnan_f(a.i);                          // check for NAN input
501
        bool nanb = isnan_f(b.i);
502
        if (nana && nanb) {                                // both are NAN
503
            return (a.i << 1) > (b.i << 1) ? a.i : b.i;    // return the biggest payload
504
        }
505
        else if (nana) return a.q;
506
        else if (nanb) return b.q;
507
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
508
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
509
        result.f = a.f * b.f;                              // this is the actual multiplication
510
        if (isnan_f(result.i)) {
511
            // the result is NAN but neither input is NAN. This must be 0*INF
512
            result.q = t->makeNan(nan_invalid_0mulinf, operandType);
513
        }
514
        if (detectExceptions) {
515
            uint32_t x = getExceptionFlags();              // read exceptions
516
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType);
517
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
518
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
519
        }
520
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
521
    }
522
 
523
    else if (operandType == 6) { // double
524
        bool nana = isnan_d(a.q);                          // check for NAN input
525
        bool nanb = isnan_d(b.q);
526
        if (nana && nanb) {                                // both are NAN
527
            return (a.q << 1) > (b.q << 1) ? a.q : b.q;    // return the biggest payload
528
        }
529
        else if (nana) return a.q;
530
        else if (nanb) return b.q;
531
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
532
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
533
        result.d = a.d * b.d;                              // this is the actual multiplication
534
        if (isnan_d(result.q)) {
535
            // the result is NAN but neither input is NAN. This must be 0*INF
536
            result.q = t->makeNan(nan_invalid_0mulinf, operandType);
537
        }
538
        if (detectExceptions) {
539
            uint32_t x = getExceptionFlags();              // read exceptions
540
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType);
541
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
542
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
543
        }
544
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
545
    }
546
    else {
547
        // unsupported operand type
548
        t->interrupt(INT_WRONG_PARAMETERS);
549
        result.i = 0;
550
    }
551
    return result.q;
552
}
553
 
554
 
555
uint64_t f_div(CThread * t) {
556
    // divide two floating point numbers or signed integers
557
    SNum a = t->parm[1];
558
    SNum b = t->parm[2];
559
    uint32_t mask = t->parm[3].i;
560
    SNum result;
561
    bool overflow = false;
562
    bool roundingMode = (mask & (3 << MSKI_ROUNDING))!=0;  // non-standard floating point rounding mode
563
    bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0;  // make NAN if exceptions
564
    bool nana, nanb;                                       // inputs are NAN
565
    uint8_t operandType = t->operandType;
566
    uint32_t intRounding = 0;                              // integer rounding mode
567
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) {
568
        intRounding = t->pInstr->a.im3;                    // E template. get integer rounding mode from IM3
569
    }
570
    if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) {
571
        // subnormal status changed
572
        enableSubnormals (mask & (1<<MSK_SUBNORMAL));
573
        t->lastMask = mask;
574
    }
575
    switch (operandType) {
576
    case 0:  // int8
577
        if (b.b == 0 || (a.b == 0x80 && b.bs == -1)) {
578
            result.i = 0x80; overflow = true;
579
        }
580
        else {
581
            result.i = a.bs / b.bs;
582
            if (intRounding != 0 && intRounding != 7 && abs(b.bs) != 1) {
583
                int rem = a.bs % b.bs;
584
                switch (intRounding) {
585
                case 4: { // nearest or even
586
                    uint32_t r2 = 2*abs(rem);
587
                    uint32_t b2 = abs(b.bs);
588
                    int s = int8_t(a.i ^ b.i) < 0 ? -1 : 1;  // one with sign of result
589
                    if (r2 > b2 || (r2 == b2 && (result.b & 1))) result.i += s;
590
                    break;}
591
                case 5:  // down
592
                    if (rem != 0 && int8_t(a.i ^ b.i) < 0 && result.b != 0x80u) result.i--;
593
                    break;
594
                case 6:  // up
595
                    if (rem != 0 && int8_t(a.i ^ b.i) >= 0) result.i++;
596
                    break;
597
                }
598
            }
599
        }
600
        break;
601
    case 1:  // int16
602
        if (b.s == 0 || (a.s == 0x8000u && b.ss == -1)) {
603
            result.i = 0x8000; overflow = true;
604
        }
605
        else {
606
            result.i = a.ss / b.ss;
607
            if (intRounding != 0 && intRounding != 7 && abs(b.ss) != 1) {
608
                int16_t rem = a.ss % b.ss;
609
                switch (intRounding) {
610
                case 4: { // nearest or even
611
                    uint16_t r2 = 2*abs(rem);
612
                    uint16_t b2 = abs(b.is);
613
                    int16_t  s = int16_t(a.s ^ b.s) < 0 ? -1 : 1;  // one with sign of result
614
                    if (r2 > b2 || (r2 == b2 && (result.s & 1))) result.s += s;
615
                    break;}
616
                case 5:  // down
617
                    if (rem != 0 && int16_t(a.s ^ b.s) < 0 && result.s != 0x8000u) result.s--;
618
                    break;
619
                case 6:  // up
620
                    if (rem != 0 && int16_t(a.s ^ b.s) >= 0) result.s++;
621
                    break;
622
                }
623
            }
624
        }
625
        break;
626
    case 2:  // int32
627
        if (b.i == 0 || (a.i == sign_f && b.is == -1)) {
628
            result.i = sign_f; overflow = true;
629
        }
630
        else {
631
            result.i = a.is / b.is;
632
            if (intRounding != 0 && intRounding != 7 && abs(b.is) != 1) {
633
                int rem = a.is % b.is;
634
                switch (intRounding) {
635
                case 4: { // nearest or even
636
                    uint32_t r2 = 2*abs(rem);
637
                    uint32_t b2 = abs(b.is);
638
                    int s = int32_t(a.i ^ b.i) < 0 ? -1 : 1;  // one with sign of result
639
                    if (r2 > b2 || (r2 == b2 && (result.i & 1))) result.i += s;
640
                    break;}
641
                case 5:  // down
642
                    if (rem != 0 && int32_t(a.i ^ b.i) < 0 && result.i != 0x80000000u) result.i--;
643
                    break;
644
                case 6:  // up
645
                    if (rem != 0 && int32_t(a.i ^ b.i) >= 0) result.i++;
646
                    break;
647
                }
648
            }
649
        }
650
        break;
651
    case 3:  // int64
652
        if (b.q == 0 || (a.q == sign_d && b.qs == int64_t(-1))) {
653
            result.q = sign_d; overflow = true;
654
        }
655
        else {
656
            result.qs = a.qs / b.qs;
657
            if (intRounding != 0 && intRounding != 7 && abs(b.qs) != 1) {
658
                int64_t rem = a.qs % b.qs;
659
                switch (intRounding) {
660
                case 4: { // nearest or even
661
                    uint64_t r2 = 2*abs(rem);
662
                    uint64_t b2 = abs(b.qs);
663
                    int64_t s = int64_t(a.q ^ b.q) < 0 ? -1 : 1;  // one with sign of result
664
                    if (r2 > b2 || (r2 == b2 && (result.i & 1))) result.q += s;
665
                    break;}
666
                case 5:  // down
667
                    if (rem != 0 && int64_t(a.q ^ b.q) < 0 && result.q != 0x8000000000000000u) result.q--;
668
                    break;
669
                case 6:  // up
670
                    if (rem != 0 && int64_t(a.q ^ b.q) >= 0) result.q++;
671
                    break;
672
                }
673
            }
674
        }
675
        break;
676
    case 5:  // float
677
        nana = isnan_f(a.i);                          // check for NAN input
678
        nanb = isnan_f(b.i);
679
        if (nana && nanb) {                                // both are NAN
680
            result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i;    // return the biggest payload
681
        }
682
        else if (nana) result.i = a.i;
683
        else if (nanb) result.i = b.i;
684
        else if (b.i << 1 == 0) { // division by zero
685
            if (a.i << 1 == 0) { // 0./0. = nan
686
                result.q = t->makeNan(nan_invalid_0div0, operandType);
687
            }
688
            else {
689
                // a / 0. = infinity
690
                if (mask & (1<<MSK_DIVZERO)) result.q = t->makeNan(nan_div0, operandType);
691
                else result.i = inf_f;
692
            }
693
            result.i |= (a.i ^ b.i) & sign_f; // sign bit
694
        }
695
        else if (isinf_f(a.i) && isinf_f(b.i)) {
696
            result.i = (uint32_t)t->makeNan(nan_invalid_divinf, operandType); // INF/INF
697
            result.i |= (a.i ^ b.i) & sign_f; // sign bit
698
        }
699
        else {
700
            if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
701
            if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
702
            result.f = a.f / b.f;                              // normal division
703
            if (detectExceptions) {
704
                uint32_t x = getExceptionFlags();              // read exceptions
705
                if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_div, operandType);
706
                else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
707
                else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
708
            }
709
            if (roundingMode) setRoundingMode(0);              // reset rounding mode
710
        }
711
        break;
712
    case 6:  // double
713
        nana = isnan_d(a.q);                          // check for NAN input
714
        nanb = isnan_d(b.q);
715
        if (nana && nanb) {                                // both are NAN
716
            result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q;    // return the biggest payload
717
        }
718
        else if (nana) result.q = a.q;
719
        else if (nanb) result.q = b.q;
720
        else if (b.q << 1 == 0) { // division by zero
721
            if (a.q << 1 == 0) { // 0./0. = nan
722
                result.q = t->makeNan(nan_invalid_0div0, operandType);
723
            }
724
            else {
725
                // a / 0. = infinity
726
                if (mask & (1<<MSK_DIVZERO)) result.q = t->makeNan(nan_div0, operandType);
727
                else result.q = inf_d;
728
            }
729
            result.q |= (a.q ^ b.q) & sign_d; // sign bit
730
        }
731
        else if (isinf_d(a.q) && isinf_d(b.q)) {
732
            result.q = t->makeNan(nan_invalid_divinf, operandType); // INF/INF
733
            result.q |= (a.q ^ b.q) & sign_d; // sign bit
734
        }
735
        else {
736
            if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
737
            if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
738
            result.d = a.d / b.d;                              // normal division
739
            if (detectExceptions) {
740
                uint32_t x = getExceptionFlags();              // read exceptions
741
                if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_div, operandType);
742
                else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
743
                else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
744
            }
745
            if (roundingMode) setRoundingMode(0);              // reset rounding mode
746
        }
747
        break;
748
    default:
749
        t->interrupt(INT_WRONG_PARAMETERS);
750
        result.i = 0;
751
    }
752
    return result.q;
753
}
754
 
755
uint64_t f_div_u(CThread * t) {
756
    // divide two unsigned numbers
757
 
758
    if (t->operandType > 4) {
759
        return f_div(t);                         // floating point: same as f_div
760
    }
761
    SNum a = t->parm[1];
762
    SNum b = t->parm[2];
763
    //SNum mask = t->parm[3];
764
    SNum result;
765
    bool overflow = false;
766
    uint32_t intRounding = 0;                              // integer rounding mode
767
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) {
768
        intRounding = t->pInstr->a.im3;                    // E template. get integer rounding mode from IM3
769
    }
770
 
771
    switch (t->operandType) {
772
    case 0:  // int8
773
        if (b.b == 0) {
774
            result.i = 0xFF; overflow = true;
775
        }
776
        else {
777
            result.i = a.b / b.b;
778
            if (intRounding == 4 || intRounding == 6) {
779
                uint32_t rem = a.b % b.b;
780
                switch (intRounding) {
781
                case 4:  // nearest or even
782
                    if (rem*2 > b.b || (rem*2 == b.b && (result.i & 1))) result.i++;
783
                    break;
784
                case 6:  // up
785
                    if (rem != 0) result.i++;
786
                    break;
787
                }
788
            }
789
        }
790
        break;
791
    case 1:  // int16
792
        if (b.s == 0) {
793
            result.i = 0xFFFF; overflow = true;
794
        }
795
        else {
796
            result.i = a.s / b.s;
797
            if (intRounding == 4 || intRounding == 6) {
798
                uint32_t rem = a.s % b.s;
799
                switch (intRounding) {
800
                case 4:  // nearest or even
801
                    if (rem*2 > b.s || (rem*2 == b.s && (result.i & 1))) result.i++;
802
                    break;
803
                case 6:  // up
804
                    if (rem != 0) result.i++;
805
                    break;
806
                }
807
            }
808
        }
809
        break;
810
    case 2:  // int32
811
        if (b.i == 0) {
812
            result.is = -1; overflow = true;
813
        }
814
        else {
815
            result.i = a.i / b.i;
816
            if (intRounding == 4 || intRounding == 6) {
817
                uint32_t rem = a.i % b.i;
818
                switch (intRounding) {
819
                case 4:  // nearest or even
820
                    if (rem*2 > b.i || (rem*2 == b.i && (result.i & 1))) result.i++;
821
                    break;
822
                case 6:  // up
823
                    if (rem != 0) result.i++;
824
                    break;
825
                }
826
            }
827
        }
828
        break;
829
    case 3:  // int64
830
        if (b.q == 0) {
831
            result.qs = -(int64_t)1; overflow = true;
832
        }
833
        else {
834
            result.q = a.q / b.q;
835
            if (intRounding == 4 || intRounding == 6) {
836
                uint64_t rem = a.q % b.q;
837
                switch (intRounding) {
838
                case 4:  // nearest or even
839
                    if (rem*2 > b.q || (rem*2 == b.q && (result.q & 1))) result.q++;
840
                    break;
841
                case 6:  // up
842
                    if (rem != 0) result.q++;
843
                    break;
844
                }
845
            }
846
        }
847
        break;
848
    default:
849
        t->interrupt(INT_WRONG_PARAMETERS);
850
    }
851
    //if (overflow && (mask.i & MSK_OVERFL_UNSIGN)) t->interrupt(INT_OVERFL_UNSIGN);    // unsigned integer overflow
852
    return result.q;
853
}
854
 
855
static uint64_t f_div_rev(CThread * t) {
856
    // divide two numbers, b/a
857
    uint64_t temp = t->parm[2].q;  // swap operands
858
    t->parm[2].q = t->parm[1].q;
859
    t->parm[1].q = temp;
860
    uint64_t retval = f_div(t);
861
    t->parm[2].q = temp;           // restore parm[2] in case it is a constant
862
    return retval;
863
}
864
 
865
uint64_t mul64_128u(uint64_t * low, uint64_t a, uint64_t b) {
866
    // extended unsigned multiplication 64*64 -> 128 bits.
867
    // Note: you may replace this by inline assembly or intrinsic functions on
868
    // platforms that have extended multiplication instructions.
869
 
870
    // The return value is the high half of the product, 
871
    // *low receives the low half of the product
872
    union { // arrays for storing result
873
        uint64_t q[2];
874
        uint32_t i[4];
875
    } res;
876
    uint64_t t;             // temporary product
877
    uint64_t k;             // temporary carry
878
    uint64_t a0 = (uint32_t)a;     // low a
879
    uint64_t a1 = a >> 32;         // high a
880
    uint64_t b0 = (uint32_t)b;     // low b
881
    uint64_t b1 = b >> 32;         // high b 
882
    t = a0 * b0;
883
    res.i[0] = (uint32_t)t;
884
    k = t >> 32;
885
    t = a1 * b0 + k;
886
    res.i[1] = (uint32_t)t;
887
    k = t >> 32;
888
    res.i[2] = (uint32_t)k;
889
    t = a0 * b1 + res.i[1];
890
    res.i[1] = (uint32_t)t;
891
    k = t >> 32;
892
    t = a1 * b1 + k + res.i[2];
893
    res.i[2] = (uint32_t)t;
894
    k = t >> 32;
895
    res.i[3] = (uint32_t)k;
896
    if (low) *low = res.q[0];
897
    return res.q[1];
898
}
899
 
900
int64_t mul64_128s(uint64_t * low, int64_t a, int64_t b) {
901
    // extended signed multiplication 64*64 -> 128 bits.
902
    // Note: you may replace this by inline assembly or intrinsic functions on
903
    // platforms that have extended multiplication instructions.
904
 
905
    // The return value is the high half of the product, 
906
    // *low receives the low half of the product
907
    bool sign = false;
908
    if (a < 0) {
909
        a = -a, sign = true;
910
    }
911
    if (b < 0) {
912
        b = -b; sign = !sign;
913
    }
914
    uint64_t lo, hi;
915
    hi = mul64_128u(&lo, a, b);
916
    if (sign) {             // change sign
917
        lo = uint64_t(-int64_t(lo));
918
        hi = ~hi;
919
        if (lo == 0) hi++;  // carry
920
    }
921
    if (low) *low = lo;
922
    return (int64_t)hi;
923
}
924
 
925
static uint64_t f_mul_hi(CThread * t) {
926
    // high part of extended signed multiply
927
    SNum result;
928
    switch (t->operandType) {
929
    case 0:   // int8
930
        result.qs = ((int32_t)t->parm[1].bs * (int32_t)t->parm[2].bs) >> 8;
931
        break;
932
    case 1:   // int16
933
        result.qs = ((int32_t)t->parm[1].ss * (int32_t)t->parm[2].ss) >> 16;
934
        break;
935
    case 2:   // int32
936
        result.qs = ((int64_t)t->parm[1].is * (int64_t)t->parm[2].is) >> 32;
937
        break;
938
    case 3:   // int64
939
        result.qs = mul64_128s(0, t->parm[1].qs, t->parm[2].qs);
940
        break;
941
    default:
942
        t->interrupt(INT_WRONG_PARAMETERS);
943
        result.q = 0;
944
    }
945
    return result.q;
946
}
947
 
948
static uint64_t f_mul_hi_u(CThread * t) {
949
    // high part of extended unsigned multiply
950
    SNum result;
951
    switch (t->operandType) {
952
    case 0:   // int8
953
        result.q = ((uint32_t)t->parm[1].b * (uint32_t)t->parm[2].b) >> 8;
954
        break;
955
    case 1:   // int16
956
        result.q = ((uint32_t)t->parm[1].s * (uint32_t)t->parm[2].s) >> 16;
957
        break;
958
    case 2:   // int32
959
        result.q = ((uint64_t)t->parm[1].i * (uint64_t)t->parm[2].i) >> 32;
960
        break;
961
    case 3:   // int64
962
        result.q = mul64_128u(0, t->parm[1].q, t->parm[2].q);
963
        break;
964
    default:
965
        t->interrupt(INT_WRONG_PARAMETERS);
966
        result.q = 0;
967
    }
968
    return result.q;
969
}
970
 
971
 
972
static uint64_t f_rem(CThread * t) {
973
    // remainder/modulo of two signed numbers
974
    SNum a = t->parm[1];
975
    SNum b = t->parm[2];
976
    //SNum mask = t->parm[3];
977
    SNum result;
978
    bool overflow = false;
979
 
980
    switch (t->operandType) {
981
    case 0:  // int8
982
        if (b.b == 0 || (a.b == 0x80 && b.bs == -1)) {
983
            result.i = 0x80; overflow = true;
984
        }
985
        else result.is = a.bs % b.bs;
986
        break;
987
    case 1:  // int16
988
        if (b.s == 0 || (a.s == 0x8000 && b.ss == -1)) {
989
            result.i = 0x8000; overflow = true;
990
        }
991
        else result.is = a.ss % b.ss;
992
        break;
993
    case 2:  // int32
994
        if (b.i == 0 || (a.i == sign_f && b.is == -1)) {
995
            result.i = sign_f; overflow = true;
996
        }
997
        else result.is = a.is % b.is;
998
        break;
999
    case 3:  // int64
1000
        if (b.q == 0 || (a.q == sign_d && b.qs == int64_t(-1))) {
1001
            result.q = sign_d; overflow = true;
1002
        }
1003
        else result.qs = a.qs % b.qs;
1004
        break;
1005
    case 5:  // float
1006
        if (isnan_f(a.i) && isnan_f(b.i)) {    // both are NAN
1007
            result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload
1008
        }
1009
        else if (b.i << 1 == 0 || isinf_f(a.i)) { // rem(1,0) or rem(inf,1)
1010
            result.q = t->makeNan(nan_invalid_rem, 5);
1011
        }
1012
        else {
1013
            result.f = fmodf(a.f, b.f);           // normal modulo
1014
        }
1015
        break;
1016
    case 6:  // double
1017
        if (isnan_d(a.q) && isnan_d(b.q)) {    // both are NAN
1018
            result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload
1019
        }
1020
        else if (b.q << 1 == 0 || isinf_d(a.q)) { // rem(1,0) or rem(inf,1)
1021
            result.q = t->makeNan(nan_invalid_rem, 5);
1022
        }
1023
        else {
1024
            result.d = fmod(a.d, b.d);           // normal modulo
1025
        }
1026
        break;
1027
    default:
1028
        t->interrupt(INT_WRONG_PARAMETERS);
1029
        result.i = 0;
1030
    }
1031
    //if (overflow&& (mask.i & MSK_OVERFL_SIGN)) t->interrupt(INT_OVERFL_SIGN);    // signed integer overflow
1032
    return result.q;
1033
}
1034
 
1035
static uint64_t f_rem_u(CThread * t) {
1036
    // remainder/modulo of two unsigned numbers
1037
    if (t->operandType > 4) return f_rem(t);  // float types use f_rem
1038
    SNum a = t->parm[1];
1039
    SNum b = t->parm[2];
1040
    //SNum mask = t->parm[3];
1041
    SNum result;
1042
    bool overflow = false;
1043
 
1044
    switch (t->operandType) {
1045
    case 0:  // int8
1046
        if (b.b == 0) {
1047
            result.i = 0x80; overflow = true;
1048
        }
1049
        else result.i = a.b % b.b;
1050
        break;
1051
    case 1:  // int16
1052
        if (b.s == 0) {
1053
            result.i = 0x8000; overflow = true;
1054
        }
1055
        else result.i = a.s % b.s;
1056
        break;
1057
    case 2:  // int32
1058
        if (b.i == 0) {
1059
            result.i = sign_f; overflow = true;
1060
        }
1061
        else result.i = a.i % b.i;
1062
        break;
1063
    case 3:  // int64
1064
        if (b.q == 0) {
1065
            result.q = sign_d; overflow = true;
1066
        }
1067
        else result.q = a.q % b.q;
1068
        break;
1069
    default:
1070
        t->interrupt(INT_WRONG_PARAMETERS);
1071
        result.i = 0;
1072
    }
1073
    //if (overflow&& (mask.i & MSK_OVERFL_SIGN)) t->interrupt(INT_OVERFL_SIGN);    // signed integer overflow
1074
    return result.q;
1075
}
1076
 
1077
static uint64_t f_min(CThread * t) {
1078
    // minimum of two signed numbers
1079
    SNum a = t->parm[1];
1080
    SNum b = t->parm[2];
1081
    SNum result;
1082
    int8_t isnan;
1083
    switch (t->operandType) {
1084
    case 0:   // int8
1085
        result.is = a.bs < b.bs ? a.bs : b.bs;
1086
        break;
1087
    case 1:   // int16
1088
        result.is = a.ss < b.ss ? a.ss : b.ss;
1089
        break;
1090
    case 2:   // int32
1091
        result.is = a.is < b.is ? a.is : b.is;
1092
        break;
1093
    case 3:   // int64
1094
        result.qs = a.qs < b.qs ? a.qs : b.qs;
1095
        break;
1096
    case 5:   // float
1097
        result.f = a.f < b.f ? a.f : b.f;
1098
        // check NANs
1099
        isnan  = isnan_f(a.i);           // a is nan
1100
        isnan |= isnan_f(b.i) << 1;      // b is nan
1101
        if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard
1102
            if (isnan == 1) result.i = a.i;
1103
            else if (isnan == 2) result.i = b.i;
1104
            else result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload
1105
        }
1106
        break;
1107
    case 6:   // double
1108
        result.d = a.d < b.d ? a.d : b.d;
1109
        // check NANs
1110
        isnan  = isnan_d(a.q);           // a is nan
1111
        isnan |= isnan_d(b.q) << 1;      // b is nan
1112
        if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard
1113
            if (isnan == 1) result.q = a.q;
1114
            else if (isnan == 2) result.q = b.q;
1115
            else result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload
1116
        }
1117
        break;
1118
    default:
1119
        t->interrupt(INT_WRONG_PARAMETERS);
1120
        result.i = 0;
1121
    }
1122
    return result.q;
1123
}
1124
 
1125
static uint64_t f_min_u(CThread * t) {
1126
    // minimum of two unsigned numbers
1127
    SNum a = t->parm[1];
1128
    SNum b = t->parm[2];
1129
    SNum result;
1130
    switch (t->operandType) {
1131
    case 0:   // int8
1132
        result.i = a.b < b.b ? a.b : b.b;
1133
        break;
1134
    case 1:   // int16
1135
        result.i = a.s < b.s ? a.s : b.s;
1136
        break;
1137
    case 2:   // int32
1138
        result.i = a.i < b.i ? a.i : b.i;
1139
        break;
1140
    case 3:   // int64
1141
        result.q = a.q < b.q ? a.q : b.q;
1142
        break;
1143
    case 5:   // float
1144
    case 6:   // double
1145
        return f_min(t);
1146
    default:
1147
        t->interrupt(INT_WRONG_PARAMETERS);
1148
        result.i = 0;
1149
    }
1150
    return result.q;
1151
}
1152
 
1153
static uint64_t f_max(CThread * t) {
1154
    // maximum of two signed numbers
1155
    SNum a = t->parm[1];
1156
    SNum b = t->parm[2];
1157
    SNum result;
1158
    uint8_t isnan;
1159
    switch (t->operandType) {
1160
    case 0:   // int8
1161
        result.is = a.bs > b.bs ? a.bs : b.bs;
1162
        break;
1163
    case 1:   // int16
1164
        result.is = a.ss > b.ss ? a.ss : b.ss;
1165
        break;
1166
    case 2:   // int32
1167
        result.is = a.is > b.is ? a.is : b.is;
1168
        break;
1169
    case 3:   // int64
1170
        result.qs = a.qs > b.qs ? a.qs : b.qs;
1171
        break;
1172
    case 5:   // float
1173
        result.f = a.f > b.f ? a.f : b.f;
1174
        // check NANs
1175
        isnan  = isnan_f(a.i);           // a is nan
1176
        isnan |= isnan_f(b.i) << 1;      // b is nan
1177
        if (isnan) {
1178
            // propagate NAN according to the 2019 revision of the IEEE-754 standard
1179
            if (isnan == 1) result.i = a.i;
1180
            else if (isnan == 2) result.i = b.i;
1181
            else result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload
1182
        }
1183
        break;
1184
    case 6:   // double
1185
        result.d = a.d > b.d ? a.d : b.d;
1186
        // check NANs
1187
        isnan  = isnan_d(a.q);           // a is nan
1188
        isnan |= isnan_d(b.q) << 1;      // b is nan
1189
        if (isnan) {
1190
            // propagate NAN according to the 2019 revision of the IEEE-754 standard
1191
            if (isnan == 1) result.q = a.q;
1192
            else if (isnan == 2) result.q = b.q;
1193
            else result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload
1194
        }
1195
        break;
1196
    default:
1197
        t->interrupt(INT_WRONG_PARAMETERS);
1198
        result.i = 0;
1199
    }
1200
    return result.q;
1201
}
1202
 
1203
static uint64_t f_max_u(CThread * t) {
1204
    // maximum of two unsigned numbers
1205
    SNum a = t->parm[1];
1206
    SNum b = t->parm[2];
1207
    SNum result;
1208
    switch (t->operandType) {
1209
    case 0:   // int8
1210
        result.i = a.b > b.b ? a.b : b.b;
1211
        break;
1212
    case 1:   // int16
1213
        result.i = a.s > b.s ? a.s : b.s;
1214
        break;
1215
    case 2:   // int32
1216
        result.i = a.i > b.i ? a.i : b.i;
1217
        break;
1218
    case 3:   // int64
1219
        result.q = a.q > b.q ? a.q : b.q;
1220
        break;
1221
    case 5:   // float
1222
    case 6:   // double
1223
        return f_max(t);
1224
    default:
1225
        t->interrupt(INT_WRONG_PARAMETERS);
1226
        result.i = 0;
1227
    }
1228
    return result.q;
1229
}
1230
 
1231
static uint64_t f_and(CThread * t) {
1232
    // bitwise AND of two numbers
1233
    return t->parm[1].q & t->parm[2].q;
1234
}
1235
/*
1236
static uint64_t f_and_not(CThread * t) {
1237
    // a & ~b
1238
    return t->parm[1].q & ~ t->parm[2].q;
1239
}*/
1240
 
1241
static uint64_t f_or(CThread * t) {
1242
    // bitwise OR of two numbers
1243
    return t->parm[1].q | t->parm[2].q;
1244
}
1245
 
1246
static uint64_t f_xor(CThread * t) {
1247
    // bitwise exclusive OR of two numbers
1248
    return t->parm[1].q ^ t->parm[2].q;
1249
}
1250
 
1251
static uint64_t f_select_bits(CThread * t) {
1252
    // a & c | b & ~c
1253
    return (t->parm[0].q & t->parm[2].q) | (t->parm[1].q & ~ t->parm[2].q);
1254
}
1255
 
1256
static uint64_t f_shift_left(CThread * t) {
1257
    // integer: a << b, float a * 2^b where b is interpreted as integer
1258
    SNum a = t->parm[1];
1259
    SNum b = t->parm[2];
1260
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1261
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1262
 
1263
    SNum mask = t->parm[3];
1264
    SNum result;
1265
    uint64_t exponent;
1266
    switch (t->operandType) {
1267
    case 0:   // int8
1268
        result.b = a.b << b.b;
1269
        if (b.b > 7) result.q = 0;
1270
        break;
1271
    case 1:   // int16
1272
        result.s = a.s << b.s;
1273
        if (b.b > 15) result.q = 0;
1274
        break;
1275
    case 2:   // int32
1276
        result.i = a.i << b.i;
1277
        if (b.b > 31) result.q = 0;
1278
        break;
1279
    case 3:   // int64
1280
        result.q = a.q << b.q;
1281
        if (b.b > 63) result.q = 0;
1282
        break;
1283
    case 5:   // float
1284
        if (isnan_f(a.i)) return a.q;  // a is nan
1285
        exponent = a.i >> 23 & 0xFF;       // get exponent
1286
        if (exponent == 0) return a.i & sign_f; // a is zero or subnormal. return zero
1287
        exponent += b.i;                  // add integer to exponent
1288
        if ((int32_t)exponent >= 0xFF) { // overflow
1289
            result.i = inf_f;
1290
        }
1291
        else if ((int32_t)exponent <= 0) { // underflow
1292
            if ((mask.i & (1<<MSK_UNDERFLOW)) != 0) {  // make NAN if exception
1293
                result.q = t->makeNan(nan_underflow, 5);
1294
            }
1295
            else {
1296
                result.q = 0;
1297
            }
1298
        }
1299
        else {
1300
            result.i = (a.i & 0x807FFFFF) | uint32_t(exponent) << 23; // insert new exponent
1301
        }
1302
        break;
1303
    case 6:   // double
1304
        if (isnan_d(a.q)) return a.q;  // a is nan
1305
        exponent = a.q >> 52 & 0x7FF;
1306
        if (exponent == 0) return a.q & sign_d; // a is zero or subnormal. return zero
1307
        exponent += b.q;                  // add integer to exponent
1308
        if ((int64_t)exponent >= 0x7FF) { // overflow
1309
            result.q = inf_d;
1310
            //if (mask.i & MSK_OVERFL_FLOAT) t->interrupt(INT_OVERFL_FLOAT);
1311
        }
1312
        else if ((int64_t)exponent <= 0) { // underflow
1313
            if ((mask.i & (1<<MSK_UNDERFLOW)) != 0) {  // make NAN if exception
1314
                result.q = t->makeNan(nan_underflow, 6);
1315
            }
1316
            else {
1317
                result.q = 0;
1318
            }
1319
        }
1320
        else {
1321
            result.q = (a.q & 0x800FFFFFFFFFFFFF) | (exponent << 52); // insert new exponent
1322
        }
1323
        break;
1324
    default:
1325
        t->interrupt(INT_WRONG_PARAMETERS);
1326
        result.i = 0;
1327
    }
1328
    return result.q;
1329
}
1330
 
1331
static uint64_t f_rotate(CThread * t) {
1332
    // rotate bits left
1333
    SNum a = t->parm[1];
1334
    SNum b = t->parm[2];
1335
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1336
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1337
 
1338
    SNum result;
1339
    switch (t->operandType) {
1340
    case 0:   // int8
1341
        result.b = a.b << (b.b & 7) | a.b >> (8 - (b.b & 7));
1342
        break;
1343
    case 1:   // int16
1344
        result.s = a.s << (b.s & 15) | a.s >> (16 - (b.s & 15));
1345
        break;
1346
    case 2:   // int32
1347
    case 5:   // float
1348
        result.i = a.i << (b.i & 31) | a.i >> (32 - (b.i & 31));
1349
        break;
1350
    case 3:   // int64
1351
    case 6:   // double
1352
        result.q = a.q << (b.q & 63) | a.q >> (64 - (b.q & 63));
1353
        break;
1354
    default:
1355
        t->interrupt(INT_WRONG_PARAMETERS);
1356
        result.i = 0;
1357
    }
1358
    return result.q;
1359
}
1360
 
1361
static uint64_t f_shift_right_s(CThread * t) {
1362
    // integer only: a >> b, with sign extension
1363
    SNum a = t->parm[1];
1364
    SNum b = t->parm[2];
1365
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1366
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1367
 
1368
    SNum result;
1369
    switch (t->operandType) {
1370
    case 0:   // int8
1371
        result.bs = a.bs >> b.bs;
1372
        if (b.b > 7) result.qs = a.bs >> 7;
1373
        break;
1374
    case 1:   // int16
1375
        result.ss = a.ss >> b.ss;
1376
        if (b.s > 15) result.qs = a.ss >> 15;
1377
        break;
1378
    case 2:   // int32
1379
        result.is = a.is >> b.is;
1380
        if (b.i > 31) result.qs = a.is >> 31;
1381
        break;
1382
    case 3:   // int64
1383
        result.qs = a.qs >> b.qs;
1384
        if (b.q > 63) result.qs = a.qs >> 63;
1385
        break;
1386
    default:
1387
        t->interrupt(INT_WRONG_PARAMETERS);
1388
        result.i = 0;
1389
    }
1390
    return result.q;
1391
}
1392
 
1393
static uint64_t f_shift_right_u(CThread * t) {
1394
    // integer only: a >> b, with zero extension
1395
    SNum a = t->parm[1];
1396
    SNum b = t->parm[2];
1397
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1398
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1399
 
1400
    SNum result;
1401
    switch (t->operandType) {
1402
    case 0:   // int8
1403
        result.b = a.b >> b.b;
1404
        if (b.b > 7) result.q = 0;
1405
        break;
1406
    case 1:   // int16
1407
        result.s = a.s >> b.s;
1408
        if (b.s > 15) result.q = 0;
1409
        break;
1410
    case 2:   // int32
1411
        result.i = a.i >> b.i;
1412
        if (b.i > 31) result.q = 0;
1413
        break;
1414
    case 3:   // int64
1415
        result.q = a.q >> b.q;
1416
        if (b.q > 63) result.q = 0;
1417
        break;
1418
    default:
1419
        t->interrupt(INT_WRONG_PARAMETERS);
1420
        result.i = 0;
1421
    }
1422
    return result.q;
1423
}
1424
 
1425
static uint64_t f_funnel_shift(CThread * t) {
1426
    uint64_t shift_count = t->parm[2].q;
1427
    if ((t->operands[5] & 0x20) && t->operandType > 4) shift_count = t->parm[4].q; // avoid conversion of c to float
1428
 
1429
    if (t->vect == 0) {     // g.p. registers. shift integers n bytes
1430
        uint32_t dataSize = dataSizeTableBits[t->operandType]; //  operand size, bits
1431
        uint64_t dataMask = dataSizeMask[t->operandType]; //  operand size mask, dataSize bits of 1        
1432
        if ((shift_count & dataMask) >= dataSize) return 0;  // shift count out of range
1433
        return ((t->parm[0].q & dataMask) >> shift_count | (t->parm[1].q & dataMask) << (dataSize - shift_count)) & dataMask;
1434
    }
1435
    else {  // vector registers. shift concatenated whole vectors n bytes down
1436
 
1437
        // The second operand may be a vector register of incomplete size or a broadcast memory operand.
1438
        // Both input operands may be the same as the destination register.
1439
        // The operand size may not match the shift count
1440
        // The easiest way to handle all these cases is to copy both input vectors into temporary buffers
1441
        switch (t->operandType) {
1442
        case 0:
1443
            *(t->tempBuffer + t->vectorOffset) =  t->parm[0].bs;
1444
            *(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) =  t->parm[1].bs;
1445
            break;
1446
        case 1:
1447
            *(uint16_t*)(t->tempBuffer + t->vectorOffset) =  t->parm[0].s;
1448
            *(uint16_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) =  t->parm[1].s;
1449
            break;
1450
        case 2: case 5:
1451
            *(uint32_t*)(t->tempBuffer + t->vectorOffset) =  t->parm[0].i;
1452
            *(uint32_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) =  t->parm[1].i;
1453
            break;
1454
        case 3: case 6:
1455
            *(uint64_t*)(t->tempBuffer + t->vectorOffset) =  t->parm[0].q;
1456
            *(uint64_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) =  t->parm[1].q;
1457
            break;
1458
        case 4: case 7:  // to do: support 128 bits
1459
            t->interrupt(INT_WRONG_PARAMETERS);
1460
            break;
1461
        }
1462
        uint32_t dataSizeBytes = dataSizeTable[t->operandType]; //  operand size, bits
1463
        if (t->vectorOffset + dataSizeBytes >= t->vectorLengthR) {
1464
            // last iteration. Make the result
1465
            uint8_t  rd = t->operands[0];         // destination vector
1466
            shift_count *= dataSizeBytes;         // shift n elements
1467
            if (shift_count >= t->vectorLengthR) {
1468
                // shift count out of range. return 0
1469
                memset(t->vectors.buf() + t->MaxVectorLength*rd, 0, t->vectorLengthR);
1470
            }
1471
            else {
1472
                // copy upper part of first vector to lower part of destination vector
1473
                memcpy(t->vectors.buf() + t->MaxVectorLength * rd, t->tempBuffer + shift_count, t->vectorLengthR - shift_count);
1474
                // copy lower part of second vector to upper part of destination vector
1475
                memcpy(t->vectors.buf() + t->MaxVectorLength * rd + (t->vectorLengthR - shift_count), t->tempBuffer + t->MaxVectorLength, shift_count);
1476
            }
1477
        }
1478
        t->running = 2;        // don't save RD. It is saved by above code
1479
        return 0;
1480
    }
1481
}
1482
 
1483
static uint64_t f_set_bit(CThread * t) {
1484
    // a | 1 << b
1485
    SNum a = t->parm[1];
1486
    SNum b = t->parm[2];
1487
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1488
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1489
 
1490
    SNum result;
1491
    switch (t->operandType) {
1492
    case 0:   // int8
1493
        result.b = a.b;
1494
        if (b.b < 8) result.b |= 1 << b.b;
1495
        break;
1496
    case 1:   // int16
1497
        result.s = a.s;
1498
        if (b.s < 16) result.s |= 1 << b.s;
1499
        break;
1500
    case 2:   // int32
1501
    case 5:   // float
1502
        result.i = a.i;
1503
        if (b.i < 32) result.i |= 1 << b.i;
1504
        break;
1505
    case 3:   // int64
1506
    case 6:   // double
1507
        result.q = a.q;
1508
        if (b.q < 64) result.q |= (uint64_t)1 << b.q;
1509
        break;
1510
    default:
1511
        t->interrupt(INT_WRONG_PARAMETERS);
1512
        result.i = 0;
1513
    }
1514
    return result.q;
1515
}
1516
 
1517
static uint64_t f_clear_bit(CThread * t) {
1518
    // a & ~ (1 << b)
1519
    SNum a = t->parm[1];
1520
    SNum b = t->parm[2];
1521
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1522
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1523
    SNum result;
1524
    switch (t->operandType) {
1525
    case 0:   // int8
1526
        result.b = a.b;
1527
        if (b.b < 8) result.b &= ~(1 << b.b);
1528
        break;
1529
    case 1:   // int16
1530
        result.s = a.s;
1531
        if (b.s < 16) result.s &= ~(1 << b.s);
1532
        break;
1533
    case 2:   // int32
1534
    case 5:   // float
1535
        result.i = a.i;
1536
        if (b.i < 32) result.i &= ~(1 << b.i);
1537
        break;
1538
    case 3:   // int64
1539
    case 6:   // double
1540
        result.q = a.q;
1541
        if (b.q < 64) result.q &= ~((uint64_t)1 << b.q);
1542
        break;
1543
    default:
1544
        t->interrupt(INT_WRONG_PARAMETERS);
1545
        result.i = 0;
1546
    }
1547
    return result.q;
1548
}
1549
 
1550
static uint64_t f_toggle_bit(CThread * t) {
1551
    // a ^ (1 << b)
1552
    SNum a = t->parm[1];
1553
    SNum b = t->parm[2];
1554
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1555
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1556
    SNum result;
1557
    switch (t->operandType) {
1558
    case 0:   // int8
1559
        result.b = a.b;
1560
        if (b.b < 8) result.b ^= 1 << b.b;
1561
        break;
1562
    case 1:   // int16
1563
        result.s = a.s;
1564
        if (b.s < 16) result.s ^= 1 << b.s;
1565
        break;
1566
    case 2:   // int32
1567
    case 5:   // float
1568
        result.i = a.i;
1569
        if (b.i < 32) result.i ^= 1 << b.i;
1570
        break;
1571
    case 3:   // int64
1572
    case 6:   // double
1573
        result.q = a.q;
1574
        if (b.q < 64) result.q ^= (uint64_t)1 << b.q;
1575
        break;
1576
    default:
1577
        t->interrupt(INT_WRONG_PARAMETERS);
1578
        result.i = 0;
1579
    }
1580
    return result.q;
1581
}
1582
 
1583
/*
1584
static uint64_t f_and_bit(CThread * t) {
1585
    // clear all bits except one
1586
    // a & (1 << b)
1587
    SNum a = t->parm[1];
1588
    SNum b = t->parm[2];
1589
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1590
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1591
    SNum result;
1592
    switch (t->operandType) {
1593
    case 0:   // int8
1594
        result.b = a.b;
1595
        if (b.b < 8) result.b &= 1 << b.b;
1596
        break;
1597
    case 1:   // int16
1598
        result.s = a.s;
1599
        if (b.s < 16) result.s &= 1 << b.s;
1600
        break;
1601
    case 2:   // int32
1602
    case 5:   // float
1603
        result.i = a.i;
1604
        if (b.i < 32) result.i &= 1 << b.i;
1605
        break;
1606
    case 3:   // int64
1607
    case 6:   // double
1608
        result.q = a.q;
1609
        if (b.q < 64) result.q &= (uint64_t)1 << b.q;
1610
        break;
1611
    default:
1612
        t->interrupt(INT_WRONG_PARAMETERS);
1613
        result.i = 0;
1614
    }
1615
    return result.q;
1616
}*/
1617
 
1618
static uint64_t f_test_bit(CThread * t) {
1619
    // test a single bit: a >> b & 1
1620
    SNum a = t->parm[1];
1621
    SNum b = t->parm[2];
1622
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1623
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1624
    if (t->fInstr->imm2 & 4) {
1625
        b = t->parm[4];                                    // avoid immediate operand shifted by imm3
1626
    }
1627
    SNum result;
1628
    result.q = 0;
1629
    SNum mask = t->parm[3];
1630
    uint8_t fallbackreg = t->operands[2];  // fallback register
1631
    SNum fallback;  // fallback value
1632
    fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0;
1633
    switch (t->operandType) {
1634
    case 0:   // int8
1635
        if (b.b < 8) result.b = a.b >> b.b & 1;
1636
        break;
1637
    case 1:   // int16
1638
        if (b.s < 16) result.s = a.s >> b.s & 1;
1639
        break;
1640
    case 2:   // int32
1641
    case 5:   // float
1642
        if (b.i < 32) result.i = a.i >> b.i & 1;
1643
        break;
1644
    case 3:   // int64
1645
    case 6:   // double
1646
        if (b.q < 64) result.q = a.q >> b.q & 1;
1647
        break;
1648
    default:
1649
        t->interrupt(INT_WRONG_PARAMETERS);
1650
        result.i = 0;
1651
    }
1652
    // get additional options
1653
    uint8_t options = 0;
1654
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3;
1655
    if (options & 4) result.b ^= 1;    // invert result
1656
    if (options & 8) fallback.b ^= 1;  // invert fallback
1657
    if (options & 0x10) mask.b ^= 1;   // invert mask
1658
    switch (options & 3) {
1659
    case 0:
1660
        result.b = (mask.b & 1) ? result.b : fallback.b;
1661
        break;
1662
    case 1:
1663
        result.b = mask.b  &  result.b & fallback.b;
1664
        break;
1665
    case 2:
1666
        result.b = mask.b  & (result.b | fallback.b);
1667
        break;
1668
    case 3:
1669
        result.b = mask.b  & (result.b ^ fallback.b);
1670
    }
1671
    // ignore other bits
1672
    result.q &= 1;
1673
    if (options & 0x20) {  // get remaining bits from flag or NUMCONTR
1674
        result.q |= mask.q & ~(uint64_t)1;
1675
    }
1676
    // disable normal fallback process
1677
    t->parm[3].b = 1;
1678
    return result.q;
1679
}
1680
 
1681
static uint64_t f_test_bits_and(CThread * t) {
1682
    // Test if all the indicated bits are 1
1683
    // result = (a & b) == b
1684
    SNum a = t->parm[1];
1685
    SNum b = t->parm[2];
1686
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1687
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1688
    if (t->fInstr->imm2 & 4) {
1689
        b = t->parm[4];                                    // avoid immediate operand shifted by imm3
1690
    }
1691
    SNum result;
1692
    SNum mask = t->parm[3];
1693
    uint8_t fallbackreg = t->operands[2];        // fallback register
1694
    SNum fallback;  // fallback value
1695
    fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0;
1696
    switch (t->operandType) {
1697
    case 0:   // int8
1698
        result.b = (a.b & b.b) == b.b;
1699
        break;
1700
    case 1:   // int16
1701
        result.s = (a.s & b.s) == b.s;
1702
        break;
1703
    case 2:   // int32
1704
    case 5:   // float
1705
        result.i = (a.i & b.i) == b.i;
1706
        break;
1707
    case 3:   // int64
1708
    case 6:   // double
1709
        result.q = (a.q & b.q) == b.q;
1710
        break;
1711
    default:
1712
        t->interrupt(INT_WRONG_PARAMETERS);
1713
        result.i = 0;
1714
    }
1715
    // get additional options
1716
    uint8_t options = 0;
1717
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3;
1718
    if (options & 4) result.b ^= 1;    // invert result
1719
    if (options & 8) fallback.b ^= 1;  // invert fallback
1720
    if (options & 0x10) mask.b ^= 1;   // invert mask
1721
    switch (options & 3) {
1722
    case 0:
1723
        result.b = (mask.b & 1) ? result.b : fallback.b;
1724
        break;
1725
    case 1:
1726
        result.b &= mask.b & fallback.b;
1727
        break;
1728
    case 2:
1729
        result.b = mask.b  & (result.b | fallback.b);
1730
        break;
1731
    case 3:
1732
        result.b = mask.b  & (result.b ^ fallback.b);
1733
    }
1734
    // ignore other bits
1735
    result.q &= 1;
1736
    if (options & 0x20) {  // get remaining bits from flag or NUMCONTR
1737
        result.q |= mask.q & ~(uint64_t)1;
1738
    }
1739
    // disable normal fallback process
1740
    t->parm[3].b = 1;
1741
    return result.q;
1742
}
1743
 
1744
static uint64_t f_test_bits_or(CThread * t) {
1745
    // Test if at least one of the indicated bits is 1. 
1746
    // result = (a & b) != 0
1747
    SNum a = t->parm[1];
1748
    SNum b = t->parm[2];
1749
    //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float
1750
    if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float
1751
    if (t->fInstr->imm2 & 4) {
1752
        b = t->parm[4];                                    // avoid immediate operand shifted by imm3
1753
    }
1754
    SNum result;
1755
    SNum mask = t->parm[3];
1756
    uint8_t fallbackreg = t->operands[2];  // fallback register
1757
    SNum fallback;  // fallback value
1758
    fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0;
1759
    switch (t->operandType) {
1760
    case 0:   // int8
1761
        result.b = (a.b & b.b) != 0;
1762
        break;
1763
    case 1:   // int16
1764
        result.s = (a.s & b.s) != 0;
1765
        break;
1766
    case 2:   // int32
1767
    case 5:   // float
1768
        result.i = (a.i & b.i) != 0;
1769
        break;
1770
    case 3:   // int64
1771
    case 6:   // double
1772
        result.q = (a.q & b.q) != 0;
1773
        break;
1774
    default:
1775
        t->interrupt(INT_WRONG_PARAMETERS);
1776
        result.i = 0;
1777
    }
1778
    // get additional options
1779
    uint8_t options = 0;
1780
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3;
1781
    if (options & 4) result.b ^= 1;    // invert result
1782
    if (options & 8) fallback.b ^= 1;  // invert fallback
1783
    if (options & 0x10) mask.b ^= 1;   // invert mask
1784
    switch (options & 3) {
1785
    case 0:
1786
        result.b = (mask.b & 1) ? result.b : fallback.b;
1787
        break;
1788
    case 1:
1789
        result.b &= mask.b & fallback.b;
1790
        break;
1791
    case 2:
1792
        result.b = mask.b  & (result.b | fallback.b);
1793
        break;
1794
    case 3:
1795
        result.b = mask.b  & (result.b ^ fallback.b);
1796
    }
1797
    // ignore other bits
1798
    result.q &= 1;
1799
    if (options & 0x20) {  // get remaining bits from flag or NUMCONTR
1800
        result.q |= mask.q & ~(uint64_t)1;
1801
    }
1802
    // disable normal fallback process
1803
    t->parm[3].b = 1;
1804
    return result.q;
1805
}
1806
 
1807
 
1808
float mul_add_f(float a, float b, float c) {
1809
    // calculate a * b + c with extra precision on the intermediate product.
1810
#if FMA_AVAILABLE
1811
    // use FMA instruction for correct precision if available
1812
    return _mm_cvtss_f32(_mm_fmadd_ss(_mm_load_ss(&a), _mm_load_ss(&b), _mm_load_ss(&c)));
1813
#else
1814
    return float((double)a * (double)b + (double)c);
1815
#endif
1816
}
1817
 
1818
double mul_add_d(double a, double b, double c) {
1819
    // calculate a * b + c with extra precision on the intermediate product.
1820
#if FMA_AVAILABLE
1821
    // use FMA instruction for correct precision if available
1822
    return _mm_cvtsd_f64(_mm_fmadd_sd(_mm_load_sd(&a), _mm_load_sd(&b), _mm_load_sd(&c)));
1823
#else
1824
    // calculate a*b-c with extended precision. This code is not as good as the real FMA instruction
1825
    SNum aa, bb, ahi, bhi, alo, blo;
1826
    uint64_t upper_mask = 0xFFFFFFFFF8000000;
1827
    aa.d = a;  bb.d = b;
1828
    ahi.q = aa.q & upper_mask;                   // split into high and low parts
1829
    alo.d = a - ahi.d;
1830
    bhi.q = bb.q & upper_mask;
1831
    blo.d = b - bhi.d;
1832
    double r1 = ahi.d * bhi.d;                   // this product is exact
1833
    // perhaps a different order of addition is better here in some cases?
1834
    double r2 = r1 + c;                          // add c to high product
1835
    double r3 = r2 + (ahi.d * blo.d + bhi.d * alo.d) + alo.d * blo.d; // add rest of product
1836
    return r3;
1837
#endif
1838
}
1839
 
1840
uint64_t f_mul_add(CThread * t) {
1841
    // a * b + c, calculated with extra precision on the intermediate product
1842
    SNum a = t->parm[0];
1843
    SNum b = t->parm[1];
1844
    SNum c = t->parm[2];
1845
    if ((t->fInstr->imm2 & 4) && t->operandType < 5) {
1846
        c = t->parm[4];                 // avoid immediate operand shifted by imm3
1847
    }
1848
    if (t->op == II_MUL_ADD2) {
1849
        SNum t = b; b = c; c = t;       // swap last two operands
1850
    }
1851
    uint32_t mask = t->parm[3].i;
1852
    SNum result;
1853
    bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0;  // non-standard rounding mode
1854
    bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0;  // make NAN if exceptions
1855
 
1856
    // get sign options
1857
    uint8_t options = 0;
1858
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3;
1859
    //else if (t->fInstr->tmplate == 0xA) options = (mask >> MSKI_OPTIONS) & 0xF; 
1860
    if (t->vect == 2) { // odd vector element
1861
        options >>= 1;
1862
    }
1863
    bool unsignedOverflow = false;
1864
    bool signedOverflow = false;
1865
    uint8_t operandType = t->operandType;
1866
    switch (operandType) {
1867
    case 0:   // int8
1868
        a.is = a.bs; b.is = b.bs;    // sign extend to avoid overflow during sign change
1869
        if (options & 1) a.is = -a.is;
1870
        if (options & 4) c.is = -c.is;
1871
        result.is = a.is * b.is + c.bs;
1872
        signedOverflow = result.is != result.bs;
1873
        unsignedOverflow = result.i != result.b;
1874
        break;
1875
    case 1:   // int16
1876
        a.is = a.ss; b.is = b.ss;    // sign extend to avoid overflow during sign change
1877
        if (options & 1) a.is = -a.is;
1878
        if (options & 4) c.is = -c.is;
1879
        result.is = a.is * b.is + c.ss;
1880
        signedOverflow = result.is != result.ss;
1881
        unsignedOverflow = result.i != result.s;
1882
        break;
1883
    case 2:   // int32
1884
        a.qs = a.is; b.qs = b.is;    // sign extend to avoid overflow during sign change
1885
        if (options & 1) a.qs = -a.qs;
1886
        if (options & 4) c.qs = -c.qs;
1887
        result.qs = a.qs * b.qs + c.is;
1888
        signedOverflow = result.qs != result.is;
1889
        unsignedOverflow = result.q != result.i;
1890
        break;
1891
    case 3:   // int64
1892
        if (options & 1) {
1893
            if (a.q == sign_d) signedOverflow = true;
1894
            a.qs = -a.qs;
1895
        }
1896
        if (options & 4) {
1897
            if (b.q == sign_d) signedOverflow = true;
1898
            c.qs = -c.qs;
1899
        }
1900
        result.qs = a.qs * b.qs + c.qs;
1901
        /*
1902
        if (mask.b & MSK_OVERFL_UNSIGN) {                  // check for unsigned overflow
1903
            if (fabs((double)a.q + (double)b.q * (double)c.q - (double)result.q) > 1.E8) unsignedOverflow = true;
1904
        }
1905
        if (mask.b & MSK_OVERFL_SIGN) {                    // check for signed overflow
1906
            if (fabs((double)a.qs + (double)b.qs * (double)c.qs - (double)result.qs) > 1.E8) signedOverflow = true;
1907
        } */
1908
        break;
1909
    case 5:   // float
1910
        if (options & 1) a.f = -a.f;
1911
        if (options & 4) c.f = -c.f;
1912
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
1913
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
1914
 
1915
        result.f = mul_add_f(a.f, b.f, c.f);               // do the calculation
1916
        if (isnan_or_inf_f(result.i)) {                    // check for overflow and nan
1917
            uint32_t nans = 0;                             // biggest NAN
1918
            uint32_t infs = 0;                             // count INF inputs
1919
            for (int i = 0; i < 3; i++) {                  // loop through input operands
1920
                uint32_t tmp = t->parm[i].i & nsign_f;     // ignore sign bit
1921
                if (tmp == inf_f) infs++;                  // is INF
1922
                else if (tmp > nans) nans = tmp;           // get the biggest if there are multiple NANs
1923
            }
1924
            if (nans) result.i = nans;                     // there is at least one NAN. return the biggest (sign bit is lost)
1925
            else if (isnan_f(result.i)) {
1926
                // result is NAN, but no input is NAN. This can be 0*INF or INF-INF
1927
                if ((a.i << 1 == 0 || b.i << 1 == 0) && infs) result.q = t->makeNan(nan_invalid_0mulinf, operandType);
1928
                else result.q = t->makeNan(nan_invalid_sub, operandType);
1929
            }
1930
        }
1931
        else if (detectExceptions) {
1932
            uint32_t x = getExceptionFlags();              // read exceptions
1933
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType);
1934
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
1935
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
1936
        }
1937
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
1938
        break;
1939
 
1940
    case 6:   // double
1941
        if (options & 1) a.d = -a.d;
1942
        if (options & 4) c.d = -c.d;
1943
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
1944
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
1945
 
1946
        result.d = mul_add_d(a.d, b.d, c.d);               // do the calculation
1947
        if (isnan_or_inf_d(result.q)) {                    // check for overflow and nan
1948
            uint64_t nans = 0;                             // biggest NAN
1949
            uint32_t infs = 0;                             // count INF inputs
1950
            for (int i = 0; i < 3; i++) {                  // loop through input operands
1951
                uint64_t tmp = t->parm[i].q & nsign_d;     // ignore sign bit
1952
                if (tmp == inf_d) infs++;                  // is INF
1953
                else if (tmp > nans) nans = tmp;           // get the biggest if there are multiple NANs
1954
            }
1955
            if (nans) result.q = nans;                     // there is at least one NAN. return the biggest (sign bit is lost)
1956
            else if (isnan_d(result.q)) {
1957
                // result is NAN, but no input is NAN. This can be 0*INF or INF-INF
1958
                if ((a.q << 1 == 0 || b.q << 1 == 0) && infs) result.q = t->makeNan(nan_invalid_0mulinf, operandType);
1959
                else result.q = t->makeNan(nan_invalid_sub, operandType);
1960
            }
1961
        }
1962
        else if (detectExceptions) {
1963
            uint32_t x = getExceptionFlags();              // read exceptions
1964
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType);
1965
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType);
1966
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType);
1967
        }
1968
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
1969
        break;
1970
 
1971
    default:
1972
        t->interrupt(INT_WRONG_PARAMETERS);
1973
        result.i = 0;
1974
    }
1975
    return result.q;
1976
}
1977
 
1978
static uint64_t f_add_add(CThread * t) {
1979
    // a + b + c, calculated with extra precision on the intermediate sum
1980
    int i, j;
1981
    SNum parm[3];
1982
    // copy parameters so that we change sign and reorder them without changing original constant
1983
    for (i = 0; i < 3; i++) parm[i] = t->parm[i];
1984
    if ((t->fInstr->imm2 & 4) && t->operandType < 5) {
1985
        parm[2] = t->parm[4];                                    // avoid immediate operand shifted by imm3
1986
    }
1987
    uint32_t mask = t->parm[3].i;
1988
    bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0;  // non-standard rounding mode
1989
    bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0;  // make NAN if exceptions
1990
    uint8_t operandType = t->operandType;
1991
    SNum sumS, sumU;                             // signed and unsigned sums
1992
    SNum nanS;                                   // combined nan's
1993
    // get sign options
1994
    uint8_t options = 0;
1995
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3;
1996
    //else if (t->fInstr->tmplate == 0xA) options = uint8_t(mask >> MSKI_OPTIONS);
1997
 
1998
    uint8_t signedOverflow = 0;
1999
    uint8_t unsignedOverflow = 0;
2000
    //bool parmInf = false;
2001
    sumS.q = sumU.q = 0;
2002
    uint32_t temp1;
2003
    uint64_t temp2;
2004
 
2005
    switch (operandType) {
2006
    case 0:   // int8
2007
        for (i = 0; i < 3; i++) {                // loop through operands
2008
            if (options & 1) {                   // change sign
2009
                if (parm[i].b == 0x80) signedOverflow ^= 1;
2010
                if (parm[i].b != 0) unsignedOverflow ^= 1;
2011
                parm[i].is = - parm[i].is;
2012
            }
2013
            options >>= 1;                       // get next option bit
2014
            sumU.i  += parm[i].b;                // unsigned sum
2015
            sumS.is += parm[i].bs;               // sign-extended sum
2016
        }
2017
        if (sumU.b != sumU.i) unsignedOverflow ^= 1;
2018
        if (sumS.bs != sumS.is) signedOverflow ^= 1;
2019
        break;
2020
    case 1:   // int16
2021
        for (i = 0; i < 3; i++) {                // loop through operands
2022
            if (options & 1) {                   // change sign
2023
                if (parm[i].s == 0x8000) signedOverflow ^= 1;
2024
                if (parm[i].s != 0) unsignedOverflow ^= 1;
2025
                parm[i].is = - parm[i].is;
2026
            }
2027
            options >>= 1;                       // get next option bit
2028
            sumU.i  += parm[i].s;                // unsigned sum
2029
            sumS.is += parm[i].ss;               // sign-extended sum
2030
        }
2031
        if (sumU.s != sumU.i) unsignedOverflow ^= 1;
2032
        if (sumS.ss != sumS.is) signedOverflow ^= 1;
2033
        break;
2034
    case 2:   // int32
2035
        for (i = 0; i < 3; i++) {                // loop through operands
2036
            if (options & 1) {                   // change sign
2037
                if (parm[i].i == sign_f) signedOverflow ^= 1;
2038
                if (parm[i].i != 0) unsignedOverflow ^= 1;
2039
                parm[i].is = - parm[i].is;
2040
            }
2041
            options >>= 1;                       // get next option bit
2042
            sumU.q  += parm[i].i;                // unsigned sum
2043
            sumS.qs += parm[i].is;               // sign-extended sum
2044
        }
2045
        if (sumU.i != sumU.q) unsignedOverflow ^= 1;
2046
        if (sumS.is != sumS.qs) signedOverflow ^= 1;
2047
        break;
2048
    case 3:   // int64
2049
        for (i = 0; i < 3; i++) {                // loop through operands
2050
            if (options & 1) {                   // change sign
2051
                if (parm[i].q == sign_d) signedOverflow ^= 1;
2052
                if (parm[i].q != 0) unsignedOverflow ^= 1;
2053
                parm[i].qs = - parm[i].qs;
2054
            }
2055
            options >>= 1;                       // get next option bit
2056
            uint64_t a = parm[i].q;
2057
            uint64_t b = sumU.q;
2058
            sumU.q  = a + b;     // sum
2059
            if (sumU.q < a) unsignedOverflow ^= 1;
2060
            if (int64_t(~(a ^ b) & (a ^ sumU.q)) < 0) signedOverflow ^= 1;
2061
        }
2062
        break;
2063
    case 5:   // float
2064
        sumS.is = -1;  nanS.i = 0;
2065
        for (i = 0; i < 3; i++) {                          // loop through operands
2066
            if (options & 1) parm[i].f = -parm[i].f;       // change sign
2067
            // find the smallest of the three operands
2068
            if ((parm[i].i << 1) < sumS.i) {
2069
                sumS.i = (parm[i].i << 1);  j = i;
2070
            }
2071
            // find NANs and infs
2072
            temp1 = parm[i].i & nsign_f;                   // ignore sign bit
2073
            if (temp1 > nanS.i) nanS.i = temp1;            // find the biggest NAN
2074
            //if (temp1 == inf_f) parmInf = true;            // OR of all INFs
2075
            options >>= 1;                                 // next option bit
2076
        }
2077
        if (nanS.i > inf_f) return nanS.i;                 // result is NAN
2078
        // get the smallest operand last to minimize loss of precision if the two biggest operands have opposite signs
2079
        temp1 = parm[j].i;
2080
        parm[j].i = parm[2].i;
2081
        parm[2].i = temp1;
2082
 
2083
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
2084
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
2085
 
2086
        // calculate sum
2087
        sumU.f = (parm[0].f + parm[1].f) + parm[2].f;
2088
 
2089
        if (isnan_f(sumU.i)) {
2090
            // the result is NAN but neither input is NAN. This must be INF-INF
2091
            sumU.q = t->makeNan(nan_invalid_sub, operandType);
2092
        }
2093
        if (detectExceptions) {
2094
            uint32_t x = getExceptionFlags();              // read exceptions
2095
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) sumU.q = t->makeNan(nan_overflow_add, operandType);
2096
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) sumU.q = t->makeNan(nan_underflow, operandType);
2097
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) sumU.q = t->makeNan(nan_inexact, operandType);
2098
        }
2099
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
2100
        break;
2101
 
2102
    case 6:   // double
2103
        sumS.qs = -1;  nanS.q = 0;
2104
        for (i = 0; i < 3; i++) {                          // loop through operands
2105
            if (options & 1) parm[i].d = -parm[i].d;       // change sign
2106
            // find the smallest of the three operands
2107
            if ((parm[i].q << 1) < sumS.q) {
2108
                sumS.q = (parm[i].q << 1);  j = i;
2109
            }
2110
            // find NANs and infs
2111
            temp2 = parm[i].q & nsign_d;                   // ignore sign bit
2112
            if (temp2 > nanS.q) nanS.q = temp2;            // find the biggest NAN
2113
            //if (temp2 == inf_d) parmInf = true;            // OR of all INFs
2114
            options >>= 1;                                 // next option bit
2115
        }
2116
        if (nanS.q > inf_d) return nanS.q;                 // result is NAN
2117
 
2118
        // get the smallest operand last to minimize loss of precision if 
2119
        // the two biggest operands have opposite signs
2120
        temp2 = parm[j].q;
2121
        parm[j].q = parm[2].q;
2122
        parm[2].q = temp2;
2123
        if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING);
2124
        if (detectExceptions) clearExceptionFlags();       // clear previous exceptions
2125
 
2126
        // calculate sum
2127
        sumU.d = (parm[0].d + parm[1].d) + parm[2].d;
2128
 
2129
        if (isnan_d(sumU.q)) {
2130
            // the result is NAN but neither input is NAN. This must be INF-INF
2131
            sumU.q = t->makeNan(nan_invalid_sub, operandType);
2132
        }
2133
        if (detectExceptions) {
2134
            uint32_t x = getExceptionFlags();              // read exceptions
2135
            if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) sumU.q = t->makeNan(nan_overflow_add, operandType);
2136
            else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) sumU.q = t->makeNan(nan_underflow, operandType);
2137
            else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) sumU.q = t->makeNan(nan_inexact, operandType);
2138
        }
2139
        if (roundingMode) setRoundingMode(0);              // reset rounding mode
2140
        break;
2141
 
2142
    default:
2143
        t->interrupt(INT_WRONG_PARAMETERS);
2144
        sumU.i = 0;
2145
    }
2146
    return sumU.q;
2147
}
2148
 
2149
uint64_t f_add_h(CThread * t) {
2150
    // add two numbers, float16
2151
    // (rounding mode not supported)
2152
    SNum a = t->parm[1];
2153
    SNum b = t->parm[2];
2154
    uint32_t mask = t->parm[3].i;
2155
    uint16_t result;
2156
 
2157
    if (t->fInstr->immSize == 1) b.s = float2half(b.bs);  // convert 8-bit integer to float16
2158
    if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS);
2159
    if (isnan_h(a.s) && isnan_h(b.s)) {    // both are NAN
2160
        result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload
2161
    }
2162
    if (mask & (1<<MSK_INEXACT)) clearExceptionFlags();       // clear previous exceptions
2163
 
2164
    // the exact result is obtained with double precision. This makes sure we don't get double rounding errors
2165
    double resultd = (double)half2float(a.s) + (double)half2float(b.s);  // calculate with single precision
2166
    result = double2half(resultd);
2167
 
2168
    // check for exceptions
2169
    if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) {
2170
        // overflow
2171
        result = (uint16_t)t->makeNan(nan_overflow_add, 1);
2172
        result |= (a.s ^ b.s) & 0x8000;  // get the sign
2173
    }
2174
    else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) {
2175
        // underflow
2176
        result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN
2177
    }
2178
    else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultd || (getExceptionFlags() & 0x20)) != 0) {
2179
        // inexact
2180
        result = (uint16_t)t->makeNan(nan_inexact, 1);
2181
    }
2182
 
2183
    uint8_t roundingMode = mask >> MSKI_ROUNDING & 3;
2184
    if (roundingMode != 0 && !isnan_or_inf_h(result)) {
2185
        double r = half2float(result);
2186
        // non-standard rounding mode
2187
        switch (roundingMode) {
2188
        case 1:  // down
2189
            if (r > resultd && result != 0xFBFF) {
2190
                if (result == 0) result = 0x8001;
2191
                else if ((int16_t)result > 0) result--;
2192
                else result++;
2193
            }
2194
            break;
2195
        case 2:  // up
2196
            if (r < resultd && result != 0x7BFF) {
2197
                if ((int16_t)result > 0) result++;
2198
                else result--;
2199
            }
2200
            break;
2201
        case 3:  // towards zero
2202
            if ((int16_t)result > 0 && r > resultd) result--;
2203
            else if ((int16_t)result < 0 && r < resultd) result--;
2204
        }
2205
    }
2206
    t->returnType =0x118;
2207
    return result;
2208
}
2209
 
2210
uint64_t f_sub_h(CThread * t) {
2211
    // subtract two numbers, float16
2212
    // (rounding mode not supported)
2213
    SNum a = t->parm[1];
2214
    SNum b = t->parm[2];
2215
    uint32_t mask = t->parm[3].i;
2216
    uint16_t result;
2217
 
2218
    if (t->fInstr->immSize == 1) b.s = float2half(b.bs);  // convert 8-bit integer to float16
2219
    if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS);
2220
    if (isnan_h(a.s) && isnan_h(b.s)) {    // both are NAN
2221
        result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload
2222
    }
2223
    if (mask & (1<<MSK_INEXACT)) clearExceptionFlags();       // clear previous exceptions
2224
 
2225
    // the exact result is obtained with double precision. This makes sure we don't get double rounding errors
2226
    double resultd = (double)half2float(a.s) - (double)half2float(b.s);  // calculate with single precision
2227
    result = double2half(resultd);
2228
 
2229
    // check for exceptions
2230
    if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) {
2231
        // overflow
2232
        result = (uint16_t)t->makeNan(nan_overflow_add, 1);
2233
        result |= (a.s ^ b.s) & 0x8000;  // get the sign
2234
    }
2235
    else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) {
2236
        // underflow
2237
        result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN
2238
    }
2239
    else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultd || (getExceptionFlags() & 0x20)) != 0) {
2240
        // inexact
2241
        result = (uint16_t)t->makeNan(nan_inexact, 1);
2242
    }
2243
    uint8_t roundingMode = mask >> MSKI_ROUNDING & 3;
2244
    if (roundingMode != 0 && !isnan_or_inf_h(result)) {
2245
        double r = half2float(result);
2246
        // non-standard rounding mode
2247
        switch (roundingMode) {
2248
        case 1:  // down
2249
            if (r > resultd && result != 0xFBFF) {
2250
                if (result == 0) result = 0x8001;
2251
                else if ((int16_t)result > 0) result--;
2252
                else result++;
2253
            }
2254
            break;
2255
        case 2:  // up
2256
            if (r < resultd && result != 0x7BFF) {
2257
                if ((int16_t)result > 0) result++;
2258
                else result--;
2259
            }
2260
            break;
2261
        case 3:  // towards zero
2262
            if ((int16_t)result > 0 && r > resultd) result--;
2263
            else if ((int16_t)result < 0 && r < resultd) result--;
2264
        }
2265
    }
2266
    t->returnType =0x118;
2267
    return result;
2268
}
2269
 
2270
uint64_t f_mul_h(CThread * t) {
2271
    // multiply two numbers, float16
2272
    SNum a = t->parm[1];
2273
    SNum b = t->parm[2];
2274
    uint32_t mask = t->parm[3].i;
2275
    uint16_t result;
2276
 
2277
    if (t->fInstr->immSize == 1) b.s = float2half(b.bs);  // convert 8-bit integer to float16
2278
    if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS);
2279
    if (isnan_h(a.s) && isnan_h(b.s)) {    // both are NAN
2280
        result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload
2281
    }
2282
    if (mask & (1<<MSK_INEXACT)) clearExceptionFlags();       // clear previous exceptions
2283
 
2284
    // single precision is sufficient to get an exact multiplication result
2285
    float resultf = half2float(a.s) * half2float(b.s);  // calculate with single precision
2286
    result = float2half(resultf);
2287
 
2288
    // check for exceptions
2289
    if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) {
2290
        // overflow
2291
        result = (uint16_t)t->makeNan(nan_overflow_mul, 1);
2292
        result |= (a.s ^ b.s) & 0x8000;  // get the sign
2293
    }
2294
    else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultf != 0.0f) {
2295
        // underflow
2296
        result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN
2297
    }
2298
    else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultf || (getExceptionFlags() & 0x20)) != 0) {
2299
        // inexact
2300
        result = (uint16_t)t->makeNan(nan_inexact, 1);
2301
    }
2302
    uint8_t roundingMode = mask >> MSKI_ROUNDING & 3;
2303
    if (roundingMode != 0 && !isnan_or_inf_h(result)) {
2304
        // non-standard rounding mode
2305
        float r = half2float(result);
2306
        switch (roundingMode) {
2307
        case 1:  // down
2308
            if (r > resultf && result != 0xFBFF) {
2309
                if (result == 0) result = 0x8001;
2310
                else if ((int16_t)result > 0) result--;
2311
                else result++;
2312
            }
2313
            break;
2314
        case 2:  // up
2315
            if (r < resultf && result != 0x7BFF) {
2316
                if ((int16_t)result > 0) result++;
2317
                else result--;
2318
            }
2319
            break;
2320
        case 3:  // towards zero
2321
            if ((int16_t)result > 0 && r > resultf) result--;
2322
            else if ((int16_t)result < 0 && r < resultf) result--;
2323
        }
2324
    }
2325
    t->returnType =0x118;
2326
    return result;
2327
}
2328
 
2329
 
2330
uint64_t f_mul_add_h(CThread * t) {
2331
    // a + b * c, float16
2332
    SNum a = t->parm[0];
2333
    SNum b = t->parm[1];
2334
    SNum c = t->parm[2];
2335
    uint32_t mask = t->parm[3].i;
2336
    if (t->fInstr->imm2 & 4) c = t->parm[4];          // avoid immediate operand shifted by imm3
2337
    if (t->fInstr->immSize == 1) c.s = float2half(c.bs);  // convert 8-bit integer to float16                                                          // get sign options
2338
    uint8_t options = 0;
2339
    if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3;
2340
    //else if (t->fInstr->tmplate == 0xA) options = (mask >> MSKI_OPTIONS) & 0xF;
2341
    if (t->vect == 2) { // odd vector element
2342
        options >>= 1;
2343
    }
2344
    if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS);
2345
    if (options & 1) a.s ^= 0x8000;                           // adjust sign
2346
    if (options & 4) b.s ^= 0x8000;
2347
 
2348
    if (mask & (1<<MSK_INEXACT)) clearExceptionFlags();         // clear previous exceptions
2349
 
2350
    double resultd = (double)half2float(a.s) + (double)half2float(b.s) * (double)half2float(c.s);
2351
 
2352
    uint16_t result = double2half(resultd);
2353
    uint32_t nans = 0;  bool parmInf = false;
2354
 
2355
    if (isnan_or_inf_h(result)) {                          // check for overflow and nan
2356
        for (int i = 0; i < 3; i++) {                      // loop through input operands
2357
            uint32_t tmp = t->parm[i].s & 0x7FFF;          // ignore sign bit
2358
            if (tmp > nans) nans = tmp;                    // get the biggest if there are multiple NANs
2359
            if (tmp == inf_h) parmInf = true;              // OR of all INFs
2360
        }
2361
        if (nans > inf_h) return nans;                     // there is at least one NAN. return the biggest (sign bit is lost)
2362
        else if (isnan_h(result)) {
2363
            // result is NAN, but no input is NAN. This can be 0*INF or INF-INF
2364
            if ((a.s << 1 == 0 || b.s << 1 == 0) && parmInf) result = (uint16_t)t->makeNan(nan_invalid_0mulinf, 1);
2365
            else result = (uint16_t)t->makeNan(nan_invalid_sub, 1);
2366
        }
2367
    }
2368
    else if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !parmInf) result = (uint16_t)t->makeNan(nan_overflow_mul, 1);
2369
    else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) result = (uint16_t)t->makeNan(nan_underflow, 1);
2370
    else if ((mask & (1<<MSK_INEXACT)) && ((getExceptionFlags() & 0x20) != 0 || half2float(result) != resultd)) result = (uint16_t)t->makeNan(nan_inexact, 1);
2371
 
2372
    uint8_t roundingMode = mask >> MSKI_ROUNDING & 3;
2373
    if (roundingMode != 0 && !isnan_or_inf_h(result)) {
2374
        float r = half2float(result);
2375
        // non-standard rounding mode
2376
        switch (roundingMode) {
2377
        case 1:  // down
2378
            if (r > resultd && result != 0xFBFF) {
2379
                if (result == 0) result = 0x8001;
2380
                else if ((int16_t)result > 0) result--;
2381
                else result++;
2382
            }
2383
            break;
2384
        case 2:  // up
2385
            if (r < resultd && result != 0x7BFF) {
2386
                if ((int16_t)result > 0) result++;
2387
                else result--;
2388
            }
2389
            break;
2390
        case 3:  // towards zero
2391
            if ((int16_t)result > 0 && r > resultd) result--;
2392
            else if ((int16_t)result < 0 && r < resultd) result--;
2393
        }
2394
    }
2395
    t->returnType = 0x118;
2396
    return result;
2397
}
2398
 
2399
 
2400
// Tables of function pointers
2401
 
2402
// multiformat instructions
2403
PFunc funcTab1[64] = {
2404
    f_nop, f_store, f_move, f_prefetch, f_sign_extend, f_sign_extend_add, 0, f_compare,  // 0 - 7
2405
    f_add, f_sub, f_sub_rev, f_mul, f_mul_hi, f_mul_hi_u, f_div, f_div_u,                         // 8 - 15
2406
    f_div_rev, 0, f_rem, f_rem_u, f_min, f_min_u, f_max, f_max_u,                       // 16 - 23
2407
    0, 0, f_and, f_or, f_xor, 0, 0, 0,                             // 24 - 31
2408
    f_shift_left, f_rotate, f_shift_right_s, f_shift_right_u, f_clear_bit, f_set_bit, f_toggle_bit, f_test_bit,  // 32 - 39
2409
    f_test_bits_and, f_test_bits_or, 0, 0, f_add_h, f_sub_h, f_mul_h, 0,          // 40 - 47
2410
    f_mul_add_h, f_mul_add, f_mul_add, f_add_add, f_select_bits, f_funnel_shift, 0, 0,                           // 48 - 55
2411
    0, 0, 0, 0, 0, 0, 0, 0                                                               // 56 - 63
2412
};
2413
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.