URL
https://opencores.org/ocsvn/forwardcom/forwardcom/trunk
Subversion Repositories forwardcom
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 53 to Rev 54
- ↔ Reverse comparison
Rev 53 → Rev 54
/forwardcom/bintools/emulator3.cpp
0,0 → 1,2413
/**************************** emulator3.cpp ******************************** |
* Author: Agner Fog |
* date created: 2018-02-18 |
* Last modified: 2021-06-29 |
* Version: 1.11 |
* Project: Binary tools for ForwardCom instruction set |
* Description: |
* Emulator: Execution functions for multiformat instructions |
* |
* Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses |
*****************************************************************************/ |
|
#include "stdafx.h" |
|
// get intrinsic functions for _mm_getcsr and _mm_setcsr to control floating point rounding and exceptions |
#if defined(_M_X64) || defined(__x86_64__) || defined(__amd64) || defined(__SSE2__) |
#if defined(__FMA__) || defined(__AVX2__) |
#define FMA_AVAILABLE 1 |
#else |
#define FMA_AVAILABLE 0 |
#endif |
#if defined(_MSC_VER) && !FMA_AVAILABLE |
#include <xmmintrin.h> |
#else |
#include <immintrin.h> |
#endif |
#define MCSCR_AVAILABLE 1 |
#else |
#define MCSCR_AVAILABLE 0 |
#endif |
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////// |
// functions for detecting exceptions and controlling rounding mode on the CPU that runs the emulator |
// Note: these functions are only available in x86 systems with SSE2 or x64 enabled |
////////////////////////////////////////////////////////////////////////////////////////////////////// |
|
// Error message if MXCSR not available |
void errorFpControlMissing() { |
static int repeated = 0; |
if (!repeated) { |
fprintf(stderr, "Error: Cannot control floating point exceptions and rounding mode on this platform"); |
repeated = 1; |
} |
} |
|
void setRoundingMode(uint8_t r) { |
// change rounding mode |
#if MCSCR_AVAILABLE |
uint32_t e = _mm_getcsr(); |
e = (e & 0x9FFF) | (r & 3) << 13; |
_mm_setcsr(e); |
#else |
errorFpControlMissing(); |
#endif |
} |
|
void clearExceptionFlags() { |
// clear exception flags before detecting exceptions |
#if MCSCR_AVAILABLE |
uint32_t e = _mm_getcsr(); |
_mm_setcsr(e & 0xFFC0); |
#else |
errorFpControlMissing(); |
#endif |
} |
|
uint32_t getExceptionFlags() { |
// read exception flags after instructions that may cause exceptions |
// 1: invalid operation |
// 2: denormal |
// 4: divide by zero |
// 8: overflow |
// 0x10: underflow |
// 0x20: precision |
#if MCSCR_AVAILABLE |
return _mm_getcsr() & 0x3F; |
#else |
errorFpControlMissing(); |
return 0; |
#endif |
} |
|
void enableSubnormals(uint32_t e) { |
// enable or disable subnormal numbers |
#if MCSCR_AVAILABLE |
uint32_t x = _mm_getcsr(); |
if (e != 0) { |
_mm_setcsr(x & ~0x8040); |
} |
else { |
_mm_setcsr(x | 0x8040); |
} |
#else |
errorFpControlMissing(); |
#endif |
} |
|
|
///////////////////////////// |
// Multi-format instructions |
///////////////////////////// |
|
uint64_t f_nop(CThread * t) { |
// No operation |
t->running = 2; // don't save RD |
t->returnType = 0; // debug return output |
return 0; |
} |
|
static uint64_t f_store(CThread * t) { |
// Store value RD to memory |
uint8_t rd = t->operands[0]; |
uint64_t value = t->registers[rd]; |
if (t->vect) { |
value = t->readVectorElement(rd, t->vectorOffset); |
} |
// check mask |
if (t->parm[3].b & 1) { |
uint64_t address = t->memAddress; // memory address |
if (t->vect) address += t->vectorOffset; |
t->writeMemoryOperand(value, address); |
} |
else { // mask is 0. This instruction has no fallback. Don't write |
/* |
uint8_t fallback = t->operands[2]; // mask is 0. get fallback |
if (fallback == 0x1F) value = 0; |
else if (t->vect) value = t->readVectorElement(fallback, t->vectorOffset); |
else value = t->registers[fallback];*/ |
} |
t->returnType = (t->returnType & ~0x10) | 0x20; // return type is memory |
t->running = 2; // don't save RD |
return 0; |
} |
|
static uint64_t f_move(CThread * t) { |
// copy value |
return t->parm[2].q; |
} |
|
static uint64_t f_prefetch(CThread * t) { |
// prefetch from address. not emulated |
return f_nop(t); |
} |
|
static uint64_t f_sign_extend(CThread * t) { |
// sign-extend integer to 64 bits |
int64_t value = 0; |
switch (t->operandType) { |
case 0: |
value = (int64_t)(int8_t)t->parm[2].b; |
break; |
case 1: |
value = (int64_t)(int16_t)t->parm[2].s; |
break; |
case 2: |
value = (int64_t)(int32_t)t->parm[2].i; |
break; |
case 3: |
value = (int64_t)t->parm[2].q; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
value = 0; |
} |
t->operandType = 3; // change operand size of result |
if (t->vect) { |
t->vectorLength[t->operands[0]] = t->vectorLengthR = 8; // change vector length of result and stop vector loop |
} |
t->returnType = (t->returnType & ~7) | 3; // debug return output |
return (uint64_t)value; |
} |
|
static uint64_t f_sign_extend_add(CThread * t) { |
// sign-extend integer to 64 bits and add 64-bit register |
int64_t value = 0; |
uint8_t options = 0; |
if (t->fInstr->tmplate == 0xE) options = t->pInstr->a.im3; |
|
switch (t->operandType) { |
case 0: |
value = (int64_t)(int8_t)t->parm[2].b; |
break; |
case 1: |
value = (int64_t)(int16_t)t->parm[2].s; |
break; |
case 2: |
value = (int64_t)(int32_t)t->parm[2].i; |
break; |
case 3: |
value = (int64_t)t->parm[2].q; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
value = 0; |
} |
value <<= options; |
|
uint8_t r1 = t->operands[4]; // first operand. g.p. register |
value += t->registers[r1]; // read register with full size |
t->operandType = 3; // change operand size of result |
t->returnType = (t->returnType & ~7) | 3; // debug return output |
if (t->vect) t->interrupt(INT_WRONG_PARAMETERS); |
return (uint64_t)value; |
} |
|
static uint64_t f_compare(CThread * t) { |
// compare two source operands and generate a boolean result |
// get condition code |
uint8_t cond = 0; |
uint32_t mask = t->parm[3].i; // mask register value or NUMCONTR |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) { |
cond = t->pInstr->a.im3; // E template. get condition from IM3 |
} |
// get operands |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
if ((t->fInstr->imm2 & 4) && t->operandType < 5) { |
b = t->parm[4]; // avoid immediate operand shifted by imm3 |
} |
uint64_t result = 0; |
uint8_t cond1 = cond >> 1 & 3; // bit 1 - 2 of condition |
bool isnan = false; |
// select operand type |
if (t->operandType < 5) { // integer types |
uint64_t sizeMask = dataSizeMask[t->operandType]; // mask for data size |
uint64_t signBit = (sizeMask >> 1) + 1; // sign bit |
a.q &= sizeMask; b.q &= sizeMask; // mask to desired size |
if (cond1 != 3 && !(cond & 8)) { // signed |
a.q ^= signBit; b.q ^= signBit; // flip sign bit to use unsigned compare |
} |
switch (cond1) { // select condition |
case 0: // a == b |
result = a.q == b.q; |
break; |
case 1: // a < b |
result = a.q < b.q; |
break; |
case 2: // a > b |
result = a.q > b.q; |
break; |
case 3: // abs(a) < abs(b). Not officially supported in version 1.11 |
if (a.q & signBit) a.q = (~a.q + 1) & sizeMask; // change sign. overflow allowed |
if (b.q & signBit) b.q = (~b.q + 1) & sizeMask; // change sign. overflow allowed |
result = a.q < b.q; |
break; |
} |
} |
else if (t->operandType == 5) { // float |
isnan = isnan_f(a.i) || isnan_f(b.i); // check for NAN |
if (!isnan) { |
switch (cond1) { // select condition |
case 0: // a == b |
result = a.f == b.f; |
break; |
case 1: // a < b |
result = a.f < b.f; |
break; |
case 2: // a > b |
result = a.f > b.f; |
break; |
case 3: // abs(a) < abs(b) |
result = fabsf(a.f) < fabsf(b.f); |
break; |
} |
} |
} |
else if (t->operandType == 6) { // double |
isnan = isnan_d(a.q) || isnan_d(b.q); |
if (!isnan) { |
switch (cond1) { // select condition |
case 0: // a == b |
result = a.d == b.d; |
break; |
case 1: // a < b |
result = a.d < b.d; |
break; |
case 2: // a > b |
result = a.d > b.d; |
break; |
case 3: // abs(a) < abs(b) |
result = fabs(a.d) < fabs(b.d); |
break; |
} |
} |
} |
else t->interrupt(INT_WRONG_PARAMETERS); // unsupported type |
// invert result |
if (cond & 1) result ^= 1; |
|
// check for NAN |
if (isnan) { |
result = (cond >> 3) & 1; // bit 3 tells what to get if unordered |
//if (t->parm[3].i & MSK_FLOAT_NAN_LOSS) t->interrupt(INT_FLOAT_NAN_LOSS); // mask bit 29: trap if NAN loss |
} |
|
// mask and fallback |
uint8_t fallbackreg = t->operands[2]; |
uint64_t fallback = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg) : 0; |
switch (cond >> 4) { |
case 0: // normal fallback |
if (!(mask & 1)) result = fallback; |
break; |
case 1: // mask & result & fallback |
result &= mask & fallback; |
break; |
case 2: // mask & (result | fallback) |
result = mask & (result | fallback); |
break; |
case 3: // mask & (result ^ fallback) |
result = mask & (result ^ fallback); |
break; |
} |
if ((t->returnType & 7) >= 5) t->returnType -= 3; // debug return output must be integer |
|
result &= 1; // use only bit 0 of result |
if ((t->operands[1] & 0x1F) < 7) { |
// There is a mask. get remaining bits from mask |
result |= (t->parm[3].q & ~(uint64_t)1); |
} |
t->parm[3].b = 1; // prevent normal mask operation |
return result; |
} |
|
uint64_t f_add(CThread * t) { |
// add two numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
SNum result; |
bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode |
bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions |
uint8_t operandType = t->operandType; |
|
if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { |
// subnormal status changed |
enableSubnormals (mask & (1<<MSK_SUBNORMAL)); |
t->lastMask = mask; |
} |
// operand type |
if (operandType < 4) { // integer |
// uint64_t sizeMask = dataSizeMask[t->operandType]; // mask for data size |
result.q = a.q + b.q; |
} |
else if (operandType == 5) { // float |
bool nana = isnan_f(a.i); // check for NAN input |
bool nanb = isnan_f(b.i); |
if (nana && nanb) { // both are NAN |
return (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload |
} |
else if (nana) return a.q; |
else if (nanb) return b.q; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.f = a.f + b.f; // this is the actual addition |
if (isnan_f(result.i)) { |
// the result is NAN but neither input is NAN. This must be INF-INF |
result.q = t->makeNan(nan_invalid_sub, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
|
else if (operandType == 6) { // double |
bool nana = isnan_d(a.q); // check for NAN input |
bool nanb = isnan_d(b.q); |
if (nana && nanb) { // both are NAN |
return (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload |
} |
else if (nana) return a.q; |
else if (nanb) return b.q; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.d = a.d + b.d; // this is the actual addition |
if (isnan_d(result.q)) { |
// the result is NAN but neither input is NAN. This must be INF-INF |
result.q = t->makeNan(nan_invalid_sub, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
else { |
// unsupported operand type |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
uint64_t f_sub(CThread * t) { |
// subtract two numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
SNum result; |
bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode |
bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions |
uint8_t operandType = t->operandType; |
if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { |
// subnormal status changed |
enableSubnormals (mask & (1<<MSK_SUBNORMAL)); |
t->lastMask = mask; |
} |
if (operandType < 4) { // integer |
result.q = a.q - b.q; // subtract |
} |
else if (operandType == 5) { // float |
bool nana = isnan_f(a.i); // check for NAN input |
bool nanb = isnan_f(b.i); |
if (nana && nanb) { // both are NAN |
return (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload |
} |
else if (nana) return a.q; |
else if (nanb) return b.q; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.f = a.f - b.f; // this is the actual subtraction |
if (isnan_f(result.i)) { |
// the result is NAN but neither input is NAN. This must be INF-INF |
result.q = t->makeNan(nan_invalid_sub, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
else if (operandType == 6) {// double |
bool nana = isnan_d(a.q); // check for NAN input |
bool nanb = isnan_d(b.q); |
if (nana && nanb) { // both are NAN |
return (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload |
} |
else if (nana) return a.q; |
else if (nanb) return b.q; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.d = a.d - b.d; // this is the actual subtraction |
if (isnan_d(result.q)) { |
// the result is NAN but neither input is NAN. This must be INF-INF |
result.q = t->makeNan(nan_invalid_sub, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
else { |
// unsupported operand type |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
extern uint64_t f_sub_rev(CThread * t) { |
// subtract two numbers, b-a |
uint64_t temp = t->parm[2].q; // swap operands |
t->parm[2].q = t->parm[1].q; |
t->parm[1].q = temp; |
uint64_t retval = f_sub(t); |
t->parm[2].q = temp; // restore parm[2] in case it is a constant |
return retval; |
} |
|
uint64_t f_mul(CThread * t) { |
// multiply two numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
SNum result; |
bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode |
bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions |
uint8_t operandType = t->operandType; |
if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { |
// subnormal status changed |
enableSubnormals (mask & (1<<MSK_SUBNORMAL)); |
t->lastMask = mask; |
} |
if (operandType < 4) { |
// integer |
result.q = a.q * b.q; |
} |
else if (operandType == 5) { // float |
bool nana = isnan_f(a.i); // check for NAN input |
bool nanb = isnan_f(b.i); |
if (nana && nanb) { // both are NAN |
return (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload |
} |
else if (nana) return a.q; |
else if (nanb) return b.q; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.f = a.f * b.f; // this is the actual multiplication |
if (isnan_f(result.i)) { |
// the result is NAN but neither input is NAN. This must be 0*INF |
result.q = t->makeNan(nan_invalid_0mulinf, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
|
else if (operandType == 6) { // double |
bool nana = isnan_d(a.q); // check for NAN input |
bool nanb = isnan_d(b.q); |
if (nana && nanb) { // both are NAN |
return (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload |
} |
else if (nana) return a.q; |
else if (nanb) return b.q; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.d = a.d * b.d; // this is the actual multiplication |
if (isnan_d(result.q)) { |
// the result is NAN but neither input is NAN. This must be 0*INF |
result.q = t->makeNan(nan_invalid_0mulinf, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
else { |
// unsupported operand type |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
|
uint64_t f_div(CThread * t) { |
// divide two floating point numbers or signed integers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
SNum result; |
bool overflow = false; |
bool roundingMode = (mask & (3 << MSKI_ROUNDING))!=0; // non-standard floating point rounding mode |
bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions |
bool nana, nanb; // inputs are NAN |
uint8_t operandType = t->operandType; |
uint32_t intRounding = 0; // integer rounding mode |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) { |
intRounding = t->pInstr->a.im3; // E template. get integer rounding mode from IM3 |
} |
if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { |
// subnormal status changed |
enableSubnormals (mask & (1<<MSK_SUBNORMAL)); |
t->lastMask = mask; |
} |
switch (operandType) { |
case 0: // int8 |
if (b.b == 0 || (a.b == 0x80 && b.bs == -1)) { |
result.i = 0x80; overflow = true; |
} |
else { |
result.i = a.bs / b.bs; |
if (intRounding != 0 && intRounding != 7 && abs(b.bs) != 1) { |
int rem = a.bs % b.bs; |
switch (intRounding) { |
case 4: { // nearest or even |
uint32_t r2 = 2*abs(rem); |
uint32_t b2 = abs(b.bs); |
int s = int8_t(a.i ^ b.i) < 0 ? -1 : 1; // one with sign of result |
if (r2 > b2 || (r2 == b2 && (result.b & 1))) result.i += s; |
break;} |
case 5: // down |
if (rem != 0 && int8_t(a.i ^ b.i) < 0 && result.b != 0x80u) result.i--; |
break; |
case 6: // up |
if (rem != 0 && int8_t(a.i ^ b.i) >= 0) result.i++; |
break; |
} |
} |
} |
break; |
case 1: // int16 |
if (b.s == 0 || (a.s == 0x8000u && b.ss == -1)) { |
result.i = 0x8000; overflow = true; |
} |
else { |
result.i = a.ss / b.ss; |
if (intRounding != 0 && intRounding != 7 && abs(b.ss) != 1) { |
int16_t rem = a.ss % b.ss; |
switch (intRounding) { |
case 4: { // nearest or even |
uint16_t r2 = 2*abs(rem); |
uint16_t b2 = abs(b.is); |
int16_t s = int16_t(a.s ^ b.s) < 0 ? -1 : 1; // one with sign of result |
if (r2 > b2 || (r2 == b2 && (result.s & 1))) result.s += s; |
break;} |
case 5: // down |
if (rem != 0 && int16_t(a.s ^ b.s) < 0 && result.s != 0x8000u) result.s--; |
break; |
case 6: // up |
if (rem != 0 && int16_t(a.s ^ b.s) >= 0) result.s++; |
break; |
} |
} |
} |
break; |
case 2: // int32 |
if (b.i == 0 || (a.i == sign_f && b.is == -1)) { |
result.i = sign_f; overflow = true; |
} |
else { |
result.i = a.is / b.is; |
if (intRounding != 0 && intRounding != 7 && abs(b.is) != 1) { |
int rem = a.is % b.is; |
switch (intRounding) { |
case 4: { // nearest or even |
uint32_t r2 = 2*abs(rem); |
uint32_t b2 = abs(b.is); |
int s = int32_t(a.i ^ b.i) < 0 ? -1 : 1; // one with sign of result |
if (r2 > b2 || (r2 == b2 && (result.i & 1))) result.i += s; |
break;} |
case 5: // down |
if (rem != 0 && int32_t(a.i ^ b.i) < 0 && result.i != 0x80000000u) result.i--; |
break; |
case 6: // up |
if (rem != 0 && int32_t(a.i ^ b.i) >= 0) result.i++; |
break; |
} |
} |
} |
break; |
case 3: // int64 |
if (b.q == 0 || (a.q == sign_d && b.qs == int64_t(-1))) { |
result.q = sign_d; overflow = true; |
} |
else { |
result.qs = a.qs / b.qs; |
if (intRounding != 0 && intRounding != 7 && abs(b.qs) != 1) { |
int64_t rem = a.qs % b.qs; |
switch (intRounding) { |
case 4: { // nearest or even |
uint64_t r2 = 2*abs(rem); |
uint64_t b2 = abs(b.qs); |
int64_t s = int64_t(a.q ^ b.q) < 0 ? -1 : 1; // one with sign of result |
if (r2 > b2 || (r2 == b2 && (result.i & 1))) result.q += s; |
break;} |
case 5: // down |
if (rem != 0 && int64_t(a.q ^ b.q) < 0 && result.q != 0x8000000000000000u) result.q--; |
break; |
case 6: // up |
if (rem != 0 && int64_t(a.q ^ b.q) >= 0) result.q++; |
break; |
} |
} |
} |
break; |
case 5: // float |
nana = isnan_f(a.i); // check for NAN input |
nanb = isnan_f(b.i); |
if (nana && nanb) { // both are NAN |
result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload |
} |
else if (nana) result.i = a.i; |
else if (nanb) result.i = b.i; |
else if (b.i << 1 == 0) { // division by zero |
if (a.i << 1 == 0) { // 0./0. = nan |
result.q = t->makeNan(nan_invalid_0div0, operandType); |
} |
else { |
// a / 0. = infinity |
if (mask & (1<<MSK_DIVZERO)) result.q = t->makeNan(nan_div0, operandType); |
else result.i = inf_f; |
} |
result.i |= (a.i ^ b.i) & sign_f; // sign bit |
} |
else if (isinf_f(a.i) && isinf_f(b.i)) { |
result.i = (uint32_t)t->makeNan(nan_invalid_divinf, operandType); // INF/INF |
result.i |= (a.i ^ b.i) & sign_f; // sign bit |
} |
else { |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.f = a.f / b.f; // normal division |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_div, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
break; |
case 6: // double |
nana = isnan_d(a.q); // check for NAN input |
nanb = isnan_d(b.q); |
if (nana && nanb) { // both are NAN |
result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload |
} |
else if (nana) result.q = a.q; |
else if (nanb) result.q = b.q; |
else if (b.q << 1 == 0) { // division by zero |
if (a.q << 1 == 0) { // 0./0. = nan |
result.q = t->makeNan(nan_invalid_0div0, operandType); |
} |
else { |
// a / 0. = infinity |
if (mask & (1<<MSK_DIVZERO)) result.q = t->makeNan(nan_div0, operandType); |
else result.q = inf_d; |
} |
result.q |= (a.q ^ b.q) & sign_d; // sign bit |
} |
else if (isinf_d(a.q) && isinf_d(b.q)) { |
result.q = t->makeNan(nan_invalid_divinf, operandType); // INF/INF |
result.q |= (a.q ^ b.q) & sign_d; // sign bit |
} |
else { |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
result.d = a.d / b.d; // normal division |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_div, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
} |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
uint64_t f_div_u(CThread * t) { |
// divide two unsigned numbers |
|
if (t->operandType > 4) { |
return f_div(t); // floating point: same as f_div |
} |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//SNum mask = t->parm[3]; |
SNum result; |
bool overflow = false; |
uint32_t intRounding = 0; // integer rounding mode |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) { |
intRounding = t->pInstr->a.im3; // E template. get integer rounding mode from IM3 |
} |
|
switch (t->operandType) { |
case 0: // int8 |
if (b.b == 0) { |
result.i = 0xFF; overflow = true; |
} |
else { |
result.i = a.b / b.b; |
if (intRounding == 4 || intRounding == 6) { |
uint32_t rem = a.b % b.b; |
switch (intRounding) { |
case 4: // nearest or even |
if (rem*2 > b.b || (rem*2 == b.b && (result.i & 1))) result.i++; |
break; |
case 6: // up |
if (rem != 0) result.i++; |
break; |
} |
} |
} |
break; |
case 1: // int16 |
if (b.s == 0) { |
result.i = 0xFFFF; overflow = true; |
} |
else { |
result.i = a.s / b.s; |
if (intRounding == 4 || intRounding == 6) { |
uint32_t rem = a.s % b.s; |
switch (intRounding) { |
case 4: // nearest or even |
if (rem*2 > b.s || (rem*2 == b.s && (result.i & 1))) result.i++; |
break; |
case 6: // up |
if (rem != 0) result.i++; |
break; |
} |
} |
} |
break; |
case 2: // int32 |
if (b.i == 0) { |
result.is = -1; overflow = true; |
} |
else { |
result.i = a.i / b.i; |
if (intRounding == 4 || intRounding == 6) { |
uint32_t rem = a.i % b.i; |
switch (intRounding) { |
case 4: // nearest or even |
if (rem*2 > b.i || (rem*2 == b.i && (result.i & 1))) result.i++; |
break; |
case 6: // up |
if (rem != 0) result.i++; |
break; |
} |
} |
} |
break; |
case 3: // int64 |
if (b.q == 0) { |
result.qs = -(int64_t)1; overflow = true; |
} |
else { |
result.q = a.q / b.q; |
if (intRounding == 4 || intRounding == 6) { |
uint64_t rem = a.q % b.q; |
switch (intRounding) { |
case 4: // nearest or even |
if (rem*2 > b.q || (rem*2 == b.q && (result.q & 1))) result.q++; |
break; |
case 6: // up |
if (rem != 0) result.q++; |
break; |
} |
} |
} |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
} |
//if (overflow && (mask.i & MSK_OVERFL_UNSIGN)) t->interrupt(INT_OVERFL_UNSIGN); // unsigned integer overflow |
return result.q; |
} |
|
static uint64_t f_div_rev(CThread * t) { |
// divide two numbers, b/a |
uint64_t temp = t->parm[2].q; // swap operands |
t->parm[2].q = t->parm[1].q; |
t->parm[1].q = temp; |
uint64_t retval = f_div(t); |
t->parm[2].q = temp; // restore parm[2] in case it is a constant |
return retval; |
} |
|
uint64_t mul64_128u(uint64_t * low, uint64_t a, uint64_t b) { |
// extended unsigned multiplication 64*64 -> 128 bits. |
// Note: you may replace this by inline assembly or intrinsic functions on |
// platforms that have extended multiplication instructions. |
|
// The return value is the high half of the product, |
// *low receives the low half of the product |
union { // arrays for storing result |
uint64_t q[2]; |
uint32_t i[4]; |
} res; |
uint64_t t; // temporary product |
uint64_t k; // temporary carry |
uint64_t a0 = (uint32_t)a; // low a |
uint64_t a1 = a >> 32; // high a |
uint64_t b0 = (uint32_t)b; // low b |
uint64_t b1 = b >> 32; // high b |
t = a0 * b0; |
res.i[0] = (uint32_t)t; |
k = t >> 32; |
t = a1 * b0 + k; |
res.i[1] = (uint32_t)t; |
k = t >> 32; |
res.i[2] = (uint32_t)k; |
t = a0 * b1 + res.i[1]; |
res.i[1] = (uint32_t)t; |
k = t >> 32; |
t = a1 * b1 + k + res.i[2]; |
res.i[2] = (uint32_t)t; |
k = t >> 32; |
res.i[3] = (uint32_t)k; |
if (low) *low = res.q[0]; |
return res.q[1]; |
} |
|
int64_t mul64_128s(uint64_t * low, int64_t a, int64_t b) { |
// extended signed multiplication 64*64 -> 128 bits. |
// Note: you may replace this by inline assembly or intrinsic functions on |
// platforms that have extended multiplication instructions. |
|
// The return value is the high half of the product, |
// *low receives the low half of the product |
bool sign = false; |
if (a < 0) { |
a = -a, sign = true; |
} |
if (b < 0) { |
b = -b; sign = !sign; |
} |
uint64_t lo, hi; |
hi = mul64_128u(&lo, a, b); |
if (sign) { // change sign |
lo = uint64_t(-int64_t(lo)); |
hi = ~hi; |
if (lo == 0) hi++; // carry |
} |
if (low) *low = lo; |
return (int64_t)hi; |
} |
|
static uint64_t f_mul_hi(CThread * t) { |
// high part of extended signed multiply |
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.qs = ((int32_t)t->parm[1].bs * (int32_t)t->parm[2].bs) >> 8; |
break; |
case 1: // int16 |
result.qs = ((int32_t)t->parm[1].ss * (int32_t)t->parm[2].ss) >> 16; |
break; |
case 2: // int32 |
result.qs = ((int64_t)t->parm[1].is * (int64_t)t->parm[2].is) >> 32; |
break; |
case 3: // int64 |
result.qs = mul64_128s(0, t->parm[1].qs, t->parm[2].qs); |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.q = 0; |
} |
return result.q; |
} |
|
static uint64_t f_mul_hi_u(CThread * t) { |
// high part of extended unsigned multiply |
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.q = ((uint32_t)t->parm[1].b * (uint32_t)t->parm[2].b) >> 8; |
break; |
case 1: // int16 |
result.q = ((uint32_t)t->parm[1].s * (uint32_t)t->parm[2].s) >> 16; |
break; |
case 2: // int32 |
result.q = ((uint64_t)t->parm[1].i * (uint64_t)t->parm[2].i) >> 32; |
break; |
case 3: // int64 |
result.q = mul64_128u(0, t->parm[1].q, t->parm[2].q); |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.q = 0; |
} |
return result.q; |
} |
|
|
static uint64_t f_rem(CThread * t) { |
// remainder/modulo of two signed numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//SNum mask = t->parm[3]; |
SNum result; |
bool overflow = false; |
|
switch (t->operandType) { |
case 0: // int8 |
if (b.b == 0 || (a.b == 0x80 && b.bs == -1)) { |
result.i = 0x80; overflow = true; |
} |
else result.is = a.bs % b.bs; |
break; |
case 1: // int16 |
if (b.s == 0 || (a.s == 0x8000 && b.ss == -1)) { |
result.i = 0x8000; overflow = true; |
} |
else result.is = a.ss % b.ss; |
break; |
case 2: // int32 |
if (b.i == 0 || (a.i == sign_f && b.is == -1)) { |
result.i = sign_f; overflow = true; |
} |
else result.is = a.is % b.is; |
break; |
case 3: // int64 |
if (b.q == 0 || (a.q == sign_d && b.qs == int64_t(-1))) { |
result.q = sign_d; overflow = true; |
} |
else result.qs = a.qs % b.qs; |
break; |
case 5: // float |
if (isnan_f(a.i) && isnan_f(b.i)) { // both are NAN |
result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload |
} |
else if (b.i << 1 == 0 || isinf_f(a.i)) { // rem(1,0) or rem(inf,1) |
result.q = t->makeNan(nan_invalid_rem, 5); |
} |
else { |
result.f = fmodf(a.f, b.f); // normal modulo |
} |
break; |
case 6: // double |
if (isnan_d(a.q) && isnan_d(b.q)) { // both are NAN |
result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload |
} |
else if (b.q << 1 == 0 || isinf_d(a.q)) { // rem(1,0) or rem(inf,1) |
result.q = t->makeNan(nan_invalid_rem, 5); |
} |
else { |
result.d = fmod(a.d, b.d); // normal modulo |
} |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
//if (overflow&& (mask.i & MSK_OVERFL_SIGN)) t->interrupt(INT_OVERFL_SIGN); // signed integer overflow |
return result.q; |
} |
|
static uint64_t f_rem_u(CThread * t) { |
// remainder/modulo of two unsigned numbers |
if (t->operandType > 4) return f_rem(t); // float types use f_rem |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//SNum mask = t->parm[3]; |
SNum result; |
bool overflow = false; |
|
switch (t->operandType) { |
case 0: // int8 |
if (b.b == 0) { |
result.i = 0x80; overflow = true; |
} |
else result.i = a.b % b.b; |
break; |
case 1: // int16 |
if (b.s == 0) { |
result.i = 0x8000; overflow = true; |
} |
else result.i = a.s % b.s; |
break; |
case 2: // int32 |
if (b.i == 0) { |
result.i = sign_f; overflow = true; |
} |
else result.i = a.i % b.i; |
break; |
case 3: // int64 |
if (b.q == 0) { |
result.q = sign_d; overflow = true; |
} |
else result.q = a.q % b.q; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
//if (overflow&& (mask.i & MSK_OVERFL_SIGN)) t->interrupt(INT_OVERFL_SIGN); // signed integer overflow |
return result.q; |
} |
|
static uint64_t f_min(CThread * t) { |
// minimum of two signed numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
SNum result; |
int8_t isnan; |
switch (t->operandType) { |
case 0: // int8 |
result.is = a.bs < b.bs ? a.bs : b.bs; |
break; |
case 1: // int16 |
result.is = a.ss < b.ss ? a.ss : b.ss; |
break; |
case 2: // int32 |
result.is = a.is < b.is ? a.is : b.is; |
break; |
case 3: // int64 |
result.qs = a.qs < b.qs ? a.qs : b.qs; |
break; |
case 5: // float |
result.f = a.f < b.f ? a.f : b.f; |
// check NANs |
isnan = isnan_f(a.i); // a is nan |
isnan |= isnan_f(b.i) << 1; // b is nan |
if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard |
if (isnan == 1) result.i = a.i; |
else if (isnan == 2) result.i = b.i; |
else result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload |
} |
break; |
case 6: // double |
result.d = a.d < b.d ? a.d : b.d; |
// check NANs |
isnan = isnan_d(a.q); // a is nan |
isnan |= isnan_d(b.q) << 1; // b is nan |
if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard |
if (isnan == 1) result.q = a.q; |
else if (isnan == 2) result.q = b.q; |
else result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload |
} |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_min_u(CThread * t) { |
// minimum of two unsigned numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.i = a.b < b.b ? a.b : b.b; |
break; |
case 1: // int16 |
result.i = a.s < b.s ? a.s : b.s; |
break; |
case 2: // int32 |
result.i = a.i < b.i ? a.i : b.i; |
break; |
case 3: // int64 |
result.q = a.q < b.q ? a.q : b.q; |
break; |
case 5: // float |
case 6: // double |
return f_min(t); |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_max(CThread * t) { |
// maximum of two signed numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
SNum result; |
uint8_t isnan; |
switch (t->operandType) { |
case 0: // int8 |
result.is = a.bs > b.bs ? a.bs : b.bs; |
break; |
case 1: // int16 |
result.is = a.ss > b.ss ? a.ss : b.ss; |
break; |
case 2: // int32 |
result.is = a.is > b.is ? a.is : b.is; |
break; |
case 3: // int64 |
result.qs = a.qs > b.qs ? a.qs : b.qs; |
break; |
case 5: // float |
result.f = a.f > b.f ? a.f : b.f; |
// check NANs |
isnan = isnan_f(a.i); // a is nan |
isnan |= isnan_f(b.i) << 1; // b is nan |
if (isnan) { |
// propagate NAN according to the 2019 revision of the IEEE-754 standard |
if (isnan == 1) result.i = a.i; |
else if (isnan == 2) result.i = b.i; |
else result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload |
} |
break; |
case 6: // double |
result.d = a.d > b.d ? a.d : b.d; |
// check NANs |
isnan = isnan_d(a.q); // a is nan |
isnan |= isnan_d(b.q) << 1; // b is nan |
if (isnan) { |
// propagate NAN according to the 2019 revision of the IEEE-754 standard |
if (isnan == 1) result.q = a.q; |
else if (isnan == 2) result.q = b.q; |
else result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload |
} |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_max_u(CThread * t) { |
// maximum of two unsigned numbers |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.i = a.b > b.b ? a.b : b.b; |
break; |
case 1: // int16 |
result.i = a.s > b.s ? a.s : b.s; |
break; |
case 2: // int32 |
result.i = a.i > b.i ? a.i : b.i; |
break; |
case 3: // int64 |
result.q = a.q > b.q ? a.q : b.q; |
break; |
case 5: // float |
case 6: // double |
return f_max(t); |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_and(CThread * t) { |
// bitwise AND of two numbers |
return t->parm[1].q & t->parm[2].q; |
} |
/* |
static uint64_t f_and_not(CThread * t) { |
// a & ~b |
return t->parm[1].q & ~ t->parm[2].q; |
}*/ |
|
static uint64_t f_or(CThread * t) { |
// bitwise OR of two numbers |
return t->parm[1].q | t->parm[2].q; |
} |
|
static uint64_t f_xor(CThread * t) { |
// bitwise exclusive OR of two numbers |
return t->parm[1].q ^ t->parm[2].q; |
} |
|
static uint64_t f_select_bits(CThread * t) { |
// a & c | b & ~c |
return (t->parm[0].q & t->parm[2].q) | (t->parm[1].q & ~ t->parm[2].q); |
} |
|
static uint64_t f_shift_left(CThread * t) { |
// integer: a << b, float a * 2^b where b is interpreted as integer |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
|
SNum mask = t->parm[3]; |
SNum result; |
uint64_t exponent; |
switch (t->operandType) { |
case 0: // int8 |
result.b = a.b << b.b; |
if (b.b > 7) result.q = 0; |
break; |
case 1: // int16 |
result.s = a.s << b.s; |
if (b.b > 15) result.q = 0; |
break; |
case 2: // int32 |
result.i = a.i << b.i; |
if (b.b > 31) result.q = 0; |
break; |
case 3: // int64 |
result.q = a.q << b.q; |
if (b.b > 63) result.q = 0; |
break; |
case 5: // float |
if (isnan_f(a.i)) return a.q; // a is nan |
exponent = a.i >> 23 & 0xFF; // get exponent |
if (exponent == 0) return a.i & sign_f; // a is zero or subnormal. return zero |
exponent += b.i; // add integer to exponent |
if ((int32_t)exponent >= 0xFF) { // overflow |
result.i = inf_f; |
} |
else if ((int32_t)exponent <= 0) { // underflow |
if ((mask.i & (1<<MSK_UNDERFLOW)) != 0) { // make NAN if exception |
result.q = t->makeNan(nan_underflow, 5); |
} |
else { |
result.q = 0; |
} |
} |
else { |
result.i = (a.i & 0x807FFFFF) | uint32_t(exponent) << 23; // insert new exponent |
} |
break; |
case 6: // double |
if (isnan_d(a.q)) return a.q; // a is nan |
exponent = a.q >> 52 & 0x7FF; |
if (exponent == 0) return a.q & sign_d; // a is zero or subnormal. return zero |
exponent += b.q; // add integer to exponent |
if ((int64_t)exponent >= 0x7FF) { // overflow |
result.q = inf_d; |
//if (mask.i & MSK_OVERFL_FLOAT) t->interrupt(INT_OVERFL_FLOAT); |
} |
else if ((int64_t)exponent <= 0) { // underflow |
if ((mask.i & (1<<MSK_UNDERFLOW)) != 0) { // make NAN if exception |
result.q = t->makeNan(nan_underflow, 6); |
} |
else { |
result.q = 0; |
} |
} |
else { |
result.q = (a.q & 0x800FFFFFFFFFFFFF) | (exponent << 52); // insert new exponent |
} |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_rotate(CThread * t) { |
// rotate bits left |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
|
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.b = a.b << (b.b & 7) | a.b >> (8 - (b.b & 7)); |
break; |
case 1: // int16 |
result.s = a.s << (b.s & 15) | a.s >> (16 - (b.s & 15)); |
break; |
case 2: // int32 |
case 5: // float |
result.i = a.i << (b.i & 31) | a.i >> (32 - (b.i & 31)); |
break; |
case 3: // int64 |
case 6: // double |
result.q = a.q << (b.q & 63) | a.q >> (64 - (b.q & 63)); |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_shift_right_s(CThread * t) { |
// integer only: a >> b, with sign extension |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
|
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.bs = a.bs >> b.bs; |
if (b.b > 7) result.qs = a.bs >> 7; |
break; |
case 1: // int16 |
result.ss = a.ss >> b.ss; |
if (b.s > 15) result.qs = a.ss >> 15; |
break; |
case 2: // int32 |
result.is = a.is >> b.is; |
if (b.i > 31) result.qs = a.is >> 31; |
break; |
case 3: // int64 |
result.qs = a.qs >> b.qs; |
if (b.q > 63) result.qs = a.qs >> 63; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_shift_right_u(CThread * t) { |
// integer only: a >> b, with zero extension |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
|
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.b = a.b >> b.b; |
if (b.b > 7) result.q = 0; |
break; |
case 1: // int16 |
result.s = a.s >> b.s; |
if (b.s > 15) result.q = 0; |
break; |
case 2: // int32 |
result.i = a.i >> b.i; |
if (b.i > 31) result.q = 0; |
break; |
case 3: // int64 |
result.q = a.q >> b.q; |
if (b.q > 63) result.q = 0; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_funnel_shift(CThread * t) { |
uint64_t shift_count = t->parm[2].q; |
if ((t->operands[5] & 0x20) && t->operandType > 4) shift_count = t->parm[4].q; // avoid conversion of c to float |
|
if (t->vect == 0) { // g.p. registers. shift integers n bytes |
uint32_t dataSize = dataSizeTableBits[t->operandType]; // operand size, bits |
uint64_t dataMask = dataSizeMask[t->operandType]; // operand size mask, dataSize bits of 1 |
if ((shift_count & dataMask) >= dataSize) return 0; // shift count out of range |
return ((t->parm[0].q & dataMask) >> shift_count | (t->parm[1].q & dataMask) << (dataSize - shift_count)) & dataMask; |
} |
else { // vector registers. shift concatenated whole vectors n bytes down |
|
// The second operand may be a vector register of incomplete size or a broadcast memory operand. |
// Both input operands may be the same as the destination register. |
// The operand size may not match the shift count |
// The easiest way to handle all these cases is to copy both input vectors into temporary buffers |
switch (t->operandType) { |
case 0: |
*(t->tempBuffer + t->vectorOffset) = t->parm[0].bs; |
*(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].bs; |
break; |
case 1: |
*(uint16_t*)(t->tempBuffer + t->vectorOffset) = t->parm[0].s; |
*(uint16_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].s; |
break; |
case 2: case 5: |
*(uint32_t*)(t->tempBuffer + t->vectorOffset) = t->parm[0].i; |
*(uint32_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].i; |
break; |
case 3: case 6: |
*(uint64_t*)(t->tempBuffer + t->vectorOffset) = t->parm[0].q; |
*(uint64_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].q; |
break; |
case 4: case 7: // to do: support 128 bits |
t->interrupt(INT_WRONG_PARAMETERS); |
break; |
} |
uint32_t dataSizeBytes = dataSizeTable[t->operandType]; // operand size, bits |
if (t->vectorOffset + dataSizeBytes >= t->vectorLengthR) { |
// last iteration. Make the result |
uint8_t rd = t->operands[0]; // destination vector |
shift_count *= dataSizeBytes; // shift n elements |
if (shift_count >= t->vectorLengthR) { |
// shift count out of range. return 0 |
memset(t->vectors.buf() + t->MaxVectorLength*rd, 0, t->vectorLengthR); |
} |
else { |
// copy upper part of first vector to lower part of destination vector |
memcpy(t->vectors.buf() + t->MaxVectorLength * rd, t->tempBuffer + shift_count, t->vectorLengthR - shift_count); |
// copy lower part of second vector to upper part of destination vector |
memcpy(t->vectors.buf() + t->MaxVectorLength * rd + (t->vectorLengthR - shift_count), t->tempBuffer + t->MaxVectorLength, shift_count); |
} |
} |
t->running = 2; // don't save RD. It is saved by above code |
return 0; |
} |
} |
|
static uint64_t f_set_bit(CThread * t) { |
// a | 1 << b |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
|
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.b = a.b; |
if (b.b < 8) result.b |= 1 << b.b; |
break; |
case 1: // int16 |
result.s = a.s; |
if (b.s < 16) result.s |= 1 << b.s; |
break; |
case 2: // int32 |
case 5: // float |
result.i = a.i; |
if (b.i < 32) result.i |= 1 << b.i; |
break; |
case 3: // int64 |
case 6: // double |
result.q = a.q; |
if (b.q < 64) result.q |= (uint64_t)1 << b.q; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_clear_bit(CThread * t) { |
// a & ~ (1 << b) |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.b = a.b; |
if (b.b < 8) result.b &= ~(1 << b.b); |
break; |
case 1: // int16 |
result.s = a.s; |
if (b.s < 16) result.s &= ~(1 << b.s); |
break; |
case 2: // int32 |
case 5: // float |
result.i = a.i; |
if (b.i < 32) result.i &= ~(1 << b.i); |
break; |
case 3: // int64 |
case 6: // double |
result.q = a.q; |
if (b.q < 64) result.q &= ~((uint64_t)1 << b.q); |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_toggle_bit(CThread * t) { |
// a ^ (1 << b) |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.b = a.b; |
if (b.b < 8) result.b ^= 1 << b.b; |
break; |
case 1: // int16 |
result.s = a.s; |
if (b.s < 16) result.s ^= 1 << b.s; |
break; |
case 2: // int32 |
case 5: // float |
result.i = a.i; |
if (b.i < 32) result.i ^= 1 << b.i; |
break; |
case 3: // int64 |
case 6: // double |
result.q = a.q; |
if (b.q < 64) result.q ^= (uint64_t)1 << b.q; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
/* |
static uint64_t f_and_bit(CThread * t) { |
// clear all bits except one |
// a & (1 << b) |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
SNum result; |
switch (t->operandType) { |
case 0: // int8 |
result.b = a.b; |
if (b.b < 8) result.b &= 1 << b.b; |
break; |
case 1: // int16 |
result.s = a.s; |
if (b.s < 16) result.s &= 1 << b.s; |
break; |
case 2: // int32 |
case 5: // float |
result.i = a.i; |
if (b.i < 32) result.i &= 1 << b.i; |
break; |
case 3: // int64 |
case 6: // double |
result.q = a.q; |
if (b.q < 64) result.q &= (uint64_t)1 << b.q; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
}*/ |
|
static uint64_t f_test_bit(CThread * t) { |
// test a single bit: a >> b & 1 |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
if (t->fInstr->imm2 & 4) { |
b = t->parm[4]; // avoid immediate operand shifted by imm3 |
} |
SNum result; |
result.q = 0; |
SNum mask = t->parm[3]; |
uint8_t fallbackreg = t->operands[2]; // fallback register |
SNum fallback; // fallback value |
fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0; |
switch (t->operandType) { |
case 0: // int8 |
if (b.b < 8) result.b = a.b >> b.b & 1; |
break; |
case 1: // int16 |
if (b.s < 16) result.s = a.s >> b.s & 1; |
break; |
case 2: // int32 |
case 5: // float |
if (b.i < 32) result.i = a.i >> b.i & 1; |
break; |
case 3: // int64 |
case 6: // double |
if (b.q < 64) result.q = a.q >> b.q & 1; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
// get additional options |
uint8_t options = 0; |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; |
if (options & 4) result.b ^= 1; // invert result |
if (options & 8) fallback.b ^= 1; // invert fallback |
if (options & 0x10) mask.b ^= 1; // invert mask |
switch (options & 3) { |
case 0: |
result.b = (mask.b & 1) ? result.b : fallback.b; |
break; |
case 1: |
result.b = mask.b & result.b & fallback.b; |
break; |
case 2: |
result.b = mask.b & (result.b | fallback.b); |
break; |
case 3: |
result.b = mask.b & (result.b ^ fallback.b); |
} |
// ignore other bits |
result.q &= 1; |
if (options & 0x20) { // get remaining bits from flag or NUMCONTR |
result.q |= mask.q & ~(uint64_t)1; |
} |
// disable normal fallback process |
t->parm[3].b = 1; |
return result.q; |
} |
|
static uint64_t f_test_bits_and(CThread * t) { |
// Test if all the indicated bits are 1 |
// result = (a & b) == b |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
if (t->fInstr->imm2 & 4) { |
b = t->parm[4]; // avoid immediate operand shifted by imm3 |
} |
SNum result; |
SNum mask = t->parm[3]; |
uint8_t fallbackreg = t->operands[2]; // fallback register |
SNum fallback; // fallback value |
fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0; |
switch (t->operandType) { |
case 0: // int8 |
result.b = (a.b & b.b) == b.b; |
break; |
case 1: // int16 |
result.s = (a.s & b.s) == b.s; |
break; |
case 2: // int32 |
case 5: // float |
result.i = (a.i & b.i) == b.i; |
break; |
case 3: // int64 |
case 6: // double |
result.q = (a.q & b.q) == b.q; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
// get additional options |
uint8_t options = 0; |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; |
if (options & 4) result.b ^= 1; // invert result |
if (options & 8) fallback.b ^= 1; // invert fallback |
if (options & 0x10) mask.b ^= 1; // invert mask |
switch (options & 3) { |
case 0: |
result.b = (mask.b & 1) ? result.b : fallback.b; |
break; |
case 1: |
result.b &= mask.b & fallback.b; |
break; |
case 2: |
result.b = mask.b & (result.b | fallback.b); |
break; |
case 3: |
result.b = mask.b & (result.b ^ fallback.b); |
} |
// ignore other bits |
result.q &= 1; |
if (options & 0x20) { // get remaining bits from flag or NUMCONTR |
result.q |= mask.q & ~(uint64_t)1; |
} |
// disable normal fallback process |
t->parm[3].b = 1; |
return result.q; |
} |
|
static uint64_t f_test_bits_or(CThread * t) { |
// Test if at least one of the indicated bits is 1. |
// result = (a & b) != 0 |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
//if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float |
if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float |
if (t->fInstr->imm2 & 4) { |
b = t->parm[4]; // avoid immediate operand shifted by imm3 |
} |
SNum result; |
SNum mask = t->parm[3]; |
uint8_t fallbackreg = t->operands[2]; // fallback register |
SNum fallback; // fallback value |
fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0; |
switch (t->operandType) { |
case 0: // int8 |
result.b = (a.b & b.b) != 0; |
break; |
case 1: // int16 |
result.s = (a.s & b.s) != 0; |
break; |
case 2: // int32 |
case 5: // float |
result.i = (a.i & b.i) != 0; |
break; |
case 3: // int64 |
case 6: // double |
result.q = (a.q & b.q) != 0; |
break; |
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
// get additional options |
uint8_t options = 0; |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; |
if (options & 4) result.b ^= 1; // invert result |
if (options & 8) fallback.b ^= 1; // invert fallback |
if (options & 0x10) mask.b ^= 1; // invert mask |
switch (options & 3) { |
case 0: |
result.b = (mask.b & 1) ? result.b : fallback.b; |
break; |
case 1: |
result.b &= mask.b & fallback.b; |
break; |
case 2: |
result.b = mask.b & (result.b | fallback.b); |
break; |
case 3: |
result.b = mask.b & (result.b ^ fallback.b); |
} |
// ignore other bits |
result.q &= 1; |
if (options & 0x20) { // get remaining bits from flag or NUMCONTR |
result.q |= mask.q & ~(uint64_t)1; |
} |
// disable normal fallback process |
t->parm[3].b = 1; |
return result.q; |
} |
|
|
float mul_add_f(float a, float b, float c) { |
// calculate a * b + c with extra precision on the intermediate product. |
#if FMA_AVAILABLE |
// use FMA instruction for correct precision if available |
return _mm_cvtss_f32(_mm_fmadd_ss(_mm_load_ss(&a), _mm_load_ss(&b), _mm_load_ss(&c))); |
#else |
return float((double)a * (double)b + (double)c); |
#endif |
} |
|
double mul_add_d(double a, double b, double c) { |
// calculate a * b + c with extra precision on the intermediate product. |
#if FMA_AVAILABLE |
// use FMA instruction for correct precision if available |
return _mm_cvtsd_f64(_mm_fmadd_sd(_mm_load_sd(&a), _mm_load_sd(&b), _mm_load_sd(&c))); |
#else |
// calculate a*b-c with extended precision. This code is not as good as the real FMA instruction |
SNum aa, bb, ahi, bhi, alo, blo; |
uint64_t upper_mask = 0xFFFFFFFFF8000000; |
aa.d = a; bb.d = b; |
ahi.q = aa.q & upper_mask; // split into high and low parts |
alo.d = a - ahi.d; |
bhi.q = bb.q & upper_mask; |
blo.d = b - bhi.d; |
double r1 = ahi.d * bhi.d; // this product is exact |
// perhaps a different order of addition is better here in some cases? |
double r2 = r1 + c; // add c to high product |
double r3 = r2 + (ahi.d * blo.d + bhi.d * alo.d) + alo.d * blo.d; // add rest of product |
return r3; |
#endif |
} |
|
uint64_t f_mul_add(CThread * t) { |
// a * b + c, calculated with extra precision on the intermediate product |
SNum a = t->parm[0]; |
SNum b = t->parm[1]; |
SNum c = t->parm[2]; |
if ((t->fInstr->imm2 & 4) && t->operandType < 5) { |
c = t->parm[4]; // avoid immediate operand shifted by imm3 |
} |
if (t->op == II_MUL_ADD2) { |
SNum t = b; b = c; c = t; // swap last two operands |
} |
uint32_t mask = t->parm[3].i; |
SNum result; |
bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode |
bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions |
|
// get sign options |
uint8_t options = 0; |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; |
//else if (t->fInstr->tmplate == 0xA) options = (mask >> MSKI_OPTIONS) & 0xF; |
if (t->vect == 2) { // odd vector element |
options >>= 1; |
} |
bool unsignedOverflow = false; |
bool signedOverflow = false; |
uint8_t operandType = t->operandType; |
switch (operandType) { |
case 0: // int8 |
a.is = a.bs; b.is = b.bs; // sign extend to avoid overflow during sign change |
if (options & 1) a.is = -a.is; |
if (options & 4) c.is = -c.is; |
result.is = a.is * b.is + c.bs; |
signedOverflow = result.is != result.bs; |
unsignedOverflow = result.i != result.b; |
break; |
case 1: // int16 |
a.is = a.ss; b.is = b.ss; // sign extend to avoid overflow during sign change |
if (options & 1) a.is = -a.is; |
if (options & 4) c.is = -c.is; |
result.is = a.is * b.is + c.ss; |
signedOverflow = result.is != result.ss; |
unsignedOverflow = result.i != result.s; |
break; |
case 2: // int32 |
a.qs = a.is; b.qs = b.is; // sign extend to avoid overflow during sign change |
if (options & 1) a.qs = -a.qs; |
if (options & 4) c.qs = -c.qs; |
result.qs = a.qs * b.qs + c.is; |
signedOverflow = result.qs != result.is; |
unsignedOverflow = result.q != result.i; |
break; |
case 3: // int64 |
if (options & 1) { |
if (a.q == sign_d) signedOverflow = true; |
a.qs = -a.qs; |
} |
if (options & 4) { |
if (b.q == sign_d) signedOverflow = true; |
c.qs = -c.qs; |
} |
result.qs = a.qs * b.qs + c.qs; |
/* |
if (mask.b & MSK_OVERFL_UNSIGN) { // check for unsigned overflow |
if (fabs((double)a.q + (double)b.q * (double)c.q - (double)result.q) > 1.E8) unsignedOverflow = true; |
} |
if (mask.b & MSK_OVERFL_SIGN) { // check for signed overflow |
if (fabs((double)a.qs + (double)b.qs * (double)c.qs - (double)result.qs) > 1.E8) signedOverflow = true; |
} */ |
break; |
case 5: // float |
if (options & 1) a.f = -a.f; |
if (options & 4) c.f = -c.f; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
|
result.f = mul_add_f(a.f, b.f, c.f); // do the calculation |
if (isnan_or_inf_f(result.i)) { // check for overflow and nan |
uint32_t nans = 0; // biggest NAN |
uint32_t infs = 0; // count INF inputs |
for (int i = 0; i < 3; i++) { // loop through input operands |
uint32_t tmp = t->parm[i].i & nsign_f; // ignore sign bit |
if (tmp == inf_f) infs++; // is INF |
else if (tmp > nans) nans = tmp; // get the biggest if there are multiple NANs |
} |
if (nans) result.i = nans; // there is at least one NAN. return the biggest (sign bit is lost) |
else if (isnan_f(result.i)) { |
// result is NAN, but no input is NAN. This can be 0*INF or INF-INF |
if ((a.i << 1 == 0 || b.i << 1 == 0) && infs) result.q = t->makeNan(nan_invalid_0mulinf, operandType); |
else result.q = t->makeNan(nan_invalid_sub, operandType); |
} |
} |
else if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
break; |
|
case 6: // double |
if (options & 1) a.d = -a.d; |
if (options & 4) c.d = -c.d; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
|
result.d = mul_add_d(a.d, b.d, c.d); // do the calculation |
if (isnan_or_inf_d(result.q)) { // check for overflow and nan |
uint64_t nans = 0; // biggest NAN |
uint32_t infs = 0; // count INF inputs |
for (int i = 0; i < 3; i++) { // loop through input operands |
uint64_t tmp = t->parm[i].q & nsign_d; // ignore sign bit |
if (tmp == inf_d) infs++; // is INF |
else if (tmp > nans) nans = tmp; // get the biggest if there are multiple NANs |
} |
if (nans) result.q = nans; // there is at least one NAN. return the biggest (sign bit is lost) |
else if (isnan_d(result.q)) { |
// result is NAN, but no input is NAN. This can be 0*INF or INF-INF |
if ((a.q << 1 == 0 || b.q << 1 == 0) && infs) result.q = t->makeNan(nan_invalid_0mulinf, operandType); |
else result.q = t->makeNan(nan_invalid_sub, operandType); |
} |
} |
else if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
break; |
|
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
result.i = 0; |
} |
return result.q; |
} |
|
static uint64_t f_add_add(CThread * t) { |
// a + b + c, calculated with extra precision on the intermediate sum |
int i, j; |
SNum parm[3]; |
// copy parameters so that we change sign and reorder them without changing original constant |
for (i = 0; i < 3; i++) parm[i] = t->parm[i]; |
if ((t->fInstr->imm2 & 4) && t->operandType < 5) { |
parm[2] = t->parm[4]; // avoid immediate operand shifted by imm3 |
} |
uint32_t mask = t->parm[3].i; |
bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode |
bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions |
uint8_t operandType = t->operandType; |
SNum sumS, sumU; // signed and unsigned sums |
SNum nanS; // combined nan's |
// get sign options |
uint8_t options = 0; |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; |
//else if (t->fInstr->tmplate == 0xA) options = uint8_t(mask >> MSKI_OPTIONS); |
|
uint8_t signedOverflow = 0; |
uint8_t unsignedOverflow = 0; |
//bool parmInf = false; |
sumS.q = sumU.q = 0; |
uint32_t temp1; |
uint64_t temp2; |
|
switch (operandType) { |
case 0: // int8 |
for (i = 0; i < 3; i++) { // loop through operands |
if (options & 1) { // change sign |
if (parm[i].b == 0x80) signedOverflow ^= 1; |
if (parm[i].b != 0) unsignedOverflow ^= 1; |
parm[i].is = - parm[i].is; |
} |
options >>= 1; // get next option bit |
sumU.i += parm[i].b; // unsigned sum |
sumS.is += parm[i].bs; // sign-extended sum |
} |
if (sumU.b != sumU.i) unsignedOverflow ^= 1; |
if (sumS.bs != sumS.is) signedOverflow ^= 1; |
break; |
case 1: // int16 |
for (i = 0; i < 3; i++) { // loop through operands |
if (options & 1) { // change sign |
if (parm[i].s == 0x8000) signedOverflow ^= 1; |
if (parm[i].s != 0) unsignedOverflow ^= 1; |
parm[i].is = - parm[i].is; |
} |
options >>= 1; // get next option bit |
sumU.i += parm[i].s; // unsigned sum |
sumS.is += parm[i].ss; // sign-extended sum |
} |
if (sumU.s != sumU.i) unsignedOverflow ^= 1; |
if (sumS.ss != sumS.is) signedOverflow ^= 1; |
break; |
case 2: // int32 |
for (i = 0; i < 3; i++) { // loop through operands |
if (options & 1) { // change sign |
if (parm[i].i == sign_f) signedOverflow ^= 1; |
if (parm[i].i != 0) unsignedOverflow ^= 1; |
parm[i].is = - parm[i].is; |
} |
options >>= 1; // get next option bit |
sumU.q += parm[i].i; // unsigned sum |
sumS.qs += parm[i].is; // sign-extended sum |
} |
if (sumU.i != sumU.q) unsignedOverflow ^= 1; |
if (sumS.is != sumS.qs) signedOverflow ^= 1; |
break; |
case 3: // int64 |
for (i = 0; i < 3; i++) { // loop through operands |
if (options & 1) { // change sign |
if (parm[i].q == sign_d) signedOverflow ^= 1; |
if (parm[i].q != 0) unsignedOverflow ^= 1; |
parm[i].qs = - parm[i].qs; |
} |
options >>= 1; // get next option bit |
uint64_t a = parm[i].q; |
uint64_t b = sumU.q; |
sumU.q = a + b; // sum |
if (sumU.q < a) unsignedOverflow ^= 1; |
if (int64_t(~(a ^ b) & (a ^ sumU.q)) < 0) signedOverflow ^= 1; |
} |
break; |
case 5: // float |
sumS.is = -1; nanS.i = 0; |
for (i = 0; i < 3; i++) { // loop through operands |
if (options & 1) parm[i].f = -parm[i].f; // change sign |
// find the smallest of the three operands |
if ((parm[i].i << 1) < sumS.i) { |
sumS.i = (parm[i].i << 1); j = i; |
} |
// find NANs and infs |
temp1 = parm[i].i & nsign_f; // ignore sign bit |
if (temp1 > nanS.i) nanS.i = temp1; // find the biggest NAN |
//if (temp1 == inf_f) parmInf = true; // OR of all INFs |
options >>= 1; // next option bit |
} |
if (nanS.i > inf_f) return nanS.i; // result is NAN |
// get the smallest operand last to minimize loss of precision if the two biggest operands have opposite signs |
temp1 = parm[j].i; |
parm[j].i = parm[2].i; |
parm[2].i = temp1; |
|
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
|
// calculate sum |
sumU.f = (parm[0].f + parm[1].f) + parm[2].f; |
|
if (isnan_f(sumU.i)) { |
// the result is NAN but neither input is NAN. This must be INF-INF |
sumU.q = t->makeNan(nan_invalid_sub, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) sumU.q = t->makeNan(nan_overflow_add, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) sumU.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) sumU.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
break; |
|
case 6: // double |
sumS.qs = -1; nanS.q = 0; |
for (i = 0; i < 3; i++) { // loop through operands |
if (options & 1) parm[i].d = -parm[i].d; // change sign |
// find the smallest of the three operands |
if ((parm[i].q << 1) < sumS.q) { |
sumS.q = (parm[i].q << 1); j = i; |
} |
// find NANs and infs |
temp2 = parm[i].q & nsign_d; // ignore sign bit |
if (temp2 > nanS.q) nanS.q = temp2; // find the biggest NAN |
//if (temp2 == inf_d) parmInf = true; // OR of all INFs |
options >>= 1; // next option bit |
} |
if (nanS.q > inf_d) return nanS.q; // result is NAN |
|
// get the smallest operand last to minimize loss of precision if |
// the two biggest operands have opposite signs |
temp2 = parm[j].q; |
parm[j].q = parm[2].q; |
parm[2].q = temp2; |
if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); |
if (detectExceptions) clearExceptionFlags(); // clear previous exceptions |
|
// calculate sum |
sumU.d = (parm[0].d + parm[1].d) + parm[2].d; |
|
if (isnan_d(sumU.q)) { |
// the result is NAN but neither input is NAN. This must be INF-INF |
sumU.q = t->makeNan(nan_invalid_sub, operandType); |
} |
if (detectExceptions) { |
uint32_t x = getExceptionFlags(); // read exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) sumU.q = t->makeNan(nan_overflow_add, operandType); |
else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) sumU.q = t->makeNan(nan_underflow, operandType); |
else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) sumU.q = t->makeNan(nan_inexact, operandType); |
} |
if (roundingMode) setRoundingMode(0); // reset rounding mode |
break; |
|
default: |
t->interrupt(INT_WRONG_PARAMETERS); |
sumU.i = 0; |
} |
return sumU.q; |
} |
|
uint64_t f_add_h(CThread * t) { |
// add two numbers, float16 |
// (rounding mode not supported) |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
uint16_t result; |
|
if (t->fInstr->immSize == 1) b.s = float2half(b.bs); // convert 8-bit integer to float16 |
if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); |
if (isnan_h(a.s) && isnan_h(b.s)) { // both are NAN |
result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload |
} |
if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions |
|
// the exact result is obtained with double precision. This makes sure we don't get double rounding errors |
double resultd = (double)half2float(a.s) + (double)half2float(b.s); // calculate with single precision |
result = double2half(resultd); |
|
// check for exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) { |
// overflow |
result = (uint16_t)t->makeNan(nan_overflow_add, 1); |
result |= (a.s ^ b.s) & 0x8000; // get the sign |
} |
else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) { |
// underflow |
result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN |
} |
else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultd || (getExceptionFlags() & 0x20)) != 0) { |
// inexact |
result = (uint16_t)t->makeNan(nan_inexact, 1); |
} |
|
uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; |
if (roundingMode != 0 && !isnan_or_inf_h(result)) { |
double r = half2float(result); |
// non-standard rounding mode |
switch (roundingMode) { |
case 1: // down |
if (r > resultd && result != 0xFBFF) { |
if (result == 0) result = 0x8001; |
else if ((int16_t)result > 0) result--; |
else result++; |
} |
break; |
case 2: // up |
if (r < resultd && result != 0x7BFF) { |
if ((int16_t)result > 0) result++; |
else result--; |
} |
break; |
case 3: // towards zero |
if ((int16_t)result > 0 && r > resultd) result--; |
else if ((int16_t)result < 0 && r < resultd) result--; |
} |
} |
t->returnType =0x118; |
return result; |
} |
|
uint64_t f_sub_h(CThread * t) { |
// subtract two numbers, float16 |
// (rounding mode not supported) |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
uint16_t result; |
|
if (t->fInstr->immSize == 1) b.s = float2half(b.bs); // convert 8-bit integer to float16 |
if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); |
if (isnan_h(a.s) && isnan_h(b.s)) { // both are NAN |
result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload |
} |
if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions |
|
// the exact result is obtained with double precision. This makes sure we don't get double rounding errors |
double resultd = (double)half2float(a.s) - (double)half2float(b.s); // calculate with single precision |
result = double2half(resultd); |
|
// check for exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) { |
// overflow |
result = (uint16_t)t->makeNan(nan_overflow_add, 1); |
result |= (a.s ^ b.s) & 0x8000; // get the sign |
} |
else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) { |
// underflow |
result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN |
} |
else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultd || (getExceptionFlags() & 0x20)) != 0) { |
// inexact |
result = (uint16_t)t->makeNan(nan_inexact, 1); |
} |
uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; |
if (roundingMode != 0 && !isnan_or_inf_h(result)) { |
double r = half2float(result); |
// non-standard rounding mode |
switch (roundingMode) { |
case 1: // down |
if (r > resultd && result != 0xFBFF) { |
if (result == 0) result = 0x8001; |
else if ((int16_t)result > 0) result--; |
else result++; |
} |
break; |
case 2: // up |
if (r < resultd && result != 0x7BFF) { |
if ((int16_t)result > 0) result++; |
else result--; |
} |
break; |
case 3: // towards zero |
if ((int16_t)result > 0 && r > resultd) result--; |
else if ((int16_t)result < 0 && r < resultd) result--; |
} |
} |
t->returnType =0x118; |
return result; |
} |
|
uint64_t f_mul_h(CThread * t) { |
// multiply two numbers, float16 |
SNum a = t->parm[1]; |
SNum b = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
uint16_t result; |
|
if (t->fInstr->immSize == 1) b.s = float2half(b.bs); // convert 8-bit integer to float16 |
if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); |
if (isnan_h(a.s) && isnan_h(b.s)) { // both are NAN |
result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload |
} |
if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions |
|
// single precision is sufficient to get an exact multiplication result |
float resultf = half2float(a.s) * half2float(b.s); // calculate with single precision |
result = float2half(resultf); |
|
// check for exceptions |
if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) { |
// overflow |
result = (uint16_t)t->makeNan(nan_overflow_mul, 1); |
result |= (a.s ^ b.s) & 0x8000; // get the sign |
} |
else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultf != 0.0f) { |
// underflow |
result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN |
} |
else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultf || (getExceptionFlags() & 0x20)) != 0) { |
// inexact |
result = (uint16_t)t->makeNan(nan_inexact, 1); |
} |
uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; |
if (roundingMode != 0 && !isnan_or_inf_h(result)) { |
// non-standard rounding mode |
float r = half2float(result); |
switch (roundingMode) { |
case 1: // down |
if (r > resultf && result != 0xFBFF) { |
if (result == 0) result = 0x8001; |
else if ((int16_t)result > 0) result--; |
else result++; |
} |
break; |
case 2: // up |
if (r < resultf && result != 0x7BFF) { |
if ((int16_t)result > 0) result++; |
else result--; |
} |
break; |
case 3: // towards zero |
if ((int16_t)result > 0 && r > resultf) result--; |
else if ((int16_t)result < 0 && r < resultf) result--; |
} |
} |
t->returnType =0x118; |
return result; |
} |
|
|
uint64_t f_mul_add_h(CThread * t) { |
// a + b * c, float16 |
SNum a = t->parm[0]; |
SNum b = t->parm[1]; |
SNum c = t->parm[2]; |
uint32_t mask = t->parm[3].i; |
if (t->fInstr->imm2 & 4) c = t->parm[4]; // avoid immediate operand shifted by imm3 |
if (t->fInstr->immSize == 1) c.s = float2half(c.bs); // convert 8-bit integer to float16 // get sign options |
uint8_t options = 0; |
if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; |
//else if (t->fInstr->tmplate == 0xA) options = (mask >> MSKI_OPTIONS) & 0xF; |
if (t->vect == 2) { // odd vector element |
options >>= 1; |
} |
if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); |
if (options & 1) a.s ^= 0x8000; // adjust sign |
if (options & 4) b.s ^= 0x8000; |
|
if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions |
|
double resultd = (double)half2float(a.s) + (double)half2float(b.s) * (double)half2float(c.s); |
|
uint16_t result = double2half(resultd); |
uint32_t nans = 0; bool parmInf = false; |
|
if (isnan_or_inf_h(result)) { // check for overflow and nan |
for (int i = 0; i < 3; i++) { // loop through input operands |
uint32_t tmp = t->parm[i].s & 0x7FFF; // ignore sign bit |
if (tmp > nans) nans = tmp; // get the biggest if there are multiple NANs |
if (tmp == inf_h) parmInf = true; // OR of all INFs |
} |
if (nans > inf_h) return nans; // there is at least one NAN. return the biggest (sign bit is lost) |
else if (isnan_h(result)) { |
// result is NAN, but no input is NAN. This can be 0*INF or INF-INF |
if ((a.s << 1 == 0 || b.s << 1 == 0) && parmInf) result = (uint16_t)t->makeNan(nan_invalid_0mulinf, 1); |
else result = (uint16_t)t->makeNan(nan_invalid_sub, 1); |
} |
} |
else if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !parmInf) result = (uint16_t)t->makeNan(nan_overflow_mul, 1); |
else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) result = (uint16_t)t->makeNan(nan_underflow, 1); |
else if ((mask & (1<<MSK_INEXACT)) && ((getExceptionFlags() & 0x20) != 0 || half2float(result) != resultd)) result = (uint16_t)t->makeNan(nan_inexact, 1); |
|
uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; |
if (roundingMode != 0 && !isnan_or_inf_h(result)) { |
float r = half2float(result); |
// non-standard rounding mode |
switch (roundingMode) { |
case 1: // down |
if (r > resultd && result != 0xFBFF) { |
if (result == 0) result = 0x8001; |
else if ((int16_t)result > 0) result--; |
else result++; |
} |
break; |
case 2: // up |
if (r < resultd && result != 0x7BFF) { |
if ((int16_t)result > 0) result++; |
else result--; |
} |
break; |
case 3: // towards zero |
if ((int16_t)result > 0 && r > resultd) result--; |
else if ((int16_t)result < 0 && r < resultd) result--; |
} |
} |
t->returnType = 0x118; |
return result; |
} |
|
|
// Tables of function pointers |
|
// multiformat instructions |
PFunc funcTab1[64] = { |
f_nop, f_store, f_move, f_prefetch, f_sign_extend, f_sign_extend_add, 0, f_compare, // 0 - 7 |
f_add, f_sub, f_sub_rev, f_mul, f_mul_hi, f_mul_hi_u, f_div, f_div_u, // 8 - 15 |
f_div_rev, 0, f_rem, f_rem_u, f_min, f_min_u, f_max, f_max_u, // 16 - 23 |
0, 0, f_and, f_or, f_xor, 0, 0, 0, // 24 - 31 |
f_shift_left, f_rotate, f_shift_right_s, f_shift_right_u, f_clear_bit, f_set_bit, f_toggle_bit, f_test_bit, // 32 - 39 |
f_test_bits_and, f_test_bits_or, 0, 0, f_add_h, f_sub_h, f_mul_h, 0, // 40 - 47 |
f_mul_add_h, f_mul_add, f_mul_add, f_add_add, f_select_bits, f_funnel_shift, 0, 0, // 48 - 55 |
0, 0, 0, 0, 0, 0, 0, 0 // 56 - 63 |
}; |
|