URL
https://opencores.org/ocsvn/forwardcom/forwardcom/trunk
Subversion Repositories forwardcom
[/] [forwardcom/] [bintools/] [emulator3.cpp] - Rev 74
Go to most recent revision | Compare with Previous | Blame | View Log
/**************************** emulator3.cpp ******************************** * Author: Agner Fog * date created: 2018-02-18 * Last modified: 2021-06-29 * Version: 1.11 * Project: Binary tools for ForwardCom instruction set * Description: * Emulator: Execution functions for multiformat instructions * * Copyright 2018-2021 GNU General Public License http://www.gnu.org/licenses *****************************************************************************/ #include "stdafx.h" // get intrinsic functions for _mm_getcsr and _mm_setcsr to control floating point rounding and exceptions #if defined(_M_X64) || defined(__x86_64__) || defined(__amd64) || defined(__SSE2__) #if defined(__FMA__) || defined(__AVX2__) #define FMA_AVAILABLE 1 #else #define FMA_AVAILABLE 0 #endif #if defined(_MSC_VER) && !FMA_AVAILABLE #include <xmmintrin.h> #else #include <immintrin.h> #endif #define MCSCR_AVAILABLE 1 #else #define MCSCR_AVAILABLE 0 #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// // functions for detecting exceptions and controlling rounding mode on the CPU that runs the emulator // Note: these functions are only available in x86 systems with SSE2 or x64 enabled ////////////////////////////////////////////////////////////////////////////////////////////////////// // Error message if MXCSR not available void errorFpControlMissing() { static int repeated = 0; if (!repeated) { fprintf(stderr, "Error: Cannot control floating point exceptions and rounding mode on this platform"); repeated = 1; } } void setRoundingMode(uint8_t r) { // change rounding mode #if MCSCR_AVAILABLE uint32_t e = _mm_getcsr(); e = (e & 0x9FFF) | (r & 3) << 13; _mm_setcsr(e); #else errorFpControlMissing(); #endif } void clearExceptionFlags() { // clear exception flags before detecting exceptions #if MCSCR_AVAILABLE uint32_t e = _mm_getcsr(); _mm_setcsr(e & 0xFFC0); #else errorFpControlMissing(); #endif } uint32_t getExceptionFlags() { // read exception flags after instructions that may cause exceptions // 1: invalid operation // 2: denormal // 4: divide by zero // 8: overflow // 0x10: underflow // 0x20: precision #if MCSCR_AVAILABLE return _mm_getcsr() & 0x3F; #else errorFpControlMissing(); return 0; #endif } void enableSubnormals(uint32_t e) { // enable or disable subnormal numbers #if MCSCR_AVAILABLE uint32_t x = _mm_getcsr(); if (e != 0) { _mm_setcsr(x & ~0x8040); } else { _mm_setcsr(x | 0x8040); } #else errorFpControlMissing(); #endif } ///////////////////////////// // Multi-format instructions ///////////////////////////// uint64_t f_nop(CThread * t) { // No operation t->running = 2; // don't save RD t->returnType = 0; // debug return output return 0; } static uint64_t f_store(CThread * t) { // Store value RD to memory uint8_t rd = t->operands[0]; uint64_t value = t->registers[rd]; if (t->vect) { value = t->readVectorElement(rd, t->vectorOffset); } // check mask if (t->parm[3].b & 1) { uint64_t address = t->memAddress; // memory address if (t->vect) address += t->vectorOffset; t->writeMemoryOperand(value, address); } else { // mask is 0. This instruction has no fallback. Don't write /* uint8_t fallback = t->operands[2]; // mask is 0. get fallback if (fallback == 0x1F) value = 0; else if (t->vect) value = t->readVectorElement(fallback, t->vectorOffset); else value = t->registers[fallback];*/ } t->returnType = (t->returnType & ~0x10) | 0x20; // return type is memory t->running = 2; // don't save RD return 0; } static uint64_t f_move(CThread * t) { // copy value return t->parm[2].q; } static uint64_t f_prefetch(CThread * t) { // prefetch from address. not emulated return f_nop(t); } static uint64_t f_sign_extend(CThread * t) { // sign-extend integer to 64 bits int64_t value = 0; switch (t->operandType) { case 0: value = (int64_t)(int8_t)t->parm[2].b; break; case 1: value = (int64_t)(int16_t)t->parm[2].s; break; case 2: value = (int64_t)(int32_t)t->parm[2].i; break; case 3: value = (int64_t)t->parm[2].q; break; default: t->interrupt(INT_WRONG_PARAMETERS); value = 0; } t->operandType = 3; // change operand size of result if (t->vect) { t->vectorLength[t->operands[0]] = t->vectorLengthR = 8; // change vector length of result and stop vector loop } t->returnType = (t->returnType & ~7) | 3; // debug return output return (uint64_t)value; } static uint64_t f_sign_extend_add(CThread * t) { // sign-extend integer to 64 bits and add 64-bit register int64_t value = 0; uint8_t options = 0; if (t->fInstr->tmplate == 0xE) options = t->pInstr->a.im3; switch (t->operandType) { case 0: value = (int64_t)(int8_t)t->parm[2].b; break; case 1: value = (int64_t)(int16_t)t->parm[2].s; break; case 2: value = (int64_t)(int32_t)t->parm[2].i; break; case 3: value = (int64_t)t->parm[2].q; break; default: t->interrupt(INT_WRONG_PARAMETERS); value = 0; } value <<= options; uint8_t r1 = t->operands[4]; // first operand. g.p. register value += t->registers[r1]; // read register with full size t->operandType = 3; // change operand size of result t->returnType = (t->returnType & ~7) | 3; // debug return output if (t->vect) t->interrupt(INT_WRONG_PARAMETERS); return (uint64_t)value; } static uint64_t f_compare(CThread * t) { // compare two source operands and generate a boolean result // get condition code uint8_t cond = 0; uint32_t mask = t->parm[3].i; // mask register value or NUMCONTR if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) { cond = t->pInstr->a.im3; // E template. get condition from IM3 } // get operands SNum a = t->parm[1]; SNum b = t->parm[2]; if ((t->fInstr->imm2 & 4) && t->operandType < 5) { b = t->parm[4]; // avoid immediate operand shifted by imm3 } uint64_t result = 0; uint8_t cond1 = cond >> 1 & 3; // bit 1 - 2 of condition bool isnan = false; // select operand type if (t->operandType < 5) { // integer types uint64_t sizeMask = dataSizeMask[t->operandType]; // mask for data size uint64_t signBit = (sizeMask >> 1) + 1; // sign bit a.q &= sizeMask; b.q &= sizeMask; // mask to desired size if (cond1 != 3 && !(cond & 8)) { // signed a.q ^= signBit; b.q ^= signBit; // flip sign bit to use unsigned compare } switch (cond1) { // select condition case 0: // a == b result = a.q == b.q; break; case 1: // a < b result = a.q < b.q; break; case 2: // a > b result = a.q > b.q; break; case 3: // abs(a) < abs(b). Not officially supported in version 1.11 if (a.q & signBit) a.q = (~a.q + 1) & sizeMask; // change sign. overflow allowed if (b.q & signBit) b.q = (~b.q + 1) & sizeMask; // change sign. overflow allowed result = a.q < b.q; break; } } else if (t->operandType == 5) { // float isnan = isnan_f(a.i) || isnan_f(b.i); // check for NAN if (!isnan) { switch (cond1) { // select condition case 0: // a == b result = a.f == b.f; break; case 1: // a < b result = a.f < b.f; break; case 2: // a > b result = a.f > b.f; break; case 3: // abs(a) < abs(b) result = fabsf(a.f) < fabsf(b.f); break; } } } else if (t->operandType == 6) { // double isnan = isnan_d(a.q) || isnan_d(b.q); if (!isnan) { switch (cond1) { // select condition case 0: // a == b result = a.d == b.d; break; case 1: // a < b result = a.d < b.d; break; case 2: // a > b result = a.d > b.d; break; case 3: // abs(a) < abs(b) result = fabs(a.d) < fabs(b.d); break; } } } else t->interrupt(INT_WRONG_PARAMETERS); // unsupported type // invert result if (cond & 1) result ^= 1; // check for NAN if (isnan) { result = (cond >> 3) & 1; // bit 3 tells what to get if unordered //if (t->parm[3].i & MSK_FLOAT_NAN_LOSS) t->interrupt(INT_FLOAT_NAN_LOSS); // mask bit 29: trap if NAN loss } // mask and fallback uint8_t fallbackreg = t->operands[2]; uint64_t fallback = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg) : 0; switch (cond >> 4) { case 0: // normal fallback if (!(mask & 1)) result = fallback; break; case 1: // mask & result & fallback result &= mask & fallback; break; case 2: // mask & (result | fallback) result = mask & (result | fallback); break; case 3: // mask & (result ^ fallback) result = mask & (result ^ fallback); break; } if ((t->returnType & 7) >= 5) t->returnType -= 3; // debug return output must be integer result &= 1; // use only bit 0 of result if ((t->operands[1] & 0x1F) < 7) { // There is a mask. get remaining bits from mask result |= (t->parm[3].q & ~(uint64_t)1); } t->parm[3].b = 1; // prevent normal mask operation return result; } uint64_t f_add(CThread * t) { // add two numbers SNum a = t->parm[1]; SNum b = t->parm[2]; uint32_t mask = t->parm[3].i; SNum result; bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions uint8_t operandType = t->operandType; if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { // subnormal status changed enableSubnormals (mask & (1<<MSK_SUBNORMAL)); t->lastMask = mask; } // operand type if (operandType < 4) { // integer // uint64_t sizeMask = dataSizeMask[t->operandType]; // mask for data size result.q = a.q + b.q; } else if (operandType == 5) { // float bool nana = isnan_f(a.i); // check for NAN input bool nanb = isnan_f(b.i); if (nana && nanb) { // both are NAN return (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload } else if (nana) return a.q; else if (nanb) return b.q; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.f = a.f + b.f; // this is the actual addition if (isnan_f(result.i)) { // the result is NAN but neither input is NAN. This must be INF-INF result.q = t->makeNan(nan_invalid_sub, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } else if (operandType == 6) { // double bool nana = isnan_d(a.q); // check for NAN input bool nanb = isnan_d(b.q); if (nana && nanb) { // both are NAN return (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload } else if (nana) return a.q; else if (nanb) return b.q; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.d = a.d + b.d; // this is the actual addition if (isnan_d(result.q)) { // the result is NAN but neither input is NAN. This must be INF-INF result.q = t->makeNan(nan_invalid_sub, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } else { // unsupported operand type t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } uint64_t f_sub(CThread * t) { // subtract two numbers SNum a = t->parm[1]; SNum b = t->parm[2]; uint32_t mask = t->parm[3].i; SNum result; bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions uint8_t operandType = t->operandType; if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { // subnormal status changed enableSubnormals (mask & (1<<MSK_SUBNORMAL)); t->lastMask = mask; } if (operandType < 4) { // integer result.q = a.q - b.q; // subtract } else if (operandType == 5) { // float bool nana = isnan_f(a.i); // check for NAN input bool nanb = isnan_f(b.i); if (nana && nanb) { // both are NAN return (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload } else if (nana) return a.q; else if (nanb) return b.q; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.f = a.f - b.f; // this is the actual subtraction if (isnan_f(result.i)) { // the result is NAN but neither input is NAN. This must be INF-INF result.q = t->makeNan(nan_invalid_sub, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } else if (operandType == 6) {// double bool nana = isnan_d(a.q); // check for NAN input bool nanb = isnan_d(b.q); if (nana && nanb) { // both are NAN return (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload } else if (nana) return a.q; else if (nanb) return b.q; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.d = a.d - b.d; // this is the actual subtraction if (isnan_d(result.q)) { // the result is NAN but neither input is NAN. This must be INF-INF result.q = t->makeNan(nan_invalid_sub, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_add, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } else { // unsupported operand type t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } extern uint64_t f_sub_rev(CThread * t) { // subtract two numbers, b-a uint64_t temp = t->parm[2].q; // swap operands t->parm[2].q = t->parm[1].q; t->parm[1].q = temp; uint64_t retval = f_sub(t); t->parm[2].q = temp; // restore parm[2] in case it is a constant return retval; } uint64_t f_mul(CThread * t) { // multiply two numbers SNum a = t->parm[1]; SNum b = t->parm[2]; uint32_t mask = t->parm[3].i; SNum result; bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions uint8_t operandType = t->operandType; if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { // subnormal status changed enableSubnormals (mask & (1<<MSK_SUBNORMAL)); t->lastMask = mask; } if (operandType < 4) { // integer result.q = a.q * b.q; } else if (operandType == 5) { // float bool nana = isnan_f(a.i); // check for NAN input bool nanb = isnan_f(b.i); if (nana && nanb) { // both are NAN return (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload } else if (nana) return a.q; else if (nanb) return b.q; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.f = a.f * b.f; // this is the actual multiplication if (isnan_f(result.i)) { // the result is NAN but neither input is NAN. This must be 0*INF result.q = t->makeNan(nan_invalid_0mulinf, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } else if (operandType == 6) { // double bool nana = isnan_d(a.q); // check for NAN input bool nanb = isnan_d(b.q); if (nana && nanb) { // both are NAN return (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload } else if (nana) return a.q; else if (nanb) return b.q; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.d = a.d * b.d; // this is the actual multiplication if (isnan_d(result.q)) { // the result is NAN but neither input is NAN. This must be 0*INF result.q = t->makeNan(nan_invalid_0mulinf, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } else { // unsupported operand type t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } uint64_t f_div(CThread * t) { // divide two floating point numbers or signed integers SNum a = t->parm[1]; SNum b = t->parm[2]; uint32_t mask = t->parm[3].i; SNum result; bool overflow = false; bool roundingMode = (mask & (3 << MSKI_ROUNDING))!=0; // non-standard floating point rounding mode bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions bool nana, nanb; // inputs are NAN uint8_t operandType = t->operandType; uint32_t intRounding = 0; // integer rounding mode if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) { intRounding = t->pInstr->a.im3; // E template. get integer rounding mode from IM3 } if (((mask ^ t->lastMask) & (1<<MSK_SUBNORMAL)) != 0) { // subnormal status changed enableSubnormals (mask & (1<<MSK_SUBNORMAL)); t->lastMask = mask; } switch (operandType) { case 0: // int8 if (b.b == 0 || (a.b == 0x80 && b.bs == -1)) { result.i = 0x80; overflow = true; } else { result.i = a.bs / b.bs; if (intRounding != 0 && intRounding != 7 && abs(b.bs) != 1) { int rem = a.bs % b.bs; switch (intRounding) { case 4: { // nearest or even uint32_t r2 = 2*abs(rem); uint32_t b2 = abs(b.bs); int s = int8_t(a.i ^ b.i) < 0 ? -1 : 1; // one with sign of result if (r2 > b2 || (r2 == b2 && (result.b & 1))) result.i += s; break;} case 5: // down if (rem != 0 && int8_t(a.i ^ b.i) < 0 && result.b != 0x80u) result.i--; break; case 6: // up if (rem != 0 && int8_t(a.i ^ b.i) >= 0) result.i++; break; } } } break; case 1: // int16 if (b.s == 0 || (a.s == 0x8000u && b.ss == -1)) { result.i = 0x8000; overflow = true; } else { result.i = a.ss / b.ss; if (intRounding != 0 && intRounding != 7 && abs(b.ss) != 1) { int16_t rem = a.ss % b.ss; switch (intRounding) { case 4: { // nearest or even uint16_t r2 = 2*abs(rem); uint16_t b2 = abs(b.is); int16_t s = int16_t(a.s ^ b.s) < 0 ? -1 : 1; // one with sign of result if (r2 > b2 || (r2 == b2 && (result.s & 1))) result.s += s; break;} case 5: // down if (rem != 0 && int16_t(a.s ^ b.s) < 0 && result.s != 0x8000u) result.s--; break; case 6: // up if (rem != 0 && int16_t(a.s ^ b.s) >= 0) result.s++; break; } } } break; case 2: // int32 if (b.i == 0 || (a.i == sign_f && b.is == -1)) { result.i = sign_f; overflow = true; } else { result.i = a.is / b.is; if (intRounding != 0 && intRounding != 7 && abs(b.is) != 1) { int rem = a.is % b.is; switch (intRounding) { case 4: { // nearest or even uint32_t r2 = 2*abs(rem); uint32_t b2 = abs(b.is); int s = int32_t(a.i ^ b.i) < 0 ? -1 : 1; // one with sign of result if (r2 > b2 || (r2 == b2 && (result.i & 1))) result.i += s; break;} case 5: // down if (rem != 0 && int32_t(a.i ^ b.i) < 0 && result.i != 0x80000000u) result.i--; break; case 6: // up if (rem != 0 && int32_t(a.i ^ b.i) >= 0) result.i++; break; } } } break; case 3: // int64 if (b.q == 0 || (a.q == sign_d && b.qs == int64_t(-1))) { result.q = sign_d; overflow = true; } else { result.qs = a.qs / b.qs; if (intRounding != 0 && intRounding != 7 && abs(b.qs) != 1) { int64_t rem = a.qs % b.qs; switch (intRounding) { case 4: { // nearest or even uint64_t r2 = 2*abs(rem); uint64_t b2 = abs(b.qs); int64_t s = int64_t(a.q ^ b.q) < 0 ? -1 : 1; // one with sign of result if (r2 > b2 || (r2 == b2 && (result.i & 1))) result.q += s; break;} case 5: // down if (rem != 0 && int64_t(a.q ^ b.q) < 0 && result.q != 0x8000000000000000u) result.q--; break; case 6: // up if (rem != 0 && int64_t(a.q ^ b.q) >= 0) result.q++; break; } } } break; case 5: // float nana = isnan_f(a.i); // check for NAN input nanb = isnan_f(b.i); if (nana && nanb) { // both are NAN result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload } else if (nana) result.i = a.i; else if (nanb) result.i = b.i; else if (b.i << 1 == 0) { // division by zero if (a.i << 1 == 0) { // 0./0. = nan result.q = t->makeNan(nan_invalid_0div0, operandType); } else { // a / 0. = infinity if (mask & (1<<MSK_DIVZERO)) result.q = t->makeNan(nan_div0, operandType); else result.i = inf_f; } result.i |= (a.i ^ b.i) & sign_f; // sign bit } else if (isinf_f(a.i) && isinf_f(b.i)) { result.i = (uint32_t)t->makeNan(nan_invalid_divinf, operandType); // INF/INF result.i |= (a.i ^ b.i) & sign_f; // sign bit } else { if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.f = a.f / b.f; // normal division if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_div, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } break; case 6: // double nana = isnan_d(a.q); // check for NAN input nanb = isnan_d(b.q); if (nana && nanb) { // both are NAN result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload } else if (nana) result.q = a.q; else if (nanb) result.q = b.q; else if (b.q << 1 == 0) { // division by zero if (a.q << 1 == 0) { // 0./0. = nan result.q = t->makeNan(nan_invalid_0div0, operandType); } else { // a / 0. = infinity if (mask & (1<<MSK_DIVZERO)) result.q = t->makeNan(nan_div0, operandType); else result.q = inf_d; } result.q |= (a.q ^ b.q) & sign_d; // sign bit } else if (isinf_d(a.q) && isinf_d(b.q)) { result.q = t->makeNan(nan_invalid_divinf, operandType); // INF/INF result.q |= (a.q ^ b.q) & sign_d; // sign bit } else { if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.d = a.d / b.d; // normal division if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_div, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode } break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } uint64_t f_div_u(CThread * t) { // divide two unsigned numbers if (t->operandType > 4) { return f_div(t); // floating point: same as f_div } SNum a = t->parm[1]; SNum b = t->parm[2]; //SNum mask = t->parm[3]; SNum result; bool overflow = false; uint32_t intRounding = 0; // integer rounding mode if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) { intRounding = t->pInstr->a.im3; // E template. get integer rounding mode from IM3 } switch (t->operandType) { case 0: // int8 if (b.b == 0) { result.i = 0xFF; overflow = true; } else { result.i = a.b / b.b; if (intRounding == 4 || intRounding == 6) { uint32_t rem = a.b % b.b; switch (intRounding) { case 4: // nearest or even if (rem*2 > b.b || (rem*2 == b.b && (result.i & 1))) result.i++; break; case 6: // up if (rem != 0) result.i++; break; } } } break; case 1: // int16 if (b.s == 0) { result.i = 0xFFFF; overflow = true; } else { result.i = a.s / b.s; if (intRounding == 4 || intRounding == 6) { uint32_t rem = a.s % b.s; switch (intRounding) { case 4: // nearest or even if (rem*2 > b.s || (rem*2 == b.s && (result.i & 1))) result.i++; break; case 6: // up if (rem != 0) result.i++; break; } } } break; case 2: // int32 if (b.i == 0) { result.is = -1; overflow = true; } else { result.i = a.i / b.i; if (intRounding == 4 || intRounding == 6) { uint32_t rem = a.i % b.i; switch (intRounding) { case 4: // nearest or even if (rem*2 > b.i || (rem*2 == b.i && (result.i & 1))) result.i++; break; case 6: // up if (rem != 0) result.i++; break; } } } break; case 3: // int64 if (b.q == 0) { result.qs = -(int64_t)1; overflow = true; } else { result.q = a.q / b.q; if (intRounding == 4 || intRounding == 6) { uint64_t rem = a.q % b.q; switch (intRounding) { case 4: // nearest or even if (rem*2 > b.q || (rem*2 == b.q && (result.q & 1))) result.q++; break; case 6: // up if (rem != 0) result.q++; break; } } } break; default: t->interrupt(INT_WRONG_PARAMETERS); } //if (overflow && (mask.i & MSK_OVERFL_UNSIGN)) t->interrupt(INT_OVERFL_UNSIGN); // unsigned integer overflow return result.q; } static uint64_t f_div_rev(CThread * t) { // divide two numbers, b/a uint64_t temp = t->parm[2].q; // swap operands t->parm[2].q = t->parm[1].q; t->parm[1].q = temp; uint64_t retval = f_div(t); t->parm[2].q = temp; // restore parm[2] in case it is a constant return retval; } uint64_t mul64_128u(uint64_t * low, uint64_t a, uint64_t b) { // extended unsigned multiplication 64*64 -> 128 bits. // Note: you may replace this by inline assembly or intrinsic functions on // platforms that have extended multiplication instructions. // The return value is the high half of the product, // *low receives the low half of the product union { // arrays for storing result uint64_t q[2]; uint32_t i[4]; } res; uint64_t t; // temporary product uint64_t k; // temporary carry uint64_t a0 = (uint32_t)a; // low a uint64_t a1 = a >> 32; // high a uint64_t b0 = (uint32_t)b; // low b uint64_t b1 = b >> 32; // high b t = a0 * b0; res.i[0] = (uint32_t)t; k = t >> 32; t = a1 * b0 + k; res.i[1] = (uint32_t)t; k = t >> 32; res.i[2] = (uint32_t)k; t = a0 * b1 + res.i[1]; res.i[1] = (uint32_t)t; k = t >> 32; t = a1 * b1 + k + res.i[2]; res.i[2] = (uint32_t)t; k = t >> 32; res.i[3] = (uint32_t)k; if (low) *low = res.q[0]; return res.q[1]; } int64_t mul64_128s(uint64_t * low, int64_t a, int64_t b) { // extended signed multiplication 64*64 -> 128 bits. // Note: you may replace this by inline assembly or intrinsic functions on // platforms that have extended multiplication instructions. // The return value is the high half of the product, // *low receives the low half of the product bool sign = false; if (a < 0) { a = -a, sign = true; } if (b < 0) { b = -b; sign = !sign; } uint64_t lo, hi; hi = mul64_128u(&lo, a, b); if (sign) { // change sign lo = uint64_t(-int64_t(lo)); hi = ~hi; if (lo == 0) hi++; // carry } if (low) *low = lo; return (int64_t)hi; } static uint64_t f_mul_hi(CThread * t) { // high part of extended signed multiply SNum result; switch (t->operandType) { case 0: // int8 result.qs = ((int32_t)t->parm[1].bs * (int32_t)t->parm[2].bs) >> 8; break; case 1: // int16 result.qs = ((int32_t)t->parm[1].ss * (int32_t)t->parm[2].ss) >> 16; break; case 2: // int32 result.qs = ((int64_t)t->parm[1].is * (int64_t)t->parm[2].is) >> 32; break; case 3: // int64 result.qs = mul64_128s(0, t->parm[1].qs, t->parm[2].qs); break; default: t->interrupt(INT_WRONG_PARAMETERS); result.q = 0; } return result.q; } static uint64_t f_mul_hi_u(CThread * t) { // high part of extended unsigned multiply SNum result; switch (t->operandType) { case 0: // int8 result.q = ((uint32_t)t->parm[1].b * (uint32_t)t->parm[2].b) >> 8; break; case 1: // int16 result.q = ((uint32_t)t->parm[1].s * (uint32_t)t->parm[2].s) >> 16; break; case 2: // int32 result.q = ((uint64_t)t->parm[1].i * (uint64_t)t->parm[2].i) >> 32; break; case 3: // int64 result.q = mul64_128u(0, t->parm[1].q, t->parm[2].q); break; default: t->interrupt(INT_WRONG_PARAMETERS); result.q = 0; } return result.q; } static uint64_t f_rem(CThread * t) { // remainder/modulo of two signed numbers SNum a = t->parm[1]; SNum b = t->parm[2]; //SNum mask = t->parm[3]; SNum result; bool overflow = false; switch (t->operandType) { case 0: // int8 if (b.b == 0 || (a.b == 0x80 && b.bs == -1)) { result.i = 0x80; overflow = true; } else result.is = a.bs % b.bs; break; case 1: // int16 if (b.s == 0 || (a.s == 0x8000 && b.ss == -1)) { result.i = 0x8000; overflow = true; } else result.is = a.ss % b.ss; break; case 2: // int32 if (b.i == 0 || (a.i == sign_f && b.is == -1)) { result.i = sign_f; overflow = true; } else result.is = a.is % b.is; break; case 3: // int64 if (b.q == 0 || (a.q == sign_d && b.qs == int64_t(-1))) { result.q = sign_d; overflow = true; } else result.qs = a.qs % b.qs; break; case 5: // float if (isnan_f(a.i) && isnan_f(b.i)) { // both are NAN result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload } else if (b.i << 1 == 0 || isinf_f(a.i)) { // rem(1,0) or rem(inf,1) result.q = t->makeNan(nan_invalid_rem, 5); } else { result.f = fmodf(a.f, b.f); // normal modulo } break; case 6: // double if (isnan_d(a.q) && isnan_d(b.q)) { // both are NAN result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload } else if (b.q << 1 == 0 || isinf_d(a.q)) { // rem(1,0) or rem(inf,1) result.q = t->makeNan(nan_invalid_rem, 5); } else { result.d = fmod(a.d, b.d); // normal modulo } break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } //if (overflow&& (mask.i & MSK_OVERFL_SIGN)) t->interrupt(INT_OVERFL_SIGN); // signed integer overflow return result.q; } static uint64_t f_rem_u(CThread * t) { // remainder/modulo of two unsigned numbers if (t->operandType > 4) return f_rem(t); // float types use f_rem SNum a = t->parm[1]; SNum b = t->parm[2]; //SNum mask = t->parm[3]; SNum result; bool overflow = false; switch (t->operandType) { case 0: // int8 if (b.b == 0) { result.i = 0x80; overflow = true; } else result.i = a.b % b.b; break; case 1: // int16 if (b.s == 0) { result.i = 0x8000; overflow = true; } else result.i = a.s % b.s; break; case 2: // int32 if (b.i == 0) { result.i = sign_f; overflow = true; } else result.i = a.i % b.i; break; case 3: // int64 if (b.q == 0) { result.q = sign_d; overflow = true; } else result.q = a.q % b.q; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } //if (overflow&& (mask.i & MSK_OVERFL_SIGN)) t->interrupt(INT_OVERFL_SIGN); // signed integer overflow return result.q; } static uint64_t f_min(CThread * t) { // minimum of two signed numbers SNum a = t->parm[1]; SNum b = t->parm[2]; SNum result; int8_t isnan; switch (t->operandType) { case 0: // int8 result.is = a.bs < b.bs ? a.bs : b.bs; break; case 1: // int16 result.is = a.ss < b.ss ? a.ss : b.ss; break; case 2: // int32 result.is = a.is < b.is ? a.is : b.is; break; case 3: // int64 result.qs = a.qs < b.qs ? a.qs : b.qs; break; case 5: // float result.f = a.f < b.f ? a.f : b.f; // check NANs isnan = isnan_f(a.i); // a is nan isnan |= isnan_f(b.i) << 1; // b is nan if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard if (isnan == 1) result.i = a.i; else if (isnan == 2) result.i = b.i; else result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload } break; case 6: // double result.d = a.d < b.d ? a.d : b.d; // check NANs isnan = isnan_d(a.q); // a is nan isnan |= isnan_d(b.q) << 1; // b is nan if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard if (isnan == 1) result.q = a.q; else if (isnan == 2) result.q = b.q; else result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload } break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_min_u(CThread * t) { // minimum of two unsigned numbers SNum a = t->parm[1]; SNum b = t->parm[2]; SNum result; switch (t->operandType) { case 0: // int8 result.i = a.b < b.b ? a.b : b.b; break; case 1: // int16 result.i = a.s < b.s ? a.s : b.s; break; case 2: // int32 result.i = a.i < b.i ? a.i : b.i; break; case 3: // int64 result.q = a.q < b.q ? a.q : b.q; break; case 5: // float case 6: // double return f_min(t); default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_max(CThread * t) { // maximum of two signed numbers SNum a = t->parm[1]; SNum b = t->parm[2]; SNum result; uint8_t isnan; switch (t->operandType) { case 0: // int8 result.is = a.bs > b.bs ? a.bs : b.bs; break; case 1: // int16 result.is = a.ss > b.ss ? a.ss : b.ss; break; case 2: // int32 result.is = a.is > b.is ? a.is : b.is; break; case 3: // int64 result.qs = a.qs > b.qs ? a.qs : b.qs; break; case 5: // float result.f = a.f > b.f ? a.f : b.f; // check NANs isnan = isnan_f(a.i); // a is nan isnan |= isnan_f(b.i) << 1; // b is nan if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard if (isnan == 1) result.i = a.i; else if (isnan == 2) result.i = b.i; else result.i = (a.i << 1) > (b.i << 1) ? a.i : b.i; // return the biggest payload } break; case 6: // double result.d = a.d > b.d ? a.d : b.d; // check NANs isnan = isnan_d(a.q); // a is nan isnan |= isnan_d(b.q) << 1; // b is nan if (isnan) { // propagate NAN according to the 2019 revision of the IEEE-754 standard if (isnan == 1) result.q = a.q; else if (isnan == 2) result.q = b.q; else result.q = (a.q << 1) > (b.q << 1) ? a.q : b.q; // return the biggest payload } break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_max_u(CThread * t) { // maximum of two unsigned numbers SNum a = t->parm[1]; SNum b = t->parm[2]; SNum result; switch (t->operandType) { case 0: // int8 result.i = a.b > b.b ? a.b : b.b; break; case 1: // int16 result.i = a.s > b.s ? a.s : b.s; break; case 2: // int32 result.i = a.i > b.i ? a.i : b.i; break; case 3: // int64 result.q = a.q > b.q ? a.q : b.q; break; case 5: // float case 6: // double return f_max(t); default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_and(CThread * t) { // bitwise AND of two numbers return t->parm[1].q & t->parm[2].q; } /* static uint64_t f_and_not(CThread * t) { // a & ~b return t->parm[1].q & ~ t->parm[2].q; }*/ static uint64_t f_or(CThread * t) { // bitwise OR of two numbers return t->parm[1].q | t->parm[2].q; } static uint64_t f_xor(CThread * t) { // bitwise exclusive OR of two numbers return t->parm[1].q ^ t->parm[2].q; } static uint64_t f_select_bits(CThread * t) { // a & c | b & ~c return (t->parm[0].q & t->parm[2].q) | (t->parm[1].q & ~ t->parm[2].q); } static uint64_t f_shift_left(CThread * t) { // integer: a << b, float a * 2^b where b is interpreted as integer SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum mask = t->parm[3]; SNum result; uint64_t exponent; switch (t->operandType) { case 0: // int8 result.b = a.b << b.b; if (b.b > 7) result.q = 0; break; case 1: // int16 result.s = a.s << b.s; if (b.b > 15) result.q = 0; break; case 2: // int32 result.i = a.i << b.i; if (b.b > 31) result.q = 0; break; case 3: // int64 result.q = a.q << b.q; if (b.b > 63) result.q = 0; break; case 5: // float if (isnan_f(a.i)) return a.q; // a is nan exponent = a.i >> 23 & 0xFF; // get exponent if (exponent == 0) return a.i & sign_f; // a is zero or subnormal. return zero exponent += b.i; // add integer to exponent if ((int32_t)exponent >= 0xFF) { // overflow result.i = inf_f; } else if ((int32_t)exponent <= 0) { // underflow if ((mask.i & (1<<MSK_UNDERFLOW)) != 0) { // make NAN if exception result.q = t->makeNan(nan_underflow, 5); } else { result.q = 0; } } else { result.i = (a.i & 0x807FFFFF) | uint32_t(exponent) << 23; // insert new exponent } break; case 6: // double if (isnan_d(a.q)) return a.q; // a is nan exponent = a.q >> 52 & 0x7FF; if (exponent == 0) return a.q & sign_d; // a is zero or subnormal. return zero exponent += b.q; // add integer to exponent if ((int64_t)exponent >= 0x7FF) { // overflow result.q = inf_d; //if (mask.i & MSK_OVERFL_FLOAT) t->interrupt(INT_OVERFL_FLOAT); } else if ((int64_t)exponent <= 0) { // underflow if ((mask.i & (1<<MSK_UNDERFLOW)) != 0) { // make NAN if exception result.q = t->makeNan(nan_underflow, 6); } else { result.q = 0; } } else { result.q = (a.q & 0x800FFFFFFFFFFFFF) | (exponent << 52); // insert new exponent } break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_rotate(CThread * t) { // rotate bits left SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum result; switch (t->operandType) { case 0: // int8 result.b = a.b << (b.b & 7) | a.b >> (8 - (b.b & 7)); break; case 1: // int16 result.s = a.s << (b.s & 15) | a.s >> (16 - (b.s & 15)); break; case 2: // int32 case 5: // float result.i = a.i << (b.i & 31) | a.i >> (32 - (b.i & 31)); break; case 3: // int64 case 6: // double result.q = a.q << (b.q & 63) | a.q >> (64 - (b.q & 63)); break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_shift_right_s(CThread * t) { // integer only: a >> b, with sign extension SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum result; switch (t->operandType) { case 0: // int8 result.bs = a.bs >> b.bs; if (b.b > 7) result.qs = a.bs >> 7; break; case 1: // int16 result.ss = a.ss >> b.ss; if (b.s > 15) result.qs = a.ss >> 15; break; case 2: // int32 result.is = a.is >> b.is; if (b.i > 31) result.qs = a.is >> 31; break; case 3: // int64 result.qs = a.qs >> b.qs; if (b.q > 63) result.qs = a.qs >> 63; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_shift_right_u(CThread * t) { // integer only: a >> b, with zero extension SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum result; switch (t->operandType) { case 0: // int8 result.b = a.b >> b.b; if (b.b > 7) result.q = 0; break; case 1: // int16 result.s = a.s >> b.s; if (b.s > 15) result.q = 0; break; case 2: // int32 result.i = a.i >> b.i; if (b.i > 31) result.q = 0; break; case 3: // int64 result.q = a.q >> b.q; if (b.q > 63) result.q = 0; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_funnel_shift(CThread * t) { uint64_t shift_count = t->parm[2].q; if ((t->operands[5] & 0x20) && t->operandType > 4) shift_count = t->parm[4].q; // avoid conversion of c to float if (t->vect == 0) { // g.p. registers. shift integers n bytes uint32_t dataSize = dataSizeTableBits[t->operandType]; // operand size, bits uint64_t dataMask = dataSizeMask[t->operandType]; // operand size mask, dataSize bits of 1 if ((shift_count & dataMask) >= dataSize) return 0; // shift count out of range return ((t->parm[0].q & dataMask) >> shift_count | (t->parm[1].q & dataMask) << (dataSize - shift_count)) & dataMask; } else { // vector registers. shift concatenated whole vectors n bytes down // The second operand may be a vector register of incomplete size or a broadcast memory operand. // Both input operands may be the same as the destination register. // The operand size may not match the shift count // The easiest way to handle all these cases is to copy both input vectors into temporary buffers switch (t->operandType) { case 0: *(t->tempBuffer + t->vectorOffset) = t->parm[0].bs; *(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].bs; break; case 1: *(uint16_t*)(t->tempBuffer + t->vectorOffset) = t->parm[0].s; *(uint16_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].s; break; case 2: case 5: *(uint32_t*)(t->tempBuffer + t->vectorOffset) = t->parm[0].i; *(uint32_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].i; break; case 3: case 6: *(uint64_t*)(t->tempBuffer + t->vectorOffset) = t->parm[0].q; *(uint64_t*)(t->tempBuffer + t->MaxVectorLength + t->vectorOffset) = t->parm[1].q; break; case 4: case 7: // to do: support 128 bits t->interrupt(INT_WRONG_PARAMETERS); break; } uint32_t dataSizeBytes = dataSizeTable[t->operandType]; // operand size, bits if (t->vectorOffset + dataSizeBytes >= t->vectorLengthR) { // last iteration. Make the result uint8_t rd = t->operands[0]; // destination vector shift_count *= dataSizeBytes; // shift n elements if (shift_count >= t->vectorLengthR) { // shift count out of range. return 0 memset(t->vectors.buf() + t->MaxVectorLength*rd, 0, t->vectorLengthR); } else { // copy upper part of first vector to lower part of destination vector memcpy(t->vectors.buf() + t->MaxVectorLength * rd, t->tempBuffer + shift_count, t->vectorLengthR - shift_count); // copy lower part of second vector to upper part of destination vector memcpy(t->vectors.buf() + t->MaxVectorLength * rd + (t->vectorLengthR - shift_count), t->tempBuffer + t->MaxVectorLength, shift_count); } } t->running = 2; // don't save RD. It is saved by above code return 0; } } static uint64_t f_set_bit(CThread * t) { // a | 1 << b SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum result; switch (t->operandType) { case 0: // int8 result.b = a.b; if (b.b < 8) result.b |= 1 << b.b; break; case 1: // int16 result.s = a.s; if (b.s < 16) result.s |= 1 << b.s; break; case 2: // int32 case 5: // float result.i = a.i; if (b.i < 32) result.i |= 1 << b.i; break; case 3: // int64 case 6: // double result.q = a.q; if (b.q < 64) result.q |= (uint64_t)1 << b.q; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_clear_bit(CThread * t) { // a & ~ (1 << b) SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum result; switch (t->operandType) { case 0: // int8 result.b = a.b; if (b.b < 8) result.b &= ~(1 << b.b); break; case 1: // int16 result.s = a.s; if (b.s < 16) result.s &= ~(1 << b.s); break; case 2: // int32 case 5: // float result.i = a.i; if (b.i < 32) result.i &= ~(1 << b.i); break; case 3: // int64 case 6: // double result.q = a.q; if (b.q < 64) result.q &= ~((uint64_t)1 << b.q); break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_toggle_bit(CThread * t) { // a ^ (1 << b) SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum result; switch (t->operandType) { case 0: // int8 result.b = a.b; if (b.b < 8) result.b ^= 1 << b.b; break; case 1: // int16 result.s = a.s; if (b.s < 16) result.s ^= 1 << b.s; break; case 2: // int32 case 5: // float result.i = a.i; if (b.i < 32) result.i ^= 1 << b.i; break; case 3: // int64 case 6: // double result.q = a.q; if (b.q < 64) result.q ^= (uint64_t)1 << b.q; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } /* static uint64_t f_and_bit(CThread * t) { // clear all bits except one // a & (1 << b) SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float SNum result; switch (t->operandType) { case 0: // int8 result.b = a.b; if (b.b < 8) result.b &= 1 << b.b; break; case 1: // int16 result.s = a.s; if (b.s < 16) result.s &= 1 << b.s; break; case 2: // int32 case 5: // float result.i = a.i; if (b.i < 32) result.i &= 1 << b.i; break; case 3: // int64 case 6: // double result.q = a.q; if (b.q < 64) result.q &= (uint64_t)1 << b.q; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; }*/ static uint64_t f_test_bit(CThread * t) { // test a single bit: a >> b & 1 SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float if (t->fInstr->imm2 & 4) { b = t->parm[4]; // avoid immediate operand shifted by imm3 } SNum result; result.q = 0; SNum mask = t->parm[3]; uint8_t fallbackreg = t->operands[2]; // fallback register SNum fallback; // fallback value fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0; switch (t->operandType) { case 0: // int8 if (b.b < 8) result.b = a.b >> b.b & 1; break; case 1: // int16 if (b.s < 16) result.s = a.s >> b.s & 1; break; case 2: // int32 case 5: // float if (b.i < 32) result.i = a.i >> b.i & 1; break; case 3: // int64 case 6: // double if (b.q < 64) result.q = a.q >> b.q & 1; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } // get additional options uint8_t options = 0; if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; if (options & 4) result.b ^= 1; // invert result if (options & 8) fallback.b ^= 1; // invert fallback if (options & 0x10) mask.b ^= 1; // invert mask switch (options & 3) { case 0: result.b = (mask.b & 1) ? result.b : fallback.b; break; case 1: result.b = mask.b & result.b & fallback.b; break; case 2: result.b = mask.b & (result.b | fallback.b); break; case 3: result.b = mask.b & (result.b ^ fallback.b); } // ignore other bits result.q &= 1; if (options & 0x20) { // get remaining bits from flag or NUMCONTR result.q |= mask.q & ~(uint64_t)1; } // disable normal fallback process t->parm[3].b = 1; return result.q; } static uint64_t f_test_bits_and(CThread * t) { // Test if all the indicated bits are 1 // result = (a & b) == b SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float if (t->fInstr->imm2 & 4) { b = t->parm[4]; // avoid immediate operand shifted by imm3 } SNum result; SNum mask = t->parm[3]; uint8_t fallbackreg = t->operands[2]; // fallback register SNum fallback; // fallback value fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0; switch (t->operandType) { case 0: // int8 result.b = (a.b & b.b) == b.b; break; case 1: // int16 result.s = (a.s & b.s) == b.s; break; case 2: // int32 case 5: // float result.i = (a.i & b.i) == b.i; break; case 3: // int64 case 6: // double result.q = (a.q & b.q) == b.q; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } // get additional options uint8_t options = 0; if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; if (options & 4) result.b ^= 1; // invert result if (options & 8) fallback.b ^= 1; // invert fallback if (options & 0x10) mask.b ^= 1; // invert mask switch (options & 3) { case 0: result.b = (mask.b & 1) ? result.b : fallback.b; break; case 1: result.b &= mask.b & fallback.b; break; case 2: result.b = mask.b & (result.b | fallback.b); break; case 3: result.b = mask.b & (result.b ^ fallback.b); } // ignore other bits result.q &= 1; if (options & 0x20) { // get remaining bits from flag or NUMCONTR result.q |= mask.q & ~(uint64_t)1; } // disable normal fallback process t->parm[3].b = 1; return result.q; } static uint64_t f_test_bits_or(CThread * t) { // Test if at least one of the indicated bits is 1. // result = (a & b) != 0 SNum a = t->parm[1]; SNum b = t->parm[2]; //if (t->fInstr->immSize && t->operandType >= 5) b = t->parm[4]; // avoid conversion of b to float if ((t->operands[5] & 0x20) && t->operandType > 4) b = t->parm[4]; // avoid conversion of b to float if (t->fInstr->imm2 & 4) { b = t->parm[4]; // avoid immediate operand shifted by imm3 } SNum result; SNum mask = t->parm[3]; uint8_t fallbackreg = t->operands[2]; // fallback register SNum fallback; // fallback value fallback.q = (fallbackreg & 0x1F) != 0x1F ? t->readRegister(fallbackreg & 0x1F) : 0; switch (t->operandType) { case 0: // int8 result.b = (a.b & b.b) != 0; break; case 1: // int16 result.s = (a.s & b.s) != 0; break; case 2: // int32 case 5: // float result.i = (a.i & b.i) != 0; break; case 3: // int64 case 6: // double result.q = (a.q & b.q) != 0; break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } // get additional options uint8_t options = 0; if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; if (options & 4) result.b ^= 1; // invert result if (options & 8) fallback.b ^= 1; // invert fallback if (options & 0x10) mask.b ^= 1; // invert mask switch (options & 3) { case 0: result.b = (mask.b & 1) ? result.b : fallback.b; break; case 1: result.b &= mask.b & fallback.b; break; case 2: result.b = mask.b & (result.b | fallback.b); break; case 3: result.b = mask.b & (result.b ^ fallback.b); } // ignore other bits result.q &= 1; if (options & 0x20) { // get remaining bits from flag or NUMCONTR result.q |= mask.q & ~(uint64_t)1; } // disable normal fallback process t->parm[3].b = 1; return result.q; } float mul_add_f(float a, float b, float c) { // calculate a * b + c with extra precision on the intermediate product. #if FMA_AVAILABLE // use FMA instruction for correct precision if available return _mm_cvtss_f32(_mm_fmadd_ss(_mm_load_ss(&a), _mm_load_ss(&b), _mm_load_ss(&c))); #else return float((double)a * (double)b + (double)c); #endif } double mul_add_d(double a, double b, double c) { // calculate a * b + c with extra precision on the intermediate product. #if FMA_AVAILABLE // use FMA instruction for correct precision if available return _mm_cvtsd_f64(_mm_fmadd_sd(_mm_load_sd(&a), _mm_load_sd(&b), _mm_load_sd(&c))); #else // calculate a*b-c with extended precision. This code is not as good as the real FMA instruction SNum aa, bb, ahi, bhi, alo, blo; uint64_t upper_mask = 0xFFFFFFFFF8000000; aa.d = a; bb.d = b; ahi.q = aa.q & upper_mask; // split into high and low parts alo.d = a - ahi.d; bhi.q = bb.q & upper_mask; blo.d = b - bhi.d; double r1 = ahi.d * bhi.d; // this product is exact // perhaps a different order of addition is better here in some cases? double r2 = r1 + c; // add c to high product double r3 = r2 + (ahi.d * blo.d + bhi.d * alo.d) + alo.d * blo.d; // add rest of product return r3; #endif } uint64_t f_mul_add(CThread * t) { // a * b + c, calculated with extra precision on the intermediate product SNum a = t->parm[0]; SNum b = t->parm[1]; SNum c = t->parm[2]; if ((t->fInstr->imm2 & 4) && t->operandType < 5) { c = t->parm[4]; // avoid immediate operand shifted by imm3 } if (t->op == II_MUL_ADD2) { SNum t = b; b = c; c = t; // swap last two operands } uint32_t mask = t->parm[3].i; SNum result; bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions // get sign options uint8_t options = 0; if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; //else if (t->fInstr->tmplate == 0xA) options = (mask >> MSKI_OPTIONS) & 0xF; if (t->vect == 2) { // odd vector element options >>= 1; } bool unsignedOverflow = false; bool signedOverflow = false; uint8_t operandType = t->operandType; switch (operandType) { case 0: // int8 a.is = a.bs; b.is = b.bs; // sign extend to avoid overflow during sign change if (options & 1) a.is = -a.is; if (options & 4) c.is = -c.is; result.is = a.is * b.is + c.bs; signedOverflow = result.is != result.bs; unsignedOverflow = result.i != result.b; break; case 1: // int16 a.is = a.ss; b.is = b.ss; // sign extend to avoid overflow during sign change if (options & 1) a.is = -a.is; if (options & 4) c.is = -c.is; result.is = a.is * b.is + c.ss; signedOverflow = result.is != result.ss; unsignedOverflow = result.i != result.s; break; case 2: // int32 a.qs = a.is; b.qs = b.is; // sign extend to avoid overflow during sign change if (options & 1) a.qs = -a.qs; if (options & 4) c.qs = -c.qs; result.qs = a.qs * b.qs + c.is; signedOverflow = result.qs != result.is; unsignedOverflow = result.q != result.i; break; case 3: // int64 if (options & 1) { if (a.q == sign_d) signedOverflow = true; a.qs = -a.qs; } if (options & 4) { if (b.q == sign_d) signedOverflow = true; c.qs = -c.qs; } result.qs = a.qs * b.qs + c.qs; /* if (mask.b & MSK_OVERFL_UNSIGN) { // check for unsigned overflow if (fabs((double)a.q + (double)b.q * (double)c.q - (double)result.q) > 1.E8) unsignedOverflow = true; } if (mask.b & MSK_OVERFL_SIGN) { // check for signed overflow if (fabs((double)a.qs + (double)b.qs * (double)c.qs - (double)result.qs) > 1.E8) signedOverflow = true; } */ break; case 5: // float if (options & 1) a.f = -a.f; if (options & 4) c.f = -c.f; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.f = mul_add_f(a.f, b.f, c.f); // do the calculation if (isnan_or_inf_f(result.i)) { // check for overflow and nan uint32_t nans = 0; // biggest NAN uint32_t infs = 0; // count INF inputs for (int i = 0; i < 3; i++) { // loop through input operands uint32_t tmp = t->parm[i].i & nsign_f; // ignore sign bit if (tmp == inf_f) infs++; // is INF else if (tmp > nans) nans = tmp; // get the biggest if there are multiple NANs } if (nans) result.i = nans; // there is at least one NAN. return the biggest (sign bit is lost) else if (isnan_f(result.i)) { // result is NAN, but no input is NAN. This can be 0*INF or INF-INF if ((a.i << 1 == 0 || b.i << 1 == 0) && infs) result.q = t->makeNan(nan_invalid_0mulinf, operandType); else result.q = t->makeNan(nan_invalid_sub, operandType); } } else if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode break; case 6: // double if (options & 1) a.d = -a.d; if (options & 4) c.d = -c.d; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions result.d = mul_add_d(a.d, b.d, c.d); // do the calculation if (isnan_or_inf_d(result.q)) { // check for overflow and nan uint64_t nans = 0; // biggest NAN uint32_t infs = 0; // count INF inputs for (int i = 0; i < 3; i++) { // loop through input operands uint64_t tmp = t->parm[i].q & nsign_d; // ignore sign bit if (tmp == inf_d) infs++; // is INF else if (tmp > nans) nans = tmp; // get the biggest if there are multiple NANs } if (nans) result.q = nans; // there is at least one NAN. return the biggest (sign bit is lost) else if (isnan_d(result.q)) { // result is NAN, but no input is NAN. This can be 0*INF or INF-INF if ((a.q << 1 == 0 || b.q << 1 == 0) && infs) result.q = t->makeNan(nan_invalid_0mulinf, operandType); else result.q = t->makeNan(nan_invalid_sub, operandType); } } else if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) result.q = t->makeNan(nan_overflow_mul, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) result.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) result.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode break; default: t->interrupt(INT_WRONG_PARAMETERS); result.i = 0; } return result.q; } static uint64_t f_add_add(CThread * t) { // a + b + c, calculated with extra precision on the intermediate sum int i, j; SNum parm[3]; // copy parameters so that we change sign and reorder them without changing original constant for (i = 0; i < 3; i++) parm[i] = t->parm[i]; if ((t->fInstr->imm2 & 4) && t->operandType < 5) { parm[2] = t->parm[4]; // avoid immediate operand shifted by imm3 } uint32_t mask = t->parm[3].i; bool roundingMode = (mask & (3 << MSKI_ROUNDING)) != 0; // non-standard rounding mode bool detectExceptions = (mask & (0xF << MSKI_EXCEPTIONS)) != 0; // make NAN if exceptions uint8_t operandType = t->operandType; SNum sumS, sumU; // signed and unsigned sums SNum nanS; // combined nan's // get sign options uint8_t options = 0; if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; //else if (t->fInstr->tmplate == 0xA) options = uint8_t(mask >> MSKI_OPTIONS); uint8_t signedOverflow = 0; uint8_t unsignedOverflow = 0; //bool parmInf = false; sumS.q = sumU.q = 0; uint32_t temp1; uint64_t temp2; switch (operandType) { case 0: // int8 for (i = 0; i < 3; i++) { // loop through operands if (options & 1) { // change sign if (parm[i].b == 0x80) signedOverflow ^= 1; if (parm[i].b != 0) unsignedOverflow ^= 1; parm[i].is = - parm[i].is; } options >>= 1; // get next option bit sumU.i += parm[i].b; // unsigned sum sumS.is += parm[i].bs; // sign-extended sum } if (sumU.b != sumU.i) unsignedOverflow ^= 1; if (sumS.bs != sumS.is) signedOverflow ^= 1; break; case 1: // int16 for (i = 0; i < 3; i++) { // loop through operands if (options & 1) { // change sign if (parm[i].s == 0x8000) signedOverflow ^= 1; if (parm[i].s != 0) unsignedOverflow ^= 1; parm[i].is = - parm[i].is; } options >>= 1; // get next option bit sumU.i += parm[i].s; // unsigned sum sumS.is += parm[i].ss; // sign-extended sum } if (sumU.s != sumU.i) unsignedOverflow ^= 1; if (sumS.ss != sumS.is) signedOverflow ^= 1; break; case 2: // int32 for (i = 0; i < 3; i++) { // loop through operands if (options & 1) { // change sign if (parm[i].i == sign_f) signedOverflow ^= 1; if (parm[i].i != 0) unsignedOverflow ^= 1; parm[i].is = - parm[i].is; } options >>= 1; // get next option bit sumU.q += parm[i].i; // unsigned sum sumS.qs += parm[i].is; // sign-extended sum } if (sumU.i != sumU.q) unsignedOverflow ^= 1; if (sumS.is != sumS.qs) signedOverflow ^= 1; break; case 3: // int64 for (i = 0; i < 3; i++) { // loop through operands if (options & 1) { // change sign if (parm[i].q == sign_d) signedOverflow ^= 1; if (parm[i].q != 0) unsignedOverflow ^= 1; parm[i].qs = - parm[i].qs; } options >>= 1; // get next option bit uint64_t a = parm[i].q; uint64_t b = sumU.q; sumU.q = a + b; // sum if (sumU.q < a) unsignedOverflow ^= 1; if (int64_t(~(a ^ b) & (a ^ sumU.q)) < 0) signedOverflow ^= 1; } break; case 5: // float sumS.is = -1; nanS.i = 0; for (i = 0; i < 3; i++) { // loop through operands if (options & 1) parm[i].f = -parm[i].f; // change sign // find the smallest of the three operands if ((parm[i].i << 1) < sumS.i) { sumS.i = (parm[i].i << 1); j = i; } // find NANs and infs temp1 = parm[i].i & nsign_f; // ignore sign bit if (temp1 > nanS.i) nanS.i = temp1; // find the biggest NAN //if (temp1 == inf_f) parmInf = true; // OR of all INFs options >>= 1; // next option bit } if (nanS.i > inf_f) return nanS.i; // result is NAN // get the smallest operand last to minimize loss of precision if the two biggest operands have opposite signs temp1 = parm[j].i; parm[j].i = parm[2].i; parm[2].i = temp1; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions // calculate sum sumU.f = (parm[0].f + parm[1].f) + parm[2].f; if (isnan_f(sumU.i)) { // the result is NAN but neither input is NAN. This must be INF-INF sumU.q = t->makeNan(nan_invalid_sub, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) sumU.q = t->makeNan(nan_overflow_add, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) sumU.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) sumU.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode break; case 6: // double sumS.qs = -1; nanS.q = 0; for (i = 0; i < 3; i++) { // loop through operands if (options & 1) parm[i].d = -parm[i].d; // change sign // find the smallest of the three operands if ((parm[i].q << 1) < sumS.q) { sumS.q = (parm[i].q << 1); j = i; } // find NANs and infs temp2 = parm[i].q & nsign_d; // ignore sign bit if (temp2 > nanS.q) nanS.q = temp2; // find the biggest NAN //if (temp2 == inf_d) parmInf = true; // OR of all INFs options >>= 1; // next option bit } if (nanS.q > inf_d) return nanS.q; // result is NAN // get the smallest operand last to minimize loss of precision if // the two biggest operands have opposite signs temp2 = parm[j].q; parm[j].q = parm[2].q; parm[2].q = temp2; if (roundingMode) setRoundingMode(mask >> MSKI_ROUNDING); if (detectExceptions) clearExceptionFlags(); // clear previous exceptions // calculate sum sumU.d = (parm[0].d + parm[1].d) + parm[2].d; if (isnan_d(sumU.q)) { // the result is NAN but neither input is NAN. This must be INF-INF sumU.q = t->makeNan(nan_invalid_sub, operandType); } if (detectExceptions) { uint32_t x = getExceptionFlags(); // read exceptions if ((mask & (1<<MSK_OVERFLOW)) && (x & 8)) sumU.q = t->makeNan(nan_overflow_add, operandType); else if ((mask & (1<<MSK_UNDERFLOW)) && (x & 0x10)) sumU.q = t->makeNan(nan_underflow, operandType); else if ((mask & (1<<MSK_INEXACT)) && (x & 0x20)) sumU.q = t->makeNan(nan_inexact, operandType); } if (roundingMode) setRoundingMode(0); // reset rounding mode break; default: t->interrupt(INT_WRONG_PARAMETERS); sumU.i = 0; } return sumU.q; } uint64_t f_add_h(CThread * t) { // add two numbers, float16 // (rounding mode not supported) SNum a = t->parm[1]; SNum b = t->parm[2]; uint32_t mask = t->parm[3].i; uint16_t result; if (t->fInstr->immSize == 1) b.s = float2half(b.bs); // convert 8-bit integer to float16 if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); if (isnan_h(a.s) && isnan_h(b.s)) { // both are NAN result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload } if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions // the exact result is obtained with double precision. This makes sure we don't get double rounding errors double resultd = (double)half2float(a.s) + (double)half2float(b.s); // calculate with single precision result = double2half(resultd); // check for exceptions if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) { // overflow result = (uint16_t)t->makeNan(nan_overflow_add, 1); result |= (a.s ^ b.s) & 0x8000; // get the sign } else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) { // underflow result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN } else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultd || (getExceptionFlags() & 0x20)) != 0) { // inexact result = (uint16_t)t->makeNan(nan_inexact, 1); } uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; if (roundingMode != 0 && !isnan_or_inf_h(result)) { double r = half2float(result); // non-standard rounding mode switch (roundingMode) { case 1: // down if (r > resultd && result != 0xFBFF) { if (result == 0) result = 0x8001; else if ((int16_t)result > 0) result--; else result++; } break; case 2: // up if (r < resultd && result != 0x7BFF) { if ((int16_t)result > 0) result++; else result--; } break; case 3: // towards zero if ((int16_t)result > 0 && r > resultd) result--; else if ((int16_t)result < 0 && r < resultd) result--; } } t->returnType =0x118; return result; } uint64_t f_sub_h(CThread * t) { // subtract two numbers, float16 // (rounding mode not supported) SNum a = t->parm[1]; SNum b = t->parm[2]; uint32_t mask = t->parm[3].i; uint16_t result; if (t->fInstr->immSize == 1) b.s = float2half(b.bs); // convert 8-bit integer to float16 if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); if (isnan_h(a.s) && isnan_h(b.s)) { // both are NAN result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload } if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions // the exact result is obtained with double precision. This makes sure we don't get double rounding errors double resultd = (double)half2float(a.s) - (double)half2float(b.s); // calculate with single precision result = double2half(resultd); // check for exceptions if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) { // overflow result = (uint16_t)t->makeNan(nan_overflow_add, 1); result |= (a.s ^ b.s) & 0x8000; // get the sign } else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) { // underflow result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN } else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultd || (getExceptionFlags() & 0x20)) != 0) { // inexact result = (uint16_t)t->makeNan(nan_inexact, 1); } uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; if (roundingMode != 0 && !isnan_or_inf_h(result)) { double r = half2float(result); // non-standard rounding mode switch (roundingMode) { case 1: // down if (r > resultd && result != 0xFBFF) { if (result == 0) result = 0x8001; else if ((int16_t)result > 0) result--; else result++; } break; case 2: // up if (r < resultd && result != 0x7BFF) { if ((int16_t)result > 0) result++; else result--; } break; case 3: // towards zero if ((int16_t)result > 0 && r > resultd) result--; else if ((int16_t)result < 0 && r < resultd) result--; } } t->returnType =0x118; return result; } uint64_t f_mul_h(CThread * t) { // multiply two numbers, float16 SNum a = t->parm[1]; SNum b = t->parm[2]; uint32_t mask = t->parm[3].i; uint16_t result; if (t->fInstr->immSize == 1) b.s = float2half(b.bs); // convert 8-bit integer to float16 if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); if (isnan_h(a.s) && isnan_h(b.s)) { // both are NAN result = (a.s << 1) > (b.s << 1) ? a.s : b.s; // return the biggest payload } if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions // single precision is sufficient to get an exact multiplication result float resultf = half2float(a.s) * half2float(b.s); // calculate with single precision result = float2half(resultf); // check for exceptions if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !isinf_h(a.s) && !isinf_h(b.s)) { // overflow result = (uint16_t)t->makeNan(nan_overflow_mul, 1); result |= (a.s ^ b.s) & 0x8000; // get the sign } else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultf != 0.0f) { // underflow result = (uint16_t)t->makeNan(nan_underflow, 1) | (result & 0x8000); // signed NAN } else if ((mask & (1<<MSK_INEXACT)) && (half2float(result) != resultf || (getExceptionFlags() & 0x20)) != 0) { // inexact result = (uint16_t)t->makeNan(nan_inexact, 1); } uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; if (roundingMode != 0 && !isnan_or_inf_h(result)) { // non-standard rounding mode float r = half2float(result); switch (roundingMode) { case 1: // down if (r > resultf && result != 0xFBFF) { if (result == 0) result = 0x8001; else if ((int16_t)result > 0) result--; else result++; } break; case 2: // up if (r < resultf && result != 0x7BFF) { if ((int16_t)result > 0) result++; else result--; } break; case 3: // towards zero if ((int16_t)result > 0 && r > resultf) result--; else if ((int16_t)result < 0 && r < resultf) result--; } } t->returnType =0x118; return result; } uint64_t f_mul_add_h(CThread * t) { // a + b * c, float16 SNum a = t->parm[0]; SNum b = t->parm[1]; SNum c = t->parm[2]; uint32_t mask = t->parm[3].i; if (t->fInstr->imm2 & 4) c = t->parm[4]; // avoid immediate operand shifted by imm3 if (t->fInstr->immSize == 1) c.s = float2half(c.bs); // convert 8-bit integer to float16 // get sign options uint8_t options = 0; if (t->fInstr->tmplate == 0xE && (t->fInstr->imm2 & 2)) options = t->pInstr->a.im3; //else if (t->fInstr->tmplate == 0xA) options = (mask >> MSKI_OPTIONS) & 0xF; if (t->vect == 2) { // odd vector element options >>= 1; } if (t->operandType != 1) t->interrupt(INT_WRONG_PARAMETERS); if (options & 1) a.s ^= 0x8000; // adjust sign if (options & 4) b.s ^= 0x8000; if (mask & (1<<MSK_INEXACT)) clearExceptionFlags(); // clear previous exceptions double resultd = (double)half2float(a.s) + (double)half2float(b.s) * (double)half2float(c.s); uint16_t result = double2half(resultd); uint32_t nans = 0; bool parmInf = false; if (isnan_or_inf_h(result)) { // check for overflow and nan for (int i = 0; i < 3; i++) { // loop through input operands uint32_t tmp = t->parm[i].s & 0x7FFF; // ignore sign bit if (tmp > nans) nans = tmp; // get the biggest if there are multiple NANs if (tmp == inf_h) parmInf = true; // OR of all INFs } if (nans > inf_h) return nans; // there is at least one NAN. return the biggest (sign bit is lost) else if (isnan_h(result)) { // result is NAN, but no input is NAN. This can be 0*INF or INF-INF if ((a.s << 1 == 0 || b.s << 1 == 0) && parmInf) result = (uint16_t)t->makeNan(nan_invalid_0mulinf, 1); else result = (uint16_t)t->makeNan(nan_invalid_sub, 1); } } else if ((mask & (1<<MSK_OVERFLOW)) && isinf_h(result) && !parmInf) result = (uint16_t)t->makeNan(nan_overflow_mul, 1); else if ((mask & (1<<MSK_UNDERFLOW)) && is_zero_or_subnormal_h(result) && resultd != 0.0) result = (uint16_t)t->makeNan(nan_underflow, 1); else if ((mask & (1<<MSK_INEXACT)) && ((getExceptionFlags() & 0x20) != 0 || half2float(result) != resultd)) result = (uint16_t)t->makeNan(nan_inexact, 1); uint8_t roundingMode = mask >> MSKI_ROUNDING & 3; if (roundingMode != 0 && !isnan_or_inf_h(result)) { float r = half2float(result); // non-standard rounding mode switch (roundingMode) { case 1: // down if (r > resultd && result != 0xFBFF) { if (result == 0) result = 0x8001; else if ((int16_t)result > 0) result--; else result++; } break; case 2: // up if (r < resultd && result != 0x7BFF) { if ((int16_t)result > 0) result++; else result--; } break; case 3: // towards zero if ((int16_t)result > 0 && r > resultd) result--; else if ((int16_t)result < 0 && r < resultd) result--; } } t->returnType = 0x118; return result; } // Tables of function pointers // multiformat instructions PFunc funcTab1[64] = { f_nop, f_store, f_move, f_prefetch, f_sign_extend, f_sign_extend_add, 0, f_compare, // 0 - 7 f_add, f_sub, f_sub_rev, f_mul, f_mul_hi, f_mul_hi_u, f_div, f_div_u, // 8 - 15 f_div_rev, 0, f_rem, f_rem_u, f_min, f_min_u, f_max, f_max_u, // 16 - 23 0, 0, f_and, f_or, f_xor, 0, 0, 0, // 24 - 31 f_shift_left, f_rotate, f_shift_right_s, f_shift_right_u, f_clear_bit, f_set_bit, f_toggle_bit, f_test_bit, // 32 - 39 f_test_bits_and, f_test_bits_or, 0, 0, f_add_h, f_sub_h, f_mul_h, 0, // 40 - 47 f_mul_add_h, f_mul_add, f_mul_add, f_add_add, f_select_bits, f_funnel_shift, 0, 0, // 48 - 55 0, 0, 0, 0, 0, 0, 0, 0 // 56 - 63 };
Go to most recent revision | Compare with Previous | Blame | View Log