URL
https://opencores.org/ocsvn/or1k_old/or1k_old/trunk
Subversion Repositories or1k_old
Compare Revisions
- This comparison shows the changes necessary to convert path
/or1k_old/trunk/rc203soc/sw/uClinux/arch/i386/math-emu
- from Rev 1765 to Rev 1782
- ↔ Reverse comparison
Rev 1765 → Rev 1782
/reg_add_sub.c
0,0 → 1,318
/*---------------------------------------------------------------------------+ |
| reg_add_sub.c | |
| | |
| Functions to add or subtract two registers and put the result in a third. | |
| | |
| Copyright (C) 1992,1993 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| For each function, the destination may be any FPU_REG, including one of | |
| the source FPU_REGs. | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "fpu_system.h" |
|
|
/*
 * Add two FPU registers: (a + b) -> dest.
 *
 * dest may be the same register as a or b.  control_w is passed on to
 * reg_u_add()/reg_u_sub(), and its CW_RC field decides the sign of an
 * exactly-zero result.
 *
 * Returns 0 once a result has been stored in dest.  Returns 1 when
 * reg_u_add()/reg_u_sub() or denormal_operand() report non-zero (the sign
 * of dest is restored in the reg_u_*() case), or when the tag combination
 * is not recognised.  For NaN operands and (+Inf) + (-Inf) the return
 * value is that of real_2op_NaN() / arith_invalid().
 */
int reg_add(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w)
{
  char saved_sign = dest->sign;
  int diff;

  if ( !(a->tag | b->tag) )
    {
      /* Both registers are valid */
      if (!(a->sign ^ b->sign))
	{
	  /* signs are the same */
	  dest->sign = a->sign;
	  if ( reg_u_add(a, b, dest, control_w) )
	    {
	      dest->sign = saved_sign;
	      return 1;
	    }
	  return 0;
	}

      /* The signs are different, so do a subtraction.
	 First work out which operand has the larger magnitude. */
      diff = a->exp - b->exp;
      if (!diff)
	{
	  diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
	  if (!diff)
	    {
	      diff = a->sigl > b->sigl;
	      if (!diff)
		diff = -(a->sigl < b->sigl);
	    }
	}

      if (diff > 0)
	{
	  /* |a| > |b|: the result takes the sign of a */
	  dest->sign = a->sign;
	  if ( reg_u_sub(a, b, dest, control_w) )
	    {
	      dest->sign = saved_sign;
	      return 1;
	    }
	}
      else if ( diff == 0 )
	{
	  /* Equal magnitudes, opposite signs: the result is zero */
#ifdef DENORM_OPERAND
	  if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
	      denormal_operand() )
	    return 1;
#endif /* DENORM_OPERAND */
	  reg_move(&CONST_Z, dest);
	  /* sign depends upon rounding mode */
	  dest->sign = ((control_w & CW_RC) != RC_DOWN)
	    ? SIGN_POS : SIGN_NEG;
	}
      else
	{
	  /* |b| > |a|: the result takes the sign of b */
	  dest->sign = b->sign;
	  if ( reg_u_sub(b, a, dest, control_w) )
	    {
	      dest->sign = saved_sign;
	      return 1;
	    }
	}
      return 0;
    }
  else
    {
      if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) )
	{ return real_2op_NaN(a, b, dest); }
      else if (a->tag == TW_Zero)
	{
	  if (b->tag == TW_Zero)
	    {
	      char different_signs = a->sign ^ b->sign;
	      /* Both are zero, result will be zero. */
	      reg_move(a, dest);
	      if (different_signs)
		{
		  /* Signs are different. */
		  /* Sign of answer depends upon rounding mode. */
		  dest->sign = ((control_w & CW_RC) != RC_DOWN)
		    ? SIGN_POS : SIGN_NEG;
		}
	    }
	  else
	    {
	      /* 0 + b = b */
#ifdef DENORM_OPERAND
	      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
		  denormal_operand() )
		return 1;
#endif /* DENORM_OPERAND */
	      reg_move(b, dest);
	    }
	  return 0;
	}
      else if (b->tag == TW_Zero)
	{
	  /* a + 0 = a */
#ifdef DENORM_OPERAND
	  if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
	      denormal_operand() )
	    return 1;
#endif /* DENORM_OPERAND */
	  reg_move(a, dest); return 0;
	}
      else if (a->tag == TW_Infinity)
	{
	  if (b->tag != TW_Infinity)
	    {
#ifdef DENORM_OPERAND
	      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
		  denormal_operand() )
		return 1;
#endif /* DENORM_OPERAND */
	      reg_move(a, dest); return 0;
	    }
	  if (a->sign == b->sign)
	    {
	      /* They are both + or - infinity */
	      reg_move(a, dest); return 0;
	    }
	  return arith_invalid(dest);	/* Infinity-Infinity is undefined. */
	}
      else if (b->tag == TW_Infinity)
	{
#ifdef DENORM_OPERAND
	  if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
	      denormal_operand() )
	    return 1;
#endif /* DENORM_OPERAND */
	  reg_move(b, dest); return 0;
	}
    }
  /* Only reached for a tag combination not handled above */
#ifdef PARANOID
  EXCEPTION(EX_INTERNAL|0x101);
#endif
  return 1;
}
|
|
/* Subtract b from a. (a-b) -> dest */ |
int reg_sub(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w) |
{ |
char saved_sign = dest->sign; |
int diff; |
|
if ( !(a->tag | b->tag) ) |
{ |
/* Both registers are valid */ |
diff = a->exp - b->exp; |
if (!diff) |
{ |
diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ |
if (!diff) |
{ |
diff = a->sigl > b->sigl; |
if (!diff) |
diff = -(a->sigl < b->sigl); |
} |
} |
|
switch (a->sign*2 + b->sign) |
{ |
case 0: /* P - P */ |
case 3: /* N - N */ |
if (diff > 0) |
{ |
/* |a| > |b| */ |
dest->sign = a->sign; |
if ( reg_u_sub(a, b, dest, control_w) ) |
{ |
dest->sign = saved_sign; |
return 1; |
} |
return 0; |
} |
else if ( diff == 0 ) |
{ |
#ifdef DENORM_OPERAND |
if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && |
denormal_operand() ) |
return 1; |
#endif DENORM_OPERAND |
reg_move(&CONST_Z, dest); |
/* sign depends upon rounding mode */ |
dest->sign = ((control_w & CW_RC) != RC_DOWN) |
? SIGN_POS : SIGN_NEG; |
} |
else |
{ |
dest->sign = a->sign ^ SIGN_POS^SIGN_NEG; |
if ( reg_u_sub(b, a, dest, control_w) ) |
{ |
dest->sign = saved_sign; |
return 1; |
} |
} |
break; |
case 1: /* P - N */ |
dest->sign = SIGN_POS; |
if ( reg_u_add(a, b, dest, control_w) ) |
{ |
dest->sign = saved_sign; |
return 1; |
} |
break; |
case 2: /* N - P */ |
dest->sign = SIGN_NEG; |
if ( reg_u_add(a, b, dest, control_w) ) |
{ |
dest->sign = saved_sign; |
return 1; |
} |
break; |
} |
return 0; |
} |
else |
{ |
if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) ) |
{ return real_2op_NaN(b, a, dest); } |
else if (b->tag == TW_Zero) |
{ |
if (a->tag == TW_Zero) |
{ |
char same_signs = !(a->sign ^ b->sign); |
/* Both are zero, result will be zero. */ |
reg_move(a, dest); /* Answer for different signs. */ |
if (same_signs) |
{ |
/* Sign depends upon rounding mode */ |
dest->sign = ((control_w & CW_RC) != RC_DOWN) |
? SIGN_POS : SIGN_NEG; |
} |
} |
else |
{ |
#ifdef DENORM_OPERAND |
if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && |
denormal_operand() ) |
return 1; |
#endif DENORM_OPERAND |
reg_move(a, dest); |
} |
return 0; |
} |
else if (a->tag == TW_Zero) |
{ |
#ifdef DENORM_OPERAND |
if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && |
denormal_operand() ) |
return 1; |
#endif DENORM_OPERAND |
reg_move(b, dest); |
dest->sign ^= SIGN_POS^SIGN_NEG; |
return 0; |
} |
else if (a->tag == TW_Infinity) |
{ |
if (b->tag != TW_Infinity) |
{ |
#ifdef DENORM_OPERAND |
if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && |
denormal_operand() ) |
return 1; |
#endif DENORM_OPERAND |
reg_move(a, dest); return 0; |
} |
/* Both args are Infinity */ |
if (a->sign == b->sign) |
{ |
/* Infinity-Infinity is undefined. */ |
return arith_invalid(dest); |
} |
reg_move(a, dest); |
return 0; |
} |
else if (b->tag == TW_Infinity) |
{ |
#ifdef DENORM_OPERAND |
if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && |
denormal_operand() ) |
return 1; |
#endif DENORM_OPERAND |
reg_move(b, dest); |
dest->sign ^= SIGN_POS^SIGN_NEG; |
return 0; |
} |
} |
#ifdef PARANOID |
EXCEPTION(EX_INTERNAL|0x110); |
#endif |
return 1; |
} |
|
/reg_u_mul.S
0,0 → 1,160
.file "reg_u_mul.S" |
/*---------------------------------------------------------------------------+ |
| reg_u_mul.S | |
| | |
| Core multiplication routine | |
| | |
| Copyright (C) 1992,1993,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Basic multiplication routine. | |
| Does not check the resulting exponent for overflow/underflow | |
| | |
| reg_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | |
| | |
| Internal working is at approx 128 bits. | |
| Result is rounded to nearest 53 or 64 bits, using "nearest or even". | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
|
|
|
/*
 * Scratch storage for the two least significant 32-bit words of the
 * 128-bit product.  The two most significant words are kept in
 * %ecx (ms) and %ebx.
 */
#ifndef NON_REENTRANT_FPU
/* Local storage on the stack: */
#define FPU_accum_0	-4(%ebp)	/* ls word of the product */
#define FPU_accum_1	-8(%ebp)	/* next word up */

#else
/* Local storage in a static area: */
.data
	.align 4,0
FPU_accum_0:
	.long	0
FPU_accum_1:
	.long	0
#endif /* NON_REENTRANT_FPU */


/*
 * reg_u_mul(a, b, dest, cw)
 *
 * Multiplies the 64-bit significands of a (PARAM1) and b (PARAM2) with
 * four 32x32 multiplies, stores the sum of the exponents in dest (PARAM3),
 * normalizes by at most one bit, folds the lowest product word into a
 * sticky bit and then jumps to fpu_reg_round (defined elsewhere) with
 * %eax:%ebx holding the upper 64 bits and %edx the extension word.
 */
.text
ENTRY(reg_u_mul)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$8,%esp		/* room for FPU_accum_0 / FPU_accum_1 */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi
	movl	PARAM2,%edi

#ifdef PARANOID
	/* Both significands must have their ms bit set */
	testl	$0x80000000,SIGH(%esi)
	jz	L_bugged
	testl	$0x80000000,SIGH(%edi)
	jz	L_bugged
#endif /* PARANOID */

#ifdef DENORM_OPERAND
	movl	EXP(%esi),%eax
	cmpl	EXP_UNDER,%eax
	jg	xOp1_not_denorm

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xOp1_not_denorm:
	movl	EXP(%edi),%eax
	cmpl	EXP_UNDER,%eax
	jg	xOp2_not_denorm

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xOp2_not_denorm:
#endif /* DENORM_OPERAND */

	xorl	%ecx,%ecx
	xorl	%ebx,%ebx

	/* a.sigl * b.sigl -> accum_1:accum_0 */
	movl	SIGL(%esi),%eax
	mull	SIGL(%edi)
	movl	%eax,FPU_accum_0
	movl	%edx,FPU_accum_1

	/* a.sigl * b.sigh, added in one word higher */
	movl	SIGL(%esi),%eax
	mull	SIGH(%edi)
	addl	%eax,FPU_accum_1
	adcl	%edx,%ebx
/*	adcl	$0,%ecx		// overflow here is not possible */

	/* a.sigh * b.sigl, added in at the same position */
	movl	SIGH(%esi),%eax
	mull	SIGL(%edi)
	addl	%eax,FPU_accum_1
	adcl	%edx,%ebx
	adcl	$0,%ecx

	/* a.sigh * b.sigh -> top two words */
	movl	SIGH(%esi),%eax
	mull	SIGH(%edi)
	addl	%eax,%ebx
	adcl	%edx,%ecx

	movl	EXP(%esi),%eax	/* Compute the exponent */
	addl	EXP(%edi),%eax
	subl	EXP_BIAS-1,%eax

/* Have now finished with the sources */
	movl	PARAM3,%edi	/* Point to the destination */
	movl	%eax,EXP(%edi)

/* Now make sure that the result is normalized */
	testl	$0x80000000,%ecx
	jnz	LResult_Normalised

	/* Normalize by shifting left one bit */
	shll	$1,FPU_accum_0
	rcll	$1,FPU_accum_1
	rcll	$1,%ebx
	rcll	$1,%ecx
	decl	EXP(%edi)

LResult_Normalised:
	movl	FPU_accum_0,%eax
	movl	FPU_accum_1,%edx
	orl	%eax,%eax
	jz	L_extent_zero

	/* Non-zero bits below the extension word: record them as a sticky bit */
	orl	$1,%edx

L_extent_zero:
	movl	%ecx,%eax
	jmp	fpu_reg_round


#ifdef PARANOID
L_bugged:
	pushl	EX_INTERNAL|0x205
	call	EXCEPTION
	pop	%ebx		/* discard the pushed argument */
	jmp	L_exit

L_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret
#endif /* PARANOID */
|
/reg_div.S
0,0 → 1,248
.file "reg_div.S" |
/*---------------------------------------------------------------------------+ |
| reg_div.S | |
| | |
| Divide one FPU_REG by another and put the result in a destination FPU_REG.| |
| | |
| Copyright (C) 1992,1993,1994,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| int reg_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | |
| unsigned int control_word) | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "fpu_emu.h" |
|
|
/*
 * reg_div(a, b, dest, control_word)
 *
 * %esi = a (PARAM1), %ebx = b (PARAM2), %edi = dest (PARAM3).
 * When both tags are TW_Valid the sign and exponent of dest are set here
 * and control passes to divide_kernel (defined elsewhere).  All other tag
 * combinations (NaN, Infinity, Zero) are resolved in this file and leave
 * through LDiv_exit with the status in %eax.
 */
.text
ENTRY(reg_div)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp	/* Needed by divide_kernel */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi
	movl	PARAM2,%ebx
	movl	PARAM3,%edi

	/* TW_Valid is 0, so the OR is zero only if both tags are valid */
	movb	TAG(%esi),%al
	orb	TAG(%ebx),%al

	jne	L_div_special	/* Not (both numbers TW_Valid) */

#ifdef DENORM_OPERAND
/* Check for denormals */
	cmpl	EXP_UNDER,EXP(%esi)
	jg	xL_arg1_not_denormal

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xL_arg1_not_denormal:
	cmpl	EXP_UNDER,EXP(%ebx)
	jg	xL_arg2_not_denormal

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xL_arg2_not_denormal:
#endif /* DENORM_OPERAND */

/* Both arguments are TW_Valid */
	movb	TW_Valid,TAG(%edi)

	movb	SIGN(%esi),%cl
	cmpb	%cl,SIGN(%ebx)
	setne	(%edi)	/* Set the sign, requires SIGN_NEG=1, SIGN_POS=0 */

	movl	EXP(%esi),%edx
	movl	EXP(%ebx),%eax
	subl	%eax,%edx
	addl	EXP_BIAS,%edx
	movl	%edx,EXP(%edi)

	jmp	SYMBOL_NAME(divide_kernel)


/*-----------------------------------------------------------------------*/
L_div_special:
	cmpb	TW_NaN,TAG(%esi)	/* A NaN with anything to give NaN */
	je	L_arg1_NaN

	cmpb	TW_NaN,TAG(%ebx)	/* A NaN with anything to give NaN */
	jne	L_no_NaN_arg

/* Operations on NaNs */
L_arg1_NaN:
L_arg2_NaN:
	pushl	%edi	/* Destination */
	pushl	%esi
	pushl	%ebx	/* Ordering is important here */
	call	SYMBOL_NAME(real_2op_NaN)
	jmp	LDiv_exit

/* Invalid operations */
L_zero_zero:
L_inf_inf:
	pushl	%edi	/* Destination */
	call	SYMBOL_NAME(arith_invalid)	/* 0/0 or Infinity/Infinity */
	jmp	LDiv_exit

L_no_NaN_arg:
	cmpb	TW_Infinity,TAG(%esi)
	jne	L_arg1_not_inf

	cmpb	TW_Infinity,TAG(%ebx)
	je	L_inf_inf	/* invalid operation */

	cmpb	TW_Valid,TAG(%ebx)
	je	L_inf_valid

#ifdef PARANOID
	/* arg2 must be zero or valid */
	cmpb	TW_Zero,TAG(%ebx)
	ja	L_unknown_tags
#endif /* PARANOID */

	/* Note that p16-9 says that infinity/0 returns infinity */
	jmp	L_copy_arg1	/* Answer is Inf */

L_inf_valid:
#ifdef DENORM_OPERAND
	cmpl	EXP_UNDER,EXP(%ebx)
	jg	L_copy_arg1	/* Answer is Inf */

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit
#endif /* DENORM_OPERAND */

	jmp	L_copy_arg1	/* Answer is Inf */

L_arg1_not_inf:
	cmpb	TW_Zero,TAG(%ebx)	/* Priority to div-by-zero error */
	jne	L_arg2_not_zero

	cmpb	TW_Zero,TAG(%esi)
	je	L_zero_zero	/* invalid operation */

#ifdef PARANOID
	/* arg1 must be valid */
	cmpb	TW_Valid,TAG(%esi)
	ja	L_unknown_tags
#endif /* PARANOID */

	/* Division by zero error */
	pushl	%edi	/* destination */
	movb	SIGN(%esi),%al
	xorb	SIGN(%ebx),%al
	pushl	%eax	/* lower 8 bits have the sign */
	call	SYMBOL_NAME(divide_by_zero)
	jmp	LDiv_exit

L_arg2_not_zero:
	cmpb	TW_Infinity,TAG(%ebx)
	jne	L_arg2_not_inf

#ifdef DENORM_OPERAND
	cmpb	TW_Valid,TAG(%esi)
	jne	L_return_zero

	cmpl	EXP_UNDER,EXP(%esi)
	jg	L_return_zero	/* Answer is zero */

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit
#endif /* DENORM_OPERAND */

	jmp	L_return_zero	/* Answer is zero */

L_arg2_not_inf:

#ifdef PARANOID
	cmpb	TW_Zero,TAG(%esi)
	jne	L_unknown_tags
#endif /* PARANOID */

	/* arg1 is zero, arg2 is not Infinity or a NaN */

#ifdef DENORM_OPERAND
	cmpl	EXP_UNDER,EXP(%ebx)
	jg	L_copy_arg1	/* Answer is zero */

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit
#endif /* DENORM_OPERAND */

/* Copy tag, exponent and significand of arg1 to the destination;
   the sign is computed separately below. */
L_copy_arg1:
	/* The tag is a single byte, so it must go through %al:
	   a byte move cannot name the 16-bit register %ax. */
	movb	TAG(%esi),%al
	movb	%al,TAG(%edi)
	movl	EXP(%esi),%eax
	movl	%eax,EXP(%edi)
	movl	SIGL(%esi),%eax
	movl	%eax,SIGL(%edi)
	movl	SIGH(%esi),%eax
	movl	%eax,SIGH(%edi)

/* Result sign is negative exactly when the operand signs differ */
LDiv_set_result_sign:
	movb	SIGN(%esi),%cl
	cmpb	%cl,SIGN(%ebx)
	jne	LDiv_negative_result

	movb	SIGN_POS,SIGN(%edi)
	xorl	%eax,%eax	/* Valid result */
	jmp	LDiv_exit

LDiv_negative_result:
	movb	SIGN_NEG,SIGN(%edi)
	xorl	%eax,%eax	/* Valid result */

LDiv_exit:
	/* Point %esp at the three saved registers, discarding anything
	   pushed as call arguments above:
	   12 bytes of saved registers (+ 28 bytes of locals if re-entrant). */
#ifndef NON_REENTRANT_FPU
	leal	-40(%ebp),%esp
#else
	leal	-12(%ebp),%esp
#endif /* NON_REENTRANT_FPU */

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


L_return_zero:
	xorl	%eax,%eax
	movl	%eax,SIGH(%edi)
	movl	%eax,SIGL(%edi)
	movl	EXP_UNDER,EXP(%edi)
	movb	TW_Zero,TAG(%edi)
	jmp	LDiv_set_result_sign

#ifdef PARANOID
L_unknown_tags:
	pushl	EX_INTERNAL | 0x208
	call	EXCEPTION

	/* Generate a NaN for unknown tags */
	movl	SYMBOL_NAME(CONST_QNaN),%eax
	movl	%eax,(%edi)
	movl	SYMBOL_NAME(CONST_QNaN)+4,%eax
	movl	%eax,SIGL(%edi)
	movl	SYMBOL_NAME(CONST_QNaN)+8,%eax
	movl	%eax,SIGH(%edi)
	jmp	LDiv_exit	/* %eax is nz */
#endif /* PARANOID */
/fpu_asm.h
0,0 → 1,31
/*---------------------------------------------------------------------------+ |
| fpu_asm.h | |
| | |
| Copyright (C) 1992,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
+---------------------------------------------------------------------------*/ |
|
#ifndef _FPU_ASM_H_
#define _FPU_ASM_H_

#include <linux/linkage.h>

/* Assembler-level name of the C exception() routine */
#define	EXCEPTION	SYMBOL_NAME(exception)


/* Stack arguments as seen after "pushl %ebp; movl %esp,%ebp" */
#define PARAM1	8(%ebp)
#define	PARAM2	12(%ebp)
#define	PARAM3	16(%ebp)
#define	PARAM4	20(%ebp)

/* Byte offsets of the FPU_REG fields, written as operand prefixes:
   FIELD(reg) expands to offset(reg). */
#define SIGL_OFFSET 8
#define	SIGN(x)	(x)
#define	TAG(x)	1(x)
#define	EXP(x)	4(x)
#define SIG(x)	SIGL_OFFSET##(x)
#define	SIGL(x)	SIGL_OFFSET##(x)
#define	SIGH(x)	12(x)

#endif /* _FPU_ASM_H_ */
/fpu_emu.h
0,0 → 1,173
/*---------------------------------------------------------------------------+ |
| fpu_emu.h | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
+---------------------------------------------------------------------------*/ |
|
|
#ifndef _FPU_EMU_H_
#define _FPU_EMU_H_

/*
 * Define DENORM_OPERAND to make the emulator detect denormals
 * and use the denormal flag of the status word. Note: this only
 * affects the flag and corresponding interrupt, the emulator
 * will always generate denormals and operate upon them as required.
 */
#define DENORM_OPERAND

/*
 * Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
 * rather than behaviour which appears to be cleaner.
 * This is a matter of opinion: for all I know, the 80486 may simply
 * be complying with the IEEE spec. Maybe one day I'll get to see the
 * spec...
 */
#define PECULIAR_486

/*
 * Const(x) yields an immediate operand ($x) when included from assembler
 * sources and the plain value when included from C.
 */
#ifdef __ASSEMBLY__
#include "fpu_asm.h"
#define	Const(x)	$##x
#else
#define	Const(x)	x
#endif

#define EXP_BIAS	Const(0)
#define EXP_OVER	Const(0x4000)    /* smallest invalid large exponent */
#define	EXP_UNDER	Const(-0x3fff)   /* largest invalid small exponent */
#define EXP_Infinity    EXP_OVER
#define EXP_NaN         EXP_OVER

#define SIGN_POS	Const(0)
#define SIGN_NEG	Const(1)

/* Keep the order TW_Valid, TW_Zero, TW_Denormal */
#define TW_Valid	Const(0)	/* valid */
#define TW_Zero		Const(1)	/* zero */
/* The following fold to 2 (Special) in the Tag Word */
/* #define TW_Denormal     Const(4) */       /* De-normal */
#define TW_Infinity	Const(5)	/* + or - infinity */
#define	TW_NaN		Const(6)	/* Not a Number */

#define TW_Empty	Const(7)	/* empty */


#ifndef __ASSEMBLY__

#include <asm/sigcontext.h>   /* for struct _fpstate */
#include <asm/math_emu.h>

#include <linux/linkage.h>

/*
#define RE_ENTRANT_CHECKING
 */

#ifdef RE_ENTRANT_CHECKING
extern char emulating;
#  define RE_ENTRANT_CHECK_OFF emulating = 0
#  define RE_ENTRANT_CHECK_ON emulating = 1
#else
#  define RE_ENTRANT_CHECK_OFF
#  define RE_ENTRANT_CHECK_ON
#endif /* RE_ENTRANT_CHECKING */

/* Instruction prefix bytes */
#define FWAIT_OPCODE 0x9b
#define OP_SIZE_PREFIX 0x66
#define ADDR_SIZE_PREFIX 0x67
#define PREFIX_CS 0x2e
#define PREFIX_DS 0x3e
#define PREFIX_ES 0x26
#define PREFIX_SS 0x36
#define PREFIX_FS 0x64
#define PREFIX_GS 0x65
#define PREFIX_REPE 0xf3
#define PREFIX_REPNE 0xf2
#define PREFIX_LOCK 0xf0
/* Small integer codes for the segment prefixes */
#define PREFIX_CS_ 1
#define PREFIX_DS_ 2
#define PREFIX_ES_ 3
#define PREFIX_FS_ 4
#define PREFIX_GS_ 5
#define PREFIX_SS_ 6
#define PREFIX_DEFAULT 7

struct address {
  unsigned int offset;
  unsigned short selector;
  unsigned short opcode:11,
                 empty:5;
};
typedef void (*FUNC)(void);
typedef struct fpu_reg FPU_REG;
typedef void (*FUNC_ST0)(FPU_REG *st0_ptr);
typedef struct { unsigned char address_size, operand_size, segment; }
        overrides;
/* This structure is 32 bits: */
typedef struct { overrides override;
		 unsigned char default_mode; } fpu_addr_modes;
/* PROTECTED has a restricted meaning in the emulator; it is used
   to signal that the emulator needs to do special things to ensure
   that protection is respected in a segmented model. */
#define PROTECTED 4
#define SIXTEEN   1         /* We rely upon this being 1 (true) */
#define VM86      SIXTEEN
#define PM16      (SIXTEEN | PROTECTED)
#define SEG32     PROTECTED
extern unsigned char const data_sizes_16[32];

/* Register x places from the top of stack; the index wraps modulo 8.
   The argument is parenthesized so that expressions can be passed. */
#define st(x)	( regs[((top+(x)) &7 )] )

/* Sets st_new_ptr to the slot below the current top and yields non-zero
   if that slot is not empty.  The caller must declare st_new_ptr. */
#define	STACK_OVERFLOW	(st_new_ptr = &st(-1), st_new_ptr->tag != TW_Empty)
#define	NOT_EMPTY(i)	(st(i).tag != TW_Empty)
#define	NOT_EMPTY_ST0	(st0_tag ^ TW_Empty)

/* NOTE: pop(), poppop(), push() and reg_move() expand to brace blocks,
   not do { } while (0), so they cannot be followed by "else". */
#define pop()	{ regs[(top++ & 7 )].tag = TW_Empty; }
#define poppop() { regs[((top + 1) & 7 )].tag \
		     = regs[(top & 7 )].tag = TW_Empty; \
		   top += 2; }

/* push() does not affect the tags */
#define push()	{ top--; }


/* Copy register *x to *y: sign and the adjacent byte as one short,
   the exponent as a long, and sigl/sigh together as one long long. */
#define reg_move(x, y) { \
		 *(short *)&((y)->sign) = *(const short *)&((x)->sign); \
		 *(long *)&((y)->exp) = *(const long *)&((x)->exp); \
		 *(long long *)&((y)->sigl) = *(const long long *)&((x)->sigl); }

/* The 64-bit significand, accessed starting at sigl */
#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )


/*----- Prototypes for functions written in assembler -----*/
/* extern void reg_move(FPU_REG *a, FPU_REG *b); */

asmlinkage void normalize(FPU_REG *x);
asmlinkage void normalize_nuo(FPU_REG *x);
asmlinkage int reg_div(FPU_REG const *arg1, FPU_REG const *arg2,
		       FPU_REG *answ, unsigned int control_w);
asmlinkage int reg_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
			 FPU_REG *answ, unsigned int control_w);
asmlinkage int reg_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
			 FPU_REG *answ, unsigned int control_w);
asmlinkage int reg_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
			 FPU_REG *answ, unsigned int control_w);
asmlinkage int reg_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
			 FPU_REG *answ, unsigned int control_w);
asmlinkage int wm_sqrt(FPU_REG *n, unsigned int control_w);
asmlinkage unsigned	shrx(void *l, unsigned x);
asmlinkage unsigned	shrxs(void *v, unsigned x);
asmlinkage unsigned long div_small(unsigned long long *x, unsigned long y);
asmlinkage void round_reg(FPU_REG *arg, unsigned int extent,
			  unsigned int control_w);

#ifndef MAKING_PROTO
#include "fpu_proto.h"
#endif

#endif /* __ASSEMBLY__ */

#endif /* _FPU_EMU_H_ */
/shr_Xsig.S
0,0 → 1,87
.file "shr_Xsig.S" |
/*---------------------------------------------------------------------------+ |
| shr_Xsig.S | |
| | |
| 12 byte right shift function | |
| | |
| Copyright (C) 1992,1994,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| void shr_Xsig(Xsig *arg, unsigned nr) | |
| | |
| Extended shift right function. | |
| Fastest for small shifts. | |
| Shifts the 12 byte quantity pointed to by the first arg (arg) | |
| right by the number of bits specified by the second arg (nr). | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_emu.h" |
|
/*
 * shr_Xsig(arg, nr)
 *
 * Shifts the three 32-bit words at arg (PARAM1; lsl at offset 0, midl at 4,
 * msl at 8) right by nr (PARAM2) bits, filling with zeros.  The shift is
 * split into whole-word moves plus a 0..31 bit shrd/shr; counts of 96 or
 * more clear all three words.
 */
.text
ENTRY(shr_Xsig)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx
	movl	PARAM1,%esi
	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jnc	L_more_than_31

/* less than 32 bits */
	pushl	%ebx		/* %ebx is only needed (and saved) on this path */
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%ebx	/* midl */
	movl	8(%esi),%edx	/* msl */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	movl	%eax,(%esi)
	movl	%ebx,4(%esi)
	movl	%edx,8(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* 32..63 bits: move down one word, then shift by nr-32 */
L_more_than_31:
	cmpl	$64,%ecx
	jnc	L_more_than_63

	subb	$32,%cl
	movl	4(%esi),%eax	/* midl */
	movl	8(%esi),%edx	/* msl */
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	movl	%eax,(%esi)
	movl	%edx,4(%esi)
	movl	$0,8(%esi)
	popl	%esi
	leave
	ret

/* 64..95 bits: only msl survives, shifted by nr-64 */
L_more_than_63:
	cmpl	$96,%ecx
	jnc	L_more_than_95

	subb	$64,%cl
	movl	8(%esi),%eax	/* msl */
	shr	%cl,%eax
	xorl	%edx,%edx
	movl	%eax,(%esi)
	movl	%edx,4(%esi)
	movl	%edx,8(%esi)
	popl	%esi
	leave
	ret

/* 96 bits or more: the result is zero */
L_more_than_95:
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	movl	%eax,8(%esi)
	popl	%esi
	leave
	ret
/polynom_Xsig.S
0,0 → 1,135
/*---------------------------------------------------------------------------+ |
| polynomial_Xsig.S | |
| | |
| Fixed point arithmetic polynomial evaluation. | |
| | |
| Copyright (C) 1992,1993,1994,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| void polynomial_Xsig(Xsig *accum, unsigned long long x, | |
| unsigned long long terms[], int n) | |
| | |
| Computes: | |
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | |
| and adds the result to the 12 byte Xsig. | |
| The terms[] are each 8 bytes, but all computation is performed to 12 byte | |
| precision. | |
| | |
| This function must be used carefully: most overflow of intermediate | |
| results is controlled, but overflow of the result is not. | |
| | |
+---------------------------------------------------------------------------*/ |
.file "polynomial_Xsig.S" |
|
#include "fpu_emu.h" |
|
|
/* Size in bytes of one entry of terms[] */
#define	TERM_SIZE	$8
/* 96-bit running sum */
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define SUM_LS		-28(%ebp)	/* sum ls long */
/* 96-bit product of the running sum and x */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */

/*
 * polynomial_Xsig(accum, x, terms, n)
 *
 * PARAM1 = accum (12 bytes, updated in place), PARAM2 = pointer to the
 * 64-bit x, PARAM3 = terms, PARAM4 = n.
 * The running sum starts as the entry at terms + 8*n (ls long cleared);
 * each loop pass multiplies it by x, keeps the upper 96 bits and adds the
 * next lower entry of terms[].  The final sum is added to *accum.
 */
.text
ENTRY(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM2,%esi		/* x */
	movl	PARAM3,%edi		/* terms */

	/* %edi = terms + TERM_SIZE * n */
	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n */
	addl	%eax,%edi

	movl	4(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MS
	movl	(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MIDDLE
	xor	%eax,%eax
	movl	%eax,SUM_LS
	movb	%al,OVERFLOWED

	subl	TERM_SIZE,%edi
	decl	PARAM4
	js	L_accum_done

L_accum_loop:
	/* ACCUM = (SUM_MS:SUM_MIDDLE) * x, upper 96 bits only */
	xor	%eax,%eax
	movl	%eax,ACCUM_MS
	movl	%eax,ACCUM_MIDDLE

	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* x ls long */
	movl	%edx,ACCUM_LS

	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	(%esi)			/* x ls long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	testb	$0xff,OVERFLOWED
	jz	L_no_overflow

	/* The previous addition carried out of SUM_MS: that lost bit is
	   worth one extra x at this position, so add x in. */
	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

L_no_overflow:

/*
 * Now put the sum of next term and the accumulator
 * into the sum register
 */
	/* NOTE(review): the term's ls long is added at both the LS and the
	   MIDDLE position here, whereas the initial load above places it at
	   MIDDLE only.  Left unchanged; confirm whether this is intended. */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS
	sbbb	%al,%al			/* %al = 0xff if the add carried, else 0 */
	movb	%al,OVERFLOWED		/* Used in the next iteration */

	subl	TERM_SIZE,%edi
	decl	PARAM4
	jns	L_accum_loop

L_accum_done:
	/* *accum += SUM (96-bit add; a carry out of the top is dropped) */
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret
/mul_Xsig.S
0,0 → 1,176
/*---------------------------------------------------------------------------+ |
| mul_Xsig.S | |
| | |
| Multiply a 12 byte fixed point number by another fixed point number. | |
| | |
| Copyright (C) 1992,1994,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| void mul32_Xsig(Xsig *x, unsigned b) | |
| | |
| void mul64_Xsig(Xsig *x, unsigned long long *b) | |
| | |
| void mul_Xsig_Xsig(Xsig *x, unsigned *b) | |
| | |
| The result is neither rounded nor normalized, and the ls bit or so may | |
| be wrong. | |
| | |
+---------------------------------------------------------------------------*/ |
.file "mul_Xsig.S" |
|
|
#include "fpu_emu.h" |
|
.text |
/*
 * mul32_Xsig(x, b)
 *
 * Multiplies the 96-bit Xsig at x (PARAM1) by the 32-bit value b (PARAM2)
 * and stores the upper 96 bits of the 128-bit product back into *x.
 * The low 32 bits of the product are discarded.
 */
ENTRY(mul32_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$16,%esp	/* -12/-8/-4(%ebp): ls/middle/ms of the result */
	pushl	%esi

	movl	PARAM1,%esi
	movl	PARAM2,%ecx

	xor	%eax,%eax
	movl	%eax,-4(%ebp)
	movl	%eax,-8(%ebp)

	movl	(%esi),%eax        /* lsl of Xsig */
	mull	%ecx		/* b */
	movl	%edx,-12(%ebp)	/* keep only the high half */

	movl	4(%esi),%eax	/* midl of Xsig */
	mull	%ecx		/* b */
	addl	%eax,-12(%ebp)
	adcl	%edx,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	8(%esi),%eax	/* msl of Xsig */
	mull	%ecx		/* b */
	addl	%eax,-8(%ebp)
	adcl	%edx,-4(%ebp)

	/* Write the result back over x */
	movl	-12(%ebp),%eax
	movl	%eax,(%esi)
	movl	-8(%ebp),%eax
	movl	%eax,4(%esi)
	movl	-4(%ebp),%eax
	movl	%eax,8(%esi)

	popl	%esi
	leave
	ret
|
|
/*
 * mul64_Xsig(x, b)
 *
 * Multiplies the 96-bit Xsig at x (PARAM1) by the 64-bit value at b
 * (PARAM2) and stores the upper 96 bits of the product back into *x.
 * The lsl(x)*lsl(b) partial product is not computed, and only the high
 * halves of the two lowest computed products are used, so the least
 * significant bit or so of the result may be wrong.
 */
ENTRY(mul64_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$16,%esp	/* -12/-8/-4(%ebp): ls/middle/ms of the result */
	pushl	%esi

	movl	PARAM1,%esi
	movl	PARAM2,%ecx

	xor	%eax,%eax
	movl	%eax,-4(%ebp)
	movl	%eax,-8(%ebp)

	movl	(%esi),%eax        /* lsl of Xsig */
	mull	4(%ecx)		/* msl of b */
	movl	%edx,-12(%ebp)	/* high half only */

	movl	4(%esi),%eax	/* midl of Xsig */
	mull	(%ecx)		/* lsl of b */
	addl	%edx,-12(%ebp)	/* high half only */
	adcl	$0,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	4(%esi),%eax	/* midl of Xsig */
	mull	4(%ecx)		/* msl of b */
	addl	%eax,-12(%ebp)
	adcl	%edx,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	8(%esi),%eax	/* msl of Xsig */
	mull	(%ecx)		/* lsl of b */
	addl	%eax,-12(%ebp)
	adcl	%edx,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	8(%esi),%eax	/* msl of Xsig */
	mull	4(%ecx)		/* msl of b */
	addl	%eax,-8(%ebp)
	adcl	%edx,-4(%ebp)

	/* Write the result back over x */
	movl	-12(%ebp),%eax
	movl	%eax,(%esi)
	movl	-8(%ebp),%eax
	movl	%eax,4(%esi)
	movl	-4(%ebp),%eax
	movl	%eax,8(%esi)

	popl	%esi
	leave
	ret
|
|
|
/*
 * mul_Xsig_Xsig(x, b)
 *
 * Multiplies the 96-bit Xsig at x (PARAM1) by the 96-bit value at b
 * (PARAM2) and stores the upper 96 bits of the product back into *x.
 * Partial products that lie entirely below the kept 96 bits are not
 * computed, and the three lowest computed ones contribute only their
 * high halves, so the least significant bit or so may be wrong.
 */
ENTRY(mul_Xsig_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$16,%esp	/* -12/-8/-4(%ebp): ls/middle/ms of the result */
	pushl	%esi

	movl	PARAM1,%esi
	movl	PARAM2,%ecx

	xor	%eax,%eax
	movl	%eax,-4(%ebp)
	movl	%eax,-8(%ebp)

	movl	(%esi),%eax        /* lsl of Xsig */
	mull	8(%ecx)		/* msl of b */
	movl	%edx,-12(%ebp)	/* high half only */

	movl	4(%esi),%eax	/* midl of Xsig */
	mull	4(%ecx)		/* midl of b */
	addl	%edx,-12(%ebp)	/* high half only */
	adcl	$0,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	8(%esi),%eax	/* msl of Xsig */
	mull	(%ecx)		/* lsl of b */
	addl	%edx,-12(%ebp)	/* high half only */
	adcl	$0,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	4(%esi),%eax	/* midl of Xsig */
	mull	8(%ecx)		/* msl of b */
	addl	%eax,-12(%ebp)
	adcl	%edx,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	8(%esi),%eax	/* msl of Xsig */
	mull	4(%ecx)		/* midl of b */
	addl	%eax,-12(%ebp)
	adcl	%edx,-8(%ebp)
	adcl	$0,-4(%ebp)

	movl	8(%esi),%eax	/* msl of Xsig */
	mull	8(%ecx)		/* msl of b */
	addl	%eax,-8(%ebp)
	adcl	%edx,-4(%ebp)

	/* Write the result back over x */
	movl	-12(%ebp),%edx
	movl	%edx,(%esi)
	movl	-8(%ebp),%edx
	movl	%edx,4(%esi)
	movl	-4(%ebp),%edx
	movl	%edx,8(%esi)

	popl	%esi
	leave
	ret
|
/fpu_aux.c
0,0 → 1,184
/*---------------------------------------------------------------------------+ |
| fpu_aux.c | |
| | |
| Code to implement some of the FPU auxiliary instructions. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "fpu_emu.h" |
#include "status_w.h" |
#include "control_w.h" |
|
|
/* Does nothing; used as the handler for instructions that are no-ops. */
static void fnop(void)
{
}
|
void fclex(void) |
{ |
partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision| |
SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op| |
SW_Invalid); |
no_ip_update = 1; |
} |
|
/* Needs to be externally visible */ |
void finit() |
{ |
int r; |
control_word = 0x037f; |
partial_status = 0; |
top = 0; /* We don't keep top in the status word internally. */ |
for (r = 0; r < 8; r++) |
{ |
regs[r].tag = TW_Empty; |
} |
/* The behaviour is different to that detailed in |
Section 15.1.6 of the Intel manual */ |
operand_address.offset = 0; |
operand_address.selector = 0; |
instruction_address.offset = 0; |
instruction_address.selector = 0; |
instruction_address.opcode = 0; |
no_ip_update = 1; |
} |
|
/* |
* These are nops on the i387.. |
*/ |
#define feni fnop |
#define fdisi fnop |
#define fsetpm fnop |
|
static FUNC const finit_table[] = { |
feni, fdisi, fclex, finit, |
fsetpm, FPU_illegal, FPU_illegal, FPU_illegal |
}; |
|
void finit_() |
{ |
(finit_table[FPU_rm])(); |
} |
|
|
static void fstsw_ax(void) |
{ |
*(short *) &FPU_EAX = status_word(); |
no_ip_update = 1; |
} |
|
static FUNC const fstsw_table[] = { |
fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, |
FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal |
}; |
|
void fstsw_() |
{ |
(fstsw_table[FPU_rm])(); |
} |
|
|
static FUNC const fp_nop_table[] = { |
fnop, FPU_illegal, FPU_illegal, FPU_illegal, |
FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal |
}; |
|
void fp_nop() |
{ |
(fp_nop_table[FPU_rm])(); |
} |
|
|
void fld_i_() |
{ |
FPU_REG *st_new_ptr; |
|
if ( STACK_OVERFLOW ) |
{ stack_overflow(); return; } |
|
/* fld st(i) */ |
if ( NOT_EMPTY(FPU_rm) ) |
{ reg_move(&st(FPU_rm), st_new_ptr); push(); } |
else |
{ |
if ( control_word & CW_Invalid ) |
{ |
/* The masked response */ |
stack_underflow(); |
} |
else |
EXCEPTION(EX_StackUnder); |
} |
|
} |
|
|
void fxch_i() |
{ |
/* fxch st(i) */ |
FPU_REG t; |
register FPU_REG *sti_ptr = &st(FPU_rm), *st0_ptr = &st(0); |
|
if ( st0_ptr->tag == TW_Empty ) |
{ |
if ( sti_ptr->tag == TW_Empty ) |
{ |
stack_underflow(); |
stack_underflow_i(FPU_rm); |
return; |
} |
if ( control_word & CW_Invalid ) |
reg_move(sti_ptr, st0_ptr); /* Masked response */ |
stack_underflow_i(FPU_rm); |
return; |
} |
if ( sti_ptr->tag == TW_Empty ) |
{ |
if ( control_word & CW_Invalid ) |
reg_move(st0_ptr, sti_ptr); /* Masked response */ |
stack_underflow(); |
return; |
} |
clear_C1(); |
reg_move(st0_ptr, &t); |
reg_move(sti_ptr, st0_ptr); |
reg_move(&t, sti_ptr); |
} |
|
|
void ffree_() |
{ |
/* ffree st(i) */ |
st(FPU_rm).tag = TW_Empty; |
} |
|
|
void ffreep() |
{ |
/* ffree st(i) + pop - unofficial code */ |
st(FPU_rm).tag = TW_Empty; |
pop(); |
} |
|
|
void fst_i_() |
{ |
/* fst st(i) */ |
reg_move(&st(0), &st(FPU_rm)); |
} |
|
|
void fstp_i() |
{ |
/* fstp st(i) */ |
reg_move(&st(0), &st(FPU_rm)); |
pop(); |
} |
|
/wm_shrx.S
0,0 → 1,204
.file "wm_shrx.S" |
/*---------------------------------------------------------------------------+ |
| wm_shrx.S | |
| | |
| 64 bit right shift functions | |
| | |
| Copyright (C) 1992,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| unsigned shrx(void *arg1, unsigned arg2) | |
| and | |
| unsigned shrxs(void *arg1, unsigned arg2) | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_emu.h" |
|
.text |
/*---------------------------------------------------------------------------+ |
| unsigned shrx(void *arg1, unsigned arg2) | |
| | |
| Extended shift right function. | |
| Fastest for small shifts. | |
| Shifts the 64 bit quantity pointed to by the first arg (arg1) | |
| right by the number of bits specified by the second arg (arg2). | |
| Forms a 96 bit quantity from the 64 bit arg and eax: | |
| [ 64 bit arg ][ eax ] | |
| shift right ---------> | |
| The eax register is initialized to 0 before the shifting. | |
| Results returned in the 64 bit arg and eax. | |
+---------------------------------------------------------------------------*/ |
|
/* shrx: four paths selected by the shift count in %ecx:
   0..31, 32..63, 64..95 and 96 or more.  In every path the shifted
   64 bit value is written back through %esi and the extension word
   is left in %eax as the return value. */
ENTRY(shrx) |
push %ebp |
movl %esp,%ebp |
pushl %esi |
movl PARAM2,%ecx /* shift count (arg2) */ |
movl PARAM1,%esi /* pointer to the 64 bit quantity (arg1) */ |
cmpl $32,%ecx /* shrd only works for 0..31 bits */ |
jnc L_more_than_31 |
|
/* less than 32 bits */ |
pushl %ebx /* %ebx is only needed (and saved) on this path */ |
movl (%esi),%ebx /* lsl */ |
movl 4(%esi),%edx /* msl */ |
xorl %eax,%eax /* extension */ |
shrd %cl,%ebx,%eax /* bits leaving lsl enter the extension */ |
shrd %cl,%edx,%ebx /* bits leaving msl enter lsl */ |
shr %cl,%edx |
movl %ebx,(%esi) |
movl %edx,4(%esi) |
popl %ebx |
popl %esi |
leave |
ret |
|
/* Shift by 32..63 bits: handled as a whole-word move plus a shift
   by (count - 32) */
L_more_than_31: |
cmpl $64,%ecx |
jnc L_more_than_63 |
|
subb $32,%cl |
movl (%esi),%eax /* lsl */ |
movl 4(%esi),%edx /* msl */ |
shrd %cl,%edx,%eax /* extension = old lsl with msl bits shifted in */ |
shr %cl,%edx |
movl %edx,(%esi) /* new lsl = shifted old msl */ |
movl $0,4(%esi) /* new msl = 0 */ |
popl %esi |
leave |
ret |
|
/* Shift by 64..95 bits: only part of the old msl survives, in the
   extension; the 64 bit arg becomes zero */
L_more_than_63: |
cmpl $96,%ecx |
jnc L_more_than_95 |
|
subb $64,%cl |
movl 4(%esi),%eax /* msl */ |
shr %cl,%eax |
xorl %edx,%edx |
movl %edx,(%esi) |
movl %edx,4(%esi) |
popl %esi |
leave |
ret |
|
/* Shift by 96 bits or more: everything is shifted out, so both the
   64 bit arg and the returned extension are zero */
L_more_than_95: |
xorl %eax,%eax |
movl %eax,(%esi) |
movl %eax,4(%esi) |
popl %esi |
leave |
ret |
|
|
/*---------------------------------------------------------------------------+ |
| unsigned shrxs(void *arg1, unsigned arg2) | |
| | |
| Extended shift right function (optimized for small floating point | |
| integers). | |
| Shifts the 64 bit quantity pointed to by the first arg (arg1) | |
| right by the number of bits specified by the second arg (arg2). | |
| Forms a 96 bit quantity from the 64 bit arg and eax: | |
| [ 64 bit arg ][ eax ] | |
| shift right ---------> | |
| The eax register is initialized to 0 before the shifting. | |
| The lower 8 bits of eax are lost and replaced by a flag which is | |
| set (to 0x01) if any bit, apart from the first one, is set in the | |
| part which has been shifted out of the arg. | |
| Results returned in the 64 bit arg and eax. | |
+---------------------------------------------------------------------------*/ |
/* shrxs: like shrx, but the low byte of the returned extension (%al)
   is replaced by a flag recording whether any bit was set below the
   most significant bit of the extension.  %ebx is used as scratch on
   every path and is saved and restored around the body. */
ENTRY(shrxs) |
push %ebp |
movl %esp,%ebp |
pushl %esi |
pushl %ebx |
movl PARAM2,%ecx /* shift count (arg2) */ |
movl PARAM1,%esi /* pointer to the 64 bit quantity (arg1) */ |
cmpl $64,%ecx /* shifts of 64 bits or more are handled separately */ |
jnc Ls_more_than_63 |
|
cmpl $32,%ecx /* shrd only works for 0..31 bits */ |
jc Ls_less_than_32 |
|
/* We got here without jumps by assuming that the most common requirement |
is for small integers */ |
/* Shift by [32..63] bits */ |
subb $32,%cl |
movl (%esi),%eax /* lsl */ |
movl 4(%esi),%edx /* msl */ |
xorl %ebx,%ebx |
shrd %cl,%eax,%ebx /* %ebx collects the bits shifted out of lsl */ |
shrd %cl,%edx,%eax /* %eax becomes the extension */ |
shr %cl,%edx |
orl %ebx,%ebx /* test these 32 bits */ |
setne %bl |
test $0x7fffffff,%eax /* and 31 bits here */ |
setne %bh |
orw %bx,%bx /* Any of the 63 bit set ? */ |
setne %al /* flag replaces the low byte of the extension */ |
movl %edx,(%esi) |
movl $0,4(%esi) |
popl %ebx |
popl %esi |
leave |
ret |
|
/* Shift by [0..31] bits */ |
Ls_less_than_32: |
movl (%esi),%ebx /* lsl */ |
movl 4(%esi),%edx /* msl */ |
xorl %eax,%eax /* extension */ |
shrd %cl,%ebx,%eax |
shrd %cl,%edx,%ebx |
shr %cl,%edx |
test $0x7fffffff,%eax /* only need to look at eax here */ |
setne %al |
movl %ebx,(%esi) |
movl %edx,4(%esi) |
popl %ebx |
popl %esi |
leave |
ret |
|
/* Shift by [64..95] bits */ |
Ls_more_than_63: |
cmpl $96,%ecx |
jnc Ls_more_than_95 |
|
subb $64,%cl |
movl (%esi),%ebx /* lsl */ |
movl 4(%esi),%eax /* msl */ |
xorl %edx,%edx /* extension */ |
shrd %cl,%ebx,%edx |
shrd %cl,%eax,%ebx |
shr %cl,%eax /* %eax = surviving part of msl, returned as extension */ |
orl %ebx,%edx /* any bit set in the 64 bits below the extension ? */ |
setne %bl |
test $0x7fffffff,%eax /* only need to look at eax here */ |
setne %bh |
orw %bx,%bx |
setne %al |
xorl %edx,%edx |
movl %edx,(%esi) /* set to zero */ |
movl %edx,4(%esi) /* set to zero */ |
popl %ebx |
popl %esi |
leave |
ret |
|
Ls_more_than_95: |
/* Shift by [96..inf) bits */ |
xorl %eax,%eax |
movl (%esi),%ebx |
orl 4(%esi),%ebx /* non-zero if any bit of the 64 bit arg was set */ |
setne %al /* return 1 if anything was shifted out, else 0 */ |
xorl %ebx,%ebx |
movl %ebx,(%esi) |
movl %ebx,4(%esi) |
popl %ebx |
popl %esi |
leave |
ret |
/control_w.h
0,0 → 1,45
/*---------------------------------------------------------------------------+ |
| control_w.h | |
| | |
| Copyright (C) 1992,1993 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
+---------------------------------------------------------------------------*/ |
|
#ifndef _CONTROLW_H_
#define _CONTROLW_H_

/*
 * Bit definitions for the FPU control word.
 *
 * _Const_() lets the same constants be used from C and from assembler
 * sources: when __ASSEMBLY__ is defined the value is prefixed with '$',
 * otherwise it is used as-is.
 *
 * Every constant here is built with _Const_(), which this header defines
 * itself, so the header does not depend on another header having been
 * included first.
 */
#ifdef __ASSEMBLY__
#define	_Const_(x)	$##x
#else
#define	_Const_(x)	x
#endif

#define CW_RC		_Const_(0x0C00)	/* rounding control */
#define CW_PC		_Const_(0x0300)	/* precision control */

#define CW_Precision	_Const_(0x0020)	/* loss of precision mask */
#define CW_Underflow	_Const_(0x0010)	/* underflow mask */
#define CW_Overflow	_Const_(0x0008)	/* overflow mask */
#define CW_ZeroDiv	_Const_(0x0004)	/* divide by zero mask */
#define CW_Denormal	_Const_(0x0002)	/* denormalized operand mask */
#define CW_Invalid	_Const_(0x0001)	/* invalid operation mask */

#define CW_Exceptions	_Const_(0x003f)	/* all masks */

/* Values of the rounding control field (CW_RC) */
#define RC_RND		_Const_(0x0000)
#define RC_DOWN		_Const_(0x0400)
#define RC_UP		_Const_(0x0800)
#define RC_CHOP		_Const_(0x0C00)

/* p 15-5: Precision control bits affect only the following:
   ADD, SUB(R), MUL, DIV(R), and SQRT */
/* Values of the precision control field (CW_PC) */
#define PR_24_BITS		_Const_(0x000)
#define PR_53_BITS		_Const_(0x200)
#define PR_64_BITS		_Const_(0x300)
#define PR_RESERVED_BITS	_Const_(0x100)
/* FULL_PRECISION simulates all exceptions masked */
#define FULL_PRECISION	(PR_64_BITS | RC_RND | 0x3f)

#endif /* _CONTROLW_H_ */
/fpu_entry.c
0,0 → 1,695
/*---------------------------------------------------------------------------+ |
| fpu_entry.c | |
| | |
| The entry functions for wm-FPU-emu | |
| | |
| Copyright (C) 1992,1993,1994,1996 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | |
| E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| See the files "README" and "COPYING" for further copyright and warranty | |
| information. | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Note: | |
| The file contains code which accesses user memory. | |
| Emulator static data may change when user memory is accessed, due to | |
| other processes using the emulator while swapping is in progress. | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| math_emulate(), restore_i387_soft() and save_i387_soft() are the only | |
| entry points for wm-FPU-emu. | |
+---------------------------------------------------------------------------*/ |
|
#include <linux/signal.h> |
|
#include <asm/segment.h> |
|
#include "fpu_system.h" |
#include "fpu_emu.h" |
#include "exception.h" |
#include "control_w.h" |
#include "status_w.h" |
|
#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ |
|
#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ |
|
/* WARNING: These codes are not documented by Intel in their 80486 manual |
and may not work on FPU clones or later Intel FPUs. */ |
|
/* Changes to support the un-doc codes provided by Linus Torvalds. */ |
|
#define _d9_d8_ fstp_i /* unofficial code (19) */ |
#define _dc_d0_ fcom_st /* unofficial code (14) */ |
#define _dc_d8_ fcompst /* unofficial code (1c) */ |
#define _dd_c8_ fxch_i /* unofficial code (0d) */ |
#define _de_d0_ fcompst /* unofficial code (16) */ |
#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ |
#define _df_c8_ fxch_i /* unofficial code (0f) */ |
#define _df_d0_ fstp_i /* unofficial code (17) */ |
#define _df_d8_ fstp_i /* unofficial code (1f) */ |
|
static FUNC const st_instr_table[64] = { |
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, |
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, |
fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, |
fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, |
fsub__, fp_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, |
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, |
fdiv__, trig_a, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, |
fdivr_, trig_b, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, |
}; |
|
#else /* Support only documented FPU op-codes */ |
|
static FUNC const st_instr_table[64] = { |
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, |
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, |
fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, |
fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, |
fsub__, fp_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, |
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, |
fdiv__, trig_a, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, |
fdivr_, trig_b, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, |
}; |
|
#endif NO_UNDOC_CODE |
|
|
/*
 * Pre-dispatch check codes stored in type_table.  Several names share the
 * value 0: they document how the instruction uses its registers, but no
 * check is made before the handler is called.
 */
#define _NONE_ 0   /* Take no special action */
#define _REG0_ 1   /* Need to check for not empty st(0) */
#define _REGI_ 2   /* Need to check for not empty st(0) and st(rm) */
#define _REGi_ 0   /* Uses st(rm) */
#define _PUSH_ 3   /* Need to check for space to push onto stack */
#define _null_ 4   /* Function illegal or not implemented */
#define _REGIi 5   /* Uses st(0) and st(rm), result to st(rm) */
#define _REGIp 6   /* Uses st(0) and st(rm), result to st(rm) then pop */
#define _REGIc 0   /* Compare st(0) and st(rm) */
#define _REGIn 0   /* Uses st(0) and st(rm), but handle checks later */

/*
 * type_table has one entry per st_instr_table entry and uses the same
 * index.  As with st_instr_table there is a default variant accepting the
 * undocumented encodings and a NO_UNDOC_CODE variant which marks them
 * _null_.
 */
#ifndef NO_UNDOC_CODE

/* Un-documented FPU op-codes supported by default. (see above) */

static unsigned char const type_table[64] = {
  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
  _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
  _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
  _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
};

#else     /* Support only documented FPU op-codes */

static unsigned char const type_table[64] = {
  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
  _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
  _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
  _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
};

#endif /* NO_UNDOC_CODE */


#ifdef RE_ENTRANT_CHECKING
/* Flag used by the re-entrancy checks: math_emulate() reports an error if
   it is entered while this is non-zero. */
char emulating=0;
#endif /* RE_ENTRANT_CHECKING */
|
static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip, |
overrides *override); |
|
/*
 * math_emulate(): emulate the FPU instruction found at FPU_EIP.
 *
 * Outline of the code below:
 *   - on the first use by the current task, initialise the emulator state;
 *   - work out the addressing mode (vm86, 32 bit user, or LDT based
 *     segmented 16/32 bit) and the code segment base and limit;
 *   - skip and record prefix bytes with valid_prefix();
 *   - for mod/rm < 0300 the instruction has a memory operand: either an
 *     arithmetic/compare operation on st(0) and the loaded operand, or a
 *     load/store handled by load_store_instr();
 *   - otherwise the instruction works on registers only: type_table[]
 *     selects the stack checks and st_instr_table[] the handler;
 *   - while FPU_lookahead is set and no reschedule is needed, following
 *     FPU instructions are emulated in the same call.
 *
 * math_abort() is used for the fatal cases; the assembler at the end of
 * that function does not return here.
 */
asmlinkage void math_emulate(long arg) |
{ |
unsigned char FPU_modrm, byte1; |
unsigned short code; |
fpu_addr_modes addr_modes; |
int unmasked; |
FPU_REG loaded_data; |
void *data_address; |
struct address data_sel_off; |
struct address entry_sel_off; |
unsigned long code_base = 0; |
unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ |
char st0_tag; |
FPU_REG *st0_ptr; |
struct desc_struct code_descriptor; |
|
#ifdef RE_ENTRANT_CHECKING |
if ( emulating ) |
{ |
printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); |
} |
RE_ENTRANT_CHECK_ON; |
#endif RE_ENTRANT_CHECKING |
|
/* First use of the emulator by this task */
if (!current->used_math) |
{ |
int i; |
for ( i = 0; i < 8; i++ ) |
{ |
/* Make sure that the registers are compatible |
with the assumptions of the emulator. */ |
if ( !((regs[i].exp == EXP_UNDER) && (regs[i].sigh == 0) |
&& (regs[i].sigl == 0)) ) |
regs[i].sigh |= 0x80000000; |
} |
finit(); |
current->used_math = 1; |
} |
|
SETUP_DATA_AREA(arg); |
|
FPU_ORIG_EIP = FPU_EIP; |
|
if ( (FPU_EFLAGS & 0x00020000) != 0 ) |
{ |
/* Virtual 8086 mode */ |
addr_modes.default_mode = VM86; |
FPU_EIP += code_base = FPU_CS << 4; |
code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ |
} |
else if ( FPU_CS == USER_CS && FPU_DS == USER_DS ) |
{ |
addr_modes.default_mode = 0; |
} |
else if ( FPU_CS == KERNEL_CS ) |
{ |
printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP); |
panic("Math emulation needed in kernel"); |
} |
else |
{ |
|
if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */ |
{ |
/* Can only handle segmented addressing via the LDT |
for now, and it must be 16 bit */ |
printk("FPU emulator: Unsupported addressing mode\n"); |
math_abort(FPU_info, SIGILL); |
} |
|
if ( SEG_D_SIZE(code_descriptor = LDT_DESCRIPTOR(FPU_CS)) ) |
{ |
/* The above test may be wrong, the book is not clear */ |
/* Segmented 32 bit protected mode */ |
addr_modes.default_mode = SEG32; |
} |
else |
{ |
/* 16 bit protected mode */ |
addr_modes.default_mode = PM16; |
} |
FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); |
code_limit = code_base |
+ (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor) |
- 1; |
/* The limit computation wrapped around: clamp it to the top of memory */
if ( code_limit < code_base ) code_limit = 0xffffffff; |
} |
|
/* Lookahead is disabled for traced tasks */
FPU_lookahead = 1; |
if (current->flags & PF_PTRACED) |
FPU_lookahead = 0; |
|
if ( !valid_prefix(&byte1, (unsigned char **)&FPU_EIP, |
&addr_modes.override) ) |
{ |
RE_ENTRANT_CHECK_OFF; |
printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" |
"FPU emulator: self-modifying code! (emulation impossible)\n", |
byte1); |
RE_ENTRANT_CHECK_ON; |
EXCEPTION(EX_INTERNAL|0x126); |
math_abort(FPU_info,SIGILL); |
} |
|
do_another_FPU_instruction: |
|
no_ip_update = 0; |
|
FPU_EIP++; /* We have fetched the prefix and first code bytes. */ |
|
if ( addr_modes.default_mode ) |
{ |
/* This checks for the minimum instruction bytes. |
We also need to check any extra (address mode) code access. */ |
if ( FPU_EIP > code_limit ) |
math_abort(FPU_info,SIGSEGV); |
} |
|
if ( (byte1 & 0xf8) != 0xd8 ) |
{ |
if ( byte1 == FWAIT_OPCODE ) |
{ |
if (partial_status & SW_Summary) |
goto do_the_FPU_interrupt; |
else |
goto FPU_fwait_done; |
} |
#ifdef PARANOID |
EXCEPTION(EX_INTERNAL|0x128); |
math_abort(FPU_info,SIGILL); |
#endif PARANOID |
} |
|
/* Fetch the mod/rm byte which follows the opcode byte */
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(1); |
FPU_modrm = get_fs_byte((unsigned char *) FPU_EIP); |
RE_ENTRANT_CHECK_ON; |
FPU_EIP++; |
|
if (partial_status & SW_Summary) |
{ |
/* Ignore the error for now if the current instruction is a no-wait |
control instruction */ |
/* The 80486 manual contradicts itself on this topic, |
but a real 80486 uses the following instructions: |
fninit, fnstcw, fnsave, fnstsw, fnstenv, fnclex. |
*/ |
code = (FPU_modrm << 8) | byte1; |
if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ |
(((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, |
fnstsw */ |
((code & 0xc000) != 0xc000))) ) ) |
{ |
/* |
* We need to simulate the action of the kernel to FPU |
* interrupts here. |
*/ |
do_the_FPU_interrupt: |
FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ |
|
RE_ENTRANT_CHECK_OFF; |
current->tss.trap_no = 16; |
current->tss.error_code = 0; |
send_sig(SIGFPE, current, 1); |
return; |
} |
} |
|
/* Remember where this instruction started; copied to instruction_address
   below unless the handler sets no_ip_update */
entry_sel_off.offset = FPU_ORIG_EIP; |
entry_sel_off.selector = FPU_CS; |
entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; |
|
FPU_rm = FPU_modrm & 7; |
|
if ( FPU_modrm < 0300 ) |
{ |
/* All of these instructions use the mod/rm byte to get a data address */ |
|
if ( (addr_modes.default_mode & SIXTEEN) |
^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) ) |
data_address = get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off, |
addr_modes); |
else |
data_address = get_address(FPU_modrm, &FPU_EIP, &data_sel_off, |
addr_modes); |
|
if ( addr_modes.default_mode ) |
{ |
if ( FPU_EIP-1 > code_limit ) |
math_abort(FPU_info,SIGSEGV); |
} |
|
/* Even first opcode byte (d8, da, dc, de): arithmetic or compare of
   st(0) with an operand loaded from memory */
if ( !(byte1 & 1) ) |
{ |
unsigned short status1 = partial_status; |
|
st0_ptr = &st(0); |
st0_tag = st0_ptr->tag; |
|
/* Stack underflow has priority */ |
if ( NOT_EMPTY_ST0 ) |
{ |
if ( addr_modes.default_mode & PROTECTED ) |
{ |
/* This table works for 16 and 32 bit protected mode */ |
if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] ) |
math_abort(FPU_info,SIGSEGV); |
} |
|
unmasked = 0; /* Do this here to stop compiler warnings. */ |
/* Bits 1..2 of the opcode byte select the memory operand format */
switch ( (byte1 >> 1) & 3 ) |
{ |
case 0: |
unmasked = reg_load_single((float *)data_address, |
&loaded_data); |
break; |
case 1: |
reg_load_int32((long *)data_address, &loaded_data); |
break; |
case 2: |
unmasked = reg_load_double((double *)data_address, |
&loaded_data); |
break; |
case 3: |
reg_load_int16((short *)data_address, &loaded_data); |
break; |
} |
|
/* No more access to user memory, it is safe |
to use static data now */ |
|
/* NaN operands have the next priority. */ |
/* We have to delay looking at st(0) until after |
loading the data, because that data might contain an SNaN */ |
if ( (st0_tag == TW_NaN) || |
(loaded_data.tag == TW_NaN) ) |
{ |
/* Restore the status word; we might have loaded a |
denormal. */ |
partial_status = status1; |
if ( (FPU_modrm & 0x30) == 0x10 ) |
{ |
/* fcom or fcomp */ |
EXCEPTION(EX_Invalid); |
setcc(SW_C3 | SW_C2 | SW_C0); |
if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) |
pop(); /* fcomp, masked, so we pop. */ |
} |
else |
{ |
#ifdef PECULIAR_486 |
/* This is not really needed, but gives behaviour |
identical to an 80486 */ |
if ( (FPU_modrm & 0x28) == 0x20 ) |
/* fdiv or fsub */ |
real_2op_NaN(&loaded_data, st0_ptr, |
st0_ptr); |
else |
#endif PECULIAR_486 |
/* fadd, fdivr, fmul, or fsubr */ |
real_2op_NaN(st0_ptr, &loaded_data, |
st0_ptr); |
} |
goto reg_mem_instr_done; |
} |
|
if ( unmasked && !((FPU_modrm & 0x30) == 0x10) ) |
{ |
/* Is not a comparison instruction. */ |
if ( (FPU_modrm & 0x38) == 0x38 ) |
{ |
/* fdivr */ |
if ( (st0_tag == TW_Zero) && |
(loaded_data.tag == TW_Valid) ) |
{ |
if ( divide_by_zero(loaded_data.sign, |
st0_ptr) ) |
{ |
/* We use the fact here that the unmasked |
exception in the loaded data was for a |
denormal operand */ |
/* Restore the state of the denormal op bit */ |
partial_status &= ~SW_Denorm_Op; |
partial_status |= status1 & SW_Denorm_Op; |
} |
} |
} |
goto reg_mem_instr_done; |
} |
|
/* Bits 3..5 of the mod/rm byte select the operation */
switch ( (FPU_modrm >> 3) & 7 ) |
{ |
case 0: /* fadd */ |
clear_C1(); |
reg_add(st0_ptr, &loaded_data, st0_ptr, |
control_word); |
break; |
case 1: /* fmul */ |
clear_C1(); |
reg_mul(st0_ptr, &loaded_data, st0_ptr, |
control_word); |
break; |
case 2: /* fcom */ |
compare_st_data(&loaded_data); |
break; |
case 3: /* fcomp */ |
if ( !compare_st_data(&loaded_data) && !unmasked ) |
pop(); |
break; |
case 4: /* fsub */ |
clear_C1(); |
reg_sub(st0_ptr, &loaded_data, st0_ptr, |
control_word); |
break; |
case 5: /* fsubr */ |
clear_C1(); |
reg_sub(&loaded_data, st0_ptr, st0_ptr, |
control_word); |
break; |
case 6: /* fdiv */ |
clear_C1(); |
reg_div(st0_ptr, &loaded_data, st0_ptr, |
control_word); |
break; |
case 7: /* fdivr */ |
clear_C1(); |
if ( st0_tag == TW_Zero ) |
partial_status = status1; /* Undo any denorm tag, |
zero-divide has priority. */ |
reg_div(&loaded_data, st0_ptr, st0_ptr, |
control_word); |
break; |
} |
} |
else |
{ |
if ( (FPU_modrm & 0x30) == 0x10 ) |
{ |
/* The instruction is fcom or fcomp */ |
EXCEPTION(EX_StackUnder); |
setcc(SW_C3 | SW_C2 | SW_C0); |
if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) |
pop(); /* fcomp */ |
} |
else |
stack_underflow(); |
} |
reg_mem_instr_done: |
operand_address = data_sel_off; |
} |
else |
{ |
/* Odd first opcode byte: handled by load_store_instr(); its non-zero
   result also suppresses the instruction/operand address update */
if ( !(no_ip_update = |
load_store_instr(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1, |
addr_modes, data_address)) ) |
{ |
operand_address = data_sel_off; |
} |
} |
|
} |
else |
{ |
/* None of these instructions access user memory */ |
unsigned char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); |
|
#ifdef PECULIAR_486 |
/* This is supposed to be undefined, but a real 80486 seems |
to do this: */ |
operand_address.offset = 0; |
operand_address.selector = FPU_DS; |
#endif PECULIAR_486 |
|
st0_ptr = &st(0); |
st0_tag = st0_ptr->tag; |
/* Perform the stack checks required for this instruction before
   calling its handler */
switch ( type_table[(int) instr_index] ) |
{ |
case _NONE_: /* also _REGIc: _REGIn */ |
break; |
case _REG0_: |
if ( !NOT_EMPTY_ST0 ) |
{ |
stack_underflow(); |
goto FPU_instruction_done; |
} |
break; |
case _REGIi: |
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) |
{ |
stack_underflow_i(FPU_rm); |
goto FPU_instruction_done; |
} |
break; |
case _REGIp: |
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) |
{ |
stack_underflow_pop(FPU_rm); |
goto FPU_instruction_done; |
} |
break; |
case _REGI_: |
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) |
{ |
stack_underflow(); |
goto FPU_instruction_done; |
} |
break; |
case _PUSH_: /* Only used by the fld st(i) instruction */ |
break; |
case _null_: |
FPU_illegal(); |
goto FPU_instruction_done; |
default: |
EXCEPTION(EX_INTERNAL|0x111); |
goto FPU_instruction_done; |
} |
(*st_instr_table[(int) instr_index])(); |
|
FPU_instruction_done: |
; |
} |
|
if ( ! no_ip_update ) |
instruction_address = entry_sel_off; |
|
FPU_fwait_done: |
|
#ifdef DEBUG |
RE_ENTRANT_CHECK_OFF; |
emu_printall(); |
RE_ENTRANT_CHECK_ON; |
#endif DEBUG |
|
/* Lookahead: if the next bytes form another FPU instruction, emulate it
   now instead of returning */
if (FPU_lookahead && !need_resched) |
{ |
FPU_ORIG_EIP = FPU_EIP - code_base; |
if ( valid_prefix(&byte1, (unsigned char **)&FPU_EIP, |
&addr_modes.override) ) |
goto do_another_FPU_instruction; |
} |
|
/* Convert FPU_EIP back from a linear address to a segment offset */
if ( addr_modes.default_mode ) |
FPU_EIP -= code_base; |
|
RE_ENTRANT_CHECK_OFF; |
} |
|
|
/* Support for prefix bytes is not yet complete. To properly handle |
all prefix bytes, further changes are needed in the emulator code |
which accesses user address space. Access to separate segments is |
important for msdos emulation. */ |
/* valid_prefix(): scan the code bytes at *fpu_eip, recording address
   size, operand size and segment override prefixes in *override.
   Returns 1 when the scan ends on FWAIT or on an FPU opcode byte
   (0xd8..0xdf), and 0 on any other byte.  In all three cases the last
   byte fetched is stored in *Byte.
   NOTE(review): *fpu_eip is only advanced past the prefixes when an FPU
   opcode byte is found; it is left unchanged in the FWAIT case and in
   the failure case. */
static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip, |
overrides *override) |
{ |
unsigned char byte; |
unsigned char *ip = *fpu_eip; |
|
*override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */ |
|
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(1); |
byte = get_fs_byte(ip); |
RE_ENTRANT_CHECK_ON; |
|
while ( 1 ) |
{ |
switch ( byte ) |
{ |
case ADDR_SIZE_PREFIX: |
override->address_size = ADDR_SIZE_PREFIX; |
goto do_next_byte; |
|
case OP_SIZE_PREFIX: |
override->operand_size = OP_SIZE_PREFIX; |
goto do_next_byte; |
|
case PREFIX_CS: |
override->segment = PREFIX_CS_; |
goto do_next_byte; |
case PREFIX_ES: |
override->segment = PREFIX_ES_; |
goto do_next_byte; |
case PREFIX_SS: |
override->segment = PREFIX_SS_; |
goto do_next_byte; |
case PREFIX_FS: |
override->segment = PREFIX_FS_; |
goto do_next_byte; |
case PREFIX_GS: |
override->segment = PREFIX_GS_; |
goto do_next_byte; |
case PREFIX_DS: |
override->segment = PREFIX_DS_; |
goto do_next_byte; |
|
/* lock is not a valid prefix for FPU instructions, |
let the cpu handle it to generate a SIGILL. */ |
/* case PREFIX_LOCK: */ |
|
/* rep.. prefixes have no meaning for FPU instructions */ |
case PREFIX_REPE: |
case PREFIX_REPNE: |
|
/* The rep prefixes fall through to here; the other prefix cases jump
   here after recording their override.  Fetch the next code byte and
   go round the loop again. */
do_next_byte: |
ip++; |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(1); |
byte = get_fs_byte(ip); |
RE_ENTRANT_CHECK_ON; |
break; |
case FWAIT_OPCODE: |
*Byte = byte; |
return 1; |
default: |
if ( (byte & 0xf8) == 0xd8 ) |
{ |
*Byte = byte; |
*fpu_eip = ip; |
return 1; |
} |
else |
{ |
/* Not a valid sequence of prefix bytes followed by |
an FPU instruction. */ |
*Byte = byte; /* Needed for error message. */ |
return 0; |
} |
} |
} |
} |
|
|
/* math_abort(): abandon emulation of the current instruction.
   Restores FPU_EIP to the start of the instruction, records trap 16 in
   the task's tss, sends `signal` to the current task and then reloads
   the stack pointer from (info - 4) and executes `ret`.
   NOTE(review): the asm is presumably meant to return straight to the
   caller of math_emulate(), so control should never reach the PARANOID
   message below - confirm against the entry code's stack layout. */
void math_abort(struct info * info, unsigned int signal) |
{ |
FPU_EIP = FPU_ORIG_EIP; |
current->tss.trap_no = 16; |
current->tss.error_code = 0; |
send_sig(signal,current,1); |
RE_ENTRANT_CHECK_OFF; |
__asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4)); |
#ifdef PARANOID |
printk("ERROR: wm-FPU-emu math_abort failed!\n"); |
#endif PARANOID |
} |
|
|
|
void restore_i387_soft(struct _fpstate *buf) |
{ |
fpu_addr_modes addr_modes = {{ 0, 0, PREFIX_DEFAULT }, 0}; |
|
frstor(addr_modes, (char *)buf); |
} |
|
|
struct _fpstate * save_i387_soft(struct _fpstate * buf) |
{ |
fpu_addr_modes addr_modes = {{ 0, 0, PREFIX_DEFAULT }, 0}; |
|
fsave(addr_modes, (char *)buf); |
|
return buf; |
} |
/errors.c
0,0 → 1,659
/*---------------------------------------------------------------------------+ |
| errors.c | |
| | |
| The error handling functions for wm-FPU-emu | |
| | |
| Copyright (C) 1992,1993,1994,1996 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | |
| E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Note: | |
| The file contains code which accesses user memory. | |
| Emulator static data may change when user memory is accessed, due to | |
| other processes using the emulator while swapping is in progress. | |
+---------------------------------------------------------------------------*/ |
|
#include <linux/signal.h> |
|
#include <asm/segment.h> |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "fpu_emu.h" |
#include "status_w.h" |
#include "control_w.h" |
#include "reg_constant.h" |
#include "version.h" |
|
/* */ |
#undef PRINT_MESSAGES |
/* */ |
|
|
void Un_impl(void) |
{ |
unsigned char byte1, FPU_modrm; |
unsigned long address = FPU_ORIG_EIP; |
|
RE_ENTRANT_CHECK_OFF; |
/* No need to verify_area(), we have previously fetched these bytes. */ |
printk("Unimplemented FPU Opcode at eip=%p : ", (void *) address); |
if ( FPU_CS == USER_CS ) |
{ |
while ( 1 ) |
{ |
byte1 = get_fs_byte((unsigned char *) address); |
if ( (byte1 & 0xf8) == 0xd8 ) break; |
printk("[%02x]", byte1); |
address++; |
} |
printk("%02x ", byte1); |
FPU_modrm = get_fs_byte(1 + (unsigned char *) address); |
|
if (FPU_modrm >= 0300) |
printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); |
else |
printk("/%d\n", (FPU_modrm >> 3) & 7); |
} |
else |
{ |
printk("cs selector = %04x\n", FPU_CS); |
} |
|
RE_ENTRANT_CHECK_ON; |
|
EXCEPTION(EX_Invalid); |
|
} |
|
|
/* |
Called for opcodes which are illegal and which are known to result in a |
SIGILL with a real 80486. |
*/ |
void FPU_illegal(void) |
{ |
math_abort(FPU_info,SIGILL); |
} |
|
|
|
/* emu_printall(): debugging aid which prints, via printk(), the code bytes
   of the current instruction, the status and control words and every
   non-empty register of the emulated FPU.
   NOTE(review): this function assigns status_word() to partial_status, so
   it is not a pure observer of the emulator state - confirm that this is
   intended. */
void emu_printall(void) |
{ |
int i; |
/* Names indexed by register tag value */
static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "ERROR", |
"DeNorm", "Inf", "NaN", "Empty" }; |
unsigned char byte1, FPU_modrm; |
unsigned long address = FPU_ORIG_EIP; |
|
RE_ENTRANT_CHECK_OFF; |
/* No need to verify_area(), we have previously fetched these bytes. */ |
printk("At %p:", (void *) address); |
if ( FPU_CS == USER_CS ) |
{ |
/* Bytes in front of the FPU opcode are printed in brackets; give up
   after MAX_PRINTED_BYTES of them */
#define MAX_PRINTED_BYTES 20 |
for ( i = 0; i < MAX_PRINTED_BYTES; i++ ) |
{ |
byte1 = get_fs_byte((unsigned char *) address); |
if ( (byte1 & 0xf8) == 0xd8 ) |
{ |
printk(" %02x", byte1); |
break; |
} |
printk(" [%02x]", byte1); |
address++; |
} |
if ( i == MAX_PRINTED_BYTES ) |
printk(" [more..]\n"); |
else |
{ |
FPU_modrm = get_fs_byte(1 + (unsigned char *) address); |
|
if (FPU_modrm >= 0300) |
printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); |
else |
printk(" /%d, mod=%d rm=%d\n", |
(FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7); |
} |
} |
else |
{ |
printk("%04x\n", FPU_CS); |
} |
|
partial_status = status_word(); |
|
#ifdef DEBUGGING |
if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n"); |
if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n"); |
if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n"); |
if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n"); |
if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n"); |
if ( partial_status & SW_Summary ) printk("SW: exception summary\n"); |
if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n"); |
if ( partial_status & SW_Precision ) printk("SW: loss of precision\n"); |
if ( partial_status & SW_Underflow ) printk("SW: underflow\n"); |
if ( partial_status & SW_Overflow ) printk("SW: overflow\n"); |
if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n"); |
if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n"); |
if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n"); |
#endif DEBUGGING |
|
printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", |
partial_status & 0x8000 ? 1 : 0, /* busy */ |
(partial_status & 0x3800) >> 11, /* stack top pointer */ |
partial_status & 0x80 ? 1 : 0, /* Error summary status */ |
partial_status & 0x40 ? 1 : 0, /* Stack flag */ |
partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */ |
partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */ |
partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0, |
partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0, |
partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0); |
|
/* NOTE(review): the control word mask bits are tested below with the SW_*
   flag constants; presumably the exception masks occupy the same bit
   positions as the status flags - confirm against status_w.h. */
printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n", |
control_word & 0x1000 ? 1 : 0, |
(control_word & 0x800) >> 11, (control_word & 0x400) >> 10, |
(control_word & 0x200) >> 9, (control_word & 0x100) >> 8, |
control_word & 0x80 ? 1 : 0, |
control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0, |
control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0, |
control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0); |
|
/* One line per non-empty register: sign, significand as four 16 bit
   groups, exponent, then the tag name */
for ( i = 0; i < 8; i++ ) |
{ |
FPU_REG *r = &st(i); |
char tagi = r->tag; |
switch (tagi) |
{ |
case TW_Empty: |
continue; |
break; |
case TW_Zero: |
#if 0 |
printk("st(%d) %c .0000 0000 0000 0000 ", |
i, r->sign ? '-' : '+'); |
break; |
#endif |
/* With the code above disabled, TW_Zero falls through and is printed
   like the other tags */
case TW_Valid: |
case TW_NaN: |
/* case TW_Denormal: */ |
case TW_Infinity: |
printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6ld ", i, |
r->sign ? '-' : '+', |
(long)(r->sigh >> 16), |
(long)(r->sigh & 0xFFFF), |
(long)(r->sigl >> 16), |
(long)(r->sigl & 0xFFFF), |
r->exp - EXP_BIAS + 1); |
break; |
default: |
printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi); |
continue; |
break; |
} |
printk("%s\n", tag_desc[(int) (unsigned) tagi]); |
} |
|
RE_ENTRANT_CHECK_ON; |
|
} |
|
static struct { |
int type; |
const char *name; |
} exception_names[] = { |
{ EX_StackOver, "stack overflow" }, |
{ EX_StackUnder, "stack underflow" }, |
{ EX_Precision, "loss of precision" }, |
{ EX_Underflow, "underflow" }, |
{ EX_Overflow, "overflow" }, |
{ EX_ZeroDiv, "divide by zero" }, |
{ EX_Denormal, "denormalized operand" }, |
{ EX_Invalid, "invalid operation" }, |
{ EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION }, |
{ 0, NULL } |
}; |
|
/* |
EX_INTERNAL is always given with a code which indicates where the |
error was detected. |
|
Internal error types: |
0x14 in fpu_etc.c |
0x1nn in a *.c file: |
0x101 in reg_add_sub.c |
0x102 in reg_mul.c |
0x104 in poly_atan.c |
0x105 in reg_mul.c |
0x107 in fpu_trig.c |
0x108 in reg_compare.c |
0x109 in reg_compare.c |
0x110 in reg_add_sub.c |
0x111 in fpu_entry.c |
0x112 in fpu_trig.c |
0x113 in errors.c |
0x115 in fpu_trig.c |
0x116 in fpu_trig.c |
0x117 in fpu_trig.c |
0x118 in fpu_trig.c |
0x119 in fpu_trig.c |
0x120 in poly_atan.c |
0x121 in reg_compare.c |
0x122 in reg_compare.c |
0x123 in reg_compare.c |
0x125 in fpu_trig.c |
0x126 in fpu_entry.c |
0x127 in poly_2xm1.c |
0x128 in fpu_entry.c |
0x129 in fpu_entry.c |
0x130 in get_address.c |
0x131 in get_address.c |
0x132 in get_address.c |
0x133 in get_address.c |
0x140 in load_store.c |
0x141 in load_store.c |
0x150 in poly_sin.c |
0x151 in poly_sin.c |
0x160 in reg_ld_str.c |
0x161 in reg_ld_str.c |
0x162 in reg_ld_str.c |
0x163 in reg_ld_str.c |
0x2nn in an *.S file: |
0x201 in reg_u_add.S |
0x202 in reg_u_div.S |
0x203 in reg_u_div.S |
0x204 in reg_u_div.S |
0x205 in reg_u_mul.S |
0x206 in reg_u_sub.S |
0x207 in wm_sqrt.S |
0x208 in reg_div.S |
0x209 in reg_u_sub.S |
0x210 in reg_u_sub.S |
0x211 in reg_u_sub.S |
0x212 in reg_u_sub.S |
0x213 in wm_sqrt.S |
0x214 in wm_sqrt.S |
0x215 in wm_sqrt.S |
0x220 in reg_norm.S |
0x221 in reg_norm.S |
0x230 in reg_round.S |
0x231 in reg_round.S |
0x232 in reg_round.S |
0x233 in reg_round.S |
0x234 in reg_round.S |
0x235 in reg_round.S |
0x236 in reg_round.S |
0x240 in div_Xsig.S |
0x241 in div_Xsig.S |
0x242 in div_Xsig.S |
*/ |
|
void exception(int n) |
{ |
int i, int_type; |
|
int_type = 0; /* Needed only to stop compiler warnings */ |
if ( n & EX_INTERNAL ) |
{ |
int_type = n - EX_INTERNAL; |
n = EX_INTERNAL; |
/* Set lots of exception bits! */ |
partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); |
} |
else |
{ |
/* Extract only the bits which we use to set the status word */ |
n &= (SW_Exc_Mask); |
/* Set the corresponding exception bit */ |
partial_status |= n; |
/* Set summary bits iff exception isn't masked */ |
if ( partial_status & ~control_word & CW_Exceptions ) |
partial_status |= (SW_Summary | SW_Backward); |
if ( n & (SW_Stack_Fault | EX_Precision) ) |
{ |
if ( !(n & SW_C1) ) |
/* This bit distinguishes over- from underflow for a stack fault, |
and roundup from round-down for precision loss. */ |
partial_status &= ~SW_C1; |
} |
} |
|
RE_ENTRANT_CHECK_OFF; |
if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) ) |
{ |
#ifdef PRINT_MESSAGES |
/* My message from the sponsor */ |
printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n"); |
#endif PRINT_MESSAGES |
|
/* Get a name string for error reporting */ |
for (i=0; exception_names[i].type; i++) |
if ( (exception_names[i].type & n) == exception_names[i].type ) |
break; |
|
if (exception_names[i].type) |
{ |
#ifdef PRINT_MESSAGES |
printk("FP Exception: %s!\n", exception_names[i].name); |
#endif PRINT_MESSAGES |
} |
else |
printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); |
|
if ( n == EX_INTERNAL ) |
{ |
printk("FPU emulator: Internal error type 0x%04x\n", int_type); |
emu_printall(); |
} |
#ifdef PRINT_MESSAGES |
else |
emu_printall(); |
#endif PRINT_MESSAGES |
|
/* |
* The 80486 generates an interrupt on the next non-control FPU |
* instruction. So we need some means of flagging it. |
* We use the ES (Error Summary) bit for this. |
*/ |
} |
RE_ENTRANT_CHECK_ON; |
|
#ifdef __DEBUG__ |
math_abort(FPU_info,SIGFPE); |
#endif __DEBUG__ |
|
} |
|
|
/* Real operation attempted on two operands, one a NaN. */ |
/* Returns nz if the exception is unmasked */ |
/*
 * Produce the result of a two-operand operation in which at least one
 * operand is a NaN.
 *
 * a, b:  the source operands.
 * dest:  receives the result; left untouched when a signalling NaN is
 *        involved and the invalid-operation exception is unmasked.
 *
 * When both operands are NaNs the one with the larger significand is
 * used (a is kept when they are equal).  A NaN whose sigh bit 0x40000000
 * is clear is treated as signalling; with two NaNs the operation is
 * signalling unless both have that bit set.  A pseudo-NaN (ms bit of
 * sigh clear) is replaced by CONST_QNaN before being stored.
 *
 * Returns 0 for quiet NaNs and for a masked invalid exception, non-zero
 * when the invalid exception is raised unmasked.
 */
asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest)
{
  /* The default result for the case of two "equal" NaNs (signs may
     differ) is chosen to reproduce 80486 behaviour */
  FPU_REG const *x = a;
  int signalling;

  if ( a->tag == TW_NaN )
    {
      if ( b->tag == TW_NaN )
        {
          signalling = !(a->sigh & b->sigh & 0x40000000);
          /* find the "larger" */
          if ( significand(a) < significand(b) )
            x = b;
        }
      else
        {
          /* return the quiet version of the NaN in a */
          signalling = !(a->sigh & 0x40000000);
        }
    }
#ifdef PARANOID
  else if ( b->tag != TW_NaN )
    {
      /* Neither operand is a NaN: the caller should never get here */
      signalling = 0;
      EXCEPTION(EX_INTERNAL|0x113);
      x = &CONST_QNaN;
    }
#endif /* PARANOID */
  else
    {
      /* Only b is a NaN */
      signalling = !(b->sigh & 0x40000000);
      x = b;
    }

  if ( !signalling )
    {
      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
        x = &CONST_QNaN;
      reg_move(x, dest);
      return 0;
    }

  if ( control_word & CW_Invalid )
    {
      /* The masked response */
      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
        x = &CONST_QNaN;
      reg_move(x, dest);
      /* ensure a Quiet NaN */
      dest->sigh |= 0x40000000;
    }

  EXCEPTION(EX_Invalid);

  return !(control_word & CW_Invalid);
}
|
|
/* Invalid arith operation on Valid registers */ |
/* Returns nz if the exception is unmasked */ |
/*
 * Raise the invalid-operation exception for an arithmetic operation.
 * If the exception is masked, dest is loaded with CONST_QNaN and 0 is
 * returned; if it is unmasked, dest is left alone and 1 is returned.
 */
asmlinkage int arith_invalid(FPU_REG *dest)
{
  EXCEPTION(EX_Invalid);

  if ( !(control_word & CW_Invalid) )
    return 1;                     /* unmasked: no result is written */

  /* The masked response */
  reg_move(&CONST_QNaN, dest);
  return 0;
}
|
|
/* Divide a finite number by zero */ |
/*
 * Handle division of a finite number by zero.
 * With the zero-divide exception masked, dest becomes an infinity that
 * carries the requested sign.  The exception is raised in either case.
 * Returns 0 if the exception is masked and 1 if it is not.
 */
asmlinkage int divide_by_zero(int sign, FPU_REG *dest)
{
  if ( control_word & CW_ZeroDiv )
    {
      /* The masked response: a signed infinity */
      reg_move(&CONST_INF, dest);
      dest->sign = (unsigned char)sign;
    }

  EXCEPTION(EX_ZeroDiv);

  return (control_word & CW_ZeroDiv) ? 0 : 1;
}
|
|
/* This may be called often, so keep it lean */ |
int set_precision_flag(int flags) |
{ |
if ( control_word & CW_Precision ) |
{ |
partial_status &= ~(SW_C1 & flags); |
partial_status |= flags; /* The masked response */ |
return 0; |
} |
else |
{ |
exception(flags); |
return 1; |
} |
} |
|
|
/* This may be called often, so keep it lean */ |
asmlinkage void set_precision_flag_up(void) |
{ |
if ( control_word & CW_Precision ) |
partial_status |= (SW_Precision | SW_C1); /* The masked response */ |
else |
exception(EX_Precision | SW_C1); |
|
} |
|
|
/* This may be called often, so keep it lean */ |
asmlinkage void set_precision_flag_down(void) |
{ |
if ( control_word & CW_Precision ) |
{ /* The masked response */ |
partial_status &= ~SW_C1; |
partial_status |= SW_Precision; |
} |
else |
exception(EX_Precision); |
} |
|
|
asmlinkage int denormal_operand(void) |
{ |
if ( control_word & CW_Denormal ) |
{ /* The masked response */ |
partial_status |= SW_Denorm_Op; |
return 0; |
} |
else |
{ |
exception(EX_Denormal); |
return 1; |
} |
} |
|
|
/*
 * Handle an arithmetic overflow of the value in dest.
 *
 * Unmasked overflow: the exponent of dest is reduced by 3 * 2^13, the
 * overflow exception is raised and 0 is returned.
 *
 * Masked overflow: dest becomes an infinity with its original sign, the
 * overflow exception is raised, followed by a precision exception with
 * the round-up bit.  The return value is then non-zero iff the precision
 * exception is unmasked.
 */
asmlinkage int arith_overflow(FPU_REG *dest)
{
  char sign;

  if ( !(control_word & CW_Overflow) )
    {
      /* Subtract the magic number from the exponent */
      dest->exp -= (3 * (1 << 13));
      EXCEPTION(EX_Overflow);
      return 0;
    }

  /* The masked response */
  /* ###### The response here depends upon the rounding mode */
  sign = dest->sign;
  reg_move(&CONST_INF, dest);
  dest->sign = sign;

  EXCEPTION(EX_Overflow);

  /* The overflow exception is masked. */
  /* By definition, precision is lost.
     The roundup bit (C1) is also set because we have
     "rounded" upwards to Infinity. */
  EXCEPTION(EX_Precision | SW_C1);
  return !(control_word & CW_Precision);
}
|
|
/*
 * Handle an arithmetic underflow of the value in dest.
 *
 * Unmasked underflow: the exponent of dest is increased by 3 * 2^13, the
 * underflow exception is raised and 0 is returned.
 *
 * Masked underflow: a value whose exponent is at or below EXP_UNDER - 63
 * is replaced by zero (and C1 is cleared); then the underflow and
 * precision exceptions are raised.  The return value is non-zero iff the
 * precision exception is unmasked.
 */
asmlinkage int arith_underflow(FPU_REG *dest)
{
  if ( !(control_word & CW_Underflow) )
    {
      /* Add the magic number to the exponent. */
      dest->exp += (3 * (1 << 13));
      EXCEPTION(EX_Underflow);
      return 0;
    }

  /* The masked response */
  if ( dest->exp <= EXP_UNDER - 63 )
    {
      reg_move(&CONST_Z, dest);
      partial_status &= ~SW_C1;       /* Round down. */
    }

  EXCEPTION(EX_Underflow);

  /* The underflow exception is masked. */
  EXCEPTION(EX_Precision);
  return !(control_word & CW_Precision);
}
|
|
void stack_overflow(void) |
{ |
|
if ( control_word & CW_Invalid ) |
{ |
/* The masked response */ |
top--; |
reg_move(&CONST_QNaN, &st(0)); |
} |
|
EXCEPTION(EX_StackOver); |
|
return; |
|
} |
|
|
void stack_underflow(void) |
{ |
|
if ( control_word & CW_Invalid ) |
{ |
/* The masked response */ |
reg_move(&CONST_QNaN, &st(0)); |
} |
|
EXCEPTION(EX_StackUnder); |
|
return; |
|
} |
|
|
void stack_underflow_i(int i) |
{ |
|
if ( control_word & CW_Invalid ) |
{ |
/* The masked response */ |
reg_move(&CONST_QNaN, &(st(i))); |
} |
|
EXCEPTION(EX_StackUnder); |
|
return; |
|
} |
|
|
void stack_underflow_pop(int i) |
{ |
|
if ( control_word & CW_Invalid ) |
{ |
/* The masked response */ |
reg_move(&CONST_QNaN, &(st(i))); |
pop(); |
} |
|
EXCEPTION(EX_StackUnder); |
|
return; |
|
} |
|
/wm_sqrt.S
0,0 → 1,471
.file "wm_sqrt.S" |
/*---------------------------------------------------------------------------+ |
| wm_sqrt.S | |
| | |
| Fixed point arithmetic square root evaluation. | |
| | |
| Copyright (C) 1992,1993,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| void wm_sqrt(FPU_REG *n, unsigned int control_word) | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| wm_sqrt(FPU_REG *n, unsigned int control_word) | |
| returns the square root of n in n. | |
| | |
| Use Newton's method to compute the square root of a number, which must | |
| be in the range [1.0 .. 4.0), to 64 bits accuracy. | |
| Does not check the sign or tag of the argument. | |
| Sets the exponent, but not the sign or tag of the result. | |
| | |
| The guess is kept in %esi:%edi | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "fpu_emu.h" |
|
|
#ifndef NON_REENTRANT_FPU |
/* Local storage on the stack: */ |
#define FPU_accum_3 -4(%ebp) /* ms word */ |
#define FPU_accum_2 -8(%ebp) |
#define FPU_accum_1 -12(%ebp) |
#define FPU_accum_0 -16(%ebp) |
|
/* |
* The de-normalised argument: |
* sq_2 sq_1 sq_0 |
* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 |
* ^ binary point here |
*/ |
#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ |
#define FPU_fsqrt_arg_1 -24(%ebp) |
#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ |
|
#else |
/* Local storage in a static area: */ |
.data |
.align 4,0 |
FPU_accum_3: |
.long 0 /* ms word */ |
FPU_accum_2: |
.long 0 |
FPU_accum_1: |
.long 0 |
FPU_accum_0: |
.long 0 |
|
/* The de-normalised argument: |
sq_2 sq_1 sq_0 |
b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 |
^ binary point here |
*/ |
FPU_fsqrt_arg_2: |
.long 0 /* ms word */ |
FPU_fsqrt_arg_1: |
.long 0 |
FPU_fsqrt_arg_0: |
.long 0 /* ls word, at most the ms bit is set */ |
#endif NON_REENTRANT_FPU |
|
|
.text |
ENTRY(wm_sqrt) |
pushl %ebp |
movl %esp,%ebp |
#ifndef NON_REENTRANT_FPU |
subl $28,%esp |
#endif NON_REENTRANT_FPU |
pushl %esi |
pushl %edi |
pushl %ebx |
|
movl PARAM1,%esi |
|
movl SIGH(%esi),%eax |
movl SIGL(%esi),%ecx |
xorl %edx,%edx |
|
/* We use a rough linear estimate for the first guess.. */ |
|
cmpl EXP_BIAS,EXP(%esi) |
jnz sqrt_arg_ge_2 |
|
shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ |
rcrl $1,%ecx |
rcrl $1,%edx |
|
sqrt_arg_ge_2: |
/* From here on, n is never accessed directly again until it is |
replaced by the answer. */ |
|
movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ |
movl %ecx,FPU_fsqrt_arg_1 |
movl %edx,FPU_fsqrt_arg_0 |
|
/* Make a linear first estimate */ |
shrl $1,%eax |
addl $0x40000000,%eax |
movl $0xaaaaaaaa,%ecx |
mull %ecx |
shll %edx /* max result was 7fff... */ |
testl $0x80000000,%edx /* but min was 3fff... */ |
jnz sqrt_prelim_no_adjust |
|
movl $0x80000000,%edx /* round up */ |
|
sqrt_prelim_no_adjust: |
movl %edx,%esi /* Our first guess */ |
|
/* We have now computed (approx) (2 + x) / 3, which forms the basis |
for a few iterations of Newton's method */ |
|
movl FPU_fsqrt_arg_2,%ecx /* ms word */ |
|
/* |
* From our initial estimate, three iterations are enough to get us |
* to 30 bits or so. This will then allow two iterations at better |
* precision to complete the process. |
*/ |
|
/* Compute (g + n/g)/2 at each iteration (g is the guess). */ |
shrl %ecx /* Doing this first will prevent a divide */ |
/* overflow later. */ |
|
movl %ecx,%edx /* msw of the arg / 2 */ |
divl %esi /* current estimate */ |
shrl %esi /* divide by 2 */ |
addl %eax,%esi /* the new estimate */ |
|
movl %ecx,%edx |
divl %esi |
shrl %esi |
addl %eax,%esi |
|
movl %ecx,%edx |
divl %esi |
shrl %esi |
addl %eax,%esi |
|
/* |
* Now that an estimate accurate to about 30 bits has been obtained (in %esi), |
* we improve it to 60 bits or so. |
* |
* The strategy from now on is to compute new estimates from |
* guess := guess + (n - guess^2) / (2 * guess) |
*/ |
|
/* First, find the square of the guess */ |
movl %esi,%eax |
mull %esi |
/* guess^2 now in %edx:%eax */ |
|
movl FPU_fsqrt_arg_1,%ecx |
subl %ecx,%eax |
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ |
sbbl %ecx,%edx |
jnc sqrt_stage_2_positive |
|
/* Subtraction gives a negative result, |
negate the result before division. */ |
notl %edx |
notl %eax |
addl $1,%eax |
adcl $0,%edx |
|
divl %esi |
movl %eax,%ecx |
|
movl %edx,%eax |
divl %esi |
jmp sqrt_stage_2_finish |
|
sqrt_stage_2_positive: |
divl %esi |
movl %eax,%ecx |
|
movl %edx,%eax |
divl %esi |
|
notl %ecx |
notl %eax |
addl $1,%eax |
adcl $0,%ecx |
|
sqrt_stage_2_finish: |
sarl $1,%ecx /* divide by 2 */ |
rcrl $1,%eax |
|
/* Form the new estimate in %esi:%edi */ |
movl %eax,%edi |
addl %ecx,%esi |
|
jnz sqrt_stage_2_done /* result should be [1..2) */ |
|
#ifdef PARANOID |
/* It should be possible to get here only if the arg is ffff....ffff */ |
cmp $0xffffffff,FPU_fsqrt_arg_1 |
jnz sqrt_stage_2_error |
#endif PARANOID |
|
/* The best rounded result. */ |
xorl %eax,%eax |
decl %eax |
movl %eax,%edi |
movl %eax,%esi |
movl $0x7fffffff,%eax |
jmp sqrt_round_result |
|
#ifdef PARANOID |
sqrt_stage_2_error: |
pushl EX_INTERNAL|0x213 |
call EXCEPTION |
#endif PARANOID |
|
sqrt_stage_2_done: |
|
/* Now the square root has been computed to better than 60 bits. */ |
|
/* Find the square of the guess. */ |
movl %edi,%eax /* ls word of guess */ |
mull %edi |
movl %edx,FPU_accum_1 |
|
movl %esi,%eax |
mull %esi |
movl %edx,FPU_accum_3 |
movl %eax,FPU_accum_2 |
|
movl %edi,%eax |
mull %esi |
addl %eax,FPU_accum_1 |
adcl %edx,FPU_accum_2 |
adcl $0,FPU_accum_3 |
|
/* movl %esi,%eax */ |
/* mull %edi */ |
addl %eax,FPU_accum_1 |
adcl %edx,FPU_accum_2 |
adcl $0,FPU_accum_3 |
|
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ |
|
movl FPU_fsqrt_arg_0,%eax /* get normalized n */ |
subl %eax,FPU_accum_1 |
movl FPU_fsqrt_arg_1,%eax |
sbbl %eax,FPU_accum_2 |
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ |
sbbl %eax,FPU_accum_3 |
jnc sqrt_stage_3_positive |
|
/* Subtraction gives a negative result, |
negate the result before division */ |
notl FPU_accum_1 |
notl FPU_accum_2 |
notl FPU_accum_3 |
addl $1,FPU_accum_1 |
adcl $0,FPU_accum_2 |
|
#ifdef PARANOID |
adcl $0,FPU_accum_3 /* This must be zero */ |
jz sqrt_stage_3_no_error |
|
sqrt_stage_3_error: |
pushl EX_INTERNAL|0x207 |
call EXCEPTION |
|
sqrt_stage_3_no_error: |
#endif PARANOID |
|
movl FPU_accum_2,%edx |
movl FPU_accum_1,%eax |
divl %esi |
movl %eax,%ecx |
|
movl %edx,%eax |
divl %esi |
|
sarl $1,%ecx /* divide by 2 */ |
rcrl $1,%eax |
|
/* prepare to round the result */ |
|
addl %ecx,%edi |
adcl $0,%esi |
|
jmp sqrt_stage_3_finished |
|
sqrt_stage_3_positive: |
movl FPU_accum_2,%edx |
movl FPU_accum_1,%eax |
divl %esi |
movl %eax,%ecx |
|
movl %edx,%eax |
divl %esi |
|
sarl $1,%ecx /* divide by 2 */ |
rcrl $1,%eax |
|
/* prepare to round the result */ |
|
notl %eax /* Negate the correction term */ |
notl %ecx |
addl $1,%eax |
adcl $0,%ecx /* carry here ==> correction == 0 */ |
adcl $0xffffffff,%esi |
|
addl %ecx,%edi |
adcl $0,%esi |
|
sqrt_stage_3_finished: |
|
/* |
* The result in %esi:%edi:%eax should be good to about 90 bits here, |
* and the rounding information here does not have sufficient accuracy |
* in a few rare cases. |
*/ |
/* %eax is the word below the 64 bit estimate in %esi:%edi.  When it lies
   within 0x20 of zero (from either side) or of 0x80000000, one of the
   special-case paths below is taken instead of rounding directly. */
cmpl $0xffffffe0,%eax |
ja sqrt_near_exact_x |
|
cmpl $0x00000020,%eax |
jb sqrt_near_exact |
|
cmpl $0x7fffffe0,%eax |
jb sqrt_round_result |
|
cmpl $0x80000020,%eax |
jb sqrt_get_more_precision |
|
sqrt_round_result: |
/* Set up for rounding operations */ |
/* On the jump below: %eax:%ebx = the estimate (ms:ls), %edx = the word
   below it, %edi = PARAM1 (the FPU_REG), %ecx = PARAM2 (control word). */
movl %eax,%edx |
movl %esi,%eax |
movl %edi,%ebx |
movl PARAM1,%edi |
movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ |
movl PARAM2,%ecx |
jmp fpu_reg_round_sqrt |
|
|
sqrt_near_exact_x: |
/* First, the estimate must be rounded up. */ |
addl $1,%edi |
adcl $0,%esi |
|
sqrt_near_exact: |
/* |
* This is an easy case because x^1/2 is monotonic. |
* We need just find the square of our estimate, compare it |
* with the argument, and deduce whether our estimate is |
* above, below, or exact. We use the fact that the estimate |
* is known to be accurate to about 90 bits. |
*/ |
movl %edi,%eax /* ls word of guess */ |
mull %edi |
movl %edx,%ebx /* 2nd ls word of square */ |
movl %eax,%ecx /* ls word of square */ |
|
/* Add twice the low word of the cross product (ls word * ms word of the
   guess) into the 2nd ls word of the square. */
movl %edi,%eax |
mull %esi |
addl %eax,%ebx |
addl %eax,%ebx |
|
#ifdef PARANOID |
cmp $0xffffffb0,%ebx |
jb sqrt_near_exact_ok |
|
cmp $0x00000050,%ebx |
ja sqrt_near_exact_ok |
|
pushl EX_INTERNAL|0x214 |
call EXCEPTION |
|
sqrt_near_exact_ok: |
#endif PARANOID |
|
/* Classify on the sign and value of %ebx (2nd ls word of the square) */
or %ebx,%ebx |
js sqrt_near_exact_small |
|
jnz sqrt_near_exact_large |
|
/* NOTE(review): %ebx is zero here and %edx still holds the high word of
   the %edi * %esi product from the mull above, not a word of the square.
   The corresponding test in sqrt_get_more_precision uses %ecx (the ls
   word of the square).  Confirm that %edx is really what is intended. */
or %ebx,%edx |
jnz sqrt_near_exact_large |
|
/* Our estimate is exactly the right answer */ |
xorl %eax,%eax |
jmp sqrt_round_result |
|
sqrt_near_exact_small: |
/* Our estimate is too small */ |
movl $0x000000ff,%eax |
jmp sqrt_round_result |
|
sqrt_near_exact_large: |
/* Our estimate is too large, we need to decrement it */ |
subl $1,%edi |
sbbl $0,%esi |
movl $0xffffff00,%eax |
jmp sqrt_round_result |
|
|
sqrt_get_more_precision: |
/* This case is almost the same as the above, except we start |
with an extra bit of precision in the estimate. */ |
stc /* The extra bit. */ |
rcll $1,%edi /* Shift the estimate left one bit */ |
rcll $1,%esi |
|
movl %edi,%eax /* ls word of guess */ |
mull %edi |
movl %edx,%ebx /* 2nd ls word of square */ |
movl %eax,%ecx /* ls word of square */ |
|
/* Add twice the low word of the cross product (ls word * ms word of the
   guess) into the 2nd ls word of the square. */
movl %edi,%eax |
mull %esi |
addl %eax,%ebx |
addl %eax,%ebx |
|
/* Put our estimate back to its original value */ |
stc /* The ms bit. */ |
rcrl $1,%esi /* Shift the estimate right one bit */ |
rcrl $1,%edi |
|
#ifdef PARANOID |
cmp $0xffffff60,%ebx |
jb sqrt_more_prec_ok |
|
cmp $0x000000a0,%ebx |
ja sqrt_more_prec_ok |
|
pushl EX_INTERNAL|0x215 |
call EXCEPTION |
|
sqrt_more_prec_ok: |
#endif PARANOID |
|
/* Classify on the sign and value of %ebx (2nd ls word of the square),
   then on %ecx (ls word of the square) when %ebx is zero. */
or %ebx,%ebx |
js sqrt_more_prec_small |
|
jnz sqrt_more_prec_large |
|
or %ebx,%ecx |
jnz sqrt_more_prec_large |
|
/* Our estimate is exactly the right answer */ |
movl $0x80000000,%eax |
jmp sqrt_round_result |
|
sqrt_more_prec_small: |
/* Our estimate is too small */ |
movl $0x800000ff,%eax |
jmp sqrt_round_result |
|
sqrt_more_prec_large: |
/* Our estimate is too large */ |
movl $0x7fffff00,%eax |
jmp sqrt_round_result |
/status_w.h
0,0 → 1,65
/*---------------------------------------------------------------------------+ |
| status_w.h | |
| | |
| Copyright (C) 1992,1993 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
+---------------------------------------------------------------------------*/ |
|
#ifndef _STATUS_H_
#define _STATUS_H_

#include "fpu_emu.h"    /* for definition of PECULIAR_486 */

/*
 * Const__() lets the same constants be used from C and from assembler
 * sources: in assembler the value is prefixed with '$' to make it an
 * immediate operand.
 */
#ifdef __ASSEMBLY__
#define Const__(x)      $##x
#else
#define Const__(x)      x
#endif

/* Bits of the FPU status word */
#define SW_Backward     Const__(0x8000) /* backward compatibility */
#define SW_C3           Const__(0x4000) /* condition bit 3 */
#define SW_Top          Const__(0x3800) /* top of stack */
#define SW_Top_Shift    Const__(11)     /* shift for top of stack bits */
#define SW_C2           Const__(0x0400) /* condition bit 2 */
#define SW_C1           Const__(0x0200) /* condition bit 1 */
#define SW_C0           Const__(0x0100) /* condition bit 0 */
#define SW_Summary      Const__(0x0080) /* exception summary */
#define SW_Stack_Fault  Const__(0x0040) /* stack fault */
#define SW_Precision    Const__(0x0020) /* loss of precision */
#define SW_Underflow    Const__(0x0010) /* underflow */
#define SW_Overflow     Const__(0x0008) /* overflow */
#define SW_Zero_Div     Const__(0x0004) /* divide by zero */
#define SW_Denorm_Op    Const__(0x0002) /* denormalized operand */
#define SW_Invalid      Const__(0x0001) /* invalid operation */

/* SW_C1 plus the seven low bits (stack fault down to invalid operation) */
#define SW_Exc_Mask     Const__(0x27f)  /* Status word exception bit mask */

#ifndef __ASSEMBLY__

/* Comparison result codes and modifier flags */
#define COMP_A_gt_B     1
#define COMP_A_eq_B     2
#define COMP_A_lt_B     3
#define COMP_No_Comp    4
#define COMP_Denormal   0x20
#define COMP_NaN        0x40
#define COMP_SNaN       0x80

/* The complete 16 bit status word: partial_status with its top-of-stack
   field replaced by the current value of top. */
#define status_word() \
  ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))

/* Replace the four condition code bits of partial_status with those
   found in cc; all other bits of cc are ignored. */
#define setcc(cc) ({ \
  partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \
  partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); })

#ifdef PECULIAR_486
   /* Default, this conveys no information, but an 80486 does it. */
   /* Clear the SW_C1 bit, "other bits undefined". */
#  define clear_C1()  { partial_status &= ~SW_C1; }
# else
#  define clear_C1()
#endif /* PECULIAR_486 */

#endif /* __ASSEMBLY__ */

#endif /* _STATUS_H_ */
/poly_l2.c
0,0 → 1,255
/*---------------------------------------------------------------------------+ |
| poly_l2.c | |
| | |
| Compute the base 2 log of a FPU_REG, using a polynomial approximation. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
|
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "poly.h" |
|
|
|
static void log2_kernel(FPU_REG const *arg, |
Xsig *accum_result, long int *expon); |
|
|
/*--- poly_l2() -------------------------------------------------------------+ |
| Base 2 logarithm by a polynomial approximation. | |
+---------------------------------------------------------------------------*/ |
/*
 * arg:    the number whose base 2 logarithm is wanted.
 * y:      multiplier; the logarithm is multiplied by the significand of
 *         y, and the exponent and sign of y are folded into the result
 *         (i.e. this appears to form y * log2(arg)).
 * result: receives the answer; may be the same register as y.
 *
 * arg is first reduced to a value x near zero such that 1+x lies between
 * sqrt(2)/2 and sqrt(2); log2_kernel() evaluates log2(1+x) and the
 * integer part of the exponent is added back in afterwards.
 *
 * A zero product is stored in result.  (It used to be written through
 * the const pointer y, which only worked when y and result were the same
 * register.)
 */
void poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result)
{
  long int exponent, expon, expon_expon;
  Xsig accumulator, expon_accum, yaccum;
  char sign;
  FPU_REG x;


  exponent = arg->exp - EXP_BIAS;

  /* From arg, make a number > sqrt(2)/2 and < sqrt(2) */
  if ( arg->sigh > (unsigned)0xb504f334 )
    {
      /* Treat as  sqrt(2)/2 < arg < 1 */
      significand(&x) = - significand(arg);
      x.sign = SIGN_NEG;
      x.tag = TW_Valid;
      x.exp = EXP_BIAS-1;
      exponent++;
      normalize(&x);
    }
  else
    {
      /* Treat as  1 <= arg < sqrt(2) */
      x.sigh = arg->sigh - 0x80000000;
      x.sigl = arg->sigl;
      x.sign = SIGN_POS;
      x.tag = TW_Valid;
      x.exp = EXP_BIAS;
      normalize(&x);
    }

  if ( x.tag == TW_Zero )
    {
      /* The significand of arg is exactly 1.0: the fraction contributes nothing */
      expon = 0;
      accumulator.msw = accumulator.midw = accumulator.lsw = 0;
    }
  else
    {
      log2_kernel(&x, &accumulator, &expon);
    }

  /* Work with the magnitude of the integer part and remember its sign */
  sign = exponent < 0;
  if ( sign ) exponent = -exponent;
  expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
  if ( exponent )
    {
      /* Align the fractional part with the normalized integer part,
         giving it the opposite sign where needed, and add the two */
      expon_expon = 31 + norm_Xsig(&expon_accum);
      shr_Xsig(&accumulator, expon_expon - expon);

      if ( sign ^ (x.sign == SIGN_NEG) )
        negate_Xsig(&accumulator);
      add_Xsig_Xsig(&accumulator, &expon_accum);
    }
  else
    {
      /* No integer part: the result is just the kernel's value */
      expon_expon = expon;
      sign = x.sign;
    }

  /* Multiply by the significand of y */
  yaccum.lsw = 0; XSIG_LL(yaccum) = significand(y);
  mul_Xsig_Xsig(&accumulator, &yaccum);

  expon_expon += round_Xsig(&accumulator);

  if ( accumulator.msw == 0 )
    {
      reg_move(&CONST_Z, result);
    }
  else
    {
      result->exp = expon_expon + y->exp + 1;
      significand(result) = XSIG_LL(accumulator);
      result->tag = TW_Valid; /* set the tags to Valid */
      result->sign = sign ^ y->sign;
    }

  return;
}
|
|
/*--- poly_l2p1() -----------------------------------------------------------+ |
| Base 2 logarithm by a polynomial approximation. | |
| log2(x+1) | |
+---------------------------------------------------------------------------*/ |
/*
 * arg:    the argument x of log2(x+1).
 * y:      multiplier; its significand multiplies the logarithm, and its
 *         exponent and sign are folded into the result.
 * result: receives the answer.
 *
 * Only arguments with an exponent below EXP_BIAS are evaluated, and 0 is
 * returned for them.  For anything larger, y is simply copied to result
 * and the return value is 1 if arg is negative, 0 otherwise.
 */
int poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result)
{
  char sign = arg->sign;
  long int exponent;
  Xsig accumulator, yaccum;

  if ( arg->exp >= EXP_BIAS )
    {
      /* The magnitude of arg is far too large. */
      reg_move(y, result);

      /* Non-zero means: trying to get the log of a negative number. */
      return sign != SIGN_POS;
    }

  log2_kernel(arg, &accumulator, &exponent);

  /* Multiply by the significand of y */
  yaccum.lsw = 0;
  XSIG_LL(yaccum) = significand(y);
  mul_Xsig_Xsig(&accumulator, &yaccum);

  exponent += round_Xsig(&accumulator);

  result->exp = exponent + y->exp + 1;
  significand(result) = XSIG_LL(accumulator);
  result->tag = TW_Valid;       /* set the tags to Valid */
  result->sign = sign ^ y->sign;

  return 0;
}
|
|
|
|
/*
 * Fixed point coefficients handed to polynomial_Xsig() by log2_kernel(),
 * one 64 bit value per term; HIPOWER is the number of terms.
 */
#undef HIPOWER
#define HIPOWER 10
static const unsigned long long logterms[HIPOWER] =
{
  0x2a8eca5705fc2ef0ULL, 0xf6384ee1d01febceULL,
  0x093bb62877cdf642ULL, 0x006985d8a9ec439bULL,
  0x0005212c4f55a9c8ULL, 0x00004326a16927f0ULL,
  0x0000038d1d80a0e7ULL, 0x0000003141cc80c6ULL,
  0x00000002b1668c9fULL, 0x000000002c7a46aaULL
};

/* 32 bit factor applied to the argument by log2_kernel() via mul32_Xsig() */
static const unsigned long leadterm = 0xb8000000;
|
|
/*--- log2_kernel() ---------------------------------------------------------+ |
| Base 2 logarithm by a polynomial approximation. | |
| log2(x+1) | |
+---------------------------------------------------------------------------*/ |
/*
 * arg:          the argument x of log2(x+1); callers in this file pass
 *               values whose exponent is below EXP_BIAS.
 * accum_result: receives the 96 bit significand of the result.
 * expon:        receives the exponent that goes with accum_result.
 *
 * The argument is first turned into a quotient Numer/Denom (presumably
 * x/(x+2) -- TODO confirm against the algorithm notes), an odd
 * polynomial in that quotient is evaluated with the logterms[] table,
 * and the leading term (quotient * leadterm) is added last.
 */
static void log2_kernel(FPU_REG const *arg, Xsig *accum_result,
                        long int *expon)
{
  char sign;
  long int exponent, adj;
  unsigned long long Xsq;
  Xsig accumulator, Numer, Denom, argSignif, arg_signif;

  sign = arg->sign;

  exponent = arg->exp - EXP_BIAS;
  Numer.lsw = Denom.lsw = 0;
  XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
  if ( sign == SIGN_POS )
    {
      shr_Xsig(&Denom, 2 - (1 + exponent));
      Denom.msw |= 0x80000000;
      div_Xsig(&Numer, &Denom, &argSignif);
    }
  else
    {
      shr_Xsig(&Denom, 1 - (1 + exponent));
      negate_Xsig(&Denom);
      if ( Denom.msw & 0x80000000 )
        {
          div_Xsig(&Numer, &Denom, &argSignif);
          exponent ++;
        }
      else
        {
          /* Denom must be 1.0 */
          argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
          argSignif.msw = Numer.msw;
        }
    }

#ifndef PECULIAR_486
  /* Should check here that  |local_arg|  is within the valid range */
  /* (The test is in place but nothing is done yet when it fails.) */
  if ( exponent >= -2 )
    {
      if ( (exponent > -2) ||
          (argSignif.msw > (unsigned)0xafb0ccc0) )
        {
          /* The argument is too large */
        }
    }
#endif /* PECULIAR_486 */

  /* Keep an un-normalized copy of the quotient for the leading term */
  arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
  adj = norm_Xsig(&argSignif);

  /* Square the normalized quotient and rescale it to form Xsq */
  accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
  mul_Xsig_Xsig(&accumulator, &accumulator);
  shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
  Xsq = XSIG_LL(accumulator);
  if ( accumulator.lsw & 0x80000000 )
    Xsq++;              /* round to nearest */

  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
  /* Do the basic fixed point polynomial evaluation */
  polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);

  mul_Xsig_Xsig(&accumulator, &argSignif);
  shr_Xsig(&accumulator, 6 - adj);

  /* Add the leading term */
  mul32_Xsig(&arg_signif, leadterm);
  add_two_Xsig(&accumulator, &arg_signif, &exponent);

  *expon = exponent + 1;
  accum_result->lsw = accumulator.lsw;
  accum_result->midw = accumulator.midw;
  accum_result->msw = accumulator.msw;

}
/fpu_proto.h
0,0 → 1,137
/* errors.c */ |
extern void Un_impl(void); |
extern void FPU_illegal(void); |
extern void emu_printall(void); |
/* Stack faults: with the invalid exception masked these store a QNaN
   (stack_overflow also moves the stack top, stack_underflow_pop also
   pops) before raising the exception. */
extern void stack_overflow(void); |
extern void stack_underflow(void); |
extern void stack_underflow_i(int i); |
extern void stack_underflow_pop(int i); |
/* Returns 0 if the precision exception is masked, 1 otherwise. */
extern int set_precision_flag(int flags); |
asmlinkage void exception(int n); |
/* The next three return non-zero if the exception raised is unmasked. */
asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest); |
asmlinkage int arith_invalid(FPU_REG *dest); |
asmlinkage int divide_by_zero(int sign, FPU_REG *dest); |
asmlinkage void set_precision_flag_up(void); |
asmlinkage void set_precision_flag_down(void); |
/* Returns 0 if the denormal exception is masked, 1 otherwise. */
asmlinkage int denormal_operand(void); |
/* These two return non-zero only when the over/underflow exception is
   masked and the precision exception is unmasked. */
asmlinkage int arith_overflow(FPU_REG *dest); |
asmlinkage int arith_underflow(FPU_REG *dest); |
|
/* fpu_arith.c */ |
extern void fadd__(void); |
extern void fmul__(void); |
extern void fsub__(void); |
extern void fsubr_(void); |
extern void fdiv__(void); |
extern void fdivr_(void); |
extern void fadd_i(void); |
extern void fmul_i(void); |
extern void fsubri(void); |
extern void fsub_i(void); |
extern void fdivri(void); |
extern void fdiv_i(void); |
extern void faddp_(void); |
extern void fmulp_(void); |
extern void fsubrp(void); |
extern void fsubp_(void); |
extern void fdivrp(void); |
extern void fdivp_(void); |
|
/* fpu_aux.c */ |
extern void fclex(void); |
extern void finit(void); |
extern void finit_(void); |
extern void fstsw_(void); |
extern void fp_nop(void); |
extern void fld_i_(void); |
extern void fxch_i(void); |
extern void ffree_(void); |
extern void ffreep(void); |
extern void fst_i_(void); |
extern void fstp_i(void); |
|
/* fpu_entry.c */ |
asmlinkage void math_emulate(long arg); |
extern void math_abort(struct info *info, unsigned int signal); |
|
/* fpu_etc.c */ |
extern void fp_etc(void); |
|
/* fpu_trig.c */ |
extern void convert_l2reg(long const *arg, FPU_REG *dest); |
extern void trig_a(void); |
extern void trig_b(void); |
|
/* get_address.c */ |
extern void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip, |
struct address *addr, |
fpu_addr_modes); |
extern void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip, |
struct address *addr, |
fpu_addr_modes); |
|
/* load_store.c */ |
extern int load_store_instr(unsigned char type, fpu_addr_modes addr_modes, |
void *address); |
|
/* poly_2xm1.c */ |
extern int poly_2xm1(FPU_REG const *arg, FPU_REG *result); |
|
/* poly_atan.c */ |
extern void poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result); |
|
/* poly_l2.c */ |
extern void poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result); |
extern int poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result); |
|
/* poly_sin.c */ |
extern void poly_sine(FPU_REG const *arg, FPU_REG *result); |
extern void poly_cos(FPU_REG const *arg, FPU_REG *result); |
|
/* poly_tan.c */ |
extern void poly_tan(FPU_REG const *arg, FPU_REG *result); |
|
/* reg_add_sub.c */ |
extern int reg_add(FPU_REG const *a, FPU_REG const *b, |
FPU_REG *dest, int control_w); |
extern int reg_sub(FPU_REG const *a, FPU_REG const *b, |
FPU_REG *dest, int control_w); |
|
/* reg_compare.c */ |
extern int compare(FPU_REG const *b); |
extern int compare_st_data(FPU_REG const *b); |
extern void fcom_st(void); |
extern void fcompst(void); |
extern void fcompp(void); |
extern void fucom_(void); |
extern void fucomp(void); |
extern void fucompp(void); |
|
/* reg_constant.c */ |
extern void fconst(void); |
|
/* reg_ld_str.c */ |
extern int reg_load_extended(long double *addr, FPU_REG *loaded_data); |
extern int reg_load_double(double *dfloat, FPU_REG *loaded_data); |
extern int reg_load_single(float *single, FPU_REG *loaded_data); |
extern void reg_load_int64(long long *_s, FPU_REG *loaded_data); |
extern void reg_load_int32(long *_s, FPU_REG *loaded_data); |
extern void reg_load_int16(short *_s, FPU_REG *loaded_data); |
extern void reg_load_bcd(char *s, FPU_REG *loaded_data); |
extern int reg_store_extended(long double *d, FPU_REG *st0_ptr); |
extern int reg_store_double(double *dfloat, FPU_REG *st0_ptr); |
extern int reg_store_single(float *single, FPU_REG *st0_ptr); |
extern int reg_store_int64(long long *d, FPU_REG *st0_ptr); |
extern int reg_store_int32(long *d, FPU_REG *st0_ptr); |
extern int reg_store_int16(short *d, FPU_REG *st0_ptr); |
extern int reg_store_bcd(char *d, FPU_REG *st0_ptr); |
extern int round_to_int(FPU_REG *r); |
extern char *fldenv(fpu_addr_modes addr_modes, char *address); |
extern void frstor(fpu_addr_modes addr_modes, char *address); |
extern unsigned short tag_word(void); |
extern char *fstenv(fpu_addr_modes addr_modes, char *address); |
extern void fsave(fpu_addr_modes addr_modes, char *address); |
|
/* reg_mul.c */ |
extern int reg_mul(FPU_REG const *a, FPU_REG const *b, |
FPU_REG *dest, unsigned int control_w); |
/fpu_etc.c
0,0 → 1,129
/*---------------------------------------------------------------------------+ |
| fpu_etc.c | |
| | |
| Implement a few FPU instructions. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "fpu_emu.h" |
#include "status_w.h" |
#include "reg_constant.h" |
|
|
/* FCHS: flip the sign of st(0) and clear C1.  An empty st(0) is reported
   through stack_underflow() and the register is left untouched. */
static void fchs(FPU_REG *st0_ptr)
{
  if ( st0_ptr->tag == TW_Empty )
    {
      stack_underflow();
      return;
    }

  /* XOR with (SIGN_POS^SIGN_NEG) swaps one sign encoding for the other. */
  st0_ptr->sign ^= SIGN_POS^SIGN_NEG;
  clear_C1();
}
|
/* FABS: force the sign of st(0) to SIGN_POS and clear C1.  An empty st(0)
   is reported through stack_underflow() and the register is left untouched. */
static void fabs(FPU_REG *st0_ptr)
{
  if ( st0_ptr->tag == TW_Empty )
    {
      stack_underflow();
      return;
    }

  st0_ptr->sign = SIGN_POS;
  clear_C1();
}
|
|
/*
 * FTST: examine st(0) and report the outcome in the condition codes.
 *
 *   TW_Zero               -> C3
 *   TW_Valid / Infinity   -> no bits if the sign is SIGN_POS, else C0
 *   TW_NaN                -> C0|C2|C3 and EX_Invalid is raised
 *   TW_Empty              -> C0|C2|C3 and EX_StackUnder is raised
 *   any other tag         -> C0|C2|C3 and an internal error (0x14) is raised
 *
 * With DENORM_OPERAND defined, a valid operand whose exponent is at or
 * below EXP_UNDER also calls denormal_operand(); when that returns non-zero
 * the function returns early.
 */
static void ftst_(FPU_REG *st0_ptr)
{
  switch (st0_ptr->tag)
    {
    case TW_Zero:
      setcc(SW_C3);
      break;
    case TW_Valid:
      if (st0_ptr->sign == SIGN_POS)
        setcc(0);
      else
        setcc(SW_C0);

#ifdef DENORM_OPERAND
      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	{
#ifdef PECULIAR_486
	  /* This is weird! */
	  if (st0_ptr->sign == SIGN_POS)
	    setcc(SW_C3);
#endif /* PECULIAR_486 */
	  return;
	}
#endif /* DENORM_OPERAND */

      break;
    case TW_NaN:
      setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */
      EXCEPTION(EX_Invalid);
      break;
    case TW_Infinity:
      if (st0_ptr->sign == SIGN_POS)
        setcc(0);
      else
        setcc(SW_C0);
      break;
    case TW_Empty:
      setcc(SW_C0|SW_C2|SW_C3);
      EXCEPTION(EX_StackUnder);
      break;
    default:
      setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */
      EXCEPTION(EX_INTERNAL|0x14);
      break;
    }
}
|
/*
 * FXAM: classify st(0) and publish the class in the condition codes.
 *
 *   Empty    -> C3|C0        Zero     -> C3
 *   Valid    -> C2 (plus C3 when exp <= EXP_UNDER, i.e. a denormal)
 *   NaN      -> C0           Infinity -> C2|C0
 *   other    -> no class bits
 *
 * C1 is added whenever the sign field equals SIGN_NEG.
 */
static void fxam(FPU_REG *st0_ptr)
{
  int cond_bits = 0;

  if ( st0_ptr->tag == TW_Empty )
    cond_bits = SW_C3|SW_C0;
  else if ( st0_ptr->tag == TW_Zero )
    cond_bits = SW_C3;
  else if ( st0_ptr->tag == TW_Valid )
    {
      /* This will need to be changed if TW_Denormal is ever used. */
      cond_bits = SW_C2;
      if ( st0_ptr->exp <= EXP_UNDER )
	cond_bits |= SW_C3;       /* Denormal */
    }
  else if ( st0_ptr->tag == TW_NaN )
    cond_bits = SW_C0;
  else if ( st0_ptr->tag == TW_Infinity )
    cond_bits = SW_C2|SW_C0;

  if ( st0_ptr->sign == SIGN_NEG )
    cond_bits |= SW_C1;

  setcc(cond_bits);
}
|
|
/* Dispatch table used by fp_etc(), indexed by FPU_rm.  Slots 0, 1, 4 and 5
   hold fchs, fabs, ftst_ and fxam; the remaining four slots route to
   FPU_illegal (cast to the FUNC_ST0 handler type). */
static FUNC_ST0 const fp_etc_table[] = { |
fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal, |
ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal |
}; |
|
void fp_etc() |
{ |
(fp_etc_table[FPU_rm])(&st(0)); |
} |
/poly_atan.c
0,0 → 1,197
/*---------------------------------------------------------------------------+ |
| poly_atan.c | |
| | |
| Compute the arctan of a FPU_REG, using a polynomial approximation. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "status_w.h" |
#include "control_w.h" |
#include "poly.h" |
|
|
/* Coefficient tables for poly_atan().  Each array is handed to
   polynomial_Xsig() together with its HIPOWER* count minus one. */
#define HIPOWERon 6 /* odd poly, negative terms */ |
static const unsigned long long oddnegterms[HIPOWERon] = |
{ |
0x0000000000000000LL, /* Dummy (not for - 1.0) */ |
0x015328437f756467LL, |
0x0005dda27b73dec6LL, |
0x0000226bf2bfb91aLL, |
0x000000ccc439c5f7LL, |
0x0000000355438407LL |
} ; |
|
#define HIPOWERop 6 /* odd poly, positive terms */ |
static const unsigned long long oddplterms[HIPOWERop] = |
{ |
/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ |
0x0db55a71875c9ac2LL, |
0x0029fce2d67880b0LL, |
0x0000dfd3908b4596LL, |
0x00000550fd61dab4LL, |
0x0000001c9422b3f9LL, |
0x000000003e3301e1LL |
}; |
|
/* Multiplied into a copy of the squared argument to build the divisor
   used by poly_atan(). */
static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; |
|
/* Leading term removed from oddplterms[]; added to the accumulator as a
   full 96-bit quantity. */
static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); |
|
/* 96-bit significand of pi; poly_atan() adds it when forming
   pi/4 - x, pi/2 - x and pi - x, with the scale tracked separately in
   its exponent variable. */
static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); |
|
|
/*--- poly_atan() -----------------------------------------------------------+ |
| | |
+---------------------------------------------------------------------------*/ |
void poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result) |
{ |
char transformed, inverted, |
sign1 = arg1->sign, sign2 = arg2->sign; |
long int exponent, dummy_exp; |
Xsig accumulator, Numer, Denom, accumulatore, argSignif, |
argSq, argSqSq; |
|
|
arg1->sign = arg2->sign = SIGN_POS; |
if ( (compare(arg2) & ~COMP_Denormal) == COMP_A_lt_B ) |
{ |
inverted = 1; |
exponent = arg1->exp - arg2->exp; |
Numer.lsw = Denom.lsw = 0; |
XSIG_LL(Numer) = significand(arg1); |
XSIG_LL(Denom) = significand(arg2); |
} |
else |
{ |
inverted = 0; |
exponent = arg2->exp - arg1->exp; |
Numer.lsw = Denom.lsw = 0; |
XSIG_LL(Numer) = significand(arg2); |
XSIG_LL(Denom) = significand(arg1); |
} |
div_Xsig(&Numer, &Denom, &argSignif); |
exponent += norm_Xsig(&argSignif); |
|
if ( (exponent >= -1) |
|| ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) ) |
{ |
/* The argument is greater than sqrt(2)-1 (=0.414213562...) */ |
/* Convert the argument by an identity for atan */ |
transformed = 1; |
|
if ( exponent >= 0 ) |
{ |
#ifdef PARANOID |
if ( !( (exponent == 0) && |
(argSignif.lsw == 0) && (argSignif.midw == 0) && |
(argSignif.msw == 0x80000000) ) ) |
{ |
EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */ |
return; |
} |
#endif PARANOID |
argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ |
} |
else |
{ |
Numer.lsw = Denom.lsw = argSignif.lsw; |
XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); |
|
if ( exponent < -1 ) |
shr_Xsig(&Numer, -1-exponent); |
negate_Xsig(&Numer); |
|
shr_Xsig(&Denom, -exponent); |
Denom.msw |= 0x80000000; |
|
div_Xsig(&Numer, &Denom, &argSignif); |
|
exponent = -1 + norm_Xsig(&argSignif); |
} |
} |
else |
{ |
transformed = 0; |
} |
|
argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw; |
argSq.msw = argSignif.msw; |
mul_Xsig_Xsig(&argSq, &argSq); |
|
argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw; |
mul_Xsig_Xsig(&argSqSq, &argSqSq); |
|
accumulatore.lsw = argSq.lsw; |
XSIG_LL(accumulatore) = XSIG_LL(argSq); |
|
shr_Xsig(&argSq, 2*(-1-exponent-1)); |
shr_Xsig(&argSqSq, 4*(-1-exponent-1)); |
|
/* Now have argSq etc with binary point at the left |
.1xxxxxxxx */ |
|
/* Do the basic fixed point polynomial evaluation */ |
accumulator.msw = accumulator.midw = accumulator.lsw = 0; |
polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), |
oddplterms, HIPOWERop-1); |
mul64_Xsig(&accumulator, &XSIG_LL(argSq)); |
negate_Xsig(&accumulator); |
polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1); |
negate_Xsig(&accumulator); |
add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); |
|
mul64_Xsig(&accumulatore, &denomterm); |
shr_Xsig(&accumulatore, 1 + 2*(-1-exponent)); |
accumulatore.msw |= 0x80000000; |
|
div_Xsig(&accumulator, &accumulatore, &accumulator); |
|
mul_Xsig_Xsig(&accumulator, &argSignif); |
mul_Xsig_Xsig(&accumulator, &argSq); |
|
shr_Xsig(&accumulator, 3); |
negate_Xsig(&accumulator); |
add_Xsig_Xsig(&accumulator, &argSignif); |
|
if ( transformed ) |
{ |
/* compute pi/4 - accumulator */ |
shr_Xsig(&accumulator, -1-exponent); |
negate_Xsig(&accumulator); |
add_Xsig_Xsig(&accumulator, &pi_signif); |
exponent = -1; |
} |
|
if ( inverted ) |
{ |
/* compute pi/2 - accumulator */ |
shr_Xsig(&accumulator, -exponent); |
negate_Xsig(&accumulator); |
add_Xsig_Xsig(&accumulator, &pi_signif); |
exponent = 0; |
} |
|
if ( sign1 ) |
{ |
/* compute pi - accumulator */ |
shr_Xsig(&accumulator, 1 - exponent); |
negate_Xsig(&accumulator); |
add_Xsig_Xsig(&accumulator, &pi_signif); |
exponent = 1; |
} |
|
exponent += round_Xsig(&accumulator); |
significand(result) = XSIG_LL(accumulator); |
result->exp = exponent + EXP_BIAS; |
result->tag = TW_Valid; |
result->sign = sign2; |
|
} |
/reg_constant.c
0,0 → 1,125
/*---------------------------------------------------------------------------+ |
| reg_constant.c | |
| | |
| All of the constant FPU_REGs | |
| | |
| Copyright (C) 1992,1993,1994,1996 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_system.h" |
#include "fpu_emu.h" |
#include "status_w.h" |
#include "reg_constant.h" |
#include "control_w.h" |
|
|
/* Constant registers.  The initializers are positional: sign, tag,
   exponent, then two 32-bit significand words.  Judging by CONST_PI
   (0x2168c235, 0xc90fdaa2) the less significant word comes first --
   NOTE(review): confirm against the FPU_REG layout in fpu_emu.h. */
FPU_REG const CONST_1 = { SIGN_POS, TW_Valid, EXP_BIAS, |
0x00000000, 0x80000000 }; |
FPU_REG const CONST_2 = { SIGN_POS, TW_Valid, EXP_BIAS+1, |
0x00000000, 0x80000000 }; |
FPU_REG const CONST_HALF = { SIGN_POS, TW_Valid, EXP_BIAS-1, |
0x00000000, 0x80000000 }; |
FPU_REG const CONST_L2T = { SIGN_POS, TW_Valid, EXP_BIAS+1, |
0xcd1b8afe, 0xd49a784b }; |
FPU_REG const CONST_L2E = { SIGN_POS, TW_Valid, EXP_BIAS, |
0x5c17f0bc, 0xb8aa3b29 }; |
FPU_REG const CONST_PI = { SIGN_POS, TW_Valid, EXP_BIAS+1, |
0x2168c235, 0xc90fdaa2 }; |
/* CONST_PI2 and CONST_PI4 reuse the CONST_PI significand with the
   exponent lowered by one and two respectively. */
FPU_REG const CONST_PI2 = { SIGN_POS, TW_Valid, EXP_BIAS, |
0x2168c235, 0xc90fdaa2 }; |
FPU_REG const CONST_PI4 = { SIGN_POS, TW_Valid, EXP_BIAS-1, |
0x2168c235, 0xc90fdaa2 }; |
FPU_REG const CONST_LG2 = { SIGN_POS, TW_Valid, EXP_BIAS-2, |
0xfbcff799, 0x9a209a84 }; |
FPU_REG const CONST_LN2 = { SIGN_POS, TW_Valid, EXP_BIAS-1, |
0xd1cf79ac, 0xb17217f7 }; |
|
/* Extra bits to take pi/2 to more than 128 bits precision. */ |
/* Note the SIGN_NEG: this is a negative correction term. */
FPU_REG const CONST_PI2extra = { SIGN_NEG, TW_Valid, EXP_BIAS-66, |
0xfc8f8cbb, 0xece675d1 }; |
|
/* Only the sign (and tag) is used in internal zeroes */ |
FPU_REG const CONST_Z = { SIGN_POS, TW_Zero, EXP_UNDER, 0x0, 0x0 }; |
|
/* Only the sign and significand (and tag) are used in internal NaNs */ |
/* The 80486 never generates one of these |
FPU_REG const CONST_SNAN = { SIGN_POS, TW_NaN, EXP_OVER, 0x00000001, 0x80000000 }; |
*/ |
/* This is the real indefinite QNaN */ |
FPU_REG const CONST_QNaN = { SIGN_NEG, TW_NaN, EXP_OVER, 0x00000000, 0xC0000000 }; |
|
/* Only the sign (and tag) is used in internal infinities */ |
FPU_REG const CONST_INF = { SIGN_POS, TW_Infinity, EXP_OVER, 0x00000000, 0x80000000 }; |
|
|
|
/* Push a copy of the constant *c onto the register stack and add `adj'
   to the low significand word (sigl) of the pushed copy.  If
   STACK_OVERFLOW is true, stack_overflow() is called and nothing is
   pushed.  C1 is cleared on the success path. */
static void fld_const(FPU_REG const *c, int adj) |
{ |
/* NOTE(review): never assigned in this function; presumably the
   STACK_OVERFLOW and/or push() macros set it -- confirm in fpu_emu.h. */
FPU_REG *st_new_ptr; |
|
if ( STACK_OVERFLOW ) |
{ |
stack_overflow(); |
return; |
} |
push(); |
reg_move(c, st_new_ptr); |
st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to |
borrow or carry. */ |
clear_C1(); |
} |
|
/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
   (and not one of RC_RND or RC_UP).
   The argument is parenthesized so that an expression such as
   DOWN_OR_CHOP(a | b) masks the whole value, not just its last operand.
 */
#define DOWN_OR_CHOP(x)  ((x) & RC_DOWN)
|
/* Load CONST_1.  The rounding-control argument is ignored: the constant
   is always pushed with a zero adjustment. */
static void fld1(int rc) |
{ |
fld_const(&CONST_1, 0); |
} |
|
static void fldl2t(int rc) |
{ |
fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0); |
} |
|
static void fldl2e(int rc) |
{ |
fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0); |
} |
|
static void fldpi(int rc) |
{ |
fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0); |
} |
|
static void fldlg2(int rc) |
{ |
fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0); |
} |
|
static void fldln2(int rc) |
{ |
fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0); |
} |
|
/* Load CONST_Z.  The rounding-control argument is ignored: the constant
   is always pushed with a zero adjustment. */
static void fldz(int rc) |
{ |
fld_const(&CONST_Z, 0); |
} |
|
/* Handler type for the constant loaders: each takes the rounding-control
   bits as its only argument. */
typedef void (*FUNC_RC)(int); |
|
/* Dispatch table used by fconst(), indexed by FPU_rm.  The last slot
   routes to FPU_illegal (cast to the FUNC_RC handler type). */
static FUNC_RC constants_table[] = { |
fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal |
}; |
|
void fconst(void) |
{ |
(constants_table[FPU_rm])(control_word & CW_RC); |
} |
/poly.h
0,0 → 1,116
/*---------------------------------------------------------------------------+ |
| poly.h | |
| | |
| Header file for the FPU-emu poly*.c source files. | |
| | |
| Copyright (C) 1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| Declarations and definitions for functions operating on Xsig (12-byte | |
| extended-significand) quantities. | |
| | |
+---------------------------------------------------------------------------*/ |
|
#ifndef _POLY_H |
#define _POLY_H |
|
/* This 12-byte structure is used to improve the accuracy of computation |
of transcendental functions. |
Intended to be used to get results better than 8-byte computation |
allows. 9-byte would probably be sufficient. |
*/ |
/* The words are stored least significant first.  The "12-byte" size and
   the XSIG_LL() overlay on midw/msw rely on unsigned long being 32 bits
   wide, as on the i386 target. */
typedef struct { |
unsigned long lsw; |
unsigned long midw; |
unsigned long msw; |
} Xsig; |
|
asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, |
unsigned long long *result); |
asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, |
const unsigned long long terms[], const int n); |
|
asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); |
asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); |
asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); |
|
asmlinkage void shr_Xsig(Xsig *, const int n); |
asmlinkage int round_Xsig(Xsig *); |
asmlinkage int norm_Xsig(Xsig *); |
asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); |
|
/* Macro to extract the most significant 32 bits from a long long.
   Relies on a little-endian layout and a 32-bit unsigned long. */
#define LL_MSW(x)     (((unsigned long *)&(x))[1])

/* Macro to initialize an Xsig struct; arguments are given most significant
   word first and stored in member order (lsw, midw, msw). */
#define MK_XSIG(a,b,c)     { c, b, a }

/* Macro to access the 8 ms bytes of an Xsig as a long long.
   The argument is parenthesized so that expressions such as *ptr work. */
#define XSIG_LL(x)         (*(unsigned long long *)&(x).midw)
|
|
/* |
Need to run gcc with optimizations on to get these to |
actually be in-line. |
*/ |
|
/* Multiply two fixed-point 32 bit numbers. */ |
/* Only the upper 32 bits of the 64-bit product (left in %edx by mull)
   are stored to *out; the lower half is discarded. */
extern inline void mul_32_32(const unsigned long arg1, |
const unsigned long arg2, |
unsigned long *out) |
{ |
asm volatile ("movl %1,%%eax; mull %2; movl %%edx,%0" \ |
:"=g" (*out) \ |
:"g" (arg1), "g" (arg2) \ |
:"ax","dx"); |
} |
|
|
/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ |
extern inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) |
{ |
asm volatile ("movl %1,%%edi; movl %2,%%esi; |
movl (%%esi),%%eax; addl %%eax,(%%edi); |
movl 4(%%esi),%%eax; adcl %%eax,4(%%edi); |
movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);" |
:"=g" (*dest):"g" (dest), "g" (x2) |
:"ax","si","di"); |
} |
|
|
/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ |
/* Note: the constraints in the asm statement didn't always work properly |
with gcc 2.5.8. Changing from using edi to using ecx got around the |
problem, but keep fingers crossed! */ |
extern inline int add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) |
{ |
asm volatile ("movl %2,%%ecx; movl %3,%%esi; |
movl (%%esi),%%eax; addl %%eax,(%%ecx); |
movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx); |
movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx); |
jnc 0f; |
rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx) |
movl %4,%%ecx; incl (%%ecx) |
movl $1,%%eax; jmp 1f; |
0: xorl %%eax,%%eax; |
1:" |
:"=g" (*exp), "=g" (*dest) |
:"g" (dest), "g" (x2), "g" (exp) |
:"cx","si","ax"); |
} |
|
|
/* Negate (subtract from 1.0) the 12 byte Xsig */ |
/* This is faster in a loop on my 386 than using the "neg" instruction. */ |
/* Each word is replaced by 0 minus itself, least significant word first,
   with the borrow propagated by sbbl; %ecx holds the zero. */
extern inline void negate_Xsig(Xsig *x) |
{ |
asm volatile("movl %1,%%esi; " |
"xorl %%ecx,%%ecx; " |
"movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi); " |
"movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi); " |
"movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi); " |
:"=g" (*x):"g" (x):"si","ax","cx"); |
} |
|
#endif /* _POLY_H */
/reg_mul.c
0,0 → 1,105
/*---------------------------------------------------------------------------+ |
| reg_mul.c | |
| | |
| Multiply one FPU_REG by another, put the result in a destination FPU_REG. | |
| | |
| Copyright (C) 1992,1993 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| The destination may be any FPU_REG, including one of the source FPU_REGs. | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "fpu_system.h" |
|
|
/* This routine must be called with non-empty source registers */ |
int reg_mul(FPU_REG const *a, FPU_REG const *b, |
FPU_REG *dest, unsigned int control_w) |
{ |
char saved_sign = dest->sign; |
char sign = (a->sign ^ b->sign); |
|
if (!(a->tag | b->tag)) |
{ |
/* Both regs Valid, this should be the most common case. */ |
dest->sign = sign; |
if ( reg_u_mul(a, b, dest, control_w) ) |
{ |
dest->sign = saved_sign; |
return 1; |
} |
return 0; |
} |
else if ((a->tag <= TW_Zero) && (b->tag <= TW_Zero)) |
{ |
#ifdef DENORM_OPERAND |
if ( ((b->tag == TW_Valid) && (b->exp <= EXP_UNDER)) || |
((a->tag == TW_Valid) && (a->exp <= EXP_UNDER)) ) |
{ |
if ( denormal_operand() ) return 1; |
} |
#endif DENORM_OPERAND |
/* Must have either both arguments == zero, or |
one valid and the other zero. |
The result is therefore zero. */ |
reg_move(&CONST_Z, dest); |
/* The 80486 book says that the answer is +0, but a real |
80486 behaves this way. |
IEEE-754 apparently says it should be this way. */ |
dest->sign = sign; |
return 0; |
} |
else |
{ |
/* Must have infinities, NaNs, etc */ |
if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) ) |
{ return real_2op_NaN(a, b, dest); } |
else if (a->tag == TW_Infinity) |
{ |
if (b->tag == TW_Zero) |
{ return arith_invalid(dest); } /* Zero*Infinity is invalid */ |
else |
{ |
#ifdef DENORM_OPERAND |
if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && |
denormal_operand() ) |
return 1; |
#endif DENORM_OPERAND |
reg_move(a, dest); |
dest->sign = sign; |
} |
return 0; |
} |
else if (b->tag == TW_Infinity) |
{ |
if (a->tag == TW_Zero) |
{ return arith_invalid(dest); } /* Zero*Infinity is invalid */ |
else |
{ |
#ifdef DENORM_OPERAND |
if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && |
denormal_operand() ) |
return 1; |
#endif DENORM_OPERAND |
reg_move(b, dest); |
dest->sign = sign; |
} |
return 0; |
} |
#ifdef PARANOID |
else |
{ |
EXCEPTION(EX_INTERNAL|0x102); |
return 1; |
} |
#endif PARANOID |
} |
} |
/div_small.S
0,0 → 1,47
.file "div_small.S" |
/*---------------------------------------------------------------------------+ |
| div_small.S | |
| | |
| Divide a 64 bit integer by a 32 bit integer & return remainder. | |
| | |
| Copyright (C) 1992,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| unsigned long div_small(unsigned long long *x, unsigned long y) | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_emu.h" |
|
.text |
/* Two-step long division of the 64-bit value at *x by the 32-bit y.
   The quotient is written back over *x and the remainder is returned
   in %eax.  %esi is saved and restored; %ecx and %edx are overwritten. */
ENTRY(div_small) |
pushl %ebp |
movl %esp,%ebp |
|
pushl %esi |
|
movl PARAM1,%esi /* pointer to num */ |
movl PARAM2,%ecx /* The denominator */ |
|
movl 4(%esi),%eax /* Get the current num msw */ |
xorl %edx,%edx |
/* 0:msw / y -> quotient in %eax, remainder in %edx */
divl %ecx |
|
movl %eax,4(%esi) |
|
movl (%esi),%eax /* Get the num lsw */ |
/* remainder:lsw / y; %edx still holds the remainder of the first step,
   which is smaller than y, so this quotient fits in 32 bits */
divl %ecx |
|
movl %eax,(%esi) |
|
movl %edx,%eax /* Return the remainder in eax */ |
|
popl %esi |
|
leave |
ret |
|
/poly_2xm1.c
0,0 → 1,152
/*---------------------------------------------------------------------------+ |
| poly_2xm1.c | |
| | |
| Function to compute 2^x-1 by a polynomial approximation. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "poly.h" |
|
|
/* Coefficient table for poly_2xm1(); passed to polynomial_Xsig() with a
   term count of HIPOWER-1. */
#define HIPOWER 11 |
static const unsigned long long lterms[HIPOWER] = |
{ |
0x0000000000000000LL, /* This term done separately as 12 bytes */ |
0xf5fdeffc162c7543LL, |
0x1c6b08d704a0bfa6LL, |
0x0276556df749cc21LL, |
0x002bb0ffcf14f6b8LL, |
0x0002861225ef751cLL, |
0x00001ffcbfcd5422LL, |
0x00000162c005d5f1LL, |
0x0000000da96ccb1bLL, |
0x0000000078d1b897LL, |
0x000000000422b029LL |
}; |
|
/* The leading term, kept as a full 96-bit quantity; poly_2xm1()
   multiplies the argument significand by it. */
static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); |
|
/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, |
These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) |
*/ |
/* shiftterm0 is a placeholder: poly_2xm1() only indexes shiftterm[] when
   its shift value is non-zero. */
static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); |
static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); |
static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); |
static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); |
|
static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, |
&shiftterm2, &shiftterm3 }; |
|
|
/*--- poly_2xm1() -----------------------------------------------------------+ |
| Requires an argument which is TW_Valid and < 1. | |
+---------------------------------------------------------------------------*/ |
int poly_2xm1(FPU_REG const *arg, FPU_REG *result) |
{ |
long int exponent, shift; |
unsigned long long Xll; |
Xsig accumulator, Denom, argSignif; |
|
|
exponent = arg->exp - EXP_BIAS; |
|
#ifdef PARANOID |
if ( (exponent >= 0) /* Don't want a |number| >= 1.0 */ |
|| (arg->tag != TW_Valid) ) |
{ |
/* Number negative, too large, or not Valid. */ |
EXCEPTION(EX_INTERNAL|0x127); |
return 1; |
} |
#endif PARANOID |
|
argSignif.lsw = 0; |
XSIG_LL(argSignif) = Xll = significand(arg); |
|
if ( exponent == -1 ) |
{ |
shift = (argSignif.msw & 0x40000000) ? 3 : 2; |
/* subtract 0.5 or 0.75 */ |
exponent -= 2; |
XSIG_LL(argSignif) <<= 2; |
Xll <<= 2; |
} |
else if ( exponent == -2 ) |
{ |
shift = 1; |
/* subtract 0.25 */ |
exponent--; |
XSIG_LL(argSignif) <<= 1; |
Xll <<= 1; |
} |
else |
shift = 0; |
|
if ( exponent < -2 ) |
{ |
/* Shift the argument right by the required places. */ |
if ( shrx(&Xll, -2-exponent) >= 0x80000000U ) |
Xll++; /* round up */ |
} |
|
accumulator.lsw = accumulator.midw = accumulator.msw = 0; |
polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1); |
mul_Xsig_Xsig(&accumulator, &argSignif); |
shr_Xsig(&accumulator, 3); |
|
mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ |
add_two_Xsig(&accumulator, &argSignif, &exponent); |
|
if ( shift ) |
{ |
/* The argument is large, use the identity: |
f(x+a) = f(a) * (f(x) + 1) - 1; |
*/ |
shr_Xsig(&accumulator, - exponent); |
accumulator.msw |= 0x80000000; /* add 1.0 */ |
mul_Xsig_Xsig(&accumulator, shiftterm[shift]); |
accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ |
exponent = 1; |
} |
|
if ( arg->sign != SIGN_POS ) |
{ |
/* The argument is negative, use the identity: |
f(-x) = -f(x) / (1 + f(x)) |
*/ |
Denom.lsw = accumulator.lsw; |
XSIG_LL(Denom) = XSIG_LL(accumulator); |
if ( exponent < 0 ) |
shr_Xsig(&Denom, - exponent); |
else if ( exponent > 0 ) |
{ |
/* exponent must be 1 here */ |
XSIG_LL(Denom) <<= 1; |
if ( Denom.lsw & 0x80000000 ) |
XSIG_LL(Denom) |= 1; |
(Denom.lsw) <<= 1; |
} |
Denom.msw |= 0x80000000; /* add 1.0 */ |
div_Xsig(&accumulator, &Denom, &accumulator); |
} |
|
/* Convert to 64 bit signed-compatible */ |
exponent += round_Xsig(&accumulator); |
|
significand(result) = XSIG_LL(accumulator); |
result->tag = TW_Valid; |
result->exp = exponent + EXP_BIAS; |
result->sign = arg->sign; |
|
return 0; |
|
} |
/reg_constant.h
0,0 → 1,31
/*---------------------------------------------------------------------------+ |
| reg_constant.h | |
| | |
| Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
+---------------------------------------------------------------------------*/ |
|
#ifndef _REG_CONSTANT_H_
#define _REG_CONSTANT_H_

#include "fpu_emu.h"

/* Constant FPU registers.
   NOTE(review): CONST_PINF and CONST_MINF are declared here, but no
   definition for them appears alongside the other constants in
   reg_constant.c -- confirm whether they are still used anywhere. */
extern FPU_REG const CONST_1;
extern FPU_REG const CONST_2;
extern FPU_REG const CONST_HALF;
extern FPU_REG const CONST_L2T;
extern FPU_REG const CONST_L2E;
extern FPU_REG const CONST_PI;
extern FPU_REG const CONST_PI2;
extern FPU_REG const CONST_PI2extra;
extern FPU_REG const CONST_PI4;
extern FPU_REG const CONST_LG2;
extern FPU_REG const CONST_LN2;
extern FPU_REG const CONST_Z;
extern FPU_REG const CONST_PINF;
extern FPU_REG const CONST_INF;
extern FPU_REG const CONST_MINF;
extern FPU_REG const CONST_QNaN;

#endif /* _REG_CONSTANT_H_ */
/div_Xsig.S
0,0 → 1,365
.file "div_Xsig.S" |
/*---------------------------------------------------------------------------+ |
| div_Xsig.S | |
| | |
| Division subroutine for 96 bit quantities | |
| | |
| Copyright (C) 1994,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | |
| put the 96 bit result at the location d. | |
| | |
| The result may not be accurate to 96 bits. It is intended for use where | |
| a result better than 64 bits is required. The result should usually be | |
| good to at least 94 bits. | |
| The returned result is actually divided by one half. This is done to | |
| prevent overflow. | |
| | |
| .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | |
| | |
| void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "fpu_emu.h" |
|
|
#define XsigLL(x) (x) |
#define XsigL(x) 4(x) |
#define XsigH(x) 8(x) |
|
|
#ifndef NON_REENTRANT_FPU |
/* |
Local storage on the stack: |
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 |
*/ |
#define FPU_accum_3 -4(%ebp) |
#define FPU_accum_2 -8(%ebp) |
#define FPU_accum_1 -12(%ebp) |
#define FPU_accum_0 -16(%ebp) |
#define FPU_result_3 -20(%ebp) |
#define FPU_result_2 -24(%ebp) |
#define FPU_result_1 -28(%ebp) |
|
#else |
.data |
/* |
Local storage in a static area: |
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 |
*/ |
.align 2,0 |
FPU_accum_3: |
.long 0 |
FPU_accum_2: |
.long 0 |
FPU_accum_1: |
.long 0 |
FPU_accum_0: |
.long 0 |
FPU_result_3: |
.long 0 |
FPU_result_2: |
.long 0 |
FPU_result_1: |
.long 0 |
#endif NON_REENTRANT_FPU |
|
|
.text |
ENTRY(div_Xsig) |
pushl %ebp |
movl %esp,%ebp |
#ifndef NON_REENTRANT_FPU |
subl $28,%esp |
#endif NON_REENTRANT_FPU |
|
pushl %esi |
pushl %edi |
pushl %ebx |
|
movl PARAM1,%esi /* pointer to num */ |
movl PARAM2,%ebx /* pointer to denom */ |
|
#ifdef PARANOID |
testl $0x80000000, XsigH(%ebx) /* Divisor */ |
je L_bugged |
#endif PARANOID |
|
|
/*---------------------------------------------------------------------------+ |
| Divide: Return arg1/arg2 to arg3. | |
| | |
| The maximum returned value is (ignoring exponents) | |
| .ffffffff ffffffff | |
| ------------------ = 1.ffffffff fffffffe | |
| .80000000 00000000 | |
| and the minimum is | |
| .80000000 00000000 | |
| ------------------ = .80000000 00000001 (rounded) | |
| .ffffffff ffffffff | |
| | |
+---------------------------------------------------------------------------*/ |
|
/* Save extended dividend in local register */ |
|
/* Divide by 2 to prevent overflow */ |
clc |
movl XsigH(%esi),%eax |
rcrl %eax |
movl %eax,FPU_accum_3 |
movl XsigL(%esi),%eax |
rcrl %eax |
movl %eax,FPU_accum_2 |
movl XsigLL(%esi),%eax |
rcrl %eax |
movl %eax,FPU_accum_1 |
movl $0,%eax |
rcrl %eax |
movl %eax,FPU_accum_0 |
|
movl FPU_accum_2,%eax /* Get the current num */ |
movl FPU_accum_3,%edx |
|
/*----------------------------------------------------------------------*/ |
/* Initialization done. |
Do the first 32 bits. */ |
|
/* We will divide by a number which is too large */ |
movl XsigH(%ebx),%ecx |
addl $1,%ecx |
jnc LFirst_div_not_1 |
|
/* here we need to divide by 100000000h, |
i.e., no division at all.. */ |
mov %edx,%eax |
jmp LFirst_div_done |
|
LFirst_div_not_1: |
divl %ecx /* Divide the numerator by the augmented |
denom ms dw */ |
|
LFirst_div_done: |
movl %eax,FPU_result_3 /* Put the result in the answer */ |
|
mull XsigH(%ebx) /* mul by the ms dw of the denom */ |
|
subl %eax,FPU_accum_2 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_3 |
|
movl FPU_result_3,%eax /* Get the result back */ |
mull XsigL(%ebx) /* now mul the ls dw of the denom */ |
|
subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_2 |
sbbl $0,FPU_accum_3 |
je LDo_2nd_32_bits /* Must check for non-zero result here */ |
|
#ifdef PARANOID |
jb L_bugged_1 |
#endif PARANOID |
|
/* need to subtract another once of the denom */ |
incl FPU_result_3 /* Correct the answer */ |
|
movl XsigL(%ebx),%eax |
movl XsigH(%ebx),%edx |
subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_2 |
|
#ifdef PARANOID |
sbbl $0,FPU_accum_3 |
jne L_bugged_1 /* Must check for non-zero result here */ |
#endif PARANOID |
|
/*----------------------------------------------------------------------*/ |
/* Half of the main problem is done, there is just a reduced numerator |
to handle now. |
Work with the second 32 bits, FPU_accum_0 not used from now on */ |
LDo_2nd_32_bits: |
movl FPU_accum_2,%edx /* get the reduced num */ |
movl FPU_accum_1,%eax |
|
/* need to check for possible subsequent overflow */ |
cmpl XsigH(%ebx),%edx |
jb LDo_2nd_div |
ja LPrevent_2nd_overflow |
|
cmpl XsigL(%ebx),%eax |
jb LDo_2nd_div |
|
LPrevent_2nd_overflow: |
/* The numerator is greater or equal, would cause overflow */ |
/* prevent overflow */ |
subl XsigL(%ebx),%eax |
sbbl XsigH(%ebx),%edx |
movl %edx,FPU_accum_2 |
movl %eax,FPU_accum_1 |
|
incl FPU_result_3 /* Reflect the subtraction in the answer */ |
|
#ifdef PARANOID |
je L_bugged_2 /* Can't bump the result to 1.0 */ |
#endif PARANOID |
|
LDo_2nd_div: |
cmpl $0,%ecx /* augmented denom msw */ |
jnz LSecond_div_not_1 |
|
/* %ecx == 0, we are dividing by 1.0 */ |
mov %edx,%eax |
jmp LSecond_div_done |
|
LSecond_div_not_1: |
divl %ecx /* Divide the numerator by the denom ms dw */ |
|
LSecond_div_done: |
movl %eax,FPU_result_2 /* Put the result in the answer */ |
|
mull XsigH(%ebx) /* mul by the ms dw of the denom */ |
|
subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_2 |
|
#ifdef PARANOID |
jc L_bugged_2 |
#endif PARANOID |
|
movl FPU_result_2,%eax /* Get the result back */ |
mull XsigL(%ebx) /* now mul the ls dw of the denom */ |
|
subl %eax,FPU_accum_0 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl $0,FPU_accum_2 |
|
#ifdef PARANOID |
jc L_bugged_2 |
#endif PARANOID |
|
jz LDo_3rd_32_bits |
|
#ifdef PARANOID |
cmpl $1,FPU_accum_2 |
jne L_bugged_2 |
#endif PARANOID |
|
/* need to subtract another once of the denom */ |
movl XsigL(%ebx),%eax |
movl XsigH(%ebx),%edx |
subl %eax,FPU_accum_0 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_1 |
sbbl $0,FPU_accum_2 |
|
#ifdef PARANOID |
jc L_bugged_2 |
jne L_bugged_2 |
#endif PARANOID |
|
addl $1,FPU_result_2 /* Correct the answer */ |
adcl $0,FPU_result_3 |
|
#ifdef PARANOID |
jc L_bugged_2 /* Must check for non-zero result here */ |
#endif PARANOID |
|
/*----------------------------------------------------------------------*/ |
/* The division is essentially finished here, we just need to perform |
tidying operations. |
Deal with the 3rd 32 bits */ |
LDo_3rd_32_bits: |
/* We use an approximation for the third 32 bits. |
To take account of the 3rd 32 bits of the divisor |
(call them del), we subtract del * (a/b) */ |
|
movl FPU_result_3,%eax /* a/b */ |
mull XsigLL(%ebx) /* del */ |
|
subl %edx,FPU_accum_1 |
|
/* A borrow indicates that the result is negative */ |
jnb LTest_over |
|
movl XsigH(%ebx),%edx |
addl %edx,FPU_accum_1 |
|
subl $1,FPU_result_2 /* Adjust the answer */ |
sbbl $0,FPU_result_3 |
|
/* The above addition might not have been enough, check again. */ |
movl FPU_accum_1,%edx /* get the reduced num */ |
cmpl XsigH(%ebx),%edx /* denom */ |
jb LDo_3rd_div |
|
movl XsigH(%ebx),%edx |
addl %edx,FPU_accum_1 |
|
subl $1,FPU_result_2 /* Adjust the answer */ |
sbbl $0,FPU_result_3 |
jmp LDo_3rd_div |
|
LTest_over: |
movl FPU_accum_1,%edx /* get the reduced num */ |
|
/* need to check for possible subsequent overflow */ |
cmpl XsigH(%ebx),%edx /* denom */ |
jb LDo_3rd_div |
|
/* prevent overflow */ |
subl XsigH(%ebx),%edx |
movl %edx,FPU_accum_1 |
|
addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ |
adcl $0,FPU_result_3 |
|
LDo_3rd_div: |
movl FPU_accum_0,%eax |
movl FPU_accum_1,%edx |
divl XsigH(%ebx) |
|
movl %eax,FPU_result_1 /* Rough estimate of third word */ |
|
movl PARAM3,%esi /* pointer to answer */ |
|
movl FPU_result_1,%eax |
movl %eax,XsigLL(%esi) |
movl FPU_result_2,%eax |
movl %eax,XsigL(%esi) |
movl FPU_result_3,%eax |
movl %eax,XsigH(%esi) |
|
L_exit: |
popl %ebx |
popl %edi |
popl %esi |
|
leave |
ret |
|
|
#ifdef PARANOID |
/* The logic is wrong if we got here */ |
L_bugged: |
pushl EX_INTERNAL|0x240 |
call EXCEPTION |
pop %ebx |
jmp L_exit |
|
L_bugged_1: |
pushl EX_INTERNAL|0x241 |
call EXCEPTION |
pop %ebx |
jmp L_exit |
|
L_bugged_2: |
pushl EX_INTERNAL|0x242 |
call EXCEPTION |
pop %ebx |
jmp L_exit |
#endif PARANOID |
/Makefile
0,0 → 1,31
# |
# Makefile for wm-FPU-emu |
# |

# Name of the archive built from this directory; presumably consumed by
# $(TOPDIR)/Rules.make, which is included below (not shown here).
L_TARGET := math.a |

# Optional debugging define. The commented-out line is the enabled form;
# the live assignment leaves DEBUG empty.
#DEBUG = -DDEBUGGING |
DEBUG = |
# -DPARANOID turns on the "#ifdef PARANOID" self-checks in the emulator sources.
PARANOID = -DPARANOID |
# Extend the inherited CFLAGS. MATH_EMULATION is not set in this file, so it
# is expected to come from the including build.
CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) |

# Assemble .S files through $(CC) so that they are run through the
# preprocessor; __ASSEMBLY__ selects the assembler variants of the shared
# headers (e.g. Const_() in exception.h).
.S.o: |
$(CC) -D__ASSEMBLY__ $(PARANOID) -c $< |

# Object files making up the emulator.
L_OBJS =fpu_entry.o div_small.o errors.o \ |
fpu_arith.o fpu_aux.o fpu_etc.o fpu_trig.o \ |
load_store.o get_address.o \ |
poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ |
reg_add_sub.o reg_compare.o reg_constant.o reg_ld_str.o \ |
reg_div.o reg_mul.o reg_norm.o \ |
reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ |
reg_round.o \ |
wm_shrx.o wm_sqrt.o \ |
div_Xsig.o polynom_Xsig.o round_Xsig.o \ |
shr_Xsig.o mul_Xsig.o \ |
fpu_debug.o |

include $(TOPDIR)/Rules.make |

# Regenerate fpu_proto.h from all C sources in this directory using cproto.
proto: |
cproto -e -DMAKING_PROTO *.c >fpu_proto.h |
/fpu_debug.c
0,0 → 1,230
/* Interface with ptrace and core-dumping routines */ |
|
|
#include "fpu_system.h" |
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "status_w.h" |
|
|
/* Exponent parameters for the extended, double and single real formats.
   Within this file only EXTENDED_Ebias and EXTENDED_Emin are referenced;
   the DOUBLE_* and SINGLE_* values are not used here. */
#define EXTENDED_Ebias 0x3fff |
#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ |

#define DOUBLE_Emax 1023 /* largest valid exponent */ |
#define DOUBLE_Ebias 1023 |
#define DOUBLE_Emin (-1022) /* smallest valid exponent */ |

#define SINGLE_Emax 127 /* largest valid exponent */ |
#define SINGLE_Ebias 127 |
#define SINGLE_Emin (-126) /* smallest valid exponent */ |
|
|
/* Copy and paste from round_to_int. Original comments maintained */ |
/*===========================================================================*/ |
|
/* r gets mangled such that sig is int, sign: |
it is NOT normalized */ |
/* The return value (in eax) is zero if the result is exact, |
if bits are changed due to rounding, truncation, etc, then |
a non-zero value is returned */ |
/* Overflow is signalled by a non-zero return value (in eax). |
In the case of overflow, the returned significand always has the |
largest possible value */ |
|
static int round_to_int_cwd(FPU_REG *r, long int user_control_word) |
{ |
char very_big; |
unsigned eax; |
|
if (r->tag == TW_Zero) |
{ |
/* Make sure that zero is returned */ |
significand(r) = 0; |
return 0; /* o.k. */ |
} |
|
if (r->exp > EXP_BIAS + 63) |
{ |
r->sigl = r->sigh = ~0; /* The largest representable number */ |
return 1; /* overflow */ |
} |
|
eax = shrxs(&r->sigl, EXP_BIAS + 63 - r->exp); |
very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ |
#define half_or_more (eax & 0x80000000) |
#define frac_part (eax) |
#define more_than_half ((eax & 0x80000001) == 0x80000001) |
switch (user_control_word & CW_RC) |
{ |
case RC_RND: |
if ( more_than_half /* nearest */ |
|| (half_or_more && (r->sigl & 1)) ) /* odd -> even */ |
{ |
if ( very_big ) return 1; /* overflow */ |
significand(r) ++; |
return PRECISION_LOST_UP; |
} |
break; |
case RC_DOWN: |
if (frac_part && r->sign) |
{ |
if ( very_big ) return 1; /* overflow */ |
significand(r) ++; |
return PRECISION_LOST_UP; |
} |
break; |
case RC_UP: |
if (frac_part && !r->sign) |
{ |
if ( very_big ) return 1; /* overflow */ |
significand(r) ++; |
return PRECISION_LOST_UP; |
} |
break; |
case RC_CHOP: |
break; |
} |
|
return eax ? PRECISION_LOST_DOWN : 0; |
|
} |
|
|
|
/* Conver a number in the emulator format to the |
* hardware format. |
* Taken from the emulator sources, function reg_load_extended |
*/ |
|
/* Get a long double from the debugger */ |
void hardreg_to_softreg(const char hardreg[10], |
FPU_REG *soft_reg) |
|
{ |
unsigned long sigl, sigh, exp; |
|
sigl = *((unsigned long *) hardreg); |
sigh = *(1 + (unsigned long *) hardreg); |
exp = *(4 + (unsigned short *) hardreg); |
|
soft_reg->tag = TW_Valid; /* Default */ |
soft_reg->sigl = sigl; |
soft_reg->sigh = sigh; |
if (exp & 0x8000) |
soft_reg->sign = SIGN_NEG; |
else |
soft_reg->sign = SIGN_POS; |
exp &= 0x7fff; |
soft_reg->exp = exp - EXTENDED_Ebias + EXP_BIAS; |
|
if ( exp == 0 ) |
{ |
if ( !(sigh | sigl) ) |
{ |
soft_reg->tag = TW_Zero; |
return; |
} |
/* The number is a de-normal or pseudodenormal. */ |
if (sigh & 0x80000000) |
{ |
/* Is a pseudodenormal. */ |
/* Convert it for internal use. */ |
/* This is non-80486 behaviour because the number |
loses its 'denormal' identity. */ |
soft_reg->exp++; |
return; |
} |
else |
{ |
/* Is a denormal. */ |
/* Convert it for internal use. */ |
soft_reg->exp++; |
normalize_nuo(soft_reg); |
return; |
} |
} |
else if ( exp == 0x7fff ) |
{ |
if ( !((sigh ^ 0x80000000) | sigl) ) |
{ |
/* Matches the bit pattern for Infinity. */ |
soft_reg->exp = EXP_Infinity; |
soft_reg->tag = TW_Infinity; |
return; |
} |
|
soft_reg->exp = EXP_NaN; |
soft_reg->tag = TW_NaN; |
if ( !(sigh & 0x80000000) ) |
{ |
/* NaNs have the ms bit set to 1. */ |
/* This is therefore an Unsupported NaN data type. */ |
/* This is non 80486 behaviour */ |
/* This should generate an Invalid Operand exception |
later, so we convert it to a SNaN */ |
soft_reg->sigh = 0x80000000; |
soft_reg->sigl = 0x00000001; |
soft_reg->sign = SIGN_NEG; |
return; |
} |
return; |
} |
|
if ( !(sigh & 0x80000000) ) |
{ |
/* Unsupported data type. */ |
/* Valid numbers have the ms bit set to 1. */ |
/* Unnormal. */ |
/* Convert it for internal use. */ |
/* This is non-80486 behaviour */ |
/* This should generate an Invalid Operand exception |
later, so we convert it to a SNaN */ |
soft_reg->sigh = 0x80000000; |
soft_reg->sigl = 0x00000001; |
soft_reg->sign = SIGN_NEG; |
soft_reg->exp = EXP_NaN; |
soft_reg->tag = TW_NaN; |
return; |
} |
return; |
} |
|
/* Conver a number in the emulator format to the |
* hardware format. |
* Adapted from function write_to_extended |
*/ |
|
|
void softreg_to_hardreg(const FPU_REG *rp, char d[10], long int user_control_word) |
{ |
long e; |
FPU_REG tmp; |
e = rp->exp - EXP_BIAS + EXTENDED_Ebias; |
|
/* |
All numbers except denormals are stored internally in a |
format which is compatible with the extended real number |
format. |
*/ |
if (e > 0) { |
*(unsigned long *) d = rp->sigl; |
*(unsigned long *) (d + 4) = rp->sigh; |
} else { |
/* |
The number is a de-normal stored as a normal using our |
extra exponent range, or is Zero. |
Convert it back to a de-normal, or leave it as Zero. |
*/ |
reg_move(rp, &tmp); |
tmp.exp += -EXTENDED_Emin + 63; /* largest exp to be 63 */ |
round_to_int_cwd(&tmp, user_control_word); |
e = 0; |
*(unsigned long *) d= tmp.sigl; |
*(unsigned long *) (d + 4) = tmp.sigh; |
} |
e |= rp->sign == SIGN_POS ? 0 : 0x8000; |
*(unsigned short *) (d + 8) = e; |
} |
|
/round_Xsig.S
0,0 → 1,141
/*---------------------------------------------------------------------------+ |
| round_Xsig.S | |
| | |
| Copyright (C) 1992,1993,1994,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Normalize and round a 12 byte quantity. | |
| Call from C as: | |
| int round_Xsig(Xsig *n) | |
| | |
| Normalize a 12 byte quantity. | |
| Call from C as: | |
| int norm_Xsig(Xsig *n) | |
| | |
| Each function returns the size of the shift (nr of bits). | |
| | |
+---------------------------------------------------------------------------*/ |
.file "round_Xsig.S" |
|
#include "fpu_emu.h" |
|
|
.text |
/*
 * round_Xsig: normalize the three-dword quantity pointed to by PARAM1 and
 * then round its upper two dwords according to the ms bit of the lowest
 * dword.  All three dwords are written back in place.
 *
 * Register use: %edx = dword at offset 8 (most significant), %ebx = dword
 * at offset 4, %eax = dword at offset 0, %esi = pointer to the quantity.
 * The local at -4(%ebp) accumulates the return value: minus the number of
 * bit positions shifted left, plus one if the rounding increment carried
 * out of the upper dword.
 */
ENTRY(round_Xsig) |
pushl %ebp |
movl %esp,%ebp |
pushl %ebx /* Reserve some space: this slot is the local at -4(%ebp) */ |
pushl %ebx /* save %ebx */ |
pushl %esi /* save %esi */ |

movl PARAM1,%esi |

movl 8(%esi),%edx |
movl 4(%esi),%ebx |
movl (%esi),%eax |

movl $0,-4(%ebp) /* no shift so far */ |

orl %edx,%edx /* ms bits */ |
js L_round /* Already normalized */ |
jnz L_shift_1 /* Shift left 1 - 31 bits */ |

/* The ms dword is zero: move everything up by one whole dword. */
movl %ebx,%edx |
movl %eax,%ebx |
xorl %eax,%eax |
movl $-32,-4(%ebp) |

/* NOTE(review): execution falls through into L_shift_1 without re-testing
   %edx.  If the middle dword was zero as well, bsrl is given a zero source
   and its result is architecturally undefined; norm_Xsig handles that case
   explicitly, this routine does not.  Confirm callers never pass such a
   value. */
/* We need to shift left by 1 - 31 bits */ |
L_shift_1: |
bsrl %edx,%ecx /* get the required shift in %ecx */ |
subl $31,%ecx |
negl %ecx /* %ecx = 31 - (index of highest set bit) */ |
subl %ecx,-4(%ebp) /* account for the shift in the return value */ |
shld %cl,%ebx,%edx |
shld %cl,%eax,%ebx |
shl %cl,%eax |

L_round: |
testl $0x80000000,%eax /* ms bit of the lowest dword */ |
jz L_exit /* clear: nothing to add */ |

addl $1,%ebx /* increment the upper 64 bits */ |
adcl $0,%edx |
jnz L_exit /* upper dword non-zero after the add: done */ |

/* The upper dword became zero: set it to 0x80000000 and add one to the
   return value. */
movl $0x80000000,%edx |
incl -4(%ebp) |

L_exit: |
movl %edx,8(%esi) |
movl %ebx,4(%esi) |
movl %eax,(%esi) /* the lowest dword is stored back as it stands */ |

movl -4(%ebp),%eax /* return value */ |

popl %esi |
popl %ebx |
leave |
ret |
|
|
|
|
/*
 * norm_Xsig: shift the three-dword quantity pointed to by PARAM1 left
 * until the ms bit of its upper dword is set, writing the result back in
 * place.  No rounding is done.
 *
 * Register use: %edx = dword at offset 8 (most significant), %ebx = dword
 * at offset 4, %eax = dword at offset 0, %esi = pointer to the quantity.
 * The local at -4(%ebp) accumulates the return value: minus the number of
 * bit positions shifted left.  After two whole-dword moves (-64) no further
 * bit shift is attempted, so the result may then be left un-normalized.
 */
ENTRY(norm_Xsig) |
pushl %ebp |
movl %esp,%ebp |
pushl %ebx /* Reserve some space: this slot is the local at -4(%ebp) */ |
pushl %ebx /* save %ebx */ |
pushl %esi /* save %esi */ |

movl PARAM1,%esi |

movl 8(%esi),%edx |
movl 4(%esi),%ebx |
movl (%esi),%eax |

movl $0,-4(%ebp) /* no shift so far */ |

orl %edx,%edx /* ms bits */ |
js L_n_exit /* Already normalized */ |
jnz L_n_shift_1 /* Shift left 1 - 31 bits */ |

/* The ms dword is zero: move everything up by one whole dword. */
movl %ebx,%edx |
movl %eax,%ebx |
xorl %eax,%eax |
movl $-32,-4(%ebp) |

orl %edx,%edx /* ms bits */ |
js L_n_exit /* Normalized now */ |
jnz L_n_shift_1 /* Shift left 1 - 31 bits */ |

/* Still zero: move up by a second whole dword (64 bits in total). */
movl %ebx,%edx |
movl %eax,%ebx |
xorl %eax,%eax |
addl $-32,-4(%ebp) |
jmp L_n_exit /* Might not be normalized, |
but shift no more. */ |

/* We need to shift left by 1 - 31 bits */ |
L_n_shift_1: |
bsrl %edx,%ecx /* get the required shift in %ecx */ |
subl $31,%ecx |
negl %ecx /* %ecx = 31 - (index of highest set bit) */ |
subl %ecx,-4(%ebp) /* account for the shift in the return value */ |
shld %cl,%ebx,%edx |
shld %cl,%eax,%ebx |
shl %cl,%eax |

L_n_exit: |
movl %edx,8(%esi) |
movl %ebx,4(%esi) |
movl %eax,(%esi) |

movl -4(%ebp),%eax /* return value */ |

popl %esi |
popl %ebx |
leave |
ret |
|
/fpu_system.h
0,0 → 1,83
/*---------------------------------------------------------------------------+ |
| fpu_system.h | |
| | |
| Copyright (C) 1992,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
+---------------------------------------------------------------------------*/ |
|
#ifndef _FPU_SYSTEM_H
#define _FPU_SYSTEM_H

/* system dependent definitions */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>

/* This sets the pointer FPU_info to point to the argument part
   of the stack frame of math_emulate() */
#define SETUP_DATA_AREA(arg)	FPU_info = (struct info *) &arg

/* Helpers which pick fields out of a segment descriptor held as two
   words, .a and .b.  LDT_DESCRIPTOR() indexes the current task's ldt
   array with the selector shifted right by three bits. */
#define LDT_DESCRIPTOR(s)	(current->ldt[(s) >> 3])
#define SEG_D_SIZE(x)		((x).b & (3 << 21))
#define SEG_G_BIT(x)		((x).b & (1 << 23))
#define SEG_GRANULARITY(x)	(((x).b & (1 << 23)) ? 4096 : 1)
#define SEG_286_MODE(x)		((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
#define SEG_BASE_ADDR(s)	(((s).b & 0xff000000) \
				 | (((s).b & 0xff) << 16) | ((s).a >> 16))
/* NOTE(review): SEG_LIMIT() keeps bits 16..23 of .b (mask 0xff0000), whereas
   SEG_286_MODE() above uses 0xf0000 for what appear to be the limit bits and
   treats bit 23 separately as the granularity bit.  Confirm whether the
   wider mask here is intended. */
#define SEG_LIMIT(s)		(((s).b & 0xff0000) | ((s).a & 0xffff))
#define SEG_EXECUTE_ONLY(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
#define SEG_WRITE_PERM(s)	(((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
#define SEG_EXPAND_DOWN(s)	(((s).b & ((1 << 11) | (1 << 10))) \
				 == (1 << 10))

/* Per-task emulator state, reached through the current task. */
#define I387			(current->tss.i387)
#define FPU_info		(I387.soft.info)

/* Fields of the frame FPU_info points at.  The segment values are
   accessed as unsigned short. */
#define FPU_CS			(*(unsigned short *) &(FPU_info->___cs))
#define FPU_SS			(*(unsigned short *) &(FPU_info->___ss))
#define FPU_DS			(*(unsigned short *) &(FPU_info->___ds))
#define FPU_EAX			(FPU_info->___eax)
#define FPU_EFLAGS		(FPU_info->___eflags)
#define FPU_EIP			(FPU_info->___eip)
#define FPU_ORIG_EIP		(FPU_info->___orig_eip)

#define FPU_lookahead           (I387.soft.lookahead)

/* The next three macros address bytes 0, 1 and 2 of the storage of
   I387.soft.twd. */

/* nz if ip_offset and cs_selector are not to be set for the current
   instruction. */
#define no_ip_update		(((char *)&(I387.soft.twd))[0])
#define FPU_rm			(((unsigned char *)&(I387.soft.twd))[1])

/* Number of bytes of data which can be legally accessed by the current
   instruction. This only needs to hold a number <= 108, so a byte will do. */
#define access_limit		(((unsigned char *)&(I387.soft.twd))[2])

#define partial_status		(I387.soft.swd)
#define control_word		(I387.soft.cwd)
#define regs			(I387.soft.regs)
#define top			(I387.soft.top)

#define instruction_address	(*(struct address *)&I387.soft.fip)
#define operand_address		(*(struct address *)&I387.soft.foo)

/* Abort the emulation with SIGSEGV if verify_area() reports a failure.
   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement: with a bare "if", an "else" following the macro call
   would silently bind to the macro's own "if". */
#define FPU_verify_area(x,y,z)	\
	do { if ( verify_area(x,y,z) ) \
		math_abort(FPU_info,SIGSEGV); } while (0)

#undef FPU_IGNORE_CODE_SEGV
#ifdef FPU_IGNORE_CODE_SEGV
/* verify_area() is very expensive, and causes the emulator to run
   about 20% slower if applied to the code. Anyway, errors due to bad
   code addresses should be much rarer than errors due to bad data
   addresses. */
#define	FPU_code_verify_area(z)
#else
/* A simpler test than verify_area() can probably be done for
   FPU_code_verify_area() because the only possible error is to step
   past the upper boundary of a legal code area. */
#define	FPU_code_verify_area(z) FPU_verify_area(VERIFY_READ,(void *)FPU_EIP,z)
#endif

#endif
/exception.h
0,0 → 1,53
/*---------------------------------------------------------------------------+ |
| exception.h | |
| | |
| Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
+---------------------------------------------------------------------------*/ |
|
#ifndef _EXCEPTION_H_
#define _EXCEPTION_H_


/* Const_() lets the same constant definitions serve both C and assembler
   sources: when __ASSEMBLY__ is defined the value is pasted onto a
   leading '$', otherwise it is used as it stands. */
#ifdef __ASSEMBLY__
#define	Const_(x)	$##x
#else
#define	Const_(x)	x
#endif

/* SW_C1 is used below; pull in fpu_emu.h if it has not been defined yet. */
#ifndef SW_C1
#include "fpu_emu.h"
#endif /* SW_C1 */

#define FPU_BUSY        Const_(0x8000)   /* FPU busy bit (8087 compatibility) */
#define EX_ErrorSummary Const_(0x0080)   /* Error summary status */
/* Special exceptions: */
#define	EX_INTERNAL	Const_(0x8000)	/* Internal error in wm-FPU-emu */
#define EX_StackOver	Const_(0x0041|SW_C1)	/* stack overflow */
#define EX_StackUnder	Const_(0x0041)	/* stack underflow */
/* Exception flags: */
#define EX_Precision	Const_(0x0020)	/* loss of precision */
#define EX_Underflow	Const_(0x0010)	/* underflow */
#define EX_Overflow	Const_(0x0008)	/* overflow */
#define EX_ZeroDiv	Const_(0x0004)	/* divide by zero */
#define EX_Denormal	Const_(0x0002)	/* denormalized operand */
#define EX_Invalid	Const_(0x0001)	/* invalid operation */


/* Precision exception together with SW_C1 (rounded up) or without it
   (rounded down). */
#define PRECISION_LOST_UP    Const_((EX_Precision | SW_C1))
#define PRECISION_LOST_DOWN  Const_(EX_Precision)


#ifndef __ASSEMBLY__

/* EXCEPTION(x) raises exception x.  With DEBUG defined it first logs the
   source file and line.  The DEBUG form is wrapped in do { } while (0) so
   that it is a single statement and can be followed by ';' and 'else'
   just like the plain call used otherwise. */
#ifdef DEBUG
#define	EXCEPTION(x)	do { printk("exception in %s at line %d\n", \
	__FILE__, __LINE__); exception(x); } while (0)
#else
#define	EXCEPTION(x)	exception(x)
#endif

#endif /* __ASSEMBLY__ */

#endif /* _EXCEPTION_H_ */
/reg_round.S
0,0 → 1,699
.file "reg_round.S" |
/*---------------------------------------------------------------------------+ |
| reg_round.S | |
| | |
| Rounding/truncation/etc for FPU basic arithmetic functions. | |
| | |
| Copyright (C) 1993,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| This code has four possible entry points. | |
| The following must be entered by a jmp instruction: | |
| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | |
| | |
| The _round_reg entry point is intended to be used by C code. | |
| From C, call as: | |
| void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) | |
| | |
| For correct "up" and "down" rounding, the argument must have the correct | |
| sign. | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Four entry points. | |
| | |
| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | |
| %eax:%ebx 64 bit significand | |
| %edx 32 bit extension of the significand | |
| %edi pointer to an FPU_REG for the result to be stored | |
| stack calling function must have set up a C stack frame and | |
| pushed %esi, %edi, and %ebx | |
| | |
| Needed just for the fpu_reg_round_sqrt entry point: | |
| %cx A control word in the same format as the FPU control word. | |
| Otherwise, PARAM4 must give such a value. | |
| | |
| | |
| The significand and its extension are assumed to be exact in the | |
| following sense: | |
| If the significand by itself is the exact result then the significand | |
| extension (%edx) must contain 0, otherwise the significand extension | |
| must be non-zero. | |
| If the significand extension is non-zero then the significand is | |
| smaller than the magnitude of the correct exact result by an amount | |
| greater than zero and less than one ls bit of the significand. | |
| The significand extension is only required to have three possible | |
| non-zero values: | |
| less than 0x80000000 <=> the significand is less than 1/2 an ls | |
| bit smaller than the magnitude of the | |
| true exact result. | |
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | |
| smaller than the magnitude of the true | |
| exact result. | |
| greater than 0x80000000 <=> the significand is more than 1/2 an ls | |
| bit smaller than the magnitude of the | |
| true exact result. | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| The code in this module has become quite complex, but it should handle | |
| all of the FPU flags which are set at this stage of the basic arithmetic | |
| computations. | |
| There are a few rare cases where the results are not set identically to | |
| a real FPU. These require a bit more thought because at this stage the | |
| results of the code here appear to be more consistent... | |
| This may be changed in a future version. | |
+---------------------------------------------------------------------------*/ |
|
|
#include "fpu_emu.h" |
#include "exception.h" |
#include "control_w.h" |
|
/* Flags for FPU_bits_lost */ |
#define LOST_DOWN $1 |
#define LOST_UP $2 |
|
/* Flags for FPU_denormal */ |
#define DENORMAL $1 |
#define UNMASKED_UNDERFLOW $2 |
|
|
#ifndef NON_REENTRANT_FPU |
/* Make the code re-entrant by putting |
local storage on the stack: */ |
#define FPU_bits_lost (%esp) |
#define FPU_denormal 1(%esp) |
|
#else |
/* Not re-entrant, so we can gain speed by putting |
local storage in a static area: */ |
.data |
.align 2,0 |
FPU_bits_lost: |
.byte 0 |
FPU_denormal: |
.byte 0 |
#endif NON_REENTRANT_FPU |
|
|
.text |
.globl fpu_reg_round |
.globl fpu_reg_round_sqrt |
.globl fpu_Arith_exit |
|
/* Entry point when called from C */ |
ENTRY(round_reg) |
pushl %ebp |
movl %esp,%ebp |
pushl %esi |
pushl %edi |
pushl %ebx |
|
movl PARAM1,%edi |
movl SIGH(%edi),%eax |
movl SIGL(%edi),%ebx |
movl PARAM2,%edx |
movl PARAM3,%ecx |
jmp fpu_reg_round_sqrt |
|
fpu_reg_round: /* Normal entry point */ |
movl PARAM4,%ecx |
|
fpu_reg_round_sqrt: /* Entry point from wm_sqrt.S */ |
|
#ifndef NON_REENTRANT_FPU |
pushl %ebx /* adjust the stack pointer */ |
#endif NON_REENTRANT_FPU |
|
#ifdef PARANOID |
/* Cannot use this here yet */ |
/* orl %eax,%eax */ |
/* jns L_entry_bugged */ |
#endif PARANOID |
|
cmpl EXP_UNDER,EXP(%edi) |
jle xMake_denorm /* The number is a de-normal */ |
|
movb $0,FPU_denormal /* 0 -> not a de-normal */ |
|
xDenorm_done: |
movb $0,FPU_bits_lost /* No bits yet lost in rounding */ |
|
movl %ecx,%esi |
andl CW_PC,%ecx |
cmpl PR_64_BITS,%ecx |
je LRound_To_64 |
|
cmpl PR_53_BITS,%ecx |
je LRound_To_53 |
|
cmpl PR_24_BITS,%ecx |
je LRound_To_24 |
|
#ifdef PECULIAR_486 |
/* With the precision control bits set to 01 "(reserved)", a real 80486 |
behaves as if the precision control bits were set to 11 "64 bits" */ |
cmpl PR_RESERVED_BITS,%ecx |
je LRound_To_64 |
#ifdef PARANOID |
jmp L_bugged_denorm_486 |
#endif PARANOID |
#else |
#ifdef PARANOID |
jmp L_bugged_denorm /* There is no bug, just a bad control word */ |
#endif PARANOID |
#endif PECULIAR_486 |
|
|
/* Round etc to 24 bit precision */ |
LRound_To_24: |
movl %esi,%ecx |
andl CW_RC,%ecx |
cmpl RC_RND,%ecx |
je LRound_nearest_24 |
|
cmpl RC_CHOP,%ecx |
je LCheck_truncate_24 |
|
cmpl RC_UP,%ecx /* Towards +infinity */ |
je LUp_24 |
|
cmpl RC_DOWN,%ecx /* Towards -infinity */ |
je LDown_24 |
|
#ifdef PARANOID |
jmp L_bugged_round24 |
#endif PARANOID |
|
LUp_24: |
cmpb SIGN_POS,SIGN(%edi) |
jne LCheck_truncate_24 /* If negative then up==truncate */ |
|
jmp LCheck_24_round_up |
|
LDown_24: |
cmpb SIGN_POS,SIGN(%edi) |
je LCheck_truncate_24 /* If positive then down==truncate */ |
|
LCheck_24_round_up: |
movl %eax,%ecx |
andl $0x000000ff,%ecx |
orl %ebx,%ecx |
orl %edx,%ecx |
jnz LDo_24_round_up |
jmp LRe_normalise |
|
LRound_nearest_24: |
/* Do rounding of the 24th bit if needed (nearest or even) */ |
movl %eax,%ecx |
andl $0x000000ff,%ecx |
cmpl $0x00000080,%ecx |
jc LCheck_truncate_24 /* less than half, no increment needed */ |
|
jne LGreater_Half_24 /* greater than half, increment needed */ |
|
/* Possibly half, we need to check the ls bits */ |
orl %ebx,%ebx |
jnz LGreater_Half_24 /* greater than half, increment needed */ |
|
orl %edx,%edx |
jnz LGreater_Half_24 /* greater than half, increment needed */ |
|
/* Exactly half, increment only if 24th bit is 1 (round to even) */ |
testl $0x00000100,%eax |
jz LDo_truncate_24 |
|
LGreater_Half_24: /* Rounding: increment at the 24th bit */ |
LDo_24_round_up: |
andl $0xffffff00,%eax /* Truncate to 24 bits */ |
xorl %ebx,%ebx |
movb LOST_UP,FPU_bits_lost |
addl $0x00000100,%eax |
jmp LCheck_Round_Overflow |
|
LCheck_truncate_24: |
movl %eax,%ecx |
andl $0x000000ff,%ecx |
orl %ebx,%ecx |
orl %edx,%ecx |
jz LRe_normalise /* No truncation needed */ |
|
LDo_truncate_24: |
andl $0xffffff00,%eax /* Truncate to 24 bits */ |
xorl %ebx,%ebx |
movb LOST_DOWN,FPU_bits_lost |
jmp LRe_normalise |
|
|
/* Round etc to 53 bit precision */ |
LRound_To_53: |
movl %esi,%ecx |
andl CW_RC,%ecx |
cmpl RC_RND,%ecx |
je LRound_nearest_53 |
|
cmpl RC_CHOP,%ecx |
je LCheck_truncate_53 |
|
cmpl RC_UP,%ecx /* Towards +infinity */ |
je LUp_53 |
|
cmpl RC_DOWN,%ecx /* Towards -infinity */ |
je LDown_53 |
|
#ifdef PARANOID |
jmp L_bugged_round53 |
#endif PARANOID |
|
LUp_53: |
cmpb SIGN_POS,SIGN(%edi) |
jne LCheck_truncate_53 /* If negative then up==truncate */ |
|
jmp LCheck_53_round_up |
|
LDown_53: |
cmpb SIGN_POS,SIGN(%edi) |
je LCheck_truncate_53 /* If positive then down==truncate */ |
|
LCheck_53_round_up: |
movl %ebx,%ecx |
andl $0x000007ff,%ecx |
orl %edx,%ecx |
jnz LDo_53_round_up |
jmp LRe_normalise |
|
LRound_nearest_53: |
/* Do rounding of the 53rd bit if needed (nearest or even) */ |
movl %ebx,%ecx |
andl $0x000007ff,%ecx |
cmpl $0x00000400,%ecx |
jc LCheck_truncate_53 /* less than half, no increment needed */ |
|
jnz LGreater_Half_53 /* greater than half, increment needed */ |
|
/* Possibly half, we need to check the ls bits */ |
orl %edx,%edx |
jnz LGreater_Half_53 /* greater than half, increment needed */ |
|
/* Exactly half, increment only if 53rd bit is 1 (round to even) */ |
testl $0x00000800,%ebx |
jz LTruncate_53 |
|
LGreater_Half_53: /* Rounding: increment at the 53rd bit */ |
LDo_53_round_up: |
movb LOST_UP,FPU_bits_lost |
andl $0xfffff800,%ebx /* Truncate to 53 bits */ |
addl $0x00000800,%ebx |
adcl $0,%eax |
jmp LCheck_Round_Overflow |
|
LCheck_truncate_53: |
movl %ebx,%ecx |
andl $0x000007ff,%ecx |
orl %edx,%ecx |
jz LRe_normalise |
|
LTruncate_53: |
movb LOST_DOWN,FPU_bits_lost |
andl $0xfffff800,%ebx /* Truncate to 53 bits */ |
jmp LRe_normalise |
|
|
/* Round etc to 64 bit precision */ |
LRound_To_64: |
movl %esi,%ecx |
andl CW_RC,%ecx |
cmpl RC_RND,%ecx |
je LRound_nearest_64 |
|
cmpl RC_CHOP,%ecx |
je LCheck_truncate_64 |
|
cmpl RC_UP,%ecx /* Towards +infinity */ |
je LUp_64 |
|
cmpl RC_DOWN,%ecx /* Towards -infinity */ |
je LDown_64 |
|
#ifdef PARANOID |
jmp L_bugged_round64 |
#endif PARANOID |
|
LUp_64: |
cmpb SIGN_POS,SIGN(%edi) |
jne LCheck_truncate_64 /* If negative then up==truncate */ |
|
orl %edx,%edx |
jnz LDo_64_round_up |
jmp LRe_normalise |
|
LDown_64: |
cmpb SIGN_POS,SIGN(%edi) |
je LCheck_truncate_64 /* If positive then down==truncate */ |
|
orl %edx,%edx |
jnz LDo_64_round_up |
jmp LRe_normalise |
|
LRound_nearest_64: |
cmpl $0x80000000,%edx |
jc LCheck_truncate_64 |
|
jne LDo_64_round_up |
|
/* Now test for round-to-even */ |
testb $1,%ebx |
jz LCheck_truncate_64 |
|
LDo_64_round_up: |
movb LOST_UP,FPU_bits_lost |
addl $1,%ebx |
adcl $0,%eax |
|
LCheck_Round_Overflow: |
jnc LRe_normalise |
|
/* Overflow, adjust the result (significand to 1.0) */ |
rcrl $1,%eax |
rcrl $1,%ebx |
incl EXP(%edi) |
jmp LRe_normalise |
|
LCheck_truncate_64: |
orl %edx,%edx |
jz LRe_normalise |
|
LTruncate_64: |
movb LOST_DOWN,FPU_bits_lost |
|
LRe_normalise: |
testb $0xff,FPU_denormal |
jnz xNormalise_result |
|
xL_Normalised: |
cmpb LOST_UP,FPU_bits_lost |
je xL_precision_lost_up |
|
cmpb LOST_DOWN,FPU_bits_lost |
je xL_precision_lost_down |
|
xL_no_precision_loss: |
/* store the result */ |
movb TW_Valid,TAG(%edi) |
|
xL_Store_significand: |
movl %eax,SIGH(%edi) |
movl %ebx,SIGL(%edi) |
|
xorl %eax,%eax /* No errors detected. */ |
|
cmpl EXP_OVER,EXP(%edi) |
jge L_overflow |
|
fpu_reg_round_exit: |
#ifndef NON_REENTRANT_FPU |
popl %ebx /* adjust the stack pointer */ |
#endif NON_REENTRANT_FPU |
|
fpu_Arith_exit: |
popl %ebx |
popl %edi |
popl %esi |
leave |
ret |
|
|
/* |
* Set the FPU status flags to represent precision loss due to |
* round-up. |
*/ |
xL_precision_lost_up: |
push %eax |
call SYMBOL_NAME(set_precision_flag_up) |
popl %eax |
jmp xL_no_precision_loss |
|
/* |
* Set the FPU status flags to represent precision loss due to |
* truncation. |
*/ |
xL_precision_lost_down: |
push %eax |
call SYMBOL_NAME(set_precision_flag_down) |
popl %eax |
jmp xL_no_precision_loss |
|
|
/* |
* The number is a denormal (which might get rounded up to a normal) |
* Shift the number right the required number of bits, which will |
* have to be undone later... |
*/ |
xMake_denorm: |
/* The action to be taken depends upon whether the underflow |
exception is masked */ |
testb CW_Underflow,%cl /* Underflow mask. */ |
jz xUnmasked_underflow /* Do not make a denormal. */ |
|
movb DENORMAL,FPU_denormal |
|
pushl %ecx /* Save */ |
movl EXP_UNDER+1,%ecx |
subl EXP(%edi),%ecx |
|
cmpl $64,%ecx /* shrd only works for 0..31 bits */ |
jnc xDenorm_shift_more_than_63 |
|
cmpl $32,%ecx /* shrd only works for 0..31 bits */ |
jnc xDenorm_shift_more_than_32 |
|
/* |
* We got here without jumps by assuming that the most common requirement |
* is for a small de-normalising shift. |
* Shift by [1..31] bits |
*/ |
addl %ecx,EXP(%edi) |
orl %edx,%edx /* extension */ |
setne %ch /* Save whether %edx is non-zero */ |
xorl %edx,%edx |
shrd %cl,%ebx,%edx |
shrd %cl,%eax,%ebx |
shr %cl,%eax |
orb %ch,%dl |
popl %ecx |
jmp xDenorm_done |
|
/* Shift by [32..63] bits */ |
xDenorm_shift_more_than_32: |
addl %ecx,EXP(%edi) |
subb $32,%cl |
orl %edx,%edx |
setne %ch |
orb %ch,%bl |
xorl %edx,%edx |
shrd %cl,%ebx,%edx |
shrd %cl,%eax,%ebx |
shr %cl,%eax |
orl %edx,%edx /* test these 32 bits */ |
setne %cl |
orb %ch,%bl |
orb %cl,%bl |
movl %ebx,%edx |
movl %eax,%ebx |
xorl %eax,%eax |
popl %ecx |
jmp xDenorm_done |
|
/* Shift by [64..) bits */ |
xDenorm_shift_more_than_63: |
cmpl $64,%ecx |
jne xDenorm_shift_more_than_64 |
|
/* Exactly 64 bit shift */ |
addl %ecx,EXP(%edi) |
xorl %ecx,%ecx |
orl %edx,%edx |
setne %cl |
orl %ebx,%ebx |
setne %ch |
orb %ch,%cl |
orb %cl,%al |
movl %eax,%edx |
xorl %eax,%eax |
xorl %ebx,%ebx |
popl %ecx |
jmp xDenorm_done |
|
xDenorm_shift_more_than_64: |
movl EXP_UNDER+1,EXP(%edi) |
/* This is easy, %eax must be non-zero, so.. */ |
movl $1,%edx |
xorl %eax,%eax |
xorl %ebx,%ebx |
popl %ecx |
jmp xDenorm_done |
|
|
xUnmasked_underflow: |
movb UNMASKED_UNDERFLOW,FPU_denormal |
jmp xDenorm_done |
|
|
/* Undo the de-normalisation. */ |
xNormalise_result: |
cmpb UNMASKED_UNDERFLOW,FPU_denormal |
je xSignal_underflow |
|
/* The number must be a denormal if we got here. */ |
#ifdef PARANOID |
/* But check it... just in case. */ |
cmpl EXP_UNDER+1,EXP(%edi) |
jne L_norm_bugged |
#endif PARANOID |
|
#ifdef PECULIAR_486 |
/* |
* This implements a special feature of 80486 behaviour. |
* Underflow will be signalled even if the number is |
* not a denormal after rounding. |
* This difference occurs only for masked underflow, and not |
* in the unmasked case. |
* Actual 80486 behaviour differs from this in some circumstances. |
*/ |
orl %eax,%eax /* ms bits */ |
js LNormalise_shift_done /* Will be masked underflow */ |
#endif PECULIAR_486 |
|
orl %eax,%eax /* ms bits */ |
js xL_Normalised /* No longer a denormal */ |
|
jnz LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits */ |
|
orl %ebx,%ebx |
jz L_underflow_to_zero /* The contents are zero */ |
|
/* Shift left 32 - 63 bits */ |
movl %ebx,%eax |
xorl %ebx,%ebx |
subl $32,EXP(%edi) |
|
LNormalise_shift_up_to_31: |
bsrl %eax,%ecx /* get the required shift in %ecx */ |
subl $31,%ecx |
negl %ecx |
shld %cl,%ebx,%eax |
shl %cl,%ebx |
subl %ecx,EXP(%edi) |
|
LNormalise_shift_done: |
testb $0xff,FPU_bits_lost /* bits lost == underflow */ |
jz xL_Normalised |
|
/* There must be a masked underflow */ |
push %eax |
pushl EX_Underflow |
call SYMBOL_NAME(exception) |
popl %eax |
popl %eax |
jmp xL_Normalised |
|
|
/* |
* The operations resulted in a number too small to represent. |
* Masked response. |
*/ |
L_underflow_to_zero: |
push %eax |
call SYMBOL_NAME(set_precision_flag_down) |
popl %eax |
|
push %eax |
pushl EX_Underflow |
call SYMBOL_NAME(exception) |
popl %eax |
popl %eax |
|
/* Reduce the exponent to EXP_UNDER */ |
movl EXP_UNDER,EXP(%edi) |
movb TW_Zero,TAG(%edi) |
jmp xL_Store_significand |
|
|
/* The operations resulted in a number too large to represent. */ |
L_overflow: |
push %edi |
call SYMBOL_NAME(arith_overflow) |
pop %edi |
jmp fpu_reg_round_exit |
|
|
xSignal_underflow: |
/* The number may have been changed to a non-denormal */ |
/* by the rounding operations. */ |
cmpl EXP_UNDER,EXP(%edi) |
jle xDo_unmasked_underflow |
|
jmp xL_Normalised |
|
xDo_unmasked_underflow: |
/* Increase the exponent by the magic number */ |
addl $(3*(1<<13)),EXP(%edi) |
push %eax |
pushl EX_Underflow |
call EXCEPTION |
popl %eax |
popl %eax |
jmp xL_Normalised |
|
|
#ifdef PARANOID |
#ifdef PECULIAR_486 |
L_bugged_denorm_486: |
pushl EX_INTERNAL|0x236 |
call EXCEPTION |
popl %ebx |
jmp L_exception_exit |
#else |
L_bugged_denorm: |
pushl EX_INTERNAL|0x230 |
call EXCEPTION |
popl %ebx |
jmp L_exception_exit |
#endif PECULIAR_486 |
|
L_bugged_round24: |
pushl EX_INTERNAL|0x231 |
call EXCEPTION |
popl %ebx |
jmp L_exception_exit |
|
L_bugged_round53: |
pushl EX_INTERNAL|0x232 |
call EXCEPTION |
popl %ebx |
jmp L_exception_exit |
|
L_bugged_round64: |
pushl EX_INTERNAL|0x233 |
call EXCEPTION |
popl %ebx |
jmp L_exception_exit |
|
L_norm_bugged: |
pushl EX_INTERNAL|0x234 |
call EXCEPTION |
popl %ebx |
jmp L_exception_exit |
|
L_entry_bugged: |
pushl EX_INTERNAL|0x235 |
call EXCEPTION |
popl %ebx |
L_exception_exit: |
mov $1,%eax |
jmp fpu_reg_round_exit |
#endif PARANOID |
/README
0,0 → 1,434
+---------------------------------------------------------------------------+ |
| wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | |
| | |
| Copyright (C) 1992,1993,1994,1995,1996 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@suburbia.net | |
| | |
| This program is free software; you can redistribute it and/or modify | |
| it under the terms of the GNU General Public License version 2 as | |
| published by the Free Software Foundation. | |
| | |
| This program is distributed in the hope that it will be useful, | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| GNU General Public License for more details. | |
| | |
| You should have received a copy of the GNU General Public License | |
| along with this program; if not, write to the Free Software | |
| Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
| | |
+---------------------------------------------------------------------------+ |
|
|
|
wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 |
which was my 80387 emulator for early versions of djgpp (gcc under |
msdos); wm-emu387 was in turn based upon emu387 which was written by |
DJ Delorie for djgpp. The interface to the Linux kernel is based upon |
the original Linux math emulator by Linus Torvalds. |
|
My target FPU for wm-FPU-emu is that described in the Intel486 |
Programmer's Reference Manual (1992 edition). Unfortunately, numerous |
facets of the functioning of the FPU are not well covered in the |
Reference Manual. The information in the manual has been supplemented |
with measurements on real 80486's. Unfortunately, it is simply not |
possible to be sure that all of the peculiarities of the 80486 have |
been discovered, so there are always likely to be obscure differences |
in the detailed behaviour of the emulator and a real 80486. |
|
wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, |
but is very close. See "Limitations" later in this file for a list of |
some differences. |
|
Please report bugs, etc to me at: |
billm@suburbia.net |
|
|
--Bill Metzenthen |
October 1996 |
|
|
----------------------- Internals of wm-FPU-emu ----------------------- |
|
Numeric algorithms: |
(1) Add, subtract, and multiply. Nothing remarkable in these. |
(2) Divide has been tuned to get reasonable performance. The algorithm |
is not the obvious one which most people seem to use, but is designed |
to take advantage of the characteristics of the 80386. I expect that |
it has been invented many times before I discovered it, but I have not |
seen it. It is based upon one of those ideas which one carries around |
for years without ever bothering to check it out. |
(3) The sqrt function has been tuned to get good performance. It is based |
upon Newton's classic method. Performance was improved by capitalizing |
upon the properties of Newton's method, and the code is once again |
structured taking account of the 80386 characteristics. |
(4) The trig, log, and exp functions are based in each case upon quasi- |
"optimal" polynomial approximations. My definition of "optimal" was |
based upon getting good accuracy with reasonable speed. |
(5) The argument reducing code for the trig function effectively uses |
a value of pi which is accurate to more than 128 bits. As a consequence, |
the reduced argument is accurate to more than 64 bits for arguments up |
to a few pi, and accurate to more than 64 bits for most arguments, |
even for arguments approaching 2^63. This is far superior to an |
80486, which uses a value of pi which is accurate to 66 bits. |
|
The code of the emulator is complicated slightly by the need to |
account for a limited form of re-entrancy. Normally, the emulator will |
emulate each FPU instruction to completion without interruption. |
However, it may happen that when the emulator is accessing the user |
memory space, swapping may be needed. In this case the emulator may be |
temporarily suspended while disk i/o takes place. During this time |
another process may use the emulator, thereby perhaps changing static |
variables. The code which accesses user memory is confined to five |
files: |
fpu_entry.c |
reg_ld_str.c |
load_store.c |
get_address.c |
errors.c |
As from version 1.12 of the emulator, no static variables are used |
(apart from those in the kernel's per-process tables). The emulator is |
therefore now fully re-entrant, rather than having just the restricted |
form of re-entrancy which is required by the Linux kernel. |
|
----------------------- Limitations of wm-FPU-emu ----------------------- |
|
There are a number of differences between the current wm-FPU-emu |
(version 1.20) and the 80486 FPU (apart from bugs). Some of the more |
important differences are listed below: |
|
The Roundup flag does not have much meaning for the transcendental |
functions and its 80486 value with these functions is likely to differ |
from its emulator value. |
|
In a few rare cases the Underflow flag obtained with the emulator will |
be different from that obtained with an 80486. This occurs when the |
following conditions apply simultaneously: |
(a) the operands have a higher precision than the current setting of the |
precision control (PC) flags. |
(b) the underflow exception is masked. |
(c) the magnitude of the exact result (before rounding) is less than 2^-16382. |
(d) the magnitude of the final result (after rounding) is exactly 2^-16382. |
(e) the magnitude of the exact result would be exactly 2^-16382 if the |
operands were rounded to the current precision before the arithmetic |
operation was performed. |
If all of these apply, the emulator will set the Underflow flag but a real |
80486 will not. |
|
NOTE: Certain formats of Extended Real are UNSUPPORTED. They are |
unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, |
and Unnormals. None of these will be generated by an 80486 or by the |
emulator. Do not use them. The emulator treats them differently in |
detail from the way an 80486 does. |
|
The emulator treats PseudoDenormals differently from an 80486. These |
numbers are in fact properly normalised numbers with the exponent |
offset by 1, and the emulator treats them as such. Unlike the 80486, |
the emulator does not generate a Denormal Operand exception for these |
numbers. The arithmetical results produced when using such a number as |
an operand are the same for the emulator and a real 80486 (apart from |
any slight precision difference for the transcendental functions). |
Neither the emulator nor an 80486 produces one of these numbers as the |
result of any arithmetic operation. An 80486 can keep one of these |
numbers in an FPU register with its identity as a PseudoDenormal, but |
the emulator will not; they are always converted to a valid number. |
|
Self modifying code can cause the emulator to fail. An example of such |
code is: |
movl %esp,[%ebx] |
fld1 |
The FPU instruction may be (usually will be) loaded into the pre-fetch |
queue of the cpu before the mov instruction is executed. If the |
destination of the 'movl' overlaps the FPU instruction then the bytes |
in the prefetch queue and memory will be inconsistent when the FPU |
instruction is executed. The emulator will be invoked but will not be |
able to find the instruction which caused the device-not-present |
exception. For this case, the emulator cannot emulate the behaviour of |
an 80486DX. |
|
Handling of the address size override prefix byte (0x67) has not been |
extensively tested yet. A major problem exists because using it in |
vm86 mode can cause a general protection fault. Address offsets |
greater than 0xffff appear to be illegal in vm86 mode but are quite |
acceptable (and work) in real mode. A small test program developed to |
check the addressing, and which runs successfully in real mode, |
crashes dosemu under Linux and also brings Windows down with a general |
protection fault message when run under the MS-DOS prompt of Windows |
3.1. (The program simply reads data from a valid address). |
|
The emulator supports 16-bit protected mode, with one difference from |
an 80486DX. An 80486DX will allow some floating point instructions to |
write a few bytes below the lowest address of the stack. The emulator |
will not allow this in 16-bit protected mode: no instructions are |
allowed to write outside the bounds set by the protection. |
|
----------------------- Performance of wm-FPU-emu ----------------------- |
|
Speed. |
----- |
|
The speed of floating point computation with the emulator will depend |
upon instruction mix. Relative performance is best for the instructions |
which require most computation. The simple instructions are adversely |
affected by the fpu instruction trap overhead. |
|
|
Timing: Some simple timing tests have been made on the emulator functions. |
The times include load/store instructions. All times are in microseconds |
measured on a 33MHz 386 with 64k cache. The Turbo C tests were under |
ms-dos, the next two columns are for emulators running with the djgpp |
ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, |
using libm4.0 (hard). |
|
function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu |
|
+ 60.5 154.8 76.5 139.4 |
- 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 |
* 71.0 190.8 79.6 146.6 |
/ 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 |
|
sin() 310.8 4692.0 319.0 398.5 |
cos() 284.4 4855.2 308.0 388.7 |
tan() 495.0 8807.1 394.9 504.7 |
atan() 328.9 4866.4 601.1 419.5-491.9 |
|
sqrt() 128.7 crashed 145.2 227.0 |
log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 |
exp() 479.1 6619.2 469.1 850.8 |
|
|
The performance under Linux is improved by the use of look-ahead code. |
The following results show the improvement which is obtained under |
Linux due to the look-ahead code. Also given are the times for the |
original Linux emulator with the 4.1 'soft' lib. |
|
[ Linus' note: I changed look-ahead to be the default under linux, as |
there was no reason not to use it after I had edited it to be |
disabled during tracing ] |
|
wm-FPU-emu w original w |
look-ahead 'soft' lib |
+ 106.4 190.2 |
- 108.6-111.6 192.4-216.2 |
* 113.4 193.1 |
/ 108.8-124.4 700.1-706.2 |
|
sin() 390.5 2642.0 |
cos() 381.5 2767.4 |
tan() 496.5 3153.3 |
atan() 367.2-435.5 2439.4-3396.8 |
|
sqrt() 195.1 4732.5 |
log() 358.0-387.5 3359.2-3390.3 |
exp() 619.3 4046.4 |
|
|
These figures are now somewhat out-of-date. The emulator has become |
progressively slower for most functions as more of the 80486 features |
have been implemented. |
|
|
----------------------- Accuracy of wm-FPU-emu ----------------------- |
|
|
The accuracy of the emulator is in almost all cases equal to or better |
than that of an Intel 80486 FPU. |
|
The results of the basic arithmetic functions (+,-,*,/), and fsqrt |
match those of an 80486 FPU. They are the best possible; the error for |
these never exceeds 1/2 an lsb. The fprem and fprem1 instructions |
return exact results; they have no error. |
|
|
The following table compares the emulator accuracy for the sqrt(), |
trig and log functions against the Turbo C "emulator". For this table, |
each function was tested at about 400 points. Ideal worst-case results |
would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for |
arguments greater than pi/4 can be thought of as being related to the |
precision of the argument x; e.g. an argument of pi/2-(1e-10) which is |
accurate to 64 bits can result in a relative accuracy in cos() of |
about 64 + log2(cos(x)) = 31 bits. |
|
|
Function Tested x range Worst result Turbo C |
(relative bits) |
|
sqrt(x) 1 .. 2 64.1 63.2 |
atan(x) 1e-10 .. 200 64.2 62.8 |
cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 |
64.1 (x = pi/2-(1e-10)) 31.9 |
sin(x) 1e-10 .. pi/2 64.0 62.8 |
tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 |
64.1 (x = pi/2-(1e-10)) 31.9 |
exp(x) 0 .. 1 63.1 ** 62.9 |
log(x) 1+1e-6 .. 2 63.8 ** 62.1 |
|
** The accuracy for exp() and log() is low because the FPU (emulator) |
does not compute them directly; two operations are required. |
|
|
The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or |
later) for 'float' variables (24 bit precision numbers) when precision |
control is set to 24, 53 or 64 bits, and for 'double' variables (53 |
bit precision numbers) when precision control is set to 53 bits (a |
properly performing FPU cannot pass the 'paranoia' tests for 'double' |
variables when precision control is set to 64 bits). |
|
The code for reducing the argument for the trig functions (fsin, fcos, |
fptan and fsincos) has been improved and now effectively uses a value |
for pi which is accurate to more than 128 bits precision. As a |
consequence, the accuracy of these functions for large arguments has |
been dramatically improved (and is now very much better than an 80486 |
FPU). There is also now no degradation of accuracy for fcos and fptan |
for operands close to pi/2. Measured results are (note that the |
definition of accuracy has changed slightly from that used for the |
above table): |
|
Function Tested x range Worst result |
(absolute bits) |
|
cos(x) 0 .. 9.22e+18 62.0 |
sin(x) 1e-16 .. 9.22e+18 62.1 |
tan(x) 1e-16 .. 9.22e+18 61.8 |
|
It is possible with some effort to find very large arguments which |
give much degraded precision. For example, the integer number |
8227740058411162616.0 |
is within about 10e-7 of a multiple of pi. To find the tan (for |
example) of this number to 64 bits precision it would be necessary to |
have a value of pi which had about 150 bits precision. The FPU |
emulator computes the result to about 42.6 bits precision (the correct |
result is about -9.739715e-8). On the other hand, an 80486 FPU returns |
0.01059, which in relative terms is hopelessly inaccurate. |
|
For arguments close to critical angles (which occur at multiples of |
pi/2) the emulator is more accurate than an 80486 FPU. For very large |
arguments, the emulator is far more accurate. |
|
|
Prior to version 1.20 of the emulator, the accuracy of the results for |
the transcendental functions (in their principal range) was not as |
good as the results from an 80486 FPU. From version 1.20, the accuracy |
has been considerably improved and these functions now give measured |
worst-case results which are better than the worst-case results given |
by an 80486 FPU. |
|
The following table gives the measured results for the emulator. The |
number of randomly selected arguments in each case is about half a |
million. The group of three columns gives the frequency of the given |
accuracy in number of times per million, thus the second of these |
columns shows that an accuracy of between 63.80 and 63.89 bits was |
found at a rate of 133 times per one million measurements for fsin. |
The results show that the fsin, fcos and fptan instructions return |
results which are in error (i.e. less accurate than the best possible |
result (which is 64 bits)) for about one per cent of all arguments |
between -pi/2 and +pi/2. The other instructions have a lower |
frequency of results which are in error. The last two columns give |
the worst accuracy which was found (in bits) and the approximate value |
of the argument which produced it. |
|
frequency (per M) |
------------------- --------------- |
instr arg range # tests 63.7 63.8 63.9 worst at arg |
bits bits bits bits |
----- ------------ ------- ---- ---- ----- ----- -------- |
fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 |
fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 |
fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 |
fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) |
fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) |
fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) |
f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 |
|
|
Tests performed on an 80486 FPU showed results of lower accuracy. The |
following table gives the results which were obtained with an AMD |
486DX2/66 (other tests indicate that an Intel 486DX produces |
identical results). The tests were basically the same as those used |
to measure the emulator (the values, being random, were in general not |
the same). The total number of tests for each instruction is given |
at the end of the table; in each case about 100k tests were performed. |
Another line of figures at the end of the table shows that most of the |
instructions return results which are in error for more than 10 |
percent of the arguments tested. |
|
The numbers in the body of the table give the approx number of times a |
result of the given accuracy in bits (given in the left-most column) |
was obtained per one million arguments. For three of the instructions, |
two columns of results are given: * The second column for f2xm1 gives |
the number of cases where the results of the first column were for a |
positive argument, this shows that this instruction gives better |
results for positive arguments than it does for negative. * In the |
cases of fcos and fptan, the first column gives the results when all |
cases where arguments greater than 1.5 were removed from the results |
given in the second column. Unlike the emulator, an 80486 FPU returns |
results of relatively poor accuracy for these instructions when the |
argument approaches pi/2. The table does not show those cases when the |
accuracy of the results was less than 62 bits, which occurs quite |
often for fsin and fptan when the argument approaches pi/2. This poor |
accuracy is discussed above in relation to the Turbo C "emulator", and |
the accuracy of the value of pi. |
|
|
bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan |
62.0 0 0 0 0 437 0 0 0 0 925 |
62.1 0 0 10 0 894 0 0 0 0 1023 |
62.2 14 0 0 0 1033 0 0 0 0 945 |
62.3 57 0 0 0 1202 0 0 0 0 1023 |
62.4 385 0 0 10 1292 0 23 0 0 1178 |
62.5 1140 0 0 119 1649 0 39 0 0 1149 |
62.6 2037 0 0 189 1620 0 16 0 0 1169 |
62.7 5086 14 0 646 2315 10 101 35 39 1402 |
62.8 8818 86 0 984 3050 59 287 131 224 2036 |
62.9 11340 1355 0 2126 4153 79 605 357 321 1948 |
63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 |
63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 |
63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 |
63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 |
63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 |
63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 |
63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 |
63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 |
63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 |
63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 |
|
Per cent with error: |
30.9 3.2 18.5 9.8 13.1 11.6 17.4 |
Total arguments tested: |
70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 |
|
|
------------------------- Contributors ------------------------------- |
|
A number of people have contributed to the development of the |
emulator, often by just reporting bugs, sometimes with suggested |
fixes, and a few kind people have provided me with access in one way |
or another to an 80486 machine. Contributors include (to those people |
who I may have forgotten, please forgive me): |
|
Linus Torvalds |
Tommy.Thorn@daimi.aau.dk |
Andrew.Tridgell@anu.edu.au |
Nick Holloway, alfie@dcs.warwick.ac.uk |
Hermano Moura, moura@dcs.gla.ac.uk |
Jon Jagger, J.Jagger@scp.ac.uk |
Lennart Benschop |
Brian Gallew, geek+@CMU.EDU |
Thomas Staniszewski, ts3v+@andrew.cmu.edu |
Martin Howell, mph@plasma.apana.org.au |
M Saggaf, alsaggaf@athena.mit.edu |
Peter Barker, PETER@socpsy.sci.fau.edu |
tom@vlsivie.tuwien.ac.at |
Dan Russel, russed@rpi.edu |
Daniel Carosone, danielce@ee.mu.oz.au |
cae@jpmorgan.com |
Hamish Coleman, t933093@minyos.xx.rmit.oz.au |
Bruce Evans, bde@kralizec.zeta.org.au |
Timo Korvola, Timo.Korvola@hut.fi |
Rick Lyons, rick@razorback.brisnet.org.au |
Rick, jrs@world.std.com |
|
...and numerous others who responded to my request for help with |
a real 80486. |
|
/fpu_trig.c
0,0 → 1,1718
/*---------------------------------------------------------------------------+ |
| fpu_trig.c | |
| | |
| Implementation of the FPU "transcendental" functions. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "fpu_emu.h" |
#include "status_w.h" |
#include "control_w.h" |
#include "reg_constant.h" |
|
|
static void rem_kernel(unsigned long long st0, unsigned long long *y, |
unsigned long long st1, |
unsigned long long q, int n); |
|
#define BETTER_THAN_486 |
|
#define FCOS 4 |
/* Not needed now with new code |
#define FPTAN 1 |
*/ |
|
/* Used only by fptan, fsin, fcos, and fsincos. */ |
/* This routine produces very accurate results, similar to |
using a value of pi with more than 128 bits precision. */ |
/* Limited measurements show no results worse than 64 bit precision |
except for the results for arguments close to 2^63, where the |
precision of the result sometimes degrades to about 63.9 bits */ |
static int trig_arg(FPU_REG *X, int even) |
{ |
FPU_REG tmp; |
unsigned long long q; |
int old_cw = control_word, saved_status = partial_status; |
|
if ( X->exp >= EXP_BIAS + 63 ) |
{ |
partial_status |= SW_C2; /* Reduction incomplete. */ |
return -1; |
} |
|
control_word &= ~CW_RC; |
control_word |= RC_CHOP; |
|
reg_div(X, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f); |
round_to_int(&tmp); /* Fortunately, this can't overflow |
to 2^64 */ |
q = significand(&tmp); |
if ( q ) |
{ |
rem_kernel(significand(X), |
&significand(&tmp), |
significand(&CONST_PI2), |
q, X->exp - CONST_PI2.exp); |
tmp.exp = CONST_PI2.exp; |
normalize(&tmp); |
reg_move(&tmp, X); |
} |
|
#ifdef FPTAN |
if ( even == FPTAN ) |
{ |
if ( ((X->exp >= EXP_BIAS) || |
((X->exp == EXP_BIAS-1) |
&& (X->sigh >= 0xc90fdaa2))) ^ (q & 1) ) |
even = FCOS; |
else |
even = 0; |
} |
#endif FPTAN |
|
if ( (even && !(q & 1)) || (!even && (q & 1)) ) |
{ |
reg_sub(&CONST_PI2, X, X, FULL_PRECISION); |
#ifdef BETTER_THAN_486 |
/* So far, the results are exact but based upon a 64 bit |
precision approximation to pi/2. The technique used |
now is equivalent to using an approximation to pi/2 which |
is accurate to about 128 bits. */ |
if ( (X->exp <= CONST_PI2extra.exp + 64) || (q > 1) ) |
{ |
/* This code gives the effect of having p/2 to better than |
128 bits precision. */ |
significand(&tmp) = q + 1; |
tmp.exp = EXP_BIAS + 63; |
tmp.tag = TW_Valid; |
normalize(&tmp); |
reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION); |
reg_add(X, &tmp, X, FULL_PRECISION); |
if ( X->sign == SIGN_NEG ) |
{ |
/* CONST_PI2extra is negative, so the result of the addition |
can be negative. This means that the argument is actually |
in a different quadrant. The correction is always < pi/2, |
so it can't overflow into yet another quadrant. */ |
X->sign = SIGN_POS; |
q++; |
} |
} |
#endif BETTER_THAN_486 |
} |
#ifdef BETTER_THAN_486 |
else |
{ |
/* So far, the results are exact but based upon a 64 bit |
precision approximation to pi/2. The technique used |
now is equivalent to using an approximation to pi/2 which |
is accurate to about 128 bits. */ |
if ( ((q > 0) && (X->exp <= CONST_PI2extra.exp + 64)) || (q > 1) ) |
{ |
/* This code gives the effect of having p/2 to better than |
128 bits precision. */ |
significand(&tmp) = q; |
tmp.exp = EXP_BIAS + 63; |
tmp.tag = TW_Valid; |
normalize(&tmp); |
reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION); |
reg_sub(X, &tmp, X, FULL_PRECISION); |
if ( (X->exp == CONST_PI2.exp) && |
((X->sigh > CONST_PI2.sigh) |
|| ((X->sigh == CONST_PI2.sigh) |
&& (X->sigl > CONST_PI2.sigl))) ) |
{ |
/* CONST_PI2extra is negative, so the result of the |
subtraction can be larger than pi/2. This means |
that the argument is actually in a different quadrant. |
The correction is always < pi/2, so it can't overflow |
into yet another quadrant. */ |
reg_sub(&CONST_PI, X, X, FULL_PRECISION); |
q++; |
} |
} |
} |
#endif BETTER_THAN_486 |
|
control_word = old_cw; |
partial_status = saved_status & ~SW_C2; /* Reduction complete. */ |
|
return (q & 3) | even; |
} |
|
|
/* Convert a long to register */ |
void convert_l2reg(long const *arg, FPU_REG *dest) |
{ |
long num = *arg; |
|
if (num == 0) |
{ reg_move(&CONST_Z, dest); return; } |
|
if (num > 0) |
dest->sign = SIGN_POS; |
else |
{ num = -num; dest->sign = SIGN_NEG; } |
|
dest->sigh = num; |
dest->sigl = 0; |
dest->exp = EXP_BIAS + 31; |
dest->tag = TW_Valid; |
normalize(dest); |
} |
|
|
/*
 * Handle the operands of a one-operand instruction which are not valid
 * numbers, zeros or infinities.
 *
 * NaN:    a signaling NaN (bit 0x40000000 of sigh clear) raises
 *         EX_Invalid; if CW_Invalid is set in the control word the NaN
 *         is then converted to a quiet NaN in place.  A quiet NaN is
 *         left untouched.  Either way a NaN stays in st(0).
 * Empty:  stack_underflow() is called (puts a QNaN in st(0)).
 * Other:  reported as an internal error, but only in PARANOID builds.
 */
static void single_arg_error(FPU_REG *st0_ptr)
{
  if ( st0_ptr->tag == TW_NaN ) {
    int is_signaling = !(st0_ptr->sigh & 0x40000000);

    if ( is_signaling ) {
      { EXCEPTION(EX_Invalid); }
      if ( control_word & CW_Invalid )
        st0_ptr->sigh |= 0x40000000;   /* Convert to a QNaN */
    }
  }
  else if ( st0_ptr->tag == TW_Empty ) {
    stack_underflow();   /* Puts a QNaN in st(0) */
  }
#ifdef PARANOID
  else {
    EXCEPTION(EX_INTERNAL|0x0112);
  }
#endif /* PARANOID */
}
|
|
/*
 * NaN handling for a one-operand instruction which pushes a second
 * result onto the stack.
 *
 * A quiet NaN is duplicated: the stack is pushed and the NaN is copied
 * into the new st(0).  A signaling NaN (bit 0x40000000 of sigh clear)
 * first raises EX_Invalid; only when CW_Invalid is set in the control
 * word is it converted to a quiet NaN and duplicated in the same way,
 * otherwise nothing more is done.
 *
 * Any tag other than TW_NaN is ignored, except in PARANOID builds where
 * it is reported as an internal error.
 */
static void single_arg_2_error(FPU_REG *st0_ptr)
{
  FPU_REG *st_new_ptr;

  if ( st0_ptr->tag == TW_NaN ) {
    if ( !(st0_ptr->sigh & 0x40000000) ) {
      /* A signaling NaN */
      { EXCEPTION(EX_Invalid); }
      if ( !(control_word & CW_Invalid) )
        return;                       /* no masked response */

      /* The masked response: convert to a QNaN */
      st0_ptr->sigh |= 0x40000000;
    }

    /* Duplicate the (now quiet) NaN; it ends up in st(0) and st(1). */
    st_new_ptr = &st(-1);
    push();
    reg_move(&st(1), st_new_ptr);
    return;
  }

#ifdef PARANOID
  { EXCEPTION(EX_INTERNAL|0x0112); }
#endif /* PARANOID */
}
|
|
/*---------------------------------------------------------------------------*/ |
|
/*
 * F2XM1 applied to st(0), in place.
 *
 * Valid operand:  poly_2xm1() is used when the exponent is negative;
 *                 for an exponent >= 0 the operand is left alone (on an
 *                 80486 the result is undefined).  A result with
 *                 exp <= EXP_UNDER is passed to arith_underflow(), and
 *                 the precision flag is always set "up".
 * Zero:           unchanged.
 * +Infinity:      unchanged.
 * -Infinity:      replaced by -1.
 * Anything else:  handed to single_arg_error().
 */
static void f2xm1(FPU_REG *st0_ptr)
{
  char operand_tag;

  clear_C1();
  operand_tag = st0_ptr->tag;

  if ( operand_tag == TW_Valid ) {
    /* For an 80486 FPU, the result is undefined when exp >= 0,
       so nothing is computed in that case. */
    if ( st0_ptr->exp < 0 ) {
#ifdef DENORM_OPERAND
      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
        return;
#endif /* DENORM_OPERAND */
      /* poly_2xm1(x) requires 0 < x < 1. */
      poly_2xm1(st0_ptr, st0_ptr);
    }

    if ( st0_ptr->exp <= EXP_UNDER ) {
      /* A denormal result has been produced.
         Precision must have been lost, this is always
         an underflow. */
      arith_underflow(st0_ptr);
    }
    set_precision_flag_up();  /* 80486 appears to always do this */
    return;
  }

  if ( operand_tag == TW_Zero )
    return;

  if ( operand_tag == TW_Infinity ) {
    if ( st0_ptr->sign == SIGN_NEG ) {
      /* -infinity gives -1 (p16-10) */
      reg_move(&CONST_1, st0_ptr);
      st0_ptr->sign = SIGN_NEG;
    }
    return;
  }

  single_arg_error(st0_ptr);
}
|
|
/*
 * FPTAN: replace st(0) by its tangent and then push 1.0.
 *
 * Empty st(0):  stack underflow, which has priority over everything
 *               else; when CW_Invalid is set a second underflow result
 *               is produced in a newly pushed st(0).
 * Valid:        arguments with exp > EXP_BIAS - 40 are reduced with
 *               trig_arg() and evaluated with poly_tan(); if trig_arg()
 *               reports the operand as out of range, st(0) is restored
 *               and nothing is pushed.  For smaller arguments the result
 *               equals the argument (underflow may happen).
 * Infinity:     invalid operation, as on the 80486.
 * Zero:         st(0) is kept, 1.0 is pushed and setcc(0) is called.
 * Other tags:   handed to single_arg_2_error().
 *
 * NOTE(review): st_new_ptr is used below without a visible assignment on
 * most paths; presumably the STACK_OVERFLOW macro assigns it -- confirm
 * against its definition.
 */
static void fptan(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  FPU_REG *st_new_ptr;
  int q;
  char arg_sign = st0_ptr->sign;

  /* Stack underflow has higher priority */
  if ( st0_tag == TW_Empty ) {
    stack_underflow();      /* Puts a QNaN in st(0) */
    if ( control_word & CW_Invalid ) {
      st_new_ptr = &st(-1);
      push();
      stack_underflow();    /* Puts a QNaN in the new st(0) */
    }
    return;
  }

  if ( STACK_OVERFLOW ) {
    stack_overflow();
    return;
  }

  if ( st0_tag == TW_Valid ) {
    if ( st0_ptr->exp > EXP_BIAS - 40 ) {
      /* Work on |x|; the sign is put back afterwards. */
      st0_ptr->sign = SIGN_POS;
      q = trig_arg(st0_ptr, 0);
      if ( q == -1 ) {
        /* Operand is out of range */
        st0_ptr->sign = arg_sign;   /* restore st(0) */
        return;
      }
      poly_tan(st0_ptr, st0_ptr);
      st0_ptr->sign = (q & 1) ^ arg_sign;
      set_precision_flag_up();  /* We do not really know if up or down */
    }
    else {
      /* For a small arg, the result == the argument */
      /* Underflow may happen */
      if ( st0_ptr->exp <= EXP_UNDER ) {
#ifdef DENORM_OPERAND
        if ( denormal_operand() )
          return;
#endif /* DENORM_OPERAND */
        /* A denormal result has been produced.
           Precision must have been lost, this is always
           an underflow. */
        if ( arith_underflow(st0_ptr) )
          return;
      }
      set_precision_flag_down();  /* Must be down. */
    }

    /* The tangent is in place; now push the constant 1.0. */
    push();
    reg_move(&CONST_1, st_new_ptr);
    return;
  }

  if ( st0_tag == TW_Infinity ) {
    /* The 80486 treats infinity as an invalid operand */
    arith_invalid(st0_ptr);
    if ( control_word & CW_Invalid ) {
      st_new_ptr = &st(-1);
      push();
      arith_invalid(st_new_ptr);
    }
    return;
  }

  if ( st0_tag == TW_Zero ) {
    push();
    reg_move(&CONST_1, st_new_ptr);
    setcc(0);
    return;
  }

  single_arg_2_error(st0_ptr);
}
|
|
/*
 * FXTRACT: split st(0) into exponent and significand.
 *
 * Valid:     the stack is pushed; the new st(0) receives a copy of the
 *            operand with its exponent set to EXP_BIAS, and the old
 *            register (now st(1)) receives the operand's exponent minus
 *            EXP_BIAS, converted with convert_l2reg().
 * Zero:      divide_by_zero(SIGN_NEG, ...) is called on the operand; if
 *            it returns zero, a zero carrying the operand's sign is
 *            pushed.
 * Infinity:  the operand is made positive and an infinity carrying the
 *            original sign is pushed.
 * NaN:       real_2op_NaN() is called; if it returns zero, the stack is
 *            pushed and the register is copied into the new st(0).
 * Empty:     stack underflow; see the note in that branch.
 * Other:     reported as an internal error, but only in PARANOID builds.
 *
 * NOTE(review): st_new_ptr has no visible assignment here; presumably
 * the STACK_OVERFLOW macro assigns it -- confirm against its definition.
 */
static void fxtract(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  FPU_REG *st_new_ptr;
  register FPU_REG *st1_ptr = st0_ptr;  /* anticipate */

  if ( STACK_OVERFLOW )
    { stack_overflow(); return; }
  clear_C1();
  if ( st0_tag == TW_Valid )
    {
      long e;

#ifdef DENORM_OPERAND
      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
        return;
#endif /* DENORM_OPERAND */

      push();
      reg_move(st1_ptr, st_new_ptr);
      st_new_ptr->exp = EXP_BIAS;
      e = st1_ptr->exp - EXP_BIAS;
      convert_l2reg(&e, st1_ptr);
      return;
    }
  else if ( st0_tag == TW_Zero )
    {
      char sign = st0_ptr->sign;
      if ( divide_by_zero(SIGN_NEG, st0_ptr) )
        return;
      push();
      reg_move(&CONST_Z, st_new_ptr);
      st_new_ptr->sign = sign;
      return;
    }
  else if ( st0_tag == TW_Infinity )
    {
      char sign = st0_ptr->sign;
      st0_ptr->sign = SIGN_POS;
      push();
      reg_move(&CONST_INF, st_new_ptr);
      st_new_ptr->sign = sign;
      return;
    }
  else if ( st0_tag == TW_NaN )
    {
      if ( real_2op_NaN(st0_ptr, st0_ptr, st0_ptr) )
        return;
      push();
      reg_move(st1_ptr, st_new_ptr);
      return;
    }
  else if ( st0_tag == TW_Empty )
    {
      /* Is this the correct behaviour? */
      /* The control word is tested with the CW_Invalid mask bit, as in
         the other functions of this file, rather than with the
         EX_Invalid exception flag. */
      if ( control_word & CW_Invalid )
        {
          stack_underflow();
          push();
          stack_underflow();
        }
      else
        EXCEPTION(EX_StackUnder);
    }
#ifdef PARANOID
  else
    EXCEPTION(EX_INTERNAL | 0x119);
#endif /* PARANOID */
}
|
|
/* fdecstp: clear C1 and decrement the stack-top index `top`.
   The st0_ptr argument is not used by this function. */
static void fdecstp(FPU_REG *st0_ptr) |
{ |
clear_C1(); |
top--; /* st0_ptr will be fixed in math_emulate() before the next instr */ |
} |
|
/* fincstp: clear C1 and increment the stack-top index `top`.
   The st0_ptr argument is not used by this function. */
static void fincstp(FPU_REG *st0_ptr) |
{ |
clear_C1(); |
top++; /* st0_ptr will be fixed in math_emulate() before the next instr */ |
} |
|
|
/*
 * fsqrt_: replace st(0) by its square root.
 *
 * Zero is returned unchanged, +Infinity is returned unchanged, negative
 * valid numbers and -Infinity raise the invalid-operation response, and
 * any other tag is passed to single_arg_error().
 */
static void fsqrt_(FPU_REG *st0_ptr)
{
  const char st0_tag = st0_ptr->tag;
  int expon;

  clear_C1();

  if ( st0_tag == TW_Zero )
    return;

  if ( st0_tag == TW_Infinity )
    {
      if ( st0_ptr->sign == SIGN_NEG )
	arith_invalid(st0_ptr);   /* sqrt(-Infinity) is invalid */
      return;
    }

  if ( st0_tag != TW_Valid )
    {
      single_arg_error(st0_ptr);
      return;
    }

  /* From here on st(0) is a valid number. */
  if ( st0_ptr->sign == SIGN_NEG )
    {
      arith_invalid(st0_ptr);     /* sqrt(negative) is invalid */
      return;
    }

#ifdef DENORM_OPERAND
  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
    return;
#endif /* DENORM_OPERAND */

  /* Split off the exponent so that wm_sqrt() sees a value in [1.0 .. 4.0);
     half of the removed exponent is added back afterwards. */
  expon = st0_ptr->exp - EXP_BIAS;
  st0_ptr->exp = EXP_BIAS + (expon & 1);

  wm_sqrt(st0_ptr, control_word);

  st0_ptr->exp += expon >> 1;
  st0_ptr->sign = SIGN_POS;
}
|
|
/*
 * frndint_: round st(0) to an integer, in place.
 *
 * Zero and infinity are left untouched, as are valid numbers whose
 * exponent exceeds EXP_BIAS+63.  Tags other than valid/zero/infinity go
 * to single_arg_error().
 */
static void frndint_(FPU_REG *st0_ptr)
{
  const char st0_tag = st0_ptr->tag;
  int flags;

  if ( st0_tag != TW_Valid )
    {
      if ( (st0_tag != TW_Zero) && (st0_tag != TW_Infinity) )
	single_arg_error(st0_ptr);
      return;
    }

  if ( st0_ptr->exp > EXP_BIAS+63 )
    return;

#ifdef DENORM_OPERAND
  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
    return;
#endif /* DENORM_OPERAND */

  /* Fortunately, this can't overflow to 2^64 */
  flags = round_to_int(st0_ptr);
  if ( flags )
    set_precision_flag(flags);

  /* Re-attach the exponent of a 64 bit integer and renormalize. */
  st0_ptr->exp = EXP_BIAS + 63;
  normalize(st0_ptr);
}
|
|
/*
 * fsin: replace st(0) by its sine.
 *
 * Zero only sets the condition codes to 0, infinity is an invalid operand,
 * and tags other than valid/zero/infinity go to single_arg_error().  Valid
 * arguments with exp <= EXP_BIAS - 40 are returned unchanged (the result
 * equals the argument); larger ones are reduced with trig_arg() and
 * evaluated with poly_sine().
 */
static void fsin(FPU_REG *st0_ptr)
{
  const char st0_tag = st0_ptr->tag;
  const char arg_sign = st0_ptr->sign;
  FPU_REG rv;
  int q;

  if ( st0_tag == TW_Zero )
    {
      setcc(0);
      return;
    }

  if ( st0_tag == TW_Infinity )
    {
      /* The 80486 treats infinity as an invalid operand */
      arith_invalid(st0_ptr);
      return;
    }

  if ( st0_tag != TW_Valid )
    {
      single_arg_error(st0_ptr);
      return;
    }

  if ( st0_ptr->exp <= EXP_BIAS - 40 )
    {
      /* For a small arg, the result == the argument.
	 Underflow may happen. */
      if ( st0_ptr->exp <= EXP_UNDER )
	{
#ifdef DENORM_OPERAND
	  if ( denormal_operand() )
	    return;
#endif /* DENORM_OPERAND */
	  /* A denormal result has been produced.  Precision must have
	     been lost, so this is always an underflow. */
	  arith_underflow(st0_ptr);
	  return;
	}

      set_precision_flag_up();   /* Must be up. */
      return;
    }

  /* Work on |st(0)|; the sign is folded back in at the end. */
  st0_ptr->sign = SIGN_POS;
  q = trig_arg(st0_ptr, 0);
  if ( q == -1 )
    {
      /* Operand is out of range */
      st0_ptr->sign = arg_sign;  /* restore st(0) */
      return;
    }

  poly_sine(st0_ptr, &rv);

  if ( q & 2 )
    rv.sign ^= SIGN_POS ^ SIGN_NEG;
  rv.sign ^= arg_sign;
  reg_move(&rv, st0_ptr);

  /* We do not really know if up or down */
  set_precision_flag_up();
}
|
|
/*
 * f_cos: replace *arg by its cosine.
 *
 * Returns 0 when a result has been stored in *arg and 1 when it has not
 * (operand out of range, denormal-operand response, infinity, or any tag
 * that is handed to single_arg_error()).
 */
static int f_cos(FPU_REG *arg)
{
  const char arg_sign = arg->sign;
  const char arg_tag = arg->tag;
  FPU_REG rv;
  int q;

  if ( arg_tag == TW_Zero )
    {
      reg_move(&CONST_1, arg);
      setcc(0);
      return 0;
    }

  if ( arg_tag == TW_Infinity )
    {
      /* The 80486 treats infinity as an invalid operand */
      arith_invalid(arg);
      return 1;
    }

  if ( arg_tag != TW_Valid )
    {
      single_arg_error(arg);  /* requires arg == &st(0) */
      return 1;
    }

  if ( arg->exp <= EXP_BIAS - 40 )
    {
      /* Tiny argument: the result is 1.0 */
#ifdef DENORM_OPERAND
      if ( (arg->exp <= EXP_UNDER) && (denormal_operand()) )
	return 1;
#endif /* DENORM_OPERAND */

      setcc(0);
      reg_move(&CONST_1, arg);
#ifdef PECULIAR_486
      set_precision_flag_down();  /* 80486 appears to do this. */
#else
      set_precision_flag_up();  /* Must be up. */
#endif /* PECULIAR_486 */
      return 0;
    }

  arg->sign = SIGN_POS;

  if ( (arg->exp < EXP_BIAS)
       || ((arg->exp == EXP_BIAS)
	   && (significand(arg) <= 0xc90fdaa22168c234LL)) )
    {
      /* Small enough for the cosine polynomial to be used directly. */
      poly_cos(arg, &rv);
    }
  else
    {
      q = trig_arg(arg, FCOS);
      if ( q == -1 )
	{
	  /* Operand is out of range */
	  arg->sign = arg_sign;  /* restore st(0) */
	  return 1;
	}

      poly_sine(arg, &rv);

      if ( (q+1) & 2 )
	rv.sign ^= SIGN_POS ^ SIGN_NEG;
    }

  reg_move(&rv, arg);

  /* We do not really know if up or down */
  set_precision_flag_down();

  return 0;
}
|
|
/* fcos: instruction entry point; applies f_cos() to st(0) and
   discards its return value. */
static void fcos(FPU_REG *st0_ptr) |
{ |
f_cos(st0_ptr); |
} |
|
|
/*
 * fsincos: replace st(0) by its sine and push its cosine.
 *
 * The cosine is computed first on a private copy; only if f_cos() reports
 * success (returns 0) is fsin() applied to st(0) and the cosine pushed.
 */
static void fsincos(FPU_REG *st0_ptr)
{
  const char st0_tag = st0_ptr->tag;
  FPU_REG *st_new_ptr;
  FPU_REG arg;

  /* Stack underflow has higher priority */
  if ( st0_tag == TW_Empty )
    {
      stack_underflow();  /* Puts a QNaN in st(0) */
      if ( control_word & CW_Invalid )
	{
	  st_new_ptr = &st(-1);
	  push();
	  stack_underflow();  /* Puts a QNaN in the new st(0) */
	}
      return;
    }

  if ( STACK_OVERFLOW )
    {
      stack_overflow();
      return;
    }

  switch ( st0_tag )
    {
    case TW_NaN:
      single_arg_2_error(st0_ptr);
      return;

    case TW_Infinity:
      /* The 80486 treats infinity as an invalid operand */
      if ( !arith_invalid(st0_ptr) )
	{
	  /* NOTE(review): the original comment called this the "unmasked
	     response", but the parallel code paths in this file push a
	     second result only when CW_Invalid is set, so a zero return
	     presumably means the exception was masked -- confirm against
	     arith_invalid(). */
	  push();
	  arith_invalid(st_new_ptr);
	}
      return;

    default:
      break;
    }

  reg_move(st0_ptr, &arg);
  if ( f_cos(&arg) )
    return;

  fsin(st0_ptr);
  push();
  reg_move(&arg, st_new_ptr);
}
|
|
/*---------------------------------------------------------------------------*/ |
/* The following all require two arguments: st(0) and st(1) */ |
|
/* A lean, mean kernel for the fprem instructions. This relies upon |
the division and rounding to an integer in do_fprem giving an |
exact result. Because of this, rem_kernel() needs to deal only with |
the least significant 64 bits, the more significant bits of the |
result must be zero. |
*/ |
/*
 * rem_kernel: store (st0 << n) - st1 * q in *y, keeping only the least
 * significant 64 bits (all arithmetic is modulo 2^64).
 *
 *   st0  significand of the dividend
 *   y    receives the 64 bit result
 *   st1  significand of the divisor
 *   q    integer quotient
 *   n    left shift applied to st0; must satisfy 0 <= n <= 63
 *
 * The previous implementation did the multiply/subtract in i386 inline
 * assembly.  That version relied on a multi-line string literal (no longer
 * accepted by current compilers), declared the read-modify-write halves of
 * x as write-only "=m" outputs, and named 16 bit registers in its clobber
 * list.  Its three 32x32 multiplies accumulate exactly the low 64 bits of
 * st1 * q, which is what unsigned long long multiplication yields, so the
 * plain C expression below produces the same value.
 */
static void rem_kernel(unsigned long long st0, unsigned long long *y,
		       unsigned long long st1,
		       unsigned long long q, int n)
{
  *y = (st0 << n) - st1 * q;
}
|
|
/* Remainder of st(0) / st(1) */ |
/* This routine produces exact results, i.e. there is never any |
rounding or truncation, etc of the result. */ |
/* do_fprem: shared worker for fprem() and fprem1().
   Replaces st(0) with the remainder of st(0) / st(1).  `round` is RC_CHOP
   when called from fprem() and RC_RND when called from fprem1().
   When the exponent difference is below 64 the condition codes are built
   from the low three bits of the quotient (bit 2 -> C0, bit 1 -> C3,
   bit 0 -> C1); for a difference of 64 or more only a partial reduction
   is performed and C2 is set.  The rounding-control field of control_word
   is temporarily forced to RC_CHOP and restored before returning. */
static void do_fprem(FPU_REG *st0_ptr, int round) |
{ |
FPU_REG *st1_ptr = &st(1); |
char st1_tag = st1_ptr->tag; |
char st0_tag = st0_ptr->tag; |
char sign = st0_ptr->sign; |
|
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) |
{ |
FPU_REG tmp; |
int old_cw = control_word; |
int expdif = st0_ptr->exp - st1_ptr->exp; |
long long q; |
unsigned short saved_status; |
int cc = 0; |
|
#ifdef DENORM_OPERAND |
if ( ((st0_ptr->exp <= EXP_UNDER) || |
(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
/* We want the status following the denorm tests, but don't want |
the status changed by the arithmetic operations. */ |
saved_status = partial_status; |
control_word &= ~CW_RC; |
control_word |= RC_CHOP; |
|
if (expdif < 64) |
{ |
/* This should be the most common case */ |
|
if ( expdif > -2 ) |
{ |
reg_div(st0_ptr, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f); |
|
if ( tmp.exp >= EXP_BIAS ) |
{ |
round_to_int(&tmp); /* Fortunately, this can't overflow |
to 2^64 */ |
q = significand(&tmp); |
|
rem_kernel(significand(st0_ptr), |
&significand(&tmp), |
significand(st1_ptr), |
q, expdif); |
|
tmp.exp = st1_ptr->exp; |
} |
else |
{ |
reg_move(st0_ptr, &tmp); |
q = 0; |
} |
tmp.sign = sign; |
|
/* Only the fprem1() caller passes RC_RND, so only it takes this path. */
if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) ) |
{ |
/* We may need to subtract st(1) once more, |
to get a result <= 1/2 of st(1). */ |
unsigned long long x; |
expdif = st1_ptr->exp - tmp.exp; |
if ( expdif <= 1 ) |
{ |
if ( expdif == 0 ) |
x = significand(st1_ptr) - significand(&tmp); |
else /* expdif is 1 */ |
x = (significand(st1_ptr) << 1) - significand(&tmp); |
if ( (x < significand(&tmp)) || |
/* or equi-distant (from 0 & st(1)) and q is odd */ |
((x == significand(&tmp)) && (q & 1) ) ) |
{ |
tmp.sign ^= (SIGN_POS^SIGN_NEG); |
significand(&tmp) = x; |
q++; |
} |
} |
} |
|
if (q & 4) cc |= SW_C0; |
if (q & 2) cc |= SW_C3; |
if (q & 1) cc |= SW_C1; |
} |
else |
{ |
/* expdif <= -2: st(0) is left unchanged; only the control word is
   restored and the condition codes cleared. */
control_word = old_cw; |
setcc(0); |
return; |
} |
} |
else |
{ |
/* There is a large exponent difference ( >= 64 ) */ |
/* To make much sense, the code in this section should |
be done at high precision. */ |
int exp_1; |
|
/* prevent overflow here */ |
/* N is 'a number between 32 and 63' (p26-113) */ |
reg_move(st0_ptr, &tmp); |
tmp.exp = EXP_BIAS + 56; |
exp_1 = st1_ptr->exp; st1_ptr->exp = EXP_BIAS; |
expdif -= 56; |
|
reg_div(&tmp, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f); |
st1_ptr->exp = exp_1; |
|
round_to_int(&tmp); /* Fortunately, this can't overflow to 2^64 */ |
|
rem_kernel(significand(st0_ptr), |
&significand(&tmp), |
significand(st1_ptr), |
significand(&tmp), |
tmp.exp - EXP_BIAS |
); |
tmp.exp = exp_1 + expdif; |
tmp.sign = sign; |
|
/* It is possible for the operation to be complete here. |
What does the IEEE standard say? The Intel 80486 manual |
implies that the operation will never be completed at this |
point, and the behaviour of a real 80486 confirms this. |
*/ |
if ( !(tmp.sigh | tmp.sigl) ) |
{ |
/* The result is zero */ |
control_word = old_cw; |
partial_status = saved_status; |
reg_move(&CONST_Z, st0_ptr); |
st0_ptr->sign = sign; |
#ifdef PECULIAR_486 |
setcc(SW_C2); |
#else |
setcc(0); |
#endif PECULIAR_486 |
return; |
} |
cc = SW_C2; |
} |
|
control_word = old_cw; |
partial_status = saved_status; |
normalize_nuo(&tmp); |
reg_move(&tmp, st0_ptr); |
setcc(cc); |
|
/* The only condition to be looked for is underflow, |
and it can occur here only if underflow is unmasked. */ |
if ( (st0_ptr->exp <= EXP_UNDER) && (st0_ptr->tag != TW_Zero) |
&& !(control_word & CW_Underflow) ) |
arith_underflow(st0_ptr); |
|
return; |
} |
else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) ) |
{ |
stack_underflow(); |
return; |
} |
else if ( st0_tag == TW_Zero ) |
{ |
if ( st1_tag == TW_Valid ) |
{ |
#ifdef DENORM_OPERAND |
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
setcc(0); return; |
} |
else if ( st1_tag == TW_Zero ) |
{ arith_invalid(st0_ptr); return; } /* fprem(?,0) always invalid */ |
else if ( st1_tag == TW_Infinity ) |
{ setcc(0); return; } |
} |
else if ( st0_tag == TW_Valid ) |
{ |
if ( st1_tag == TW_Zero ) |
{ |
arith_invalid(st0_ptr); /* fprem(Valid,Zero) is invalid */ |
return; |
} |
else if ( st1_tag != TW_NaN ) |
{ |
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
if ( st1_tag == TW_Infinity ) |
{ |
/* fprem(Valid,Infinity) is o.k. */ |
setcc(0); return; |
} |
} |
} |
else if ( st0_tag == TW_Infinity ) |
{ |
if ( st1_tag != TW_NaN ) |
{ |
arith_invalid(st0_ptr); /* fprem(Infinity,?) is invalid */ |
return; |
} |
} |
|
/* One of the registers must contain a NaN if we got here. */ |
|
#ifdef PARANOID |
if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) ) |
EXCEPTION(EX_INTERNAL | 0x118); |
#endif PARANOID |
|
real_2op_NaN(st1_ptr, st0_ptr, st0_ptr); |
|
} |
|
|
/* ST(1) <- ST(1) * log ST; pop ST */ |
/* fyl2x: combine st(0) and st(1) into st(1) and pop, so that the result
   becomes the new st(0).  For two valid operands with st(0) positive the
   result is st(1) * log(st(0)) -- presumably log base 2, going by the
   instruction name and poly_l2(); confirm.  When st(0) is an exact power
   of two (significand 0x80000000:00000000) the logarithm is just the
   unbiased exponent and reg_mul() is used directly.  The remaining
   branches handle negative, zero, infinite, NaN and empty operands;
   whenever an exception handler reports failure (non-zero return) the
   stack is not popped. */
static void fyl2x(FPU_REG *st0_ptr) |
{ |
char st0_tag = st0_ptr->tag; |
FPU_REG *st1_ptr = &st(1), exponent; |
char st1_tag = st1_ptr->tag; |
int e; |
|
clear_C1(); |
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) |
{ |
if ( st0_ptr->sign == SIGN_POS ) |
{ |
#ifdef DENORM_OPERAND |
if ( ((st0_ptr->exp <= EXP_UNDER) || |
(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) ) |
{ |
/* Special case. The result can be precise. */ |
e = st0_ptr->exp - EXP_BIAS; |
if ( e > 0 ) |
{ |
exponent.sigh = e; |
exponent.sign = SIGN_POS; |
} |
else |
{ |
exponent.sigh = -e; |
exponent.sign = SIGN_NEG; |
} |
/* Build |e| as an unnormalized 32 bit integer and let normalize_nuo()
   turn it into a proper register value. */
exponent.sigl = 0; |
exponent.exp = EXP_BIAS + 31; |
exponent.tag = TW_Valid; |
normalize_nuo(&exponent); |
reg_mul(&exponent, st1_ptr, st1_ptr, FULL_PRECISION); |
} |
else |
{ |
/* The usual case */ |
poly_l2(st0_ptr, st1_ptr, st1_ptr); |
if ( st1_ptr->exp <= EXP_UNDER ) |
{ |
/* A denormal result has been produced. |
Precision must have been lost, this is always |
an underflow. */ |
arith_underflow(st1_ptr); |
} |
else |
set_precision_flag_up(); /* 80486 appears to always do this */ |
} |
pop(); |
return; |
} |
else |
{ |
/* negative */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
return; |
} |
} |
else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) ) |
{ |
stack_underflow_pop(1); |
return; |
} |
else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) |
{ |
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) |
pop(); |
return; |
} |
else if ( (st0_tag <= TW_Zero) && (st1_tag <= TW_Zero) ) |
{ |
/* one of the args is zero, the other valid, or both zero */ |
if ( st0_tag == TW_Zero ) |
{ |
if ( st1_tag == TW_Zero ) |
{ |
/* Both args zero is invalid */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
} |
#ifdef PECULIAR_486 |
/* This case is not specifically covered in the manual, |
but divide-by-zero would seem to be the best response. |
However, a real 80486 does it this way... */ |
/* NOTE(review): this test looks unreachable -- we are inside
   st0_tag == TW_Zero and nothing visible above changes st0_ptr->tag.
   Confirm which operand was meant. */
else if ( st0_ptr->tag == TW_Infinity ) |
{ |
reg_move(&CONST_INF, st1_ptr); |
pop(); |
} |
#endif PECULIAR_486 |
else |
{ |
if ( !divide_by_zero(st1_ptr->sign^SIGN_NEG^SIGN_POS, st1_ptr) ) |
pop(); |
} |
return; |
} |
else |
{ |
/* st(1) contains zero, st(0) valid <> 0 */ |
/* Zero is the valid answer */ |
char sign = st1_ptr->sign; |
|
if ( st0_ptr->sign == SIGN_NEG ) |
{ |
/* log(negative) */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
return; |
} |
|
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
/* st(0) < 1.0 has a negative logarithm, so the sign of the zero flips */
if ( st0_ptr->exp < EXP_BIAS ) sign ^= SIGN_NEG^SIGN_POS; |
pop(); st0_ptr = &st(0); |
reg_move(&CONST_Z, st0_ptr); |
st0_ptr->sign = sign; |
return; |
} |
} |
/* One or both arg must be an infinity */ |
else if ( st0_tag == TW_Infinity ) |
{ |
if ( (st0_ptr->sign == SIGN_NEG) || (st1_tag == TW_Zero) ) |
{ |
/* log(-infinity) or 0*log(infinity) */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
return; |
} |
else |
{ |
char sign = st1_ptr->sign; |
|
#ifdef DENORM_OPERAND |
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
pop(); st0_ptr = &st(0); |
reg_move(&CONST_INF, st0_ptr); |
st0_ptr->sign = sign; |
return; |
} |
} |
/* st(1) must be infinity here */ |
else if ( (st0_tag == TW_Valid) && (st0_ptr->sign == SIGN_POS) ) |
{ |
if ( st0_ptr->exp >= EXP_BIAS ) |
{ |
if ( (st0_ptr->exp == EXP_BIAS) && |
(st0_ptr->sigh == 0x80000000) && |
(st0_ptr->sigl == 0) ) |
{ |
/* st(0) holds 1.0 */ |
/* infinity*log(1) */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
return; |
} |
/* st(0) is positive and > 1.0 */ |
pop(); |
} |
else |
{ |
/* st(0) is positive and < 1.0 */ |
|
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
st1_ptr->sign ^= SIGN_NEG; |
pop(); |
} |
return; |
} |
else |
{ |
/* st(0) must be zero or negative */ |
if ( st0_ptr->tag == TW_Zero ) |
{ |
/* This should be invalid, but a real 80486 is happy with it. */ |
#ifndef PECULIAR_486 |
if ( !divide_by_zero(st1_ptr->sign, st1_ptr) ) |
#endif PECULIAR_486 |
{ |
st1_ptr->sign ^= SIGN_NEG^SIGN_POS; |
pop(); |
} |
} |
else |
{ |
/* log(negative) */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
} |
return; |
} |
} |
|
|
/* fpatan: combine st(0) and st(1) into st(1) and pop, so that the result
   becomes the new st(0).  Two valid operands are handed to
   poly_atan(st(0), st(1), dest = st(1)).  Operands that are zero or
   infinite get one of the constants CONST_Z, CONST_PI4, CONST_PI2,
   CONST_PI or CONST_PI4 + CONST_PI2, carrying the original sign of st(1).
   Every path that falls through to the end of the function pops the stack
   and sets the precision flag "up"; paths that return early do not set
   the flag. */
static void fpatan(FPU_REG *st0_ptr) |
{ |
char st0_tag = st0_ptr->tag; |
FPU_REG *st1_ptr = &st(1); |
char st1_tag = st1_ptr->tag; |
|
clear_C1(); |
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) |
{ |
#ifdef DENORM_OPERAND |
if ( ((st0_ptr->exp <= EXP_UNDER) || |
(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
poly_atan(st0_ptr, st1_ptr, st1_ptr); |
|
if ( st1_ptr->exp <= EXP_UNDER ) |
{ |
/* A denormal result has been produced. |
Precision must have been lost. |
This is by definition an underflow. */ |
arith_underflow(st1_ptr); |
pop(); |
return; |
} |
} |
else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) ) |
{ |
stack_underflow_pop(1); |
return; |
} |
else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) |
{ |
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) |
pop(); |
return; |
} |
else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) ) |
{ |
/* Remember the sign of st(1); it is re-applied to whichever constant
   is stored below. */
char sign = st1_ptr->sign; |
if ( st0_tag == TW_Infinity ) |
{ |
if ( st1_tag == TW_Infinity ) |
{ |
if ( st0_ptr->sign == SIGN_POS ) |
{ reg_move(&CONST_PI4, st1_ptr); } |
else |
reg_add(&CONST_PI4, &CONST_PI2, st1_ptr, FULL_PRECISION); |
} |
else |
{ |
#ifdef DENORM_OPERAND |
if ( st1_tag != TW_Zero ) |
{ |
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
} |
#endif DENORM_OPERAND |
|
if ( st0_ptr->sign == SIGN_POS ) |
{ |
reg_move(&CONST_Z, st1_ptr); |
st1_ptr->sign = sign; /* An 80486 preserves the sign */ |
pop(); |
return; |
} |
else |
reg_move(&CONST_PI, st1_ptr); |
} |
} |
else |
{ |
/* st(1) is infinity, st(0) not infinity */ |
#ifdef DENORM_OPERAND |
if ( st0_tag != TW_Zero ) |
{ |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
} |
#endif DENORM_OPERAND |
|
reg_move(&CONST_PI2, st1_ptr); |
} |
st1_ptr->sign = sign; |
} |
else if ( st1_tag == TW_Zero ) |
{ |
/* st(0) must be valid or zero */ |
char sign = st1_ptr->sign; |
|
#ifdef DENORM_OPERAND |
if ( st0_tag != TW_Zero ) |
{ |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
} |
#endif DENORM_OPERAND |
|
if ( st0_ptr->sign == SIGN_POS ) |
{ /* An 80486 preserves the sign */ pop(); return; } |
else |
reg_move(&CONST_PI, st1_ptr); |
st1_ptr->sign = sign; |
} |
else if ( st0_tag == TW_Zero ) |
{ |
/* st(1) must be TW_Valid here */ |
char sign = st1_ptr->sign; |
|
#ifdef DENORM_OPERAND |
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
reg_move(&CONST_PI2, st1_ptr); |
st1_ptr->sign = sign; |
} |
#ifdef PARANOID |
else |
EXCEPTION(EX_INTERNAL | 0x125); |
#endif PARANOID |
|
pop(); |
set_precision_flag_up(); /* We do not really know if up or down */ |
} |
|
|
/* fprem: remainder of st(0) / st(1), computed by do_fprem() with the
   RC_CHOP rounding selector. */
static void fprem(FPU_REG *st0_ptr) |
{ |
do_fprem(st0_ptr, RC_CHOP); |
} |
|
|
/* fprem1: remainder of st(0) / st(1), computed by do_fprem() with the
   RC_RND rounding selector (do_fprem() may then subtract st(1) once more
   to bring the result to at most half of st(1)). */
static void fprem1(FPU_REG *st0_ptr) |
{ |
do_fprem(st0_ptr, RC_RND); |
} |
|
|
/* fyl2xp1: combine st(0) and st(1) into st(1) and pop, so that the result
   becomes the new st(0).  Two valid operands are handed to
   poly_l2p1(st(0), st(1), dest = st(1)) -- presumably st(1) * log2(st(0)+1),
   going by the instruction name; confirm.  A non-zero return from
   poly_l2p1() is treated as an invalid operation, unless PECULIAR_486 is
   defined, in which case the sign of st(1) is flipped instead.  The
   remaining branches enumerate zero, infinite and NaN operands; whenever
   an exception handler reports failure the stack is not popped. */
static void fyl2xp1(FPU_REG *st0_ptr) |
{ |
char st0_tag = st0_ptr->tag, sign; |
FPU_REG *st1_ptr = &st(1); |
char st1_tag = st1_ptr->tag; |
|
clear_C1(); |
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) |
{ |
#ifdef DENORM_OPERAND |
if ( ((st0_ptr->exp <= EXP_UNDER) || |
(st1_ptr->exp <= EXP_UNDER)) && denormal_operand() ) |
return; |
#endif DENORM_OPERAND |
|
if ( poly_l2p1(st0_ptr, st1_ptr, st1_ptr) ) |
{ |
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ |
st1_ptr->sign ^= SIGN_POS^SIGN_NEG; |
#else |
if ( arith_invalid(st1_ptr) ) /* poly_l2p1() returned invalid */ |
return; |
#endif PECULIAR_486 |
} |
if ( st1_ptr->exp <= EXP_UNDER ) |
{ |
/* A denormal result has been produced. |
Precision must have been lost, this is always |
an underflow. */ |
/* The sign is saved and restored around arith_underflow(). */
sign = st1_ptr->sign; |
arith_underflow(st1_ptr); |
st1_ptr->sign = sign; |
} |
else |
set_precision_flag_up(); /* 80486 appears to always do this */ |
pop(); |
return; |
} |
else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) ) |
{ |
stack_underflow_pop(1); |
return; |
} |
else if ( st0_tag == TW_Zero ) |
{ |
if ( st1_tag <= TW_Zero ) |
{ |
#ifdef DENORM_OPERAND |
if ( (st1_tag == TW_Valid) && (st1_ptr->exp <= EXP_UNDER) && |
(denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
/* Result is the zero from st(0) with the two signs combined. */
st0_ptr->sign ^= st1_ptr->sign; |
reg_move(st0_ptr, st1_ptr); |
} |
else if ( st1_tag == TW_Infinity ) |
{ |
/* Infinity*log(1) */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
return; |
} |
else if ( st1_tag == TW_NaN ) |
{ |
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) |
pop(); |
return; |
} |
#ifdef PARANOID |
else |
{ |
EXCEPTION(EX_INTERNAL | 0x116); |
return; |
} |
#endif PARANOID |
pop(); return; |
} |
else if ( st0_tag == TW_Valid ) |
{ |
if ( st1_tag == TW_Zero ) |
{ |
if ( st0_ptr->sign == SIGN_NEG ) |
{ |
if ( st0_ptr->exp >= EXP_BIAS ) |
{ |
/* st(0) holds <= -1.0 */ |
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ |
st1_ptr->sign ^= SIGN_POS^SIGN_NEG; |
#else |
if ( arith_invalid(st1_ptr) ) return; |
#endif PECULIAR_486 |
pop(); return; |
} |
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
st1_ptr->sign ^= SIGN_POS^SIGN_NEG; |
pop(); return; |
} |
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
pop(); return; |
} |
if ( st1_tag == TW_Infinity ) |
{ |
if ( st0_ptr->sign == SIGN_NEG ) |
{ |
if ( (st0_ptr->exp >= EXP_BIAS) && |
!((st0_ptr->sigh == 0x80000000) && |
(st0_ptr->sigl == 0)) ) |
{ |
/* st(0) holds < -1.0 */ |
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ |
st1_ptr->sign ^= SIGN_POS^SIGN_NEG; |
#else |
if ( arith_invalid(st1_ptr) ) return; |
#endif PECULIAR_486 |
pop(); return; |
} |
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
st1_ptr->sign ^= SIGN_POS^SIGN_NEG; |
pop(); return; |
} |
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
pop(); return; |
} |
if ( st1_tag == TW_NaN ) |
{ |
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) |
pop(); |
return; |
} |
} |
else if ( st0_tag == TW_NaN ) |
{ |
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) |
pop(); |
return; |
} |
else if ( st0_tag == TW_Infinity ) |
{ |
if ( st1_tag == TW_NaN ) |
{ |
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) |
pop(); |
return; |
} |
else if ( st0_ptr->sign == SIGN_NEG ) |
{ |
/* The exponent of st(1) is sampled before arith_invalid() can
   overwrite the register. */
int exponent = st1_ptr->exp; |
#ifndef PECULIAR_486 |
/* This should have higher priority than denormals, but... */ |
if ( arith_invalid(st1_ptr) ) /* log(-infinity) */ |
return; |
#endif PECULIAR_486 |
#ifdef DENORM_OPERAND |
if ( st1_tag != TW_Zero ) |
{ |
if ( (exponent <= EXP_UNDER) && (denormal_operand()) ) |
return; |
} |
#endif DENORM_OPERAND |
#ifdef PECULIAR_486 |
/* Denormal operands actually get higher priority */ |
if ( arith_invalid(st1_ptr) ) /* log(-infinity) */ |
return; |
#endif PECULIAR_486 |
pop(); |
return; |
} |
else if ( st1_tag == TW_Zero ) |
{ |
/* log(infinity) */ |
if ( !arith_invalid(st1_ptr) ) |
pop(); |
return; |
} |
|
/* st(1) must be valid here. */ |
|
#ifdef DENORM_OPERAND |
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
/* The Manual says that log(Infinity) is invalid, but a real |
80486 sensibly says that it is o.k. */ |
{ char sign = st1_ptr->sign; |
reg_move(&CONST_INF, st1_ptr); |
st1_ptr->sign = sign; |
} |
pop(); |
return; |
} |
#ifdef PARANOID |
else |
{ |
EXCEPTION(EX_INTERNAL | 0x117); |
} |
#endif PARANOID |
} |
|
|
/* fscale: scale st(0) by a power of two taken from st(1); st(1) itself is
   not modified and nothing is popped.  For two valid operands a copy of
   st(1) is truncated to an integer (rounding control temporarily forced to
   RC_CHOP), that integer is added to the exponent of st(0), and
   round_reg() is then used to detect overflow/underflow.  If st(1) has
   exp > EXP_BIAS + 30 the scale is treated as out of range: an overflow
   (st(1) positive) or underflow (st(1) negative) exception is raised and
   st(0) becomes infinity or zero with its original sign.  The remaining
   branches enumerate zero, infinite and NaN operand combinations; an empty
   st(0) or st(1) ends in stack_underflow(). */
static void fscale(FPU_REG *st0_ptr) |
{ |
char st0_tag = st0_ptr->tag; |
FPU_REG *st1_ptr = &st(1); |
char st1_tag = st1_ptr->tag; |
int old_cw = control_word; |
char sign = st0_ptr->sign; |
|
clear_C1(); |
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) |
{ |
long scale; |
FPU_REG tmp; |
|
#ifdef DENORM_OPERAND |
if ( ((st0_ptr->exp <= EXP_UNDER) || |
(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
if ( st1_ptr->exp > EXP_BIAS + 30 ) |
{ |
/* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */ |
/* Note: this local `sign` shadows the function-level `sign`. */
char sign; |
|
if ( st1_ptr->sign == SIGN_POS ) |
{ |
EXCEPTION(EX_Overflow); |
sign = st0_ptr->sign; |
reg_move(&CONST_INF, st0_ptr); |
st0_ptr->sign = sign; |
} |
else |
{ |
EXCEPTION(EX_Underflow); |
sign = st0_ptr->sign; |
reg_move(&CONST_Z, st0_ptr); |
st0_ptr->sign = sign; |
} |
return; |
} |
|
control_word &= ~CW_RC; |
control_word |= RC_CHOP; |
reg_move(st1_ptr, &tmp); |
round_to_int(&tmp); /* This can never overflow here */ |
control_word = old_cw; |
/* The magnitude sits in tmp.sigl; apply the sign of st(1) by hand. */
scale = st1_ptr->sign ? -tmp.sigl : tmp.sigl; |
scale += st0_ptr->exp; |
st0_ptr->exp = scale; |
|
/* Use round_reg() to properly detect under/overflow etc */ |
round_reg(st0_ptr, 0, control_word); |
|
return; |
} |
else if ( st0_tag == TW_Valid ) |
{ |
if ( st1_tag == TW_Zero ) |
{ |
|
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
return; |
} |
if ( st1_tag == TW_Infinity ) |
{ |
#ifdef DENORM_OPERAND |
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
if ( st1_ptr->sign == SIGN_POS ) |
{ reg_move(&CONST_INF, st0_ptr); } |
else |
reg_move(&CONST_Z, st0_ptr); |
st0_ptr->sign = sign; |
return; |
} |
if ( st1_tag == TW_NaN ) |
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } |
} |
else if ( st0_tag == TW_Zero ) |
{ |
if ( st1_tag == TW_Valid ) |
{ |
|
#ifdef DENORM_OPERAND |
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
return; |
} |
else if ( st1_tag == TW_Zero ) { return; } |
else if ( st1_tag == TW_Infinity ) |
{ |
if ( st1_ptr->sign == SIGN_NEG ) |
return; |
else |
{ |
arith_invalid(st0_ptr); /* Zero scaled by +Infinity */ |
return; |
} |
} |
else if ( st1_tag == TW_NaN ) |
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } |
} |
else if ( st0_tag == TW_Infinity ) |
{ |
if ( st1_tag == TW_Valid ) |
{ |
|
#ifdef DENORM_OPERAND |
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) |
return; |
#endif DENORM_OPERAND |
|
return; |
} |
if ( ((st1_tag == TW_Infinity) && (st1_ptr->sign == SIGN_POS)) |
|| (st1_tag == TW_Zero) ) |
return; |
else if ( st1_tag == TW_Infinity ) |
{ |
arith_invalid(st0_ptr); /* Infinity scaled by -Infinity */ |
return; |
} |
else if ( st1_tag == TW_NaN ) |
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } |
} |
else if ( st0_tag == TW_NaN ) |
{ |
if ( st1_tag != TW_Empty ) |
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } |
} |
|
#ifdef PARANOID |
if ( !((st0_tag == TW_Empty) || (st1_tag == TW_Empty)) ) |
{ |
EXCEPTION(EX_INTERNAL | 0x115); |
return; |
} |
#endif |
|
/* At least one of st(0), st(1) must be empty */ |
stack_underflow(); |
|
} |
|
|
/*---------------------------------------------------------------------------*/ |
|
static FUNC_ST0 const trig_table_a[] = { |
f2xm1, fyl2x, fptan, fpatan, fxtract, fprem1, fdecstp, fincstp |
}; |
|
void trig_a(void) |
{ |
(trig_table_a[FPU_rm])(&st(0)); |
} |
|
|
static FUNC_ST0 const trig_table_b[] = |
{ |
fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, fsin, fcos |
}; |
|
void trig_b(void) |
{ |
(trig_table_b[FPU_rm])(&st(0)); |
} |
/fpu_arith.c
0,0 → 1,179
/*---------------------------------------------------------------------------+ |
| fpu_arith.c | |
| | |
| Code to implement the FPU register/register arithmetic instructions | |
| | |
| Copyright (C) 1992,1993 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_system.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "status_w.h" |
|
|
void fadd__() |
{ |
/* fadd st,st(i) */ |
clear_C1(); |
reg_add(&st(0), &st(FPU_rm), &st(0), control_word); |
} |
|
|
void fmul__() |
{ |
/* fmul st,st(i) */ |
clear_C1(); |
reg_mul(&st(0), &st(FPU_rm), &st(0), control_word); |
} |
|
|
|
void fsub__() |
{ |
/* fsub st,st(i) */ |
clear_C1(); |
reg_sub(&st(0), &st(FPU_rm), &st(0), control_word); |
} |
|
|
void fsubr_() |
{ |
/* fsubr st,st(i) */ |
clear_C1(); |
reg_sub(&st(FPU_rm), &st(0), &st(0), control_word); |
} |
|
|
void fdiv__() |
{ |
/* fdiv st,st(i) */ |
clear_C1(); |
reg_div(&st(0), &st(FPU_rm), &st(0), control_word); |
} |
|
|
void fdivr_() |
{ |
/* fdivr st,st(i) */ |
clear_C1(); |
reg_div(&st(FPU_rm), &st(0), &st(0), control_word); |
} |
|
|
|
void fadd_i() |
{ |
/* fadd st(i),st */ |
clear_C1(); |
reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); |
} |
|
|
void fmul_i() |
{ |
/* fmul st(i),st */ |
clear_C1(); |
reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); |
} |
|
|
void fsubri() |
{ |
/* fsubr st(i),st */ |
/* This is the sense of the 80486 manual |
reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); */ |
clear_C1(); |
reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); |
} |
|
|
void fsub_i() |
{ |
/* fsub st(i),st */ |
/* This is the sense of the 80486 manual |
reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); */ |
clear_C1(); |
reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); |
} |
|
|
void fdivri() |
{ |
/* fdivr st(i),st */ |
clear_C1(); |
reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); |
} |
|
|
void fdiv_i() |
{ |
/* fdiv st(i),st */ |
clear_C1(); |
reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); |
} |
|
|
|
void faddp_() |
{ |
/* faddp st(i),st */ |
clear_C1(); |
if ( !reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) |
pop(); |
} |
|
|
void fmulp_() |
{ |
/* fmulp st(i),st */ |
clear_C1(); |
if ( !reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) |
pop(); |
} |
|
|
|
void fsubrp() |
{ |
/* fsubrp st(i),st */ |
/* This is the sense of the 80486 manual |
reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); */ |
clear_C1(); |
if ( !reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) |
pop(); |
} |
|
|
void fsubp_() |
{ |
/* fsubp st(i),st */ |
/* This is the sense of the 80486 manual |
reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); */ |
clear_C1(); |
if ( !reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) ) |
pop(); |
} |
|
|
void fdivrp() |
{ |
/* fdivrp st(i),st */ |
clear_C1(); |
if ( !reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) |
pop(); |
} |
|
|
void fdivp_() |
{ |
/* fdivp st(i),st */ |
clear_C1(); |
if ( !reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) ) |
pop(); |
} |
|
/reg_compare.c
0,0 → 1,378
/*---------------------------------------------------------------------------+ |
| reg_compare.c | |
| | |
| Compare two floating point registers | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| compare() is the core FPU_REG comparison function | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "status_w.h" |
|
|
/* Contributes COMP_Denormal to a comparison result when `cond' holds and
   the emulator is built with DENORM_OPERAND; contributes nothing when
   DENORM_OPERAND is not defined. */
#ifdef DENORM_OPERAND
#define CMP_DENORMAL_IF(cond)  ((cond) ? COMP_Denormal : 0)
#else
#define CMP_DENORMAL_IF(cond)  0
#endif /* DENORM_OPERAND */

/*
 * Compare st(0) (operand A) with *b (operand B).
 *
 * Returns COMP_A_gt_B, COMP_A_eq_B or COMP_A_lt_B, possibly ORed with
 * COMP_Denormal, or COMP_No_Comp | COMP_NaN (plus COMP_SNaN when at least
 * one NaN operand has bit 0x40000000 of sigh clear).
 *
 * If the tags are non-zero but match none of the handled cases,
 * EX_Invalid is raised and the numeric comparison below is still done.
 */
int compare(FPU_REG const *b)
{
  FPU_REG *st0_ptr = &st(0);
  char st0_tag = st0_ptr->tag;
  int diff;
  int a_is_greater;

  if ( st0_tag | b->tag )
    {
      /* At least one tag is non-zero, so this is not Valid vs Valid. */

      /* Zero against Zero or Valid. */
      if ( st0_tag == TW_Zero )
	{
	  if ( b->tag == TW_Zero )
	    return COMP_A_eq_B;
	  if ( b->tag == TW_Valid )
	    return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
	      | CMP_DENORMAL_IF(b->exp <= EXP_UNDER);
	}
      else if ( (b->tag == TW_Zero) && (st0_tag == TW_Valid) )
	{
	  return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
	    | CMP_DENORMAL_IF(st0_ptr->exp <= EXP_UNDER);
	}

      /* Infinity against Valid, Zero or Infinity. */
      if ( st0_tag == TW_Infinity )
	{
	  if ( (b->tag == TW_Valid) || (b->tag == TW_Zero) )
	    return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
	      | CMP_DENORMAL_IF((b->tag == TW_Valid)
				&& (b->exp <= EXP_UNDER));
	  if ( b->tag == TW_Infinity )
	    {
	      /* The 80486 book says that infinities can be equal! */
	      if ( st0_ptr->sign == b->sign )
		return COMP_A_eq_B;
	      return (st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B;
	    }
	  /* Anything else falls through to the NaN code */
	}
      else if ( (b->tag == TW_Infinity)
	       && ((st0_tag == TW_Valid) || (st0_tag == TW_Zero)) )
	{
	  return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
	    | CMP_DENORMAL_IF((st0_tag == TW_Valid)
			      && (st0_ptr->exp <= EXP_UNDER));
	}

      /* The only possibility now should be that one of the arguments
	 is a NaN */
      if ( (st0_tag == TW_NaN) || (b->tag == TW_NaN) )
	{
	  int signaling =
	    ((st0_tag == TW_NaN) && !(st0_ptr->sigh & 0x40000000))
	    || ((b->tag == TW_NaN) && !(b->sigh & 0x40000000));

	  if ( signaling )
	    return COMP_No_Comp | COMP_SNaN | COMP_NaN;
	  return COMP_No_Comp | COMP_NaN;
	}

      EXCEPTION(EX_Invalid);
    }

#ifdef PARANOID
  if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
  if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
#endif /* PARANOID */

  if ( st0_ptr->sign != b->sign )
    {
      /* Opposite signs: the positive operand is the larger one. */
      a_is_greater = (st0_ptr->sign == SIGN_POS);
    }
  else
    {
      /* Same sign: order the magnitudes by exponent, then significand. */
      diff = st0_ptr->exp - b->exp;
      if ( diff == 0 )
	diff = st0_ptr->sigh - b->sigh;  /* Works only if ms bits are
					    identical */
      if ( diff == 0 )
	diff = (st0_ptr->sigl > b->sigl) - (st0_ptr->sigl < b->sigl);

      if ( diff == 0 )
	return COMP_A_eq_B
	  | CMP_DENORMAL_IF((st0_ptr->exp <= EXP_UNDER)
			    || (b->exp <= EXP_UNDER));

      /* A larger magnitude means a larger value only for positive
	 operands. */
      a_is_greater = ((diff > 0) == (st0_ptr->sign == SIGN_POS));
    }

  return (a_is_greater ? COMP_A_gt_B : COMP_A_lt_B)
    | CMP_DENORMAL_IF((st0_ptr->exp <= EXP_UNDER)
		      || (b->exp <= EXP_UNDER));
}

#undef CMP_DENORMAL_IF
|
|
/* This function requires that st(0) is not empty */ |
int compare_st_data(FPU_REG const *loaded_data) |
{ |
int f, c; |
|
c = compare(loaded_data); |
|
if (c & COMP_NaN) |
{ |
EXCEPTION(EX_Invalid); |
f = SW_C3 | SW_C2 | SW_C0; |
} |
else |
switch (c & 7) |
{ |
case COMP_A_lt_B: |
f = SW_C0; |
break; |
case COMP_A_eq_B: |
f = SW_C3; |
break; |
case COMP_A_gt_B: |
f = 0; |
break; |
case COMP_No_Comp: |
f = SW_C3 | SW_C2 | SW_C0; |
break; |
#ifdef PARANOID |
default: |
EXCEPTION(EX_INTERNAL|0x121); |
f = SW_C3 | SW_C2 | SW_C0; |
break; |
#endif PARANOID |
} |
setcc(f); |
if (c & COMP_Denormal) |
{ |
return denormal_operand(); |
} |
return 0; |
} |
|
|
static int compare_st_st(int nr) |
{ |
int f, c; |
|
if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) |
{ |
setcc(SW_C3 | SW_C2 | SW_C0); |
/* Stack fault */ |
EXCEPTION(EX_StackUnder); |
return !(control_word & CW_Invalid); |
} |
|
c = compare(&st(nr)); |
if (c & COMP_NaN) |
{ |
setcc(SW_C3 | SW_C2 | SW_C0); |
EXCEPTION(EX_Invalid); |
return !(control_word & CW_Invalid); |
} |
else |
switch (c & 7) |
{ |
case COMP_A_lt_B: |
f = SW_C0; |
break; |
case COMP_A_eq_B: |
f = SW_C3; |
break; |
case COMP_A_gt_B: |
f = 0; |
break; |
case COMP_No_Comp: |
f = SW_C3 | SW_C2 | SW_C0; |
break; |
#ifdef PARANOID |
default: |
EXCEPTION(EX_INTERNAL|0x122); |
f = SW_C3 | SW_C2 | SW_C0; |
break; |
#endif PARANOID |
} |
setcc(f); |
if (c & COMP_Denormal) |
{ |
return denormal_operand(); |
} |
return 0; |
} |
|
|
static int compare_u_st_st(int nr) |
{ |
int f, c; |
|
if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) |
{ |
setcc(SW_C3 | SW_C2 | SW_C0); |
/* Stack fault */ |
EXCEPTION(EX_StackUnder); |
return !(control_word & CW_Invalid); |
} |
|
c = compare(&st(nr)); |
if (c & COMP_NaN) |
{ |
setcc(SW_C3 | SW_C2 | SW_C0); |
if (c & COMP_SNaN) /* This is the only difference between |
un-ordered and ordinary comparisons */ |
{ |
EXCEPTION(EX_Invalid); |
return !(control_word & CW_Invalid); |
} |
return 0; |
} |
else |
switch (c & 7) |
{ |
case COMP_A_lt_B: |
f = SW_C0; |
break; |
case COMP_A_eq_B: |
f = SW_C3; |
break; |
case COMP_A_gt_B: |
f = 0; |
break; |
case COMP_No_Comp: |
f = SW_C3 | SW_C2 | SW_C0; |
break; |
#ifdef PARANOID |
default: |
EXCEPTION(EX_INTERNAL|0x123); |
f = SW_C3 | SW_C2 | SW_C0; |
break; |
#endif PARANOID |
} |
setcc(f); |
if (c & COMP_Denormal) |
{ |
return denormal_operand(); |
} |
return 0; |
} |
|
/*---------------------------------------------------------------------------*/ |
|
void fcom_st() |
{ |
/* fcom st(i) */ |
compare_st_st(FPU_rm); |
} |
|
|
void fcompst() |
{ |
/* fcomp st(i) */ |
if ( !compare_st_st(FPU_rm) ) |
pop(); |
} |
|
|
void fcompp() |
{ |
/* fcompp */ |
if (FPU_rm != 1) |
{ |
FPU_illegal(); |
return; |
} |
if ( !compare_st_st(1) ) |
poppop(); |
} |
|
|
void fucom_() |
{ |
/* fucom st(i) */ |
compare_u_st_st(FPU_rm); |
|
} |
|
|
void fucomp() |
{ |
/* fucomp st(i) */ |
if ( !compare_u_st_st(FPU_rm) ) |
pop(); |
} |
|
|
void fucompp() |
{ |
/* fucompp */ |
if (FPU_rm == 1) |
{ |
if ( !compare_u_st_st(1) ) |
poppop(); |
} |
else |
FPU_illegal(); |
} |
/reg_u_div.S
0,0 → 1,471
.file "reg_u_div.S" |
/*---------------------------------------------------------------------------+ |
| reg_u_div.S | |
| | |
| Core division routines | |
| | |
| Copyright (C) 1992,1993,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Kernel for the division routines. | |
| | |
| void reg_u_div(FPU_REG *a, FPU_REG *b, | |
| FPU_REG *dest, unsigned int control_word) | |
| | |
| Does not compute the destination exponent, but does adjust it. | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
|
|
/* #define dSIGL(x) (x) */ |
/* #define dSIGH(x) 4(x) */ |
|
|
/*
 * Scratch variables used by the division code below.
 * When NON_REENTRANT_FPU is not defined they are addressed relative to
 * %ebp (the entry code reserves 28 bytes below the frame pointer);
 * otherwise they are static locations in .data.
 * FPU_accum_3..FPU_accum_0 hold the working dividend/remainder, most
 * significant word first.  FPU_result_2 receives the upper quotient
 * word and FPU_result_1 the lower one.  FPU_ovfl_flag is only ever
 * accessed with byte instructions (setaeb/movb/testb).
 */
#ifndef NON_REENTRANT_FPU |
/* |
Local storage on the stack: |
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 |
Overflow flag: ovfl_flag |
*/ |
#define FPU_accum_3 -4(%ebp) |
#define FPU_accum_2 -8(%ebp) |
#define FPU_accum_1 -12(%ebp) |
#define FPU_accum_0 -16(%ebp) |
#define FPU_result_1 -20(%ebp) |
#define FPU_result_2 -24(%ebp) |
#define FPU_ovfl_flag -28(%ebp) |
|
#else |
.data |
/* |
Local storage in a static area: |
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 |
Overflow flag: ovfl_flag |
*/ |
.align 2,0 |
FPU_accum_3: |
.long 0 |
FPU_accum_2: |
.long 0 |
FPU_accum_1: |
.long 0 |
FPU_accum_0: |
.long 0 |
FPU_result_1: |
.long 0 |
FPU_result_2: |
.long 0 |
FPU_ovfl_flag: |
.byte 0 |
#endif NON_REENTRANT_FPU |
|
|
/*
 * reg_u_div entry point.
 * Builds a stack frame, reserves the local scratch area (re-entrant
 * build only), saves %esi/%edi/%ebx and loads the three pointer
 * arguments: %esi = dividend, %ebx = divisor, %edi = destination.
 * With DENORM_OPERAND defined, the exponent of each source operand is
 * compared with EXP_UNDER and denormal_operand() is called for any that
 * is not greater; a non-zero return from it leaves through
 * fpu_Arith_exit.
 * Execution then falls through into divide_kernel.
 */
.text |
ENTRY(reg_u_div) |
pushl %ebp |
movl %esp,%ebp |
#ifndef NON_REENTRANT_FPU |
subl $28,%esp |
#endif NON_REENTRANT_FPU |
|
pushl %esi |
pushl %edi |
pushl %ebx |
|
movl PARAM1,%esi /* pointer to num */ |
movl PARAM2,%ebx /* pointer to denom */ |
movl PARAM3,%edi /* pointer to answer */ |
|
#ifdef DENORM_OPERAND |
movl EXP(%esi),%eax |
cmpl EXP_UNDER,%eax |
jg xOp1_not_denorm |
|
call SYMBOL_NAME(denormal_operand) |
orl %eax,%eax |
jnz fpu_Arith_exit |
|
xOp1_not_denorm: |
movl EXP(%ebx),%eax |
cmpl EXP_UNDER,%eax |
jg xOp2_not_denorm |
|
call SYMBOL_NAME(denormal_operand) |
orl %eax,%eax |
jnz fpu_Arith_exit |
|
xOp2_not_denorm: |
#endif DENORM_OPERAND |
|
/*
 * divide_kernel: start of the significand division.
 * Expects %esi = dividend, %ebx = divisor, %edi = destination.
 *
 * Fast path: when the low word of the divisor is zero, the quotient is
 * produced by three successive divl steps by the divisor's high word,
 * giving FPU_result_2, FPU_result_1 and finally %eax as the extension
 * word.  If the dividend's high word is not below the divisor's, the
 * divisor word is subtracted first and FPU_ovfl_flag records it; in
 * that case the destination exponent is incremented and the three-word
 * result is shifted right one bit with the ms bit set.
 * Both cases end by jumping to LRound_precision.
 * Divisors with a non-zero low word are handled at L_Full_Division.
 */
ENTRY(divide_kernel) |
#ifdef PARANOID |
/* testl $0x80000000, SIGH(%esi) // Dividend */ |
/* je L_bugged */ |
testl $0x80000000, SIGH(%ebx) /* Divisor */ |
je L_bugged |
#endif PARANOID |
|
/* Check if the divisor can be treated as having just 32 bits */ |
cmpl $0,SIGL(%ebx) |
jnz L_Full_Division /* Can't do a quick divide */ |
|
/* We should be able to zip through the division here */ |
movl SIGH(%ebx),%ecx /* The divisor */ |
movl SIGH(%esi),%edx /* Dividend */ |
movl SIGL(%esi),%eax /* Dividend */ |
|
cmpl %ecx,%edx |
setaeb FPU_ovfl_flag /* Keep a record */ |
jb L_no_adjust |
|
subl %ecx,%edx /* Prevent the overflow */ |
|
L_no_adjust: |
/* Divide the 64 bit number by the 32 bit denominator */ |
divl %ecx |
movl %eax,FPU_result_2 |
|
/* Work on the remainder of the first division */ |
xorl %eax,%eax |
divl %ecx |
movl %eax,FPU_result_1 |
|
/* Work on the remainder of the 64 bit division */ |
xorl %eax,%eax |
divl %ecx |
|
testb $255,FPU_ovfl_flag /* was the num > denom ? */ |
je L_no_overflow |
|
/* Do the shifting here */ |
/* increase the exponent */ |
incl EXP(%edi) |
|
/* shift the mantissa right one bit */ |
stc /* To set the ms bit */ |
rcrl FPU_result_2 |
rcrl FPU_result_1 |
rcrl %eax |
|
L_no_overflow: |
jmp LRound_precision /* Do the rounding as required */ |
|
|
/*---------------------------------------------------------------------------+ |
| Divide: Return arg1/arg2 to arg3. | |
| | |
| This routine does not use the exponents of arg1 and arg2, but does | |
| adjust the exponent of arg3. | |
| | |
| The maximum returned value is (ignoring exponents) | |
| .ffffffff ffffffff | |
| ------------------ = 1.ffffffff fffffffe | |
| .80000000 00000000 | |
| and the minimum is | |
| .80000000 00000000 | |
| ------------------ = .80000000 00000001 (rounded) | |
| .ffffffff ffffffff | |
| | |
+---------------------------------------------------------------------------*/ |
|
|
/*
 * Full division, first quotient word.
 * The dividend is copied into FPU_accum_3:FPU_accum_2 with
 * FPU_accum_1:FPU_accum_0 cleared as an extension.  If the dividend is
 * not below the divisor, the divisor is subtracted once and
 * FPU_ovfl_flag is set.
 * The quotient word is estimated by dividing by (divisor high word + 1);
 * when that addition carries, the divisor is 2^32 and the estimate is
 * simply %edx.  The estimate times the full divisor is subtracted from
 * the accumulator; if the top accumulator word is still non-zero, the
 * divisor is subtracted once more and FPU_result_2 is incremented.
 * %ecx keeps the augmented divisor word for the second step.
 */
L_Full_Division: |
/* Save extended dividend in local register */ |
movl SIGL(%esi),%eax |
movl %eax,FPU_accum_2 |
movl SIGH(%esi),%eax |
movl %eax,FPU_accum_3 |
xorl %eax,%eax |
movl %eax,FPU_accum_1 /* zero the extension */ |
movl %eax,FPU_accum_0 /* zero the extension */ |
|
movl SIGL(%esi),%eax /* Get the current num */ |
movl SIGH(%esi),%edx |
|
/*----------------------------------------------------------------------*/ |
/* Initialization done. |
Do the first 32 bits. */ |
|
movb $0,FPU_ovfl_flag |
cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ |
jb LLess_than_1 |
ja LGreater_than_1 |
|
cmpl SIGL(%ebx),%eax |
jb LLess_than_1 |
|
LGreater_than_1: |
/* The dividend is greater or equal, would cause overflow */ |
setaeb FPU_ovfl_flag /* Keep a record */ |
|
subl SIGL(%ebx),%eax |
sbbl SIGH(%ebx),%edx /* Prevent the overflow */ |
movl %eax,FPU_accum_2 |
movl %edx,FPU_accum_3 |
|
LLess_than_1: |
/* At this point, we have a dividend < divisor, with a record of |
adjustment in FPU_ovfl_flag */ |
|
/* We will divide by a number which is too large */ |
movl SIGH(%ebx),%ecx |
addl $1,%ecx |
jnc LFirst_div_not_1 |
|
/* here we need to divide by 100000000h, |
i.e., no division at all.. */ |
mov %edx,%eax |
jmp LFirst_div_done |
|
LFirst_div_not_1: |
divl %ecx /* Divide the numerator by the augmented |
denom ms dw */ |
|
LFirst_div_done: |
movl %eax,FPU_result_2 /* Put the result in the answer */ |
|
mull SIGH(%ebx) /* mul by the ms dw of the denom */ |
|
subl %eax,FPU_accum_2 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_3 |
|
movl FPU_result_2,%eax /* Get the result back */ |
mull SIGL(%ebx) /* now mul the ls dw of the denom */ |
|
subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_2 |
sbbl $0,FPU_accum_3 |
je LDo_2nd_32_bits /* Must check for non-zero result here */ |
|
#ifdef PARANOID |
jb L_bugged_1 |
#endif PARANOID |
|
/* need to subtract another once of the denom */ |
incl FPU_result_2 /* Correct the answer */ |
|
movl SIGL(%ebx),%eax |
movl SIGH(%ebx),%edx |
subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_2 |
|
#ifdef PARANOID |
sbbl $0,FPU_accum_3 |
jne L_bugged_1 /* Must check for non-zero result here */ |
#endif PARANOID |
|
/*----------------------------------------------------------------------*/ |
/* Half of the main problem is done, there is just a reduced numerator |
to handle now. |
Work with the second 32 bits, FPU_accum_0 not used from now on */ |
/*
 * Full division, second quotient word.
 * FPU_accum_2:FPU_accum_1 is the reduced numerator.  If it is not below
 * the divisor, the divisor is subtracted and FPU_result_2 incremented.
 * The word is then estimated with the augmented divisor word still held
 * in %ecx (zero means the estimate is taken directly from %edx), the
 * estimate times the full divisor is subtracted from
 * FPU_accum_2:FPU_accum_1:FPU_accum_0, and if FPU_accum_2 is left
 * non-zero the divisor is subtracted once more and the 64-bit quotient
 * FPU_result_2:FPU_result_1 is incremented.
 */
LDo_2nd_32_bits: |
movl FPU_accum_2,%edx /* get the reduced num */ |
movl FPU_accum_1,%eax |
|
/* need to check for possible subsequent overflow */ |
cmpl SIGH(%ebx),%edx |
jb LDo_2nd_div |
ja LPrevent_2nd_overflow |
|
cmpl SIGL(%ebx),%eax |
jb LDo_2nd_div |
|
LPrevent_2nd_overflow: |
/* The numerator is greater or equal, would cause overflow */ |
/* prevent overflow */ |
subl SIGL(%ebx),%eax |
sbbl SIGH(%ebx),%edx |
movl %edx,FPU_accum_2 |
movl %eax,FPU_accum_1 |
|
incl FPU_result_2 /* Reflect the subtraction in the answer */ |
|
#ifdef PARANOID |
je L_bugged_2 /* Can't bump the result to 1.0 */ |
#endif PARANOID |
|
LDo_2nd_div: |
cmpl $0,%ecx /* augmented denom msw */ |
jnz LSecond_div_not_1 |
|
/* %ecx == 0, we are dividing by 1.0 */ |
mov %edx,%eax |
jmp LSecond_div_done |
|
LSecond_div_not_1: |
divl %ecx /* Divide the numerator by the denom ms dw */ |
|
LSecond_div_done: |
movl %eax,FPU_result_1 /* Put the result in the answer */ |
|
mull SIGH(%ebx) /* mul by the ms dw of the denom */ |
|
subl %eax,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_2 |
|
#ifdef PARANOID |
jc L_bugged_2 |
#endif PARANOID |
|
movl FPU_result_1,%eax /* Get the result back */ |
mull SIGL(%ebx) /* now mul the ls dw of the denom */ |
|
subl %eax,FPU_accum_0 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ |
sbbl $0,FPU_accum_2 |
|
#ifdef PARANOID |
jc L_bugged_2 |
#endif PARANOID |
|
jz LDo_3rd_32_bits |
|
#ifdef PARANOID |
cmpl $1,FPU_accum_2 |
jne L_bugged_2 |
#endif PARANOID |
|
/* need to subtract another once of the denom */ |
movl SIGL(%ebx),%eax |
movl SIGH(%ebx),%edx |
subl %eax,FPU_accum_0 /* Subtract from the num local reg */ |
sbbl %edx,FPU_accum_1 |
sbbl $0,FPU_accum_2 |
|
#ifdef PARANOID |
jc L_bugged_2 |
jne L_bugged_2 |
#endif PARANOID |
|
addl $1,FPU_result_1 /* Correct the answer */ |
adcl $0,FPU_result_2 |
|
#ifdef PARANOID |
jc L_bugged_2 /* Must check for non-zero result here */ |
#endif PARANOID |
|
/*----------------------------------------------------------------------*/ |
/* The division is essentially finished here, we just need to perform |
tidying operations. |
Deal with the 3rd 32 bits */ |
/*
 * Final adjustment of the 64-bit quotient.
 * If the remainder FPU_accum_1:FPU_accum_0 is not below the divisor,
 * the divisor is subtracted and FPU_result_2:FPU_result_1 incremented.
 * Should that increment wrap the quotient to zero with a carry out,
 * FPU_ovfl_flag is set to 255 so that the rounding code treats the
 * result as having overflowed to 1.000.
 */
LDo_3rd_32_bits: |
movl FPU_accum_1,%edx /* get the reduced num */ |
movl FPU_accum_0,%eax |
|
/* need to check for possible subsequent overflow */ |
cmpl SIGH(%ebx),%edx /* denom */ |
jb LRound_prep |
ja LPrevent_3rd_overflow |
|
cmpl SIGL(%ebx),%eax /* denom */ |
jb LRound_prep |
|
LPrevent_3rd_overflow: |
/* prevent overflow */ |
subl SIGL(%ebx),%eax |
sbbl SIGH(%ebx),%edx |
movl %edx,FPU_accum_1 |
movl %eax,FPU_accum_0 |
|
addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ |
adcl $0,FPU_result_2 |
jne LRound_prep |
jnc LRound_prep |
|
/* This is a tricky spot, there is an overflow of the answer */ |
movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ |
|
/*
 * Build the extension word used for rounding and hand over to
 * fpu_reg_round.
 * %eax is set from the remainder FPU_accum_1:FPU_accum_0:
 *   0           remainder is zero
 *   0x70000000  twice the remainder is less than the divisor
 *   0x80000000  twice the remainder equals the divisor
 *   0xff000000  twice the remainder exceeds the divisor
 * If FPU_ovfl_flag is set, the destination exponent is incremented and
 * FPU_result_2:FPU_result_1:%eax is shifted right one bit with the ms
 * bit set.
 * LRound_precision (also the target of the fast path) decrements the
 * destination exponent and jumps to fpu_reg_round with
 * %eax = FPU_result_2, %ebx = FPU_result_1 and %edx = extension word.
 */
LRound_prep: |
/* |
* Prepare for rounding. |
* To test for rounding, we just need to compare 2*accum with the |
* denom. |
*/ |
movl FPU_accum_0,%ecx |
movl FPU_accum_1,%edx |
movl %ecx,%eax |
orl %edx,%eax |
jz LRound_ovfl /* The accumulator contains zero. */ |
|
/* Multiply by 2 */ |
clc |
rcll $1,%ecx |
rcll $1,%edx |
jc LRound_large /* No need to compare, denom smaller */ |
|
subl SIGL(%ebx),%ecx |
sbbl SIGH(%ebx),%edx |
jnc LRound_not_small |
|
movl $0x70000000,%eax /* Denom was larger */ |
jmp LRound_ovfl |
|
LRound_not_small: |
jnz LRound_large |
|
movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ |
jmp LRound_ovfl |
|
LRound_large: |
movl $0xff000000,%eax /* Denom was smaller */ |
|
LRound_ovfl: |
/* We are now ready to deal with rounding, but first we must get |
the bits properly aligned */ |
testb $255,FPU_ovfl_flag /* was the num > denom ? */ |
je LRound_precision |
|
incl EXP(%edi) |
|
/* shift the mantissa right one bit */ |
stc /* Will set the ms bit */ |
rcrl FPU_result_2 |
rcrl FPU_result_1 |
rcrl %eax |
|
/* Round the result as required */ |
LRound_precision: |
decl EXP(%edi) /* binary point between 1st & 2nd bits */ |
|
movl %eax,%edx |
movl FPU_result_1,%ebx |
movl FPU_result_2,%eax |
jmp fpu_reg_round |
|
|
/*
 * Internal-error exits, built only with PARANOID.
 * Each handler pushes EX_INTERNAL combined with its own code (0x202,
 * 0x203 or 0x204), calls EXCEPTION, discards the pushed argument into
 * %ebx and jumps to L_exit, which restores %ebx/%edi/%esi, releases the
 * frame and returns.
 * L_bugged is reached from divide_kernel when the ms bit of the divisor
 * is clear; L_bugged_1 and L_bugged_2 are reached from the consistency
 * checks in the first and in the later division steps respectively.
 */
#ifdef PARANOID |
/* The logic is wrong if we got here */ |
L_bugged: |
pushl EX_INTERNAL|0x202 |
call EXCEPTION |
pop %ebx |
jmp L_exit |
|
L_bugged_1: |
pushl EX_INTERNAL|0x203 |
call EXCEPTION |
pop %ebx |
jmp L_exit |
|
L_bugged_2: |
pushl EX_INTERNAL|0x204 |
call EXCEPTION |
pop %ebx |
jmp L_exit |
|
L_exit: |
popl %ebx |
popl %edi |
popl %esi |
|
leave |
ret |
#endif PARANOID |
/get_address.c
0,0 → 1,423
/*---------------------------------------------------------------------------+ |
| get_address.c | |
| | |
| Get the effective address from an FPU instruction. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Note: | |
| The file contains code which accesses user memory. | |
| Emulator static data may change when user memory is accessed, due to | |
| other processes using the emulator while swapping is in progress. | |
+---------------------------------------------------------------------------*/ |
|
|
#include <linux/stddef.h> |
#include <linux/head.h> |
|
#include <asm/segment.h> |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "fpu_emu.h" |
|
|
/* Bit of the ModR/M byte which the address decoding code in this file
   treats as "this instruction writes to memory". */
#define FPU_WRITE_BIT 0x10 |
|
/* Offsets, within struct info, of the saved general registers.
   The table is indexed with the 3-bit register numbers taken from the
   ModR/M and SIB bytes (see REG_ below), so the entry order matters. */
static int reg_offset[] = { |
offsetof(struct info,___eax), |
offsetof(struct info,___ecx), |
offsetof(struct info,___edx), |
offsetof(struct info,___ebx), |
offsetof(struct info,___esp), |
offsetof(struct info,___ebp), |
offsetof(struct info,___esi), |
offsetof(struct info,___edi) |
}; |
|
/* The saved general register number x, as a long lvalue inside *FPU_info. */
#define REG_(x) (*(long *)(reg_offset[(x)]+(char *) FPU_info)) |
|
/* Offsets of the saved segment registers used in vm86 mode.
   vm86_segment() indexes this table with (segment prefix code - 1).
   NOTE(review): the last entry repeats ds; presumably it serves the
   "no explicit override" prefix code -- confirm against the PREFIX_*
   definitions. */
static int reg_offset_vm86[] = { |
offsetof(struct info,___cs), |
offsetof(struct info,___vm86_ds), |
offsetof(struct info,___vm86_es), |
offsetof(struct info,___vm86_fs), |
offsetof(struct info,___vm86_gs), |
offsetof(struct info,___ss), |
offsetof(struct info,___vm86_ds) |
}; |
|
/* The vm86 segment register selected by index x, as an unsigned short
   lvalue inside *FPU_info. */
#define VM86_REG_(x) (*(unsigned short *) \ |
(reg_offset_vm86[((unsigned)x)]+(char *) FPU_info)) |
|
/* Offsets of the saved segment registers used in protected mode.
   pm_address() indexes this table with (segment prefix code - 1); the
   layout parallels reg_offset_vm86. */
static int reg_offset_pm[] = { |
offsetof(struct info,___cs), |
offsetof(struct info,___ds), |
offsetof(struct info,___es), |
offsetof(struct info,___fs), |
offsetof(struct info,___gs), |
offsetof(struct info,___ss), |
offsetof(struct info,___ds) |
}; |
|
/* The protected mode segment register selected by index x, as an
   unsigned short lvalue inside *FPU_info. */
#define PM_REG_(x) (*(unsigned short *) \ |
(reg_offset_pm[((unsigned)x)]+(char *) FPU_info)) |
|
|
/* Decode the SIB byte. This function assumes mod != 0 */ |
static int sib(int mod, unsigned long *fpu_eip) |
{ |
unsigned char ss,index,base; |
long offset; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(1); |
base = get_fs_byte((char *) (*fpu_eip)); /* The SIB byte */ |
RE_ENTRANT_CHECK_ON; |
(*fpu_eip)++; |
ss = base >> 6; |
index = (base >> 3) & 7; |
base &= 7; |
|
if ((mod == 0) && (base == 5)) |
offset = 0; /* No base register */ |
else |
offset = REG_(base); |
|
if (index == 4) |
{ |
/* No index register */ |
/* A non-zero ss is illegal */ |
if ( ss ) |
EXCEPTION(EX_Invalid); |
} |
else |
{ |
offset += (REG_(index)) << ss; |
} |
|
if (mod == 1) |
{ |
/* 8 bit signed displacement */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(1); |
offset += (signed char) get_fs_byte((char *) (*fpu_eip)); |
RE_ENTRANT_CHECK_ON; |
(*fpu_eip)++; |
} |
else if (mod == 2 || base == 5) /* The second condition also has mod==0 */ |
{ |
/* 32 bit displacement */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(4); |
offset += (signed) get_fs_long((unsigned long *) (*fpu_eip)); |
RE_ENTRANT_CHECK_ON; |
(*fpu_eip) += 4; |
} |
|
return offset; |
} |
|
|
/*
 * Look up the vm86 segment register selected by `segment' (a prefix
 * code; the register table is indexed with segment - 1), store its
 * value in *selector and return that value shifted left by four bits.
 */
static unsigned long vm86_segment(unsigned char segment,
				  unsigned short *selector)
{
  unsigned short seg_value;

  segment--;
#ifdef PARANOID
  /* segment is unsigned, so a prefix code of 0 wraps round and is
     caught by this test as well */
  if ( segment > PREFIX_SS_ )
    {
      EXCEPTION(EX_INTERNAL|0x130);
      math_abort(FPU_info,SIGSEGV);
    }
#endif /* PARANOID */
  seg_value = VM86_REG_(segment);
  *selector = seg_value;
  return (unsigned long) seg_value << 4;
}
|
|
/* This should work for 16 and 32 bit protected mode. */ |
static long pm_address(unsigned char FPU_modrm, unsigned char segment, |
unsigned short *selector, long offset) |
{ |
struct desc_struct descriptor; |
unsigned long base_address, limit, address, seg_top; |
|
segment--; |
#ifdef PARANOID |
if ( segment > PREFIX_SS_ ) |
{ |
EXCEPTION(EX_INTERNAL|0x132); |
math_abort(FPU_info,SIGSEGV); |
} |
#endif PARANOID |
|
*selector = PM_REG_(segment); |
|
descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); |
base_address = SEG_BASE_ADDR(descriptor); |
address = base_address + offset; |
limit = base_address |
+ (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; |
if ( limit < base_address ) limit = 0xffffffff; |
|
if ( SEG_EXPAND_DOWN(descriptor) ) |
{ |
if ( SEG_G_BIT(descriptor) ) |
seg_top = 0xffffffff; |
else |
{ |
seg_top = base_address + (1 << 20); |
if ( seg_top < base_address ) seg_top = 0xffffffff; |
} |
access_limit = |
(address <= limit) || (address >= seg_top) ? 0 : |
((seg_top-address) >= 255 ? 255 : seg_top-address); |
} |
else |
{ |
access_limit = |
(address > limit) || (address < base_address) ? 0 : |
((limit-address) >= 254 ? 255 : limit-address+1); |
} |
if ( SEG_EXECUTE_ONLY(descriptor) || |
(!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) |
{ |
access_limit = 0; |
} |
return address; |
} |
|
|
/* |
MOD R/M byte: MOD == 3 has a special use for the FPU |
SIB byte used iff R/M = 100b |
|
7 6 5 4 3 2 1 0 |
..... ......... ......... |
MOD OPCODE(2) R/M |
|
|
SIB byte |
|
7 6 5 4 3 2 1 0 |
..... ......... ......... |
SS INDEX BASE |
|
*/ |
|
void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip, |
struct address *addr, |
/* unsigned short *selector, unsigned long *offset, */ |
fpu_addr_modes addr_modes) |
{ |
unsigned char mod; |
unsigned rm = FPU_modrm & 7; |
long *cpu_reg_ptr; |
int address = 0; /* Initialized just to stop compiler warnings. */ |
|
/* Memory accessed via the cs selector is write protected |
in `non-segmented' 32 bit protected mode. */ |
if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) |
&& (addr_modes.override.segment == PREFIX_CS_) ) |
{ |
math_abort(FPU_info,SIGSEGV); |
} |
|
addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ |
|
mod = (FPU_modrm >> 6) & 3; |
|
if (rm == 4 && mod != 3) |
{ |
address = sib(mod, fpu_eip); |
} |
else |
{ |
cpu_reg_ptr = & REG_(rm); |
switch (mod) |
{ |
case 0: |
if (rm == 5) |
{ |
/* Special case: disp32 */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(4); |
address = get_fs_long((unsigned long *) (*fpu_eip)); |
(*fpu_eip) += 4; |
RE_ENTRANT_CHECK_ON; |
addr->offset = address; |
return (void *) address; |
} |
else |
{ |
address = *cpu_reg_ptr; /* Just return the contents |
of the cpu register */ |
addr->offset = address; |
return (void *) address; |
} |
case 1: |
/* 8 bit signed displacement */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(1); |
address = (signed char) get_fs_byte((char *) (*fpu_eip)); |
RE_ENTRANT_CHECK_ON; |
(*fpu_eip)++; |
break; |
case 2: |
/* 32 bit displacement */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(4); |
address = (signed) get_fs_long((unsigned long *) (*fpu_eip)); |
(*fpu_eip) += 4; |
RE_ENTRANT_CHECK_ON; |
break; |
case 3: |
/* Not legal for the FPU */ |
EXCEPTION(EX_Invalid); |
} |
address += *cpu_reg_ptr; |
} |
|
addr->offset = address; |
|
switch ( addr_modes.default_mode ) |
{ |
case 0: |
break; |
case VM86: |
address += vm86_segment(addr_modes.override.segment, |
(unsigned short *)&(addr->selector)); |
break; |
case PM16: |
case SEG32: |
address = pm_address(FPU_modrm, addr_modes.override.segment, |
(unsigned short *)&(addr->selector), address); |
break; |
default: |
EXCEPTION(EX_INTERNAL|0x133); |
} |
|
return (void *)address; |
} |
|
|
void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip, |
struct address *addr, |
/* unsigned short *selector, unsigned long *offset, */ |
fpu_addr_modes addr_modes) |
{ |
unsigned char mod; |
unsigned rm = FPU_modrm & 7; |
int address = 0; /* Default used for mod == 0 */ |
|
/* Memory accessed via the cs selector is write protected |
in `non-segmented' 32 bit protected mode. */ |
if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) |
&& (addr_modes.override.segment == PREFIX_CS_) ) |
{ |
math_abort(FPU_info,SIGSEGV); |
} |
|
addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ |
|
mod = (FPU_modrm >> 6) & 3; |
|
switch (mod) |
{ |
case 0: |
if (rm == 6) |
{ |
/* Special case: disp16 */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(2); |
address = (unsigned short)get_fs_word((unsigned short *) (*fpu_eip)); |
(*fpu_eip) += 2; |
RE_ENTRANT_CHECK_ON; |
goto add_segment; |
} |
break; |
case 1: |
/* 8 bit signed displacement */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(1); |
address = (signed char) get_fs_byte((signed char *) (*fpu_eip)); |
RE_ENTRANT_CHECK_ON; |
(*fpu_eip)++; |
break; |
case 2: |
/* 16 bit displacement */ |
RE_ENTRANT_CHECK_OFF; |
FPU_code_verify_area(2); |
address = (unsigned) get_fs_word((unsigned short *) (*fpu_eip)); |
(*fpu_eip) += 2; |
RE_ENTRANT_CHECK_ON; |
break; |
case 3: |
/* Not legal for the FPU */ |
EXCEPTION(EX_Invalid); |
break; |
} |
switch ( rm ) |
{ |
case 0: |
address += FPU_info->___ebx + FPU_info->___esi; |
break; |
case 1: |
address += FPU_info->___ebx + FPU_info->___edi; |
break; |
case 2: |
address += FPU_info->___ebp + FPU_info->___esi; |
if ( addr_modes.override.segment == PREFIX_DEFAULT ) |
addr_modes.override.segment = PREFIX_SS_; |
break; |
case 3: |
address += FPU_info->___ebp + FPU_info->___edi; |
if ( addr_modes.override.segment == PREFIX_DEFAULT ) |
addr_modes.override.segment = PREFIX_SS_; |
break; |
case 4: |
address += FPU_info->___esi; |
break; |
case 5: |
address += FPU_info->___edi; |
break; |
case 6: |
address += FPU_info->___ebp; |
if ( addr_modes.override.segment == PREFIX_DEFAULT ) |
addr_modes.override.segment = PREFIX_SS_; |
break; |
case 7: |
address += FPU_info->___ebx; |
break; |
} |
|
add_segment: |
address &= 0xffff; |
|
addr->offset = address; |
|
switch ( addr_modes.default_mode ) |
{ |
case 0: |
break; |
case VM86: |
address += vm86_segment(addr_modes.override.segment, |
(unsigned short *)&(addr->selector)); |
break; |
case PM16: |
case SEG32: |
address = pm_address(FPU_modrm, addr_modes.override.segment, |
(unsigned short *)&(addr->selector), address); |
break; |
default: |
EXCEPTION(EX_INTERNAL|0x131); |
} |
|
return (void *)address ; |
} |
/reg_ld_str.c
0,0 → 1,1452
/*---------------------------------------------------------------------------+ |
| reg_ld_str.c | |
| | |
| All of the functions which transfer data between user memory and FPU_REGs.| |
| | |
| Copyright (C) 1992,1993,1994,1996 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | |
| E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Note: | |
| The file contains code which accesses user memory. | |
| Emulator static data may change when user memory is accessed, due to | |
| other processes using the emulator while swapping is in progress. | |
+---------------------------------------------------------------------------*/ |
|
#include <asm/segment.h> |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "status_w.h" |
|
|
/* Exponent parameters of the memory formats handled by the load
   routines in this file.  The *_Ebias values are subtracted from the
   exponent field read from memory; *_Emax and *_Emin bound the
   exponents of ordinary numbers in each format. */
#define EXTENDED_Ebias 0x3fff |
#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ |
|
#define DOUBLE_Emax 1023 /* largest valid exponent */ |
#define DOUBLE_Ebias 1023 |
#define DOUBLE_Emin (-1022) /* smallest valid exponent */ |
|
#define SINGLE_Emax 127 /* largest valid exponent */ |
#define SINGLE_Ebias 127 |
#define SINGLE_Emin (-126) /* smallest valid exponent */ |
|
static void write_to_extended(FPU_REG *rp, char *d); |
|
|
/* Get a long double from user memory */ |
int reg_load_extended(long double *s, FPU_REG *loaded_data) |
{ |
unsigned long sigl, sigh, exp; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, s, 10); |
sigl = get_fs_long((unsigned long *) s); |
sigh = get_fs_long(1 + (unsigned long *) s); |
exp = get_fs_word(4 + (unsigned short *) s); |
RE_ENTRANT_CHECK_ON; |
|
loaded_data->tag = TW_Valid; /* Default */ |
loaded_data->sigl = sigl; |
loaded_data->sigh = sigh; |
if (exp & 0x8000) |
loaded_data->sign = SIGN_NEG; |
else |
loaded_data->sign = SIGN_POS; |
exp &= 0x7fff; |
loaded_data->exp = exp - EXTENDED_Ebias + EXP_BIAS; |
|
if ( exp == 0 ) |
{ |
if ( !(sigh | sigl) ) |
{ |
loaded_data->tag = TW_Zero; |
return 0; |
} |
/* The number is a de-normal or pseudodenormal. */ |
if (sigh & 0x80000000) |
{ |
/* Is a pseudodenormal. */ |
/* Convert it for internal use. */ |
/* This is non-80486 behaviour because the number |
loses its 'denormal' identity. */ |
loaded_data->exp++; |
return 1; |
} |
else |
{ |
/* Is a denormal. */ |
/* Convert it for internal use. */ |
loaded_data->exp++; |
normalize_nuo(loaded_data); |
return 0; |
} |
} |
else if ( exp == 0x7fff ) |
{ |
if ( !((sigh ^ 0x80000000) | sigl) ) |
{ |
/* Matches the bit pattern for Infinity. */ |
loaded_data->exp = EXP_Infinity; |
loaded_data->tag = TW_Infinity; |
return 0; |
} |
|
loaded_data->exp = EXP_NaN; |
loaded_data->tag = TW_NaN; |
if ( !(sigh & 0x80000000) ) |
{ |
/* NaNs have the ms bit set to 1. */ |
/* This is therefore an Unsupported NaN data type. */ |
/* This is non 80486 behaviour */ |
/* This should generate an Invalid Operand exception |
later, so we convert it to a SNaN */ |
loaded_data->sigh = 0x80000000; |
loaded_data->sigl = 0x00000001; |
loaded_data->sign = SIGN_NEG; |
return 1; |
} |
return 0; |
} |
|
if ( !(sigh & 0x80000000) ) |
{ |
/* Unsupported data type. */ |
/* Valid numbers have the ms bit set to 1. */ |
/* Unnormal. */ |
/* Convert it for internal use. */ |
/* This is non-80486 behaviour */ |
/* This should generate an Invalid Operand exception |
later, so we convert it to a SNaN */ |
loaded_data->sigh = 0x80000000; |
loaded_data->sigl = 0x00000001; |
loaded_data->sign = SIGN_NEG; |
loaded_data->exp = EXP_NaN; |
loaded_data->tag = TW_NaN; |
return 1; |
} |
return 0; |
} |
|
|
/* Get a double from user memory */ |
int reg_load_double(double *dfloat, FPU_REG *loaded_data) |
{ |
int exp; |
unsigned m64, l64; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, dfloat, 8); |
m64 = get_fs_long(1 + (unsigned long *) dfloat); |
l64 = get_fs_long((unsigned long *) dfloat); |
RE_ENTRANT_CHECK_ON; |
|
if (m64 & 0x80000000) |
loaded_data->sign = SIGN_NEG; |
else |
loaded_data->sign = SIGN_POS; |
exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias; |
m64 &= 0xfffff; |
if (exp > DOUBLE_Emax) |
{ |
/* Infinity or NaN */ |
if ((m64 == 0) && (l64 == 0)) |
{ |
/* +- infinity */ |
loaded_data->sigh = 0x80000000; |
loaded_data->sigl = 0x00000000; |
loaded_data->exp = EXP_Infinity; |
loaded_data->tag = TW_Infinity; |
return 0; |
} |
else |
{ |
/* Must be a signaling or quiet NaN */ |
loaded_data->exp = EXP_NaN; |
loaded_data->tag = TW_NaN; |
loaded_data->sigh = (m64 << 11) | 0x80000000; |
loaded_data->sigh |= l64 >> 21; |
loaded_data->sigl = l64 << 11; |
return 0; /* The calling function must look for NaNs */ |
} |
} |
else if ( exp < DOUBLE_Emin ) |
{ |
/* Zero or de-normal */ |
if ((m64 == 0) && (l64 == 0)) |
{ |
/* Zero */ |
int c = loaded_data->sign; |
reg_move(&CONST_Z, loaded_data); |
loaded_data->sign = c; |
return 0; |
} |
else |
{ |
/* De-normal */ |
loaded_data->exp = DOUBLE_Emin + EXP_BIAS; |
loaded_data->tag = TW_Valid; |
loaded_data->sigh = m64 << 11; |
loaded_data->sigh |= l64 >> 21; |
loaded_data->sigl = l64 << 11; |
normalize_nuo(loaded_data); |
return denormal_operand(); |
} |
} |
else |
{ |
loaded_data->exp = exp + EXP_BIAS; |
loaded_data->tag = TW_Valid; |
loaded_data->sigh = (m64 << 11) | 0x80000000; |
loaded_data->sigh |= l64 >> 21; |
loaded_data->sigl = l64 << 11; |
|
return 0; |
} |
} |
|
|
/* Get a float from user memory */ |
int reg_load_single(float *single, FPU_REG *loaded_data) |
{ |
unsigned m32; |
int exp; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, single, 4); |
m32 = get_fs_long((unsigned long *) single); |
RE_ENTRANT_CHECK_ON; |
|
if (m32 & 0x80000000) |
loaded_data->sign = SIGN_NEG; |
else |
loaded_data->sign = SIGN_POS; |
if (!(m32 & 0x7fffffff)) |
{ |
/* Zero */ |
int c = loaded_data->sign; |
reg_move(&CONST_Z, loaded_data); |
loaded_data->sign = c; |
return 0; |
} |
exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias; |
m32 = (m32 & 0x7fffff) << 8; |
if ( exp < SINGLE_Emin ) |
{ |
/* De-normals */ |
loaded_data->exp = SINGLE_Emin + EXP_BIAS; |
loaded_data->tag = TW_Valid; |
loaded_data->sigh = m32; |
loaded_data->sigl = 0; |
normalize_nuo(loaded_data); |
return denormal_operand(); |
} |
else if ( exp > SINGLE_Emax ) |
{ |
/* Infinity or NaN */ |
if ( m32 == 0 ) |
{ |
/* +- infinity */ |
loaded_data->sigh = 0x80000000; |
loaded_data->sigl = 0x00000000; |
loaded_data->exp = EXP_Infinity; |
loaded_data->tag = TW_Infinity; |
return 0; |
} |
else |
{ |
/* Must be a signaling or quiet NaN */ |
loaded_data->exp = EXP_NaN; |
loaded_data->tag = TW_NaN; |
loaded_data->sigh = m32 | 0x80000000; |
loaded_data->sigl = 0; |
return 0; /* The calling function must look for NaNs */ |
} |
} |
else |
{ |
loaded_data->exp = exp + EXP_BIAS; |
loaded_data->sigh = m32 | 0x80000000; |
loaded_data->sigl = 0; |
loaded_data->tag = TW_Valid; |
return 0; |
} |
} |
|
|
/* Get a long long from user memory */ |
void reg_load_int64(long long *_s, FPU_REG *loaded_data) |
{ |
int e; |
long long s; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, _s, 8); |
((unsigned long *)&s)[0] = get_fs_long((unsigned long *) _s); |
((unsigned long *)&s)[1] = get_fs_long(1 + (unsigned long *) _s); |
RE_ENTRANT_CHECK_ON; |
|
if (s == 0) |
{ reg_move(&CONST_Z, loaded_data); return; } |
|
if (s > 0) |
loaded_data->sign = SIGN_POS; |
else |
{ |
s = -s; |
loaded_data->sign = SIGN_NEG; |
} |
|
e = EXP_BIAS + 63; |
significand(loaded_data) = s; |
loaded_data->exp = e; |
loaded_data->tag = TW_Valid; |
normalize_nuo(loaded_data); |
} |
|
|
/* Get a long from user memory */ |
void reg_load_int32(long *_s, FPU_REG *loaded_data) |
{ |
long s; |
int e; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, _s, 4); |
s = (long)get_fs_long((unsigned long *) _s); |
RE_ENTRANT_CHECK_ON; |
|
if (s == 0) |
{ reg_move(&CONST_Z, loaded_data); return; } |
|
if (s > 0) |
loaded_data->sign = SIGN_POS; |
else |
{ |
s = -s; |
loaded_data->sign = SIGN_NEG; |
} |
|
e = EXP_BIAS + 31; |
loaded_data->sigh = s; |
loaded_data->sigl = 0; |
loaded_data->exp = e; |
loaded_data->tag = TW_Valid; |
normalize_nuo(loaded_data); |
} |
|
|
/* Get a short from user memory */ |
void reg_load_int16(short *_s, FPU_REG *loaded_data) |
{ |
int s, e; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, _s, 2); |
/* Cast as short to get the sign extended. */ |
s = (short)get_fs_word((unsigned short *) _s); |
RE_ENTRANT_CHECK_ON; |
|
if (s == 0) |
{ reg_move(&CONST_Z, loaded_data); return; } |
|
if (s > 0) |
loaded_data->sign = SIGN_POS; |
else |
{ |
s = -s; |
loaded_data->sign = SIGN_NEG; |
} |
|
e = EXP_BIAS + 15; |
loaded_data->sigh = s << 16; |
|
loaded_data->sigl = 0; |
loaded_data->exp = e; |
loaded_data->tag = TW_Valid; |
normalize_nuo(loaded_data); |
} |
|
|
/* Get a packed bcd array from user memory */ |
void reg_load_bcd(char *s, FPU_REG *loaded_data) |
{ |
int pos; |
unsigned char bcd; |
long long l=0; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, s, 10); |
RE_ENTRANT_CHECK_ON; |
for ( pos = 8; pos >= 0; pos--) |
{ |
l *= 10; |
RE_ENTRANT_CHECK_OFF; |
bcd = (unsigned char)get_fs_byte((unsigned char *) s+pos); |
RE_ENTRANT_CHECK_ON; |
l += bcd >> 4; |
l *= 10; |
l += bcd & 0x0f; |
} |
|
RE_ENTRANT_CHECK_OFF; |
loaded_data->sign = |
((unsigned char)get_fs_byte((unsigned char *) s+9)) & 0x80 ? |
SIGN_NEG : SIGN_POS; |
RE_ENTRANT_CHECK_ON; |
|
if (l == 0) |
{ |
char sign = loaded_data->sign; |
reg_move(&CONST_Z, loaded_data); |
loaded_data->sign = sign; |
} |
else |
{ |
significand(loaded_data) = l; |
loaded_data->exp = EXP_BIAS + 63; |
loaded_data->tag = TW_Valid; |
normalize_nuo(loaded_data); |
} |
} |
|
/*===========================================================================*/ |
|
/* Put a long double into user memory */ |
int reg_store_extended(long double *d, FPU_REG *st0_ptr) |
{ |
/* |
The only exception raised by an attempt to store to an |
extended format is the Invalid Stack exception, i.e. |
attempting to store from an empty register. |
*/ |
|
if ( st0_ptr->tag != TW_Empty ) |
{ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE, d, 10); |
RE_ENTRANT_CHECK_ON; |
write_to_extended(st0_ptr, (char *) d); |
return 1; |
} |
|
/* Empty register (stack underflow) */ |
EXCEPTION(EX_StackUnder); |
if ( control_word & CW_Invalid ) |
{ |
/* The masked response */ |
/* Put out the QNaN indefinite */ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,d,10); |
put_fs_long(0, (unsigned long *) d); |
put_fs_long(0xc0000000, 1 + (unsigned long *) d); |
put_fs_word(0xffff, 4 + (short *) d); |
RE_ENTRANT_CHECK_ON; |
return 1; |
} |
else |
return 0; |
|
} |
|
|
/* Put a double into user memory */ |
int reg_store_double(double *dfloat, FPU_REG *st0_ptr) |
{ |
unsigned long l[2]; |
unsigned long increment = 0; /* avoid gcc warnings */ |
char st0_tag = st0_ptr->tag; |
|
if (st0_tag == TW_Valid) |
{ |
int precision_loss; |
int exp; |
FPU_REG tmp; |
|
reg_move(st0_ptr, &tmp); |
exp = tmp.exp - EXP_BIAS; |
|
if ( exp < DOUBLE_Emin ) /* It may be a denormal */ |
{ |
/* A denormal will always underflow. */ |
#ifndef PECULIAR_486 |
/* An 80486 is supposed to be able to generate |
a denormal exception here, but... */ |
if ( st0_ptr->exp <= EXP_UNDER ) |
{ |
/* Underflow has priority. */ |
if ( control_word & CW_Underflow ) |
denormal_operand(); |
} |
#endif PECULIAR_486 |
|
tmp.exp += -DOUBLE_Emin + 52; /* largest exp to be 51 */ |
|
if ( (precision_loss = round_to_int(&tmp)) ) |
{ |
#ifdef PECULIAR_486 |
/* Did it round to a non-denormal ? */ |
/* This behaviour might be regarded as peculiar, it appears |
that the 80486 rounds to the dest precision, then |
converts to decide underflow. */ |
if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) && |
(st0_ptr->sigl & 0x000007ff)) ) |
#endif PECULIAR_486 |
{ |
EXCEPTION(EX_Underflow); |
/* This is a special case: see sec 16.2.5.1 of |
the 80486 book */ |
if ( !(control_word & CW_Underflow) ) |
return 0; |
} |
EXCEPTION(precision_loss); |
if ( !(control_word & CW_Precision) ) |
return 0; |
} |
l[0] = tmp.sigl; |
l[1] = tmp.sigh; |
} |
else |
{ |
if ( tmp.sigl & 0x000007ff ) |
{ |
precision_loss = 1; |
switch (control_word & CW_RC) |
{ |
case RC_RND: |
/* Rounding can get a little messy.. */ |
increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ |
((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ |
break; |
case RC_DOWN: /* towards -infinity */ |
increment = (tmp.sign == SIGN_POS) ? 0 : tmp.sigl & 0x7ff; |
break; |
case RC_UP: /* towards +infinity */ |
increment = (tmp.sign == SIGN_POS) ? tmp.sigl & 0x7ff : 0; |
break; |
case RC_CHOP: |
increment = 0; |
break; |
} |
|
/* Truncate the mantissa */ |
tmp.sigl &= 0xfffff800; |
|
if ( increment ) |
{ |
if ( tmp.sigl >= 0xfffff800 ) |
{ |
/* the sigl part overflows */ |
if ( tmp.sigh == 0xffffffff ) |
{ |
/* The sigh part overflows */ |
tmp.sigh = 0x80000000; |
exp++; |
if (exp >= EXP_OVER) |
goto overflow; |
} |
else |
{ |
tmp.sigh ++; |
} |
tmp.sigl = 0x00000000; |
} |
else |
{ |
/* We only need to increment sigl */ |
tmp.sigl += 0x00000800; |
} |
} |
} |
else |
precision_loss = 0; |
|
l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); |
l[1] = ((tmp.sigh >> 11) & 0xfffff); |
|
if ( exp > DOUBLE_Emax ) |
{ |
overflow: |
EXCEPTION(EX_Overflow); |
if ( !(control_word & CW_Overflow) ) |
return 0; |
set_precision_flag_up(); |
if ( !(control_word & CW_Precision) ) |
return 0; |
|
/* This is a special case: see sec 16.2.5.1 of the 80486 book */ |
/* Overflow to infinity */ |
l[0] = 0x00000000; /* Set to */ |
l[1] = 0x7ff00000; /* + INF */ |
} |
else |
{ |
if ( precision_loss ) |
{ |
if ( increment ) |
set_precision_flag_up(); |
else |
set_precision_flag_down(); |
} |
/* Add the exponent */ |
l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20); |
} |
} |
} |
else if (st0_tag == TW_Zero) |
{ |
/* Number is zero */ |
l[0] = 0; |
l[1] = 0; |
} |
else if (st0_tag == TW_Infinity) |
{ |
l[0] = 0; |
l[1] = 0x7ff00000; |
} |
else if (st0_tag == TW_NaN) |
{ |
/* See if we can get a valid NaN from the FPU_REG */ |
l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21); |
l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); |
if ( !(st0_ptr->sigh & 0x40000000) ) |
{ |
/* It is a signalling NaN */ |
EXCEPTION(EX_Invalid); |
if ( !(control_word & CW_Invalid) ) |
return 0; |
l[1] |= (0x40000000 >> 11); |
} |
l[1] |= 0x7ff00000; |
} |
else if ( st0_tag == TW_Empty ) |
{ |
/* Empty register (stack underflow) */ |
EXCEPTION(EX_StackUnder); |
if ( control_word & CW_Invalid ) |
{ |
/* The masked response */ |
/* Put out the QNaN indefinite */ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8); |
put_fs_long(0, (unsigned long *) dfloat); |
put_fs_long(0xfff80000, 1 + (unsigned long *) dfloat); |
RE_ENTRANT_CHECK_ON; |
return 1; |
} |
else |
return 0; |
} |
if ( st0_ptr->sign ) |
l[1] |= 0x80000000; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8); |
put_fs_long(l[0], (unsigned long *)dfloat); |
put_fs_long(l[1], 1 + (unsigned long *)dfloat); |
RE_ENTRANT_CHECK_ON; |
|
return 1; |
} |
|
|
/* Put a float into user memory */ |
int reg_store_single(float *single, FPU_REG *st0_ptr) |
{ |
long templ; |
unsigned long increment = 0; /* avoid gcc warnings */ |
char st0_tag = st0_ptr->tag; |
|
if (st0_tag == TW_Valid) |
{ |
int precision_loss; |
int exp; |
FPU_REG tmp; |
|
reg_move(st0_ptr, &tmp); |
exp = tmp.exp - EXP_BIAS; |
|
if ( exp < SINGLE_Emin ) |
{ |
/* A denormal will always underflow. */ |
#ifndef PECULIAR_486 |
/* An 80486 is supposed to be able to generate |
a denormal exception here, but... */ |
if ( st0_ptr->exp <= EXP_UNDER ) |
{ |
/* Underflow has priority. */ |
if ( control_word & CW_Underflow ) |
denormal_operand(); |
} |
#endif PECULIAR_486 |
|
tmp.exp += -SINGLE_Emin + 23; /* largest exp to be 22 */ |
|
if ( (precision_loss = round_to_int(&tmp)) ) |
{ |
#ifdef PECULIAR_486 |
/* Did it round to a non-denormal ? */ |
/* This behaviour might be regarded as peculiar, it appears |
that the 80486 rounds to the dest precision, then |
converts to decide underflow. */ |
if ( !((tmp.sigl == 0x00800000) && |
((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) ) |
#endif PECULIAR_486 |
{ |
EXCEPTION(EX_Underflow); |
/* This is a special case: see sec 16.2.5.1 of |
the 80486 book */ |
if ( !(control_word & EX_Underflow) ) |
return 0; |
} |
EXCEPTION(precision_loss); |
if ( !(control_word & EX_Precision) ) |
return 0; |
} |
templ = tmp.sigl; |
} |
else |
{ |
if ( tmp.sigl | (tmp.sigh & 0x000000ff) ) |
{ |
unsigned long sigh = tmp.sigh; |
unsigned long sigl = tmp.sigl; |
|
precision_loss = 1; |
switch (control_word & CW_RC) |
{ |
case RC_RND: |
increment = ((sigh & 0xff) > 0x80) /* more than half */ |
|| (((sigh & 0xff) == 0x80) && sigl) /* more than half */ |
|| ((sigh & 0x180) == 0x180); /* round to even */ |
break; |
case RC_DOWN: /* towards -infinity */ |
increment = (tmp.sign == SIGN_POS) |
? 0 : (sigl | (sigh & 0xff)); |
break; |
case RC_UP: /* towards +infinity */ |
increment = (tmp.sign == SIGN_POS) |
? (sigl | (sigh & 0xff)) : 0; |
break; |
case RC_CHOP: |
increment = 0; |
break; |
} |
|
/* Truncate part of the mantissa */ |
tmp.sigl = 0; |
|
if (increment) |
{ |
if ( sigh >= 0xffffff00 ) |
{ |
/* The sigh part overflows */ |
tmp.sigh = 0x80000000; |
exp++; |
if ( exp >= EXP_OVER ) |
goto overflow; |
} |
else |
{ |
tmp.sigh &= 0xffffff00; |
tmp.sigh += 0x100; |
} |
} |
else |
{ |
tmp.sigh &= 0xffffff00; /* Finish the truncation */ |
} |
} |
else |
precision_loss = 0; |
|
templ = (tmp.sigh >> 8) & 0x007fffff; |
|
if ( exp > SINGLE_Emax ) |
{ |
overflow: |
EXCEPTION(EX_Overflow); |
if ( !(control_word & CW_Overflow) ) |
return 0; |
set_precision_flag_up(); |
if ( !(control_word & CW_Precision) ) |
return 0; |
|
/* This is a special case: see sec 16.2.5.1 of the 80486 book. */ |
/* Masked response is overflow to infinity. */ |
templ = 0x7f800000; |
} |
else |
{ |
if ( precision_loss ) |
{ |
if ( increment ) |
set_precision_flag_up(); |
else |
set_precision_flag_down(); |
} |
/* Add the exponent */ |
templ |= ((exp+SINGLE_Ebias) & 0xff) << 23; |
} |
} |
} |
else if (st0_tag == TW_Zero) |
{ |
templ = 0; |
} |
else if (st0_tag == TW_Infinity) |
{ |
templ = 0x7f800000; |
} |
else if (st0_tag == TW_NaN) |
{ |
/* See if we can get a valid NaN from the FPU_REG */ |
templ = st0_ptr->sigh >> 8; |
if ( !(st0_ptr->sigh & 0x40000000) ) |
{ |
/* It is a signalling NaN */ |
EXCEPTION(EX_Invalid); |
if ( !(control_word & CW_Invalid) ) |
return 0; |
templ |= (0x40000000 >> 8); |
} |
templ |= 0x7f800000; |
} |
else if ( st0_tag == TW_Empty ) |
{ |
/* Empty register (stack underflow) */ |
EXCEPTION(EX_StackUnder); |
if ( control_word & EX_Invalid ) |
{ |
/* The masked response */ |
/* Put out the QNaN indefinite */ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,(void *)single,4); |
put_fs_long(0xffc00000, (unsigned long *) single); |
RE_ENTRANT_CHECK_ON; |
return 1; |
} |
else |
return 0; |
} |
#ifdef PARANOID |
else |
{ |
EXCEPTION(EX_INTERNAL|0x163); |
return 0; |
} |
#endif |
if (st0_ptr->sign) |
templ |= 0x80000000; |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,(void *)single,4); |
put_fs_long(templ,(unsigned long *) single); |
RE_ENTRANT_CHECK_ON; |
|
return 1; |
} |
|
|
/* Put a long long into user memory */ |
int reg_store_int64(long long *d, FPU_REG *st0_ptr) |
{ |
FPU_REG t; |
long long tll; |
int precision_loss; |
char st0_tag = st0_ptr->tag; |
|
if ( st0_tag == TW_Empty ) |
{ |
/* Empty register (stack underflow) */ |
EXCEPTION(EX_StackUnder); |
goto invalid_operand; |
} |
else if ( (st0_tag == TW_Infinity) || |
(st0_tag == TW_NaN) ) |
{ |
EXCEPTION(EX_Invalid); |
goto invalid_operand; |
} |
|
reg_move(st0_ptr, &t); |
precision_loss = round_to_int(&t); |
((long *)&tll)[0] = t.sigl; |
((long *)&tll)[1] = t.sigh; |
if ( (precision_loss == 1) || |
((t.sigh & 0x80000000) && |
!((t.sigh == 0x80000000) && (t.sigl == 0) && |
(t.sign == SIGN_NEG))) ) |
{ |
EXCEPTION(EX_Invalid); |
/* This is a special case: see sec 16.2.5.1 of the 80486 book */ |
invalid_operand: |
if ( control_word & EX_Invalid ) |
{ |
/* Produce something like QNaN "indefinite" */ |
tll = 0x8000000000000000LL; |
} |
else |
return 0; |
} |
else |
{ |
if ( precision_loss ) |
set_precision_flag(precision_loss); |
if ( t.sign ) |
tll = - tll; |
} |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,(void *)d,8); |
put_fs_long(((long *)&tll)[0],(unsigned long *) d); |
put_fs_long(((long *)&tll)[1],1 + (unsigned long *) d); |
RE_ENTRANT_CHECK_ON; |
|
return 1; |
} |
|
|
/* Put a long into user memory */ |
int reg_store_int32(long *d, FPU_REG *st0_ptr) |
{ |
FPU_REG t; |
int precision_loss; |
char st0_tag = st0_ptr->tag; |
|
if ( st0_tag == TW_Empty ) |
{ |
/* Empty register (stack underflow) */ |
EXCEPTION(EX_StackUnder); |
goto invalid_operand; |
} |
else if ( (st0_tag == TW_Infinity) || |
(st0_tag == TW_NaN) ) |
{ |
EXCEPTION(EX_Invalid); |
goto invalid_operand; |
} |
|
reg_move(st0_ptr, &t); |
precision_loss = round_to_int(&t); |
if (t.sigh || |
((t.sigl & 0x80000000) && |
!((t.sigl == 0x80000000) && (t.sign == SIGN_NEG))) ) |
{ |
EXCEPTION(EX_Invalid); |
/* This is a special case: see sec 16.2.5.1 of the 80486 book */ |
invalid_operand: |
if ( control_word & EX_Invalid ) |
{ |
/* Produce something like QNaN "indefinite" */ |
t.sigl = 0x80000000; |
} |
else |
return 0; |
} |
else |
{ |
if ( precision_loss ) |
set_precision_flag(precision_loss); |
if ( t.sign ) |
t.sigl = -(long)t.sigl; |
} |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,d,4); |
put_fs_long(t.sigl, (unsigned long *) d); |
RE_ENTRANT_CHECK_ON; |
|
return 1; |
} |
|
|
/* Put a short into user memory */ |
int reg_store_int16(short *d, FPU_REG *st0_ptr) |
{ |
FPU_REG t; |
int precision_loss; |
char st0_tag = st0_ptr->tag; |
|
if ( st0_tag == TW_Empty ) |
{ |
/* Empty register (stack underflow) */ |
EXCEPTION(EX_StackUnder); |
goto invalid_operand; |
} |
else if ( (st0_tag == TW_Infinity) || |
(st0_tag == TW_NaN) ) |
{ |
EXCEPTION(EX_Invalid); |
goto invalid_operand; |
} |
|
reg_move(st0_ptr, &t); |
precision_loss = round_to_int(&t); |
if (t.sigh || |
((t.sigl & 0xffff8000) && |
!((t.sigl == 0x8000) && (t.sign == SIGN_NEG))) ) |
{ |
EXCEPTION(EX_Invalid); |
/* This is a special case: see sec 16.2.5.1 of the 80486 book */ |
invalid_operand: |
if ( control_word & EX_Invalid ) |
{ |
/* Produce something like QNaN "indefinite" */ |
t.sigl = 0x8000; |
} |
else |
return 0; |
} |
else |
{ |
if ( precision_loss ) |
set_precision_flag(precision_loss); |
if ( t.sign ) |
t.sigl = -t.sigl; |
} |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,d,2); |
put_fs_word((short)t.sigl,(short *) d); |
RE_ENTRANT_CHECK_ON; |
|
return 1; |
} |
|
|
/* Put a packed bcd array into user memory */ |
int reg_store_bcd(char *d, FPU_REG *st0_ptr) |
{ |
FPU_REG t; |
unsigned long long ll; |
unsigned char b; |
int i, precision_loss; |
unsigned char sign = (st0_ptr->sign == SIGN_NEG) ? 0x80 : 0; |
char st0_tag = st0_ptr->tag; |
|
if ( st0_tag == TW_Empty ) |
{ |
/* Empty register (stack underflow) */ |
EXCEPTION(EX_StackUnder); |
goto invalid_operand; |
} |
|
reg_move(st0_ptr, &t); |
precision_loss = round_to_int(&t); |
ll = significand(&t); |
|
/* Check for overflow, by comparing with 999999999999999999 decimal. */ |
if ( (t.sigh > 0x0de0b6b3) || |
((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) ) |
{ |
EXCEPTION(EX_Invalid); |
/* This is a special case: see sec 16.2.5.1 of the 80486 book */ |
invalid_operand: |
if ( control_word & CW_Invalid ) |
{ |
/* Produce the QNaN "indefinite" */ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,d,10); |
for ( i = 0; i < 7; i++) |
put_fs_byte(0, (unsigned char *) d+i); /* These bytes "undefined" */ |
put_fs_byte(0xc0, (unsigned char *) d+7); /* This byte "undefined" */ |
put_fs_byte(0xff, (unsigned char *) d+8); |
put_fs_byte(0xff, (unsigned char *) d+9); |
RE_ENTRANT_CHECK_ON; |
return 1; |
} |
else |
return 0; |
} |
else if ( precision_loss ) |
{ |
/* Precision loss doesn't stop the data transfer */ |
set_precision_flag(precision_loss); |
} |
|
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,d,10); |
RE_ENTRANT_CHECK_ON; |
for ( i = 0; i < 9; i++) |
{ |
b = div_small(&ll, 10); |
b |= (div_small(&ll, 10)) << 4; |
RE_ENTRANT_CHECK_OFF; |
put_fs_byte(b,(unsigned char *) d+i); |
RE_ENTRANT_CHECK_ON; |
} |
RE_ENTRANT_CHECK_OFF; |
put_fs_byte(sign,(unsigned char *) d+9); |
RE_ENTRANT_CHECK_ON; |
|
return 1; |
} |
|
/*===========================================================================*/ |
|
/* r gets mangled such that sig is int, sign: |
it is NOT normalized */ |
/* The return value (in eax) is zero if the result is exact, |
if bits are changed due to rounding, truncation, etc, then |
a non-zero value is returned */ |
/* Overflow is signalled by a non-zero return value (in eax). |
In the case of overflow, the returned significand always has the |
largest possible value */ |
int round_to_int(FPU_REG *r) |
{ |
char very_big; |
unsigned eax; |
|
if (r->tag == TW_Zero) |
{ |
/* Make sure that zero is returned */ |
significand(r) = 0; |
return 0; /* o.k. */ |
} |
|
if (r->exp > EXP_BIAS + 63) |
{ |
r->sigl = r->sigh = ~0; /* The largest representable number */ |
return 1; /* overflow */ |
} |
|
eax = shrxs(&r->sigl, EXP_BIAS + 63 - r->exp); |
very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ |
#define half_or_more (eax & 0x80000000) |
#define frac_part (eax) |
#define more_than_half ((eax & 0x80000001) == 0x80000001) |
switch (control_word & CW_RC) |
{ |
case RC_RND: |
if ( more_than_half /* nearest */ |
|| (half_or_more && (r->sigl & 1)) ) /* odd -> even */ |
{ |
if ( very_big ) return 1; /* overflow */ |
significand(r) ++; |
return PRECISION_LOST_UP; |
} |
break; |
case RC_DOWN: |
if (frac_part && r->sign) |
{ |
if ( very_big ) return 1; /* overflow */ |
significand(r) ++; |
return PRECISION_LOST_UP; |
} |
break; |
case RC_UP: |
if (frac_part && !r->sign) |
{ |
if ( very_big ) return 1; /* overflow */ |
significand(r) ++; |
return PRECISION_LOST_UP; |
} |
break; |
case RC_CHOP: |
break; |
} |
|
return eax ? PRECISION_LOST_DOWN : 0; |
|
} |
|
/*===========================================================================*/ |
|
/*
 * Load the FPU environment from user memory at s.
 *
 * The 14 byte layout is used in VM86 mode, or when exactly one of
 * "default mode is PM16" and "operand size prefix present" holds;
 * otherwise the 28 byte layout is used.  control_word, partial_status,
 * the instruction and operand addresses, top, and the register tags are
 * updated from the image.
 *
 * Returns a pointer to the byte following the environment image.
 */
char *fldenv(fpu_addr_modes addr_modes, char *s)
{
  unsigned short tag_word = 0;
  unsigned char tag;
  int i;

  if ( (addr_modes.default_mode == VM86) ||
      ((addr_modes.default_mode == PM16)
      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_READ, s, 0x0e);
      control_word = get_fs_word((unsigned short *) s);
      partial_status = get_fs_word((unsigned short *) (s+2));
      tag_word = get_fs_word((unsigned short *) (s+4));
      instruction_address.offset = get_fs_word((unsigned short *) (s+6));
      instruction_address.selector = get_fs_word((unsigned short *) (s+8));
      operand_address.offset = get_fs_word((unsigned short *) (s+0x0a));
      operand_address.selector = get_fs_word((unsigned short *) (s+0x0c));
      RE_ENTRANT_CHECK_ON;
      s += 0x0e;
      if ( addr_modes.default_mode == VM86 )
        {
          /* The top four bits of each selector word are moved up to
             become bits 16-19 of the corresponding offset. */
          instruction_address.offset
            += (instruction_address.selector & 0xf000) << 4;
          operand_address.offset += (operand_address.selector & 0xf000) << 4;
        }
    }
  else
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_READ, s, 0x1c);
      control_word = get_fs_word((unsigned short *) s);
      partial_status = get_fs_word((unsigned short *) (s+4));
      tag_word = get_fs_word((unsigned short *) (s+8));
      instruction_address.offset = get_fs_long((unsigned long *) (s+0x0c));
      instruction_address.selector = get_fs_word((unsigned short *) (s+0x10));
      instruction_address.opcode = get_fs_word((unsigned short *) (s+0x12));
      operand_address.offset = get_fs_long((unsigned long *) (s+0x14));
      operand_address.selector = get_fs_long((unsigned long *) (s+0x18));
      RE_ENTRANT_CHECK_ON;
      s += 0x1c;
    }

#ifdef PECULIAR_486
  control_word &= ~0xe080;
#endif /* PECULIAR_486 */

  top = (partial_status >> SW_Top_Shift) & 7;

  /* Recompute the summary bits from the loaded status and control words. */
  if ( partial_status & ~control_word & CW_Exceptions )
    partial_status |= (SW_Summary | SW_Backward);
  else
    partial_status &= ~(SW_Summary | SW_Backward);

  for ( i = 0; i < 8; i++ )
    {
      tag = tag_word & 3;
      tag_word >>= 2;

      if ( tag == 3 )
        /* New tag is empty.  Accept it */
        regs[i].tag = TW_Empty;
      else if ( regs[i].tag == TW_Empty )
        {
          /* Old tag is empty and new tag is not empty.  New tag is determined
             by old reg contents */
          if ( regs[i].exp == EXP_BIAS - EXTENDED_Ebias )
            {
              if ( !(regs[i].sigl | regs[i].sigh) )
                regs[i].tag = TW_Zero;
              else
                regs[i].tag = TW_Valid;
            }
          else if ( regs[i].exp == 0x7fff + EXP_BIAS - EXTENDED_Ebias )
            {
              if ( !((regs[i].sigh & ~0x80000000) | regs[i].sigl) )
                regs[i].tag = TW_Infinity;
              else
                regs[i].tag = TW_NaN;
            }
          else
            regs[i].tag = TW_Valid;
        }
      /* Else old tag is not empty and new tag is not empty.  Old tag
         remains correct */
    }

  return s;
}
|
|
/*
 * Restore the complete FPU state from user memory at data_address.
 *
 * fldenv() loads the environment; the eight 10 byte register images
 * which follow it are then loaded, the first image going to the register
 * selected by top.  A register whose tag was TW_Empty after fldenv() is
 * tagged TW_Empty again after its data has been loaded.
 */
void frstor(fpu_addr_modes addr_modes, char *data_address)
{
  int i, stnr;
  unsigned char tag;
  char *s = fldenv(addr_modes, data_address);

  for ( i = 0; i < 8; i++ )
    {
      /* Load each register. */
      stnr = (i+top) & 7;
      tag = regs[stnr].tag;   /* Derived from the fldenv() loaded tag word. */
      reg_load_extended((long double *)(s+i*10), &regs[stnr]);
      if ( tag == TW_Empty )  /* The loaded data over-rides all other cases. */
        regs[stnr].tag = tag;
    }

}
|
|
unsigned short tag_word(void) |
{ |
unsigned short word = 0; |
unsigned char tag; |
int i; |
|
for ( i = 7; i >= 0; i-- ) |
{ |
switch ( tag = regs[i].tag ) |
{ |
case TW_Valid: |
if ( regs[i].exp <= (EXP_BIAS - EXTENDED_Ebias) ) |
tag = 2; |
break; |
case TW_Infinity: |
case TW_NaN: |
tag = 2; |
break; |
case TW_Empty: |
tag = 3; |
break; |
/* TW_Zero already has the correct value */ |
} |
word <<= 2; |
word |= tag; |
} |
return word; |
} |
|
|
/*
 * Store the FPU environment to user memory at d.
 *
 * The 14 byte layout is used in VM86 mode, or when exactly one of
 * "default mode is PM16" and "operand size prefix present" holds;
 * otherwise the 28 byte layout is used.
 *
 * After the store, all CW_Exceptions bits are set in control_word and
 * SW_Summary / SW_Backward are cleared in partial_status.
 *
 * Returns a pointer to the byte following the stored environment.
 */
char *fstenv(fpu_addr_modes addr_modes, char *d)
{
  if ( (addr_modes.default_mode == VM86) ||
      ((addr_modes.default_mode == PM16)
      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_WRITE,d,14);
#ifdef PECULIAR_486
      /* The control word field is two bytes wide in this layout, so it
         is written as a word and does not spill into the status word. */
      put_fs_word(control_word & ~0xe080, (unsigned short *) d);
#else
      put_fs_word(control_word, (unsigned short *) d);
#endif /* PECULIAR_486 */
      put_fs_word(status_word(), (unsigned short *) (d+2));
      put_fs_word(tag_word(), (unsigned short *) (d+4));
      put_fs_word(instruction_address.offset, (unsigned short *) (d+6));
      put_fs_word(operand_address.offset, (unsigned short *) (d+0x0a));
      if ( addr_modes.default_mode == VM86 )
        {
          /* Bits 16-19 of each offset go into the top four bits of the
             corresponding selector word. */
          put_fs_word((instruction_address.offset & 0xf0000) >> 4,
                      (unsigned short *) (d+8));
          put_fs_word((operand_address.offset & 0xf0000) >> 4,
                      (unsigned short *) (d+0x0c));
        }
      else
        {
          put_fs_word(instruction_address.selector, (unsigned short *) (d+8));
          put_fs_word(operand_address.selector, (unsigned short *) (d+0x0c));
        }
      RE_ENTRANT_CHECK_ON;
      d += 0x0e;
    }
  else
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_WRITE,d,28);
#ifdef PECULIAR_486
      /* An 80486 sets all the reserved bits to 1. */
      put_fs_long(0xffff0040 | (control_word & ~0xe080), (unsigned long *) d);
      put_fs_long(0xffff0000 | status_word(), (unsigned long *) (d+4));
      put_fs_long(0xffff0000 | tag_word(), (unsigned long *) (d+8));
#else
      put_fs_word(control_word, (unsigned short *) d);
      put_fs_word(status_word(), (unsigned short *) (d+4));
      put_fs_word(tag_word(), (unsigned short *) (d+8));
#endif /* PECULIAR_486 */
      put_fs_long(instruction_address.offset, (unsigned long *) (d+0x0c));
      put_fs_word(instruction_address.selector, (unsigned short *) (d+0x10));
      put_fs_word(instruction_address.opcode, (unsigned short *) (d+0x12));
      put_fs_long(operand_address.offset, (unsigned long *) (d+0x14));
#ifdef PECULIAR_486
      /* An 80486 sets all the reserved bits to 1. */
      put_fs_word(operand_address.selector, (unsigned short *) (d+0x18));
      put_fs_word(0xffff, (unsigned short *) (d+0x1a));
#else
      put_fs_long(operand_address.selector, (unsigned long *) (d+0x18));
#endif /* PECULIAR_486 */
      RE_ENTRANT_CHECK_ON;
      d += 0x1c;
    }

  control_word |= CW_Exceptions;
  partial_status &= ~(SW_Summary | SW_Backward);

  return d;
}
|
|
/*
 * Save the complete FPU state to user memory at data_address.
 *
 * fstenv() stores the environment; the eight registers are then written
 * after it as 10 byte images, starting with the register selected by
 * top.  finit() is called once everything has been written.
 */
void fsave(fpu_addr_modes addr_modes, char *data_address)
{
  char *d;
  int i;

  d = fstenv(addr_modes, data_address);
  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE,d,80);
  RE_ENTRANT_CHECK_ON;
  for ( i = 0; i < 8; i++ )
    write_to_extended(&regs[(top + i) & 7], d + 10 * i);

  finit();

}
|
/*===========================================================================*/ |
|
/* |
A call to this function must be preceded by a call to |
FPU_verify_area() to verify access to the 10 bytes at d |
*/ |
/*
 * Write the register *rp to the 10 bytes at d: the 64 bit significand
 * (low word first) followed by a 16 bit word which holds the exponent
 * with the sign in bit 15.  No exception is raised here apart from the
 * internal consistency checks made when PARANOID is defined.
 */
static void write_to_extended(FPU_REG *rp, char *d)
{
  long e;
  FPU_REG tmp;

  e = rp->exp - EXP_BIAS + EXTENDED_Ebias;

#ifdef PARANOID
  switch ( rp->tag )
    {
    case TW_Zero:
      if ( rp->sigh | rp->sigl | e )
        EXCEPTION(EX_INTERNAL | 0x160);
      break;
    case TW_Infinity:
    case TW_NaN:
      if ( (e ^ 0x7fff) | !(rp->sigh & 0x80000000) )
        EXCEPTION(EX_INTERNAL | 0x161);
      break;
    default:
      if (e > 0x7fff || e < -63)
        EXCEPTION(EX_INTERNAL | 0x162);
    }
#endif /* PARANOID */

  /*
    All numbers except denormals are stored internally in a
    format which is compatible with the extended real number
    format.
   */
  if ( e > 0 )
    {
      /* just copy the reg */
      RE_ENTRANT_CHECK_OFF;
      put_fs_long(rp->sigl, (unsigned long *) d);
      put_fs_long(rp->sigh, (unsigned long *) (d + 4));
      RE_ENTRANT_CHECK_ON;
    }
  else
    {
      /*
        The number is a de-normal stored as a normal using our
        extra exponent range, or is Zero.
        Convert it back to a de-normal, or leave it as Zero.
       */
      reg_move(rp, &tmp);
      tmp.exp += -EXTENDED_Emin + 63;  /* largest exp to be 63 */
      round_to_int(&tmp);
      e = 0;
      RE_ENTRANT_CHECK_OFF;
      put_fs_long(tmp.sigl, (unsigned long *) d);
      put_fs_long(tmp.sigh, (unsigned long *) (d + 4));
      RE_ENTRANT_CHECK_ON;
    }
  /* Put the sign into bit 15 of the exponent word. */
  e |= rp->sign == SIGN_POS ? 0 : 0x8000;
  RE_ENTRANT_CHECK_OFF;
  put_fs_word(e, (unsigned short *) (d + 8));
  RE_ENTRANT_CHECK_ON;
}
/poly_tan.c
0,0 → 1,213
/*---------------------------------------------------------------------------+ |
| poly_tan.c | |
| | |
| Compute the tan of a FPU_REG, using a polynomial approximation. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "poly.h" |
|
|
/*
 * Coefficient tables for poly_tan().  Each table is handed to
 * polynomial_Xsig() together with its HiPOWER* count minus one.  The
 * "odd" tables are used for the numerator polynomial and the "even"
 * tables for the denominator polynomial.
 * NOTE(review): the scaling of the 64 bit constants is not visible in
 * this file - confirm against polynomial_Xsig() before changing any.
 */
#define HiPOWERop 3 /* odd poly, positive terms */ |
static const unsigned long long oddplterm[HiPOWERop] = |
{ |
0x0000000000000000LL, |
0x0051a1cf08fca228LL, |
0x0000000071284ff7LL |
}; |
|
#define HiPOWERon 2 /* odd poly, negative terms */ |
static const unsigned long long oddnegterm[HiPOWERon] = |
{ |
0x1291a9a184244e80LL, |
0x0000583245819c21LL |
}; |
|
#define HiPOWERep 2 /* even poly, positive terms */ |
static const unsigned long long evenplterm[HiPOWERep] = |
{ |
0x0e848884b539e888LL, |
0x00003c7f18b887daLL |
}; |
|
#define HiPOWERen 2 /* even poly, negative terms */ |
static const unsigned long long evennegterm[HiPOWERen] = |
{ |
0xf1f0200fd51569ccLL, |
0x003afb46105c4432LL |
}; |
|
/* NOTE(review): presumably 2/3 as a 64 bit binary fraction - confirm at the point of use. */
static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; |
|
|
/*--- poly_tan() ------------------------------------------------------------+ |
| Compute tan(*arg) and store the answer in *result. | |
| | |
| arg: the operand. It must not be negative; this is checked only when | |
| PARANOID is defined, in which case arith_invalid(result) is | |
| called and the function returns early. | |
| result: receives the significand and exponent of the answer. A 16-bit | |
| zero is stored starting at its sign field (presumably this also | |
| clears the adjacent tag field - confirm against FPU_REG). | |
| | |
| Arguments above roughly pi/4 are replaced by (pi/2 - arg); the tangent | |
| of that value is computed and its reciprocal is returned, after a | |
| correction for the error in the 64-bit approximation used for pi/2. | |
+---------------------------------------------------------------------------*/ |
void poly_tan(FPU_REG const *arg, FPU_REG *result) |
{ |
long int exponent; |
int invert; |
Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, |
argSignif, fix_up; |
unsigned long adj; |
|
exponent = arg->exp - EXP_BIAS; |
|
#ifdef PARANOID |
if ( arg->sign != 0 ) /* Can't hack a number < 0.0 */ |
{ arith_invalid(result); return; } /* Need a positive number */ |
#endif /* PARANOID */ |
|
/* Split the problem into two domains, smaller and larger than pi/4 */ |
if ( (exponent == 0) || ((exponent == -1) && (arg->sigh > 0xc90fdaa2)) ) |
{ |
/* The argument is greater than (approx) pi/4 */ |
invert = 1; |
accum.lsw = 0; |
XSIG_LL(accum) = significand(arg); |
|
if ( exponent == 0 ) |
{ |
/* The argument is >= 1.0 */ |
/* Put the binary point at the left. */ |
XSIG_LL(accum) <<= 1; |
} |
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ |
XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); |
|
/* argSignif is the normalized copy of (pi/2 - arg); exponent is
   rebuilt from the value norm_Xsig() returns. */
argSignif.lsw = accum.lsw; |
XSIG_LL(argSignif) = XSIG_LL(accum); |
exponent = -1 + norm_Xsig(&argSignif); |
} |
else |
{ |
invert = 0; |
argSignif.lsw = 0; |
XSIG_LL(accum) = XSIG_LL(argSignif) = significand(arg); |
|
if ( exponent < -1 ) |
{ |
/* shift the argument right by the required places */ |
if ( shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U ) |
XSIG_LL(accum) ++; /* round up */ |
} |
} |
|
/* argSq and argSqSq start as copies of accum / argSq and are each
   multiplied by themselves in place. */
XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw; |
mul_Xsig_Xsig(&argSq, &argSq); |
XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw; |
mul_Xsig_Xsig(&argSqSq, &argSqSq); |
|
/* Compute the negative terms for the numerator polynomial */ |
accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; |
polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1); |
mul_Xsig_Xsig(&accumulatoro, &argSq); |
negate_Xsig(&accumulatoro); |
/* Add the positive terms */ |
polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1); |
|
|
/* Compute the positive terms for the denominator polynomial */ |
accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; |
polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1); |
mul_Xsig_Xsig(&accumulatore, &argSq); |
negate_Xsig(&accumulatore); |
/* Add the negative terms */ |
polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1); |
/* Multiply by arg^2 */ |
mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); |
mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); |
/* de-normalize and divide by 2 */ |
shr_Xsig(&accumulatore, -2*(1+exponent) + 1); |
negate_Xsig(&accumulatore); /* This does 1 - accumulator */ |
|
/* Now find the ratio. */ |
if ( accumulatore.msw == 0 ) |
{ |
/* accumulatoro must contain 1.0 here, (actually, 0) but it |
really doesn't matter what value we use because it will |
have negligible effect in later calculations |
*/ |
XSIG_LL(accum) = 0x8000000000000000LL; |
accum.lsw = 0; |
} |
else |
{ |
div_Xsig(&accumulatoro, &accumulatore, &accum); |
} |
|
/* Multiply by 1/3 * arg^3 */ |
mul64_Xsig(&accum, &XSIG_LL(argSignif)); |
mul64_Xsig(&accum, &XSIG_LL(argSignif)); |
mul64_Xsig(&accum, &XSIG_LL(argSignif)); |
mul64_Xsig(&accum, &twothirds); |
shr_Xsig(&accum, -2*(exponent+1)); |
|
/* tan(arg) = arg + accum */ |
add_two_Xsig(&accum, &argSignif, &exponent); |
|
if ( invert ) |
{ |
/* We now have the value of tan(pi_2 - arg) where pi_2 is an |
approximation for pi/2 |
*/ |
/* The next step is to fix the answer to compensate for the |
error due to the approximation used for pi/2 |
*/ |
|
/* This is (approx) delta, the error in our approx for pi/2 |
(see above). It has an exponent of -65 |
*/ |
XSIG_LL(fix_up) = 0x898cc51701b839a2LL; |
fix_up.lsw = 0; |
|
if ( exponent == 0 ) |
adj = 0xffffffff; /* We want approx 1.0 here, but |
this is close enough. */ |
else if ( exponent > -30 ) |
{ |
adj = accum.msw >> -(exponent+1); /* tan */ |
mul_32_32(adj, adj, &adj); /* tan^2 */ |
} |
else |
adj = 0; |
mul_32_32(0x898cc517, adj, &adj); /* delta * tan^2 */ |
|
fix_up.msw += adj; |
if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */ |
{ |
/* Yes, we need to add an msb */ |
shr_Xsig(&fix_up, 1); |
fix_up.msw |= 0x80000000; |
shr_Xsig(&fix_up, 64 + exponent); |
} |
else |
shr_Xsig(&fix_up, 65 + exponent); |
|
add_two_Xsig(&accum, &fix_up, &exponent); |
|
/* accum now contains tan(pi/2 - arg). |
Use tan(arg) = 1.0 / tan(pi/2 - arg) |
*/ |
accumulatoro.lsw = accumulatoro.midw = 0; |
accumulatoro.msw = 0x80000000; |
div_Xsig(&accumulatoro, &accum, &accum); |
exponent = - exponent - 1; |
} |
|
/* Transfer the result */ |
round_Xsig(&accum); |
*(short *)&(result->sign) = 0; |
significand(result) = XSIG_LL(accum); |
result->exp = EXP_BIAS + exponent; |
|
} |
/reg_norm.S
0,0 → 1,143
/*---------------------------------------------------------------------------+ |
| reg_norm.S | |
| | |
| Copyright (C) 1992,1993,1994,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Normalize the value in a FPU_REG. | |
| | |
| Call from C as: | |
| void normalize(FPU_REG *n) | |
| | |
| void normalize_nuo(FPU_REG *n) | |
| | |
+---------------------------------------------------------------------------*/ |
|
#include "fpu_emu.h" |
|
|
.text |
/*
 * normalize(FPU_REG *n)
 *
 * Shift the 64-bit significand of *n left until its most significant bit
 * is set, subtracting the shift count from the exponent.  If the whole
 * significand is zero the exponent is set to EXP_UNDER and the tag to
 * TW_Zero instead.  After normalizing (or when the value was already
 * normalized) the exponent is compared against EXP_OVER and EXP_UNDER and
 * arith_overflow(n) or arith_underflow(n) is called when it is out of
 * range.  With PARANOID defined, exception(0x220) is called first if the
 * tag is not TW_Valid; processing then continues.
 *
 * The labels L_exit and L_zero are also jumped to from normalize_nuo.
 */
ENTRY(normalize) |
pushl %ebp |
movl %esp,%ebp |
pushl %ebx |
|
movl PARAM1,%ebx |
|
#ifdef PARANOID |
cmpb TW_Valid,TAG(%ebx) |
je L_ok |
|
pushl $0x220 |
call SYMBOL_NAME(exception) |
addl $4,%esp |
|
L_ok: |
#endif /* PARANOID */ |
|
movl SIGH(%ebx),%edx |
movl SIGL(%ebx),%eax |
|
orl %edx,%edx /* ms bits */ |
js L_done /* Already normalized */ |
jnz L_shift_1 /* Shift left 1 - 31 bits */ |
|
orl %eax,%eax |
jz L_zero /* The contents are zero */ |
|
/* The high word is zero: move the low word up, i.e. shift left by 32 */
movl %eax,%edx |
xorl %eax,%eax |
subl $32,EXP(%ebx) /* This can cause an underflow */ |
|
/* We need to shift left by 1 - 31 bits */ |
L_shift_1: |
bsrl %edx,%ecx /* get the required shift in %ecx */ |
subl $31,%ecx |
negl %ecx |
shld %cl,%eax,%edx |
shl %cl,%eax |
subl %ecx,EXP(%ebx) /* This can cause an underflow */ |
|
movl %edx,SIGH(%ebx) |
movl %eax,SIGL(%ebx) |
|
L_done: |
cmpl EXP_OVER,EXP(%ebx) |
jge L_overflow |
|
cmpl EXP_UNDER,EXP(%ebx) |
jle L_underflow |
|
L_exit: |
popl %ebx |
leave |
ret |
|
|
L_zero: |
movl EXP_UNDER,EXP(%ebx) |
movb TW_Zero,TAG(%ebx) |
jmp L_exit |
|
L_underflow: |
push %ebx |
call SYMBOL_NAME(arith_underflow) |
pop %ebx |
jmp L_exit |
|
L_overflow: |
push %ebx |
call SYMBOL_NAME(arith_overflow) |
pop %ebx |
jmp L_exit |
|
|
|
/*
 * normalize_nuo(FPU_REG *n)
 *
 * Normalise without reporting underflow or overflow: the significand is
 * shifted left and the exponent reduced exactly as in normalize(), but the
 * exponent range is not checked afterwards.  A zero significand is handled
 * by the L_zero code of normalize(), and the common epilogue L_exit is
 * shared with it.  With PARANOID defined, exception(0x221) is called first
 * if the tag is not TW_Valid.
 */
ENTRY(normalize_nuo) |
pushl %ebp |
movl %esp,%ebp |
pushl %ebx |
|
movl PARAM1,%ebx |
|
#ifdef PARANOID |
cmpb TW_Valid,TAG(%ebx) |
je L_ok_nuo |
|
pushl $0x221 |
call SYMBOL_NAME(exception) |
addl $4,%esp |
|
L_ok_nuo: |
#endif /* PARANOID */ |
|
movl SIGH(%ebx),%edx |
movl SIGL(%ebx),%eax |
|
orl %edx,%edx /* ms bits */ |
js L_exit /* Already normalized */ |
jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */ |
|
orl %eax,%eax |
jz L_zero /* The contents are zero */ |
|
/* The high word is zero: move the low word up, i.e. shift left by 32 */
movl %eax,%edx |
xorl %eax,%eax |
subl $32,EXP(%ebx) /* This can cause an underflow */ |
|
/* We need to shift left by 1 - 31 bits */ |
L_nuo_shift_1: |
bsrl %edx,%ecx /* get the required shift in %ecx */ |
subl $31,%ecx |
negl %ecx |
shld %cl,%eax,%edx |
shl %cl,%eax |
subl %ecx,EXP(%ebx) /* This can cause an underflow */ |
|
movl %edx,SIGH(%ebx) |
movl %eax,SIGL(%ebx) |
jmp L_exit |
|
|
/reg_u_add.S
0,0 → 1,187
.file "reg_u_add.S" |
/*---------------------------------------------------------------------------+ |
| reg_u_add.S | |
| | |
| Add two valid (TW_Valid) FPU_REG numbers, of the same sign, and put the | |
| result in a destination FPU_REG. | |
| | |
| Copyright (C) 1992,1993,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| int reg_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | |
| int control_w) | |
| | |
+---------------------------------------------------------------------------*/ |
|
/* |
| Kernel addition routine reg_u_add(reg *arg1, reg *arg2, reg *answ). |
| Takes two valid reg f.p. numbers (TW_Valid), which are |
| treated as unsigned numbers, |
| and returns their sum as a TW_Valid or TW_S f.p. number. |
| The returned number is normalized. |
| Basic checks are performed if PARANOID is defined. |
*/ |
|
#include "exception.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
|
.text |
/*
 * reg_u_add(arg1, arg2, answ, control_w)
 *
 * The operand with the smaller exponent is loaded into %eax:%ebx with a
 * zeroed extension word %edx and shifted right by the exponent difference;
 * bits shifted out beyond the extension are recorded by setting the low
 * bit of %edx.  The other operand's significand is then added, a carry out
 * is absorbed by rotating right one place and incrementing the destination
 * exponent, and control passes to fpu_reg_round (defined elsewhere), which
 * rounds and returns to the caller.
 *
 * The sign of the destination is not written here (the copy is commented
 * out); the C caller sets it.
 *
 * The caller tests the return value, so the PARANOID failure path loads
 * %eax with 1 before returning, matching the error exit of reg_u_sub.
 */
ENTRY(reg_u_add) |
pushl %ebp |
movl %esp,%ebp |
pushl %esi |
pushl %edi |
pushl %ebx |
|
movl PARAM1,%esi /* source 1 */ |
movl PARAM2,%edi /* source 2 */ |
|
#ifdef DENORM_OPERAND |
cmpl EXP_UNDER,EXP(%esi) |
jg xOp1_not_denorm |
|
call SYMBOL_NAME(denormal_operand) |
orl %eax,%eax |
jnz fpu_Arith_exit |
|
xOp1_not_denorm: |
cmpl EXP_UNDER,EXP(%edi) |
jg xOp2_not_denorm |
|
call SYMBOL_NAME(denormal_operand) |
orl %eax,%eax |
jnz fpu_Arith_exit |
|
xOp2_not_denorm: |
#endif /* DENORM_OPERAND */ |
|
movl EXP(%esi),%ecx |
subl EXP(%edi),%ecx /* exp1 - exp2 */ |
jge L_arg1_larger |
|
/* num1 is smaller */ |
movl SIGL(%esi),%ebx |
movl SIGH(%esi),%eax |
|
/* From here on %esi points at the operand with the larger exponent
   and %cx holds the (positive) exponent difference. */
movl %edi,%esi |
negw %cx |
jmp L_accum_loaded |
|
L_arg1_larger: |
/* num1 has larger or equal exponent */ |
movl SIGL(%edi),%ebx |
movl SIGH(%edi),%eax |
|
L_accum_loaded: |
movl PARAM3,%edi /* destination */ |
/* movb SIGN(%esi),%dl |
movb %dl,SIGN(%edi) */ /* Copy the sign from the first arg */ |
|
|
movl EXP(%esi),%edx |
movl %edx,EXP(%edi) /* Copy exponent to destination */ |
|
xorl %edx,%edx /* clear the extension */ |
|
#ifdef PARANOID |
testl $0x80000000,%eax |
je L_bugged |
|
testl $0x80000000,SIGH(%esi) |
je L_bugged |
#endif /* PARANOID */ |
|
/* The number to be shifted is in %eax:%ebx:%edx */ |
cmpw $32,%cx /* shrd only works for 0..31 bits */ |
jnc L_more_than_31 |
|
/* less than 32 bits */ |
shrd %cl,%ebx,%edx |
shrd %cl,%eax,%ebx |
shr %cl,%eax |
jmp L_shift_done |
|
L_more_than_31: |
cmpw $64,%cx |
jnc L_more_than_63 |
|
subb $32,%cl |
jz L_exactly_32 |
|
shrd %cl,%eax,%edx |
shr %cl,%eax |
orl %ebx,%ebx |
jz L_more_31_no_low /* none of the lowest bits is set */ |
|
orl $1,%edx /* record the fact in the extension */ |
|
L_more_31_no_low: |
movl %eax,%ebx |
xorl %eax,%eax |
jmp L_shift_done |
|
L_exactly_32: |
movl %ebx,%edx |
movl %eax,%ebx |
xorl %eax,%eax |
jmp L_shift_done |
|
L_more_than_63: |
cmpw $65,%cx |
jnc L_more_than_64 |
|
/* Shift right by exactly 64 bits */
movl %eax,%edx |
orl %ebx,%ebx |
jz L_more_63_no_low |
|
orl $1,%edx |
jmp L_more_63_no_low |
|
L_more_than_64: |
movl $1,%edx /* The shifted nr always has at least one '1' */ |
|
L_more_63_no_low: |
xorl %ebx,%ebx |
xorl %eax,%eax |
|
L_shift_done: |
/* Now do the addition */ |
addl SIGL(%esi),%ebx |
adcl SIGH(%esi),%eax |
jnc L_round_the_result |
|
/* Overflow, adjust the result */ |
rcrl $1,%eax |
rcrl $1,%ebx |
rcrl $1,%edx |
jnc L_no_bit_lost |
|
orl $1,%edx |
|
L_no_bit_lost: |
incl EXP(%edi) |
|
L_round_the_result: |
jmp fpu_reg_round /* Round the result */ |
|
|
|
#ifdef PARANOID |
/* If we ever get here then we have problems! */ |
L_bugged: |
pushl EX_INTERNAL|0x201 |
call EXCEPTION |
pop %ebx |
/* Report the failure to the caller, which tests the return value */
movl $1,%eax |
jmp L_exit |
#endif /* PARANOID */ |
|
|
L_exit: |
popl %ebx |
popl %edi |
popl %esi |
leave |
ret |
/reg_u_sub.S
0,0 → 1,290
.file "reg_u_sub.S" |
/*---------------------------------------------------------------------------+ |
| reg_u_sub.S | |
| | |
| Core floating point subtraction routine. | |
| | |
| Copyright (C) 1992,1993,1995 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| Call from C as: | |
| int reg_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | |
| int control_w) | |
| | |
+---------------------------------------------------------------------------*/ |
|
/* |
| Kernel subtraction routine reg_u_sub(reg *arg1, reg *arg2, reg *answ). |
| Takes two valid reg f.p. numbers (TW_Valid), which are |
| treated as unsigned numbers, |
| and returns their difference as a TW_Valid or TW_Zero f.p. |
| number. |
| The first number (arg1) must be the larger. |
| The returned number is normalized. |
| Basic checks are performed if PARANOID is defined. |
*/ |
|
#include "exception.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
|
.text |
/*
 * reg_u_sub(arg1, arg2, answ, control_w)
 *
 * arg2's significand is loaded into %eax:%ebx with a zeroed extension word
 * %edx and shifted right by (exp1 - exp2); bits lost beyond the extension
 * are recorded in the low bit of %edx.  The shifted value is subtracted
 * from arg1's significand, the difference is shifted left until its top
 * bit is set (adjusting the destination exponent), and control passes to
 * fpu_reg_round (defined elsewhere), which rounds and returns to the
 * caller.
 *
 * Returns directly from this file in two cases: an exactly zero result
 * (destination tagged TW_Zero, %eax = 0) and, with PARANOID defined, an
 * internal consistency failure (%eax = 1 via L_error_exit).
 *
 * The sign of the destination is not written here (the copy is commented
 * out).
 */
ENTRY(reg_u_sub) |
pushl %ebp |
movl %esp,%ebp |
pushl %esi |
pushl %edi |
pushl %ebx |
|
movl PARAM1,%esi /* source 1 */ |
movl PARAM2,%edi /* source 2 */ |
|
#ifdef DENORM_OPERAND |
cmpl EXP_UNDER,EXP(%esi) |
jg xOp1_not_denorm |
|
call SYMBOL_NAME(denormal_operand) |
orl %eax,%eax |
jnz fpu_Arith_exit |
|
xOp1_not_denorm: |
cmpl EXP_UNDER,EXP(%edi) |
jg xOp2_not_denorm |
|
call SYMBOL_NAME(denormal_operand) |
orl %eax,%eax |
jnz fpu_Arith_exit |
|
xOp2_not_denorm: |
#endif /* DENORM_OPERAND */ |
|
movl EXP(%esi),%ecx |
subl EXP(%edi),%ecx /* exp1 - exp2 */ |
|
#ifdef PARANOID |
/* source 2 is always smaller than source 1 */ |
js L_bugged_1 |
|
testl $0x80000000,SIGH(%edi) /* The args are assumed to be normalized */ |
je L_bugged_2 |
|
testl $0x80000000,SIGH(%esi) |
je L_bugged_2 |
#endif /* PARANOID */ |
|
/*--------------------------------------+ |
| Form a register holding the | |
| smaller number | |
+--------------------------------------*/ |
movl SIGH(%edi),%eax /* register ms word */ |
movl SIGL(%edi),%ebx /* register ls word */ |
|
movl PARAM3,%edi /* destination */ |
movl EXP(%esi),%edx |
movl %edx,EXP(%edi) /* Copy exponent to destination */ |
/* movb SIGN(%esi),%dl |
movb %dl,SIGN(%edi) */ /* Copy the sign from the first arg */ |
|
xorl %edx,%edx /* register extension */ |
|
/*--------------------------------------+ |
| Shift the temporary register | |
| right the required number of | |
| places. | |
+--------------------------------------*/ |
L_shift_r: |
cmpl $32,%ecx /* shrd only works for 0..31 bits */ |
jnc L_more_than_31 |
|
/* less than 32 bits */ |
shrd %cl,%ebx,%edx |
shrd %cl,%eax,%ebx |
shr %cl,%eax |
jmp L_shift_done |
|
L_more_than_31: |
cmpl $64,%ecx |
jnc L_more_than_63 |
|
subb $32,%cl |
jz L_exactly_32 |
|
shrd %cl,%eax,%edx |
shr %cl,%eax |
orl %ebx,%ebx |
jz L_more_31_no_low /* none of the lowest bits is set */ |
|
orl $1,%edx /* record the fact in the extension */ |
|
L_more_31_no_low: |
movl %eax,%ebx |
xorl %eax,%eax |
jmp L_shift_done |
|
L_exactly_32: |
movl %ebx,%edx |
movl %eax,%ebx |
xorl %eax,%eax |
jmp L_shift_done |
|
L_more_than_63: |
cmpw $65,%cx |
jnc L_more_than_64 |
|
/* Shift right by 64 bits */ |
movl %eax,%edx |
orl %ebx,%ebx |
jz L_more_63_no_low |
|
orl $1,%edx |
jmp L_more_63_no_low |
|
L_more_than_64: |
/* Flags are still those of the cmpw $65 above: not-equal means > 65 */
jne L_more_than_65 |
|
/* Shift right by 65 bits */ |
/* Carry is clear if we get here */ |
movl %eax,%edx |
rcrl %edx |
jnc L_shift_65_nc |
|
orl $1,%edx |
jmp L_more_63_no_low |
|
L_shift_65_nc: |
orl %ebx,%ebx |
jz L_more_63_no_low |
|
orl $1,%edx |
jmp L_more_63_no_low |
|
L_more_than_65: |
movl $1,%edx /* The shifted nr always has at least one '1' */ |
|
L_more_63_no_low: |
xorl %ebx,%ebx |
xorl %eax,%eax |
|
L_shift_done: |
L_subtr: |
/*------------------------------+ |
| Do the subtraction | |
+------------------------------*/ |
xorl %ecx,%ecx |
subl %edx,%ecx |
movl %ecx,%edx |
movl SIGL(%esi),%ecx |
sbbl %ebx,%ecx |
movl %ecx,%ebx |
movl SIGH(%esi),%ecx |
sbbl %eax,%ecx |
movl %ecx,%eax |
|
#ifdef PARANOID |
/* We can never get a borrow */ |
jc L_bugged |
#endif /* PARANOID */ |
|
/*--------------------------------------+ |
| Normalize the result | |
+--------------------------------------*/ |
testl $0x80000000,%eax |
jnz L_round /* no shifting needed */ |
|
orl %eax,%eax |
jnz L_shift_1 /* shift left 1 - 31 bits */ |
|
orl %ebx,%ebx |
jnz L_shift_32 /* shift left 32 - 63 bits */ |
|
/* |
* A rare case, the only one which is non-zero if we got here |
* is: 1000000 .... 0000 |
* -0111111 .... 1111 1 |
* -------------------- |
* 0000000 .... 0000 1 |
*/ |
|
cmpl $0x80000000,%edx |
jnz L_must_be_zero |
|
/* Shift left 64 bits */ |
subl $64,EXP(%edi) |
xchg %edx,%eax |
jmp fpu_reg_round |
|
L_must_be_zero: |
#ifdef PARANOID |
orl %edx,%edx |
jnz L_bugged_3 |
#endif /* PARANOID */ |
|
/* The result is zero */ |
movb TW_Zero,TAG(%edi) |
movl $0,EXP(%edi) /* exponent */ |
movl $0,SIGL(%edi) |
movl $0,SIGH(%edi) |
jmp L_exit /* %eax contains zero */ |
|
L_shift_32: |
movl %ebx,%eax |
movl %edx,%ebx |
movl $0,%edx |
subl $32,EXP(%edi) /* Can get underflow here */ |
|
/* We need to shift left by 1 - 31 bits */ |
L_shift_1: |
bsrl %eax,%ecx /* get the required shift in %ecx */ |
subl $31,%ecx |
negl %ecx |
shld %cl,%ebx,%eax |
shld %cl,%edx,%ebx |
shl %cl,%edx |
subl %ecx,EXP(%edi) /* Can get underflow here */ |
|
L_round: |
jmp fpu_reg_round /* Round the result */ |
|
|
#ifdef PARANOID |
L_bugged_1: |
pushl EX_INTERNAL|0x206 |
call EXCEPTION |
pop %ebx |
jmp L_error_exit |
|
L_bugged_2: |
pushl EX_INTERNAL|0x209 |
call EXCEPTION |
pop %ebx |
jmp L_error_exit |
|
L_bugged_3: |
pushl EX_INTERNAL|0x210 |
call EXCEPTION |
pop %ebx |
jmp L_error_exit |
|
L_bugged_4: |
pushl EX_INTERNAL|0x211 |
call EXCEPTION |
pop %ebx |
jmp L_error_exit |
|
L_bugged: |
pushl EX_INTERNAL|0x212 |
call EXCEPTION |
pop %ebx |
jmp L_error_exit |
#endif /* PARANOID */ |
|
|
L_error_exit: |
movl $1,%eax |
L_exit: |
popl %ebx |
popl %edi |
popl %esi |
leave |
ret |
/version.h
0,0 → 1,12
/*---------------------------------------------------------------------------+ |
| version.h | |
| | |
| | |
| Copyright (C) 1992,1993,1994,1996 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | |
| E-mail billm@jacobi.maths.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/* Human-readable name and version number of this FPU emulator release. */
#define FPU_VERSION "wm-FPU-emu version 1.22" |
/load_store.c
0,0 → 1,260
/*---------------------------------------------------------------------------+ |
| load_store.c | |
| | |
| This file contains most of the code to interpret the FPU instructions | |
| which load and store from user memory. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
/*---------------------------------------------------------------------------+ |
| Note: | |
| The file contains code which accesses user memory. | |
| Emulator static data may change when user memory is accessed, due to | |
| other processes using the emulator while swapping is in progress. | |
+---------------------------------------------------------------------------*/ |
|
#include <asm/segment.h> |
|
#include "fpu_system.h" |
#include "exception.h" |
#include "fpu_emu.h" |
#include "status_w.h" |
#include "control_w.h" |
|
|
/* Operand classes stored in type_table[]; load_store_instr() switches on
   them to decide how st0_ptr is set up before an instruction is executed. */
#define _NONE_ 0 /* st0_ptr etc not needed */ |
#define _REG0_ 1 /* Will be storing st(0) */ |
#define _PUSH_ 3 /* Need to check for space to push onto stack */ |
#define _null_ 4 /* Function illegal or not implemented */ |
|
/* Mark the register st0_ptr points at as empty and increment top.  Expands
   to a braced block that uses the st0_ptr variable of the enclosing
   function. */
#define pop_0() { st0_ptr->tag = TW_Empty; top++; } |
|
|
/* Operand class for each of the 32 values of the `type' argument of
   load_store_instr(); the index matches the octal case labels there. */
static unsigned char const type_table[32] = { |
_PUSH_, _PUSH_, _PUSH_, _PUSH_, |
_null_, _null_, _null_, _null_, |
_REG0_, _REG0_, _REG0_, _REG0_, |
_REG0_, _REG0_, _REG0_, _REG0_, |
_NONE_, _null_, _NONE_, _PUSH_, |
_NONE_, _PUSH_, _null_, _PUSH_, |
_NONE_, _null_, _NONE_, _REG0_, |
_NONE_, _REG0_, _NONE_, _REG0_ |
}; |
|
/* Memory operand size for each `type'; load_store_instr() compares it with
   access_limit when addr_modes.default_mode is PM16. */
unsigned char const data_sizes_16[32] = { |
4, 4, 8, 2, 0, 0, 0, 0, |
4, 4, 8, 2, 4, 4, 8, 2, |
14, 0, 94, 10, 2, 10, 0, 8, |
14, 0, 94, 10, 2, 10, 2, 8 |
}; |
|
/* Same as data_sizes_16[] but used when addr_modes.default_mode is SEG32;
   only the entries at indices 020, 022, 030 and 032 differ (28 and 108
   instead of 14 and 94). */
unsigned char const data_sizes_32[32] = { |
4, 4, 8, 2, 0, 0, 0, 0, |
4, 4, 8, 2, 4, 4, 8, 2, |
28, 0,108, 10, 2, 10, 0, 8, |
28, 0,108, 10, 2, 10, 2, 8 |
}; |
|
/*
 * Execute one FPU load/store instruction that has a memory operand.
 *
 * type:         selects the instruction; it indexes type_table[] and the
 *               data_sizes_*[] tables (32 entries each) and matches the
 *               octal case labels below.  No range check is made here, so
 *               the caller is presumably expected to pass 0..31 - confirm.
 * addr_modes:   addr_modes.default_mode is examined; when it has the
 *               PROTECTED bit set, access_limit is compared with the
 *               operand size and math_abort(FPU_info,SIGSEGV) is called if
 *               the limit is smaller.
 * data_address: address of the memory operand.
 *
 * Depending on type_table[type], st0_ptr is left unset, pointed at st(0),
 * or pointed at st(-1) with top decremented (a push).  A push onto a
 * non-empty register calls stack_overflow() and returns 0; an entry marked
 * _null_ calls FPU_illegal() and returns 0.
 *
 * Returns 1 for fldenv, frstor, fldcw, fstenv, fsave, fstcw and fstsw, and
 * 0 in every other case.  According to the comments at the fldenv/frstor
 * cases, the non-zero return is there so that values just loaded are not
 * changed by later fix-up operations.
 */
int load_store_instr(unsigned char type, fpu_addr_modes addr_modes, |
void *data_address) |
{ |
FPU_REG loaded_data; |
FPU_REG *st0_ptr; |
|
st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ |
|
if ( addr_modes.default_mode & PROTECTED ) |
{ |
if ( addr_modes.default_mode == SEG32 ) |
{ |
if ( access_limit < data_sizes_32[type] ) |
math_abort(FPU_info,SIGSEGV); |
} |
else if ( addr_modes.default_mode == PM16 ) |
{ |
if ( access_limit < data_sizes_16[type] ) |
math_abort(FPU_info,SIGSEGV); |
} |
#ifdef PARANOID |
else |
EXCEPTION(EX_INTERNAL|0x140); |
#endif /* PARANOID */ |
} |
|
switch ( type_table[type] ) |
{ |
case _NONE_: |
break; |
case _REG0_: |
st0_ptr = &st(0); /* Some of these instructions pop after |
storing */ |
break; |
case _PUSH_: |
{ |
st0_ptr = &st(-1); |
if ( st0_ptr->tag != TW_Empty ) |
{ stack_overflow(); return 0; } |
top--; |
} |
break; |
case _null_: |
FPU_illegal(); |
return 0; |
#ifdef PARANOID |
default: |
EXCEPTION(EX_INTERNAL|0x141); |
return 0; |
#endif /* PARANOID */ |
} |
|
switch ( type ) |
{ |
case 000: /* fld m32real */ |
clear_C1(); |
reg_load_single((float *)data_address, &loaded_data); |
if ( (loaded_data.tag == TW_NaN) && |
real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) ) |
{ |
/* Undo the push done above; nothing is moved to the stack */
top++; |
break; |
} |
reg_move(&loaded_data, st0_ptr); |
break; |
case 001: /* fild m32int */ |
clear_C1(); |
reg_load_int32((long *)data_address, st0_ptr); |
break; |
case 002: /* fld m64real */ |
clear_C1(); |
reg_load_double((double *)data_address, &loaded_data); |
if ( (loaded_data.tag == TW_NaN) && |
real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) ) |
{ |
/* Undo the push done above; nothing is moved to the stack */
top++; |
break; |
} |
reg_move(&loaded_data, st0_ptr); |
break; |
case 003: /* fild m16int */ |
clear_C1(); |
reg_load_int16((short *)data_address, st0_ptr); |
break; |
case 010: /* fst m32real */ |
clear_C1(); |
reg_store_single((float *)data_address, st0_ptr); |
break; |
case 011: /* fist m32int */ |
clear_C1(); |
reg_store_int32((long *)data_address, st0_ptr); |
break; |
case 012: /* fst m64real */ |
clear_C1(); |
reg_store_double((double *)data_address, st0_ptr); |
break; |
case 013: /* fist m16int */ |
clear_C1(); |
reg_store_int16((short *)data_address, st0_ptr); |
break; |
case 014: /* fstp m32real */ |
clear_C1(); |
if ( reg_store_single((float *)data_address, st0_ptr) ) |
pop_0(); /* pop only if the number was actually stored |
(see the 80486 manual p16-28) */ |
break; |
case 015: /* fistp m32int */ |
clear_C1(); |
if ( reg_store_int32((long *)data_address, st0_ptr) ) |
pop_0(); /* pop only if the number was actually stored |
(see the 80486 manual p16-28) */ |
break; |
case 016: /* fstp m64real */ |
clear_C1(); |
if ( reg_store_double((double *)data_address, st0_ptr) ) |
pop_0(); /* pop only if the number was actually stored |
(see the 80486 manual p16-28) */ |
break; |
case 017: /* fistp m16int */ |
clear_C1(); |
if ( reg_store_int16((short *)data_address, st0_ptr) ) |
pop_0(); /* pop only if the number was actually stored |
(see the 80486 manual p16-28) */ |
break; |
case 020: /* fldenv m14/28byte */ |
fldenv(addr_modes, (char *)data_address); |
/* Ensure that the values just loaded are not changed by |
fix-up operations. */ |
return 1; |
case 022: /* frstor m94/108byte */ |
frstor(addr_modes, (char *)data_address); |
/* Ensure that the values just loaded are not changed by |
fix-up operations. */ |
return 1; |
case 023: /* fbld m80dec */ |
clear_C1(); |
reg_load_bcd((char *)data_address, st0_ptr); |
break; |
case 024: /* fldcw */ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_READ, data_address, 2); |
control_word = get_fs_word((unsigned short *) data_address); |
RE_ENTRANT_CHECK_ON; |
/* Recompute the summary bits: set them if any exception flagged in
   partial_status has its bit clear in the new control word. */
if ( partial_status & ~control_word & CW_Exceptions ) |
partial_status |= (SW_Summary | SW_Backward); |
else |
partial_status &= ~(SW_Summary | SW_Backward); |
#ifdef PECULIAR_486 |
control_word |= 0x40; /* An 80486 appears to always set this bit */ |
#endif /* PECULIAR_486 */ |
return 1; |
case 025: /* fld m80real */ |
clear_C1(); |
reg_load_extended((long double *)data_address, st0_ptr); |
break; |
case 027: /* fild m64int */ |
clear_C1(); |
reg_load_int64((long long *)data_address, st0_ptr); |
break; |
case 030: /* fstenv m14/28byte */ |
fstenv(addr_modes, (char *)data_address); |
return 1; |
case 032: /* fsave */ |
fsave(addr_modes, (char *)data_address); |
return 1; |
case 033: /* fbstp m80dec */ |
clear_C1(); |
if ( reg_store_bcd((char *)data_address, st0_ptr) ) |
pop_0(); /* pop only if the number was actually stored |
(see the 80486 manual p16-28) */ |
break; |
case 034: /* fstcw m16int */ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,data_address,2); |
put_fs_word(control_word, (short *) data_address); |
RE_ENTRANT_CHECK_ON; |
return 1; |
case 035: /* fstp m80real */ |
clear_C1(); |
if ( reg_store_extended((long double *)data_address, st0_ptr) ) |
pop_0(); /* pop only if the number was actually stored |
(see the 80486 manual p16-28) */ |
break; |
case 036: /* fstsw m2byte */ |
RE_ENTRANT_CHECK_OFF; |
FPU_verify_area(VERIFY_WRITE,data_address,2); |
put_fs_word(status_word(),(short *) data_address); |
RE_ENTRANT_CHECK_ON; |
return 1; |
case 037: /* fistp m64int */ |
clear_C1(); |
if ( reg_store_int64((long long *)data_address, st0_ptr) ) |
pop_0(); /* pop only if the number was actually stored |
(see the 80486 manual p16-28) */ |
break; |
} |
return 0; |
} |
/poly_sin.c
0,0 → 1,408
/*---------------------------------------------------------------------------+ |
| poly_sin.c | |
| | |
| Computation of an approximation of the sin function and the cosine | |
| function by a polynomial. | |
| | |
| Copyright (C) 1992,1993,1994 | |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| Australia. E-mail billm@vaxc.cc.monash.edu.au | |
| | |
| | |
+---------------------------------------------------------------------------*/ |
|
|
#include "exception.h" |
#include "reg_constant.h" |
#include "fpu_emu.h" |
#include "control_w.h" |
#include "poly.h" |
|
|
#define N_COEFF_P 4 |
#define N_COEFF_N 4 |
|
static const unsigned long long pos_terms_l[N_COEFF_P] = |
{ |
0xaaaaaaaaaaaaaaabLL, |
0x00d00d00d00cf906LL, |
0x000006b99159a8bbLL, |
0x000000000d7392e6LL |
}; |
|
static const unsigned long long neg_terms_l[N_COEFF_N] = |
{ |
0x2222222222222167LL, |
0x0002e3bc74aab624LL, |
0x0000000b09229062LL, |
0x00000000000c7973LL |
}; |
|
|
|
#define N_COEFF_PH 4 |
#define N_COEFF_NH 4 |
static const unsigned long long pos_terms_h[N_COEFF_PH] = |
{ |
0x0000000000000000LL, |
0x05b05b05b05b0406LL, |
0x000049f93edd91a9LL, |
0x00000000c9c9ed62LL |
}; |
|
static const unsigned long long neg_terms_h[N_COEFF_NH] = |
{ |
0xaaaaaaaaaaaaaa98LL, |
0x001a01a01a019064LL, |
0x0000008f76c68a77LL, |
0x0000000000d58f5eLL |
}; |
|
|
/*--- poly_sine() -----------------------------------------------------------+ |
| Compute sin(*arg) by polynomial approximation and store it in *result. | |
| | |
| arg: the operand. When PARANOID is defined and its tag is TW_Zero, | |
| CONST_Z is copied to *result and the function returns early. | |
| result: receives the significand and exponent of the answer, is tagged | |
| TW_Valid and is given the sign of arg. | |
| | |
| Two ranges are handled separately. Up to about 0.88309101259 the sine | |
| polynomial is evaluated on arg directly. Above that, the identity | |
| sin(arg) = cos(pi/2 - arg) is used, followed by a fix-up for the error | |
| in the 64-bit approximation used for pi/2. | |
| | |
| When PARANOID is defined, EX_INTERNAL|0x150 is raised if the result | |
| has an exponent >= EXP_BIAS and a significand above | |
| 0x8000000000000000. | |
+---------------------------------------------------------------------------*/ |
void poly_sine(FPU_REG const *arg, FPU_REG *result) |
{ |
int exponent, echange; |
Xsig accumulator, argSqrd, argTo4; |
unsigned long fix_up, adj; |
unsigned long long fixed_arg; |
|
|
#ifdef PARANOID |
if ( arg->tag == TW_Zero ) |
{ |
/* Return 0.0 */ |
reg_move(&CONST_Z, result); |
return; |
} |
#endif /* PARANOID */ |
|
exponent = arg->exp - EXP_BIAS; |
|
accumulator.lsw = accumulator.midw = accumulator.msw = 0; |
|
/* Split into two ranges, for arguments below and above 1.0 */ |
/* The boundary between upper and lower is approx 0.88309101259 */ |
if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xe21240aa)) ) |
{ |
/* The argument is <= 0.88309101259 */ |
|
argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0; |
mul64_Xsig(&argSqrd, &significand(arg)); |
shr_Xsig(&argSqrd, 2*(-1-exponent)); |
argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; |
argTo4.lsw = argSqrd.lsw; |
mul_Xsig_Xsig(&argTo4, &argTo4); |
|
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, |
N_COEFF_N-1); |
mul_Xsig_Xsig(&accumulator, &argSqrd); |
negate_Xsig(&accumulator); |
|
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, |
N_COEFF_P-1); |
|
shr_Xsig(&accumulator, 2); /* Divide by four */ |
accumulator.msw |= 0x80000000; /* Add 1.0 */ |
|
mul64_Xsig(&accumulator, &significand(arg)); |
mul64_Xsig(&accumulator, &significand(arg)); |
mul64_Xsig(&accumulator, &significand(arg)); |
|
/* Divide by four, FPU_REG compatible, etc */ |
exponent = 3*exponent + EXP_BIAS; |
|
/* The minimum exponent difference is 3 */ |
shr_Xsig(&accumulator, arg->exp - exponent); |
|
negate_Xsig(&accumulator); |
XSIG_LL(accumulator) += significand(arg); |
|
echange = round_Xsig(&accumulator); |
|
result->exp = arg->exp + echange; |
} |
else |
{ |
/* The argument is > 0.88309101259 */ |
/* We use sin(arg) = cos(pi/2-arg) */ |
|
fixed_arg = significand(arg); |
|
if ( exponent == 0 ) |
{ |
/* The argument is >= 1.0 */ |
|
/* Put the binary point at the left. */ |
fixed_arg <<= 1; |
} |
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ |
fixed_arg = 0x921fb54442d18469LL - fixed_arg; |
|
XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; |
mul64_Xsig(&argSqrd, &fixed_arg); |
|
XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; |
mul_Xsig_Xsig(&argTo4, &argTo4); |
|
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, |
N_COEFF_NH-1); |
mul_Xsig_Xsig(&accumulator, &argSqrd); |
negate_Xsig(&accumulator); |
|
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, |
N_COEFF_PH-1); |
negate_Xsig(&accumulator); |
|
mul64_Xsig(&accumulator, &fixed_arg); |
mul64_Xsig(&accumulator, &fixed_arg); |
|
shr_Xsig(&accumulator, 3); |
negate_Xsig(&accumulator); |
|
add_Xsig_Xsig(&accumulator, &argSqrd); |
|
shr_Xsig(&accumulator, 1); |
|
accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ |
negate_Xsig(&accumulator); |
|
/* The basic computation is complete. Now fix the answer to |
compensate for the error due to the approximation used for |
pi/2 |
*/ |
|
/* This has an exponent of -65 */ |
fix_up = 0x898cc517; |
/* The fix-up needs to be improved for larger args */ |
if ( argSqrd.msw & 0xffc00000 ) |
{ |
/* Get about 32 bit precision in these: */ |
mul_32_32(0x898cc517, argSqrd.msw, &adj); |
fix_up -= adj/6; |
} |
mul_32_32(fix_up, LL_MSW(fixed_arg), &fix_up); |
|
/* Subtract fix_up from the low word and borrow from the upper
   64 bits if the low word wrapped around. */
adj = accumulator.lsw; /* temp save */ |
accumulator.lsw -= fix_up; |
if ( accumulator.lsw > adj ) |
XSIG_LL(accumulator) --; |
|
echange = round_Xsig(&accumulator); |
|
result->exp = EXP_BIAS - 1 + echange; |
} |
|
significand(result) = XSIG_LL(accumulator); |
result->tag = TW_Valid; |
result->sign = arg->sign; |
|
#ifdef PARANOID |
if ( (result->exp >= EXP_BIAS) |
&& (significand(result) > 0x8000000000000000LL) ) |
{ |
EXCEPTION(EX_INTERNAL|0x150); |
} |
#endif /* PARANOID */ |
|
} |
|
|
|
/*--- poly_cos() ------------------------------------------------------------+ |
| | |
+---------------------------------------------------------------------------*/ |
/*
 * poly_cos() - evaluate the cosine of *arg by polynomial approximation and
 * store it in *result.
 *
 *   arg    - source register (read only).  When PARANOID is defined, a zero
 *            argument returns CONST_1 immediately, and an argument larger
 *            than pi/2 (significand 0xc90fdaa22168c234 at exponent EXP_BIAS)
 *            raises EX_Invalid and returns CONST_QNaN.  Without PARANOID no
 *            range check is done here.
 *   result - destination register; receives either a copy of CONST_1 or a
 *            positive number built from the accumulator.
 *
 * Two evaluation paths are used:
 *   - small arguments (the original comment gives the bound as 0.687705):
 *     polynomials in arg^4 using the neg_terms_h / pos_terms_h tables,
 *     producing a result with exponent EXP_BIAS - 1, or exactly 1.0;
 *   - larger arguments: the computation is done on fixed_arg = pi/2 - arg
 *     using the neg_terms_l / pos_terms_l tables, followed by a correction
 *     for the bits of pi/2 that do not fit in the 64-bit constant.
 *
 * The order of the fixed-point operations below is significant; do not
 * reorder them.
 */
void poly_cos(FPU_REG const *arg, FPU_REG *result)
{
  long int            exponent, exp2, echange;
  Xsig                accumulator, argSqrd, fix_up, argTo4;
  unsigned long       adj;
  unsigned long long  fixed_arg;


#ifdef PARANOID
  if ( arg->tag == TW_Zero )
    {
      /* Return 1.0 */
      reg_move(&CONST_1, result);
      return;
    }

  /* Reject arguments above pi/2. */
  if ( (arg->exp > EXP_BIAS)
       || ((arg->exp == EXP_BIAS)
           && (significand(arg) > 0xc90fdaa22168c234LL)) )
    {
      EXCEPTION(EX_Invalid);
      reg_move(&CONST_QNaN, result);
      return;
    }
#endif /* PARANOID */

  /* Unbiased exponent of the argument. */
  exponent = arg->exp - EXP_BIAS;

  accumulator.lsw = accumulator.midw = accumulator.msw = 0;

  if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xb00d6f54)) )
    {
      /* arg is < 0.687705 */

      /* argSqrd = arg * arg, starting from the 64-bit significand. */
      argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0;
      mul64_Xsig(&argSqrd, &significand(arg));

      if ( exponent < -1 )
        {
          /* shift the argument right by the required places */
          shr_Xsig(&argSqrd, 2*(-1-exponent));
        }

      /* argTo4 = argSqrd * argSqrd */
      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
      argTo4.lsw = argSqrd.lsw;
      mul_Xsig_Xsig(&argTo4, &argTo4);

      /* Terms from the neg_terms_h table, scaled by arg^2 and negated,
         are accumulated first; the pos_terms_h terms are then added on
         top of them. */
      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
                      N_COEFF_NH-1);
      mul_Xsig_Xsig(&accumulator, &argSqrd);
      negate_Xsig(&accumulator);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
                      N_COEFF_PH-1);
      negate_Xsig(&accumulator);

      /* Multiply by the raw significand twice, then apply the exponent
         scaling that the two multiplications did not account for. */
      mul64_Xsig(&accumulator, &significand(arg));
      mul64_Xsig(&accumulator, &significand(arg));
      shr_Xsig(&accumulator, -2*(1+exponent));

      shr_Xsig(&accumulator, 3);
      negate_Xsig(&accumulator);

      add_Xsig_Xsig(&accumulator, &argSqrd);

      shr_Xsig(&accumulator, 1);

      /* It doesn't matter if accumulator is all zero here, the
         following code will work ok */
      negate_Xsig(&accumulator);

      /* Round the upper 64 bits according to the top bit of the
         discarded low word. */
      if ( accumulator.lsw & 0x80000000 )
        XSIG_LL(accumulator) ++;
      if ( accumulator.msw == 0 )
        {
          /* The result is 1.0 */
          reg_move(&CONST_1, result);
        }
      else
        {
          significand(result) = XSIG_LL(accumulator);

          /* will be a valid positive nr with expon = -1 */
          /* NOTE(review): one 16-bit store starting at the sign field;
             presumably it also clears the adjacent tag byte, which
             relies on the FPU_REG layout -- confirm. */
          *(short *)&(result->sign) = 0;
          result->exp = EXP_BIAS - 1;
        }
    }
  else
    {
      fixed_arg = significand(arg);

      if ( exponent == 0 )
        {
          /* The argument is >= 1.0 */

          /* Put the binary point at the left. */
          fixed_arg <<= 1;
        }
      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
      /* fixed_arg = pi/2 - arg, using only the first 64 fraction bits
         of pi/2; the truncated bits are compensated for further down. */
      fixed_arg = 0x921fb54442d18469LL - fixed_arg;

      exponent = -1;
      exp2 = -1;

      /* A shift is needed here only for a narrow range of arguments,
         i.e. for fixed_arg approx 2^-32, but we pick up more... */
      if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
        {
          fixed_arg <<= 16;
          exponent -= 16;
          exp2 -= 16;
        }

      /* argSqrd = fixed_arg * fixed_arg */
      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
      mul64_Xsig(&argSqrd, &fixed_arg);

      if ( exponent < -1 )
        {
          /* shift the argument right by the required places */
          shr_Xsig(&argSqrd, 2*(-1-exponent));
        }

      /* argTo4 = argSqrd * argSqrd */
      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
      argTo4.lsw = argSqrd.lsw;
      mul_Xsig_Xsig(&argTo4, &argTo4);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
                      N_COEFF_N-1);
      mul_Xsig_Xsig(&accumulator, &argSqrd);
      negate_Xsig(&accumulator);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
                      N_COEFF_P-1);

      shr_Xsig(&accumulator, 2);    /* Divide by four */
      accumulator.msw |= 0x80000000;  /* Add 1.0 */

      /* Multiply by fixed_arg three times. */
      mul64_Xsig(&accumulator, &fixed_arg);
      mul64_Xsig(&accumulator, &fixed_arg);
      mul64_Xsig(&accumulator, &fixed_arg);

      /* Divide by four, FPU_REG compatible, etc */
      exponent = 3*exponent;

      /* The minimum exponent difference is 3 */
      shr_Xsig(&accumulator, exp2 - exponent);

      /* accumulator = fixed_arg - accumulator (upper 64 bits). */
      negate_Xsig(&accumulator);
      XSIG_LL(accumulator) += fixed_arg;

      /* The basic computation is complete. Now fix the answer to
         compensate for the error due to the approximation used for
         pi/2
         */

      /* This has an exponent of -65 */
      /* These are the pi/2 digits that follow the 64-bit constant used
         above (see the hex expansion of pi/2 quoted earlier). */
      XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
      fix_up.lsw = 0;

      /* The fix-up needs to be improved for larger args */
      if ( argSqrd.msw & 0xffc00000 )
        {
          /* Get about 32 bit precision in these: */
          mul_32_32(0x898cc517, argSqrd.msw, &adj);
          fix_up.msw -= adj/2;
          mul_32_32(0x898cc517, argTo4.msw, &adj);
          fix_up.msw += adj/24;
        }

      /* Normalize, then align fix_up with the accumulator before the
         two are added. */
      exp2 += norm_Xsig(&accumulator);
      shr_Xsig(&accumulator, 1); /* Prevent overflow */
      exp2++;
      shr_Xsig(&fix_up, 65 + exp2);

      add_Xsig_Xsig(&accumulator, &fix_up);

      echange = round_Xsig(&accumulator);

      result->exp = exp2 + EXP_BIAS + echange;
      /* NOTE(review): 16-bit store starting at the sign field; presumably
         clears the adjacent tag byte as well -- confirm against the
         FPU_REG layout. */
      *(short *)&(result->sign) = 0;      /* Is a valid positive nr */
      significand(result) = XSIG_LL(accumulator);
    }

#ifdef PARANOID
  /* Sanity check on the produced result. */
  if ( (result->exp >= EXP_BIAS)
       && (significand(result) > 0x8000000000000000LL) )
    {
      EXCEPTION(EX_INTERNAL|0x151);
    }
#endif /* PARANOID */

}