OpenCores
URL https://opencores.org/ocsvn/or1k_old/or1k_old/trunk

Subversion Repositories or1k_old

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k_old/trunk/rc203soc/sw/uClinux/arch/i386/math-emu
    from Rev 1765 to Rev 1782
    Reverse comparison

Rev 1765 → Rev 1782

/reg_add_sub.c
0,0 → 1,318
/*---------------------------------------------------------------------------+
| reg_add_sub.c |
| |
| Functions to add or subtract two registers and put the result in a third. |
| |
| Copyright (C) 1992,1993 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| For each function, the destination may be any FPU_REG, including one of |
| the source FPU_REGs. |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "fpu_system.h"
 
 
/*
 * Add two registers: (a + b) -> dest.
 *
 * a, b       source operands (either may be the same object as dest)
 * dest       receives the result
 * control_w  control word; its CW_RC field selects the sign of an exact
 *            zero result, and it is forwarded to reg_u_add()/reg_u_sub()
 *
 * Returns 0 when a result has been stored in dest.
 * Returns 1 when reg_u_add()/reg_u_sub() reports failure (dest->sign is
 * then restored to the value it had on entry), when denormal_operand()
 * returns non-zero, or when the tag combination is not recognised.
 * For NaN operands and for Infinity + (-Infinity) the return value is
 * whatever real_2op_NaN() / arith_invalid() return.
 */
int reg_add(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w)
{
    char saved_sign = dest->sign;
    int diff;

    /* TW_Valid is 0, so the OR of the tags is 0 only if both are valid. */
    if ( !(a->tag | b->tag) )
    {
        /* Both registers are valid */
        if (!(a->sign ^ b->sign))
        {
            /* signs are the same */
            dest->sign = a->sign;
            if ( reg_u_add(a, b, dest, control_w) )
            {
                dest->sign = saved_sign;
                return 1;
            }
            return 0;
        }

        /* The signs are different, so do a subtraction */
        /* diff becomes >0, 0 or <0 as |a| is larger, equal or smaller than |b| */
        diff = a->exp - b->exp;
        if (!diff)
        {
            diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
            if (!diff)
            {
                diff = a->sigl > b->sigl;
                if (!diff)
                    diff = -(a->sigl < b->sigl);
            }
        }

        if (diff > 0)
        {
            /* |a| > |b|: the result takes the sign of a */
            dest->sign = a->sign;
            if ( reg_u_sub(a, b, dest, control_w) )
            {
                dest->sign = saved_sign;
                return 1;
            }
        }
        else if ( diff == 0 )
        {
            /* Equal magnitudes, opposite signs: the result is an exact zero */
#ifdef DENORM_OPERAND
            if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
                 denormal_operand() )
                return 1;
#endif /* DENORM_OPERAND */
            reg_move(&CONST_Z, dest);
            /* sign depends upon rounding mode */
            dest->sign = ((control_w & CW_RC) != RC_DOWN)
                ? SIGN_POS : SIGN_NEG;
        }
        else
        {
            /* |a| < |b|: the result takes the sign of b */
            dest->sign = b->sign;
            if ( reg_u_sub(b, a, dest, control_w) )
            {
                dest->sign = saved_sign;
                return 1;
            }
        }
        return 0;
    }
    else
    {
        if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) )
        {
            return real_2op_NaN(a, b, dest);
        }
        else if (a->tag == TW_Zero)
        {
            if (b->tag == TW_Zero)
            {
                /* Sample the signs first: dest may alias a or b. */
                char different_signs = a->sign ^ b->sign;
                /* Both are zero, result will be zero. */
                reg_move(a, dest);
                if (different_signs)
                {
                    /* Signs are different. */
                    /* Sign of answer depends upon rounding mode. */
                    dest->sign = ((control_w & CW_RC) != RC_DOWN)
                        ? SIGN_POS : SIGN_NEG;
                }
            }
            else
            {
                /* 0 + b: the answer is b */
#ifdef DENORM_OPERAND
                if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
                     denormal_operand() )
                    return 1;
#endif /* DENORM_OPERAND */
                reg_move(b, dest);
            }
            return 0;
        }
        else if (b->tag == TW_Zero)
        {
            /* a + 0: the answer is a */
#ifdef DENORM_OPERAND
            if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
                 denormal_operand() )
                return 1;
#endif /* DENORM_OPERAND */
            reg_move(a, dest);
            return 0;
        }
        else if (a->tag == TW_Infinity)
        {
            if (b->tag != TW_Infinity)
            {
#ifdef DENORM_OPERAND
                if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
                     denormal_operand() )
                    return 1;
#endif /* DENORM_OPERAND */
                reg_move(a, dest);
                return 0;
            }
            if (a->sign == b->sign)
            {
                /* They are both + or - infinity */
                reg_move(a, dest);
                return 0;
            }
            return arith_invalid(dest);  /* Infinity-Infinity is undefined. */
        }
        else if (b->tag == TW_Infinity)
        {
#ifdef DENORM_OPERAND
            if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
                 denormal_operand() )
                return 1;
#endif /* DENORM_OPERAND */
            reg_move(b, dest);
            return 0;
        }
    }

    /* Reached only for a tag combination none of the branches above handle. */
#ifdef PARANOID
    EXCEPTION(EX_INTERNAL|0x101);
#endif
    return 1;
}
 
 
/* Subtract b from a. (a-b) -> dest */
/*
 * Subtract b from a: (a - b) -> dest.
 *
 * a, b       source operands (either may be the same object as dest)
 * dest       receives the result
 * control_w  control word; its CW_RC field selects the sign of an exact
 *            zero result, and it is forwarded to reg_u_add()/reg_u_sub()
 *
 * Returns 0 when a result has been stored in dest.
 * Returns 1 when reg_u_add()/reg_u_sub() reports failure (dest->sign is
 * then restored to the value it had on entry), when denormal_operand()
 * returns non-zero, or when the tag combination is not recognised.
 * For NaN operands and for Infinity - Infinity of equal sign the return
 * value is whatever real_2op_NaN() / arith_invalid() return.
 */
int reg_sub(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w)
{
    char saved_sign = dest->sign;
    int diff;

    /* TW_Valid is 0, so the OR of the tags is 0 only if both are valid. */
    if ( !(a->tag | b->tag) )
    {
        /* Both registers are valid */
        /* diff becomes >0, 0 or <0 as |a| is larger, equal or smaller than |b| */
        diff = a->exp - b->exp;
        if (!diff)
        {
            diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
            if (!diff)
            {
                diff = a->sigl > b->sigl;
                if (!diff)
                    diff = -(a->sigl < b->sigl);
            }
        }

        /* With SIGN_POS = 0 and SIGN_NEG = 1 this encodes both signs as 0..3 */
        switch (a->sign*2 + b->sign)
        {
        case 0: /* P - P */
        case 3: /* N - N */
            if (diff > 0)
            {
                /* |a| > |b| */
                dest->sign = a->sign;
                if ( reg_u_sub(a, b, dest, control_w) )
                {
                    dest->sign = saved_sign;
                    return 1;
                }
                return 0;
            }
            else if ( diff == 0 )
            {
                /* Equal operands: the result is an exact zero */
#ifdef DENORM_OPERAND
                if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
                     denormal_operand() )
                    return 1;
#endif /* DENORM_OPERAND */
                reg_move(&CONST_Z, dest);
                /* sign depends upon rounding mode */
                dest->sign = ((control_w & CW_RC) != RC_DOWN)
                    ? SIGN_POS : SIGN_NEG;
            }
            else
            {
                /* |a| < |b|: the result has the opposite sign to a */
                dest->sign = a->sign ^ SIGN_POS^SIGN_NEG;
                if ( reg_u_sub(b, a, dest, control_w) )
                {
                    dest->sign = saved_sign;
                    return 1;
                }
            }
            break;
        case 1: /* P - N */
            dest->sign = SIGN_POS;
            if ( reg_u_add(a, b, dest, control_w) )
            {
                dest->sign = saved_sign;
                return 1;
            }
            break;
        case 2: /* N - P */
            dest->sign = SIGN_NEG;
            if ( reg_u_add(a, b, dest, control_w) )
            {
                dest->sign = saved_sign;
                return 1;
            }
            break;
        }
        return 0;
    }
    else
    {
        if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) )
        {
            /* Note the operand order: b is passed first here. */
            return real_2op_NaN(b, a, dest);
        }
        else if (b->tag == TW_Zero)
        {
            if (a->tag == TW_Zero)
            {
                /* Sample the signs first: dest may alias a or b. */
                char same_signs = !(a->sign ^ b->sign);
                /* Both are zero, result will be zero. */
                reg_move(a, dest);  /* Answer for different signs. */
                if (same_signs)
                {
                    /* Sign depends upon rounding mode */
                    dest->sign = ((control_w & CW_RC) != RC_DOWN)
                        ? SIGN_POS : SIGN_NEG;
                }
            }
            else
            {
                /* a - 0: the answer is a */
#ifdef DENORM_OPERAND
                if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
                     denormal_operand() )
                    return 1;
#endif /* DENORM_OPERAND */
                reg_move(a, dest);
            }
            return 0;
        }
        else if (a->tag == TW_Zero)
        {
            /* 0 - b: the answer is b with its sign inverted */
#ifdef DENORM_OPERAND
            if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
                 denormal_operand() )
                return 1;
#endif /* DENORM_OPERAND */
            reg_move(b, dest);
            dest->sign ^= SIGN_POS^SIGN_NEG;
            return 0;
        }
        else if (a->tag == TW_Infinity)
        {
            if (b->tag != TW_Infinity)
            {
#ifdef DENORM_OPERAND
                if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
                     denormal_operand() )
                    return 1;
#endif /* DENORM_OPERAND */
                reg_move(a, dest);
                return 0;
            }
            /* Both args are Infinity */
            if (a->sign == b->sign)
            {
                /* Infinity-Infinity is undefined. */
                return arith_invalid(dest);
            }
            reg_move(a, dest);
            return 0;
        }
        else if (b->tag == TW_Infinity)
        {
            /* a - Infinity: the answer is b with its sign inverted */
#ifdef DENORM_OPERAND
            if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
                 denormal_operand() )
                return 1;
#endif /* DENORM_OPERAND */
            reg_move(b, dest);
            dest->sign ^= SIGN_POS^SIGN_NEG;
            return 0;
        }
    }

    /* Reached only for a tag combination none of the branches above handle. */
#ifdef PARANOID
    EXCEPTION(EX_INTERNAL|0x110);
#endif
    return 1;
}
 
/reg_u_mul.S
0,0 → 1,160
.file "reg_u_mul.S"
/*---------------------------------------------------------------------------+
| reg_u_mul.S |
| |
| Core multiplication routine |
| |
| Copyright (C) 1992,1993,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Basic multiplication routine. |
| Does not check the resulting exponent for overflow/underflow |
| |
| reg_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); |
| |
| Internal working is at approx 128 bits. |
| Result is rounded to nearest 53 or 64 bits, using "nearest or even". |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
 
 
 
/*
 * Two 32 bit scratch words used by reg_u_mul for the low half of the
 * 128 bit product.  reg_u_mul stores the low 32 bits of sigl*sigl in
 * FPU_accum_0 and accumulates the next 32 bits in FPU_accum_1, so
 * FPU_accum_0 is the LEAST significant word.
 */
#ifndef NON_REENTRANT_FPU
/* Local storage on the stack (reserved by "subl $8,%esp" in reg_u_mul): */
#define FPU_accum_0 -4(%ebp) /* product bits 0..31 (ls word) */
#define FPU_accum_1 -8(%ebp) /* product bits 32..63 */

#else
/* Local storage in a static area: */
.data
.align 4,0
FPU_accum_0:
.long 0
FPU_accum_1:
.long 0
#endif NON_REENTRANT_FPU
 
 
.text
/*
 * reg_u_mul: multiply the 64 bit significands of PARAM1 and PARAM2,
 * store the sum of their exponents (plus one) in the destination
 * (PARAM3), normalize the product by at most one bit, and then jump to
 * fpu_reg_round (defined elsewhere) to round and store the significand.
 *
 * Register use for the 128 bit product, most significant first:
 *   %ecx : %ebx : FPU_accum_1 : FPU_accum_0
 */
ENTRY(reg_u_mul)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
subl $8,%esp
#endif NON_REENTRANT_FPU

pushl %esi
pushl %edi
pushl %ebx

movl PARAM1,%esi
movl PARAM2,%edi

#ifdef PARANOID
/* Both significands must have their ms bit set (i.e. be normalized) */
testl $0x80000000,SIGH(%esi)
jz L_bugged
testl $0x80000000,SIGH(%edi)
jz L_bugged
#endif PARANOID

#ifdef DENORM_OPERAND
/* An exponent <= EXP_UNDER marks a denormal operand */
movl EXP(%esi),%eax
cmpl EXP_UNDER,%eax
jg xOp1_not_denorm

call SYMBOL_NAME(denormal_operand)
orl %eax,%eax
jnz fpu_Arith_exit /* non-zero return: abandon the operation */

xOp1_not_denorm:
movl EXP(%edi),%eax
cmpl EXP_UNDER,%eax
jg xOp2_not_denorm

call SYMBOL_NAME(denormal_operand)
orl %eax,%eax
jnz fpu_Arith_exit /* non-zero return: abandon the operation */

xOp2_not_denorm:
#endif DENORM_OPERAND

/* Clear the two most significant words of the product */
xorl %ecx,%ecx
xorl %ebx,%ebx

/* sigl(a) * sigl(b) -> product bits 0..63 */
movl SIGL(%esi),%eax
mull SIGL(%edi)
movl %eax,FPU_accum_0
movl %edx,FPU_accum_1

/* sigl(a) * sigh(b) -> added in at bits 32..95 */
movl SIGL(%esi),%eax
mull SIGH(%edi)
addl %eax,FPU_accum_1
adcl %edx,%ebx
/* adcl $0,%ecx // overflow here is not possible */

/* sigh(a) * sigl(b) -> added in at bits 32..95, carry into %ecx */
movl SIGH(%esi),%eax
mull SIGL(%edi)
addl %eax,FPU_accum_1
adcl %edx,%ebx
adcl $0,%ecx

/* sigh(a) * sigh(b) -> added in at bits 64..127 */
movl SIGH(%esi),%eax
mull SIGH(%edi)
addl %eax,%ebx
adcl %edx,%ecx

movl EXP(%esi),%eax /* Compute the exponent */
addl EXP(%edi),%eax
subl EXP_BIAS-1,%eax /* i.e. exp(a) + exp(b) - EXP_BIAS + 1 */

/* Have now finished with the sources */
movl PARAM3,%edi /* Point to the destination */
movl %eax,EXP(%edi)

/* Now make sure that the result is normalized */
testl $0x80000000,%ecx
jnz LResult_Normalised

/* Normalize by shifting left one bit */
shll $1,FPU_accum_0
rcll $1,FPU_accum_1
rcll $1,%ebx
rcll $1,%ecx
decl EXP(%edi)

LResult_Normalised:
/*
 * Fold the lowest word into a sticky bit: if any of bits 0..31 of the
 * product are set, set the ls bit of the extension word in %edx.
 */
movl FPU_accum_0,%eax
movl FPU_accum_1,%edx
orl %eax,%eax
jz L_extent_zero

orl $1,%edx

L_extent_zero:
/* Hand over with %eax:%ebx = significand, %edx = extension, %edi = dest */
movl %ecx,%eax
jmp fpu_reg_round


#ifdef PARANOID
L_bugged:
/* An operand was not normalized: report an internal error */
pushl EX_INTERNAL|0x205
call EXCEPTION
pop %ebx /* discard the pushed argument */
jmp L_exit

L_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
#endif PARANOID
 
/reg_div.S
0,0 → 1,248
.file "reg_div.S"
/*---------------------------------------------------------------------------+
| reg_div.S |
| |
| Divide one FPU_REG by another and put the result in a destination FPU_REG.|
| |
| Copyright (C) 1992,1993,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| int reg_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, |
| unsigned int control_word) |
| |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "fpu_emu.h"
 
 
.text
/*
 * reg_div: divide PARAM1 (a, kept in %esi) by PARAM2 (b, kept in %ebx)
 * and put the result in PARAM3 (dest, kept in %edi).
 *
 * When both operands are TW_Valid the sign, tag and exponent
 * (exp(a) - exp(b) + EXP_BIAS) of dest are set here and control passes
 * to divide_kernel (defined elsewhere) for the significand.
 * All other tag combinations are resolved in this file:
 *   NaN operand               -> real_2op_NaN()
 *   0/0, Infinity/Infinity    -> arith_invalid()
 *   x/0                       -> divide_by_zero()
 *   Infinity/x, 0/x           -> copy of a with the combined sign
 *   x/Infinity                -> zero with the combined sign
 * On the copy/zero paths %eax is cleared before returning; on the other
 * paths %eax is whatever the called helper left in it.
 */
ENTRY(reg_div)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp	/* Needed by divide_kernel */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi
	movl	PARAM2,%ebx
	movl	PARAM3,%edi

	/* TW_Valid is 0, so the OR of both tags is 0 only if both are valid */
	movb	TAG(%esi),%al
	orb	TAG(%ebx),%al

	jne	L_div_special	/* Not (both numbers TW_Valid) */

#ifdef DENORM_OPERAND
	/* Check for denormals */
	cmpl	EXP_UNDER,EXP(%esi)
	jg	xL_arg1_not_denormal

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xL_arg1_not_denormal:
	cmpl	EXP_UNDER,EXP(%ebx)
	jg	xL_arg2_not_denormal

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xL_arg2_not_denormal:
#endif /* DENORM_OPERAND */

	/* Both arguments are TW_Valid */
	movb	TW_Valid,TAG(%edi)

	movb	SIGN(%esi),%cl
	cmpb	%cl,SIGN(%ebx)
	setne	(%edi)		/* Set the sign, requires SIGN_NEG=1, SIGN_POS=0 */

	movl	EXP(%esi),%edx
	movl	EXP(%ebx),%eax
	subl	%eax,%edx
	addl	EXP_BIAS,%edx
	movl	%edx,EXP(%edi)

	jmp	SYMBOL_NAME(divide_kernel)


/*-----------------------------------------------------------------------*/
L_div_special:
	cmpb	TW_NaN,TAG(%esi)	/* A NaN with anything to give NaN */
	je	L_arg1_NaN

	cmpb	TW_NaN,TAG(%ebx)	/* A NaN with anything to give NaN */
	jne	L_no_NaN_arg

	/* Operations on NaNs */
L_arg1_NaN:
L_arg2_NaN:
	pushl	%edi		/* Destination */
	pushl	%esi
	pushl	%ebx		/* Ordering is important here */
	call	SYMBOL_NAME(real_2op_NaN)
	jmp	LDiv_exit

	/* Invalid operations */
L_zero_zero:
L_inf_inf:
	pushl	%edi		/* Destination */
	call	SYMBOL_NAME(arith_invalid)	/* 0/0 or Infinity/Infinity */
	jmp	LDiv_exit

L_no_NaN_arg:
	cmpb	TW_Infinity,TAG(%esi)
	jne	L_arg1_not_inf

	cmpb	TW_Infinity,TAG(%ebx)
	je	L_inf_inf	/* invalid operation */

	cmpb	TW_Valid,TAG(%ebx)
	je	L_inf_valid

#ifdef PARANOID
	/* arg2 must be zero or valid */
	cmpb	TW_Zero,TAG(%ebx)
	ja	L_unknown_tags
#endif /* PARANOID */

	/* Note that p16-9 says that infinity/0 returns infinity */
	jmp	L_copy_arg1	/* Answer is Inf */

L_inf_valid:
#ifdef DENORM_OPERAND
	cmpl	EXP_UNDER,EXP(%ebx)
	jg	L_copy_arg1	/* Answer is Inf */

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit
#endif /* DENORM_OPERAND */

	jmp	L_copy_arg1	/* Answer is Inf */

L_arg1_not_inf:
	cmpb	TW_Zero,TAG(%ebx)	/* Priority to div-by-zero error */
	jne	L_arg2_not_zero

	cmpb	TW_Zero,TAG(%esi)
	je	L_zero_zero	/* invalid operation */

#ifdef PARANOID
	/* arg1 must be valid */
	cmpb	TW_Valid,TAG(%esi)
	ja	L_unknown_tags
#endif /* PARANOID */

	/* Division by zero error */
	pushl	%edi		/* destination */
	movb	SIGN(%esi),%al
	xorb	SIGN(%ebx),%al
	pushl	%eax		/* lower 8 bits have the sign */
	call	SYMBOL_NAME(divide_by_zero)
	jmp	LDiv_exit

L_arg2_not_zero:
	cmpb	TW_Infinity,TAG(%ebx)
	jne	L_arg2_not_inf

#ifdef DENORM_OPERAND
	cmpb	TW_Valid,TAG(%esi)
	jne	L_return_zero

	cmpl	EXP_UNDER,EXP(%esi)
	jg	L_return_zero	/* Answer is zero */

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit
#endif /* DENORM_OPERAND */

	jmp	L_return_zero	/* Answer is zero */

L_arg2_not_inf:

#ifdef PARANOID
	cmpb	TW_Zero,TAG(%esi)
	jne	L_unknown_tags
#endif /* PARANOID */

	/* arg1 is zero, arg2 is not Infinity or a NaN */

#ifdef DENORM_OPERAND
	cmpl	EXP_UNDER,EXP(%ebx)
	jg	L_copy_arg1	/* Answer is zero */

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit
#endif /* DENORM_OPERAND */

L_copy_arg1:
	/*
	 * Copy tag, exponent and significand of arg1 to the destination.
	 * The tag is a single byte, so it must travel through %al: a byte
	 * move cannot name the 16 bit register %ax.
	 */
	movb	TAG(%esi),%al
	movb	%al,TAG(%edi)
	movl	EXP(%esi),%eax
	movl	%eax,EXP(%edi)
	movl	SIGL(%esi),%eax
	movl	%eax,SIGL(%edi)
	movl	SIGH(%esi),%eax
	movl	%eax,SIGH(%edi)

LDiv_set_result_sign:
	/* Result is negative exactly when the operand signs differ */
	movb	SIGN(%esi),%cl
	cmpb	%cl,SIGN(%ebx)
	jne	LDiv_negative_result

	movb	SIGN_POS,SIGN(%edi)
	xorl	%eax,%eax	/* Valid result */
	jmp	LDiv_exit

LDiv_negative_result:
	movb	SIGN_NEG,SIGN(%edi)
	xorl	%eax,%eax	/* Valid result */

LDiv_exit:
	/*
	 * Reset %esp to just below the three saved registers.  This also
	 * discards any arguments pushed for the helper calls above.
	 * 40 = 28 bytes of locals + 12 bytes of saved registers.
	 */
#ifndef NON_REENTRANT_FPU
	leal	-40(%ebp),%esp
#else
	leal	-12(%ebp),%esp
#endif /* NON_REENTRANT_FPU */

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


L_return_zero:
	xorl	%eax,%eax
	movl	%eax,SIGH(%edi)
	movl	%eax,SIGL(%edi)
	movl	EXP_UNDER,EXP(%edi)
	movb	TW_Zero,TAG(%edi)
	jmp	LDiv_set_result_sign

#ifdef PARANOID
L_unknown_tags:
	pushl	EX_INTERNAL | 0x208
	call	EXCEPTION

	/* Generate a NaN for unknown tags */
	movl	SYMBOL_NAME(CONST_QNaN),%eax
	movl	%eax,(%edi)
	movl	SYMBOL_NAME(CONST_QNaN)+4,%eax
	movl	%eax,SIGL(%edi)
	movl	SYMBOL_NAME(CONST_QNaN)+8,%eax
	movl	%eax,SIGH(%edi)
	jmp	LDiv_exit	/* %eax is nz */
#endif /* PARANOID */
/fpu_asm.h
0,0 → 1,31
/*---------------------------------------------------------------------------+
| fpu_asm.h |
| |
| Copyright (C) 1992,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
+---------------------------------------------------------------------------*/
 
#ifndef _FPU_ASM_H_
#define _FPU_ASM_H_

#include <linux/linkage.h>

/* Assembler-visible name of the C function exception() */
#define EXCEPTION SYMBOL_NAME(exception)


/*
 * Stack arguments of a routine that has executed
 * "pushl %ebp; movl %esp,%ebp": the first argument is at 8(%ebp).
 */
#define PARAM1 8(%ebp)
#define PARAM2 12(%ebp)
#define PARAM3 16(%ebp)
#define PARAM4 20(%ebp)

/*
 * Byte offsets of the FPU_REG fields, applied to a base register.
 * Example: SIGH(%esi) expands to 12(%esi).
 */
#define SIGL_OFFSET 8
#define SIGN(x) (x)
#define TAG(x) 1(x)
#define EXP(x) 4(x)
#define SIG(x) SIGL_OFFSET##(x)
#define SIGL(x) SIGL_OFFSET##(x)
#define SIGH(x) 12(x)

#endif /* _FPU_ASM_H_ */
/fpu_emu.h
0,0 → 1,173
/*---------------------------------------------------------------------------+
| fpu_emu.h |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
+---------------------------------------------------------------------------*/
 
 
#ifndef _FPU_EMU_H_
#define _FPU_EMU_H_
 
/*
* Define DENORM_OPERAND to make the emulator detect denormals
* and use the denormal flag of the status word. Note: this only
* affects the flag and corresponding interrupt, the emulator
* will always generate denormals and operate upon them as required.
*/
#define DENORM_OPERAND

/*
* Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
* rather than behaviour which appears to be cleaner.
* This is a matter of opinion: for all I know, the 80486 may simply
* be complying with the IEEE spec. Maybe one day I'll get to see the
* spec...
*/
#define PECULIAR_486

/*
* Const(x) lets one set of definitions serve both C and assembler:
* under __ASSEMBLY__ the value is prefixed with '$' to form an
* immediate operand, otherwise it is used as written.
*/
#ifdef __ASSEMBLY__
#include "fpu_asm.h"
#define Const(x) $##x
#else
#define Const(x) x
#endif

/* Exponent limits and bias of the internal register format */
#define EXP_BIAS Const(0)
#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */
#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */
#define EXP_Infinity EXP_OVER
#define EXP_NaN EXP_OVER

/* Values of the sign field; the assembler code relies on POS=0, NEG=1 */
#define SIGN_POS Const(0)
#define SIGN_NEG Const(1)

/* Values of the tag field */
/* Keep the order TW_Valid, TW_Zero, TW_Denormal */
#define TW_Valid Const(0) /* valid */
#define TW_Zero Const(1) /* zero */
/* The following fold to 2 (Special) in the Tag Word */
/* #define TW_Denormal Const(4) */ /* De-normal */
#define TW_Infinity Const(5) /* + or - infinity */
#define TW_NaN Const(6) /* Not a Number */

#define TW_Empty Const(7) /* empty */
 
 
#ifndef __ASSEMBLY__
 
#include <asm/sigcontext.h> /* for struct _fpstate */
#include <asm/math_emu.h>
 
#include <linux/linkage.h>
 
/*
 * Uncomment the define below to enable the re-entrancy check macros.
#define RE_ENTRANT_CHECKING
*/

/*
 * RE_ENTRANT_CHECK_OFF / RE_ENTRANT_CHECK_ON clear and set the
 * 'emulating' flag when RE_ENTRANT_CHECKING is defined; otherwise they
 * expand to nothing.
 */
#ifdef RE_ENTRANT_CHECKING
extern char emulating;
# define RE_ENTRANT_CHECK_OFF emulating = 0
# define RE_ENTRANT_CHECK_ON emulating = 1
#else
# define RE_ENTRANT_CHECK_OFF
# define RE_ENTRANT_CHECK_ON
#endif RE_ENTRANT_CHECKING
 
/* Opcode / prefix byte values, as named */
#define FWAIT_OPCODE 0x9b
#define OP_SIZE_PREFIX 0x66
#define ADDR_SIZE_PREFIX 0x67
#define PREFIX_CS 0x2e
#define PREFIX_DS 0x3e
#define PREFIX_ES 0x26
#define PREFIX_SS 0x36
#define PREFIX_FS 0x64
#define PREFIX_GS 0x65
#define PREFIX_REPE 0xf3
#define PREFIX_REPNE 0xf2
#define PREFIX_LOCK 0xf0
/*
 * Small integer codes, one per segment prefix plus a default.
 * NOTE(review): presumably the values stored in overrides.segment;
 * confirm against the instruction decoder.
 */
#define PREFIX_CS_ 1
#define PREFIX_DS_ 2
#define PREFIX_ES_ 3
#define PREFIX_FS_ 4
#define PREFIX_GS_ 5
#define PREFIX_SS_ 6
#define PREFIX_DEFAULT 7
 
/* An offset/selector pair with an 11 bit opcode field packed alongside */
struct address {
unsigned int offset;
unsigned short selector;
unsigned short opcode:11,
empty:5;
};
/* Handler taking no arguments (used for the dispatch tables) */
typedef void (*FUNC)(void);
typedef struct fpu_reg FPU_REG;
/* Handler which is passed a pointer to a register */
typedef void (*FUNC_ST0)(FPU_REG *st0_ptr);
/* Per-instruction override settings, one byte each */
typedef struct { unsigned char address_size, operand_size, segment; }
overrides;
/* This structure is 32 bits: */
typedef struct { overrides override;
unsigned char default_mode; } fpu_addr_modes;
/* PROTECTED has a restricted meaning in the emulator; it is used
to signal that the emulator needs to do special things to ensure
that protection is respected in a segmented model. */
#define PROTECTED 4
#define SIXTEEN 1 /* We rely upon this being 1 (true) */
/* Mode values built from the two flags above */
#define VM86 SIXTEEN
#define PM16 (SIXTEEN | PROTECTED)
#define SEG32 PROTECTED
extern unsigned char const data_sizes_16[32];
 
/*
 * Stack-relative register access.  These macros expect 'regs' (the
 * array of eight registers) and 'top' (the stack-top index, which is
 * not kept reduced modulo 8) to be in scope at the point of use.
 */

/* Register x places above the current top of stack, wrapping modulo 8.
   The argument is parenthesised so that expressions such as st(i | 1)
   are added to 'top' as a whole. */
#define st(x) ( regs[((top+(x)) &7 )] )

/* True if the slot a push would use is occupied.  Side effect: leaves
   st_new_ptr (supplied by the caller) pointing at that slot. */
#define STACK_OVERFLOW (st_new_ptr = &st(-1), st_new_ptr->tag != TW_Empty)
#define NOT_EMPTY(i) (st(i).tag != TW_Empty)
/* Non-zero if st0_tag (supplied by the caller) is not TW_Empty */
#define NOT_EMPTY_ST0 (st0_tag ^ TW_Empty)

/* Mark the top register empty, then advance 'top' */
#define pop() { regs[(top++ & 7 )].tag = TW_Empty; }
/* Mark the top two registers empty, then advance 'top' by two */
#define poppop() { regs[((top + 1) & 7 )].tag \
= regs[(top & 7 )].tag = TW_Empty; \
top += 2; }

/* push() does not affect the tags */
#define push() { top--; }
 
 
/*
 * reg_move(x, y): copy register *x to *y with three stores:
 *   - a short read at &x->sign,
 *   - a long read at &x->exp,
 *   - a long long read at &x->sigl.
 * NOTE(review): this presumably relies on the FPU_REG layout used by
 * fpu_asm.h (sign at 0, tag at 1, exp at 4, sigl at 8, sigh at 12) and on
 * the i386 sizes of short/long/long long; confirm against the struct.
 * The expansion is a brace block, not an expression.
 */
#define reg_move(x, y) { \
*(short *)&((y)->sign) = *(const short *)&((x)->sign); \
*(long *)&((y)->exp) = *(const long *)&((x)->exp); \
*(long long *)&((y)->sigl) = *(const long long *)&((x)->sigl); }

/* The storage starting at x->sigl, accessed as one unsigned long long lvalue */
#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
 
 
/*----- Prototypes for functions written in assembler -----*/
/* reg_move is provided as a macro above, so this prototype is unused: */
/* extern void reg_move(FPU_REG *a, FPU_REG *b); */

asmlinkage void normalize(FPU_REG *x);
asmlinkage void normalize_nuo(FPU_REG *x);
/* answ = arg1 / arg2; handles special tags (see reg_div.S) */
asmlinkage int reg_div(FPU_REG const *arg1, FPU_REG const *arg2,
FPU_REG *answ, unsigned int control_w);
asmlinkage int reg_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
FPU_REG *answ, unsigned int control_w);
/* Core multiplication routine (see reg_u_mul.S) */
asmlinkage int reg_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
FPU_REG *answ, unsigned int control_w);
asmlinkage int reg_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
FPU_REG *answ, unsigned int control_w);
asmlinkage int reg_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
FPU_REG *answ, unsigned int control_w);
asmlinkage int wm_sqrt(FPU_REG *n, unsigned int control_w);
/* 64 bit right shifts which return an extension word (see wm_shrx.S) */
asmlinkage unsigned shrx(void *l, unsigned x);
asmlinkage unsigned shrxs(void *v, unsigned x);
asmlinkage unsigned long div_small(unsigned long long *x, unsigned long y);
asmlinkage void round_reg(FPU_REG *arg, unsigned int extent,
unsigned int control_w);
 
#ifndef MAKING_PROTO
#include "fpu_proto.h"
#endif
 
#endif __ASSEMBLY__
 
#endif _FPU_EMU_H_
/shr_Xsig.S
0,0 → 1,87
.file "shr_Xsig.S"
/*---------------------------------------------------------------------------+
| shr_Xsig.S |
| |
| 12 byte right shift function |
| |
| Copyright (C) 1992,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| void shr_Xsig(Xsig *arg, unsigned nr) |
| |
| Extended shift right function. |
| Fastest for small shifts. |
| Shifts the 12 byte quantity pointed to by the first arg (arg) |
| right by the number of bits specified by the second arg (nr). |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_emu.h"
 
.text
/*
 * shr_Xsig: shift the three-long (96 bit) value at PARAM1 right by
 * PARAM2 bits, in place.  The longs are stored least significant first.
 * Bits shifted out at the bottom are discarded; zeros enter at the top.
 * The shift is done in whole-long moves plus a 0..31 bit shrd/shr step;
 * a count of 96 or more clears the value.
 */
ENTRY(shr_Xsig)
push %ebp
movl %esp,%ebp
pushl %esi
movl PARAM2,%ecx
movl PARAM1,%esi
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jnc L_more_than_31

/* less than 32 bits */
pushl %ebx
movl (%esi),%eax /* lsl */
movl 4(%esi),%ebx /* midl */
movl 8(%esi),%edx /* msl */
shrd %cl,%ebx,%eax
shrd %cl,%edx,%ebx
shr %cl,%edx
movl %eax,(%esi)
movl %ebx,4(%esi)
movl %edx,8(%esi)
popl %ebx
popl %esi
leave
ret

L_more_than_31:
cmpl $64,%ecx
jnc L_more_than_63

/* 32..63 bits: move down one long, then shift by (count - 32) */
subb $32,%cl
movl 4(%esi),%eax /* midl */
movl 8(%esi),%edx /* msl */
shrd %cl,%edx,%eax
shr %cl,%edx
movl %eax,(%esi)
movl %edx,4(%esi)
movl $0,8(%esi)
popl %esi
leave
ret

L_more_than_63:
cmpl $96,%ecx
jnc L_more_than_95

/* 64..95 bits: only part of the msl survives, in the lsl position */
subb $64,%cl
movl 8(%esi),%eax /* msl */
shr %cl,%eax
xorl %edx,%edx
movl %eax,(%esi)
movl %edx,4(%esi)
movl %edx,8(%esi)
popl %esi
leave
ret

L_more_than_95:
/* 96 bits or more: everything is shifted out */
xorl %eax,%eax
movl %eax,(%esi)
movl %eax,4(%esi)
movl %eax,8(%esi)
popl %esi
leave
ret
/polynom_Xsig.S
0,0 → 1,135
/*---------------------------------------------------------------------------+
| polynomial_Xsig.S |
| |
| Fixed point arithmetic polynomial evaluation. |
| |
| Copyright (C) 1992,1993,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| void polynomial_Xsig(Xsig *accum, unsigned long long x, |
| unsigned long long terms[], int n) |
| |
| Computes: |
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x) ... )*x)*x |
| and adds the result to the 12 byte Xsig. |
| The terms[] are each 8 bytes, but all computation is performed to 12 byte |
| precision. |
| |
| This function must be used carefully: most overflow of intermediate |
| results is controlled, but overflow of the result is not. |
| |
+---------------------------------------------------------------------------*/
.file "polynomial_Xsig.S"
 
#include "fpu_emu.h"
 
 
/* Size in bytes of one entry of terms[], as an immediate operand */
#define TERM_SIZE $8
/*
 * Frame slots of polynomial_Xsig (which reserves 32 bytes below %ebp).
 * SUM_* is the running 96 bit sum, ACCUM_* the 96 bit product sum*x.
 */
#define SUM_MS -20(%ebp) /* sum ms long */
#define SUM_MIDDLE -24(%ebp) /* sum middle long */
#define SUM_LS -28(%ebp) /* sum ls long */
#define ACCUM_MS -4(%ebp) /* accum ms long */
#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
#define ACCUM_LS -12(%ebp) /* accum ls long */
#define OVERFLOWED -16(%ebp) /* addition overflow flag */
 
.text
/*
 * polynomial_Xsig(accum = PARAM1, x = PARAM2, terms = PARAM3, n = PARAM4)
 *
 * PARAM2 is used as a pointer to the two longs of x (ls long first).
 * The sum is seeded with terms[n]; then, for i = n-1 down to 0,
 *     sum = sum * x + terms[i]
 * where only the MS and MIDDLE longs of sum take part in the multiply.
 * Finally the 96 bit sum is added to the three longs at accum.
 * terms[] must therefore hold at least n+1 entries, and PARAM4 is
 * modified on the stack (it is used as the loop counter).
 */
ENTRY(polynomial_Xsig)
pushl %ebp
movl %esp,%ebp
subl $32,%esp
pushl %esi
pushl %edi
pushl %ebx

movl PARAM2,%esi /* x */
movl PARAM3,%edi /* terms */

/* Point %edi at terms[n] */
movl TERM_SIZE,%eax
mull PARAM4 /* n */
addl %eax,%edi

/* sum = terms[n], placed in the MS:MIDDLE longs; LS and the flag cleared */
movl 4(%edi),%edx /* terms[n] */
movl %edx,SUM_MS
movl (%edi),%edx /* terms[n] */
movl %edx,SUM_MIDDLE
xor %eax,%eax
movl %eax,SUM_LS
movb %al,OVERFLOWED

subl TERM_SIZE,%edi /* step back to terms[n-1] */
decl PARAM4
js L_accum_done /* n was 0: nothing to multiply */

L_accum_loop:
/* accum = (SUM_MS:SUM_MIDDLE) * x, keeping the upper 96 bits */
xor %eax,%eax
movl %eax,ACCUM_MS
movl %eax,ACCUM_MIDDLE

movl SUM_MIDDLE,%eax
mull (%esi) /* x ls long */
movl %edx,ACCUM_LS /* low half of this partial product is dropped */

movl SUM_MIDDLE,%eax
mull 4(%esi) /* x ms long */
addl %eax,ACCUM_LS
adcl %edx,ACCUM_MIDDLE
adcl $0,ACCUM_MS

movl SUM_MS,%eax
mull (%esi) /* x ls long */
addl %eax,ACCUM_LS
adcl %edx,ACCUM_MIDDLE
adcl $0,ACCUM_MS

movl SUM_MS,%eax
mull 4(%esi) /* x ms long */
addl %eax,ACCUM_MIDDLE
adcl %edx,ACCUM_MS

/*
 * If the previous addition carried out of SUM_MS, the lost carry is
 * worth one extra x at the MS:MIDDLE position of the product.
 */
testb $0xff,OVERFLOWED
jz L_no_overflow

movl (%esi),%eax
addl %eax,ACCUM_MIDDLE
movl 4(%esi),%eax
adcl %eax,ACCUM_MS /* This could overflow too */

L_no_overflow:

/*
* Now put the sum of next term and the accumulator
* into the sum register
*/
/*
 * NOTE(review): the term's ls long at (%edi) is added at both the LS
 * and the MIDDLE positions below; confirm that the LS add is intended
 * before changing anything here.
 */
movl ACCUM_LS,%eax
addl (%edi),%eax /* term ls long */
movl %eax,SUM_LS
movl ACCUM_MIDDLE,%eax
adcl (%edi),%eax /* term ls long */
movl %eax,SUM_MIDDLE
movl ACCUM_MS,%eax
adcl 4(%edi),%eax /* term ms long */
movl %eax,SUM_MS
sbbb %al,%al /* %al = 0xff if the addition carried out, else 0 */
movb %al,OVERFLOWED /* Used in the next iteration */

subl TERM_SIZE,%edi /* previous term */
decl PARAM4
jns L_accum_loop

L_accum_done:
/* Add the 96 bit sum to the caller's accumulator */
movl PARAM1,%edi /* accum */
movl SUM_LS,%eax
addl %eax,(%edi)
movl SUM_MIDDLE,%eax
adcl %eax,4(%edi)
movl SUM_MS,%eax
adcl %eax,8(%edi)

popl %ebx
popl %edi
popl %esi
leave
ret
/mul_Xsig.S
0,0 → 1,176
/*---------------------------------------------------------------------------+
| mul_Xsig.S |
| |
| Multiply a 12 byte fixed point number by another fixed point number. |
| |
| Copyright (C) 1992,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| void mul32_Xsig(Xsig *x, unsigned b) |
| |
| void mul64_Xsig(Xsig *x, unsigned long long *b) |
| |
| void mul_Xsig_Xsig(Xsig *x, unsigned *b) |
| |
| The result is neither rounded nor normalized, and the ls bit or so may |
| be wrong. |
| |
+---------------------------------------------------------------------------*/
.file "mul_Xsig.S"
 
 
#include "fpu_emu.h"
 
.text
/*
 * mul32_Xsig(x = PARAM1, b = PARAM2)
 *
 * Multiply the three-long value at x (ls long first) by the 32 bit
 * value b and write the upper 96 bits of the 128 bit product back to x.
 * The product is built in three frame slots:
 *   -12(%ebp) ls long, -8(%ebp) middle long, -4(%ebp) ms long.
 */
ENTRY(mul32_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
pushl %esi

movl PARAM1,%esi
movl PARAM2,%ecx

xor %eax,%eax
movl %eax,-4(%ebp)
movl %eax,-8(%ebp)

movl (%esi),%eax /* lsl of Xsig */
mull %ecx /* b */
movl %edx,-12(%ebp) /* the low half of this product is dropped */

movl 4(%esi),%eax /* midl of Xsig */
mull %ecx /* b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)

movl 8(%esi),%eax /* msl of Xsig */
mull %ecx /* b */
addl %eax,-8(%ebp)
adcl %edx,-4(%ebp)

/* Copy the result back over the argument */
movl -12(%ebp),%eax
movl %eax,(%esi)
movl -8(%ebp),%eax
movl %eax,4(%esi)
movl -4(%ebp),%eax
movl %eax,8(%esi)

popl %esi
leave
ret
 
 
/*
 * mul64_Xsig(x = PARAM1, b = PARAM2)
 *
 * Multiply the three-long value at x (ls long first) by the two-long
 * value at b and write the upper 96 bits of the product back to x.
 * The partial product lsl(x)*lsl(b) is not computed, and only the high
 * halves of the next two partial products are used, so the ls bit or so
 * of the result may be wrong.
 * Frame slots: -12(%ebp) ls long, -8(%ebp) middle long, -4(%ebp) ms long.
 */
ENTRY(mul64_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
pushl %esi

movl PARAM1,%esi
movl PARAM2,%ecx

xor %eax,%eax
movl %eax,-4(%ebp)
movl %eax,-8(%ebp)

movl (%esi),%eax /* lsl of Xsig */
mull 4(%ecx) /* msl of b */
movl %edx,-12(%ebp) /* high half only */

movl 4(%esi),%eax /* midl of Xsig */
mull (%ecx) /* lsl of b */
addl %edx,-12(%ebp) /* high half only */
adcl $0,-8(%ebp)
adcl $0,-4(%ebp)

movl 4(%esi),%eax /* midl of Xsig */
mull 4(%ecx) /* msl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)

movl 8(%esi),%eax /* msl of Xsig */
mull (%ecx) /* lsl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)

movl 8(%esi),%eax /* msl of Xsig */
mull 4(%ecx) /* msl of b */
addl %eax,-8(%ebp)
adcl %edx,-4(%ebp)

/* Copy the result back over the argument */
movl -12(%ebp),%eax
movl %eax,(%esi)
movl -8(%ebp),%eax
movl %eax,4(%esi)
movl -4(%ebp),%eax
movl %eax,8(%esi)

popl %esi
leave
ret
 
 
 
/*
 * mul_Xsig_Xsig(x = PARAM1, b = PARAM2)
 *
 * Multiply the three-long value at x by the three-long value at b (both
 * ls long first) and write the upper 96 bits of the product back to x.
 * Partial products which lie wholly below the result are not computed,
 * and only the high halves of the three lowest contributing products
 * are used, so the ls bit or so of the result may be wrong.
 * Frame slots: -12(%ebp) ls long, -8(%ebp) middle long, -4(%ebp) ms long.
 */
ENTRY(mul_Xsig_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
pushl %esi

movl PARAM1,%esi
movl PARAM2,%ecx

xor %eax,%eax
movl %eax,-4(%ebp)
movl %eax,-8(%ebp)

movl (%esi),%eax /* lsl of Xsig */
mull 8(%ecx) /* msl of b */
movl %edx,-12(%ebp) /* high half only */

movl 4(%esi),%eax /* midl of Xsig */
mull 4(%ecx) /* midl of b */
addl %edx,-12(%ebp) /* high half only */
adcl $0,-8(%ebp)
adcl $0,-4(%ebp)

movl 8(%esi),%eax /* msl of Xsig */
mull (%ecx) /* lsl of b */
addl %edx,-12(%ebp) /* high half only */
adcl $0,-8(%ebp)
adcl $0,-4(%ebp)

movl 4(%esi),%eax /* midl of Xsig */
mull 8(%ecx) /* msl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)

movl 8(%esi),%eax /* msl of Xsig */
mull 4(%ecx) /* midl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)

movl 8(%esi),%eax /* msl of Xsig */
mull 8(%ecx) /* msl of b */
addl %eax,-8(%ebp)
adcl %edx,-4(%ebp)

/* Copy the result back over the argument */
movl -12(%ebp),%edx
movl %edx,(%esi)
movl -8(%ebp),%edx
movl %edx,4(%esi)
movl -4(%ebp),%edx
movl %edx,8(%esi)

popl %esi
leave
ret
 
/fpu_aux.c
0,0 → 1,184
/*---------------------------------------------------------------------------+
| fpu_aux.c |
| |
| Code to implement some of the FPU auxiliary instructions. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_system.h"
#include "exception.h"
#include "fpu_emu.h"
#include "status_w.h"
#include "control_w.h"
 
 
/* Handler for instructions which require no action from the emulator. */
static void fnop(void)
{
    /* Intentionally empty. */
}
 
/*
 * fclex: clear the exception flags, the stack fault flag, the summary
 * bit and the backward flag in partial_status.  The instruction pointer
 * record is left alone (no_ip_update is set).
 */
void fclex(void)
{
    partial_status &= ~( SW_Invalid
                       | SW_Denorm_Op
                       | SW_Zero_Div
                       | SW_Overflow
                       | SW_Underflow
                       | SW_Precision
                       | SW_Stack_Fault
                       | SW_Summary
                       | SW_Backward );

    no_ip_update = 1;
}
 
/*
 * finit: put the emulated FPU into its initial state.
 * Sets the control word to 0x037f, clears the status word, resets the
 * stack top, marks all eight registers empty and zeroes the recorded
 * operand and instruction addresses.
 *
 * Needs to be externally visible.
 */
void finit(void)
{
    int r;

    control_word = 0x037f;
    partial_status = 0;
    top = 0;  /* We don't keep top in the status word internally. */

    for (r = 0; r < 8; r++)
    {
        regs[r].tag = TW_Empty;
    }

    /* The behaviour is different to that detailed in
       Section 15.1.6 of the Intel manual */
    operand_address.offset = 0;
    operand_address.selector = 0;
    instruction_address.offset = 0;
    instruction_address.selector = 0;
    instruction_address.opcode = 0;

    no_ip_update = 1;
}
 
/*
 * These are nops on the i387..
 */
#define feni    fnop
#define fdisi   fnop
#define fsetpm  fnop

/* Handlers selected by finit_() using FPU_rm as the index. */
static FUNC const finit_table[] = {
    feni,           /* 0 */
    fdisi,          /* 1 */
    fclex,          /* 2 */
    finit,          /* 3 */
    fsetpm,         /* 4 */
    FPU_illegal,    /* 5 */
    FPU_illegal,    /* 6 */
    FPU_illegal     /* 7 */
};
 
/*
 * Dispatch to the finit_table[] handler selected by FPU_rm.
 * The table has eight entries; FPU_rm is assumed to be in 0..7
 * (TODO confirm against the decoder).
 */
void finit_(void)
{
    (finit_table[FPU_rm])();
}
 
 
/*
 * fstsw ax: store the value of status_word() into the low 16 bits of
 * FPU_EAX.  The instruction pointer record is left alone.
 */
static void fstsw_ax(void)
{
    short *ax_ptr = (short *) &FPU_EAX;

    *ax_ptr = status_word();
    no_ip_update = 1;
}
 
/* Handlers selected by fstsw_() using FPU_rm as the index. */
static FUNC const fstsw_table[] = {
    fstsw_ax,       /* 0 */
    FPU_illegal,    /* 1 */
    FPU_illegal,    /* 2 */
    FPU_illegal,    /* 3 */
    FPU_illegal,    /* 4 */
    FPU_illegal,    /* 5 */
    FPU_illegal,    /* 6 */
    FPU_illegal     /* 7 */
};
 
/*
 * Dispatch to the fstsw_table[] handler selected by FPU_rm.
 * The table has eight entries; FPU_rm is assumed to be in 0..7
 * (TODO confirm against the decoder).
 */
void fstsw_(void)
{
    (fstsw_table[FPU_rm])();
}
 
 
/* Handlers selected by fp_nop() using FPU_rm as the index. */
static FUNC const fp_nop_table[] = {
    fnop,           /* 0 */
    FPU_illegal,    /* 1 */
    FPU_illegal,    /* 2 */
    FPU_illegal,    /* 3 */
    FPU_illegal,    /* 4 */
    FPU_illegal,    /* 5 */
    FPU_illegal,    /* 6 */
    FPU_illegal     /* 7 */
};
 
/*
 * Dispatch to the fp_nop_table[] handler selected by FPU_rm.
 * The table has eight entries; FPU_rm is assumed to be in 0..7
 * (TODO confirm against the decoder).
 */
void fp_nop(void)
{
    (fp_nop_table[FPU_rm])();
}
 
 
/*
 * fld st(i): push a copy of st(FPU_rm) onto the register stack.
 *
 * If the slot to be pushed into is occupied, stack_overflow() is called
 * and nothing else is done.  If st(FPU_rm) is empty, stack_underflow()
 * is called when CW_Invalid is set in the control word (the masked
 * response); otherwise EX_StackUnder is raised.
 */
void fld_i_(void)
{
    FPU_REG *st_new_ptr;

    /* STACK_OVERFLOW also leaves st_new_ptr pointing at the new slot */
    if ( STACK_OVERFLOW )
    {
        stack_overflow();
        return;
    }

    /* fld st(i) */
    if ( NOT_EMPTY(FPU_rm) )
    {
        /* Copy first: st(FPU_rm) is relative to the old top */
        reg_move(&st(FPU_rm), st_new_ptr);
        push();
    }
    else
    {
        if ( control_word & CW_Invalid )
        {
            /* The masked response */
            stack_underflow();
        }
        else
            EXCEPTION(EX_StackUnder);
    }
}
 
 
/*
 * fxch st(i): exchange st(0) with st(FPU_rm).
 *
 * If either register is empty no exchange takes place; the stack
 * underflow handlers are called instead, and when CW_Invalid is set in
 * the control word (the masked response) the non-empty register is
 * first copied into the empty one.
 */
void fxch_i(void)
{
    /* fxch st(i) */
    FPU_REG t;
    FPU_REG *sti_ptr = &st(FPU_rm), *st0_ptr = &st(0);

    if ( st0_ptr->tag == TW_Empty )
    {
        if ( sti_ptr->tag == TW_Empty )
        {
            /* Both are empty */
            stack_underflow();
            stack_underflow_i(FPU_rm);
            return;
        }
        if ( control_word & CW_Invalid )
        {
            reg_move(sti_ptr, st0_ptr);  /* Masked response */
        }
        stack_underflow_i(FPU_rm);
        return;
    }

    if ( sti_ptr->tag == TW_Empty )
    {
        if ( control_word & CW_Invalid )
        {
            reg_move(st0_ptr, sti_ptr);  /* Masked response */
        }
        stack_underflow();
        return;
    }

    /* Both registers hold values: swap them through a temporary */
    clear_C1();
    reg_move(st0_ptr, &t);
    reg_move(sti_ptr, st0_ptr);
    reg_move(&t, sti_ptr);
}
 
 
/* ffree st(i): mark st(FPU_rm) as empty.  The stack top is unchanged. */
void ffree_(void)
{
    /* ffree st(i) */
    st(FPU_rm).tag = TW_Empty;
}
 
 
/* Mark st(FPU_rm) as empty, then pop the register stack. */
void ffreep(void)
{
    /* ffree st(i) + pop - unofficial code */
    st(FPU_rm).tag = TW_Empty;
    pop();
}
 
 
/*
 * fst st(i): copy st(0) into st(FPU_rm).
 * No check is made here for st(0) being empty.
 */
void fst_i_(void)
{
    /* fst st(i) */
    reg_move(&st(0), &st(FPU_rm));
}
 
 
/*
 * fstp st(i): copy st(0) into st(FPU_rm), then pop the register stack.
 * No check is made here for st(0) being empty.
 */
void fstp_i(void)
{
    /* fstp st(i) */
    reg_move(&st(0), &st(FPU_rm));
    pop();
}
 
/wm_shrx.S
0,0 → 1,204
.file "wm_shrx.S"
/*---------------------------------------------------------------------------+
| wm_shrx.S |
| |
| 64 bit right shift functions |
| |
| Copyright (C) 1992,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| unsigned shrx(void *arg1, unsigned arg2) |
| and |
| unsigned shrxs(void *arg1, unsigned arg2) |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_emu.h"
 
.text
/*---------------------------------------------------------------------------+
| unsigned shrx(void *arg1, unsigned arg2) |
| |
| Extended shift right function. |
| Fastest for small shifts. |
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
| right by the number of bits specified by the second arg (arg2). |
| Forms a 96 bit quantity from the 64 bit arg and eax: |
| [ 64 bit arg ][ eax ] |
| shift right ---------> |
| The eax register is initialized to 0 before the shifting. |
| Results returned in the 64 bit arg and eax. |
+---------------------------------------------------------------------------*/
 
ENTRY(shrx)
/* Set up a frame; esi is saved here, ebx only on the path that uses it */
push %ebp
movl %esp,%ebp
pushl %esi
/* ecx = shift count (arg2), esi = pointer to the 64 bit operand (arg1) */
movl PARAM2,%ecx
movl PARAM1,%esi
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jnc L_more_than_31
 
/* less than 32 bits */
pushl %ebx
movl (%esi),%ebx /* lsl */
movl 4(%esi),%edx /* msl */
xorl %eax,%eax /* extension */
/* Shift the 96 bit quantity edx:ebx:eax right, lowest word first */
shrd %cl,%ebx,%eax
shrd %cl,%edx,%ebx
shr %cl,%edx
movl %ebx,(%esi)
movl %edx,4(%esi)
popl %ebx
popl %esi
leave
ret
 
/* Shift by [32..63] bits */
L_more_than_31:
cmpl $64,%ecx
jnc L_more_than_63
 
subb $32,%cl /* a whole word is moved; only the remainder is shifted */
movl (%esi),%eax /* lsl */
movl 4(%esi),%edx /* msl */
shrd %cl,%edx,%eax /* eax becomes the extension */
shr %cl,%edx /* edx becomes the new lsl */
movl %edx,(%esi)
movl $0,4(%esi) /* the new msl is zero */
popl %esi
leave
ret
 
/* Shift by [64..95] bits */
L_more_than_63:
cmpl $96,%ecx
jnc L_more_than_95
 
subb $64,%cl /* two whole words are moved; only the remainder is shifted */
movl 4(%esi),%eax /* msl */
shr %cl,%eax /* what is left of the msl is returned as the extension */
xorl %edx,%edx
movl %edx,(%esi) /* both words of the operand become zero */
movl %edx,4(%esi)
popl %esi
leave
ret
 
/* Shift by 96 bits or more: the operand and the extension are all zero */
L_more_than_95:
xorl %eax,%eax
movl %eax,(%esi)
movl %eax,4(%esi)
popl %esi
leave
ret
 
 
/*---------------------------------------------------------------------------+
| unsigned shrxs(void *arg1, unsigned arg2) |
| |
| Extended shift right function (optimized for small floating point |
| integers). |
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
| right by the number of bits specified by the second arg (arg2). |
| Forms a 96 bit quantity from the 64 bit arg and eax: |
| [ 64 bit arg ][ eax ] |
| shift right ---------> |
| The eax register is initialized to 0 before the shifting. |
| The lower 8 bits of eax are lost and replaced by a flag which is |
| set (to 0x01) if any bit, apart from the first one, is set in the |
| part which has been shifted out of the arg. |
| Results returned in the 64 bit arg and eax. |
+---------------------------------------------------------------------------*/
ENTRY(shrxs)
/* Set up a frame; esi and ebx are saved for every path */
push %ebp
movl %esp,%ebp
pushl %esi
pushl %ebx
/* ecx = shift count (arg2), esi = pointer to the 64 bit operand (arg1) */
movl PARAM2,%ecx
movl PARAM1,%esi
cmpl $64,%ecx /* shifts of 64 bits or more are handled separately */
jnc Ls_more_than_63
 
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jc Ls_less_than_32
 
/* We got here without jumps by assuming that the most common requirement
is for small integers */
/* Shift by [32..63] bits */
subb $32,%cl
movl (%esi),%eax /* lsl */
movl 4(%esi),%edx /* msl */
xorl %ebx,%ebx
shrd %cl,%eax,%ebx /* ebx collects the bits which fall below the extension */
shrd %cl,%edx,%eax /* eax becomes the extension */
shr %cl,%edx /* edx becomes the new lsl */
orl %ebx,%ebx /* test these 32 bits */
setne %bl
test $0x7fffffff,%eax /* and 31 bits here */
setne %bh
orw %bx,%bx /* Any of the 63 bit set ? */
setne %al /* the flag replaces the low byte of the extension */
movl %edx,(%esi)
movl $0,4(%esi)
popl %ebx
popl %esi
leave
ret
 
/* Shift by [0..31] bits */
Ls_less_than_32:
movl (%esi),%ebx /* lsl */
movl 4(%esi),%edx /* msl */
xorl %eax,%eax /* extension */
shrd %cl,%ebx,%eax
shrd %cl,%edx,%ebx
shr %cl,%edx
test $0x7fffffff,%eax /* only need to look at eax here */
setne %al /* the flag replaces the low byte of the extension */
movl %ebx,(%esi)
movl %edx,4(%esi)
popl %ebx
popl %esi
leave
ret
 
/* Shift by [64..95] bits */
Ls_more_than_63:
cmpl $96,%ecx
jnc Ls_more_than_95
 
subb $64,%cl
movl (%esi),%ebx /* lsl */
movl 4(%esi),%eax /* msl */
xorl %edx,%edx /* extension */
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax /* eax becomes the returned extension */
orl %ebx,%edx /* any bit set in the two words below the extension ? */
setne %bl
test $0x7fffffff,%eax /* only need to look at eax here */
setne %bh
orw %bx,%bx
setne %al /* the flag replaces the low byte of the extension */
xorl %edx,%edx
movl %edx,(%esi) /* set to zero */
movl %edx,4(%esi) /* set to zero */
popl %ebx
popl %esi
leave
ret
 
Ls_more_than_95:
/* Shift by [96..inf) bits */
xorl %eax,%eax
movl (%esi),%ebx
orl 4(%esi),%ebx /* was any bit of the operand set ? */
setne %al /* if so, return only the flag */
xorl %ebx,%ebx
movl %ebx,(%esi)
movl %ebx,4(%esi)
popl %ebx
popl %esi
leave
ret
/control_w.h
0,0 → 1,45
/*---------------------------------------------------------------------------+
| control_w.h |
| |
| Copyright (C) 1992,1993 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
+---------------------------------------------------------------------------*/
 
#ifndef _CONTROLW_H_
#define _CONTROLW_H_

/*
 * _Const_(x) wraps every constant in this header so that the same
 * definitions can be used from C (plain value) and from assembler
 * sources (immediate operand, $value).
 */
#ifdef __ASSEMBLY__
#define _Const_(x) $##x
#else
#define _Const_(x) x
#endif

/* Fields of the FPU control word */
#define CW_RC _Const_(0x0C00) /* rounding control */
#define CW_PC _Const_(0x0300) /* precision control */

/*
 * Exception mask bits.  These use _Const_ like the rest of the header,
 * so the header no longer relies on a Const_ macro being supplied by
 * whichever file happened to be included before it.
 */
#define CW_Precision _Const_(0x0020) /* loss of precision mask */
#define CW_Underflow _Const_(0x0010) /* underflow mask */
#define CW_Overflow _Const_(0x0008) /* overflow mask */
#define CW_ZeroDiv _Const_(0x0004) /* divide by zero mask */
#define CW_Denormal _Const_(0x0002) /* denormalized operand mask */
#define CW_Invalid _Const_(0x0001) /* invalid operation mask */

#define CW_Exceptions _Const_(0x003f) /* all masks */

/* Values of the rounding control field (CW_RC) */
#define RC_RND _Const_(0x0000)
#define RC_DOWN _Const_(0x0400)
#define RC_UP _Const_(0x0800)
#define RC_CHOP _Const_(0x0C00)

/* p 15-5: Precision control bits affect only the following:
   ADD, SUB(R), MUL, DIV(R), and SQRT */
/* Values of the precision control field (CW_PC) */
#define PR_24_BITS _Const_(0x000)
#define PR_53_BITS _Const_(0x200)
#define PR_64_BITS _Const_(0x300)
#define PR_RESERVED_BITS _Const_(0x100)
/* FULL_PRECISION simulates all exceptions masked */
#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f)

#endif /* _CONTROLW_H_ */
/fpu_entry.c
0,0 → 1,695
/*---------------------------------------------------------------------------+
| fpu_entry.c |
| |
| The entry functions for wm-FPU-emu |
| |
| Copyright (C) 1992,1993,1994,1996 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@jacobi.maths.monash.edu.au |
| |
| See the files "README" and "COPYING" for further copyright and warranty |
| information. |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Note: |
| The file contains code which accesses user memory. |
| Emulator static data may change when user memory is accessed, due to |
| other processes using the emulator while swapping is in progress. |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| math_emulate(), restore_i387_soft() and save_i387_soft() are the only |
| entry points for wm-FPU-emu. |
+---------------------------------------------------------------------------*/
 
#include <linux/signal.h>
 
#include <asm/segment.h>
 
#include "fpu_system.h"
#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"
#include "status_w.h"
 
#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */
 
#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */
 
/* WARNING: These codes are not documented by Intel in their 80486 manual
and may not work on FPU clones or later Intel FPUs. */
 
/* Changes to support the un-doc codes provided by Linus Torvalds. */
 
#define _d9_d8_ fstp_i /* unofficial code (19) */
#define _dc_d0_ fcom_st /* unofficial code (14) */
#define _dc_d8_ fcompst /* unofficial code (1c) */
#define _dd_c8_ fxch_i /* unofficial code (0d) */
#define _de_d0_ fcompst /* unofficial code (16) */
#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */
#define _df_c8_ fxch_i /* unofficial code (0f) */
#define _df_d0_ fstp_i /* unofficial code (17) */
#define _df_d8_ fstp_i /* unofficial code (1f) */
 
static FUNC const st_instr_table[64] = {
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
fsub__, fp_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
fdiv__, trig_a, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
fdivr_, trig_b, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
};
 
#else /* Support only documented FPU op-codes */
 
static FUNC const st_instr_table[64] = {
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
fsub__, fp_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
fdiv__, trig_a, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
fdivr_, trig_b, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
};
 
#endif NO_UNDOC_CODE
 
 
/*
 * Operand-check codes for the register (non-memory) instructions.
 * math_emulate() switches on these before calling the handler from
 * st_instr_table.  Several codes deliberately share the value 0: they
 * document how the instruction uses its operands, but the dispatcher
 * performs no check for them.
 */
#define _NONE_ 0 /* Take no special action */
#define _REG0_ 1 /* Need to check for not empty st(0) */
#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */
#define _REGi_ 0 /* Uses st(rm) */
#define _PUSH_ 3 /* Need to check for space to push onto stack */
#define _null_ 4 /* Function illegal or not implemented */
#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */
#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */
#define _REGIc 0 /* Compare st(0) and st(rm) */
#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */

/*
 * type_table has one entry per instruction, laid out exactly like
 * st_instr_table: the index is (FPU_modrm & 0x38) | (byte1 & 7).
 */

#ifndef NO_UNDOC_CODE

/* Un-documented FPU op-codes supported by default. (see above) */

static unsigned char const type_table[64] = {
  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
  _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
  _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
  _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
};

#else /* Support only documented FPU op-codes */

static unsigned char const type_table[64] = {
  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
  _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
  _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
  _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
};

#endif /* NO_UNDOC_CODE */
 
 
#ifdef RE_ENTRANT_CHECKING
/* Tested at the top of math_emulate() to report re-entry of the emulator */
char emulating=0;
#endif /* RE_ENTRANT_CHECKING */
 
static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip,
overrides *override);
 
/*
 * Main entry point of the emulator.
 *
 * Works out the addressing mode of the faulting code, skips any prefix
 * bytes, then decodes and emulates the FPU instruction at FPU_EIP.
 * Instructions with a memory operand (modrm < 0300) are handled inline
 * (arithmetic on st(0)) or by load_store_instr(); register forms are
 * dispatched through type_table and st_instr_table.  When FPU_lookahead
 * is set and no reschedule is pending, following FPU instructions are
 * emulated in the same call.
 *
 * arg is handed to SETUP_DATA_AREA(); its meaning is defined by that
 * macro (not visible here).
 *
 * Note that math_abort() is used below as if it does not return.
 */
asmlinkage void math_emulate(long arg)
{
unsigned char FPU_modrm, byte1;
unsigned short code;
fpu_addr_modes addr_modes;
int unmasked;
FPU_REG loaded_data;
void *data_address;
struct address data_sel_off;
struct address entry_sel_off;
unsigned long code_base = 0;
unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
char st0_tag;
FPU_REG *st0_ptr;
struct desc_struct code_descriptor;
 
#ifdef RE_ENTRANT_CHECKING
if ( emulating )
{
printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
}
RE_ENTRANT_CHECK_ON;
#endif RE_ENTRANT_CHECKING
 
/* First use of the FPU by this process: initialize the emulator state */
if (!current->used_math)
{
int i;
for ( i = 0; i < 8; i++ )
{
/* Make sure that the registers are compatible
with the assumptions of the emulator. */
if ( !((regs[i].exp == EXP_UNDER) && (regs[i].sigh == 0)
&& (regs[i].sigl == 0)) )
regs[i].sigh |= 0x80000000;
}
finit();
current->used_math = 1;
}
 
SETUP_DATA_AREA(arg);
 
FPU_ORIG_EIP = FPU_EIP;
 
/* Determine the default addressing mode and, for segmented modes,
the base and limit of the code segment */
if ( (FPU_EFLAGS & 0x00020000) != 0 )
{
/* Virtual 8086 mode */
addr_modes.default_mode = VM86;
FPU_EIP += code_base = FPU_CS << 4;
code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */
}
else if ( FPU_CS == USER_CS && FPU_DS == USER_DS )
{
addr_modes.default_mode = 0;
}
else if ( FPU_CS == KERNEL_CS )
{
printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
panic("Math emulation needed in kernel");
}
else
{
 
if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */
{
/* Can only handle segmented addressing via the LDT
for now, and it must be 16 bit */
printk("FPU emulator: Unsupported addressing mode\n");
math_abort(FPU_info, SIGILL);
}
 
if ( SEG_D_SIZE(code_descriptor = LDT_DESCRIPTOR(FPU_CS)) )
{
/* The above test may be wrong, the book is not clear */
/* Segmented 32 bit protected mode */
addr_modes.default_mode = SEG32;
}
else
{
/* 16 bit protected mode */
addr_modes.default_mode = PM16;
}
FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
code_limit = code_base
+ (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
- 1;
/* The limit computation wrapped around: clamp it */
if ( code_limit < code_base ) code_limit = 0xffffffff;
}
 
/* No lookahead while the process is being traced */
FPU_lookahead = 1;
if (current->flags & PF_PTRACED)
FPU_lookahead = 0;
 
if ( !valid_prefix(&byte1, (unsigned char **)&FPU_EIP,
&addr_modes.override) )
{
RE_ENTRANT_CHECK_OFF;
printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
"FPU emulator: self-modifying code! (emulation impossible)\n",
byte1);
RE_ENTRANT_CHECK_ON;
EXCEPTION(EX_INTERNAL|0x126);
math_abort(FPU_info,SIGILL);
}
 
/* The lookahead code at the end of the function jumps back here */
do_another_FPU_instruction:
 
no_ip_update = 0;
 
FPU_EIP++; /* We have fetched the prefix and first code bytes. */
 
if ( addr_modes.default_mode )
{
/* This checks for the minimum instruction bytes.
We also need to check any extra (address mode) code access. */
if ( FPU_EIP > code_limit )
math_abort(FPU_info,SIGSEGV);
}
 
/* Anything other than an escape opcode (0xd8..0xdf) must be fwait */
if ( (byte1 & 0xf8) != 0xd8 )
{
if ( byte1 == FWAIT_OPCODE )
{
if (partial_status & SW_Summary)
goto do_the_FPU_interrupt;
else
goto FPU_fwait_done;
}
#ifdef PARANOID
EXCEPTION(EX_INTERNAL|0x128);
math_abort(FPU_info,SIGILL);
#endif PARANOID
}
 
/* Fetch the modrm byte from user memory */
RE_ENTRANT_CHECK_OFF;
FPU_code_verify_area(1);
FPU_modrm = get_fs_byte((unsigned char *) FPU_EIP);
RE_ENTRANT_CHECK_ON;
FPU_EIP++;
 
if (partial_status & SW_Summary)
{
/* Ignore the error for now if the current instruction is a no-wait
control instruction */
/* The 80486 manual contradicts itself on this topic,
but a real 80486 uses the following instructions:
fninit, fnstenv, fnsave, fnstsw, fnstcw, fnclex.
*/
code = (FPU_modrm << 8) | byte1;
if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */
(((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv,
fnstsw */
((code & 0xc000) != 0xc000))) ) )
{
/*
* We need to simulate the action of the kernel to FPU
* interrupts here.
*/
do_the_FPU_interrupt:
FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */
 
RE_ENTRANT_CHECK_OFF;
current->tss.trap_no = 16;
current->tss.error_code = 0;
send_sig(SIGFPE, current, 1);
return;
}
}
 
/* Remember where this instruction started; stored into
instruction_address below unless no_ip_update is set */
entry_sel_off.offset = FPU_ORIG_EIP;
entry_sel_off.selector = FPU_CS;
entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
 
FPU_rm = FPU_modrm & 7;
 
if ( FPU_modrm < 0300 )
{
/* All of these instructions use the mod/rm byte to get a data address */
 
if ( (addr_modes.default_mode & SIXTEEN)
^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
data_address = get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
addr_modes);
else
data_address = get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
addr_modes);
 
if ( addr_modes.default_mode )
{
if ( FPU_EIP-1 > code_limit )
math_abort(FPU_info,SIGSEGV);
}
 
/* Even first opcode byte: arithmetic between st(0) and a memory operand */
if ( !(byte1 & 1) )
{
unsigned short status1 = partial_status;
 
st0_ptr = &st(0);
st0_tag = st0_ptr->tag;
 
/* Stack underflow has priority */
if ( NOT_EMPTY_ST0 )
{
if ( addr_modes.default_mode & PROTECTED )
{
/* This table works for 16 and 32 bit protected mode */
if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
math_abort(FPU_info,SIGSEGV);
}
 
unmasked = 0; /* Do this here to stop compiler warnings. */
/* Bits 1..2 of the opcode select the format of the memory operand */
switch ( (byte1 >> 1) & 3 )
{
case 0:
unmasked = reg_load_single((float *)data_address,
&loaded_data);
break;
case 1:
reg_load_int32((long *)data_address, &loaded_data);
break;
case 2:
unmasked = reg_load_double((double *)data_address,
&loaded_data);
break;
case 3:
reg_load_int16((short *)data_address, &loaded_data);
break;
}
/* No more access to user memory, it is safe
to use static data now */
 
/* NaN operands have the next priority. */
/* We have to delay looking at st(0) until after
loading the data, because that data might contain an SNaN */
if ( (st0_tag == TW_NaN) ||
(loaded_data.tag == TW_NaN) )
{
/* Restore the status word; we might have loaded a
denormal. */
partial_status = status1;
if ( (FPU_modrm & 0x30) == 0x10 )
{
/* fcom or fcomp */
EXCEPTION(EX_Invalid);
setcc(SW_C3 | SW_C2 | SW_C0);
if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
pop(); /* fcomp, masked, so we pop. */
}
else
{
#ifdef PECULIAR_486
/* This is not really needed, but gives behaviour
identical to an 80486 */
if ( (FPU_modrm & 0x28) == 0x20 )
/* fdiv or fsub */
real_2op_NaN(&loaded_data, st0_ptr,
st0_ptr);
else
#endif PECULIAR_486
/* fadd, fdivr, fmul, or fsubr */
real_2op_NaN(st0_ptr, &loaded_data,
st0_ptr);
}
goto reg_mem_instr_done;
}
 
/* Loading the operand raised an unmasked exception: the
operation itself is not performed */
if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
{
/* Is not a comparison instruction. */
if ( (FPU_modrm & 0x38) == 0x38 )
{
/* fdivr */
if ( (st0_tag == TW_Zero) &&
(loaded_data.tag == TW_Valid) )
{
if ( divide_by_zero(loaded_data.sign,
st0_ptr) )
{
/* We use the fact here that the unmasked
exception in the loaded data was for a
denormal operand */
/* Restore the state of the denormal op bit */
partial_status &= ~SW_Denorm_Op;
partial_status |= status1 & SW_Denorm_Op;
}
}
}
goto reg_mem_instr_done;
}
 
/* Bits 3..5 of the modrm byte select the operation */
switch ( (FPU_modrm >> 3) & 7 )
{
case 0: /* fadd */
clear_C1();
reg_add(st0_ptr, &loaded_data, st0_ptr,
control_word);
break;
case 1: /* fmul */
clear_C1();
reg_mul(st0_ptr, &loaded_data, st0_ptr,
control_word);
break;
case 2: /* fcom */
compare_st_data(&loaded_data);
break;
case 3: /* fcomp */
if ( !compare_st_data(&loaded_data) && !unmasked )
pop();
break;
case 4: /* fsub */
clear_C1();
reg_sub(st0_ptr, &loaded_data, st0_ptr,
control_word);
break;
case 5: /* fsubr */
clear_C1();
reg_sub(&loaded_data, st0_ptr, st0_ptr,
control_word);
break;
case 6: /* fdiv */
clear_C1();
reg_div(st0_ptr, &loaded_data, st0_ptr,
control_word);
break;
case 7: /* fdivr */
clear_C1();
if ( st0_tag == TW_Zero )
partial_status = status1; /* Undo any denorm tag,
zero-divide has priority. */
reg_div(&loaded_data, st0_ptr, st0_ptr,
control_word);
break;
}
}
else
{
/* st(0) is empty */
if ( (FPU_modrm & 0x30) == 0x10 )
{
/* The instruction is fcom or fcomp */
EXCEPTION(EX_StackUnder);
setcc(SW_C3 | SW_C2 | SW_C0);
if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
pop(); /* fcomp */
}
else
stack_underflow();
}
reg_mem_instr_done:
operand_address = data_sel_off;
}
else
{
/* Odd first opcode byte: load/store instructions */
if ( !(no_ip_update =
load_store_instr(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
addr_modes, data_address)) )
{
operand_address = data_sel_off;
}
}
 
}
else
{
/* None of these instructions access user memory */
unsigned char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
 
#ifdef PECULIAR_486
/* This is supposed to be undefined, but a real 80486 seems
to do this: */
operand_address.offset = 0;
operand_address.selector = FPU_DS;
#endif PECULIAR_486
 
st0_ptr = &st(0);
st0_tag = st0_ptr->tag;
/* Perform the operand checks requested by type_table before
calling the handler */
switch ( type_table[(int) instr_index] )
{
case _NONE_: /* also _REGIc: _REGIn */
break;
case _REG0_:
if ( !NOT_EMPTY_ST0 )
{
stack_underflow();
goto FPU_instruction_done;
}
break;
case _REGIi:
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
{
stack_underflow_i(FPU_rm);
goto FPU_instruction_done;
}
break;
case _REGIp:
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
{
stack_underflow_pop(FPU_rm);
goto FPU_instruction_done;
}
break;
case _REGI_:
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
{
stack_underflow();
goto FPU_instruction_done;
}
break;
case _PUSH_: /* Only used by the fld st(i) instruction */
break;
case _null_:
FPU_illegal();
goto FPU_instruction_done;
default:
EXCEPTION(EX_INTERNAL|0x111);
goto FPU_instruction_done;
}
(*st_instr_table[(int) instr_index])();
 
FPU_instruction_done:
;
}
 
if ( ! no_ip_update )
instruction_address = entry_sel_off;
 
FPU_fwait_done:
 
#ifdef DEBUG
RE_ENTRANT_CHECK_OFF;
emu_printall();
RE_ENTRANT_CHECK_ON;
#endif DEBUG
 
/* Lookahead: if the next instruction is also an FPU instruction,
emulate it now */
if (FPU_lookahead && !need_resched)
{
FPU_ORIG_EIP = FPU_EIP - code_base;
if ( valid_prefix(&byte1, (unsigned char **)&FPU_EIP,
&addr_modes.override) )
goto do_another_FPU_instruction;
}
 
/* Convert FPU_EIP back to an offset within the code segment */
if ( addr_modes.default_mode )
FPU_EIP -= code_base;
 
RE_ENTRANT_CHECK_OFF;
}
 
 
/* Support for prefix bytes is not yet complete. To properly handle
all prefix bytes, further changes are needed in the emulator code
which accesses user address space. Access to separate segments is
important for msdos emulation. */
/*
 * Scan the prefix bytes starting at *fpu_eip and record them in *override.
 *
 * Returns 1 when the byte which ends the scan is fwait or an FPU escape
 * opcode (0xd8..0xdf), 0 otherwise.  The terminating byte is always
 * stored in *Byte.  *fpu_eip is advanced to the terminating byte only
 * for an escape opcode; it is left untouched for fwait and on failure.
 */
static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip,
                        overrides *override)
{
  unsigned char *ip = *fpu_eip;
  unsigned char byte;

  *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */

  RE_ENTRANT_CHECK_OFF;
  FPU_code_verify_area(1);
  byte = get_fs_byte(ip);
  RE_ENTRANT_CHECK_ON;

  for (;;)
    {
      /* Every prefix case leaves the switch with "break" and falls into
         the common fetch below; every other byte returns directly. */
      switch ( byte )
        {
        case ADDR_SIZE_PREFIX:
          override->address_size = ADDR_SIZE_PREFIX;
          break;

        case OP_SIZE_PREFIX:
          override->operand_size = OP_SIZE_PREFIX;
          break;

        case PREFIX_CS:
          override->segment = PREFIX_CS_;
          break;
        case PREFIX_ES:
          override->segment = PREFIX_ES_;
          break;
        case PREFIX_SS:
          override->segment = PREFIX_SS_;
          break;
        case PREFIX_FS:
          override->segment = PREFIX_FS_;
          break;
        case PREFIX_GS:
          override->segment = PREFIX_GS_;
          break;
        case PREFIX_DS:
          override->segment = PREFIX_DS_;
          break;

        /* lock is not a valid prefix for FPU instructions,
           let the cpu handle it to generate a SIGILL. */
        /* case PREFIX_LOCK: */

        /* rep.. prefixes have no meaning for FPU instructions */
        case PREFIX_REPE:
        case PREFIX_REPNE:
          break;

        case FWAIT_OPCODE:
          *Byte = byte;
          return 1;

        default:
          *Byte = byte;       /* Also needed for the caller's error message. */
          if ( (byte & 0xf8) != 0xd8 )
            {
              /* Not a valid sequence of prefix bytes followed by
                 an FPU instruction. */
              return 0;
            }
          *fpu_eip = ip;
          return 1;
        }

      /* A prefix was consumed: fetch the next code byte */
      ip++;
      RE_ENTRANT_CHECK_OFF;
      FPU_code_verify_area(1);
      byte = get_fs_byte(ip);
      RE_ENTRANT_CHECK_ON;
    }
}
 
 
/*
 * Abandon emulation of the current instruction and signal the process.
 *
 * FPU_EIP is reset to the start of the instruction, trap number 16 is
 * recorded in the tss and `signal' is sent to the current process.
 * The inline assembly then loads esp with (info - 4) and executes
 * "ret", so control is not expected to come back to the caller; the
 * printk below is reached only if that fails.
 */
void math_abort(struct info * info, unsigned int signal)
{
  FPU_EIP = FPU_ORIG_EIP;
  current->tss.trap_no = 16;
  current->tss.error_code = 0;
  send_sig(signal,current,1);
  RE_ENTRANT_CHECK_OFF;
  __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
#ifdef PARANOID
  printk("ERROR: wm-FPU-emu math_abort failed!\n");
#endif /* PARANOID */
}
 
 
 
/* Load the emulator state from the image at buf by running frstor()
   with default addressing (no prefix overrides, default mode 0). */
void restore_i387_soft(struct _fpstate *buf)
{
  fpu_addr_modes default_modes = {{ 0, 0, PREFIX_DEFAULT }, 0};
  char *image = (char *)buf;

  frstor(default_modes, image);
}
 
 
/* Store the emulator state into the image at buf by running fsave()
   with default addressing (no prefix overrides, default mode 0).
   Returns buf. */
struct _fpstate * save_i387_soft(struct _fpstate * buf)
{
  fpu_addr_modes default_modes = {{ 0, 0, PREFIX_DEFAULT }, 0};
  char *image = (char *)buf;

  fsave(default_modes, image);
  return buf;
}
/errors.c
0,0 → 1,659
/*---------------------------------------------------------------------------+
| errors.c |
| |
| The error handling functions for wm-FPU-emu |
| |
| Copyright (C) 1992,1993,1994,1996 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Note: |
| The file contains code which accesses user memory. |
| Emulator static data may change when user memory is accessed, due to |
| other processes using the emulator while swapping is in progress. |
+---------------------------------------------------------------------------*/
 
#include <linux/signal.h>
 
#include <asm/segment.h>
 
#include "fpu_system.h"
#include "exception.h"
#include "fpu_emu.h"
#include "status_w.h"
#include "control_w.h"
#include "reg_constant.h"
#include "version.h"
 
/* */
#undef PRINT_MESSAGES
/* */
 
 
/*
 * Report an unimplemented FPU opcode with printk and raise an invalid
 * operation exception.  For code in the user code segment the
 * instruction bytes are printed: prefix bytes in brackets, then the
 * escape opcode and a decoding of the modrm byte.
 */
void Un_impl(void)
{
  unsigned long address = FPU_ORIG_EIP;
  unsigned char opcode, modrm;

  RE_ENTRANT_CHECK_OFF;
  /* No need to verify_area(), we have previously fetched these bytes. */
  printk("Unimplemented FPU Opcode at eip=%p : ", (void *) address);
  if ( FPU_CS != USER_CS )
    {
      printk("cs selector = %04x\n", FPU_CS);
    }
  else
    {
      /* Everything before the escape opcode (0xd8..0xdf) is a prefix */
      opcode = get_fs_byte((unsigned char *) address);
      while ( (opcode & 0xf8) != 0xd8 )
        {
          printk("[%02x]", opcode);
          address++;
          opcode = get_fs_byte((unsigned char *) address);
        }
      printk("%02x ", opcode);

      modrm = get_fs_byte(1 + (unsigned char *) address);
      if ( modrm < 0300 )
        {
          /* Memory form: only the reg field identifies the instruction */
          printk("/%d\n", (modrm >> 3) & 7);
        }
      else
        {
          printk("%02x (%02x+%d)\n", modrm, modrm & 0xf8, modrm & 7);
        }
    }

  RE_ENTRANT_CHECK_ON;

  EXCEPTION(EX_Invalid);
}
 
 
/*
 * Handler for opcodes which are illegal and which are known to produce
 * a SIGILL on a real 80486: abort the emulation with SIGILL.
 */
void FPU_illegal(void)
{
  math_abort(FPU_info, SIGILL);
}
 
 
 
/*
 * Dump the emulator state with printk: the bytes of the current
 * instruction, the status word, the control word and every non-empty
 * stack register.
 *
 * Side effect: partial_status is overwritten with status_word().
 */
void emu_printall(void)
{
  int i;
  static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "ERROR",
                                    "DeNorm", "Inf", "NaN", "Empty" };
  unsigned char byte1, FPU_modrm;
  unsigned long address = FPU_ORIG_EIP;

  RE_ENTRANT_CHECK_OFF;
  /* No need to verify_area(), we have previously fetched these bytes. */
  printk("At %p:", (void *) address);
  if ( FPU_CS == USER_CS )
    {
      /* Print prefix bytes in brackets until the escape opcode is found,
         but give up after MAX_PRINTED_BYTES bytes. */
#define MAX_PRINTED_BYTES 20
      for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
        {
          byte1 = get_fs_byte((unsigned char *) address);
          if ( (byte1 & 0xf8) == 0xd8 )
            {
              printk(" %02x", byte1);
              break;
            }
          printk(" [%02x]", byte1);
          address++;
        }
      if ( i == MAX_PRINTED_BYTES )
        printk(" [more..]\n");
      else
        {
          FPU_modrm = get_fs_byte(1 + (unsigned char *) address);
          if (FPU_modrm >= 0300)
            printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
          else
            printk(" /%d, mod=%d rm=%d\n",
                   (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
        }
    }
  else
    {
      printk("%04x\n", FPU_CS);
    }

  partial_status = status_word();

#ifdef DEBUGGING
  if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n");
  if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n");
  if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n");
  if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n");
  if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n");
  if ( partial_status & SW_Summary ) printk("SW: exception summary\n");
  if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
  if ( partial_status & SW_Precision ) printk("SW: loss of precision\n");
  if ( partial_status & SW_Underflow ) printk("SW: underflow\n");
  if ( partial_status & SW_Overflow ) printk("SW: overflow\n");
  if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n");
  if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n");
  if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n");
#endif /* DEBUGGING */

  printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
         partial_status & 0x8000 ? 1 : 0, /* busy */
         (partial_status & 0x3800) >> 11, /* stack top pointer */
         partial_status & 0x80 ? 1 : 0, /* Error summary status */
         partial_status & 0x40 ? 1 : 0, /* Stack flag */
         partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
         partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
         partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
         partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
         partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
  /* The control word mask bits are tested with the SW_ constants here */
  printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n",
         control_word & 0x1000 ? 1 : 0,
         (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
         (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
         control_word & 0x80 ? 1 : 0,
         control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
         control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
         control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);

  for ( i = 0; i < 8; i++ )
    {
      FPU_REG *r = &st(i);
      char tagi = r->tag;
      switch (tagi)
        {
        case TW_Empty:
          continue;
          break;
        case TW_Zero:
          /* With the special case disabled, zero is printed like any
             other register by falling through. */
#if 0
          printk("st(%d) %c .0000 0000 0000 0000 ",
                 i, r->sign ? '-' : '+');
          break;
#endif
        case TW_Valid:
        case TW_NaN:
        /* case TW_Denormal: */
        case TW_Infinity:
          printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6ld ", i,
                 r->sign ? '-' : '+',
                 (long)(r->sigh >> 16),
                 (long)(r->sigh & 0xFFFF),
                 (long)(r->sigl >> 16),
                 (long)(r->sigl & 0xFFFF),
                 r->exp - EXP_BIAS + 1);
          break;
        default:
          printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi);
          continue;
          break;
        }
      printk("%s\n", tag_desc[(int) (unsigned) tagi]);
    }

  RE_ENTRANT_CHECK_ON;

}
 
/* Printable names for the exception types.  exception() scans the table
   in order and stops at the first matching entry; the entry with a zero
   type terminates the table. */
static struct {
  int type;
  const char *name;
} exception_names[] = {
  { EX_StackOver,  "stack overflow" },
  { EX_StackUnder, "stack underflow" },
  { EX_Precision,  "loss of precision" },
  { EX_Underflow,  "underflow" },
  { EX_Overflow,   "overflow" },
  { EX_ZeroDiv,    "divide by zero" },
  { EX_Denormal,   "denormalized operand" },
  { EX_Invalid,    "invalid operation" },
  { EX_INTERNAL,   "INTERNAL BUG in "FPU_VERSION },
  { 0,             NULL }       /* end of table */
};
 
/*
EX_INTERNAL is always given with a code which indicates where the
error was detected.
 
Internal error types:
0x14 in fpu_etc.c
0x1nn in a *.c file:
0x101 in reg_add_sub.c
0x102 in reg_mul.c
0x104 in poly_atan.c
0x105 in reg_mul.c
0x107 in fpu_trig.c
0x108 in reg_compare.c
0x109 in reg_compare.c
0x110 in reg_add_sub.c
0x111 in fpu_entry.c
0x112 in fpu_trig.c
0x113 in errors.c
0x115 in fpu_trig.c
0x116 in fpu_trig.c
0x117 in fpu_trig.c
0x118 in fpu_trig.c
0x119 in fpu_trig.c
0x120 in poly_atan.c
0x121 in reg_compare.c
0x122 in reg_compare.c
0x123 in reg_compare.c
0x125 in fpu_trig.c
0x126 in fpu_entry.c
0x127 in poly_2xm1.c
0x128 in fpu_entry.c
0x129 in fpu_entry.c
0x130 in get_address.c
0x131 in get_address.c
0x132 in get_address.c
0x133 in get_address.c
0x140 in load_store.c
0x141 in load_store.c
0x150 in poly_sin.c
0x151 in poly_sin.c
0x160 in reg_ld_str.c
0x161 in reg_ld_str.c
0x162 in reg_ld_str.c
0x163 in reg_ld_str.c
0x2nn in an *.S file:
0x201 in reg_u_add.S
0x202 in reg_u_div.S
0x203 in reg_u_div.S
0x204 in reg_u_div.S
0x205 in reg_u_mul.S
0x206 in reg_u_sub.S
0x207 in wm_sqrt.S
0x208 in reg_div.S
0x209 in reg_u_sub.S
0x210 in reg_u_sub.S
0x211 in reg_u_sub.S
0x212 in reg_u_sub.S
0x213 in wm_sqrt.S
0x214 in wm_sqrt.S
0x215 in wm_sqrt.S
0x220 in reg_norm.S
0x221 in reg_norm.S
0x230 in reg_round.S
0x231 in reg_round.S
0x232 in reg_round.S
0x233 in reg_round.S
0x234 in reg_round.S
0x235 in reg_round.S
0x236 in reg_round.S
0x240 in div_Xsig.S
0x241 in div_Xsig.S
0x242 in div_Xsig.S
*/
 
/*
 * Record exception n in the status word and report it when required.
 *
 * n is either a set of exception bits or EX_INTERNAL combined with a
 * code which identifies where the internal error was detected.
 * The summary bits are set only when an exception recorded in the
 * status word is not masked by the control word.  A message is printed
 * for unmasked exceptions (only an unknown-exception message unless
 * PRINT_MESSAGES is defined) and always for internal errors, which also
 * dump the emulator state.
 */
void exception(int n)
{
  int i, int_type;

  int_type = 0; /* Needed only to stop compiler warnings */
  if ( n & EX_INTERNAL )
    {
      int_type = n - EX_INTERNAL;
      n = EX_INTERNAL;
      /* Set lots of exception bits! */
      partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
    }
  else
    {
      /* Extract only the bits which we use to set the status word */
      n &= (SW_Exc_Mask);
      /* Set the corresponding exception bit */
      partial_status |= n;
      /* Set summary bits iff exception isn't masked */
      if ( partial_status & ~control_word & CW_Exceptions )
        partial_status |= (SW_Summary | SW_Backward);
      if ( n & (SW_Stack_Fault | EX_Precision) )
        {
          if ( !(n & SW_C1) )
            /* This bit distinguishes over- from underflow for a stack fault,
               and roundup from round-down for precision loss. */
            partial_status &= ~SW_C1;
        }
    }

  RE_ENTRANT_CHECK_OFF;
  if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
    {
#ifdef PRINT_MESSAGES
      /* My message from the sponsor */
      printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
#endif /* PRINT_MESSAGES */
      /* Get a name string for error reporting */
      for (i=0; exception_names[i].type; i++)
        if ( (exception_names[i].type & n) == exception_names[i].type )
          break;
      if (exception_names[i].type)
        {
#ifdef PRINT_MESSAGES
          printk("FP Exception: %s!\n", exception_names[i].name);
#endif /* PRINT_MESSAGES */
        }
      else
        printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
      if ( n == EX_INTERNAL )
        {
          printk("FPU emulator: Internal error type 0x%04x\n", int_type);
          emu_printall();
        }
      /* The else below belongs to the "if ( n == EX_INTERNAL )" above */
#ifdef PRINT_MESSAGES
      else
        emu_printall();
#endif /* PRINT_MESSAGES */

      /*
       * The 80486 generates an interrupt on the next non-control FPU
       * instruction. So we need some means of flagging it.
       * We use the ES (Error Summary) bit for this.
       */
    }
  RE_ENTRANT_CHECK_ON;

#ifdef __DEBUG__
  math_abort(FPU_info,SIGFPE);
#endif /* __DEBUG__ */

}
 
 
/*
 * Real operation attempted on two operands, at least one of them a NaN.
 *
 * a, b: the source operands.
 * dest: receives the resulting NaN (always for a quiet NaN; for a
 *       signalling NaN only when the invalid exception is masked).
 *
 * Returns nz if the exception is unmasked, 0 otherwise.
 */
asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest)
{
  FPU_REG const *x;
  int signalling;

  /* The default result for the case of two "equal" NaNs (signs may
     differ) is chosen to reproduce 80486 behaviour */
  x = a;
  if (a->tag == TW_NaN)
    {
      if (b->tag == TW_NaN)
        {
          /* Signalling if either operand has bit 30 of sigh clear. */
          signalling = !(a->sigh & b->sigh & 0x40000000);
          /* find the "larger" */
          if ( significand(a) < significand(b) )
            x = b;
        }
      else
        {
          /* return the quiet version of the NaN in a */
          signalling = !(a->sigh & 0x40000000);
        }
    }
  else
#ifdef PARANOID
    if (b->tag == TW_NaN)
#endif /* PARANOID */
    {
      /* Without PARANOID, b is simply assumed to be the NaN. */
      signalling = !(b->sigh & 0x40000000);
      x = b;
    }
#ifdef PARANOID
  else
    {
      signalling = 0;
      EXCEPTION(EX_INTERNAL|0x113);
      x = &CONST_QNaN;
    }
#endif /* PARANOID */

  if ( !signalling )
    {
      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
        x = &CONST_QNaN;
      reg_move(x, dest);
      return 0;
    }

  if ( control_word & CW_Invalid )
    {
      /* The masked response */
      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
        x = &CONST_QNaN;
      reg_move(x, dest);
      /* ensure a Quiet NaN */
      dest->sigh |= 0x40000000;
    }

  EXCEPTION(EX_Invalid);
  return !(control_word & CW_Invalid);
}
 
 
/*
 * Invalid arithmetic operation on Valid registers.
 * Raises EX_Invalid; when that exception is masked, dest is loaded with
 * the default quiet NaN.  Returns nz if the exception is unmasked.
 */
asmlinkage int arith_invalid(FPU_REG *dest)
{
  int unmasked;

  EXCEPTION(EX_Invalid);

  unmasked = !(control_word & CW_Invalid);
  if ( !unmasked )
    {
      /* The masked response */
      reg_move(&CONST_QNaN, dest);
    }

  return unmasked;
}
 
 
/*
 * Divide a finite number by zero.
 * When the zero-divide exception is masked, dest becomes an infinity
 * carrying `sign'.  EX_ZeroDiv is raised in either case.
 * Returns nz if the exception is unmasked.
 */
asmlinkage int divide_by_zero(int sign, FPU_REG *dest)
{
  const int masked = (control_word & CW_ZeroDiv) != 0;

  if ( masked )
    {
      /* The masked response */
      reg_move(&CONST_INF, dest);
      dest->sign = (unsigned char)sign;
    }

  EXCEPTION(EX_ZeroDiv);

  return !masked;
}
 
 
/*
 * This may be called often, so keep it lean.
 *
 * Record a precision (inexact) condition described by `flags', which may
 * include SW_C1 to indicate that the result was rounded up.
 *
 * Returns 0 if the precision exception is masked (the flags are simply
 * recorded in partial_status), 1 if it is unmasked (exception() is called).
 */
int set_precision_flag(int flags)
{
  if ( control_word & CW_Precision )
    {
      /* The masked response.  C1 must reflect this operation only: drop
         any stale C1 first so that a round-down leaves it clear, exactly
         as exception() and set_precision_flag_down() do.  (The previous
         mask, ~(SW_C1 & flags), could never change the final value.) */
      partial_status &= ~SW_C1;
      partial_status |= flags;
      return 0;
    }
  else
    {
      exception(flags);
      return 1;
    }
}
 
 
/*
 * This may be called often, so keep it lean.
 * Flags a precision loss where the result was rounded up (C1 set).
 */
asmlinkage void set_precision_flag_up(void)
{
  if ( !(control_word & CW_Precision) )
    {
      /* Unmasked: go through the full exception path. */
      exception(EX_Precision | SW_C1);
      return;
    }

  /* The masked response */
  partial_status |= (SW_Precision | SW_C1);
}
 
 
/*
 * This may be called often, so keep it lean.
 * Flags a precision loss where the result was rounded down (C1 clear).
 */
asmlinkage void set_precision_flag_down(void)
{
  if ( !(control_word & CW_Precision) )
    {
      /* Unmasked: go through the full exception path. */
      exception(EX_Precision);
      return;
    }

  /* The masked response: clear C1, then set the precision bit. */
  partial_status = (partial_status & ~SW_C1) | SW_Precision;
}
 
 
/*
 * Flag a denormal operand.
 * Returns 0 when the denormal exception is masked (only the status bit is
 * set), 1 when it is unmasked (exception() has been called).
 */
asmlinkage int denormal_operand(void)
{
  if ( !(control_word & CW_Denormal) )
    {
      exception(EX_Denormal);
      return 1;
    }

  /* The masked response */
  partial_status |= SW_Denorm_Op;
  return 0;
}
 
 
/*
 * Handle an arithmetic overflow of the value in dest.
 *
 * Masked: dest is replaced by an infinity of the same sign, EX_Overflow is
 * raised, then a precision exception with C1 set; returns nz if the
 * precision exception is unmasked.
 * Unmasked: the exponent of dest is reduced by 3 * 2^13, EX_Overflow is
 * raised, and 0 is returned.
 */
asmlinkage int arith_overflow(FPU_REG *dest)
{
  const int masked = (control_word & CW_Overflow) != 0;

  if ( !masked )
    {
      /* Subtract the magic number from the exponent */
      dest->exp -= (3 * (1 << 13));
    }
  else
    {
      /* The masked response */
      /* ###### The response here depends upon the rounding mode */
      const char kept_sign = dest->sign;

      reg_move(&CONST_INF, dest);
      dest->sign = kept_sign;
    }

  EXCEPTION(EX_Overflow);

  if ( !masked )
    return 0;

  /* The overflow exception is masked.
     By definition, precision is lost.
     The roundup bit (C1) is also set because we have
     "rounded" upwards to Infinity. */
  EXCEPTION(EX_Precision | SW_C1);
  return !(control_word & CW_Precision);
}
 
 
/*
 * Handle an arithmetic underflow of the value in dest.
 *
 * Masked: if the exponent is at or below EXP_UNDER - 63 the result becomes
 * zero and C1 is cleared; EX_Underflow and then EX_Precision are raised and
 * nz is returned if the precision exception is unmasked.
 * Unmasked: the exponent of dest is increased by 3 * 2^13, EX_Underflow is
 * raised, and 0 is returned.
 */
asmlinkage int arith_underflow(FPU_REG *dest)
{
  const int masked = (control_word & CW_Underflow) != 0;

  if ( !masked )
    {
      /* Add the magic number to the exponent. */
      dest->exp += (3 * (1 << 13));
    }
  else if ( dest->exp <= EXP_UNDER - 63 )
    {
      /* The masked response */
      reg_move(&CONST_Z, dest);
      partial_status &= ~SW_C1;       /* Round down. */
    }

  EXCEPTION(EX_Underflow);

  if ( !masked )
    return 0;

  /* The underflow exception is masked. */
  EXCEPTION(EX_Precision);
  return !(control_word & CW_Precision);
}
 
 
/*
 * Stack overflow: with the invalid exception masked, move the stack top
 * down by one and load a quiet NaN into the new st(0).  EX_StackOver is
 * raised either way.
 */
void stack_overflow(void)
{
  const int masked = (control_word & CW_Invalid) != 0;

  if ( masked )
    {
      /* The masked response */
      top--;
      reg_move(&CONST_QNaN, &st(0));
    }

  EXCEPTION(EX_StackOver);
}
 
 
/*
 * Stack underflow on st(0): with the invalid exception masked, st(0) is
 * loaded with a quiet NaN.  EX_StackUnder is raised either way.
 */
void stack_underflow(void)
{
  const int masked = (control_word & CW_Invalid) != 0;

  if ( masked )
    {
      /* The masked response */
      reg_move(&CONST_QNaN, &st(0));
    }

  EXCEPTION(EX_StackUnder);
}
 
 
/*
 * Stack underflow on st(i): with the invalid exception masked, st(i) is
 * loaded with a quiet NaN.  EX_StackUnder is raised either way.
 */
void stack_underflow_i(int i)
{
  const int masked = (control_word & CW_Invalid) != 0;

  if ( masked )
    {
      /* The masked response */
      reg_move(&CONST_QNaN, &(st(i)));
    }

  EXCEPTION(EX_StackUnder);
}
 
 
/*
 * Stack underflow on st(i) for a popping instruction: with the invalid
 * exception masked, st(i) is loaded with a quiet NaN and the stack is
 * popped.  EX_StackUnder is raised either way.
 */
void stack_underflow_pop(int i)
{
  const int masked = (control_word & CW_Invalid) != 0;

  if ( masked )
    {
      /* The masked response */
      reg_move(&CONST_QNaN, &(st(i)));
      pop();
    }

  EXCEPTION(EX_StackUnder);
}
 
/wm_sqrt.S
0,0 → 1,471
.file "wm_sqrt.S"
/*---------------------------------------------------------------------------+
| wm_sqrt.S |
| |
| Fixed point arithmetic square root evaluation. |
| |
| Copyright (C) 1992,1993,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| void wm_sqrt(FPU_REG *n, unsigned int control_word) |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| wm_sqrt(FPU_REG *n, unsigned int control_word) |
| returns the square root of n in n. |
| |
| Use Newton's method to compute the square root of a number, which must |
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
| Does not check the sign or tag of the argument. |
| Sets the exponent, but not the sign or tag of the result. |
| |
| The guess is kept in %esi:%edi |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "fpu_emu.h"
 
 
/*
 * Working storage for wm_sqrt: a four-word accumulator and the three-word
 * de-normalised argument.  The re-entrant build addresses them as negative
 * offsets from %ebp; the non-re-entrant build places them in .data.
 */
#ifndef NON_REENTRANT_FPU
/* Local storage on the stack: */
#define FPU_accum_3	-4(%ebp)	/* ms word */
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */

#else
/* Local storage in a static area: */
.data
	.align 4,0
FPU_accum_3:
	.long	0		/* ms word */
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0

/* The de-normalised argument:
                  sq_2                  sq_1              sq_0
        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
           ^ binary point here
 */
FPU_fsqrt_arg_2:
	.long	0		/* ms word */
FPU_fsqrt_arg_1:
	.long	0
FPU_fsqrt_arg_0:
	.long	0		/* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */
 
 
/*
 * wm_sqrt: square root of the significand of the FPU_REG passed as the
 * first parameter, computed in three refinement stages, then handed to
 * fpu_reg_round_sqrt for rounding.
 *
 * Register use after the first estimate:
 *   %esi      ms word of the current guess
 *   %edi      ls word of the current guess (from stage 2 onwards)
 *   %eax      extra low-order bits of the guess used as rounding
 *             information (from stage 3 onwards)
 *
 * The routine does not return directly: every path ends with a jump to
 * fpu_reg_round_sqrt.
 * NOTE(review): presumably fpu_reg_round_sqrt restores %ebx/%edi/%esi and
 * the frame set up here -- confirm against reg_round.S.
 */
.text
ENTRY(wm_sqrt)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
/* Reserve room for the seven stack-based local words. */
subl $28,%esp
#endif NON_REENTRANT_FPU
pushl %esi
pushl %edi
pushl %ebx

/* %esi = pointer to the argument register (first parameter). */
movl PARAM1,%esi

/* %eax:%ecx:%edx = significand of the argument with a zero low word. */
movl SIGH(%esi),%eax
movl SIGL(%esi),%ecx
xorl %edx,%edx

/* We use a rough linear estimate for the first guess.. */

cmpl EXP_BIAS,EXP(%esi)
jnz sqrt_arg_ge_2

shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
rcrl $1,%ecx
rcrl $1,%edx

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
replaced by the answer. */

movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
movl %ecx,FPU_fsqrt_arg_1
movl %edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
shrl $1,%eax
addl $0x40000000,%eax
movl $0xaaaaaaaa,%ecx
mull %ecx
shll %edx /* max result was 7fff... */
testl $0x80000000,%edx /* but min was 3fff... */
jnz sqrt_prelim_no_adjust

movl $0x80000000,%edx /* round up */

sqrt_prelim_no_adjust:
movl %edx,%esi /* Our first guess */

/* We have now computed (approx) (2 + x) / 3, which forms the basis
for a few iterations of Newton's method */

movl FPU_fsqrt_arg_2,%ecx /* ms word */

/*
* From our initial estimate, three iterations are enough to get us
* to 30 bits or so. This will then allow two iterations at better
* precision to complete the process.
*/

/* Compute (g + n/g)/2 at each iteration (g is the guess). */
shrl %ecx /* Doing this first will prevent a divide */
/* overflow later. */

/* Iteration 1.  %eax still holds the low word left by the mull above. */
movl %ecx,%edx /* msw of the arg / 2 */
divl %esi /* current estimate */
shrl %esi /* divide by 2 */
addl %eax,%esi /* the new estimate */

/* Iteration 2. */
movl %ecx,%edx
divl %esi
shrl %esi
addl %eax,%esi

/* Iteration 3. */
movl %ecx,%edx
divl %esi
shrl %esi
addl %eax,%esi

/*
* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
* we improve it to 60 bits or so.
*
* The strategy from now on is to compute new estimates from
* guess := guess + (n - guess^2) / (2 * guess)
*/

/* First, find the square of the guess */
movl %esi,%eax
mull %esi
/* guess^2 now in %edx:%eax */

/* %edx:%eax = guess^2 - (upper two words of n); carry set if negative. */
movl FPU_fsqrt_arg_1,%ecx
subl %ecx,%eax
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
sbbl %ecx,%edx
jnc sqrt_stage_2_positive

/* Subtraction gives a negative result,
negate the result before division. */
notl %edx
notl %eax
addl $1,%eax
adcl $0,%edx

/* Two-step division by the guess: quotient words end up in %ecx:%eax. */
divl %esi
movl %eax,%ecx

movl %edx,%eax
divl %esi
jmp sqrt_stage_2_finish

sqrt_stage_2_positive:
divl %esi
movl %eax,%ecx

movl %edx,%eax
divl %esi

/* guess^2 exceeded n, so the correction %ecx:%eax must be negated. */
notl %ecx
notl %eax
addl $1,%eax
adcl $0,%ecx

sqrt_stage_2_finish:
sarl $1,%ecx /* divide by 2 */
rcrl $1,%eax

/* Form the new estimate in %esi:%edi */
movl %eax,%edi
addl %ecx,%esi

jnz sqrt_stage_2_done /* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
cmp $0xffffffff,FPU_fsqrt_arg_1
jnz sqrt_stage_2_error
#endif PARANOID

/* The best rounded result. */
/* %esi:%edi = ffffffff:ffffffff, rounding bits 7fffffff. */
xorl %eax,%eax
decl %eax
movl %eax,%edi
movl %eax,%esi
movl $0x7fffffff,%eax
jmp sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
/* NOTE(review): after the call, execution falls through into
   sqrt_stage_2_done without removing the pushed argument -- confirm
   this is intended. */
pushl EX_INTERNAL|0x213
call EXCEPTION
#endif PARANOID

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/* Find the square of the guess. */
movl %edi,%eax /* ls word of guess */
mull %edi
movl %edx,FPU_accum_1

movl %esi,%eax
mull %esi
movl %edx,FPU_accum_3
movl %eax,FPU_accum_2

/* Cross term ls*ms, added in twice below. */
movl %edi,%eax
mull %esi
addl %eax,FPU_accum_1
adcl %edx,FPU_accum_2
adcl $0,FPU_accum_3

/* The second cross term is the same product, still in %edx:%eax, so the
   multiply is not repeated. */
/* movl %esi,%eax */
/* mull %edi */
addl %eax,FPU_accum_1
adcl %edx,FPU_accum_2
adcl $0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

movl FPU_fsqrt_arg_0,%eax /* get normalized n */
subl %eax,FPU_accum_1
movl FPU_fsqrt_arg_1,%eax
sbbl %eax,FPU_accum_2
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
sbbl %eax,FPU_accum_3
jnc sqrt_stage_3_positive

/* Subtraction gives a negative result,
negate the result before division */
notl FPU_accum_1
notl FPU_accum_2
notl FPU_accum_3
addl $1,FPU_accum_1
adcl $0,FPU_accum_2

#ifdef PARANOID
adcl $0,FPU_accum_3 /* This must be zero */
jz sqrt_stage_3_no_error

sqrt_stage_3_error:
pushl EX_INTERNAL|0x207
call EXCEPTION

sqrt_stage_3_no_error:
#endif PARANOID

/* Divide the two-word difference by the ms word of the guess. */
movl FPU_accum_2,%edx
movl FPU_accum_1,%eax
divl %esi
movl %eax,%ecx

movl %edx,%eax
divl %esi

sarl $1,%ecx /* divide by 2 */
rcrl $1,%eax

/* prepare to round the result */

/* guess^2 was below n: add the correction to the guess. */
addl %ecx,%edi
adcl $0,%esi

jmp sqrt_stage_3_finished

sqrt_stage_3_positive:
movl FPU_accum_2,%edx
movl FPU_accum_1,%eax
divl %esi
movl %eax,%ecx

movl %edx,%eax
divl %esi

sarl $1,%ecx /* divide by 2 */
rcrl $1,%eax

/* prepare to round the result */

notl %eax /* Negate the correction term */
notl %ecx
addl $1,%eax
adcl $0,%ecx /* carry here ==> correction == 0 */
adcl $0xffffffff,%esi

addl %ecx,%edi
adcl $0,%esi

sqrt_stage_3_finished:

/*
* The result in %esi:%edi:%eax should be good to about 90 bits here,
* and the rounding information here does not have sufficient accuracy
* in a few rare cases.
*/
/* %eax within 0x20 of 0 (either side) or of 0x80000000 needs an exact
   comparison; everything else can be rounded directly. */
cmpl $0xffffffe0,%eax
ja sqrt_near_exact_x

cmpl $0x00000020,%eax
jb sqrt_near_exact

cmpl $0x7fffffe0,%eax
jb sqrt_round_result

cmpl $0x80000020,%eax
jb sqrt_get_more_precision

sqrt_round_result:
/* Set up for rounding operations */
/* %eax:%ebx = result significand, %edx = rounding bits,
   %edi = destination register, %ecx = second parameter. */
movl %eax,%edx
movl %esi,%eax
movl %edi,%ebx
movl PARAM1,%edi
movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
movl PARAM2,%ecx
jmp fpu_reg_round_sqrt


sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
addl $1,%edi
adcl $0,%esi

sqrt_near_exact:
/*
* This is an easy case because x^1/2 is monotonic.
* We need just find the square of our estimate, compare it
* with the argument, and deduce whether our estimate is
* above, below, or exact. We use the fact that the estimate
* is known to be accurate to about 90 bits.
*/
movl %edi,%eax /* ls word of guess */
mull %edi
movl %edx,%ebx /* 2nd ls word of square */
movl %eax,%ecx /* ls word of square */

movl %edi,%eax
mull %esi
addl %eax,%ebx
addl %eax,%ebx

#ifdef PARANOID
cmp $0xffffffb0,%ebx
jb sqrt_near_exact_ok

cmp $0x00000050,%ebx
ja sqrt_near_exact_ok

pushl EX_INTERNAL|0x214
call EXCEPTION

sqrt_near_exact_ok:
#endif PARANOID

or %ebx,%ebx
js sqrt_near_exact_small

jnz sqrt_near_exact_large

/* NOTE(review): the matching test under sqrt_get_more_precision uses
   %ecx (the ls word of the square); here %edx holds the high word of the
   preceding mull -- confirm that %edx is the intended register. */
or %ebx,%edx
jnz sqrt_near_exact_large

/* Our estimate is exactly the right answer */
xorl %eax,%eax
jmp sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
movl $0x000000ff,%eax
jmp sqrt_round_result
sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
subl $1,%edi
sbbl $0,%esi
movl $0xffffff00,%eax
jmp sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
with an extra bit of precision in the estimate. */
stc /* The extra bit. */
rcll $1,%edi /* Shift the estimate left one bit */
rcll $1,%esi

movl %edi,%eax /* ls word of guess */
mull %edi
movl %edx,%ebx /* 2nd ls word of square */
movl %eax,%ecx /* ls word of square */

movl %edi,%eax
mull %esi
addl %eax,%ebx
addl %eax,%ebx

/* Put our estimate back to its original value */
stc /* The ms bit. */
rcrl $1,%esi /* Shift the estimate right one bit */
rcrl $1,%edi

#ifdef PARANOID
cmp $0xffffff60,%ebx
jb sqrt_more_prec_ok

cmp $0x000000a0,%ebx
ja sqrt_more_prec_ok

pushl EX_INTERNAL|0x215
call EXCEPTION

sqrt_more_prec_ok:
#endif PARANOID

or %ebx,%ebx
js sqrt_more_prec_small

jnz sqrt_more_prec_large

or %ebx,%ecx
jnz sqrt_more_prec_large

/* Our estimate is exactly the right answer */
movl $0x80000000,%eax
jmp sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
movl $0x800000ff,%eax
jmp sqrt_round_result
sqrt_more_prec_large:
/* Our estimate is too large */
movl $0x7fffff00,%eax
jmp sqrt_round_result
/status_w.h
0,0 → 1,65
/*---------------------------------------------------------------------------+
| status_w.h |
| |
| Copyright (C) 1992,1993 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
+---------------------------------------------------------------------------*/
 
#ifndef _STATUS_H_
#define _STATUS_H_

#include "fpu_emu.h"    /* for definition of PECULIAR_486 */

/* In assembler sources the constants need a leading `$' (immediate
   operand); in C they are plain integers. */
#ifdef __ASSEMBLY__
#define	Const__(x)	$##x
#else
#define	Const__(x)	x
#endif

/* Bit layout of the FPU status word. */
#define SW_Backward    	Const__(0x8000)	/* backward compatibility */
#define SW_C3		Const__(0x4000)	/* condition bit 3 */
#define SW_Top		Const__(0x3800)	/* top of stack */
#define SW_Top_Shift 	Const__(11)	/* shift for top of stack bits */
#define SW_C2		Const__(0x0400)	/* condition bit 2 */
#define SW_C1		Const__(0x0200)	/* condition bit 1 */
#define SW_C0		Const__(0x0100)	/* condition bit 0 */
#define SW_Summary     	Const__(0x0080)	/* exception summary */
#define SW_Stack_Fault	Const__(0x0040)	/* stack fault */
#define SW_Precision   	Const__(0x0020)	/* loss of precision */
#define SW_Underflow   	Const__(0x0010)	/* underflow */
#define SW_Overflow    	Const__(0x0008)	/* overflow */
#define SW_Zero_Div    	Const__(0x0004)	/* divide by zero */
#define SW_Denorm_Op   	Const__(0x0002)	/* denormalized operand */
#define SW_Invalid     	Const__(0x0001)	/* invalid operation */

#define SW_Exc_Mask     Const__(0x27f)  /* Status word exception bit mask */

#ifndef __ASSEMBLY__

/* Result codes of the comparison routines; the three flag values may be
   or'ed onto one of the four basic results. */
#define COMP_A_gt_B	1
#define COMP_A_eq_B	2
#define COMP_A_lt_B	3
#define COMP_No_Comp	4
#define COMP_Denormal   0x20
#define COMP_NaN	0x40
#define COMP_SNaN	0x80

/* The full status word: partial_status with the stack-top field replaced
   by the current value of `top'. */
#define status_word() \
  ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
/* Replace the four condition-code bits of partial_status with those in cc. */
#define setcc(cc) ({ \
  partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \
  partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); })

#ifdef PECULIAR_486
   /* Default, this conveys no information, but an 80486 does it. */
   /* Clear the SW_C1 bit, "other bits undefined". */
#  define clear_C1()  { partial_status &= ~SW_C1; }
# else
#  define clear_C1()
#endif /* PECULIAR_486 */

#endif /* __ASSEMBLY__ */

#endif /* _STATUS_H_ */
/poly_l2.c
0,0 → 1,255
/*---------------------------------------------------------------------------+
| poly_l2.c |
| |
| Compute the base 2 log of a FPU_REG, using a polynomial approximation. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
 
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "poly.h"
 
 
 
static void log2_kernel(FPU_REG const *arg,
Xsig *accum_result, long int *expon);
 
 
/*--- poly_l2() -------------------------------------------------------------+
 |   Base 2 logarithm by a polynomial approximation.                         |
 |                                                                           |
 |   arg:    the number whose logarithm is taken; its tag and sign are not   |
 |           examined here.                                                  |
 |   y:      multiplier applied to the logarithm (significand, exponent and  |
 |           sign are all combined into the result).                         |
 |   result: receives the product.  It may be the same register as y.        |
 +---------------------------------------------------------------------------*/
void	poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result)
{
  long int	       exponent, expon, expon_expon;
  Xsig                 accumulator, expon_accum, yaccum;
  char		       sign;
  FPU_REG              x;


  exponent = arg->exp - EXP_BIAS;

  /* From arg, make a number > sqrt(2)/2 and < sqrt(2) */
  if ( arg->sigh > (unsigned)0xb504f334 )
    {
      /* Treat as  sqrt(2)/2 < arg < 1 */
      significand(&x) = - significand(arg);
      x.sign = SIGN_NEG;
      x.tag = TW_Valid;
      x.exp = EXP_BIAS-1;
      exponent++;
      normalize(&x);
    }
  else
    {
      /* Treat as  1 <= arg < sqrt(2) */
      x.sigh = arg->sigh - 0x80000000;
      x.sigl = arg->sigl;
      x.sign = SIGN_POS;
      x.tag = TW_Valid;
      x.exp = EXP_BIAS;
      normalize(&x);
    }

  if ( x.tag == TW_Zero )
    {
      /* Nothing left after removing the leading 1: the kernel is skipped. */
      expon = 0;
      accumulator.msw = accumulator.midw = accumulator.lsw = 0;
    }
  else
    {
      log2_kernel(&x, &accumulator, &expon);
    }

  /* Work with the magnitude of the integer part; remember its sign. */
  sign = exponent < 0;
  if ( sign ) exponent = -exponent;
  expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
  if ( exponent )
    {
      /* Align the kernel result with the normalised integer part and
	 combine the two. */
      expon_expon = 31 + norm_Xsig(&expon_accum);
      shr_Xsig(&accumulator, expon_expon - expon);

      if ( sign ^ (x.sign == SIGN_NEG) )
	negate_Xsig(&accumulator);
      add_Xsig_Xsig(&accumulator, &expon_accum);
    }
  else
    {
      expon_expon = expon;
      sign = x.sign;
    }

  /* Multiply by the significand of y. */
  yaccum.lsw = 0; XSIG_LL(yaccum) = significand(y);
  mul_Xsig_Xsig(&accumulator, &yaccum);

  expon_expon += round_Xsig(&accumulator);

  if ( accumulator.msw == 0 )
    {
      /* The zero belongs in the destination; y is a read-only input. */
      reg_move(&CONST_Z, result);
    }
  else
    {
      result->exp = expon_expon + y->exp + 1;
      significand(result) = XSIG_LL(accumulator);
      result->tag = TW_Valid; /* set the tags to Valid */
      result->sign = sign ^ y->sign;
    }

  return;
}
 
 
/*--- poly_l2p1() -----------------------------------------------------------+
 |   Base 2 logarithm by a polynomial approximation.                         |
 |   log2(x+1)                                                               |
 |                                                                           |
 |   Returns 0 on success.  If the exponent of arg is not below EXP_BIAS,    |
 |   y is copied to result and 1 is returned for a non-positive-signed arg,  |
 |   0 otherwise.                                                            |
 +---------------------------------------------------------------------------*/
int	poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result)
{
  const char	arg_sign = arg->sign;
  long int	exp2;
  Xsig		acc, y_sig;

  if ( arg->exp >= EXP_BIAS )
    {
      /* The magnitude of arg is far too large. */
      reg_move(y, result);

      /* Non-zero means: trying to get the log of a negative number. */
      return arg_sign != SIGN_POS;
    }

  log2_kernel(arg, &acc, &exp2);

  /* Scale by the significand of y. */
  y_sig.lsw = 0;
  XSIG_LL(y_sig) = significand(y);
  mul_Xsig_Xsig(&acc, &y_sig);

  exp2 += round_Xsig(&acc);

  /* y is read before each field of result is written, so result may
     be the same register as y. */
  result->exp = exp2 + y->exp + 1;
  significand(result) = XSIG_LL(acc);
  result->tag = TW_Valid; /* set the tags to Valid */
  result->sign = arg_sign ^ y->sign;

  return 0;
}
 
 
 
 
/* Number of entries in logterms[]. */
#undef HIPOWER
#define HIPOWER 10
/* Fixed-point polynomial coefficients; log2_kernel() passes this table to
   polynomial_Xsig() together with the squared argument. */
static const unsigned long long logterms[HIPOWER] =
{
0x2a8eca5705fc2ef0LL,
0xf6384ee1d01febceLL,
0x093bb62877cdf642LL,
0x006985d8a9ec439bLL,
0x0005212c4f55a9c8LL,
0x00004326a16927f0LL,
0x0000038d1d80a0e7LL,
0x0000003141cc80c6LL,
0x00000002b1668c9fLL,
0x000000002c7a46aaLL
};

/* 32-bit factor applied to the argument significand by mul32_Xsig() in
   log2_kernel() before it is added to the polynomial result. */
static const unsigned long leadterm = 0xb8000000;
 
 
/*--- log2_kernel() ---------------------------------------------------------+
| Base 2 logarithm by a polynomial approximation. |
| log2(x+1) |
| |
| Shared by poly_l2() and poly_l2p1(). |
| arg: the input; its sign selects how the denominator is formed. |
| accum_result: receives the three words of the computed accumulator. |
| expon: receives the exponent that goes with accum_result. |
+---------------------------------------------------------------------------*/
static void log2_kernel(FPU_REG const *arg, Xsig *accum_result,
long int *expon)
{
char sign;
long int exponent, adj;
unsigned long long Xsq;
Xsig accumulator, Numer, Denom, argSignif, arg_signif;

sign = arg->sign;

exponent = arg->exp - EXP_BIAS;
/* Numerator and denominator both start out as the significand of arg. */
Numer.lsw = Denom.lsw = 0;
XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
if ( sign == SIGN_POS )
{
/* Shift the denominator, set its top bit, then divide. */
shr_Xsig(&Denom, 2 - (1 + exponent));
Denom.msw |= 0x80000000;
div_Xsig(&Numer, &Denom, &argSignif);
}
else
{
/* Shift and negate the denominator; divide only if its top bit is set. */
shr_Xsig(&Denom, 1 - (1 + exponent));
negate_Xsig(&Denom);
if ( Denom.msw & 0x80000000 )
{
div_Xsig(&Numer, &Denom, &argSignif);
exponent ++;
}
else
{
/* Denom must be 1.0 */
argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
argSignif.msw = Numer.msw;
}
}

#ifndef PECULIAR_486
/* Should check here that |local_arg| is within the valid range */
/* The range test below currently takes no action: its body is empty. */
if ( exponent >= -2 )
{
if ( (exponent > -2) ||
(argSignif.msw > (unsigned)0xafb0ccc0) )
{
/* The argument is too large */
}
}
#endif PECULIAR_486

/* Keep an un-normalised copy of the quotient for the leading term. */
arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
adj = norm_Xsig(&argSignif);
/* Square the normalised quotient and shift it into place. */
accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
mul_Xsig_Xsig(&accumulator, &accumulator);
shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
/* Take the upper 64 bits of the square, rounded using the top bit of lsw. */
Xsq = XSIG_LL(accumulator);
if ( accumulator.lsw & 0x80000000 )
Xsq++;

accumulator.msw = accumulator.midw = accumulator.lsw = 0;
/* Do the basic fixed point polynomial evaluation */
polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);

mul_Xsig_Xsig(&accumulator, &argSignif);
shr_Xsig(&accumulator, 6 - adj);

/* Add the leading term: the quotient scaled by leadterm. */
mul32_Xsig(&arg_signif, leadterm);
add_two_Xsig(&accumulator, &arg_signif, &exponent);

/* Hand the result back to the caller. */
*expon = exponent + 1;
accum_result->lsw = accumulator.lsw;
accum_result->midw = accumulator.midw;
accum_result->msw = accumulator.msw;

}
/fpu_proto.h
0,0 → 1,137
/* errors.c */
extern void Un_impl(void);
extern void FPU_illegal(void);
extern void emu_printall(void);
extern void stack_overflow(void);
extern void stack_underflow(void);
extern void stack_underflow_i(int i);
extern void stack_underflow_pop(int i);
extern int set_precision_flag(int flags);
asmlinkage void exception(int n);
asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest);
asmlinkage int arith_invalid(FPU_REG *dest);
asmlinkage int divide_by_zero(int sign, FPU_REG *dest);
asmlinkage void set_precision_flag_up(void);
asmlinkage void set_precision_flag_down(void);
asmlinkage int denormal_operand(void);
asmlinkage int arith_overflow(FPU_REG *dest);
asmlinkage int arith_underflow(FPU_REG *dest);
 
/* fpu_arith.c */
extern void fadd__(void);
extern void fmul__(void);
extern void fsub__(void);
extern void fsubr_(void);
extern void fdiv__(void);
extern void fdivr_(void);
extern void fadd_i(void);
extern void fmul_i(void);
extern void fsubri(void);
extern void fsub_i(void);
extern void fdivri(void);
extern void fdiv_i(void);
extern void faddp_(void);
extern void fmulp_(void);
extern void fsubrp(void);
extern void fsubp_(void);
extern void fdivrp(void);
extern void fdivp_(void);
 
/* fpu_aux.c */
extern void fclex(void);
extern void finit(void);
extern void finit_(void);
extern void fstsw_(void);
extern void fp_nop(void);
extern void fld_i_(void);
extern void fxch_i(void);
extern void ffree_(void);
extern void ffreep(void);
extern void fst_i_(void);
extern void fstp_i(void);
 
/* fpu_entry.c */
asmlinkage void math_emulate(long arg);
extern void math_abort(struct info *info, unsigned int signal);
 
/* fpu_etc.c */
extern void fp_etc(void);
 
/* fpu_trig.c */
extern void convert_l2reg(long const *arg, FPU_REG *dest);
extern void trig_a(void);
extern void trig_b(void);
 
/* get_address.c */
extern void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip,
struct address *addr,
fpu_addr_modes);
extern void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip,
struct address *addr,
fpu_addr_modes);
 
/* load_store.c */
extern int load_store_instr(unsigned char type, fpu_addr_modes addr_modes,
void *address);
 
/* poly_2xm1.c */
extern int poly_2xm1(FPU_REG const *arg, FPU_REG *result);
 
/* poly_atan.c */
extern void poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result);
 
/* poly_l2.c */
extern void poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result);
extern int poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result);
 
/* poly_sin.c */
extern void poly_sine(FPU_REG const *arg, FPU_REG *result);
extern void poly_cos(FPU_REG const *arg, FPU_REG *result);
 
/* poly_tan.c */
extern void poly_tan(FPU_REG const *arg, FPU_REG *result);
 
/* reg_add_sub.c */
extern int reg_add(FPU_REG const *a, FPU_REG const *b,
FPU_REG *dest, int control_w);
extern int reg_sub(FPU_REG const *a, FPU_REG const *b,
FPU_REG *dest, int control_w);
 
/* reg_compare.c */
extern int compare(FPU_REG const *b);
extern int compare_st_data(FPU_REG const *b);
extern void fcom_st(void);
extern void fcompst(void);
extern void fcompp(void);
extern void fucom_(void);
extern void fucomp(void);
extern void fucompp(void);
 
/* reg_constant.c */
extern void fconst(void);
 
/* reg_ld_str.c */
extern int reg_load_extended(long double *addr, FPU_REG *loaded_data);
extern int reg_load_double(double *dfloat, FPU_REG *loaded_data);
extern int reg_load_single(float *single, FPU_REG *loaded_data);
extern void reg_load_int64(long long *_s, FPU_REG *loaded_data);
extern void reg_load_int32(long *_s, FPU_REG *loaded_data);
extern void reg_load_int16(short *_s, FPU_REG *loaded_data);
extern void reg_load_bcd(char *s, FPU_REG *loaded_data);
extern int reg_store_extended(long double *d, FPU_REG *st0_ptr);
extern int reg_store_double(double *dfloat, FPU_REG *st0_ptr);
extern int reg_store_single(float *single, FPU_REG *st0_ptr);
extern int reg_store_int64(long long *d, FPU_REG *st0_ptr);
extern int reg_store_int32(long *d, FPU_REG *st0_ptr);
extern int reg_store_int16(short *d, FPU_REG *st0_ptr);
extern int reg_store_bcd(char *d, FPU_REG *st0_ptr);
extern int round_to_int(FPU_REG *r);
extern char *fldenv(fpu_addr_modes addr_modes, char *address);
extern void frstor(fpu_addr_modes addr_modes, char *address);
extern unsigned short tag_word(void);
extern char *fstenv(fpu_addr_modes addr_modes, char *address);
extern void fsave(fpu_addr_modes addr_modes, char *address);
 
/* reg_mul.c */
extern int reg_mul(FPU_REG const *a, FPU_REG const *b,
FPU_REG *dest, unsigned int control_w);
/fpu_etc.c
0,0 → 1,129
/*---------------------------------------------------------------------------+
| fpu_etc.c |
| |
| Implement a few FPU instructions. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_system.h"
#include "exception.h"
#include "fpu_emu.h"
#include "status_w.h"
#include "reg_constant.h"
 
 
/* Change the sign of st(0); an empty st(0) is a stack underflow. */
static void fchs(FPU_REG *st0_ptr)
{
  if ( st0_ptr->tag == TW_Empty )
    {
      stack_underflow();
      return;
    }

  /* Flip between SIGN_POS and SIGN_NEG. */
  st0_ptr->sign ^= SIGN_POS^SIGN_NEG;
  clear_C1();
}
 
/* Make st(0) positive; an empty st(0) is a stack underflow. */
static void fabs(FPU_REG *st0_ptr)
{
  if ( st0_ptr->tag == TW_Empty )
    {
      stack_underflow();
      return;
    }

  st0_ptr->sign = SIGN_POS;
  clear_C1();
}
 
 
/*
 * Test st(0) and set the condition codes according to its tag and sign:
 *   zero                      -> C3
 *   valid / infinity, sign +  -> none
 *   valid / infinity, sign -  -> C0
 *   NaN, empty, unknown tag   -> C0|C2|C3, and an exception is raised
 */
static void ftst_(FPU_REG *st0_ptr)
{
  switch (st0_ptr->tag)
    {
    case TW_Zero:
      setcc(SW_C3);
      break;
    case TW_Valid:
      if (st0_ptr->sign == SIGN_POS)
        setcc(0);
      else
        setcc(SW_C0);

#ifdef DENORM_OPERAND
      /* A denormal operand with the denormal exception unmasked ends
	 the instruction here. */
      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	{
#ifdef PECULIAR_486
	  /* This is weird! */
	  if (st0_ptr->sign == SIGN_POS)
	    setcc(SW_C3);
#endif /* PECULIAR_486 */
	  return;
	}
#endif /* DENORM_OPERAND */

      break;
    case TW_NaN:
      setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */
      EXCEPTION(EX_Invalid);
      break;
    case TW_Infinity:
      if (st0_ptr->sign == SIGN_POS)
        setcc(0);
      else
        setcc(SW_C0);
      break;
    case TW_Empty:
      setcc(SW_C0|SW_C2|SW_C3);
      EXCEPTION(EX_StackUnder);
      break;
    default:
      setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */
      EXCEPTION(EX_INTERNAL|0x14);
      break;
    }
}
 
/*
 * Examine st(0): encode its class in C3/C2/C0 and its sign in C1.
 * A tag not listed below leaves the class bits all clear.
 */
static void fxam(FPU_REG *st0_ptr)
{
  const int tag = st0_ptr->tag;
  int cc = 0;

  if ( tag == TW_Empty )
    cc = SW_C3|SW_C0;
  else if ( tag == TW_Zero )
    cc = SW_C3;
  else if ( tag == TW_Valid )
    {
      /* This will need to be changed if TW_Denormal is ever used. */
      cc = (st0_ptr->exp <= EXP_UNDER)
	? (SW_C2|SW_C3)		/* Denormal */
	: SW_C2;
    }
  else if ( tag == TW_NaN )
    cc = SW_C0;
  else if ( tag == TW_Infinity )
    cc = SW_C2|SW_C0;

  if ( st0_ptr->sign == SIGN_NEG )
    cc |= SW_C1;

  setcc(cc);
}
 
 
/* Handlers indexed by FPU_rm; unused slots route to FPU_illegal. */
static FUNC_ST0 const fp_etc_table[] = {
  fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
  ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
};

/*
 * Dispatch to the handler selected by FPU_rm, passing it st(0).
 * Declared with (void) so the definition is a real prototype matching the
 * declaration in fpu_proto.h.
 */
void fp_etc(void)
{
  (fp_etc_table[FPU_rm])(&st(0));
}
/poly_atan.c
0,0 → 1,197
/*---------------------------------------------------------------------------+
| poly_atan.c |
| |
| Compute the arctan of a FPU_REG, using a polynomial approximation. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "status_w.h"
#include "control_w.h"
#include "poly.h"
 
 
/* Coefficient table handed to polynomial_Xsig() in poly_atan(); the
   accumulator is negated before and after that evaluation. */
#define HIPOWERon 6 /* odd poly, negative terms */
static const unsigned long long oddnegterms[HIPOWERon] =
{
0x0000000000000000LL, /* Dummy (not for - 1.0) */
0x015328437f756467LL,
0x0005dda27b73dec6LL,
0x0000226bf2bfb91aLL,
0x000000ccc439c5f7LL,
0x0000000355438407LL
} ;

/* Coefficient table for the first polynomial_Xsig() call in poly_atan(). */
#define HIPOWERop 6 /* odd poly, positive terms */
static const unsigned long long oddplterms[HIPOWERop] =
{
/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */
0x0db55a71875c9ac2LL,
0x0029fce2d67880b0LL,
0x0000dfd3908b4596LL,
0x00000550fd61dab4LL,
0x0000001c9422b3f9LL,
0x000000003e3301e1LL
};

/* 64-bit factor multiplied into the squared argument (mul64_Xsig) to form
   the divisor used by poly_atan(). */
static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL;

/* Constant added to the polynomial sum via add_two_Xsig() in poly_atan(). */
static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);

/* Significand of pi, used by poly_atan() for the pi/4, pi/2 and pi
   adjustments. */
static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
 
 
/*--- poly_atan() -----------------------------------------------------------+
| |
+---------------------------------------------------------------------------*/
/*
Compute an arctangent from the two operands and store it in *result as a
TW_Valid number.

The significands of arg1 and arg2 are divided (which one is the numerator
is decided by compare(arg2)), the quotient is optionally reduced by an
atan identity, a rational/polynomial approximation is evaluated on 96-bit
Xsig values, and the reductions are then undone:
transformed -> pi/4 - x, inverted -> pi/2 - x, arg1 negative -> pi - x.
The result takes the sign that arg2 had on entry.

NOTE(review): taken together this looks like atan2(arg2, arg1) -- confirm
against the caller.

Side effects: arg1->sign and arg2->sign are overwritten with SIGN_POS and
are not restored by this function.  In PARANOID builds an internal
consistency failure raises EX_INTERNAL|0x104 and returns without writing
*result.
*/
void poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result)
{
char transformed, inverted,
sign1 = arg1->sign, sign2 = arg2->sign;
long int exponent, dummy_exp;
Xsig accumulator, Numer, Denom, accumulatore, argSignif,
argSq, argSqSq;
 
/* Work on magnitudes only; the original signs were captured above. */
arg1->sign = arg2->sign = SIGN_POS;
/* compare() is defined elsewhere; presumably it compares st(0) with arg2
   so that the quotient formed below never exceeds 1.0 -- TODO confirm.
   The COMP_Denormal flag is masked off before the test. */
if ( (compare(arg2) & ~COMP_Denormal) == COMP_A_lt_B )
{
inverted = 1;
exponent = arg1->exp - arg2->exp;
Numer.lsw = Denom.lsw = 0;
XSIG_LL(Numer) = significand(arg1);
XSIG_LL(Denom) = significand(arg2);
}
else
{
inverted = 0;
exponent = arg2->exp - arg1->exp;
Numer.lsw = Denom.lsw = 0;
XSIG_LL(Numer) = significand(arg2);
XSIG_LL(Denom) = significand(arg1);
}
div_Xsig(&Numer, &Denom, &argSignif);
/* norm_Xsig() returns the (non-positive) shift it applied. */
exponent += norm_Xsig(&argSignif);
 
if ( (exponent >= -1)
|| ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
{
/* The argument is greater than sqrt(2)-1 (=0.414213562...) */
/* Convert the argument by an identity for atan */
transformed = 1;
 
if ( exponent >= 0 )
{
#ifdef PARANOID
/* The only quotient allowed to reach exponent 0 is exactly 1.0. */
if ( !( (exponent == 0) &&
(argSignif.lsw == 0) && (argSignif.midw == 0) &&
(argSignif.msw == 0x80000000) ) )
{
EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */
return;
}
#endif PARANOID
argSignif.msw = 0; /* Make the transformed arg -> 0.0 */
}
else
{
/* Build Numer = -(arg aligned to exponent -1) and
   Denom = arg aligned to exponent 0 with the top bit forced on,
   then replace the argument by Numer/Denom. */
Numer.lsw = Denom.lsw = argSignif.lsw;
XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
 
if ( exponent < -1 )
shr_Xsig(&Numer, -1-exponent);
negate_Xsig(&Numer);
shr_Xsig(&Denom, -exponent);
Denom.msw |= 0x80000000;
div_Xsig(&Numer, &Denom, &argSignif);
 
exponent = -1 + norm_Xsig(&argSignif);
}
}
else
{
transformed = 0;
}
 
/* argSq = arg^2, argSqSq = arg^4 (each multiplied in place by itself). */
argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
argSq.msw = argSignif.msw;
mul_Xsig_Xsig(&argSq, &argSq);
argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
mul_Xsig_Xsig(&argSqSq, &argSqSq);
 
/* Keep an unshifted copy of arg^2 for the denominator term below. */
accumulatore.lsw = argSq.lsw;
XSIG_LL(accumulatore) = XSIG_LL(argSq);
 
shr_Xsig(&argSq, 2*(-1-exponent-1));
shr_Xsig(&argSqSq, 4*(-1-exponent-1));
 
/* Now have argSq etc with binary point at the left
.1xxxxxxxx */
 
/* Do the basic fixed point polynomial evaluation */
accumulator.msw = accumulator.midw = accumulator.lsw = 0;
polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
oddplterms, HIPOWERop-1);
mul64_Xsig(&accumulator, &XSIG_LL(argSq));
negate_Xsig(&accumulator);
polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
negate_Xsig(&accumulator);
/* dummy_exp only receives the carry adjustment; it is not read later. */
add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
 
/* Denominator: denomterm * arg^2, aligned, with the top bit forced on. */
mul64_Xsig(&accumulatore, &denomterm);
shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
accumulatore.msw |= 0x80000000;
 
/* accumulator is used as both dividend and destination here. */
div_Xsig(&accumulator, &accumulatore, &accumulator);
 
mul_Xsig_Xsig(&accumulator, &argSignif);
mul_Xsig_Xsig(&accumulator, &argSq);
 
/* accumulator = argSignif - (accumulator >> 3) */
shr_Xsig(&accumulator, 3);
negate_Xsig(&accumulator);
add_Xsig_Xsig(&accumulator, &argSignif);
 
/* Undo the reductions.  Each step aligns the accumulator to the exponent
   at which pi_signif represents the wanted multiple of pi, negates it and
   adds pi_signif. */
if ( transformed )
{
/* compute pi/4 - accumulator */
shr_Xsig(&accumulator, -1-exponent);
negate_Xsig(&accumulator);
add_Xsig_Xsig(&accumulator, &pi_signif);
exponent = -1;
}
 
if ( inverted )
{
/* compute pi/2 - accumulator */
shr_Xsig(&accumulator, -exponent);
negate_Xsig(&accumulator);
add_Xsig_Xsig(&accumulator, &pi_signif);
exponent = 0;
}
 
if ( sign1 )
{
/* compute pi - accumulator */
shr_Xsig(&accumulator, 1 - exponent);
negate_Xsig(&accumulator);
add_Xsig_Xsig(&accumulator, &pi_signif);
exponent = 1;
}
 
/* Normalize and round to 64 bits, then pack the result register. */
exponent += round_Xsig(&accumulator);
significand(result) = XSIG_LL(accumulator);
result->exp = exponent + EXP_BIAS;
result->tag = TW_Valid;
result->sign = sign2;
 
}
/reg_constant.c
0,0 → 1,125
/*---------------------------------------------------------------------------+
| reg_constant.c |
| |
| All of the constant FPU_REGs |
| |
| Copyright (C) 1992,1993,1994,1996 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_system.h"
#include "fpu_emu.h"
#include "status_w.h"
#include "reg_constant.h"
#include "control_w.h"
 
 
/* The initializers below appear to list { sign, tag, exp, sigl, sigh },
   i.e. the low significand word precedes the high one -- TODO confirm
   against the FPU_REG declaration in fpu_emu.h. */

/* 1.0, 2.0 and 0.5: same significand (top bit only), exponents
   EXP_BIAS, EXP_BIAS+1 and EXP_BIAS-1. */
FPU_REG const CONST_1 = { SIGN_POS, TW_Valid, EXP_BIAS,
0x00000000, 0x80000000 };
FPU_REG const CONST_2 = { SIGN_POS, TW_Valid, EXP_BIAS+1,
0x00000000, 0x80000000 };
FPU_REG const CONST_HALF = { SIGN_POS, TW_Valid, EXP_BIAS-1,
0x00000000, 0x80000000 };
/* Pushed by fldl2t() (x87 FLDL2T constant, log2(10)). */
FPU_REG const CONST_L2T = { SIGN_POS, TW_Valid, EXP_BIAS+1,
0xcd1b8afe, 0xd49a784b };
/* Pushed by fldl2e() (x87 FLDL2E constant, log2(e)). */
FPU_REG const CONST_L2E = { SIGN_POS, TW_Valid, EXP_BIAS,
0x5c17f0bc, 0xb8aa3b29 };
/* pi, pi/2 and pi/4 share one significand and differ only in exponent.
   The low word ends in ...235 whereas poly_atan.c's 96-bit pi_signif has
   ...234 in the same position, i.e. this 64-bit value is rounded up;
   fldpi() subtracts 1 for the rounding modes that must not round up. */
FPU_REG const CONST_PI = { SIGN_POS, TW_Valid, EXP_BIAS+1,
0x2168c235, 0xc90fdaa2 };
FPU_REG const CONST_PI2 = { SIGN_POS, TW_Valid, EXP_BIAS,
0x2168c235, 0xc90fdaa2 };
FPU_REG const CONST_PI4 = { SIGN_POS, TW_Valid, EXP_BIAS-1,
0x2168c235, 0xc90fdaa2 };
/* Pushed by fldlg2() (x87 FLDLG2 constant, log10(2)). */
FPU_REG const CONST_LG2 = { SIGN_POS, TW_Valid, EXP_BIAS-2,
0xfbcff799, 0x9a209a84 };
/* Pushed by fldln2() (x87 FLDLN2 constant, ln(2)). */
FPU_REG const CONST_LN2 = { SIGN_POS, TW_Valid, EXP_BIAS-1,
0xd1cf79ac, 0xb17217f7 };
 
/* Extra bits to take pi/2 to more than 128 bits precision. */
/* Negative correction term: CONST_PI2 above is rounded up. */
FPU_REG const CONST_PI2extra = { SIGN_NEG, TW_Valid, EXP_BIAS-66,
0xfc8f8cbb, 0xece675d1 };
 
/* Only the sign (and tag) is used in internal zeroes */
FPU_REG const CONST_Z = { SIGN_POS, TW_Zero, EXP_UNDER, 0x0, 0x0 };
 
/* Only the sign and significand (and tag) are used in internal NaNs */
/* The 80486 never generates one of these
FPU_REG const CONST_SNAN = { SIGN_POS, TW_NaN, EXP_OVER, 0x00000001, 0x80000000 };
*/
/* This is the real indefinite QNaN */
FPU_REG const CONST_QNaN = { SIGN_NEG, TW_NaN, EXP_OVER, 0x00000000, 0xC0000000 };
 
/* Only the sign (and tag) is used in internal infinities */
FPU_REG const CONST_INF = { SIGN_POS, TW_Infinity, EXP_OVER, 0x00000000, 0x80000000 };
 
 
 
/* Push a copy of the constant *c onto the emulated FPU stack, adding adj
   to the low significand word of the pushed copy.  adj is the one-unit
   rounding-mode correction chosen by the fld* wrappers (-1, 0 or +1).
   If the stack-overflow test fires, stack_overflow() is called and nothing
   is pushed.  C1 is cleared after a successful push. */
static void fld_const(FPU_REG const *c, int adj)
{
/* NOTE(review): st_new_ptr is never assigned in the visible code, so it is
   presumably set by name inside the STACK_OVERFLOW and/or push() macros --
   do not rename it without checking those macros. */
FPU_REG *st_new_ptr;
 
if ( STACK_OVERFLOW )
{
stack_overflow();
return;
}
push();
reg_move(c, st_new_ptr);
st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to
borrow or carry. */
clear_C1();
}
 
/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
(and not one of RC_RND or RC_UP).
The argument is parenthesized so that an expression argument (for example
a | b) is masked as a whole; without the parentheses '&' would bind to the
last operand only.
*/
#define DOWN_OR_CHOP(x) ((x) & RC_DOWN)
 
/* Push CONST_1.  The rounding-control argument is accepted only so that
   the function fits the FUNC_RC table; no adjustment is ever applied. */
static void fld1(int rc)
{
  const int adjust = 0;

  (void) rc;
  fld_const(&CONST_1, adjust);
}
 
/* Push CONST_L2T; the low significand word is bumped by one only when the
   rounding mode is RC_UP. */
static void fldl2t(int rc)
{
  int adjust = 0;

  if ( rc == RC_UP )
    adjust = 1;

  fld_const(&CONST_L2T, adjust);
}
 
/* Push CONST_L2E; the low significand word is reduced by one when the
   rounding mode is RC_DOWN or RC_CHOP. */
static void fldl2e(int rc)
{
  int adjust = 0;

  if ( DOWN_OR_CHOP(rc) )
    adjust = -1;

  fld_const(&CONST_L2E, adjust);
}
 
/* Push CONST_PI; the low significand word is reduced by one when the
   rounding mode is RC_DOWN or RC_CHOP. */
static void fldpi(int rc)
{
  int adjust = 0;

  if ( DOWN_OR_CHOP(rc) )
    adjust = -1;

  fld_const(&CONST_PI, adjust);
}
 
/* Push CONST_LG2; the low significand word is reduced by one when the
   rounding mode is RC_DOWN or RC_CHOP. */
static void fldlg2(int rc)
{
  int adjust = 0;

  if ( DOWN_OR_CHOP(rc) )
    adjust = -1;

  fld_const(&CONST_LG2, adjust);
}
 
/* Push CONST_LN2; the low significand word is reduced by one when the
   rounding mode is RC_DOWN or RC_CHOP. */
static void fldln2(int rc)
{
  int adjust = 0;

  if ( DOWN_OR_CHOP(rc) )
    adjust = -1;

  fld_const(&CONST_LN2, adjust);
}
 
/* Push CONST_Z.  The rounding-control argument is accepted only so that
   the function fits the FUNC_RC table; no adjustment is ever applied. */
static void fldz(int rc)
{
  const int adjust = 0;

  (void) rc;
  fld_const(&CONST_Z, adjust);
}
 
/* Signature of a constant loader: it receives the rounding-control bits
   of the control word. */
typedef void (*FUNC_RC)(int);

/* Loader for each value of FPU_rm, in index order; the last slot routes
   to FPU_illegal. */
static FUNC_RC constants_table[] = {
  fld1,
  fldl2t,
  fldl2e,
  fldpi,
  fldlg2,
  fldln2,
  fldz,
  (FUNC_RC)FPU_illegal
};

/* Dispatch a load-constant instruction: pick the loader selected by FPU_rm
   and hand it the rounding-control field of the control word. */
void fconst(void)
{
  FUNC_RC loader = constants_table[FPU_rm];

  loader(control_word & CW_RC);
}
/poly.h
0,0 → 1,116
/*---------------------------------------------------------------------------+
| poly.h |
| |
| Header file for the FPU-emu poly*.c source files. |
| |
| Copyright (C) 1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| Declarations and definitions for functions operating on Xsig (12-byte |
| extended-significand) quantities. |
| |
+---------------------------------------------------------------------------*/
 
#ifndef _POLY_H
#define _POLY_H
 
/* This 12-byte structure is used to improve the accuracy of computation
of transcendental functions.
Intended to be used to get results better than 8-byte computation
allows. 9-byte would probably be sufficient.
The words are stored least significant first; XSIG_LL() relies on midw
and msw being adjacent so they can be accessed as one 64-bit quantity.
*/
typedef struct {
unsigned long lsw; /* least significant 32 bits */
unsigned long midw; /* middle 32 bits */
unsigned long msw; /* most significant 32 bits */
} Xsig;
 
/* Prototypes for the Xsig helper routines; of these, div_Xsig, round_Xsig
   and norm_Xsig are implemented in the .S files of this directory. */
asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
unsigned long long *result);
asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
const unsigned long long terms[], const int n);
 
asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
 
asmlinkage void shr_Xsig(Xsig *, const int n);
/* round_Xsig / norm_Xsig return the shift applied, which callers add to
   their exponent. */
asmlinkage int round_Xsig(Xsig *);
asmlinkage int norm_Xsig(Xsig *);
/* NOTE(review): dest is declared const here although div_Xsig.S stores the
   quotient through it, while x1 (only read there) is not const; callers
   such as poly_atan() pass the same object as x1 and dest. */
asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
 
/* Macro to extract the most significant 32 bits from a long long.
   The argument must be an lvalue; it is parenthesized so that any lvalue
   expression can be passed. */
#define LL_MSW(x) (((unsigned long *)&(x))[1])
 
/* Macro to initialize an Xsig struct.
   Arguments are given most significant word first (msw, midw, lsw) and are
   emitted in struct order (lsw, midw, msw). */
#define MK_XSIG(a,b,c) { (c), (b), (a) }
 
/* Macro to access the 8 ms bytes of an Xsig as a long long.
   The argument is parenthesized so that expressions such as *ptr work;
   previously XSIG_LL(*ptr) expanded to &*ptr.midw, which does not compile. */
#define XSIG_LL(x) (*(unsigned long long *)&(x).midw)
 
 
/*
Need to run gcc with optimizations on to get these to
actually be in-line.
*/
 
/* Multiply two fixed-point 32 bit numbers. */
/* arg1 is loaded into %eax and multiplied by arg2 with mull; only %edx,
   the high 32 bits of the 64-bit product, is stored to *out.  The low
   32 bits are discarded. */
extern inline void mul_32_32(const unsigned long arg1,
const unsigned long arg2,
unsigned long *out)
{
asm volatile ("movl %1,%%eax; mull %2; movl %%edx,%0" \
:"=g" (*out) \
:"g" (arg1), "g" (arg2) \
:"ax","dx");
}
 
 
/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
/* The three words are added from least to most significant with
   addl/adcl so the carry propagates; a carry out of the top word is
   dropped.  The asm body is a single string literal spanning several
   lines, which older gcc accepted -- do not insert comments inside it. */
extern inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
{
asm volatile ("movl %1,%%edi; movl %2,%%esi;
movl (%%esi),%%eax; addl %%eax,(%%edi);
movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);
movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);"
:"=g" (*dest):"g" (dest), "g" (x2)
:"ax","si","di");
}
 
 
/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
/* Note: the constraints in the asm statement didn't always work properly
with gcc 2.5.8. Changing from using edi to using ecx got around the
problem, but keep fingers crossed! */
/* On a carry out of the top word the 96-bit sum is rotated right by one
   bit through the carry (so the carry becomes the new top bit), *exp is
   incremented and %eax is set to 1; otherwise %eax is cleared.
   NOTE(review): the function is declared int but has no return statement;
   the value left in %eax by the asm is presumably what a caller would
   receive.  The callers visible in this directory ignore the result.
   The asm body is one multi-line string literal; the line breaks inside it
   separate instructions where no ';' is present. */
extern inline int add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
{
asm volatile ("movl %2,%%ecx; movl %3,%%esi;
movl (%%esi),%%eax; addl %%eax,(%%ecx);
movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);
movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);
jnc 0f;
rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)
movl %4,%%ecx; incl (%%ecx)
movl $1,%%eax; jmp 1f;
0: xorl %%eax,%%eax;
1:"
:"=g" (*exp), "=g" (*dest)
:"g" (dest), "g" (x2), "g" (exp)
:"cx","si","ax");
}
 
 
/* Negate (subtract from 1.0) the 12 byte Xsig */
/* This is faster in a loop on my 386 than using the "neg" instruction. */
/* Implemented as 0 - x over all 96 bits: %ecx holds zero, and each word is
   replaced by (0 - word - borrow) using subl/sbbl from the least to the
   most significant word. */
extern inline void negate_Xsig(Xsig *x)
{
asm volatile("movl %1,%%esi; "
"xorl %%ecx,%%ecx; "
"movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi); "
"movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi); "
"movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi); "
:"=g" (*x):"g" (x):"si","ax","cx");
}
 
#endif _POLY_H
/reg_mul.c
0,0 → 1,105
/*---------------------------------------------------------------------------+
| reg_mul.c |
| |
| Multiply one FPU_REG by another, put the result in a destination FPU_REG. |
| |
| Copyright (C) 1992,1993 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| The destination may be any FPU_REG, including one of the source FPU_REGs. |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "fpu_system.h"
 
 
/* Multiply *a by *b and leave the product in *dest.
   This routine must be called with non-empty source registers.
   dest may be the same register as a or b; the entry sign of dest and the
   sign of the product are therefore captured before anything is written.

   Returns 0 when a result has been stored in dest.
   Returns 1 when reg_u_mul() reports failure (dest's entry sign is put
   back first), when denormal_operand() returns non-zero (DENORM_OPERAND
   builds), or on the PARANOID internal error.
   For a NaN operand or for zero times infinity, the value returned by
   real_2op_NaN() or arith_invalid() is passed through. */
int reg_mul(FPU_REG const *a, FPU_REG const *b,
            FPU_REG *dest, unsigned int control_w)
{
  char entry_sign = dest->sign;
  char product_sign = (a->sign ^ b->sign);

  /* Both regs Valid, this should be the most common case. */
  if ( (a->tag | b->tag) == 0 )
    {
      dest->sign = product_sign;
      if ( reg_u_mul(a, b, dest, control_w) == 0 )
        return 0;
      dest->sign = entry_sign;
      return 1;
    }

  /* Either both arguments are zero, or one is valid and the other zero:
     the result is zero. */
  if ( (a->tag <= TW_Zero) && (b->tag <= TW_Zero) )
    {
#ifdef DENORM_OPERAND
      if ( ( ((b->tag == TW_Valid) && (b->exp <= EXP_UNDER)) ||
             ((a->tag == TW_Valid) && (a->exp <= EXP_UNDER)) )
           && denormal_operand() )
        return 1;
#endif /* DENORM_OPERAND */
      reg_move(&CONST_Z, dest);
      /* The 80486 book says that the answer is +0, but a real
         80486 behaves this way.
         IEEE-754 apparently says it should be this way. */
      dest->sign = product_sign;
      return 0;
    }

  /* From here on at least one operand is an infinity, a NaN, etc. */
  if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) )
    return real_2op_NaN(a, b, dest);

  if ( a->tag == TW_Infinity )
    {
      if ( b->tag == TW_Zero )
        return arith_invalid(dest);   /* Zero*Infinity is invalid */
#ifdef DENORM_OPERAND
      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
           denormal_operand() )
        return 1;
#endif /* DENORM_OPERAND */
      reg_move(a, dest);
      dest->sign = product_sign;
      return 0;
    }

  if ( b->tag == TW_Infinity )
    {
      if ( a->tag == TW_Zero )
        return arith_invalid(dest);   /* Zero*Infinity is invalid */
#ifdef DENORM_OPERAND
      if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
           denormal_operand() )
        return 1;
#endif /* DENORM_OPERAND */
      reg_move(b, dest);
      dest->sign = product_sign;
      return 0;
    }

  /* NOTE(review): when PARANOID is not defined and none of the cases above
     matched, control reaches the end of the function without a return
     statement. */
#ifdef PARANOID
  EXCEPTION(EX_INTERNAL|0x102);
  return 1;
#endif /* PARANOID */
}
/div_small.S
0,0 → 1,47
.file "div_small.S"
/*---------------------------------------------------------------------------+
| div_small.S |
| |
| Divide a 64 bit integer by a 32 bit integer & return remainder. |
| |
| Copyright (C) 1992,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| unsigned long div_small(unsigned long long *x, unsigned long y) |
+---------------------------------------------------------------------------*/
 
#include "fpu_emu.h"
 
.text
/* Schoolbook long division of the 64-bit number at *x by the 32-bit y:
   the high word is divided first, and its remainder (left in %edx) becomes
   the upper half of the dividend for the low word.  The quotient replaces
   *x in place and the final remainder is returned in %eax. */
ENTRY(div_small)
pushl %ebp
movl %esp,%ebp
 
pushl %esi
 
movl PARAM1,%esi /* pointer to num */
movl PARAM2,%ecx /* The denominator */
 
movl 4(%esi),%eax /* Get the current num msw */
xorl %edx,%edx /* dividend for the first step is 0:msw */
divl %ecx
 
movl %eax,4(%esi) /* store the quotient msw */
 
movl (%esi),%eax /* Get the num lsw */
divl %ecx /* dividend is remainder:lsw */
 
movl %eax,(%esi) /* store the quotient lsw */
 
movl %edx,%eax /* Return the remainder in eax */
 
popl %esi
 
leave
ret
 
/poly_2xm1.c
0,0 → 1,152
/*---------------------------------------------------------------------------+
| poly_2xm1.c |
| |
| Function to compute 2^x-1 by a polynomial approximation. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "poly.h"
 
 
/* Number of entries in lterms[]; poly_2xm1() passes HIPOWER-1 to
   polynomial_Xsig() together with the table. */
#define HIPOWER 11
/* 64-bit fixed-point polynomial coefficients used by poly_2xm1().
   Entry 0 is a placeholder: that term is applied separately from hiterm. */
static const unsigned long long lterms[HIPOWER] =
{
0x0000000000000000LL, /* This term done separately as 12 bytes */
0xf5fdeffc162c7543LL,
0x1c6b08d704a0bfa6LL,
0x0276556df749cc21LL,
0x002bb0ffcf14f6b8LL,
0x0002861225ef751cLL,
0x00001ffcbfcd5422LL,
0x00000162c005d5f1LL,
0x0000000da96ccb1bLL,
0x0000000078d1b897LL,
0x000000000422b029LL
};
 
/* Leading coefficient at 96-bit precision; poly_2xm1() multiplies the
   argument significand by it ("The leading term"). */
static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
 
/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0,
These numbers are 2^(1/4), 2^(1/2), and 2^(3/4)
*/
/* shiftterm0 only fills index 0: poly_2xm1() indexes shiftterm[] solely
   when shift is non-zero. */
static const Xsig shiftterm0 = MK_XSIG(0, 0, 0);
static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318);
static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
 
/* Lookup by slice number (the 'shift' variable in poly_2xm1()). */
static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
&shiftterm2, &shiftterm3 };
 
 
/*--- poly_2xm1() -----------------------------------------------------------+
| Requires an argument which is TW_Valid and < 1. |
+---------------------------------------------------------------------------*/
/*
Compute 2^x - 1 for *arg and store it in *result (tag TW_Valid, sign copied
from arg).

The argument is reduced into the lowest quarter of [0,1) by subtracting
0.25, 0.5 or 0.75 (recorded in 'shift'), the polynomial is evaluated on the
reduced value, and the reduction and a negative sign are then undone with
the identities quoted in the comments below.

Returns 0 on success.  In PARANOID builds, an argument with
exponent >= 0 or a tag other than TW_Valid raises EX_INTERNAL|0x127 and
returns 1 without writing *result.
*/
int poly_2xm1(FPU_REG const *arg, FPU_REG *result)
{
long int exponent, shift;
unsigned long long Xll;
Xsig accumulator, Denom, argSignif;
 
 
exponent = arg->exp - EXP_BIAS;
 
#ifdef PARANOID
if ( (exponent >= 0) /* Don't want a |number| >= 1.0 */
|| (arg->tag != TW_Valid) )
{
/* Number negative, too large, or not Valid. */
EXCEPTION(EX_INTERNAL|0x127);
return 1;
}
#endif PARANOID
 
/* argSignif (96 bit) and Xll (64 bit) both start as the significand. */
argSignif.lsw = 0;
XSIG_LL(argSignif) = Xll = significand(arg);
 
if ( exponent == -1 )
{
/* 0.5 <= |x| < 1: the second significand bit selects slice 3 or 2.
   Shifting left by two drops the two leading bits, which performs the
   subtraction. */
shift = (argSignif.msw & 0x40000000) ? 3 : 2;
/* subtract 0.5 or 0.75 */
exponent -= 2;
XSIG_LL(argSignif) <<= 2;
Xll <<= 2;
}
else if ( exponent == -2 )
{
/* 0.25 <= |x| < 0.5: shifting left by one drops the leading bit. */
shift = 1;
/* subtract 0.25 */
exponent--;
XSIG_LL(argSignif) <<= 1;
Xll <<= 1;
}
else
shift = 0;
 
if ( exponent < -2 )
{
/* Shift the argument right by the required places. */
if ( shrx(&Xll, -2-exponent) >= 0x80000000U )
Xll++; /* round up */
}
 
/* Higher-order terms: polynomial in Xll, times the argument, scaled. */
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
mul_Xsig_Xsig(&accumulator, &argSignif);
shr_Xsig(&accumulator, 3);
 
mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */
/* A carry out of the addition is absorbed by bumping exponent. */
add_two_Xsig(&accumulator, &argSignif, &exponent);
 
if ( shift )
{
/* The argument is large, use the identity:
f(x+a) = f(a) * (f(x) + 1) - 1;
*/
shr_Xsig(&accumulator, - exponent);
accumulator.msw |= 0x80000000; /* add 1.0 */
mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
accumulator.msw &= 0x3fffffff; /* subtract 1.0 */
exponent = 1;
}
 
if ( arg->sign != SIGN_POS )
{
/* The argument is negative, use the identity:
f(-x) = -f(x) / (1 + f(x))
*/
/* Denom = f(x) aligned so that forcing the top bit adds 1.0. */
Denom.lsw = accumulator.lsw;
XSIG_LL(Denom) = XSIG_LL(accumulator);
if ( exponent < 0 )
shr_Xsig(&Denom, - exponent);
else if ( exponent > 0 )
{
/* exponent must be 1 here */
/* 96-bit left shift by one, done by hand: shift the upper 64 bits,
   carry in the top bit of lsw, then shift lsw. */
XSIG_LL(Denom) <<= 1;
if ( Denom.lsw & 0x80000000 )
XSIG_LL(Denom) |= 1;
(Denom.lsw) <<= 1;
}
Denom.msw |= 0x80000000; /* add 1.0 */
/* accumulator is both dividend and destination. */
div_Xsig(&accumulator, &Denom, &accumulator);
}
 
/* Convert to 64 bit signed-compatible */
exponent += round_Xsig(&accumulator);
 
significand(result) = XSIG_LL(accumulator);
result->tag = TW_Valid;
result->exp = exponent + EXP_BIAS;
result->sign = arg->sign;
 
return 0;
 
}
/reg_constant.h
0,0 → 1,31
/*---------------------------------------------------------------------------+
| reg_constant.h |
| |
| Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
+---------------------------------------------------------------------------*/
 
#ifndef _REG_CONSTANT_H_
#define _REG_CONSTANT_H_
 
#include "fpu_emu.h"
 
/* Read-only FPU_REG constants shared across the emulator; the definitions
   live in reg_constant.c. */
extern FPU_REG const CONST_1;
extern FPU_REG const CONST_2;
extern FPU_REG const CONST_HALF;
extern FPU_REG const CONST_L2T;
extern FPU_REG const CONST_L2E;
extern FPU_REG const CONST_PI;
extern FPU_REG const CONST_PI2;
extern FPU_REG const CONST_PI2extra;
extern FPU_REG const CONST_PI4;
extern FPU_REG const CONST_LG2;
extern FPU_REG const CONST_LN2;
extern FPU_REG const CONST_Z;
/* NOTE(review): reg_constant.c as shown defines CONST_INF but neither
   CONST_PINF nor CONST_MINF; these two declarations may be stale -- confirm
   before relying on them. */
extern FPU_REG const CONST_PINF;
extern FPU_REG const CONST_INF;
extern FPU_REG const CONST_MINF;
extern FPU_REG const CONST_QNaN;
 
#endif _REG_CONSTANT_H_
/div_Xsig.S
0,0 → 1,365
.file "div_Xsig.S"
/*---------------------------------------------------------------------------+
| div_Xsig.S |
| |
| Division subroutine for 96 bit quantities |
| |
| Copyright (C) 1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
| put the 96 bit result at the location d. |
| |
| The result may not be accurate to 96 bits. It is intended for use where |
| a result better than 64 bits is required. The result should usually be |
| good to at least 94 bits. |
| The returned result is actually one half of the true quotient: the |
| dividend is shifted right by one bit first. This is done to prevent |
| overflow. |
| |
| .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
| |
| void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
| |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "fpu_emu.h"
 
 
/* Operand syntax for the three 32-bit words of an Xsig addressed through
   a base register: least significant word at offset 0, middle word at 4,
   most significant word at 8. */
#define XsigLL(x) (x)
#define XsigL(x) 4(x)
#define XsigH(x) 8(x)
 
 
/* Scratch storage: a 128-bit accumulator for the (halved) dividend and
   remainder, and three words for the quotient.  Reentrant builds keep it
   in the stack frame; NON_REENTRANT_FPU builds use static data. */
#ifndef NON_REENTRANT_FPU
/*
Local storage on the stack:
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
*/
#define FPU_accum_3 -4(%ebp)
#define FPU_accum_2 -8(%ebp)
#define FPU_accum_1 -12(%ebp)
#define FPU_accum_0 -16(%ebp)
#define FPU_result_3 -20(%ebp)
#define FPU_result_2 -24(%ebp)
#define FPU_result_1 -28(%ebp)
 
#else
.data
/*
Local storage in a static area:
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
*/
.align 2,0
FPU_accum_3:
.long 0
FPU_accum_2:
.long 0
FPU_accum_1:
.long 0
FPU_accum_0:
.long 0
FPU_result_3:
.long 0
FPU_result_2:
.long 0
FPU_result_1:
.long 0
#endif NON_REENTRANT_FPU
 
 
.text
/* div_Xsig: PARAM1 = dividend, PARAM2 = divisor, PARAM3 = destination.
   The quotient is built 32 bits at a time in FPU_result_3..1.  Each of the
   first two steps estimates a quotient word by dividing by the divisor's
   top word plus one (so the estimate cannot be too large), subtracts
   estimate * divisor from the accumulator and then corrects the estimate
   upwards if the remainder is still too big.  The third word is only an
   approximation.  The dividend is read in full before the destination is
   written, so the destination may be the same object as the dividend. */
ENTRY(div_Xsig)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
subl $28,%esp /* room for the seven local words */
#endif NON_REENTRANT_FPU
 
pushl %esi
pushl %edi
pushl %ebx
 
movl PARAM1,%esi /* pointer to num */
movl PARAM2,%ebx /* pointer to denom */
 
#ifdef PARANOID
/* The divisor must have its most significant bit set. */
testl $0x80000000, XsigH(%ebx) /* Divisor */
je L_bugged
#endif PARANOID
 
 
/*---------------------------------------------------------------------------+
| Divide: Return arg1/arg2 to arg3. |
| |
| The maximum returned value is (ignoring exponents) |
| .ffffffff ffffffff |
| ------------------ = 1.ffffffff fffffffe |
| .80000000 00000000 |
| and the minimum is |
| .80000000 00000000 |
| ------------------ = .80000000 00000001 (rounded) |
| .ffffffff ffffffff |
| |
+---------------------------------------------------------------------------*/
 
/* Save extended dividend in local register */
 
/* Divide by 2 to prevent overflow */
/* The carry flag chains the rcrl instructions into one 128-bit right
   shift; the bit shifted out of the lowest word lands in FPU_accum_0. */
clc
movl XsigH(%esi),%eax
rcrl %eax
movl %eax,FPU_accum_3
movl XsigL(%esi),%eax
rcrl %eax
movl %eax,FPU_accum_2
movl XsigLL(%esi),%eax
rcrl %eax
movl %eax,FPU_accum_1
movl $0,%eax
rcrl %eax
movl %eax,FPU_accum_0
 
movl FPU_accum_2,%eax /* Get the current num */
movl FPU_accum_3,%edx
 
/*----------------------------------------------------------------------*/
/* Initialization done.
Do the first 32 bits. */
 
/* We will divide by a number which is too large */
/* %ecx keeps the augmented divisor word for the second step as well;
   it is zero when the top divisor word is 0xffffffff. */
movl XsigH(%ebx),%ecx
addl $1,%ecx
jnc LFirst_div_not_1
 
/* here we need to divide by 100000000h,
i.e., no division at all.. */
mov %edx,%eax
jmp LFirst_div_done
 
LFirst_div_not_1:
divl %ecx /* Divide the numerator by the augmented
denom ms dw */
 
LFirst_div_done:
movl %eax,FPU_result_3 /* Put the result in the answer */
 
mull XsigH(%ebx) /* mul by the ms dw of the denom */
 
subl %eax,FPU_accum_2 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_3
 
movl FPU_result_3,%eax /* Get the result back */
mull XsigL(%ebx) /* now mul the ls dw of the denom */
 
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
sbbl $0,FPU_accum_3
je LDo_2nd_32_bits /* Must check for non-zero result here */
 
#ifdef PARANOID
jb L_bugged_1
#endif PARANOID
 
/* need to subtract another once of the denom */
incl FPU_result_3 /* Correct the answer */
 
movl XsigL(%ebx),%eax
movl XsigH(%ebx),%edx
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
 
#ifdef PARANOID
sbbl $0,FPU_accum_3
jne L_bugged_1 /* Must check for non-zero result here */
#endif PARANOID
 
/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
to handle now.
Work with the second 32 bits, FPU_accum_0 not used from now on */
/* NOTE(review): despite the remark above, FPU_accum_0 is still read and
   written below (second-step subtraction and LDo_3rd_div). */
LDo_2nd_32_bits:
movl FPU_accum_2,%edx /* get the reduced num */
movl FPU_accum_1,%eax
 
/* need to check for possible subsequent overflow */
cmpl XsigH(%ebx),%edx
jb LDo_2nd_div
ja LPrevent_2nd_overflow
 
cmpl XsigL(%ebx),%eax
jb LDo_2nd_div
 
LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
/* prevent overflow */
subl XsigL(%ebx),%eax
sbbl XsigH(%ebx),%edx
movl %edx,FPU_accum_2
movl %eax,FPU_accum_1
 
incl FPU_result_3 /* Reflect the subtraction in the answer */
 
#ifdef PARANOID
je L_bugged_2 /* Can't bump the result to 1.0 */
#endif PARANOID
 
LDo_2nd_div:
cmpl $0,%ecx /* augmented denom msw */
jnz LSecond_div_not_1
 
/* %ecx == 0, we are dividing by 1.0 */
mov %edx,%eax
jmp LSecond_div_done
 
LSecond_div_not_1:
divl %ecx /* Divide the numerator by the denom ms dw */
 
LSecond_div_done:
movl %eax,FPU_result_2 /* Put the result in the answer */
 
mull XsigH(%ebx) /* mul by the ms dw of the denom */
 
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
 
#ifdef PARANOID
jc L_bugged_2
#endif PARANOID
 
movl FPU_result_2,%eax /* Get the result back */
mull XsigL(%ebx) /* now mul the ls dw of the denom */
 
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
sbbl $0,FPU_accum_2
 
#ifdef PARANOID
jc L_bugged_2
#endif PARANOID
 
jz LDo_3rd_32_bits
 
#ifdef PARANOID
cmpl $1,FPU_accum_2
jne L_bugged_2
#endif PARANOID
 
/* need to subtract another once of the denom */
movl XsigL(%ebx),%eax
movl XsigH(%ebx),%edx
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1
sbbl $0,FPU_accum_2
 
#ifdef PARANOID
jc L_bugged_2
jne L_bugged_2
#endif PARANOID
 
addl $1,FPU_result_2 /* Correct the answer */
adcl $0,FPU_result_3
 
#ifdef PARANOID
jc L_bugged_2 /* Must check for non-zero result here */
#endif PARANOID
 
/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
tidying operations.
Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
/* We use an approximation for the third 32 bits.
To take account of the 3rd 32 bits of the divisor
(call them del), we subtract del * (a/b) */
 
movl FPU_result_3,%eax /* a/b */
mull XsigLL(%ebx) /* del */
 
subl %edx,FPU_accum_1 /* only the high half of the product is used */
 
/* A borrow indicates that the result is negative */
jnb LTest_over
 
movl XsigH(%ebx),%edx
addl %edx,FPU_accum_1
 
subl $1,FPU_result_2 /* Adjust the answer */
sbbl $0,FPU_result_3
 
/* The above addition might not have been enough, check again. */
movl FPU_accum_1,%edx /* get the reduced num */
cmpl XsigH(%ebx),%edx /* denom */
jb LDo_3rd_div
 
movl XsigH(%ebx),%edx
addl %edx,FPU_accum_1
 
subl $1,FPU_result_2 /* Adjust the answer */
sbbl $0,FPU_result_3
jmp LDo_3rd_div
 
LTest_over:
movl FPU_accum_1,%edx /* get the reduced num */
 
/* need to check for possible subsequent overflow */
cmpl XsigH(%ebx),%edx /* denom */
jb LDo_3rd_div
 
/* prevent overflow */
subl XsigH(%ebx),%edx
movl %edx,FPU_accum_1
 
addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
adcl $0,FPU_result_3
 
LDo_3rd_div:
/* Third word: divide by the top divisor word only. */
movl FPU_accum_0,%eax
movl FPU_accum_1,%edx
divl XsigH(%ebx)
 
movl %eax,FPU_result_1 /* Rough estimate of third word */
 
movl PARAM3,%esi /* pointer to answer */
 
/* Copy the three quotient words to the destination, low word first. */
movl FPU_result_1,%eax
movl %eax,XsigLL(%esi)
movl FPU_result_2,%eax
movl %eax,XsigL(%esi)
movl FPU_result_3,%eax
movl %eax,XsigH(%esi)
 
L_exit:
popl %ebx
popl %edi
popl %esi
 
leave
ret
 
 
#ifdef PARANOID
/* The logic is wrong if we got here */
/* Each handler pushes an internal-error code, calls EXCEPTION, pops the
   argument again and leaves through L_exit without storing a quotient. */
L_bugged:
pushl EX_INTERNAL|0x240
call EXCEPTION
pop %ebx
jmp L_exit
 
L_bugged_1:
pushl EX_INTERNAL|0x241
call EXCEPTION
pop %ebx
jmp L_exit
 
L_bugged_2:
pushl EX_INTERNAL|0x242
call EXCEPTION
pop %ebx
jmp L_exit
#endif PARANOID
/Makefile
0,0 → 1,31
#
# Makefile for wm-FPU-emu
#
 
# Library archive produced from L_OBJS (built by the included Rules.make).
L_TARGET := math.a
 
# Uncomment the first DEBUG line to compile with -DDEBUGGING.
#DEBUG = -DDEBUGGING
DEBUG =
# -DPARANOID enables the "#ifdef PARANOID" consistency checks in the C and
# assembly sources.
PARANOID = -DPARANOID
CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
 
# Assembly sources go through the C preprocessor; -D__ASSEMBLY__ and the
# PARANOID setting are passed, but not the rest of CFLAGS.
.S.o:
$(CC) -D__ASSEMBLY__ $(PARANOID) -c $<
 
# Every object that goes into math.a.
L_OBJS =fpu_entry.o div_small.o errors.o \
fpu_arith.o fpu_aux.o fpu_etc.o fpu_trig.o \
load_store.o get_address.o \
poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \
reg_add_sub.o reg_compare.o reg_constant.o reg_ld_str.o \
reg_div.o reg_mul.o reg_norm.o \
reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \
reg_round.o \
wm_shrx.o wm_sqrt.o \
div_Xsig.o polynom_Xsig.o round_Xsig.o \
shr_Xsig.o mul_Xsig.o \
fpu_debug.o
 
include $(TOPDIR)/Rules.make
 
# Regenerate fpu_proto.h from all C files with cproto.
proto:
cproto -e -DMAKING_PROTO *.c >fpu_proto.h
/fpu_debug.c
0,0 → 1,230
/* Interface with ptrace and core-dumping routines */
 
 
#include "fpu_system.h"
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "status_w.h"
 
 
/* Exponent parameters of the hardware floating-point formats.  Only the
   EXTENDED_* values are used by the functions in this file; the DOUBLE_*
   and SINGLE_* values are not referenced here. */
#define EXTENDED_Ebias 0x3fff
#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */
 
#define DOUBLE_Emax 1023 /* largest valid exponent */
#define DOUBLE_Ebias 1023
#define DOUBLE_Emin (-1022) /* smallest valid exponent */
 
#define SINGLE_Emax 127 /* largest valid exponent */
#define SINGLE_Ebias 127
#define SINGLE_Emin (-126) /* smallest valid exponent */
 
 
/* Copy and paste from round_to_int, with the control word passed in as a
   parameter. */
/*===========================================================================*/

/* Convert *r in place to an integer held in its significand, rounding
   according to the rounding-control field of user_control_word.  The
   result is NOT normalized and the sign is left in r->sign.

   Returns zero if the result is exact.  A non-zero value is returned if
   bits were changed by rounding or truncation (PRECISION_LOST_UP or
   PRECISION_LOST_DOWN), and 1 is returned on overflow.  When the exponent
   is too large the significand is set to the largest possible value. */

static int round_to_int_cwd(FPU_REG *r, long int user_control_word)
{
  unsigned lost;      /* bits shifted out below the integer part */
  char all_ones;      /* significand is 0xfff...fff after the shift */
  int round_up = 0;

  if (r->tag == TW_Zero)
    {
      /* Make sure that zero is returned */
      significand(r) = 0;
      return 0;               /* o.k. */
    }

  if (r->exp > EXP_BIAS + 63)
    {
      r->sigl = r->sigh = ~0; /* The largest representable number */
      return 1;               /* overflow */
    }

  lost = shrxs(&r->sigl, EXP_BIAS + 63 - r->exp);
  all_ones = !(~(r->sigh) | ~(r->sigl));

  switch (user_control_word & CW_RC)
    {
    case RC_RND:
      /* nearest: more than half, or exactly-half-or-more onto an odd
         integer (odd -> even) */
      round_up = ((lost & 0x80000001) == 0x80000001)
                 || ((lost & 0x80000000) && (r->sigl & 1));
      break;
    case RC_DOWN:
      /* towards -infinity: negative values grow in magnitude */
      round_up = (lost && r->sign);
      break;
    case RC_UP:
      /* towards +infinity: positive values grow in magnitude */
      round_up = (lost && !r->sign);
      break;
    case RC_CHOP:
      break;
    }

  if ( round_up )
    {
      if ( all_ones ) return 1;   /* overflow */
      significand(r) ++;
      return PRECISION_LOST_UP;
    }

  return lost ? PRECISION_LOST_DOWN : 0;
}
 
 
 
/* Convert a number from the hardware (10-byte extended real) format to
 * the emulator's internal FPU_REG format.
 * Taken from the emulator sources, function reg_load_extended
 */

/* Get a long double from the debugger */
void hardreg_to_softreg(const char hardreg[10],
                        FPU_REG *soft_reg)
{
  unsigned long lo = ((const unsigned long *) hardreg)[0];
  unsigned long hi = ((const unsigned long *) hardreg)[1];
  unsigned long raw_exp = ((const unsigned short *) hardreg)[4];

  soft_reg->tag = TW_Valid;   /* Default */
  soft_reg->sigl = lo;
  soft_reg->sigh = hi;
  soft_reg->sign = (raw_exp & 0x8000) ? SIGN_NEG : SIGN_POS;
  raw_exp &= 0x7fff;
  soft_reg->exp = raw_exp - EXTENDED_Ebias + EXP_BIAS;

  switch ( raw_exp )
    {
    case 0:
      if ( (hi | lo) == 0 )
        {
          soft_reg->tag = TW_Zero;
          break;
        }
      /* The number is a de-normal or pseudodenormal. */
      /* Convert it for internal use. */
      soft_reg->exp++;
      /* A pseudodenormal (ms bit set) needs nothing more; this is
         non-80486 behaviour because the number loses its 'denormal'
         identity.  A true denormal is normalized as well. */
      if ( !(hi & 0x80000000) )
        normalize_nuo(soft_reg);
      break;

    case 0x7fff:
      if ( (hi == 0x80000000) && (lo == 0) )
        {
          /* Matches the bit pattern for Infinity. */
          soft_reg->exp = EXP_Infinity;
          soft_reg->tag = TW_Infinity;
          break;
        }
      soft_reg->exp = EXP_NaN;
      soft_reg->tag = TW_NaN;
      if ( !(hi & 0x80000000) )
        {
          /* NaNs have the ms bit set to 1. */
          /* This is therefore an Unsupported NaN data type. */
          /* This is non 80486 behaviour */
          /* This should generate an Invalid Operand exception
             later, so we convert it to a SNaN */
          soft_reg->sigh = 0x80000000;
          soft_reg->sigl = 0x00000001;
          soft_reg->sign = SIGN_NEG;
        }
      break;

    default:
      if ( !(hi & 0x80000000) )
        {
          /* Unsupported data type. */
          /* Valid numbers have the ms bit set to 1. */
          /* Unnormal. */
          /* Convert it for internal use. */
          /* This is non-80486 behaviour */
          /* This should generate an Invalid Operand exception
             later, so we convert it to a SNaN */
          soft_reg->sigh = 0x80000000;
          soft_reg->sigl = 0x00000001;
          soft_reg->sign = SIGN_NEG;
          soft_reg->exp = EXP_NaN;
          soft_reg->tag = TW_NaN;
        }
      break;
    }
}
 
/* Convert a number in the emulator format to the
 * hardware (10-byte extended real) format.
 * Adapted from function write_to_extended
 *
 * rp is the emulator register, d receives the 10 bytes (8 significand
 * bytes, then the 16-bit sign/exponent word), and user_control_word
 * supplies the rounding mode used when a denormal has to be produced.
 */
void softreg_to_hardreg(const FPU_REG *rp, char d[10], long int user_control_word)
{
  long biased_exp = rp->exp - EXP_BIAS + EXTENDED_Ebias;
  FPU_REG denormal;
  const FPU_REG *sig_source = rp;

  /*
    All numbers except denormals are stored internally in a
    format which is compatible with the extended real number
    format, so for those the significand is copied unchanged.
  */
  if ( biased_exp <= 0 )
    {
      /*
        The number is a de-normal stored as a normal using our
        extra exponent range, or is Zero.
        Convert it back to a de-normal, or leave it as Zero.
      */
      reg_move(rp, &denormal);
      denormal.exp += -EXTENDED_Emin + 63;  /* largest exp to be 63 */
      round_to_int_cwd(&denormal, user_control_word);
      biased_exp = 0;
      sig_source = &denormal;
    }

  *(unsigned long *) d = sig_source->sigl;
  *(unsigned long *) (d + 4) = sig_source->sigh;

  /* The sign occupies the top bit of the exponent word. */
  if ( rp->sign != SIGN_POS )
    biased_exp |= 0x8000;
  *(unsigned short *) (d + 8) = biased_exp;
}
 
/round_Xsig.S
0,0 → 1,141
/*---------------------------------------------------------------------------+
| round_Xsig.S |
| |
| Copyright (C) 1992,1993,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Normalize and round a 12 byte quantity. |
| Call from C as: |
| int round_Xsig(Xsig *n) |
| |
| Normalize a 12 byte quantity. |
| Call from C as: |
| int norm_Xsig(Xsig *n) |
| |
| Each function returns the size of the shift (nr of bits). |
| |
+---------------------------------------------------------------------------*/
.file "round_Xsig.S"
 
#include "fpu_emu.h"
 
 
.text
/*
 * round_Xsig: normalize and then round the 12 byte quantity at PARAM1.
 *
 * Working registers: %edx = word at offset 8 (ms), %ebx = word at
 * offset 4, %eax = word at offset 0 (ls).
 * -4(%ebp) is a local counter which becomes the return value: it starts
 * at 0, the number of bits shifted left is subtracted from it, and it is
 * incremented once if rounding carries out of the ms word.
 */
ENTRY(round_Xsig)
pushl %ebp
movl %esp,%ebp
pushl %ebx /* Reserve some space */
pushl %ebx /* This push is the one undone by "popl %ebx" at L_exit */
pushl %esi

movl PARAM1,%esi

movl 8(%esi),%edx
movl 4(%esi),%ebx
movl (%esi),%eax

movl $0,-4(%ebp) /* shift counter = 0 */

orl %edx,%edx /* ms bits */
js L_round /* Already normalized */
jnz L_shift_1 /* Shift left 1 - 31 bits */

/* The ms word is zero: move each word up one place (32 bit shift). */
/* NOTE(review): if %ebx was also zero, %edx is still zero when the bsrl
   below executes -- confirm that callers never pass such a value. */
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
movl $-32,-4(%ebp)

/* We need to shift left by 1 - 31 bits */
L_shift_1:
bsrl %edx,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx /* %ecx = 31 - (index of the ms set bit of %edx) */
subl %ecx,-4(%ebp)
shld %cl,%ebx,%edx
shld %cl,%eax,%ebx
shl %cl,%eax

L_round:
/* Round the upper 64 bits (%edx:%ebx) according to the ms bit of %eax. */
testl $0x80000000,%eax
jz L_exit

addl $1,%ebx
adcl $0,%edx
jnz L_exit /* %edx is non-zero: the increment did not wrap it */

/* %edx wrapped to zero: set it to 0x80000000 and adjust the counter. */
movl $0x80000000,%edx
incl -4(%ebp)

L_exit:
/* Store all three words back; %eax is written back as it stands. */
movl %edx,8(%esi)
movl %ebx,4(%esi)
movl %eax,(%esi)

movl -4(%ebp),%eax /* return the counter */

popl %esi
popl %ebx
leave
ret
 
 
 
 
/*
 * norm_Xsig: normalize (without rounding) the 12 byte quantity at PARAM1.
 *
 * Working registers: %edx = word at offset 8 (ms), %ebx = word at
 * offset 4, %eax = word at offset 0 (ls).
 * -4(%ebp) is a local counter which becomes the return value: it starts
 * at 0 and has the number of bits shifted left subtracted from it.
 */
ENTRY(norm_Xsig)
pushl %ebp
movl %esp,%ebp
pushl %ebx /* Reserve some space */
pushl %ebx /* This push is the one undone by "popl %ebx" at L_n_exit */
pushl %esi

movl PARAM1,%esi

movl 8(%esi),%edx
movl 4(%esi),%ebx
movl (%esi),%eax

movl $0,-4(%ebp) /* shift counter = 0 */

orl %edx,%edx /* ms bits */
js L_n_exit /* Already normalized */
jnz L_n_shift_1 /* Shift left 1 - 31 bits */

/* The ms word is zero: move each word up one place (32 bit shift). */
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
movl $-32,-4(%ebp)

orl %edx,%edx /* ms bits */
js L_n_exit /* Normalized now */
jnz L_n_shift_1 /* Shift left 1 - 31 bits */

/* Still zero: move up by a second word, making 64 bits in total. */
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
addl $-32,-4(%ebp)
jmp L_n_exit /* Might not be normalized,
but shift no more. */

/* We need to shift left by 1 - 31 bits */
L_n_shift_1:
bsrl %edx,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx /* %ecx = 31 - (index of the ms set bit of %edx) */
subl %ecx,-4(%ebp)
shld %cl,%ebx,%edx
shld %cl,%eax,%ebx
shl %cl,%eax

L_n_exit:
/* Store the three words back to the caller's structure. */
movl %edx,8(%esi)
movl %ebx,4(%esi)
movl %eax,(%esi)

movl -4(%ebp),%eax /* return the counter */

popl %esi
popl %ebx
leave
ret
 
/fpu_system.h
0,0 → 1,83
/*---------------------------------------------------------------------------+
| fpu_system.h |
| |
| Copyright (C) 1992,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
+---------------------------------------------------------------------------*/
 
#ifndef _FPU_SYSTEM_H
#define _FPU_SYSTEM_H
 
/* system dependent definitions */
 
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
 
/* This sets the pointer FPU_info to point to the argument part
of the stack frame of math_emulate() */
/* Note: expands to an assignment using &arg, so arg must be an lvalue. */
#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg

/*
 * Segment descriptor helpers.  Each takes a descriptor with two words,
 * .a and .b, and extracts fields from them by masking and shifting.
 */
/* The current task's LDT entry selected by s (index is s >> 3). */
#define LDT_DESCRIPTOR(s) (current->ldt[(s) >> 3])
/* Bits 21 and 22 of .b. */
#define SEG_D_SIZE(x) ((x).b & (3 << 21))
/* Bit 23 of .b (non-zero when set). */
#define SEG_G_BIT(x) ((x).b & (1 << 23))
/* 4096 when bit 23 of .b is set, otherwise 1. */
#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1)
/* Bits 24-31, bits 16-19 and bit 23 of .b. */
#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
/* Base address: bits 24-31 of .b, bits 0-7 of .b moved to bits 16-23,
   and the upper 16 bits of .a as bits 0-15. */
#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \
| (((s).b & 0xff) << 16) | ((s).a >> 16))
/* Limit: bits 16-23 of .b combined with the low 16 bits of .a. */
/* NOTE(review): the 0xff0000 mask also keeps bits 20-23 of .b, which the
   other macros here treat as flag bits -- confirm this is intended. */
#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff))
/* True when bit 11 of .b is set and bit 9 is clear. */
#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
/* True when bit 9 of .b is set and bit 11 is clear. */
#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
/* True when bit 10 of .b is set and bit 11 is clear. */
#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
== (1 << 10))

/* The emulator state lives in the current task's tss.i387 area. */
#define I387 (current->tss.i387)
#define FPU_info (I387.soft.info)

/* Accessors for fields of the structure which FPU_info points to.
   The segment values are read through an unsigned short pointer. */
#define FPU_CS (*(unsigned short *) &(FPU_info->___cs))
#define FPU_SS (*(unsigned short *) &(FPU_info->___ss))
#define FPU_DS (*(unsigned short *) &(FPU_info->___ds))
#define FPU_EAX (FPU_info->___eax)
#define FPU_EFLAGS (FPU_info->___eflags)
#define FPU_EIP (FPU_info->___eip)
#define FPU_ORIG_EIP (FPU_info->___orig_eip)

#define FPU_lookahead (I387.soft.lookahead)

/* The next three macros use individual bytes (0, 1 and 2) of
   I387.soft.twd as separate variables. */

/* nz if ip_offset and cs_selector are not to be set for the current
instruction. */
#define no_ip_update (((char *)&(I387.soft.twd))[0])
#define FPU_rm (((unsigned char *)&(I387.soft.twd))[1])

/* Number of bytes of data which can be legally accessed by the current
instruction. This only needs to hold a number <= 108, so a byte will do. */
#define access_limit (((unsigned char *)&(I387.soft.twd))[2])

/* Short names for members of I387.soft. */
#define partial_status (I387.soft.swd)
#define control_word (I387.soft.cwd)
#define regs (I387.soft.regs)
#define top (I387.soft.top)

/* The storage starting at I387.soft.fip / I387.soft.foo viewed as a
   struct address. */
#define instruction_address (*(struct address *)&I387.soft.fip)
#define operand_address (*(struct address *)&I387.soft.foo)
 
/* Call math_abort(FPU_info,SIGSEGV) when verify_area(x,y,z) returns a
   non-zero value.
   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement: a bare `if' here would capture an `else' which follows
   the macro at the point of use. Use it as a statement, with a trailing
   semicolon. */
#define FPU_verify_area(x,y,z) do { if ( verify_area(x,y,z) ) \
math_abort(FPU_info,SIGSEGV); } while (0)
 
/* FPU_IGNORE_CODE_SEGV is explicitly undefined on the next line, so as
   the file stands the #else branch below is the one which is compiled.
   Change the #undef to a #define to skip the check of code addresses. */
#undef FPU_IGNORE_CODE_SEGV
#ifdef FPU_IGNORE_CODE_SEGV
/* verify_area() is very expensive, and causes the emulator to run
about 20% slower if applied to the code. Anyway, errors due to bad
code addresses should be much rarer than errors due to bad data
addresses. */
#define FPU_code_verify_area(z)
#else
/* A simpler test than verify_area() can probably be done for
FPU_code_verify_area() because the only possible error is to step
past the upper boundary of a legal code area. */
/* Checks that z bytes starting at FPU_EIP may be read. */
#define FPU_code_verify_area(z) FPU_verify_area(VERIFY_READ,(void *)FPU_EIP,z)
#endif
 
#endif
/exception.h
0,0 → 1,53
/*---------------------------------------------------------------------------+
| exception.h |
| |
| Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
+---------------------------------------------------------------------------*/
 
#ifndef _EXCEPTION_H_
#define _EXCEPTION_H_


/* Const_(x) yields an immediate operand ($x) when this header is included
   from assembler source, and the plain value x when included from C. */
#ifdef __ASSEMBLY__
#define Const_(x) $##x
#else
#define Const_(x) x
#endif

/* SW_C1 is used by some of the definitions below; pull in fpu_emu.h
   if nothing has defined it yet. */
#ifndef SW_C1
#include "fpu_emu.h"
#endif /* SW_C1 */

#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */
#define EX_ErrorSummary Const_(0x0080) /* Error summary status */
/* Special exceptions: */
#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */
#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */
#define EX_StackUnder Const_(0x0041) /* stack underflow */
/* Exception flags: */
#define EX_Precision Const_(0x0020) /* loss of precision */
#define EX_Underflow Const_(0x0010) /* underflow */
#define EX_Overflow Const_(0x0008) /* overflow */
#define EX_ZeroDiv Const_(0x0004) /* divide by zero */
#define EX_Denormal Const_(0x0002) /* denormalized operand */
#define EX_Invalid Const_(0x0001) /* invalid operation */


/* Precision loss, with (UP) or without (DOWN) the SW_C1 bit. */
#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1))
#define PRECISION_LOST_DOWN Const_(EX_Precision)


#ifndef __ASSEMBLY__

/* EXCEPTION(x) calls exception(x); with DEBUG defined it first prints
   the file and line from which it was invoked. */
#ifdef DEBUG
#define EXCEPTION(x) { printk("exception in %s at line %d\n", \
__FILE__, __LINE__); exception(x); }
#else
#define EXCEPTION(x) exception(x)
#endif

#endif /* __ASSEMBLY__ */

#endif /* _EXCEPTION_H_ */
/reg_round.S
0,0 → 1,699
.file "reg_round.S"
/*---------------------------------------------------------------------------+
| reg_round.S |
| |
| Rounding/truncation/etc for FPU basic arithmetic functions. |
| |
| Copyright (C) 1993,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| This code has four possible entry points. |
| The following must be entered by a jmp instruction: |
| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
| |
| The _round_reg entry point is intended to be used by C code. |
| From C, call as: |
| void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
| |
| For correct "up" and "down" rounding, the argument must have the correct |
| sign. |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Four entry points. |
| |
| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
| %eax:%ebx 64 bit significand |
| %edx 32 bit extension of the significand |
| %edi pointer to an FPU_REG for the result to be stored |
| stack calling function must have set up a C stack frame and |
| pushed %esi, %edi, and %ebx |
| |
| Needed just for the fpu_reg_round_sqrt entry point: |
| %cx A control word in the same format as the FPU control word. |
| Otherwise, PARAM4 must give such a value. |
| |
| |
| The significand and its extension are assumed to be exact in the |
| following sense: |
| If the significand by itself is the exact result then the significand |
| extension (%edx) must contain 0, otherwise the significand extension |
| must be non-zero. |
| If the significand extension is non-zero then the significand is |
| smaller than the magnitude of the correct exact result by an amount |
| greater than zero and less than one ls bit of the significand. |
| The significand extension is only required to have three possible |
| non-zero values: |
| less than 0x80000000 <=> the significand is less than 1/2 an ls |
| bit smaller than the magnitude of the |
| true exact result. |
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
| smaller than the magnitude of the true |
| exact result. |
| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
| bit smaller than the magnitude of the |
| true exact result. |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| The code in this module has become quite complex, but it should handle |
| all of the FPU flags which are set at this stage of the basic arithmetic |
| computations. |
| There are a few rare cases where the results are not set identically to |
| a real FPU. These require a bit more thought because at this stage the |
| results of the code here appear to be more consistent... |
| This may be changed in a future version. |
+---------------------------------------------------------------------------*/
 
 
#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"
 
/* Flags for FPU_bits_lost */
/* (immediate operands, stored with movb and tested with cmpb/testb) */
#define LOST_DOWN $1
#define LOST_UP $2

/* Flags for FPU_denormal */
#define DENORMAL $1
#define UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/* Make the code re-entrant by putting
local storage on the stack: */
/* These are %esp-relative operands, so they name the intended bytes only
   while %esp is where the "pushl %ebx" at fpu_reg_round_sqrt left it;
   code which pushes anything else must pop it before using them. */
#define FPU_bits_lost (%esp)
#define FPU_denormal 1(%esp)

#else
/* Not re-entrant, so we can gain speed by putting
local storage in a static area: */
.data
.align 2,0
FPU_bits_lost:
.byte 0
FPU_denormal:
.byte 0
#endif NON_REENTRANT_FPU
 
 
.text
.globl fpu_reg_round
.globl fpu_reg_round_sqrt
.globl fpu_Arith_exit

/* Entry point when called from C */
/*
 * Builds the stack frame which the shared rounding code expects (%ebp
 * frame, then %esi, %edi, %ebx pushed in that order; fpu_Arith_exit pops
 * them again), loads the registers from the C arguments and joins the
 * common code at fpu_reg_round_sqrt.
 */
ENTRY(round_reg)
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %ebx

movl PARAM1,%edi /* %edi = pointer to the FPU_REG */
movl SIGH(%edi),%eax /* %eax:%ebx = its 64 bit significand */
movl SIGL(%edi),%ebx
movl PARAM2,%edx /* %edx = the second argument, the extension */
movl PARAM3,%ecx /* %ecx = the third argument, the control word */
jmp fpu_reg_round_sqrt
 
/*
 * Common start of the rounding code.
 * fpu_reg_round fetches the control word from PARAM4; the
 * fpu_reg_round_sqrt entry expects it to be in %ecx already.
 * After dealing with a possible de-normal, control passes to the rounding
 * code selected by the precision control (CW_PC) bits.
 */
fpu_reg_round: /* Normal entry point */
movl PARAM4,%ecx

fpu_reg_round_sqrt: /* Entry point from wm_sqrt.S */

#ifndef NON_REENTRANT_FPU
/* This makes room on the stack for FPU_bits_lost and FPU_denormal. */
pushl %ebx /* adjust the stack pointer */
#endif NON_REENTRANT_FPU

#ifdef PARANOID
/* Cannot use this here yet */
/* orl %eax,%eax */
/* jns L_entry_bugged */
#endif PARANOID

cmpl EXP_UNDER,EXP(%edi)
jle xMake_denorm /* The number is a de-normal */

movb $0,FPU_denormal /* 0 -> not a de-normal */

/* xMake_denorm comes back here once it has dealt with the number. */
xDenorm_done:
movb $0,FPU_bits_lost /* No bits yet lost in rounding */

movl %ecx,%esi /* keep the whole control word in %esi */
andl CW_PC,%ecx /* isolate the precision control bits */
cmpl PR_64_BITS,%ecx
je LRound_To_64

cmpl PR_53_BITS,%ecx
je LRound_To_53

cmpl PR_24_BITS,%ecx
je LRound_To_24

/* If no jump is assembled below, an unmatched precision setting falls
   through into the 24 bit rounding code which follows. */
#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
behaves as if the precision control bits were set to 11 "64 bits" */
cmpl PR_RESERVED_BITS,%ecx
je LRound_To_64
#ifdef PARANOID
jmp L_bugged_denorm_486
#endif PARANOID
#else
#ifdef PARANOID
jmp L_bugged_denorm /* There is no bug, just a bad control word */
#endif PARANOID
#endif PECULIAR_486
 
 
/* Round etc to 24 bit precision */
/*
 * The result keeps the upper 24 bits of %eax.  The bits to be discarded
 * are the low 8 bits of %eax together with all of %ebx and %edx.
 * %esi holds the control word; the rounding control (CW_RC) bits select
 * the rounding mode.
 */
LRound_To_24:
movl %esi,%ecx
andl CW_RC,%ecx
cmpl RC_RND,%ecx
je LRound_nearest_24

cmpl RC_CHOP,%ecx
je LCheck_truncate_24

cmpl RC_UP,%ecx /* Towards +infinity */
je LUp_24

cmpl RC_DOWN,%ecx /* Towards -infinity */
je LDown_24

#ifdef PARANOID
jmp L_bugged_round24
#endif PARANOID

LUp_24:
cmpb SIGN_POS,SIGN(%edi)
jne LCheck_truncate_24 /* If negative then up==truncate */

jmp LCheck_24_round_up

LDown_24:
cmpb SIGN_POS,SIGN(%edi)
je LCheck_truncate_24 /* If positive then down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is
   non-zero, otherwise the value is already exact. */
LCheck_24_round_up:
movl %eax,%ecx
andl $0x000000ff,%ecx
orl %ebx,%ecx
orl %edx,%ecx
jnz LDo_24_round_up
jmp LRe_normalise

LRound_nearest_24:
/* Do rounding of the 24th bit if needed (nearest or even) */
movl %eax,%ecx
andl $0x000000ff,%ecx
cmpl $0x00000080,%ecx
jc LCheck_truncate_24 /* less than half, no increment needed */

jne LGreater_Half_24 /* greater than half, increment needed */

/* Possibly half, we need to check the ls bits */
orl %ebx,%ebx
jnz LGreater_Half_24 /* greater than half, increment needed */

orl %edx,%edx
jnz LGreater_Half_24 /* greater than half, increment needed */

/* Exactly half, increment only if 24th bit is 1 (round to even) */
testl $0x00000100,%eax
jz LDo_truncate_24

LGreater_Half_24: /* Rounding: increment at the 24th bit */
LDo_24_round_up:
andl $0xffffff00,%eax /* Truncate to 24 bits */
xorl %ebx,%ebx
movb LOST_UP,FPU_bits_lost
addl $0x00000100,%eax /* carry from this add is tested at the target */
jmp LCheck_Round_Overflow

/* Truncating modes: record a loss only if a discarded bit is non-zero. */
LCheck_truncate_24:
movl %eax,%ecx
andl $0x000000ff,%ecx
orl %ebx,%ecx
orl %edx,%ecx
jz LRe_normalise /* No truncation needed */

LDo_truncate_24:
andl $0xffffff00,%eax /* Truncate to 24 bits */
xorl %ebx,%ebx
movb LOST_DOWN,FPU_bits_lost
jmp LRe_normalise
 
 
/* Round etc to 53 bit precision */
/*
 * The result keeps all of %eax and the upper 21 bits of %ebx.  The bits
 * to be discarded are the low 11 bits of %ebx together with all of %edx.
 * %esi holds the control word; the rounding control (CW_RC) bits select
 * the rounding mode.
 */
LRound_To_53:
movl %esi,%ecx
andl CW_RC,%ecx
cmpl RC_RND,%ecx
je LRound_nearest_53

cmpl RC_CHOP,%ecx
je LCheck_truncate_53

cmpl RC_UP,%ecx /* Towards +infinity */
je LUp_53

cmpl RC_DOWN,%ecx /* Towards -infinity */
je LDown_53

#ifdef PARANOID
jmp L_bugged_round53
#endif PARANOID

LUp_53:
cmpb SIGN_POS,SIGN(%edi)
jne LCheck_truncate_53 /* If negative then up==truncate */

jmp LCheck_53_round_up

LDown_53:
cmpb SIGN_POS,SIGN(%edi)
je LCheck_truncate_53 /* If positive then down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is
   non-zero, otherwise the value is already exact. */
LCheck_53_round_up:
movl %ebx,%ecx
andl $0x000007ff,%ecx
orl %edx,%ecx
jnz LDo_53_round_up
jmp LRe_normalise

LRound_nearest_53:
/* Do rounding of the 53rd bit if needed (nearest or even) */
movl %ebx,%ecx
andl $0x000007ff,%ecx
cmpl $0x00000400,%ecx
jc LCheck_truncate_53 /* less than half, no increment needed */

jnz LGreater_Half_53 /* greater than half, increment needed */

/* Possibly half, we need to check the ls bits */
orl %edx,%edx
jnz LGreater_Half_53 /* greater than half, increment needed */

/* Exactly half, increment only if 53rd bit is 1 (round to even) */
testl $0x00000800,%ebx
jz LTruncate_53

LGreater_Half_53: /* Rounding: increment at the 53rd bit */
LDo_53_round_up:
movb LOST_UP,FPU_bits_lost
andl $0xfffff800,%ebx /* Truncate to 53 bits */
addl $0x00000800,%ebx
adcl $0,%eax /* carry from this add is tested at the target */
jmp LCheck_Round_Overflow

/* Truncating modes: record a loss only if a discarded bit is non-zero. */
LCheck_truncate_53:
movl %ebx,%ecx
andl $0x000007ff,%ecx
orl %edx,%ecx
jz LRe_normalise

LTruncate_53:
movb LOST_DOWN,FPU_bits_lost
andl $0xfffff800,%ebx /* Truncate to 53 bits */
jmp LRe_normalise
 
 
/* Round etc to 64 bit precision */
/*
 * %eax:%ebx is kept whole; only the extension in %edx is discarded.
 * %esi holds the control word; the rounding control (CW_RC) bits select
 * the rounding mode.
 * Execution leaves this section either by a jump to LRe_normalise or by
 * falling out of the bottom (LTruncate_64) into the code which follows.
 */
LRound_To_64:
movl %esi,%ecx
andl CW_RC,%ecx
cmpl RC_RND,%ecx
je LRound_nearest_64

cmpl RC_CHOP,%ecx
je LCheck_truncate_64

cmpl RC_UP,%ecx /* Towards +infinity */
je LUp_64

cmpl RC_DOWN,%ecx /* Towards -infinity */
je LDown_64

#ifdef PARANOID
jmp L_bugged_round64
#endif PARANOID

LUp_64:
cmpb SIGN_POS,SIGN(%edi)
jne LCheck_truncate_64 /* If negative then up==truncate */

orl %edx,%edx
jnz LDo_64_round_up /* non-zero extension: increment */
jmp LRe_normalise /* zero extension: already exact */

LDown_64:
cmpb SIGN_POS,SIGN(%edi)
je LCheck_truncate_64 /* If positive then down==truncate */

orl %edx,%edx
jnz LDo_64_round_up /* non-zero extension: increment */
jmp LRe_normalise /* zero extension: already exact */

LRound_nearest_64:
cmpl $0x80000000,%edx
jc LCheck_truncate_64 /* less than half, no increment needed */

jne LDo_64_round_up /* greater than half, increment needed */

/* Now test for round-to-even */
/* Exactly half: increment only if the ls bit of the significand is 1.
   The byte register %bl is used so that the operand size agrees with
   the "b" suffix of the instruction. */
testb $1,%bl
jz LCheck_truncate_64

LDo_64_round_up:
movb LOST_UP,FPU_bits_lost
addl $1,%ebx
adcl $0,%eax

/* Also the target of the 24 and 53 bit round-up code: the carry flag
   from the preceding add tells whether the significand overflowed. */
LCheck_Round_Overflow:
jnc LRe_normalise

/* Overflow, adjust the result (significand to 1.0) */
rcrl $1,%eax /* the carry is rotated in as the new ms bit */
rcrl $1,%ebx
incl EXP(%edi)
jmp LRe_normalise

LCheck_truncate_64:
orl %edx,%edx
jz LRe_normalise /* zero extension: nothing was lost */

LTruncate_64:
movb LOST_DOWN,FPU_bits_lost
 
/*
 * Rounding is complete.  If FPU_denormal is non-zero, xNormalise_result
 * handles the de-normal/underflow cases first.  Then any recorded
 * precision loss is reported, the tag and significand are stored in the
 * FPU_REG at %edi, and the exponent is checked for overflow.
 */
LRe_normalise:
testb $0xff,FPU_denormal
jnz xNormalise_result

xL_Normalised:
cmpb LOST_UP,FPU_bits_lost
je xL_precision_lost_up

cmpb LOST_DOWN,FPU_bits_lost
je xL_precision_lost_down

xL_no_precision_loss:
/* store the result */
movb TW_Valid,TAG(%edi)

/* L_underflow_to_zero jumps in here, having set the tag itself. */
xL_Store_significand:
movl %eax,SIGH(%edi)
movl %ebx,SIGL(%edi)

xorl %eax,%eax /* No errors detected. */

cmpl EXP_OVER,EXP(%edi)
jge L_overflow

fpu_reg_round_exit:
#ifndef NON_REENTRANT_FPU
/* This discards the local storage pushed at fpu_reg_round_sqrt. */
popl %ebx /* adjust the stack pointer */
#endif NON_REENTRANT_FPU

/* Restore the registers pushed by the caller, in reverse order, and
   return with the result code in %eax. */
fpu_Arith_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
 
 
/*
* Set the FPU status flags to represent precision loss due to
* round-up.
*/
/* %eax is saved around the call and restored afterwards. */
xL_precision_lost_up:
push %eax
call SYMBOL_NAME(set_precision_flag_up)
popl %eax
jmp xL_no_precision_loss

/*
* Set the FPU status flags to represent precision loss due to
* truncation.
*/
/* %eax is saved around the call and restored afterwards. */
xL_precision_lost_down:
push %eax
call SYMBOL_NAME(set_precision_flag_down)
popl %eax
jmp xL_no_precision_loss
 
 
/*
* The number is a denormal (which might get rounded up to a normal)
* Shift the number right the required number of bits, which will
* have to be undone later...
*/
/*
 * On entry %ecx holds the control word and %eax:%ebx:%edx the significand
 * and its extension.  The required shift is (EXP_UNDER+1) - EXP(%edi);
 * after the shift the exponent is EXP_UNDER+1.  Whenever non-zero bits are
 * shifted out past %edx, a low bit is set in the result so that the
 * extension remains non-zero.  %ecx is restored before leaving.
 */
xMake_denorm:
/* The action to be taken depends upon whether the underflow
exception is masked */
testb CW_Underflow,%cl /* Underflow mask. */
jz xUnmasked_underflow /* Do not make a denormal. */

movb DENORMAL,FPU_denormal

pushl %ecx /* Save */
movl EXP_UNDER+1,%ecx
subl EXP(%edi),%ecx /* %ecx = number of bits to shift right */

cmpl $64,%ecx /* shrd only works for 0..31 bits */
jnc xDenorm_shift_more_than_63

cmpl $32,%ecx /* shrd only works for 0..31 bits */
jnc xDenorm_shift_more_than_32

/*
* We got here without jumps by assuming that the most common requirement
* is for a small de-normalising shift.
* Shift by [1..31] bits
*/
addl %ecx,EXP(%edi)
orl %edx,%edx /* extension */
setne %ch /* Save whether %edx is non-zero */
xorl %edx,%edx
shrd %cl,%ebx,%edx /* bits leaving %ebx form the new extension */
shrd %cl,%eax,%ebx
shr %cl,%eax
orb %ch,%dl /* fold the saved flag into the new extension */
popl %ecx
jmp xDenorm_done

/* Shift by [32..63] bits */
/* Done as a shift by (count - 32) bits followed by moving each word down
   one place: %ebx becomes the extension, %eax becomes the ls word. */
xDenorm_shift_more_than_32:
addl %ecx,EXP(%edi)
subb $32,%cl
orl %edx,%edx
setne %ch /* %ch = 1 if the old extension was non-zero */
orb %ch,%bl
xorl %edx,%edx
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax
orl %edx,%edx /* test these 32 bits */
setne %cl /* %cl = 1 if non-zero bits were shifted out of %ebx */
orb %ch,%bl
orb %cl,%bl
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
popl %ecx
jmp xDenorm_done

/* Shift by [64..) bits */
xDenorm_shift_more_than_63:
cmpl $64,%ecx
jne xDenorm_shift_more_than_64

/* Exactly 64 bit shift */
/* %eax becomes the extension; its ls bit is set if either the old
   extension or %ebx was non-zero. */
addl %ecx,EXP(%edi)
xorl %ecx,%ecx
orl %edx,%edx
setne %cl
orl %ebx,%ebx
setne %ch
orb %ch,%cl
orb %cl,%al
movl %eax,%edx
xorl %eax,%eax
xorl %ebx,%ebx
popl %ecx
jmp xDenorm_done

/* More than 64 bits: the significand becomes zero and the extension is
   set to 1. */
xDenorm_shift_more_than_64:
movl EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
movl $1,%edx
xorl %eax,%eax
xorl %ebx,%ebx
popl %ecx
jmp xDenorm_done


/* Underflow is unmasked: the number is not shifted, only flagged. */
xUnmasked_underflow:
movb UNMASKED_UNDERFLOW,FPU_denormal
jmp xDenorm_done
 
 
/* Undo the de-normalisation. */
/*
 * Reached from LRe_normalise when FPU_denormal is non-zero.  For the
 * unmasked underflow case control goes to xSignal_underflow.  Otherwise
 * %eax:%ebx is shifted left until its ms bit is set, the exponent is
 * reduced by the size of the shift, and a masked underflow is raised if
 * any bits were lost in rounding.
 */
xNormalise_result:
cmpb UNMASKED_UNDERFLOW,FPU_denormal
je xSignal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
/* But check it... just in case. */
cmpl EXP_UNDER+1,EXP(%edi)
jne L_norm_bugged
#endif PARANOID

#ifdef PECULIAR_486
/*
* This implements a special feature of 80486 behaviour.
* Underflow will be signalled even if the number is
* not a denormal after rounding.
* This difference occurs only for masked underflow, and not
* in the unmasked case.
* Actual 80486 behaviour differs from this in some circumstances.
*/
orl %eax,%eax /* ms bits */
js LNormalise_shift_done /* Will be masked underflow */
#endif PECULIAR_486

orl %eax,%eax /* ms bits */
js xL_Normalised /* No longer a denormal */

jnz LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits */

/* %eax is zero here. */
orl %ebx,%ebx
jz L_underflow_to_zero /* The contents are zero */

/* Shift left 32 - 63 bits */
movl %ebx,%eax
xorl %ebx,%ebx
subl $32,EXP(%edi)

LNormalise_shift_up_to_31:
bsrl %eax,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx /* %ecx = 31 - (index of the ms set bit of %eax) */
shld %cl,%ebx,%eax
shl %cl,%ebx
subl %ecx,EXP(%edi)

LNormalise_shift_done:
testb $0xff,FPU_bits_lost /* bits lost == underflow */
jz xL_Normalised

/* There must be a masked underflow */
push %eax /* save %eax across the call */
pushl EX_Underflow /* the argument for exception() */
call SYMBOL_NAME(exception)
popl %eax /* discard the argument */
popl %eax /* restore the saved %eax */
jmp xL_Normalised
 
 
/*
* The operations resulted in a number too small to represent.
* Masked response.
*/
/* Reached from xNormalise_result with both %eax and %ebx zero, so the
   significand stored at xL_Store_significand is zero. */
L_underflow_to_zero:
push %eax
call SYMBOL_NAME(set_precision_flag_down)
popl %eax

push %eax /* save %eax across the call */
pushl EX_Underflow /* the argument for exception() */
call SYMBOL_NAME(exception)
popl %eax /* discard the argument */
popl %eax /* restore the saved %eax */

/* Reduce the exponent to EXP_UNDER */
movl EXP_UNDER,EXP(%edi)
movb TW_Zero,TAG(%edi)
jmp xL_Store_significand


/* The operations resulted in a number too large to represent. */
/* arith_overflow() is called with the FPU_REG pointer as its argument. */
/* NOTE(review): %eax is not reloaded after the call, so whatever
   arith_overflow() leaves in %eax is what gets returned -- confirm. */
L_overflow:
push %edi
call SYMBOL_NAME(arith_overflow)
pop %edi
jmp fpu_reg_round_exit
 
 
/*
 * Unmasked underflow: reached from xNormalise_result when FPU_denormal
 * holds UNMASKED_UNDERFLOW.  The exception is raised only if the exponent
 * is still at or below EXP_UNDER after rounding.
 */
xSignal_underflow:
/* The number may have been changed to a non-denormal */
/* by the rounding operations. */
cmpl EXP_UNDER,EXP(%edi)
jle xDo_unmasked_underflow

jmp xL_Normalised

xDo_unmasked_underflow:
/* Increase the exponent by the magic number */
addl $(3*(1<<13)),EXP(%edi)
push %eax /* save %eax across the call */
pushl EX_Underflow /* the argument for the exception routine */
call EXCEPTION
popl %eax /* discard the argument */
popl %eax /* restore the saved %eax */
jmp xL_Normalised
 
 
/*
 * Internal error exits, assembled only when PARANOID is defined.
 * Each one raises EX_INTERNAL combined with a code which identifies the
 * place of detection, then leaves through L_exception_exit, which returns
 * 1 in %eax.
 */
#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
pushl EX_INTERNAL|0x236
call EXCEPTION
popl %ebx /* discard the argument */
jmp L_exception_exit
#else
L_bugged_denorm:
pushl EX_INTERNAL|0x230
call EXCEPTION
popl %ebx /* discard the argument */
jmp L_exception_exit
#endif PECULIAR_486

L_bugged_round24:
pushl EX_INTERNAL|0x231
call EXCEPTION
popl %ebx /* discard the argument */
jmp L_exception_exit

L_bugged_round53:
pushl EX_INTERNAL|0x232
call EXCEPTION
popl %ebx /* discard the argument */
jmp L_exception_exit

L_bugged_round64:
pushl EX_INTERNAL|0x233
call EXCEPTION
popl %ebx /* discard the argument */
jmp L_exception_exit

L_norm_bugged:
pushl EX_INTERNAL|0x234
call EXCEPTION
popl %ebx /* discard the argument */
jmp L_exception_exit

/* The only jump to this label visible in this file is commented out. */
L_entry_bugged:
pushl EX_INTERNAL|0x235
call EXCEPTION
popl %ebx /* discard the argument */
L_exception_exit:
mov $1,%eax /* non-zero return value: an error was detected */
jmp fpu_reg_round_exit
#endif PARANOID
/README
0,0 → 1,434
+---------------------------------------------------------------------------+
| wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. |
| |
| Copyright (C) 1992,1993,1994,1995,1996 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@suburbia.net |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License version 2 as |
| published by the Free Software Foundation. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| |
+---------------------------------------------------------------------------+
 
 
 
wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387
which was my 80387 emulator for early versions of djgpp (gcc under
msdos); wm-emu387 was in turn based upon emu387 which was written by
DJ Delorie for djgpp. The interface to the Linux kernel is based upon
the original Linux math emulator by Linus Torvalds.
 
My target FPU for wm-FPU-emu is that described in the Intel486
Programmer's Reference Manual (1992 edition). Unfortunately, numerous
facets of the functioning of the FPU are not well covered in the
Reference Manual. The information in the manual has been supplemented
with measurements on real 80486's. Unfortunately, it is simply not
possible to be sure that all of the peculiarities of the 80486 have
been discovered, so there are always likely to be obscure differences
in the detailed behaviour of the emulator and a real 80486.
 
wm-FPU-emu does not implement all of the behaviour of the 80486 FPU,
but is very close. See "Limitations" later in this file for a list of
some differences.
 
Please report bugs, etc to me at:
billm@suburbia.net
 
 
--Bill Metzenthen
October 1996
 
 
----------------------- Internals of wm-FPU-emu -----------------------
 
Numeric algorithms:
(1) Add, subtract, and multiply. Nothing remarkable in these.
(2) Divide has been tuned to get reasonable performance. The algorithm
is not the obvious one which most people seem to use, but is designed
to take advantage of the characteristics of the 80386. I expect that
it has been invented many times before I discovered it, but I have not
seen it. It is based upon one of those ideas which one carries around
for years without ever bothering to check it out.
(3) The sqrt function has been tuned to get good performance. It is based
upon Newton's classic method. Performance was improved by capitalizing
upon the properties of Newton's method, and the code is once again
structured taking account of the 80386 characteristics.
(4) The trig, log, and exp functions are based in each case upon quasi-
"optimal" polynomial approximations. My definition of "optimal" was
based upon getting good accuracy with reasonable speed.
(5) The argument reducing code for the trig function effectively uses
a value of pi which is accurate to more than 128 bits. As a consequence,
the reduced argument is accurate to more than 64 bits for arguments up
to a few pi, and accurate to more than 64 bits for most arguments,
even for arguments approaching 2^63. This is far superior to an
80486, which uses a value of pi which is accurate to 66 bits.
 
The code of the emulator is complicated slightly by the need to
account for a limited form of re-entrancy. Normally, the emulator will
emulate each FPU instruction to completion without interruption.
However, it may happen that when the emulator is accessing the user
memory space, swapping may be needed. In this case the emulator may be
temporarily suspended while disk i/o takes place. During this time
another process may use the emulator, thereby perhaps changing static
variables. The code which accesses user memory is confined to five
files:
fpu_entry.c
reg_ld_str.c
load_store.c
get_address.c
errors.c
As from version 1.12 of the emulator, no static variables are used
(apart from those in the kernel's per-process tables). The emulator is
therefore now fully re-entrant, rather than having just the restricted
form of re-entrancy which is required by the Linux kernel.
 
----------------------- Limitations of wm-FPU-emu -----------------------
 
There are a number of differences between the current wm-FPU-emu
(version 1.20) and the 80486 FPU (apart from bugs). Some of the more
important differences are listed below:
 
The Roundup flag does not have much meaning for the transcendental
functions and its 80486 value with these functions is likely to differ
from its emulator value.
 
In a few rare cases the Underflow flag obtained with the emulator will
be different from that obtained with an 80486. This occurs when the
following conditions apply simultaneously:
(a) the operands have a higher precision than the current setting of the
precision control (PC) flags.
(b) the underflow exception is masked.
(c) the magnitude of the exact result (before rounding) is less than 2^-16382.
(d) the magnitude of the final result (after rounding) is exactly 2^-16382.
(e) the magnitude of the exact result would be exactly 2^-16382 if the
operands were rounded to the current precision before the arithmetic
operation was performed.
If all of these apply, the emulator will set the Underflow flag but a real
80486 will not.
 
NOTE: Certain formats of Extended Real are UNSUPPORTED. They are
unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities,
and Unnormals. None of these will be generated by an 80486 or by the
emulator. Do not use them. The emulator treats them differently in
detail from the way an 80486 does.
 
The emulator treats PseudoDenormals differently from an 80486. These
numbers are in fact properly normalised numbers with the exponent
offset by 1, and the emulator treats them as such. Unlike the 80486,
the emulator does not generate a Denormal Operand exception for these
numbers. The arithmetical results produced when using such a number as
an operand are the same for the emulator and a real 80486 (apart from
any slight precision difference for the transcendental functions).
Neither the emulator nor an 80486 produces one of these numbers as the
result of any arithmetic operation. An 80486 can keep one of these
numbers in an FPU register with its identity as a PseudoDenormal, but
the emulator will not; they are always converted to a valid number.
 
Self modifying code can cause the emulator to fail. An example of such
code is:
movl %esp,[%ebx]
fld1
The FPU instruction may be (usually will be) loaded into the pre-fetch
queue of the cpu before the mov instruction is executed. If the
destination of the 'movl' overlaps the FPU instruction then the bytes
in the prefetch queue and memory will be inconsistent when the FPU
instruction is executed. The emulator will be invoked but will not be
able to find the instruction which caused the device-not-present
exception. For this case, the emulator cannot emulate the behaviour of
an 80486DX.
 
Handling of the address size override prefix byte (0x67) has not been
extensively tested yet. A major problem exists because using it in
vm86 mode can cause a general protection fault. Address offsets
greater than 0xffff appear to be illegal in vm86 mode but are quite
acceptable (and work) in real mode. A small test program developed to
check the addressing, and which runs successfully in real mode,
crashes dosemu under Linux and also brings Windows down with a general
protection fault message when run under the MS-DOS prompt of Windows
3.1. (The program simply reads data from a valid address).
 
The emulator supports 16-bit protected mode, with one difference from
an 80486DX. An 80486DX will allow some floating point instructions to
write a few bytes below the lowest address of the stack. The emulator
will not allow this in 16-bit protected mode: no instructions are
allowed to write outside the bounds set by the protection.
 
----------------------- Performance of wm-FPU-emu -----------------------
 
Speed.
-----
 
The speed of floating point computation with the emulator will depend
upon instruction mix. Relative performance is best for the instructions
which require most computation. The simple instructions are adversely
affected by the fpu instruction trap overhead.
 
 
Timing: Some simple timing tests have been made on the emulator functions.
The times include load/store instructions. All times are in microseconds
measured on a 33MHz 386 with 64k cache. The Turbo C tests were under
ms-dos, the next two columns are for emulators running with the djgpp
ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97,
using libm4.0 (hard).
 
function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu
 
+ 60.5 154.8 76.5 139.4
- 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7
* 71.0 190.8 79.6 146.6
/ 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1
 
sin() 310.8 4692.0 319.0 398.5
cos() 284.4 4855.2 308.0 388.7
tan() 495.0 8807.1 394.9 504.7
atan() 328.9 4866.4 601.1 419.5-491.9
 
sqrt() 128.7 crashed 145.2 227.0
log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1
exp() 479.1 6619.2 469.1 850.8
 
 
The performance under Linux is improved by the use of look-ahead code.
The following results show the improvement which is obtained under
Linux due to the look-ahead code. Also given are the times for the
original Linux emulator with the 4.1 'soft' lib.
 
[ Linus' note: I changed look-ahead to be the default under linux, as
there was no reason not to use it after I had edited it to be
disabled during tracing ]
 
wm-FPU-emu w original w
look-ahead 'soft' lib
+ 106.4 190.2
- 108.6-111.6 192.4-216.2
* 113.4 193.1
/ 108.8-124.4 700.1-706.2
 
sin() 390.5 2642.0
cos() 381.5 2767.4
tan() 496.5 3153.3
atan() 367.2-435.5 2439.4-3396.8
 
sqrt() 195.1 4732.5
log() 358.0-387.5 3359.2-3390.3
exp() 619.3 4046.4
 
 
These figures are now somewhat out-of-date. The emulator has become
progressively slower for most functions as more of the 80486 features
have been implemented.
 
 
----------------------- Accuracy of wm-FPU-emu -----------------------
 
 
The accuracy of the emulator is in almost all cases equal to or better
than that of an Intel 80486 FPU.
 
The results of the basic arithmetic functions (+,-,*,/), and fsqrt
match those of an 80486 FPU. They are the best possible; the error for
these never exceeds 1/2 an lsb. The fprem and fprem1 instructions
return exact results; they have no error.
 
 
The following table compares the emulator accuracy for the sqrt(),
trig and log functions against the Turbo C "emulator". For this table,
each function was tested at about 400 points. Ideal worst-case results
would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for
arguments greater than pi/4 can be thought of as being related to the
precision of the argument x; e.g. an argument of pi/2-(1e-10) which is
accurate to 64 bits can result in a relative accuracy in cos() of
about 64 + log2(cos(x)) = 31 bits.
 
 
Function Tested x range Worst result Turbo C
(relative bits)
 
sqrt(x) 1 .. 2 64.1 63.2
atan(x) 1e-10 .. 200 64.2 62.8
cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4
64.1 (x = pi/2-(1e-10)) 31.9
sin(x) 1e-10 .. pi/2 64.0 62.8
tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1
64.1 (x = pi/2-(1e-10)) 31.9
exp(x) 0 .. 1 63.1 ** 62.9
log(x) 1+1e-6 .. 2 63.8 ** 62.1
 
** The accuracy for exp() and log() is low because the FPU (emulator)
does not compute them directly; two operations are required.
 
 
The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or
later) for 'float' variables (24 bit precision numbers) when precision
control is set to 24, 53 or 64 bits, and for 'double' variables (53
bit precision numbers) when precision control is set to 53 bits (a
properly performing FPU cannot pass the 'paranoia' tests for 'double'
variables when precision control is set to 64 bits).
 
The code for reducing the argument for the trig functions (fsin, fcos,
fptan and fsincos) has been improved and now effectively uses a value
for pi which is accurate to more than 128 bits precision. As a
consequence, the accuracy of these functions for large arguments has
been dramatically improved (and is now very much better than an 80486
FPU). There is also now no degradation of accuracy for fcos and fptan
for operands close to pi/2. Measured results are (note that the
definition of accuracy has changed slightly from that used for the
above table):
 
Function Tested x range Worst result
(absolute bits)
 
cos(x) 0 .. 9.22e+18 62.0
sin(x) 1e-16 .. 9.22e+18 62.1
tan(x) 1e-16 .. 9.22e+18 61.8
 
It is possible with some effort to find very large arguments which
give much degraded precision. For example, the integer number
8227740058411162616.0
is within about 10e-7 of a multiple of pi. To find the tan (for
example) of this number to 64 bits precision it would be necessary to
have a value of pi which had about 150 bits precision. The FPU
emulator computes the result to about 42.6 bits precision (the correct
result is about -9.739715e-8). On the other hand, an 80486 FPU returns
0.01059, which in relative terms is hopelessly inaccurate.
 
For arguments close to critical angles (which occur at multiples of
pi/2) the emulator is more accurate than an 80486 FPU. For very large
arguments, the emulator is far more accurate.
 
 
Prior to version 1.20 of the emulator, the accuracy of the results for
the transcendental functions (in their principal range) was not as
good as the results from an 80486 FPU. From version 1.20, the accuracy
has been considerably improved and these functions now give measured
worst-case results which are better than the worst-case results given
by an 80486 FPU.
 
The following table gives the measured results for the emulator. The
number of randomly selected arguments in each case is about half a
million. The group of three columns gives the frequency of the given
accuracy in number of times per million, thus the second of these
columns shows that an accuracy of between 63.80 and 63.89 bits was
found at a rate of 133 times per one million measurements for fsin.
The results show that the fsin, fcos and fptan instructions return
results which are in error (i.e. less accurate than the best possible
result (which is 64 bits)) for about one per cent of all arguments
between -pi/2 and +pi/2. The other instructions have a lower
frequency of results which are in error. The last two columns give
the worst accuracy which was found (in bits) and the approximate value
of the argument which produced it.
 
frequency (per M)
------------------- ---------------
instr arg range # tests 63.7 63.8 63.9 worst at arg
bits bits bits bits
----- ------------ ------- ---- ---- ----- ----- --------
fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317
fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801
fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876
fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q)
fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x)
fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x)
f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709
 
 
Tests performed on an 80486 FPU showed results of lower accuracy. The
following table gives the results which were obtained with an AMD
486DX2/66 (other tests indicate that an Intel 486DX produces
identical results). The tests were basically the same as those used
to measure the emulator (the values, being random, were in general not
the same). The total number of tests for each instruction is given
at the end of the table; in each case about 100k tests were performed.
Another line of figures at the end of the table shows that most of the
instructions return results which are in error for more than 10
percent of the arguments tested.
 
The numbers in the body of the table give the approx number of times a
result of the given accuracy in bits (given in the left-most column)
was obtained per one million arguments. For three of the instructions,
two columns of results are given: * The second column for f2xm1 gives
the number of cases where the results of the first column were for a
positive argument; this shows that this instruction gives better
results for positive arguments than it does for negative. * In the
cases of fcos and fptan, the first column gives the results when all
cases with arguments greater than 1.5 are removed from the results
given in the second column. Unlike the emulator, an 80486 FPU returns
results of relatively poor accuracy for these instructions when the
argument approaches pi/2. The table does not show those cases when the
accuracy of the results was less than 62 bits, which occurs quite
often for fsin and fptan when the argument approaches pi/2. This poor
accuracy is discussed above in relation to the Turbo C "emulator", and
the accuracy of the value of pi.
 
 
bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan
62.0 0 0 0 0 437 0 0 0 0 925
62.1 0 0 10 0 894 0 0 0 0 1023
62.2 14 0 0 0 1033 0 0 0 0 945
62.3 57 0 0 0 1202 0 0 0 0 1023
62.4 385 0 0 10 1292 0 23 0 0 1178
62.5 1140 0 0 119 1649 0 39 0 0 1149
62.6 2037 0 0 189 1620 0 16 0 0 1169
62.7 5086 14 0 646 2315 10 101 35 39 1402
62.8 8818 86 0 984 3050 59 287 131 224 2036
62.9 11340 1355 0 2126 4153 79 605 357 321 1948
63.0 15557 4750 0 3319 5376 246 1281 862 808 2688
63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302
63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724
63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236
63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587
63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262
63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346
63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209
63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329
63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601
 
Per cent with error:
30.9 3.2 18.5 9.8 13.1 11.6 17.4
Total arguments tested:
70194 70099 101784 100641 100641 101799 128853 114893 102675 102675
 
 
------------------------- Contributors -------------------------------
 
A number of people have contributed to the development of the
emulator, often by just reporting bugs, sometimes with suggested
fixes, and a few kind people have provided me with access in one way
or another to an 80486 machine. Contributors include (to those people
who I may have forgotten, please forgive me):
 
Linus Torvalds
Tommy.Thorn@daimi.aau.dk
Andrew.Tridgell@anu.edu.au
Nick Holloway, alfie@dcs.warwick.ac.uk
Hermano Moura, moura@dcs.gla.ac.uk
Jon Jagger, J.Jagger@scp.ac.uk
Lennart Benschop
Brian Gallew, geek+@CMU.EDU
Thomas Staniszewski, ts3v+@andrew.cmu.edu
Martin Howell, mph@plasma.apana.org.au
M Saggaf, alsaggaf@athena.mit.edu
Peter Barker, PETER@socpsy.sci.fau.edu
tom@vlsivie.tuwien.ac.at
Dan Russel, russed@rpi.edu
Daniel Carosone, danielce@ee.mu.oz.au
cae@jpmorgan.com
Hamish Coleman, t933093@minyos.xx.rmit.oz.au
Bruce Evans, bde@kralizec.zeta.org.au
Timo Korvola, Timo.Korvola@hut.fi
Rick Lyons, rick@razorback.brisnet.org.au
Rick, jrs@world.std.com
...and numerous others who responded to my request for help with
a real 80486.
 
/fpu_trig.c
0,0 → 1,1718
/*---------------------------------------------------------------------------+
| fpu_trig.c |
| |
| Implementation of the FPU "transcendental" functions. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_system.h"
#include "exception.h"
#include "fpu_emu.h"
#include "status_w.h"
#include "control_w.h"
#include "reg_constant.h"
 
 
/* Forward declaration: rem_kernel() is defined further down in this file
   but is already needed by trig_arg() below. */
static void rem_kernel(unsigned long long st0, unsigned long long *y,
		       unsigned long long st1,
		       unsigned long long q, int n);

/* When defined, trig_arg() compiles in the additional correction steps
   which use CONST_PI2extra. */
#define BETTER_THAN_486

/* Value passed by f_cos() as the 'even' argument of trig_arg(); trig_arg()
   ORs it into its return value. */
#define FCOS  4
/* Not needed now with new code
#define FPTAN 1
*/
 
/* Used only by fptan, fsin, fcos, and fsincos. */
/* This routine produces very accurate results, similar to
   using a value of pi with more than 128 bits precision. */
/* Limited measurements show no results worse than 64 bit precision
   except for the results for arguments close to 2^63, where the
   precision of the result sometimes degrades to about 63.9 bits */
/*
 * Argument reduction: X is reduced in place by an integer number (q) of
 * multiples of CONST_PI2.
 *
 *   X     The argument; overwritten with the reduced value.  The callers
 *         in this file set X->sign to SIGN_POS before calling.
 *   even  0 (fsin, fptan) or FCOS (f_cos).  When 'even' is non-zero and q
 *         is even, or 'even' is zero and q is odd, the reduced value is
 *         replaced by CONST_PI2 minus the reduced value.
 *
 * Returns -1, with SW_C2 set in partial_status and X untouched, if
 * X->exp >= EXP_BIAS + 63.  Otherwise returns (q & 3) | even, with
 * control_word restored to its value on entry and partial_status restored
 * to its value on entry with SW_C2 cleared.
 */
static int trig_arg(FPU_REG *X, int even)
{
  FPU_REG tmp;
  unsigned long long q;
  int old_cw = control_word, saved_status = partial_status;

  if ( X->exp >= EXP_BIAS + 63 )
    {
      partial_status |= SW_C2;     /* Reduction incomplete. */
      return -1;
    }

  /* Force rounding control to "chop" for the duration of the reduction;
     the entry value is put back from old_cw before returning. */
  control_word &= ~CW_RC;
  control_word |= RC_CHOP;

  /* q = integer part of X / CONST_PI2 */
  reg_div(X, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f);
  round_to_int(&tmp);  /* Fortunately, this can't overflow
			   to 2^64 */
  q = significand(&tmp);
  if ( q )
    {
      /* tmp's significand <- (significand(X) << n) - significand(PI2) * q,
	 then re-attach the exponent of CONST_PI2 and normalize. */
      rem_kernel(significand(X),
		 &significand(&tmp),
		 significand(&CONST_PI2),
		 q, X->exp - CONST_PI2.exp);
      tmp.exp = CONST_PI2.exp;
      normalize(&tmp);
      reg_move(&tmp, X);
    }

  /* The FPTAN macro is commented out near the top of this file, so the
     following section is not compiled unless it is defined elsewhere. */
#ifdef FPTAN
  if ( even == FPTAN )
    {
      if ( ((X->exp >= EXP_BIAS) ||
	    ((X->exp == EXP_BIAS-1)
	     && (X->sigh >= 0xc90fdaa2))) ^ (q & 1) )
	even = FCOS;
      else
	even = 0;
    }
#endif FPTAN

  if ( (even && !(q & 1)) || (!even && (q & 1)) )
    {
      /* Replace the remainder by (pi/2 - remainder) */
      reg_sub(&CONST_PI2, X, X, FULL_PRECISION);
#ifdef BETTER_THAN_486
      /* So far, the results are exact but based upon a 64 bit
	 precision approximation to pi/2. The technique used
	 now is equivalent to using an approximation to pi/2 which
	 is accurate to about 128 bits. */
      if ( (X->exp <= CONST_PI2extra.exp + 64) || (q > 1) )
	{
	  /* This code gives the effect of having pi/2 to better than
	     128 bits precision. */
	  /* tmp <- (q + 1) as a floating point number */
	  significand(&tmp) = q + 1;
	  tmp.exp = EXP_BIAS + 63;
	  tmp.tag = TW_Valid;
	  normalize(&tmp);
	  /* X <- X + (q + 1) * CONST_PI2extra */
	  reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION);
	  reg_add(X, &tmp, X,  FULL_PRECISION);
	  if ( X->sign == SIGN_NEG )
	    {
	      /* CONST_PI2extra is negative, so the result of the addition
		 can be negative. This means that the argument is actually
		 in a different quadrant. The correction is always < pi/2,
		 so it can't overflow into yet another quadrant. */
	      X->sign = SIGN_POS;
	      q++;
	    }
	}
#endif BETTER_THAN_486
    }
#ifdef BETTER_THAN_486
  else
    {
      /* So far, the results are exact but based upon a 64 bit
	 precision approximation to pi/2. The technique used
	 now is equivalent to using an approximation to pi/2 which
	 is accurate to about 128 bits. */
      if ( ((q > 0) && (X->exp <= CONST_PI2extra.exp + 64)) || (q > 1) )
	{
	  /* This code gives the effect of having pi/2 to better than
	     128 bits precision. */
	  /* tmp <- q as a floating point number */
	  significand(&tmp) = q;
	  tmp.exp = EXP_BIAS + 63;
	  tmp.tag = TW_Valid;
	  normalize(&tmp);
	  /* X <- X - q * CONST_PI2extra */
	  reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION);
	  reg_sub(X, &tmp, X, FULL_PRECISION);
	  /* Is X now larger than CONST_PI2 ? */
	  if ( (X->exp == CONST_PI2.exp) &&
	      ((X->sigh > CONST_PI2.sigh)
	       || ((X->sigh == CONST_PI2.sigh)
		   && (X->sigl > CONST_PI2.sigl))) )
	    {
	      /* CONST_PI2extra is negative, so the result of the
		 subtraction can be larger than pi/2. This means
		 that the argument is actually in a different quadrant.
		 The correction is always < pi/2, so it can't overflow
		 into yet another quadrant. */
	      reg_sub(&CONST_PI, X, X, FULL_PRECISION);
	      q++;
	    }
	}
    }
#endif BETTER_THAN_486

  control_word = old_cw;
  partial_status = saved_status & ~SW_C2;     /* Reduction complete. */

  return (q & 3) | even;
}
 
 
/* Convert a long to register.
 *
 *   arg   Points to the integer to be converted.
 *   dest  Receives the result.  For zero, CONST_Z is copied in; otherwise
 *         the sign is set to SIGN_POS or SIGN_NEG, the magnitude is placed
 *         in sigh (sigl cleared) with exponent EXP_BIAS + 31, tag TW_Valid,
 *         and the register is then passed to normalize().
 *
 * The magnitude is computed in unsigned arithmetic: negating the most
 * negative long as a signed value is undefined behaviour in C, whereas
 * the unsigned subtraction below is well defined for every input.
 */
void convert_l2reg(long const *arg, FPU_REG *dest)
{
  long num = *arg;
  unsigned long magnitude;

  if (num == 0)
    { reg_move(&CONST_Z, dest); return; }

  if (num > 0)
    {
      magnitude = (unsigned long)num;
      dest->sign = SIGN_POS;
    }
  else
    {
      /* 0 - (unsigned)num yields |num| without signed overflow */
      magnitude = 0UL - (unsigned long)num;
      dest->sign = SIGN_NEG;
    }

  dest->sigh = magnitude;
  dest->sigl = 0;
  dest->exp = EXP_BIAS + 31;
  dest->tag = TW_Valid;
  normalize(dest);
}
 
 
/* Handle a one-operand instruction whose st(0) is a NaN or is empty.
 *
 *   NaN    A signaling NaN (bit 0x40000000 of sigh clear) raises
 *          EX_Invalid and, if CW_Invalid is set in control_word, is
 *          converted to a quiet NaN.  A quiet NaN is left untouched.
 *   Empty  stack_underflow() is called.
 *
 * Any other tag is reported as an internal error when PARANOID is defined
 * and is otherwise ignored.
 */
static void single_arg_error(FPU_REG *st0_ptr)
{
  char tag = st0_ptr->tag;

  if ( tag == TW_NaN )
    {
      int is_quiet = (st0_ptr->sigh & 0x40000000) != 0;

      if ( !is_quiet )
	{
	  EXCEPTION(EX_Invalid);
	  if ( control_word & CW_Invalid )
	    {
	      /* Convert to a QNaN */
	      st0_ptr->sigh |= 0x40000000;
	    }
	}
      /* return with a NaN in st(0) */
      return;
    }

  if ( tag == TW_Empty )
    {
      stack_underflow();  /* Puts a QNaN in st(0) */
      return;
    }

#ifdef PARANOID
  EXCEPTION(EX_INTERNAL|0x0112);
#endif /* PARANOID */
}
 
 
/* NaN handling for the one-operand instructions which leave two results
 * on the stack.
 *
 * A quiet NaN in st(0) is duplicated: the stack is pushed and the NaN is
 * copied into the new st(0).  A signaling NaN first raises EX_Invalid;
 * only if CW_Invalid is set in control_word is it then converted to a
 * quiet NaN and duplicated in the same way.
 *
 * Any tag other than TW_NaN is reported as an internal error when
 * PARANOID is defined and is otherwise ignored.
 */
static void single_arg_2_error(FPU_REG *st0_ptr)
{
  FPU_REG *st_new_ptr;

  if ( st0_ptr->tag != TW_NaN )
    {
#ifdef PARANOID
      EXCEPTION(EX_INTERNAL|0x0112);
#endif /* PARANOID */
      return;
    }

  if ( !(st0_ptr->sigh & 0x40000000) )
    {
      /* A signaling NaN */
      EXCEPTION(EX_Invalid);
      if ( !(control_word & CW_Invalid) )
	{
	  /* No masked response: the stack is left as it is */
	  return;
	}
      /* The masked response: convert to a QNaN */
      st0_ptr->sigh |= 0x40000000;
    }

  /* Duplicate the (now quiet) NaN; it ends up in both st(0) and st(1) */
  st_new_ptr = &st(-1);
  push();
  reg_move(&st(1), st_new_ptr);
}
 
 
/*---------------------------------------------------------------------------*/
 
/* f2xm1: operates on st(0) in place.
 *
 *   Zero       left unchanged.
 *   +Infinity  left unchanged.
 *   -Infinity  replaced by -1.
 *   Valid      with a negative exp field, poly_2xm1() computes the result
 *              into st(0); with exp >= 0 the register is not modified
 *              (the result is undefined on an 80486).  In both cases the
 *              underflow check and set_precision_flag_up() follow.
 *   other      handed to single_arg_error().
 */
static void f2xm1(FPU_REG *st0_ptr)
{
  char st0_tag;

  clear_C1();
  st0_tag = st0_ptr->tag;

  if ( st0_tag == TW_Zero )
    return;

  if ( st0_tag == TW_Infinity )
    {
      if ( st0_ptr->sign == SIGN_NEG )
	{
	  /* -infinity gives -1 (p16-10) */
	  reg_move(&CONST_1, st0_ptr);
	  st0_ptr->sign = SIGN_NEG;
	}
      return;
    }

  if ( st0_tag != TW_Valid )
    {
      single_arg_error(st0_ptr);
      return;
    }

  if ( st0_ptr->exp < 0 )
    {
#ifdef DENORM_OPERAND
      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	return;
#endif /* DENORM_OPERAND */

      /* poly_2xm1(x) requires 0 < x < 1. */
      poly_2xm1(st0_ptr, st0_ptr);
    }
  /* Otherwise: for an 80486 FPU, the result is undefined; st(0) is
     left as it was. */

  if ( st0_ptr->exp <= EXP_UNDER )
    {
      /* A denormal result has been produced.
	 Precision must have been lost, this is always
	 an underflow. */
      arith_underflow(st0_ptr);
    }

  set_precision_flag_up();  /* 80486 appears to always do this */
}
 
 
/* fptan: st(0) is replaced by the result of poly_tan() on the reduced
 * argument (or left as is for very small arguments), and then the
 * constant 1.0 is pushed onto the stack.
 *
 * NOTE(review): st_new_ptr is not assigned in this function on the
 * TW_Valid and TW_Zero paths before it is used; presumably the
 * STACK_OVERFLOW macro sets it to &st(-1) as a side effect -- confirm
 * against the macro definition (not visible here).
 */
static void fptan(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  FPU_REG *st_new_ptr;
  int q;
  char arg_sign = st0_ptr->sign;   /* saved: the sign is cleared below */

  /* Stack underflow has higher priority */
  if ( st0_tag == TW_Empty )
    {
      stack_underflow();  /* Puts a QNaN in st(0) */
      if ( control_word & CW_Invalid )
	{
	  st_new_ptr = &st(-1);
	  push();
	  stack_underflow();  /* Puts a QNaN in the new st(0) */
	}
      return;
    }

  if ( STACK_OVERFLOW )
    { stack_overflow(); return; }

  switch ( st0_tag )
    {
    case TW_Valid:
      if ( st0_ptr->exp > EXP_BIAS - 40 )
	{
	  /* Reduce |st(0)|; trig_arg() returns -1 if it cannot. */
	  st0_ptr->sign = SIGN_POS;
	  if ( (q = trig_arg(st0_ptr, 0)) != -1 )
	    {
	      poly_tan(st0_ptr, st0_ptr);
	      /* NOTE(review): this stores an integer expression directly
		 into the sign field, which looks like it relies on the
		 SIGN_POS/SIGN_NEG values being 0 and 1 -- confirm. */
	      st0_ptr->sign = (q & 1) ^ arg_sign;
	    }
	  else
	    {
	      /* Operand is out of range */
	      st0_ptr->sign = arg_sign;         /* restore st(0) */
	      return;
	    }
	  set_precision_flag_up();  /* We do not really know if up or down */
	}
      else
	{
	  /* For a small arg, the result == the argument */
	  /* Underflow may happen */

	  if ( st0_ptr->exp <= EXP_UNDER )
	    {
#ifdef DENORM_OPERAND
	      if ( denormal_operand() )
		return;
#endif DENORM_OPERAND
	      /* A denormal result has been produced.
		 Precision must have been lost, this is always
		 an underflow. */
	      if ( arith_underflow(st0_ptr) )
		return;
	    }
	  set_precision_flag_down();  /* Must be down. */
	}
      /* Push the constant 1.0 on top of the result */
      push();
      reg_move(&CONST_1, st_new_ptr);
      return;
      break;   /* not reached: follows the return above */
    case TW_Infinity:
      /* The 80486 treats infinity as an invalid operand */
      arith_invalid(st0_ptr);
      if ( control_word & CW_Invalid )
	{
	  /* Invalid is masked: the second result slot is filled as well */
	  st_new_ptr = &st(-1);
	  push();
	  arith_invalid(st_new_ptr);
	}
      return;
    case TW_Zero:
      /* st(0) stays zero; 1.0 is pushed on top of it */
      push();
      reg_move(&CONST_1, st_new_ptr);
      setcc(0);
      break;
    default:
      single_arg_2_error(st0_ptr);
      break;
    }
}
 
 
/* fxtract: splits st(0) into two stack entries.
 *
 * For a valid operand the stack is pushed; the new st(0) receives a copy
 * of the operand with its exp field set to EXP_BIAS, and the old register
 * (now st(1)) receives the value (exp - EXP_BIAS) converted by
 * convert_l2reg().  Zero, infinity, NaN and empty operands are handled
 * individually below.
 *
 * NOTE(review): st_new_ptr is never assigned explicitly in this function;
 * presumably the STACK_OVERFLOW macro sets it to &st(-1) as a side effect
 * -- confirm against the macro definition (not visible here).
 */
static void fxtract(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  FPU_REG *st_new_ptr;
  /* After push(), the register st0_ptr points to will be st(1) */
  register FPU_REG *st1_ptr = st0_ptr;  /* anticipate */

  if ( STACK_OVERFLOW )
    {  stack_overflow(); return; }
  clear_C1();
  if ( !(st0_tag ^ TW_Valid) )
    {
      long e;

#ifdef DENORM_OPERAND
      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	return;
#endif DENORM_OPERAND
      push();
      /* New st(0): the operand with its exponent replaced by EXP_BIAS */
      reg_move(st1_ptr, st_new_ptr);
      st_new_ptr->exp = EXP_BIAS;
      /* st(1): the exponent relative to EXP_BIAS, as a number */
      e = st1_ptr->exp - EXP_BIAS;
      convert_l2reg(&e, st1_ptr);
      return;
    }
  else if ( st0_tag == TW_Zero )
    {
      char sign = st0_ptr->sign;
      /* A non-zero return from divide_by_zero() abandons the operation */
      if ( divide_by_zero(SIGN_NEG, st0_ptr) )
	return;
      /* The new st(0) is a zero carrying the sign of the operand */
      push();
      reg_move(&CONST_Z, st_new_ptr);
      st_new_ptr->sign = sign;
      return;
    }
  else if ( st0_tag == TW_Infinity )
    {
      char sign = st0_ptr->sign;
      /* The old register becomes positive; the new st(0) is an infinity
	 carrying the sign of the operand */
      st0_ptr->sign = SIGN_POS;
      push();
      reg_move(&CONST_INF, st_new_ptr);
      st_new_ptr->sign = sign;
      return;
    }
  else if ( st0_tag == TW_NaN )
    {
      if ( real_2op_NaN(st0_ptr, st0_ptr, st0_ptr) )
	return;
      /* Duplicate the NaN into the new st(0) */
      push();
      reg_move(st1_ptr, st_new_ptr);
      return;
    }
  else if ( st0_tag == TW_Empty )
    {
      /* Is this the correct behaviour? */
      /* NOTE(review): control_word is tested against EX_Invalid here,
	 whereas the other functions in this file test it against
	 CW_Invalid -- confirm that the two constants have the same
	 value. */
      if ( control_word & EX_Invalid )
	{
	  stack_underflow();
	  push();
	  stack_underflow();
	}
      else
	EXCEPTION(EX_StackUnder);
    }
#ifdef PARANOID
  else
    EXCEPTION(EX_INTERNAL | 0x119);
#endif PARANOID
}
 
 
/* fdecstp: clears C1 and moves the stack top down by one.  The register
   contents are not touched and the st0_ptr argument is not used. */
static void fdecstp(FPU_REG *st0_ptr)
{
  clear_C1();

  /* st0_ptr will be fixed in math_emulate() before the next instr */
  top -= 1;
}
 
/* fincstp: clears C1 and moves the stack top up by one.  The register
   contents are not touched and the st0_ptr argument is not used. */
static void fincstp(FPU_REG *st0_ptr)
{
  clear_C1();

  /* st0_ptr will be fixed in math_emulate() before the next instr */
  top += 1;
}
 
 
/* fsqrt: replaces st(0) by its square root.
 *
 *   Zero                 left unchanged.
 *   +Infinity            left unchanged.
 *   -Infinity, negative  arith_invalid().
 *   Valid, positive      the exponent is split so that wm_sqrt() works on
 *                        a value in [1.0 .. 4.0); half of the original
 *                        exponent is added back afterwards.
 *   other                handed to single_arg_error().
 */
static void fsqrt_(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  int expon;

  clear_C1();

  if ( st0_tag == TW_Zero )
    return;

  if ( st0_tag == TW_Infinity )
    {
      if ( st0_ptr->sign == SIGN_NEG )
	{
	  /* sqrt(-Infinity) is invalid */
	  arith_invalid(st0_ptr);
	}
      return;
    }

  if ( st0_tag != TW_Valid )
    {
      single_arg_error(st0_ptr);
      return;
    }

  if ( st0_ptr->sign == SIGN_NEG )
    {
      /* sqrt(negative) is invalid */
      arith_invalid(st0_ptr);
      return;
    }

#ifdef DENORM_OPERAND
  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
    return;
#endif /* DENORM_OPERAND */

  expon = st0_ptr->exp - EXP_BIAS;

  /* make st(0) in [1.0 .. 4.0) */
  st0_ptr->exp = EXP_BIAS + (expon & 1);

  /* Do the computation */
  wm_sqrt(st0_ptr, control_word);

  st0_ptr->exp += expon >> 1;
  st0_ptr->sign = SIGN_POS;
}
 
 
/* frndint: rounds st(0) to an integer, in place.
 *
 * Zero and infinity are left unchanged, as is a valid number whose
 * exponent exceeds EXP_BIAS+63.  Other valid numbers go through
 * round_to_int(); a non-zero return from it is passed on to
 * set_precision_flag().  The significand is then given the exponent
 * EXP_BIAS + 63 and normalized.  Any other tag is handed to
 * single_arg_error().
 */
static void frndint_(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  int flags;

  if ( (st0_tag == TW_Zero) || (st0_tag == TW_Infinity) )
    return;

  if ( st0_tag != TW_Valid )
    {
      single_arg_error(st0_ptr);
      return;
    }

  if ( st0_ptr->exp > EXP_BIAS+63 )
    return;

#ifdef DENORM_OPERAND
  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
    return;
#endif /* DENORM_OPERAND */

  /* Fortunately, this can't overflow to 2^64 */
  flags = round_to_int(st0_ptr);
  if ( flags )
    set_precision_flag(flags);

  st0_ptr->exp = EXP_BIAS + 63;
  normalize(st0_ptr);
}
 
 
/* fsin: replaces st(0) by its sine.
 *
 *   Zero      st(0) is left unchanged; the condition codes are cleared.
 *   Infinity  arith_invalid().
 *   Valid     arguments with exp <= EXP_BIAS - 40 are returned unchanged
 *             (after the underflow / denormal checks); larger ones are
 *             reduced by trig_arg() and evaluated by poly_sine().  If
 *             trig_arg() reports that it cannot reduce the argument,
 *             st(0) is restored and left as the result.
 *   other     handed to single_arg_error().
 */
static void fsin(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  char arg_sign = st0_ptr->sign;
  FPU_REG rv;
  int q;

  if ( st0_tag == TW_Zero )
    {
      setcc(0);
      return;
    }

  if ( st0_tag == TW_Infinity )
    {
      /* The 80486 treats infinity as an invalid operand */
      arith_invalid(st0_ptr);
      return;
    }

  if ( st0_tag != TW_Valid )
    {
      single_arg_error(st0_ptr);
      return;
    }

  if ( st0_ptr->exp <= EXP_BIAS - 40 )
    {
      /* For a small arg, the result == the argument */
      if ( st0_ptr->exp > EXP_UNDER )
	{
	  set_precision_flag_up();  /* Must be up. */
	  return;
	}

      /* Underflow may happen */
#ifdef DENORM_OPERAND
      if ( denormal_operand() )
	return;
#endif /* DENORM_OPERAND */

      /* A denormal result has been produced.
	 Precision must have been lost, this is always
	 an underflow. */
      arith_underflow(st0_ptr);
      return;
    }

  /* Work on the magnitude; the sign is re-applied to the result */
  st0_ptr->sign = SIGN_POS;
  q = trig_arg(st0_ptr, 0);
  if ( q == -1 )
    {
      /* Operand is out of range */
      st0_ptr->sign = arg_sign;         /* restore st(0) */
      return;
    }

  poly_sine(st0_ptr, &rv);

  if ( q & 2 )
    rv.sign ^= SIGN_POS ^ SIGN_NEG;
  rv.sign ^= arg_sign;
  reg_move(&rv, st0_ptr);

  /* We do not really know if up or down */
  set_precision_flag_up();
}
 
 
/* Cosine of *arg, computed in place.
 *
 * Returns 0 if *arg now holds the result, 1 if the operation was not
 * completed (out-of-range operand, infinity, denormal operand trap, or an
 * operand passed on to single_arg_error()).
 *
 *   Zero      result is 1.0.
 *   Valid     exp <= EXP_BIAS - 40: result is 1.0.
 *             up to the threshold tested below: poly_cos() directly.
 *             larger: reduced by trig_arg(arg, FCOS), then poly_sine().
 *   Infinity  arith_invalid().
 */
static int f_cos(FPU_REG *arg)
{
  char arg_sign = arg->sign;
  FPU_REG rv;
  int q;

  if ( arg->tag == TW_Zero )
    {
      reg_move(&CONST_1, arg);
      setcc(0);
      return 0;
    }

  if ( arg->tag == TW_Infinity )
    {
      /* The 80486 treats infinity as an invalid operand */
      arith_invalid(arg);
      return 1;
    }

  if ( arg->tag != TW_Valid )
    {
      single_arg_error(arg);  /* requires arg == &st(0) */
      return 1;
    }

  if ( arg->exp <= EXP_BIAS - 40 )
    {
      /* Very small argument: the result is 1.0 */
#ifdef DENORM_OPERAND
      if ( (arg->exp <= EXP_UNDER) && (denormal_operand()) )
	return 1;
#endif /* DENORM_OPERAND */

      setcc(0);
      reg_move(&CONST_1, arg);
#ifdef PECULIAR_486
      set_precision_flag_down();  /* 80486 appears to do this. */
#else
      set_precision_flag_up();  /* Must be up. */
#endif /* PECULIAR_486 */
      return 0;
    }

  /* The result does not depend upon the sign of the argument */
  arg->sign = SIGN_POS;

  if ( (arg->exp < EXP_BIAS)
       || ((arg->exp == EXP_BIAS)
	   && (significand(arg) <= 0xc90fdaa22168c234LL)) )
    {
      /* No argument reduction is needed */
      poly_cos(arg, &rv);
    }
  else
    {
      q = trig_arg(arg, FCOS);
      if ( q == -1 )
	{
	  /* Operand is out of range */
	  arg->sign = arg_sign;         /* restore st(0) */
	  return 1;
	}

      poly_sine(arg, &rv);

      if ( (q+1) & 2 )
	rv.sign ^= SIGN_POS ^ SIGN_NEG;
    }

  reg_move(&rv, arg);

  /* We do not really know if up or down */
  set_precision_flag_down();
  return 0;
}
 
 
/* fcos: replaces st(0) by its cosine.  All of the work is done by
   f_cos(); its completion status is not needed here. */
static void fcos(FPU_REG *st0_ptr)
{
  (void) f_cos(st0_ptr);
}
 
 
/* fsincos: the cosine is computed by f_cos() on a private copy of st(0);
 * if that completes, st(0) is replaced by its sine using fsin(), the
 * stack is pushed and the cosine is stored in the new st(0).
 *
 * NOTE(review): st_new_ptr is only assigned explicitly on the stack
 * underflow path; on the other paths it is presumably set to &st(-1) by
 * the STACK_OVERFLOW macro -- confirm against the macro definition (not
 * visible here).
 */
static void fsincos(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  FPU_REG *st_new_ptr;
  FPU_REG arg;   /* copy of the operand, used for the cosine */

  /* Stack underflow has higher priority */
  if ( st0_tag == TW_Empty )
    {
      stack_underflow();  /* Puts a QNaN in st(0) */
      if ( control_word & CW_Invalid )
	{
	  st_new_ptr = &st(-1);
	  push();
	  stack_underflow();  /* Puts a QNaN in the new st(0) */
	}
      return;
    }

  if ( STACK_OVERFLOW )
    { stack_overflow(); return; }

  if ( st0_tag == TW_NaN )
    {
      single_arg_2_error(st0_ptr);
      return;
    }
  else if ( st0_tag == TW_Infinity )
    {
      /* The 80486 treats infinity as an invalid operand */
      if ( !arith_invalid(st0_ptr) )
	{
	  /* unmasked response */
	  /* NOTE(review): the comment above may be inverted.  The
	     equivalent code in fptan() pushes the second result only
	     when CW_Invalid is set in control_word (i.e. when the
	     exception is masked), and elsewhere in this file a zero
	     return from arith_invalid() allows the operation to
	     complete -- confirm against arith_invalid(). */
	  push();
	  arith_invalid(st_new_ptr);
	}
      return;
    }

  /* Compute the cosine first, on the copy; st(0) is only modified if
     f_cos() reports success (returns 0). */
  reg_move(st0_ptr,&arg);
  if ( !f_cos(&arg) )
    {
      fsin(st0_ptr);
      push();
      reg_move(&arg,st_new_ptr);
    }

}
 
 
/*---------------------------------------------------------------------------*/
/* The following all require two arguments: st(0) and st(1) */
 
/* A lean, mean kernel for the fprem instructions. This relies upon
   the division and rounding to an integer in do_fprem giving an
   exact result. Because of this, rem_kernel() needs to deal only with
   the least significant 64 bits, the more significant bits of the
   result must be zero.

   Computes  *y = (st0 << n) - st1 * q   modulo 2^64.

     st0  significand of the dividend
     y    receives the 64 bit result
     st1  significand of the divisor
     q    integer quotient
     n    left shift to apply to st0; the caller must ensure that
          0 <= n < 64, any other shift count is undefined in C

   This is written in plain C rather than inline assembler.  Unsigned
   64 bit arithmetic wraps modulo 2^64, which is exactly what the
   previous mull/subl/sbbl sequence computed, without depending upon
   multi-line string literals or upon asm operand constraints which
   declared read-modify-write operands as output-only.
 */
static void rem_kernel(unsigned long long st0, unsigned long long *y,
		       unsigned long long st1,
		       unsigned long long q, int n)
{
  unsigned long long x;

  x = st0 << n;

  /* Do the required multiplication and subtraction; only the low
     64 bits of the product are needed. */
  x -= st1 * q;

  *y = x;
}
 
 
/* Remainder of st(0) / st(1) */
/* This routine produces exact results, i.e. there is never any
   rounding or truncation, etc of the result. */
/*
 *   st0_ptr  Points to st(0); it is overwritten with the remainder.
 *   round    When equal to RC_RND, st(1) may be subtracted once more so
 *            that the result is <= 1/2 of st(1).  The callers are not
 *            visible here; presumably fprem1 passes RC_RND -- confirm.
 *
 * For two valid operands with an exponent difference below 64 the
 * reduction is completed and the low three bits of the quotient are
 * reported in the condition codes (bit 2 -> C0, bit 1 -> C3, bit 0 -> C1).
 * For larger differences only a partial reduction is made and C2 is set
 * (except for the zero-result case handled separately below).
 */
static void do_fprem(FPU_REG *st0_ptr, int round)
{
  FPU_REG *st1_ptr = &st(1);
  char st1_tag = st1_ptr->tag;
  char st0_tag = st0_ptr->tag;
  char sign = st0_ptr->sign;   /* the result takes the sign of st(0) */

  if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
    {
      /* Both operands are valid */
      FPU_REG tmp;
      int old_cw = control_word;
      int expdif = st0_ptr->exp - st1_ptr->exp;
      long long q;
      unsigned short saved_status;
      int cc = 0;

#ifdef DENORM_OPERAND
      if ( ((st0_ptr->exp <= EXP_UNDER) ||
	    (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
	return;
#endif DENORM_OPERAND
      
      /* We want the status following the denorm tests, but don't want
	 the status changed by the arithmetic operations. */
      saved_status = partial_status;
      /* Force rounding control to "chop" while the quotient is formed */
      control_word &= ~CW_RC;
      control_word |= RC_CHOP;

      if (expdif < 64)
	{
	  /* This should be the most common case */

	  if ( expdif > -2 )
	    {
	      reg_div(st0_ptr, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f);

	      if ( tmp.exp >= EXP_BIAS )
		{
		  /* The quotient is at least 1 */
		  round_to_int(&tmp);  /* Fortunately, this can't overflow
					  to 2^64 */
		  q = significand(&tmp);

		  /* tmp's significand <- (st0 << expdif) - st1 * q */
		  rem_kernel(significand(st0_ptr),
			     &significand(&tmp),
			     significand(st1_ptr),
			     q, expdif);

		  tmp.exp = st1_ptr->exp;
		}
	      else
		{
		  /* The quotient is less than 1: st(0) itself is the
		     remainder */
		  reg_move(st0_ptr, &tmp);
		  q = 0;
		}
	      tmp.sign = sign;

	      if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) )
		{
		  /* We may need to subtract st(1) once more,
		     to get a result <= 1/2 of st(1). */
		  unsigned long long x;
		  expdif = st1_ptr->exp - tmp.exp;
		  if ( expdif <= 1 )
		    {
		      /* x <- st(1) - remainder, on the scale of tmp */
		      if ( expdif == 0 )
			x = significand(st1_ptr) - significand(&tmp);
		      else /* expdif is 1 */
			x = (significand(st1_ptr) << 1) - significand(&tmp);
		      if ( (x < significand(&tmp)) ||
			  /* or equi-distant (from 0 & st(1)) and q is odd */
			  ((x == significand(&tmp)) && (q & 1) ) )
			{
			  tmp.sign ^= (SIGN_POS^SIGN_NEG);
			  significand(&tmp) = x;
			  q++;
			}
		    }
		}

	      /* Report the three least significant bits of the quotient */
	      if (q & 4) cc |= SW_C0;
	      if (q & 2) cc |= SW_C3;
	      if (q & 1) cc |= SW_C1;
	    }
	  else
	    {
	      /* expdif <= -2: st(0) is left unchanged */
	      control_word = old_cw;
	      setcc(0);
	      return;
	    }
	}
      else
	{
	  /* There is a large exponent difference ( >= 64 ) */
	  /* To make much sense, the code in this section should
	     be done at high precision. */
	  int exp_1;

	  /* prevent overflow here */
	  /* N is 'a number between 32 and 63' (p26-113) */
	  reg_move(st0_ptr, &tmp);
	  tmp.exp = EXP_BIAS + 56;
	  /* The exponent of st(1) is temporarily replaced for the division
	     and restored immediately afterwards */
	  exp_1 = st1_ptr->exp;      st1_ptr->exp = EXP_BIAS;
	  expdif -= 56;

	  reg_div(&tmp, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f);
	  st1_ptr->exp = exp_1;

	  round_to_int(&tmp);  /* Fortunately, this can't overflow to 2^64 */

	  rem_kernel(significand(st0_ptr),
		     &significand(&tmp),
		     significand(st1_ptr),
		     significand(&tmp),
		     tmp.exp - EXP_BIAS
		     ); 
	  tmp.exp = exp_1 + expdif;
	  tmp.sign = sign;

	  /* It is possible for the operation to be complete here.
	     What does the IEEE standard say? The Intel 80486 manual
	     implies that the operation will never be completed at this
	     point, and the behaviour of a real 80486 confirms this.
	   */
	  if ( !(tmp.sigh | tmp.sigl) )
	    {
	      /* The result is zero */
	      control_word = old_cw;
	      partial_status = saved_status;
	      reg_move(&CONST_Z, st0_ptr);
	      st0_ptr->sign = sign;
#ifdef PECULIAR_486
	      setcc(SW_C2);
#else
	      setcc(0);
#endif PECULIAR_486
	      return;
	    }
	  cc = SW_C2;
	}

      /* Put back the control word and the status saved after the denormal
	 tests, then store the normalized result */
      control_word = old_cw;
      partial_status = saved_status;
      normalize_nuo(&tmp);
      reg_move(&tmp, st0_ptr);
      setcc(cc);

      /* The only condition to be looked for is underflow,
	 and it can occur here only if underflow is unmasked. */
      if ( (st0_ptr->exp <= EXP_UNDER) && (st0_ptr->tag != TW_Zero)
	  && !(control_word & CW_Underflow) )
	arith_underflow(st0_ptr);

      return;
    }
  else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) )
    {
      stack_underflow();
      return;
    }
  else if ( st0_tag == TW_Zero )
    {
      if ( st1_tag == TW_Valid )
	{
#ifdef DENORM_OPERAND
	  if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	    return;
#endif DENORM_OPERAND

	  /* fprem(0,Valid): st(0) is left as zero */
	  setcc(0); return;
	}
      else if ( st1_tag == TW_Zero )
	{ arith_invalid(st0_ptr); return; } /* fprem(?,0) always invalid */
      else if ( st1_tag == TW_Infinity )
	{ setcc(0); return; }
    }
  else if ( st0_tag == TW_Valid )
    {
      if ( st1_tag == TW_Zero )
	{
	  arith_invalid(st0_ptr); /* fprem(Valid,Zero) is invalid */
	  return;
	}
      else if ( st1_tag != TW_NaN )
	{
#ifdef DENORM_OPERAND
	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	    return;
#endif DENORM_OPERAND

	  if ( st1_tag == TW_Infinity )
	    {
	      /* fprem(Valid,Infinity) is o.k. */
	      setcc(0); return;
	    }
	}
    }
  else if ( st0_tag == TW_Infinity )
    {
      if ( st1_tag != TW_NaN )
	{
	  arith_invalid(st0_ptr); /* fprem(Infinity,?) is invalid */
	  return;
	}
    }

  /* One of the registers must contain a NaN if we got here. */

#ifdef PARANOID
  if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) )
      EXCEPTION(EX_INTERNAL | 0x118);
#endif PARANOID

  real_2op_NaN(st1_ptr, st0_ptr, st0_ptr);

}
 
 
/* ST(1) <- ST(1) * log ST;  pop ST */
/*
 * The result is written to st(1) and the stack is then popped, so that it
 * ends up in st(0).  On the paths which call arith_invalid(),
 * real_2op_NaN() or divide_by_zero(), the pop is made only if that call
 * returns zero.  The paths which return after denormal_operand() returns
 * non-zero leave the stack untouched.
 *
 * NOTE(review): the name poly_l2 suggests that the logarithm is to base 2;
 * its definition is not visible here -- confirm.
 */
static void fyl2x(FPU_REG *st0_ptr)
{
  char st0_tag = st0_ptr->tag;
  FPU_REG *st1_ptr = &st(1), exponent;
  char st1_tag = st1_ptr->tag;
  int e;

  clear_C1();
  if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
    {
      /* Both operands are valid */
      if ( st0_ptr->sign == SIGN_POS )
	{
#ifdef DENORM_OPERAND
	  if ( ((st0_ptr->exp <= EXP_UNDER) ||
		(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
	    return;
#endif DENORM_OPERAND

	  if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
	    {
	      /* Special case. The result can be precise. */
	      /* Only the top bit of the significand is set, so the log
		 is taken directly from the exponent: build |e| as a
		 register and multiply st(1) by it. */
	      e = st0_ptr->exp - EXP_BIAS;
	      if ( e > 0 )
		{
		  exponent.sigh = e;
		  exponent.sign = SIGN_POS;
		}
	      else
		{
		  exponent.sigh = -e;
		  exponent.sign = SIGN_NEG;
		}
	      exponent.sigl = 0;
	      exponent.exp = EXP_BIAS + 31;
	      exponent.tag = TW_Valid;
	      normalize_nuo(&exponent);
	      reg_mul(&exponent, st1_ptr, st1_ptr, FULL_PRECISION);
	    }
	  else
	    {
	      /* The usual case */
	      poly_l2(st0_ptr, st1_ptr, st1_ptr);
	      if ( st1_ptr->exp <= EXP_UNDER )
		{
		  /* A denormal result has been produced.
		     Precision must have been lost, this is always
		     an underflow. */
		  arith_underflow(st1_ptr);
		}
	      else
		set_precision_flag_up();  /* 80486 appears to always do this */
	    }
	  pop();
	  return;
	}
      else
	{
	  /* negative */
	  if ( !arith_invalid(st1_ptr) )
	    pop();
	  return;
	}
    }
  else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) )
    {
      stack_underflow_pop(1);
      return;
    }
  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
    {
      if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
	pop();
      return;
    }
  else if ( (st0_tag <= TW_Zero) && (st1_tag <= TW_Zero) )
    {
      /* one of the args is zero, the other valid, or both zero */
      if ( st0_tag == TW_Zero )
	{
	  if ( st1_tag == TW_Zero )
	    {
	      /* Both args zero is invalid */
	      if ( !arith_invalid(st1_ptr) )
		pop();
	    }
#ifdef PECULIAR_486
	  /* This case is not specifically covered in the manual,
	     but divide-by-zero would seem to be the best response.
	     However, a real 80486 does it this way... */
	  /* NOTE(review): this test is made where st0_tag == TW_Zero,
	     and st0_tag was read from st0_ptr->tag at the top of the
	     function, so the branch looks unreachable unless something
	     above modifies the tag -- confirm what was intended. */
	  else if ( st0_ptr->tag == TW_Infinity )
	    {
	      reg_move(&CONST_INF, st1_ptr);
	      pop();
	    }
#endif PECULIAR_486
	  else
	    {
	      /* st(0) is zero, st(1) is valid */
	      if ( !divide_by_zero(st1_ptr->sign^SIGN_NEG^SIGN_POS, st1_ptr) )
		pop();
	    }
	  return;
	}
      else
	{
	  /* st(1) contains zero, st(0) valid <> 0 */
	  /* Zero is the valid answer */
	  char sign = st1_ptr->sign;

	  if ( st0_ptr->sign == SIGN_NEG )
	    {
	      /* log(negative) */
	      if ( !arith_invalid(st1_ptr) )
		pop();
	      return;
	    }

#ifdef DENORM_OPERAND
	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	    return;
#endif DENORM_OPERAND

	  /* The sign of the zero is inverted when st(0) < 1.0 */
	  if ( st0_ptr->exp < EXP_BIAS ) sign ^= SIGN_NEG^SIGN_POS;
	  pop(); st0_ptr = &st(0);
	  reg_move(&CONST_Z, st0_ptr);
	  st0_ptr->sign = sign;
	  return;
	}
    }
  /* One or both arg must be an infinity */
  else if ( st0_tag == TW_Infinity )
    {
      if ( (st0_ptr->sign == SIGN_NEG) || (st1_tag == TW_Zero) )
	{
	  /* log(-infinity) or 0*log(infinity) */
	  if ( !arith_invalid(st1_ptr) )
	    pop();
	  return;
	}
      else
	{
	  /* The result is an infinity with the sign of st(1) */
	  char sign = st1_ptr->sign;

#ifdef DENORM_OPERAND
	  if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	    return;
#endif DENORM_OPERAND

	  pop(); st0_ptr = &st(0);
	  reg_move(&CONST_INF, st0_ptr);
	  st0_ptr->sign = sign;
	  return;
	}
    }
  /* st(1) must be infinity here */
  else if ( (st0_tag == TW_Valid) && (st0_ptr->sign == SIGN_POS) )
    {
      if ( st0_ptr->exp >= EXP_BIAS )
	{
	  if ( (st0_ptr->exp == EXP_BIAS) &&
	      (st0_ptr->sigh == 0x80000000) &&
	      (st0_ptr->sigl == 0) )
	    {
	      /* st(0) holds 1.0 */
	      /* infinity*log(1) */
	      if ( !arith_invalid(st1_ptr) )
		pop();
	      return;
	    }
	  /* st(0) is positive and > 1.0 */
	  /* The infinity in st(1) is the result, unchanged */
	  pop();
	}
      else
	{
	  /* st(0) is positive and < 1.0 */

#ifdef DENORM_OPERAND
	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
	    return;
#endif DENORM_OPERAND

	  /* NOTE(review): the sign is flipped with SIGN_NEG alone here,
	     whereas the rest of this function uses SIGN_NEG^SIGN_POS;
	     the two agree only if SIGN_POS is zero -- confirm. */
	  st1_ptr->sign ^= SIGN_NEG;
	  pop();
	}
      return;
    }
  else
    {
      /* st(0) must be zero or negative */
      if ( st0_ptr->tag == TW_Zero )
	{
	  /* This should be invalid, but a real 80486 is happy with it. */
	  /* With PECULIAR_486 defined the braced block below is executed
	     unconditionally. */
#ifndef PECULIAR_486
	  if ( !divide_by_zero(st1_ptr->sign, st1_ptr) )
#endif PECULIAR_486
	    {
	      st1_ptr->sign ^= SIGN_NEG^SIGN_POS;
	      pop();
	    }
	}
      else
	{
	  /* log(negative) */
	  if ( !arith_invalid(st1_ptr) )
	    pop();
	}
      return;
    }
}
 
 
/*
 * fpatan: partial arctangent.  (The x87 instruction is defined as
 * ST(1) <- arctan(ST(1)/ST(0)); pop ST.)
 *
 * The result is written into st(1).  Paths that do not return early
 * fall through to the common tail, which pops the stack and calls
 * set_precision_flag_up().  Early returns after denormal_operand() or
 * real_2op_NaN() report non-zero leave the stack unpopped.
 */
static void fpatan(FPU_REG *st0_ptr)
{
char st0_tag = st0_ptr->tag;
FPU_REG *st1_ptr = &st(1);
char st1_tag = st1_ptr->tag;

clear_C1();
/* True only when both tags are TW_Valid */
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
{
#ifdef DENORM_OPERAND
if ( ((st0_ptr->exp <= EXP_UNDER) ||
(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

poly_atan(st0_ptr, st1_ptr, st1_ptr);

if ( st1_ptr->exp <= EXP_UNDER )
{
/* A denormal result has been produced.
Precision must have been lost.
This is by definition an underflow. */
arith_underflow(st1_ptr);
/* Pop here and skip the common tail, so set_precision_flag_up()
   is not called on this path */
pop();
return;
}
}
else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) )
{
stack_underflow_pop(1);
return;
}
else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
{
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
pop();
return;
}
else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
{
/* The result takes the sign of st(1); it is saved here because
   reg_move()/reg_add() overwrite st(1) */
char sign = st1_ptr->sign;
if ( st0_tag == TW_Infinity )
{
if ( st1_tag == TW_Infinity )
{
/* Both infinite: CONST_PI4 for +infinity in st(0), otherwise
   CONST_PI4 + CONST_PI2 */
if ( st0_ptr->sign == SIGN_POS )
{ reg_move(&CONST_PI4, st1_ptr); }
else
reg_add(&CONST_PI4, &CONST_PI2, st1_ptr, FULL_PRECISION);
}
else
{
/* st(0) is infinity, st(1) is valid or zero */
#ifdef DENORM_OPERAND
if ( st1_tag != TW_Zero )
{
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
}
#endif DENORM_OPERAND

if ( st0_ptr->sign == SIGN_POS )
{
reg_move(&CONST_Z, st1_ptr);
st1_ptr->sign = sign; /* An 80486 preserves the sign */
/* Exact zero result: pop and return without touching the
   precision flag */
pop();
return;
}
else
reg_move(&CONST_PI, st1_ptr);
}
}
else
{
/* st(1) is infinity, st(0) not infinity */
#ifdef DENORM_OPERAND
if ( st0_tag != TW_Zero )
{
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
}
#endif DENORM_OPERAND

reg_move(&CONST_PI2, st1_ptr);
}
st1_ptr->sign = sign;
}
else if ( st1_tag == TW_Zero )
{
/* st(0) must be valid or zero */
char sign = st1_ptr->sign;

#ifdef DENORM_OPERAND
if ( st0_tag != TW_Zero )
{
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
}
#endif DENORM_OPERAND

/* Positive st(0): the zero already in st(1) is the answer.
   Negative st(0): the answer is CONST_PI with the sign of st(1). */
if ( st0_ptr->sign == SIGN_POS )
{ /* An 80486 preserves the sign */ pop(); return; }
else
reg_move(&CONST_PI, st1_ptr);
st1_ptr->sign = sign;
}
else if ( st0_tag == TW_Zero )
{
/* st(1) must be TW_Valid here */
char sign = st1_ptr->sign;

#ifdef DENORM_OPERAND
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

reg_move(&CONST_PI2, st1_ptr);
st1_ptr->sign = sign;
}
#ifdef PARANOID
else
EXCEPTION(EX_INTERNAL | 0x125);
#endif PARANOID

/* Common tail for every path that did not return above */
pop();
set_precision_flag_up(); /* We do not really know if up or down */
}
 
 
/* fprem: hands st(0) to do_fprem() with RC_CHOP as the rounding mode
   (presumably chopping the quotient, i.e. the 8087-style remainder --
   confirm against do_fprem()). */
static void fprem(FPU_REG *st0_ptr)
{
do_fprem(st0_ptr, RC_CHOP);
}
 
 
/* fprem1: hands st(0) to do_fprem() with RC_RND as the rounding mode
   (presumably round-to-nearest, i.e. the IEEE-style remainder --
   confirm against do_fprem()). */
static void fprem1(FPU_REG *st0_ptr)
{
do_fprem(st0_ptr, RC_RND);
}
 
 
/*
 * fyl2xp1.  (The x87 instruction is defined as
 * ST(1) <- ST(1) * log2(ST + 1.0); pop ST.)
 *
 * The result is written into st(1) and the stack is popped.  Paths that
 * call an exception helper (arith_invalid(), real_2op_NaN(),
 * denormal_operand()) return without popping when the helper reports
 * non-zero.  The cases are selected by the tags of st(0) and st(1).
 */
static void fyl2xp1(FPU_REG *st0_ptr)
{
char st0_tag = st0_ptr->tag, sign;
FPU_REG *st1_ptr = &st(1);
char st1_tag = st1_ptr->tag;

clear_C1();
/* True only when both tags are TW_Valid */
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
{
#ifdef DENORM_OPERAND
if ( ((st0_ptr->exp <= EXP_UNDER) ||
(st1_ptr->exp <= EXP_UNDER)) && denormal_operand() )
return;
#endif DENORM_OPERAND

/* A non-zero return from poly_l2p1() is treated as an invalid
   argument (see the #else branch) */
if ( poly_l2p1(st0_ptr, st1_ptr, st1_ptr) )
{
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
#else
if ( arith_invalid(st1_ptr) ) /* poly_l2p1() returned invalid */
return;
#endif PECULIAR_486
}
if ( st1_ptr->exp <= EXP_UNDER )
{
/* A denormal result has been produced.
Precision must have been lost, this is always
an underflow. */
/* The sign is saved and restored around arith_underflow() */
sign = st1_ptr->sign;
arith_underflow(st1_ptr);
st1_ptr->sign = sign;
}
else
set_precision_flag_up(); /* 80486 appears to always do this */
pop();
return;
}
/* Bitwise | on two comparison results; same truth value as || here */
else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) )
{
stack_underflow_pop(1);
return;
}
else if ( st0_tag == TW_Zero )
{
if ( st1_tag <= TW_Zero )
{
/* st(1) is valid or zero: the result is the zero from st(0) with
   the two signs combined */
#ifdef DENORM_OPERAND
if ( (st1_tag == TW_Valid) && (st1_ptr->exp <= EXP_UNDER) &&
(denormal_operand()) )
return;
#endif DENORM_OPERAND
st0_ptr->sign ^= st1_ptr->sign;
reg_move(st0_ptr, st1_ptr);
}
else if ( st1_tag == TW_Infinity )
{
/* Infinity*log(1) */
if ( !arith_invalid(st1_ptr) )
pop();
return;
}
else if ( st1_tag == TW_NaN )
{
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
pop();
return;
}
#ifdef PARANOID
else
{
EXCEPTION(EX_INTERNAL | 0x116);
return;
}
#endif PARANOID
pop(); return;
}
else if ( st0_tag == TW_Valid )
{
if ( st1_tag == TW_Zero )
{
/* st(1) is zero: the zero is kept as the result, possibly with
   its sign inverted */
if ( st0_ptr->sign == SIGN_NEG )
{
if ( st0_ptr->exp >= EXP_BIAS )
{
/* st(0) holds <= -1.0 */
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
#else
if ( arith_invalid(st1_ptr) ) return;
#endif PECULIAR_486
pop(); return;
}
#ifdef DENORM_OPERAND
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND
/* Negative st(0) with exponent below EXP_BIAS: invert the sign
   of the zero */
st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
pop(); return;
}
#ifdef DENORM_OPERAND
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND
pop(); return;
}
if ( st1_tag == TW_Infinity )
{
/* st(1) is infinity: the infinity is kept as the result, possibly
   with its sign inverted */
if ( st0_ptr->sign == SIGN_NEG )
{
if ( (st0_ptr->exp >= EXP_BIAS) &&
!((st0_ptr->sigh == 0x80000000) &&
(st0_ptr->sigl == 0)) )
{
/* st(0) holds < -1.0 */
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
#else
if ( arith_invalid(st1_ptr) ) return;
#endif PECULIAR_486
pop(); return;
}
#ifdef DENORM_OPERAND
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND
st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
pop(); return;
}
#ifdef DENORM_OPERAND
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND
pop(); return;
}
if ( st1_tag == TW_NaN )
{
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
pop();
return;
}
/* No other st(1) tag is handled in this branch; control then reaches
   the end of the function without popping */
}
else if ( st0_tag == TW_NaN )
{
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
pop();
return;
}
else if ( st0_tag == TW_Infinity )
{
if ( st1_tag == TW_NaN )
{
if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
pop();
return;
}
else if ( st0_ptr->sign == SIGN_NEG )
{
/* The exponent of st(1) is captured before arith_invalid() may
   overwrite st(1).  The two PECULIAR_486 sections only change
   whether arith_invalid() runs before or after the denormal check. */
int exponent = st1_ptr->exp;
#ifndef PECULIAR_486
/* This should have higher priority than denormals, but... */
if ( arith_invalid(st1_ptr) ) /* log(-infinity) */
return;
#endif PECULIAR_486
#ifdef DENORM_OPERAND
if ( st1_tag != TW_Zero )
{
if ( (exponent <= EXP_UNDER) && (denormal_operand()) )
return;
}
#endif DENORM_OPERAND
#ifdef PECULIAR_486
/* Denormal operands actually get higher priority */
if ( arith_invalid(st1_ptr) ) /* log(-infinity) */
return;
#endif PECULIAR_486
pop();
return;
}
else if ( st1_tag == TW_Zero )
{
/* log(infinity) */
if ( !arith_invalid(st1_ptr) )
pop();
return;
}
/* st(1) must be valid here. */

#ifdef DENORM_OPERAND
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

/* The Manual says that log(Infinity) is invalid, but a real
80486 sensibly says that it is o.k. */
/* Result: an infinity carrying the sign of st(1) */
{ char sign = st1_ptr->sign;
reg_move(&CONST_INF, st1_ptr);
st1_ptr->sign = sign;
}
pop();
return;
}
#ifdef PARANOID
else
{
EXCEPTION(EX_INTERNAL | 0x117);
}
#endif PARANOID
}
 
 
/*
 * fscale: scale st(0) by a power of two taken from st(1).
 *
 * For two valid operands, st(1) is converted to an integer with the
 * rounding control temporarily forced to RC_CHOP, and that integer is
 * added to the exponent of st(0); round_reg() then deals with any
 * overflow/underflow.  The stack is never popped.  The remaining
 * branches handle zero, infinity, NaN and empty operands.
 */
static void fscale(FPU_REG *st0_ptr)
{
char st0_tag = st0_ptr->tag;
FPU_REG *st1_ptr = &st(1);
char st1_tag = st1_ptr->tag;
int old_cw = control_word;
char sign = st0_ptr->sign;

clear_C1();
/* True only when both tags are TW_Valid */
if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
{
long scale;
FPU_REG tmp;

#ifdef DENORM_OPERAND
if ( ((st0_ptr->exp <= EXP_UNDER) ||
(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

if ( st1_ptr->exp > EXP_BIAS + 30 )
{
/* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
/* This local shadows the function-level 'sign'; both hold the
   sign of st(0) */
char sign;

if ( st1_ptr->sign == SIGN_POS )
{
EXCEPTION(EX_Overflow);
sign = st0_ptr->sign;
reg_move(&CONST_INF, st0_ptr);
st0_ptr->sign = sign;
}
else
{
EXCEPTION(EX_Underflow);
sign = st0_ptr->sign;
reg_move(&CONST_Z, st0_ptr);
st0_ptr->sign = sign;
}
return;
}

/* Force RC_CHOP while converting a copy of st(1) to an integer, then
   restore the saved control word */
control_word &= ~CW_RC;
control_word |= RC_CHOP;
reg_move(st1_ptr, &tmp);
round_to_int(&tmp); /* This can never overflow here */
control_word = old_cw;
/* tmp.sigl holds the magnitude; the sign is taken from st(1) */
scale = st1_ptr->sign ? -tmp.sigl : tmp.sigl;
scale += st0_ptr->exp;
st0_ptr->exp = scale;

/* Use round_reg() to properly detect under/overflow etc */
round_reg(st0_ptr, 0, control_word);

return;
}
else if ( st0_tag == TW_Valid )
{
if ( st1_tag == TW_Zero )
{
/* Scaling by zero leaves st(0) unchanged; only the denormal check
   has any effect */

#ifdef DENORM_OPERAND
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

return;
}
if ( st1_tag == TW_Infinity )
{
#ifdef DENORM_OPERAND
if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

/* +infinity scale gives infinity, -infinity scale gives zero; the
   result keeps the original sign of st(0) */
if ( st1_ptr->sign == SIGN_POS )
{ reg_move(&CONST_INF, st0_ptr); }
else
reg_move(&CONST_Z, st0_ptr);
st0_ptr->sign = sign;
return;
}
if ( st1_tag == TW_NaN )
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
}
else if ( st0_tag == TW_Zero )
{
if ( st1_tag == TW_Valid )
{
/* Zero stays zero; only the denormal check has any effect */

#ifdef DENORM_OPERAND
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

return;
}
else if ( st1_tag == TW_Zero ) { return; }
else if ( st1_tag == TW_Infinity )
{
if ( st1_ptr->sign == SIGN_NEG )
return;
else
{
arith_invalid(st0_ptr); /* Zero scaled by +Infinity */
return;
}
}
else if ( st1_tag == TW_NaN )
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
}
else if ( st0_tag == TW_Infinity )
{
if ( st1_tag == TW_Valid )
{
/* Infinity stays infinity; only the denormal check has any effect */

#ifdef DENORM_OPERAND
if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
return;
#endif DENORM_OPERAND

return;
}
if ( ((st1_tag == TW_Infinity) && (st1_ptr->sign == SIGN_POS))
|| (st1_tag == TW_Zero) )
return;
else if ( st1_tag == TW_Infinity )
{
arith_invalid(st0_ptr); /* Infinity scaled by -Infinity */
return;
}
else if ( st1_tag == TW_NaN )
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
}
else if ( st0_tag == TW_NaN )
{
if ( st1_tag != TW_Empty )
{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
}

/* Every combination handled above has returned; what reaches this
   point is expected to involve an empty register */
#ifdef PARANOID
if ( !((st0_tag == TW_Empty) || (st1_tag == TW_Empty)) )
{
EXCEPTION(EX_INTERNAL | 0x115);
return;
}
#endif

/* At least one of st(0), st(1) must be empty */
stack_underflow();

}
 
 
/*---------------------------------------------------------------------------*/
 
/* Handler table used by trig_a(); indexed by FPU_rm (entries 0..7 in the
   order listed). */
static FUNC_ST0 const trig_table_a[] = {
f2xm1, fyl2x, fptan, fpatan, fxtract, fprem1, fdecstp, fincstp
};
 
/*
 * Run the trig_table_a[] handler selected by FPU_rm, handing it the
 * address of the current st(0).
 */
void trig_a(void)
{
  (*trig_table_a[FPU_rm])(&st(0));
}
 
 
/* Handler table used by trig_b(); indexed by FPU_rm (entries 0..7 in the
   order listed). */
static FUNC_ST0 const trig_table_b[] =
{
fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, fsin, fcos
};
 
/*
 * Run the trig_table_b[] handler selected by FPU_rm, handing it the
 * address of the current st(0).
 */
void trig_b(void)
{
  (*trig_table_b[FPU_rm])(&st(0));
}
/fpu_arith.c
0,0 → 1,179
/*---------------------------------------------------------------------------+
| fpu_arith.c |
| |
| Code to implement the FPU register/register arithmetic instructions |
| |
| Copyright (C) 1992,1993 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_system.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "status_w.h"
 
 
/* st(0) <- st(0) + st(i), with i taken from FPU_rm.  clear_C1() is called
   first; the return value of reg_add() is ignored. */
void fadd__()
{
/* fadd st,st(i) */
clear_C1();
reg_add(&st(0), &st(FPU_rm), &st(0), control_word);
}
 
 
/* st(0) <- st(0) * st(i), with i taken from FPU_rm.  clear_C1() is called
   first; the return value of reg_mul() is ignored. */
void fmul__()
{
/* fmul st,st(i) */
clear_C1();
reg_mul(&st(0), &st(FPU_rm), &st(0), control_word);
}
 
 
 
/* st(0) <- st(0) - st(i), with i taken from FPU_rm (reg_sub(a, b, dest)
   is taken to compute a - b).  The return value of reg_sub() is ignored. */
void fsub__()
{
/* fsub st,st(i) */
clear_C1();
reg_sub(&st(0), &st(FPU_rm), &st(0), control_word);
}
 
 
/* Reversed subtract: st(0) <- st(i) - st(0), with i taken from FPU_rm
   (reg_sub(a, b, dest) is taken to compute a - b).  The return value of
   reg_sub() is ignored. */
void fsubr_()
{
/* fsubr st,st(i) */
clear_C1();
reg_sub(&st(FPU_rm), &st(0), &st(0), control_word);
}
 
 
/* st(0) <- st(0) / st(i), with i taken from FPU_rm (reg_div(a, b, dest)
   is taken to compute a / b).  The return value of reg_div() is ignored. */
void fdiv__()
{
/* fdiv st,st(i) */
clear_C1();
reg_div(&st(0), &st(FPU_rm), &st(0), control_word);
}
 
 
/* Reversed divide: st(0) <- st(i) / st(0), with i taken from FPU_rm
   (reg_div(a, b, dest) is taken to compute a / b).  The return value of
   reg_div() is ignored. */
void fdivr_()
{
/* fdivr st,st(i) */
clear_C1();
reg_div(&st(FPU_rm), &st(0), &st(0), control_word);
}
 
 
 
/* st(i) <- st(0) + st(i), with i taken from FPU_rm.  The return value of
   reg_add() is ignored and nothing is popped. */
void fadd_i()
{
/* fadd st(i),st */
clear_C1();
reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
}
 
 
/* st(i) <- st(0) * st(i), with i taken from FPU_rm.  The return value of
   reg_mul() is ignored and nothing is popped. */
void fmul_i()
{
/* fmul st(i),st */
clear_C1();
reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
}
 
 
/* st(i) <- st(0) - st(i), with i taken from FPU_rm (reg_sub(a, b, dest)
   is taken to compute a - b).  The operand order actually used is the
   live call below; the call quoted inside the comment is the reading the
   original author attributes to the 80486 manual and is not executed. */
void fsubri()
{
/* fsubr st(i),st */
/* This is the sense of the 80486 manual
reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); */
clear_C1();
reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
}
 
 
/* st(i) <- st(i) - st(0), with i taken from FPU_rm (reg_sub(a, b, dest)
   is taken to compute a - b).  The operand order actually used is the
   live call below; the call quoted inside the comment is the reading the
   original author attributes to the 80486 manual and is not executed. */
void fsub_i()
{
/* fsub st(i),st */
/* This is the sense of the 80486 manual
reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); */
clear_C1();
reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word);
}
 
 
/* st(i) <- st(0) / st(i), with i taken from FPU_rm (reg_div(a, b, dest)
   is taken to compute a / b).  The return value of reg_div() is ignored. */
void fdivri()
{
/* fdivr st(i),st */
clear_C1();
reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
}
 
 
/* st(i) <- st(i) / st(0), with i taken from FPU_rm (reg_div(a, b, dest)
   is taken to compute a / b).  The return value of reg_div() is ignored. */
void fdiv_i()
{
/* fdiv st(i),st */
clear_C1();
reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word);
}
 
 
 
/*
 * faddp st(i),st
 *
 * st(i) <- st(0) + st(i), then pop.  The pop is skipped when reg_add()
 * returns non-zero.
 */
void faddp_()
{
  clear_C1();
  if ( reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) != 0 )
    return;
  pop();
}
 
 
/*
 * fmulp st(i),st
 *
 * st(i) <- st(0) * st(i), then pop.  The pop is skipped when reg_mul()
 * returns non-zero.
 */
void fmulp_()
{
  clear_C1();
  if ( reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) != 0 )
    return;
  pop();
}
 
 
 
/*
 * fsubrp st(i),st
 *
 * Calls reg_sub() with st(0) as first operand, st(i) as second operand
 * and st(i) as destination, then pops unless reg_sub() returns non-zero.
 *
 * The original author notes that the 80486 manual reads the other way:
 *   reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word);
 */
void fsubrp()
{
  clear_C1();
  if ( reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) != 0 )
    return;
  pop();
}
 
 
/*
 * fsubp st(i),st
 *
 * Calls reg_sub() with st(i) as first operand, st(0) as second operand
 * and st(i) as destination, then pops unless reg_sub() returns non-zero.
 *
 * The original author notes that the 80486 manual reads the other way:
 *   reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
 */
void fsubp_()
{
  clear_C1();
  if ( reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) != 0 )
    return;
  pop();
}
 
 
/*
 * fdivrp st(i),st
 *
 * Calls reg_div() with st(0) as first operand, st(i) as second operand
 * and st(i) as destination, then pops unless reg_div() returns non-zero.
 */
void fdivrp()
{
  clear_C1();
  if ( reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) != 0 )
    return;
  pop();
}
 
 
/*
 * fdivp st(i),st
 *
 * Calls reg_div() with st(i) as first operand, st(0) as second operand
 * and st(i) as destination, then pops unless reg_div() returns non-zero.
 */
void fdivp_()
{
  clear_C1();
  if ( reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) != 0 )
    return;
  pop();
}
 
/reg_compare.c
0,0 → 1,378
/*---------------------------------------------------------------------------+
| reg_compare.c |
| |
| Compare two floating point registers |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| compare() is the core FPU_REG comparison function |
+---------------------------------------------------------------------------*/
 
#include "fpu_system.h"
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "status_w.h"
 
 
/*
 * Compare st(0) ("A") with *b ("B").
 *
 * Returns COMP_A_lt_B, COMP_A_eq_B or COMP_A_gt_B for an ordered result.
 * When either operand is a NaN the result is COMP_No_Comp | COMP_NaN,
 * with COMP_SNaN added if at least one NaN has bit 0x40000000 of sigh
 * clear.  When DENORM_OPERAND is defined, COMP_Denormal is OR-ed into an
 * ordered result if a TW_Valid operand has exp <= EXP_UNDER.
 * The caller must make sure st(0) is not empty; no check is made here.
 */
int compare(FPU_REG const *b)
{
int diff;
char st0_tag;
FPU_REG *st0_ptr;

st0_ptr = &st(0);
st0_tag = st0_ptr->tag;

/* Non-zero when at least one tag is something other than TW_Valid
   (this relies on TW_Valid being the zero tag value) */
if ( st0_tag | b->tag )
{
if ( st0_tag == TW_Zero )
{
if ( b->tag == TW_Zero ) return COMP_A_eq_B;
if ( b->tag == TW_Valid )
{
/* Zero against a non-zero number: decided by the sign of B */
return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
#ifdef DENORM_OPERAND
| ((b->exp <= EXP_UNDER) ?
COMP_Denormal : 0)
#endif DENORM_OPERAND
;
}
}
else if ( b->tag == TW_Zero )
{
if ( st0_tag == TW_Valid )
{
/* Non-zero number against zero: decided by the sign of A */
return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B
: COMP_A_lt_B)
#ifdef DENORM_OPERAND
| ((st0_ptr->exp <= EXP_UNDER )
? COMP_Denormal : 0 )
#endif DENORM_OPERAND
;
}
}

if ( st0_tag == TW_Infinity )
{
if ( (b->tag == TW_Valid) || (b->tag == TW_Zero) )
{
return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B
: COMP_A_lt_B)
#ifdef DENORM_OPERAND
| (((b->tag == TW_Valid) && (b->exp <= EXP_UNDER)) ?
COMP_Denormal : 0 )
#endif DENORM_OPERAND
;
}
else if ( b->tag == TW_Infinity )
{
/* The 80486 book says that infinities can be equal! */
return (st0_ptr->sign == b->sign) ? COMP_A_eq_B :
((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
}
/* Fall through to the NaN code */
}
else if ( b->tag == TW_Infinity )
{
if ( (st0_tag == TW_Valid) || (st0_tag == TW_Zero) )
{
return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
#ifdef DENORM_OPERAND
| (((st0_tag == TW_Valid)
&& (st0_ptr->exp <= EXP_UNDER)) ?
COMP_Denormal : 0)
#endif DENORM_OPERAND
;
}
/* Fall through to the NaN code */
}

/* The only possibility now should be that one of the arguments
is a NaN */
if ( (st0_tag == TW_NaN) || (b->tag == TW_NaN) )
{
if ( ((st0_tag == TW_NaN) && !(st0_ptr->sigh & 0x40000000))
|| ((b->tag == TW_NaN) && !(b->sigh & 0x40000000)) )
/* At least one arg is a signaling NaN */
return COMP_No_Comp | COMP_SNaN | COMP_NaN;
else
/* Neither is a signaling NaN */
return COMP_No_Comp | COMP_NaN;
}
/* Unexpected tag combination: raise invalid, then continue with the
   magnitude comparison below */
EXCEPTION(EX_Invalid);
}
#ifdef PARANOID
/* A valid number is expected to have the ms significand bit set */
if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
#endif PARANOID

/* Different signs: the positive operand is the larger one */
if (st0_ptr->sign != b->sign)
{
return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
#ifdef DENORM_OPERAND
|
( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
COMP_Denormal : 0)
#endif DENORM_OPERAND
;
}

/* Same sign: compare magnitudes.  diff ends up > 0, < 0 or 0 according
   to whether |A| is larger than, smaller than or equal to |B|:
   exponents first, then the high and the low significand words. */
diff = st0_ptr->exp - b->exp;
if ( diff == 0 )
{
diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are
identical */
if ( diff == 0 )
{
/* The low words are compared with relational operators rather than
   by subtraction, giving 1, -1 or 0 */
diff = st0_ptr->sigl > b->sigl;
if ( diff == 0 )
diff = -(st0_ptr->sigl < b->sigl);
}
}

/* A larger magnitude means A > B for positive operands and A < B for
   negative ones */
if ( diff > 0 )
{
return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
#ifdef DENORM_OPERAND
|
( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
COMP_Denormal : 0)
#endif DENORM_OPERAND
;
}
if ( diff < 0 )
{
return ((st0_ptr->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
#ifdef DENORM_OPERAND
|
( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
COMP_Denormal : 0)
#endif DENORM_OPERAND
;
}

return COMP_A_eq_B
#ifdef DENORM_OPERAND
|
( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
COMP_Denormal : 0)
#endif DENORM_OPERAND
;

}
 
 
/* This function requires that st(0) is not empty */
/*
 * Compare st(0) with *loaded_data and set the condition codes.
 * st(0) must not be empty (not checked here).
 *
 * Any NaN operand raises EX_Invalid and sets C3, C2 and C0.  Otherwise
 * the codes reflect the ordering reported by compare().  Returns the
 * value of denormal_operand() if compare() flagged a denormal operand,
 * otherwise 0.
 */
int compare_st_data(FPU_REG const *loaded_data)
{
  int cmp_result, cc_bits;

  cmp_result = compare(loaded_data);

  if ( (cmp_result & COMP_NaN) != 0 )
    {
      EXCEPTION(EX_Invalid);
      cc_bits = SW_C3 | SW_C2 | SW_C0;
    }
  else
    {
      switch ( cmp_result & 7 )
	{
	case COMP_A_gt_B:
	  cc_bits = 0;
	  break;
	case COMP_A_eq_B:
	  cc_bits = SW_C3;
	  break;
	case COMP_A_lt_B:
	  cc_bits = SW_C0;
	  break;
	case COMP_No_Comp:
	  cc_bits = SW_C3 | SW_C2 | SW_C0;
	  break;
#ifdef PARANOID
	default:
	  EXCEPTION(EX_INTERNAL|0x121);
	  cc_bits = SW_C3 | SW_C2 | SW_C0;
	  break;
#endif /* PARANOID */
	}
    }

  setcc(cc_bits);

  return (cmp_result & COMP_Denormal) ? denormal_operand() : 0;
}
 
 
/*
 * Ordered comparison of st(0) with st(nr); sets the condition codes.
 *
 * Returns non-zero when the caller should not go on to pop:
 *  - an empty operand: C3|C2|C0 set, EX_StackUnder raised, returns
 *    whether CW_Invalid is clear in control_word;
 *  - any NaN operand: C3|C2|C0 set, EX_Invalid raised, same return rule;
 *  - a denormal operand: returns whatever denormal_operand() returns.
 * Otherwise returns 0.
 */
static int compare_st_st(int nr)
{
  int cmp_result, cc_bits;

  if ( !(NOT_EMPTY(0) && NOT_EMPTY(nr)) )
    {
      setcc(SW_C3 | SW_C2 | SW_C0);
      /* Stack fault */
      EXCEPTION(EX_StackUnder);
      return (control_word & CW_Invalid) == 0;
    }

  cmp_result = compare(&st(nr));

  if ( (cmp_result & COMP_NaN) != 0 )
    {
      setcc(SW_C3 | SW_C2 | SW_C0);
      EXCEPTION(EX_Invalid);
      return (control_word & CW_Invalid) == 0;
    }

  switch ( cmp_result & 7 )
    {
    case COMP_A_gt_B:
      cc_bits = 0;
      break;
    case COMP_A_eq_B:
      cc_bits = SW_C3;
      break;
    case COMP_A_lt_B:
      cc_bits = SW_C0;
      break;
    case COMP_No_Comp:
      cc_bits = SW_C3 | SW_C2 | SW_C0;
      break;
#ifdef PARANOID
    default:
      EXCEPTION(EX_INTERNAL|0x122);
      cc_bits = SW_C3 | SW_C2 | SW_C0;
      break;
#endif /* PARANOID */
    }

  setcc(cc_bits);

  return (cmp_result & COMP_Denormal) ? denormal_operand() : 0;
}
 
 
/*
 * Unordered comparison of st(0) with st(nr); sets the condition codes.
 *
 * Works like the ordered comparison except for NaN operands: C3|C2|C0
 * are still set, but EX_Invalid is raised only when compare() reports a
 * signaling NaN (COMP_SNaN); otherwise 0 is returned.
 *
 * Returns non-zero when the caller should not go on to pop.
 */
static int compare_u_st_st(int nr)
{
  int cmp_result, cc_bits;

  if ( !(NOT_EMPTY(0) && NOT_EMPTY(nr)) )
    {
      setcc(SW_C3 | SW_C2 | SW_C0);
      /* Stack fault */
      EXCEPTION(EX_StackUnder);
      return (control_word & CW_Invalid) == 0;
    }

  cmp_result = compare(&st(nr));

  if ( (cmp_result & COMP_NaN) != 0 )
    {
      setcc(SW_C3 | SW_C2 | SW_C0);
      if ( (cmp_result & COMP_SNaN) == 0 )
	return 0;
      /* Only a signaling NaN raises the exception here */
      EXCEPTION(EX_Invalid);
      return (control_word & CW_Invalid) == 0;
    }

  switch ( cmp_result & 7 )
    {
    case COMP_A_gt_B:
      cc_bits = 0;
      break;
    case COMP_A_eq_B:
      cc_bits = SW_C3;
      break;
    case COMP_A_lt_B:
      cc_bits = SW_C0;
      break;
    case COMP_No_Comp:
      cc_bits = SW_C3 | SW_C2 | SW_C0;
      break;
#ifdef PARANOID
    default:
      EXCEPTION(EX_INTERNAL|0x123);
      cc_bits = SW_C3 | SW_C2 | SW_C0;
      break;
#endif /* PARANOID */
    }

  setcc(cc_bits);

  return (cmp_result & COMP_Denormal) ? denormal_operand() : 0;
}
 
/*---------------------------------------------------------------------------*/
 
/* Ordered compare of st(0) with st(FPU_rm) via compare_st_st(); the
   return value is ignored and nothing is popped. */
void fcom_st()
{
/* fcom st(i) */
compare_st_st(FPU_rm);
}
 
 
/*
 * fcomp st(i)
 *
 * Ordered compare of st(0) with st(FPU_rm), followed by one pop unless
 * compare_st_st() returns non-zero.
 */
void fcompst()
{
  if ( compare_st_st(FPU_rm) != 0 )
    return;
  pop();
}
 
 
/*
 * fcompp
 *
 * Only the encoding with FPU_rm == 1 is accepted; anything else goes to
 * FPU_illegal().  Ordered compare of st(0) with st(1), followed by two
 * pops unless compare_st_st() returns non-zero.
 */
void fcompp()
{
  if ( FPU_rm == 1 )
    {
      if ( compare_st_st(1) == 0 )
	{
	  poppop();
	}
    }
  else
    {
      FPU_illegal();
    }
}
 
 
/* Unordered compare of st(0) with st(FPU_rm) via compare_u_st_st(); the
   return value is ignored and nothing is popped. */
void fucom_()
{
/* fucom st(i) */
compare_u_st_st(FPU_rm);

}
 
 
/*
 * fucomp st(i)
 *
 * Unordered compare of st(0) with st(FPU_rm), followed by one pop unless
 * compare_u_st_st() returns non-zero.
 */
void fucomp()
{
  if ( compare_u_st_st(FPU_rm) != 0 )
    return;
  pop();
}
 
 
/*
 * fucompp
 *
 * Only the encoding with FPU_rm == 1 is accepted; anything else goes to
 * FPU_illegal().  Unordered compare of st(0) with st(1), followed by two
 * pops unless compare_u_st_st() returns non-zero.
 */
void fucompp()
{
  if ( FPU_rm != 1 )
    {
      FPU_illegal();
      return;
    }

  if ( compare_u_st_st(1) == 0 )
    {
      poppop();
    }
}
/reg_u_div.S
0,0 → 1,471
.file "reg_u_div.S"
/*---------------------------------------------------------------------------+
| reg_u_div.S |
| |
| Core division routines |
| |
| Copyright (C) 1992,1993,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Kernel for the division routines. |
| |
| void reg_u_div(FPU_REG *a, FPU_REG *b, |
| FPU_REG *dest, unsigned int control_word) |
| |
| Does not compute the destination exponent, but does adjust it. |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
 
 
/* #define dSIGL(x) (x) */
/* #define dSIGH(x) 4(x) */
 
 
#ifndef NON_REENTRANT_FPU
/*
Local storage on the stack (seven 4-byte slots addressed below %ebp;
the routine entry reserves 28 bytes for them):
Extended dividend / running remainder:
FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
Quotient words: FPU_result_2 (more significant), FPU_result_1
Overflow flag: FPU_ovfl_flag (accessed with byte instructions)
*/
#define FPU_accum_3 -4(%ebp)
#define FPU_accum_2 -8(%ebp)
#define FPU_accum_1 -12(%ebp)
#define FPU_accum_0 -16(%ebp)
#define FPU_result_1 -20(%ebp)
#define FPU_result_2 -24(%ebp)
#define FPU_ovfl_flag -28(%ebp)

#else
.data
/*
Local storage in a static area (same names as the stack variant, but as
labels in .data):
Extended dividend / running remainder:
FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
Quotient words: FPU_result_2 (more significant), FPU_result_1
Overflow flag: FPU_ovfl_flag (a single byte)
*/
.align 2,0
FPU_accum_3:
.long 0
FPU_accum_2:
.long 0
FPU_accum_1:
.long 0
FPU_accum_0:
.long 0
FPU_result_1:
.long 0
FPU_result_2:
.long 0
FPU_ovfl_flag:
.byte 0
#endif NON_REENTRANT_FPU
 
 
.text
/*
 * reg_u_div: set up a frame, load the three pointer arguments
 * (PARAM1 = numerator, PARAM2 = denominator, PARAM3 = destination),
 * optionally check both operands for denormals, then fall into
 * divide_kernel.
 *
 * Register use from here on: %esi -> numerator, %ebx -> denominator,
 * %edi -> destination.
 */
ENTRY(reg_u_div)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
/* Room for the seven 4-byte local slots (FPU_accum_*, FPU_result_*,
   FPU_ovfl_flag) */
subl $28,%esp
#endif NON_REENTRANT_FPU

pushl %esi
pushl %edi
pushl %ebx

movl PARAM1,%esi /* pointer to num */
movl PARAM2,%ebx /* pointer to denom */
movl PARAM3,%edi /* pointer to answer */

#ifdef DENORM_OPERAND
/* For each operand with exponent <= EXP_UNDER call denormal_operand();
   a non-zero return abandons the division via fpu_Arith_exit */
movl EXP(%esi),%eax
cmpl EXP_UNDER,%eax
jg xOp1_not_denorm

call SYMBOL_NAME(denormal_operand)
orl %eax,%eax
jnz fpu_Arith_exit

xOp1_not_denorm:
movl EXP(%ebx),%eax
cmpl EXP_UNDER,%eax
jg xOp2_not_denorm

call SYMBOL_NAME(denormal_operand)
orl %eax,%eax
jnz fpu_Arith_exit

xOp2_not_denorm:
#endif DENORM_OPERAND

/* Second entry point; the code below relies on %esi, %ebx and %edi
   being set up as described above */
ENTRY(divide_kernel)
#ifdef PARANOID
/* testl $0x80000000, SIGH(%esi) // Dividend */
/* je L_bugged */
/* The divisor must have its ms bit set */
testl $0x80000000, SIGH(%ebx) /* Divisor */
je L_bugged
#endif PARANOID

/* Check if the divisor can be treated as having just 32 bits */
cmpl $0,SIGL(%ebx)
jnz L_Full_Division /* Can't do a quick divide */

/* We should be able to zip through the division here */
movl SIGH(%ebx),%ecx /* The divisor */
movl SIGH(%esi),%edx /* Dividend */
movl SIGL(%esi),%eax /* Dividend */

/* If the high dividend word is >= the divisor, divl would overflow:
   record that in FPU_ovfl_flag and subtract the divisor once */
cmpl %ecx,%edx
setaeb FPU_ovfl_flag /* Keep a record */
jb L_no_adjust

subl %ecx,%edx /* Prevent the overflow */

L_no_adjust:
/* Divide the 64 bit number by the 32 bit denominator */
divl %ecx
movl %eax,FPU_result_2

/* Work on the remainder of the first division */
xorl %eax,%eax
divl %ecx
movl %eax,FPU_result_1

/* Work on the remainder of the 64 bit division */
/* This third quotient word stays in %eax for the rounding code */
xorl %eax,%eax
divl %ecx

testb $255,FPU_ovfl_flag /* was the num > denom ? */
je L_no_overflow

/* Do the shifting here */
/* increase the exponent */
incl EXP(%edi)

/* shift the mantissa right one bit */
stc /* To set the ms bit */
rcrl FPU_result_2
rcrl FPU_result_1
rcrl %eax

L_no_overflow:
jmp LRound_precision /* Do the rounding as required */
 
 
/*---------------------------------------------------------------------------+
| Divide: Return arg1/arg2 to arg3. |
| |
| This routine does not use the exponents of arg1 and arg2, but does |
| adjust the exponent of arg3. |
| |
| The maximum returned value is (ignoring exponents) |
| .ffffffff ffffffff |
| ------------------ = 1.ffffffff fffffffe |
| .80000000 00000000 |
| and the minimum is |
| .80000000 00000000 |
| ------------------ = .80000000 00000001 (rounded) |
| .ffffffff ffffffff |
| |
+---------------------------------------------------------------------------*/
 
 
/*
 * Full 64-bit by 64-bit division.  On entry %esi -> numerator,
 * %ebx -> denominator, %edi -> destination.  Three 32-bit quotient words
 * are produced (FPU_result_2, FPU_result_1, and rounding information in
 * %eax); control then leaves through fpu_reg_round.
 */
L_Full_Division:
/* Save extended dividend in local register */
movl SIGL(%esi),%eax
movl %eax,FPU_accum_2
movl SIGH(%esi),%eax
movl %eax,FPU_accum_3
xorl %eax,%eax
movl %eax,FPU_accum_1 /* zero the extension */
movl %eax,FPU_accum_0 /* zero the extension */

movl SIGL(%esi),%eax /* Get the current num */
movl SIGH(%esi),%edx

/*----------------------------------------------------------------------*/
/* Initialization done.
Do the first 32 bits. */

movb $0,FPU_ovfl_flag
cmpl SIGH(%ebx),%edx /* Test for imminent overflow */
jb LLess_than_1
ja LGreater_than_1

cmpl SIGL(%ebx),%eax
jb LLess_than_1

LGreater_than_1:
/* The dividend is greater or equal, would cause overflow */
setaeb FPU_ovfl_flag /* Keep a record */

subl SIGL(%ebx),%eax
sbbl SIGH(%ebx),%edx /* Prevent the overflow */
movl %eax,FPU_accum_2
movl %edx,FPU_accum_3

LLess_than_1:
/* At this point, we have a dividend < divisor, with a record of
adjustment in FPU_ovfl_flag */

/* We will divide by a number which is too large */
/* %ecx = ms word of the divisor plus one; it is kept for the second
   stage as well */
movl SIGH(%ebx),%ecx
addl $1,%ecx
jnc LFirst_div_not_1

/* here we need to divide by 100000000h,
i.e., no division at all.. */
mov %edx,%eax
jmp LFirst_div_done

LFirst_div_not_1:
divl %ecx /* Divide the numerator by the augmented
denom ms dw */

LFirst_div_done:
movl %eax,FPU_result_2 /* Put the result in the answer */

/* Subtract (quotient estimate * divisor) from the accumulator */
mull SIGH(%ebx) /* mul by the ms dw of the denom */

subl %eax,FPU_accum_2 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_3

movl FPU_result_2,%eax /* Get the result back */
mull SIGL(%ebx) /* now mul the ls dw of the denom */

subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
sbbl $0,FPU_accum_3
je LDo_2nd_32_bits /* Must check for non-zero result here */

#ifdef PARANOID
jb L_bugged_1
#endif PARANOID

/* The divisor must be subtracted one more time */
incl FPU_result_2 /* Correct the answer */

movl SIGL(%ebx),%eax
movl SIGH(%ebx),%edx
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2

#ifdef PARANOID
sbbl $0,FPU_accum_3
jne L_bugged_1 /* Must check for non-zero result here */
#endif PARANOID

/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
to handle now.
Work with the second 32 bits, FPU_accum_0 not used from now on */
LDo_2nd_32_bits:
movl FPU_accum_2,%edx /* get the reduced num */
movl FPU_accum_1,%eax

/* need to check for possible subsequent overflow */
cmpl SIGH(%ebx),%edx
jb LDo_2nd_div
ja LPrevent_2nd_overflow

cmpl SIGL(%ebx),%eax
jb LDo_2nd_div

LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
/* prevent overflow */
subl SIGL(%ebx),%eax
sbbl SIGH(%ebx),%edx
movl %edx,FPU_accum_2
movl %eax,FPU_accum_1

incl FPU_result_2 /* Reflect the subtraction in the answer */

#ifdef PARANOID
je L_bugged_2 /* Can't bump the result to 1.0 */
#endif PARANOID

LDo_2nd_div:
/* %ecx still holds the augmented ms divisor word; zero means the
   increment wrapped around */
cmpl $0,%ecx /* augmented denom msw */
jnz LSecond_div_not_1

/* %ecx == 0, we are dividing by 1.0 */
mov %edx,%eax
jmp LSecond_div_done

LSecond_div_not_1:
divl %ecx /* Divide the numerator by the denom ms dw */

LSecond_div_done:
movl %eax,FPU_result_1 /* Put the result in the answer */

mull SIGH(%ebx) /* mul by the ms dw of the denom */

subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2

#ifdef PARANOID
jc L_bugged_2
#endif PARANOID

movl FPU_result_1,%eax /* Get the result back */
mull SIGL(%ebx) /* now mul the ls dw of the denom */

subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
sbbl $0,FPU_accum_2

#ifdef PARANOID
jc L_bugged_2
#endif PARANOID

/* Zero flag comes from the sbbl on FPU_accum_2 above */
jz LDo_3rd_32_bits

#ifdef PARANOID
cmpl $1,FPU_accum_2
jne L_bugged_2
#endif PARANOID

/* The divisor must be subtracted one more time */
movl SIGL(%ebx),%eax
movl SIGH(%ebx),%edx
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1
sbbl $0,FPU_accum_2

#ifdef PARANOID
jc L_bugged_2
jne L_bugged_2
#endif PARANOID

addl $1,FPU_result_1 /* Correct the answer */
adcl $0,FPU_result_2

#ifdef PARANOID
jc L_bugged_2 /* Must check for non-zero result here */
#endif PARANOID

/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
tidying operations.
Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
movl FPU_accum_1,%edx /* get the reduced num */
movl FPU_accum_0,%eax

/* need to check for possible subsequent overflow */
cmpl SIGH(%ebx),%edx /* denom */
jb LRound_prep
ja LPrevent_3rd_overflow

cmpl SIGL(%ebx),%eax /* denom */
jb LRound_prep

LPrevent_3rd_overflow:
/* prevent overflow */
subl SIGL(%ebx),%eax
sbbl SIGH(%ebx),%edx
movl %edx,FPU_accum_1
movl %eax,FPU_accum_0

addl $1,FPU_result_1 /* Reflect the subtraction in the answer */
adcl $0,FPU_result_2
jne LRound_prep
jnc LRound_prep

/* This is a tricky spot, there is an overflow of the answer */
movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */

LRound_prep:
/*
* Prepare for rounding.
* To test for rounding, we just need to compare 2*accum with the
* denom.
*
* %eax is loaded with a code describing the remainder:
* 0 (remainder is zero), 0x70000000 (less than half the denom),
* 0x80000000 (exactly half), 0xff000000 (more than half).
*/
movl FPU_accum_0,%ecx
movl FPU_accum_1,%edx
movl %ecx,%eax
orl %edx,%eax
jz LRound_ovfl /* The accumulator contains zero. */

/* Multiply by 2 */
clc
rcll $1,%ecx
rcll $1,%edx
jc LRound_large /* No need to compare, denom smaller */

subl SIGL(%ebx),%ecx
sbbl SIGH(%ebx),%edx
jnc LRound_not_small

movl $0x70000000,%eax /* Denom was larger */
jmp LRound_ovfl

LRound_not_small:
jnz LRound_large

movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */
jmp LRound_ovfl

LRound_large:
movl $0xff000000,%eax /* Denom was smaller */

LRound_ovfl:
/* We are now ready to deal with rounding, but first we must get
the bits properly aligned */
testb $255,FPU_ovfl_flag /* was the num > denom ? */
je LRound_precision

incl EXP(%edi)

/* shift the mantissa right one bit */
stc /* Will set the ms bit */
rcrl FPU_result_2
rcrl FPU_result_1
rcrl %eax

/* Round the result as required */
/* Also reached from the quick-divide path.  Registers handed to
   fpu_reg_round: %eax = FPU_result_2, %ebx = FPU_result_1,
   %edx = the word previously in %eax; %edi still -> destination */
LRound_precision:
decl EXP(%edi) /* binary point between 1st & 2nd bits */

movl %eax,%edx
movl FPU_result_1,%ebx
movl FPU_result_2,%eax
jmp fpu_reg_round
 
 
#ifdef PARANOID
/* The logic is wrong if we got here */
L_bugged:
pushl EX_INTERNAL|0x202
call EXCEPTION
pop %ebx
jmp L_exit
 
L_bugged_1:
pushl EX_INTERNAL|0x203
call EXCEPTION
pop %ebx
jmp L_exit
 
L_bugged_2:
pushl EX_INTERNAL|0x204
call EXCEPTION
pop %ebx
jmp L_exit
 
L_exit:
popl %ebx
popl %edi
popl %esi
 
leave
ret
#endif PARANOID
/get_address.c
0,0 → 1,423
/*---------------------------------------------------------------------------+
| get_address.c |
| |
| Get the effective address from an FPU instruction. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Note: |
| The file contains code which accesses user memory. |
| Emulator static data may change when user memory is accessed, due to |
| other processes using the emulator while swapping is in progress. |
+---------------------------------------------------------------------------*/
 
 
#include <linux/stddef.h>
#include <linux/head.h>
 
#include <asm/segment.h>
 
#include "fpu_system.h"
#include "exception.h"
#include "fpu_emu.h"
 
 
/* Bit of the FPU ModR/M byte which this file tests to decide whether the
   instruction writes to its memory operand (see the write checks in
   pm_address(), get_address() and get_address_16()). */
#define FPU_WRITE_BIT 0x10

/* Byte offsets, inside struct info, of the ___eax ... ___edi fields.
   The table is indexed by a 3-bit register number taken from the
   ModR/M or SIB byte (0=eax, 1=ecx, 2=edx, 3=ebx, 4=esp, 5=ebp,
   6=esi, 7=edi). */
static int reg_offset[] = {
offsetof(struct info,___eax),
offsetof(struct info,___ecx),
offsetof(struct info,___edx),
offsetof(struct info,___ebx),
offsetof(struct info,___esp),
offsetof(struct info,___ebp),
offsetof(struct info,___esi),
offsetof(struct info,___edi)
};

/* Lvalue of type long for register number x, located at the tabulated
   offset inside the structure FPU_info points to. */
#define REG_(x) (*(long *)(reg_offset[(x)]+(char *) FPU_info))

/* Byte offsets, inside struct info, of the segment fields consulted by
   vm86_segment().  That function indexes the table with the segment
   override value minus one.  The last entry repeats ___vm86_ds;
   presumably it is the slot for the "default" prefix value -- confirm
   against the PREFIX_* definitions. */
static int reg_offset_vm86[] = {
offsetof(struct info,___cs),
offsetof(struct info,___vm86_ds),
offsetof(struct info,___vm86_es),
offsetof(struct info,___vm86_fs),
offsetof(struct info,___vm86_gs),
offsetof(struct info,___ss),
offsetof(struct info,___vm86_ds)
};

/* Lvalue of type unsigned short for entry x of reg_offset_vm86.
   Note that the argument is not parenthesised inside the cast, so pass
   a simple expression only. */
#define VM86_REG_(x) (*(unsigned short *) \
(reg_offset_vm86[((unsigned)x)]+(char *) FPU_info))

/* Byte offsets, inside struct info, of the segment fields consulted by
   pm_address(); same layout and indexing as reg_offset_vm86 but using
   the ___ds/___es/___fs/___gs fields. */
static int reg_offset_pm[] = {
offsetof(struct info,___cs),
offsetof(struct info,___ds),
offsetof(struct info,___es),
offsetof(struct info,___fs),
offsetof(struct info,___gs),
offsetof(struct info,___ss),
offsetof(struct info,___ds)
};

/* Lvalue of type unsigned short for entry x of reg_offset_pm.
   As with VM86_REG_, the argument is not parenthesised inside the cast. */
#define PM_REG_(x) (*(unsigned short *) \
(reg_offset_pm[((unsigned)x)]+(char *) FPU_info))
 
 
/* Decode the SIB byte which follows a ModR/M byte with r/m == 100b.
   get_address() calls this only when mod != 3; mod == 0 is handled
   here (base == 5 then means "no base register, disp32 follows").
   The SIB byte and any displacement are read from user code at
   *fpu_eip, *fpu_eip is advanced past them, and the resulting offset
   (base + scaled index + displacement) is returned. */
static int sib(int mod, unsigned long *fpu_eip)
{
  unsigned char sib_byte, scale, index_reg, base_reg;
  long ea;

  RE_ENTRANT_CHECK_OFF;
  FPU_code_verify_area(1);
  sib_byte = get_fs_byte((char *) (*fpu_eip));
  RE_ENTRANT_CHECK_ON;
  (*fpu_eip)++;

  /* Split the byte into its SS / INDEX / BASE fields. */
  scale = sib_byte >> 6;
  index_reg = (sib_byte >> 3) & 7;
  base_reg = sib_byte & 7;

  /* mod == 0 together with base == 5 selects "no base register". */
  ea = ((mod == 0) && (base_reg == 5)) ? 0 : REG_(base_reg);

  if (index_reg != 4)
    {
      ea += (REG_(index_reg)) << scale;
    }
  else if ( scale )
    {
      /* Index 4 means "no index register"; a non-zero scale field
         is then illegal. */
      EXCEPTION(EX_Invalid);
    }

  if (mod == 1)
    {
      /* 8 bit signed displacement */
      RE_ENTRANT_CHECK_OFF;
      FPU_code_verify_area(1);
      ea += (signed char) get_fs_byte((char *) (*fpu_eip));
      RE_ENTRANT_CHECK_ON;
      (*fpu_eip)++;
    }
  else if ((mod == 2) || (base_reg == 5))
    {
      /* 32 bit displacement; base == 5 reaches here only with mod == 0,
         because mod == 1 was taken by the branch above. */
      RE_ENTRANT_CHECK_OFF;
      FPU_code_verify_area(4);
      ea += (signed) get_fs_long((unsigned long *) (*fpu_eip));
      RE_ENTRANT_CHECK_ON;
      (*fpu_eip) += 4;
    }

  return ea;
}
 
 
/* Look up the segment register selected by `segment' (the override
   value; the table index is that value minus one), store its 16-bit
   contents in *selector, and return those contents shifted left by
   four bits.  The callers add the returned value to the offset. */
static unsigned long vm86_segment(unsigned char segment,
                                  unsigned short *selector)
{
  const unsigned char seg_index = (unsigned char)(segment - 1);
  unsigned short seg_value;

#ifdef PARANOID
  if ( seg_index > PREFIX_SS_ )
    {
      EXCEPTION(EX_INTERNAL|0x130);
      math_abort(FPU_info,SIGSEGV);
    }
#endif /* PARANOID */

  seg_value = VM86_REG_(seg_index);
  *selector = seg_value;
  return (unsigned long)seg_value << 4;
}
 
 
/* This should work for 16 and 32 bit protected mode.

   Translate `offset' within the segment selected by `segment' (the
   override value; the table index is that value minus one) into an
   address by adding the base taken from the segment's descriptor.
   The selector is stored in *selector.  As a side effect access_limit
   (declared outside this function) is set to the number of bytes,
   capped at 255, which may be accessed starting at the address; it is
   set to 0 when the address lies outside the segment, the segment is
   execute-only, or the instruction writes (FPU_WRITE_BIT set in
   FPU_modrm) to a segment without write permission. */
static long pm_address(unsigned char FPU_modrm, unsigned char segment,
                       unsigned short *selector, long offset)
{
  struct desc_struct descriptor;
  unsigned long seg_base, seg_last, linear, seg_top, room;
  unsigned short sel_value;
  const unsigned char seg_index = (unsigned char)(segment - 1);

#ifdef PARANOID
  if ( seg_index > PREFIX_SS_ )
    {
      EXCEPTION(EX_INTERNAL|0x132);
      math_abort(FPU_info,SIGSEGV);
    }
#endif /* PARANOID */

  sel_value = PM_REG_(seg_index);
  *selector = sel_value;

  descriptor = LDT_DESCRIPTOR(sel_value);
  seg_base = SEG_BASE_ADDR(descriptor);
  linear = seg_base + offset;

  /* Last byte covered by the limit field; clamp if the sum wrapped. */
  seg_last = seg_base
    + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
  if ( seg_last < seg_base )
    seg_last = 0xffffffff;

  if ( SEG_EXPAND_DOWN(descriptor) )
    {
      /* Valid addresses lie strictly between seg_last and seg_top. */
      seg_top = 0xffffffff;
      if ( !SEG_G_BIT(descriptor) )
        {
          seg_top = seg_base + (1 << 20);
          if ( seg_top < seg_base )
            seg_top = 0xffffffff;
        }

      if ( (linear <= seg_last) || (linear >= seg_top) )
        {
          access_limit = 0;
        }
      else
        {
          room = seg_top - linear;
          access_limit = (room >= 255) ? 255 : room;
        }
    }
  else
    {
      /* Valid addresses run from seg_base to seg_last inclusive. */
      if ( (linear > seg_last) || (linear < seg_base) )
        {
          access_limit = 0;
        }
      else
        {
          room = seg_last - linear;
          access_limit = (room >= 254) ? 255 : room + 1;
        }
    }

  if ( SEG_EXECUTE_ONLY(descriptor) )
    {
      access_limit = 0;
    }
  else if ( !SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT) )
    {
      access_limit = 0;
    }

  return linear;
}
 
 
/*
MOD R/M byte: MOD == 3 has a special use for the FPU
SIB byte used iff R/M = 100b
 
7 6 5 4 3 2 1 0
..... ......... .........
MOD OPCODE(2) R/M
 
 
SIB byte
 
7 6 5 4 3 2 1 0
..... ......... .........
SS INDEX BASE
 
*/
 
/* Compute the memory operand address of an FPU instruction which uses
   32 bit addressing.

   FPU_modrm   the ModR/M byte of the instruction
   fpu_eip     points at the address of the next instruction byte; it is
               advanced past any SIB byte and displacement consumed
   addr        receives the selector and the offset of the operand
   addr_modes  addressing mode and prefix overrides in effect

   Returns the address as a pointer.  Note that the two mod == 0 forms
   which do not use a SIB byte (disp32, and plain register indirect)
   return straight away, before the segment handling at the end of the
   function is reached. */
void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip,
                  struct address *addr,
                  fpu_addr_modes addr_modes)
{
  const unsigned char mod = (FPU_modrm >> 6) & 3;
  const unsigned rm = FPU_modrm & 7;
  int address = 0;     /* Initialized just to stop compiler warnings. */

  /* Memory accessed via the cs selector is write protected
     in `non-segmented' 32 bit protected mode. */
  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
       && (addr_modes.override.segment == PREFIX_CS_) )
    {
      math_abort(FPU_info,SIGSEGV);
    }

  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */

  if ( (rm == 4) && (mod != 3) )
    {
      /* r/m == 100b: the address is described by a SIB byte. */
      address = sib(mod, fpu_eip);
    }
  else
    {
      /* Take the register's location before any user memory is read. */
      long *reg_ptr = & REG_(rm);

      if ( mod == 0 )
        {
          if ( rm == 5 )
            {
              /* Special case: disp32 */
              RE_ENTRANT_CHECK_OFF;
              FPU_code_verify_area(4);
              address = get_fs_long((unsigned long *) (*fpu_eip));
              (*fpu_eip) += 4;
              RE_ENTRANT_CHECK_ON;
            }
          else
            {
              /* Just the contents of the cpu register */
              address = *reg_ptr;
            }
          addr->offset = address;
          return (void *) address;
        }

      if ( mod == 1 )
        {
          /* 8 bit signed displacement */
          RE_ENTRANT_CHECK_OFF;
          FPU_code_verify_area(1);
          address = (signed char) get_fs_byte((char *) (*fpu_eip));
          RE_ENTRANT_CHECK_ON;
          (*fpu_eip)++;
        }
      else if ( mod == 2 )
        {
          /* 32 bit displacement */
          RE_ENTRANT_CHECK_OFF;
          FPU_code_verify_area(4);
          address = (signed) get_fs_long((unsigned long *) (*fpu_eip));
          (*fpu_eip) += 4;
          RE_ENTRANT_CHECK_ON;
        }
      else
        {
          /* mod == 3 is not legal for the FPU */
          EXCEPTION(EX_Invalid);
        }

      address += *reg_ptr;
    }

  addr->offset = address;

  switch ( addr_modes.default_mode )
    {
    case 0:
      break;
    case VM86:
      address += vm86_segment(addr_modes.override.segment,
                              (unsigned short *)&(addr->selector));
      break;
    case PM16:
    case SEG32:
      address = pm_address(FPU_modrm, addr_modes.override.segment,
                           (unsigned short *)&(addr->selector), address);
      break;
    default:
      EXCEPTION(EX_INTERNAL|0x133);
    }

  return (void *)address;
}
 
 
/* Compute the memory operand address of an FPU instruction which uses
   16 bit addressing.

   FPU_modrm   the ModR/M byte of the instruction
   fpu_eip     points at the address of the next instruction byte; it is
               advanced past any displacement consumed
   addr        receives the selector and the 16 bit offset of the operand
   addr_modes  addressing mode and prefix overrides in effect

   The offset is the displacement plus the register combination chosen
   by r/m, truncated to 16 bits.  The combinations which involve ___ebp
   switch a default segment to PREFIX_SS_.  Returns the address as a
   pointer. */
void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip,
                     struct address *addr,
                     fpu_addr_modes addr_modes)
{
  const unsigned char mod = (FPU_modrm >> 6) & 3;
  const unsigned rm = FPU_modrm & 7;
  int address = 0;     /* Default used for mod == 0 */
  int disp_only = 0;   /* Set for the mod == 0, r/m == 6 disp16 form */

  /* Memory accessed via the cs selector is write protected
     in `non-segmented' 32 bit protected mode. */
  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
       && (addr_modes.override.segment == PREFIX_CS_) )
    {
      math_abort(FPU_info,SIGSEGV);
    }

  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */

  if ( mod == 0 )
    {
      if ( rm == 6 )
        {
          /* Special case: disp16, no registers are added */
          RE_ENTRANT_CHECK_OFF;
          FPU_code_verify_area(2);
          address = (unsigned short)get_fs_word((unsigned short *) (*fpu_eip));
          (*fpu_eip) += 2;
          RE_ENTRANT_CHECK_ON;
          disp_only = 1;
        }
    }
  else if ( mod == 1 )
    {
      /* 8 bit signed displacement */
      RE_ENTRANT_CHECK_OFF;
      FPU_code_verify_area(1);
      address = (signed char) get_fs_byte((signed char *) (*fpu_eip));
      RE_ENTRANT_CHECK_ON;
      (*fpu_eip)++;
    }
  else if ( mod == 2 )
    {
      /* 16 bit displacement */
      RE_ENTRANT_CHECK_OFF;
      FPU_code_verify_area(2);
      address = (unsigned) get_fs_word((unsigned short *) (*fpu_eip));
      (*fpu_eip) += 2;
      RE_ENTRANT_CHECK_ON;
    }
  else
    {
      /* mod == 3 is not legal for the FPU */
      EXCEPTION(EX_Invalid);
    }

  if ( !disp_only )
    {
      switch ( rm )
        {
        case 0:
          address += FPU_info->___ebx + FPU_info->___esi;
          break;
        case 1:
          address += FPU_info->___ebx + FPU_info->___edi;
          break;
        case 2:
          address += FPU_info->___ebp + FPU_info->___esi;
          break;
        case 3:
          address += FPU_info->___ebp + FPU_info->___edi;
          break;
        case 4:
          address += FPU_info->___esi;
          break;
        case 5:
          address += FPU_info->___edi;
          break;
        case 6:
          address += FPU_info->___ebp;
          break;
        case 7:
          address += FPU_info->___ebx;
          break;
        }

      /* The forms based on ___ebp default to the stack segment. */
      if ( (rm == 2) || (rm == 3) || (rm == 6) )
        {
          if ( addr_modes.override.segment == PREFIX_DEFAULT )
            addr_modes.override.segment = PREFIX_SS_;
        }
    }

  address &= 0xffff;

  addr->offset = address;

  switch ( addr_modes.default_mode )
    {
    case 0:
      break;
    case VM86:
      address += vm86_segment(addr_modes.override.segment,
                              (unsigned short *)&(addr->selector));
      break;
    case PM16:
    case SEG32:
      address = pm_address(FPU_modrm, addr_modes.override.segment,
                           (unsigned short *)&(addr->selector), address);
      break;
    default:
      EXCEPTION(EX_INTERNAL|0x131);
    }

  return (void *)address ;
}
/reg_ld_str.c
0,0 → 1,1452
/*---------------------------------------------------------------------------+
| reg_ld_str.c |
| |
| All of the functions which transfer data between user memory and FPU_REGs.|
| |
| Copyright (C) 1992,1993,1994,1996 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Note: |
| The file contains code which accesses user memory. |
| Emulator static data may change when user memory is accessed, due to |
| other processes using the emulator while swapping is in progress. |
+---------------------------------------------------------------------------*/
 
#include <asm/segment.h>
 
#include "fpu_system.h"
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "status_w.h"
 
 
/* Exponent parameters of the memory formats handled in this file.
   The *_Ebias values are subtracted from (loads) or added to (stores)
   the exponent field of the format; *_Emin and *_Emax bound the
   unbiased exponent of a normal number in that format. */
#define EXTENDED_Ebias 0x3fff
#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */

#define DOUBLE_Emax 1023 /* largest valid exponent */
#define DOUBLE_Ebias 1023
#define DOUBLE_Emin (-1022) /* smallest valid exponent */

#define SINGLE_Emax 127 /* largest valid exponent */
#define SINGLE_Ebias 127
#define SINGLE_Emin (-126) /* smallest valid exponent */

/* Forward declaration; called by reg_store_extended(). */
static void write_to_extended(FPU_REG *rp, char *d);
 
 
/* Get a long double from user memory.

   Reads the 10 byte extended value at s and converts it to the
   internal FPU_REG form in *loaded_data.
   Returns 1 when the value was a pseudodenormal or an unsupported
   format (an unnormal, or a NaN pattern without the ms bit set; these
   are converted to a signalling NaN), otherwise 0. */
int reg_load_extended(long double *s, FPU_REG *loaded_data)
{
  unsigned long sig_lo, sig_hi, raw_exp;
  int msb_set;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_READ, s, 10);
  sig_lo = get_fs_long((unsigned long *) s);
  sig_hi = get_fs_long(1 + (unsigned long *) s);
  raw_exp = get_fs_word(4 + (unsigned short *) s);
  RE_ENTRANT_CHECK_ON;

  msb_set = (sig_hi & 0x80000000) != 0;

  loaded_data->tag = TW_Valid;   /* Default */
  loaded_data->sigl = sig_lo;
  loaded_data->sigh = sig_hi;
  loaded_data->sign = (raw_exp & 0x8000) ? SIGN_NEG : SIGN_POS;
  raw_exp &= 0x7fff;
  loaded_data->exp = raw_exp - EXTENDED_Ebias + EXP_BIAS;

  if ( raw_exp == 0 )
    {
      if ( (sig_hi == 0) && (sig_lo == 0) )
        {
          loaded_data->tag = TW_Zero;
          return 0;
        }

      /* The number is a de-normal or pseudodenormal.
         Convert it for internal use. */
      loaded_data->exp++;
      if ( msb_set )
        {
          /* Pseudodenormal.  This is non-80486 behaviour because the
             number loses its 'denormal' identity. */
          return 1;
        }
      normalize_nuo(loaded_data);
      return 0;
    }

  if ( raw_exp == 0x7fff )
    {
      if ( (sig_hi == 0x80000000) && (sig_lo == 0) )
        {
          /* Matches the bit pattern for Infinity. */
          loaded_data->exp = EXP_Infinity;
          loaded_data->tag = TW_Infinity;
          return 0;
        }

      loaded_data->exp = EXP_NaN;
      loaded_data->tag = TW_NaN;
      if ( msb_set )
        return 0;
      /* NaNs have the ms bit set to 1, so this is an Unsupported NaN
         data type; fall through to the conversion below. */
    }
  else
    {
      /* Valid numbers have the ms bit set to 1. */
      if ( msb_set )
        return 0;
      /* Unnormal: an unsupported data type. */
      loaded_data->exp = EXP_NaN;
      loaded_data->tag = TW_NaN;
    }

  /* This is non-80486 behaviour.  The value should generate an Invalid
     Operand exception later, so we convert it to a SNaN. */
  loaded_data->sigh = 0x80000000;
  loaded_data->sigl = 0x00000001;
  loaded_data->sign = SIGN_NEG;
  return 1;
}
 
 
/* Get a double from user memory.

   Reads the 8 byte value at dfloat and converts it to the internal
   FPU_REG form in *loaded_data.  Returns the result of
   denormal_operand() when the value is a de-normal, otherwise 0.
   NaNs are loaded without comment; the caller must look for them. */
int reg_load_double(double *dfloat, FPU_REG *loaded_data)
{
  int exp;
  unsigned hi_word, lo_word;
  unsigned frac_hi, frac_lo;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_READ, dfloat, 8);
  hi_word = get_fs_long(1 + (unsigned long *) dfloat);
  lo_word = get_fs_long((unsigned long *) dfloat);
  RE_ENTRANT_CHECK_ON;

  loaded_data->sign = (hi_word & 0x80000000) ? SIGN_NEG : SIGN_POS;
  exp = ((hi_word & 0x7ff00000) >> 20) - DOUBLE_Ebias;
  hi_word &= 0xfffff;

  /* The 52 fraction bits moved up so that they sit directly below
     the ms bit of the 64 bit significand. */
  frac_hi = (hi_word << 11) | (lo_word >> 21);
  frac_lo = lo_word << 11;

  if ( exp > DOUBLE_Emax )
    {
      /* Infinity or NaN */
      if ( (hi_word == 0) && (lo_word == 0) )
        {
          /* +- infinity */
          loaded_data->sigh = 0x80000000;
          loaded_data->sigl = 0x00000000;
          loaded_data->exp = EXP_Infinity;
          loaded_data->tag = TW_Infinity;
          return 0;
        }

      /* Must be a signaling or quiet NaN */
      loaded_data->exp = EXP_NaN;
      loaded_data->tag = TW_NaN;
      loaded_data->sigh = frac_hi | 0x80000000;
      loaded_data->sigl = frac_lo;
      return 0;   /* The calling function must look for NaNs */
    }

  if ( exp < DOUBLE_Emin )
    {
      /* Zero or de-normal */
      if ( (hi_word == 0) && (lo_word == 0) )
        {
          /* Zero; reg_move() overwrites the sign, so put it back. */
          int c = loaded_data->sign;
          reg_move(&CONST_Z, loaded_data);
          loaded_data->sign = c;
          return 0;
        }

      /* De-normal: there is no implied ms bit */
      loaded_data->exp = DOUBLE_Emin + EXP_BIAS;
      loaded_data->tag = TW_Valid;
      loaded_data->sigh = frac_hi;
      loaded_data->sigl = frac_lo;
      normalize_nuo(loaded_data);
      return denormal_operand();
    }

  /* A normal number: restore the implied ms bit */
  loaded_data->exp = exp + EXP_BIAS;
  loaded_data->tag = TW_Valid;
  loaded_data->sigh = frac_hi | 0x80000000;
  loaded_data->sigl = frac_lo;
  return 0;
}
 
 
/* Get a float from user memory.

   Reads the 4 byte value at single and converts it to the internal
   FPU_REG form in *loaded_data.  Returns the result of
   denormal_operand() when the value is a de-normal, otherwise 0.
   NaNs are loaded without comment; the caller must look for them. */
int reg_load_single(float *single, FPU_REG *loaded_data)
{
  unsigned raw, frac;
  int exp;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_READ, single, 4);
  raw = get_fs_long((unsigned long *) single);
  RE_ENTRANT_CHECK_ON;

  loaded_data->sign = (raw & 0x80000000) ? SIGN_NEG : SIGN_POS;

  if ( (raw & 0x7fffffff) == 0 )
    {
      /* Zero; reg_move() overwrites the sign, so put it back. */
      int c = loaded_data->sign;
      reg_move(&CONST_Z, loaded_data);
      loaded_data->sign = c;
      return 0;
    }

  exp = ((raw & 0x7f800000) >> 23) - SINGLE_Ebias;
  /* The 23 fraction bits moved up to sit directly below the ms bit. */
  frac = (raw & 0x7fffff) << 8;

  if ( exp < SINGLE_Emin )
    {
      /* De-normals: there is no implied ms bit */
      loaded_data->exp = SINGLE_Emin + EXP_BIAS;
      loaded_data->tag = TW_Valid;
      loaded_data->sigh = frac;
      loaded_data->sigl = 0;
      normalize_nuo(loaded_data);
      return denormal_operand();
    }

  if ( exp > SINGLE_Emax )
    {
      /* Infinity or NaN */
      if ( frac == 0 )
        {
          /* +- infinity */
          loaded_data->sigh = 0x80000000;
          loaded_data->sigl = 0x00000000;
          loaded_data->exp = EXP_Infinity;
          loaded_data->tag = TW_Infinity;
          return 0;
        }

      /* Must be a signaling or quiet NaN */
      loaded_data->exp = EXP_NaN;
      loaded_data->tag = TW_NaN;
      loaded_data->sigh = frac | 0x80000000;
      loaded_data->sigl = 0;
      return 0;   /* The calling function must look for NaNs */
    }

  /* A normal number: restore the implied ms bit */
  loaded_data->exp = exp + EXP_BIAS;
  loaded_data->sigh = frac | 0x80000000;
  loaded_data->sigl = 0;
  loaded_data->tag = TW_Valid;
  return 0;
}
 
 
/* Get a long long from user memory.

   Reads the 8 byte signed integer at _s and stores it, normalized, in
   *loaded_data.

   The magnitude is formed with unsigned arithmetic so that the most
   negative value (0x8000000000000000) is negated without signed
   overflow, and the two 32 bit halves are combined with shifts rather
   than by writing through a type-punned pointer. */
void reg_load_int64(long long *_s, FPU_REG *loaded_data)
{
  unsigned long lo, hi;
  unsigned long long bits, magnitude;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_READ, _s, 8);
  lo = get_fs_long((unsigned long *) _s);
  hi = get_fs_long(1 + (unsigned long *) _s);
  RE_ENTRANT_CHECK_ON;

  /* The low word is stored first in memory. */
  bits = ((unsigned long long)(hi & 0xffffffffUL) << 32)
    | (lo & 0xffffffffUL);

  if ( bits == 0 )
    {
      reg_move(&CONST_Z, loaded_data);
      return;
    }

  if ( bits >> 63 )
    {
      /* Negative: two's complement negation, well defined on unsigned */
      magnitude = 0ULL - bits;
      loaded_data->sign = SIGN_NEG;
    }
  else
    {
      magnitude = bits;
      loaded_data->sign = SIGN_POS;
    }

  significand(loaded_data) = magnitude;
  loaded_data->exp = EXP_BIAS + 63;
  loaded_data->tag = TW_Valid;
  normalize_nuo(loaded_data);
}
 
 
/* Get a long from user memory.

   Reads the 4 byte signed integer at _s and stores it, normalized, in
   *loaded_data.

   The magnitude is formed with unsigned arithmetic so that the most
   negative value is negated without signed overflow. */
void reg_load_int32(long *_s, FPU_REG *loaded_data)
{
  long s;
  unsigned long magnitude;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_READ, _s, 4);
  s = (long)get_fs_long((unsigned long *) _s);
  RE_ENTRANT_CHECK_ON;

  if (s == 0)
    {
      reg_move(&CONST_Z, loaded_data);
      return;
    }

  if (s > 0)
    {
      magnitude = (unsigned long)s;
      loaded_data->sign = SIGN_POS;
    }
  else
    {
      /* Two's complement negation, well defined on unsigned */
      magnitude = 0UL - (unsigned long)s;
      loaded_data->sign = SIGN_NEG;
    }

  loaded_data->sigh = magnitude;
  loaded_data->sigl = 0;
  loaded_data->exp = EXP_BIAS + 31;
  loaded_data->tag = TW_Valid;
  normalize_nuo(loaded_data);
}
 
 
/* Get a short from user memory.

   Reads the 2 byte signed integer at _s and stores it, normalized, in
   *loaded_data.

   The magnitude is moved into the top half of the high significand
   word with an unsigned shift: for an input of -32768 the magnitude is
   32768, and shifting that left by 16 as a signed int would overflow. */
void reg_load_int16(short *_s, FPU_REG *loaded_data)
{
  int s;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_READ, _s, 2);
  /* Cast as short to get the sign extended. */
  s = (short)get_fs_word((unsigned short *) _s);
  RE_ENTRANT_CHECK_ON;

  if (s == 0)
    {
      reg_move(&CONST_Z, loaded_data);
      return;
    }

  if (s > 0)
    {
      loaded_data->sign = SIGN_POS;
    }
  else
    {
      s = -s;   /* cannot overflow: s is an int no smaller than -32768 */
      loaded_data->sign = SIGN_NEG;
    }

  loaded_data->sigh = (unsigned long)s << 16;
  loaded_data->sigl = 0;
  loaded_data->exp = EXP_BIAS + 15;
  loaded_data->tag = TW_Valid;
  normalize_nuo(loaded_data);
}
 
 
/* Get a packed bcd array from user memory.

   Bytes 0..8 at s each hold two decimal digits (high nibble is the
   more significant digit), least significant byte first; bit 7 of
   byte 9 is the sign.  The value is stored, normalized, in
   *loaded_data.  A zero keeps the sign read from byte 9. */
void reg_load_bcd(char *s, FPU_REG *loaded_data)
{
  int byte_no;
  unsigned char pair, sign_byte;
  long long value = 0;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_READ, s, 10);
  RE_ENTRANT_CHECK_ON;

  /* Accumulate the 18 digits, most significant byte first. */
  for ( byte_no = 8; byte_no >= 0; byte_no-- )
    {
      RE_ENTRANT_CHECK_OFF;
      pair = (unsigned char)get_fs_byte((unsigned char *) s+byte_no);
      RE_ENTRANT_CHECK_ON;
      value = (value * 10 + (pair >> 4)) * 10 + (pair & 0x0f);
    }

  RE_ENTRANT_CHECK_OFF;
  sign_byte = (unsigned char)get_fs_byte((unsigned char *) s+9);
  loaded_data->sign = (sign_byte & 0x80) ? SIGN_NEG : SIGN_POS;
  RE_ENTRANT_CHECK_ON;

  if ( value != 0 )
    {
      significand(loaded_data) = value;
      loaded_data->exp = EXP_BIAS + 63;
      loaded_data->tag = TW_Valid;
      normalize_nuo(loaded_data);
    }
  else
    {
      /* reg_move() overwrites the sign, so put it back afterwards. */
      char sign = loaded_data->sign;
      reg_move(&CONST_Z, loaded_data);
      loaded_data->sign = sign;
    }
}
 
/*===========================================================================*/
 
/* Put a long double into user memory.

   The only exception raised by an attempt to store to an extended
   format is the Invalid Stack exception, i.e. attempting to store
   from an empty register.

   Returns 1 if 10 bytes were written at d, or 0 if the register was
   empty and the CW_Invalid bit of control_word is clear. */
int reg_store_extended(long double *d, FPU_REG *st0_ptr)
{
  if ( st0_ptr->tag == TW_Empty )
    {
      /* Empty register (stack underflow) */
      EXCEPTION(EX_StackUnder);
      if ( !(control_word & CW_Invalid) )
        return 0;

      /* The masked response */
      /* Put out the QNaN indefinite */
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_WRITE,d,10);
      put_fs_long(0, (unsigned long *) d);
      put_fs_long(0xc0000000, 1 + (unsigned long *) d);
      put_fs_word(0xffff, 4 + (short *) d);
      RE_ENTRANT_CHECK_ON;
      return 1;
    }

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE, d, 10);
  RE_ENTRANT_CHECK_ON;
  write_to_extended(st0_ptr, (char *) d);
  return 1;
}
 
 
/* Put a double into user memory.

   Converts *st0_ptr to the 8 byte double format and writes it at dfloat.
   Returns 1 if a value was written, or 0 if an exception was raised
   whose bit in control_word is clear, in which case nothing is stored.

   Valid numbers whose exponent is below DOUBLE_Emin are converted to
   de-normals with round_to_int(); other valid numbers have the low 11
   bits of the significand rounded according to control_word & CW_RC.

   NOTE(review): unlike reg_store_single() there is no final else
   branch, so l[] is left unset if the tag is none of the values tested
   here -- confirm that no other tag can reach this function. */
int reg_store_double(double *dfloat, FPU_REG *st0_ptr)
{
unsigned long l[2];
unsigned long increment = 0; /* avoid gcc warnings */
char st0_tag = st0_ptr->tag;

if (st0_tag == TW_Valid)
{
int precision_loss;
int exp;
FPU_REG tmp;

/* Work on a copy so that the register itself is left untouched. */
reg_move(st0_ptr, &tmp);
exp = tmp.exp - EXP_BIAS;

if ( exp < DOUBLE_Emin ) /* It may be a denormal */
{
/* A denormal will always underflow. */
#ifndef PECULIAR_486
/* An 80486 is supposed to be able to generate
a denormal exception here, but... */
if ( st0_ptr->exp <= EXP_UNDER )
{
/* Underflow has priority. */
if ( control_word & CW_Underflow )
denormal_operand();
}
#endif PECULIAR_486

tmp.exp += -DOUBLE_Emin + 52; /* largest exp to be 51 */

if ( (precision_loss = round_to_int(&tmp)) )
{
#ifdef PECULIAR_486
/* Did it round to a non-denormal ? */
/* This behaviour might be regarded as peculiar, it appears
that the 80486 rounds to the dest precision, then
converts to decide underflow. */
if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) &&
(st0_ptr->sigl & 0x000007ff)) )
#endif PECULIAR_486
{
EXCEPTION(EX_Underflow);
/* This is a special case: see sec 16.2.5.1 of
the 80486 book */
if ( !(control_word & CW_Underflow) )
return 0;
}
EXCEPTION(precision_loss);
if ( !(control_word & CW_Precision) )
return 0;
}
/* The rounded integer is the de-normal's fraction field; the
exponent field of l[1] stays zero. */
l[0] = tmp.sigl;
l[1] = tmp.sigh;
}
else
{
/* The low 11 bits of the significand do not fit in a double. */
if ( tmp.sigl & 0x000007ff )
{
precision_loss = 1;
switch (control_word & CW_RC)
{
case RC_RND:
/* Rounding can get a little messy.. */
increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */
((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */
break;
case RC_DOWN: /* towards -infinity */
increment = (tmp.sign == SIGN_POS) ? 0 : tmp.sigl & 0x7ff;
break;
case RC_UP: /* towards +infinity */
increment = (tmp.sign == SIGN_POS) ? tmp.sigl & 0x7ff : 0;
break;
case RC_CHOP:
increment = 0;
break;
}
/* Truncate the mantissa */
tmp.sigl &= 0xfffff800;
if ( increment )
{
if ( tmp.sigl >= 0xfffff800 )
{
/* the sigl part overflows */
if ( tmp.sigh == 0xffffffff )
{
/* The sigh part overflows */
tmp.sigh = 0x80000000;
exp++;
if (exp >= EXP_OVER)
goto overflow;
}
else
{
tmp.sigh ++;
}
tmp.sigl = 0x00000000;
}
else
{
/* We only need to increment sigl */
tmp.sigl += 0x00000800;
}
}
}
else
precision_loss = 0;
/* Pack the fraction: the low 11 bits are dropped and the mask
removes the explicit ms bit of the significand. */
l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
l[1] = ((tmp.sigh >> 11) & 0xfffff);

if ( exp > DOUBLE_Emax )
{
/* Also entered by the goto in the rounding code above. */
overflow:
EXCEPTION(EX_Overflow);
if ( !(control_word & CW_Overflow) )
return 0;
set_precision_flag_up();
if ( !(control_word & CW_Precision) )
return 0;

/* This is a special case: see sec 16.2.5.1 of the 80486 book */
/* Overflow to infinity */
l[0] = 0x00000000; /* Set to */
l[1] = 0x7ff00000; /* + INF */
}
else
{
if ( precision_loss )
{
if ( increment )
set_precision_flag_up();
else
set_precision_flag_down();
}
/* Add the exponent */
l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20);
}
}
}
else if (st0_tag == TW_Zero)
{
/* Number is zero */
l[0] = 0;
l[1] = 0;
}
else if (st0_tag == TW_Infinity)
{
l[0] = 0;
l[1] = 0x7ff00000;
}
else if (st0_tag == TW_NaN)
{
/* See if we can get a valid NaN from the FPU_REG */
l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21);
l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
if ( !(st0_ptr->sigh & 0x40000000) )
{
/* It is a signalling NaN */
EXCEPTION(EX_Invalid);
if ( !(control_word & CW_Invalid) )
return 0;
/* Set the same significand bit in the stored value. */
l[1] |= (0x40000000 >> 11);
}
l[1] |= 0x7ff00000;
}
else if ( st0_tag == TW_Empty )
{
/* Empty register (stack underflow) */
EXCEPTION(EX_StackUnder);
if ( control_word & CW_Invalid )
{
/* The masked response */
/* Put out the QNaN indefinite */
RE_ENTRANT_CHECK_OFF;
FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8);
put_fs_long(0, (unsigned long *) dfloat);
put_fs_long(0xfff80000, 1 + (unsigned long *) dfloat);
RE_ENTRANT_CHECK_ON;
return 1;
}
else
return 0;
}
/* The sign is taken from the register itself, not from the copy. */
if ( st0_ptr->sign )
l[1] |= 0x80000000;

RE_ENTRANT_CHECK_OFF;
FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8);
put_fs_long(l[0], (unsigned long *)dfloat);
put_fs_long(l[1], 1 + (unsigned long *)dfloat);
RE_ENTRANT_CHECK_ON;

return 1;
}
 
 
/* Put a float into user memory.

   Converts *st0_ptr to the 4 byte single format and writes it at
   single.  Returns 1 if a value was written, or 0 if an exception was
   raised whose bit in control_word is clear, in which case nothing is
   stored.

   Valid numbers whose exponent is below SINGLE_Emin are converted to
   de-normals with round_to_int(); other valid numbers have the low 40
   bits of the significand rounded according to control_word & CW_RC.

   control_word is tested with the CW_* constants throughout, as
   reg_store_double() does; the EX_* constants are used only as
   arguments to EXCEPTION(). */
int reg_store_single(float *single, FPU_REG *st0_ptr)
{
  long templ;
  unsigned long increment = 0;   /* avoid gcc warnings */
  char st0_tag = st0_ptr->tag;

  if (st0_tag == TW_Valid)
    {
      int precision_loss;
      int exp;
      FPU_REG tmp;

      /* Work on a copy so that the register itself is left untouched. */
      reg_move(st0_ptr, &tmp);
      exp = tmp.exp - EXP_BIAS;

      if ( exp < SINGLE_Emin )
        {
          /* A denormal will always underflow. */
#ifndef PECULIAR_486
          /* An 80486 is supposed to be able to generate
             a denormal exception here, but... */
          if ( st0_ptr->exp <= EXP_UNDER )
            {
              /* Underflow has priority. */
              if ( control_word & CW_Underflow )
                denormal_operand();
            }
#endif /* PECULIAR_486 */

          tmp.exp += -SINGLE_Emin + 23;  /* largest exp to be 22 */

          if ( (precision_loss = round_to_int(&tmp)) )
            {
#ifdef PECULIAR_486
              /* Did it round to a non-denormal ? */
              /* This behaviour might be regarded as peculiar, it appears
                 that the 80486 rounds to the dest precision, then
                 converts to decide underflow. */
              if ( !((tmp.sigl == 0x00800000) &&
                     ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) )
#endif /* PECULIAR_486 */
                {
                  EXCEPTION(EX_Underflow);
                  /* This is a special case: see sec 16.2.5.1 of
                     the 80486 book */
                  if ( !(control_word & CW_Underflow) )
                    return 0;
                }
              EXCEPTION(precision_loss);
              if ( !(control_word & CW_Precision) )
                return 0;
            }
          /* The rounded integer is the de-normal's fraction field; the
             exponent field stays zero. */
          templ = tmp.sigl;
        }
      else
        {
          /* Only the top 24 bits of the significand fit in a single. */
          if ( tmp.sigl | (tmp.sigh & 0x000000ff) )
            {
              unsigned long sigh = tmp.sigh;
              unsigned long sigl = tmp.sigl;

              precision_loss = 1;
              switch (control_word & CW_RC)
                {
                case RC_RND:
                  increment = ((sigh & 0xff) > 0x80)       /* more than half */
                    || (((sigh & 0xff) == 0x80) && sigl)   /* more than half */
                    || ((sigh & 0x180) == 0x180);          /* round to even */
                  break;
                case RC_DOWN:   /* towards -infinity */
                  increment = (tmp.sign == SIGN_POS)
                    ? 0 : (sigl | (sigh & 0xff));
                  break;
                case RC_UP:     /* towards +infinity */
                  increment = (tmp.sign == SIGN_POS)
                    ? (sigl | (sigh & 0xff)) : 0;
                  break;
                case RC_CHOP:
                  increment = 0;
                  break;
                }

              /* Truncate part of the mantissa */
              tmp.sigl = 0;

              if (increment)
                {
                  if ( sigh >= 0xffffff00 )
                    {
                      /* The sigh part overflows */
                      tmp.sigh = 0x80000000;
                      exp++;
                      if ( exp >= EXP_OVER )
                        goto overflow;
                    }
                  else
                    {
                      tmp.sigh &= 0xffffff00;
                      tmp.sigh += 0x100;
                    }
                }
              else
                {
                  tmp.sigh &= 0xffffff00;  /* Finish the truncation */
                }
            }
          else
            precision_loss = 0;

          /* Pack the fraction: the mask removes the explicit ms bit. */
          templ = (tmp.sigh >> 8) & 0x007fffff;

          if ( exp > SINGLE_Emax )
            {
              /* Also entered by the goto in the rounding code above. */
            overflow:
              EXCEPTION(EX_Overflow);
              if ( !(control_word & CW_Overflow) )
                return 0;
              set_precision_flag_up();
              if ( !(control_word & CW_Precision) )
                return 0;

              /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
              /* Masked response is overflow to infinity. */
              templ = 0x7f800000;
            }
          else
            {
              if ( precision_loss )
                {
                  if ( increment )
                    set_precision_flag_up();
                  else
                    set_precision_flag_down();
                }
              /* Add the exponent */
              templ |= ((exp+SINGLE_Ebias) & 0xff) << 23;
            }
        }
    }
  else if (st0_tag == TW_Zero)
    {
      templ = 0;
    }
  else if (st0_tag == TW_Infinity)
    {
      templ = 0x7f800000;
    }
  else if (st0_tag == TW_NaN)
    {
      /* See if we can get a valid NaN from the FPU_REG */
      templ = st0_ptr->sigh >> 8;
      if ( !(st0_ptr->sigh & 0x40000000) )
        {
          /* It is a signalling NaN */
          EXCEPTION(EX_Invalid);
          if ( !(control_word & CW_Invalid) )
            return 0;
          /* Set the same significand bit in the stored value. */
          templ |= (0x40000000 >> 8);
        }
      templ |= 0x7f800000;
    }
  else if ( st0_tag == TW_Empty )
    {
      /* Empty register (stack underflow) */
      EXCEPTION(EX_StackUnder);
      if ( control_word & CW_Invalid )
        {
          /* The masked response */
          /* Put out the QNaN indefinite */
          RE_ENTRANT_CHECK_OFF;
          FPU_verify_area(VERIFY_WRITE,(void *)single,4);
          put_fs_long(0xffc00000, (unsigned long *) single);
          RE_ENTRANT_CHECK_ON;
          return 1;
        }
      else
        return 0;
    }
#ifdef PARANOID
  else
    {
      EXCEPTION(EX_INTERNAL|0x163);
      return 0;
    }
#endif

  /* The sign is taken from the register itself, not from the copy. */
  if (st0_ptr->sign)
    templ |= 0x80000000;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE,(void *)single,4);
  put_fs_long(templ,(unsigned long *) single);
  RE_ENTRANT_CHECK_ON;

  return 1;
}
 
 
/* Put a long long into user memory.

   Rounds *st0_ptr to an integer and writes it as an 8 byte signed
   value at d.  An empty register, an Infinity, a NaN, or a value which
   does not fit raises an exception; the value 0x8000000000000000 is
   then stored if control_word has the tested bit set.
   Returns 1 if a value was written, otherwise 0.

   NOTE(review): control_word is tested against EX_Invalid here, while
   reg_store_bcd() uses CW_Invalid; presumably both name the same bit --
   confirm. */
int reg_store_int64(long long *d, FPU_REG *st0_ptr)
{
  FPU_REG t;
  long long tll;
  int precision_loss;
  int invalid = 0;
  char st0_tag = st0_ptr->tag;

  if ( st0_tag == TW_Empty )
    {
      /* Empty register (stack underflow) */
      EXCEPTION(EX_StackUnder);
      invalid = 1;
    }
  else if ( (st0_tag == TW_Infinity) || (st0_tag == TW_NaN) )
    {
      EXCEPTION(EX_Invalid);
      invalid = 1;
    }
  else
    {
      reg_move(st0_ptr, &t);
      precision_loss = round_to_int(&t);
      ((long *)&tll)[0] = t.sigl;
      ((long *)&tll)[1] = t.sigh;

      /* Out of range if round_to_int() reported overflow, or if the
         top bit is set for anything but the most negative value. */
      if ( (precision_loss == 1) ||
           ((t.sigh & 0x80000000) &&
            ((t.sigh != 0x80000000) || (t.sigl != 0) ||
             (t.sign != SIGN_NEG))) )
        {
          EXCEPTION(EX_Invalid);
          /* This is a special case: see sec 16.2.5.1 of the 80486 book */
          invalid = 1;
        }
      else
        {
          if ( precision_loss )
            set_precision_flag(precision_loss);
          if ( t.sign )
            tll = - tll;
        }
    }

  if ( invalid )
    {
      if ( !(control_word & EX_Invalid) )
        return 0;
      /* Produce something like QNaN "indefinite" */
      tll = 0x8000000000000000LL;
    }

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE,(void *)d,8);
  put_fs_long(((long *)&tll)[0],(unsigned long *) d);
  put_fs_long(((long *)&tll)[1],1 + (unsigned long *) d);
  RE_ENTRANT_CHECK_ON;

  return 1;
}
 
 
/* Put a long into user memory.

   Rounds *st0_ptr to an integer and writes it as a 4 byte signed value
   at d.  An empty register, an Infinity, a NaN, or a value which does
   not fit raises an exception; the value 0x80000000 is then stored if
   control_word has the tested bit set.
   Returns 1 if a value was written, otherwise 0.

   NOTE(review): control_word is tested against EX_Invalid here, while
   reg_store_bcd() uses CW_Invalid; presumably both name the same bit --
   confirm. */
int reg_store_int32(long *d, FPU_REG *st0_ptr)
{
  FPU_REG t;
  int precision_loss;
  int invalid = 0;
  char st0_tag = st0_ptr->tag;

  if ( st0_tag == TW_Empty )
    {
      /* Empty register (stack underflow) */
      EXCEPTION(EX_StackUnder);
      invalid = 1;
    }
  else if ( (st0_tag == TW_Infinity) || (st0_tag == TW_NaN) )
    {
      EXCEPTION(EX_Invalid);
      invalid = 1;
    }
  else
    {
      reg_move(st0_ptr, &t);
      precision_loss = round_to_int(&t);

      /* Out of range if anything is left in the high word, or if the
         top bit is set for anything but the most negative value. */
      if ( t.sigh ||
           ((t.sigl & 0x80000000) &&
            ((t.sigl != 0x80000000) || (t.sign != SIGN_NEG))) )
        {
          EXCEPTION(EX_Invalid);
          /* This is a special case: see sec 16.2.5.1 of the 80486 book */
          invalid = 1;
        }
      else
        {
          if ( precision_loss )
            set_precision_flag(precision_loss);
          if ( t.sign )
            t.sigl = -(long)t.sigl;
        }
    }

  if ( invalid )
    {
      if ( !(control_word & EX_Invalid) )
        return 0;
      /* Produce something like QNaN "indefinite" */
      t.sigl = 0x80000000;
    }

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE,d,4);
  put_fs_long(t.sigl, (unsigned long *) d);
  RE_ENTRANT_CHECK_ON;

  return 1;
}
 
 
/* Put a short into user memory.

   Rounds *st0_ptr to an integer and writes it as a 2 byte signed value
   at d.  An empty register, an Infinity, a NaN, or a value which does
   not fit raises an exception; the value 0x8000 is then stored if
   control_word has the tested bit set.
   Returns 1 if a value was written, otherwise 0.

   NOTE(review): control_word is tested against EX_Invalid here, while
   reg_store_bcd() uses CW_Invalid; presumably both name the same bit --
   confirm. */
int reg_store_int16(short *d, FPU_REG *st0_ptr)
{
  FPU_REG t;
  int precision_loss;
  int invalid = 0;
  char st0_tag = st0_ptr->tag;

  if ( st0_tag == TW_Empty )
    {
      /* Empty register (stack underflow) */
      EXCEPTION(EX_StackUnder);
      invalid = 1;
    }
  else if ( (st0_tag == TW_Infinity) || (st0_tag == TW_NaN) )
    {
      EXCEPTION(EX_Invalid);
      invalid = 1;
    }
  else
    {
      reg_move(st0_ptr, &t);
      precision_loss = round_to_int(&t);

      /* Out of range if anything is left in the high word, or if bits
         above the low 15 are set for anything but the most negative
         value. */
      if ( t.sigh ||
           ((t.sigl & 0xffff8000) &&
            ((t.sigl != 0x8000) || (t.sign != SIGN_NEG))) )
        {
          EXCEPTION(EX_Invalid);
          /* This is a special case: see sec 16.2.5.1 of the 80486 book */
          invalid = 1;
        }
      else
        {
          if ( precision_loss )
            set_precision_flag(precision_loss);
          if ( t.sign )
            t.sigl = -t.sigl;
        }
    }

  if ( invalid )
    {
      if ( !(control_word & EX_Invalid) )
        return 0;
      /* Produce something like QNaN "indefinite" */
      t.sigl = 0x8000;
    }

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE,d,2);
  put_fs_word((short)t.sigl,(short *) d);
  RE_ENTRANT_CHECK_ON;

  return 1;
}
 
 
/* Put a packed bcd array into user memory.

   Rounds *st0_ptr to an integer and writes it at d as 18 packed
   decimal digits (two per byte, least significant byte first) followed
   by a sign byte.  An empty register, or a value larger than
   999999999999999999, raises an exception; the "indefinite" pattern is
   then stored if CW_Invalid is set in control_word.
   Returns 1 if 10 bytes were written, otherwise 0. */
int reg_store_bcd(char *d, FPU_REG *st0_ptr)
{
  FPU_REG t;
  unsigned long long ll = 0;
  unsigned char lo_digit, hi_digit;
  int i, precision_loss;
  int invalid = 0;
  unsigned char sign = (st0_ptr->sign == SIGN_NEG) ? 0x80 : 0;
  char st0_tag = st0_ptr->tag;

  if ( st0_tag == TW_Empty )
    {
      /* Empty register (stack underflow) */
      EXCEPTION(EX_StackUnder);
      invalid = 1;
    }
  else
    {
      reg_move(st0_ptr, &t);
      precision_loss = round_to_int(&t);
      ll = significand(&t);

      /* Check for overflow, by comparing with 999999999999999999 decimal. */
      if ( (t.sigh > 0x0de0b6b3) ||
           ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) )
        {
          EXCEPTION(EX_Invalid);
          /* This is a special case: see sec 16.2.5.1 of the 80486 book */
          invalid = 1;
        }
      else if ( precision_loss )
        {
          /* Precision loss doesn't stop the data transfer */
          set_precision_flag(precision_loss);
        }
    }

  if ( invalid )
    {
      if ( !(control_word & CW_Invalid) )
        return 0;

      /* Produce the QNaN "indefinite" */
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_WRITE,d,10);
      for ( i = 0; i < 7; i++)
        put_fs_byte(0, (unsigned char *) d+i); /* These bytes "undefined" */
      put_fs_byte(0xc0, (unsigned char *) d+7); /* This byte "undefined" */
      put_fs_byte(0xff, (unsigned char *) d+8);
      put_fs_byte(0xff, (unsigned char *) d+9);
      RE_ENTRANT_CHECK_ON;
      return 1;
    }

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE,d,10);
  RE_ENTRANT_CHECK_ON;

  /* Peel off two decimal digits per byte, least significant first. */
  for ( i = 0; i < 9; i++)
    {
      lo_digit = div_small(&ll, 10);
      hi_digit = div_small(&ll, 10);
      RE_ENTRANT_CHECK_OFF;
      put_fs_byte((unsigned char)(lo_digit | (hi_digit << 4)),
                  (unsigned char *) d+i);
      RE_ENTRANT_CHECK_ON;
    }

  RE_ENTRANT_CHECK_OFF;
  put_fs_byte(sign,(unsigned char *) d+9);
  RE_ENTRANT_CHECK_ON;

  return 1;
}
 
/*===========================================================================*/
 
/* Round r to an integer, in place, using the rounding mode in
   control_word.  r gets mangled such that sig is int, sign:
   it is NOT normalized.

   The return value is zero if the result is exact; if bits are changed
   due to rounding, truncation, etc, then PRECISION_LOST_UP or
   PRECISION_LOST_DOWN is returned.
   Overflow is signalled by a return value of 1.  When the exponent is
   too large for a 64 bit integer the significand is set to the largest
   possible value; when rounding up would carry out of a significand
   which is already all ones, 1 is returned with the significand left
   as it is. */
int round_to_int(FPU_REG *r)
{
  char very_big;
  unsigned eax;
  int round_up = 0;

  if (r->tag == TW_Zero)
    {
      /* Make sure that zero is returned */
      significand(r) = 0;
      return 0;           /* o.k. */
    }

  if (r->exp > EXP_BIAS + 63)
    {
      r->sigl = r->sigh = ~0;      /* The largest representable number */
      return 1;           /* overflow */
    }

  /* NOTE(review): shrxs() presumably shifts the significand right by
     the given count and returns the bits shifted out -- confirm. */
  eax = shrxs(&r->sigl, EXP_BIAS + 63 - r->exp);
  very_big = (~(r->sigh) == 0) && (~(r->sigl) == 0); /* test for 0xfff...fff */
#define half_or_more (eax & 0x80000000)
#define frac_part (eax)
#define more_than_half ((eax & 0x80000001) == 0x80000001)

  /* Decide whether the integer part has to be incremented. */
  switch (control_word & CW_RC)
    {
    case RC_RND:
      round_up = more_than_half                      /* nearest */
        || (half_or_more && (r->sigl & 1));          /* odd -> even */
      break;
    case RC_DOWN:
      round_up = frac_part && r->sign;
      break;
    case RC_UP:
      round_up = frac_part && !r->sign;
      break;
    case RC_CHOP:
      break;
    }

  if ( round_up )
    {
      if ( very_big )
        return 1;         /* overflow */
      significand(r) ++;
      return PRECISION_LOST_UP;
    }

  return eax ? PRECISION_LOST_DOWN : 0;

}
 
/*===========================================================================*/
 
/*
  Load the FPU environment (control word, status word, tag word and the
  instruction/operand pointers) from the user memory image at s.

  The 14 byte image is used in VM86 mode, or when exactly one of
  "default mode is PM16" and "operand size prefix present" holds;
  otherwise the 28 byte image is used.

  Returns a pointer to the first byte after the environment image.
*/
char *fldenv(fpu_addr_modes addr_modes, char *s)
{
  unsigned short packed_tags = 0;
  unsigned char new_tag;
  int regnr;
  int short_image =
    (addr_modes.default_mode == VM86) ||
    ((addr_modes.default_mode == PM16)
     ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX));

  if ( short_image )
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_READ, s, 0x0e);
      control_word = get_fs_word((unsigned short *) s);
      partial_status = get_fs_word((unsigned short *) (s+2));
      packed_tags = get_fs_word((unsigned short *) (s+4));
      instruction_address.offset = get_fs_word((unsigned short *) (s+6));
      instruction_address.selector = get_fs_word((unsigned short *) (s+8));
      operand_address.offset = get_fs_word((unsigned short *) (s+0x0a));
      operand_address.selector = get_fs_word((unsigned short *) (s+0x0c));
      RE_ENTRANT_CHECK_ON;
      s += 0x0e;
      if ( addr_modes.default_mode == VM86 )
        {
          /* The top four bits of each "selector" word are moved up to
             become bits 16..19 of the corresponding offset. */
          instruction_address.offset
            += (instruction_address.selector & 0xf000) << 4;
          operand_address.offset += (operand_address.selector & 0xf000) << 4;
        }
    }
  else
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_READ, s, 0x1c);
      control_word = get_fs_word((unsigned short *) s);
      partial_status = get_fs_word((unsigned short *) (s+4));
      packed_tags = get_fs_word((unsigned short *) (s+8));
      instruction_address.offset = get_fs_long((unsigned long *) (s+0x0c));
      instruction_address.selector = get_fs_word((unsigned short *) (s+0x10));
      instruction_address.opcode = get_fs_word((unsigned short *) (s+0x12));
      operand_address.offset = get_fs_long((unsigned long *) (s+0x14));
      operand_address.selector = get_fs_long((unsigned long *) (s+0x18));
      RE_ENTRANT_CHECK_ON;
      s += 0x1c;
    }

#ifdef PECULIAR_486
  control_word &= ~0xe080;
#endif /* PECULIAR_486 */

  top = (partial_status >> SW_Top_Shift) & 7;

  /* Recompute the summary bits from the freshly loaded words */
  if ( partial_status & ~control_word & CW_Exceptions )
    partial_status |= (SW_Summary | SW_Backward);
  else
    partial_status &= ~(SW_Summary | SW_Backward);

  /* Two tag bits per register, register 0 in the lowest bits */
  for ( regnr = 0; regnr < 8; regnr++, packed_tags >>= 2 )
    {
      new_tag = packed_tags & 3;

      if ( new_tag == 3 )
        {
          /* New tag is empty. Accept it */
          regs[regnr].tag = TW_Empty;
          continue;
        }

      /* Old tag is not empty and new tag is not empty: the old tag
         remains correct */
      if ( regs[regnr].tag != TW_Empty )
        continue;

      /* Old tag is empty and new tag is not empty: the new tag is
         derived from the old register contents */
      if ( regs[regnr].exp == EXP_BIAS - EXTENDED_Ebias )
        regs[regnr].tag =
          (regs[regnr].sigl | regs[regnr].sigh) ? TW_Valid : TW_Zero;
      else if ( regs[regnr].exp == 0x7fff + EXP_BIAS - EXTENDED_Ebias )
        regs[regnr].tag =
          ((regs[regnr].sigh & ~0x80000000) | regs[regnr].sigl)
          ? TW_NaN : TW_Infinity;
      else
        regs[regnr].tag = TW_Valid;
    }

  return s;
}
 
 
/*
  Restore the complete FPU state from user memory: the environment is
  loaded by fldenv(), then the eight 10 byte register images which
  follow it are loaded, the first image going to the register at the
  (newly loaded) top of stack.
*/
void frstor(fpu_addr_modes addr_modes, char *data_address)
{
  char *image = fldenv(addr_modes, data_address);
  int n, stnr, was_empty;

  for ( n = 0; n < 8; n++, image += 10 )
    {
      stnr = (n + top) & 7;
      /* The tag at this point was derived from the tag word which
         fldenv() loaded. */
      was_empty = (regs[stnr].tag == TW_Empty);
      reg_load_extended((long double *)image, &regs[stnr]);
      /* An empty tag over-rides whatever the loaded data implies. */
      if ( was_empty )
        regs[stnr].tag = TW_Empty;
    }
}
 
 
/*
  Build the 16 bit tag word from the internal register tags.
  Each register contributes two bits, regs[0] ending up in the lowest
  bits:  3 for TW_Empty, 2 for TW_Infinity, TW_NaN and for TW_Valid
  registers whose exponent is <= EXP_BIAS - EXTENDED_Ebias.  Any other
  internal tag value is inserted unchanged.
*/
unsigned short tag_word(void)
{
  unsigned short packed = 0;
  unsigned char code;
  int regnr = 8;

  while ( regnr-- > 0 )
    {
      code = regs[regnr].tag;

      if ( code == TW_Valid )
        {
          if ( regs[regnr].exp <= (EXP_BIAS - EXTENDED_Ebias) )
            code = 2;
        }
      else if ( (code == TW_Infinity) || (code == TW_NaN) )
        code = 2;
      else if ( code == TW_Empty )
        code = 3;
      /* TW_Zero already has the correct value */

      packed = (packed << 2) | code;
    }

  return packed;
}
 
 
/*
  Store the FPU environment (control word, status word, tag word and
  the instruction/operand pointers) to user memory at d.

  The 14 byte image is written in VM86 mode, or when exactly one of
  "default mode is PM16" and "operand size prefix present" holds;
  otherwise the 28 byte image is written.

  After the store, all the CW_Exceptions bits are set in control_word
  and SW_Summary/SW_Backward are cleared in partial_status.

  Returns a pointer to the first byte after the environment image.
*/
char *fstenv(fpu_addr_modes addr_modes, char *d)
{
  if ( (addr_modes.default_mode == VM86) ||
       ((addr_modes.default_mode == PM16)
        ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_WRITE,d,14);
#ifdef PECULIAR_486
      /* The control word occupies a 16 bit slot in this image, so it
         must be stored as a word; a 32 bit store would spill into the
         status word slot at d+2. */
      put_fs_word(control_word & ~0xe080, (unsigned short *) d);
#else
      put_fs_word(control_word, (unsigned short *) d);
#endif /* PECULIAR_486 */
      put_fs_word(status_word(), (unsigned short *) (d+2));
      put_fs_word(tag_word(), (unsigned short *) (d+4));
      put_fs_word(instruction_address.offset, (unsigned short *) (d+6));
      put_fs_word(operand_address.offset, (unsigned short *) (d+0x0a));
      if ( addr_modes.default_mode == VM86 )
        {
          /* Bits 16..19 of each offset go into the top four bits of
             the word which otherwise holds the selector. */
          put_fs_word((instruction_address.offset & 0xf0000) >> 4,
                      (unsigned short *) (d+8));
          put_fs_word((operand_address.offset & 0xf0000) >> 4,
                      (unsigned short *) (d+0x0c));
        }
      else
        {
          put_fs_word(instruction_address.selector, (unsigned short *) (d+8));
          put_fs_word(operand_address.selector, (unsigned short *) (d+0x0c));
        }
      RE_ENTRANT_CHECK_ON;
      d += 0x0e;
    }
  else
    {
      RE_ENTRANT_CHECK_OFF;
      FPU_verify_area(VERIFY_WRITE,d,28);
#ifdef PECULIAR_486
      /* An 80486 sets all the reserved bits to 1. */
      put_fs_long(0xffff0040 | (control_word & ~0xe080), (unsigned long *) d);
      put_fs_long(0xffff0000 | status_word(), (unsigned long *) (d+4));
      put_fs_long(0xffff0000 | tag_word(), (unsigned long *) (d+8));
#else
      put_fs_word(control_word, (unsigned short *) d);
      put_fs_word(status_word(), (unsigned short *) (d+4));
      put_fs_word(tag_word(), (unsigned short *) (d+8));
#endif /* PECULIAR_486 */
      put_fs_long(instruction_address.offset, (unsigned long *) (d+0x0c));
      put_fs_word(instruction_address.selector, (unsigned short *) (d+0x10));
      put_fs_word(instruction_address.opcode, (unsigned short *) (d+0x12));
      put_fs_long(operand_address.offset, (unsigned long *) (d+0x14));
#ifdef PECULIAR_486
      /* An 80486 sets all the reserved bits to 1. */
      put_fs_word(operand_address.selector, (unsigned short *) (d+0x18));
      put_fs_word(0xffff, (unsigned short *) (d+0x1a));
#else
      put_fs_long(operand_address.selector, (unsigned long *) (d+0x18));
#endif /* PECULIAR_486 */
      RE_ENTRANT_CHECK_ON;
      d += 0x1c;
    }

  control_word |= CW_Exceptions;
  partial_status &= ~(SW_Summary | SW_Backward);

  return d;
}
 
 
/*
  Save the complete FPU state to user memory: the environment is
  written by fstenv(), followed by the eight registers as 10 byte
  images starting with the register at the top of stack.  The FPU is
  then re-initialized with finit().
*/
void fsave(fpu_addr_modes addr_modes, char *data_address)
{
  char *reg_area = fstenv(addr_modes, data_address);
  int n;

  RE_ENTRANT_CHECK_OFF;
  FPU_verify_area(VERIFY_WRITE,reg_area,80);
  RE_ENTRANT_CHECK_ON;

  n = 0;
  while ( n < 8 )
    {
      write_to_extended(&regs[(top + n) & 7], reg_area + 10 * n);
      n++;
    }

  finit();
}
 
/*===========================================================================*/
 
/*
  Write the register *rp to the 10 bytes of user memory at d: the
  64 bit significand at d..d+7, then a 16 bit word at d+8 holding the
  exponent with the sign in bit 15.

  A call to this function must be preceded by a call to
  FPU_verify_area() to verify access to the 10 bytes at d
*/
static void write_to_extended(FPU_REG *rp, char *d)
{
  FPU_REG tmp;
  long e = rp->exp - EXP_BIAS + EXTENDED_Ebias;

#ifdef PARANOID
  if ( rp->tag == TW_Zero )
    {
      if ( rp->sigh | rp->sigl | e )
        { EXCEPTION(EX_INTERNAL | 0x160); }
    }
  else if ( (rp->tag == TW_Infinity) || (rp->tag == TW_NaN) )
    {
      if ( (e ^ 0x7fff) | !(rp->sigh & 0x80000000) )
        { EXCEPTION(EX_INTERNAL | 0x161); }
    }
  else
    {
      if (e > 0x7fff || e < -63)
        { EXCEPTION(EX_INTERNAL | 0x162); }
    }
#endif /* PARANOID */

  if ( e <= 0 )
    {
      /*
        The number is a de-normal stored as a normal using our
        extra exponent range, or is Zero.
        Convert it back to a de-normal, or leave it as Zero.
      */
      reg_move(rp, &tmp);
      tmp.exp += -EXTENDED_Emin + 63;  /* largest exp to be 63 */
      round_to_int(&tmp);
      e = 0;
      RE_ENTRANT_CHECK_OFF;
      put_fs_long(tmp.sigl, (unsigned long *) d);
      put_fs_long(tmp.sigh, (unsigned long *) (d + 4));
      RE_ENTRANT_CHECK_ON;
    }
  else
    {
      /*
        All numbers except denormals are stored internally in a
        format which is compatible with the extended real number
        format, so the significand is copied unchanged.
      */
      RE_ENTRANT_CHECK_OFF;
      put_fs_long(rp->sigl, (unsigned long *) d);
      put_fs_long(rp->sigh, (unsigned long *) (d + 4));
      RE_ENTRANT_CHECK_ON;
    }

  if ( rp->sign != SIGN_POS )
    e |= 0x8000;

  RE_ENTRANT_CHECK_OFF;
  put_fs_word(e, (unsigned short *) (d + 8));
  RE_ENTRANT_CHECK_ON;
}
/poly_tan.c
0,0 → 1,213
/*---------------------------------------------------------------------------+
| poly_tan.c |
| |
| Compute the tan of a FPU_REG, using a polynomial approximation. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "poly.h"
 
 
/* Coefficient tables for the two polynomials evaluated by poly_tan().
   Each table is passed to polynomial_Xsig() with its HiPOWERxx-1 as
   the last argument.  The "odd" tables are used for the numerator
   polynomial and the "even" tables for the denominator polynomial;
   positive and negative coefficients are kept in separate tables. */
#define HiPOWERop 3 /* odd poly, positive terms */
static const unsigned long long oddplterm[HiPOWERop] =
{
0x0000000000000000LL,
0x0051a1cf08fca228LL,
0x0000000071284ff7LL
};

#define HiPOWERon 2 /* odd poly, negative terms */
static const unsigned long long oddnegterm[HiPOWERon] =
{
0x1291a9a184244e80LL,
0x0000583245819c21LL
};

#define HiPOWERep 2 /* even poly, positive terms */
static const unsigned long long evenplterm[HiPOWERep] =
{
0x0e848884b539e888LL,
0x00003c7f18b887daLL
};

#define HiPOWERen 2 /* even poly, negative terms */
static const unsigned long long evennegterm[HiPOWERen] =
{
0xf1f0200fd51569ccLL,
0x003afb46105c4432LL
};

/* Multiplied into the correction term by poly_tan() after the three
   multiplications by the argument. */
static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
 
 
/*--- poly_tan() ------------------------------------------------------------+
| Compute tan(*arg) into *result using a ratio of two polynomials. |
| The argument must not be negative (checked only if PARANOID is defined). |
| Arguments above (approx) pi/4 are handled by computing tan(pi/2 - arg) |
| and taking the reciprocal. |
| The result is written with exponent EXP_BIAS + exponent; the short store |
| through &result->sign zeroes the sign (and presumably the adjacent tag |
| field -- confirm against the FPU_REG layout). |
+---------------------------------------------------------------------------*/
void poly_tan(FPU_REG const *arg, FPU_REG *result)
{
long int exponent;
int invert;
Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
argSignif, fix_up;
unsigned long adj;

/* Unbiased exponent of the argument */
exponent = arg->exp - EXP_BIAS;

#ifdef PARANOID
if ( arg->sign != 0 ) /* Can't hack a number < 0.0 */
{ arith_invalid(result); return; } /* Need a positive number */
#endif PARANOID

/* Split the problem into two domains, smaller and larger than pi/4 */
if ( (exponent == 0) || ((exponent == -1) && (arg->sigh > 0xc90fdaa2)) )
{
/* The argument is greater than (approx) pi/4 */
invert = 1;
accum.lsw = 0;
XSIG_LL(accum) = significand(arg);
if ( exponent == 0 )
{
/* The argument is >= 1.0 */
/* Put the binary point at the left. */
XSIG_LL(accum) <<= 1;
}
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
/* Form pi/2 - arg using only the leading 64 fraction bits of pi/2;
the error this introduces is compensated further below. */
XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);

argSignif.lsw = accum.lsw;
XSIG_LL(argSignif) = XSIG_LL(accum);
exponent = -1 + norm_Xsig(&argSignif);
}
else
{
invert = 0;
argSignif.lsw = 0;
XSIG_LL(accum) = XSIG_LL(argSignif) = significand(arg);
if ( exponent < -1 )
{
/* shift the argument right by the required places */
if ( shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
XSIG_LL(accum) ++; /* round up */
}
}

/* argSq = accum^2, argSqSq = accum^4 */
XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
mul_Xsig_Xsig(&argSq, &argSq);
XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
mul_Xsig_Xsig(&argSqSq, &argSqSq);

/* Compute the negative terms for the numerator polynomial */
accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
mul_Xsig_Xsig(&accumulatoro, &argSq);
negate_Xsig(&accumulatoro);
/* Add the positive terms */
polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);

/* Compute the positive terms for the denominator polynomial */
accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
mul_Xsig_Xsig(&accumulatore, &argSq);
negate_Xsig(&accumulatore);
/* Add the negative terms */
polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
/* Multiply by arg^2 */
mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
/* de-normalize and divide by 2 */
shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
negate_Xsig(&accumulatore); /* This does 1 - accumulator */

/* Now find the ratio. */
if ( accumulatore.msw == 0 )
{
/* accumulatoro must contain 1.0 here, (actually, 0) but it
really doesn't matter what value we use because it will
have negligible effect in later calculations
*/
XSIG_LL(accum) = 0x8000000000000000LL;
accum.lsw = 0;
}
else
{
div_Xsig(&accumulatoro, &accumulatore, &accum);
}

/* Multiply by 1/3 * arg^3 */
mul64_Xsig(&accum, &XSIG_LL(argSignif));
mul64_Xsig(&accum, &XSIG_LL(argSignif));
mul64_Xsig(&accum, &XSIG_LL(argSignif));
mul64_Xsig(&accum, &twothirds);
shr_Xsig(&accum, -2*(exponent+1));

/* tan(arg) = arg + accum */
add_two_Xsig(&accum, &argSignif, &exponent);

if ( invert )
{
/* We now have the value of tan(pi_2 - arg) where pi_2 is an
approximation for pi/2
*/
/* The next step is to fix the answer to compensate for the
error due to the approximation used for pi/2
*/

/* This is (approx) delta, the error in our approx for pi/2
(see above). It has an exponent of -65
*/
XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
fix_up.lsw = 0;

/* adj becomes a 32 bit estimate of tan^2, used to scale delta */
if ( exponent == 0 )
adj = 0xffffffff; /* We want approx 1.0 here, but
this is close enough. */
else if ( exponent > -30 )
{
adj = accum.msw >> -(exponent+1); /* tan */
mul_32_32(adj, adj, &adj); /* tan^2 */
}
else
adj = 0;
mul_32_32(0x898cc517, adj, &adj); /* delta * tan^2 */

fix_up.msw += adj;
if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */
{
/* Yes, we need to add an msb */
shr_Xsig(&fix_up, 1);
fix_up.msw |= 0x80000000;
shr_Xsig(&fix_up, 64 + exponent);
}
else
shr_Xsig(&fix_up, 65 + exponent);

add_two_Xsig(&accum, &fix_up, &exponent);

/* accum now contains tan(pi/2 - arg).
Use tan(arg) = 1.0 / tan(pi/2 - arg)
*/
accumulatoro.lsw = accumulatoro.midw = 0;
accumulatoro.msw = 0x80000000;
div_Xsig(&accumulatoro, &accum, &accum);
exponent = - exponent - 1;
}

/* Transfer the result */
round_Xsig(&accum);
/* One 16 bit store starting at the sign field */
*(short *)&(result->sign) = 0;
significand(result) = XSIG_LL(accum);
result->exp = EXP_BIAS + exponent;

}
/reg_norm.S
0,0 → 1,143
/*---------------------------------------------------------------------------+
| reg_norm.S |
| |
| Copyright (C) 1992,1993,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Normalize the value in a FPU_REG. |
| |
| Call from C as: |
| void normalize(FPU_REG *n) |
| |
| void normalize_nuo(FPU_REG *n) |
| |
+---------------------------------------------------------------------------*/
 
#include "fpu_emu.h"
 
 
.text
/*
 * normalize: shift the significand of the FPU_REG passed as the first
 * parameter left until its most significant bit is set, reducing the
 * exponent by the number of places shifted.  A zero significand turns
 * the register into a zero (exponent EXP_UNDER, tag TW_Zero).
 * Afterwards the exponent is checked and arith_overflow() or
 * arith_underflow() is called with the register pointer if it is
 * >= EXP_OVER or <= EXP_UNDER respectively.
 * Register use: %ebx = register pointer, %edx:%eax = significand,
 * %ecx = shift count.
 */
ENTRY(normalize)
pushl %ebp
movl %esp,%ebp
pushl %ebx

movl PARAM1,%ebx

#ifdef PARANOID
/* Only TW_Valid registers are expected here */
cmpb TW_Valid,TAG(%ebx)
je L_ok

pushl $0x220
call SYMBOL_NAME(exception)
addl $4,%esp

L_ok:
#endif PARANOID

movl SIGH(%ebx),%edx
movl SIGL(%ebx),%eax

/* or-ing a register with itself only sets the flags */
orl %edx,%edx /* ms bits */
js L_done /* Already normalized */
jnz L_shift_1 /* Shift left 1 - 31 bits */

orl %eax,%eax
jz L_zero /* The contents are zero */

/* The upper word is zero: shift left by a whole word first */
movl %eax,%edx
xorl %eax,%eax
subl $32,EXP(%ebx) /* This can cause an underflow */

/* We need to shift left by 1 - 31 bits */
L_shift_1:
/* bsrl gives the index of the highest set bit; 31 - index is the shift */
bsrl %edx,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx
shld %cl,%eax,%edx
shl %cl,%eax
subl %ecx,EXP(%ebx) /* This can cause an underflow */

movl %edx,SIGH(%ebx)
movl %eax,SIGL(%ebx)

L_done:
cmpl EXP_OVER,EXP(%ebx)
jge L_overflow

cmpl EXP_UNDER,EXP(%ebx)
jle L_underflow

/* Common epilogue; also used by normalize_nuo */
L_exit:
popl %ebx
leave
ret


/* Zero significand; also reached from normalize_nuo */
L_zero:
movl EXP_UNDER,EXP(%ebx)
movb TW_Zero,TAG(%ebx)
jmp L_exit

L_underflow:
push %ebx
call SYMBOL_NAME(arith_underflow)
pop %ebx
jmp L_exit

L_overflow:
push %ebx
call SYMBOL_NAME(arith_overflow)
pop %ebx
jmp L_exit
 
 
 
/* Normalise without reporting underflow or overflow */
/*
 * Same shifting as normalize, but the exponent is not range checked
 * afterwards.  The L_exit and L_zero labels jumped to from here belong
 * to normalize.
 */
ENTRY(normalize_nuo)
pushl %ebp
movl %esp,%ebp
pushl %ebx

movl PARAM1,%ebx

#ifdef PARANOID
/* Only TW_Valid registers are expected here */
cmpb TW_Valid,TAG(%ebx)
je L_ok_nuo

pushl $0x221
call SYMBOL_NAME(exception)
addl $4,%esp

L_ok_nuo:
#endif PARANOID

movl SIGH(%ebx),%edx
movl SIGL(%ebx),%eax

orl %edx,%edx /* ms bits */
js L_exit /* Already normalized */
jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */

orl %eax,%eax
jz L_zero /* The contents are zero */

/* The upper word is zero: shift left by a whole word first */
movl %eax,%edx
xorl %eax,%eax
subl $32,EXP(%ebx) /* This can cause an underflow */

/* We need to shift left by 1 - 31 bits */
L_nuo_shift_1:
/* bsrl gives the index of the highest set bit; 31 - index is the shift */
bsrl %edx,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx
shld %cl,%eax,%edx
shl %cl,%eax
subl %ecx,EXP(%ebx) /* This can cause an underflow */

movl %edx,SIGH(%ebx)
movl %eax,SIGL(%ebx)
jmp L_exit
 
 
/reg_u_add.S
0,0 → 1,187
.file "reg_u_add.S"
/*---------------------------------------------------------------------------+
| reg_u_add.S |
| |
| Add two valid (TW_Valid) FPU_REG numbers, of the same sign, and put the |
| result in a destination FPU_REG. |
| |
| Copyright (C) 1992,1993,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| int reg_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
| int control_w) |
| |
+---------------------------------------------------------------------------*/
 
/*
| Kernel addition routine reg_u_add(reg *arg1, reg *arg2, reg *answ).
| Takes two valid reg f.p. numbers (TW_Valid), which are
| treated as unsigned numbers,
| and returns their sum as a TW_Valid or TW_S f.p. number.
| The returned number is normalized.
| Basic checks are performed if PARANOID is defined.
*/
 
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
 
.text
/*
 * reg_u_add: add the significands of two registers of the same sign.
 *   PARAM1, PARAM2 = the two sources, PARAM3 = destination.
 * The significand of the operand with the smaller exponent is shifted
 * right by the exponent difference (bits shifted out are collected in
 * an extension word, with a "sticky" low bit), then added to the other
 * significand.  The destination exponent is that of the larger operand,
 * incremented if the addition carries out.  Control then passes to
 * fpu_reg_round, which is expected to round, store the result and
 * return to the C caller.
 * The C caller tests the return value for non-zero to detect failure,
 * so the internal error path below returns 1 in %eax.
 * Register use: %esi = operand with the larger exponent,
 * %edi = destination, %eax:%ebx:%edx = shifted significand and
 * extension, %ecx = shift count.
 */
ENTRY(reg_u_add)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi		/* source 1 */
	movl	PARAM2,%edi		/* source 2 */

#ifdef DENORM_OPERAND
	cmpl	EXP_UNDER,EXP(%esi)
	jg	xOp1_not_denorm

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xOp1_not_denorm:
	cmpl	EXP_UNDER,EXP(%edi)
	jg	xOp2_not_denorm

	call	SYMBOL_NAME(denormal_operand)
	orl	%eax,%eax
	jnz	fpu_Arith_exit

xOp2_not_denorm:
#endif /* DENORM_OPERAND */

	movl	EXP(%esi),%ecx
	subl	EXP(%edi),%ecx		/* exp1 - exp2 */
	jge	L_arg1_larger

	/* num1 is smaller */
	movl	SIGL(%esi),%ebx
	movl	SIGH(%esi),%eax

	/* Make %esi refer to the larger operand; only the low 16 bits
	   of the (now positive) shift count are used from here on */
	movl	%edi,%esi
	negw	%cx
	jmp	L_accum_loaded

L_arg1_larger:
	/* num1 has larger or equal exponent */
	movl	SIGL(%edi),%ebx
	movl	SIGH(%edi),%eax

L_accum_loaded:
	movl	PARAM3,%edi		/* destination */
/*	movb	SIGN(%esi),%dl
	movb	%dl,SIGN(%edi) */	/* Copy the sign from the first arg */


	movl	EXP(%esi),%edx
	movl	%edx,EXP(%edi)		/* Copy exponent to destination */

	xorl	%edx,%edx		/* clear the extension */

#ifdef PARANOID
	/* Both significands must have their most significant bit set */
	testl	$0x80000000,%eax
	je	L_bugged

	testl	$0x80000000,SIGH(%esi)
	je	L_bugged
#endif /* PARANOID */

/* The number to be shifted is in %eax:%ebx:%edx */
	cmpw	$32,%cx			/* shrd only works for 0..31 bits */
	jnc	L_more_than_31

/* less than 32 bits */
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	jmp	L_shift_done

L_more_than_31:
	cmpw	$64,%cx
	jnc	L_more_than_63

	subb	$32,%cl
	jz	L_exactly_32

	shrd	%cl,%eax,%edx
	shr	%cl,%eax
	orl	%ebx,%ebx
	jz	L_more_31_no_low	/* none of the lowest bits is set */

	orl	$1,%edx			/* record the fact in the extension */

L_more_31_no_low:
	movl	%eax,%ebx
	xorl	%eax,%eax
	jmp	L_shift_done

L_exactly_32:
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	jmp	L_shift_done

L_more_than_63:
	cmpw	$65,%cx
	jnc	L_more_than_64

	/* Shift right by exactly 64 bits */
	movl	%eax,%edx
	orl	%ebx,%ebx
	jz	L_more_63_no_low

	orl	$1,%edx
	jmp	L_more_63_no_low

L_more_than_64:
	movl	$1,%edx		/* The shifted nr always at least one '1' */

L_more_63_no_low:
	xorl	%ebx,%ebx
	xorl	%eax,%eax

L_shift_done:
	/* Now do the addition */
	addl	SIGL(%esi),%ebx
	adcl	SIGH(%esi),%eax
	jnc	L_round_the_result

	/* Overflow, adjust the result */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	rcrl	$1,%edx
	jnc	L_no_bit_lost

	/* A set bit was rotated out of the extension: keep it sticky */
	orl	$1,%edx

L_no_bit_lost:
	incl	EXP(%edi)

L_round_the_result:
	jmp	fpu_reg_round	/* Round the result */



#ifdef PARANOID
/* If we ever get here then we have problems! */
L_bugged:
	pushl	EX_INTERNAL|0x201
	call	EXCEPTION
	pop	%ebx
	/* Report the failure to the caller, which tests for non-zero */
	movl	$1,%eax
	jmp	L_exit
#endif /* PARANOID */


L_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret
/reg_u_sub.S
0,0 → 1,290
.file "reg_u_sub.S"
/*---------------------------------------------------------------------------+
| reg_u_sub.S |
| |
| Core floating point subtraction routine. |
| |
| Copyright (C) 1992,1993,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| int reg_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
| int control_w) |
| |
+---------------------------------------------------------------------------*/
 
/*
| Kernel subtraction routine reg_u_sub(reg *arg1, reg *arg2, reg *answ).
| Takes two valid reg f.p. numbers (TW_Valid), which are
| treated as unsigned numbers,
| and returns their difference as a TW_Valid or TW_Zero f.p.
| number.
| The first number (arg1) must be the larger.
| The returned number is normalized.
| Basic checks are performed if PARANOID is defined.
*/
 
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
 
.text
/*
 * reg_u_sub: subtract the significand of PARAM2 from that of PARAM1
 * (PARAM1 must be the larger number) and place the result in PARAM3.
 * The smaller significand is shifted right by the exponent difference
 * into %eax:%ebx with an extension word in %edx, subtracted from the
 * larger one, and the difference is shifted left until normalized.
 * Non-zero results are passed to fpu_reg_round; an exactly zero result
 * is stored directly and 0 is returned in %eax.  The PARANOID error
 * paths return 1 in %eax.
 * Register use: %esi = source 1, %edi = source 2 and later the
 * destination, %ecx = shift count.
 */
ENTRY(reg_u_sub)
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %ebx

movl PARAM1,%esi /* source 1 */
movl PARAM2,%edi /* source 2 */

#ifdef DENORM_OPERAND
cmpl EXP_UNDER,EXP(%esi)
jg xOp1_not_denorm

call SYMBOL_NAME(denormal_operand)
orl %eax,%eax
jnz fpu_Arith_exit

xOp1_not_denorm:
cmpl EXP_UNDER,EXP(%edi)
jg xOp2_not_denorm

call SYMBOL_NAME(denormal_operand)
orl %eax,%eax
jnz fpu_Arith_exit

xOp2_not_denorm:
#endif DENORM_OPERAND

movl EXP(%esi),%ecx
subl EXP(%edi),%ecx /* exp1 - exp2 */

#ifdef PARANOID
/* source 2 is always smaller than source 1 */
js L_bugged_1

testl $0x80000000,SIGH(%edi) /* The args are assumed to be normalized */
je L_bugged_2

testl $0x80000000,SIGH(%esi)
je L_bugged_2
#endif PARANOID

/*--------------------------------------+
| Form a register holding the |
| smaller number |
+--------------------------------------*/
movl SIGH(%edi),%eax /* register ms word */
movl SIGL(%edi),%ebx /* register ls word */

movl PARAM3,%edi /* destination */
movl EXP(%esi),%edx
movl %edx,EXP(%edi) /* Copy exponent to destination */
/* movb SIGN(%esi),%dl
movb %dl,SIGN(%edi) */ /* Copy the sign from the first arg */

xorl %edx,%edx /* register extension */

/*--------------------------------------+
| Shift the temporary register |
| right the required number of |
| places. |
+--------------------------------------*/
L_shift_r:
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jnc L_more_than_31

/* less than 32 bits */
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax
jmp L_shift_done

L_more_than_31:
cmpl $64,%ecx
jnc L_more_than_63

subb $32,%cl
jz L_exactly_32

shrd %cl,%eax,%edx
shr %cl,%eax
orl %ebx,%ebx
jz L_more_31_no_low /* none of the lowest bits is set */

orl $1,%edx /* record the fact in the extension */

L_more_31_no_low:
movl %eax,%ebx
xorl %eax,%eax
jmp L_shift_done

L_exactly_32:
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
jmp L_shift_done

L_more_than_63:
cmpw $65,%cx
jnc L_more_than_64

/* Shift right by 64 bits */
movl %eax,%edx
orl %ebx,%ebx
jz L_more_63_no_low

orl $1,%edx
jmp L_more_63_no_low

L_more_than_64:
/* Flags are still those of the compare with 65 above */
jne L_more_than_65

/* Shift right by 65 bits */
/* Carry is clear if we get here */
movl %eax,%edx
rcrl %edx
jnc L_shift_65_nc

/* A set bit was rotated out: record it as a sticky bit */
orl $1,%edx
jmp L_more_63_no_low

L_shift_65_nc:
orl %ebx,%ebx
jz L_more_63_no_low

orl $1,%edx
jmp L_more_63_no_low

L_more_than_65:
movl $1,%edx /* The shifted nr always at least one '1' */

L_more_63_no_low:
xorl %ebx,%ebx
xorl %eax,%eax

L_shift_done:
L_subtr:
/*------------------------------+
| Do the subtraction |
+------------------------------*/
/* 0:SIGL:SIGH of source 1 minus %edx:%ebx:%eax, lowest word first,
leaving the difference in %eax:%ebx:%edx */
xorl %ecx,%ecx
subl %edx,%ecx
movl %ecx,%edx
movl SIGL(%esi),%ecx
sbbl %ebx,%ecx
movl %ecx,%ebx
movl SIGH(%esi),%ecx
sbbl %eax,%ecx
movl %ecx,%eax

#ifdef PARANOID
/* We can never get a borrow */
jc L_bugged
#endif PARANOID

/*--------------------------------------+
| Normalize the result |
+--------------------------------------*/
testl $0x80000000,%eax
jnz L_round /* no shifting needed */

orl %eax,%eax
jnz L_shift_1 /* shift left 1 - 31 bits */

orl %ebx,%ebx
jnz L_shift_32 /* shift left 32 - 63 bits */

/*
* A rare case, the only one which is non-zero if we got here
* is: 1000000 .... 0000
* -0111111 .... 1111 1
* --------------------
* 0000000 .... 0000 1
*/

cmpl $0x80000000,%edx
jnz L_must_be_zero

/* Shift left 64 bits */
subl $64,EXP(%edi)
xchg %edx,%eax
jmp fpu_reg_round

L_must_be_zero:
#ifdef PARANOID
orl %edx,%edx
jnz L_bugged_3
#endif PARANOID

/* The result is zero */
movb TW_Zero,TAG(%edi)
movl $0,EXP(%edi) /* exponent */
movl $0,SIGL(%edi)
movl $0,SIGH(%edi)
jmp L_exit /* %eax contains zero */

L_shift_32:
movl %ebx,%eax
movl %edx,%ebx
movl $0,%edx
subl $32,EXP(%edi) /* Can get underflow here */

/* We need to shift left by 1 - 31 bits */
L_shift_1:
/* bsrl gives the index of the highest set bit; 31 - index is the shift */
bsrl %eax,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx
shld %cl,%ebx,%eax
shld %cl,%edx,%ebx
shl %cl,%edx
subl %ecx,EXP(%edi) /* Can get underflow here */

L_round:
jmp fpu_reg_round /* Round the result */


#ifdef PARANOID
/* Internal consistency failures: report, then return 1 */
L_bugged_1:
pushl EX_INTERNAL|0x206
call EXCEPTION
pop %ebx
jmp L_error_exit

L_bugged_2:
pushl EX_INTERNAL|0x209
call EXCEPTION
pop %ebx
jmp L_error_exit

L_bugged_3:
pushl EX_INTERNAL|0x210
call EXCEPTION
pop %ebx
jmp L_error_exit

/* NOTE(review): no jump to L_bugged_4 is visible in this routine */
L_bugged_4:
pushl EX_INTERNAL|0x211
call EXCEPTION
pop %ebx
jmp L_error_exit

L_bugged:
pushl EX_INTERNAL|0x212
call EXCEPTION
pop %ebx
jmp L_error_exit
#endif PARANOID


L_error_exit:
movl $1,%eax
L_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
/version.h
0,0 → 1,12
/*---------------------------------------------------------------------------+
| version.h |
| |
| |
| Copyright (C) 1992,1993,1994,1996 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
#define FPU_VERSION "wm-FPU-emu version 1.22"
/load_store.c
0,0 → 1,260
/*---------------------------------------------------------------------------+
| load_store.c |
| |
| This file contains most of the code to interpret the FPU instructions |
| which load and store from user memory. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
| Note: |
| The file contains code which accesses user memory. |
| Emulator static data may change when user memory is accessed, due to |
| other processes using the emulator while swapping is in progress. |
+---------------------------------------------------------------------------*/
 
#include <asm/segment.h>
 
#include "fpu_system.h"
#include "exception.h"
#include "fpu_emu.h"
#include "status_w.h"
#include "control_w.h"
 
 
/* Stack handling classes used in type_table[] below. */
#define _NONE_ 0 /* st0_ptr etc not needed */
#define _REG0_ 1 /* Will be storing st(0) */
#define _PUSH_ 3 /* Need to check for space to push onto stack */
#define _null_ 4 /* Function illegal or not implemented */

/* Tag st(0) as empty and increment top.  Expands to a braced block
   which uses the caller's local st0_ptr. */
#define pop_0() { st0_ptr->tag = TW_Empty; top++; }


/* Stack handling class for each of the 32 instruction types; the index
   is the same 'type' value which load_store_instr() switches on
   (written there in octal). */
static unsigned char const type_table[32] = {
/* 000-003: fld m32real, fild m32int, fld m64real, fild m16int */
_PUSH_, _PUSH_, _PUSH_, _PUSH_,
/* 004-007 */
_null_, _null_, _null_, _null_,
/* 010-013: fst m32real, fist m32int, fst m64real, fist m16int */
_REG0_, _REG0_, _REG0_, _REG0_,
/* 014-017: fstp m32real, fistp m32int, fstp m64real, fistp m16int */
_REG0_, _REG0_, _REG0_, _REG0_,
/* 020-023: fldenv, -, frstor, fbld */
_NONE_, _null_, _NONE_, _PUSH_,
/* 024-027: fldcw, fld m80real, -, fild m64int */
_NONE_, _PUSH_, _null_, _PUSH_,
/* 030-033: fstenv, -, fsave, fbstp */
_NONE_, _null_, _NONE_, _REG0_,
/* 034-037: fstcw, fstp m80real, fstsw, fistp m64int */
_NONE_, _REG0_, _NONE_, _REG0_
};

/* Operand size in bytes for each instruction type, compared against
   access_limit by load_store_instr() when the default mode is PM16. */
unsigned char const data_sizes_16[32] = {
4, 4, 8, 2, 0, 0, 0, 0,
4, 4, 8, 2, 4, 4, 8, 2,
14, 0, 94, 10, 2, 10, 0, 8,
14, 0, 94, 10, 2, 10, 2, 8
};

/* As data_sizes_16[], but used when the default mode is SEG32; only
   the environment (28) and full state (108) sizes differ. */
unsigned char const data_sizes_32[32] = {
4, 4, 8, 2, 0, 0, 0, 0,
4, 4, 8, 2, 4, 4, 8, 2,
28, 0,108, 10, 2, 10, 0, 8,
28, 0,108, 10, 2, 10, 2, 8
};
 
/*
  Emulate one FPU instruction which loads from or stores to user memory.

  type          instruction type, used to index type_table[] and the
                data_sizes tables (32 entries each) and as the switch
                selector below
  addr_modes    addressing mode information for the instruction
  data_address  user memory address of the operand

  Returns 1 for fldenv, frstor, fldcw, fstenv, fsave, fstcw and fstsw,
  and 0 in every other case, including stack overflow and illegal
  instruction types.
*/
int load_store_instr(unsigned char type, fpu_addr_modes addr_modes,
void *data_address)
{
FPU_REG loaded_data;
FPU_REG *st0_ptr;

st0_ptr = NULL; /* Initialized just to stop compiler warnings. */

/* In protected modes, abort with SIGSEGV if the operand does not fit
within access_limit. */
if ( addr_modes.default_mode & PROTECTED )
{
if ( addr_modes.default_mode == SEG32 )
{
if ( access_limit < data_sizes_32[type] )
math_abort(FPU_info,SIGSEGV);
}
else if ( addr_modes.default_mode == PM16 )
{
if ( access_limit < data_sizes_16[type] )
math_abort(FPU_info,SIGSEGV);
}
#ifdef PARANOID
else
EXCEPTION(EX_INTERNAL|0x140);
#endif PARANOID
}

/* Prepare st0_ptr (and the stack top) according to the class of the
instruction. */
switch ( type_table[type] )
{
case _NONE_:
break;
case _REG0_:
st0_ptr = &st(0); /* Some of these instructions pop after
storing */
break;
case _PUSH_:
{
/* The register below the current top must be empty before it can
become the new st(0). */
st0_ptr = &st(-1);
if ( st0_ptr->tag != TW_Empty )
{ stack_overflow(); return 0; }
top--;
}
break;
case _null_:
FPU_illegal();
return 0;
#ifdef PARANOID
default:
EXCEPTION(EX_INTERNAL|0x141);
return 0;
#endif PARANOID
}

switch ( type )
{
case 000: /* fld m32real */
clear_C1();
reg_load_single((float *)data_address, &loaded_data);
if ( (loaded_data.tag == TW_NaN) &&
real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) )
{
/* Undo the push done above; nothing is loaded */
top++;
break;
}
reg_move(&loaded_data, st0_ptr);
break;
case 001: /* fild m32int */
clear_C1();
reg_load_int32((long *)data_address, st0_ptr);
break;
case 002: /* fld m64real */
clear_C1();
reg_load_double((double *)data_address, &loaded_data);
if ( (loaded_data.tag == TW_NaN) &&
real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) )
{
/* Undo the push done above; nothing is loaded */
top++;
break;
}
reg_move(&loaded_data, st0_ptr);
break;
case 003: /* fild m16int */
clear_C1();
reg_load_int16((short *)data_address, st0_ptr);
break;
case 010: /* fst m32real */
clear_C1();
reg_store_single((float *)data_address, st0_ptr);
break;
case 011: /* fist m32int */
clear_C1();
reg_store_int32((long *)data_address, st0_ptr);
break;
case 012: /* fst m64real */
clear_C1();
reg_store_double((double *)data_address, st0_ptr);
break;
case 013: /* fist m16int */
clear_C1();
reg_store_int16((short *)data_address, st0_ptr);
break;
case 014: /* fstp m32real */
clear_C1();
if ( reg_store_single((float *)data_address, st0_ptr) )
pop_0(); /* pop only if the number was actually stored
(see the 80486 manual p16-28) */
break;
case 015: /* fistp m32int */
clear_C1();
if ( reg_store_int32((long *)data_address, st0_ptr) )
pop_0(); /* pop only if the number was actually stored
(see the 80486 manual p16-28) */
break;
case 016: /* fstp m64real */
clear_C1();
if ( reg_store_double((double *)data_address, st0_ptr) )
pop_0(); /* pop only if the number was actually stored
(see the 80486 manual p16-28) */
break;
case 017: /* fistp m16int */
clear_C1();
if ( reg_store_int16((short *)data_address, st0_ptr) )
pop_0(); /* pop only if the number was actually stored
(see the 80486 manual p16-28) */
break;
case 020: /* fldenv m14/28byte */
fldenv(addr_modes, (char *)data_address);
/* Ensure that the values just loaded are not changed by
fix-up operations. */
return 1;
case 022: /* frstor m94/108byte */
frstor(addr_modes, (char *)data_address);
/* Ensure that the values just loaded are not changed by
fix-up operations. */
return 1;
case 023: /* fbld m80dec */
clear_C1();
reg_load_bcd((char *)data_address, st0_ptr);
break;
case 024: /* fldcw */
RE_ENTRANT_CHECK_OFF;
FPU_verify_area(VERIFY_READ, data_address, 2);
control_word = get_fs_word((unsigned short *) data_address);
RE_ENTRANT_CHECK_ON;
/* Recompute the summary bits against the new control word */
if ( partial_status & ~control_word & CW_Exceptions )
partial_status |= (SW_Summary | SW_Backward);
else
partial_status &= ~(SW_Summary | SW_Backward);
#ifdef PECULIAR_486
control_word |= 0x40; /* An 80486 appears to always set this bit */
#endif PECULIAR_486
return 1;
case 025: /* fld m80real */
clear_C1();
reg_load_extended((long double *)data_address, st0_ptr);
break;
case 027: /* fild m64int */
clear_C1();
reg_load_int64((long long *)data_address, st0_ptr);
break;
case 030: /* fstenv m14/28byte */
fstenv(addr_modes, (char *)data_address);
return 1;
case 032: /* fsave */
fsave(addr_modes, (char *)data_address);
return 1;
case 033: /* fbstp m80dec */
clear_C1();
if ( reg_store_bcd((char *)data_address, st0_ptr) )
pop_0(); /* pop only if the number was actually stored
(see the 80486 manual p16-28) */
break;
case 034: /* fstcw m16int */
RE_ENTRANT_CHECK_OFF;
FPU_verify_area(VERIFY_WRITE,data_address,2);
put_fs_word(control_word, (short *) data_address);
RE_ENTRANT_CHECK_ON;
return 1;
case 035: /* fstp m80real */
clear_C1();
if ( reg_store_extended((long double *)data_address, st0_ptr) )
pop_0(); /* pop only if the number was actually stored
(see the 80486 manual p16-28) */
break;
case 036: /* fstsw m2byte */
RE_ENTRANT_CHECK_OFF;
FPU_verify_area(VERIFY_WRITE,data_address,2);
put_fs_word(status_word(),(short *) data_address);
RE_ENTRANT_CHECK_ON;
return 1;
case 037: /* fistp m64int */
clear_C1();
if ( reg_store_int64((long long *)data_address, st0_ptr) )
pop_0(); /* pop only if the number was actually stored
(see the 80486 manual p16-28) */
break;
}
return 0;
}
/poly_sin.c
0,0 → 1,408
/*---------------------------------------------------------------------------+
| poly_sin.c |
| |
| Computation of an approximation of the sin function and the cosine |
| function by a polynomial. |
| |
| Copyright (C) 1992,1993,1994 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
 
 
#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "control_w.h"
#include "poly.h"
 
 
/* Number of entries in each of the four coefficient tables below. */
#define N_COEFF_P   4
#define N_COEFF_N   4
#define N_COEFF_PH  4
#define N_COEFF_NH  4

/*
 * 64-bit coefficient tables handed to polynomial_Xsig() by poly_sine() and
 * poly_cos().  In every use the "neg" polynomial is evaluated first,
 * multiplied by the squared argument and negated, and the "pos" polynomial
 * is then accumulated on top of it.
 *
 * The "_l" pair is used by the small-argument branch of poly_sine() and the
 * large-argument branch of poly_cos(); the "_h" pair is used by the other
 * branch of each function.
 */
static const unsigned long long pos_terms_l[N_COEFF_P] = {
  0xaaaaaaaaaaaaaaabULL,
  0x00d00d00d00cf906ULL,
  0x000006b99159a8bbULL,
  0x000000000d7392e6ULL,
};

static const unsigned long long neg_terms_l[N_COEFF_N] = {
  0x2222222222222167ULL,
  0x0002e3bc74aab624ULL,
  0x0000000b09229062ULL,
  0x00000000000c7973ULL,
};

static const unsigned long long pos_terms_h[N_COEFF_PH] = {
  0x0000000000000000ULL,
  0x05b05b05b05b0406ULL,
  0x000049f93edd91a9ULL,
  0x00000000c9c9ed62ULL,
};

static const unsigned long long neg_terms_h[N_COEFF_NH] = {
  0xaaaaaaaaaaaaaa98ULL,
  0x001a01a01a019064ULL,
  0x0000008f76c68a77ULL,
  0x0000000000d58f5eULL,
};
 
 
/*--- poly_sine() -----------------------------------------------------------+
 | Compute a polynomial approximation of sin(arg) and store it in *result.   |
 |                                                                           |
 | arg    : source register; only read.  The code distinguishes unbiased     |
 |          exponents < -1, == -1 and == 0 only, so the caller is presumably |
 |          expected to have reduced the argument to [0, pi/2] beforehand    |
 |          (NOTE(review): confirm against the callers).                     |
 | result : receives exponent, significand, tag (TW_Valid) and the sign of   |
 |          arg.  May be the same register as arg: every read of arg in the  |
 |          computation precedes the first write to result.                  |
 |                                                                           |
 | Arguments up to approx 0.88309101259 are evaluated directly as            |
 | arg - arg^3 * P(arg^2); larger ones use sin(arg) = cos(pi/2 - arg).       |
 +---------------------------------------------------------------------------*/
void poly_sine(FPU_REG const *arg, FPU_REG *result)
{
  int                 exponent, echange;
  Xsig                accumulator, argSqrd, argTo4;
  unsigned long       fix_up, adj;
  unsigned long long  fixed_arg;


#ifdef PARANOID
  if ( arg->tag == TW_Zero )
    {
      /* Return 0.0 */
      reg_move(&CONST_Z, result);
      return;
    }
#endif /* PARANOID */

  /* Work with the unbiased exponent from here on. */
  exponent = arg->exp - EXP_BIAS;

  accumulator.lsw = accumulator.midw = accumulator.msw = 0;

  /* Split into two ranges, for arguments below and above 1.0 */
  /* The boundary between upper and lower is approx 0.88309101259 */
  if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xe21240aa)) )
    {
      /* The argument is <= 0.88309101259 */

      /* argSqrd = arg^2, shifted right to account for the exponent. */
      argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0;
      mul64_Xsig(&argSqrd, &significand(arg));
      shr_Xsig(&argSqrd, 2*(-1-exponent));

      /* argTo4 = arg^4 */
      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
      argTo4.lsw = argSqrd.lsw;
      mul_Xsig_Xsig(&argTo4, &argTo4);

      /* Negative terms first: poly(arg^4) * arg^2, then negated ... */
      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
                      N_COEFF_N-1);
      mul_Xsig_Xsig(&accumulator, &argSqrd);
      negate_Xsig(&accumulator);

      /* ... then the positive terms are accumulated on top. */
      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
                      N_COEFF_P-1);

      shr_Xsig(&accumulator, 2);    /* Divide by four */
      accumulator.msw |= 0x80000000;  /* Add 1.0 */

      /* Multiply by arg three times, i.e. by arg^3 (significand only). */
      mul64_Xsig(&accumulator, &significand(arg));
      mul64_Xsig(&accumulator, &significand(arg));
      mul64_Xsig(&accumulator, &significand(arg));

      /* Divide by four, FPU_REG compatible, etc */
      exponent = 3*exponent + EXP_BIAS;

      /* The minimum exponent difference is 3 */
      /* Align the cubic term with arg's own exponent before subtracting. */
      shr_Xsig(&accumulator, arg->exp - exponent);

      /* result significand = arg - (cubic term) */
      negate_Xsig(&accumulator);
      XSIG_LL(accumulator) += significand(arg);

      echange = round_Xsig(&accumulator);

      result->exp = arg->exp + echange;
    }
  else
    {
      /* The argument is > 0.88309101259 */
      /* We use sin(arg) = cos(pi/2-arg) */

      fixed_arg = significand(arg);

      if ( exponent == 0 )
        {
          /* The argument is >= 1.0 */

          /* Put the binary point at the left. */
          fixed_arg <<= 1;
        }
      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
      /* For arguments in [0.883, 1.0) this unsigned subtraction wraps
         modulo 2^64; the wrap supplies the integer part of pi/2. */
      fixed_arg = 0x921fb54442d18469LL - fixed_arg;

      /* Special case caused by rounding: pi/2 rounded to nearest has the
         significand 0xc90fdaa22168c235, which becomes 0x921fb54442d1846a
         after the shift above, one more than the truncated constant.
         The subtraction then wraps to all ones, which would be treated
         as pi/2 - arg ~= 1.0 instead of ~= 0.  No other in-range argument
         can produce this value. */
      if ( fixed_arg == 0xffffffffffffffffLL )
        fixed_arg = 0;

      /* argSqrd = (pi/2 - arg)^2 */
      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
      mul64_Xsig(&argSqrd, &fixed_arg);

      /* argTo4 = (pi/2 - arg)^4 */
      XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
      mul_Xsig_Xsig(&argTo4, &argTo4);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
                      N_COEFF_NH-1);
      mul_Xsig_Xsig(&accumulator, &argSqrd);
      negate_Xsig(&accumulator);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
                      N_COEFF_PH-1);
      negate_Xsig(&accumulator);

      /* Multiply by (pi/2 - arg)^2 */
      mul64_Xsig(&accumulator, &fixed_arg);
      mul64_Xsig(&accumulator, &fixed_arg);

      shr_Xsig(&accumulator, 3);
      negate_Xsig(&accumulator);

      add_Xsig_Xsig(&accumulator, &argSqrd);

      shr_Xsig(&accumulator, 1);

      accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
      negate_Xsig(&accumulator);

      /* The basic computation is complete. Now fix the answer to
         compensate for the error due to the approximation used for
         pi/2
         */

      /* This has an exponent of -65 */
      /* 0x898cc517 is the top 32 bits of the part of pi/2 that was dropped
         from the 64-bit constant used above (see the hex expansion). */
      fix_up = 0x898cc517;
      /* The fix-up needs to be improved for larger args */
      if ( argSqrd.msw & 0xffc00000 )
        {
          /* Get about 32 bit precision in these: */
          mul_32_32(0x898cc517, argSqrd.msw, &adj);
          fix_up -= adj/6;
        }
      mul_32_32(fix_up, LL_MSW(fixed_arg), &fix_up);

      /* Subtract the fix-up from the low word, propagating a borrow into
         the upper 64 bits when the 32-bit subtraction wraps. */
      adj = accumulator.lsw;    /* temp save */
      accumulator.lsw -= fix_up;
      if ( accumulator.lsw > adj )
        XSIG_LL(accumulator) --;

      echange = round_Xsig(&accumulator);

      result->exp = EXP_BIAS - 1 + echange;
    }

  significand(result) = XSIG_LL(accumulator);
  result->tag = TW_Valid;
  result->sign = arg->sign;

#ifdef PARANOID
  /* Sanity check: the result must not exceed 1.0 */
  if ( (result->exp >= EXP_BIAS)
      && (significand(result) > 0x8000000000000000LL) )
    {
      EXCEPTION(EX_INTERNAL|0x150);
    }
#endif /* PARANOID */

}
 
 
 
/*--- poly_cos() ------------------------------------------------------------+
 | Compute a polynomial approximation of cos(arg) and store it in *result.   |
 |                                                                           |
 | arg    : source register; only read.  PARANOID builds reject arguments    |
 |          whose value exceeds exponent EXP_BIAS with significand           |
 |          0xc90fdaa22168c234 (pi/2 truncated) by raising EX_Invalid and    |
 |          returning a QNaN; other builds rely on the caller for the range. |
 | result : receives a positive value.  Either CONST_1 is copied into it or  |
 |          its exponent, significand, sign and tag are written directly.    |
 |                                                                           |
 | Arguments below approx 0.687705 are evaluated directly; larger ones are   |
 | evaluated from pi/2 - arg with the tables and cubic form that poly_sine() |
 | uses for its small arguments.                                             |
 +---------------------------------------------------------------------------*/
void poly_cos(FPU_REG const *arg, FPU_REG *result)
{
  long int            exponent, exp2, echange;
  Xsig                accumulator, argSqrd, fix_up, argTo4;
  unsigned long       adj;
  unsigned long long  fixed_arg;


#ifdef PARANOID
  if ( arg->tag == TW_Zero )
    {
      /* Return 1.0 */
      reg_move(&CONST_1, result);
      return;
    }

  if ( (arg->exp > EXP_BIAS)
      || ((arg->exp == EXP_BIAS)
          && (significand(arg) > 0xc90fdaa22168c234LL)) )
    {
      EXCEPTION(EX_Invalid);
      reg_move(&CONST_QNaN, result);
      return;
    }
#endif /* PARANOID */

  /* Work with the unbiased exponent from here on. */
  exponent = arg->exp - EXP_BIAS;

  accumulator.lsw = accumulator.midw = accumulator.msw = 0;

  if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xb00d6f54)) )
    {
      /* arg is < 0.687705 */

      /* argSqrd = arg^2 */
      argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0;
      mul64_Xsig(&argSqrd, &significand(arg));

      if ( exponent < -1 )
        {
          /* shift the argument right by the required places */
          shr_Xsig(&argSqrd, 2*(-1-exponent));
        }

      /* argTo4 = arg^4 */
      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
      argTo4.lsw = argSqrd.lsw;
      mul_Xsig_Xsig(&argTo4, &argTo4);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
                      N_COEFF_NH-1);
      mul_Xsig_Xsig(&accumulator, &argSqrd);
      negate_Xsig(&accumulator);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
                      N_COEFF_PH-1);
      negate_Xsig(&accumulator);

      /* Multiply by arg^2; the shift count is >= 0 because exponent <= -1 */
      mul64_Xsig(&accumulator, &significand(arg));
      mul64_Xsig(&accumulator, &significand(arg));
      shr_Xsig(&accumulator, -2*(1+exponent));

      shr_Xsig(&accumulator, 3);
      negate_Xsig(&accumulator);

      add_Xsig_Xsig(&accumulator, &argSqrd);

      shr_Xsig(&accumulator, 1);

      /* It doesn't matter if accumulator is all zero here, the
         following code will work ok */
      negate_Xsig(&accumulator);

      /* Round to 64 bits using the top bit of the low word. */
      if ( accumulator.lsw & 0x80000000 )
        XSIG_LL(accumulator) ++;
      if ( accumulator.msw == 0 )
        {
          /* The result is 1.0 */
          reg_move(&CONST_1, result);
        }
      else
        {
          significand(result) = XSIG_LL(accumulator);
          /* will be a valid positive nr with expon = -1 */
          /* NOTE(review): this 16-bit store clears `sign` and whatever
             shares its 16-bit word -- presumably `tag`, giving TW_Valid.
             It depends on the FPU_REG layout; confirm against fpu_emu.h. */
          *(short *)&(result->sign) = 0;
          result->exp = EXP_BIAS - 1;
        }
    }
  else
    {
      fixed_arg = significand(arg);

      if ( exponent == 0 )
        {
          /* The argument is >= 1.0 */

          /* Put the binary point at the left. */
          fixed_arg <<= 1;
        }
      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
      /* For arguments below 1.0 this unsigned subtraction wraps modulo
         2^64; the wrap supplies the integer part of pi/2. */
      fixed_arg = 0x921fb54442d18469LL - fixed_arg;

      /* Special case caused by rounding: pi/2 rounded to nearest has the
         significand 0xc90fdaa22168c235, which becomes 0x921fb54442d1846a
         after the shift above, one more than the truncated constant.
         The subtraction then wraps to all ones, which would be treated
         as pi/2 - arg ~= 1.0 instead of ~= 0.  (PARANOID builds have
         already rejected this argument above.) */
      if ( fixed_arg == 0xffffffffffffffffLL )
        fixed_arg = 0;

      exponent = -1;
      exp2 = -1;

      /* A shift is needed here only for a narrow range of arguments,
         i.e. for fixed_arg approx 2^-32, but we pick up more... */
      if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
        {
          fixed_arg <<= 16;
          exponent -= 16;
          exp2 -= 16;
        }

      /* argSqrd = (pi/2 - arg)^2 */
      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
      mul64_Xsig(&argSqrd, &fixed_arg);

      if ( exponent < -1 )
        {
          /* shift the argument right by the required places */
          shr_Xsig(&argSqrd, 2*(-1-exponent));
        }

      /* argTo4 = (pi/2 - arg)^4 */
      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
      argTo4.lsw = argSqrd.lsw;
      mul_Xsig_Xsig(&argTo4, &argTo4);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
                      N_COEFF_N-1);
      mul_Xsig_Xsig(&accumulator, &argSqrd);
      negate_Xsig(&accumulator);

      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
                      N_COEFF_P-1);

      shr_Xsig(&accumulator, 2);    /* Divide by four */
      accumulator.msw |= 0x80000000;  /* Add 1.0 */

      /* Multiply by (pi/2 - arg)^3 */
      mul64_Xsig(&accumulator, &fixed_arg);
      mul64_Xsig(&accumulator, &fixed_arg);
      mul64_Xsig(&accumulator, &fixed_arg);

      /* Divide by four, FPU_REG compatible, etc */
      exponent = 3*exponent;

      /* The minimum exponent difference is 3 */
      shr_Xsig(&accumulator, exp2 - exponent);

      /* accumulator = (pi/2 - arg) - (cubic term) */
      negate_Xsig(&accumulator);
      XSIG_LL(accumulator) += fixed_arg;

      /* The basic computation is complete. Now fix the answer to
         compensate for the error due to the approximation used for
         pi/2
         */

      /* This has an exponent of -65 */
      /* These are the 64 bits of pi/2 that follow the constant subtracted
         from above (see the hex expansion). */
      XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
      fix_up.lsw = 0;

      /* The fix-up needs to be improved for larger args */
      if ( argSqrd.msw & 0xffc00000 )
        {
          /* Get about 32 bit precision in these: */
          mul_32_32(0x898cc517, argSqrd.msw, &adj);
          fix_up.msw -= adj/2;
          mul_32_32(0x898cc517, argTo4.msw, &adj);
          fix_up.msw += adj/24;
        }

      /* Normalize, then align the fix-up with the accumulator and add. */
      exp2 += norm_Xsig(&accumulator);
      shr_Xsig(&accumulator, 1); /* Prevent overflow */
      exp2++;
      shr_Xsig(&fix_up, 65 + exp2);

      add_Xsig_Xsig(&accumulator, &fix_up);

      echange = round_Xsig(&accumulator);

      result->exp = exp2 + EXP_BIAS + echange;
      /* NOTE(review): same layout-dependent 16-bit store as in the
         small-argument branch; confirm against fpu_emu.h. */
      *(short *)&(result->sign) = 0;  /* Is a valid positive nr */
      significand(result) = XSIG_LL(accumulator);
    }

#ifdef PARANOID
  /* Sanity check: the result must not exceed 1.0 */
  if ( (result->exp >= EXP_BIAS)
      && (significand(result) > 0x8000000000000000LL) )
    {
      EXCEPTION(EX_INTERNAL|0x151);
    }
#endif /* PARANOID */

}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.