OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k/trunk/uclinux/uClinux-2.0.x/arch/m68k/fpsp040
    from Rev 199 to Rev 1765
    Reverse comparison

Rev 199 → Rev 1765

/sto_res.S
0,0 → 1,98
|
| sto_res.sa 3.1 12/10/90
|
| Takes the result and puts it in where the user expects it.
| Library functions return result in fp0. If fp0 is not the
| user's destination register then fp0 is moved to the
| correct floating-point destination register. fp0 and fp1
| are then restored to the original contents.
|
| Input: result in fp0,fp1
|
| d2 & a0 should be kept unmodified
|
| Output: moves the result to the true destination reg or mem
|
| Modifies: destination floating point register
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
STO_RES: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
 
|section 8
 
.include "fpsp.h"
 
.global sto_cos
sto_cos:
bfextu CMDREG1B(%a6){#13:#3},%d0 |extract cos destination
cmpib #3,%d0 |check for fp0/fp1 cases
bles c_fp0123
fmovemx %fp1-%fp1,-(%a7)
moveql #7,%d1
subl %d0,%d1 |d1 = 7- (dest. reg. no.)
clrl %d0
bsetl %d1,%d0 |d0 is dynamic register mask
fmovemx (%a7)+,%d0
rts
c_fp0123:
cmpib #0,%d0
beqs c_is_fp0
cmpib #1,%d0
beqs c_is_fp1
cmpib #2,%d0
beqs c_is_fp2
c_is_fp3:
fmovemx %fp1-%fp1,USER_FP3(%a6)
rts
c_is_fp2:
fmovemx %fp1-%fp1,USER_FP2(%a6)
rts
c_is_fp1:
fmovemx %fp1-%fp1,USER_FP1(%a6)
rts
c_is_fp0:
fmovemx %fp1-%fp1,USER_FP0(%a6)
rts
 
 
.global sto_res
sto_res:
bfextu CMDREG1B(%a6){#6:#3},%d0 |extract destination register
cmpib #3,%d0 |check for fp0/fp1 cases
bles fp0123
fmovemx %fp0-%fp0,-(%a7)
moveql #7,%d1
subl %d0,%d1 |d1 = 7- (dest. reg. no.)
clrl %d0
bsetl %d1,%d0 |d0 is dynamic register mask
fmovemx (%a7)+,%d0
rts
fp0123:
cmpib #0,%d0
beqs is_fp0
cmpib #1,%d0
beqs is_fp1
cmpib #2,%d0
beqs is_fp2
is_fp3:
fmovemx %fp0-%fp0,USER_FP3(%a6)
rts
is_fp2:
fmovemx %fp0-%fp0,USER_FP2(%a6)
rts
is_fp1:
fmovemx %fp0-%fp0,USER_FP1(%a6)
rts
is_fp0:
fmovemx %fp0-%fp0,USER_FP0(%a6)
rts
 
|end
/smovecr.S
0,0 → 1,162
|
| smovecr.sa 3.1 12/10/90
|
| The entry point sMOVECR returns the constant at the
| offset given in the instruction field.
|
| Input: An offset in the instruction word.
|
| Output: The constant rounded to the user's rounding
| mode unchecked for overflow.
|
| Modified: fp0.
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SMOVECR idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref nrm_set
|xref round
|xref PIRN
|xref PIRZRM
|xref PIRP
|xref SMALRN
|xref SMALRZRM
|xref SMALRP
|xref BIGRN
|xref BIGRZRM
|xref BIGRP
 
| Single-precision zero, loaded by Z_VAL for offsets that have no
| table entry.
FZERO: .long 00000000
|
| FMOVECR
|
| smovcr: return in fp0 the constant selected by the 7-bit offset
| field of CMDREG1B(%a6), rounded per the mode/precision in USER_FPCR.
|
| Register use in this routine:
|	d0 = offset, rebased to a table index, then scaled by 12
|	     (each table entry is read as three longwords)
|	d1 = rounding mode, later (precision << 16) + rounding mode
|	a0 = base of the selected table, later -> FP_SCR1(%a6)
| The inex2/ainex bits are set in USER_FPSR for the entries the
| code below treats as inexact.
|
.global smovcr
smovcr:
bfextu CMDREG1B(%a6){#9:#7},%d0 |get offset
bfextu USER_FPCR(%a6){#26:#2},%d1 |get rmode
|
| check range of offset
|
tstb %d0 |if zero, offset is to pi
beqs PI_TBL |it is pi
cmpib #0x0a,%d0 |check range $01 - $0a
bles Z_VAL |if in this range, return zero
cmpib #0x0e,%d0 |check range $0b - $0e
bles SM_TBL |valid constants in this range
cmpib #0x2f,%d0 |check range $0f - $2f
bles Z_VAL |if in this range, return zero
cmpib #0x3f,%d0 |check range $30 - $3f
ble BG_TBL |valid constants in this range
| Offsets above $3f fall through and also return zero.
Z_VAL:
fmoves FZERO,%fp0
rts
| Offset zero: d0 is already the table index (0) for the pi tables.
PI_TBL:
tstb %d1 |offset is zero, check for rmode
beqs PI_RN |if zero, rn mode
cmpib #0x3,%d1 |check for rp
beqs PI_RP |if 3, rp mode
PI_RZRM:
leal PIRZRM,%a0 |rmode is rz or rm, load PIRZRM in a0
bra set_finx
PI_RN:
leal PIRN,%a0 |rmode is rn, load PIRN in a0
bra set_finx
PI_RP:
leal PIRP,%a0 |rmode is rp, load PIRP in a0
bra set_finx
SM_TBL:
subil #0xb,%d0 |make offset in 0 - 3 range
tstb %d1 |check for rmode
beqs SM_RN |if zero, rn mode
cmpib #0x3,%d1 |check for rp
beqs SM_RP |if 3, rp mode
SM_RZRM:
leal SMALRZRM,%a0 |rmode is rz or rm, load SMALRZRM in a0
cmpib #0x2,%d0 |check if result is inex
ble set_finx |if 0 - 2, it is inexact
bra no_finx |if 3, it is exact
SM_RN:
leal SMALRN,%a0 |rmode is rn, load SMALRN in a0
cmpib #0x2,%d0 |check if result is inex
ble set_finx |if 0 - 2, it is inexact
bra no_finx |if 3, it is exact
SM_RP:
leal SMALRP,%a0 |rmode is rp, load SMALRP in a0
cmpib #0x2,%d0 |check if result is inex
ble set_finx |if 0 - 2, it is inexact
bra no_finx |if 3, it is exact
BG_TBL:
subil #0x30,%d0 |make offset in 0 - f range
tstb %d1 |check for rmode
beqs BG_RN |if zero, rn mode
cmpib #0x3,%d1 |check for rp
beqs BG_RP |if 3, rp mode
BG_RZRM:
leal BIGRZRM,%a0 |rmode is rz or rm, load BIGRZRM in a0
cmpib #0x1,%d0 |check if result is inex
ble set_finx |if 0 - 1, it is inexact
cmpib #0x7,%d0 |second check
ble no_finx |if 2 - 7, it is exact
bra set_finx |if 8 - f, it is inexact
BG_RN:
leal BIGRN,%a0 |rmode is rn, load BIGRN in a0
cmpib #0x1,%d0 |check if result is inex
ble set_finx |if 0 - 1, it is inexact
cmpib #0x7,%d0 |second check
ble no_finx |if 2 - 7, it is exact
bra set_finx |if 8 - f, it is inexact
BG_RP:
leal BIGRP,%a0 |rmode is rp, load BIGRP in a0
cmpib #0x1,%d0 |check if result is inex
ble set_finx |if 0 - 1, it is inexact
cmpib #0x7,%d0 |second check
ble no_finx |if 2 - 7, it is exact
| bra set_finx ;if 8 - f, it is inexact
| (8 - f reaches set_finx by falling through)
set_finx:
orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
no_finx:
mulul #12,%d0 |use offset to point into tables
movel %d1,L_SCR1(%a6) |load mode for round call
bfextu USER_FPCR(%a6){#24:#2},%d1 |get precision
tstl %d1 |check if extended precision
|
| Precision is extended
|
bnes not_ext |if not extended, go round the value
fmovemx (%a0,%d0),%fp0-%fp0 |return result in fp0
rts
|
| Precision is single or double
|
not_ext:
swap %d1 |rnd prec in upper word of d1
addl L_SCR1(%a6),%d1 |merge rmode in low word of d1
movel (%a0,%d0),FP_SCR1(%a6) |load first word to temp storage
movel 4(%a0,%d0),FP_SCR1+4(%a6) |load second word
movel 8(%a0,%d0),FP_SCR1+8(%a6) |load third word
clrl %d0 |clear g,r,s
lea FP_SCR1(%a6),%a0
btstb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0) |convert to internal ext. format
bsr round |go round the mantissa

bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
beqs fin_fcr |sign byte was zero: leave sign bit as is
bsetb #sign_bit,LOCAL_EX(%a0)
fin_fcr:
fmovemx (%a0),%fp0-%fp0
rts
 
|end
/x_unimp.S
0,0 → 1,77
|
| x_unimp.sa 3.3 7/1/91
|
| fpsp_unimp --- FPSP handler for unimplemented instruction
| exception.
|
| Invoked when the user program encounters a floating-point
| op-code that hardware does not support. Trap vector# 11
| (See table 8-1 MC68030 User's Manual).
|
|
| Note: An fsave for an unimplemented inst. will create a short
| fsave stack.
|
| Input: 1. Six word stack frame for unimplemented inst, four word
| for illegal
| (See table 8-7 MC68030 User's Manual).
| 2. Unimp (short) fsave state frame created here by fsave
| instruction.
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_UNIMP: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref get_op
|xref do_func
|xref sto_res
|xref gen_except
|xref fpsp_fmt_error
 
|
| fpsp_unimp: entry point of the unimplemented-instruction handler.
| uni_2: second global entry that skips the link/fsave.
|	NOTE(review): presumably for callers that have already built
|	the frame and fsave state -- confirm against the users of uni_2.
|
| Flow: save d0-d1/a0-a1, fp0-fp3 and fpcr/fpsr/fpiar in the local
| frame, validate the version byte of the fsave frame, clear the
| FPSR/FPCR, fetch the operand(s) with get_op, run do_func, store the
| result with sto_res unless STORE_FLG is set, then leave through
| gen_except.
|
.global fpsp_unimp
.global uni_2
fpsp_unimp:
link %a6,#-LOCAL_SIZE
fsave -(%a7)
uni_2:
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
moveb (%a7),%d0 |test for valid version num
andib #0xf0,%d0 |test for $4x
cmpib #VER_4,%d0 |must be $4x or exit
bnel fpsp_fmt_error
|
| Temporary D25B Fix
| The following lines are used to ensure that the FPSR
| exception byte and condition codes are clear before proceeding
|
movel USER_FPSR(%a6),%d0
andl #0xFF00FF,%d0 |clear all but accrued exceptions
movel %d0,USER_FPSR(%a6)
fmovel #0,%FPSR |clear all user bits
fmovel #0,%FPCR |clear all user exceptions for FPSP

clrb UFLG_TMP(%a6) |clr flag for unsupp data

bsrl get_op |go get operand(s)
clrb STORE_FLG(%a6) |cleared here; tested after do_func
bsrl do_func |do the function
fsave -(%a7) |capture possible exc state
tstb STORE_FLG(%a6)
bnes no_store |if STORE_FLG is set, no store
bsrl sto_res |store the result in user space
no_store:
bral gen_except |post any exceptions and return
 
|end
/gen_except.S
0,0 → 1,468
|
| gen_except.sa 3.7 1/16/92
|
| gen_except --- FPSP routine to detect reportable exceptions
|
| This routine compares the exception enable byte of the
| user_fpcr on the stack with the exception status byte
| of the user_fpsr.
|
| Any routine which may report an exception must load
| the stack frame in memory with the exceptional operand(s).
|
| Priority for exceptions is:
|
| Highest: bsun
| snan
| operr
| ovfl
| unfl
| dz
| inex2
| Lowest: inex1
|
| Note: The IEEE standard specifies that inex2 is to be
| reported if ovfl occurs and the ovfl enable bit is not
| set but the inex2 enable bit is.
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
GEN_EXCEPT: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref real_trace
|xref fpsp_done
|xref fpsp_fmt_error
 
|
| Dispatch table used by frame_com. The index is the bfffo bit
| offset (minus 24) of the first set bit in the byte
| FPCR_ENABLE & FPSR_EXCEPT, so entry 0 is the highest-priority
| exception in the list above (bsun) and entry 7 the lowest (inex1).
| Entry 8 is reached when no bit is set.
|
exc_tbl:
.long bsun_exc
.long commonE1
.long commonE1
.long ovfl_unfl
.long ovfl_unfl
.long commonE1
.long commonE3
.long commonE3
.long no_match
 
|
| gen_except: entered with an fsave frame on top of the stack.
| The size byte at 1(a7) selects the path:
|	idle  -> do_check
|	unimp -> unimp_x (either unimp frame size)
|	busy  -> handled inline below
|	other -> fpsp_fmt_error
| Busy path: a1 is set up as a frame pointer for the new frame,
| etemp and the command word are copied from the a6 frame into it,
| CMDREG3B is derived from CMDREG1B, and the FPSR is merged into
| USER_FPSR before joining do_clean.
|
.global gen_except
gen_except:
cmpib #IDLE_SIZE-4,1(%a7) |test for idle frame
beq do_check |go handle idle frame
cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame
beqs unimp_x |go handle unimp frame
cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame
beqs unimp_x |go handle unimp frame
cmpib #BUSY_SIZE-4,1(%a7) |if size <> $60, fmt error
bnel fpsp_fmt_error
leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 so fpsp.h
| ;equates will work
| Fix up the new busy frame with entries from the unimp frame
|
movel ETEMP_EX(%a6),ETEMP_EX(%a1) |copy etemp from unimp
movel ETEMP_HI(%a6),ETEMP_HI(%a1) |frame to busy frame
movel ETEMP_LO(%a6),ETEMP_LO(%a1)
movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp
| Build the cmd3b image in d0: keep the bits selected by the mask,
| then move cmd1b bit 2 to bit 21 and cmd1b bits 3-5 to bits 18-20
| of the longword (shift within the low word, then swap halves).
movel CMDREG1B(%a6),%d0 |fix cmd1b to make it
andl #0x03c30000,%d0 |work for cmd3b
bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2
lsll #5,%d1
swap %d1
orl %d1,%d0 |put it in the right place
bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5
lsll #2,%d1
swap %d1
orl %d1,%d0 |put them in the right place
movel %d0,CMDREG3B(%a1) |in the busy frame
|
| Or in the FPSR from the emulation with the USER_FPSR on the stack.
|
fmovel %FPSR,%d0
orl %d0,USER_FPSR(%a6)
movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits
orl #sx_mask,E_BYTE(%a1)
bra do_clean
 
|
| Frame is an unimp frame possibly resulting from an fmove <ea>,fp0
| that caused an exception
|
| a1 is modified to point into the new frame allowing fpsp equates
| to be valid.
|
| unimp_x: the frame on top of the stack is an unimp frame.
| Pick the a1 frame pointer that matches its size (orig or rev),
| copy the command word into the new frame, merge the FPSR into
| USER_FPSR and continue at do_clean.
unimp_x:
cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame
bnes test_rev
leal UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1
bras unimp_con
test_rev:
cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame
bnel fpsp_fmt_error |if not $28 or $30
leal UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1
unimp_con:
|
| Fix up the new unimp frame with entries from the old unimp frame
|
movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp
|
| Or in the FPSR from the emulation with the USER_FPSR on the stack.
|
fmovel %FPSR,%d0
orl %d0,USER_FPSR(%a6)
bra do_clean
 
|
| Frame is idle, so check for exceptions reported through
| USER_FPSR and set the unimp frame accordingly.
| A7 must be incremented to the point before the
| idle fsave vector to the unimp vector.
|
| do_check: drop the 4-byte idle frame, merge the FPSR into
| USER_FPSR, clear nmnexc where required, then dispatch through
| exc_tbl on the first enabled exception that is also set.
do_check:
addl #4,%a7 |point A7 back to unimp frame
|
| Or in the FPSR from the emulation with the USER_FPSR on the stack.
|
fmovel %FPSR,%d0
orl %d0,USER_FPSR(%a6)
|
| On a busy frame, we must clear the nmnexc bits.
|
cmpib #BUSY_SIZE-4,1(%a7) |check frame type
bnes check_fr |not busy: go check the unimp size
clrw NMNEXC(%a6) |clr nmnexc & nmcexc
btstb #5,CMDREG1B(%a6) |test for fmove out
bnes frame_com
movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |set exc bits
orl #sx_mask,E_BYTE(%a6)
bras frame_com
check_fr:
cmpb #UNIMP_40_SIZE-4,1(%a7)
beqs frame_com |orig unimp frame: nmnexc is left alone
clrw NMNEXC(%a6)
frame_com:
moveb FPCR_ENABLE(%a6),%d0 |get fpcr enable byte
andb FPSR_EXCEPT(%a6),%d0 |and in the fpsr exc byte
bfffo %d0{#24:#8},%d1 |test for first set bit
leal exc_tbl,%a0 |load jmp table address
subib #24,%d1 |normalize bit offset to 0-8
movel (%a0,%d1.w*4),%a0 |load routine address
| ;based on first enabled exc
jmp (%a0) |jump to routine
|
| Bsun is not possible in unimp or unsupp
|
| exc_tbl entry 0; nothing to fix up, go straight to do_clean.
bsun_exc:
bra do_clean
|
| The typical work to be done to the unimp frame to report an
| exception is to set the E1/E3 byte and clr the U flag.
| commonE1 does this for E1 exceptions, which are snan,
| operr, and dz. commonE3 does this for E3 exceptions, which
| are inex2 and inex1, and also clears the E1 exception bit
| left over from the unimp exception.
|
commonE1:
bsetb #E1,E_BYTE(%a6) |set E1 flag
bra commonE |go clean and exit

commonE3:
tstb UFLG_TMP(%a6) |test flag for unsup/unimp state
bnes unsE3 |flag set: unsupp path
uniE3:
bsetb #E3,E_BYTE(%a6) |set E3 flag
bclrb #E1,E_BYTE(%a6) |clr E1 from unimp
bra commonE

| unsE3 is also branched to from ovfl_unfl. E3 is set only when
| RES_FLG is clear; E1 is cleared and CMDREG3B is rebuilt from
| CMDREG1B in both cases.
unsE3:
tstb RES_FLG(%a6)
bnes unsE3_0
unsE3_1:
bsetb #E3,E_BYTE(%a6) |set E3 flag
unsE3_0:
bclrb #E1,E_BYTE(%a6) |clr E1 flag
movel CMDREG1B(%a6),%d0
andl #0x03c30000,%d0 |work for cmd3b
bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2
lsll #5,%d1
swap %d1
orl %d1,%d0 |put it in the right place
bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5
lsll #2,%d1
swap %d1
orl %d1,%d0 |put them in the right place
movel %d0,CMDREG3B(%a6) |in the busy frame

commonE:
bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp
bra do_clean |go clean and exit
|
| No bits in the enable byte match existing exceptions. Check for
| the case of the ovfl exc without the ovfl enabled, but with
| inex2 enabled.
|
| exc_tbl entry 8: no enabled exception is set.
no_match:
btstb #inex2_bit,FPCR_ENABLE(%a6) |check for ovfl/inex2 case
beqs no_exc |if clear, exit
btstb #ovfl_bit,FPSR_EXCEPT(%a6) |now check ovfl
beqs no_exc |if clear, exit
bras ovfl_unfl |go to ovfl_unfl to determine if
| ;it is an unsupp or unimp exc
| No exceptions are to be reported. If the instruction was
| unimplemented, no FPU restore is necessary. If it was
| unsupported, we must perform the restore.
no_exc:
tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state
beqs uni_no_exc
uns_no_exc:
tstb RES_FLG(%a6) |check if frestore is needed
bne do_clean |if clear, no frestore needed
| Restore the saved integer and FP registers and leave without an
| frestore; unlk reloads the stack pointer from a6, which also drops
| whatever is still on the stack below the frame.
uni_no_exc:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
unlk %a6
bra finish_up
|
| Unsupported Data Type Handler:
| Ovfl:
| An fmoveout that results in an overflow is reported this way.
| Unfl:
| An fmoveout that results in an underflow is reported this way.
|
| Unimplemented Instruction Handler:
| Ovfl:
| Only scosh, setox, ssinh, stwotox, and scale can set overflow in
| this manner.
| Unfl:
| Stwotox, setox, and scale can set underflow in this manner.
| Any of the other Library Routines such that f(x)=x in which
| x is an extended denorm can report an underflow exception.
| It is the responsibility of the exception-causing exception
| to make sure that WBTEMP is correct.
|
| The exceptional operand is in FP_SCR1.
|
| ovfl_unfl: reached from exc_tbl (ovfl and unfl entries) and from
| no_match. UFLG_TMP selects the unsupp or the unimp handling. The
| unimp path falls through into do_clean at the end of this block.
ovfl_unfl:
tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state
beqs ofuf_con
|
| The caller was from an unsupported data type trap. Test if the
| caller set CU_ONLY. If so, the exceptional operand is expected in
| FPTEMP, rather than WBTEMP.
|
tstb CU_ONLY(%a6) |test if inst is cu-only
beq unsE3
| move.w #$fe,CU_SAVEPC(%a6)
clrb CU_SAVEPC(%a6)
bsetb #E1,E_BYTE(%a6) |set E1 exception flag
movew ETEMP_EX(%a6),FPTEMP_EX(%a6)
movel ETEMP_HI(%a6),FPTEMP_HI(%a6)
movel ETEMP_LO(%a6),FPTEMP_LO(%a6)
bsetb #fptemp15_bit,DTAG(%a6) |set fpte15
bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp
bra do_clean |go clean and exit

| Unimp path: unless the frame is already busy, grow it in place to
| busy size. The version byte picks the number of longwords to clear.
ofuf_con:
moveb (%a7),VER_TMP(%a6) |save version number
cmpib #BUSY_SIZE-4,1(%a7) |check for busy frame
beqs busy_fr |already busy: no need to grow
cmpib #VER_40,(%a7) |test for orig unimp frame
bnes try_41 |if not, test for rev frame
moveql #13,%d0 |need to zero 14 lwords
bras ofuf_fin
try_41:
cmpib #VER_41,(%a7) |test for rev unimp frame
bnel fpsp_fmt_error |if neither, exit with error
moveql #11,%d0 |need to zero 12 lwords

ofuf_fin:
clrl (%a7) |clear the old format longword
loop1:
clrl -(%a7) |clear and dec a7
dbra %d0,loop1 |runs d0+1 times
moveb VER_TMP(%a6),(%a7) |put the version byte back
moveb #BUSY_SIZE-4,1(%a7) |write busy fmt word.
busy_fr:
movel FP_SCR1(%a6),WBTEMP_EX(%a6) |write
movel FP_SCR1+4(%a6),WBTEMP_HI(%a6) |exceptional op to
movel FP_SCR1+8(%a6),WBTEMP_LO(%a6) |wbtemp
bsetb #E3,E_BYTE(%a6) |set E3 flag
bclrb #E1,E_BYTE(%a6) |make sure E1 is clear
bclrb #UFLAG,T_BYTE(%a6) |clr U flag
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
movel CMDREG1B(%a6),%d0 |fix cmd1b to make it
andl #0x03c30000,%d0 |work for cmd3b
bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2
lsll #5,%d1
swap %d1
orl %d1,%d0 |put it in the right place
bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5
lsll #2,%d1
swap %d1
orl %d1,%d0 |put them in the right place
movel %d0,CMDREG3B(%a6) |in the busy frame
 
|
| Check if the frame to be restored is busy or unimp.
|** NOTE *** Bug fix for errata (0d43b #3)
| If the frame is unimp, we must create a busy frame to
| fix the bug with the nmnexc bits in cases in which they
| are set by a previous instruction and not cleared by
| the save. The frame will be unimp only if the final
| instruction in an emulation routine caused the exception
| by doing an fmove <ea>,fp0. The exception operand, in
| internal format, is in fptemp.
|
| do_clean: if the frame on top of the stack is an unimp frame
| (either size), rebuild it in place as a busy frame; a busy frame
| goes straight to do_restore. Control falls out of the bottom of
| this block (after unlk) into finish_up.
do_clean:
cmpib #UNIMP_40_SIZE-4,1(%a7)
bnes do_con
moveql #13,%d0 |in orig, need to zero 14 lwords
bras do_build
do_con:
cmpib #UNIMP_41_SIZE-4,1(%a7)
bnes do_restore |frame must be busy
moveql #11,%d0 |in rev, need to zero 12 lwords

do_build:
moveb (%a7),VER_TMP(%a6) |keep the version byte
clrl (%a7)
loop2:
clrl -(%a7) |clear and dec a7
dbra %d0,loop2 |runs d0+1 times
|
| Use a1 as pointer into new frame. a6 is not correct if an unimp or
| busy frame was created as the result of an exception on the final
| instruction of an emulation routine.
|
| We need to set the nmcexc bits if the exception is E1. Otherwise,
| the exc taken will be inex2.
|
leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 for new frame
moveb VER_TMP(%a6),(%a7) |write busy fmt word
moveb #BUSY_SIZE-4,1(%a7)
movel FP_SCR1(%a6),WBTEMP_EX(%a1) |write
movel FP_SCR1+4(%a6),WBTEMP_HI(%a1) |exceptional op to
movel FP_SCR1+8(%a6),WBTEMP_LO(%a1) |wbtemp
| btst.b #E1,E_BYTE(%a1)
| beq.b do_restore
bfextu USER_FPSR(%a6){#17:#4},%d0 |get snan/operr/ovfl/unfl bits
bfins %d0,NMCEXC(%a1){#4:#4} |and insert them in nmcexc
movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits
orl #sx_mask,E_BYTE(%a1)
| Reload the saved registers, then hand the frame to the FPU.
do_restore:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
tstb RES_FLG(%a6) |RES_FLG indicates a "continuation" frame
beq cont
bsr bug1384 |apply the work-around only when RES_FLG is set
cont:
unlk %a6
|
| If trace mode enabled, then go to trace handler. This handler
| cannot have any fp instructions. If there are fp inst's and an
| exception has been restored into the machine then the exception
| will occur upon execution of the fp inst. This is not desirable
| in the kernel (supervisor mode). See MC68040 manual Section 9.3.8.
|
| finish_up: leave through fpsp_done unless one of the two trace
| bits in the stacked SR is set, in which case go to g_trace.
finish_up:
btstb #7,(%a7) |test T1 in SR
bnes g_trace
btstb #6,(%a7) |test T0 in SR
bnes g_trace
bral fpsp_done
|
| Change integer stack to look like trace stack
| The address of the instruction that caused the
| exception is already in the integer stack (is
| the same as the saved fpiar)
|
| If the current frame is already a 6-word stack then all
| that needs to be done is to change the vector# to TRACE.
| If the frame is only a 4-word stack (meaning we got here
| on an Unsupported data type exception), then we need to grow
| the stack an extra 2 words and get the FPIAR from the FPU.
|
g_trace:
bftst EXC_VEC-4(%sp){#0:#4} |top 4 bits of the word at EXC_VEC-4
bne g_easy |non-zero: only the vector needs changing

subw #4,%sp | make room
movel 4(%sp),(%sp) | move the two existing longwords
movel 8(%sp),4(%sp) | down by 4 bytes
| fsave/frestore around the fpiar read so the FPU state is put back
| unchanged.
subw #BUSY_SIZE,%sp
fsave (%sp)
fmovel %fpiar,BUSY_SIZE+EXC_EA-4(%sp)
frestore (%sp)
addw #BUSY_SIZE,%sp

g_easy:
movew #TRACE_VEC,EXC_VEC-4(%a7)
bral real_trace
|
| This is a work-around for hardware bug 1384.
|
| bug1384: subroutine called from do_restore when RES_FLG is set.
| It fsaves the FPU state and inspects the version byte:
|	version > $41		-> nofix: restore the frame unchanged
|	version = $41 or lower	-> if the frame is idle (size byte 0),
|				   replace it with a zeroed unimp frame
|				   ($28 bytes for the frame_40 path, $30
|				   for frame_41); otherwise (notidle)
|				   clear one bit in the frame and
|				   restore it.
| Uses a5 as a private frame pointer. d1 is preserved through
| USER_D1(%a6), so a6 must still be the handler frame pointer here.
bug1384:
link %a5,#0
fsave -(%sp)
cmpib #0x41,(%sp) | check for correct frame
beq frame_41
bgt nofix | if more advanced mask, do nada

frame_40:
tstb 1(%sp) | check to see if idle
bne notidle
idle40:
clrl (%sp) | get rid of old fsave frame
movel %d1,USER_D1(%a6) | save d1
movew #8,%d1 | place unimp frame instead
| 9 cleared longwords plus the one above make the $28 data bytes
loop40: clrl -(%sp)
dbra %d1,loop40
movel USER_D1(%a6),%d1 | restore d1
movel #0x40280000,-(%sp) | format word: version $40, size $28
frestore (%sp)+
unlk %a5
rts

frame_41:
tstb 1(%sp) | check to see if idle
bne notidle
idle41:
clrl (%sp) | get rid of old fsave frame
movel %d1,USER_D1(%a6) | save d1
movew #10,%d1 | place unimp frame instead
| 11 cleared longwords plus the one above make the $30 data bytes
loop41: clrl -(%sp)
dbra %d1,loop41
movel USER_D1(%a6),%d1 | restore d1
movel #0x41300000,-(%sp) | format word: version $41, size $30
frestore (%sp)+
unlk %a5
rts

notidle:
| NOTE(review): -40(a5) addresses a byte inside the frame just
| saved below a5; the offset is hard-coded -- confirm which frame
| field it is meant to hit.
bclrb #etemp15_bit,-40(%a5)
frestore (%sp)+
unlk %a5
rts

nofix:
frestore (%sp)+
unlk %a5
rts
 
|end
/x_unsupp.S
0,0 → 1,83
|
| x_unsupp.sa 3.3 7/1/91
|
| fpsp_unsupp --- FPSP handler for unsupported data type exception
|
| Trap vector #55 (See table 8-1 Mc68030 User's manual).
| Invoked when the user program encounters a data format (packed) that
| hardware does not support or a data type (denormalized numbers or un-
| normalized numbers).
| Normalizes denorms and unnorms, unpacks packed numbers then stores
| them back into the machine to let the 040 finish the operation.
|
| Unsupp calls two routines:
| 1. get_op - gets the operand(s)
| 2. res_func - restore the function back into the 040 or
| if fmove.p fpm,<ea> then pack source (fpm)
| and store in users memory <ea>.
|
| Input: Long fsave stack frame
|
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_UNSUPP: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref get_op
|xref res_func
|xref gen_except
|xref fpsp_fmt_error
 
|
| fpsp_unsupp: entry point of the unsupported data type handler.
| Flow: build the local frame and fsave, save d0-d1/a0-a1, fp0-fp3
| and fpcr/fpsr/fpiar, validate the fsave version byte, clear the
| FPSR/FPCR, mask USER_FPSR, set UFLG_TMP, call get_op and res_func,
| push a 4-byte frame holding only the version byte, and leave
| through gen_except.
|
.global fpsp_unsupp
fpsp_unsupp:
|
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)


moveb (%a7),VER_TMP(%a6) |save version number
moveb (%a7),%d0 |test for valid version num
andib #0xf0,%d0 |test for $4x
cmpib #VER_4,%d0 |must be $4x or exit
bnel fpsp_fmt_error

fmovel #0,%FPSR |clear all user status bits
fmovel #0,%FPCR |clear all user control bits
|
| The following lines are used to ensure that the FPSR
| exception byte and condition codes are clear before proceeding,
| except in the case of fmove, which leaves the cc's intact.
|
unsupp_con:
movel USER_FPSR(%a6),%d1
btst #5,CMDREG1B(%a6) |looking for fmove out
bne fmove_con
andl #0xFF00FF,%d1 |clear all but aexcs and qbyte
bras end_fix
fmove_con:
andl #0x0FFF40FF,%d1 |clear all but cc's, snan bit, aexcs, and qbyte
end_fix:
movel %d1,USER_FPSR(%a6)

st UFLG_TMP(%a6) |set flag for unsupp data

bsrl get_op |everything okay, go get operand(s)
bsrl res_func |fix up stack frame so can restore it
clrl -(%a7) |push a zeroed longword (size byte = 0)
moveb VER_TMP(%a6),(%a7) |move idle fmt word to top of stack
bral gen_except
|
|end
/slogn.S
0,0 → 1,592
|
| slogn.sa 3.1 12/10/90
|
| slogn computes the natural logarithm of an
| input value. slognd does the same except the input value is a
| denormalized number. slognp1 computes log(1+X), and slognp1d
| computes log(1+X) for denormalized X.
|
| Input: Double-extended value in memory location pointed to by address
| register a0.
|
| Output: log(X) or log(1+X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 2 ulps in
| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program slogn takes approximately 190 cycles for input
| argument X such that |X-1| >= 1/16, which is the usual
| situation. For those arguments, slognp1 takes approximately
| 210 cycles. For the less common arguments, the program will
| run no worse than 10% slower.
|
| Algorithm:
| LOGN:
| Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
| u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
|
| Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
| significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
| 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
|
| Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
| log(1+u) = poly.
|
| Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
| by k*log(2) + (log(F) + poly). The values of log(F) are calculated
| beforehand and stored in the program.
|
| lognp1:
| Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
| u where u = 2X/(2+X). Otherwise, move on to Step 2.
|
| Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
| of the algorithm for LOGN and compute log(1+X) as
| k*log(2) + log(F) + poly where poly approximates log(1+u),
| u = (Y-F)/F.
|
| Implementation Notes:
| Note 1. There are 64 different possible values for F, thus 64 log(F)'s
| need to be tabulated. Moreover, the values of 1/F are also
| tabulated so that the division in (Y-F)/F can be performed by a
| multiplication.
|
| Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
| Y-F has to be calculated carefully when 1/2 <= X < 3/2.
|
| Note 3. To fully exploit the pipeline, polynomials are usually separated
| into two parts evaluated independently before being added up.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|slogn idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
| Constants for the log routines. The code that reads them lies
| beyond this point, so the usage notes below are hedged.
|
| BOUNDS1/BOUNDS2: pairs of longwords whose upper word looks like an
| extended-precision exponent and whose lower word looks like the top
| 16 mantissa bits. BOUNDS2 decodes to 0.5 and 1.5 (cf. Note 2
| above); BOUNDS1 decodes to roughly 0.9394 and 1.0645 -- presumably
| the limits of the "near 1" test in Step 1; confirm against the code.
BOUNDS1: .long 0x3FFEF07D,0x3FFF8841
BOUNDS2: .long 0x3FFE8000,0x3FFFC000

| log(2) as a three-longword extended value plus a zero pad longword.
LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

| Single-precision 1.0, 0.0, +infinity and -1.0.
one: .long 0x3F800000
zero: .long 0x00000000
infty: .long 0x7F800000
negone: .long 0xBF800000

| LOGA1-LOGA6 and LOGB1-LOGB5: double-precision values, presumably
| the coefficients of the two polynomials described in the Algorithm
| notes above -- TODO confirm which polynomial uses which set.
LOGA6: .long 0x3FC2499A,0xB5E4040B
LOGA5: .long 0xBFC555B5,0x848CB7DB

LOGA4: .long 0x3FC99999,0x987D8730
LOGA3: .long 0xBFCFFFFF,0xFF6F7E97

LOGA2: .long 0x3FD55555,0x555555a4
LOGA1: .long 0xBFE00000,0x00000008

LOGB5: .long 0x3F175496,0xADD7DAD6
LOGB4: .long 0x3F3C71C2,0xFE80C7E0

LOGB3: .long 0x3F624924,0x928BCCFF
LOGB2: .long 0x3F899999,0x999995EC

LOGB1: .long 0x3FB55555,0x55555555
| Double-precision 2.0.
TWO: .long 0x40000000,0x00000000

| Extended-format value with exponent $3f99 and mantissa $80000000
| (2**-102); by its name presumably a lower threshold -- confirm.
LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
 
| LOGTBL: 128 entries of four longwords each (a three-longword
| extended value followed by a zero pad). The entries come in 64
| pairs; per Note 1 above the table holds 1/F and log(F) for the 64
| possible values of F. The first pair decodes to about 0.99225
| (= 128/129) and 0.00778 (= log(129/128)), i.e. 1/F first, then
| log(F), with F starting at 1 + 2**(-7).
LOGTBL:
.long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
.long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
.long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
.long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
.long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
.long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
.long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
.long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
.long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
.long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
.long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
.long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
.long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
.long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
.long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
.long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
.long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
.long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
.long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
.long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
.long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
.long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
.long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
.long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
.long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
.long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
.long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
.long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
.long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
.long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
.long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
.long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
.long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
.long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
.long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
.long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
.long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
.long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
.long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
.long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
.long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
.long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
.long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
.long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
.long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
.long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
.long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
.long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
.long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
.long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
.long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
.long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
.long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
.long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
.long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
.long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
.long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
.long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
.long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
.long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
.long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
.long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
.long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
.long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
.long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
.long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
.long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
.long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
.long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
.long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
.long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
.long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
.long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
.long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
.long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
.long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
.long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
.long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
.long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
.long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
.long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
.long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
.long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
.long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
.long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
.long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
.long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
.long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
.long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
.long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
.long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
.long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
.long 0x3FFE0000,0x94458094,0x45809446,0x00000000
.long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
.long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
.long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
.long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
.long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
.long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
.long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
.long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
.long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
.long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
.long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
.long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
.long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
.long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
.long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
.long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
.long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
.long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
.long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
.long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
.long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
.long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
.long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
.long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
.long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
.long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
.long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
.long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
.long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
.long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
.long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
.long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
.long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
.long 0x3FFE0000,0x80808080,0x80808081,0x00000000
.long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
 
| Scratch-storage equates for the log routines below.  Every name is used
| as a displacement from a6, e.g. X(%a6).

| ADJK: exponent adjustment added to K in LOGMAIN; slognd stores -k here
| for a denormalized input, slogn and slognp1 store 0.
.set ADJK,L_SCR1

| X: 12-byte extended working copy of the operand (later rewritten as Y).
| XDCARE: the word that follows X's sign/exponent word.
| XFRAC: upper longword of X's mantissa.
.set X,FP_SCR1
.set XDCARE,X+2
.set XFRAC,X+4

| F: extended value built from the leading mantissa bits of Y.
| FFRAC: upper longword of F's mantissa.
.set F,FP_SCR2
.set FFRAC,F+4

| KLOG2: holds K*LOG2 until the final add in the table-driven path.
.set KLOG2,FP_SCR3

| SAVEU: holds U in the near-1 polynomial path.
.set SAVEU,FP_SCR4
 
| xref t_frcinx
|xref t_extdnrm
|xref t_operr
|xref t_dz
 
.global slognd
slognd:
|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
|
|--Input:  a0 points to the extended-precision operand.  Only its two
|--        mantissa longwords, 4(a0) and 8(a0), are read here.
|--Action: the 64-bit mantissa is shifted left by k bits so that its most
|--        significant bit is set.  The normalized value, with a zero
|--        sign/exponent longword, is written to X(a6); -k is written to
|--        ADJK(a6); fp0 is loaded from X(a6); a0 is pointed at X(a6);
|--        control then continues at LOGBGN.
|--        d2-d7 are saved on entry and restored before leaving.
|
|--Note that this code assumes the denormalized input is NON-ZERO.
|--ADJK is written exactly once, in the shared tail below.  An earlier
|--preset of ADJK to -100 (left over from a TWOTO100 scaling scheme) was
|--overwritten on both paths before being read and has been dropped.

	moveml	%d2-%d7,-(%a7)		| ...save some registers
	movel	#0x00000000,%d3		| ...D3 is exponent of smallest norm. #
	movel	4(%a0),%d4
	movel	8(%a0),%d5		| ...(D4,D5) is (Hi_X,Lo_X)
	clrl	%d2			| ...D2 used for holding K

	tstl	%d4
	bnes	HiX_not0

HiX_0:
|--Hi_X is zero: move Lo_X up one whole longword (32 bits), then shift
|--out its leading zeros.  K = 32 + (number of leading zeros of Lo_X).
	movel	%d5,%d4
	clrl	%d5
	movel	#32,%d2
	clrl	%d6
	bfffo	%d4{#0:#32},%d6		| ...D6 = offset of first 1 bit
	lsll	%d6,%d4
	addl	%d6,%d2			| ...(D3,D4,D5) is normalized
	bras	slognd_store

HiX_not0:
|--Hi_X is non-zero: shift the 64-bit pair (D4,D5) left by K bits, where
|--K is the number of leading zeros of Hi_X.
	clrl	%d6
	bfffo	%d4{#0:#32},%d6		| ...find first 1
	movel	%d6,%d2			| ...get k
	lsll	%d6,%d4
	movel	%d5,%d7			| ...a copy of D5
	lsll	%d6,%d5
	negl	%d6
	addil	#32,%d6			| ...D6 = 32 - K
	lsrl	%d6,%d7			| ...bits of Lo_X that cross into Hi_X
	orl	%d7,%d4			| ...(D3,D4,D5) normalized

slognd_store:
|--Common tail: store the normalized operand and -K, then join LOG(X).
	movel	%d3,X(%a6)
	movel	%d4,XFRAC(%a6)
	movel	%d5,XFRAC+4(%a6)
	negl	%d2
	movel	%d2,ADJK(%a6)		| ...INPUT = 2^(ADJK) * FP0
	fmovex	X(%a6),%fp0
	moveml	(%a7)+,%d2-%d7		| ...restore registers
	lea	X(%a6),%a0
	bras	LOGBGN			| ...begin regular log(X)
 
 
.global slogn
slogn:
|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
|
|--Input:  a0 points to the extended-precision operand X.
|--        d1 is written to FPCR just before the final, result-producing
|--        add (presumably the caller's saved FPCR -- confirm at caller).
|--Output: log(X) in fp0.  Every path leaves through a branch to
|--        t_frcinx, or to t_operr (via LOGNEG) for a negative operand.

fmovex (%a0),%fp0 | ...LOAD INPUT
movel #0x00000000,ADJK(%a6) | ...NO EXPONENT ADJUSTMENT ON THIS ENTRY

LOGBGN:
|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
|--slognd joins here with a0 pointing at X(a6) and ADJK = -k.

|--BUILD THE COMPACT FORM OF X IN D0: SIGN/EXPONENT IN THE UPPER WORD,
|--TOP 16 MANTISSA BITS IN THE LOWER WORD.
movel (%a0),%d0
movew 4(%a0),%d0

|--COPY THE WHOLE 12-BYTE OPERAND INTO THE WORK AREA X.
movel (%a0),X(%a6)
movel 4(%a0),X+4(%a6)
movel 8(%a0),X+8(%a6)

cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE
blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID
cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1
bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16]; CARRY CLEAR = IN BOUNDS

LOGMAIN:
|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
|--(ALSO ENTERED FROM LP1REAL WHEN 1+Z LIES OUTSIDE BOUNDS2)

|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
|-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
|--LOG(1+U) CAN BE VERY EFFICIENT.
|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.

|--GET K, Y, F, AND ADDRESS OF 1/F.
asrl #8,%d0
asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X
subil #0x3FFF,%d0 | ...THIS IS K
addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F)
fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT

|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X
movel XFRAC(%a6),FFRAC(%a6)
andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F
andil #0x7E000000,%d0 | ...KEEP THE 6 BITS BELOW THE LEADING 1
asrl #8,%d0
asrl #8,%d0
asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT (6-BIT INDEX * 32)
addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F

fmovex X(%a6),%fp0
movel #0x3fff0000,F(%a6) | ...F GETS THE SAME EXPONENT AS Y
clrl F+8(%a6) | ...LOW MANTISSA LONGWORD OF F IS ZERO
fsubx F(%a6),%fp0 | ...Y-F
fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 AND FP3 WHILE FP0 IS NOT READY
|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
|--REGISTERS SAVED: FPCR, FP1, FP2
|--(THE FMOVEM ABOVE ALSO PUSHES FP3, WHICH IS USED BELOW)

LP1CONT1:
|--AN RE-ENTRY POINT FOR LOGNP1
|--ON ENTRY: FP0 = Y-F, FP1 = K, A0 = ADDRESS OF 1/F, FP2/FP3 ON THE STACK
fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F
fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY
fmovex %fp0,%fp2
fmulx %fp2,%fp2 | ...FP2 IS V=U*U
fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1

|--LOG(1+U) IS APPROXIMATED BY
|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
|--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]

fmovex %fp2,%fp3
fmovex %fp2,%fp1

fmuld LOGA6,%fp1 | ...V*A6
fmuld LOGA5,%fp2 | ...V*A5

faddd LOGA4,%fp1 | ...A4+V*A6
faddd LOGA3,%fp2 | ...A3+V*A5

fmulx %fp3,%fp1 | ...V*(A4+V*A6)
fmulx %fp3,%fp2 | ...V*(A3+V*A5)

faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6)
faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5)

fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6))
addal #16,%a0 | ...ADDRESS OF LOG(F), THE 16 BYTES AFTER 1/F
fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED

fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6))
faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED

faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2 AND FP3
faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U)

fmovel %d1,%fpcr | ...FPCR TAKES D1 FOR THE LAST OPERATION ONLY
faddx KLOG2(%a6),%fp0 | ...FINAL ADD
bra t_frcinx
 
 
LOGNEAR1:
|--LOG(X) FOR X INSIDE BOUNDS1, I.E. CLOSE TO 1.
|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
|--Exit is always the branch to t_frcinx at the end of LP1CONT2.  The
|--unreachable RTS that used to follow that branch has been removed.
	fmovex	%fp0,%fp1
	fsubs	one,%fp1		| ...FP1 IS X-1
	fadds	one,%fp0		| ...FP0 IS X+1
	faddx	%fp1,%fp1		| ...FP1 IS 2(X-1)
|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
|--IN U, U = 2(X-1)/(X+1) = FP1/FP0

LP1CONT2:
|--THIS IS AN RE-ENTRY POINT FOR LOGNP1
|--ON ENTRY: FP1 IS THE NUMERATOR AND FP0 THE DENOMINATOR OF U;
|--D1 IS WRITTEN TO FPCR BEFORE THE FINAL ADD.
	fdivx	%fp0,%fp1		| ...FP1 IS U
	fmovemx	%fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2 AND FP3
|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
|--LET V=U*U, W=V*V, CALCULATE
|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
|--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
	fmovex	%fp1,%fp0
	fmulx	%fp0,%fp0		| ...FP0 IS V
	fmovex	%fp1,SAVEU(%a6)		| ...STORE U IN MEMORY, FREE FP1
	fmovex	%fp0,%fp1
	fmulx	%fp1,%fp1		| ...FP1 IS W

	fmoved	LOGB5,%fp3
	fmoved	LOGB4,%fp2

	fmulx	%fp1,%fp3		| ...W*B5
	fmulx	%fp1,%fp2		| ...W*B4

	faddd	LOGB3,%fp3		| ...B3+W*B5
	faddd	LOGB2,%fp2		| ...B2+W*B4

	fmulx	%fp3,%fp1		| ...W*(B3+W*B5), FP3 RELEASED

	fmulx	%fp0,%fp2		| ...V*(B2+W*B4)

	faddd	LOGB1,%fp1		| ...B1+W*(B3+W*B5)
	fmulx	SAVEU(%a6),%fp0		| ...FP0 IS U*V

	faddx	%fp2,%fp1		| ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
	fmovemx	(%sp)+,%fp2-%fp2/%fp3	| ...FP2 AND FP3 RESTORED

	fmulx	%fp1,%fp0		| ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

	fmovel	%d1,%fpcr		| ...FPCR TAKES D1 FOR THE LAST OPERATION ONLY
	faddx	SAVEU(%a6),%fp0		| ...FINAL ADD OF U
	bra	t_frcinx
 
LOGNEG:
|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
|--Reached from LOGBGN when the compact form of X in d0 is negative;
|--hands off to the t_operr handler, which is defined outside this file.
bra t_operr
 
.global slognp1d
slognp1d:
|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
| Simply return the denorm
| No computation is done here: control goes straight to t_extdnrm, which
| is defined outside this file.

bra t_extdnrm
 
.global slognp1
slognp1:
|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
|
|--Input:  a0 points to the extended-precision operand (called Z below).
|--        d1 is written to FPCR before each exit (presumably the
|--        caller's saved FPCR -- confirm at caller).
|--Output: result in fp0; exits are branches to t_frcinx, t_dz or
|--        t_operr, or into the shared LOG(X) code (LOGMAIN, LP1CONT1,
|--        LP1CONT2).

fmovex (%a0),%fp0 | ...LOAD INPUT
fabsx %fp0 |test magnitude
fcmpx LTHOLD,%fp0 |compare with min threshold
fbgt LP1REAL |if greater, continue
|--|Z| IS AT OR BELOW THE THRESHOLD: THE ARGUMENT ITSELF IS RETURNED
fmovel #0,%fpsr |clr N flag from compare
fmovel %d1,%fpcr
fmovex (%a0),%fp0 |return signed argument
bra t_frcinx

LP1REAL:
fmovex (%a0),%fp0 | ...LOAD INPUT
movel #0x00000000,ADJK(%a6)
fmovex %fp0,%fp1 | ...FP1 IS INPUT Z
fadds one,%fp0 | ...X := ROUND(1+Z)
fmovex %fp0,X(%a6)
|--FORM THE COMPACT VALUE OF X: COPY THE TOP 16 MANTISSA BITS INTO THE
|--WORD AFTER THE SIGN/EXPONENT WORD, THEN READ BOTH AS ONE LONGWORD.
movew XFRAC(%a6),XDCARE(%a6)
movel X(%a6),%d0
cmpil #0,%d0
ble LP1NEG0 | ...LOG OF ZERO OR -VE
cmp2l BOUNDS2,%d0
bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2]; CARRY SET = OUT OF BOUNDS
|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).

LP1NEAR1:
|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
cmp2l BOUNDS1,%d0
bcss LP1CARE | ...OUTSIDE BOUNDS1: USE THE TABLE-DRIVEN PATH

LP1ONE16:
|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
faddx %fp1,%fp1 | ...FP1 IS 2Z
fadds one,%fp0 | ...FP0 IS 1+X
|--U = FP1/FP0
bra LP1CONT2

LP1CARE:
|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
|--THERE ARE ONLY TWO CASES.
|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
|--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
|--(BOTH CASES PUSH FP2 AND FP3, MATCHING THE POP IN LP1CONT1.)

movel XFRAC(%a6),FFRAC(%a6)
andil #0xFE000000,FFRAC(%a6)
oril #0x01000000,FFRAC(%a6) | ...F OBTAINED
cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1 (0x3FFF8000 IS 1.0 IN COMPACT FORM)
bges KISZERO

KISNEG1:
fmoves TWO,%fp0
movel #0x3fff0000,F(%a6)
clrl F+8(%a6)
fsubx F(%a6),%fp0 | ...2-F
movel FFRAC(%a6),%d0
andil #0x7E000000,%d0
asrl #8,%d0
asrl #8,%d0
asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F
faddx %fp1,%fp1 | ...GET 2Z
fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 AND FP3
faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z
lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F
addal %d0,%a0
fmoves negone,%fp1 | ...FP1 IS K = -1
bra LP1CONT1

KISZERO:
fmoves one,%fp0
movel #0x3fff0000,F(%a6)
clrl F+8(%a6)
fsubx F(%a6),%fp0 | ...1-F
movel FFRAC(%a6),%d0
andil #0x7E000000,%d0
asrl #8,%d0
asrl #8,%d0
asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F
faddx %fp1,%fp0 | ...FP0 IS Y-F
fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 AND FP3 SAVED
lea LOGTBL,%a0
addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F
fmoves zero,%fp1 | ...FP1 IS K = 0
bra LP1CONT1

LP1NEG0:
|--FPCR SAVED. D0 IS X IN COMPACT FORM.
|--D0 <= 0 HERE; SEPARATE THE NEGATIVE CASE FROM THE ZERO CASE.
cmpil #0,%d0
blts LP1NEG
LP1ZERO:
|--1+Z ROUNDED TO ZERO: LOAD THE CONSTANT NAMED negone AND GO TO t_dz
fmoves negone,%fp0

fmovel %d1,%fpcr
bra t_dz

LP1NEG:
|--1+Z IS NEGATIVE: LOAD THE CONSTANT NAMED zero AND GO TO t_operr
fmoves zero,%fp0

fmovel %d1,%fpcr
bra t_operr
 
|end
/ssinh.S
0,0 → 1,135
|
| ssinh.sa 3.1 12/10/90
|
| The entry point sSinh computes the hyperbolic sine of
| an input argument; sSinhd does the same except for denormalized
| input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value sinh(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulps in
| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program sSINH takes approximately 280 cycles.
|
| Algorithm:
|
| SINH
| 1. If |X| > 16380 log2, go to 3.
|
| 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae
| Y = |X|, sgn = sign(X), and z = expm1(Y),
| sinh(X) = sgn*(1/2)*( z + z/(1+z) ).
| Exit.
|
| 3. If |X| > 16480 log2, go to 5.
|
| 4. (16380 log2 < |X| <= 16480 log2)
| sinh(X) = sign(X) * exp(|X|)/2.
| However, invoking exp(|X|) may cause premature overflow.
| Thus, we calculate sinh(X) as follows:
| Y := |X|
| sgn := sign(X)
| sgnFact := sgn * 2**(16380)
| Y' := Y - 16381 log2
| sinh(X) := sgnFact * exp(Y').
| Exit.
|
| 5. (|X| > 16480 log2) sinh(X) must overflow. Return
| sign(X)*Huge*Huge to generate overflow and an infinity with
| the appropriate sign. Huge is the largest finite number in
| extended format. Exit.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SSINH idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
| 16381*LOG2 split into a leading and a trailing double-precision part.
| SINHBIG subtracts both from |X| with fsubd, T1 first and T2 second.
T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD
T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL
 
|xref t_frcinx
|xref t_ovfl
|xref t_extdnrm
|xref setox
|xref setoxm1
 
.global ssinhd
ssinhd:
|--SINH(X) = X FOR DENORMALIZED X
|--No computation is done here: control goes straight to t_extdnrm,
|--which is defined outside this file.

bra t_extdnrm
 
.global ssinh
ssinh:
|--ENTRY POINT FOR SINH(X)
|
|--Input:  a0 points to the extended-precision operand X.  The operand
|--        in memory is OVERWRITTEN with the argument handed to
|--        setoxm1/setox.  d1 is written to FPCR before the last
|--        operation of the usual path (presumably the caller's saved
|--        FPCR -- confirm at caller).
|--Output: sinh(X) in fp0; exits are branches to t_frcinx or t_ovfl.
|--Uses:   d0, a1, fp1 and the stack as scratch.
fmovex (%a0),%fp0 | ...LOAD INPUT

|--D0 = COMPACT FORM OF X: SIGN/EXPONENT IN THE UPPER WORD, TOP 16
|--MANTISSA BITS IN THE LOWER WORD.
movel (%a0),%d0
movew 4(%a0),%d0
movel %d0,%a1 | save a copy of original (compacted) operand
andl #0x7FFFFFFF,%d0 | ...DROP THE SIGN: COMPACT FORM OF |X|
cmpl #0x400CB167,%d0
bgts SINHBIG

|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
|--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )

fabsx %fp0 | ...Y = |X|

moveml %a1/%d1,-(%sp) | ...KEEP A1 AND D1 ACROSS THE CALL
fmovemx %fp0-%fp0,(%a0) | ...STORE Y OVER THE OPERAND AS SETOXM1'S INPUT
clrl %d1 | ...SETOXM1 IS CALLED WITH D1 = 0
bsr setoxm1 | ...FP0 IS Z = EXPM1(Y)
fmovel #0,%fpcr
moveml (%sp)+,%a1/%d1

fmovex %fp0,%fp1
fadds #0x3F800000,%fp1 | ...1+Z (0x3F800000 IS SINGLE-PRECISION 1.0)
fmovex %fp0,-(%sp) | ...PUSH Z
fdivx %fp1,%fp0 | ...Z/(1+Z)
movel %a1,%d0
andl #0x80000000,%d0 | ...SIGN BIT OF X
orl #0x3F000000,%d0 | ...D0 IS SIGN(X)*(1/2) IN SINGLE PRECISION
faddx (%sp)+,%fp0 | ...Z + Z/(1+Z), POPS Z
movel %d0,-(%sp)

fmovel %d1,%fpcr
fmuls (%sp)+,%fp0 |last fp inst - possible exceptions set

bra t_frcinx

SINHBIG:
|--|X| IS ABOVE THE FIRST THRESHOLD.  ABOVE THE SECOND ONE THE RESULT
|--MUST OVERFLOW; OTHERWISE SINH(X) = SGNFACT * EXP(|X| - 16381 LOG2).
|--NOTE(review): 0x400CB2B3 DECODES TO ABOUT 11436.75, I.E. ROUGHLY
|--16500*LN(2), WHILE THE FILE HEADER SAYS 16480 LOG2 -- CONFIRM WHICH
|--IS INTENDED.
cmpl #0x400CB2B3,%d0
bgt t_ovfl
fabsx %fp0
fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD)
|--BUILD SGNFACT ON THE STACK AS A 12-BYTE EXTENDED NUMBER, LOWEST
|--LONGWORD FIRST: MANTISSA 0x80000000_00000000, EXPONENT 0x7FFB PLUS
|--THE SIGN OF X.
movel #0,-(%sp)
movel #0x80000000,-(%sp)
movel %a1,%d0
andl #0x80000000,%d0
orl #0x7FFB0000,%d0
movel %d0,-(%sp) | ...EXTENDED FMT
fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE

movel %d1,-(%sp) | ...KEEP D1 ON THE STACK ACROSS THE CALL
clrl %d1 | ...SETOX IS CALLED WITH D1 = 0
fmovemx %fp0-%fp0,(%a0) | ...STORE THE ARGUMENT OVER THE OPERAND FOR SETOX
bsr setox
fmovel (%sp)+,%fpcr | ...SAVED D1 VALUE GOES STRAIGHT INTO FPCR

fmulx (%sp)+,%fp0 |possible exception
bra t_frcinx
 
|end
/bindec.S
0,0 → 1,920
|
| bindec.sa 3.4 1/3/91
|
| bindec
|
| Description:
| Converts an input in extended precision format
| to bcd format.
|
| Input:
| a0 points to the input extended precision value
| value in memory; d0 contains the k-factor sign-extended
| to 32-bits. The input may be either normalized,
| unnormalized, or denormalized.
|
| Output: result in the FP_SCR1 space on the stack.
|
| Saves and Modifies: D2-D7,A2,FP2
|
| Algorithm:
|
| A1. Set RM and size ext; Set SIGMA = sign of input.
| The k-factor is saved for use in d7. Clear the
| BINDEC_FLG for separating normalized/denormalized
| input. If input is unnormalized or denormalized,
| normalize it.
|
| A2. Set X = abs(input).
|
| A3. Compute ILOG.
| ILOG is the log base 10 of the input value. It is
| approximated by adding e + 0.f when the original
| value is viewed as 2^^e * 1.f in extended precision.
| This value is stored in d6.
|
| A4. Clr INEX bit.
| The operation in A3 above may have set INEX2.
|
| A5. Set ICTR = 0;
| ICTR is a flag used in A13. It must be set before the
| loop entry A6.
|
| A6. Calculate LEN.
| LEN is the number of digits to be displayed. The
| k-factor can dictate either the total number of digits,
| if it is a positive number, or the number of digits
| after the decimal point which are to be included as
| significant. See the 68882 manual for examples.
| If LEN is computed to be greater than 17, set OPERR in
| USER_FPSR. LEN is stored in d4.
|
| A7. Calculate SCALE.
| SCALE is equal to 10^ISCALE, where ISCALE is the number
| of decimal places needed to insure LEN integer digits
| in the output before conversion to bcd. LAMBDA is the
| sign of ISCALE, used in A9. Fp1 contains
| 10^^(abs(ISCALE)) using a rounding mode which is a
| function of the original rounding mode and the signs
| of ISCALE and X. A table is given in the code.
|
| A8. Clr INEX; Force RZ.
| The operation in A3 above may have set INEX2.
| RZ mode is forced for the scaling operation to insure
| only one rounding error. The grs bits are collected in
| the INEX flag for use in A10.
|
| A9. Scale X -> Y.
| The mantissa is scaled to the desired number of
| significant digits. The excess digits are collected
| in INEX2.
|
| A10. Or in INEX.
| If INEX is set, round error occurred. This is
| compensated for by 'or-ing' in the INEX2 flag to
| the lsb of Y.
|
| A11. Restore original FPCR; set size ext.
| Perform FINT operation in the user's rounding mode.
| Keep the size to extended.
|
| A12. Calculate YINT = FINT(Y) according to user's rounding
| mode. The FPSP routine sintdo is used. The output
| is in fp0.
|
| A13. Check for LEN digits.
| If the int operation results in more than LEN digits,
| or less than LEN -1 digits, adjust ILOG and repeat from
| A6. This test occurs only on the first pass. If the
| result is exactly 10^LEN, decrement ILOG and divide
| the mantissa by 10.
|
| A14. Convert the mantissa to bcd.
| The binstr routine is used to convert the LEN digit
| mantissa to bcd in memory. The input to binstr is
| to be a fraction; i.e. (mantissa)/10^LEN and adjusted
| such that the decimal point is to the left of bit 63.
| The bcd digits are stored in the correct position in
| the final string area in memory.
|
| A15. Convert the exponent to bcd.
| As in A14 above, the exp is converted to bcd and the
| digits are stored in the final string.
| Test the length of the final exponent string. If the
| length is 4, set operr.
|
| A16. Write sign bits to final string.
|
| Implementation Notes:
|
| The registers are used as follows:
|
| d0: scratch; LEN input to binstr
| d1: scratch
| d2: upper 32-bits of mantissa for binstr
| d3: scratch;lower 32-bits of mantissa for binstr
| d4: LEN
| d5: LAMBDA/ICTR
| d6: ILOG
| d7: k-factor
| a0: ptr for original operand/final result
| a1: scratch pointer
| a2: pointer to FP_X; abs(original value) in ext
| fp0: scratch
| fp1: scratch
| fp2: scratch
| F_SCR1:
| F_SCR2:
| L_SCR1:
| L_SCR2:
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|BINDEC idnt 2,1 | Motorola 040 Floating Point Software Package
 
.include "fpsp.h"
 
|section 8
 
| Constants in extended precision
| LOG2 is the multiplier applied to a non-negative (e + 0.f) estimate in
| A3 to obtain ILOG; its value is log10(2).  LOG2UP1 is the same constant
| with the last mantissa bit incremented and is used for negative
| estimates.
LOG2: .long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
LOG2UP1: .long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000

| Constants in single precision
| Only the first longword of each entry holds the value (1.0, 2.0, 10.0
| and 4933.0); the remaining three longwords are zero padding.
FONE: .long 0x3F800000,0x00000000,0x00000000,0x00000000
FTWO: .long 0x40000000,0x00000000,0x00000000,0x00000000
FTEN: .long 0x41200000,0x00000000,0x00000000,0x00000000
F4933: .long 0x459A2800,0x00000000,0x00000000,0x00000000

| Rounding-mode lookup used in A7.  The byte index is built there as
| (initial FPCR rounding mode * 4) + (LAMBDA * 2) + (1 if X is negative),
| so each row below is one initial mode in the order RN, RZ, RM, RP.
| Entry values: 0 = RN, 2 = RM, 3 = RP.
RBDTBL: .byte 0,0,0,0
.byte 3,3,2,2
.byte 3,2,2,3
.byte 2,3,3,2
 
|xref binstr
|xref sintdo
|xref ptenrn,ptenrm,ptenrp
 
.global bindec
.global sc_mul
bindec:
moveml %d2-%d7/%a2,-(%a7)
fmovemx %fp0-%fp2,-(%a7)
 
| A1. Set RM and size ext. Set SIGMA = sign input;
| The k-factor is saved for use in d7. Clear BINDEC_FLG for
| separating normalized/denormalized input. If the input
| is a denormalized number, set the BINDEC_FLG memory word
| to signal denorm. If the input is unnormalized, normalize
| the input and test for denormalized result.
|
fmovel #rm_mode,%FPCR |set RM and ext
movel (%a0),L_SCR2(%a6) |save exponent for sign check
movel %d0,%d7 |move k-factor to d7
clrb BINDEC_FLG(%a6) |clr norm/denorm flag
movew STAG(%a6),%d0 |get stag
andiw #0xe000,%d0 |isolate stag bits
beq A2_str |if zero, input is norm
|
| Normalize the denorm
|
un_de_norm:
movew (%a0),%d0
andiw #0x7fff,%d0 |strip sign of normalized exp
movel 4(%a0),%d1
movel 8(%a0),%d2
norm_loop:
subw #1,%d0
lsll #1,%d2
roxll #1,%d1
tstl %d1
bges norm_loop
|
| Test if the normalized input is denormalized
|
tstw %d0
bgts pos_exp |if greater than zero, it is a norm
st BINDEC_FLG(%a6) |set flag for denorm
pos_exp:
andiw #0x7fff,%d0 |strip sign of normalized exp
movew %d0,(%a0)
movel %d1,4(%a0)
movel %d2,8(%a0)
 
| A2. Set X = abs(input).
|
A2_str:
movel (%a0),FP_SCR2(%a6) | move input to work space
movel 4(%a0),FP_SCR2+4(%a6) | move input to work space
movel 8(%a0),FP_SCR2+8(%a6) | move input to work space
andil #0x7fffffff,FP_SCR2(%a6) |create abs(X)
 
| A3. Compute ILOG.
| ILOG is the log base 10 of the input value. It is approx-
| imated by adding e + 0.f when the original value is viewed
| as 2^^e * 1.f in extended precision. This value is stored
| in d6.
|
| Register usage:
| Input/Output
| d0: k-factor/exponent
| d2: x/x
| d3: x/x
| d4: x/x
| d5: x/x
| d6: x/ILOG
| d7: k-factor/Unchanged
| a0: ptr for original operand/final result
| a1: x/x
| a2: x/x
| fp0: x/float(ILOG)
| fp1: x/x
| fp2: x/x
| F_SCR1:x/x
| F_SCR2:Abs(X)/Abs(X) with $3fff exponent
| L_SCR1:x/x
| L_SCR2:first word of X packed/Unchanged
 
tstb BINDEC_FLG(%a6) |check for denorm
beqs A3_cont |if clr, continue with norm
movel #-4933,%d6 |force ILOG = -4933
bras A4_str
A3_cont:
movew FP_SCR2(%a6),%d0 |move exp to d0
movew #0x3fff,FP_SCR2(%a6) |replace exponent with 0x3fff
fmovex FP_SCR2(%a6),%fp0 |now fp0 has 1.f
subw #0x3fff,%d0 |strip off bias
faddw %d0,%fp0 |add in exp
fsubs FONE,%fp0 |subtract off 1.0
fbge pos_res |if pos, branch
fmulx LOG2UP1,%fp0 |if neg, mul by LOG2UP1
fmovel %fp0,%d6 |put ILOG in d6 as a lword
bras A4_str |go move out ILOG
pos_res:
fmulx LOG2,%fp0 |if pos, mul by LOG2
fmovel %fp0,%d6 |put ILOG in d6 as a lword
 
 
| A4. Clr INEX bit.
| The operation in A3 above may have set INEX2.
 
A4_str:
fmovel #0,%FPSR |zero all of fpsr - nothing needed
 
 
| A5. Set ICTR = 0;
| ICTR is a flag used in A13. It must be set before the
| loop entry A6. The lower word of d5 is used for ICTR.
 
clrw %d5 |clear ICTR
 
 
| A6. Calculate LEN.
| LEN is the number of digits to be displayed. The k-factor
| can dictate either the total number of digits, if it is
| a positive number, or the number of digits after the
| original decimal point which are to be included as
| significant. See the 68882 manual for examples.
| If LEN is computed to be greater than 17, set OPERR in
| USER_FPSR. LEN is stored in d4.
|
| Register usage:
| Input/Output
| d0: exponent/Unchanged
| d2: x/x/scratch
| d3: x/x
| d4: exc picture/LEN
| d5: ICTR/Unchanged
| d6: ILOG/Unchanged
| d7: k-factor/Unchanged
| a0: ptr for original operand/final result
| a1: x/x
| a2: x/x
| fp0: float(ILOG)/Unchanged
| fp1: x/x
| fp2: x/x
| F_SCR1:x/x
| F_SCR2:Abs(X) with $3fff exponent/Unchanged
| L_SCR1:x/x
| L_SCR2:first word of X packed/Unchanged
 
A6_str:
tstl %d7 |branch on sign of k
bles k_neg |if k <= 0, LEN = ILOG + 1 - k
movel %d7,%d4 |if k > 0, LEN = k
bras len_ck |skip to LEN check
k_neg:
movel %d6,%d4 |first load ILOG to d4
subl %d7,%d4 |subtract off k
addql #1,%d4 |add in the 1
len_ck:
tstl %d4 |LEN check: branch on sign of LEN
bles LEN_ng |if neg, set LEN = 1
cmpl #17,%d4 |test if LEN > 17
bles A7_str |if not, forget it
movel #17,%d4 |set max LEN = 17
tstl %d7 |if negative, never set OPERR
bles A7_str |if positive, continue
orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR
bras A7_str |finished here
LEN_ng:
moveql #1,%d4 |min LEN is 1
 
 
| A7. Calculate SCALE.
| SCALE is equal to 10^ISCALE, where ISCALE is the number
| of decimal places needed to insure LEN integer digits
| in the output before conversion to bcd. LAMBDA is the sign
| of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
| the rounding mode as given in the following table (see
| Coonen, p. 7.23 as ref.; however, the SCALE variable is
| of opposite sign in bindec.sa from Coonen).
|
| Initial USE
| FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
| ----------------------------------------------
| RN 00 0 0 00/0 RN
| RN 00 0 1 00/0 RN
| RN 00 1 0 00/0 RN
| RN 00 1 1 00/0 RN
| RZ 01 0 0 11/3 RP
| RZ 01 0 1 11/3 RP
| RZ 01 1 0 10/2 RM
| RZ 01 1 1 10/2 RM
| RM 10 0 0 11/3 RP
| RM 10 0 1 10/2 RM
| RM 10 1 0 10/2 RM
| RM 10 1 1 11/3 RP
| RP 11 0 0 10/2 RM
| RP 11 0 1 11/3 RP
| RP 11 1 0 11/3 RP
| RP 11 1 1 10/2 RM
|
| Register usage:
| Input/Output
| d0: exponent/scratch - final is 0
| d2: x/0 or 24 for A9
| d3: x/scratch - offset ptr into PTENRM array
| d4: LEN/Unchanged
| d5: 0/ICTR:LAMBDA
| d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
| d7: k-factor/Unchanged
| a0: ptr for original operand/final result
| a1: x/ptr to PTENRM array
| a2: x/x
| fp0: float(ILOG)/Unchanged
| fp1: x/10^ISCALE
| fp2: x/x
| F_SCR1:x/x
| F_SCR2:Abs(X) with $3fff exponent/Unchanged
| L_SCR1:x/x
| L_SCR2:first word of X packed/Unchanged
 
A7_str:
tstl %d7 |test sign of k
bgts k_pos |if pos and > 0, skip this
cmpl %d6,%d7 |test k - ILOG
blts k_pos |if ILOG >= k, skip this
movel %d7,%d6 |if ((k<0) & (ILOG < k)) ILOG = k
k_pos:
movel %d6,%d0 |calc ILOG + 1 - LEN in d0
addql #1,%d0 |add the 1
subl %d4,%d0 |sub off LEN
swap %d5 |use upper word of d5 for LAMBDA
clrw %d5 |set it zero initially
clrw %d2 |set up d2 for very small case
tstl %d0 |test sign of ISCALE
bges iscale |if pos, skip next inst
addqw #1,%d5 |if neg, set LAMBDA true
cmpl #0xffffecd4,%d0 |test iscale <= -4908
bgts no_inf |if false, skip rest
addil #24,%d0 |add in 24 to iscale
movel #24,%d2 |put 24 in d2 for A9
no_inf:
negl %d0 |and take abs of ISCALE
iscale:
fmoves FONE,%fp1 |init fp1 to 1
bfextu USER_FPCR(%a6){#26:#2},%d1 |get initial rmode bits
lslw #1,%d1 |put them in bits 2:1
addw %d5,%d1 |add in LAMBDA
lslw #1,%d1 |put them in bits 3:1
tstl L_SCR2(%a6) |test sign of original x
bges x_pos |if pos, don't set bit 0
addql #1,%d1 |if neg, set bit 0
x_pos:
leal RBDTBL,%a2 |load rbdtbl base
moveb (%a2,%d1),%d3 |load d3 with new rmode
lsll #4,%d3 |put bits in proper position
fmovel %d3,%fpcr |load bits into fpu
lsrl #4,%d3 |put bits in proper position
tstb %d3 |decode new rmode for pten table
bnes not_rn |if zero, it is RN
leal PTENRN,%a1 |load a1 with RN table base
bras rmode |exit decode
not_rn:
lsrb #1,%d3 |get lsb in carry
bccs not_rp |if carry clear, it is RM
leal PTENRP,%a1 |load a1 with RP table base
bras rmode |exit decode
not_rp:
leal PTENRM,%a1 |load a1 with RM table base
rmode:
clrl %d3 |clr table index
e_loop:
lsrl #1,%d0 |shift next bit into carry
bccs e_next |if zero, skip the mul
fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no)
e_next:
addl #12,%d3 |inc d3 to next pwrten table entry
tstl %d0 |test if ISCALE is zero
bnes e_loop |if not, loop
 
 
| A8. Clr INEX; Force RZ.
| The operation in A3 above may have set INEX2.
| RZ mode is forced for the scaling operation to insure
| only one rounding error. The grs bits are collected in
| the INEX flag for use in A10.
|
| Register usage:
| Input/Output
 
fmovel #0,%FPSR |clr INEX
fmovel #rz_mode,%FPCR |set RZ rounding mode
 
 
| A9. Scale X -> Y.
| The mantissa is scaled to the desired number of significant
| digits. The excess digits are collected in INEX2. If mul,
| Check d2 for excess 10 exponential value. If not zero,
| the iscale value would have caused the pwrten calculation
| to overflow. Only a negative iscale can cause this, so
| multiply by 10^(d2), which is now only allowed to be 24,
| with a multiply by 10^8 and 10^16, which is exact since
| 10^24 is exact. If the input was denormalized, we must
| create a busy stack frame with the mul command and the
| two operands, and allow the fpu to complete the multiply.
|
| Register usage:
| Input/Output
| d0: FPCR with RZ mode/Unchanged
| d2: 0 or 24/unchanged
| d3: x/x
| d4: LEN/Unchanged
| d5: ICTR:LAMBDA
| d6: ILOG/Unchanged
| d7: k-factor/Unchanged
| a0: ptr for original operand/final result
| a1: ptr to PTENRM array/Unchanged
| a2: x/x
| fp0: float(ILOG)/X adjusted for SCALE (Y)
| fp1: 10^ISCALE/Unchanged
| fp2: x/x
| F_SCR1:x/x
| F_SCR2:Abs(X) with $3fff exponent/Unchanged
| L_SCR1:x/x
| L_SCR2:first word of X packed/Unchanged
 
A9_str:
fmovex (%a0),%fp0 |load X from memory
fabsx %fp0 |use abs(X)
tstw %d5 |LAMBDA is in lower word of d5
bnes sc_mul |if neg (LAMBDA = 1), scale by mul
fdivx %fp1,%fp0 |calculate X / SCALE -> Y to fp0
bras A10_st |branch to A10
 
sc_mul:
tstb BINDEC_FLG(%a6) |check for denorm
beqs A9_norm |if norm, continue with mul
fmovemx %fp1-%fp1,-(%a7) |load ETEMP with 10^ISCALE
movel 8(%a0),-(%a7) |load FPTEMP with input arg
movel 4(%a0),-(%a7)
movel (%a0),-(%a7)
movel #18,%d3 |load count for busy stack
A9_loop:
clrl -(%a7) |clear lword on stack
dbf %d3,A9_loop
moveb VER_TMP(%a6),(%a7) |write current version number
moveb #BUSY_SIZE-4,1(%a7) |write current busy size
moveb #0x10,0x44(%a7) |set fcefpte[15] bit
movew #0x0023,0x40(%a7) |load cmdreg1b with mul command
moveb #0xfe,0x8(%a7) |load all 1s to cu savepc
frestore (%a7)+ |restore frame to fpu for completion
fmulx 36(%a1),%fp0 |multiply fp0 by 10^8
fmulx 48(%a1),%fp0 |multiply fp0 by 10^16
bras A10_st
A9_norm:
tstw %d2 |test for small exp case
beqs A9_con |if zero, continue as normal
fmulx 36(%a1),%fp0 |multiply fp0 by 10^8
fmulx 48(%a1),%fp0 |multiply fp0 by 10^16
A9_con:
fmulx %fp1,%fp0 |calculate X * SCALE -> Y to fp0
 
 
| A10. Or in INEX.
| If INEX is set, round error occurred. This is compensated
| for by 'or-ing' in the INEX2 flag to the lsb of Y.
|
| Register usage:
| Input/Output
| d0: FPCR with RZ mode/FPSR with INEX2 isolated
| d2: x/x
| d3: x/x
| d4: LEN/Unchanged
| d5: ICTR:LAMBDA
| d6: ILOG/Unchanged
| d7: k-factor/Unchanged
| a0: ptr for original operand/final result
| a1: ptr to PTENxx array/Unchanged
| a2: x/ptr to FP_SCR2(a6)
| fp0: Y/Y with lsb adjusted
| fp1: 10^ISCALE/Unchanged
| fp2: x/x
 
A10_st:
fmovel %FPSR,%d0 |get FPSR
fmovex %fp0,FP_SCR2(%a6) |move Y to memory
leal FP_SCR2(%a6),%a2 |load a2 with ptr to FP_SCR2
btstl #9,%d0 |check if INEX2 set
beqs A11_st |if clear, skip rest
oril #1,8(%a2) |or in 1 to lsb of mantissa
fmovex FP_SCR2(%a6),%fp0 |write adjusted Y back to fpu
 
 
| A11. Restore original FPCR; set size ext.
| Perform FINT operation in the user's rounding mode. Keep
| the size to extended. The sintdo entry point in the sint
| routine expects the FPCR value to be in USER_FPCR for
| mode and precision. The original FPCR is saved in L_SCR1.
 
A11_st:
movel USER_FPCR(%a6),L_SCR1(%a6) |save it for later
andil #0x00000030,USER_FPCR(%a6) |set size to ext,
| ;block exceptions
 
 
| A12. Calculate YINT = FINT(Y) according to user's rounding mode.
| The FPSP routine sintdo is used. The output is in fp0.
|
| Register usage:
| Input/Output
| d0: FPSR with AINEX cleared/FPCR with size set to ext
| d2: x/x/scratch
| d3: x/x
| d4: LEN/Unchanged
| d5: ICTR:LAMBDA/Unchanged
| d6: ILOG/Unchanged
| d7: k-factor/Unchanged
| a0: ptr for original operand/src ptr for sintdo
| a1: ptr to PTENxx array/Unchanged
| a2: ptr to FP_SCR2(a6)/Unchanged
| a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
| fp0: Y/YINT
| fp1: 10^ISCALE/Unchanged
| fp2: x/x
| F_SCR1:x/x
| F_SCR2:Y adjusted for inex/Y with original exponent
| L_SCR1:x/original USER_FPCR
| L_SCR2:first word of X packed/Unchanged
 
A12_st:
moveml %d0-%d1/%a0-%a1,-(%a7) |save regs used by sintd0
movel L_SCR1(%a6),-(%a7)
movel L_SCR2(%a6),-(%a7)
leal FP_SCR2(%a6),%a0 |a0 is ptr to F_SCR2(a6)
fmovex %fp0,(%a0) |move Y to memory at FP_SCR2(a6)
tstl L_SCR2(%a6) |test sign of original operand
bges do_fint |if pos, use Y
orl #0x80000000,(%a0) |if neg, use -Y
do_fint:
movel USER_FPSR(%a6),-(%a7)
bsr sintdo |sint routine returns int in fp0
moveb (%a7),USER_FPSR(%a6)
addl #4,%a7
movel (%a7)+,L_SCR2(%a6)
movel (%a7)+,L_SCR1(%a6)
moveml (%a7)+,%d0-%d1/%a0-%a1 |restore regs used by sint
movel L_SCR2(%a6),FP_SCR2(%a6) |restore original exponent
movel L_SCR1(%a6),USER_FPCR(%a6) |restore user's FPCR
 
 
| A13. Check for LEN digits.
| If the int operation results in more than LEN digits,
| or less than LEN -1 digits, adjust ILOG and repeat from
| A6. This test occurs only on the first pass. If the
| result is exactly 10^LEN, decrement ILOG and divide
| the mantissa by 10. The calculation of 10^LEN cannot
| be inexact, since all powers of ten up to 10^27 are exact
| in extended precision, so the use of a previous power-of-ten
| table will introduce no error.
|
|
| Register usage:
| Input/Output
| d0: FPCR with size set to ext/scratch final = 0
| d2: x/x
| d3: x/scratch final = x
| d4: LEN/LEN adjusted
| d5: ICTR:LAMBDA/LAMBDA:ICTR
| d6: ILOG/ILOG adjusted
| d7: k-factor/Unchanged
| a0: pointer into memory for packed bcd string formation
| a1: ptr to PTENxx array/Unchanged
| a2: ptr to FP_SCR2(a6)/Unchanged
| fp0: int portion of Y/abs(YINT) adjusted
| fp1: 10^ISCALE/Unchanged
| fp2: x/10^LEN
| F_SCR1:x/x
| F_SCR2:Y with original exponent/Unchanged
| L_SCR1:original USER_FPCR/Unchanged
| L_SCR2:first word of X packed/Unchanged
 
| A13_st: the ICTR word of d5 selects between the first-pass range check
| (ICTR = 0) and the equality-only check at not_zr (ICTR <> 0).
| Both paths build a power of ten in fp2 by multiplying together the
| 12-byte table entries at (a1) selected by the set bits of d0.
A13_st:
swap %d5 |put ICTR in lower word of d5
tstw %d5 |check if ICTR = 0
bne not_zr |if non-zero, go to second test
|
| Compute 10^(LEN-1)
|
fmoves FONE,%fp2 |init fp2 to 1.0
movel %d4,%d0 |put LEN in d0
subql #1,%d0 |d0 = LEN -1
clrl %d3 |clr table index
l_loop:
lsrl #1,%d0 |shift next bit into carry
bccs l_next |if zero, skip the mul
fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no)
l_next:
addl #12,%d3 |inc d3 to next pwrten table entry
tstl %d0 |test if any bits of LEN-1 remain (addl changed the flags)
bnes l_loop |if not, loop
|
| 10^(LEN-1) is computed for this test and A14. If the input was
| denormalized, check only the case in which YINT > 10^LEN.
|
tstb BINDEC_FLG(%a6) |check if input was norm
beqs A13_con |if norm, continue with checking
fabsx %fp0 |take abs of YINT
bra test_2
|
| Compare abs(YINT) to 10^(LEN-1) and 10^LEN
|
A13_con:
fabsx %fp0 |take abs of YINT
fcmpx %fp2,%fp0 |compare abs(YINT) with 10^(LEN-1)
fbge test_2 |if greater, do next test
subql #1,%d6 |subtract 1 from ILOG
movew #1,%d5 |set ICTR
fmovel #rm_mode,%FPCR |set rmode to RM
fmuls FTEN,%fp2 |compute 10^LEN
bra A6_str |return to A6 and recompute YINT
test_2:
fmuls FTEN,%fp2 |compute 10^LEN
fcmpx %fp2,%fp0 |compare abs(YINT) with 10^LEN
fblt A14_st |if less, all is ok, go to A14
fbgt fix_ex |if greater, fix and redo
fdivs FTEN,%fp0 |if equal, divide by 10
addql #1,%d6 | and inc ILOG
bras A14_st | and continue elsewhere
fix_ex:
addql #1,%d6 |increment ILOG by 1
movew #1,%d5 |set ICTR
fmovel #rm_mode,%FPCR |set rmode to RM
bra A6_str |return to A6 and recompute YINT
|
| Since ICTR <> 0, we have already been through one adjustment,
| and shouldn't have another; this is to check if abs(YINT) = 10^LEN
| 10^LEN is again computed using whatever table is in a1 since the
| value calculated cannot be inexact.
|
not_zr:
fmoves FONE,%fp2 |init fp2 to 1.0
movel %d4,%d0 |put LEN in d0
clrl %d3 |clr table index
z_loop:
lsrl #1,%d0 |shift next bit into carry
bccs z_next |if zero, skip the mul
fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no)
z_next:
addl #12,%d3 |inc d3 to next pwrten table entry
tstl %d0 |test if any bits of LEN remain (addl changed the flags)
bnes z_loop |if not, loop
fabsx %fp0 |get abs(YINT)
fcmpx %fp2,%fp0 |check if abs(YINT) = 10^LEN
fbne A14_st |if not, skip this
fdivs FTEN,%fp0 |divide abs(YINT) by 10
addql #1,%d6 |and inc ILOG by 1
addql #1,%d4 | and inc LEN
fmuls FTEN,%fp2 | LEN was incremented, so scale fp2 to the new 10^LEN
 
 
| A14. Convert the mantissa to bcd.
| The binstr routine is used to convert the LEN digit
| mantissa to bcd in memory. The input to binstr is
| to be a fraction; i.e. (mantissa)/10^LEN and adjusted
| such that the decimal point is to the left of bit 63.
| The bcd digits are stored in the correct position in
| the final string area in memory.
|
|
| Register usage:
| Input/Output
| d0: x/LEN call to binstr - final is 0
| d1: x/0
| d2: x/ms 32-bits of mant of abs(YINT)
| d3: x/ls 32-bits of mant of abs(YINT)
| d4: LEN/Unchanged
| d5: ICTR:LAMBDA/LAMBDA:ICTR
| d6: ILOG
| d7: k-factor/Unchanged
| a0: pointer into memory for packed bcd string formation
| /ptr to first mantissa byte in result string
| a1: ptr to PTENxx array/Unchanged
| a2: ptr to FP_SCR2(a6)/Unchanged
| fp0: int portion of Y/abs(YINT) adjusted
| fp1: 10^ISCALE/Unchanged
| fp2: 10^LEN/Unchanged
| F_SCR1:x/Work area for final result
| F_SCR2:Y with original exponent/Unchanged
| L_SCR1:original USER_FPCR/Unchanged
| L_SCR2:first word of X packed/Unchanged
 
| A14_st: form abs(YINT)/10^LEN under round-to-zero, pull its mantissa
| into d2:d3, shift it right according to the exponent, round at bit 7
| of d3, and hand d2:d3 with LEN in d0 to binstr.  a0 is left pointing
| 3 bytes into FP_SCR1 for the call.
A14_st:
fmovel #rz_mode,%FPCR |force rz for conversion
fdivx %fp2,%fp0 |divide abs(YINT) by 10^LEN
leal FP_SCR1(%a6),%a0
fmovex %fp0,(%a0) |move abs(YINT)/10^LEN to memory
movel 4(%a0),%d2 |move 2nd word of FP_RES to d2
movel 8(%a0),%d3 |move 3rd word of FP_RES to d3
clrl 4(%a0) |zero word 2 of FP_RES
clrl 8(%a0) |zero word 3 of FP_RES
movel (%a0),%d0 |move exponent to d0
swap %d0 |put exponent in lower word
beqs no_sft |if zero, don't shift
subil #0x3ffd,%d0 |sub bias less 2 to make fract
tstl %d0 |check if > 1
bgts no_sft |if so, don't shift
negl %d0 |make exp positive
m_loop:
lsrl #1,%d2 |shift d2:d3 right, add 0s
roxrl #1,%d3 |the number of places
dbf %d0,m_loop |given in d0
no_sft:
tstl %d2 |check for mantissa of zero
bnes no_zr |if not, go on
tstl %d3 |continue zero check
beqs zer_m |if zero, go directly to binstr
no_zr:
clrl %d1 |put zero in d1 for addx
addil #0x00000080,%d3 |inc at bit 7
addxl %d1,%d2 |continue inc (propagate carry into d2)
andil #0xffffff80,%d3 |strip off lsb not used by 882
zer_m:
movel %d4,%d0 |put LEN in d0 for binstr call
addql #3,%a0 |a0 points to M16 byte in result
bsr binstr |call binstr to convert mant
 
 
| A15. Convert the exponent to bcd.
| As in A14 above, the exp is converted to bcd and the
| digits are stored in the final string.
|
| Digits are stored in L_SCR1(a6) on return from binstr as:
|
| 32 16 15 0
| -----------------------------------------
| | 0 | e3 | e2 | e1 | e4 | X | X | X |
| -----------------------------------------
|
| And are moved into their proper places in FP_SCR1. If digit e4
| is non-zero, OPERR is signaled. In all cases, all 4 digits are
| written as specified in the 881/882 manual for packed decimal.
|
| Register usage:
| Input/Output
| d0: x/LEN call to binstr - final is 0
| d1: x/scratch (0);shift count for final exponent packing
| d2: x/ms 32-bits of exp fraction/scratch
| d3: x/ls 32-bits of exp fraction
| d4: LEN/Unchanged
| d5: ICTR:LAMBDA/LAMBDA:ICTR
| d6: ILOG
| d7: k-factor/Unchanged
| a0: ptr to result string/ptr to L_SCR1(a6)
| a1: ptr to PTENxx array/Unchanged
| a2: ptr to FP_SCR2(a6)/Unchanged
| fp0: abs(YINT) adjusted/float(ILOG)
| fp1: 10^ISCALE/Unchanged
| fp2: 10^LEN/Unchanged
| F_SCR1:Work area for final result/BCD result
| F_SCR2:Y with original exponent/ILOG/10^4
| L_SCR1:original USER_FPCR/Exponent digits on return from binstr
| L_SCR2:first word of X packed/Unchanged
 
| A15_st: choose the exponent value to convert (abs(ILOG), or a forced
| constant when the mantissa in fp0 is zero), divide it by the table
| entry at 24(a1) to make a fraction, convert 4 digits with binstr into
| L_SCR1, and insert them into FP_SCR1.  A non-zero leading digit sets
| OPERR and AIOP in USER_FPSR.
A15_st:
tstb BINDEC_FLG(%a6) |check for denorm
beqs not_denorm
ftstx %fp0 |test for zero
fbeq den_zero |if zero, use k-factor or 4933
fmovel %d6,%fp0 |float ILOG
fabsx %fp0 |get abs of ILOG
bras convrt
den_zero:
tstl %d7 |check sign of the k-factor
blts use_ilog |if negative, use ILOG
fmoves F4933,%fp0 |force exponent to 4933
bras convrt |do it
use_ilog:
fmovel %d6,%fp0 |float ILOG
fabsx %fp0 |get abs of ILOG
bras convrt
not_denorm:
ftstx %fp0 |test for zero
fbne not_zero |if non-zero, use ILOG; if zero, fall through and force exponent
fmoves FONE,%fp0 |force exponent to 1
bras convrt |do it
not_zero:
fmovel %d6,%fp0 |float ILOG
fabsx %fp0 |get abs of ILOG
convrt:
fdivx 24(%a1),%fp0 |compute ILOG/10^4
fmovex %fp0,FP_SCR2(%a6) |store fp0 in memory
movel 4(%a2),%d2 |move word 2 to d2
movel 8(%a2),%d3 |move word 3 to d3
movew (%a2),%d0 |move exp to d0
beqs x_loop_fin |if zero, skip the shift
subiw #0x3ffd,%d0 |subtract off bias
negw %d0 |make exp positive
x_loop:
lsrl #1,%d2 |shift d2:d3 right
roxrl #1,%d3 |the number of places
dbf %d0,x_loop |given in d0
x_loop_fin:
clrl %d1 |put zero in d1 for addx
addil #0x00000080,%d3 |inc at bit 7 (0x80)
addxl %d1,%d2 |continue inc (propagate carry into d2)
andil #0xffffff80,%d3 |strip off lsb not used by 882
movel #4,%d0 |put 4 in d0 for binstr call
leal L_SCR1(%a6),%a0 |a0 is ptr to L_SCR1 for exp digits
bsr binstr |call binstr to convert exp
movel L_SCR1(%a6),%d0 |load L_SCR1 lword to d0
movel #12,%d1 |use d1 for shift count
lsrl %d1,%d0 |shift d0 right by 12
bfins %d0,FP_SCR1(%a6){#4:#12} |put e3:e2:e1 in FP_SCR1
lsrl %d1,%d0 |shift d0 right by 12
bfins %d0,FP_SCR1(%a6){#16:#4} |put e4 in FP_SCR1
tstb %d0 |check if e4 is zero
beqs A16_st |if zero, skip rest
orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR
 
 
| A16. Write sign bits to final string.
| Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
|
| Register usage:
| Input/Output
| d0: x/scratch - final is x
| d2: x/x
| d3: x/x
| d4: LEN/Unchanged
| d5: ICTR:LAMBDA/LAMBDA:ICTR
| d6: ILOG/ILOG adjusted
| d7: k-factor/Unchanged
| a0: ptr to L_SCR1(a6)/Unchanged
| a1: ptr to PTENxx array/Unchanged
| a2: ptr to FP_SCR2(a6)/Unchanged
| fp0: float(ILOG)/Unchanged
| fp1: 10^ISCALE/Unchanged
| fp2: 10^LEN/Unchanged
| F_SCR1:BCD result with correct signs
| F_SCR2:ILOG/10^4
| L_SCR1:Exponent digits on return from binstr
| L_SCR2:first word of X packed/Unchanged
 
| A16_st: build a 2-bit value in d0 (2 if L_SCR2 is negative, plus 1 if
| ILOG in d6 is negative) and insert it into the top two bits of
| FP_SCR1, then clear the FPSR, pop fp0-fp2 and d2-d7/a2, and return.
A16_st:
clrl %d0 |clr d0 for collection of signs
andib #0x0f,FP_SCR1(%a6) |clear first nibble of FP_SCR1
tstl L_SCR2(%a6) |check sign of original mantissa
bges mant_p |if pos, don't set SM
moveql #2,%d0 |move 2 in to d0 for SM
mant_p:
tstl %d6 |check sign of ILOG
bges wr_sgn |if pos, don't set SE
addql #1,%d0 |set bit 0 in d0 for SE
wr_sgn:
bfins %d0,FP_SCR1(%a6){#0:#2} |insert SM and SE into FP_SCR1

| Clean up and restore all registers used.

fmovel #0,%FPSR |clear possible inex2/ainex bits
fmovemx (%a7)+,%fp0-%fp2 |pop fp0-fp2
moveml (%a7)+,%d2-%d7/%a2 |pop d2-d7 and a2
rts
 
|end
/bugfix.S
0,0 → 1,496
|
| bugfix.sa 3.2 1/31/91
|
|
| This file contains workarounds for bugs in the 040
| relating to the Floating-Point Software Package (FPSP)
|
| Fixes for bugs: 1238
|
| Bug: 1238
|
|
| /* The following dirty_bit clear should be left in
| * the handler permanently to improve throughput.
| * The dirty_bits are located at bits [23:16] in
| * longword $08 in the busy frame $4x60. Bit 16
| * corresponds to FP0, bit 17 corresponds to FP1,
| * and so on.
| */
| if (E3_exception_just_serviced) {
| dirty_bit[cmdreg3b[9:7]] = 0;
| }
|
| if (fsave_format_version != $40) {goto NOFIX}
|
| if !(E3_exception_just_serviced) {goto NOFIX}
| if (cupc == 0000000) {goto NOFIX}
| if ((cmdreg1b[15:13] != 000) &&
| (cmdreg1b[15:10] != 010001)) {goto NOFIX}
| if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) &&
| (cmdreg1b[12:10] != cmdreg3b[9:7])) ) &&
| ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) &&
| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX}
|
| /* Note: for 6d43b or 8d43b, you may want to add the following code
| * to get better coverage. (If you do not insert this code, the part
| * won't lock up; it will simply get the wrong answer.)
| * Do NOT insert this code for 10d43b or later parts.
| *
| * if (fpiarcu == integer stack return address) {
| * cupc = 0000000;
| * goto NOFIX;
| * }
| */
|
| if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2}
| FIX_OPCLASS0:
| if (((cmdreg1b[12:10] == cmdreg2b[9:7]) ||
| (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) &&
| (cmdreg1b[12:10] != cmdreg3b[9:7]) &&
| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */
| /* We execute the following code if there is an
| xu conflict and NOT an nu conflict */
|
| /* first save some values on the fsave frame */
| stag_temp = STAG[fsave_frame];
| cmdreg1b_temp = CMDREG1B[fsave_frame];
| dtag_temp = DTAG[fsave_frame];
| ete15_temp = ETE15[fsave_frame];
|
| CUPC[fsave_frame] = 0000000;
| FRESTORE
| FSAVE
|
| /* If the xu instruction is exceptional, we punt.
| * Otherwise, we would have to include OVFL/UNFL handler
| * code here to get the correct answer.
| */
| if (fsave_frame_format == $4060) {goto KILL_PROCESS}
|
| fsave_frame = /* build a long frame of all zeros */
| fsave_frame_format = $4060; /* label it as long frame */
|
| /* load it with the temps we saved */
| STAG[fsave_frame] = stag_temp;
| CMDREG1B[fsave_frame] = cmdreg1b_temp;
| DTAG[fsave_frame] = dtag_temp;
| ETE15[fsave_frame] = ete15_temp;
|
| /* Make sure that the cmdreg3b dest reg is not going to
| * be destroyed by a FMOVEM at the end of all this code.
| * If it is, you should move the current value of the reg
| * onto the stack so that the reg will be loaded with that value.
| */
|
| /* All done. Proceed with the code below */
| }
|
| etemp = FP_reg_[cmdreg1b[12:10]];
| ete15 = ~ete14;
| cmdreg1b[15:10] = 010010;
| clear(bug_flag_procIDxxxx);
| FRESTORE and return;
|
|
| FIX_OPCLASS2:
| if ((cmdreg1b[9:7] == cmdreg2b[9:7]) &&
| (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */
| /* We execute the following code if there is an
| xu conflict and NOT an nu conflict */
|
| /* first save some values on the fsave frame */
| stag_temp = STAG[fsave_frame];
| cmdreg1b_temp = CMDREG1B[fsave_frame];
| dtag_temp = DTAG[fsave_frame];
| ete15_temp = ETE15[fsave_frame];
| etemp_temp = ETEMP[fsave_frame];
|
| CUPC[fsave_frame] = 0000000;
| FRESTORE
| FSAVE
|
|
| /* If the xu instruction is exceptional, we punt.
| * Otherwise, we would have to include OVFL/UNFL handler
| * code here to get the correct answer.
| */
| if (fsave_frame_format == $4060) {goto KILL_PROCESS}
|
| fsave_frame = /* build a long frame of all zeros */
| fsave_frame_format = $4060; /* label it as long frame */
|
| /* load it with the temps we saved */
| STAG[fsave_frame] = stag_temp;
| CMDREG1B[fsave_frame] = cmdreg1b_temp;
| DTAG[fsave_frame] = dtag_temp;
| ETE15[fsave_frame] = ete15_temp;
| ETEMP[fsave_frame] = etemp_temp;
|
| /* Make sure that the cmdreg3b dest reg is not going to
| * be destroyed by a FMOVEM at the end of all this code.
| * If it is, you should move the current value of the reg
| * onto the stack so that the reg will be loaded with that value.
| */
|
| /* All done. Proceed with the code below */
| }
|
| if (etemp_exponent == min_sgl) etemp_exponent = min_dbl;
| if (etemp_exponent == max_sgl) etemp_exponent = max_dbl;
| cmdreg1b[15:10] = 010101;
| clear(bug_flag_procIDxxxx);
| FRESTORE and return;
|
|
| NOFIX:
| clear(bug_flag_procIDxxxx);
| FRESTORE and return;
|
 
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|BUGFIX idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref fpsp_fmt_error
 
.global b1238_fix
b1238_fix:
|
| This code is entered only on completion of the handling of an
| nu-generated ovfl, unfl, or inex exception. If the version
| number of the fsave is not $40, this handler is not necessary.
| Simply branch to fix_done and exit normally.
|
| The byte tested is at 4(a7) because the bsr return address occupies
| the longword at (a7); the fsave frame starts just above it.
|
cmpib #VER_40,4(%a7)
bne fix_done
|
| Test for cu_savepc equal to zero. If so, this is not a bug
| #1238 case.
|
moveb CU_SAVEPC(%a6),%d0
andib #0xFE,%d0 |ignore bit 0 of the cu_savepc byte
beq fix_done |if zero, this is not bug #1238

|
| Test the register conflict aspect. If opclass0, check for
| cu src equal to xu dest or equal to nu dest. If so, go to
| op0. Else, or if opclass2, check for cu dest equal to
| xu dest or equal to nu dest. If so, go to tst_opcl. Else,
| exit, it is not the bug case.
|
| Check for opclass 0. If not, go and check for opclass 2 and sgl.
|
movew CMDREG1B(%a6),%d0
andiw #0xE000,%d0 |strip all but opclass
bne op2sgl |not opclass 0, check op2
|
| Check for cu and nu register conflict. If one exists, this takes
| priority over a cu and xu conflict.
|
bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src
bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest
cmpb %d0,%d1
beqs op0 |if equal, continue bugfix
|
| Check for cu dest equal to nu dest. If so, go and fix the
| bug condition. Otherwise, exit.
|
bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest
cmpb %d0,%d1 |cmp 1st dest with 3rd dest
beqs op0 |if equal, continue bugfix
|
| Check for cu and xu register conflict.
|
bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest
cmpb %d0,%d1 |cmp 1st dest with 2nd dest
beqs op0_xu |if equal, continue bugfix
bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src
cmpb %d0,%d1 |cmp 1st src with 2nd dest
beq op0_xu
bne fix_done |if the reg checks fail, exit
|
| We have the opclass 0 situation.
|
| op0: store the cu source register into ETEMP, rewrite the top six
| bits of CMDREG1B to 010010, and set the etemp15 bit in STAG to the
| complement of ETEMP exponent bit 14.
op0:
bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no
movel #7,%d1
subl %d0,%d1 |d1 = 7 - source register no
clrl %d0
bsetl %d1,%d0 |d0 = single-bit dynamic register mask
fmovemx %d0,ETEMP(%a6) |load source to ETEMP

moveb #0x12,%d0
bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended
|
| Set ETEMP exponent bit 15 as the opposite of ete14
|
btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14
beq setete15 |bit 14 clear, so set ete15
bclr #etemp15_bit,STAG(%a6) |bit 14 set, so clear ete15
bra finish
setete15:
bset #etemp15_bit,STAG(%a6)
bra finish
 
|
| We have the case in which a conflict exists between the cu src or
| dest and the dest of the xu. We must clear the instruction in
| the cu and restore the state, allowing the instruction in the
| xu to complete. Remember, the instruction in the nu
| was exceptional, and was completed by the appropriate handler.
| If the result of the xu instruction is not exceptional, we can
| restore the instruction from the cu to the frame and continue
| processing the original exception. If the result is also
| exceptional, we choose to kill the process.
|
| Items saved from the stack:
|
| $3c stag - L_SCR1
| $40 cmdreg1b - L_SCR2
| $44 dtag - L_SCR3
|
| The cu savepc is set to zero, and the frame is restored to the
| fpu.
|
| op0_xu: save STAG, CMDREG1B and the top three bits of DTAG in
| L_SCR1-L_SCR3, zero cu savepc, then frestore/fsave the frame so the
| xu instruction can complete.  The bsr return address is held in d1
| while the frame is off the stack.
op0_xu:
movel STAG(%a6),L_SCR1(%a6)
movel CMDREG1B(%a6),L_SCR2(%a6)
movel DTAG(%a6),L_SCR3(%a6)
andil #0xe0000000,L_SCR3(%a6) |keep only the top three bits of the saved lword
moveb #0,CU_SAVEPC(%a6)
movel (%a7)+,%d1 |save return address from bsr
frestore (%a7)+
fsave -(%a7)
|
| Check if the instruction which just completed was exceptional.
|
cmpw #0x4060,(%a7) |a $4060 frame came back: give up at op0_xb
beq op0_xb
|
| It is necessary to isolate the result of the instruction in the
| xu if it is to fp0 - fp3 and write that value to the USER_FPn
| locations on the stack. The correct destination register is in
| cmdreg2b.
|
bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no
cmpil #3,%d0
bgts op0_xi |dest is fp4-fp7: nothing to copy
beqs op0_fp3
cmpil #1,%d0
blts op0_fp0
beqs op0_fp1
op0_fp2:
fmovemx %fp2-%fp2,USER_FP2(%a6)
bras op0_xi
op0_fp1:
fmovemx %fp1-%fp1,USER_FP1(%a6)
bras op0_xi
op0_fp0:
fmovemx %fp0-%fp0,USER_FP0(%a6)
bras op0_xi
op0_fp3:
fmovemx %fp3-%fp3,USER_FP3(%a6)
|
| The frame returned is idle. We must build a busy frame to hold
| the cu state information and setup etemp.
|
| op0_xi: zero the frame area on the stack, push a $40600000 header,
| put back the values saved in L_SCR1-L_SCR3, re-push the bsr return
| address from d1, then perform the same ETEMP/CMDREG1B/ete15 fix-up
| as op0.
op0_xi:
movel #22,%d0 |clear 23 lwords
clrl (%a7)
op0_loop:
clrl -(%a7)
dbf %d0,op0_loop
movel #0x40600000,-(%a7) |frame header: $4060 format word
movel L_SCR1(%a6),STAG(%a6)
movel L_SCR2(%a6),CMDREG1B(%a6)
movel L_SCR3(%a6),DTAG(%a6)
moveb #0x6,CU_SAVEPC(%a6) |make cu savepc non-zero again
movel %d1,-(%a7) |return bsr return address
bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no
movel #7,%d1
subl %d0,%d1 |d1 = 7 - source register no
clrl %d0
bsetl %d1,%d0 |d0 = single-bit dynamic register mask
fmovemx %d0,ETEMP(%a6) |load source to ETEMP

moveb #0x12,%d0
bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended
|
| Set ETEMP exponent bit 15 as the opposite of ete14
|
btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14
beq op0_sete15 |bit 14 clear, so set ete15
bclr #etemp15_bit,STAG(%a6) |bit 14 set, so clear ete15
bra finish
op0_sete15:
bset #etemp15_bit,STAG(%a6)
bra finish
 
|
| The frame returned is busy. It is not possible to reconstruct
| the code sequence to allow completion. We will jump to
| fpsp_fmt_error and allow the kernel to kill the process.
|
| op0_xb: reached from op0_xu when the re-saved frame has format $4060.
op0_xb:
jmp fpsp_fmt_error
 
|
| Check for opclass 2 and single size. If not both, exit.
|
| op2sgl: entered when CMDREG1B is not opclass 0.  Control falls through
| to op2_xu when the cu dest matches the xu dest but not the nu dest.
op2sgl:
movew CMDREG1B(%a6),%d0
andiw #0xFC00,%d0 |strip all but opclass and size
cmpiw #0x4400,%d0 |test for opclass 2 and size=sgl
bne fix_done |if not, it is not bug 1238
|
| Check for cu dest equal to nu dest or equal to xu dest, with
| a cu and nu conflict taking priority over a cu and xu conflict.
| If either, go and fix the bug condition. Otherwise, exit.
|
bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest
bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest
cmpb %d0,%d1 |cmp 1st dest with 3rd dest
beq op2_com |if equal, continue bugfix
bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest
cmpb %d0,%d1 |cmp 1st dest with 2nd dest
bne fix_done |if the reg checks fail, exit
|
| We have the case in which a conflict exists between the cu src or
| dest and the dest of the xu. We must clear the instruction in
| the cu and restore the state, allowing the instruction in the
| xu to complete. Remember, the instruction in the nu
| was exceptional, and was completed by the appropriate handler.
| If the result of the xu instruction is not exceptional, we can
| restore the instruction from the cu to the frame and continue
| processing the original exception. If the result is also
| exceptional, we choose to kill the process.
|
| Items saved from the stack:
|
| $3c stag - L_SCR1
| $40 cmdreg1b - L_SCR2
| $44 dtag - L_SCR3
| etemp - FP_SCR2
|
| The cu savepc is set to zero, and the frame is restored to the
| fpu.
|
| op2_xu: same sequence as op0_xu, but the three ETEMP longwords are
| also saved, in FP_SCR2, before the frame is restored to the fpu.
op2_xu:
movel STAG(%a6),L_SCR1(%a6)
movel CMDREG1B(%a6),L_SCR2(%a6)
movel DTAG(%a6),L_SCR3(%a6)
andil #0xe0000000,L_SCR3(%a6) |keep only the top three bits of the saved lword
moveb #0,CU_SAVEPC(%a6)
movel ETEMP(%a6),FP_SCR2(%a6)
movel ETEMP_HI(%a6),FP_SCR2+4(%a6)
movel ETEMP_LO(%a6),FP_SCR2+8(%a6)
movel (%a7)+,%d1 |save return address from bsr
frestore (%a7)+
fsave -(%a7)
|
| Check if the instruction which just completed was exceptional.
|
cmpw #0x4060,(%a7) |a $4060 frame came back: give up at op2_xb
beq op2_xb
|
| It is necessary to isolate the result of the instruction in the
| xu if it is to fp0 - fp3 and write that value to the USER_FPn
| locations on the stack. The correct destination register is in
| cmdreg2b.
|
bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no
cmpil #3,%d0
bgts op2_xi |dest is fp4-fp7: nothing to copy
beqs op2_fp3
cmpil #1,%d0
blts op2_fp0
beqs op2_fp1
op2_fp2:
fmovemx %fp2-%fp2,USER_FP2(%a6)
bras op2_xi
op2_fp1:
fmovemx %fp1-%fp1,USER_FP1(%a6)
bras op2_xi
op2_fp0:
fmovemx %fp0-%fp0,USER_FP0(%a6)
bras op2_xi
op2_fp3:
fmovemx %fp3-%fp3,USER_FP3(%a6)
|
| The frame returned is idle. We must build a busy frame to hold
| the cu state information and fix up etemp.
|
| op2_xi: zero the frame area on the stack, push a $40600000 header,
| put back STAG/CMDREG1B/DTAG from L_SCR1-L_SCR3 and ETEMP from
| FP_SCR2, re-push the bsr return address from d1, and continue at
| op2_com.
op2_xi:
movel #22,%d0 |clear 23 lwords
clrl (%a7)
op2_loop:
clrl -(%a7)
dbf %d0,op2_loop
movel #0x40600000,-(%a7) |frame header: $4060 format word
movel L_SCR1(%a6),STAG(%a6)
movel L_SCR2(%a6),CMDREG1B(%a6)
movel L_SCR3(%a6),DTAG(%a6)
moveb #0x6,CU_SAVEPC(%a6) |make cu savepc non-zero again
movel FP_SCR2(%a6),ETEMP(%a6)
movel FP_SCR2+4(%a6),ETEMP_HI(%a6)
movel FP_SCR2+8(%a6),ETEMP_LO(%a6)
movel %d1,-(%a7) |return bsr return address
bra op2_com
 
|
| We have the opclass 2 single source situation.
|
| op2_com: rewrite the top six bits of CMDREG1B to 010101, then replace
| the ETEMP_EX word if it equals one of four specific values; any other
| value is left untouched.
op2_com:
moveb #0x15,%d0
bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, double

cmpw #0x407F,ETEMP_EX(%a6) |single +max
bnes case2
movew #0x43FF,ETEMP_EX(%a6) |to double +max
bra finish
case2:
cmpw #0xC07F,ETEMP_EX(%a6) |single -max
bnes case3
movew #0xC3FF,ETEMP_EX(%a6) |to double -max
bra finish
case3:
cmpw #0x3F80,ETEMP_EX(%a6) |single +min
bnes case4
movew #0x3C00,ETEMP_EX(%a6) |to double +min
bra finish
case4:
cmpw #0xBF80,ETEMP_EX(%a6) |single -min
bne fix_done |none of the four values: leave ETEMP_EX alone
movew #0xBC00,ETEMP_EX(%a6) |to double -min
bra finish
|
| The frame returned is busy. It is not possible to reconstruct
| the code sequence to allow completion. fpsp_fmt_error causes
| an fline illegal instruction to be executed.
|
| You should replace the jump to fpsp_fmt_error with a jump
| to the entry point used to kill a process.
|
| op2_xb: reached from op2_xu when the re-saved frame has format $4060.
op2_xb:
jmp fpsp_fmt_error
 
|
| Enter here if the case is not of the situations affected by
| bug #1238, or if the fix is completed, and exit.
|
| finish and fix_done are two names for the same exit point.
finish:
fix_done:
rts
 
|end
/x_fline.S
0,0 → 1,104
|
| x_fline.sa 3.3 1/10/91
|
| fpsp_fline --- FPSP handler for fline exception
|
| First determine if the exception is one of the unimplemented
| floating point instructions. If so, let fpsp_unimp handle it.
| Next, determine if the instruction is an fmovecr with a non-zero
| <ea> field. If so, handle here and return. Otherwise, it
| must be a real F-line exception.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_FLINE: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref real_fline
|xref fpsp_unimp
|xref uni_2
|xref mem_read
|xref fpsp_fmt_error
 
.global fpsp_fline
| fpsp_fline: hand unimplemented-instruction vectors to fpsp_unimp;
| otherwise read the 4 bytes at the stacked PC through mem_read and
| go to not_mvcr unless the coprocessor id field is 1 and the 6-bit
| field at offset 16 is $17.  Falls through to fix_stack on a match.
fpsp_fline:
|
| check for unimplemented vector first. Use EXC_VEC-4 because
| the equate is valid only after a 'link a6' has pushed one more
| long onto the stack.
|
cmpw #UNIMP_VEC,EXC_VEC-4(%a7)
beql fpsp_unimp

|
| fmovecr with non-zero <ea> handling here
|
subl #4,%a7 |4 accounts for 2-word difference
| ;between six word frame (unimp) and
| ;four word frame
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
moveal EXC_PC+4(%a6),%a0 |get address of fline instruction
leal L_SCR1(%a6),%a1 |use L_SCR1 as scratch
movel #4,%d0 |d0 = 4 (the two words examined below)
addl #4,%a6 |to offset the sub.l #4,a7 above so that
| ;a6 can point correctly to the stack frame
| ;before branching to mem_read
bsrl mem_read
subl #4,%a6 |undo the temporary a6 adjustment
movel L_SCR1(%a6),%d0 |d0 contains the fline and command word
bfextu %d0{#4:#3},%d1 |extract coprocessor id
cmpib #1,%d1 |check if cpid=1
bne not_mvcr |exit if not
bfextu %d0{#16:#6},%d1 |top six bits of the command word
cmpib #0x17,%d1 |check if it is an FMOVECR encoding
bne not_mvcr
| ;if an FMOVECR instruction, fix stack
| ;and go to FPSP_UNIMP
| fix_stack: grow the frame on the stack to the size selected by the
| version byte at (a7) ($40 or $41, anything else goes to
| fpsp_fmt_error), move the stacked SR/PC down by 4, fill in the
| vector, EA and CMDREG1B fields, and branch to uni_2.
fix_stack:
cmpib #VER_40,(%a7) |test for orig unimp frame
bnes ck_rev
subl #UNIMP_40_SIZE-4,%a7 |emulate an orig fsave
moveb #VER_40,(%a7) |write version byte
moveb #UNIMP_40_SIZE-4,1(%a7) |write size byte
clrw 2(%a7)
bras fix_con
ck_rev:
cmpib #VER_41,(%a7) |test for rev unimp frame
bnel fpsp_fmt_error |if not $40 or $41, exit with error
subl #UNIMP_41_SIZE-4,%a7 |emulate a rev fsave
moveb #VER_41,(%a7) |write version byte
moveb #UNIMP_41_SIZE-4,1(%a7) |write size byte
clrw 2(%a7)
fix_con:
movew EXC_SR+4(%a6),EXC_SR(%a6) |move stacked sr to new position
movel EXC_PC+4(%a6),EXC_PC(%a6) |move stacked pc to new position
fmovel EXC_PC(%a6),%FPIAR |point FPIAR to fline inst
movel #4,%d1
addl %d1,EXC_PC(%a6) |increment stacked pc value to next inst
movew #0x202c,EXC_VEC(%a6) |reformat vector to unimp
clrl EXC_EA(%a6) |clear the EXC_EA field
movew %d0,CMDREG1B(%a6) |move the lower word into CMDREG1B
clrl E_BYTE(%a6)
bsetb #UFLAG,T_BYTE(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers
bral uni_2
 
| not_mvcr: undo everything fpsp_fline set up (saved registers, fsave
| frame, link, and the 4-byte stack adjustment) and go to real_fline.
not_mvcr:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers
frestore (%a7)+
unlk %a6
addl #4,%a7 |undo the subl #4,a7 done on entry
bral real_fline
 
|end
/slog2.S
0,0 → 1,188
|
| slog2.sa 3.1 12/10/90
|
| The entry point slog10 computes the base-10
| logarithm of an input argument X.
| slog10d does the same except the input value is a
| denormalized number.
| sLog2 and sLog2d are the base-2 analogues.
|
| INPUT: Double-extended value in memory location pointed to
| by address register a0.
|
| OUTPUT: log_10(X) or log_2(X) returned in floating-point
| register fp0.
|
| ACCURACY and MONOTONICITY: The returned result is within 1.7
| ulps in 64 significant bits, i.e. within 0.5003 ulp
| to 53 bits if the result is subsequently rounded
| to double precision. The result is provably monotonic
| in double precision.
|
| SPEED: Two timings are measured, both in the copy-back mode.
| The first one is measured when the function is invoked
| the first time (so the instructions and data are not
| in cache), and the second one is measured when the
| function is reinvoked at the same input argument.
|
| ALGORITHM and IMPLEMENTATION NOTES:
|
| slog10d:
|
| Step 0. If X < 0, create a NaN and raise the invalid operation
| flag. Otherwise, save FPCR in D1; set FpCR to default.
| Notes: Default means round-to-nearest mode, no floating-point
| traps, and precision control = double extended.
|
| Step 1. Call slognd to obtain Y = log(X), the natural log of X.
| Notes: Even if X is denormalized, log(X) is always normalized.
|
| Step 2. Compute log_10(X) = log(X) * (1/log(10)).
| 2.1 Restore the user FPCR
| 2.2 Return ans := Y * INV_L10.
|
|
| slog10:
|
| Step 0. If X < 0, create a NaN and raise the invalid operation
| flag. Otherwise, save FPCR in D1; set FpCR to default.
| Notes: Default means round-to-nearest mode, no floating-point
| traps, and precision control = double extended.
|
| Step 1. Call sLogN to obtain Y = log(X), the natural log of X.
|
| Step 2. Compute log_10(X) = log(X) * (1/log(10)).
| 2.1 Restore the user FPCR
| 2.2 Return ans := Y * INV_L10.
|
|
| sLog2d:
|
| Step 0. If X < 0, create a NaN and raise the invalid operation
| flag. Otherwise, save FPCR in D1; set FpCR to default.
| Notes: Default means round-to-nearest mode, no floating-point
| traps, and precision control = double extended.
|
| Step 1. Call slognd to obtain Y = log(X), the natural log of X.
| Notes: Even if X is denormalized, log(X) is always normalized.
|
| Step 2. Compute log_2(X) = log(X) * (1/log(2)).
| 2.1 Restore the user FPCR
| 2.2 Return ans := Y * INV_L2.
|
|
| sLog2:
|
| Step 0. If X < 0, create a NaN and raise the invalid operation
| flag. Otherwise, save FPCR in D1; set FpCR to default.
| Notes: Default means round-to-nearest mode, no floating-point
| traps, and precision control = double extended.
|
| Step 1. If X is not an integer power of two, i.e., X != 2^k,
| go to Step 3.
|
| Step 2. Return k.
| 2.1 Get integer k, X = 2^k.
| 2.2 Restore the user FPCR.
| 2.3 Return ans := convert-to-double-extended(k).
|
| Step 3. Call sLogN to obtain Y = log(X), the natural log of X.
|
| Step 4. Compute log_2(X) = log(X) * (1/log(2)).
| 4.1 Restore the user FPCR
| 4.2 Return ans := Y * INV_L2.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SLOG2 idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
|xref t_frcinx
|xref t_operr
|xref slogn
|xref slognd
 
| Multipliers applied to the natural log returned by slogn/slognd:
| INV_L10 is used for the log10 entry points, INV_L2 for the log2 ones
| (1/log(10) and 1/log(2) in the algorithm notes above).
| Each is three longwords of data followed by a zero longword.
INV_L10: .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000

INV_L2: .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
 
.global slog10d
slog10d:
|--entry point for Log10(X), X is denormalized
|--a0 points to X.  d1 is pushed and later loaded into the fpcr
|--(presumably it holds the caller's FPCR on entry - confirm).
movel (%a0),%d0 |first longword of X (sign in the top bit)
blt invalid |negative input
movel %d1,-(%sp) |keep d1 for the fpcr reload below
clrl %d1 |call slognd with d1 = 0
bsr slognd | ...log(X), X denorm.
fmovel (%sp)+,%fpcr |load fpcr from the saved d1
fmulx INV_L10,%fp0 |scale log(X) by INV_L10
bra t_frcinx
 
.global slog10
slog10:
|--entry point for Log10(X), X is normalized
|--a0 points to X.  d1 is pushed and later loaded into the fpcr
|--(presumably it holds the caller's FPCR on entry - confirm).

movel (%a0),%d0 |first longword of X (sign in the top bit)
blt invalid |negative input
movel %d1,-(%sp) |keep d1 for the fpcr reload below
clrl %d1 |call slogn with d1 = 0
bsr slogn | ...log(X), X normal.
fmovel (%sp)+,%fpcr |load fpcr from the saved d1
fmulx INV_L10,%fp0 |scale log(X) by INV_L10
bra t_frcinx
 
 
.global slog2d
slog2d:
|--entry point for Log2(X), X is denormalized
|--a0 points to X.  d1 is pushed and later loaded into the fpcr
|--(presumably it holds the caller's FPCR on entry - confirm).

movel (%a0),%d0 |first longword of X (sign in the top bit)
blt invalid |negative input
movel %d1,-(%sp) |keep d1 for the fpcr reload below
clrl %d1 |call slognd with d1 = 0
bsr slognd | ...log(X), X denorm.
fmovel (%sp)+,%fpcr |load fpcr from the saved d1
fmulx INV_L2,%fp0 |scale log(X) by INV_L2
bra t_frcinx
 
.global slog2
slog2:
|--entry point for Log2(X), X is normalized
|--a0 points to X.  If both mantissa longwords are zero apart from the
|--top bit of the first one, X = 2^k and k is returned directly;
|--otherwise the result is slogn(X) * INV_L2.
|--d1 is loaded into the fpcr on both paths (presumably it holds the
|--caller's FPCR on entry - confirm).
movel (%a0),%d0 |first longword of X (sign in the top bit)
blt invalid |negative input

movel 8(%a0),%d0 |third longword of X
bnes continue | ...X is not 2^k

movel 4(%a0),%d0 |second longword of X
andl #0x7FFFFFFF,%d0 |ignore its top bit
tstl %d0
bnes continue

|--X = 2^k.
movew (%a0),%d0 |first word of X
andl #0x00007FFF,%d0 |keep the low 15 bits
subl #0x3FFF,%d0 |k = that value minus $3FFF
fmovel %d1,%fpcr |load fpcr from d1
fmovel %d0,%fp0 |return k converted from integer
bra t_frcinx

continue:
movel %d1,-(%sp) |keep d1 for the fpcr reload below
clrl %d1 |call slogn with d1 = 0
bsr slogn | ...log(X), X normal.
fmovel (%sp)+,%fpcr |load fpcr from the saved d1
fmulx INV_L2,%fp0 |scale log(X) by INV_L2
bra t_frcinx
 
| invalid: common target for all four entry points when the first
| longword of X is negative.
invalid:
bra t_operr
 
|end
/x_snan.S
0,0 → 1,277
|
| x_snan.sa 3.3 7/1/91
|
| fpsp_snan --- FPSP handler for signalling NAN exception
|
| SNAN for float -> integer conversions (integer conversion of
| an SNAN) is a non-maskable run-time exception.
|
| For trap disabled the 040 does the following:
| If the dest data format is s, d, or x, then the SNAN bit in the NAN
| is set to one and the resulting non-signaling NAN (truncated if
| necessary) is transferred to the dest. If the dest format is b, w,
| or l, then garbage is written to the dest (actually the upper 32 bits
| of the mantissa are sent to the integer unit).
|
| For trap enabled the 040 does the following:
| If the inst is move_out, then the results are the same as for trap
| disabled with the exception posted. If the instruction is not move_
| out, the dest. is not modified, and the exception is posted.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_SNAN: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref get_fline
|xref mem_write
|xref real_snan
|xref real_inex
|xref fpsp_done
|xref reg_dest
 
.global fpsp_snan
| fpsp_snan: set up the local frame, fsave the fpu state, and save
| d0-d1/a0-a1, fp0-fp3 and the fpu control registers.  Branches to ena
| when the SNAN bit is set in FPCR_ENABLE; otherwise calls move_out and
| falls through to ck_inex.
fpsp_snan:
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)

|
| Check if trap enabled
|
btstb #snan_bit,FPCR_ENABLE(%a6)
bnes ena |If enabled, then branch

bsrl move_out |else SNAN disabled
|
| It is possible to have an inex1 exception with the
| snan. If the inex enable bit is set in the FPCR, and either
| inex2 or inex1 occurred, we must clean up and branch to the
| real inex handler.
|
| ck_inex: AND the enable byte with the exception byte and keep the low
| two bits; if neither is set go to end_snan, otherwise restore all
| saved state and enter real_inex with the inexact vector.
ck_inex:
moveb FPCR_ENABLE(%a6),%d0
andb FPSR_EXCEPT(%a6),%d0 |enabled AND reported
andib #0x3,%d0 |keep the low two bits (inex2/inex1 per the note above)
beq end_snan
|
| Inexact enabled and reported, and we must take an inexact exception.
|
take_inex:
moveb #INEX_VEC,EXC_VEC+1(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_inex
|
| SNAN is enabled. Check if inst is move_out.
| Make any corrections to the 040 output as necessary.
|
ena:
btstb #5,CMDREG1B(%a6) |if set, inst is move out
beq not_out

bsrl move_out

|
| report_snan --- overwrite the fsave frame on the stack with a zeroed
| busy-size frame, restore the saved user registers and leave through
| real_snan.  Also entered from not_out after the N condition code bit
| has been updated.
|
report_snan:
moveb (%a7),VER_TMP(%a6) |save the first byte of the fsave frame
cmpib #VER_40,(%a7) |test for orig unimp frame
bnes ck_rev
moveql #13,%d0 |need to zero 14 lwords
bras rep_con
ck_rev:
moveql #11,%d0 |need to zero 12 lwords
rep_con:
clrl (%a7) |zero the longword currently on top of the stack
loop1:
clrl -(%a7) |clear and dec a7
dbra %d0,loop1 |executes d0+1 times
moveb VER_TMP(%a6),(%a7) |format a busy frame
moveb #BUSY_SIZE-4,1(%a7) |write busy size
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore everything saved on entry
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_snan
|
| Exit snan handler by expanding the unimp frame into a busy frame
|
| Reached from ck_inex when no enabled inexact exception is pending.
| Same frame rebuild as report_snan, except that E1 is cleared in
| E_BYTE first and the exit is through fpsp_done.
|
end_snan:
bclrb #E1,E_BYTE(%a6)

moveb (%a7),VER_TMP(%a6) |save the first byte of the fsave frame
cmpib #VER_40,(%a7) |test for orig unimp frame
bnes ck_rev2
moveql #13,%d0 |need to zero 14 lwords
bras rep_con2
ck_rev2:
moveql #11,%d0 |need to zero 12 lwords
rep_con2:
clrl (%a7) |zero the longword currently on top of the stack
loop2:
clrl -(%a7) |clear and dec a7
dbra %d0,loop2 |executes d0+1 times
moveb VER_TMP(%a6),(%a7) |format a busy frame
moveb #BUSY_SIZE-4,1(%a7) |write busy size
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore everything saved on entry
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral fpsp_done
 
|
| move_out --- dispatch a move-out by destination format
|
| Loads a0 with the <ea> recorded in the exception frame, pulls the
| 3-bit format field out of CMDREG1B and hands off to the integer
| store routine for that size.  Every other format returns at once
| with nothing written.
|
move_out:
	movel	EXC_EA(%a6),%a0			|a0 = <ea> from exc frame
	bfextu	CMDREG1B(%a6){#3:#3},%d0	|d0{2:0} = format field, upper bits 0

	tstl	%d0				|format 0 is long
	beqs	sto_long
	cmpib	#4,%d0				|format 4 is word
	beqs	sto_word
	cmpib	#6,%d0				|format 6 is byte
	beqs	sto_byte

	rts					|not byte, word or long
|
| Get the 32 most significant bits of etemp mantissa
|
| sto_long --- move-out of the NAN to a long integer destination.
| On entry a0 holds the destination <ea>; a0 = 0 means the
| destination is a data register (handled by wrt_dn).
|
sto_long:
movel ETEMP_HI(%a6),%d1
movel #4,%d0 |load byte count
|
| Set the SNAN bit (bit 30 of the upper mantissa longword); per the
| file header a NAN with this bit set to one is non-signaling
|
bsetl #30,%d1
|
| Store to the users destination address
|
tstl %a0 |check if <ea> is 0
beqs wrt_dn |destination is a data register
movel %d1,-(%a7) |move the snan onto the stack
movel %a0,%a1 |load dest addr into a1
movel %a7,%a0 |load src addr of snan into a0
bsrl mem_write |write snan to user memory
movel (%a7)+,%d1 |clear off stack
rts
|
| Get the 16 most significant bits of etemp mantissa
|
| sto_word --- move-out of the NAN to a word integer destination.
| On entry a0 holds the destination <ea>; a0 = 0 means the
| destination is a data register (handled by wrt_dn).
|
sto_word:
movel ETEMP_HI(%a6),%d1
movel #2,%d0 |load byte count
|
| Set the SNAN bit (bit 30 of the upper mantissa longword); per the
| file header a NAN with this bit set to one is non-signaling
|
bsetl #30,%d1
|
| Store to the users destination address
|
tstl %a0 |check if <ea> is 0
beqs wrt_dn |destination is a data register

movel %d1,-(%a7) |move the snan onto the stack
movel %a0,%a1 |load dest addr into a1
movel %a7,%a0 |source = first (high-order) word of the pushed long
bsrl mem_write |write snan to user memory
movel (%a7)+,%d1 |clear off stack
rts
|
| Get the 8 most significant bits of etemp mantissa
|
| sto_byte --- move-out of the NAN to a byte integer destination.
| On entry a0 holds the destination <ea>; a0 = 0 means the
| destination is a data register (handled by wrt_dn).
|
sto_byte:
movel ETEMP_HI(%a6),%d1
movel #1,%d0 |load byte count
|
| Set the SNAN bit (bit 30 of the upper mantissa longword); per the
| file header a NAN with this bit set to one is non-signaling
|
bsetl #30,%d1
|
| Store to the users destination address
|
tstl %a0 |check if <ea> is 0
beqs wrt_dn |destination is a data register
movel %d1,-(%a7) |move the snan onto the stack
movel %a0,%a1 |load dest addr into a1
movel %a7,%a0 |source = first (high-order) byte of the pushed long
bsrl mem_write |write snan to user memory
movel (%a7)+,%d1 |clear off stack
rts
 
|
| wrt_dn --- write to a data register
|
| Entry:  d1 = data to write
|         d0 = operand size in bytes (1 = byte, 2 = word, 4 = long)
| The data is parked in L_SCR1, the destination register number is
| taken from the low three bits of the f-line word returned by
| get_fline, and a size code is merged into d1 (0x10 for long, 0x8
| for word, nothing for byte) before control passes to reg_dest.
|
wrt_dn:
	movel	%d1,L_SCR1(%a6)		|park the data
	movel	%d0,-(%a7)		|keep the size across the call
	bsrl	get_fline		|d0 = f-line word
	moveql	#7,%d1
	andl	%d0,%d1			|d1 = register number
	movel	(%a7)+,%d0		|d0 = size again
	cmpil	#4,%d0
	beqs	wrt_long
	cmpil	#2,%d0
	beqs	wrt_word
wrt_byte:
	bral	reg_dest		|byte: register number only
wrt_word:
	orl	#0x8,%d1		|word: add the word size code
	bral	reg_dest
wrt_long:
	orl	#0x10,%d1		|long: add the long size code
	bral	reg_dest
|
| Check if it is a src nan or dst nan
|
| not_out --- SNAN trap enabled and the instruction is not a move-out.
| Picks the exponent word of the operand treated as the SNAN
| (FPTEMP when the destination is tagged NAN and bit 6 of FPTEMP_HI's
| first byte is clear, otherwise ETEMP), copies its sign into the
| neg_bit of FPSR_CC and continues at report_snan.
|
not_out:
movel DTAG(%a6),%d0
bfextu %d0{#0:#3},%d0 |isolate dtag in lsbs

cmpib #3,%d0 |check for nan in destination
bnes issrc |destination nan has priority
dst_nan:
btstb #6,FPTEMP_HI(%a6) |check if dest nan is an snan
bnes issrc |no, so check source for snan
movew FPTEMP_EX(%a6),%d0 |dest is the snan: take its sign/exponent word
bras cont
issrc:
movew ETEMP_EX(%a6),%d0 |source is the snan: take its sign/exponent word
cont:
btstl #15,%d0 |test for sign of snan
beqs clr_neg
bsetb #neg_bit,FPSR_CC(%a6) |negative snan: set N
bra report_snan
clr_neg:
bclrb #neg_bit,FPSR_CC(%a6) |positive snan: clear N
bra report_snan
 
|end
/round.S
0,0 → 1,649
|
| round.sa 3.4 7/29/91
|
| handle rounding and normalization tasks
|
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|ROUND idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|
| round --- round result according to precision/mode
|
| a0 points to the input operand in the internal extended format
| d1(high word) contains rounding precision:
| ext = $0000xxxx
| sgl = $0001xxxx
| dbl = $0002xxxx
| d1(low word) contains rounding mode:
| RN = $xxxx0000
| RZ = $xxxx0001
| RM = $xxxx0002
| RP = $xxxx0003
| d0{31:29} contains the g,r,s bits (extended)
|
| On return the value pointed to by a0 is correctly rounded,
| a0 is preserved and the g-r-s bits in d0 are cleared.
| The result is not typed - the tag field is invalid. The
| result is still in the internal extended format.
|
| The INEX bit of USER_FPSR will be set if the rounded result was
| inexact (i.e. if any of the g-r-s bits were set).
|
 
.global round
round:
| If g=r=s=0 then result is exact and round is done, else set
| the inex flag in status reg and continue.
|
bsrs ext_grs |this subroutine looks at the
| ;rounding precision and sets
| ;the appropriate g-r-s bits.
tstl %d0 |if grs are zero, go force
bne rnd_cont |lower bits to zero for size
swap %d1 |set up d1.w for round prec.
bra truncate

rnd_cont:
|
| Use rounding mode as an index into a jump table for these modes.
|
orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
lea mode_tab,%a1
movel (%a1,%d1.w*4),%a1 |fetch the handler for the mode in d1.w
jmp (%a1)
|
| Jump table indexed by rounding mode in d1.w. All following assumes
| grs != 0.
|
mode_tab:
.long rnd_near |mode 0
.long rnd_zero |mode 1
.long rnd_mnus |mode 2
.long rnd_plus |mode 3
|
| ROUND PLUS INFINITY
|
| If sign of fp number = 0 (positive), then add 1 to l.
|
rnd_plus:
swap %d1 |set up d1 for round prec.
tstb LOCAL_SGN(%a0) |check for sign
bmi truncate |if negative then truncate
movel #0xffffffff,%d0 |force g,r,s to be all f's
lea add_to_l,%a1
movel (%a1,%d1.w*4),%a1 |pick the add routine for this precision
jmp (%a1)
|
| ROUND MINUS INFINITY
|
| If sign of fp number = 1 (negative), then add 1 to l.
|
rnd_mnus:
swap %d1 |set up d1 for round prec.
tstb LOCAL_SGN(%a0) |check for sign
bpl truncate |if positive then truncate
movel #0xffffffff,%d0 |force g,r,s to be all f's
lea add_to_l,%a1
movel (%a1,%d1.w*4),%a1 |pick the add routine for this precision
jmp (%a1)
|
| ROUND ZERO
|
| Always truncate.
|
rnd_zero:
swap %d1 |set up d1 for round prec.
bra truncate |truncate indexes its table with the precision in d1.w
|
|
| ROUND NEAREST
|
| If (g=1), then add 1 to l and if (r=s=0), then clear l
| Note that this will round to even in case of a tie.
|
rnd_near:
swap %d1 |set up d1 for round prec.
asll #1,%d0 |shift g-bit to c-bit
bcc truncate |g=0: just truncate; if (g=1) then
lea add_to_l,%a1 |add 1 to l; d0 now holds only r,s for
movel (%a1,%d1.w*4),%a1 |the add routine's tie test
jmp (%a1)
 
|
| ext_grs --- extract guard, round and sticky bits
|
| Input: d1 = PREC:ROUND
| Output: d0{31:29}= guard, round, sticky
|
| The ext_grs extract the guard/round/sticky bits according to the
| selected rounding precision. It is called by the round subroutine
| only. All registers except d0 are kept intact. d0 becomes an
| updated guard,round,sticky in d0{31:29}
|
| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
| prior to usage, and needs to restore d1 to original.
|
| For extended precision (PREC = 0) d0 is returned unchanged.
|
ext_grs:
swap %d1 |have d1.w point to round precision
cmpiw #0,%d1
bnes sgl_or_dbl
bras end_ext_grs |extended: d0 already holds g,r,s
sgl_or_dbl:
moveml %d2/%d3,-(%a7) |make some temp registers
cmpiw #1,%d1
bnes grs_dbl
grs_sgl:
bfextu LOCAL_HI(%a0){#24:#2},%d3 |sgl prec. g-r are 2 bits right
movel #30,%d2 |of the sgl prec. limits
lsll %d2,%d3 |shift g-r bits to MSB of d3
movel LOCAL_HI(%a0),%d2 |get word 2 for s-bit test
andil #0x0000003f,%d2 |s bit is the or of all other
bnes st_stky |bits to the right of g-r
tstl LOCAL_LO(%a0) |test lower mantissa
bnes st_stky |if any are set, set sticky
tstl %d0 |test original g,r,s
bnes st_stky |if any are set, set sticky
bras end_sd |if words 3 and 4 are clr, exit
grs_dbl:
bfextu LOCAL_LO(%a0){#21:#2},%d3 |dbl-prec. g-r are 2 bits right
movel #30,%d2 |of the dbl prec. limits
lsll %d2,%d3 |shift g-r bits to the MSB of d3
movel LOCAL_LO(%a0),%d2 |get lower mantissa for s-bit test
andil #0x000001ff,%d2 |s bit is the or-ing of all
bnes st_stky |other bits to the right of g-r
tstl %d0 |test word original g,r,s
bnes st_stky |if any are set, set sticky
bras end_sd |if clear, exit
st_stky:
bset #rnd_stky_bit,%d3
end_sd:
movel %d3,%d0 |return grs to d0
moveml (%a7)+,%d2/%d3 |restore scratch registers
end_ext_grs:
swap %d1 |restore d1 to original
rts
 
|******************* Local Equates
| Each constant has a single bit set at the l-bit (least significant
| kept bit) of the longword it is added to: LOCAL_HI for sgl,
| LOCAL_LO for dbl.
.set ad_1_sgl,0x00000100 | constant to add 1 to l-bit in sgl prec
.set ad_1_dbl,0x00000800 | constant to add 1 to l-bit in dbl prec


|Jump table for adding 1 to the l-bit indexed by rnd prec
|(0 = ext, 1 = sgl, 2 = dbl; index 3 also selects add_dbl)

add_to_l:
.long add_ext
.long add_sgl
.long add_dbl
.long add_dbl
|
| ADD SINGLE
|
| Adds 1 at the single-precision l-bit of the operand at a0.  If the
| add carries out of LOCAL_HI the carry is rotated back in from the
| top and the exponent is incremented.  When d0 is zero (r=s=0 on the
| round-to-nearest path) the l-bit is cleared.
| sgl_done is also the single-precision entry of the truncate table.
|
add_sgl:
addl #ad_1_sgl,LOCAL_HI(%a0)
bccs scc_clr |no mantissa overflow
roxrw LOCAL_HI(%a0) |shift v-bit back in
roxrw LOCAL_HI+2(%a0) |shift v-bit back in
addw #0x1,LOCAL_EX(%a0) |and incr exponent
scc_clr:
tstl %d0 |test for rs = 0
bnes sgl_done
andiw #0xfe00,LOCAL_HI+2(%a0) |clear the l-bit
sgl_done:
andil #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
clrl LOCAL_LO(%a0) |clear the ls mantissa longword
rts
 
|
| ADD EXTENDED
|
| Adds 1 at the least significant bit of the 64-bit mantissa at a0,
| propagating the carry from LOCAL_LO into LOCAL_HI.  On a carry out
| of LOCAL_HI the carry is rotated back in through all four mantissa
| words and the exponent is incremented.  When d0 is zero the l-bit
| is cleared.
|
add_ext:
addql #1,LOCAL_LO(%a0) |add 1 to l-bit
bccs xcc_clr |test for carry out
addql #1,LOCAL_HI(%a0) |propagate carry
bccs xcc_clr
roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit
roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit
roxrw LOCAL_LO(%a0)
roxrw LOCAL_LO+2(%a0)
addw #0x1,LOCAL_EX(%a0) |and inc exp
xcc_clr:
tstl %d0 |test rs = 0
bnes add_ext_done
andib #0xfe,LOCAL_LO+3(%a0) |clear the l bit
add_ext_done:
rts
|
| ADD DOUBLE
|
| Adds 1 at the double-precision l-bit (in LOCAL_LO) of the operand
| at a0, propagating the carry into LOCAL_HI.  On a carry out of
| LOCAL_HI the carry is rotated back in and the exponent incremented.
| When d0 is zero the l-bit is cleared.
| dbl_done is also the double-precision entry of the truncate table.
|
add_dbl:
addl #ad_1_dbl,LOCAL_LO(%a0)
bccs dcc_clr
addql #1,LOCAL_HI(%a0) |propagate carry
bccs dcc_clr
roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit
roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit
roxrw LOCAL_LO(%a0)
roxrw LOCAL_LO+2(%a0)
addw #0x1,LOCAL_EX(%a0) |incr exponent
dcc_clr:
tstl %d0 |test for rs = 0
bnes dbl_done
andiw #0xf000,LOCAL_LO+2(%a0) |clear the l-bit

dbl_done:
andil #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
rts
 
| error --- returns immediately.  No branch to this label is visible
| in this part of the file.
error:
rts
|
| Truncate all other bits
|
| truncate expects the rounding precision in d1.w (callers swap d1
| first) and jumps through trunct: extended has nothing to clear,
| single and double reuse the tails of add_sgl and add_dbl.
|
trunct:
.long end_rnd |ext
.long sgl_done |sgl
.long dbl_done |dbl
.long dbl_done |index 3 is treated as dbl

truncate:
lea trunct,%a1
movel (%a1,%d1.w*4),%a1
jmp (%a1)

end_rnd:
rts
 
|
| NORMALIZE
|
| These routines (nrm_zero & nrm_set) normalize the unnorm. This
| is done by shifting the mantissa left while decrementing the
| exponent.
|
| NRM_SET shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1).
|
| NRM_ZERO shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1) unless this would mean the exponent
| would go less than 0. In that case the number becomes a denorm - the
| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
| normalized.
|
| Note that both routines have been optimized (for the worst case) and
| therefore do not have the easy to follow decrement/shift loop.
|
| NRM_ZERO
|
| Distance to first 1 bit in mantissa = X
| Distance to 0 from exponent = Y
| If X < Y
| Then
| nrm_set
| Else
| shift mantissa by Y
| set exponent = 0
|
|input:
| FP_SCR1 = exponent, ms mantissa part, ls mantissa part
|output:
| L_SCR1{4} = fpte15 or ete15 bit
|
|
| nrm_zero --- normalize, but never let the exponent go below zero
|
| Input:  a0 points to the operand (LOCAL_EX, LOCAL_HI, LOCAL_LO).
| Output: operand at a0 normalized, or shifted left by the exponent
|         value with the exponent set to 0 when a full normalization
|         would take the exponent below zero.
| Uses d0/d1 as scratch; d2/d3/d5/d6 are saved and restored.
|
| X = distance to the first 1 in the mantissa, Y = exponent.
|
.global nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw	#64,%d0			|see if exp >= 64
	bmis	d0_less
	bsr	nrm_set			|X is at most 63, so exp can't go below 0
	rts
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3		|get the distance to the first 1
|					;in ms mant
	beqs	ms_clr			|branch if no bits were set
	cmpw	%d3,%d0			|if X>Y
	bmis	greater			|then exp will go past 0 (neg) if
|					;it is just shifted
	bsr	nrm_set			|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
| greater --- X > Y: shift the 64-bit mantissa left by Y, set exp = 0.
|
| Register shift counts are taken modulo 64 and a count of 32 or more
| clears a 32-bit register, so the two-register shift below is only
| correct for Y < 32.  Y >= 32 can only arrive from ms_clr (X <= 31
| whenever the ms mant has a bit set), so there the ms mant is zero
| and the shift reduces to moving the ls mant up by Y - 32 bits.
|
greater:
	cmpw	#32,%d0			|shift count 32 or more?
	blts	nz_lt32			|no (or exp word negative): original path
	movel	%d2,%d1			|ms mant = ls mant ...
	subw	#32,%d0			|... shifted left by Y - 32 (0..31)
	lsll	%d0,%d1
	clrl	%d2			|ls mant is shifted out completely
	bras	nz_store
nz_lt32:
	movel	%d2,%d6			|save ls mant in d6
	lsll	%d0,%d2			|shift ls mant by count
	lsll	%d0,%d1			|shift ms mant by count
	movel	#32,%d5
	subl	%d0,%d5			|make op a denorm by shifting bits
	lsrl	%d5,%d6			|by the number in the exp, then
|					;set exp = 0.
	orl	%d6,%d1			|shift the ls mant bits into the ms mant
nz_store:
	movel	#0,%d0			|same as if decremented exp to 0
|					;while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
ms_clr:
	bfffo	%d2{#0:#32},%d3		|check if any bits set in ls mant
	beqs	all_clr			|branch if none set
	addw	#32,%d3			|X counts the 32 zero bits of ms mant too
	cmpw	%d3,%d0			|if X>Y
	bmis	greater			|then branch
	bsr	nrm_set			|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
| NRM_SET
|
| Shifts the 64-bit mantissa at a0 left until its most significant
| bit is set and subtracts the shift count from LOCAL_EX.
| d0/d1 are used as scratch; d6/d7 are saved and restored.
|
.global nrm_set
nrm_set:
movel %d7,-(%a7)
bfffo LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7)
beqs lower |branch if ms mant is all 0's

movel %d6,-(%a7)

subw %d7,LOCAL_EX(%a0) |sub exponent by count
movel LOCAL_HI(%a0),%d0 |d0 has ms mant
movel LOCAL_LO(%a0),%d1 |d1 has ls mant

lsll %d7,%d0 |shift first 1 to j bit position
movel %d1,%d6 |copy ls mant into d6
lsll %d7,%d6 |shift ls mant by count
movel %d6,LOCAL_LO(%a0) |store ls mant into memory
moveql #32,%d6
subl %d7,%d6 |continue shift
lsrl %d6,%d1 |shift off all bits but those that will
| ;be shifted into ms mant
orl %d1,%d0 |shift the ls mant bits into the ms mant
movel %d0,LOCAL_HI(%a0) |store ms mant into memory
moveml (%a7)+,%d7/%d6 |restore registers
rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
|
lower:
movew LOCAL_EX(%a0),%d0 |d0 has exponent
movel LOCAL_LO(%a0),%d1 |d1 has ls mant
subw #32,%d0 |account for ms mant being all zeros
bfffo %d1{#0:#32},%d7 |find first 1 in ls mant to d7)
subw %d7,%d0 |subtract shift count from exp
lsll %d7,%d1 |shift first 1 to integer bit in ms mant
movew %d0,LOCAL_EX(%a0) |store exp
movel %d1,LOCAL_HI(%a0) |store ms mant
clrl LOCAL_LO(%a0) |clear ls mant
movel (%a7)+,%d7 |only d7 was pushed on this path
rts
|
| denorm --- denormalize an intermediate result
|
| Used by underflow.
|
| Input:
| a0 points to the operand to be denormalized
| (in the internal extended format)
|
| d0: rounding precision
| Output:
| a0 points to the denormalized result
| (in the internal extended format)
|
| d0 is guard,round,sticky
|
| d0 comes into this routine with the rounding precision. It
| is then loaded with the denormalized exponent threshold for the
| rounding precision.
|
 
.global denorm
denorm:
btstb #6,LOCAL_EX(%a0) |check for exponents between $7fff-$4000
beqs no_sgn_ext
bsetb #7,LOCAL_EX(%a0) |sign extend if it is so
no_sgn_ext:

cmpib #0,%d0 |if 0 then extended precision
bnes not_ext |else branch

clrl %d1 |load d1 with ext threshold
clrl %d0 |clear the sticky flag
bsr dnrm_lp |denormalize the number
tstb %d1 |check for inex
beq no_inex |if clr, no inex
bras dnrm_inex |if set, set inex

not_ext:
cmpil #1,%d0 |if 1 then single precision
beqs load_sgl |else must be 2, double prec

load_dbl:
movew #dbl_thresh,%d1 |put copy of threshold in d1
movel %d1,%d0 |copy d1 into d0
subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
cmpw #67,%d0 |if diff > 67 (mant + grs bits)
bpls chk_stky |then branch (all bits would be
| ; shifted off in denorm routine)
clrl %d0 |else clear the sticky flag
bsr dnrm_lp |denormalize the number
tstb %d1 |check flag
beqs no_inex |if clr, no inex
bras dnrm_inex |if set, set inex

load_sgl:
movew #sgl_thresh,%d1 |put copy of threshold in d1
movel %d1,%d0 |copy d1 into d0
subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
cmpw #67,%d0 |if diff > 67 (mant + grs bits)
bpls chk_stky |then branch (all bits would be
| ; shifted off in denorm routine)
clrl %d0 |else clear the sticky flag
bsr dnrm_lp |denormalize the number
tstb %d1 |check flag
beqs no_inex |if clr, no inex
bras dnrm_inex |if set, set inex

|
| chk_stky --- the whole mantissa would be shifted out.  The result is
| a zero mantissa with the threshold exponent; if any mantissa bit was
| set, inex2/ainex is flagged and only the sticky bit is returned.
| NOTE(review): when the mantissa is already zero, d0 is not reloaded
| here and still holds (threshold - exp) on return - confirm callers
| never take this path with a zero mantissa.
|
chk_stky:
tstl LOCAL_HI(%a0) |check for any bits set
bnes set_stky
tstl LOCAL_LO(%a0) |check for any bits set
bnes set_stky
bras clr_mant
set_stky:
orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
movel #0x20000000,%d0 |set sticky bit in return value
clr_mant:
movew %d1,LOCAL_EX(%a0) |load exp with threshold
movel #0,LOCAL_HI(%a0) |clear ms mantissa
movel #0,LOCAL_LO(%a0) |clear ls mantissa
rts
dnrm_inex:
orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
no_inex:
rts
 
|
| dnrm_lp --- normalize exponent/mantissa to specified threshold
|
| Input:
| a0 points to the operand to be denormalized
| d0{31:29} initial guard,round,sticky
| d1{15:0} denormalization threshold
| Output:
| a0 points to the denormalized operand
| d0{31:29} final guard,round,sticky
| d1.b inexact flag: all ones means inexact result
|
| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
| so that bfext can be used to extract the new low part of the mantissa.
| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
| is no LOCAL_GRS scratch word following it on the fsave frame.
|
.global dnrm_lp
dnrm_lp:
movel %d2,-(%sp) |save d2 for temp use
btstb #E3,E_BYTE(%a6) |test for type E3 exception
beqs not_E3 |not type E3 exception
bfextu WBTEMP_GRS(%a6){#6:#3},%d2 |extract guard,round, sticky bit
movel #29,%d0
lsll %d0,%d2 |shift g,r,s to their positions
movel %d2,%d0 |E3: initial g,r,s come from WBTEMP_GRS
not_E3:
movel (%sp)+,%d2 |restore d2
movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6) |scratch copy of ls mant and
movel %d0,FP_SCR2+LOCAL_GRS(%a6) |g,r,s for the bit-field extracts
movel %d1,%d0 |copy the denorm threshold
subw LOCAL_EX(%a0),%d1 |d1 = threshold - uns exponent
bles no_lp |d1 <= 0
cmpw #32,%d1
blts case_1 |0 < d1 < 32
cmpw #64,%d1
blts case_2 |32 <= d1 < 64
bra case_3 |d1 >= 64
|
| No normalization necessary
|
no_lp:
clrb %d1 |set no inex2 reported
movel FP_SCR2+LOCAL_GRS(%a6),%d0 |restore original g,r,s
rts
|
| case (0<d1<32)
|
| Entry: d0 = threshold, d1 = shift count.  Each new longword is taken
| as a 32-bit field that starts (32 - d1) bits into the longword
| before it.  This assumes LOCAL_EX, LOCAL_HI, LOCAL_LO (and LOCAL_GRS
| in FP_SCR2) are consecutive longwords - layout is defined in fpsp.h,
| TODO confirm.
|
case_1:
movel %d2,-(%sp)
movew %d0,LOCAL_EX(%a0) |exponent = denorm threshold
movel #32,%d0
subw %d1,%d0 |d0 = 32 - d1
bfextu LOCAL_EX(%a0){%d0:#32},%d2 |field straddling LOCAL_EX and LOCAL_HI
bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_HI
bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new LOCAL_LO
bfextu FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 |d0 = new G,R,S
movel %d2,LOCAL_HI(%a0) |store new LOCAL_HI
movel %d1,LOCAL_LO(%a0) |store new LOCAL_LO
clrb %d1 |assume exact
bftst %d0{#2:#30} |anything below the g and r positions?
beqs c1nstky
bsetl #rnd_stky_bit,%d0 |yes: set sticky
st %d1 |and flag inexact (d1.b = all ones)
c1nstky:
movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s
andil #0xe0000000,%d2 |clear all but G,R,S
tstl %d2 |test if original G,R,S are clear
beqs grs_clear
orl #0x20000000,%d0 |set sticky bit in d0
grs_clear:
andil #0xe0000000,%d0 |clear all but G,R,S
movel (%sp)+,%d2
rts
|
| case (32<=d1<64)
|
| Entry: d0 = threshold, d1 = shift count.  LOCAL_HI becomes zero, the
| new LOCAL_LO comes from the top of the old LOCAL_HI, and g,r,s come
| from the bits shifted out of it.
|
case_2:
movel %d2,-(%sp)
movew %d0,LOCAL_EX(%a0) |unsigned exponent = threshold
subw #32,%d1 |d1 now between 0 and 31
movel #32,%d0
subw %d1,%d0 |d0 = 32 - d1
bfextu LOCAL_EX(%a0){%d0:#32},%d2
bfextu %d2{%d1:%d0},%d2 |d2 = new LOCAL_LO
bfextu LOCAL_HI(%a0){%d0:#32},%d1 |d1 = new G,R,S
bftst %d1{#2:#30}
bnes c2_sstky |bra if sticky bit to be set
bftst FP_SCR2+LOCAL_LO(%a6){%d0:#32}
bnes c2_sstky |bra if sticky bit to be set
movel %d1,%d0
clrb %d1 |exact: d1.b = 0
bras end_c2
c2_sstky:
movel %d1,%d0
bsetl #rnd_stky_bit,%d0
st %d1 |inexact: d1.b = all ones
end_c2:
clrl LOCAL_HI(%a0) |store LOCAL_HI = 0
movel %d2,LOCAL_LO(%a0) |store LOCAL_LO
movel FP_SCR2+LOCAL_GRS(%a6),%d2 |restore original g,r,s
andil #0xe0000000,%d2 |clear all but G,R,S
tstl %d2 |test if original G,R,S are clear
beqs clear_grs
orl #0x20000000,%d0 |set sticky bit in d0
clear_grs:
andil #0xe0000000,%d0 |get rid of all but G,R,S
movel (%sp)+,%d2
rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
| Entry: d0 = threshold, d1 = shift count.  The mantissa always ends
| up zero; only shift counts of 64 and 65 can still leave g (and r)
| bits, taken from the top of the old LOCAL_HI.
|
case_3:
movew %d0,LOCAL_EX(%a0)
tstw LOCAL_SGN(%a0)
bges c3con
c3neg:
orl #0x80000000,LOCAL_EX(%a0) |negative: set the top bit of the longword at LOCAL_EX
c3con:
cmpw #64,%d1
beqs sixty_four
cmpw #65,%d1
beqs sixty_five
|
| Shift value is out of range. Set d1 for inex2 flag and
| return a zero with the given threshold.
|
clrl LOCAL_HI(%a0)
clrl LOCAL_LO(%a0)
movel #0x20000000,%d0 |sticky only
st %d1
rts

sixty_four:
movel LOCAL_HI(%a0),%d0
bfextu %d0{#2:#30},%d1 |d1 = bits below the top two (feed sticky)
andil #0xc0000000,%d0 |top two bits become g and r
bras c3com
sixty_five:
movel LOCAL_HI(%a0),%d0
bfextu %d0{#1:#31},%d1 |d1 = bits below the top one (feed sticky)
andil #0x80000000,%d0
lsrl #1,%d0 |shift high bit into R bit

c3com:
tstl %d1
bnes c3ssticky
tstl LOCAL_LO(%a0)
bnes c3ssticky
tstb FP_SCR2+LOCAL_GRS(%a6) |first byte of the saved g,r,s longword
bnes c3ssticky
clrb %d1 |exact: d1.b = 0
bras c3end

c3ssticky:
bsetl #rnd_stky_bit,%d0
st %d1 |inexact: d1.b = all ones
c3end:
clrl LOCAL_HI(%a0)
clrl LOCAL_LO(%a0)
rts
 
|end
/binstr.S
0,0 → 1,140
|
| binstr.sa 3.3 12/19/90
|
|
| Description: Converts a 64-bit binary integer to bcd.
|
| Input: 64-bit binary integer in d2:d3, desired length (LEN) in
| d0, and a pointer to start in memory for bcd characters
| in a0. (This pointer must point to byte 4 of the first
| lword of the packed decimal memory string.)
|
| Output: LEN bcd digits representing the 64-bit integer.
|
| Algorithm:
| The 64-bit binary is assumed to have a decimal point before
| bit 63. The fraction is multiplied by 10 using a mul by 2
| shift and a mul by 8 shift. The bits shifted out of the
| msb form a decimal digit. This process is iterated until
| LEN digits are formed.
|
| A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the
| digit formed will be assumed the least significant. This is
| to force the first byte formed to have a 0 in the upper 4 bits.
|
| A2. Beginning of the loop:
| Copy the fraction in d2:d3 to d4:d5.
|
| A3. Multiply the fraction in d2:d3 by 8 using bit-field
| extracts and shifts. The three msbs from d2 will go into
| d1.
|
| A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb
| will be collected by the carry.
|
| A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5
| into d2:d3. D1 will contain the bcd digit formed.
|
| A6. Test d7. If zero, the digit formed is the ms digit. If non-
| zero, it is the ls digit. Put the digit in its place in the
| upper word of d7. If it is the ls digit, write the byte
| from d7 to memory.
|
| A7. Decrement d0 (LEN counter) and repeat the loop until all LEN digits are formed.
|
| Implementation Notes:
|
| The registers are used as follows:
|
| d0: LEN counter
| d1: temp used to form the digit
| d2: upper 32-bits of fraction for mul by 8
| d3: lower 32-bits of fraction for mul by 8
| d4: upper 32-bits of fraction for mul by 2
| d5: lower 32-bits of fraction for mul by 2
| d6: temp for bit-field extracts
| d7: byte digit formation word;digit count {0,1}
| a0: pointer into memory for packed bcd string formation
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|BINSTR idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|
| binstr --- convert the 64-bit fraction in d2:d3 to LEN bcd digits
|
| In:  d2:d3 = fraction, d0 = LEN, a0 = address of the first byte to
|      write.
| Out: packed digits stored through a0, which is left advanced past
|      the last byte written.  d0-d7 are saved and restored.
|
| d7 low word  = flag: 0 when the next digit is the high nibble of a
|                byte, non-zero when it is the low nibble.
| d7 high word = the high-nibble digit waiting for its partner.
|
.global binstr
binstr:
moveml %d0-%d7,-(%a7)
|
| A1: Init d7
|
moveql #1,%d7 |init d7 for second digit
subql #1,%d0 |for dbf d0 would have LEN+1 passes
|
| A2. Copy d2:d3 to d4:d5. Start loop.
|
loop:
movel %d2,%d4 |copy the fraction before muls
movel %d3,%d5 |to d4:d5
|
| A3. Multiply d2:d3 by 8; extract msbs into d1.
|
bfextu %d2{#0:#3},%d1 |copy 3 msbs of d2 into d1
asll #3,%d2 |shift d2 left by 3 places
bfextu %d3{#0:#3},%d6 |copy 3 msbs of d3 into d6
asll #3,%d3 |shift d3 left by 3 places
orl %d6,%d2 |or in msbs from d3 into d2
|
| A4. Multiply d4:d5 by 2; add carry out to d1.
|
asll #1,%d5 |mul d5 by 2
roxll #1,%d4 |mul d4 by 2
swap %d6 |put 0 in d6 lower word
addxw %d6,%d1 |add in extend from mul by 2
|
| A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
|
addl %d5,%d3 |add lower 32 bits
nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
addxl %d4,%d2 |add with extend upper 32 bits
nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
addxw %d6,%d1 |add in extend from add to d1
swap %d6 |with d6 = 0; put 0 in upper word
|
| A6. Test d7 and branch.
|
tstw %d7 |if zero, store digit & to loop
beqs first_d |if non-zero, form byte & write
sec_d:
swap %d7 |bring first digit to low word of d7
aslw #4,%d7 |first digit in upper 4 bits of the byte
addw %d1,%d7 |add in ls digit
moveb %d7,(%a0)+ |store the completed byte in memory
swap %d7 |bring the digit flag back to the low word
clrw %d7 |flag = 0: no digit pending
dbf %d0,loop |do loop some more!
bras end_bstr |finished, so exit
first_d:
swap %d7 |bring the digit word to the low word
movew %d1,%d7 |save the new digit there
swap %d7 |digit to high word, flag back to low word
addqw #1,%d7 |flag = 1: first digit done
dbf %d0,loop |do loop some more!
swap %d7 |put last digit in string
lslw #4,%d7 |move it to upper 4 bits
moveb %d7,(%a0)+ |store it in memory string
|
| Restore d0-d7 and return; the digits were written through a0.
|
end_bstr:
moveml (%a7)+,%d0-%d7
rts
|end
/tbldo.S
0,0 → 1,554
|
| tbldo.sa 3.1 12/10/90
|
| Modified:
| 8/16/90 chinds The table was constructed to use only one level
| of indirection in do_func for monadic
| functions. Dyadic functions require two
| levels, and the tables are still contained
| in do_func. The table is arranged for
| index with a 10-bit index, with the first
| 7 bits the opcode, and the remaining 3
| the stag. For dyadic functions, all
| valid addresses are to the generic entry
| point.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|TBLDO idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
|xref ld_pinf,ld_pone,ld_ppi2
|xref t_dz2,t_operr
|xref serror,sone,szero,sinf,snzrinx
|xref sopr_inf,spi_2,src_nan,szr_inf
 
|xref smovcr
|xref pmod,prem,pscale
|xref satanh,satanhd
|xref sacos,sacosd,sasin,sasind,satan,satand
|xref setox,setoxd,setoxm1,setoxm1d,setoxm1i
|xref sgetexp,sgetexpd,sgetman,sgetmand
|xref sint,sintd,sintrz
|xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz
|xref scos,scosd,ssin,ssind,stan,stand
|xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd
|xref sslog10,sslog2,sslogn,sslognp1
|xref sslog10d,sslog2d,sslognd,slognp1d
|xref stentox,stentoxd,stwotox,stwotoxd
 
| instruction ;opcode-stag Notes
.global tblpre
tblpre:
.long smovcr |$00-0 fmovecr all
.long smovcr |$00-1 fmovecr all
.long smovcr |$00-2 fmovecr all
.long smovcr |$00-3 fmovecr all
.long smovcr |$00-4 fmovecr all
.long smovcr |$00-5 fmovecr all
.long smovcr |$00-6 fmovecr all
.long smovcr |$00-7 fmovecr all
 
.long sint |$01-0 fint norm
.long szero |$01-1 fint zero
.long sinf |$01-2 fint inf
.long src_nan |$01-3 fint nan
.long sintd |$01-4 fint denorm inx
.long serror |$01-5 fint ERROR
.long serror |$01-6 fint ERROR
.long serror |$01-7 fint ERROR
 
.long ssinh |$02-0 fsinh norm
.long szero |$02-1 fsinh zero
.long sinf |$02-2 fsinh inf
.long src_nan |$02-3 fsinh nan
.long ssinhd |$02-4 fsinh denorm
.long serror |$02-5 fsinh ERROR
.long serror |$02-6 fsinh ERROR
.long serror |$02-7 fsinh ERROR
 
.long sintrz |$03-0 fintrz norm
.long szero |$03-1 fintrz zero
.long sinf |$03-2 fintrz inf
.long src_nan |$03-3 fintrz nan
.long snzrinx |$03-4 fintrz denorm inx
.long serror |$03-5 fintrz ERROR
.long serror |$03-6 fintrz ERROR
.long serror |$03-7 fintrz ERROR
 
.long serror |$04-0 ERROR - illegal extension
.long serror |$04-1 ERROR - illegal extension
.long serror |$04-2 ERROR - illegal extension
.long serror |$04-3 ERROR - illegal extension
.long serror |$04-4 ERROR - illegal extension
.long serror |$04-5 ERROR - illegal extension
.long serror |$04-6 ERROR - illegal extension
.long serror |$04-7 ERROR - illegal extension
 
.long serror |$05-0 ERROR - illegal extension
.long serror |$05-1 ERROR - illegal extension
.long serror |$05-2 ERROR - illegal extension
.long serror |$05-3 ERROR - illegal extension
.long serror |$05-4 ERROR - illegal extension
.long serror |$05-5 ERROR - illegal extension
.long serror |$05-6 ERROR - illegal extension
.long serror |$05-7 ERROR - illegal extension
 
.long sslognp1 |$06-0 flognp1 norm
.long szero |$06-1 flognp1 zero
.long sopr_inf |$06-2 flognp1 inf
.long src_nan |$06-3 flognp1 nan
.long slognp1d |$06-4 flognp1 denorm
.long serror |$06-5 flognp1 ERROR
.long serror |$06-6 flognp1 ERROR
.long serror |$06-7 flognp1 ERROR
 
.long serror |$07-0 ERROR - illegal extension
.long serror |$07-1 ERROR - illegal extension
.long serror |$07-2 ERROR - illegal extension
.long serror |$07-3 ERROR - illegal extension
.long serror |$07-4 ERROR - illegal extension
.long serror |$07-5 ERROR - illegal extension
.long serror |$07-6 ERROR - illegal extension
.long serror |$07-7 ERROR - illegal extension
 
.long setoxm1 |$08-0 fetoxm1 norm
.long szero |$08-1 fetoxm1 zero
.long setoxm1i |$08-2 fetoxm1 inf
.long src_nan |$08-3 fetoxm1 nan
.long setoxm1d |$08-4 fetoxm1 denorm
.long serror |$08-5 fetoxm1 ERROR
.long serror |$08-6 fetoxm1 ERROR
.long serror |$08-7 fetoxm1 ERROR
 
.long stanh |$09-0 ftanh norm
.long szero |$09-1 ftanh zero
.long sone |$09-2 ftanh inf
.long src_nan |$09-3 ftanh nan
.long stanhd |$09-4 ftanh denorm
.long serror |$09-5 ftanh ERROR
.long serror |$09-6 ftanh ERROR
.long serror |$09-7 ftanh ERROR
 
.long satan |$0a-0 fatan norm
.long szero |$0a-1 fatan zero
.long spi_2 |$0a-2 fatan inf
.long src_nan |$0a-3 fatan nan
.long satand |$0a-4 fatan denorm
.long serror |$0a-5 fatan ERROR
.long serror |$0a-6 fatan ERROR
.long serror |$0a-7 fatan ERROR
 
.long serror |$0b-0 ERROR - illegal extension
.long serror |$0b-1 ERROR - illegal extension
.long serror |$0b-2 ERROR - illegal extension
.long serror |$0b-3 ERROR - illegal extension
.long serror |$0b-4 ERROR - illegal extension
.long serror |$0b-5 ERROR - illegal extension
.long serror |$0b-6 ERROR - illegal extension
.long serror |$0b-7 ERROR - illegal extension
 
.long sasin |$0c-0 fasin norm
.long szero |$0c-1 fasin zero
.long t_operr |$0c-2 fasin inf
.long src_nan |$0c-3 fasin nan
.long sasind |$0c-4 fasin denorm
.long serror |$0c-5 fasin ERROR
.long serror |$0c-6 fasin ERROR
.long serror |$0c-7 fasin ERROR
 
.long satanh |$0d-0 fatanh norm
.long szero |$0d-1 fatanh zero
.long t_operr |$0d-2 fatanh inf
.long src_nan |$0d-3 fatanh nan
.long satanhd |$0d-4 fatanh denorm
.long serror |$0d-5 fatanh ERROR
.long serror |$0d-6 fatanh ERROR
.long serror |$0d-7 fatanh ERROR
 
.long ssin |$0e-0 fsin norm
.long szero |$0e-1 fsin zero
.long t_operr |$0e-2 fsin inf
.long src_nan |$0e-3 fsin nan
.long ssind |$0e-4 fsin denorm
.long serror |$0e-5 fsin ERROR
.long serror |$0e-6 fsin ERROR
.long serror |$0e-7 fsin ERROR
 
.long stan |$0f-0 ftan norm
.long szero |$0f-1 ftan zero
.long t_operr |$0f-2 ftan inf
.long src_nan |$0f-3 ftan nan
.long stand |$0f-4 ftan denorm
.long serror |$0f-5 ftan ERROR
.long serror |$0f-6 ftan ERROR
.long serror |$0f-7 ftan ERROR
 
.long setox |$10-0 fetox norm
.long ld_pone |$10-1 fetox zero
.long szr_inf |$10-2 fetox inf
.long src_nan |$10-3 fetox nan
.long setoxd |$10-4 fetox denorm
.long serror |$10-5 fetox ERROR
.long serror |$10-6 fetox ERROR
.long serror |$10-7 fetox ERROR
 
.long stwotox |$11-0 ftwotox norm
.long ld_pone |$11-1 ftwotox zero
.long szr_inf |$11-2 ftwotox inf
.long src_nan |$11-3 ftwotox nan
.long stwotoxd |$11-4 ftwotox denorm
.long serror |$11-5 ftwotox ERROR
.long serror |$11-6 ftwotox ERROR
.long serror |$11-7 ftwotox ERROR
 
.long stentox |$12-0 ftentox norm
.long ld_pone |$12-1 ftentox zero
.long szr_inf |$12-2 ftentox inf
.long src_nan |$12-3 ftentox nan
.long stentoxd |$12-4 ftentox denorm
.long serror |$12-5 ftentox ERROR
.long serror |$12-6 ftentox ERROR
.long serror |$12-7 ftentox ERROR
 
.long serror |$13-0 ERROR - illegal extension
.long serror |$13-1 ERROR - illegal extension
.long serror |$13-2 ERROR - illegal extension
.long serror |$13-3 ERROR - illegal extension
.long serror |$13-4 ERROR - illegal extension
.long serror |$13-5 ERROR - illegal extension
.long serror |$13-6 ERROR - illegal extension
.long serror |$13-7 ERROR - illegal extension
 
.long sslogn |$14-0 flogn norm
.long t_dz2 |$14-1 flogn zero
.long sopr_inf |$14-2 flogn inf
.long src_nan |$14-3 flogn nan
.long sslognd |$14-4 flogn denorm
.long serror |$14-5 flogn ERROR
.long serror |$14-6 flogn ERROR
.long serror |$14-7 flogn ERROR
 
.long sslog10 |$15-0 flog10 norm
.long t_dz2 |$15-1 flog10 zero
.long sopr_inf |$15-2 flog10 inf
.long src_nan |$15-3 flog10 nan
.long sslog10d |$15-4 flog10 denorm
.long serror |$15-5 flog10 ERROR
.long serror |$15-6 flog10 ERROR
.long serror |$15-7 flog10 ERROR
 
.long sslog2 |$16-0 flog2 norm
.long t_dz2 |$16-1 flog2 zero
.long sopr_inf |$16-2 flog2 inf
.long src_nan |$16-3 flog2 nan
.long sslog2d |$16-4 flog2 denorm
.long serror |$16-5 flog2 ERROR
.long serror |$16-6 flog2 ERROR
.long serror |$16-7 flog2 ERROR
 
.long serror |$17-0 ERROR - illegal extension
.long serror |$17-1 ERROR - illegal extension
.long serror |$17-2 ERROR - illegal extension
.long serror |$17-3 ERROR - illegal extension
.long serror |$17-4 ERROR - illegal extension
.long serror |$17-5 ERROR - illegal extension
.long serror |$17-6 ERROR - illegal extension
.long serror |$17-7 ERROR - illegal extension
 
.long serror |$18-0 ERROR - illegal extension
.long serror |$18-1 ERROR - illegal extension
.long serror |$18-2 ERROR - illegal extension
.long serror |$18-3 ERROR - illegal extension
.long serror |$18-4 ERROR - illegal extension
.long serror |$18-5 ERROR - illegal extension
.long serror |$18-6 ERROR - illegal extension
.long serror |$18-7 ERROR - illegal extension
 
.long scosh |$19-0 fcosh norm
.long ld_pone |$19-1 fcosh zero
.long ld_pinf |$19-2 fcosh inf
.long src_nan |$19-3 fcosh nan
.long scoshd |$19-4 fcosh denorm
.long serror |$19-5 fcosh ERROR
.long serror |$19-6 fcosh ERROR
.long serror |$19-7 fcosh ERROR
 
.long serror |$1a-0 ERROR - illegal extension
.long serror |$1a-1 ERROR - illegal extension
.long serror |$1a-2 ERROR - illegal extension
.long serror |$1a-3 ERROR - illegal extension
.long serror |$1a-4 ERROR - illegal extension
.long serror |$1a-5 ERROR - illegal extension
.long serror |$1a-6 ERROR - illegal extension
.long serror |$1a-7 ERROR - illegal extension
 
.long serror |$1b-0 ERROR - illegal extension
.long serror |$1b-1 ERROR - illegal extension
.long serror |$1b-2 ERROR - illegal extension
.long serror |$1b-3 ERROR - illegal extension
.long serror |$1b-4 ERROR - illegal extension
.long serror |$1b-5 ERROR - illegal extension
.long serror |$1b-6 ERROR - illegal extension
.long serror |$1b-7 ERROR - illegal extension
 
.long sacos |$1c-0 facos norm
.long ld_ppi2 |$1c-1 facos zero
.long t_operr |$1c-2 facos inf
.long src_nan |$1c-3 facos nan
.long sacosd |$1c-4 facos denorm
.long serror |$1c-5 facos ERROR
.long serror |$1c-6 facos ERROR
.long serror |$1c-7 facos ERROR
 
.long scos |$1d-0 fcos norm
.long ld_pone |$1d-1 fcos zero
.long t_operr |$1d-2 fcos inf
.long src_nan |$1d-3 fcos nan
.long scosd |$1d-4 fcos denorm
.long serror |$1d-5 fcos ERROR
.long serror |$1d-6 fcos ERROR
.long serror |$1d-7 fcos ERROR
 
.long sgetexp |$1e-0 fgetexp norm
.long szero |$1e-1 fgetexp zero
.long t_operr |$1e-2 fgetexp inf
.long src_nan |$1e-3 fgetexp nan
.long sgetexpd |$1e-4 fgetexp denorm
.long serror |$1e-5 fgetexp ERROR
.long serror |$1e-6 fgetexp ERROR
.long serror |$1e-7 fgetexp ERROR
 
.long sgetman |$1f-0 fgetman norm
.long szero |$1f-1 fgetman zero
.long t_operr |$1f-2 fgetman inf
.long src_nan |$1f-3 fgetman nan
.long sgetmand |$1f-4 fgetman denorm
.long serror |$1f-5 fgetman ERROR
.long serror |$1f-6 fgetman ERROR
.long serror |$1f-7 fgetman ERROR
 
.long serror |$20-0 ERROR - illegal extension
.long serror |$20-1 ERROR - illegal extension
.long serror |$20-2 ERROR - illegal extension
.long serror |$20-3 ERROR - illegal extension
.long serror |$20-4 ERROR - illegal extension
.long serror |$20-5 ERROR - illegal extension
.long serror |$20-6 ERROR - illegal extension
.long serror |$20-7 ERROR - illegal extension
 
.long pmod |$21-0 fmod all
.long pmod |$21-1 fmod all
.long pmod |$21-2 fmod all
.long pmod |$21-3 fmod all
.long pmod |$21-4 fmod all
.long serror |$21-5 fmod ERROR
.long serror |$21-6 fmod ERROR
.long serror |$21-7 fmod ERROR
 
.long serror |$22-0 ERROR - illegal extension
.long serror |$22-1 ERROR - illegal extension
.long serror |$22-2 ERROR - illegal extension
.long serror |$22-3 ERROR - illegal extension
.long serror |$22-4 ERROR - illegal extension
.long serror |$22-5 ERROR - illegal extension
.long serror |$22-6 ERROR - illegal extension
.long serror |$22-7 ERROR - illegal extension
 
.long serror |$23-0 ERROR - illegal extension
.long serror |$23-1 ERROR - illegal extension
.long serror |$23-2 ERROR - illegal extension
.long serror |$23-3 ERROR - illegal extension
.long serror |$23-4 ERROR - illegal extension
.long serror |$23-5 ERROR - illegal extension
.long serror |$23-6 ERROR - illegal extension
.long serror |$23-7 ERROR - illegal extension
 
.long serror |$24-0 ERROR - illegal extension
.long serror |$24-1 ERROR - illegal extension
.long serror |$24-2 ERROR - illegal extension
.long serror |$24-3 ERROR - illegal extension
.long serror |$24-4 ERROR - illegal extension
.long serror |$24-5 ERROR - illegal extension
.long serror |$24-6 ERROR - illegal extension
.long serror |$24-7 ERROR - illegal extension
 
.long prem |$25-0 frem all
.long prem |$25-1 frem all
.long prem |$25-2 frem all
.long prem |$25-3 frem all
.long prem |$25-4 frem all
.long serror |$25-5 frem ERROR
.long serror |$25-6 frem ERROR
.long serror |$25-7 frem ERROR
 
.long pscale |$26-0 fscale all
.long pscale |$26-1 fscale all
.long pscale |$26-2 fscale all
.long pscale |$26-3 fscale all
.long pscale |$26-4 fscale all
.long serror |$26-5 fscale ERROR
.long serror |$26-6 fscale ERROR
.long serror |$26-7 fscale ERROR
 
.long serror |$27-0 ERROR - illegal extension
.long serror |$27-1 ERROR - illegal extension
.long serror |$27-2 ERROR - illegal extension
.long serror |$27-3 ERROR - illegal extension
.long serror |$27-4 ERROR - illegal extension
.long serror |$27-5 ERROR - illegal extension
.long serror |$27-6 ERROR - illegal extension
.long serror |$27-7 ERROR - illegal extension
 
.long serror |$28-0 ERROR - illegal extension
.long serror |$28-1 ERROR - illegal extension
.long serror |$28-2 ERROR - illegal extension
.long serror |$28-3 ERROR - illegal extension
.long serror |$28-4 ERROR - illegal extension
.long serror |$28-5 ERROR - illegal extension
.long serror |$28-6 ERROR - illegal extension
.long serror |$28-7 ERROR - illegal extension
 
.long serror |$29-0 ERROR - illegal extension
.long serror |$29-1 ERROR - illegal extension
.long serror |$29-2 ERROR - illegal extension
.long serror |$29-3 ERROR - illegal extension
.long serror |$29-4 ERROR - illegal extension
.long serror |$29-5 ERROR - illegal extension
.long serror |$29-6 ERROR - illegal extension
.long serror |$29-7 ERROR - illegal extension
 
.long serror |$2a-0 ERROR - illegal extension
.long serror |$2a-1 ERROR - illegal extension
.long serror |$2a-2 ERROR - illegal extension
.long serror |$2a-3 ERROR - illegal extension
.long serror |$2a-4 ERROR - illegal extension
.long serror |$2a-5 ERROR - illegal extension
.long serror |$2a-6 ERROR - illegal extension
.long serror |$2a-7 ERROR - illegal extension
 
.long serror |$2b-0 ERROR - illegal extension
.long serror |$2b-1 ERROR - illegal extension
.long serror |$2b-2 ERROR - illegal extension
.long serror |$2b-3 ERROR - illegal extension
.long serror |$2b-4 ERROR - illegal extension
.long serror |$2b-5 ERROR - illegal extension
.long serror |$2b-6 ERROR - illegal extension
.long serror |$2b-7 ERROR - illegal extension
 
.long serror |$2c-0 ERROR - illegal extension
.long serror |$2c-1 ERROR - illegal extension
.long serror |$2c-2 ERROR - illegal extension
.long serror |$2c-3 ERROR - illegal extension
.long serror |$2c-4 ERROR - illegal extension
.long serror |$2c-5 ERROR - illegal extension
.long serror |$2c-6 ERROR - illegal extension
.long serror |$2c-7 ERROR - illegal extension
 
.long serror |$2d-0 ERROR - illegal extension
.long serror |$2d-1 ERROR - illegal extension
.long serror |$2d-2 ERROR - illegal extension
.long serror |$2d-3 ERROR - illegal extension
.long serror |$2d-4 ERROR - illegal extension
.long serror |$2d-5 ERROR - illegal extension
.long serror |$2d-6 ERROR - illegal extension
.long serror |$2d-7 ERROR - illegal extension
 
.long serror |$2e-0 ERROR - illegal extension
.long serror |$2e-1 ERROR - illegal extension
.long serror |$2e-2 ERROR - illegal extension
.long serror |$2e-3 ERROR - illegal extension
.long serror |$2e-4 ERROR - illegal extension
.long serror |$2e-5 ERROR - illegal extension
.long serror |$2e-6 ERROR - illegal extension
.long serror |$2e-7 ERROR - illegal extension
 
.long serror |$2f-0 ERROR - illegal extension
.long serror |$2f-1 ERROR - illegal extension
.long serror |$2f-2 ERROR - illegal extension
.long serror |$2f-3 ERROR - illegal extension
.long serror |$2f-4 ERROR - illegal extension
.long serror |$2f-5 ERROR - illegal extension
.long serror |$2f-6 ERROR - illegal extension
.long serror |$2f-7 ERROR - illegal extension
 
.long ssincos |$30-0 fsincos norm
.long ssincosz |$30-1 fsincos zero
.long ssincosi |$30-2 fsincos inf
.long ssincosnan |$30-3 fsincos nan
.long ssincosd |$30-4 fsincos denorm
.long serror |$30-5 fsincos ERROR
.long serror |$30-6 fsincos ERROR
.long serror |$30-7 fsincos ERROR
 
.long ssincos |$31-0 fsincos norm
.long ssincosz |$31-1 fsincos zero
.long ssincosi |$31-2 fsincos inf
.long ssincosnan |$31-3 fsincos nan
.long ssincosd |$31-4 fsincos denorm
.long serror |$31-5 fsincos ERROR
.long serror |$31-6 fsincos ERROR
.long serror |$31-7 fsincos ERROR
 
.long ssincos |$32-0 fsincos norm
.long ssincosz |$32-1 fsincos zero
.long ssincosi |$32-2 fsincos inf
.long ssincosnan |$32-3 fsincos nan
.long ssincosd |$32-4 fsincos denorm
.long serror |$32-5 fsincos ERROR
.long serror |$32-6 fsincos ERROR
.long serror |$32-7 fsincos ERROR
 
.long ssincos |$33-0 fsincos norm
.long ssincosz |$33-1 fsincos zero
.long ssincosi |$33-2 fsincos inf
.long ssincosnan |$33-3 fsincos nan
.long ssincosd |$33-4 fsincos denorm
.long serror |$33-5 fsincos ERROR
.long serror |$33-6 fsincos ERROR
.long serror |$33-7 fsincos ERROR
 
.long ssincos |$34-0 fsincos norm
.long ssincosz |$34-1 fsincos zero
.long ssincosi |$34-2 fsincos inf
.long ssincosnan |$34-3 fsincos nan
.long ssincosd |$34-4 fsincos denorm
.long serror |$34-5 fsincos ERROR
.long serror |$34-6 fsincos ERROR
.long serror |$34-7 fsincos ERROR
 
.long ssincos |$35-0 fsincos norm
.long ssincosz |$35-1 fsincos zero
.long ssincosi |$35-2 fsincos inf
.long ssincosnan |$35-3 fsincos nan
.long ssincosd |$35-4 fsincos denorm
.long serror |$35-5 fsincos ERROR
.long serror |$35-6 fsincos ERROR
.long serror |$35-7 fsincos ERROR
 
.long ssincos |$36-0 fsincos norm
.long ssincosz |$36-1 fsincos zero
.long ssincosi |$36-2 fsincos inf
.long ssincosnan |$36-3 fsincos nan
.long ssincosd |$36-4 fsincos denorm
.long serror |$36-5 fsincos ERROR
.long serror |$36-6 fsincos ERROR
.long serror |$36-7 fsincos ERROR
 
.long ssincos |$37-0 fsincos norm
.long ssincosz |$37-1 fsincos zero
.long ssincosi |$37-2 fsincos inf
.long ssincosnan |$37-3 fsincos nan
.long ssincosd |$37-4 fsincos denorm
.long serror |$37-5 fsincos ERROR
.long serror |$37-6 fsincos ERROR
.long serror |$37-7 fsincos ERROR
 
|end
/srem_mod.S
0,0 → 1,422
|
| srem_mod.sa 3.1 12/10/90
|
| The entry point sMOD computes the floating point MOD of the
| input values X and Y. The entry point sREM computes the floating
| point (IEEE) REM of the input values X and Y.
|
| INPUT
| -----
| Double-extended value Y is pointed to by address in register
| A0. Double-extended value X is located in -12(A0). The values
| of X and Y are both nonzero and finite; although either or both
| of them can be denormalized. The special cases of zeros, NaNs,
| and infinities are handled elsewhere.
|
| OUTPUT
| ------
| FREM(X,Y) or FMOD(X,Y), depending on entry point.
|
| ALGORITHM
| ---------
|
| Step 1. Save and strip signs of X and Y: signX := sign(X),
| signY := sign(Y), X := |X|, Y := |Y|,
| signQ := signX EOR signY. Record whether MOD or REM
| is requested.
|
| Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0.
| If (L < 0) then
| R := X, go to Step 4.
| else
| R := 2^(-L)X, j := L.
| endif
|
| Step 3. Perform MOD(X,Y)
| 3.1 If R = Y, go to Step 9.
| 3.2 If R > Y, then { R := R - Y, Q := Q + 1}
| 3.3 If j = 0, go to Step 4.
| 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
| Step 3.1.
|
| Step 4. At this point, R = X - QY = MOD(X,Y). Set
| Last_Subtract := false (used in Step 7 below). If
| MOD is requested, go to Step 6.
|
| Step 5. R = MOD(X,Y), but REM(X,Y) is requested.
| 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
| Step 6.
| 5.2 If R > Y/2, then { set Last_Subtract := true,
| Q := Q + 1, Y := signY*Y }. Go to Step 6.
| 5.3 This is the tricky case of R = Y/2. If Q is odd,
| then { Q := Q + 1, signX := -signX }.
|
| Step 6. R := signX*R.
|
| Step 7. If Last_Subtract = true, R := R - Y.
|
| Step 8. Return signQ, last 7 bits of Q, and R as required.
|
| Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus,
| X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
| R := 0. Return signQ, last 7 bits of Q, and R.
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
SREM_MOD: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|
| Scratch storage used by smod/srem. Every symbol below is an offset
| that the code applies to a6; the L_SCRn/FP_SCRn bases come from fpsp.h.
|

|..control flags
.set Mod_Flag,L_SCR3 | ...0 = MOD requested, 1 = REM requested
.set Sc_Flag,FP_SCR4 | ...1 = result must still be multiplied by Scale

|..sign/exponent words saved while the operands are unpacked
.set SignX,FP_SCR3+8
.set SignY,FP_SCR3+4
.set SignQ,FP_SCR3+12 | ...only bit 15 (signX EOR signY) is kept

|..R rebuilt in memory as an extended-precision operand
.set R,FP_SCR2
.set R_Hi,R+4
.set R_Lo,R+8

|..|Y| rebuilt in memory as an extended-precision operand
.set Y,FP_SCR1
.set Y_Hi,Y+4
.set Y_Lo,Y+8

|..Extended-precision 2^(-16382): biased exponent 1, mantissa 1.0.
|..16382 = $3FFE is the extra exponent bias the result still carries
|..when Sc_Flag is set.
Scale: .long 0x00010000,0x80000000,0x00000000,0x00000000
 
|xref t_avoid_unsupp
 
|
| smod / srem -- the two public entry points.
| Each one only records the requested operation in Mod_Flag
| (0 = MOD, 1 = REM); all the work is done by the common code at
| Mod_Rem. srem must stay directly in front of Mod_Rem because it
| falls through into it.
|
.global smod
.global srem

smod:
clrl Mod_Flag(%a6) | ...Mod_Flag := 0, MOD requested
bras Mod_Rem

srem:
movel #1,Mod_Flag(%a6) | ...Mod_Flag := 1, REM requested
 
|
| Mod_Rem -- common body of smod/srem, part 1: unpack Y.
| a0 points at Y (X is at -12(a0), see the file header) and Mod_Flag
| has already been written by the entry point.
| Saves d2-d7 on the stack (restored at Restore), stores the
| sign/exponent word of Y in SignY and builds |Y| in
| (D3,D4,D5) = (exponent, hi mantissa, lo mantissa). When the exponent
| field is zero the mantissa is shifted left until its top bit is set
| and the exponent is reduced by the same amount.
| The exponent left in D3 always has $3FFE added to it.
|
Mod_Rem:
|..Save sign of X and Y
moveml %d2-%d7,-(%a7) | ...save data registers
movew (%a0),%d3
movew %d3,SignY(%a6)
andil #0x00007FFF,%d3 | ...Y := |Y|
| ...(the mask also clears the upper word
| ...of D3, which the word move left alone)
 
|
movel 4(%a0),%d4
movel 8(%a0),%d5 | ...(D3,D4,D5) is |Y|
 
tstl %d3 | ...exponent field 0 => needs normalizing
bnes Y_Normal
 
movel #0x00003FFE,%d3 | ...$3FFD + 1
tstl %d4
bnes HiY_not0
 
|..hi mantissa is zero: promote the lo mantissa by 32 bits, then shift
|..out the leading zeros that bfffo counts into D6
HiY_0:
movel %d5,%d4
clrl %d5
subil #32,%d3
clrl %d6
bfffo %d4{#0:#32},%d6
lsll %d6,%d4
subl %d6,%d3 | ...(D3,D4,D5) is normalized
| ...with bias $7FFD
bras Chk_X
 
|..hi mantissa is non-zero: 64-bit left shift of (D4,D5) by D6 bits.
|..D7 receives the bits that leave the top of D5 (D5 >> (32-D6)) and
|..they are ORed into the bottom of D4.
HiY_not0:
clrl %d6
bfffo %d4{#0:#32},%d6
subl %d6,%d3
lsll %d6,%d4
movel %d5,%d7 | ...a copy of D5
lsll %d6,%d5
negl %d6
addil #32,%d6
lsrl %d6,%d7
orl %d7,%d4 | ...(D3,D4,D5) normalized
| ...with bias $7FFD
bras Chk_X
 
Y_Normal:
addil #0x00003FFE,%d3 | ...(D3,D4,D5) normalized
| ...with bias $7FFD
 
|
| Chk_X -- part 2: unpack X and derive the sign of the quotient.
| Stores the sign/exponent word of X in SignX, writes
| (signX EOR signY) AND $8000 to SignQ, and builds |X| in
| (D0,D1,D2) = (exponent, hi mantissa, lo mantissa) using the same
| normalization scheme that was applied to Y in (D3,D4,D5).
| D6 and D7 are scratch here.
|
Chk_X:
movew -12(%a0),%d0
movew %d0,SignX(%a6)
movew SignY(%a6),%d1
eorl %d0,%d1
andil #0x00008000,%d1 | ...keep only the sign bit of the EOR
movew %d1,SignQ(%a6) | ...sign(Q) obtained
andil #0x00007FFF,%d0 | ...X := |X|, upper word of D0 cleared
movel -8(%a0),%d1
movel -4(%a0),%d2 | ...(D0,D1,D2) is |X|
tstl %d0 | ...exponent field 0 => needs normalizing
bnes X_Normal
movel #0x00003FFE,%d0
tstl %d1
bnes HiX_not0
 
|..hi mantissa is zero: promote the lo mantissa by 32 bits, then shift
|..out the leading zeros that bfffo counts into D6
HiX_0:
movel %d2,%d1
clrl %d2
subil #32,%d0
clrl %d6
bfffo %d1{#0:#32},%d6
lsll %d6,%d1
subl %d6,%d0 | ...(D0,D1,D2) is normalized
| ...with bias $7FFD
bras Init
 
|..hi mantissa is non-zero: 64-bit left shift of (D1,D2) by D6 bits,
|..carrying the bits that leave D2 into D1 through D7
HiX_not0:
clrl %d6
bfffo %d1{#0:#32},%d6
subl %d6,%d0
lsll %d6,%d1
movel %d2,%d7 | ...a copy of D2
lsll %d6,%d2
negl %d6
addil #32,%d6
lsrl %d6,%d7
orl %d7,%d1 | ...(D0,D1,D2) normalized
| ...with bias $7FFD
bras Init
 
X_Normal:
addil #0x00003FFE,%d0 | ...(D0,D1,D2) normalized
| ...with bias $7FFD
 
|
| Init -- Step 2 of the header: set up the loop state.
| Saves expo(Y) in L_SCR1 and expo(X) in L_SCR2, then forms
| D0 = j = L = expo(X)-expo(Y), D3 = Q = 0, D6 = carry = 0, A1 = k = 0.
| The mantissa of X in (D1,D2) becomes the initial R.
| If L < 0 no reduction is needed: D0 is reloaded with expo(X) and
| control goes straight to Get_Mod with R = X.
|
Init:
|
movel %d3,L_SCR1(%a6) | ...save biased expo(Y)
movel %d0,L_SCR2(%a6) |save d0
subl %d3,%d0 | ...L := expo(X)-expo(Y)
| Move.L D0,L ...D0 is j
clrl %d6 | ...D6 := carry <- 0
clrl %d3 | ...D3 is Q
moveal #0,%a1 | ...A1 is k; j+k=L, Q=0
 
|..(Carry,D1,D2) is R
tstl %d0
bges Mod_Loop
 
|..expo(X) < expo(Y). Thus X = mod(X,Y)
|
movel L_SCR2(%a6),%d0 |restore d0
bra Get_Mod
 
|..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
 
 
|
| Mod_Loop -- Step 3 of the header: shift-and-subtract reduction.
| Register roles inside the loop:
|   D0 = j (iterations left)     A1 = k (iterations done)
|   D3 = Q                       (D4,D5) = mantissa of Y
|   (D6,D1,D2) = R, where D6 holds the bit shifted out of the
|                64-bit mantissa by the previous doubling
| Exits: Rem_is_0 when R = Y exactly, PostLoop when j reaches 0.
|
| The instruction order is significant: bcss at R_NE_Y consumes the
| carry of the compare that branched to it, roxll consumes the X bit
| produced by the addl on D2 just before it, and scs consumes the carry
| produced by roxll.
|
Mod_Loop:
tstl %d6 | ...test carry bit
| ...(scs below writes $FF to the low byte of
| ...a cleared D6, so "carry set" reads as > 0)
bgts R_GT_Y
 
|..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
cmpl %d4,%d1 | ...compare hi(R) and hi(Y)
bnes R_NE_Y
cmpl %d5,%d2 | ...compare lo(R) and lo(Y)
bnes R_NE_Y
 
|..At this point, R = Y
bra Rem_is_0
 
R_NE_Y:
|..use the borrow of the previous compare
bcss R_LT_Y | ...borrow is set iff R < Y
 
R_GT_Y:
|..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
|..and Y < (D1,D2) < 2Y. Either way, perform R - Y
subl %d5,%d2 | ...lo(R) - lo(Y)
subxl %d4,%d1 | ...hi(R) - hi(Y)
clrl %d6 | ...clear carry
addql #1,%d3 | ...Q := Q + 1
 
R_LT_Y:
|..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
tstl %d0 | ...see if j = 0.
beqs PostLoop
 
addl %d3,%d3 | ...Q := 2Q
addl %d2,%d2 | ...lo(R) = 2lo(R)
roxll #1,%d1 | ...hi(R) = 2hi(R) + carry
scs %d6 | ...set Carry if 2(R) overflows
addql #1,%a1 | ...k := k+1
subql #1,%d0 | ...j := j - 1
|..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
 
bras Mod_Loop
 
|
| PostLoop -- the loop has finished with R = (D1,D2) < Y.
| Reloads the saved exponent of Y from L_SCR1 as the exponent of R and
| left-justifies the mantissa of R, lowering the exponent in D0 by the
| shift count. Falls through (or branches) to Get_Mod with
| (D0,D1,D2) = normalized R. D6 and D7 are scratch.
|
PostLoop:
|..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
 
|..normalize R.
movel L_SCR1(%a6),%d0 | ...new biased expo of R
tstl %d1
bnes HiR_not0
 
|..hi mantissa is zero: promote the lo mantissa by 32 bits, then shift
|..out the leading zeros that bfffo counts into D6
HiR_0:
movel %d2,%d1
clrl %d2
subil #32,%d0
clrl %d6
bfffo %d1{#0:#32},%d6
lsll %d6,%d1
subl %d6,%d0 | ...(D0,D1,D2) is normalized
| ...with bias $7FFD
bras Get_Mod
 
|..hi mantissa is non-zero: 64-bit left shift of (D1,D2) by D6 bits.
|..bmis must directly follow bfffo: it tests the N flag bfffo sets when
|..the top bit of D1 is already 1.
HiR_not0:
clrl %d6
bfffo %d1{#0:#32},%d6
bmis Get_Mod | ...already normalized
subl %d6,%d0
lsll %d6,%d1
movel %d2,%d7 | ...a copy of D2
lsll %d6,%d2
negl %d6
addil #32,%d6
lsrl %d6,%d7
orl %d7,%d1 | ...(D0,D1,D2) normalized
 
|
|
| Get_Mod -- rebuild R and |Y| as extended operands in memory and load
| R into fp0.
| D0 still carries the extra $3FFE that was added while unpacking.
|   D0 <  $41FE (Do_Scale): R and Y are stored with the extra $3FFE
|       still in their exponents and Sc_Flag := 1, so that Restore
|       multiplies the final result by Scale.
|   D0 >= $41FE (No_Scale): $3FFE is subtracted from both exponents (the
|       reduced expo(Y) is also written back to L_SCR1) and Sc_Flag := 0.
| Both paths leave D0 and L_SCR1 on the same scale as each other, which
| is what the exponent compare in ModOrRem relies on.
|
Get_Mod:
cmpil #0x000041FE,%d0
bges No_Scale
Do_Scale:
movew %d0,R(%a6)
clrw R+2(%a6)
movel %d1,R_Hi(%a6)
movel %d2,R_Lo(%a6)
movel L_SCR1(%a6),%d6
movew %d6,Y(%a6)
clrw Y+2(%a6)
movel %d4,Y_Hi(%a6)
movel %d5,Y_Lo(%a6)
fmovex R(%a6),%fp0 | ...no exception
movel #1,Sc_Flag(%a6)
bras ModOrRem
No_Scale:
movel %d1,R_Hi(%a6)
movel %d2,R_Lo(%a6)
subil #0x3FFE,%d0
movew %d0,R(%a6)
clrw R+2(%a6)
movel L_SCR1(%a6),%d6
subil #0x3FFE,%d6
movel %d6,L_SCR1(%a6)
fmovex R(%a6),%fp0
movew %d6,Y(%a6)
movel %d4,Y_Hi(%a6)
movel %d5,Y_Lo(%a6)
movel #0,Sc_Flag(%a6)
 
|
 
 
|
| ModOrRem -- Steps 4/5 of the header.
| For MOD (Mod_Flag = 0) nothing more is needed and control goes to
| Fix_Sign. For REM, R is compared with Y/2:
|   R < Y/2  -> Fix_Sign (R is already the remainder)
|   R > Y/2  -> Last_Sub (fp0 := R - Y, Q := Q + 1)
|   R = Y/2  -> Tie_Case
| Y/2 is represented by expo(Y)-1 with the unchanged mantissa (D4,D5),
| so the exponents are compared first and the mantissas only on a tie.
| bcss at Not_EQ consumes the carry of whichever compare branched to
| it, so nothing may be inserted in between.
|
ModOrRem:
movel Mod_Flag(%a6),%d6
beqs Fix_Sign
 
movel L_SCR1(%a6),%d6 | ...new biased expo(Y)
subql #1,%d6 | ...biased expo(Y/2)
cmpl %d6,%d0
blts Fix_Sign
bgts Last_Sub
 
cmpl %d4,%d1
bnes Not_EQ
cmpl %d5,%d2
bnes Not_EQ
bra Tie_Case
 
Not_EQ:
bcss Fix_Sign | ...borrow set iff R < Y/2
 
Last_Sub:
|
fsubx Y(%a6),%fp0 | ...no exceptions
addql #1,%d3 | ...Q := Q + 1
 
|
 
|
| Fix_Sign / Get_Q -- Steps 6 and 8 of the header.
| Fix_Sign negates fp0 when bit 15 of SignX is set.
| Get_Q packs sign(Q) and the low 7 bits of Q into one byte
| (SignQ bit 15 shifted down to bit 7, ORed with Q AND $7F), moves that
| byte to bits 23-16 with swap, and merges it into the FPSR after
| clearing those bits with the $FF00FFFF mask.
| D6 and D7 are scratch; D3 is consumed.
|
Fix_Sign:
|..Get sign of X
movew SignX(%a6),%d6 | ...the word move sets N from bit 15
bges Get_Q
fnegx %fp0
 
|..Get Q
|
Get_Q:
clrl %d6
movew SignQ(%a6),%d6 | ...D6 is sign(Q)
movel #8,%d7
lsrl %d7,%d6 | ...sign(Q) now in bit 7
andil #0x0000007F,%d3 | ...7 bits of Q
orl %d6,%d3 | ...sign and bits of Q
swap %d3 | ...move the byte to bits 23-16
fmovel %fpsr,%d6
andil #0xFF00FFFF,%d6
orl %d3,%d6
fmovel %d6,%fpsr | ...put Q in fpsr
 
|
|
| Restore / Finish -- common exit.
| Pops d2-d7 (pushed at Mod_Rem) and reloads the FPCR from USER_FPCR.
| If Sc_Flag is set, fp0 is multiplied by Scale (2^(-16382)) and
| control passes to t_avoid_unsupp, which is defined outside this file.
| Otherwise fp0 is moved onto itself under the restored FPCR and the
| routine returns with the result in fp0.
|
Restore:
moveml (%a7)+,%d2-%d7
fmovel USER_FPCR(%a6),%fpcr
movel Sc_Flag(%a6),%d0
beqs Finish
fmulx Scale(%pc),%fp0 | ...may cause underflow
bra t_avoid_unsupp |check for denorm as a
| ;result of the scaling
 
Finish:
fmovex %fp0,%fp0 |capture exceptions & round
rts
 
|
| Rem_is_0 -- Step 9 of the header, entered from Mod_Loop when R = Y.
| The remainder is exactly zero and the quotient is 2^j * (Q+1), with
| j still in D0. Only 7 bits of Q are reported, so for j >= 8 every
| reported bit is zero and Q is simply cleared; otherwise Q+1 is
| shifted left by j. The result is +0 in fp0 with Sc_Flag cleared, and
| Fix_Sign then applies the sign of X.
|
Rem_is_0:
addql #1,%d3 | ...Q := Q + 1
cmpil #8,%d0 | ...D0 is j
blts Q_Small

Q_Big:
clrl %d3 | ...low 7 bits of 2^j (Q+1) are all zero
bras Set_R_0

Q_Small:
lsll %d0,%d3 | ...Q := 2^j (Q+1)

Set_R_0:
fmoves #0x00000000,%fp0 | ...R := +0
clrl Sc_Flag(%a6) | ...no scaling on the way out
bra Fix_Sign
 
|
| Tie_Case -- Step 5.3 of the header, R = Y/2 exactly (REM only).
| Q even: R is kept as it is.
| Q odd : Q := Q + 1 and the sign recorded for X is inverted, so that
|         Fix_Sign produces -signX * R.
| SignX is a big-endian word, so its sign (bit 15) is bit 7 of the
| byte at SignX; bchg toggles it directly in memory.
|
Tie_Case:
|..Check parity of Q
btst #0,%d3
beq Fix_Sign | ...Q is even

|..Q is odd, Q := Q + 1, signX := -signX
addql #1,%d3
bchg #7,SignX(%a6)
bra Fix_Sign
 
|end
/satan.S
0,0 → 1,478
|
| satan.sa 3.3 12/19/90
|
| The entry point satan computes the arctangent of an
| input value. satand does the same except the input value is a
| denormalized number.
|
| Input: Double-extended value in memory location pointed to by address
| register a0.
|
| Output: Arctan(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 2 ulps in
| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program satan takes approximately 160 cycles for input
| argument X such that 1/16 < |X| < 16. For the other arguments,
| the program will run no worse than 10% slower.
|
| Algorithm:
| Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
|
| Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
| Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
| of X with a bit-1 attached at the 6-th bit position. Define u
| to be u = (X-F) / (1 + X*F).
|
| Step 3. Approximate arctan(u) by a polynomial poly.
|
| Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
| calculated beforehand. Exit.
|
| Step 5. If |X| >= 16, go to Step 7.
|
| Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
|
| Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
| Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|satan idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
|
| Constants for satan.
|
|..BOUNDS1 holds the same two compact-form limits (1/16 and just below
|..16) that satan compares against as immediates.
BOUNDS1: .long 0x3FFB8000,0x4002FFFF
 
|..Single-precision 1.0, followed by one zero longword.
ONE: .long 0x3F800000
 
.long 0x00000000
 
|..Polynomial coefficients, two longwords each (accessed with the
|..double-precision forms fmoved/faddd/fmuld):
|..  ATANA1-3  used by ATANMAIN for atan(U)
|..  ATANB1-6  used by ATANSM for |X| < 1/16 (B1 is -1/3 to double
|..            precision)
|..  ATANC1-5  not referenced in the part of the file shown here;
|..            presumably used by ATANBIG -- TODO confirm
ATANA3: .long 0xBFF6687E,0x314987D8
ATANA2: .long 0x4002AC69,0x34A26DB3
 
ATANA1: .long 0xBFC2476F,0x4E1DA28E
ATANB6: .long 0x3FB34444,0x7F876989
 
ATANB5: .long 0xBFB744EE,0x7FAF45DB
ATANB4: .long 0x3FBC71C6,0x46940220
 
ATANB3: .long 0xBFC24924,0x921872F9
ATANB2: .long 0x3FC99999,0x99998FA9
 
ATANB1: .long 0xBFD55555,0x55555555
ATANC5: .long 0xBFB70BF3,0x98539E6A
 
ATANC4: .long 0x3FBC7187,0x962D1D7D
ATANC3: .long 0xBFC24924,0x827107B8
 
ATANC2: .long 0x3FC99999,0x9996263E
ATANC1: .long 0xBFD55555,0x55555536
 
|..Extended-precision constants (three longwords plus one pad longword):
|..+-pi/2 and +-2^(-16382). Their users lie beyond the part of the file
|..shown here.
PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000
NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000
 
|
| ATANTBL -- table of atan(|F|) in extended precision.
| 128 entries of 16 bytes each (sign/exponent longword, two mantissa
| longwords, one zero pad longword). ATANMAIN forms a 7-bit index from
| K+4 (K = -4..3, the exponent of F) and the 4 varying fraction bits of
| F, and adds index*16 to this label. Only the first three longwords of
| an entry are read.
|
ATANTBL:
.long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
.long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
.long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
.long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
.long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
.long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
.long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
.long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
.long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
.long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
.long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
.long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
.long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
.long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
.long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
.long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
.long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
.long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
.long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
.long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
.long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
.long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
.long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
.long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
.long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
.long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
.long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
.long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
.long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
.long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
.long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
.long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
.long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
.long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
.long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
.long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
.long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
.long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
.long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
.long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
.long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
.long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
.long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
.long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
.long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
.long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
.long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
.long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
.long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
.long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
.long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
.long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
.long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
.long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
.long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
.long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
.long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
.long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
.long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
.long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
.long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
.long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
.long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
.long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
.long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
.long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
.long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
.long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
.long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
.long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
.long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
.long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
.long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
.long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
.long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
.long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
.long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
.long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
.long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
.long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
.long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
.long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
.long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
.long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
.long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
.long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
.long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
.long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
.long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
.long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
.long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
.long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
.long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
.long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
.long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
.long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
.long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
.long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
.long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
.long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
.long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
.long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
.long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
.long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
.long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
.long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
.long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
.long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
.long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
.long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
.long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
.long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
.long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
.long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
.long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
.long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
.long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
.long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
.long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
.long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
.long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
.long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
.long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
.long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
.long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
.long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
.long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
.long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
 
|
| Frame-relative work areas used by satan (offsets applied to a6; the
| FP_SCRn bases come from fpsp.h).
|

|..ATANF: table value atan(|F|) with the sign of F ORed in
.set ATANF,FP_SCR2
.set ATANFHI,ATANF+4
.set ATANFLO,ATANF+8

|..X: copy of the argument, later overwritten by F (ATANMAIN)
.set X,FP_SCR1
.set XDCARE,X+2 | ...word after the sign/exponent word
.set XFRAC,X+4 | ...upper 32 bits of the fraction
.set XFRACLO,X+8 | ...lower 32 bits of the fraction
 
 
| xref t_frcinx
|xref t_extdnrm
 
.global satand
.global satan

|--satand: ENTRY POINT FOR ATAN(X) WHEN X IS DENORMALIZED.
|--ALL THE WORK IS DONE BY t_extdnrm (DEFINED OUTSIDE THIS FILE).
satand:
bra t_extdnrm

|--satan: ENTRY POINT FOR ATAN(X); X IS FINITE, NON-ZERO AND NOT A NAN.
|--a0 POINTS TO X. X IS LOADED INTO FP0 AND COPIED TO X(a6), AND d0 IS
|--SET TO |X| IN COMPACT FORM: EXPONENT IN THE UPPER WORD, TOP 16
|--FRACTION BITS IN THE LOWER WORD, SIGN BIT CLEARED.
|--THE COMPACT FORM THEN SELECTS ONE OF THREE CASES:
|--     d0 <  $3FFB8000                 -> ATANSM
|--     $3FFB8000 <= d0 <= $4002FFFF    -> ATANMAIN
|--     d0 >  $4002FFFF                 -> ATANBIG
satan:
fmovex (%a0),%fp0 | ...FP0 := X

movel (%a0),%d0 | ...SIGN/EXPONENT INTO UPPER WORD
movew 4(%a0),%d0 | ...TOP 16 FRACTION BITS INTO LOWER WORD
fmovex %fp0,X(%a6) | ...KEEP A COPY OF X IN THE FRAME
andil #0x7FFFFFFF,%d0 | ...DROP THE SIGN: COMPACT |X|

cmpil #0x3FFB8000,%d0 | ...BELOW 1/16 ?
blt ATANSM

ATANOK1:
cmpil #0x4002FFFF,%d0 | ...ABOVE THE LAST VALUE BELOW 16 ?
bgt ATANBIG
bra ATANMAIN | ...1/16 <= |X| < 16
 
 
|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
|--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
|--WILL INVOLVE A VERY LONG POLYNOMIAL.
 
|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
|--WE CHOOSE F TO BE +-2^K * 1.BBBB1
|--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
|--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
 
|--ATANMAIN: 1/16 <= |X| < 16.
|--ON ENTRY FP0 = X, X(a6) = X AND d0 = COMPACT |X| (SET UP BY satan).
|--X(a6) IS TURNED INTO F, U = (X-F)/(1+X*F) IS STARTED IN THE FPU,
|--ATAN(|F|) IS COPIED FROM ATANTBL TO ATANF WITH THE SIGN OF F, AND
|--THE RESULT ATAN(F) + ATAN(U) IS LEFT IN FP0 BEFORE BRANCHING TO
|--t_frcinx (DEFINED OUTSIDE THIS FILE).
|--USES d0, a1, FP1, FP2; d2 IS SAVED AND RESTORED ON THE STACK.
ATANMAIN:
 
movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE
andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS
oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1
movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F
 
fmovex %fp0,%fp1 | ...FP1 IS X
fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0
fsubx X(%a6),%fp0 | ...FP0 IS X-F
fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F
fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F)
 
|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
|--AND CREATE ATAN(F) AND STORE IT IN ATANF.
|--NOTE(review): THE ORIGINAL COMMENT ALSO SAID "SAVE REGISTERS FP2",
|--BUT NO SAVE OF FP2 APPEARS IN THIS ROUTINE ALTHOUGH FP2 IS
|--OVERWRITTEN BELOW; PRESUMABLY THE CALLER PRESERVES IT -- CONFIRM.
 
|--INDEX COMPUTATION: K+4 SITS IN BITS 18-16 OF d2; THE SHIFT BY 1
|--PLACES IT DIRECTLY ABOVE THE 4 FRACTION BITS (BITS 14-11 OF d0),
|--AND THE SHIFT BY 7 LEAVES (7-BIT INDEX)*16, THE BYTE OFFSET OF A
|--16-BYTE TABLE ENTRY.
movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY
movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X
andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION
andil #0x7FFF0000,%d2 | ...EXPONENT OF F
subil #0x3FFB0000,%d2 | ...K+4
asrl #1,%d2
addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F
asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|)
lea ATANTBL,%a1
addal %d0,%a1 | ...ADDRESS OF ATAN(|F|)
movel (%a1)+,ATANF(%a6)
movel (%a1)+,ATANFHI(%a6)
movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|)
movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN
andil #0x80000000,%d0 | ...SIGN(F)
orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|)
movel (%a7)+,%d2 | ...RESTORE d2
 
|--THAT'S ALL I HAVE TO DO FOR NOW,
|--BUT ALAS, THE DIVIDE IS STILL CRANKING!
 
|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
|--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
 
fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...FP1 IS V = U*U
fmoved ATANA3,%fp2
faddx %fp1,%fp2 | ...A3+V
fmulx %fp1,%fp2 | ...V*(A3+V)
fmulx %fp0,%fp1 | ...U*V
faddd ATANA2,%fp2 | ...A2+V*(A3+V)
fmuld ATANA1,%fp1 | ...A1*U*V
fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V))
faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED
|--d1 IS NOT WRITTEN ANYWHERE IN THIS ROUTINE; IT IS ASSUMED TO HOLD
|--THE USER'S FPCR VALUE ON ENTRY -- TODO CONFIRM AGAINST THE CALLER.
fmovel %d1,%FPCR |restore users exceptions
faddx ATANF(%a6),%fp0 | ...ATAN(X)
bra t_frcinx
 
ATANBORS:
|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
|--One compare against $3FFF8000 separates the two cases: above it
|--is the large case, anything else falls through to ATANSM.
cmpil #0x3FFF8000,%d0
bgt ATANBIG | ...I.E. |X| >= 16
 
ATANSM:
|--|X| <= 1/16
|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
|--WHERE Y = X*X, AND Z = Y*Y.
|--The odd (FP1) and even (FP2) coefficient chains are evaluated
|--side by side and only combined at the end.

cmpil #0x3FD78000,%d0 | ...compact form of 2^(-40)
blt ATANTINY
|--COMPUTE POLYNOMIAL
fmulx %fp0,%fp0 | ...FP0 IS Y = X*X

movew #0x0000,XDCARE(%a6) | ...clear the don't-care word of X

fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y

fmoved ATANB6,%fp2
fmoved ATANB5,%fp3

fmulx %fp1,%fp2 | ...Z*B6
fmulx %fp1,%fp3 | ...Z*B5

faddd ATANB4,%fp2 | ...B4+Z*B6
faddd ATANB3,%fp3 | ...B3+Z*B5

fmulx %fp1,%fp2 | ...Z*(B4+Z*B6)
fmulx %fp3,%fp1 | ...Z*(B3+Z*B5)

faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6)
faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5)

fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6))
fmulx X(%a6),%fp0 | ...X*Y

faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]

fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

fmovel %d1,%FPCR |restore users exceptions
faddx X(%a6),%fp0 | ...X + correction; last FP op, runs under the FPCR from d1

bra t_frcinx
 
ATANTINY:
|--|X| < 2^(-40), ATAN(X) = X
|--X is reloaded from X(a6) after FPCR is set from d1, so the move
|--itself is the last FP instruction of this path.
movew #0x0000,XDCARE(%a6) | ...clear the don't-care word of X

fmovel %d1,%FPCR |restore users exceptions
fmovex X(%a6),%fp0 |last inst - possible exception set

bra t_frcinx
 
ATANBIG:
|--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
|--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
cmpil #0x40638000,%d0 | ...compact form of 2^(100)
bgt ATANHUGE

|--APPROXIMATE ATAN(-1/X) BY
|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
|--THIS CAN BE RE-WRITTEN AS
|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

fmoves #0xBF800000,%fp1 | ...LOAD -1
fdivx %fp0,%fp1 | ...FP1 IS -1/X

|--DIVIDE IS STILL CRANKING

fmovex %fp1,%fp0 | ...FP0 IS X'
fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X'
fmovex %fp1,X(%a6) | ...X IS REALLY X'

fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y

fmoved ATANC5,%fp3
fmoved ATANC4,%fp2

fmulx %fp1,%fp3 | ...Z*C5
fmulx %fp1,%fp2 | ...Z*C4

faddd ATANC3,%fp3 | ...C3+Z*C5
faddd ATANC2,%fp2 | ...C2+Z*C4

fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED
fmulx %fp0,%fp2 | ...Y*(C2+Z*C4)

faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5)
fmulx X(%a6),%fp0 | ...X'*Y

faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]

fmulx %fp1,%fp0 | ...X'*Y*([C1+Z*(C3+Z*C5)]
| ... +[Y*(C2+Z*C4)])
faddx X(%a6),%fp0 | ...FP0 IS ATAN(X') = ATAN(-1/X)

fmovel %d1,%FPCR |restore users exceptions
btstb #7,(%a0) | ...sign bit of the operand at (a0)
beqs pos_big

neg_big:
faddx NPIBY2,%fp0 | ...-PI/2 + ATAN(-1/X)
bra t_frcinx

pos_big:
faddx PPIBY2,%fp0 | ...+PI/2 + ATAN(-1/X)
bra t_frcinx
 
ATANHUGE:
|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
|--The constant is loaded first, then FPCR is set from d1, so the
|--subtraction of TINY is the last FP instruction of each branch.
btstb #7,(%a0) | ...sign bit of the operand at (a0)
beqs pos_huge

neg_huge:
fmovex NPIBY2,%fp0
fmovel %d1,%fpcr
fsubx NTINY,%fp0 | ...-PIBY2 - (-TINY)
bra t_frcinx

pos_huge:
fmovex PPIBY2,%fp0
fmovel %d1,%fpcr
fsubx PTINY,%fp0 | ...+PIBY2 - (+TINY)
bra t_frcinx
|end
/sacos.S
0,0 → 1,115
|
| sacos.sa 3.3 12/19/90
|
| Description: The entry point sAcos computes the inverse cosine of
| an input argument; sAcosd does the same except for denormalized
| input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value arccos(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulps in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program sACOS takes approximately 310 cycles.
|
| Algorithm:
|
| ACOS
| 1. If |X| >= 1, go to 3.
|
| 2. (|X| < 1) Calculate acos(X) by
| z := (1-X) / (1+X)
| acos(X) = 2 * atan( sqrt(z) ).
| Exit.
|
| 3. If |X| > 1, go to 5.
|
| 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
|
| 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
| Exit.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SACOS idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
| Constants loaded below with fmovex. Each is four longs: the first
| carries the exponent in its upper word, the next two are the 64-bit
| mantissa; the fourth long is zero and lies beyond the 12 bytes that
| fmovex reads.
PI: .long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
 
|xref t_operr
|xref t_frcinx
|xref satan
 
.global sacosd
sacosd:
|--ACOS(X) = PI/2 FOR DENORMALIZED X
|--d1 is written to FPCR first so the load of PIBY2 is done under
|--that rounding mode/precision.
fmovel %d1,%fpcr | ...load user's rounding mode/precision
fmovex PIBY2,%fp0
bra t_frcinx
 
.global sacos
sacos:
|--Entry: a0 points to X, d1 holds the value later written to FPCR.
|--Result is returned in FP0. The input at (a0) is overwritten with
|--the argument handed to satan.
fmovex (%a0),%fp0 | ...LOAD INPUT

movel (%a0),%d0 | ...pack exponent with upper 16 fraction
movew 4(%a0),%d0
andil #0x7FFFFFFF,%d0 | ...drop the sign: compact |X|
cmpil #0x3FFF8000,%d0 | ...compare with compact 1.0
bges ACOSBIG

|--THIS IS THE USUAL CASE, |X| < 1
|--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )

fmoves #0x3F800000,%fp1 | ...1.0
faddx %fp0,%fp1 | ...1+X
fnegx %fp0 | ... -X
fadds #0x3F800000,%fp0 | ...1-X
fdivx %fp1,%fp0 | ...(1-X)/(1+X)
fsqrtx %fp0 | ...SQRT((1-X)/(1+X))
fmovemx %fp0-%fp0,(%a0) | ...overwrite input
movel %d1,-(%sp) |save original users fpcr
clrl %d1 |satan is called with d1 = 0
bsr satan | ...ATAN(SQRT([1-X]/[1+X]))
fmovel (%sp)+,%fpcr |restore users exceptions
faddx %fp0,%fp0 | ...2 * ATAN( STUFF )
bra t_frcinx
 
ACOSBIG:
|--|X| >= 1. Anything strictly greater than 1 is an operand error;
|--exactly +-1 returns 0 or PI.
fabsx %fp0
fcmps #0x3F800000,%fp0 | ...compare |X| with 1.0
fbgt t_operr |cause an operr exception

|--|X| = 1, ACOS(X) = 0 OR PI
movel (%a0),%d0 | ...pack exponent with upper 16 fraction
movew 4(%a0),%d0
cmpl #0,%d0 |D0 has original exponent+fraction
bgts ACOSP1 |sign bit clear => X = +1

|--X = -1
|Returns PI and inexact exception
fmovex PI,%fp0
fmovel %d1,%FPCR
fadds #0x00800000,%fp0 |cause an inexact exception to be put
| ;into the 040 - will not trap until next
| ;fp inst.
bra t_frcinx

ACOSP1:
|--X = +1: return +0 directly, without going through t_frcinx
fmovel %d1,%FPCR
fmoves #0x00000000,%fp0
rts |Facos ; of +1 is exact
 
|end
/Makefile
0,0 → 1,31
#
# Makefile for Linux arch/m68k/fpsp040 source directory
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (ie not a .c file).
#
# Note 2! The CFLAGS definitions are now in the main makefile...

# Assemble each .S through the C compiler driver so that the source is
# preprocessed first. Recipe lines MUST begin with a TAB character.
.S.o:
	$(CC) -D__ASSEMBLY__ -traditional -c -o $*.o $<
#	$(AS) -o $*.o $<

# The whole package is linked into one relocatable object.
OS_TARGET := fpsp.o

OS_OBJS := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \
	kernel_ex.o res_func.o round.o sacos.o sasin.o satan.o satanh.o \
	scosh.o setox.o sgetem.o sint.o slog2.o slogn.o \
	smovecr.o srem_mod.o scale.o \
	ssin.o ssinh.o stan.o stanh.o sto_res.o stwotox.o tbldo.o util.o \
	x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \
	x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o

all: $(OS_TARGET)

include $(TOPDIR)/Rules.make

# Every object is rebuilt when the shared header changes.
$(OS_OBJS): fpsp.h

# -r: produce a relocatable object; -x: discard local symbols.
$(OS_TARGET): $(OS_OBJS) fpsp.h
	$(LD) -x -r -o $(OS_TARGET) $(OS_OBJS)
/stanh.S
0,0 → 1,185
|
| stanh.sa 3.1 12/10/90
|
| The entry point sTanh computes the hyperbolic tangent of
| an input argument; sTanhd does the same except for denormalized
| input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value tanh(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulps in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program stanh takes approximately 270 cycles.
|
| Algorithm:
|
| TANH
| 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
|
| 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
| sgn := sign(X), y := 2|X|, z := expm1(Y), and
| tanh(X) = sgn*( z/(2+z) ).
| Exit.
|
| 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
| go to 7.
|
| 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
|
| 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
| sgn := sign(X), y := 2|X|, z := exp(Y),
| tanh(X) = sgn - [ sgn*2/(1+z) ].
| Exit.
|
| 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
| calculate Tanh(X) by
| sgn := sign(X), Tiny := 2**(-126),
| tanh(X) := sgn - sgn*Tiny.
| Exit.
|
| 7. (|X| < 2**(-40)). Tanh(X) = X. Exit.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|STANH idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
.include "fpsp.h"
 
| Scratch locations in the a6 frame used by stanh.
.set X,FP_SCR5
.set XDCARE,X+2
.set XFRAC,X+4

.set SGN,L_SCR3

.set V,FP_SCR6

| Lower and upper bound, in compact form, used by the cmp2l in stanh.
BOUNDS1: .long 0x3FD78000,0x3FFFDDCE | ... 2^(-40), (5/2)LOG2
 
|xref t_frcinx
|xref t_extdnrm
|xref setox
|xref setoxm1
 
.global stanhd
stanhd:
|--TANH(X) = X FOR DENORMALIZED X
|--All work is delegated to t_extdnrm.

bra t_extdnrm
 
.global stanh
stanh:
|--Entry: a0 points to X, d1 holds the value later written to FPCR.
|--Result is returned in FP0. The input at (a0) is overwritten with
|--the argument handed to setoxm1.
fmovex (%a0),%fp0 | ...LOAD INPUT

fmovex %fp0,X(%a6)
movel (%a0),%d0 | ...pack exponent with upper 16 fraction
movew 4(%a0),%d0
movel %d0,X(%a6) | ...first lword of X now holds the compact form
andl #0x7FFFFFFF,%d0 | ...compact |X|
cmp2l BOUNDS1(%pc),%d0 | ...2**(-40) < |X| < (5/2)LOG2 ?
bcss TANHBORS | ...carry set: d0 is outside the bounds

|--THIS IS THE USUAL CASE
|--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).

movel X(%a6),%d0
movel %d0,SGN(%a6)
andl #0x7FFF0000,%d0 | ...keep the exponent only
addl #0x00010000,%d0 | ...EXPONENT OF 2|X|
movel %d0,X(%a6) | ...sign and don't-care word are now clear
andl #0x80000000,SGN(%a6) | ...SGN holds only the sign bit of X
fmovex X(%a6),%fp0 | ...FP0 IS Y = 2|X|

movel %d1,-(%a7) | ...keep d1 across the call
clrl %d1 | ...setoxm1 is called with d1 = 0
fmovemx %fp0-%fp0,(%a0)
bsr setoxm1 | ...FP0 IS Z = EXPM1(Y)
movel (%a7)+,%d1

fmovex %fp0,%fp1
fadds #0x40000000,%fp1 | ...Z+2
movel SGN(%a6),%d0
fmovex %fp1,V(%a6)
eorl %d0,V(%a6) | ...V IS SIGN(X)*(Z+2)

fmovel %d1,%FPCR |restore users exceptions
fdivx V(%a6),%fp0 | ...Z / (SIGN(X)*(Z+2)); last FP op
bra t_frcinx
 
TANHBORS:
|--|X| is outside the BOUNDS1 window. d0 is compact |X|.
cmpl #0x3FFF8000,%d0 | ...compact 1.0
blt TANHSM

cmpl #0x40048AA1,%d0 | ...upper limit of this path (50 LOG2)
bgt TANHHUGE

|-- (5/2) LOG2 < |X| < 50 LOG2,
|--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
|--TANH(X) = SGN - SGN*2/[EXP(Y)+1].

movel X(%a6),%d0
movel %d0,SGN(%a6)
andl #0x7FFF0000,%d0 | ...keep the exponent only
addl #0x00010000,%d0 | ...EXPO OF 2|X|
movel %d0,X(%a6) | ...Y = 2|X|
andl #0x80000000,SGN(%a6) | ...SGN holds only the sign bit of X
movel SGN(%a6),%d0 | ...(d0 is loaded from SGN again after the call)
fmovex X(%a6),%fp0 | ...Y = 2|X|

movel %d1,-(%a7) | ...keep d1 across the call
clrl %d1 | ...setox is called with d1 = 0
fmovemx %fp0-%fp0,(%a0)
bsr setox | ...FP0 IS EXP(Y)
movel (%a7)+,%d1
movel SGN(%a6),%d0
fadds #0x3F800000,%fp0 | ...EXP(Y)+1

eorl #0xC0000000,%d0 | ...-SIGN(X)*2
fmoves %d0,%fp1 | ...-SIGN(X)*2 IN SGL FMT
fdivx %fp0,%fp1 | ...-SIGN(X)2 / [EXP(Y)+1 ]

movel SGN(%a6),%d0
orl #0x3F800000,%d0 | ...SGN
fmoves %d0,%fp0 | ...SGN IN SGL FMT

fmovel %d1,%FPCR |restore users exceptions
faddx %fp1,%fp0 | ...SGN - SGN*2/[EXP(Y)+1]; last FP op

bra t_frcinx
 
TANHSM:
|--Small |X|: TANH(X) = X.
|--stanh stored the compact form over the first lword of X, so the
|--don't-care word is cleared before X is reloaded.
movew #0x0000,XDCARE(%a6)

fmovel %d1,%FPCR |restore users exceptions
fmovex X(%a6),%fp0 |last inst - possible exception set

bra t_frcinx
 
TANHHUGE:
|---RETURN SGN(X) - SGN(X)EPS
|---Both terms are built in d0 as single-precision bit patterns.
movel X(%a6),%d0
andl #0x80000000,%d0 | ...sign bit of X
orl #0x3F800000,%d0 | ...SIGN(X)*1.0 IN SGL FMT
fmoves %d0,%fp0
andl #0x80000000,%d0 | ...back to the bare sign bit
eorl #0x80800000,%d0 | ...-SIGN(X)*EPS

fmovel %d1,%FPCR |restore users exceptions
fadds %d0,%fp0 | ...SGN - SGN*EPS; last FP op

bra t_frcinx
 
|end
/sint.S
0,0 → 1,247
|
| sint.sa 3.1 12/10/90
|
| The entry point sINT computes the rounded integer
| equivalent of the input argument, sINTRZ computes
| the integer rounded to zero of the input argument.
|
| Entry points sint and sintrz are called from do_func
| to emulate the fint and fintrz unimplemented instructions,
| respectively. Entry point sintdo is used by bindec.
|
| Input: (Entry points sint and sintrz) Double-extended
| number X in the ETEMP space in the floating-point
| save stack.
| (Entry point sintdo) Double-extended number X in
| location pointed to by the address register a0.
| (Entry point sintd) Double-extended denormalized
| number X in the ETEMP space in the floating-point
| save stack.
|
| Output: The function returns int(X) or intrz(X) in fp0.
|
| Modifies: fp0.
|
| Algorithm: (sint and sintrz)
|
| 1. If exp(X) >= 63, return X.
| If exp(X) < 0, return +/- 0 or +/- 1, according to
| the rounding mode.
|
| 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the
| result to the exponent $403e.
|
| 3. Round the result in the mode given in USER_FPCR. For
| sintrz, force round-to-zero mode.
|
| 4. Normalize the rounded result; store in fp0.
|
| For the denormalized cases, force the correct result
| for the given sign and rounding mode.
|
| Sign(X)
| RMODE + -
| ----- --------
| RN +0 -0
| RZ +0 -0
| RM +0 -1
| RP +1 -0
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SINT idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref dnrm_lp
|xref nrm_set
|xref round
|xref t_inx2
|xref ld_pone
|xref ld_mone
|xref ld_pzero
|xref ld_mzero
|xref snzrinx
 
|
| FINT
|
.global sint
sint:
|--FINT entry: pick up the rounding mode from FPCR_MODE, park it in
|--L_SCR1 and join the common code at sintexc.
bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding
| ;implicitly has extend precision
| ;in upper word.
movel %d1,L_SCR1(%a6) |save mode bits
bras sintexc
 
|
| FINT with extended denorm inputs.
|
.global sintd
sintd:
|
| Denormalized input. The answer depends only on the rounding
| mode in FPCR_MODE (bit 5 clear = rn or rz) and on the sign:
|
|	rn, rz	-> signed zero (handled by snzrinx)
|	rm	-> +0 for positive input, -1 for negative input
|	rp	-> +1 for positive input, -0 for negative input
|
| Every path below loads a constant and leaves through t_inx2.
|
	btstb	#5,FPCR_MODE(%a6)
	beq	snzrinx			|rn or rz: +/- 0
	btstb	#4,FPCR_MODE(%a6)
	bnes	sintd_rp		|bit 4 set: round to plus inf
|
| Round to minus infinity.
|
	btstb	#sign_bit,LOCAL_EX(%a0)
	bnes	sintd_m1		|negative: answer is -1
	bsr	ld_pzero		|positive: answer is +0
	bra	t_inx2
sintd_m1:
	bsr	ld_mone
	bra	t_inx2
|
| Round to plus infinity.
|
sintd_rp:
	btstb	#sign_bit,LOCAL_EX(%a0)
	beqs	sintd_p1		|positive: answer is +1
	bsr	ld_mzero		|negative: answer is -0
	bra	t_inx2
sintd_p1:
	bsr	ld_pone
	bra	t_inx2
 
|
| FINTRZ
|
.global sintrz
sintrz:
|--FINTRZ entry: force mode value 1 into L_SCR1 instead of reading
|--FPCR_MODE, then join the common code at sintexc.
movel #1,L_SCR1(%a6) |use rz mode for rounding
| ;implicitly has extend precision
| ;in upper word.
bras sintexc
|
| SINTDO
|
| Input: a0 points to an IEEE extended format operand
| Output: fp0 has the result
|
| Exceptions:
|
| If the subroutine results in an inexact operation, the inx2 and
| ainx bits in the USER_FPSR are set.
|
|
.global sintdo
sintdo:
|--Same mode setup as sint; execution falls through into sintexc.
bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding
| ;implicitly has ext precision
| ;in upper word.
movel %d1,L_SCR1(%a6) |save mode bits
|
| Real work of sint is in sintexc
|
sintexc:
|--Common body. a0 points to the operand, L_SCR1 holds the rounding
|--mode. The sign is moved out of the exponent word into LOCAL_SGN,
|--then the exponent selects one of three paths: out_rnge, in_rnge,
|--or fall through to un_rnge.
bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal extended
| ;format
sne LOCAL_SGN(%a0) |LOCAL_SGN = $ff if the sign bit was set
cmpw #0x403e,LOCAL_EX(%a0) |check if (unbiased) exp > 63
bgts out_rnge |branch if exp > 63
cmpw #0x3ffd,LOCAL_EX(%a0) |check if biased exp > $3ffd
bgt in_rnge |if $403e >= exp > $3ffd, do calc
|
| Biased exponent is $3ffd or less, so the result is 0 or +/-1
| depending on sign and rounding mode. L_SCR1 contains the rmode
| in the lower byte; the sign is in LOCAL_SGN.
|
un_rnge:
btstb #1,L_SCR1+3(%a6) |check for rn and rz
beqs un_rnrz |bit 1 clear: rn or rz
tstb LOCAL_SGN(%a0) |check for sign
bnes un_rmrp_neg
|
| Sign is +. If rp, load +1.0, if rm, load +0.0
|
cmpib #3,L_SCR1+3(%a6) |check for rp
beqs un_ldpone |if rp, load +1.0
bsr ld_pzero |if rm, load +0.0
bra t_inx2
un_ldpone:
bsr ld_pone
bra t_inx2
|
| Sign is -. If rm, load -1.0, if rp, load -0.0
|
un_rmrp_neg:
cmpib #2,L_SCR1+3(%a6) |check for rm
beqs un_ldmone |if rm, load -1.0
bsr ld_mzero |if rp, load -0.0
bra t_inx2
un_ldmone:
bsr ld_mone
bra t_inx2
|
| Rmode is rn or rz; return signed zero
|
un_rnrz:
tstb LOCAL_SGN(%a0) |check for sign
bnes un_rnrz_neg
bsr ld_pzero
bra t_inx2
un_rnrz_neg:
bsr ld_mzero
bra t_inx2
|
| Input is greater than 2^63. All bits are significant. Return
| the input.
|
out_rnge:
|--Exponent above $403e: return the operand unchanged.
|--bfclr tests LOCAL_SGN before clearing it, so the beq below sees
|--whether the saved sign byte was zero.
bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
beqs intps |sign byte was clear: positive
bsetb #sign_bit,LOCAL_EX(%a0) |put the sign back in the exponent word
intps:
fmovel %fpcr,-(%sp) |save fpcr, load the result with fpcr = 0
fmovel #0,%fpcr
fmovex LOCAL_EX(%a0),%fp0 |if exp > 63
| ;then return X to the user
| ;there are no fraction bits
fmovel (%sp)+,%fpcr
rts
 
in_rnge:
|--Operand has both integer and fraction bits. Three helpers do the
|--work: dnrm_lp shifts to exponent $403e, round applies the mode from
|--L_SCR1, nrm_set renormalizes. The sign is then restored and the
|--result loaded into fp0.
| ;shift off fraction bits
clrl %d0 |clear d0 - initial g,r,s for
| ;dnrm_lp
movel #0x403e,%d1 |set threshold for dnrm_lp
| ;assumes a0 points to operand
bsr dnrm_lp
| ;returns unnormalized number
| ;pointed by a0
| ;output d0 supplies g,r,s
| ;used by round
movel L_SCR1(%a6),%d1 |use selected rounding mode
|
|
bsr round |round the unnorm based on users
| ;input a0 ptr to ext X
| ; d0 g,r,s bits
| ; d1 PREC/MODE info
| ;output a0 ptr to rounded result
| ;inexact flag set in USER_FPSR
| ;if initial grs set
|
| normalize the rounded result and store value in fp0
|
bsr nrm_set |normalize the unnorm
| ;Input: a0 points to operand to
| ;be normalized
| ;Output: a0 points to normalized
| ;result
bfclr LOCAL_SGN(%a0){#0:#8} |test and clear the saved sign byte
beqs nrmrndp |sign byte was clear: positive
bsetb #sign_bit,LOCAL_EX(%a0) |return to IEEE extended format
nrmrndp:
fmovel %fpcr,-(%sp) |save fpcr, load the result with fpcr = 0
fmovel #0,%fpcr
fmovex LOCAL_EX(%a0),%fp0 |move result to fp0
fmovel (%sp)+,%fpcr
rts
 
|end
/sasin.S
0,0 → 1,104
|
| sasin.sa 3.3 12/19/90
|
| Description: The entry point sAsin computes the inverse sine of
| an input argument; sAsind does the same except for denormalized
| input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value arcsin(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulps in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program sASIN takes approximately 310 cycles.
|
| Algorithm:
|
| ASIN
| 1. If |X| >= 1, go to 3.
|
| 2. (|X| < 1) Calculate asin(X) by
| z := sqrt( [1-X][1+X] )
| asin(X) = atan( x / z ).
| Exit.
|
| 3. If |X| > 1, go to 5.
|
| 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.
|
| 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
| Exit.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SASIN idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
| PI/2, loaded below with fmovex. First long carries the exponent in
| its upper word, the next two are the mantissa; the fourth long is
| zero and lies beyond the 12 bytes that fmovex reads.
PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
 
|xref t_operr
|xref t_frcinx
|xref t_extdnrm
|xref satan
 
.global sasind
sasind:
|--ASIN(X) = X FOR DENORMALIZED X
|--All work is delegated to t_extdnrm.

bra t_extdnrm
 
.global sasin
sasin:
|--Entry: a0 points to X. Result is returned in FP0. The input at
|--(a0) is overwritten with the argument handed to satan.
fmovex (%a0),%fp0 | ...LOAD INPUT

movel (%a0),%d0 | ...pack exponent with upper 16 fraction
movew 4(%a0),%d0
andil #0x7FFFFFFF,%d0 | ...drop the sign: compact |X|
cmpil #0x3FFF8000,%d0 | ...compare with compact 1.0
bges asinbig

|--THIS IS THE USUAL CASE, |X| < 1
|--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )

fmoves #0x3F800000,%fp1 | ...1.0
fsubx %fp0,%fp1 | ...1-X
fmovemx %fp2-%fp2,-(%a7) | ...save fp2, used as scratch
fmoves #0x3F800000,%fp2
faddx %fp0,%fp2 | ...1+X
fmulx %fp2,%fp1 | ...(1+X)(1-X)
fmovemx (%a7)+,%fp2-%fp2 | ...restore fp2
fsqrtx %fp1 | ...SQRT([1-X][1+X])
fdivx %fp1,%fp0 | ...X/SQRT([1-X][1+X])
fmovemx %fp0-%fp0,(%a0) | ...overwrite input for satan
bsr satan | ...called with d1 unchanged
bra t_frcinx
 
asinbig:
|--|X| >= 1. Anything strictly greater than 1 is an operand error;
|--exactly +-1 returns +-PI/2.
fabsx %fp0 | ...|X|
fcmps #0x3F800000,%fp0 | ...compare |X| with 1.0
fbgt t_operr |cause an operr exception

|--|X| = 1, ASIN(X) = +- PI/2.

fmovex PIBY2,%fp0
movel (%a0),%d0
andil #0x80000000,%d0 | ...SIGN BIT OF X
oril #0x3F800000,%d0 | ...+-1 IN SGL FORMAT
movel %d0,-(%sp) | ...push SIGN(X) IN SGL-FMT
fmovel %d1,%FPCR
fmuls (%sp)+,%fp0 | ...PI/2 * (+-1); last FP op, also pops the stack
bra t_frcinx
 
|end
/satanh.S
0,0 → 1,104
|
| satanh.sa 3.3 12/19/90
|
| The entry point satanh computes the inverse
| hyperbolic tangent of
| an input argument; satanhd does the same except for denormalized
| input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value arctanh(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulps in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program satanh takes approximately 270 cycles.
|
| Algorithm:
|
| ATANH
| 1. If |X| >= 1, go to 3.
|
| 2. (|X| < 1) Calculate atanh(X) by
| sgn := sign(X)
| y := |X|
| z := 2y/(1-y)
| atanh(X) := sgn * (1/2) * logp1(z)
| Exit.
|
| 3. If |X| > 1, go to 5.
|
| 4. (|X| = 1) Generate infinity with an appropriate sign and
| divide-by-zero by
| sgn := sign(X)
| atanh(X) := sgn / (+0).
| Exit.
|
| 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
| Exit.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|satanh idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
|xref t_dz
|xref t_operr
|xref t_frcinx
|xref t_extdnrm
|xref slognp1
 
.global satanhd
satanhd:
|--ATANH(X) = X FOR DENORMALIZED X
|--All work is delegated to t_extdnrm.

bra t_extdnrm
 
.global satanh
satanh:
|--Entry: a0 points to X, d1 holds the value later written to FPCR.
|--Result is returned in FP0. The input at (a0) is overwritten with
|--the argument handed to slognp1.
movel (%a0),%d0 | ...pack exponent with upper 16 fraction
movew 4(%a0),%d0
andil #0x7FFFFFFF,%d0 | ...drop the sign: compact |X|
cmpil #0x3FFF8000,%d0 | ...compare with compact 1.0
bges ATANHBIG

|--THIS IS THE USUAL CASE, |X| < 1
|--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).

fabsx (%a0),%fp0 | ...Y = |X|
fmovex %fp0,%fp1
fnegx %fp1 | ...-Y
faddx %fp0,%fp0 | ...2Y
fadds #0x3F800000,%fp1 | ...1-Y
fdivx %fp1,%fp0 | ...2Y/(1-Y)
movel (%a0),%d0
andil #0x80000000,%d0 | ...sign bit of X
oril #0x3F000000,%d0 | ...SIGN(X)*HALF
movel %d0,-(%sp) | ...pushed first, so popped last (by the fmuls)

fmovemx %fp0-%fp0,(%a0) | ...overwrite input
movel %d1,-(%sp) | ...pushed second, popped first (into fpcr)
clrl %d1 | ...slognp1 is called with d1 = 0
bsr slognp1 | ...LOG1P(Z)
fmovel (%sp)+,%fpcr
fmuls (%sp)+,%fp0 | ...SIGN(X)*HALF*LOG1P(Z); last FP op
bra t_frcinx
 
ATANHBIG:
|--|X| >= 1. Strictly greater than 1 goes to t_operr; exactly 1
|--goes to t_dz.
fabsx (%a0),%fp0 | ...|X|
fcmps #0x3F800000,%fp0 | ...compare |X| with 1.0
fbgt t_operr
bra t_dz
 
|end
/x_operr.S
0,0 → 1,356
|
| x_operr.sa 3.5 7/1/91
|
| fpsp_operr --- FPSP handler for operand error exception
|
| See 68040 User's Manual pp. 9-44f
|
| Note 1: For trap disabled 040 does the following:
| If the dest is a fp reg, then an extended precision non_signaling
| NAN is stored in the dest reg. If the dest format is b, w, or l and
| the source op is a NAN, then garbage is stored as the result (actually
| the upper 32 bits of the mantissa are sent to the integer unit). If
| the dest format is integer (b, w, l) and the operr is caused by
| integer overflow, or the source op is inf, then the result stored is
| garbage.
| There are three cases in which operr is incorrectly signaled on the
| 040. This occurs for move_out of format b, w, or l for the largest
| negative integer (-2^7 for b, -2^15 for w, -2^31 for l).
|
| On opclass = 011 fmove.(b,w,l) that causes a conversion
| overflow -> OPERR, the exponent in wbte (and fpte) is:
| byte 56 - (62 - exp)
| word 48 - (62 - exp)
| long 32 - (62 - exp)
|
| where exp = (true exp) - 1
|
| So, wbtemp and fptemp will contain the following on erroneously
| signalled operr:
| fpts = 1
| fpte = $4000 (15 bit externally)
| byte fptm = $ffffffff ffffff80
| word fptm = $ffffffff ffff8000
| long fptm = $ffffffff 80000000
|
| Note 2: For trap enabled 040 does the following:
| If the inst is move_out, then same as Note 1.
| If the inst is not move_out, the dest is not modified.
| The exceptional operand is not defined for integer overflow
| during a move_out.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_OPERR: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref mem_write
|xref real_operr
|xref real_inex
|xref get_fline
|xref fpsp_done
|xref reg_dest
 
.global fpsp_operr
fpsp_operr:
|
| Handler entry. Builds the local frame on a6, saves the FPU state
| with fsave, and saves d0-d1/a0-a1, fp0-fp3 and the FP control
| registers into the frame. Every exit path restores from those
| same slots.
|
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)

|
| Check if this is an opclass 3 instruction.
| If so, fall through, else branch to operr_end
|
btstb #TFLAG,T_BYTE(%a6)
beqs operr_end

|
| If the destination size is B,W,or L, the operr must be
| handled here.
|
movel CMDREG1B(%a6),%d0
bfextu %d0{#3:#3},%d0 |0=long, 4=word, 6=byte
cmpib #0,%d0 |determine size; check long
beq operr_long
cmpib #4,%d0 |check word
beq operr_word
cmpib #6,%d0 |check byte
beq operr_byte
| any other format falls through to operr_end
 
|
| The size is not B,W,or L, so the operr is handled by the
| kernel handler. Set the operr bits and clean up, leaving
| only the integer exception frame on the stack, and the
| fpu in the original exceptional state.
|
operr_end:
| Set operr and aiop in the saved FPSR bytes, restore everything
| saved at entry (including the fsave frame) and hand over to
| real_operr.
bsetb #operr_bit,FPSR_EXCEPT(%a6)
bsetb #aiop_bit,FPSR_AEXCEPT(%a6)

moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_operr
 
operr_long:
| Long destination. d1 carries the write size for operr_store.
| A NaN source goes to operr_nan. Otherwise FPTEMP is examined to
| tell an incorrectly signalled operr (store the integer, exit via
| not_enabled) from a real one (store_max).
moveql #4,%d1 |write size to d1
moveb STAG(%a6),%d0 |test stag for nan
andib #0xe0,%d0 |clr all but tag
cmpib #0x60,%d0 |check for nan
beq operr_nan
cmpil #0x80000000,FPTEMP_LO(%a6) |test if ls lword is special
bnes chklerr |if not equal, check for incorrect operr
bsr check_upper |check if exp and ms mant are special
tstl %d0
bnes chklerr |if d0 is true, check for incorrect operr
movel #0x80000000,%d0 |store special case result
bsr operr_store
bra not_enabled |clean and exit
|
| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
|
chklerr:
movew FPTEMP_EX(%a6),%d0
andw #0x7FFF,%d0 |ignore sign bit
cmpw #0x3FFE,%d0 |this is the only possible exponent value
bnes chklerr2
fixlong:
movel FPTEMP_LO(%a6),%d0 |low lword of FPTEMP is the result to store
bsr operr_store
bra not_enabled
chklerr2:
movew FPTEMP_EX(%a6),%d0
andw #0x7FFF,%d0 |ignore sign bit
cmpw #0x4000,%d0
bcc store_max |exponent out of range

movel FPTEMP_LO(%a6),%d0
andl #0x7FFF0000,%d0 |look for all 1's on bits 30-16
cmpl #0x7FFF0000,%d0
beqs fixlong

tstl FPTEMP_LO(%a6)
bpls chklepos |bit 31 of the low lword clear
cmpl #0xFFFFFFFF,FPTEMP_HI(%a6) |bit 31 set: upper lword must be all 1's
beqs fixlong
bra store_max
chklepos:
tstl FPTEMP_HI(%a6) |bit 31 clear: upper lword must be zero
beqs fixlong
bra store_max
 
operr_word:
| Word destination. Same structure as operr_long, with a single
| exponent check in chkwerr.
moveql #2,%d1 |write size to d1
moveb STAG(%a6),%d0 |test stag for nan
andib #0xe0,%d0 |clr all but tag
cmpib #0x60,%d0 |check for nan
beq operr_nan
cmpil #0xffff8000,FPTEMP_LO(%a6) |test if ls lword is special
bnes chkwerr |if not equal, check for incorrect operr
bsr check_upper |check if exp and ms mant are special
tstl %d0
bnes chkwerr |if d0 is true, check for incorrect operr
movel #0x80000000,%d0 |store special case result
bsr operr_store
bra not_enabled |clean and exit
|
| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
|
chkwerr:
movew FPTEMP_EX(%a6),%d0
andw #0x7FFF,%d0 |ignore sign bit
cmpw #0x3FFE,%d0 |this is the only possible exponent value
bnes store_max
movel FPTEMP_LO(%a6),%d0
swap %d0 |move the low word into the high word of d0
bsr operr_store
bra not_enabled
 
operr_byte:
| Byte destination. Same structure as operr_word.
moveql #1,%d1 |write size to d1
moveb STAG(%a6),%d0 |test stag for nan
andib #0xe0,%d0 |clr all but tag
cmpib #0x60,%d0 |check for nan
beqs operr_nan
cmpil #0xffffff80,FPTEMP_LO(%a6) |test if ls lword is special
bnes chkberr |if not equal, check for incorrect operr
bsr check_upper |check if exp and ms mant are special
tstl %d0
bnes chkberr |if d0 is true, check for incorrect operr
movel #0x80000000,%d0 |store special case result
bsr operr_store
bra not_enabled |clean and exit
|
| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
|
chkberr:
movew FPTEMP_EX(%a6),%d0
andw #0x7FFF,%d0 |ignore sign bit
cmpw #0x3FFE,%d0 |this is the only possible exponent value
bnes store_max
movel FPTEMP_LO(%a6),%d0
asll #8,%d0 |shift left 8, then swap: the low byte of
swap %d0 |FPTEMP_LO ends up in bits 31-24 of d0
bsr operr_store
bra not_enabled
 
|
| This operr condition is not of the special case. Set operr
| and aiop and write the portion of the nan to memory for the
| given size.
|
operr_nan:
| Source operand is a NaN. d1 still holds the write size set by the
| operr_long/word/byte code that branched here.
orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop

movel ETEMP_HI(%a6),%d0 |output will be from upper 32 bits
bsr operr_store
bra end_operr
|
| Store_max loads the max pos or negative for the size, sets
| the operr and aiop bits, and clears inex and ainex, incorrectly
| set by the 040.
|
store_max:
| Real operr on an integer move-out. Store $7fffffff for a positive
| source or $80000000 for a negative one; d1 still holds the write
| size.
orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop
bclrb #inex2_bit,FPSR_EXCEPT(%a6)
bclrb #ainex_bit,FPSR_AEXCEPT(%a6)
fmovel #0,%FPSR |clear the live FPSR as well
tstw FPTEMP_EX(%a6) |check sign
blts load_neg
movel #0x7fffffff,%d0
bsr operr_store
bra end_operr
load_neg:
movel #0x80000000,%d0
bsr operr_store
bra end_operr
 
|
| This routine stores the data in d0, for the given size in d1,
| to memory or data register as required. A read of the fline
| is required to determine the destination.
|
operr_store:
| In:  d0 = data to write, d1 = size in bytes (1, 2 or 4).
| The data is parked in L_SCR1 so that both the register path
| (reg_dest) and the memory path (mem_write) can find it.
movel %d0,L_SCR1(%a6) |move write data to L_SCR1
movel %d1,-(%a7) |save register size
bsrl get_fline |fline returned in d0
movel (%a7)+,%d1
bftst %d0{#26:#3} |if mode is zero, dest is Dn
bnes dest_mem
|
| Destination is Dn. Get register number from d0. Data is in
| L_SCR1. D1 has size: 1=byte,2=word,4=long/single
|
andil #7,%d0 |isolate register number
cmpil #4,%d1
beqs op_long |the most frequent case
cmpil #2,%d1
bnes op_con |byte: no size bits are added
orl #8,%d0 |word: add 8 to the register number
bras op_con
op_long:
orl #0x10,%d0 |long: add $10 to the register number
op_con:
movel %d0,%d1 |format size:reg for reg_dest
bral reg_dest |call to reg_dest returns to caller
| ;of operr_store
|
| Destination is memory. Get <ea> from integer exception frame
| and call mem_write.
|
dest_mem:
leal L_SCR1(%a6),%a0 |put ptr to write data in a0
movel EXC_EA(%a6),%a1 |put user destination address in a1
movel %d1,%d0 |put size in d0
bsrl mem_write
rts
|
| check_upper --- decide whether an operr on a b/w/l move-out was
| signalled incorrectly.
|
| The operr is treated as not real when the upper mantissa lword of
| FPTEMP is $ffffffff AND the exponent word is $c000 or $bfff.
|
| Out:	d0 = 0	operr was incorrectly signalled
|	d0 = 1	real operr
|
check_upper:
	cmpil	#0xffffffff,FPTEMP_HI(%a6)	|upper mantissa lword all 1's?
	bnes	chkup_real			|no: real operr
	cmpiw	#0xc000,FPTEMP_EX(%a6)		|first special exponent
	beqs	chkup_false
	cmpiw	#0xbfff,FPTEMP_EX(%a6)		|second special exponent
	bnes	chkup_real			|neither: real operr
chkup_false:
	clrl	%d0				|signal no real operr
	rts
chkup_real:
	moveql	#1,%d0				|signal real operr
	rts
 
|
| End_operr tests for operr enabled. If not, it cleans up the stack
| and does an rte. If enabled, it cleans up the stack and branches
| to the kernel operr handler with only the integer exception
| frame on the stack and the fpu in the original exceptional state
| with correct data written to the destination.
|
end_operr:
| Exit after the result has been written. With the operr enable bit
| clear, continue at not_enabled; otherwise restore all saved state
| and go to real_operr.
btstb #operr_bit,FPCR_ENABLE(%a6)
beqs not_enabled
enabled:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_operr
 
not_enabled:
|
| It is possible to have either inex2 or inex1 exceptions with the
| operr. If the inex enable bit is set in the FPCR, and either
| inex2 or inex1 occurred, we must clean up and branch to the
| real inex handler.
|
ck_inex:
moveb FPCR_ENABLE(%a6),%d0
andb FPSR_EXCEPT(%a6),%d0 |enabled AND reported
andib #0x3,%d0 |keep only the two low (inexact) bits
beq operr_exit
|
| Inexact enabled and reported, and we must take an inexact exception.
|
take_inex:
moveb #INEX_VEC,EXC_VEC+1(%a6) |rewrite the vector byte in the frame
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_inex
|
| Since operr is only an E1 exception, there is no need to frestore
| any state back to the fpu.
|
operr_exit:
| Restore the integer and FP registers saved at entry and leave
| through fpsp_done. Unlike the other exits there is no frestore
| here; unlk discards the frame.
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
unlk %a6
bral fpsp_done
 
|end
/scosh.S
0,0 → 1,132
|
| scosh.sa 3.1 12/10/90
|
| The entry point sCosh computes the hyperbolic cosine of
| an input argument; sCoshd does the same except for denormalized
| input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value cosh(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulps in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program sCOSH takes approximately 250 cycles.
|
| Algorithm:
|
| COSH
| 1. If |X| > 16380 log2, go to 3.
|
| 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
| y = |X|, z = exp(Y), and
| cosh(X) = (1/2)*( z + 1/z ).
| Exit.
|
| 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
|
| 4. (16380 log2 < |X| <= 16480 log2)
| cosh(X) = exp(|X|)/2.
| However, invoking exp(|X|) may cause premature overflow.
| Thus, we calculate cosh(X) as follows:
| Y := |X|
| Fact := 2**(16380)
| Y' := Y - 16381 log2
| cosh(X) := Fact * exp(Y').
| Exit.
|
| 5. (|X| > 16480 log2) cosh(X) must overflow. Return
| Huge*Huge to generate overflow and a positive infinity
| (cosh is never negative). Huge is the largest finite number in
| extended format. Exit.
|
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SCOSH idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
|xref t_ovfl
|xref t_frcinx
|xref setox
 
| Constants for the COSHBIG path of scosh.
| T1 and T2 are subtracted with fsubd (double-precision operands); their
| sum is 16381*log2 split into a leading and a trailing part.
T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD
T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL

| 2**16380 in extended precision: biased exponent 0x7FFB = 0x3FFF + 16380,
| mantissa 0x80000000_00000000 (1.0). Used with fmulx.
TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000
 
.global scoshd
scoshd:
|--COSH(X) = 1 FOR DENORMALIZED X
| The result is formed as 1.0 plus a tiny single-precision constant, with
| the add executed after d1 has been moved into FPCR.
| NOTE(review): d1 is presumably the caller's FPCR image - confirm against
| the code that dispatches to this routine.

fmoves #0x3F800000,%fp0 |fp0 = 1.0 (single-precision immediate)

fmovel %d1,%FPCR |install the FPCR value held in d1
fadds #0x00800000,%fp0 |add 2**(-126), the smallest normalized single
bra t_frcinx
 
.global scosh
scosh:
| Entry: a0 -> extended-precision X. d1 is moved into FPCR before the
| last arithmetic operation of the two non-overflow paths.
| NOTE(review): d1 is presumably the caller's rounding control - confirm.
| Exit: branches to t_frcinx (normal paths) or to t_ovfl (huge |X|) with
| fp0 loaded. At least d0, fp0 and the operand at (a0) are overwritten;
| fp1 is also overwritten on the usual path.
fmovex (%a0),%fp0 | ...LOAD INPUT

| Build a compact magnitude key in d0: the sign/exponent word in the upper
| half, the word at 4(a0) (top of the mantissa) in the lower half, and the
| sign bit cleared. It is compared against two thresholds below.
movel (%a0),%d0
movew 4(%a0),%d0
andil #0x7FFFFFFF,%d0
cmpil #0x400CB167,%d0 | key for the 16380 log2 threshold
bgts COSHBIG

|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
|--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )

fabsx %fp0 | ...|X|

movel %d1,-(%sp) | keep d1 across the call
clrl %d1 | setox is called with d1 = 0
fmovemx %fp0-%fp0,(%a0) |pass parameter to setox
bsr setox | ...FP0 IS EXP(|X|)
fmuls #0x3F000000,%fp0 | ...(1/2)EXP(|X|)
movel (%sp)+,%d1 | recover the saved d1

fmoves #0x3E800000,%fp1 | ...(1/4)
fdivx %fp0,%fp1 | ...1/(2 EXP(|X|))

fmovel %d1,%FPCR | the final add runs under the FPCR from d1
faddx %fp1,%fp0

bra t_frcinx

| |X| above the first threshold: either scale around the overflow of
| exp(|X|) or, above the second threshold, go straight to the overflow exit.
COSHBIG:
cmpil #0x400CB2B3,%d0 | key for the upper threshold
bgts COSHHUGE

fabsx %fp0
fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD)
fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE

movel %d1,-(%sp) | keep d1 across the call
clrl %d1 | setox is called with d1 = 0
fmovemx %fp0-%fp0,(%a0) | pass the reduced argument to setox
bsr setox
fmovel (%sp)+,%fpcr | pop the saved d1 value directly into FPCR

fmulx TWO16380(%pc),%fp0 | scale exp(|X| - 16381 log2) by 2**16380
bra t_frcinx

| Overflow case: hand the operand, forced positive, to t_ovfl.
COSHHUGE:
fmovel #0,%fpsr |clr N bit if set by source
bclrb #7,(%a0) |always return positive value
fmovemx (%a0),%fp0-%fp0
bra t_ovfl
 
|end
/x_store.S
0,0 → 1,256
|
| x_store.sa 3.2 1/24/91
|
| store --- store operand to memory or register
|
| Used by underflow and overflow handlers.
|
| a6 = points to fp value to be stored.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_STORE: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
fpreg_mask:
| Eight one-byte masks indexed by FP register number 0-7; entry n has only
| bit (7 - n) set. sto_fp loads one entry into d0 and uses it as the
| dynamic register list of an fmovemx.
.byte 0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01
 
.include "fpsp.h"
 
|xref mem_write
|xref get_fline
|xref g_opcls
|xref g_dfmtou
|xref reg_dest
 
.global dest_ext
.global dest_dbl
.global dest_sgl
 
.global store
store:
| Entry: a6 addresses the frame holding E_BYTE, CMDREG1B/CMDREG3B and the
| USER_FPn save slots; a0 -> the operand (its LOCAL_SGN and LOCAL_EX
| fields are used here).
| If the E3 bit of E_BYTE is set, the destination FP register number is
| taken from CMDREG3B; otherwise control goes to E1_sto.
| Overwrites d0 and a1 on the register path.
btstb #E3,E_BYTE(%a6)
beqs E1_sto
E3_sto:
movel CMDREG3B(%a6),%d0
bfextu %d0{#6:#3},%d0 |isolate dest. reg from cmdreg3b
| sto_fp: d0 = destination FP register number (0-7).
sto_fp:
lea fpreg_mask,%a1
moveb (%a1,%d0.w),%d0 |convert reg# to dynamic register mask
tstb LOCAL_SGN(%a0) |non-zero sign byte means negative
beqs is_pos
bsetb #sign_bit,LOCAL_EX(%a0) |fold the sign into the exponent word
is_pos:
fmovemx (%a0),%d0 |move to correct register
|
| if fp0-fp3 is being modified, we must put a copy
| in the USER_FPn variable on the stack because all exception
| handlers restore fp0-fp3 from there.
|
| The low byte of d0 still holds the one-bit mask, so each compare below
| identifies one register.
cmpb #0x80,%d0 |mask for fp0?
bnes not_fp0
fmovemx %fp0-%fp0,USER_FP0(%a6)
rts
not_fp0:
cmpb #0x40,%d0 |mask for fp1?
bnes not_fp1
fmovemx %fp1-%fp1,USER_FP1(%a6)
rts
not_fp1:
cmpb #0x20,%d0 |mask for fp2?
bnes not_fp2
fmovemx %fp2-%fp2,USER_FP2(%a6)
rts
not_fp2:
cmpb #0x10,%d0 |mask for fp3?
bnes not_fp3
fmovemx %fp3-%fp3,USER_FP3(%a6)
rts
| fp4-fp7: no USER_FPn copy is written.
not_fp3:
rts
 
| E1_sto: reached from store when the E3 bit is clear. Opclass 3 goes to
| the memory-destination code at opc011; any other opclass takes the
| destination FP register number from CMDREG1B and joins sto_fp.
E1_sto:
bsrl g_opcls |returns opclass in d0
cmpib #3,%d0
beq opc011 |branch if opclass 3
movel CMDREG1B(%a6),%d0
bfextu %d0{#6:#3},%d0 |extract destination register
bras sto_fp
 
| opc011: destination is not an FP register. Sets a1 = source operand and
| a0 = the address read from EXC_EA, then dispatches on the destination
| format returned by g_dfmtou.
opc011:
bsrl g_dfmtou |returns dest format in d0
| ;ext=00, sgl=01, dbl=10
movel %a0,%a1 |save source addr in a1
movel EXC_EA(%a6),%a0 |get the address
cmpil #0,%d0 |if dest format is extended
beq dest_ext |then branch
cmpil #1,%d0 |if dest format is single
beqs dest_sgl |then branch
|
| fall through to dest_dbl (any other format value)
|
 
|
| dest_dbl --- write double precision value to user space
|
|Input
| a0 -> destination address
| a1 -> source in extended precision
|Output
| a0 -> destroyed
| a1 -> destroyed
| d0 -> 0
|
|Changes extended precision to double precision.
| Note: no attempt is made to round the extended value to double.
| dbl_sign = ext_sign
| dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)
| get rid of ext integer bit
| dbl_mant = ext_mant bits 62..11 (52 bits, as in the diagram below)
|
| --------------- --------------- ---------------
| extended -> |s| exp | |1| ms mant | | ls mant |
| --------------- --------------- ---------------
| 95 64 63 62 32 31 11 0
| | |
| | |
| | |
| v v
| --------------- ---------------
| double -> |s|exp| mant | | mant |
| --------------- ---------------
| 63 51 32 31 0
|
| The double is assembled in place over the source operand: the high
| longword is stored at LOCAL_EX(a1) and the low longword at LOCAL_HI(a1),
| then 8 bytes starting at the source address are passed to mem_write.
| NOTE(review): this relies on LOCAL_EX being offset 0 and LOCAL_HI being
| the next longword - confirm in fpsp.h.
dest_dbl:
clrl %d0 |clear d0
movew LOCAL_EX(%a1),%d0 |get exponent
subw #0x3fff,%d0 |subtract extended precision bias
cmpw #0x4000,%d0 |check if inf
beqs inf |if so, special case
addw #0x3ff,%d0 |add double precision bias
swap %d0 |d0 now in upper word
lsll #4,%d0 |d0 now in proper place for dbl prec exp
tstb LOCAL_SGN(%a1)
beqs get_mant |if positive, go process mantissa
bsetl #31,%d0 |if negative, put in sign information
| ; before continuing
bras get_mant |go process mantissa
| Infinity: exponent field all ones, both mantissa parts zero.
inf:
movel #0x7ff00000,%d0 |load dbl inf exponent
clrl LOCAL_HI(%a1) |zero what becomes the low longword
tstb LOCAL_SGN(%a1)
beqs dbl_inf |if positive, go ahead and write it
bsetl #31,%d0 |if negative put in sign information
dbl_inf:
movel %d0,LOCAL_EX(%a1) |store the high longword of the double
bras dbl_wrt
get_mant:
movel LOCAL_HI(%a1),%d1 |get ms mantissa
bfextu %d1{#1:#20},%d1 |skip the integer bit, take the next 20 bits
orl %d1,%d0 |put these bits in ms word of double
movel %d0,LOCAL_EX(%a1) |store the high longword of the double
movel LOCAL_HI(%a1),%d1 |get ms mantissa
movel #21,%d0 |load shift count
lsll %d0,%d1 |put lower 11 bits in upper bits
movel %d1,LOCAL_HI(%a1) |build lower lword in memory
movel LOCAL_LO(%a1),%d1 |get ls mantissa
bfextu %d1{#0:#21},%d0 |top 21 bits of the ls mantissa
orl %d0,LOCAL_HI(%a1) |put them in double result
dbl_wrt:
movel #0x8,%d0 |byte count for double precision number
exg %a0,%a1 |a0=supervisor source, a1=user dest
bsrl mem_write |move the number to the user's memory
rts
|
| dest_sgl --- write single precision value to user space
|
|Input
| a0 -> destination address
| a1 -> source in extended precision
|
|Output
| a0 -> destroyed
| a1 -> destroyed
| d0 -> 0
|
|Changes extended precision to single precision.
| sgl_sign = ext_sign
| sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)
| get rid of ext integer bit
| sgl_mant = ext_mant bits 62..40 (23 bits, as in the diagram below)
|
| --------------- --------------- ---------------
| extended -> |s| exp | |1| ms mant | | ls mant |
| --------------- --------------- ---------------
| 95 64 63 62 40 32 31 12 0
| | |
| | |
| | |
| v v
| ---------------
| single -> |s|exp| mant |
| ---------------
| 31 22 0
|
| The single-precision image is built in d0, stored in L_SCR1(a6), and
| then either written to memory with mem_write or, when the destination
| address in a0 is zero, passed on to reg_dest for a data register.
dest_sgl:
clrl %d0
movew LOCAL_EX(%a1),%d0 |get exponent
subw #0x3fff,%d0 |subtract extended precision bias
cmpw #0x4000,%d0 |check if inf
beqs sinf |if so, special case
addw #0x7f,%d0 |add single precision bias
swap %d0 |put exp in upper word of d0
lsll #7,%d0 |shift it into single exp bits
tstb LOCAL_SGN(%a1)
beqs get_sman |if positive, continue
bsetl #31,%d0 |if negative, put in sign first
bras get_sman |get mantissa
| Infinity: exponent field all ones, mantissa zero.
sinf:
movel #0x7f800000,%d0 |load single inf exp to d0
tstb LOCAL_SGN(%a1)
beqs sgl_wrt |if positive, continue
bsetl #31,%d0 |if negative, put in sign info
bras sgl_wrt

get_sman:
movel LOCAL_HI(%a1),%d1 |get ms mantissa
bfextu %d1{#1:#23},%d1 |skip the integer bit, take the next 23 bits
orl %d1,%d0 |put these bits in ms word of single

sgl_wrt:
movel %d0,L_SCR1(%a6) |park the finished single value in L_SCR1
movel #0x4,%d0 |byte count for single precision number
tstl %a0 |users destination address
beqs sgl_Dn |destination is a data register
exg %a0,%a1 |a0=supervisor source, a1=user dest
leal L_SCR1(%a6),%a0 |point a0 to data
bsrl mem_write |move the number to the user's memory
rts
| Data-register destination: the register number is the low 3 bits of the
| word returned by get_fline; 0x10 is OR-ed in before the jump to reg_dest.
| NOTE(review): the meaning of the 0x10 size code is defined by reg_dest,
| which is not in this file - confirm there.
sgl_Dn:
bsrl get_fline |returns fline word in d0
andw #0x7,%d0 |isolate register number
movel %d0,%d1 |d1 has size:reg formatted for reg_dest
orl #0x10,%d1 |reg_dest wants size added to reg#
bral reg_dest |size is X, rts in reg_dest will
| ;return to caller of dest_sgl
| dest_ext --- write extended precision value to user space
| Entry: a0 -> destination address, a1 -> source operand.
| The sign byte is folded into the exponent word and then cleared, and
| 12 bytes starting at the source address are passed to mem_write.
dest_ext:
tstb LOCAL_SGN(%a1) |put back sign into exponent word
beqs dstx_cont
bsetb #sign_bit,LOCAL_EX(%a1)
dstx_cont:
clrb LOCAL_SGN(%a1) |clear out the sign byte

movel #0x0c,%d0 |byte count for extended number
exg %a0,%a1 |a0=supervisor source, a1=user dest
bsrl mem_write |move the number to the user's memory
rts
 
|end
/skeleton.S
0,0 → 1,568
|
| skeleton.sa 3.2 4/26/91
|
| This file contains code that is system dependent and will
| need to be modified to install the FPSP.
|
| Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'.
| Put any target system specific handling that must be done immediately
| before the jump instruction. If there is no handling necessary, then
| the 'fpsp_xxxx' handler entry point should be placed in the exception
| table so that the 'jmp' can be eliminated. If the FPSP determines that the
| exception is one that must be reported then there will be a
| return from the package by a 'jmp real_xxxx'. At that point
| the machine state will be identical to the state before
| the FPSP was entered. In particular, whatever condition
| that caused the exception will still be pending when the FPSP
| package returns. Thus, there will be system specific code
| to handle the exception.
|
| If the exception was completely handled by the package, then
| the return will be via a 'jmp fpsp_done'. Unless there is
| OS specific work to be done (such as handling a context switch or
| interrupt) the user program can be resumed via 'rte'.
|
| In the following skeleton code, some typical 'real_xxxx' handling
| code is shown. This code may need to be moved to an appropriate
| place in the target system, or rewritten.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|
| Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk)
|
 
#include <linux/linkage.h>
 
|SKELETON idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 15
|
| The following counters are used for standalone testing
|
| One longword per exception type. Each real_xxxx handler in this file
| adds 1 to its own counter; nothing in this file reads them back.
sigunimp: .long 0
sigbsun: .long 0
siginex: .long 0
sigdz: .long 0
sigunfl: .long 0
sigovfl: .long 0
sigoperr: .long 0
sigsnan: .long 0
sigunsupp: .long 0
 
|section 8
.include "fpsp.h"
 
| Offset of the orig-d0 longword from the stack pointer left by the
| register-save macro below: 7 registers (28 bytes) + d0 (4 bytes) = 0x20.
LOFF_ORIG_D0 = 0x20

| Register-save macro: pushes a zero stk_adj longword, then d0 twice (the
| orig-d0 slot and the d0 slot), then d1-d5/a0-a1.
#define SAVE_ALL \
clrl %sp@-; /* stk_adj */ \
movel %d0,%sp@-; /* orig d0 */ \
movel %d0,%sp@-; /* d0 */ \
moveml %d1-%d5/%a0-%a1,%sp@-
 
|xref b1238_fix
 
|
| Divide by Zero exception
|
| All dz exceptions are 'real', hence no fpsp_dz entry point.
|
.global dz
.global real_dz
dz:
real_dz:
| Open a temporary frame and fsave the FPU state so the E1 bit of E_BYTE
| can be cleared in the saved image, then frestore it and drop the frame.
link %a6,#-LOCAL_SIZE
fsave -(%sp)
bclrb #E1,E_BYTE(%a6)
frestore (%sp)+
unlk %a6

addl #1,sigdz |for standalone testing

| Hand the exception to trap_c: push the register frame, mark it as not
| being a syscall frame, and pass a pointer to it as the single argument.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
 
|
| Inexact exception
|
| All inexact exceptions are real, but the 'real' handler
| will probably want to clear the pending exception.
| The provided code will clear the E3 exception (if pending),
| otherwise clear the E1 exception. The frestore is not really
| necessary for E1 exceptions.
|
| Code following the 'inex' label is to handle bug #1232. In this
| bug, if an E1 snan, ovfl, or unfl occurred, and the process was
| swapped out before taking the exception, the exception taken on
| return was inex, rather than the correct exception. The snan, ovfl,
| and unfl exception to be taken must not have been enabled. The
| fix is to check for E1, and the existence of one of snan, ovfl,
| or unfl bits set in the fpsr. If any of these are set, branch
| to the appropriate handler for the exception in the fpsr. Note
| that this fix is only for d43b parts, and is skipped if the
| version number is not $40.
|
|
.global real_inex
.global inex
inex:
| Open a frame and fsave the FPU state. If the first byte of the saved
| state is not VER_40, jump into real_inex at not_fmt40, keeping this
| frame and fsave image (that path skips the siginex counter).
link %a6,#-LOCAL_SIZE
fsave -(%sp)
cmpib #VER_40,(%sp) |test version number
bnes not_fmt40
| Push FPSR; 2(sp) is then the third byte of that longword, which is
| where the snan/ovfl/unfl bits are tested.
fmovel %fpsr,-(%sp)
btstb #E1,E_BYTE(%a6) |test for E1 set
beqs not_b1232
btstb #snan_bit,2(%sp) |test for snan
beq inex_ckofl
addl #4,%sp | discard the FPSR copy
frestore (%sp)+
unlk %a6
bra snan
inex_ckofl:
btstb #ovfl_bit,2(%sp) |test for ovfl
beq inex_ckufl
addl #4,%sp | discard the FPSR copy
frestore (%sp)+
unlk %a6
bra ovfl
inex_ckufl:
btstb #unfl_bit,2(%sp) |test for unfl
beq not_b1232
addl #4,%sp | discard the FPSR copy
frestore (%sp)+
unlk %a6
bra unfl

|
| We do not have the bug 1232 case. Clean up the stack and call
| real_inex.
|
not_b1232:
addl #4,%sp | discard the FPSR copy
frestore (%sp)+
unlk %a6
| execution continues into real_inex, which follows
 
real_inex:
| Reached by falling out of inex, or by a branch from another handler.
| The label not_fmt40 below is a second entry used by inex with a frame
| and fsave image already in place.

addl #1,siginex |for standalone testing

link %a6,#-LOCAL_SIZE
fsave -(%sp)
not_fmt40:
bclrb #E3,E_BYTE(%a6) |clear and test E3 flag
beqs inex_cke1 | E3 was clear: go clear E1 instead
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
moveml %d0/%d1,USER_DA(%a6) | save d0/d1 around the fix-up
bfextu CMDREG1B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
moveml USER_DA(%a6),%d0/%d1 | restore d0/d1
bras inex_done
inex_cke1:
bclrb #E1,E_BYTE(%a6)
inex_done:
frestore (%sp)+
unlk %a6

| Hand the exception to trap_c: push the register frame, mark it as not
| being a syscall frame, and pass a pointer to it as the single argument.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
|
| Overflow exception
|
|xref fpsp_ovfl
.global real_ovfl
.global ovfl
| ovfl jumps to fpsp_ovfl; real_ovfl is the separate entry for an overflow
| that has to be reported.
ovfl:
jmp fpsp_ovfl
real_ovfl:

addl #1,sigovfl |for standalone testing

| Clear E3 in the saved FPU state if it was set, otherwise clear E1, then
| restore the state and drop the frame.
link %a6,#-LOCAL_SIZE
fsave -(%sp)
bclrb #E3,E_BYTE(%a6) |clear and test E3 flag
bnes ovfl_done | E3 was set: leave E1 alone
bclrb #E1,E_BYTE(%a6)
ovfl_done:
frestore (%sp)+
unlk %a6

| Hand the exception to trap_c with a pointer to the pushed register frame.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
|
| Underflow exception
|
|xref fpsp_unfl
.global real_unfl
.global unfl
| unfl jumps to fpsp_unfl; real_unfl is the separate entry for an underflow
| that has to be reported.
unfl:
jmp fpsp_unfl
real_unfl:

addl #1,sigunfl |for standalone testing

| Clear E3 in the saved FPU state if it was set, otherwise clear E1, then
| restore the state and drop the frame.
link %a6,#-LOCAL_SIZE
fsave -(%sp)
bclrb #E3,E_BYTE(%a6) |clear and test E3 flag
bnes unfl_done | E3 was set: leave E1 alone
bclrb #E1,E_BYTE(%a6)
unfl_done:
frestore (%sp)+
unlk %a6

| Hand the exception to trap_c with a pointer to the pushed register frame.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
|
| Signalling NAN exception
|
|xref fpsp_snan
.global real_snan
.global snan
| snan jumps to fpsp_snan; real_snan is the separate entry for a signalling
| NAN that has to be reported.
snan:
jmp fpsp_snan
real_snan:
| Clear E1 in the saved FPU state, restore it and drop the frame.
link %a6,#-LOCAL_SIZE
fsave -(%sp)
bclrb #E1,E_BYTE(%a6) |snan is always an E1 exception
frestore (%sp)+
unlk %a6

addl #1,sigsnan |for standalone testing

| Hand the exception to trap_c with a pointer to the pushed register frame.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
|
| Operand Error exception
|
|xref fpsp_operr
.global real_operr
.global operr
| operr jumps to fpsp_operr; real_operr is the separate entry for an
| operand error that has to be reported.
operr:
jmp fpsp_operr
real_operr:
| Clear E1 in the saved FPU state, restore it and drop the frame.
link %a6,#-LOCAL_SIZE
fsave -(%sp)
bclrb #E1,E_BYTE(%a6) |operr is always an E1 exception
frestore (%sp)+
unlk %a6

addl #1,sigoperr |for standalone testing


| Hand the exception to trap_c with a pointer to the pushed register frame.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
 
|
| BSUN exception
|
| This sample handler simply clears the nan bit in the FPSR.
|
|xref fpsp_bsun
.global real_bsun
.global bsun
| bsun jumps to fpsp_bsun; real_bsun is the separate entry for a BSUN
| exception that has to be reported.
bsun:
jmp fpsp_bsun
real_bsun:
| Clear E1 in the saved FPU state, clear the nan bit in the first byte of
| a copy of FPSR and write that copy back, then restore the FPU state and
| drop the frame.
link %a6,#-LOCAL_SIZE
fsave -(%sp)
bclrb #E1,E_BYTE(%a6) |bsun is always an E1 exception
fmovel %FPSR,-(%sp) | push FPSR
bclrb #nan_bit,(%sp) | clear the nan bit in its first byte
fmovel (%sp)+,%FPSR | pop the edited value back into FPSR
frestore (%sp)+
unlk %a6

addl #1,sigbsun |for standalone testing



| Hand the exception to trap_c with a pointer to the pushed register frame.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
 
|
| F-line exception
|
| A 'real' F-line exception is one that the FPSP isn't supposed to
| handle. E.g. an instruction with a co-processor ID that is not 1.
|
|
|xref fpsp_fline
.global real_fline
.global fline
| fline jumps to fpsp_fline; real_fline is the separate entry for an F-line
| exception that has to be reported. No FPU state is touched here.
fline:
jmp fpsp_fline
real_fline:

addl #1,sigunimp |for standalone testing


| Hand the exception to trap_c with a pointer to the pushed register frame.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
 
|
| Unsupported data type exception
|
|xref fpsp_unsupp
.global real_unsupp
.global unsupp
| unsupp jumps to fpsp_unsupp; real_unsupp is the separate entry for an
| unsupported data type exception that has to be reported.
unsupp:
jmp fpsp_unsupp
real_unsupp:
| Clear E1 in the saved FPU state, restore it and drop the frame.
link %a6,#-LOCAL_SIZE
fsave -(%sp)
bclrb #E1,E_BYTE(%a6) |unsupp is always an E1 exception
frestore (%sp)+
unlk %a6

addl #1,sigunsupp |for standalone testing


| Hand the exception to trap_c with a pointer to the pushed register frame.
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | a -1 in the ORIG_D0 field
| signifies that the stack frame
| is NOT for syscall
movel %sp,%sp@- | stack frame pointer argument
bsrl SYMBOL_NAME(trap_c)
addql #4,%sp | pop the argument
bral SYMBOL_NAME(ret_from_exception)
 
|
| Trace exception
|
.global real_trace
real_trace:
|
| No local handling: control passes straight to the trap entry.
| NOTE(review): trap is defined outside this file - presumably the generic
| kernel trap entry; confirm.
bral SYMBOL_NAME(trap)
 
|
| fpsp_fmt_error --- exit point for frame format error
|
| The fpu stack frame does not match the frames existing
| or planned at the time of this writing. The fpsp is
| unable to handle frame sizes not in the following
| version:size pairs:
|
| {4060, 4160} - busy frame
| {4028, 4130} - unimp frame
| {4000, 4100} - idle frame
|
| This entry point simply holds an f-line illegal value.
| Replace this with a call to your kernel panic code or
| code to handle future revisions of the fpu.
|
.global fpsp_fmt_error
fpsp_fmt_error:

| Not a real handler: the longword below is placed in the instruction
| stream as a placeholder, and nothing follows it to return or recover.
.long 0xf27f0000 |f-line illegal
 
|
| fpsp_done --- FPSP exit point
|
| The exception has been handled by the package and we are ready
| to return to user mode, but there may be OS specific code
| to execute before we do. If there is, do it now.
|
|
 
.global fpsp_done
fpsp_done:
| The byte at (sp) is the high byte of the saved SR, so bit 5 of it is the
| supervisor bit. An exception taken from supervisor mode returns at once.
btst #0x5,%sp@ | supervisor bit set in saved SR?
beq Lnotkern
rte
| From user mode: return directly unless need_resched is non-zero.
Lnotkern:
tstl SYMBOL_NAME(need_resched)
bne Lmustsched
rte
| Push the register frame, mark it as not being a syscall frame, and leave
| through ret_from_exception.
Lmustsched:
SAVE_ALL
moveq #-1,%d0
movel %d0,%sp@(LOFF_ORIG_D0) | indicate stack frame not for syscall
bral SYMBOL_NAME(ret_from_exception) | deliver signals, reschedule etc..
 
 
|
| mem_write --- write to user or supervisor address space
|
| Writes to memory while in supervisor mode. copyout accomplishes
| this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function.
| If you don't have copyout, use the local copy of the function below.
|
| a0 - supervisor source address
| a1 - user destination address
| d0 - number of bytes to write (maximum count is 12)
|
| The supervisor source address is guaranteed to point into the supervisor
| stack. The result is that a UNIX
| process is allowed to sleep as a consequence of a page fault during
| copyout. The probability of a page fault is exceedingly small because
| the 68040 always reads the destination address and thus the page
| faults should have already been handled.
|
| If the EXC_SR shows that the exception was from supervisor space,
| then just do a dumb (and slow) memory move. In a UNIX environment
| there shouldn't be any supervisor mode floating point exceptions.
|
.global mem_write
mem_write:
btstb #5,EXC_SR(%a6) |check for supervisor state
beqs user_write
| Supervisor destination: plain byte copy. The count in d0 is tested only
| after a byte has been moved, so d0 must be non-zero on entry.
super_write:
moveb (%a0)+,(%a1)+
subql #1,%d0
bnes super_write
rts
| User destination: call copyout with (source, destination, count) pushed
| in reverse order, then discard the 12 argument bytes and restore d1.
user_write:
movel %d1,-(%sp) |preserve d1 just in case
movel %d0,-(%sp) |argument 3: byte count
movel %a1,-(%sp) |argument 2: user destination
movel %a0,-(%sp) |argument 1: supervisor source
jsr copyout
addw #12,%sp |pop the three longword arguments
movel (%sp)+,%d1
rts
|
| mem_read --- read from user or supervisor address space
|
| Reads from memory while in supervisor mode. copyin accomplishes
| this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function.
| If you don't have copyin, use the local copy of the function below.
|
| The FPSP calls mem_read to read the original F-line instruction in order
| to extract the data register number when the 'Dn' addressing mode is
| used.
|
|Input:
| a0 - user source address
| a1 - supervisor destination address
| d0 - number of bytes to read (maximum count is 12)
|
| Like mem_write, mem_read always reads with a supervisor
| destination address on the supervisor stack. Also like mem_write,
| the EXC_SR is checked and a simple memory copy is done if reading
| from supervisor space is indicated.
|
.global mem_read
mem_read:
btstb #5,EXC_SR(%a6) |check for supervisor state
beqs user_read
| Supervisor source: plain byte copy. The count in d0 is tested only
| after a byte has been moved, so d0 must be non-zero on entry.
super_read:
moveb (%a0)+,(%a1)+
subql #1,%d0
bnes super_read
rts
| User source: call copyin with (source, destination, count) pushed in
| reverse order, then discard the 12 argument bytes and restore d1.
user_read:
movel %d1,-(%sp) |preserve d1 just in case
movel %d0,-(%sp) |argument 3: byte count
movel %a1,-(%sp) |argument 2: supervisor destination
movel %a0,-(%sp) |argument 1: user source
jsr copyin
addw #12,%sp |pop the three longword arguments
movel (%sp)+,%d1
rts
 
|
| Use these routines if your kernel doesn't have copyout/copyin equivalents.
| Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC,
| and copyin overwrites SFC.
|
copyout:
| Stack arguments above the return address:
|   4(sp) = supervisor source, 8(sp) = user destination, 12(sp) = count.
| Copies count bytes one at a time, storing each with movesb after DFC
| has been set to 1. Overwrites d0, d1, a0, a1 and DFC.
| The loop counter is the low word of (count - 1), so count must be at
| least 1.
moveql #1,%d1
movec %d1,%DFC | DFC = 1: user data space
movel 12(%sp),%d0 | d0 = count
subql #1,%d0 | dbra executes the loop d0.w + 1 times
movel 8(%sp),%a1 | a1 = user destination
movel 4(%sp),%a0 | a0 = supervisor source
1:
moveb (%a0)+,%d1 | read one supervisor byte
movesb %d1,(%a1)+ | store it through DFC
dbra %d0,1b
rts
 
copyin:
| Stack arguments above the return address:
|   4(sp) = user source, 8(sp) = supervisor destination, 12(sp) = count.
| Copies count bytes one at a time, loading each with movesb after SFC
| has been set to 1. Overwrites d0, d1, a0, a1 and SFC.
| The loop counter is the low word of (count - 1), so count must be at
| least 1.
moveql #1,%d1
movec %d1,%SFC | SFC = 1: user space
movel 12(%sp),%d0 | d0 = count
subql #1,%d0 | dbra executes the loop d0.w + 1 times
movel 8(%sp),%a1 | a1 = supervisor destination
movel 4(%sp),%a0 | a0 = user source
1:
movesb (%a0)+,%d1 | load one user byte through SFC
moveb %d1,(%a1)+ | store it in supervisor memory
dbra %d0,1b
rts
 
|end
/decbin.S
0,0 → 1,506
|
| decbin.sa 3.3 12/19/90
|
| Description: Converts normalized packed bcd value pointed to by
| register A6 to extended-precision value in FP0.
|
| Input: Normalized packed bcd value in ETEMP(a6).
|
| Output: Exact floating-point representation of the packed bcd value.
|
| Saves and Modifies: D2-D5
|
| Speed: The program decbin takes ??? cycles to execute.
|
| Object Size:
|
| External Reference(s): None.
|
| Algorithm:
| Expected is a normal bcd (i.e. non-exceptional; all inf, zero,
| and NaN operands are dispatched without entering this routine)
| value in 68881/882 format at location ETEMP(A6).
|
| A1. Convert the bcd exponent to binary by successive adds and muls.
| Set the sign according to SE. Subtract 16 to compensate
| for the mantissa which is to be interpreted as 17 integer
| digits, rather than 1 integer and 16 fraction digits.
| Note: this operation can never overflow.
|
| A2. Convert the bcd mantissa to binary by successive
| adds and muls in FP0. Set the sign according to SM.
| The mantissa digits will be converted with the decimal point
| assumed following the least-significant digit.
| Note: this operation can never overflow.
|
| A3. Count the number of leading/trailing zeros in the
| bcd string. If SE is positive, count the leading zeros;
| if negative, count the trailing zeros. Set the adjusted
| exponent equal to the exponent from A1 and the zero count
| added if SM = 1 and subtracted if SM = 0. Scale the
| mantissa the equivalent of forcing in the bcd value:
|
| SM = 0 a non-zero digit in the integer position
| SM = 1 a non-zero digit in Mant0, lsd of the fraction
|
| this will insure that any value, regardless of its
| representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted
| consistently.
|
| A4. Calculate the factor 10^exp in FP1 using a table of
| 10^(2^n) values. To reduce the error in forming factors
| greater than 10^27, a directed rounding scheme is used with
| tables rounded to RN, RM, and RP, according to the table
| in the comments of the pwrten section.
|
| A5. Form the final binary number by scaling the mantissa by
| the exponent factor. This is done by multiplying the
| mantissa in FP0 by the factor in FP1 if the adjusted
| exponent sign is positive, and dividing FP0 by FP1 if
| it is negative.
|
| Clean up and return. Check if the final mul or div resulted
| in an inex2 exception. If so, set inex1 in the fpsr and
| check if the inex1 exception is enabled. If so, set d7 upper
| word to $0100. This will signal unimp.sa that an enabled inex1
| exception occurred. Unimp will fix the stack.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|DECBIN idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|
| PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
| to nearest, minus, and plus, respectively. The tables include
| 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
| is required until the power is greater than 27, however, all
| tables include the first 5 for ease of indexing.
|
|xref PTENRN
|xref PTENRM
|xref PTENRP
 
| 16 one-byte entries. The pwrten register-usage notes give the index as
| {FPCR[6:5],SM,SE}, i.e. four entries per FPCR[6:5] value.
| NOTE(review): the entry values presumably select which of the RN/RM/RP
| power-of-ten tables is used - confirm in the pwrten code.
RTABLE: .byte 0,0,0,0
.byte 2,3,2,3
.byte 2,3,3,2
.byte 3,2,2,3
 
.global decbin
.global calc_e
.global pwrten
.global calc_m
.global norm
.global ap_st_z
.global ap_st_n
|
| Loop parameters for the digit-extraction loops. The counts are used with
| dbf, so a value of n gives n + 1 iterations.
.set FNIBS,7 | mantissa: 8 digits per longword
.set FSTRT,0 | mantissa: bit offset of the first digit
|
.set ESTRT,4 | exponent: bit offset of the first digit
.set EDIGITS,2 | exponent: 3 digits
|
| Constants in single precision
FZERO: .long 0x00000000
FONE: .long 0x3F800000
FTEN: .long 0x41200000

.set TEN,10 | integer multiplier used by calc_e
 
|
| decbin: entry point. Saves d2-d5 on the stack and continues into calc_e.
decbin:
| fmovel #0,FPCR ;clr real fpcr
moveml %d2-%d5,-(%a7)
|
| Calculate exponent:
| 1. Copy bcd value in memory for use as a working copy.
| 2. Calculate absolute value of exponent in d1 by mul and add.
| 3. Correct for exponent sign.
| 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
| (i.e., all digits assumed left of the decimal point.)
|
| Register usage:
|
| calc_e:
| (*) d0: temp digit storage
| (*) d1: accumulator for binary exponent
| (*) d2: digit count
| (*) d3: offset pointer
| ( ) d4: first word of bcd
| ( ) a0: pointer to working bcd value
| ( ) a6: pointer to original bcd value
| (*) FP_SCR1: working copy of original bcd value
| (*) L_SCR1: copy of original exponent word
|
| calc_e: copy the packed value from ETEMP to FP_SCR1, convert the three
| exponent digits to binary in d1, apply the exponent sign, subtract 16,
| and store the absolute value in L_SCR1 with SE updated to match.
calc_e:
movel #EDIGITS,%d2 |dbf count for the exponent digits
moveql #ESTRT,%d3 |counter to pick up digits
leal FP_SCR1(%a6),%a0 |load tmp bcd storage address
movel ETEMP(%a6),(%a0) |save input bcd value
movel ETEMP_HI(%a6),4(%a0) |save words 2 and 3
movel ETEMP_LO(%a6),8(%a0) |and work with these
movel (%a0),%d4 |get first word of bcd
clrl %d1 |zero d1 for accumulator
e_gd:
mulul #TEN,%d1 |mul partial product by one digit place
bfextu %d4{%d3:#4},%d0 |get the digit and zero extend into d0
addl %d0,%d1 |d1 = d1 + d0
addqb #4,%d3 |advance d3 to the next digit
dbf %d2,e_gd |if we have used all 3 digits, exit loop
btst #30,%d4 |get SE
beqs e_pos |don't negate if pos
negl %d1 |negate before subtracting
e_pos:
subl #16,%d1 |sub to compensate for shift of mant
bges e_save |if still pos, do not neg
negl %d1 |now negative, make pos and set SE
orl #0x40000000,%d4 |set SE in d4,
orl #0x40000000,(%a0) |and in working bcd
e_save:
movel %d1,L_SCR1(%a6) |save abs value of adjusted exp in memory
|
|
| Calculate mantissa:
| 1. Calculate absolute value of mantissa in fp0 by mul and add.
| 2. Correct for mantissa sign.
| (i.e., all digits assumed left of the decimal point.)
|
| Register usage:
|
| calc_m:
| (*) d0: temp digit storage
| (*) d1: lword counter
| (*) d2: digit count
| (*) d3: offset pointer
| ( ) d4: words 2 and 3 of bcd
| ( ) a0: pointer to working bcd value
| ( ) a6: pointer to original bcd value
| (*) fp0: mantissa accumulator
| ( ) FP_SCR1: working copy of original bcd value
| ( ) L_SCR1: copy of original exponent word
|
| calc_m: accumulate the 17 mantissa digits into fp0 (integer digit first,
| then 8 digits from each of the two following longwords), negate fp0 if
| the mantissa sign is set, and continue into ap_st_z.
calc_m:
moveql #1,%d1 |word counter, init to 1
fmoves FZERO,%fp0 |accumulator
|
|
| Since the packed number has a long word between the first & second parts,
| get the integer digit then skip down & get the rest of the
| mantissa. We will unroll the loop once.
|
bfextu (%a0){#28:#4},%d0 |integer part is ls digit in long word
faddb %d0,%fp0 |add digit to sum in fp0
|
|
| Get the rest of the mantissa.
|
loadlw:
movel (%a0,%d1.L*4),%d4 |load mantissa longword into d4
moveql #FSTRT,%d3 |counter to pick up digits
moveql #FNIBS,%d2 |reset number of digits per a0 ptr
md2b:
fmuls FTEN,%fp0 |fp0 = fp0 * 10
bfextu %d4{%d3:#4},%d0 |get the digit and zero extend
faddb %d0,%fp0 |fp0 = fp0 + digit
|
|
| If all the digits (8) in that long word have been converted (d2=0),
| then inc d1 (=2) to point to the next long word and reset d3 to 0
| to initialize the digit offset, and set d2 to 7 for the digit count;
| else continue with this long word.
|
addqb #4,%d3 |advance d3 to the next digit
dbf %d2,md2b |check for last digit in this lw
nextlw:
addql #1,%d1 |inc lw pointer in mantissa
cmpl #2,%d1 |test for last lw
ble loadlw |if not, get last one
|
| Check the sign of the mant and make the value in fp0 the same sign.
|
| btst on a memory operand tests one byte with the bit number taken
| modulo 8, so #31 here is bit 7 of the first byte of the working copy.
m_sign:
btst #31,(%a0) |test sign of the mantissa
beqs ap_st_z |if clear, go to append/strip zeros
fnegx %fp0 |if set, negate fp0
|
| Append/strip zeros:
|
| For adjusted exponents which have an absolute value greater than 27*,
| this routine calculates the amount needed to normalize the mantissa
| for the adjusted exponent. That number is subtracted from the exp
| if the exp was positive, and added if it was negative. The purpose
| of this is to reduce the value of the exponent and the possibility
| of error in calculation of pwrten.
|
| 1. Branch on the sign of the adjusted exponent.
| 2p.(positive exp)
| 2. Check M16 and the digits in lwords 2 and 3 in descending order.
| 3. Add one for each zero encountered until a non-zero digit.
| 4. Subtract the count from the exp.
| 5. Check if the exp has crossed zero in #3 above; make the exp abs
| and set SE.
| 6. Multiply the mantissa by 10**count.
| 2n.(negative exp)
| 2. Check the digits in lwords 3 and 2 in descending order.
| 3. Add one for each zero encountered until a non-zero digit.
| 4. Add the count to the exp.
| 5. Check if the exp has crossed zero in #3 above; clear SE.
| 6. Divide the mantissa by 10**count.
|
| *Why 27? If the adjusted exponent is within -28 < expA < 28, then
| any adjustment due to append/strip zeros will drive the resultant
| exponent towards zero. Since all pwrten constants with a power
| of 27 or less are exact, there is no need to use this routine to
| attempt to lessen the resultant exponent.
|
| Register usage:
|
| ap_st_z:
| (*) d0: temp digit storage
| (*) d1: zero count
| (*) d2: digit count
| (*) d3: offset pointer
| ( ) d4: first word of bcd
| (*) d5: lword counter
| ( ) a0: pointer to working bcd value
| ( ) FP_SCR1: working copy of original bcd value
| ( ) L_SCR1: copy of original exponent word
|
|
| First check the absolute value of the exponent to see if this
| routine is necessary. If so, then check the sign of the exponent
| and do append (+) or strip (-) zeros accordingly.
| This section handles a positive adjusted exponent.
|
| ap_st_z: L_SCR1 holds the absolute value of the adjusted exponent. If it
| is 27 or less, go straight to pwrten. Otherwise, for a positive exponent,
| count leading zero digits (M16, then lword 2, then lword 3), subtract
| the count from the exponent and multiply fp0 by 10**count.
ap_st_z:
movel L_SCR1(%a6),%d1 |load expA for range test
cmpl #27,%d1 |test is with 27
ble pwrten |if abs(expA) <28, skip ap/st zeros
btst #30,(%a0) |check sign of exp
bnes ap_st_n |if neg, go to neg side
clrl %d1 |zero count reg
movel (%a0),%d4 |load lword 1 to d4
bfextu %d4{#28:#4},%d0 |get M16 in d0
bnes ap_p_fx |if M16 is non-zero, go fix exp
addql #1,%d1 |inc zero count
moveql #1,%d5 |init lword counter
movel (%a0,%d5.L*4),%d4 |get lword 2 to d4
bnes ap_p_cl |if lw 2 is non-zero, scan its digits
addql #8,%d1 |lw 2 is all zeros: inc count by 8
addql #1,%d5 |inc lword counter
movel (%a0,%d5.L*4),%d4 |get lword 3 to d4
ap_p_cl:
clrl %d3 |init offset reg
moveql #7,%d2 |init digit counter
ap_p_gd:
bfextu %d4{%d3:#4},%d0 |get digit
bnes ap_p_fx |if non-zero, go to fix exp
addql #4,%d3 |point to next digit
addql #1,%d1 |inc zero count
dbf %d2,ap_p_gd |get next digit
ap_p_fx:
movel %d1,%d0 |copy counter to d0
movel L_SCR1(%a6),%d1 |get adjusted exp from memory
subl %d0,%d1 |subtract count from exp
bges ap_p_fm |if still pos, go to pwrten
negl %d1 |now its neg; get abs
movel (%a0),%d4 |load lword 1 to d4
orl #0x40000000,%d4 | and set SE in d4
orl #0x40000000,(%a0) | and in memory
|
| Calculate the mantissa multiplier to compensate for the stripping of
| zeros from the mantissa.
|
| d0 = zero count. Each set bit of d0, taken from the lsb upward, selects
| the next 12-byte PTENRN entry to multiply into fp1.
ap_p_fm:
movel #PTENRN,%a1 |get address of power-of-ten table
clrl %d3 |init table index
fmoves FONE,%fp1 |init fp1 to 1
moveql #3,%d2 |loaded here but not read in the loop below
ap_p_el:
asrl #1,%d0 |shift lsb into carry
bccs ap_p_en |skip the multiply if the bit was 0
fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no)
ap_p_en:
addl #12,%d3 |advance d3 by 12 to the next table entry
tstl %d0 |check if d0 is zero
bnes ap_p_el |if not, get next bit
fmulx %fp1,%fp0 |mul mantissa by 10**(no_bits_shifted)
bras pwrten |go calc pwrten
|
| This section handles a negative adjusted exponent.
|
| ap_st_n: negative-exponent side of the zero adjustment.
| Counts zero digits from the low end of the mantissa (last nibble of
| lword 3, then lword 2 if lword 3 is entirely zero), subtracts the
| count from the exponent magnitude in L_SCR1 and divides fp0 by
| 10**count.  Falls through into pwrten.
| Scratch: d0-d5, a1, fp1.
ap_st_n:
clrl %d1 |clr counter
moveql #2,%d5 |set up d5 to point to lword 3
movel (%a0,%d5.L*4),%d4 |get lword 3
bnes ap_n_cl |if not zero, check digits
subl #1,%d5 |dec d5 to point to lword 2
addql #8,%d1 |lword 3 is all zero: inc counter by 8
movel (%a0,%d5.L*4),%d4 |get lword 2
ap_n_cl:
movel #28,%d3 |point to last digit (bit offset 28)
moveql #7,%d2 |init digit counter (8 digits per lword)
ap_n_gd:
bfextu %d4{%d3:#4},%d0 |get digit
bnes ap_n_fx |if non-zero, go to exp fix
subql #4,%d3 |point to previous digit
addql #1,%d1 |inc zero count
dbf %d2,ap_n_gd |get next digit
ap_n_fx:
movel %d1,%d0 |copy counter to d0
movel L_SCR1(%a6),%d1 |get adjusted exp from memory
subl %d0,%d1 |subtract count from exp
bgts ap_n_fm |if still pos, go fix mantissa
negl %d1 |take abs of exp and clr SE
movel (%a0),%d4 |load lword 1 to d4
andl #0xbfffffff,%d4 | and clr SE in d4
andl #0xbfffffff,(%a0) | and in memory
|
| Calculate the mantissa multiplier to compensate for the appending of
| zeros to the mantissa.  d0 holds the zero count; each set bit k of
| d0 selects the table entry at offset 12*k from PTENRN.
|
ap_n_fm:
movel #PTENRN,%a1 |get address of power-of-ten table
clrl %d3 |init table index
fmoves FONE,%fp1 |init fp1 to 1
moveql #3,%d2 |d2 = 3 (not read by the loop below)
ap_n_el:
asrl #1,%d0 |shift lsb into carry
bccs ap_n_en |if the bit was 0, skip the multiply
fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no)
ap_n_en:
addl #12,%d3 |advance d3 by 12 bytes to next table entry
tstl %d0 |check if d0 is zero
bnes ap_n_el |if not, get next bit
fdivx %fp1,%fp0 |div mantissa by 10**(zero count)
|
|
| Calculate power-of-ten factor from adjusted and shifted exponent.
|
| Register usage:
|
| pwrten:
| (*) d0: temp
| ( ) d1: exponent
| (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
| (*) d3: FPCR work copy
| ( ) d4: first word of bcd
| (*) a1: RTABLE pointer
| calc_p:
| (*) d0: temp
| ( ) d1: exponent
| (*) d3: PWRTxx table index
| ( ) a0: pointer to working copy of bcd
| (*) a1: PWRTxx pointer
| (*) fp1: power-of-ten accumulator
|
| Pwrten calculates the exponent factor in the selected rounding mode
| according to the following table:
|
| Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
|
| ANY ANY RN RN
|
| + + RP RP
| - + RP RM
| + - RP RM
| - - RP RP
|
| + + RM RM
| - + RM RP
| + - RM RP
| - - RM RM
|
| + + RZ RM
| - + RZ RM
| + - RZ RP
| - - RZ RP
|
|
| pwrten: pick the rounding mode for the power-of-ten computation and
| build fp1 = 10**abs(d1).
| In:  d1 = exponent, a0 -> working bcd value.
| Out: fp1 = scaling factor; FPCR rewritten with only the rounding
|      bits fetched from RTABLE (all other FPCR bits zero); SE in (a0)
|      set if d1 was negative.
| Scratch: d0, d2, d3, d4, a1.
pwrten:
movel USER_FPCR(%a6),%d3 |get user's FPCR
bfextu %d3{#26:#2},%d2 |isolate rounding mode bits
movel (%a0),%d4 |reload 1st bcd word to d4
asll #2,%d2 |format d2 to be
bfextu %d4{#0:#2},%d0 | {FPCR[6],FPCR[5],SM,SE}
addl %d0,%d2 |in d2 as index into RTABLE
leal RTABLE,%a1 |load rtable base
moveb (%a1,%d2),%d0 |load new rounding bits from table
clrl %d3 |clear d3 to force no exc and extended
bfins %d0,%d3{#26:#2} |stuff new rounding bits in FPCR
fmovel %d3,%FPCR |write new FPCR
| Select the PTENxx table from the two rounding bits just loaded:
| bit 0 set -> PTENRP, else bit 1 set -> PTENRM, else PTENRN.
asrl #1,%d0 |bit 0 of rounding bits into carry
bccs not_rp |bit 0 clear: not RP
leal PTENRP,%a1 |it is RP
bras calc_p |go to init section
not_rp:
asrl #1,%d0 |bit 1 of rounding bits into carry
bccs not_rm |bit 1 clear: not RM
leal PTENRM,%a1 |it is RM
bras calc_p |go to init section
not_rm:
leal PTENRN,%a1 |it is RN
| calc_p: fp1 = product of the table entries selected by the set bits
| of abs(d1); entry k sits at offset 12*k from a1.
calc_p:
movel %d1,%d0 |copy exp to d0;use d0
bpls no_neg |if exp is negative,
negl %d0 |invert it
orl #0x40000000,(%a0) |and set SE bit
no_neg:
clrl %d3 |table index
fmoves FONE,%fp1 |init fp1 to 1
e_loop:
asrl #1,%d0 |shift next bit into carry
bccs e_next |if zero, skip the mul
fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no)
e_next:
addl #12,%d3 |advance d3 by 12 bytes to next table entry
tstl %d0 |check if d0 is zero
bnes e_loop |not zero, continue shifting
|
|
| Check the sign of the adjusted exp and make the value in fp0 the
| same sign. If the exp was pos then multiply fp1*fp0;
| else divide fp0/fp1.
|
| Register Usage:
| norm:
| ( ) a0: pointer to working bcd value
| (*) fp0: mantissa accumulator
| ( ) fp1: scaling factor - 10**(abs(exp))
|
| norm: apply the scaling factor in fp1 to the mantissa in fp0.
| The SE bit (bit 30 of the first bcd long at a0) selects divide
| (SE set) or multiply (SE clear).  The multiply path falls through
| to end_dec.
norm:
btst #30,(%a0) |test the sign of the exponent
beqs mul |if clear, go to multiply
div:
fdivx %fp1,%fp0 |exp is negative, so divide mant by 10**abs(exp)
bras end_dec
mul:
fmulx %fp1,%fp0 |exp is positive, so multiply by 10**exp
|
|
| Clean up and return with result in fp0.
|
| If the final mul/div in decbin incurred an inex exception,
| it will be inex2, but will be reported as inex1 by get_op.
|
| end_dec: strip inex2 from the live FPSR and, if it was set, record
| inex1/ainex in USER_FPSR instead.  Then pop d2-d5 and return with
| the result in fp0.
end_dec:
fmovel %FPSR,%d0 |get status register
bclrl #inex2_bit+8,%d0 |test for inex2 and clear it (Z = bit was 0)
fmovel %d0,%FPSR |return status reg w/o inex2
beqs no_exc |uses Z from the bclrl above: skip if no inex2
orl #inx1a_mask,USER_FPSR(%a6) |set inex1/ainex
no_exc:
moveml (%a7)+,%d2-%d5 |pop d2-d5 (presumably pushed at routine entry; not shown here)
rts
|end
/sgetem.S
0,0 → 1,141
|
| sgetem.sa 3.1 12/10/90
|
| The entry point sGETEXP returns the exponent portion
| of the input argument. The exponent bias is removed
| and the exponent value is returned as an extended
| precision number in fp0. sGETEXPD handles denormalized
| numbers.
|
| The entry point sGETMAN extracts the mantissa of the
| input argument. The mantissa is converted to an
| extended precision number and returned in fp0. The
| range of the result is [1.0 - 2.0).
|
|
| Input: Double-extended number X in the ETEMP space in
| the floating-point save stack.
|
| Output: The functions return exp(X) or man(X) in fp0.
|
| Modified: fp0.
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SGETEM idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref nrm_set
 
|
| This entry point is used by the unimplemented instruction exception
| handler. It points a0 to the input operand.
|
|
|
| SGETEXP
|
 
| sgetexp: return the unbiased exponent of the operand in fp0.
| In:  a0 -> operand; the sign/exponent word is read at LOCAL_EX(a0).
| Out: fp0 = (exponent field) - 0x3fff, converted from a word integer.
| Scratch: d0.
.global sgetexp
sgetexp:
movew LOCAL_EX(%a0),%d0 |get the sign/exponent word
bclrl #15,%d0 |clear the sign bit
subw #0x3fff,%d0 |subtract off the bias
fmovew %d0,%fp0 |move the exp to fp0
rts
 
| sgetexpd: sgetexp for a denormalized operand.
| Clears the sign bit of the operand in memory, calls nrm_set
| (defined elsewhere) to normalize it in place, then returns the
| resulting exponent minus the bias in fp0.
| In:  a0 -> operand (modified in place).
| Scratch: d0, plus whatever nrm_set uses (not visible here).
.global sgetexpd
sgetexpd:
bclrb #sign_bit,LOCAL_EX(%a0) |drop the sign so only the exponent remains
bsr nrm_set |normalize (exp will go negative)
movew LOCAL_EX(%a0),%d0 |load resulting exponent into d0
subw #0x3fff,%d0 |subtract off the bias
fmovew %d0,%fp0 |move the exp to fp0
rts
|
|
| This entry point is used by the unimplemented instruction exception
| handler. It points a0 to the input operand.
|
|
|
| SGETMAN
|
|
| For normalized numbers, leave the mantissa alone, simply load
| with an exponent of +/- $3fff.
|
| sgetman: return the mantissa of the operand as an extended value
| whose exponent field is $3fff, keeping the operand's sign.
| In:  a0 -> operand; its sign/exponent word is overwritten in place.
| Out: fp0 = sign and mantissa of X with exponent $3fff.
| Side effect: FPCR is loaded with the user's FPCR with its low byte
| cleared.  Scratch: d0.
.global sgetman
sgetman:
movel USER_FPCR(%a6),%d0
andil #0xffffff00,%d0 |clear rounding precision and mode
fmovel %d0,%fpcr |this fpcr setting is used by the 882
movew LOCAL_EX(%a0),%d0 |get the exp (really just want sign bit)
orw #0x7fff,%d0 |set every exponent bit, keeping the sign
bclrl #14,%d0 |clear bit 14: exponent field is now $3fff
movew %d0,LOCAL_EX(%a0) |move the sign & exp back to fsave stack
fmovex (%a0),%fp0 |put new value back in fp0
rts
 
|
| For denormalized numbers, shift the mantissa until the j-bit = 1,
| then load the exponent with +/- $3fff.
|
| sgetmand: sgetman for a denormalized operand.
| Left-justifies the 64-bit mantissa with shft, stores it back through
| a0 and then continues in sgetman to supply the $3fff exponent.
| In:  a0 -> operand (mantissa and exponent word modified in place).
| Scratch: d0, d1.
.global sgetmand
sgetmand:
movel LOCAL_HI(%a0),%d0 |load ms mant in d0
movel LOCAL_LO(%a0),%d1 |load ls mant in d1
bsr shft |shift mantissa bits till msbit is set
movel %d0,LOCAL_HI(%a0) |put ms mant back on stack
movel %d1,LOCAL_LO(%a0) |put ls mant back on stack
bras sgetman
 
|
| SHFT
|
| Shifts the mantissa bits until msbit is set.
| input:
| ms mantissa part in d0
| ls mantissa part in d1
| output:
| shifted bits in d0 and d1
| shft: shift the 64-bit value d0:d1 left until bit 31 of d0 is set.
| In:  d0 = ms mantissa long, d1 = ls mantissa long.
| Out: d0:d1 shifted; returned unchanged if both are zero.
| d3, d5 and d6 are used as scratch but saved and restored here.
shft:
tstl %d0 |if any bits set in ms mant
bnes upper |then branch
| ;else no bits set in ms mant
tstl %d1 |test if any bits set in ls mant
bnes cont |if set then continue
bras shft_end |else return
| cont: ms long is zero, so the ls long becomes the ms long and only
| it needs shifting (d1 receives the old, all-zero d0).
cont:
movel %d3,-(%a7) |save d3
exg %d0,%d1 |shift ls mant to ms mant
bfffo %d0{#0:#32},%d3 |d3 = offset of first 1 bit in d0
lsll %d3,%d0 |shift first 1 to integer bit in ms mant
movel (%a7)+,%d3 |restore d3
bras shft_end
| upper: ms long is non-zero; shift both longs left by d3 and carry
| the top d3 bits of the ls long into the bottom of the ms long.
upper:

moveml %d3/%d5/%d6,-(%a7) |save registers
bfffo %d0{#0:#32},%d3 |d3 = offset of first 1 bit in ms mant
lsll %d3,%d0 |shift ms mant until j-bit is set
movel %d1,%d6 |save ls mant in d6
lsll %d3,%d1 |shift ls mant by count
movel #32,%d5
subl %d3,%d5 |d5 = 32 - shift count
lsrl %d5,%d6 |shift off all bits but those that will
| ;be shifted into ms mant
orl %d6,%d0 |shift the ls mant bits into the ms mant
moveml (%a7)+,%d3/%d5/%d6 |restore registers
shft_end:
rts
 
|end
/stwotox.S
0,0 → 1,427
|
| stwotox.sa 3.1 12/10/90
|
| stwotox --- 2**X
| stwotoxd --- 2**X for denormalized X
| stentox --- 10**X
| stentoxd --- 10**X for denormalized X
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The function values are returned in Fp0.
|
| Accuracy and Monotonicity: The returned result is within 2 ulps in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program stwotox takes approximately 190 cycles and the
| program stentox takes approximately 200 cycles.
|
| Algorithm:
|
| twotox
| 1. If |X| > 16480, go to ExpBig.
|
| 2. If |X| < 2**(-70), go to ExpSm.
|
| 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
| decompose N as
| N = 64(M + M') + j, j = 0,1,2,...,63.
|
| 4. Overwrite r := r * log2. Then
| 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
| Go to expr to compute that expression.
|
| tentox
| 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
|
| 2. If |X| < 2**(-70), go to ExpSm.
|
| 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
| N := round-to-int(y). Decompose N as
| N = 64(M + M') + j, j = 0,1,2,...,63.
|
| 4. Define r as
| r := ((X - N*L1)-N*L2) * L10
| where L1, L2 are the leading and trailing parts of log_10(2)/64
| and L10 is the natural log of 10. Then
| 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
| Go to expr to compute that expression.
|
| expr
| 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
|
| 2. Overwrite Fact1 and Fact2 by
| Fact1 := 2**(M) * Fact1
| Fact2 := 2**(M) * Fact2
| Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
|
| 3. Calculate P where 1 + P approximates exp(r):
| P = r + r*r*(A1+r*(A2+...+r*A5)).
|
| 4. Let AdjFact := 2**(M'). Return
| AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
| Exit.
|
| ExpBig
| 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
| underflow by Tiny * Tiny.
|
| ExpSm
| 1. Return 1 + X.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
| Constants for stwotox / stentox.
|
| BOUNDS1/BOUNDS2 hold the range limits in the same packed form the
| code compares against (exponent word in the upper half, top 16
| mantissa bits in the lower half).  The same four values appear as
| immediates in the cmpil range checks of stwotox and stentox; no
| reference to these two labels is visible in the code shown here.
BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10

| Read as double precision (fmuld) by TENMAIN.
L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10

| Read as extended precision (fmulx): trailing part of LOG2/64LOG10.
L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000

| Read as extended precision (fmulx) when forming R in stentox.
LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000

| Read as extended precision (fmulx) when forming R in stwotox.
LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

| Polynomial coefficients A5..A1 used by expr.  All are read as
| double precision (fmoved/faddd), so only the first two longs of
| EXPA1 are used by the code shown here.
EXPA5: .long 0x3F56C16D,0x6F7BD0B2
EXPA4: .long 0x3F811112,0x302C712C
EXPA3: .long 0x3FA55555,0x55554CC1
EXPA2: .long 0x3FC55555,0x55554A54
EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000

| NOTE(review): HUGE and TINY are not referenced by the code shown
| here (EXPBIG branches to t_ovfl / t_unfl instead) - confirm unused.
HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
 
| EXPTBL: 64 entries of 16 bytes, indexed by J (0..63), for 2^(J/64).
| As consumed by TWOMAIN/TENMAIN:
|   longs 0-2 : copied to FACT1 as an extended-precision value
|   long 3    : upper word -> first word of FACT2 (sign/exponent),
|               lower word -> upper word of FACT2's high mantissa long;
|               the rest of FACT2 is cleared by the code.
EXPTBL:
.long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
.long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
.long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
.long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
.long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
.long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
.long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
.long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
.long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
.long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
.long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
.long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
.long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
.long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
.long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
.long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
.long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
.long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
.long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
.long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
.long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
.long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
.long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
.long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
.long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
.long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
.long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
.long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
.long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
.long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
.long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
.long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
.long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
.long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
.long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
.long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
.long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
.long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
.long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
.long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
.long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
.long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
.long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
.long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
.long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
.long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
.long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
.long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
.long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
.long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
.long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
.long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
.long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
.long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
.long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
.long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
.long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
.long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
.long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
.long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
.long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
.long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
.long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
.long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
 
| Names for the scratch locations (offsets from a6) used by
| stwotox / stentox / expr.
| N: integer N produced by the fmovel conversion in TWOMAIN/TENMAIN.
.set N,L_SCR1

| X: saved copy of the input argument (extended precision).
.set X,FP_SCR1
.set XDCARE,X+2
.set XFRAC,X+4

| ADJFACT: extended value 2^(M'), assembled in TWOMAIN/TENMAIN and expr.
.set ADJFACT,FP_SCR2

| FACT1: leading part of 2^(J/64) from EXPTBL, exponent adjusted by M.
.set FACT1,FP_SCR3
.set FACT1HI,FACT1+4
.set FACT1LOW,FACT1+8

| FACT2: trailing part of 2^(J/64) from EXPTBL, exponent adjusted by M.
.set FACT2,FP_SCR4
.set FACT2HI,FACT2+4
.set FACT2LOW,FACT2+8
 
| xref t_unfl
|xref t_ovfl
|xref t_frcinx
 
| stwotoxd: 2**X for a denormalized X.  Returns 1 plus a tiny value
| that carries X's sign, then leaves through t_frcinx.
| In:  a0 -> X, d1 = FPCR value to install (user's rounding
|      mode/precision, supplied by the caller).
| Out: fp0 = result.  Scratch: d0.
.global stwotoxd
stwotoxd:
|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT

fmovel %d1,%fpcr | ...set user's rounding mode/precision
fmoves #0x3F800000,%fp0 | ...fp0 = 1.0 (single precision)
movel (%a0),%d0 |first long of X: sign/exponent word + next word
orl #0x00800001,%d0 |make it a small single-precision value, sign kept
fadds %d0,%fp0 | ...RETURN 1 + (tiny value with X's sign)
bra t_frcinx
 
| stwotox: 2**X for finite, non-zero X.
| In:  a0 -> X, d1 = FPCR value restored before the final operation
|      (in expr / EXPSM).
| Out: fp0 = result (via expr, or via EXPBORS for out-of-range X).
| Uses d0, a1, fp0, fp1 and the X/N/ADJFACT/FACT1/FACT2 scratch
| areas; d2 is saved and restored around its use.
.global stwotox
stwotox:
|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's

| Build the packed magnitude of X in d0: exponent word in the upper
| half, top 16 mantissa bits in the lower half, sign bit cleared.
movel (%a0),%d0
movew 4(%a0),%d0
fmovex %fp0,X(%a6)
andil #0x7FFFFFFF,%d0

cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
bges TWOOK1
bra EXPBORS

TWOOK1:
cmpil #0x400D80C0,%d0 | ...|X| <= 16480?
bles TWOMAIN
bra EXPBORS

TWOMAIN:
|--USUAL CASE, 2^(-70) <= |X| <= 16480

fmovex %fp0,%fp1
fmuls #0x42800000,%fp1 | ...64 * X
fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X)
movel %d2,-(%sp)
lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
movel N(%a6),%d0
movel %d0,%d2
andil #0x3F,%d0 | ...D0 IS J
asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
asrl #6,%d2 | ...d2 IS L, N = 64L + J
movel %d2,%d0
asrl #1,%d0 | ...D0 IS M
subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
addil #0x3FFF,%d2 |add the exponent bias to M'
movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
movel (%sp)+,%d2
|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
|--ADJFACT = 2^(M').
|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
|--NOTE(review): the only save/restore visible in this routine is d2
|--(pushed and popped above); the line above looks stale.

fmuls #0x3C800000,%fp1 | ...(1/64)*N
movel (%a1)+,FACT1(%a6)
movel (%a1)+,FACT1HI(%a6)
movel (%a1)+,FACT1LOW(%a6)
movew (%a1)+,FACT2(%a6)
clrw FACT2+2(%a6)

fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X)

movew (%a1)+,FACT2HI(%a6)
clrw FACT2HI+2(%a6)
clrl FACT2LOW(%a6)
addw %d0,FACT1(%a6) |add M to FACT1's exponent word
fmulx LOG2,%fp0 | ...FP0 IS R
addw %d0,FACT2(%a6) |add M to FACT2's exponent word

bra expr
 
| EXPBORS: X is outside the range handled by the main path.
| d0 still holds the packed magnitude of X (exponent word : top 16
| mantissa bits).  Magnitudes above 0x3FFF8000 go to EXPBIG, the rest
| to EXPSM.
EXPBORS:
|--FPCR, D0 SAVED
cmpil #0x3FFF8000,%d0
bgts EXPBIG

EXPSM:
|--|X| IS SMALL, RETURN 1 + X

fmovel %d1,%FPCR |restore users exceptions
fadds #0x3F800000,%fp0 | ...RETURN 1 + X

bra t_frcinx

EXPBIG:
|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
|--REGISTERS SAVE SO FAR ARE FPCR AND D0
movel X(%a6),%d0 |first long of the saved X; its sign bit is X's sign
cmpil #0,%d0
blts EXPNEG

bclrb #7,(%a0) |t_ovfl expects positive value
bra t_ovfl

EXPNEG:
bclrb #7,(%a0) |t_unfl expects positive value
bra t_unfl
 
| stentoxd: 10**X for a denormalized X.  Same instruction sequence as
| stwotoxd: returns 1 plus a tiny value carrying X's sign, then leaves
| through t_frcinx.
| In:  a0 -> X, d1 = FPCR value to install (user's rounding
|      mode/precision, supplied by the caller).
| Out: fp0 = result.  Scratch: d0.
.global stentoxd
stentoxd:
|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT

fmovel %d1,%fpcr | ...set user's rounding mode/precision
fmoves #0x3F800000,%fp0 | ...fp0 = 1.0 (single precision)
movel (%a0),%d0 |first long of X: sign/exponent word + next word
orl #0x00800001,%d0 |make it a small single-precision value, sign kept
fadds %d0,%fp0 | ...RETURN 1 + (tiny value with X's sign)
bra t_frcinx
 
| stentox: 10**X for finite, non-zero X.
| In:  a0 -> X, d1 = FPCR value restored before the final operation
|      (in expr / EXPSM).
| Out: fp0 = result.  The main path falls straight through into expr;
|      out-of-range X leaves through EXPBORS.
| Uses d0, a1, fp0-fp2 and the X/N/ADJFACT/FACT1/FACT2 scratch areas;
| d2 is saved and restored around its use.
.global stentox
stentox:
|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's

| Build the packed magnitude of X in d0: exponent word in the upper
| half, top 16 mantissa bits in the lower half, sign bit cleared.
movel (%a0),%d0
movew 4(%a0),%d0
fmovex %fp0,X(%a6)
andil #0x7FFFFFFF,%d0

cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
bges TENOK1
bra EXPBORS

TENOK1:
cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ?
bles TENMAIN
bra EXPBORS

TENMAIN:
|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10

fmovex %fp0,%fp1
fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2
fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2)
movel %d2,-(%sp)
lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
movel N(%a6),%d0
movel %d0,%d2
andil #0x3F,%d0 | ...D0 IS J
asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
asrl #6,%d2 | ...d2 IS L, N = 64L + J
movel %d2,%d0
asrl #1,%d0 | ...D0 IS M
subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
addil #0x3FFF,%d2 |add the exponent bias to M'
movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
movel (%sp)+,%d2

|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
|--ADJFACT = 2^(M').
|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
|--NOTE(review): the only save/restore visible in this routine is d2
|--(pushed and popped above); the line above looks stale.

fmovex %fp1,%fp2

fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD
movel (%a1)+,FACT1(%a6)

fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL

movel (%a1)+,FACT1HI(%a6)
movel (%a1)+,FACT1LOW(%a6)
fsubx %fp1,%fp0 | ...X - N L_LEAD
movew (%a1)+,FACT2(%a6)

fsubx %fp2,%fp0 | ...X - N L_TRAIL

clrw FACT2+2(%a6)
movew (%a1)+,FACT2HI(%a6)
clrw FACT2HI+2(%a6)
clrl FACT2LOW(%a6)

fmulx LOG10,%fp0 | ...FP0 IS R
addw %d0,FACT1(%a6) |add M to FACT1's exponent word
addw %d0,FACT2(%a6) |add M to FACT2's exponent word
 
| expr: common tail of stwotox and stentox.
| In:  fp0 = R, FACT1/FACT2 = the two parts of 2^M * 2^(J/64),
|      first word of ADJFACT = biased exponent of 2^(M'),
|      d1 = FPCR value to restore before the last multiply.
| Out: fp0 = ADJFACT * (FACT1 + (FACT1*(exp(R)-1) + FACT2)), then
|      leaves through t_frcinx.
| Overwrites fp1, fp2, fp3 and the rest of ADJFACT.
expr:
|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
|--FP0 IS R. THE FOLLOWING CODE COMPUTES
|-- 2**(M'+M) * 2**(J/64) * EXP(R)

fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...FP1 IS S = R*R

fmoved EXPA5,%fp2 | ...FP2 IS A5
fmoved EXPA4,%fp3 | ...FP3 IS A4

fmulx %fp1,%fp2 | ...FP2 IS S*A5
fmulx %fp1,%fp3 | ...FP3 IS S*A4

faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5
faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4

fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5)
fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4)

faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5)
fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4)

fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5))
faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4)
faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1

|--FINAL RECONSTRUCTION PROCESS
|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)

fmulx FACT1(%a6),%fp0 | ...FACT1*(EXP(R)-1)
faddx FACT2(%a6),%fp0 | ...+ FACT2
faddx FACT1(%a6),%fp0 | ...+ FACT1

fmovel %d1,%FPCR |restore users exceptions
| Complete ADJFACT as an extended value: exponent word already
| stored, mantissa = 0x80000000 00000000.
clrw ADJFACT+2(%a6)
movel #0x80000000,ADJFACT+4(%a6)
clrl ADJFACT+8(%a6)
fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT

bra t_frcinx
 
|end
/scale.S
0,0 → 1,371
|
| scale.sa 3.3 7/30/91
|
| The entry point sSCALE computes the destination operand
| scaled by the source operand. If the absolute value of
| the source operand is (>= 2^14) an overflow or underflow
| is returned.
|
| The entry point sscale is called from do_func to emulate
| the fscale unimplemented instruction.
|
| Input: Double-extended destination operand in FPTEMP,
| double-extended source operand in ETEMP.
|
| Output: The function returns scale(X,Y) to fp0.
|
| Modifies: fp0.
|
| Algorithm:
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SCALE idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref t_ovfl2
|xref t_unfl
|xref round
|xref t_resdnrm
 
| SRC_BNDS: lower/upper bounds for the cmp2w on the source exponent
| field in sscale ($3fff .. $400c inclusive).
SRC_BNDS: .short 0x3fff,0x400c

|
| This entry point is used by the unimplemented instruction exception
| handler.
|
|
|
| FSCALE
|
| sscale: entry.  Clears the FPCR, records the destination sign in
| L_SCR1 (non-zero = negative), leaves the destination exponent
| (sign stripped) in d1 and classifies the source exponent:
|   within SRC_BNDS        -> src_in
|   above SRC_BNDS         -> src_out
|   below SRC_BNDS         -> src_small (falls through)
| Scratch: d0, d1.
.global sscale
sscale:
fmovel #0,%fpcr |clr user enabled exc
clrl %d1
movew FPTEMP(%a6),%d1 |get dest exponent
smi L_SCR1(%a6) |use L_SCR1 to hold sign
andil #0x7fff,%d1 |strip sign
movew ETEMP(%a6),%d0 |check src bounds
andiw #0x7fff,%d0 |clr sign bit
cmp2w SRC_BNDS,%d0 |carry set = outside the bounds
bccs src_in
cmpiw #0x400c,%d0 |test for too large
bge src_out
|
| The source input is below 1, so we check for denormalized numbers
| and set unfl.
|
src_small:
moveb DTAG(%a6),%d0
andib #0xe0,%d0 |keep the top three bits of DTAG
tstb %d0
beqs no_denorm |all clear: return dest as is
st STORE_FLG(%a6) |dest already contains result
orl #unfl_mask,USER_FPSR(%a6) |set UNFL
den_done:
leal FPTEMP(%a6),%a0
bra t_resdnrm
no_denorm:
fmovel USER_FPCR(%a6),%FPCR
fmovex FPTEMP(%a6),%fp0 |simply return dest
rts
 
 
|
| Source is within 2^14 range. To perform the int operation,
| move it to d0.
|
| src_in: source exponent is within SRC_BNDS.  Converts the source to
| a long integer in d0 (round-to-zero), clears the FPSR and dispatches
| on the source sign.
| In:  d1 = dest exponent (sign stripped), L_SCR1 = dest sign flag.
src_in:
fmovex ETEMP(%a6),%fp0 |move in src for int
fmovel #rz_mode,%fpcr |force rz for src conversion
fmovel %fp0,%d0 |int src to d0
fmovel #0,%FPSR |clr status from above
tstw ETEMP(%a6) |check src sign
blt src_neg
|
| Source is positive. Add the src to the dest exponent.
| The result can be denormalized, if src = 0, or overflow,
| if the result of the add sets a bit in the upper word.
|
src_pos:
tstw %d1 |check for denorm
beq dst_dnrm
addl %d0,%d1 |add src to dest exp
beqs denorm |if zero, result is denorm
cmpil #0x7fff,%d1 |test for overflow
bges ovfl
tstb L_SCR1(%a6) |dest was negative?
beqs spos_pos
orw #0x8000,%d1 |yes: put the sign back on the exponent word
spos_pos:
movew %d1,FPTEMP(%a6) |result in FPTEMP
fmovel USER_FPCR(%a6),%FPCR
fmovex FPTEMP(%a6),%fp0 |write result to fp0
rts
| ovfl: exponent sum reached $7fff or more.  The unmodified
| destination is copied to ETEMP and t_ovfl2 takes over.
| NOTE(review): d1 is not read again on this path after the sign fix
| below - confirm whether t_ovfl2 expects it.
ovfl:
tstb L_SCR1(%a6)
beqs sovl_pos
orw #0x8000,%d1
sovl_pos:
movew FPTEMP(%a6),ETEMP(%a6) |result in ETEMP
movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
bra t_ovfl2
 
| denorm: the sum of the source and the destination exponent is
| exactly zero.
| In:  d1 = result exponent word (zero), L_SCR1 = dest sign flag,
|      FPTEMP_HI/LO = destination mantissa.
| If the mantissa's j-bit is set the value is returned directly in
| fp0 (nden_exit).  Otherwise the result is denormalized: it is
| placed in ETEMP, the underflow flag is recorded in USER_FPSR and
| t_resdnrm finishes with a0 -> ETEMP.
|
| The flag is set with unfl_mask, matching every other underflow path
| in this file; unfl_bit is a bit number used with btstb, not a mask
| for the FPSR long.
denorm:
tstb L_SCR1(%a6) |dest was negative?
beqs den_pos
orw #0x8000,%d1 |yes: put the sign back on the exponent word
den_pos:
tstl FPTEMP_HI(%a6) |check j bit
blts nden_exit |if set, not denorm
movew %d1,ETEMP(%a6) |input expected in ETEMP
movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
orl #unfl_mask,USER_FPSR(%a6) |set unfl
leal ETEMP(%a6),%a0
bra t_resdnrm
nden_exit:
movew %d1,FPTEMP(%a6) |result in FPTEMP
fmovel USER_FPCR(%a6),%FPCR
fmovex FPTEMP(%a6),%fp0 |write result to fp0
rts
 
|
| Source is negative. Add the src to the dest exponent.
| (The result exponent will be reduced). The result can be
| denormalized.
|
| src_neg: source is negative; d0 holds it as a negative integer.
| In:  d1 = dest exponent (sign stripped), L_SCR1 = dest sign flag.
| Sum zero -> denorm, sum negative -> fix_dnrm, otherwise the new
| exponent (with the dest sign) is stored and the result returned in
| fp0 under the user's FPCR.
src_neg:
addl %d0,%d1 |add src to dest
beqs denorm |if zero, result is denorm
blts fix_dnrm |if negative, result is
| ;needing denormalization
tstb L_SCR1(%a6) |dest was negative?
beqs sneg_pos
orw #0x8000,%d1 |yes: put the sign back on the exponent word
sneg_pos:
movew %d1,FPTEMP(%a6) |result in FPTEMP
fmovel USER_FPCR(%a6),%FPCR
fmovex FPTEMP(%a6),%fp0 |write result to fp0
rts
 
 
|
| The result exponent is below denorm value. Test for catastrophic
| underflow and force zero if true. If not, try to shift the
| mantissa right until a zero exponent exists.
|
| fix_dnrm: the result exponent in d1 is negative.
| Exponents below $ffc0 (-64) go to fix_unfl.  Otherwise the mantissa
| is shifted right one bit per loop pass while the exponent counts up
| to zero; any 1 bit shifted out is remembered in L_SCR2.
| d2 is pushed here and popped in tst_con.  Scratch: d0, d1.
fix_dnrm:
cmpiw #0xffc0,%d1 |lower bound for normalization
blt fix_unfl |if lower, catastrophic unfl
movew %d1,%d0 |use d0 for exp
movel %d2,-(%a7) |free d2 for norm
movel FPTEMP_HI(%a6),%d1
movel FPTEMP_LO(%a6),%d2
clrl L_SCR2(%a6)
fix_loop:
addw #1,%d0 |drive d0 to 0
lsrl #1,%d1 |while shifting the
roxrl #1,%d2 |mantissa to the right
bccs no_carry |carry clear: no 1 bit was shifted out
st L_SCR2(%a6) |use L_SCR2 to capture inex
no_carry:
tstw %d0 |it is finished when
blts fix_loop |d0 is zero or the mantissa
tstb L_SCR2(%a6)
beqs tst_zero
orl #unfl_inx_mask,USER_FPSR(%a6)
| ;set unfl, aunfl, ainex
|
| Test for zero. If zero, simply use fmove to return +/- zero
| to the fpu.
|
tst_zero:
clrw FPTEMP_EX(%a6)
tstb L_SCR1(%a6) |test for sign
beqs tst_con
orw #0x8000,FPTEMP_EX(%a6) |set sign bit
tst_con:
movel %d1,FPTEMP_HI(%a6)
movel %d2,FPTEMP_LO(%a6)
movel (%a7)+,%d2 |restore d2 pushed in fix_dnrm
tstl %d1
bnes not_zero
tstl FPTEMP_LO(%a6)
bnes not_zero
|
| Result is zero. Check for rounding mode to set lsb. If the
| mode is rp, and the zero is positive, return smallest denorm.
| If the mode is rm, and the zero is negative, return smallest
| negative denorm.
|
btstb #5,FPCR_MODE(%a6) |test if rm or rp
beqs no_dir
btstb #4,FPCR_MODE(%a6) |check which one
beqs zer_rm
zer_rp:
tstb L_SCR1(%a6) |check sign
bnes no_dir |if set, neg op, no inc
movel #1,FPTEMP_LO(%a6) |set lsb
bras sm_dnrm
zer_rm:
tstb L_SCR1(%a6) |check sign
beqs no_dir |if clr, pos op, no inc
movel #1,FPTEMP_LO(%a6) |set lsb
orl #neg_mask,USER_FPSR(%a6) |set N
bras sm_dnrm
no_dir:
fmovel USER_FPCR(%a6),%FPCR
fmovex FPTEMP(%a6),%fp0 |use fmove to set cc's
rts
 
|
| The rounding mode changed the zero to a smallest denorm. Call
| t_resdnrm with exceptional operand in ETEMP.
|
| sm_dnrm: copy the denormalized result from FPTEMP to ETEMP and let
| t_resdnrm finish with a0 -> ETEMP.  Shared exit for the zero-to-
| smallest-denorm cases and for not_zero.
sm_dnrm:
movel FPTEMP_EX(%a6),ETEMP_EX(%a6)
movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
leal ETEMP(%a6),%a0
bra t_resdnrm
 
|
| Result is still denormalized.
|
| not_zero: the shifted mantissa is non-zero, so the result is a
| denorm.  Sets unfl (and N when the dest sign flag is set) in
| USER_FPSR, then leaves through sm_dnrm.
not_zero:
orl #unfl_mask,USER_FPSR(%a6) |set unfl
tstb L_SCR1(%a6) |check for sign
beqs fix_exit
orl #neg_mask,USER_FPSR(%a6) |set N
fix_exit:
bras sm_dnrm
 
|
| The result has underflowed to zero. Return zero and set
| unfl, aunfl, and ainex.
|
| fix_unfl: catastrophic underflow.  Sets unfl, aunfl and ainex, then
| uses the rounding-mode bits in FPCR_MODE and the sign flag in
| L_SCR1 to choose the result:
|   rp and positive  -> smallest positive denorm (via sm_dnrm)
|   rm and negative  -> smallest negative denorm (via sm_dnrm)
|   anything else    -> signed zero in fp0, FP_SCR1 cleared
fix_unfl:
orl #unfl_inx_mask,USER_FPSR(%a6)
btstb #5,FPCR_MODE(%a6) |test if rm or rp
beqs no_dir2
btstb #4,FPCR_MODE(%a6) |check which one
beqs zer_rm2
zer_rp2:
tstb L_SCR1(%a6) |check sign
bnes no_dir2 |if set, neg op, no inc
clrl FPTEMP_EX(%a6)
clrl FPTEMP_HI(%a6)
movel #1,FPTEMP_LO(%a6) |set lsb
bras sm_dnrm |return smallest denorm
zer_rm2:
tstb L_SCR1(%a6) |check sign
beqs no_dir2 |if clr, pos op, no inc
movew #0x8000,FPTEMP_EX(%a6)
clrl FPTEMP_HI(%a6)
movel #1,FPTEMP_LO(%a6) |set lsb
orl #neg_mask,USER_FPSR(%a6) |set N
bra sm_dnrm |return smallest denorm

no_dir2:
tstb L_SCR1(%a6) |L_SCR1 was written by smi in sscale
bges pos_zero |not negative as a byte: positive dest
neg_zero:
clrl FP_SCR1(%a6) |clear the exceptional operand
clrl FP_SCR1+4(%a6) |for gen_except.
clrl FP_SCR1+8(%a6)
fmoves #0x80000000,%fp0 |return -0
rts
pos_zero:
clrl FP_SCR1(%a6) |clear the exceptional operand
clrl FP_SCR1+4(%a6) |for gen_except.
clrl FP_SCR1+8(%a6)
fmoves #0x00000000,%fp0 |return +0
rts
 
|
| The destination is a denormalized number. It must be handled
| by first shifting the bits in the mantissa until it is normalized,
| then adding the remainder of the source to the exponent.
|
| dst_dnrm: destination exponent is zero and the source is positive.
| In:  d0 = source as a positive integer, L_SCR1 = dest sign flag.
| Shifts the mantissa left one bit per loop pass, taking 1 off d0 each
| time, until either the j-bit becomes set (dst_norm) or d0 reaches
| zero (dst_fin).
| d2/d3 are pushed here and popped on both exits.
dst_dnrm:
moveml %d2/%d3,-(%a7)
movew FPTEMP_EX(%a6),%d1
movel FPTEMP_HI(%a6),%d2
movel FPTEMP_LO(%a6),%d3
dst_loop:
tstl %d2 |test for normalized result
blts dst_norm |exit loop if so
tstl %d0 |otherwise, test shift count
beqs dst_fin |if zero, shifting is done
subil #1,%d0 |dec src
lsll #1,%d3
roxll #1,%d2
bras dst_loop
|
| Destination became normalized. Simply add the remaining
| portion of the src to the exponent.
|
dst_norm:
addw %d0,%d1 |dst is normalized; add src
tstb L_SCR1(%a6) |dest was negative?
beqs dnrm_pos
orl #0x8000,%d1 |yes: set the sign bit of the exponent word
dnrm_pos:
movemw %d1,FPTEMP_EX(%a6)
moveml %d2,FPTEMP_HI(%a6)
moveml %d3,FPTEMP_LO(%a6)
fmovel USER_FPCR(%a6),%FPCR
fmovex FPTEMP(%a6),%fp0
moveml (%a7)+,%d2/%d3
rts

|
| Destination remained denormalized. Call t_resdnrm with
| exceptional operand in ETEMP.
|
dst_fin:
tstb L_SCR1(%a6) |check for sign
beqs dst_exit
orl #neg_mask,USER_FPSR(%a6) |set N
orl #0x8000,%d1 |set the sign bit of the exponent word
dst_exit:
movemw %d1,ETEMP_EX(%a6)
moveml %d2,ETEMP_HI(%a6)
moveml %d3,ETEMP_LO(%a6)
orl #unfl_mask,USER_FPSR(%a6) |set unfl
moveml (%a7)+,%d2/%d3
leal ETEMP(%a6),%a0
bra t_resdnrm
 
|
| Source is outside of 2^14 range. Test the sign and branch
| to the appropriate exception handler.
|
| src_out: source magnitude is above the SRC_BNDS range.
| In:  d1 = dest exponent (sign stripped), L_SCR1 = dest sign flag.
| The dest mantissa is copied to ETEMP and the dest sign/exponent
| word written over ETEMP's; the source sign (tested before it is
| overwritten) selects t_ovfl2 for a positive source or t_unfl, with
| a0 -> ETEMP, for a negative one.
src_out:
tstb L_SCR1(%a6) |dest was negative?
beqs scro_pos
orl #0x8000,%d1 |yes: set the sign bit of the exponent word
scro_pos:
movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
tstw ETEMP(%a6) |source sign, still intact in ETEMP
blts res_neg
res_pos:
movew %d1,ETEMP(%a6) |result in ETEMP
bra t_ovfl2
res_neg:
movew %d1,ETEMP(%a6) |result in ETEMP
leal ETEMP(%a6),%a0
bra t_unfl
|end
/x_unfl.S
0,0 → 1,269
|
| x_unfl.sa 3.4 7/1/91
|
| fpsp_unfl --- FPSP handler for underflow exception
|
| Trap disabled results
| For 881/2 compatibility, sw must denormalize the intermediate
| result, then store the result. Denormalization is accomplished
| by taking the intermediate result (which is always normalized) and
| shifting the mantissa right while incrementing the exponent until
| it is equal to the denormalized exponent for the destination
| format. After denormalization, the result is rounded to the
| destination format.
|
| Trap enabled results
| All trap disabled code applies. In addition the exceptional
| operand needs to be made available to the user with a bias of $6000
| added to the exponent.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_UNFL: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref denorm
|xref round
|xref store
|xref g_rndpr
|xref g_opcls
|xref g_dfmtou
|xref real_unfl
|xref real_inex
|xref fpsp_done
|xref b1238_fix
 
| fpsp_unfl: entry point of the underflow exception handler.
| Builds the local frame (link, fsave, save d0-d1/a0-a1, fp0-fp3 and
| the FP control registers), calls unf_res to compute and store the
| result, then:
|   underflow trap enabled   -> restore state and go to real_unfl
|   underflow trap disabled  -> ck_inex
.global fpsp_unfl
fpsp_unfl:
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)

|
bsrl unf_res |denormalize, round & store interm op
|
| If underflow exceptions are not enabled, check for inexact
| exception
|
btstb #unfl_bit,FPCR_ENABLE(%a6)
beqs ck_inex

btstb #E3,E_BYTE(%a6)
beqs no_e3_1 |E3 clear: skip the E3-only fixups
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
no_e3_1:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_unfl
|
| It is possible to have either inex2 or inex1 exceptions with the
| unfl. If the inex enable bit is set in the FPCR, and either
| inex2 or inex1 occurred, we must clean up and branch to the
| real inex handler.
|
| ck_inex: underflow trap is disabled.  If either of the two low bits
| is set in both FPCR_ENABLE and FPSR_EXCEPT, an inexact exception is
| taken: the vector is changed to INEX_VEC, state is restored and
| control goes to real_inex.  Otherwise continue at unfl_done.
ck_inex:
moveb FPCR_ENABLE(%a6),%d0
andb FPSR_EXCEPT(%a6),%d0 |keep exceptions both enabled and reported
andib #0x3,%d0 |only the two low bits (inex2/inex1)
beqs unfl_done

|
| Inexact enabled and reported, and we must take an inexact exception
|
take_inex:
btstb #E3,E_BYTE(%a6)
beqs no_e3_2 |E3 clear: skip the E3-only fixups
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
no_e3_2:
moveb #INEX_VEC,EXC_VEC+1(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_inex
 
| unfl_done: no trap is to be taken.  Clears E3 in the frame and
| finishes through fpsp_done.
|   E3 was set   -> fix up the frame (dirty bit, b1238_fix, FPSR
|                   shadow, sx_mask), restore state and frestore
|   E3 was clear -> e1_set: restore state without an frestore; the
|                   unlk alone resets the stack pointer
unfl_done:
bclrb #E3,E_BYTE(%a6) |Z reflects the bit's value before the clear
beqs e1_set |E3 was clear: take the E1 exit
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral fpsp_done
e1_set:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
unlk %a6
bral fpsp_done
|
| unf_res --- underflow result calculation
|
| Denormalizes and rounds the exceptional operand, hands it to the
| store routine, and updates the saved FPSR bytes in the frame.
|
| Input:  a6 = FPSP frame pointer; E_BYTE, CMDREG3B, FPCR_MODE,
|         FPSR_EXCEPT and WBTEMP/FPTEMP are read through it
| Output: result stored by 'store'; FPSR_CC (register destination
|         only) and FPSR_AEXCEPT updated in the frame
| Calls:  g_rndpr, denorm, round, g_opcls, g_dfmtou, store
| Uses:   d0, d1, a0 directly (the called routines may use more),
|         plus one longword of stack that is popped before 'round'
|
unf_res:
bsrl g_rndpr |returns RND_PREC in d0 0=ext,
| ;1=sgl, 2=dbl
| ;we need the RND_PREC in the
| ;upper word for round
|
| Build a longword on the stack: the word pushed second ends up at the
| lower address, so RND_PREC forms the upper word and 0 the lower word.
|
movew #0,-(%a7) |lower word of the longword = 0
movew %d0,-(%a7) |copy RND_PREC to stack
|
|
| If the exception bit set is E3, the exceptional operand from the
| fpu is in WBTEMP; else it is in FPTEMP.
|
btstb #E3,E_BYTE(%a6)
beqs unf_E1
unf_E3:
lea WBTEMP(%a6),%a0 |a0 now points to operand
|
| Test for fsgldiv and fsglmul. If the inst was one of these, then
| force the precision to extended (d0 = 0) for the denorm routine
| and force single precision for the round routine by overwriting
| the precision word saved on the stack.
|
movew CMDREG3B(%a6),%d1 |check for fsgldiv or fsglmul
andiw #0x7f,%d1 |keep the low 7 bits of the command word
cmpiw #0x30,%d1 |check for sgldiv
beqs unf_sgl
cmpiw #0x33,%d1 |check for sglmul
bnes unf_cont |if not, use fpcr prec in round
unf_sgl:
clrl %d0 |d0 = 0 = extended (see g_rndpr values)
movew #0x1,(%a7) |override g_rndpr precision
| ;force single
bras unf_cont
unf_E1:
lea FPTEMP(%a6),%a0 |a0 now points to operand
unf_cont:
bclrb #sign_bit,LOCAL_EX(%a0) |clear sign bit
sne LOCAL_SGN(%a0) |store sign
| ;bclr tests the bit before clearing it,
| ;so LOCAL_SGN = $ff if the sign was
| ;set and $00 otherwise
 
bsrl denorm |returns denorm, a0 points to it
|
| WARNING:
| ;d0 has guard,round sticky bit
| ;make sure that it is not corrupted
| ;before it reaches the round subroutine
| ;also ensure that a0 isn't corrupted
 
|
| Set up d1 for round subroutine d1 contains the PREC/MODE
| information respectively on upper/lower register halves.
|
bfextu FPCR_MODE(%a6){#2:#2},%d1 |get mode from FPCR
| ;mode in lower d1
| ;(field = bits 5:4 of the mode byte)
addl (%a7)+,%d1 |merge PREC/MODE
| ;also pops the longword pushed above
|
| WARNING: a0 and d0 are assumed to be intact between the denorm and
| round subroutines. All code between these two subroutines
| must not corrupt a0 and d0.
|
|
| Perform Round
| Input: a0 points to input operand
| d0{31:29} has guard, round, sticky
| d1{01:00} has rounding mode
| d1{17:16} has rounding precision
| Output: a0 points to rounded operand
|
 
bsrl round |returns rounded denorm at (a0)
|
| Differentiate between store to memory vs. store to register
|
unf_store:
bsrl g_opcls |returns opclass in d0{2:0}
cmpib #0x3,%d0 |opclass 3 = store to memory pending
bnes not_opc011
|
| At this point, a store to memory is pending
|
opc011:
bsrl g_dfmtou |result tested below: zero = extended
tstb %d0
beqs ext_opc011 |If extended, do not subtract
| ;If destination format is sgl/dbl,
tstb LOCAL_HI(%a0) |If rounded result is normal,don't
| ;subtract
| ;(tests the top bit of the mantissa)
bmis ext_opc011
subqw #1,LOCAL_EX(%a0) |account for denorm bias vs.
| ;normalized bias
| ; normalized denormalized
| ;single $7f $7e
| ;double $3ff $3fe
|
ext_opc011:
bsrl store |stores to memory
bras unf_done |finish up
 
|
| At this point, a store to a float register is pending
|
not_opc011:
bsrl store |stores to float register
| ;a0 is not corrupted on a store to a
| ;float register.
|
| Set the condition codes according to result
| (Z if both mantissa longwords are zero, N if the sign bit is set)
|
tstl LOCAL_HI(%a0) |check upper mantissa
bnes ck_sgn
tstl LOCAL_LO(%a0) |check lower mantissa
bnes ck_sgn
bsetb #z_bit,FPSR_CC(%a6) |set condition codes if zero
ck_sgn:
btstb #sign_bit,LOCAL_EX(%a0) |check the sign bit
beqs unf_done
bsetb #neg_bit,FPSR_CC(%a6)
 
|
| Finish.
| Accrued underflow is set only when inex2 is also set in the saved
| FPSR exception byte.
|
unf_done:
btstb #inex2_bit,FPSR_EXCEPT(%a6)
beqs no_aunfl
bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
no_aunfl:
rts
 
|end
/README
0,0 → 1,30
 
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
M68000 Hi-Performance Microprocessor Division
M68040 Software Package
 
M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
All rights reserved.
 
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
To the maximum extent permitted by applicable law,
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
PARTICULAR PURPOSE and any warranty against infringement with
regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
and any accompanying written materials.
 
To the maximum extent permitted by applicable law,
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
SOFTWARE. Motorola assumes no responsibility for the maintenance
and support of the SOFTWARE.
 
You are hereby granted a copyright license to use, modify, and
distribute the SOFTWARE so long as this entire notice is retained
without alteration in any modified and/or redistributed versions,
and that such modified versions are clearly identified as such.
No licenses are granted by implication, estoppel or otherwise
under any patents or trademarks of Motorola, Inc.
/fpsp.h
0,0 → 1,348
|
| fpsp.h 3.3 3.3
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
| fpsp.h --- stack frame offsets during FPSP exception handling
|
| These equates are used to access the exception frame, the fsave
| frame and any local variables needed by the FPSP package.
|
| All FPSP handlers begin by executing:
|
| link a6,#-LOCAL_SIZE
| fsave -(a7)
| movem.l d0-d1/a0-a1,USER_DA(a6)
| fmovem.x fp0-fp3,USER_FP0(a6)
| fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
|
| After initialization, the stack looks like this:
|
| A7 ---> +-------------------------------+
| | |
| | FPU fsave area |
| | |
| +-------------------------------+
| | |
| | FPSP Local Variables |
| | including |
| | saved registers |
| | |
| +-------------------------------+
| A6 ---> | Saved A6 |
| +-------------------------------+
| | |
| | Exception Frame |
| | |
| | |
|
| Positive offsets from A6 refer to the exception frame. Negative
| offsets refer to the Local Variable area and the fsave area.
| The fsave frame is also accessible from the top via A7.
|
| On exit, the handlers execute:
|
| movem.l USER_DA(a6),d0-d1/a0-a1
| fmovem.x USER_FP0(a6),fp0-fp3
| fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
| frestore (a7)+
| unlk a6
|
| and then either "bra fpsp_done" if the exception was completely
| handled by the package, or "bra real_xxxx" which is an external
| label to a routine that will process a real exception of the
| type that was generated. Some handlers may omit the "frestore"
| if the FPU state after the exception is idle.
|
| Sometimes the exception handler will transform the fsave area
| because it needs to report an exception back to the user. This
| can happen if the package is entered for an unimplemented float
| instruction that generates (say) an underflow. Alternatively,
| a second fsave frame can be pushed onto the stack and the
| handler exit code will reload the new frame and discard the old.
|
| The registers d0, d1, a0, a1 and fp0-fp3 are always saved and
| restored from the "local variable" area and can be used as
| temporaries. If a routine needs to change any
| of these registers, it should modify the saved copy and let
| the handler exit code restore the value.
|
|----------------------------------------------------------------------
|
| Local Variables on the stack
|
|
| Every name in this group is LV+n with 0 <= n < LOCAL_SIZE, i.e. a
| negative displacement from A6 into the area reserved by
| "link a6,#-LOCAL_SIZE".
|
.set LOCAL_SIZE,192 | bytes needed for local variables
.set LV,-LOCAL_SIZE | convenient base value
|
| Saved integer registers: four consecutive longwords. USER_DA and
| USER_D0 name the same offset.
|
.set USER_DA,LV+0 | save space for D0-D1,A0-A1
.set USER_D0,LV+0 | saved user D0
.set USER_D1,LV+4 | saved user D1
.set USER_A0,LV+8 | saved user A0
.set USER_A1,LV+12 | saved user A1
|
| Saved floating-point data registers, 12 bytes apart.
|
.set USER_FP0,LV+16 | saved user FP0
.set USER_FP1,LV+28 | saved user FP1
.set USER_FP2,LV+40 | saved user FP2
.set USER_FP3,LV+52 | saved user FP3
|
| Saved control registers: three consecutive longwords in the order
| FPCR, FPSR, FPIAR. The *_ENABLE, *_MODE, *_CC, ... names are byte
| offsets inside the saved FPCR/FPSR longwords.
|
.set USER_FPCR,LV+64 | saved user FPCR
.set FPCR_ENABLE,USER_FPCR+2 | FPCR exception enable
.set FPCR_MODE,USER_FPCR+3 | FPCR rounding mode control
.set USER_FPSR,LV+68 | saved user FPSR
.set FPSR_CC,USER_FPSR+0 | FPSR condition code
.set FPSR_QBYTE,USER_FPSR+1 | FPSR quotient
.set FPSR_EXCEPT,USER_FPSR+2 | FPSR exception
.set FPSR_AEXCEPT,USER_FPSR+3 | FPSR accrued exception
.set USER_FPIAR,LV+72 | saved user FPIAR
|
| Scratch areas. The FP_SCRn slots are 16 bytes apart.
|
.set FP_SCR1,LV+76 | room for a temporary float value
.set FP_SCR2,LV+92 | room for a temporary float value
.set L_SCR1,LV+108 | room for a temporary long value
.set L_SCR2,LV+112 | room for a temporary long value
|
| One-byte flags and temporaries at consecutive offsets 116..123.
|
.set STORE_FLG,LV+116 | NOTE(review): use not described here
.set BINDEC_FLG,LV+117 | used in bindec
.set DNRM_FLG,LV+118 | used in res_func
.set RES_FLG,LV+119 | used in res_func
.set DY_MO_FLG,LV+120 | dyadic/monadic flag
.set UFLG_TMP,LV+121 | temporary for uflag errata
.set CU_ONLY,LV+122 | cu-only flag
.set VER_TMP,LV+123 | temp holding for version number
.set L_SCR3,LV+124 | room for a temporary long value
.set FP_SCR3,LV+128 | room for a temporary float value
.set FP_SCR4,LV+144 | room for a temporary float value
.set FP_SCR5,LV+160 | room for a temporary float value
.set FP_SCR6,LV+176 | last 16 bytes of the area; presumably
| another temporary float value like
| FP_SCR1-FP_SCR5 -- confirm
|
| FP_SCR6 ends at LV+192 = LOCAL_SIZE, so the area is fully allocated;
| the next variable added must also increase LOCAL_SIZE.
|NEXT equ LV+192 ;need to increase LOCAL_SIZE
|
|--------------------------------------------------------------------------
|
| fsave offsets and bit definitions
|
| Offsets are defined from the end of an fsave because the last 10
| words of a busy frame are the same as the unimplemented frame.
|
|
| The LV-n names below lie under the local variable area, i.e. in the
| fsave frame. Lower-case names are bit numbers for use with bit
| instructions on the byte equate they follow.
|
.set CU_SAVEPC,LV-92 | micro-pc for CU (1 byte)
.set FPR_DIRTY_BITS,LV-91 | fpr dirty bits
|
.set WBTEMP,LV-76 | write back temp (12 bytes)
.set WBTEMP_EX,WBTEMP | wbtemp sign and exponent (2 bytes)
.set WBTEMP_HI,WBTEMP+4 | wbtemp mantissa [63:32] (4 bytes)
.set WBTEMP_LO,WBTEMP+8 | wbtemp mantissa [31:00] (4 bytes)
|
.set WBTEMP_SGN,WBTEMP+2 | used to store sign
|
.set FPSR_SHADOW,LV-64 | fpsr shadow reg
|
.set FPIARCU,LV-60 | Instr. addr. reg. for CU (4 bytes)
|
.set CMDREG2B,LV-52 | cmd reg for machine 2
.set CMDREG3B,LV-48 | cmd reg for E3 exceptions (2 bytes)
|
.set NMNEXC,LV-44 | NMNEXC (unsup,snan bits only)
.set nmn_unsup_bit,1 |
.set nmn_snan_bit,0 |
|
.set NMCEXC,LV-43 | NMNEXC & NMCEXC
.set nmn_operr_bit,7
.set nmn_ovfl_bit,6
.set nmn_unfl_bit,5
.set nmc_unsup_bit,4
.set nmc_snan_bit,3
.set nmc_operr_bit,2
.set nmc_ovfl_bit,1
.set nmc_unfl_bit,0
|
| STAG and WBTEMP_GRS name the same byte (LV-40).
|
.set STAG,LV-40 | source tag (1 byte)
.set WBTEMP_GRS,LV-40 | alias wbtemp guard, round, sticky
.set guard_bit,1 | guard bit is bit number 1
.set round_bit,0 | round bit is bit number 0
.set stag_mask,0xE0 | upper 3 bits are source tag type
.set denorm_bit,7 | bit determines if denorm or unnorm
.set etemp15_bit,4 | etemp exponent bit #15
.set wbtemp66_bit,2 | wbtemp mantissa bit #66
.set wbtemp1_bit,1 | wbtemp mantissa bit #1
.set wbtemp0_bit,0 | wbtemp mantissa bit #0
|
.set STICKY,LV-39 | holds sticky bit
.set sticky_bit,7
|
.set CMDREG1B,LV-36 | cmd reg for E1 exceptions (2 bytes)
.set kfact_bit,12 | distinguishes static/dynamic k-factor
| ;on packed move outs. NOTE: this
| ;equate only works when CMDREG1B is in
| ;a register.
|
.set CMDWORD,LV-35 | command word in cmd1b
.set direction_bit,5 | bit 0 in opclass
.set size_bit2,12 | bit 2 in size field
|
.set DTAG,LV-32 | dest tag (1 byte)
.set dtag_mask,0xE0 | upper 3 bits are dest type tag
.set fptemp15_bit,4 | fptemp exponent bit #15
|
.set WB_BYTE,LV-31 | holds WBTE15 bit (1 byte)
.set wbtemp15_bit,4 | wbtemp exponent bit #15
|
| E_BYTE and T_BYTE are adjacent bytes (LV-28, LV-27).
|
.set E_BYTE,LV-28 | holds E1 and E3 bits (1 byte)
.set E1,2 | which bit is E1 flag
.set E3,1 | which bit is E3 flag
.set SFLAG,0 | which bit is S flag
|
.set T_BYTE,LV-27 | holds T and U bits (1 byte)
.set XFLAG,7 | which bit is X flag
.set UFLAG,5 | which bit is U flag
.set TFLAG,4 | which bit is T flag
|
| FPTEMP and ETEMP are consecutive 12-byte operand images with the
| same internal layout as WBTEMP (_EX +0, _SGN +2, _HI +4, _LO +8).
| ETEMP ends exactly at LV.
|
.set FPTEMP,LV-24 | fptemp (12 bytes)
.set FPTEMP_EX,FPTEMP | fptemp sign and exponent (2 bytes)
.set FPTEMP_HI,FPTEMP+4 | fptemp mantissa [63:32] (4 bytes)
.set FPTEMP_LO,FPTEMP+8 | fptemp mantissa [31:00] (4 bytes)
|
.set FPTEMP_SGN,FPTEMP+2 | used to store sign
|
.set ETEMP,LV-12 | etemp (12 bytes)
.set ETEMP_EX,ETEMP | etemp sign and exponent (2 bytes)
.set ETEMP_HI,ETEMP+4 | etemp mantissa [63:32] (4 bytes)
.set ETEMP_LO,ETEMP+8 | etemp mantissa [31:00] (4 bytes)
|
.set ETEMP_SGN,ETEMP+2 | used to store sign
|
| Positive offsets from A6: fields of the exception frame.
|
.set EXC_SR,4 | exception frame status register
.set EXC_PC,6 | exception frame program counter
.set EXC_VEC,10 | exception frame vector (format+vector#)
.set EXC_EA,12 | exception frame effective address
|
|--------------------------------------------------------------------------
|
| FPSR/FPCR bits
|
|
| The *_bit equates are bit numbers within a single FPSR/FPCR byte.
| The matching *_mask equates further down are the same bits expressed
| as positions in the 32-bit FPSR.
|
| Condition code byte
|
.set neg_bit,3 | negative result
.set z_bit,2 | zero result
.set inf_bit,1 | infinity result
.set nan_bit,0 | not-a-number result
|
.set q_sn_bit,7 | sign bit of quotient byte
|
| Exception byte
|
.set bsun_bit,7 | branch on unordered
.set snan_bit,6 | signalling nan
.set operr_bit,5 | operand error
.set ovfl_bit,4 | overflow
.set unfl_bit,3 | underflow
.set dz_bit,2 | divide by zero
.set inex2_bit,1 | inexact result 2
.set inex1_bit,0 | inexact result 1
|
| Accrued exception byte
|
.set aiop_bit,7 | accrued illegal operation
.set aovfl_bit,6 | accrued overflow
.set aunfl_bit,5 | accrued underflow
.set adz_bit,4 | accrued divide by zero
.set ainex_bit,3 | accrued inexact
|
| FPSR individual bit masks
|
| Condition codes: mask = 1 << (24 + bit number)
|
.set neg_mask,0x08000000
.set z_mask,0x04000000
.set inf_mask,0x02000000
.set nan_mask,0x01000000
|
| Exceptions: mask = 1 << (8 + bit number)
|
.set bsun_mask,0x00008000 |
.set snan_mask,0x00004000
.set operr_mask,0x00002000
.set ovfl_mask,0x00001000
.set unfl_mask,0x00000800
.set dz_mask,0x00000400
.set inex2_mask,0x00000200
.set inex1_mask,0x00000100
|
| Accrued exceptions: mask = 1 << bit number
|
.set aiop_mask,0x00000080 | accrued illegal operation
.set aovfl_mask,0x00000040 | accrued overflow
.set aunfl_mask,0x00000020 | accrued underflow
.set adz_mask,0x00000010 | accrued divide by zero
.set ainex_mask,0x00000008 | accrued inexact
|
| FPSR combinations used in the FPSP
| (sums of the single-bit masks above; no two terms share a bit)
|
.set dzinf_mask,inf_mask+dz_mask+adz_mask
.set opnan_mask,nan_mask+operr_mask+aiop_mask
.set nzi_mask,0x01ffffff | clears N, Z, and I
| (when ANDed with the FPSR)
.set unfinx_mask,unfl_mask+inex2_mask+aunfl_mask+ainex_mask
.set unf2inx_mask,unfl_mask+inex2_mask+ainex_mask
| (unfinx_mask without aunfl_mask)
.set ovfinx_mask,ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
.set inx1a_mask,inex1_mask+ainex_mask
.set inx2a_mask,inex2_mask+ainex_mask
.set snaniop_mask,nan_mask+snan_mask+aiop_mask
.set naniop_mask,nan_mask+aiop_mask
.set neginf_mask,neg_mask+inf_mask
.set infaiop_mask,inf_mask+aiop_mask
.set negz_mask,neg_mask+z_mask
.set opaop_mask,operr_mask+aiop_mask
.set unfl_inx_mask,unfl_mask+aunfl_mask+ainex_mask
.set ovfl_inx_mask,ovfl_mask+aovfl_mask+ainex_mask
|
|--------------------------------------------------------------------------
|
| FPCR rounding modes
|
|
| Values as they appear in the FPCR mode byte: the precision values
| occupy bits 7:6 and the rounding direction values occupy bits 5:4.
|
.set x_mode,0x00 | round to extended
.set s_mode,0x40 | round to single
.set d_mode,0x80 | round to double
|
.set rn_mode,0x00 | round nearest
.set rz_mode,0x10 | round to zero
.set rm_mode,0x20 | round to minus infinity
.set rp_mode,0x30 | round to plus infinity
|
|--------------------------------------------------------------------------
|
| Miscellaneous equates
|
.set signan_bit,6 | signalling nan bit in mantissa
.set sign_bit,7 | sign: bit 7 of the first byte of the
| sign/exponent word
|
.set rnd_stky_bit,29 | round/sticky bit of mantissa
| this can only be used if in a data register
.set sx_mask,0x01800000 | set s and x bits in word $48
| as a longword OR at E_BYTE this sets bit 0
| of E_BYTE and bit 7 of the following byte
|
| Offsets within a 12-byte operand image; they match the _EX, _SGN,
| _HI and _LO offsets of WBTEMP, FPTEMP and ETEMP.
|
.set LOCAL_EX,0
.set LOCAL_SGN,2
.set LOCAL_HI,4
.set LOCAL_LO,8
.set LOCAL_GRS,12 | valid ONLY for FP_SCR1, FP_SCR2
|
|
.set norm_tag,0x00 | tag bits in {7:5} position
.set zero_tag,0x20
.set inf_tag,0x40
.set nan_tag,0x60
.set dnrm_tag,0x80
|
| fsave sizes and formats
|
.set VER_4,0x40 | fpsp compatible version numbers
| are in the $40s {$40-$4f}
.set VER_40,0x40 | original version number
.set VER_41,0x41 | revision version number
|
.set BUSY_SIZE,100 | size of busy frame
.set BUSY_FRAME,LV-BUSY_SIZE | start of busy frame
|
.set UNIMP_40_SIZE,44 | size of orig unimp frame
.set UNIMP_41_SIZE,52 | size of rev unimp frame
|
.set IDLE_SIZE,4 | size of idle frame
.set IDLE_FRAME,LV-IDLE_SIZE | start of idle frame
|
| exception vectors
| (values for the exception frame word at EXC_VEC, format+vector#)
|
.set TRACE_VEC,0x2024 | trace trap
.set FLINE_VEC,0x002C | real F-line
.set UNIMP_VEC,0x202C | unimplemented
.set INEX_VEC,0x00C4 | inexact; the handlers write only its
| low byte, to EXC_VEC+1
|
| 0x3C01 = 0x3FFF-0x3FE and 0x3F81 = 0x3FFF-0x7E. Presumably the
| extended-format biased exponents at which a double/single result
| stops being normalized -- confirm against the users of these equates.
|
.set dbl_thresh,0x3C01
.set sgl_thresh,0x3F81
|
/x_ovfl.S
0,0 → 1,186
|
| x_ovfl.sa 3.5 7/1/91
|
| fpsp_ovfl --- FPSP handler for overflow exception
|
| Overflow occurs when a floating-point intermediate result is
| too large to be represented in a floating-point data register,
| or when storing to memory, the contents of a floating-point
| data register are too large to be represented in the
| destination format.
|
| Trap disabled results
|
| If the instruction is move_out, then garbage is stored in the
| destination. If the instruction is not move_out, then the
| destination is not affected. For 68881 compatibility, the
| following values should be stored at the destination, based
| on the current rounding mode:
|
| RN Infinity with the sign of the intermediate result.
| RZ Largest magnitude number, with the sign of the
| intermediate result.
| RM For pos overflow, the largest pos number. For neg overflow,
| -infinity
| RP For pos overflow, +infinity. For neg overflow, the largest
| neg number
|
| Trap enabled results
| All trap disabled code applies. In addition the exceptional
| operand needs to be made available to the user's exception handler
| with a bias of $6000 subtracted from the exponent.
|
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_OVFL: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref ovf_r_x2
|xref ovf_r_x3
|xref store
|xref real_ovfl
|xref real_inex
|xref fpsp_done
|xref g_opcls
|xref b1238_fix
 
|
| fpsp_ovfl --- entry point of the overflow exception handler
|
| Saves the user state in the FPSP frame, calls ovf_adj to build and
| store the overflow result, then leaves through exactly one of:
|	real_ovfl	overflow enable bit set in FPCR_ENABLE
|	real_inex	else, inex2 enable bit set in FPCR_ENABLE
|	fpsp_done	otherwise
| Every exit reloads d0-d1/a0-a1, fp0-fp3 and fpcr/fpsr/fpiar from
| the frame and unlinks a6.
|
.global fpsp_ovfl
fpsp_ovfl:
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
 
|
| The 040 doesn't set the AINEX bit in the FPSR, the following
| line temporarily rectifies this error.
|
bsetb #ainex_bit,FPSR_AEXCEPT(%a6)
|
bsrl ovf_adj |build the overflow result & store it
|
| if overflow traps not enabled check for inexact exception
|
btstb #ovfl_bit,FPCR_ENABLE(%a6)
beqs ck_inex
|
| Overflow trap enabled. For an E3 exception, first clear the dest
| register's dirty bit, call b1238_fix, copy the saved FPSR into the
| shadow and set the s and x bits (sx_mask) in the fsave frame.
|
btstb #E3,E_BYTE(%a6)
beqs no_e3_1
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
no_e3_1:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_ovfl
|
| Overflow trap not enabled. If the inex2 enable bit is set in the
| FPCR we must clean up and branch to the real inex handler.
| Only the enable bit is tested here; the older test that also
| looked at FPSR_EXCEPT (inex2 or inex1) is kept commented out below.
|
ck_inex:
| move.b FPCR_ENABLE(%a6),%d0
| and.b FPSR_EXCEPT(%a6),%d0
| andi.b #$3,%d0
btstb #inex2_bit,FPCR_ENABLE(%a6)
beqs ovfl_exit
|
| Inexact enabled and reported, and we must take an inexact exception.
|
take_inex:
btstb #E3,E_BYTE(%a6)
beqs no_e3_2
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
no_e3_2:
moveb #INEX_VEC,EXC_VEC+1(%a6) |rewrite the low byte of the frame's
| ;format/vector word
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_inex
|
| No trap is to be taken: finish the instruction and return through
| fpsp_done.
|
ovfl_exit:
bclrb #E3,E_BYTE(%a6) |test and clear E3 bit
beqs e1_set |E3 was clear: take the E1 exit
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no
bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit
bsrl b1238_fix |test for bug1238 case
 
movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
orl #sx_mask,E_BYTE(%a6)
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral fpsp_done
|
| E1 exit: there is no frestore here, so unlk drops the fsave frame
| from the stack without reloading it.
| NOTE(review): presumably safe because the FPU state is idle on this
| path -- confirm.
|
e1_set:
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
unlk %a6
bral fpsp_done
 
|
| ovf_adj --- build the overflow result and store it
|
| Input:  a6 = FPSP frame pointer
| Output: result stored by 'store'; for opclass 3 the FPSR_CC byte
|         in the frame is left as it was on entry
| Calls:  g_opcls, ovf_r_x3 or ovf_r_x2, then 'store' as a tail
|         branch, so the rts in 'store' returns to ovf_adj's caller
| Uses:   d0, a0 and the first byte of L_SCR1 directly (the called
|         routines may use more)
|
ovf_adj:
|
| Have a0 point to the correct operand.
| (WBTEMP for an E3 exception, ETEMP otherwise)
|
btstb #E3,E_BYTE(%a6) |test E3 bit
beqs ovf_e1
 
lea WBTEMP(%a6),%a0
bras ovf_com
ovf_e1:
lea ETEMP(%a6),%a0
 
|
| Move the sign out of the exponent word: bclr tests the bit before
| clearing it, so sne leaves $ff in LOCAL_SGN if the sign was set and
| $00 otherwise.
|
ovf_com:
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0)
 
bsrl g_opcls |returns opclass in d0
cmpiw #3,%d0 |check for opclass3
bnes not_opc011
 
|
| FPSR_CC is saved and restored because ovf_r_x3 affects it. The
| CCs are defined to be 'not affected' for the opclass3 instruction.
|
moveb FPSR_CC(%a6),L_SCR1(%a6)
bsrl ovf_r_x3 |returns a0 pointing to result
moveb L_SCR1(%a6),FPSR_CC(%a6)
bral store |stores to memory or register
not_opc011:
bsrl ovf_r_x2 |returns a0 pointing to result
bral store |stores to memory or register
 
|end
/x_bsun.S
0,0 → 1,47
|
| x_bsun.sa 3.3 7/1/91
|
| fpsp_bsun --- FPSP handler for branch/set on unordered exception
|
| Copy the PC to FPIAR to maintain 881/882 compatibility
|
| The real_bsun handler will need to perform further corrective
| measures as outlined in the 040 User's Manual on pages
| 9-41f, section 9.8.3.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
X_BSUN: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref real_bsun
 
|
| fpsp_bsun --- entry point of the branch/set on unordered handler
|
| Builds the standard FPSP frame, replaces the saved FPIAR with the
| PC from the exception frame, reloads all saved registers (so the
| FPIAR register receives that PC) and branches to real_bsun.
|
.global fpsp_bsun
fpsp_bsun:
|
link %a6,#-LOCAL_SIZE
fsave -(%a7)
moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
fmovemx %fp0-%fp3,USER_FP0(%a6)
fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
 
|
| Overwrite the saved FPIAR; it is loaded into the register by the
| fmoveml below.
|
movel EXC_PC(%a6),USER_FPIAR(%a6)
|
moveml USER_DA(%a6),%d0-%d1/%a0-%a1
fmovemx USER_FP0(%a6),%fp0-%fp3
fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
frestore (%a7)+
unlk %a6
bral real_bsun
|
|end
/setox.S
0,0 → 1,865
|
| setox.sa 3.1 12/10/90
|
| The entry point setox computes the exponential of a value.
| setoxd does the same except the input value is a denormalized
| number. setoxm1 computes exp(X)-1, and setoxm1d computes
| exp(X)-1 for denormalized X.
|
| INPUT
| -----
| Double-extended value in memory location pointed to by address
| register a0.
|
| OUTPUT
| ------
| exp(X) or exp(X)-1 returned in floating-point register fp0.
|
| ACCURACY and MONOTONICITY
| -------------------------
| The returned result is within 0.85 ulps in 64 significant bit, i.e.
| within 0.5001 ulp to 53 bits if the result is subsequently rounded
| to double precision. The result is provably monotonic in double
| precision.
|
| SPEED
| -----
| Two timings are measured, both in the copy-back mode. The
| first one is measured when the function is invoked the first time
| (so the instructions and data are not in cache), and the
| second one is measured when the function is reinvoked at the same
| input argument.
|
| The program setox takes approximately 210/190 cycles for input
| argument X whose magnitude is less than 16380 log2, which
| is the usual situation. For the less common arguments,
| depending on their values, the program may run faster or slower --
| but no worse than 10% slower even in the extreme cases.
|
| The program setoxm1 takes approximately ???/??? cycles for input
| argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes
| approximately ???/??? cycles. For the less common arguments,
| depending on their values, the program may run faster or slower --
| but no worse than 10% slower even in the extreme cases.
|
| ALGORITHM and IMPLEMENTATION NOTES
| ----------------------------------
|
| setoxd
| ------
| Step 1. Set ans := 1.0
|
| Step 2. Return ans := ans + sign(X)*2^(-126). Exit.
| Notes: This will always generate one exception -- inexact.
|
|
| setox
| -----
|
| Step 1. Filter out extreme cases of input argument.
| 1.1 If |X| >= 2^(-65), go to Step 1.3.
| 1.2 Go to Step 7.
| 1.3 If |X| < 16380 log(2), go to Step 2.
| 1.4 Go to Step 8.
| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
| To avoid the use of floating-point comparisons, a
| compact representation of |X| is used. This format is a
| 32-bit integer, the upper (more significant) 16 bits are
| the sign and biased exponent field of |X|; the lower 16
| bits are the 16 most significant fraction (including the
| explicit bit) bits of |X|. Consequently, the comparisons
| in Steps 1.1 and 1.3 can be performed by integer comparison.
| Note also that the constant 16380 log(2) used in Step 1.3
| is also in the compact form. Thus taking the branch
| to Step 2 guarantees |X| < 16380 log(2). There is no harm
| to have a small number of cases where |X| is less than,
| but close to, 16380 log(2) and the branch to Step 8 is
| taken.
|
| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
| 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken)
| 2.2 N := round-to-nearest-integer( X * 64/log2 ).
| 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
| 2.4 Calculate M = (N - J)/64; so N = 64M + J.
| 2.5 Calculate the address of the stored value of 2^(J/64).
| 2.6 Create the value Scale = 2^M.
| Notes: The calculation in 2.2 is really performed by
|
| Z := X * constant
| N := round-to-nearest-integer(Z)
|
| where
|
| constant := single-precision( 64/log 2 ).
|
| Using a single-precision constant avoids memory access.
| Another effect of using a single-precision "constant" is
| that the calculated value Z is
|
| Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).
|
| This error has to be considered later in Steps 3 and 4.
|
| Step 3. Calculate X - N*log2/64.
| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
| Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximate
| the value -log2/64 to 88 bits of accuracy.
| b) N*L1 is exact because N is no longer than 22 bits and
| L1 is no longer than 24 bits.
| c) The calculation X+N*L1 is also exact due to cancellation.
| Thus, R is practically X+N(L1+L2) to full 64 bits.
| d) It is important to estimate how large can |R| be after
| Step 3.2.
|
| N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)
| X*64/log2 (1+eps) = N + f, |f| <= 0.5
| X*64/log2 - N = f - eps*X 64/log2
| X - N*log2/64 = f*log2/64 - eps*X
|
|
| Now |X| <= 16446 log2, thus
|
| |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64
| <= 0.57 log2/64.
| This bound will be used in Step 4.
|
| Step 4. Approximate exp(R)-1 by a polynomial
| p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
| Notes: a) In order to reduce memory access, the coefficients are
| made as "short" as possible: A1 (which is 1/2), A4 and A5
| are single precision; A2 and A3 are double precision.
| b) Even with the restrictions above,
| |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.
| Note that 0.0062 is slightly bigger than 0.57 log2/64.
| c) To fully utilize the pipeline, p is separated into
| two independent pieces of roughly equal complexities
| p = [ R + R*S*(A2 + S*A4) ] +
| [ S*(A1 + S*(A3 + S*A5)) ]
| where S = R*R.
|
| Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by
| ans := T + ( T*p + t)
| where T and t are the stored values for 2^(J/64).
| Notes: 2^(J/64) is stored as T and t where T+t approximates
| 2^(J/64) to roughly 85 bits; T is in extended precision
| and t is in single precision. Note also that T is rounded
| to 62 bits so that the last two bits of T are zero. The
| reason for such a special form is that T-1, T-2, and T-8
| will all be exact --- a property that will give much
| more accurate computation of the function EXPM1.
|
| Step 6. Reconstruction of exp(X)
| exp(X) = 2^M * 2^(J/64) * exp(R).
| 6.1 If AdjFlag = 0, go to 6.3
| 6.2 ans := ans * AdjScale
| 6.3 Restore the user FPCR
| 6.4 Return ans := ans * Scale. Exit.
| Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,
| |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will
| neither overflow nor underflow. If AdjFlag = 1, that
| means that
| X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.
| Hence, exp(X) may overflow or underflow or neither.
| When that is the case, AdjScale = 2^(M1) where M1 is
| approximately M. Thus 6.2 will never cause over/underflow.
| Possible exception in 6.4 is overflow or underflow.
| The inexact exception is not generated in 6.4. Although
| one can argue that the inexact flag should always be
| raised, simulating that exception costs more than the
| flag is worth in practical use.
|
| Step 7. Return 1 + X.
| 7.1 ans := X
| 7.2 Restore user FPCR.
| 7.3 Return ans := 1 + ans. Exit
| Notes: For non-zero X, the inexact exception will always be
| raised by 7.3. That is the only exception raised by 7.3.
| Note also that we use the FMOVEM instruction to move X
| in Step 7.1 to avoid unnecessary trapping. (Although
| the FMOVEM may not seem relevant since X is normalized,
| the precaution will be useful in the library version of
| this code where the separate entry for denormalized inputs
| will be done away with.)
|
| Step 8. Handle exp(X) where |X| >= 16380log2.
| 8.1 If |X| > 16480 log2, go to Step 9.
| (mimic 2.2 - 2.6)
| 8.2 N := round-to-integer( X * 64/log2 )
| 8.3 Calculate J = N mod 64, J = 0,1,...,63
| 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1.
| 8.5 Calculate the address of the stored value 2^(J/64).
| 8.6 Create the values Scale = 2^M, AdjScale = 2^M1.
| 8.7 Go to Step 3.
| Notes: Refer to notes for 2.2 - 2.6.
|
| Step 9. Handle exp(X), |X| > 16480 log2.
| 9.1 If X < 0, go to 9.3
| 9.2 ans := Huge, go to 9.4
| 9.3 ans := Tiny.
| 9.4 Restore user FPCR.
| 9.5 Return ans := ans * ans. Exit.
| Notes: Exp(X) will surely overflow or underflow, depending on
| X's sign. "Huge" and "Tiny" are respectively large/tiny
| extended-precision numbers whose square over/underflow
| with an inexact result. Thus, 9.5 always raises the
| inexact together with either overflow or underflow.
|
|
| setoxm1d
| --------
|
| Step 1. Set ans := 0
|
| Step 2. Return ans := X + ans. Exit.
| Notes: This will return X with the appropriate rounding
| precision prescribed by the user FPCR.
|
| setoxm1
| -------
|
| Step 1. Check |X|
| 1.1 If |X| >= 1/4, go to Step 1.3.
| 1.2 Go to Step 7.
| 1.3 If |X| < 70 log(2), go to Step 2.
| 1.4 Go to Step 10.
| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
| However, it is conceivable |X| can be small very often
| because EXPM1 is intended to evaluate exp(X)-1 accurately
| when |X| is small. For further details on the comparisons,
| see the notes on Step 1 of setox.
|
| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
| 2.1 N := round-to-nearest-integer( X * 64/log2 ).
| 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
| 2.3 Calculate M = (N - J)/64; so N = 64M + J.
| 2.4 Calculate the address of the stored value of 2^(J/64).
| 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M).
| Notes: See the notes on Step 2 of setox.
|
| Step 3. Calculate X - N*log2/64.
| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
| Notes: Applying the analysis of Step 3 of setox in this case
| shows that |R| <= 0.0055 (note that |X| <= 70 log2 in
| this case).
|
| Step 4. Approximate exp(R)-1 by a polynomial
| p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))
| Notes: a) In order to reduce memory access, the coefficients are
| made as "short" as possible: A1 (which is 1/2), A5 and A6
| are single precision; A2, A3 and A4 are double precision.
| b) Even with the restriction above,
| |p - (exp(R)-1)| < |R| * 2^(-72.7)
| for all |R| <= 0.0055.
| c) To fully utilize the pipeline, p is separated into
| two independent pieces of roughly equal complexity
| p = [ R*S*(A2 + S*(A4 + S*A6)) ] +
| [ R + S*(A1 + S*(A3 + S*A5)) ]
| where S = R*R.
|
| Step 5. Compute 2^(J/64)*p by
| p := T*p
| where T and t are the stored values for 2^(J/64).
| Notes: 2^(J/64) is stored as T and t where T+t approximates
| 2^(J/64) to roughly 85 bits; T is in extended precision
| and t is in single precision. Note also that T is rounded
| to 62 bits so that the last two bits of T are zero. The
| reason for such a special form is that T-1, T-2, and T-8
| will all be exact --- a property that will be exploited
| in Step 6 below. The total relative error in p is no
| bigger than 2^(-67.7) compared to the final result.
|
| Step 6. Reconstruction of exp(X)-1
| exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).
| 6.1 If M <= 63, go to Step 6.3.
| 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6
| 6.3 If M >= -3, go to 6.5.
| 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6
| 6.5 ans := (T + OnebySc) + (p + t).
| 6.6 Restore user FPCR.
| 6.7 Return ans := Sc * ans. Exit.
| Notes: The various arrangements of the expressions give accurate
| evaluations.
|
| Step 7. exp(X)-1 for |X| < 1/4.
| 7.1 If |X| >= 2^(-65), go to Step 9.
| 7.2 Go to Step 8.
|
| Step 8. Calculate exp(X)-1, |X| < 2^(-65).
| 8.1 If |X| < 2^(-16312), goto 8.3
| 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit.
| 8.3 X := X * 2^(140).
| 8.4 Restore FPCR; ans := ans - 2^(-16382).
| Return ans := ans*2^(-140). Exit
| Notes: The idea is to return "X - tiny" under the user
| precision and rounding modes. To avoid unnecessary
| inefficiency, we stay away from denormalized numbers the
| best we can. For |X| >= 2^(-16312), the straightforward
| 8.2 generates the inexact exception as the case warrants.
|
| Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial
| p = X + X*X*(B1 + X*(B2 + ... + X*B12))
| Notes: a) In order to reduce memory access, the coefficients are
| made as "short" as possible: B1 (which is 1/2), B9 to B12
| are single precision; B3 to B8 are double precision; and
| B2 is double extended.
| b) Even with the restriction above,
| |p - (exp(X)-1)| < |X| 2^(-70.6)
| for all |X| <= 0.251.
| Note that 0.251 is slightly bigger than 1/4.
| c) To fully preserve accuracy, the polynomial is computed
| as X + ( S*B1 + Q ) where S = X*X and
| Q = X*S*(B2 + X*(B3 + ... + X*B12))
| d) To fully utilize the pipeline, Q is separated into
| two independent pieces of roughly equal complexity
| Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +
| [ S*S*(B3 + S*(B5 + ... + S*B11)) ]
|
| Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.
| 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical
| purposes. Therefore, go to Step 1 of setox.
| 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes.
| ans := -1
| Restore user FPCR
| Return ans := ans + 2^(-126). Exit.
| Notes: 10.2 will always create an inexact and return -1 + tiny
| in the user rounding precision and mode.
|
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|setox idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
| Constant pool shared by setox and setoxm1.
|
| L2 is read as an extended-precision operand (fmulx) in Step 3, where
| L1 + L2 = -log2/64 and L1 is the single-precision immediate 0xBC317218.
L2: .long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000

| setox polynomial coefficients A3 and A2, read as doubles (faddd).
EXPA3: .long 0x3FA55555,0x55554431
EXPA2: .long 0x3FC55555,0x55554018

| Extended-precision values with an all-ones significand at biased
| exponents 0x7FFE and 0x0001.
| NOTE(review): not referenced in this part of the file -- confirm users.
HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000

| setoxm1 coefficients A4 and A3 for the EM1MAIN path, read as doubles.
EM1A4: .long 0x3F811111,0x11174385
EM1A3: .long 0x3FA55555,0x55554F5A

| setoxm1 coefficient A2.  It is only read with faddd, so the first two
| longs carry the value; the trailing zero longs are presumably padding.
EM1A2: .long 0x3FC55555,0x55555555,0x00000000,0x00000000

| setoxm1 small-argument coefficients B8..B3 (EM1POLY), read as doubles.
EM1B8: .long 0x3EC71DE3,0xA5774682
EM1B7: .long 0x3EFA01A0,0x19D7CB68

EM1B6: .long 0x3F2A01A0,0x1A019DF3
EM1B5: .long 0x3F56C16C,0x16C170E2

EM1B4: .long 0x3F811111,0x11111111
EM1B3: .long 0x3FA55555,0x55555555

| B2 is read as an extended-precision operand (faddx) in EM1POLY.
EM1B2: .long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
.long 0x00000000

| Doubles 2^140 and 2^(-140), used by EM12TINY to scale very small inputs
| up before the subtraction and back down afterwards.
TWO140: .long 0x48B00000,0x00000000
TWON140: .long 0x37300000,0x00000000
 
| Table of 2^(J/64) for J = 0..63, one 16-byte entry per J (the code
| indexes it with J<<4).  Each entry is T, read as a 12-byte extended
| value with fmovex/faddx (%a1), followed by the correction t, read as a
| 4-byte single at offset 12 with fadds/fmoves.
EXPTBL:
.long 0x3FFF0000,0x80000000,0x00000000,0x00000000
.long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
.long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
.long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
.long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
.long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
.long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
.long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
.long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
.long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
.long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
.long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
.long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
.long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
.long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
.long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
.long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
.long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
.long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
.long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
.long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
.long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
.long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
.long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
.long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
.long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
.long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
.long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
.long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
.long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
.long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
.long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
.long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
.long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
.long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
.long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
.long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
.long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
.long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
.long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
.long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
.long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
.long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
.long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
.long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
.long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
.long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
.long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
.long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
.long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
.long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
.long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
.long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
.long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
.long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
.long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
.long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
.long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
.long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
.long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
.long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
.long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
.long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
.long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
 
| Local names for the frame scratch slots used by setox/setoxm1.
| ADJFLAG  - 0 from EXPMAIN, 1 from EXPBIG; non-zero requests the extra
|            multiply by ADJSCALE before the final scaling.
| SCALE    - 2^(M) in extended format (setox).
| ADJSCALE - 2^(M1) in extended format (setox, EXPBIG path only).
| SC       - 2^(M) in extended format (setoxm1); also holds -2^(-16382)
|            on the tiny-argument paths.
| ONEBYSC  - -2^(-M) in extended format (setoxm1).
.set ADJFLAG,L_SCR2
.set SCALE,FP_SCR1
.set ADJSCALE,FP_SCR2
.set SC,FP_SCR3
.set ONEBYSC,FP_SCR4
 
| xref t_frcinx
|xref t_extdnrm
|xref t_unfl
|xref t_ovfl
 
.global setoxd
setoxd:
|--entry point for EXP(X), X is denormalized
|--In: a0 -> X, d1 = value to load into FPCR before the final operation.
|--The result is 1 + sign(X)*2^(-126), computed under that FPCR; exit is
|--through t_frcinx.
movel (%a0),%d0
andil #0x80000000,%d0 | ...keep only the sign of X
oril #0x00800000,%d0 | ...sign(X)*2^(-126)
movel %d0,-(%sp) | ...single-precision operand on the stack
fmoves #0x3F800000,%fp0 | ...fp0 is 1.0
fmovel %d1,%fpcr
fadds (%sp)+,%fp0 | ...1 + sign(X)*2^(-126), pops the operand
bra t_frcinx
 
.global setox
setox:
|--EXP(X) entry; X is finite, non-zero and not a NaN.  a0 -> X.

|--Step 1.  Classify |X| by its biased exponent.
movel (%a0),%d0 | ...sign/exponent word of X in the high half
andil #0x7FFF0000,%d0 | ...drop the sign, keep the biased exponent
cmpil #0x3FBE0000,%d0 | ...compare with 2^(-65)
blt EXPSM | ...|X| < 2^(-65): result is 1+X

EXPC1:
|--|X| >= 2^(-65).  d0 high word is the biased exponent on entry.
movew 4(%a0),%d0 | ...append the top 16 bits of the significand
cmpil #0x400CB167,%d0 | ...16380 log2 trunc. 16 bits
bge EXPBIG | ...large argument needs two-step scaling
|--otherwise fall through into EXPMAIN, which follows immediately
 
EXPMAIN:
|--Step 2.
|--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
|--On exit: fp0 = N as a float, fp1 = X, a1 -> EXPTBL entry for J,
|--d0 = biased exponent of 2^M, ADJFLAG = 0, fp2/fp3 saved on the stack.
fmovex (%a0),%fp0 | ...load input from (a0)

fmovex %fp0,%fp1
fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X
fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
movel #0,ADJFLAG(%a6) | ...no extra ADJSCALE multiply on this path
fmovel %fp0,%d0 | ...N = int( X * 64/log2 )
lea EXPTBL,%a1
fmovel %d0,%fp0 | ...convert to floating-format

movel %d0,L_SCR1(%a6) | ...save N temporarily
andil #0x3F,%d0 | ...D0 is J = N mod 64
lsll #4,%d0 | ...16 bytes per EXPTBL entry
addal %d0,%a1 | ...address of 2^(J/64)
movel L_SCR1(%a6),%d0
asrl #6,%d0 | ...D0 is M
addiw #0x3FFF,%d0 | ...biased expo. of 2^(M)
movew L2,L_SCR1(%a6) | ...prefetch L2, no need in CB
 
EXPCONT1:
|--Step 3.
|--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
|--a1 points to 2^(J/64), D0 is biased expo. of 2^(M)
|--Entered by fall-through from EXPMAIN and by branch from EXPBIG.
fmovex %fp0,%fp2
fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64)
fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64
faddx %fp1,%fp0 | ...X + N*L1
faddx %fp2,%fp0 | ...fp0 is R, reduced arg.
| MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache

|--Step 4.
|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
|--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
|--The two brackets are accumulated in fp3 and fp2 respectively; the
|--integer stores that build SCALE are interleaved with the FP work.

fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...fp1 IS S = R*R

fmoves #0x3AB60B70,%fp2 | ...fp2 IS A5
| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache

fmulx %fp1,%fp2 | ...fp2 IS S*A5
fmovex %fp1,%fp3
fmuls #0x3C088895,%fp3 | ...fp3 IS S*A4

faddd EXPA3,%fp2 | ...fp2 IS A3+S*A5
faddd EXPA2,%fp3 | ...fp3 IS A2+S*A4

fmulx %fp1,%fp2 | ...fp2 IS S*(A3+S*A5)
movew %d0,SCALE(%a6) | ...SCALE is 2^(M) in extended
clrw SCALE+2(%a6)
movel #0x80000000,SCALE+4(%a6)
clrl SCALE+8(%a6)

fmulx %fp1,%fp3 | ...fp3 IS S*(A2+S*A4)

fadds #0x3F000000,%fp2 | ...fp2 IS A1+S*(A3+S*A5)
fmulx %fp0,%fp3 | ...fp3 IS R*S*(A2+S*A4)

fmulx %fp1,%fp2 | ...fp2 IS S*(A1+S*(A3+S*A5))
faddx %fp3,%fp0 | ...fp0 IS R+R*S*(A2+S*A4),
| ...fp3 released

fmovex (%a1)+,%fp1 | ...fp1 is lead. pt. of 2^(J/64)
faddx %fp2,%fp0 | ...fp0 is EXP(R) - 1
| ...fp2 released

|--Step 5
|--final reconstruction process
|--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )

fmulx %fp1,%fp0 | ...2^(J/64)*(Exp(R)-1)
fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored
fadds (%a1),%fp0 | ...accurate 2^(J/64)
| ...(a1 was post-incremented, so this is t)

faddx %fp1,%fp0 | ...2^(J/64) + 2^(J/64)*...
movel ADJFLAG(%a6),%d0

|--Step 6
|--ADJFLAG is non-zero only when EXPBIG split the scale into two factors.
tstl %d0
beqs NORMAL
ADJUST:
fmulx ADJSCALE(%a6),%fp0 | ...multiply 2^(M1) first
NORMAL:
fmovel %d1,%FPCR | ...restore user FPCR
fmulx SCALE(%a6),%fp0 | ...multiply 2^(M)
bra t_frcinx
 
EXPSM:
|--Step 7
|--|X| < 2^(-65): the result is 1+X evaluated under the user FPCR (d1).
|--X is loaded with fmovemx rather than fmovex because it may be
|--denormalized.
fmovemx (%a0),%fp0-%fp0 | ...in case X is denormalized
fmovel %d1,%FPCR
fadds #0x3F800000,%fp0 | ...1+X in user mode
bra t_frcinx
 
EXPBIG:
|--Step 8
|--d0 holds the biased exponent and top 16 significand bits of |X|.
|--Between 16380 log2 and 16480 log2 the scale 2^K is split into
|--2^(M1) * 2^(M) with M1 = K>>1 and M = K - M1; ADJFLAG is set so that
|--both factors are applied after EXPCONT1.
cmpil #0x400CB27C,%d0 | ...16480 log2
bgts EXP2BIG
|--Steps 8.2 -- 8.6
fmovex (%a0),%fp0 | ...load input from (a0)

fmovex %fp0,%fp1
fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X
fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
movel #1,ADJFLAG(%a6) | ...request the ADJSCALE multiply
fmovel %fp0,%d0 | ...N = int( X * 64/log2 )
lea EXPTBL,%a1
fmovel %d0,%fp0 | ...convert to floating-format
movel %d0,L_SCR1(%a6) | ...save N temporarily
andil #0x3F,%d0 | ...D0 is J = N mod 64
lsll #4,%d0 | ...16 bytes per EXPTBL entry
addal %d0,%a1 | ...address of 2^(J/64)
movel L_SCR1(%a6),%d0
asrl #6,%d0 | ...D0 is K
movel %d0,L_SCR1(%a6) | ...save K temporarily
asrl #1,%d0 | ...D0 is M1
subl %d0,L_SCR1(%a6) | ...L_SCR1 is M = K - M1
addiw #0x3FFF,%d0 | ...biased expo. of 2^(M1)
movew %d0,ADJSCALE(%a6) | ...ADJSCALE := 2^(M1)
clrw ADJSCALE+2(%a6)
movel #0x80000000,ADJSCALE+4(%a6)
clrl ADJSCALE+8(%a6)
movel L_SCR1(%a6),%d0 | ...D0 is M
addiw #0x3FFF,%d0 | ...biased expo. of 2^(M)
bra EXPCONT1 | ...go back to Step 3
 
EXP2BIG:
|--Step 9
|--|X| is beyond 16480 log2: the result overflows for positive X and
|--underflows for negative X.  The sign is sampled into d0 before it is
|--cleared in memory, because setox always returns a positive value.
fmovel %d1,%FPCR
movel (%a0),%d0 | ...remember the original sign of X
bclrb #sign_bit,(%a0) | ...force the operand positive
tstl %d0
bge t_ovfl | ...X > 0: overflow
bra t_unfl | ...X < 0: underflow
 
.global setoxm1d
setoxm1d:
|--entry point for EXPM1(X), here X is denormalized
|--Step 0.
|--All of the work is delegated to t_extdnrm (defined outside this file).
bra t_extdnrm
 
 
.global setoxm1
setoxm1:
|--EXPM1(X) entry; X is finite, non-zero and not a NaN.  a0 -> X.

|--Step 1.
|--Step 1.1  Split on |X| versus 1/4 using the biased exponent.
movel (%a0),%d0 | ...sign/exponent word of X in the high half
andil #0x7FFF0000,%d0 | ...drop the sign, keep the biased exponent
cmpil #0x3FFD0000,%d0 | ...compare with 1/4
blt EM1SM | ...|X| < 1/4 (EM1SM reuses d0)

EM1CON1:
|--Step 1.3
|--|X| >= 1/4.  d0 high word is the biased exponent on entry.
movew 4(%a0),%d0 | ...append the top 16 bits of the significand
cmpil #0x4004C215,%d0 | ...70log2 rounded up to 16 bits
bgt EM1BIG | ...|X| > 70log2
|--otherwise fall through into EM1MAIN, which follows immediately
 
EM1MAIN:
|--Step 2.
|--This is the case: 1/4 <= |X| <= 70 log2.
|--Steps 2-5 leave fp0 = T*p (T is the EXPTBL entry at a1), M in
|--L_SCR1, SC = 2^M and ONEBYSC = -2^(-M), with fp2/fp3 restored.
fmovex (%a0),%fp0 | ...load input from (a0)

fmovex %fp0,%fp1
fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X
fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
| MOVE.W #$3F81,EM1A4 ...prefetch in CB mode
fmovel %fp0,%d0 | ...N = int( X * 64/log2 )
lea EXPTBL,%a1
fmovel %d0,%fp0 | ...convert to floating-format

movel %d0,L_SCR1(%a6) | ...save N temporarily
andil #0x3F,%d0 | ...D0 is J = N mod 64
lsll #4,%d0 | ...16 bytes per EXPTBL entry
addal %d0,%a1 | ...address of 2^(J/64)
movel L_SCR1(%a6),%d0
asrl #6,%d0 | ...D0 is M
movel %d0,L_SCR1(%a6) | ...save a copy of M
| MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode

|--Step 3.
|--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
|--a1 points to 2^(J/64), D0 and L_SCR1 both contain M
fmovex %fp0,%fp2
fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64)
fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64
faddx %fp1,%fp0 | ...X + N*L1
faddx %fp2,%fp0 | ...fp0 is R, reduced arg.
| MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache
addiw #0x3FFF,%d0 | ...D0 is biased expo. of 2^M

|--Step 4.
|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
|--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
|--The integer stores that build SC and ONEBYSC are interleaved with the
|--FP work.

fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...fp1 IS S = R*R

fmoves #0x3950097B,%fp2 | ...fp2 IS a6
| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache

fmulx %fp1,%fp2 | ...fp2 IS S*A6
fmovex %fp1,%fp3
fmuls #0x3AB60B6A,%fp3 | ...fp3 IS S*A5

faddd EM1A4,%fp2 | ...fp2 IS A4+S*A6
faddd EM1A3,%fp3 | ...fp3 IS A3+S*A5
movew %d0,SC(%a6) | ...SC is 2^(M) in extended
clrw SC+2(%a6)
movel #0x80000000,SC+4(%a6)
clrl SC+8(%a6)

fmulx %fp1,%fp2 | ...fp2 IS S*(A4+S*A6)
movel L_SCR1(%a6),%d0 | ...D0 is M
negw %d0 | ...D0 is -M
fmulx %fp1,%fp3 | ...fp3 IS S*(A3+S*A5)
addiw #0x3FFF,%d0 | ...biased expo. of 2^(-M)
faddd EM1A2,%fp2 | ...fp2 IS A2+S*(A4+S*A6)
fadds #0x3F000000,%fp3 | ...fp3 IS A1+S*(A3+S*A5)

fmulx %fp1,%fp2 | ...fp2 IS S*(A2+S*(A4+S*A6))
oriw #0x8000,%d0 | ...signed/expo. of -2^(-M)
movew %d0,ONEBYSC(%a6) | ...OnebySc is -2^(-M)
clrw ONEBYSC+2(%a6)
movel #0x80000000,ONEBYSC+4(%a6)
clrl ONEBYSC+8(%a6)
fmulx %fp3,%fp1 | ...fp1 IS S*(A1+S*(A3+S*A5))
| ...fp3 released

fmulx %fp0,%fp2 | ...fp2 IS R*S*(A2+S*(A4+S*A6))
faddx %fp1,%fp0 | ...fp0 IS R+S*(A1+S*(A3+S*A5))
| ...fp1 released

faddx %fp2,%fp0 | ...fp0 IS EXP(R)-1
| ...fp2 released
fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored

|--Step 5
|--Compute 2^(J/64)*p

fmulx (%a1),%fp0 | ...2^(J/64)*(Exp(R)-1)
 
|--Step 6
|--Reconstruct exp(X)-1 = 2^M * ( T + t + p - 2^(-M) ).  The order of the
|--additions depends on M; a1 still points at the EXPTBL entry (T at
|--offset 0, t at offset 12) and fp0 holds p = T*(exp(R)-1).
|--Step 6.1
movel L_SCR1(%a6),%d0 | ...retrieve M
cmpil #63,%d0
bles MLE63
|--Step 6.2 M >= 64
fmoves 12(%a1),%fp1 | ...fp1 is t
faddx ONEBYSC(%a6),%fp1 | ...fp1 is t+OnebySc
faddx %fp1,%fp0 | ...p+(t+OnebySc), fp1 released
faddx (%a1),%fp0 | ...T+(p+(t+OnebySc))
bras EM1SCALE
MLE63:
|--Step 6.3 M <= 63
cmpil #-3,%d0
bges MGEN3
MLTN3:
|--Step 6.4 M <= -4
fadds 12(%a1),%fp0 | ...p+t
faddx (%a1),%fp0 | ...T+(p+t)
faddx ONEBYSC(%a6),%fp0 | ...OnebySc + (T+(p+t))
bras EM1SCALE
MGEN3:
|--Step 6.5 -3 <= M <= 63
fmovex (%a1)+,%fp1 | ...fp1 is T
fadds (%a1),%fp0 | ...fp0 is p+t
faddx ONEBYSC(%a6),%fp1 | ...fp1 is T+OnebySc
faddx %fp1,%fp0 | ...(T+OnebySc)+(p+t)

EM1SCALE:
|--Step 6.6
|--Restore the user FPCR (d1) so the final scaling by 2^M is done in the
|--user's mode.
fmovel %d1,%FPCR
fmulx SC(%a6),%fp0

bra t_frcinx
 
EM1SM:
|--Step 7 |X| < 1/4.
|--d0 still holds the biased exponent of X from setoxm1 Step 1.
|--Falls through into EM1TINY when |X| < 2^(-65).
cmpil #0x3FBE0000,%d0 | ...2^(-65)
bges EM1POLY
 
EM1TINY:
|--Step 8 |X| < 2^(-65)
|--d0 is the biased exponent of X.  Inputs below 2^(-16312) take the
|--scaled path in EM12TINY.
cmpil #0x00330000,%d0 | ...2^(-16312)
blts EM12TINY
|--Step 8.2
|--Return X - 2^(-16382), with the add done under the user FPCR (d1).
movel #0x80010000,SC(%a6) | ...SC is -2^(-16382)
movel #0x80000000,SC+4(%a6)
clrl SC+8(%a6)
fmovex (%a0),%fp0
fmovel %d1,%FPCR
faddx SC(%a6),%fp0

bra t_frcinx
 
EM12TINY:
|--Step 8.3
|--|X| < 2^(-16312): scale X up by 2^140, add -2^(-16382), then restore
|--the user FPCR (d1) and scale back down by 2^(-140).
fmovex (%a0),%fp0
fmuld TWO140,%fp0 | ...X * 2^(140)
movel #0x80010000,SC(%a6) | ...SC is -2^(-16382)
movel #0x80000000,SC+4(%a6)
clrl SC+8(%a6)
faddx SC(%a6),%fp0
fmovel %d1,%FPCR
fmuld TWON140,%fp0 | ...scale back by 2^(-140) in user mode

bra t_frcinx
 
EM1POLY:
|--Step 9 exp(X)-1 by a simple polynomial
|--2^(-65) <= |X| < 1/4.  With S = X*X, two chains are evaluated side by
|--side: fp1 accumulates the even coefficients (B12, B10, ..., B2) and
|--fp2 the odd ones (B11, B9, ..., B3).  The result is X + (S*B1 + Q),
|--with the last add done under the user FPCR (d1).
fmovex (%a0),%fp0 | ...fp0 is X
fmulx %fp0,%fp0 | ...fp0 is S := X*X
fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 and fp3
fmoves #0x2F30CAA8,%fp1 | ...fp1 is B12
fmulx %fp0,%fp1 | ...fp1 is S*B12
fmoves #0x310F8290,%fp2 | ...fp2 is B11
fadds #0x32D73220,%fp1 | ...fp1 is B10+S*B12

fmulx %fp0,%fp2 | ...fp2 is S*B11
fmulx %fp0,%fp1 | ...fp1 is S*(B10 + ...

fadds #0x3493F281,%fp2 | ...fp2 is B9+S*...
faddd EM1B8,%fp1 | ...fp1 is B8+S*...

fmulx %fp0,%fp2 | ...fp2 is S*(B9+...
fmulx %fp0,%fp1 | ...fp1 is S*(B8+...

faddd EM1B7,%fp2 | ...fp2 is B7+S*...
faddd EM1B6,%fp1 | ...fp1 is B6+S*...

fmulx %fp0,%fp2 | ...fp2 is S*(B7+...
fmulx %fp0,%fp1 | ...fp1 is S*(B6+...

faddd EM1B5,%fp2 | ...fp2 is B5+S*...
faddd EM1B4,%fp1 | ...fp1 is B4+S*...

fmulx %fp0,%fp2 | ...fp2 is S*(B5+...
fmulx %fp0,%fp1 | ...fp1 is S*(B4+...

faddd EM1B3,%fp2 | ...fp2 is B3+S*...
faddx EM1B2,%fp1 | ...fp1 is B2+S*...

fmulx %fp0,%fp2 | ...fp2 is S*(B3+...
fmulx %fp0,%fp1 | ...fp1 is S*(B2+...

fmulx %fp0,%fp2 | ...fp2 is S*S*(B3+...)
fmulx (%a0),%fp1 | ...fp1 is X*S*(B2...

fmuls #0x3F000000,%fp0 | ...fp0 is S*B1
faddx %fp2,%fp1 | ...fp1 is Q
| ...fp2 released

fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 and fp3 restored

faddx %fp1,%fp0 | ...fp0 is S*B1+Q
| ...fp1 released

fmovel %d1,%FPCR
faddx (%a0),%fp0 | ...X + (S*B1+Q) in user mode

bra t_frcinx
 
EM1BIG:
|--Step 10 |X| > 70 log2
|--Positive X is handed to the setox code at EXPC1, which expects the
|--sign/exponent word of X in the high half of d0.
movel (%a0),%d0
tstl %d0
bgt EXPC1
|--Step 10.2
|--Negative X: return -1 + 2^(-126), with the add done under the user
|--FPCR (d1).
fmoves #0xBF800000,%fp0 | ...fp0 = -1
fmovel %d1,%FPCR
fadds #0x00800000,%fp0 | ...add 2^(-126)

bra t_frcinx
 
|end
/res_func.S
0,0 → 1,2040
|
| res_func.sa 3.9 7/29/91
|
| Normalizes denormalized numbers if necessary and updates the
| stack frame. The function is then restored back into the
| machine and the 040 completes the operation. This routine
| is only used by the unsupported data type/format handler.
| (Exception vector 55).
|
| For packed move out (fmove.p fpm,<ea>) the operation is
| completed here; data is packed and moved to user memory.
| The stack is restored to the 040 only in the case of a
| reportable exception in the conversion.
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
RES_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
| Exponent bound tables, two pairs of 16-bit words each.
| NOTE(review): not referenced in this part of the file.  0x3f81 and
| 0x3c01 match the single/double underflow thresholds compared against
| in cu_nmrs/cu_nmrd, so these are presumably single- and double-precision
| exponent limits -- confirm against the code that reads them.
sp_bnds: .short 0x3f81,0x407e
.short 0x3f6a,0x0000
dp_bnds: .short 0x3c01,0x43fe
.short 0x3bcd,0x0000
 
|xref mem_write
|xref bindec
|xref get_fline
|xref round
|xref denorm
|xref dest_ext
|xref dest_dbl
|xref dest_sgl
|xref unf_sub
|xref nrm_set
|xref dnrm_lp
|xref ovf_res
|xref reg_dest
|xref t_ovfl
|xref t_unfl
 
.global res_func
.global p_move
 
| res_func entry.  Clears DNRM_FLG, RES_FLG and CU_ONLY, then, for a
| dyadic operation (DY_MO_FLG non-zero) whose destination operand is
| tagged denormalized, normalizes FPTEMP in place and records that in
| DTAG and DNRM_FLG.  Falls through into monadic in every case.
res_func:
clrb DNRM_FLG(%a6)
clrb RES_FLG(%a6)
clrb CU_ONLY(%a6)
tstb DY_MO_FLG(%a6)
beqs monadic
dyadic:
btstb #7,DTAG(%a6) |if dop = norm=000, zero=001,
| ;inf=010 or nan=011
beqs monadic |then branch
| ;else denorm
| HANDLE DESTINATION DENORM HERE
| ;set dtag to norm
| ;write the tag & fpte15 to the fstack
leal FPTEMP(%a6),%a0

| Move the sign out of the exponent word into LOCAL_SGN before calling
| nrm_set; bclr sets Z from the old bit, so sne stores $ff when the
| operand was negative.
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0)

bsr nrm_set |normalize number (exp will go negative)
bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign
| bfclr sets Z from the old LOCAL_SGN byte, so the sign bit is put back
| only when the operand was negative.
bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
beqs dpos
bsetb #sign_bit,LOCAL_EX(%a0)
dpos:
bfclr DTAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0
bsetb #4,DTAG(%a6) |set FPTE15
orb #0x0f,DNRM_FLG(%a6) |low nibble: destination was denormalized
| Source operand dispatch.  a0 is pointed at ETEMP.  Move-out
| instructions go to opclass3, a denormalized source goes to mon_dnrm,
| dyadic and fsqrt-type instructions go to normal, and everything else
| falls through into cu_norm.
monadic:
leal ETEMP(%a6),%a0
btstb #direction_bit,CMDREG1B(%a6) |check direction
bne opclass3 |it is a mv out
|
| At this point, only opclass 0 and 2 possible
|
btstb #7,STAG(%a6) |if sop = norm=000, zero=001,
| ;inf=010 or nan=011
bne mon_dnrm |else denorm
tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
bne normal |require normalization of denorm

| At this point:
| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
| fmove = $00 fsmove = $40 fdmove = $44
| fsqrt = $05* fssqrt = $41 fdsqrt = $45
| (*fsqrt reencoded to $05)
|
movew CMDREG1B(%a6),%d0 |get command register
andil #0x7f,%d0 |strip to only command word
|
| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
| fdsqrt are possible.
| For cases fabs, fneg, fmove, fsmove, fdmove, and ftst fall through
| into cu_norm.
| For cases fsqrt, fssqrt, and fdsqrt goto normal.
| All three fsqrt encodings listed above have bit 0 set.
|
btstl #0,%d0
bne normal |weed out fsqrt instructions
|
| cu_norm handles fmove in instructions with normalized inputs.
| The routine round is used to correctly round the input for the
| destination precision and mode.
|
| The opcode is masked with $3b, which drops bits 2 and 6 (the rd/rs
| bits tested in cu_nmove), so the forced-precision variants decode
| like the base instruction.  ftst is handled by falling through into
| cu_ntst.
|
cu_norm:
st CU_ONLY(%a6) |set cu-only inst flag
movew CMDREG1B(%a6),%d0
andib #0x3b,%d0 |isolate bits to select inst
tstb %d0
beql cu_nmove |if zero, it is an fmove
cmpib #0x18,%d0
beql cu_nabs |if $18, it is fabs
cmpib #0x1a,%d0
beql cu_nneg |if $1a, it is fneg
|
| Inst is ftst. Check the source operand and set the cc's accordingly.
| No write is done, so simply rts.
|
| a0 points at the operand.  N, and then one of I, NAN or Z, are OR-ed
| into USER_FPSR; nothing else is set for a non-zero finite value.
|
cu_ntst:
movew LOCAL_EX(%a0),%d0
bclrl #15,%d0 |strip sign; Z reflects the old sign bit
sne LOCAL_SGN(%a0) |record the sign (sne leaves the cc's alone)
beqs cu_ntpo
orl #neg_mask,USER_FPSR(%a6) |set N
cu_ntpo:
cmpiw #0x7fff,%d0 |test for inf/nan
bnes cu_ntcz
tstl LOCAL_HI(%a0) |max exponent: any mantissa bit means NaN
bnes cu_ntn
tstl LOCAL_LO(%a0)
bnes cu_ntn
orl #inf_mask,USER_FPSR(%a6)
rts
cu_ntn:
orl #nan_mask,USER_FPSR(%a6)
movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
| ;snan handler

rts
cu_ntcz:
tstl LOCAL_HI(%a0) |finite: Z only if the whole mantissa is zero
bnel cu_ntsx
tstl LOCAL_LO(%a0)
bnel cu_ntsx
orl #z_mask,USER_FPSR(%a6)
cu_ntsx:
rts
|
| Inst is fabs. Execute the absolute value function on the input.
| Branch to the fmove code. If the operand is NaN, do nothing.
|
| Bit 5 of STAG is set for the zero (001) and nan (011) tags; those
| operands are written unchanged through wr_etemp.
|
cu_nabs:
moveb STAG(%a6),%d0
btstl #5,%d0 |test for NaN or zero
bne wr_etemp |if either, simply write it
bclrb #7,LOCAL_EX(%a0) |do abs
bras cu_nmove |fmove code will finish
|
| Inst is fneg. Execute the negate value function on the input.
| Fall though to the fmove code. If the operand is NaN, do nothing.
|
| Bit 5 of STAG is set for the zero (001) and nan (011) tags; those
| operands are written unchanged through wr_etemp.
|
cu_nneg:
moveb STAG(%a6),%d0
btstl #5,%d0 |test for NaN or zero
bne wr_etemp |if either, simply write it
bchgb #7,LOCAL_EX(%a0) |do neg
|
| Inst is fmove. This code also handles all result writes.
| If bit 2 is set, round is forced to double. If it is clear,
| and bit 6 is set, round is forced to single. If both are clear,
| the round precision is found in the fpcr. If the rounding precision
| is double or single, round the result before the write.
|
| Only operands tagged norm (STAG top three bits all zero) are rounded;
| every other tag is written as is through wr_etemp.
|
cu_nmove:
moveb STAG(%a6),%d0
andib #0xe0,%d0 |isolate stag bits
bne wr_etemp |if not norm, simply write it
btstb #2,CMDREG1B+1(%a6) |check for rd
bne cu_nmrd
btstb #6,CMDREG1B+1(%a6) |check for rs
bne cu_nmrs
|
| The move or operation is not with forced precision. Test for
| nan or inf as the input; if so, simply write it to FPn. Use the
| FPCR_MODE byte to get rounding on norms and zeros.
|
| The top two bits of FPCR_MODE select the precision: 0 is extended,
| 1 is single, anything else is treated as double.
|
cu_nmnr:
bfextu FPCR_MODE(%a6){#0:#2},%d0
tstb %d0 |check for extended
beq cu_wrexn |if so, just write result
cmpib #1,%d0 |check for single
beq cu_nmrs |fall through to double
|
| The move is fdmove or round precision is double.
|
| Exponents at or below $3c01 go to cu_nunfl with d0 = 2.  Otherwise the
| operand is rounded to double with the user's rounding mode, and an
| exponent of $43ff or more after rounding is treated as overflow.
|
cu_nmrd:
movel #2,%d0 |set up the size for denorm
movew LOCAL_EX(%a0),%d1 |compare exponent to double threshold
andw #0x7fff,%d1
cmpw #0x3c01,%d1
bls cu_nunfl
bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode
orl #0x00020000,%d1 |or in rprec (double)
clrl %d0 |clear g,r,s for round
bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal format
sne LOCAL_SGN(%a0) |sign kept in LOCAL_SGN across round
bsrl round
bfclr LOCAL_SGN(%a0){#0:#8} |back to IEEE ext format
beqs cu_nmrdc
bsetb #sign_bit,LOCAL_EX(%a0)
cu_nmrdc:
movew LOCAL_EX(%a0),%d1 |check for overflow
andw #0x7fff,%d1
cmpw #0x43ff,%d1
bge cu_novfl |take care of overflow case
bra cu_wrexn
|
| The move is fsmove or round precision is single.
|
| Exponents at or below $3f81 go to cu_nunfl with d0 = 1.  Otherwise the
| operand is rounded to single with the user's rounding mode; an
| exponent below $407f after rounding is written through cu_wrexn, and
| anything larger falls through into cu_novfl.
|
cu_nmrs:
movel #1,%d0 |set up the size for denorm
movew LOCAL_EX(%a0),%d1 |compare exponent to single threshold
andw #0x7fff,%d1
cmpw #0x3f81,%d1
bls cu_nunfl
bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode
orl #0x00010000,%d1 |or in rprec (single)
clrl %d0 |clear g,r,s for round
bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal format
sne LOCAL_SGN(%a0) |sign kept in LOCAL_SGN across round
bsrl round
bfclr LOCAL_SGN(%a0){#0:#8} |back to IEEE ext format
beqs cu_nmrsc
bsetb #sign_bit,LOCAL_EX(%a0)
cu_nmrsc:
movew LOCAL_EX(%a0),%d1 |check for overflow
andw #0x7FFF,%d1
cmpw #0x407f,%d1
blt cu_wrexn
|
| The operand is above precision boundaries. Use t_ovfl to
| generate the correct value.
|
| t_ovfl is called as a subroutine here; the result is then written
| through cu_wrexn.
|
cu_novfl:
bsr t_ovfl
bra cu_wrexn
|
| The operand is below precision boundaries. Use denorm to
| generate the correct value.
|
| On entry d0 is 1 (single) or 2 (double), as set by cu_nmrs/cu_nmrd.
| The sign is parked in LOCAL_SGN around the call and restored after.
| Falls through into cu_nucont.
|
cu_nunfl:
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0)
bsr denorm
bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
beqs cu_nucont
bsetb #sign_bit,LOCAL_EX(%a0)
| Round the denormalized value.  d1 is built as rounding mode in the low
| word and rounding precision in the high word: forced double ($2) or
| single ($1) when the opcode has the rd/rs bit, otherwise the precision
| field taken from the top two bits of FPCR_MODE.
cu_nucont:
bfextu FPCR_MODE(%a6){#2:#2},%d1
btstb #2,CMDREG1B+1(%a6) |check for rd
bne inst_d
btstb #6,CMDREG1B+1(%a6) |check for rs
bne inst_s
swap %d1 |work on the high word
moveb FPCR_MODE(%a6),%d1
lsrb #6,%d1 |precision field from FPCR_MODE
swap %d1 |rmode back in the low word
bra inst_sd
inst_d:
orl #0x00020000,%d1
bra inst_sd
inst_s:
orl #0x00010000,%d1
inst_sd:
bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal format
sne LOCAL_SGN(%a0)
bsrl round
bfclr LOCAL_SGN(%a0){#0:#8} |back to IEEE ext format
beqs cu_nuflp
bsetb #sign_bit,LOCAL_EX(%a0)
| After rounding: an inexact result (inex2 set in FPSR_EXCEPT) also sets
| the accrued underflow bit.
cu_nuflp:
btstb #inex2_bit,FPSR_EXCEPT(%a6)
beqs cu_nuninx
orl #aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL
| A non-zero mantissa goes straight to cu_nunzro.  A zero mantissa is
| examined further below.
cu_nuninx:
tstl LOCAL_HI(%a0) |test for zero
bnes cu_nunzro
tstl LOCAL_LO(%a0)
bnes cu_nunzro
|
| The mantissa is zero from the denorm loop. Check sign and rmode
| to see if rounding should have occurred which would leave the lsb.
|
| The rmode field of USER_FPCR (mask $30) is compared with $20: lower
| values go to cu_nzro, $20 is handled at cu_nrm and $30 at cu_nrp.
|
movel USER_FPCR(%a6),%d0
andil #0x30,%d0 |isolate rmode
cmpil #0x20,%d0
blts cu_nzro
bnes cu_nrp
cu_nrm:
tstw LOCAL_EX(%a0) |if positive, result stays zero
bges cu_nzro
| NOTE(review): only FPCR_MODE bit 7 selects single vs double here,
| whereas cu_nucont also honours the rd/rs opcode bits -- confirm this
| is intended for fsmove/fdmove.
btstb #7,FPCR_MODE(%a6) |check for double
beqs cu_nincs
bras cu_nincd
cu_nrp:
tstw LOCAL_EX(%a0) |if positive, set lsb
blts cu_nzro
btstb #7,FPCR_MODE(%a6) |check for double
beqs cu_nincs
cu_nincd:
orl #0x800,LOCAL_LO(%a0) |inc for double
bra cu_nunzro
cu_nincs:
orl #0x100,LOCAL_HI(%a0) |inc for single
bra cu_nunzro
| Result is zero: set Z, then set UNFL unless the tag test below
| matches.  Either way the result at (a0) is copied to ETEMP and
| execution falls through into cu_wrexn.
cu_nzro:
orl #z_mask,USER_FPSR(%a6)
moveb STAG(%a6),%d0
andib #0xe0,%d0
| NOTE(review): with the tag encoding documented at monadic (zero=001,
| inf=010 in the top three bits) $40 is the inf tag, not zero; cu_nmove
| also only reaches this path for norm-tagged operands -- confirm.
cmpib #0x40,%d0 |check if input was tagged zero
beqs cu_numv
cu_nunzro:
orl #unfl_mask,USER_FPSR(%a6) |set unfl
cu_numv:
movel (%a0),ETEMP(%a6)
movel 4(%a0),ETEMP_HI(%a6)
movel 8(%a0),ETEMP_LO(%a6)
|
| Write the result to memory, setting the fpsr cc bits. NaN and Inf
| bypass cu_wrexn.
|
| Z is set when the sign/exponent word is $0000 or $8000; N is set when
| the word is negative.  The write itself is done by wr_etemp.
|
cu_wrexn:
tstw LOCAL_EX(%a0) |test for zero
beqs cu_wrzero
cmpw #0x8000,LOCAL_EX(%a0) |test for negative zero
bnes cu_wreon
cu_wrzero:
orl #z_mask,USER_FPSR(%a6) |set Z bit
cu_wreon:
tstw LOCAL_EX(%a0)
bpl wr_etemp
orl #neg_mask,USER_FPSR(%a6)
bra wr_etemp
 
|
| HANDLE SOURCE DENORM HERE
|
| ;clear denorm stag to norm
| ;write the new tag & ete15 to the fstack
mon_dnrm:
|
| At this point, check for the cases in which normalizing the
| denorm produces incorrect results.
|
tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
bnes nrm_src |require normalization of denorm

| At this point:
| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
| fmove = $00 fsmove = $40 fdmove = $44
| fsqrt = $05* fssqrt = $41 fdsqrt = $45
| (*fsqrt reencoded to $05)
|
movew CMDREG1B(%a6),%d0 |get command register
andil #0x7f,%d0 |strip to only command word
|
| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
| fdsqrt are possible.
| For cases fabs, fneg, fmove, fsmove, fdmove, and ftst goto cu_dnrm
| (do not normalize)
| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
| All three fsqrt encodings listed above have bit 0 set.
|
btstl #0,%d0
bnes nrm_src |weed out fsqrt instructions
st CU_ONLY(%a6) |set cu-only inst flag
bra cu_dnrm |fmove, fabs, fneg, ftst
| ;cases go to cu_dnrm
| Normalize the denormalized source operand at (a0) in place, retag it
| as normalized in STAG, and mark the source as denormalized in
| DNRM_FLG.  Falls through into normal.
nrm_src:
bclrb #sign_bit,LOCAL_EX(%a0) |park the sign in LOCAL_SGN
sne LOCAL_SGN(%a0)
bsr nrm_set |normalize number (exponent will go
| ; negative)
bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign

bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
beqs spos
bsetb #sign_bit,LOCAL_EX(%a0)
spos:
bfclr STAG(%a6){#0:#4} |set tag to normalized, ETE15 = 0
bsetb #4,STAG(%a6) |set ETE15
orb #0xf0,DNRM_FLG(%a6) |high nibble: source was denormalized
| If either operand was denormalized, ck_wrap decides whether this is a
| wrap-around case.  Otherwise fix_stk updates the frame fields below
| and sets RES_FLG to request a restore.
normal:
tstb DNRM_FLG(%a6) |check if any of the ops were denorms
bne ck_wrap |if so, check if it is a potential
| ;wrap-around case
fix_stk:
moveb #0xfe,CU_SAVEPC(%a6)
bclrb #E1,E_BYTE(%a6)

clrw NMNEXC(%a6)

st RES_FLG(%a6) |indicate that a restore is needed
rts
 
|
| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
| ftst) completely in software without an frestore to the 040.
|
| The opcode is masked with $3b, which drops the rd/rs bits (2 and 6)
| that cu_dmove tests separately.  ftst is handled by falling through
| into cu_dtst.
|
cu_dnrm:
st CU_ONLY(%a6)
movew CMDREG1B(%a6),%d0
andib #0x3b,%d0 |isolate bits to select inst
tstb %d0
beql cu_dmove |if zero, it is an fmove
cmpib #0x18,%d0
beql cu_dabs |if $18, it is fabs
cmpib #0x1a,%d0
beql cu_dneg |if $1a, it is fneg
|
| Inst is ftst. Check the source operand and set the cc's accordingly.
| No write is done, so simply rts.
|
| Same sequence as cu_ntst: N, and then one of I, NAN or Z, are OR-ed
| into USER_FPSR.
|
cu_dtst:
movew LOCAL_EX(%a0),%d0
bclrl #15,%d0 |strip sign; Z reflects the old sign bit
sne LOCAL_SGN(%a0) |record the sign (sne leaves the cc's alone)
beqs cu_dtpo
orl #neg_mask,USER_FPSR(%a6) |set N
cu_dtpo:
cmpiw #0x7fff,%d0 |test for inf/nan
bnes cu_dtcz
tstl LOCAL_HI(%a0) |max exponent: any mantissa bit means NaN
bnes cu_dtn
tstl LOCAL_LO(%a0)
bnes cu_dtn
orl #inf_mask,USER_FPSR(%a6)
rts
cu_dtn:
orl #nan_mask,USER_FPSR(%a6)
movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
| ;snan handler
rts
cu_dtcz:
tstl LOCAL_HI(%a0) |finite: Z only if the whole mantissa is zero
bnel cu_dtsx
tstl LOCAL_LO(%a0)
bnel cu_dtsx
orl #z_mask,USER_FPSR(%a6)
cu_dtsx:
rts
|
| Inst is fabs. Execute the absolute value function on the input.
| Branch to the fmove code.
|
| Unlike cu_nabs there is no tag test here; the sign bit is simply
| cleared.
|
cu_dabs:
bclrb #7,LOCAL_EX(%a0) |do abs
bras cu_dmove |fmove code will finish
|
| Inst is fneg. Execute the negate value function on the input.
| Fall though to the fmove code.
|
| Unlike cu_nneg there is no tag test here; the sign bit is simply
| toggled.
|
cu_dneg:
bchgb #7,LOCAL_EX(%a0) |do neg
|
| Inst is fmove. This code also handles all result writes.
| If bit 2 is set, round is forced to double. If it is clear,
| and bit 6 is set, round is forced to single. If both are clear,
| the round precision is found in the fpcr. If the rounding precision
| is double or single, the result is zero, and the mode is checked
| to determine if the lsb of the result should be set.
|
| Falls through into cu_dmnr when neither bit is set.
|
cu_dmove:
btstb #2,CMDREG1B+1(%a6) |check for rd
bne cu_dmrd
btstb #6,CMDREG1B+1(%a6) |check for rs
bne cu_dmrs
|
| The move or operation is not with forced precision. Use the
| FPCR_MODE byte to get rounding.
|
| The top two bits of FPCR_MODE select the precision: 0 is extended,
| 1 is single, anything else falls through into cu_dmrd.
|
cu_dmnr:
bfextu FPCR_MODE(%a6){#0:#2},%d0
tstb %d0 |check for extended
beq cu_wrexd |if so, just write result
cmpib #1,%d0 |check for single
beq cu_dmrs |fall through to double
|
| fdmove, or the rounding precision is double.  The rounded result is a
| double zero; for a positive source in rp mode, or a negative source in
| rm mode, the double lsb is set as well.
|
cu_dmrd:
bfextu FPCR_MODE(%a6){#2:#2},%d1 |d1 = rounding mode field
tstw LOCAL_EX(%a0) |negative source?
blts cu_dmdn
cmpib #3,%d1 |positive source: is the mode rp?
beq cu_dpdr |yes: double pos zero w/lsb
bra cu_dpd |no: double pos zero
cu_dmdn:
cmpib #2,%d1 |negative source: is the mode rm?
beq cu_dndr |yes: double neg zero w/lsb
bra cu_dnd |no: double neg zero
|
| fsmove, or the rounding precision is single.  The rounded result is a
| single zero; for a positive source in rp mode, or a negative source in
| rm mode, the single lsb is set as well.
|
cu_dmrs:
bfextu FPCR_MODE(%a6){#2:#2},%d1 |d1 = rounding mode field
tstw LOCAL_EX(%a0) |negative source?
blts cu_dmsn
cmpib #3,%d1 |positive source: is the mode rp?
beq cu_spdr |yes: single pos zero w/lsb
bra cu_spd |no: single pos zero
cu_dmsn:
cmpib #2,%d1 |negative source: is the mode rm?
beq cu_sndr |yes: single neg zero w/lsb
bra cu_snd |no: single neg zero
|
| The precision is extended, so the result in etemp is correct.
| Simply set unfl (not inex2 or aunfl) and write the result to
| the correct fp register.
|
| N is set whenever the sign/exponent word is non-zero.
| NOTE(review): this relies on the exponent field being zero, so that
| only the sign bit can make the word non-zero -- confirm for all
| operands that reach this point.
|
cu_wrexd:
orl #unfl_mask,USER_FPSR(%a6)
tstw LOCAL_EX(%a0)
beq wr_etemp
orl #neg_mask,USER_FPSR(%a6)
bra wr_etemp
|
| These routines write +/- zero in double format. The routines
| cu_dpdr and cu_dndr set the double lsb.
|
| Each routine stores the operand at (a0) with exponent $3c01 and the
| wanted sign, ORs the matching condition codes and unfinx_mask into
| USER_FPSR, and leaves through wr_etemp.
|
cu_dpd:
movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero
clrl LOCAL_HI(%a0)
clrl LOCAL_LO(%a0)
orl #z_mask,USER_FPSR(%a6)
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
cu_dpdr:
movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero
clrl LOCAL_HI(%a0)
movel #0x800,LOCAL_LO(%a0) |with lsb set
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
cu_dnd:
movel #0xbc010000,LOCAL_EX(%a0) |force neg double zero
clrl LOCAL_HI(%a0)
clrl LOCAL_LO(%a0)
orl #z_mask,USER_FPSR(%a6)
orl #neg_mask,USER_FPSR(%a6)
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
cu_dndr:
movel #0xbc010000,LOCAL_EX(%a0) |force neg double zero
clrl LOCAL_HI(%a0)
movel #0x800,LOCAL_LO(%a0) |with lsb set
orl #neg_mask,USER_FPSR(%a6)
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
|
| These routines write +/- zero in single format. The routines
| cu_spdr and cu_sndr set the single lsb.
|
| Each routine stores the operand at (a0) with exponent $3f81 and the
| wanted sign, ORs the matching condition codes and unfinx_mask into
| USER_FPSR, and leaves through wr_etemp.
|
cu_spd:
movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero
clrl LOCAL_HI(%a0)
clrl LOCAL_LO(%a0)
orl #z_mask,USER_FPSR(%a6)
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
cu_spdr:
movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero
movel #0x100,LOCAL_HI(%a0) |with lsb set
clrl LOCAL_LO(%a0)
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
cu_snd:
movel #0xbf810000,LOCAL_EX(%a0) |force neg single zero
clrl LOCAL_HI(%a0)
clrl LOCAL_LO(%a0)
orl #z_mask,USER_FPSR(%a6)
orl #neg_mask,USER_FPSR(%a6)
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
cu_sndr:
movel #0xbf810000,LOCAL_EX(%a0) |force neg single zero
movel #0x100,LOCAL_HI(%a0) |with lsb set
clrl LOCAL_LO(%a0)
orl #neg_mask,USER_FPSR(%a6)
orl #unfinx_mask,USER_FPSR(%a6)
bra wr_etemp
|
| This code checks for 16-bit overflow conditions on dyadic
| operations which are not restorable into the floating-point
| unit and must be completed in software. Basically, this
| condition exists with a very large norm and a denorm. One
| of the operands must be denormalized to enter this code.
|
| Flags used:
| DY_MO_FLG contains 0 for monadic op, $ff for dyadic
| DNRM_FLG contains $00 for neither op denormalized
| $0f for the destination op denormalized
| $f0 for the source op denormalized
| $ff for both ops denormalized
|
| The wrap-around condition occurs for add, sub, div, and cmp
| when
|
| abs(dest_exp - src_exp) >= $8000
|
| and for mul when
|
| (dest_exp + src_exp) < $0
|
| we must process the operation here if this case is true.
|
| The rts following the frcfpn routine is the exit from res_func
| for this condition. The restore flag (RES_FLG) is left clear.
| No frestore is done unless an exception is to be reported.
|
| For fadd:
| if(sign_of(dest) != sign_of(src))
| replace exponent of src with $3fff (keep sign)
| use fpu to perform dest+new_src (user's rmode and X)
| clr sticky
| else
| set sticky
| call round with user's precision and mode
| move result to fpn and wbtemp
|
| For fsub:
| if(sign_of(dest) == sign_of(src))
| replace exponent of src with $3fff (keep sign)
| use fpu to perform dest+new_src (user's rmode and X)
| clr sticky
| else
| set sticky
| call round with user's precision and mode
| move result to fpn and wbtemp
|
| For fdiv/fsgldiv:
| if(both operands are denorm)
| restore_to_fpu;
| if(dest is norm)
| force_ovf;
| else(dest is denorm)
| force_unf;
|
| For fcmp:
| if(dest is norm)
| N = sign_of(dest);
| else(dest is denorm)
| N = sign_of(src);
|
| For fmul:
| if(both operands are denorm)
| force_unf;
| if((dest_exp + src_exp) < 0)
| force_unf;
| else
| restore_to_fpu;
|
| local equates:
.set addcode,0x22
.set subcode,0x28
.set mulcode,0x23
.set divcode,0x20
.set cmpcode,0x38
ck_wrap:
| Dispatch on the instruction type to the matching wrap-around
| checker. A monadic op (DY_MO_FLG == 0, i.e. fsqrt on this path)
| has no second operand to wrap against and goes straight to
| fix_stk. Otherwise CMDREG1B is masked with $3b and compared
| against the local opcode equates; anything that is not
| add/sub/mul/cmp falls through to wrap_div.
|
| The DY_MO_FLG test used to be commented out, which left the beq
| below testing whatever condition codes the caller happened to
| leave behind. It is restored so the branch tests the flag its
| comment describes.
tstb DY_MO_FLG(%a6) |check for fsqrt (monadic op)
beq fix_stk |if zero, it is fsqrt
movew CMDREG1B(%a6),%d0
andiw #0x3b,%d0 |strip to command bits
cmpiw #addcode,%d0
beq wrap_add
cmpiw #subcode,%d0
beq wrap_sub
cmpiw #mulcode,%d0
beq wrap_mul
cmpiw #cmpcode,%d0
beq wrap_cmp
|
| Inst is fdiv.
|
wrap_div:
| fdiv wrap check. Both operands denorm -> fix_stk. Otherwise
| DNRM_FLG selects which operand is the denorm: $0f (dest) is
| handled here, anything else goes to div_srcd.
cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
beq fix_stk |restore to fpu
|
| One of the ops is denormalized. Test for wrap condition
| and force the result.
|
cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
bnes div_srcd
div_destd:
| Dest is the denorm. ckinf_ns returns non-zero when the source tag
| is inf, nan or zero; those are not wrap cases.
bsrl ckinf_ns
bne fix_stk
bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
subl %d1,%d0 |subtract dest from src
| NOTE(review): this threshold is $7fff while the other wrap tests
| in this file compare against $8000 -- confirm this is intended.
cmpl #0x7fff,%d0
blt fix_stk |if less, not wrap case
| Result sign = sign(src) xor sign(dest), recorded in WBTEMP_SGN.
| (force_unf clears and recomputes WBTEMP_SGN the same way.)
clrb WBTEMP_SGN(%a6)
movew ETEMP_EX(%a6),%d0 |find the sign of the result
movew FPTEMP_EX(%a6),%d1
eorw %d1,%d0
andiw #0x8000,%d0
beq force_unf
st WBTEMP_SGN(%a6)
bra force_unf
 
| ckinf_ns / ckinf_nd: classify the source (STAG) or destination
| (DTAG) tag. Tag bits are isolated with mask $60.
| Returns d0 = 0 (Z set) when the tag is none of inf ($40),
| nan ($60) or zero ($20); returns d0 = -1 (Z clear) otherwise.
| Callers branch on the condition codes left by the final move/clr.
ckinf_ns:
moveb STAG(%a6),%d0 |check source tag for inf or nan
bra ck_in_com
ckinf_nd:
moveb DTAG(%a6),%d0 |check destination tag for inf or nan
ck_in_com:
andib #0x60,%d0 |isolate tag bits
cmpb #0x40,%d0 |is it inf?
beq nan_or_inf |not wrap case
cmpb #0x60,%d0 |is it nan?
beq nan_or_inf |yes, not wrap case
cmpb #0x20,%d0 |is it a zero?
beq nan_or_inf |yes, not wrap case either
clrl %d0
rts |tag is not inf, nan or zero;
| ;caller goes on to check the wrap case
nan_or_inf:
| Reached for inf, nan and zero tags alike.
moveql #-1,%d0
rts
 
 
 
div_srcd:
| fdiv with the source as the denorm. ckinf_nd returns non-zero when
| the destination tag is inf, nan or zero; those go to fix_stk.
| On a wrap (exponent difference >= $8000) the result sign is put
| in WBTEMP_SGN and execution continues at force_ovf, which
| follows this routine.
bsrl ckinf_nd
bne fix_stk
bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
subl %d1,%d0 |subtract src from dest
cmpl #0x8000,%d0
blt fix_stk |if less, not wrap case
clrb WBTEMP_SGN(%a6)
movew ETEMP_EX(%a6),%d0 |find the sign of the result
movew FPTEMP_EX(%a6),%d1
eorw %d1,%d0 |sign(src) xor sign(dest)
andiw #0x8000,%d0
beqs force_ovf |signs alike: positive result
st WBTEMP_SGN(%a6) |signs differ: negative result
|
| This code handles the case of the instruction resulting in
| an overflow condition.
|
force_ovf:
| Force an overflow result into WBTEMP and write it to the
| destination register via frcfpn.
| Expects WBTEMP_SGN to hold the result sign (set by the caller).
| d0 passed to ovf_res is the precision selector: 1 = forced single,
| 2 = forced double, otherwise the 2-bit field read from FPCR_MODE.
bclrb #E1,E_BYTE(%a6)
orl #ovfl_inx_mask,USER_FPSR(%a6)
clrw NMNEXC(%a6)
leal WBTEMP(%a6),%a0 |point a0 to memory location
movew CMDREG1B(%a6),%d0
btstl #6,%d0 |test for forced precision
beqs frcovf_fpcr
btstl #2,%d0 |check for double
bnes frcovf_dbl
movel #0x1,%d0 |inst is forced single
bras frcovf_rnd
frcovf_dbl:
movel #0x2,%d0 |inst is forced double
bras frcovf_rnd
frcovf_fpcr:
bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec
frcovf_rnd:

| The 881/882 does not set inex2 for the following case, so the
| line is commented out to be compatible with 881/882
| tst.b %d0
| beq.b frcovf_x
| or.l #inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2

|frcovf_x:
bsrl ovf_res |get correct result based on
| ;round precision/mode. This
| ;sets FPSR_CC correctly
| ;returns in external format
| Fold the separate sign byte back into the exponent word: bfclr
| tests the byte (Z set if it was zero) and then clears it.
bfclr WBTEMP_SGN(%a6){#0:#8}
beq frcfpn
bsetb #sign_bit,WBTEMP_EX(%a6)
bra frcfpn
|
| Inst is fadd.
|
wrap_add:
| fadd wrap check. Both operands denorm -> fix_stk. Otherwise the
| exponent of the norm minus the (negative) exponent of the
| normalized denorm is compared against $8000; smaller differences
| and inf/nan/zero tags go to fix_stk. A wrap from add_destd
| branches to add_wrap; a wrap from add_srcd falls into add_wrap,
| which follows this routine.
cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
beq fix_stk |restore to fpu
|
| One of the ops is denormalized. Test for wrap condition
| and complete the instruction.
|
cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
bnes add_srcd
add_destd:
bsrl ckinf_ns |src inf/nan/zero?
bne fix_stk
bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
subl %d1,%d0 |subtract dest from src
cmpl #0x8000,%d0
blt fix_stk |if less, not wrap case
bra add_wrap
add_srcd:
bsrl ckinf_nd |dest inf/nan/zero?
bne fix_stk
bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
subl %d1,%d0 |subtract src from dest
cmpl #0x8000,%d0
blt fix_stk |if less, not wrap case
|
| Check the signs of the operands. If they are unlike, the fpu
| can be used to add the norm and 1.0 with the sign of the
| denorm and it will correctly generate the result in extended
| precision. We can then call round with no sticky and the result
| will be correct for the user's rounding mode and precision. If
| the signs are the same, we call round with the sticky bit set
| and the result will be correct for the user's rounding mode and
| precision.
|
add_wrap:
| fadd wrap case. Like signs go to add_same. For unlike signs the
| denorm's exponent word is replaced by $3fff (sign kept), the fpu
| performs the add with the user's rounding mode, and the result
| is rounded in WBTEMP with sticky clear before exiting to frcfpnr.
movew ETEMP_EX(%a6),%d0
movew FPTEMP_EX(%a6),%d1
eorw %d1,%d0 |sign(src) xor sign(dest)
andiw #0x8000,%d0
beq add_same
|
| The signs are unlike.
|
cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
bnes add_u_srcd
| Dest is the denorm: patch FPTEMP's exponent word.
movew FPTEMP_EX(%a6),%d0
andiw #0x8000,%d0
orw #0x3fff,%d0 |force the exponent to +/- 1
movew %d0,FPTEMP_EX(%a6) |in the denorm
movel USER_FPCR(%a6),%d0
andil #0x30,%d0 |keep only the rounding-mode bits
fmovel %d0,%fpcr |set up users rmode and X
fmovex ETEMP(%a6),%fp0
faddx FPTEMP(%a6),%fp0
leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
fmovel %fpsr,%d1
orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
fmovex %fp0,WBTEMP(%a6) |write result to memory
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
clrl %d0 |force sticky to zero
bclrb #sign_bit,WBTEMP_EX(%a6) |move sign out of the exponent word
sne WBTEMP_SGN(%a6) |into the separate sign byte for round
bsrl round |round result to users rmode & prec
bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beq frcfpnr
bsetb #sign_bit,WBTEMP_EX(%a6)
bra frcfpnr
add_u_srcd:
| Source is the denorm: same sequence, patching ETEMP's exponent.
movew ETEMP_EX(%a6),%d0
andiw #0x8000,%d0
orw #0x3fff,%d0 |force the exponent to +/- 1
movew %d0,ETEMP_EX(%a6) |in the denorm
movel USER_FPCR(%a6),%d0
andil #0x30,%d0 |keep only the rounding-mode bits
fmovel %d0,%fpcr |set up users rmode and X
fmovex ETEMP(%a6),%fp0
faddx FPTEMP(%a6),%fp0
fmovel %fpsr,%d1
orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd
leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
fmovex %fp0,WBTEMP(%a6) |write result to memory
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
clrl %d0 |force sticky to zero
bclrb #sign_bit,WBTEMP_EX(%a6)
sne WBTEMP_SGN(%a6) |use internal format for round
bsrl round |round result to users rmode & prec
bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beq frcfpnr
bsetb #sign_bit,WBTEMP_EX(%a6)
bra frcfpnr
|
| Signs are alike:
|
add_same:
| fadd wrap case with like signs. The norm operand (ETEMP when the
| dest is the denorm, FPTEMP otherwise) is rounded in place with
| the sticky bit set, copied to WBTEMP, and checked for an
| exponent of $7fff in add_ckovf before exiting to frcfpnr.
cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
bnes add_s_srcd
add_s_destd:
leal ETEMP(%a6),%a0 |a0 -> norm operand (the source)
movel USER_FPCR(%a6),%d0
andil #0x30,%d0
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
movel #0x20000000,%d0 |set sticky for round
bclrb #sign_bit,ETEMP_EX(%a6) |sign out of the exponent word
sne ETEMP_SGN(%a6) |into the separate sign byte
bsrl round |round result to users rmode & prec
bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beqs add_s_dclr
bsetb #sign_bit,ETEMP_EX(%a6)
add_s_dclr:
leal WBTEMP(%a6),%a0
movel ETEMP(%a6),(%a0) |write result to wbtemp
movel ETEMP_HI(%a6),4(%a0)
movel ETEMP_LO(%a6),8(%a0)
tstw ETEMP_EX(%a6)
bgt add_ckovf |exponent word > 0: leave N clear
orl #neg_mask,USER_FPSR(%a6)
bra add_ckovf
add_s_srcd:
leal FPTEMP(%a6),%a0 |a0 -> norm operand (the dest)
movel USER_FPCR(%a6),%d0
andil #0x30,%d0
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
movel #0x20000000,%d0 |set sticky for round
bclrb #sign_bit,FPTEMP_EX(%a6)
sne FPTEMP_SGN(%a6)
bsrl round |round result to users rmode & prec
bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beqs add_s_sclr
bsetb #sign_bit,FPTEMP_EX(%a6)
add_s_sclr:
leal WBTEMP(%a6),%a0
movel FPTEMP(%a6),(%a0) |write result to wbtemp
movel FPTEMP_HI(%a6),4(%a0)
movel FPTEMP_LO(%a6),8(%a0)
tstw FPTEMP_EX(%a6)
bgt add_ckovf |exponent word > 0: leave N clear
orl #neg_mask,USER_FPSR(%a6)
add_ckovf:
| a0 still points at WBTEMP here (used by the clrl below).
movew WBTEMP_EX(%a6),%d0
andiw #0x7fff,%d0 |drop the sign bit
cmpiw #0x7fff,%d0
bne frcfpnr
|
| The result has overflowed to $7fff exponent. Set I, ovfl,
| and aovfl, and clr the mantissa (incorrectly set by the
| round routine.)
|
orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
clrl 4(%a0) |clear the long at offset 4 of WBTEMP
bra frcfpnr
|
| Inst is fsub.
|
wrap_sub:
| fsub wrap check; same structure as the fadd check. Both operands
| denorm, an inf/nan/zero tag on the other operand, or an exponent
| difference below $8000 all go to fix_stk. A wrap from sub_destd
| branches to sub_wrap; a wrap from sub_srcd falls into sub_wrap,
| which follows this routine.
cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
beq fix_stk |restore to fpu
|
| One of the ops is denormalized. Test for wrap condition
| and complete the instruction.
|
cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
bnes sub_srcd
sub_destd:
bsrl ckinf_ns |src inf/nan/zero?
bne fix_stk
bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
subl %d1,%d0 |subtract dest from src
cmpl #0x8000,%d0
blt fix_stk |if less, not wrap case
bra sub_wrap
sub_srcd:
bsrl ckinf_nd |dest inf/nan/zero?
bne fix_stk
bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
subl %d1,%d0 |subtract src from dest
cmpl #0x8000,%d0
blt fix_stk |if less, not wrap case
|
| Check the signs of the operands. If they are alike, the fpu
| can be used to subtract from the norm 1.0 with the sign of the
| denorm and it will correctly generate the result in extended
| precision. We can then call round with no sticky and the result
| will be correct for the user's rounding mode and precision. If
| the signs are unlike, we call round with the sticky bit set
| and the result will be correct for the user's rounding mode and
| precision.
|
sub_wrap:
| fsub wrap case. Unlike signs go to sub_diff. For like signs the
| denorm's exponent word is replaced by $3fff (sign kept), the fpu
| computes FPTEMP - ETEMP with the user's rounding mode, and the
| result is rounded in WBTEMP with sticky clear before exiting to
| frcfpnr.
movew ETEMP_EX(%a6),%d0
movew FPTEMP_EX(%a6),%d1
eorw %d1,%d0 |sign(src) xor sign(dest)
andiw #0x8000,%d0
bne sub_diff
|
| The signs are alike.
|
cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
bnes sub_u_srcd
| Dest is the denorm: patch FPTEMP's exponent word.
movew FPTEMP_EX(%a6),%d0
andiw #0x8000,%d0
orw #0x3fff,%d0 |force the exponent to +/- 1
movew %d0,FPTEMP_EX(%a6) |in the denorm
movel USER_FPCR(%a6),%d0
andil #0x30,%d0 |keep only the rounding-mode bits
fmovel %d0,%fpcr |set up users rmode and X
fmovex FPTEMP(%a6),%fp0
fsubx ETEMP(%a6),%fp0
fmovel %fpsr,%d1
orl %d1,USER_FPSR(%a6) |capture cc's and inex from fsub
leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
fmovex %fp0,WBTEMP(%a6) |write result to memory
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
clrl %d0 |force sticky to zero
bclrb #sign_bit,WBTEMP_EX(%a6) |sign out of the exponent word
sne WBTEMP_SGN(%a6) |into the separate sign byte for round
bsrl round |round result to users rmode & prec
bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beq frcfpnr
bsetb #sign_bit,WBTEMP_EX(%a6)
bra frcfpnr
sub_u_srcd:
| Source is the denorm: same sequence, patching ETEMP's exponent.
movew ETEMP_EX(%a6),%d0
andiw #0x8000,%d0
orw #0x3fff,%d0 |force the exponent to +/- 1
movew %d0,ETEMP_EX(%a6) |in the denorm
movel USER_FPCR(%a6),%d0
andil #0x30,%d0 |keep only the rounding-mode bits
fmovel %d0,%fpcr |set up users rmode and X
fmovex FPTEMP(%a6),%fp0
fsubx ETEMP(%a6),%fp0
fmovel %fpsr,%d1
orl %d1,USER_FPSR(%a6) |capture cc's and inex from fsub
leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame
fmovex %fp0,WBTEMP(%a6) |write result to memory
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
clrl %d0 |force sticky to zero
bclrb #sign_bit,WBTEMP_EX(%a6)
sne WBTEMP_SGN(%a6)
bsrl round |round result to users rmode & prec
bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beq frcfpnr
bsetb #sign_bit,WBTEMP_EX(%a6)
bra frcfpnr
|
| Signs are unlike:
|
sub_diff:
| fsub wrap case with unlike signs. The norm operand is rounded in
| place with the sticky bit set, copied to WBTEMP, and checked for
| an exponent of $7fff in sub_ckovf before exiting to frcfpnr.
| When the dest is the denorm the norm is the source, so its sign
| is flipped first (result = dest - src).
cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm?
bnes sub_s_srcd
sub_s_destd:
leal ETEMP(%a6),%a0 |a0 -> norm operand (the source)
movel USER_FPCR(%a6),%d0
andil #0x30,%d0
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
movel #0x20000000,%d0 |set sticky for round
|
| Since the dest is the denorm, the sign is the opposite of the
| norm sign.
|
eoriw #0x8000,ETEMP_EX(%a6) |flip sign on result
tstw ETEMP_EX(%a6)
bgts sub_s_dwr |exponent word > 0: leave N clear
orl #neg_mask,USER_FPSR(%a6)
sub_s_dwr:
bclrb #sign_bit,ETEMP_EX(%a6) |sign out of the exponent word
sne ETEMP_SGN(%a6) |into the separate sign byte
bsrl round |round result to users rmode & prec
bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beqs sub_s_dclr
bsetb #sign_bit,ETEMP_EX(%a6)
sub_s_dclr:
leal WBTEMP(%a6),%a0
movel ETEMP(%a6),(%a0) |write result to wbtemp
movel ETEMP_HI(%a6),4(%a0)
movel ETEMP_LO(%a6),8(%a0)
bra sub_ckovf
sub_s_srcd:
leal FPTEMP(%a6),%a0 |a0 -> norm operand (the dest)
movel USER_FPCR(%a6),%d0
andil #0x30,%d0
lsrl #4,%d0 |put rmode in lower 2 bits
movel USER_FPCR(%a6),%d1
andil #0xc0,%d1
lsrl #6,%d1 |put precision in upper word
swap %d1
orl %d0,%d1 |set up for round call
movel #0x20000000,%d0 |set sticky for round
bclrb #sign_bit,FPTEMP_EX(%a6)
sne FPTEMP_SGN(%a6)
bsrl round |round result to users rmode & prec
bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beqs sub_s_sclr
bsetb #sign_bit,FPTEMP_EX(%a6)
sub_s_sclr:
leal WBTEMP(%a6),%a0
movel FPTEMP(%a6),(%a0) |write result to wbtemp
movel FPTEMP_HI(%a6),4(%a0)
movel FPTEMP_LO(%a6),8(%a0)
tstw FPTEMP_EX(%a6)
bgt sub_ckovf |exponent word > 0: leave N clear
orl #neg_mask,USER_FPSR(%a6)
sub_ckovf:
| a0 still points at WBTEMP here (used by the clrl below).
movew WBTEMP_EX(%a6),%d0
andiw #0x7fff,%d0 |drop the sign bit
cmpiw #0x7fff,%d0
bne frcfpnr
|
| The result has overflowed to $7fff exponent. Set I, ovfl,
| and aovfl, and clr the mantissa (incorrectly set by the
| round routine.)
|
orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
clrl 4(%a0) |clear the long at offset 4 of WBTEMP
bra frcfpnr
|
| Inst is fcmp.
|
wrap_cmp:
| fcmp wrap check. In the wrap case no result is written; only the
| N bit of USER_FPSR is decided here and the routine returns with
| rts. Non-wrap cases go to fix_stk.
cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
beq fix_stk |restore to fpu
|
| One of the ops is denormalized. Test for wrap condition
| and complete the instruction.
|
cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
bnes cmp_srcd
cmp_destd:
bsrl ckinf_ns |src inf/nan/zero?
bne fix_stk
bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
subl %d1,%d0 |subtract dest from src
cmpl #0x8000,%d0
blt fix_stk |if less, not wrap case
tstw ETEMP_EX(%a6) |set N to ~sign_of(src)
bge cmp_setn |src non-negative: set N
rts
cmp_srcd:
bsrl ckinf_nd |dest inf/nan/zero?
bne fix_stk
bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
subl %d1,%d0 |subtract src from dest
cmpl #0x8000,%d0
blt fix_stk |if less, not wrap case
tstw FPTEMP_EX(%a6) |set N to sign_of(dest)
blt cmp_setn |dest negative: set N
rts
cmp_setn:
orl #neg_mask,USER_FPSR(%a6)
rts
 
|
| Inst is fmul.
|
wrap_mul:
| fmul wrap check. Both operands denorm forces an underflow
| outright. With one denorm, the two exponents are added; a sum
| greater than zero goes to fix_stk, otherwise the result is
| forced to underflow. mul_srcd falls into force_unf, which
| follows this routine.
cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm,
beq force_unf |force an underflow (really!)
|
| One of the ops is denormalized. Test for wrap condition
| and complete the instruction.
|
cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm
bnes mul_srcd
mul_destd:
bsrl ckinf_ns |src inf/nan/zero?
bne fix_stk
bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos)
bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg)
addl %d1,%d0 |add dest exp to src exp
bgt fix_stk |sum > 0: not wrap case
bra force_unf
mul_srcd:
bsrl ckinf_nd |dest inf/nan/zero?
bne fix_stk
bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos)
bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg)
addl %d1,%d0 |add src exp to dest exp
bgt fix_stk |sum > 0: not wrap case
|
| This code handles the case of the instruction resulting in
| an underflow condition.
|
force_unf:
| Force an underflow result into WBTEMP and write it to the
| destination register via frcfpn.
| The result sign (sign(src) xor sign(dest)) is computed here into
| WBTEMP_SGN, regardless of what the caller stored there.
| d0 passed to unf_sub is the precision selector: 1 = forced single,
| 2 = forced double, otherwise the 2-bit field read from FPCR_MODE.
bclrb #E1,E_BYTE(%a6)
orl #unfinx_mask,USER_FPSR(%a6)
clrw NMNEXC(%a6)
clrb WBTEMP_SGN(%a6)
movew ETEMP_EX(%a6),%d0 |find the sign of the result
movew FPTEMP_EX(%a6),%d1
eorw %d1,%d0
andiw #0x8000,%d0
beqs frcunfcont
st WBTEMP_SGN(%a6)
frcunfcont:
lea WBTEMP(%a6),%a0 |point a0 to memory location
movew CMDREG1B(%a6),%d0
btstl #6,%d0 |test for forced precision
beqs frcunf_fpcr
btstl #2,%d0 |check for double
bnes frcunf_dbl
movel #0x1,%d0 |inst is forced single
bras frcunf_rnd
frcunf_dbl:
movel #0x2,%d0 |inst is forced double
bras frcunf_rnd
frcunf_fpcr:
bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec
frcunf_rnd:
bsrl unf_sub |get correct result based on
| ;round precision/mode. This
| ;sets FPSR_CC correctly
bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beqs frcfpn
bsetb #sign_bit,WBTEMP_EX(%a6)
bra frcfpn
 
|
| Write the result to the user's fpn. All results must be HUGE to be
| written; otherwise the results would have overflowed or underflowed.
| If the rounding precision is single or double, the ovf_res routine
| is needed to correctly supply the max value.
|
frcfpnr:
| Exit used by the add/sub wrap paths. Determines the effective
| precision (forced single = 1, forced double = 2, else the 2-bit
| field read from FPCR_MODE). When the instruction is not forced
| and that field is 0 (extended), WBTEMP is written as is via
| frcfpn. Otherwise ovf_res supplies the result, ovfinx_mask is
| flagged, and execution falls into frcfpn, which follows.
movew CMDREG1B(%a6),%d0
btstl #6,%d0 |test for forced precision
beqs frcfpn_fpcr
btstl #2,%d0 |check for double
bnes frcfpn_dbl
movel #0x1,%d0 |inst is forced single
bras frcfpn_rnd
frcfpn_dbl:
movel #0x2,%d0 |inst is forced double
bras frcfpn_rnd
frcfpn_fpcr:
bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec
tstb %d0
beqs frcfpn |if extended, write what you got
frcfpn_rnd:
bclrb #sign_bit,WBTEMP_EX(%a6) |sign out of the exponent word
sne WBTEMP_SGN(%a6) |into the separate sign byte
bsrl ovf_res |get correct result based on
| ;round precision/mode. This
| ;sets FPSR_CC correctly
bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format
beqs frcfpn_clr
bsetb #sign_bit,WBTEMP_EX(%a6)
frcfpn_clr:
orl #ovfinx_mask,USER_FPSR(%a6)
|
| Perform the write.
|
frcfpn:
| Write WBTEMP to the destination fp register named in CMDREG1B.
| Register numbers above 3 are loaded with fmovemx using a dynamic
| register mask in d0 (bit 7-n set for fpn). Register numbers 0-3
| are instead copied into USER_FP0..USER_FP3 in the frame at a6.
bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register
cmpib #3,%d0
bles frc0123 |check if dest is fp0-fp3
movel #7,%d1
subl %d0,%d1 |d1 = 7 - (dest. reg. no.)
clrl %d0
bsetl %d1,%d0 |d0 is dynamic register mask
fmovemx WBTEMP(%a6),%d0
rts
frc0123:
| d0 = 0..3: select the matching USER_FPn slot.
cmpib #0,%d0
beqs frc0_dst
cmpib #1,%d0
beqs frc1_dst
cmpib #2,%d0
beqs frc2_dst
frc3_dst:
movel WBTEMP_EX(%a6),USER_FP3(%a6)
movel WBTEMP_HI(%a6),USER_FP3+4(%a6)
movel WBTEMP_LO(%a6),USER_FP3+8(%a6)
rts
frc2_dst:
movel WBTEMP_EX(%a6),USER_FP2(%a6)
movel WBTEMP_HI(%a6),USER_FP2+4(%a6)
movel WBTEMP_LO(%a6),USER_FP2+8(%a6)
rts
frc1_dst:
movel WBTEMP_EX(%a6),USER_FP1(%a6)
movel WBTEMP_HI(%a6),USER_FP1+4(%a6)
movel WBTEMP_LO(%a6),USER_FP1+8(%a6)
rts
frc0_dst:
movel WBTEMP_EX(%a6),USER_FP0(%a6)
movel WBTEMP_HI(%a6),USER_FP0+4(%a6)
movel WBTEMP_LO(%a6),USER_FP0+8(%a6)
rts
 
|
| Write etemp to fpn.
| A check is made on enabled and signalled snan exceptions,
| and the destination is not overwritten if this condition exists.
| This code is designed to make fmoveins of unsupported data types
| faster.
|
wr_etemp:
| Entry for writing etemp to the destination fp register.
| If snan is both signalled (FPSR_EXCEPT) and enabled
| (FPCR_ENABLE), the destination is left untouched: ETEMP's
| exponent long is copied to FPTEMP_EX for the snan handler, N is
| set in USER_FPSR when the operand is negative, and the routine
| returns. In every other case control passes to fmoveinc.
btstb #snan_bit,FPSR_EXCEPT(%a6) |if snan is set, and
beqs fmoveinc |enabled, force restore
btstb #snan_bit,FPCR_ENABLE(%a6) |and don't overwrite
beqs fmoveinc |the dest
movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
| ;snan handler
tstb ETEMP(%a6) |check for negative
blts snan_neg
rts
snan_neg:
| Use neg_mask, as every other N-setting site in this file does.
| The previous operand, neg_bit, follows the naming used for bit
| numbers (cf. sign_bit, snan_bit with btst/bclr/bset), not for
| OR masks, so it did not set the N condition code.
orl #neg_mask,USER_FPSR(%a6) |snan is negative; set N
rts
fmoveinc:
| Complete a move-in: clear NMNEXC and E1, set the I / NaN / Z
| condition code that matches the source tag (plus N when the
| exponent word at LOCAL_EX(a0) is negative), then write ETEMP to
| the destination fp register.
| The tag is taken from STAG masked with $e0: $40 = inf,
| $60 = nan, $20 = zero; any other value sets no condition code.
clrw NMNEXC(%a6)
bclrb #E1,E_BYTE(%a6)
moveb STAG(%a6),%d0 |check if stag is inf
andib #0xe0,%d0
cmpib #0x40,%d0
bnes fminc_cnan
orl #inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I
tstw LOCAL_EX(%a0) |check sign
bges fminc_con
orl #neg_mask,USER_FPSR(%a6)
bra fminc_con
fminc_cnan:
cmpib #0x60,%d0 |check if stag is NaN
bnes fminc_czero
orl #nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN
movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for
| ;snan handler
tstw LOCAL_EX(%a0) |check sign
bges fminc_con
orl #neg_mask,USER_FPSR(%a6)
bra fminc_con
fminc_czero:
cmpib #0x20,%d0 |check if zero
bnes fminc_con
orl #z_mask,USER_FPSR(%a6) |if zero, set Z
tstw LOCAL_EX(%a0) |check sign
bges fminc_con
orl #neg_mask,USER_FPSR(%a6)
fminc_con:
| Destinations above fp3 are loaded with fmovemx through a dynamic
| register mask (bit 7-n for fpn); fp0-fp3 are handled at fp0123.
bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register
cmpib #3,%d0
bles fp0123 |check if dest is fp0-fp3
movel #7,%d1
subl %d0,%d1 |d1 = 7 - (dest. reg. no.)
clrl %d0
bsetl %d1,%d0 |d0 is dynamic register mask
fmovemx ETEMP(%a6),%d0
rts
 
fp0123:
| Copy ETEMP (three longs) into the USER_FPn slot of the frame at
| a6 selected by d0 = 0..3, then return.
cmpib #0,%d0
beqs fp0_dst
cmpib #1,%d0
beqs fp1_dst
cmpib #2,%d0
beqs fp2_dst
fp3_dst:
movel ETEMP_EX(%a6),USER_FP3(%a6)
movel ETEMP_HI(%a6),USER_FP3+4(%a6)
movel ETEMP_LO(%a6),USER_FP3+8(%a6)
rts
fp2_dst:
movel ETEMP_EX(%a6),USER_FP2(%a6)
movel ETEMP_HI(%a6),USER_FP2+4(%a6)
movel ETEMP_LO(%a6),USER_FP2+8(%a6)
rts
fp1_dst:
movel ETEMP_EX(%a6),USER_FP1(%a6)
movel ETEMP_HI(%a6),USER_FP1+4(%a6)
movel ETEMP_LO(%a6),USER_FP1+8(%a6)
rts
fp0_dst:
movel ETEMP_EX(%a6),USER_FP0(%a6)
movel ETEMP_HI(%a6),USER_FP0+4(%a6)
movel ETEMP_LO(%a6),USER_FP0+8(%a6)
rts
 
opclass3:
| Move-out entry. Sets CU_ONLY, then routes packed-decimal
| destinations to pack_out and everything else to mv_out.
st CU_ONLY(%a6)
movew CMDREG1B(%a6),%d0 |check if packed moveout
andiw #0x0c00,%d0 |isolate last 2 bits of size field
cmpiw #0x0c00,%d0 |if size is 011 or 111, it is packed
beq pack_out |else it is norm or denorm
bra mv_out
 
|
| MOVE OUT
|
 
| Jump table for mv_out, indexed by the 3-bit specifier extracted
| from CMDREG1B. Entries 3 and 7 are the packed sizes, which
| opclass3 routes to pack_out before mv_out is reached.
mv_tbl:
.long li |0: long integer
.long sgp |1: single precision
.long xp |2: extended precision
.long mvout_end |should never be taken
.long wi |4: word integer
.long dp |5: double precision
.long bi |6: byte integer
.long mvout_end |should never be taken
mv_out:
| Dispatch through mv_tbl; the handler is entered with a jmp, so it
| returns directly to this routine's caller.
bfextu CMDREG1B(%a6){#3:#3},%d1 |put source specifier in d1
leal mv_tbl,%a0
movel %a0@(%d1:l:4),%a0 |fetch handler address (4-byte entries)
jmp (%a0)
 
|
| This exit is for move-out to memory. The aunfl bit is
| set if the result is inex and unfl is signalled.
|
mvout_end:
| Common exit for move-out to memory. Sets aunfl only when both
| inex2 and unfl are set in FPSR_EXCEPT, clears NMNEXC and E1,
| zeroes the fpu's FPSR, and folds ETEMP_SGN back into the sign
| bit of ETEMP_EX.
btstb #inex2_bit,FPSR_EXCEPT(%a6)
beqs no_aufl
btstb #unfl_bit,FPSR_EXCEPT(%a6)
beqs no_aufl
bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
no_aufl:
clrw NMNEXC(%a6)
bclrb #E1,E_BYTE(%a6)
fmovel #0,%FPSR |clear any cc bits from res_func
|
| Return ETEMP to extended format from internal extended format so
| that gen_except will have a correctly signed value for ovfl/unfl
| handlers.
|
bfclr ETEMP_SGN(%a6){#0:#8} |test, then clear, the sign byte
beqs mvout_con
bsetb #sign_bit,ETEMP_EX(%a6)
mvout_con:
rts
|
| This exit is for move-out to int register. The aunfl bit is
| not set in any case for this move.
|
mvouti_end:
| Common exit for integer move-out. Same as mvout_end without the
| aunfl handling: clears NMNEXC and E1, zeroes the fpu's FPSR, and
| folds ETEMP_SGN back into the sign bit of ETEMP_EX.
clrw NMNEXC(%a6)
bclrb #E1,E_BYTE(%a6)
fmovel #0,%FPSR |clear any cc bits from res_func
|
| Return ETEMP to extended format from internal extended format so
| that gen_except will have a correctly signed value for ovfl/unfl
| handlers.
|
bfclr ETEMP_SGN(%a6){#0:#8} |test, then clear, the sign byte
beqs mvouti_con
bsetb #sign_bit,ETEMP_EX(%a6)
mvouti_con:
rts
|
| li is used to handle a long integer source specifier
|
 
li:
| Move-out to a long integer. d0 = 4 is the byte count consumed by
| the common int_* routines. Denorm sources go to int_dnrm.
| Otherwise ETEMP is loaded into fp0 and range-checked: in-range
| values are converted by the fpu with the user's rounding mode
| into L_SCR1; out-of-range values store the saturated integer
| and then report exact, inexact or operr.
moveql #4,%d0 |set byte count

btstb #7,STAG(%a6) |check for extended denorm
bne int_dnrm |if so, branch

fmovemx ETEMP(%a6),%fp0-%fp0
fcmpd #0x41dfffffffc00000,%fp0
| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
fbge lo_plrg
fcmpd #0xc1e0000000000000,%fp0
| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
fble lo_nlrg
|
| at this point, the answer is between the largest pos and neg values
|
movel USER_FPCR(%a6),%d1 |use user's rounding mode
andil #0x30,%d1
fmovel %d1,%fpcr
fmovel %fp0,L_SCR1(%a6) |let the 040 perform conversion
fmovel %fpsr,%d1
orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
bra int_wrt


lo_plrg:
| fbeq below still tests the result of the fcmpd that led here.
movel #0x7fffffff,L_SCR1(%a6) |answer is largest positive int
fbeq int_wrt |exact answer
fcmpd #0x41dfffffffe00000,%fp0
| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
fbge int_operr |set operr
bra int_inx |set inexact

lo_nlrg:
| fbeq below still tests the result of the fcmpd that led here.
movel #0x80000000,L_SCR1(%a6) |answer is most negative int
fbeq int_wrt |exact answer
fcmpd #0xc1e0000000100000,%fp0
| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
fblt int_operr |set operr
bra int_inx |set inexact
 
|
| wi is used to handle a word integer source specifier
|
 
wi:
| Move-out to a word integer. d0 = 2 is the byte count consumed by
| the common int_* routines. Denorm sources go to int_dnrm.
| Otherwise ETEMP is loaded into fp0 and range-checked: in-range
| values are converted by the fpu with the user's rounding mode
| into L_SCR1; out-of-range values store the saturated integer
| and then report exact, inexact or operr.
moveql #2,%d0 |set byte count

btstb #7,STAG(%a6) |check for extended denorm
bne int_dnrm |branch if so

fmovemx ETEMP(%a6),%fp0-%fp0
fcmps #0x46fffe00,%fp0
| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
fbge wo_plrg
fcmps #0xc7000000,%fp0
| c7000000 in sgl prec = c00e00008000000000000000 in ext prec
fble wo_nlrg

|
| at this point, the answer is between the largest pos and neg values
|
movel USER_FPCR(%a6),%d1 |use user's rounding mode
andil #0x30,%d1
fmovel %d1,%fpcr
fmovew %fp0,L_SCR1(%a6) |let the 040 perform conversion
fmovel %fpsr,%d1
orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
bra int_wrt

wo_plrg:
| fbeq below still tests the result of the fcmps that led here.
movew #0x7fff,L_SCR1(%a6) |answer is largest positive int
fbeq int_wrt |exact answer
fcmps #0x46ffff00,%fp0
| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
fbge int_operr |set operr
bra int_inx |set inexact

wo_nlrg:
| fbeq below still tests the result of the fcmps that led here.
movew #0x8000,L_SCR1(%a6) |answer is most negative int
fbeq int_wrt |exact answer
fcmps #0xc7000080,%fp0
| c7000080 in sgl prec = c00e00008000800000000000 in ext prec
fblt int_operr |set operr
bra int_inx |set inexact
 
|
| bi is used to handle a byte integer source specifier
|
 
bi:
| Move-out to a byte integer. d0 = 1 is the byte count consumed by
| the common int_* routines. Denorm sources go to int_dnrm.
| Otherwise ETEMP is loaded into fp0 and range-checked: in-range
| values are converted by the fpu with the user's rounding mode
| into L_SCR1; out-of-range values store the saturated integer
| and then report exact, inexact or operr.
moveql #1,%d0 |set byte count

btstb #7,STAG(%a6) |check for extended denorm
bne int_dnrm |branch if so

fmovemx ETEMP(%a6),%fp0-%fp0
fcmps #0x42fe0000,%fp0
| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
fbge by_plrg
fcmps #0xc3000000,%fp0
| c3000000 in sgl prec = c00600008000000000000000 in ext prec
fble by_nlrg

|
| at this point, the answer is between the largest pos and neg values
|
movel USER_FPCR(%a6),%d1 |use user's rounding mode
andil #0x30,%d1
fmovel %d1,%fpcr
fmoveb %fp0,L_SCR1(%a6) |let the 040 perform conversion
fmovel %fpsr,%d1
orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set
bra int_wrt

by_plrg:
| fbeq below still tests the result of the fcmps that led here.
moveb #0x7f,L_SCR1(%a6) |answer is largest positive int
fbeq int_wrt |exact answer
fcmps #0x42ff0000,%fp0
| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
fbge int_operr |set operr
bra int_inx |set inexact

by_nlrg:
| fbeq below still tests the result of the fcmps that led here.
moveb #0x80,L_SCR1(%a6) |answer is most negative int
fbeq int_wrt |exact answer
fcmps #0xc3008000,%fp0
| c3008000 in sgl prec = c00600008080000000000000 in ext prec
fblt int_operr |set operr
bra int_inx |set inexact
 
|
| Common integer routines
|
| int_dnrm---account for possible nonzero result for round up with positive
| operand and round down for negative answer. In the first case (result = 1)
| byte-width (store in d0) of result must be honored. In the second case,
| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
 
int_dnrm:
| Integer move-out of a denorm source. On entry d0 is the byte
| count (1, 2 or 4) set by bi/wi/li. The result is built in L_SCR1:
|   rounding mode 0 or 1 (RN, RZ)  -> 0
|   rounding mode 2 (RM)           -> -1 if the source is negative, else 0
|   rounding mode 3 (RP)           -> +1 if the source is not negative, else 0
| Every path flags inexact (inx2a_mask) and continues at int_wrt.
movel #0,L_SCR1(%a6) | initialize result to 0
bfextu FPCR_MODE(%a6){#2:#2},%d1 | d1 is the rounding mode
cmpb #2,%d1
bmis int_inx | if RN or RZ, done
bnes int_rp | if RP, continue below
tstw ETEMP(%a6) | RM: store -1 in L_SCR1 if src is negative
bpls int_inx | otherwise result is 0
movel #-1,L_SCR1(%a6)
bras int_inx
int_rp:
tstw ETEMP(%a6) | RP: store +1 of proper width in L_SCR1 if
| ; source is greater than 0
bmis int_inx | otherwise, result is 0
| The +1 must land in the last byte of a d0-byte big-endian field
| starting at L_SCR1, i.e. at address L_SCR1 + d0 - 1.
lea L_SCR1(%a6),%a1 | a1 is address of L_SCR1
addal %d0,%a1 | offset by destination width -1
subal #1,%a1
bsetb #0,(%a1) | set low bit at a1 address
int_inx:
oril #inx2a_mask,USER_FPSR(%a6)
bras int_wrt
int_operr:
| Integer overflow on move-out: save the operand in fp0 to FPTEMP
| and flag operr/aiop in USER_FPSR. Execution then continues into
| int_wrt, which follows this routine.
fmovemx %fp0-%fp0,FPTEMP(%a6) |FPTEMP must contain the extended
| ;precision source that needs to be
| ;converted to integer this is required
| ;if the operr exception is enabled.
| ;set operr/aiop (no inex2 on int ovfl)

oril #opaop_mask,USER_FPSR(%a6)
| ;fall through to perform int_wrt
int_wrt:
| Store the integer result held in L_SCR1. A zero EXC_EA means the
| destination is a data register (handled at wrt_dn); otherwise
| mem_write is called with a0 = L_SCR1 and a1 = destination
| address. d0 still holds the byte count set by the caller.
movel EXC_EA(%a6),%a1 |load destination address
tstl %a1 |check to see if it is a dest register
beqs wrt_dn |write data register
lea L_SCR1(%a6),%a0 |point to supervisor source address
bsrl mem_write
bra mvouti_end
 
wrt_dn:
| Write the integer result to a data register. The register number
| comes from the low 3 bits of the value returned by get_fline;
| the size is encoded on top of it for reg_dest:
|   byte -> reg#,  word -> reg# + 8,  long -> reg# + $10.
movel %d0,-(%sp) |d0 currently contains the size to write
bsrl get_fline |get_fline returns Dn in d0
andiw #0x7,%d0 |isolate register
movel (%sp)+,%d1 |get size
cmpil #4,%d1 |most frequent case
beqs sz_long
cmpil #2,%d1
bnes sz_con |size is byte: nothing to add
orl #8,%d0 |add 'word' size to register#
bras sz_con
sz_long:
orl #0x10,%d0 |add 'long' size to register#
sz_con:
movel %d0,%d1 |reg_dest expects size:reg in d1
bsrl reg_dest |load proper data register
bra mvouti_end
xp:
| Move-out in extended precision. Converts ETEMP to the internal
| format (sign moved from the exponent word into LOCAL_SGN).
| Denorms go to xdnrm; otherwise do_fp is entered with d0 = 0.
lea ETEMP(%a6),%a0
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0)
btstb #7,STAG(%a6) |check for extended denorm
bne xdnrm
clrl %d0 |destination format = extended
bras do_fp |do normal case
sgp:
| Move-out in single precision. Converts ETEMP to the internal
| format, then compares the exponent word against the sp_bnds
| table: below the first entry -> sp_under, above the second ->
| sp_over. Extended denorms go straight to sp_catas. In range,
| do_fp is entered with d0 = 1.
lea ETEMP(%a6),%a0
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0)
btstb #7,STAG(%a6) |check for extended denorm
bne sp_catas |branch if so
movew LOCAL_EX(%a0),%d0
lea sp_bnds,%a1
cmpw (%a1),%d0 |below lower bound?
blt sp_under
cmpw 2(%a1),%d0 |above upper bound?
bgt sp_over
movel #1,%d0 |set destination format to single
bras do_fp |do normal case
dp:
| Move-out in double precision. Converts ETEMP to the internal
| format, then compares the exponent word against the dp_bnds
| table: below the first entry -> dp_under, above the second ->
| dp_over. Extended denorms go straight to dp_catas. In range,
| execution continues into do_fp (which follows) with d0 = 2.
lea ETEMP(%a6),%a0
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0)

btstb #7,STAG(%a6) |check for extended denorm
bne dp_catas |branch if so

movew LOCAL_EX(%a0),%d0
lea dp_bnds,%a1

cmpw (%a1),%d0 |below lower bound?
blt dp_under
cmpw 2(%a1),%d0 |above upper bound?
bgt dp_over
movel #2,%d0 |set destination format to double
| ;fall through to do_fp
|
do_fp:
| Round the operand at a0 and store it to the destination at
| EXC_EA in the format named by the instruction.
| In:  a0 = operand in internal extended format
|      d0 = destination format (0 ext, 1 sgl, 2 dbl)
| d1 is built as precision (upper word) : rounding mode (lower
| word) for round; d0 is then cleared so g,r,s start at zero.
bfextu FPCR_MODE(%a6){#2:#2},%d1 |rnd mode in d1
swap %d0 |rnd prec in upper word
addl %d0,%d1 |d1 has PREC/MODE info
clrl %d0 |clear g,r,s

bsrl round |round

movel %a0,%a1 |a1 = rounded operand
movel EXC_EA(%a6),%a0 |a0 = destination address

bfextu CMDREG1B(%a6){#3:#3},%d1 |extract destination format
| ;at this point only the dest
| ;formats sgl, dbl, ext are
| ;possible
cmpb #2,%d1
bgts ddbl |double=5, extended=2, single=1
bnes dsgl
| ;fall through to dext
dext:
bsrl dest_ext
bra mvout_end
dsgl:
bsrl dest_sgl
bra mvout_end
ddbl:
bsrl dest_dbl
bra mvout_end
 
|
| Handle possible denorm or catastrophic underflow cases here
|
xdnrm:
| Extended-precision move-out of a denorm: copy the operand at a0
| into WBTEMP via set_xop, set wbtemp15, store it with dest_ext,
| and flag unfl before the common exit.
bsr set_xop |initialize WBTEMP
bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15

movel %a0,%a1 |a1 has the operand input
movel EXC_EA(%a6),%a0 |a0 has the destination pointer
bsrl dest_ext |store to memory
bsetb #unfl_bit,FPSR_EXCEPT(%a6)
bra mvout_end
sp_under:
| Single-precision move-out whose exponent is below the lower
| sp_bnds entry. On entry a1 -> sp_bnds and d0 = exponent word
| (set up by sgp). Exponents below the third table entry are
| catastrophic underflows (sp_catas); otherwise the value is
| denormalized by dpspdnrm and stored with dest_sgl.
bsetb #etemp15_bit,STAG(%a6)

cmpw 4(%a1),%d0
blts sp_catas |catastrophic underflow case

movel #1,%d0 |load in round precision
movel #sgl_thresh,%d1 |load in single denorm threshold
bsrl dpspdnrm |expects d1 to have the proper
| ;denorm threshold
bsrl dest_sgl |stores value to destination
bsetb #unfl_bit,FPSR_EXCEPT(%a6)
bra mvout_end |exit
 
dp_under:
| Double-precision move-out whose exponent is below the lower
| dp_bnds entry. On entry a1 -> dp_bnds and d0 = exponent word
| (set up by dp). Exponents below the third table entry are
| catastrophic underflows (dp_catas); otherwise the value is
| denormalized by dpspdnrm and stored with dest_dbl.
bsetb #etemp15_bit,STAG(%a6)

cmpw 4(%a1),%d0
blts dp_catas |catastrophic underflow case
movel #dbl_thresh,%d1 |load in double precision threshold
movel #2,%d0
bsrl dpspdnrm |expects d1 to have proper
| ;denorm threshold
| ;expects d0 to have round precision
bsrl dest_dbl |store value to destination
bsetb #unfl_bit,FPSR_EXCEPT(%a6)
bra mvout_end |exit
 
|
| Handle catastrophic underflow cases here
|
sp_catas:
| Catastrophic underflow on a single-precision move-out. unf_sub
| builds the result (d0 = 1 selects single); USER_FPSR is saved
| and restored around the call so that bits unf_sub sets are
| discarded. The exponent is then decremented by one and the
| value stored with dest_sgl; unfinx_mask is flagged.
| Temp fix for z bit set in unf_sub
movel USER_FPSR(%a6),-(%a7)

movel #1,%d0 |set round precision to sgl

bsrl unf_sub |a0 points to result

movel (%a7)+,USER_FPSR(%a6)

movel #1,%d0
subw %d0,LOCAL_EX(%a0) |account for difference between
| ;denorm/norm bias

movel %a0,%a1 |a1 has the operand input
movel EXC_EA(%a6),%a0 |a0 has the destination pointer
bsrl dest_sgl |store the result
oril #unfinx_mask,USER_FPSR(%a6)
bra mvout_end
dp_catas:
| Catastrophic underflow on a double-precision move-out. unf_sub
| builds the result (d0 = 2 selects double); USER_FPSR is saved
| and restored around the call so that bits unf_sub sets are
| discarded. The exponent is then decremented by one and the
| value stored with dest_dbl; unfinx_mask is flagged.
| Temp fix for z bit set in unf_sub
movel USER_FPSR(%a6),-(%a7)

movel #2,%d0 |set round precision to dbl
bsrl unf_sub |a0 points to result

movel (%a7)+,USER_FPSR(%a6)

movel #1,%d0
subw %d0,LOCAL_EX(%a0) |account for difference between
| ;denorm/norm bias

movel %a0,%a1 |a1 has the operand input
movel EXC_EA(%a6),%a0 |a0 has the destination pointer
bsrl dest_dbl |store the result
oril #unfinx_mask,USER_FPSR(%a6)
bra mvout_end
 
|
| Handle catastrophic overflow cases here
|
sp_over:
| Overflow on a single-precision move-out. ETEMP is copied to
| FP_SCR1 and ovf_res builds the result there (d0 = 1 selects
| single); USER_FPSR is saved and restored around the call so
| that bits ovf_res sets are discarded. The value is stored with
| dest_sgl and ovfinx_mask is flagged.
| Temp fix for z bit set in ovf_res
movel USER_FPSR(%a6),-(%a7)

movel #1,%d0
leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result
movel ETEMP_EX(%a6),(%a0)
movel ETEMP_HI(%a6),4(%a0)
movel ETEMP_LO(%a6),8(%a0)
bsrl ovf_res

movel (%a7)+,USER_FPSR(%a6)

movel %a0,%a1 |a1 has the operand input
movel EXC_EA(%a6),%a0 |a0 has the destination pointer
bsrl dest_sgl
orl #ovfinx_mask,USER_FPSR(%a6)
bra mvout_end
 
dp_over:
	| Double-precision overflow on a move-out.
	| USER_FPSR is pushed around the ovf_res call and popped afterwards
	| (temporary fix: ovf_res sets the z bit).
	movel	USER_FPSR(%a6),-(%a7)

	moveql	#2,%d0			|round precision = dbl
	leal	FP_SCR1(%a6),%a0	|build the result in FP_SCR1
	movel	ETEMP_EX(%a6),(%a0)	|copy ETEMP (three longwords)
	movel	ETEMP_HI(%a6),4(%a0)
	movel	ETEMP_LO(%a6),8(%a0)
	bsrl	ovf_res

	movel	(%a7)+,USER_FPSR(%a6)

	movel	%a0,%a1			|a1 = operand
	movel	EXC_EA(%a6),%a0		|a0 = destination pointer
	bsrl	dest_dbl		|store the result
	orl	#ovfinx_mask,USER_FPSR(%a6)
	bra	mvout_end
 
|
| DPSPDNRM
|
| This subroutine takes an extended normalized number and denormalizes
| it to the given round precision. This subroutine also decrements
| the input operand's exponent by 1 to account for the fact that
| dest_sgl or dest_dbl expects a normalized number's bias.
|
| Input: a0 points to a normalized number in internal extended format
| d0 is the round precision (=1 for sgl; =2 for dbl)
| d1 is the single precision or double precision
| denorm threshold
|
| Output: (In the format for dest_sgl or dest_dbl)
| a0 points to the destination
| a1 points to the operand
|
| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
|
dpspdnrm:
	| Denormalize the operand at (a0) to the threshold in d1, round it
	| to the precision passed in d0, then hand back a0 = destination
	| and a1 = operand for dest_sgl / dest_dbl.
	movel	%d0,-(%a7)		|keep round precision on the stack
	moveql	#0,%d0			|initial g,r,s = 0
	bsrl	dnrm_lp			|d0 is live afterwards: round needs it

	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|d1 = rounding mode
	swap	%d1			|mode -> upper word
	movew	2(%a7),%d1		|low word of saved precision
	swap	%d1			|d1 = PREC (upper) / MODE (lower)
	bsrl	round			|sets inex in USER_FPSR if needed

	movew	#1,%d0
	subw	%d0,LOCAL_EX(%a0)	|exponent - 1: denorm vs norm bias

	movel	%a0,%a1			|a1 = operand
	movel	EXC_EA(%a6),%a0		|a0 = destination pointer
	addql	#4,%a7			|drop the saved precision
	rts
|
| SET_XOP initialized WBTEMP with the value pointed to by a0
| input: a0 points to input operand in the internal extended format
|
set_xop:
	| Copy the three longwords of the operand at (a0) into WBTEMP.
	movel	LOCAL_EX(%a0),WBTEMP_EX(%a6)
	movel	LOCAL_HI(%a0),WBTEMP_HI(%a6)
	movel	LOCAL_LO(%a0),WBTEMP_LO(%a6)
	| bfclr tests the 8-bit field at WBTEMP_SGN before clearing it;
	| when that field was non-zero, sign_bit is set in WBTEMP_EX.
	bfclr	WBTEMP_SGN(%a6){#0:#8}
	beqs	sxop			|field was zero: nothing to set
	bsetb	#sign_bit,WBTEMP_EX(%a6)
sxop:
	bfclr	STAG(%a6){#5:#4}	|clear wbtm66,wbtm1,wbtm0,sbit
	rts
|
| P_MOVE
|
| p_movet: jump table used by pack_out, indexed by the 3-bit source tag
| taken from STAG.  It has five entries (tags 0-4); pack_out extracts a
| 3-bit index, so tags 5-7 would read into p_regd below.
| NOTE(review): presumably tags above 4 never occur - confirm.
p_movet:
.long p_move
.long p_movez
.long p_movei
.long p_moven
.long p_move
| p_regd: jump table used by dynamick, indexed by the data register
| number (0-7) that holds the dynamic k-factor.
p_regd:
.long p_dyd0
.long p_dyd1
.long p_dyd2
.long p_dyd3
.long p_dyd4
.long p_dyd5
.long p_dyd6
.long p_dyd7
 
pack_out:
	| Dispatch on the source tag: the top three bits of the byte at
	| STAG select an entry of p_movet.
	bfextu	STAG(%a6){#0:#3},%d0	|d0 = source tag (zero-extended)
	leal	p_movet,%a0		|a0 = jump table base
	movel	(%a0,%d0.w*4),%a0	|a0 = handler for this tag
	jmp	(%a0)
 
p_write:
	| Write 12 bytes to the user's destination at EXC_EA via mem_write,
	| then tidy the frame.  Every caller loads a0 immediately before
	| branching here.
	| NOTE(review): a0 is presumably mem_write's source pointer - confirm.
	moveql	#0x0c,%d0		|byte count
	movel	EXC_EA(%a6),%a1		|destination address
	bsr	mem_write		|write the user's destination
	clrb	CU_SAVEPC(%a6)		|cu save pc = all 0's

	| The dtag must be set to norm here - the 040 uses the dtag to
	| execute the correct microcode.
	bfclr	DTAG(%a6){#0:#3}	|dtag = norm

	rts
 
| Notes on handling of special case (zero, inf, and nan) inputs:
| 1. Operr is not signalled if the k-factor is greater than 18.
| 2. Per the manual, status bits are not set.
|
 
p_move:
	| Packed move-out: fetch the k-factor (static field of the
	| command word, or dynamically from Dn), then convert with bindec
	| and write the result.
	movew	CMDREG1B(%a6),%d0
	btstl	#kfact_bit,%d0		|dynamic k-factor?
	beqs	statick			|bit clear: k-factor is static
dynamick:
	bfextu	%d0{#25:#3},%d0		|d0 = number of the Dn holding k
	leal	p_regd,%a0
	movel	(%a0,%d0.l*4),%a0	|a0 = p_dydN reader for that register
	jmp	(%a0)			|reader loads d0 and jumps to statick
statick:
	andiw	#0x007f,%d0		|keep the 7-bit k-factor
	bfexts	%d0{#25:#7},%d0		|sign extend it for bindec
	leal	ETEMP(%a6),%a0		|a0 -> the packed decimal
	bsrl	bindec			|perform the convert; data at a6
	leal	FP_SCR1(%a6),%a0	|a0 = result address
	bral	p_write
p_movez:
	| Source tagged zero: clear the low exponent word and both
	| following longwords of ETEMP, then write it out.
	leal	ETEMP(%a6),%a0		|a0 -> the packed decimal
	clrw	2(%a0)			|lower word of exp
	clrl	4(%a0)			|second lword of ZERO
	clrl	8(%a0)			|third lword of ZERO
	bra	p_write
p_movei:
	| Source tagged infinity: zero the FPSR (clears aiop), clear the
	| low exponent word of ETEMP and write it out.
	fmovel	#0,%FPSR		|clear aiop
	leal	ETEMP(%a6),%a0		|a0 -> the packed decimal
	clrw	2(%a0)			|lower word of exp
	bra	p_write
p_moven:
	| Source tagged NaN: clear the low exponent word of ETEMP and
	| write it out unchanged otherwise.
	leal	ETEMP(%a6),%a0		|a0 -> the packed decimal
	clrw	2(%a0)			|lower word of exp
	bra	p_write
 
|
| Routines to read the dynamic k-factor from Dn.
|
p_dyd0:
	| Readers for the dynamic k-factor, one per data register, reached
	| through p_regd.  D0 and D1 are read from their saved copies in
	| the frame (USER_D0 / USER_D1); D2-D7 are read from the registers.
	| Each reader leaves the value in d0 and joins statick.
	movel	USER_D0(%a6),%d0
	bras	statick
p_dyd1:
	movel	USER_D1(%a6),%d0
	bras	statick
p_dyd2:
	movel	%d2,%d0
	bras	statick
p_dyd3:
	movel	%d3,%d0
	bras	statick
p_dyd4:
	movel	%d4,%d0
	bras	statick
p_dyd5:
	movel	%d5,%d0
	bras	statick
p_dyd6:
	movel	%d6,%d0
	bra	statick
p_dyd7:
	movel	%d7,%d0
	bra	statick
 
|end
/stan.S
0,0 → 1,455
|
| stan.sa 3.3 7/29/91
|
| The entry point stan computes the tangent of
| an input argument;
| stand does the same except for denormalized input.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The value tan(X) returned in floating-point register Fp0.
|
| Accuracy and Monotonicity: The returned result is within 3 ulp in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The program sTAN takes approximately 170 cycles for
| input argument X such that |X| < 15Pi, which is the usual
| situation.
|
| Algorithm:
|
| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
| k = N mod 2, so in particular, k = 0 or 1.
|
| 3. If k is odd, go to 5.
|
| 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
| rational function U/V where
| U = r + r*s*(P1 + s*(P2 + s*P3)), and
| V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
| Exit.
|
| 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
| rational function U/V where
| U = r + r*s*(P1 + s*(P2 + s*P3)), and
| V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
| -Cot(r) = -V/U. Exit.
|
| 6. If |X| > 1, go to 8.
|
| 7. (|X|<2**(-40)) Tan(X) = X. Exit.
|
| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|STAN idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|--BOUNDS1 HOLDS THE SAME TWO COMPACT-FORM LIMITS THAT stan COMPARES
|--AGAINST AS IMMEDIATES (2**(-40) AND 15 PI).  TWOBYPI IS READ AS A
|--DOUBLE (fmuld).
BOUNDS1: .long 0x3FD78000,0x4004BC7E
TWOBYPI: .long 0x3FE45F30,0x6DC9C883

|--COEFFICIENTS OF THE RATIONAL APPROXIMATION U/V DESCRIBED IN THE
|--HEADER.  TANQ4, TANP3 AND TANQ3 ARE READ AS DOUBLES (fmoved/faddd),
|--SO ONLY THE FIRST TWO LONGWORDS OF TANQ3 ARE USED.  TANP2, TANQ2,
|--TANP1 AND TANQ1 ARE READ AS EXTENDED (faddx): THREE LONGWORDS OF
|--DATA PLUS ONE LONGWORD OF PADDING.
TANQ4: .long 0x3EA0B759,0xF50F8688
TANP3: .long 0xBEF2BAA5,0xA8924F04

TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000

TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000

TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000

TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000

TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000

|--NOTE(review): INVTWOPI, TWOPI1 AND TWOPI2 ARE NOT REFERENCED BY THE
|--stan CODE BELOW; REDUCEX BUILDS ITS CONSTANTS FROM IMMEDIATES.
INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000

TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
 
|--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
|--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
|--MOST 69 BITS LONG.
|--LAYOUT: 65 ENTRIES OF 16 BYTES EACH - THREE LONGWORDS OF EXTENDED
|--LEADING TERM FOLLOWED BY ONE LONGWORD OF SINGLE TRAILING TERM.
|--THE N = 0 ENTRY SITS AT PITBL+0x200 (32*16); THE CALLERS ADDRESS
|--ENTRY N AS PITBL+0x200 + 16*N.
.global PITBL
PITBL:
.long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
.long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
.long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
.long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
.long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
.long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
.long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
.long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
.long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
.long 0xC0040000,0x90836524,0x88034B96,0x20B00000
.long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
.long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
.long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
.long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
.long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
.long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
.long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
.long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
.long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
.long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
.long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
.long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
.long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
.long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
.long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
.long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
.long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
.long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
.long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
.long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
.long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
.long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
.long 0x00000000,0x00000000,0x00000000,0x00000000
.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
.long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
.long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
.long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
.long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
.long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
.long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
.long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
.long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
.long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
.long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
.long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
.long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
.long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
.long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
.long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
.long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
.long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
.long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
.long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
.long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
.long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
.long 0x40040000,0x90836524,0x88034B96,0xA0B00000
.long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
.long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
.long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
.long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
.long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
.long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
.long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
.long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
.long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
 
|--SCRATCH LOCATIONS IN THE a6 FRAME USED BY stan:
|--INARG   - COPY OF THE RUNNING REMAINDER (READ BACK FOR ITS EXPONENT)
|--TWOTO63 - SIGN(INARG)*2**63 IN SINGLE, USED TO ROUND TO AN INTEGER
|--ENDFLAG - 1 ON THE LAST REDUCTION PASS, 0 OTHERWISE
|--N       - INTEGER QUOTIENT PRODUCED BY THE REDUCTION
.set INARG,FP_SCR4

.set TWOTO63,L_SCR1
.set ENDFLAG,L_SCR2
.set N,L_SCR3
 
| xref t_frcinx
|xref t_extdnrm
 
.global stand
stand:
	| Denormalized input: tan(X) = X, handled by t_extdnrm.
	bra	t_extdnrm
 
.global stan
stan:
	| Entry for tan(X), X at (a0).  Builds the compact form of |X| in
	| d0 (sign/exponent word above the top 16 mantissa bits, sign
	| masked off) and picks the small / main / large-argument path.
	fmovex	(%a0),%fp0		| ...FP0 = X

	movel	(%a0),%d0
	movew	4(%a0),%d0
	andil	#0x7FFFFFFF,%d0		| ...COMPACT FORM OF |X|

	cmpil	#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
	blt	TANSM			| ...NO: TAN(X) = X
TANOK1:
	cmpil	#0x4004BC7E,%d0		| ...|X| < 15 PI?
	blts	TANMAIN			| ...YES: TABLE-DRIVEN REDUCTION
	bra	REDUCEX			| ...NO: GENERAL REDUCTION
 
 
TANMAIN:
|--THIS IS THE USUAL CASE, |X| < 15 PI.
|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|--ON EXIT FP0 = R AND BIT 31 OF D0 IS THE PARITY OF N (ALL OTHER
|--BITS OF D0 ARE CLEARED).  FALLS THROUGH INTO TANCONT.
fmovex %fp0,%fp1
fmuld TWOBYPI,%fp1 | ...X*2/PI

|--HIDE THE NEXT TWO INSTRUCTIONS
leal PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32

|--FP1 IS NOW READY
fmovel %fp1,%d0 | ...CONVERT TO INTEGER N

asll #4,%d0 | ...16 BYTES PER TABLE ENTRY
addal %d0,%a1 | ...ADDRESS N*PIBY2 IN Y1, Y2

fsubx (%a1)+,%fp0 | ...X-Y1 (A1 NOW POINTS AT Y2)
|--HIDE THE NEXT ONE

fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2

rorl #5,%d0 | ...BIT 0 OF N (NOW AT BIT 4) GOES TO BIT 31
andil #0x80000000,%d0 | ...D0 WAS ODD IFF D0 < 0
 
TANCONT:
|--COMMON CONTINUATION FOR TANMAIN AND REDUCEX.  FP0 = R, D0 < 0 IFF N
|--IS ODD, D1 = VALUE TO LOAD INTO FPCR BEFORE THE FINAL INSTRUCTION.
|--EVEN N: TAN(X) = U/V WITH U = R + R*S*(P1+S*(P2+S*P3)) AND
|--V = 1 + S*(Q1+S*(Q2+S*(Q3+S*Q4))), S = R*R.

cmpil #0,%d0
blt NODD | ...N ODD: RETURN -V/U INSTEAD

fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...S = R*R

fmoved TANQ4,%fp3
fmoved TANP3,%fp2

fmulx %fp1,%fp3 | ...SQ4
fmulx %fp1,%fp2 | ...SP3

faddd TANQ3,%fp3 | ...Q3+SQ4
faddx TANP2,%fp2 | ...P2+SP3

fmulx %fp1,%fp3 | ...S(Q3+SQ4)
fmulx %fp1,%fp2 | ...S(P2+SP3)

faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4)
faddx TANP1,%fp2 | ...P1+S(P2+SP3)

fmulx %fp1,%fp3 | ...S(Q2+S(Q3+SQ4))
fmulx %fp1,%fp2 | ...S(P1+S(P2+SP3))

faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4))
fmulx %fp0,%fp2 | ...RS(P1+S(P2+SP3))

fmulx %fp3,%fp1 | ...S(Q1+S(Q2+S(Q3+SQ4)))

faddx %fp2,%fp0 | ...U = R+RS(P1+S(P2+SP3))

fadds #0x3F800000,%fp1 | ...V = 1+S(Q1+...)

fmovel %d1,%fpcr |restore users exceptions
fdivx %fp1,%fp0 |last inst - possible exception set (FP0 = U/V)

bra t_frcinx
 
NODD:
|--N IS ODD: TAN(X) = -COT(R) = -V/U.  SAME POLYNOMIALS AS THE EVEN
|--CASE BUT WITH THE ROLES OF FP0 AND FP1 SWAPPED: FP1 KEEPS R AND
|--BECOMES U, FP0 HOLDS S AND BECOMES V.
fmovex %fp0,%fp1
fmulx %fp0,%fp0 | ...S = R*R

fmoved TANQ4,%fp3
fmoved TANP3,%fp2

fmulx %fp0,%fp3 | ...SQ4
fmulx %fp0,%fp2 | ...SP3

faddd TANQ3,%fp3 | ...Q3+SQ4
faddx TANP2,%fp2 | ...P2+SP3

fmulx %fp0,%fp3 | ...S(Q3+SQ4)
fmulx %fp0,%fp2 | ...S(P2+SP3)

faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4)
faddx TANP1,%fp2 | ...P1+S(P2+SP3)

fmulx %fp0,%fp3 | ...S(Q2+S(Q3+SQ4))
fmulx %fp0,%fp2 | ...S(P1+S(P2+SP3))

faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4))
fmulx %fp1,%fp2 | ...RS(P1+S(P2+SP3))

fmulx %fp3,%fp0 | ...S(Q1+S(Q2+S(Q3+SQ4)))

faddx %fp2,%fp1 | ...U = R+RS(P1+S(P2+SP3))
fadds #0x3F800000,%fp0 | ...V = 1+S(Q1+...)

fmovex %fp1,-(%sp) | ...PUSH U
eoril #0x80000000,(%sp) | ...FLIP ITS SIGN BIT: -U ON THE STACK

fmovel %d1,%fpcr |restore users exceptions
fdivx (%sp)+,%fp0 |last inst - possible exception set (FP0 = V/(-U))

bra t_frcinx
 
TANBORS:
|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
|--IF |X| < 2**(-40), RETURN X.
|--NOTE(review): NO BRANCH TO TANBORS IS VISIBLE IN THIS FILE AND THE
|--PRECEDING CODE ENDS IN AN UNCONDITIONAL BRANCH; stan JUMPS STRAIGHT
|--TO TANSM OR REDUCEX.  THIS TEST LOOKS UNREACHABLE - CONFIRM.
cmpil #0x3FFF8000,%d0
bgts REDUCEX

TANSM:
|--TINY ARGUMENT: TAN(X) = X.  X IS PUSHED AND RELOADED AFTER FPCR
|--HAS BEEN SET FROM D1, SO THE LAST FP INSTRUCTION RUNS UNDER THE
|--USER'S CONTROL SETTINGS.

fmovex %fp0,-(%sp)
fmovel %d1,%fpcr |restore users exceptions
fmovex (%sp)+,%fp0 |last inst - possible exception set

bra t_frcinx
 
 
REDUCEX:
|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
|--ON ENTRY FP0 = X AND D0 = COMPACT FORM OF ABS(X).  FP2-FP5 AND D2
|--ARE PUSHED HERE AND POPPED AGAIN AT RESTORE.  FALLS INTO LOOP.

fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5
movel %d2,-(%a7)
fmoves #0x00000000,%fp1 | ...LOW PART r OF THE REMAINDER STARTS AT 0

|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
|--there is a danger of unwanted overflow in first LOOP iteration. In this
|--case, reduce argument by one remainder step to make subsequent reduction
|--safe.
cmpil #0x7ffeffff,%d0 |is argument dangerously large?
bnes LOOP
movel #0x7ffe0000,FP_SCR2(%a6) |yes
| ;create 2**16383*PI/2
movel #0xc90fdaa2,FP_SCR2+4(%a6)
clrl FP_SCR2+8(%a6)
ftstx %fp0 |test sign of argument
movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
| ;PI/2 at FP_SCR3
movel #0x85a308d3,FP_SCR3+4(%a6)
clrl FP_SCR3+8(%a6)
fblt red_neg |negative arg: add the constants as built
orw #0x8000,FP_SCR2(%a6) |positive arg: set the sign bit of both
orw #0x8000,FP_SCR3(%a6) | ;constants so the adds below subtract
red_neg:
faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact
fmovex %fp0,%fp1 |save high result in fp1
faddx FP_SCR3(%a6),%fp0 |low part of reduction
fsubx %fp0,%fp1 |determine low component of result
faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
 
|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|--integer quotient will be stored in N
|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
 
LOOP:
|--ONE REDUCTION PASS.  (FP0,FP1) HOLD THE RUNNING REMAINDER (R,r).
|--THE SIGN/EXPONENT WORD OF R IS READ BACK THROUGH INARG; A1 KEEPS
|--THE UNMASKED COPY SO THE SIGN IS STILL AVAILABLE IN WORK.
fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
movew INARG(%a6),%d0
movel %d0,%a1 | ...save a copy of D0
andil #0x00007FFF,%d0
subil #0x00003FFF,%d0 | ...D0 IS K
cmpil #28,%d0
bles LASTLOOP | ...K <= 28: THIS IS THE FINAL PASS
CONTLOOP:
subil #27,%d0 | ...D0 IS L := K-27
movel #0,ENDFLAG(%a6)
bras WORK
LASTLOOP:
clrl %d0 | ...D0 IS L := 0
movel #1,ENDFLAG(%a6)

WORK:
|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.

|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|--2**L * (PIby2_1), 2**L * (PIby2_2)

movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)

movel #0xA2F9836E,FP_SCR1+4(%a6)
movel #0x4E44152A,FP_SCR1+8(%a6)
movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)

fmovex %fp0,%fp2
fmulx FP_SCR1(%a6),%fp2
|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
|--US THE DESIRED VALUE IN FLOATING POINT.

|--HIDE SIX CYCLES OF INSTRUCTION
movel %a1,%d2
swap %d2 | ...SIGN BIT OF INARG MOVES TO BIT 31
andil #0x80000000,%d2
oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
movel %d2,TWOTO63(%a6)

movel %d0,%d2
addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)

|--FP2 IS READY
fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP2 IS ROUNDED

|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
movew %d2,FP_SCR2(%a6)
clrw FP_SCR2+2(%a6)
movel #0xC90FDAA2,FP_SCR2+4(%a6)
clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1

|--FP2 IS READY
fsubs TWOTO63(%a6),%fp2 | ...FP2 is N

addil #0x00003FDD,%d0 | ...BIASED EXPO OF 2**L * Piby2_2
movew %d0,FP_SCR3(%a6)
clrw FP_SCR3+2(%a6)
movel #0x85A308D3,FP_SCR3+4(%a6)
clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2

movel ENDFLAG(%a6),%d0 | ...FETCHED HERE, TESTED AFTER THE FP WORK

|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|--P2 = 2**(L) * Piby2_2
fmovex %fp2,%fp4
fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
fmovex %fp2,%fp5
fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
fmovex %fp4,%fp3
|--we want P+p = W+w but |p| <= half ulp of P
|--Then, we need to compute A := R-P and a := r-p
faddx %fp5,%fp3 | ...FP3 is P
fsubx %fp3,%fp4 | ...W-P

fsubx %fp3,%fp0 | ...FP0 is A := R - P
faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w

fmovex %fp0,%fp3 | ...FP3 A
fsubx %fp4,%fp1 | ...FP1 is a := r - p

|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
|--|r| <= half ulp of R.
faddx %fp1,%fp0 | ...FP0 is R := A+a
|--No need to calculate r if this is the last loop
cmpil #0,%d0
bgt RESTORE | ...ENDFLAG WAS 1: REDUCTION FINISHED

|--Need to calculate r
fsubx %fp0,%fp3 | ...A-R
faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
bra LOOP
 
RESTORE:
	| Reduction finished: FP0 = R, FP2 = N.  Store N as an integer,
	| pop what REDUCEX pushed, and rejoin the main path with the
	| parity of N rotated into the sign bit of d0.
	fmovel	%fp2,N(%a6)		| ...N AS A LONGWORD
	movel	(%a7)+,%d2
	fmovemx	(%a7)+,%fp2-%fp5

	movel	N(%a6),%d0
	rorl	#1,%d0			| ...BIT 0 OF N -> BIT 31

	bra	TANCONT
 
|end
/ssin.S
0,0 → 1,746
|
| ssin.sa 3.3 7/29/91
|
| The entry point sSIN computes the sine of an input argument
| sCOS computes the cosine, and sSINCOS computes both. The
| corresponding entry points with a "d" computes the same
| corresponding function values for denormalized inputs.
|
| Input: Double-extended number X in location pointed to
| by address register a0.
|
| Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
| COS is requested. Otherwise, for SINCOS, sin(X) is returned
| in Fp0, and cos(X) is returned in Fp1.
|
| Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
|
| Accuracy and Monotonicity: The returned result is within 1 ulp in
| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
| result is subsequently rounded to double precision. The
| result is provably monotonic in double precision.
|
| Speed: The programs sSIN and sCOS take approximately 150 cycles for
| input argument X such that |X| < 15Pi, which is the usual
| situation. The speed for sSINCOS is approximately 190 cycles.
|
| Algorithm:
|
| SIN and COS:
| 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
|
| 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
|
| 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
| k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
| k by k := k + AdjN.
|
| 4. If k is even, go to 6.
|
| 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
| where cos(r) is approximated by an even polynomial in r,
| 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
| Exit.
|
| 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
| where sin(r) is approximated by an odd polynomial in r
| r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
| Exit.
|
| 7. If |X| > 1, go to 9.
|
| 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
|
| 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
|
| SINCOS:
| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
| k = N mod 4, so in particular, k = 0,1,2,or 3.
|
| 3. If k is even, go to 5.
|
| 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
| j1 exclusive or with the l.s.b. of k.
| sgn1 := (-1)**j1, sgn2 := (-1)**j2.
| SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
| sin(r) and cos(r) are computed as odd and even polynomials
| in r, respectively. Exit
|
| 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
| SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
| sin(r) and cos(r) are computed as odd and even polynomials
| in r, respectively. Exit
|
| 6. If |X| > 1, go to 8.
|
| 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
|
| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|SSIN idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|--BOUNDS1 HOLDS THE SAME TWO COMPACT-FORM LIMITS THAT THE ENTRY POINTS
|--COMPARE AGAINST AS IMMEDIATES (2**(-40) AND 15 PI).  TWOBYPI IS
|--READ AS A DOUBLE (fmuld).
BOUNDS1: .long 0x3FD78000,0x4004BC7E
TWOBYPI: .long 0x3FE45F30,0x6DC9C883

|--SINE COEFFICIENTS A1..A7.  A7-A4 ARE DOUBLES; A3 IS ALSO READ AS A
|--DOUBLE (faddd), SO ITS LAST TWO LONGWORDS ARE PADDING; A2 AND A1
|--ARE EXTENDED (faddx) WITH ONE LONGWORD OF PADDING.
SINA7: .long 0xBD6AAA77,0xCCC994F5
SINA6: .long 0x3DE61209,0x7AAE8DA1

SINA5: .long 0xBE5AE645,0x2A118AE4
SINA4: .long 0x3EC71DE3,0xA5341531

SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000

SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000

SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000

|--COSINE COEFFICIENTS B1..B8.  B8-B5 ARE DOUBLES; B4 IS READ AS A
|--DOUBLE (faddd) WITH TWO LONGWORDS OF PADDING; B3 AND B2 ARE
|--EXTENDED (faddx) - B2 HAS NO PADDING LONGWORD; B1 IS A SINGLE (fadds).
COSB8: .long 0x3D2AC4D0,0xD6011EE3
COSB7: .long 0xBDA9396F,0x9F45AC19

COSB6: .long 0x3E21EED9,0x0612C972
COSB5: .long 0xBE927E4F,0xB79D9FCF

COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000

COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000

COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
COSB1: .long 0xBF000000

|--NOTE(review): INVTWOPI, TWOPI1 AND TWOPI2 ARE NOT REFERENCED IN THE
|--PART OF THIS FILE SEEN HERE; REDUCEX BUILDS ITS CONSTANTS FROM
|--IMMEDIATES.
INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A

TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
 
|xref PITBL
 
|--SCRATCH LOCATIONS IN THE a6 FRAME.  SEVERAL NAMES SHARE A SLOT:
|--POSNEG1 AND TWOTO63 ARE BOTH L_SCR1, ENDFLAG AND N ARE BOTH L_SCR2.
|--IN THE CODE SEEN HERE ENDFLAG IS COPIED TO D0 BEFORE RESTORE WRITES
|--N, AND TWOTO63 IS ONLY USED INSIDE THE REDUCTION LOOP.
.set INARG,FP_SCR4

|--X IS A 12-BYTE EXTENDED SLOT; XDCARE IS ITS UNUSED SECOND WORD AND
|--XFRAC THE START OF ITS MANTISSA.
.set X,FP_SCR5
.set XDCARE,X+2
.set XFRAC,X+4

|--R' AND S' OF THE SINCOS PATH
.set RPRIME,FP_SCR1
.set SPRIME,FP_SCR2

.set POSNEG1,L_SCR1
.set TWOTO63,L_SCR1

.set ENDFLAG,L_SCR2
.set N,L_SCR2

|--0 FOR SIN, 1 FOR COS, 4 FOR SINCOS
.set ADJN,L_SCR3
 
| xref t_frcinx
|xref t_extdnrm
|xref sto_cos
 
.global ssind
ssind:
	| Denormalized input: sin(X) = X, handled by t_extdnrm.
	bra	t_extdnrm
 
.global scosd
scosd:
	| Denormalized input: cos(X) = 1.
	fmoves	#0x3F800000,%fp0	| ...FP0 = 1.0

	| 9D25B Fix: the fmove.s above sometimes sets fpsr bits, so the
	| FPSR is zeroed before leaving.
	fmovel	#0,%fpsr

	bra	t_frcinx
 
.global ssin
ssin:
	| Sine entry: ADJN := 0, then share the common start with scos.
	clrl	ADJN(%a6)
	bras	SINBGN
 
.global scos
scos:
	| Cosine entry: ADJN := 1, then fall into the common start.
	movel	#1,ADJN(%a6)

SINBGN:
	| Common start for sin and cos, X at (a0).  Loads X, keeps a copy
	| in the X scratch slot, builds the compact form of |X| in d0 and
	| picks the small / main / large-argument path.
	fmovex	(%a0),%fp0		| ...FP0 = X

	movel	(%a0),%d0
	movew	4(%a0),%d0
	fmovex	%fp0,X(%a6)
	andil	#0x7FFFFFFF,%d0		| ...COMPACT FORM OF |X|

	cmpil	#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
	blt	SINSM			| ...NO: TINY ARGUMENT

SOK1:
	cmpil	#0x4004BC7E,%d0		| ...|X| < 15 PI?
	blts	SINMAIN			| ...YES: TABLE-DRIVEN REDUCTION
	bra	REDUCEX			| ...NO: GENERAL REDUCTION
 
SINMAIN:
|--THIS IS THE USUAL CASE, |X| < 15 PI.
|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|--ON EXIT FP0 = R AND N(%a6) HOLDS THE INTEGER N.  FALLS INTO SINCONT.
fmovex %fp0,%fp1
fmuld TWOBYPI,%fp1 | ...X*2/PI

|--HIDE THE NEXT THREE INSTRUCTIONS
lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32

|--FP1 IS NOW READY
fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER

movel N(%a6),%d0
asll #4,%d0 | ...16 BYTES PER TABLE ENTRY
addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2
| ...WHICH IS IN TWO PIECES Y1 & Y2

fsubx (%a1)+,%fp0 | ...X-Y1 (A1 NOW POINTS AT Y2)
|--HIDE THE NEXT ONE
fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
 
SINCONT:
	| Continuation point shared with REDUCEX.  Forms k = N + ADJN and
	| rotates its low bit into the sign of d0: odd k takes the cosine
	| polynomial, even k falls through to the sine polynomial.
	movel	N(%a6),%d0
	addl	ADJN(%a6),%d0		| ...K = N + ADJN
	rorl	#1,%d0			| ...K WAS ODD IFF D0 IS NEGATIVE
	tstl	%d0
	blt	COSPOLY
 
SINPOLY:
|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
|--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
|--WHERE T=S*S.
|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
|--ON ENTRY FP0 = R, D0 = K ROTATED RIGHT BY ONE, D1 = VALUE TO LOAD
|--INTO FPCR BEFORE THE FINAL INSTRUCTION.
fmovex %fp0,X(%a6) | ...X IS R
fmulx %fp0,%fp0 | ...FP0 IS S
|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
fmoved SINA7,%fp3
fmoved SINA6,%fp2
|--FP0 IS NOW READY
fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...FP1 IS T
|--HIDE THE NEXT TWO WHILE WAITING FOR FP1

rorl #1,%d0
andil #0x80000000,%d0
| ...LEAST SIG. BIT OF D0 IN SIGN POSITION
eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R

fmulx %fp1,%fp3 | ...TA7
fmulx %fp1,%fp2 | ...TA6

faddd SINA5,%fp3 | ...A5+TA7
faddd SINA4,%fp2 | ...A4+TA6

fmulx %fp1,%fp3 | ...T(A5+TA7)
fmulx %fp1,%fp2 | ...T(A4+TA6)

faddd SINA3,%fp3 | ...A3+T(A5+TA7)
faddx SINA2,%fp2 | ...A2+T(A4+TA6)

fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7))

fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6))
faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7))
fmulx X(%a6),%fp0 | ...R'*S

faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
|--FP3 AND FP2 ARE NO LONGER NEEDED FROM HERE ON.
|--NOTE(review): THE ORIGINAL COMMENTS SPOKE OF RESTORING THEM HERE,
|--BUT NO RESTORE INSTRUCTION IS PRESENT IN THIS PATH.

fmulx %fp1,%fp0 | ...SIN(R')-R'
|--FP1 RELEASED.

fmovel %d1,%FPCR |restore users exceptions
faddx X(%a6),%fp0 |last inst - possible exception set
bra t_frcinx
 
 
COSPOLY:
|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
|--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
|--WHERE T=S*S.
|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
|--AND IS THEREFORE STORED AS SINGLE PRECISION.
|--ON ENTRY FP0 = R, D0 = K ROTATED RIGHT BY ONE, D1 = VALUE TO LOAD
|--INTO FPCR BEFORE THE FINAL INSTRUCTION.

fmulx %fp0,%fp0 | ...FP0 IS S
|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
fmoved COSB8,%fp2
fmoved COSB7,%fp3
|--FP0 IS NOW READY
fmovex %fp0,%fp1
fmulx %fp1,%fp1 | ...FP1 IS T
|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
fmovex %fp0,X(%a6) | ...X IS S
rorl #1,%d0
andil #0x80000000,%d0
| ...LEAST SIG. BIT OF D0 IN SIGN POSITION

fmulx %fp1,%fp2 | ...TB8
|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S
andil #0x80000000,%d0

fmulx %fp1,%fp3 | ...TB7
|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE
movel %d0,POSNEG1(%a6)

faddd COSB6,%fp2 | ...B6+TB8
faddd COSB5,%fp3 | ...B5+TB7

fmulx %fp1,%fp2 | ...T(B6+TB8)
fmulx %fp1,%fp3 | ...T(B5+TB7)

faddd COSB4,%fp2 | ...B4+T(B6+TB8)
faddx COSB3,%fp3 | ...B3+T(B5+TB7)

fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8))
fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7))

faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8))
fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7))

fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8)))
|--FP3 AND FP2 ARE NO LONGER NEEDED FROM HERE ON.

faddx %fp1,%fp0 | ...[B1+T(B3+...)] + [S(B2+...)]
|--FP1 RELEASED

fmulx X(%a6),%fp0 | ...S' TIMES THE SUM

fmovel %d1,%FPCR |restore users exceptions
fadds POSNEG1(%a6),%fp0 |last inst - possible exception set
bra t_frcinx
 
 
SINBORS:
	|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
	|--IF |X| < 2**(-40), RETURN X OR 1.
	|--NOTE(review): NO BRANCH TO SINBORS APPEARS IN THE PART OF THE
	|--FILE SEEN HERE; SINBGN GOES STRAIGHT TO SINSM OR REDUCEX.
	cmpil	#0x3FFF8000,%d0
	bgts	REDUCEX

SINSM:
	| Tiny argument: ADJN > 0 means cosine was requested.  The move
	| itself sets N/Z and clears V, so the branch tests ADJN > 0.
	movel	ADJN(%a6),%d0
	bgts	COSTINY
 
SINTINY:
	| sin(X) = X for tiny X: reload the saved copy of X after FPCR
	| has been set from d1.
	clrw	XDCARE(%a6)		| ...JUST IN CASE
	fmovel	%d1,%FPCR		|restore users exceptions
	fmovex	X(%a6),%fp0		|last inst - possible exception set
	bra	t_frcinx
 
 
COSTINY:
	| cos(X) for tiny X: start from 1.0 and subtract the single
	| 0x00800000 as the last instruction, after FPCR has been set
	| from d1.
	fmoves	#0x3F800000,%fp0	| ...FP0 = 1.0

	fmovel	%d1,%FPCR		|restore users exceptions
	fsubs	#0x00800000,%fp0	|last inst - possible exception set
	bra	t_frcinx
 
 
REDUCEX:
|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
|--ON ENTRY FP0 = X AND D0 = COMPACT FORM OF ABS(X).  FP2-FP5 AND D2
|--ARE PUSHED HERE AND POPPED AGAIN AT RESTORE.  FALLS INTO LOOP.

fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5
movel %d2,-(%a7)
fmoves #0x00000000,%fp1 | ...LOW PART r OF THE REMAINDER STARTS AT 0
|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
|--there is a danger of unwanted overflow in first LOOP iteration. In this
|--case, reduce argument by one remainder step to make subsequent reduction
|--safe.
cmpil #0x7ffeffff,%d0 |is argument dangerously large?
bnes LOOP
movel #0x7ffe0000,FP_SCR2(%a6) |yes
| ;create 2**16383*PI/2
movel #0xc90fdaa2,FP_SCR2+4(%a6)
clrl FP_SCR2+8(%a6)
ftstx %fp0 |test sign of argument
movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
| ;PI/2 at FP_SCR3
movel #0x85a308d3,FP_SCR3+4(%a6)
clrl FP_SCR3+8(%a6)
fblt red_neg |negative arg: add the constants as built
orw #0x8000,FP_SCR2(%a6) |positive arg: set the sign bit of both
orw #0x8000,FP_SCR3(%a6) | ;constants so the adds below subtract
red_neg:
faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact
fmovex %fp0,%fp1 |save high result in fp1
faddx FP_SCR3(%a6),%fp0 |low part of reduction
fsubx %fp0,%fp1 |determine low component of result
faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
 
|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|--integer quotient will be stored in N
|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
 
LOOP:
|--ONE REDUCTION PASS.  (FP0,FP1) HOLD THE RUNNING REMAINDER (R,r).
|--THE SIGN/EXPONENT WORD OF R IS READ BACK THROUGH INARG; A1 KEEPS
|--THE UNMASKED COPY SO THE SIGN IS STILL AVAILABLE IN WORK.
fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
movew INARG(%a6),%d0
movel %d0,%a1 | ...save a copy of D0
andil #0x00007FFF,%d0
subil #0x00003FFF,%d0 | ...D0 IS K
cmpil #28,%d0
bles LASTLOOP | ...K <= 28: THIS IS THE FINAL PASS
CONTLOOP:
subil #27,%d0 | ...D0 IS L := K-27
movel #0,ENDFLAG(%a6)
bras WORK
LASTLOOP:
clrl %d0 | ...D0 IS L := 0
movel #1,ENDFLAG(%a6)

WORK:
|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.

|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|--2**L * (PIby2_1), 2**L * (PIby2_2)

movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)

movel #0xA2F9836E,FP_SCR1+4(%a6)
movel #0x4E44152A,FP_SCR1+8(%a6)
movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)

fmovex %fp0,%fp2
fmulx FP_SCR1(%a6),%fp2
|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
|--US THE DESIRED VALUE IN FLOATING POINT.

|--HIDE SIX CYCLES OF INSTRUCTION
movel %a1,%d2
swap %d2 | ...SIGN BIT OF INARG MOVES TO BIT 31
andil #0x80000000,%d2
oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
movel %d2,TWOTO63(%a6)

movel %d0,%d2
addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)

|--FP2 IS READY
fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP2 IS ROUNDED

|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
movew %d2,FP_SCR2(%a6)
clrw FP_SCR2+2(%a6)
movel #0xC90FDAA2,FP_SCR2+4(%a6)
clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1

|--FP2 IS READY
fsubs TWOTO63(%a6),%fp2 | ...FP2 is N

addil #0x00003FDD,%d0 | ...BIASED EXPO OF 2**L * Piby2_2
movew %d0,FP_SCR3(%a6)
clrw FP_SCR3+2(%a6)
movel #0x85A308D3,FP_SCR3+4(%a6)
clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2

movel ENDFLAG(%a6),%d0 | ...FETCHED NOW: RESTORE REUSES THIS SLOT FOR N

|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|--P2 = 2**(L) * Piby2_2
fmovex %fp2,%fp4
fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
fmovex %fp2,%fp5
fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
fmovex %fp4,%fp3
|--we want P+p = W+w but |p| <= half ulp of P
|--Then, we need to compute A := R-P and a := r-p
faddx %fp5,%fp3 | ...FP3 is P
fsubx %fp3,%fp4 | ...W-P

fsubx %fp3,%fp0 | ...FP0 is A := R - P
faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w

fmovex %fp0,%fp3 | ...FP3 A
fsubx %fp4,%fp1 | ...FP1 is a := r - p

|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
|--|r| <= half ulp of R.
faddx %fp1,%fp0 | ...FP0 is R := A+a
|--No need to calculate r if this is the last loop
cmpil #0,%d0
bgt RESTORE | ...ENDFLAG WAS 1: REDUCTION FINISHED

|--Need to calculate r
fsubx %fp0,%fp3 | ...A-R
faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
bra LOOP
 
RESTORE:
	| Reduction finished: FP0 = R, FP2 = N.  Store N as an integer,
	| pop what REDUCEX pushed, then return to whichever entry started
	| the reduction: ADJN < 4 is sin/cos, ADJN = 4 is sincos.
	fmovel	%fp2,N(%a6)		| ...N AS A LONGWORD
	movel	(%a7)+,%d2
	fmovemx	(%a7)+,%fp2-%fp5

	movel	ADJN(%a6),%d0
	cmpil	#4,%d0

	blt	SINCONT			| ...SIN OR COS
	bras	SCCONT			| ...SINCOS
 
.global ssincosd
ssincosd:
	| Denormalized input: cos(X) = 1 goes out through sto_cos, and
	| sin(X) = X is handled by t_extdnrm.
	fmoves	#0x3F800000,%fp1	| ...FP1 = 1.0
	bsr	sto_cos			|store cosine result
	bra	t_extdnrm
 
.global ssincos
ssincos:
	| Sincos entry, X at (a0).  ADJN := 4 marks the sincos path for
	| RESTORE.  Loads X, keeps a copy in the X scratch slot, builds
	| the compact form of |X| in d0 and picks the path.
	movel	#4,ADJN(%a6)

	fmovex	(%a0),%fp0		| ...FP0 = X

	movel	(%a0),%d0
	movew	4(%a0),%d0
	fmovex	%fp0,X(%a6)
	andil	#0x7FFFFFFF,%d0		| ...COMPACT FORM OF |X|

	cmpil	#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
	blt	SCSM			| ...NO: TINY ARGUMENT

SCOK1:
	cmpil	#0x4004BC7E,%d0		| ...|X| < 15 PI?
	blts	SCMAIN			| ...YES: TABLE-DRIVEN REDUCTION
	bra	REDUCEX			| ...NO: GENERAL REDUCTION
 
 
SCMAIN:
|--THIS IS THE USUAL CASE, |X| < 15 PI.
|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|--ON EXIT FP0 = R AND N(%a6) HOLDS THE INTEGER N.  FALLS INTO SCCONT.
fmovex %fp0,%fp1
fmuld TWOBYPI,%fp1 | ...X*2/PI

|--HIDE THE NEXT THREE INSTRUCTIONS
lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32

|--FP1 IS NOW READY
fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER

movel N(%a6),%d0
asll #4,%d0 | ...16 BYTES PER TABLE ENTRY
addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2

fsubx (%a1)+,%fp0 | ...X-Y1 (A1 NOW POINTS AT Y2)
fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
 
SCCONT:
	| Continuation point shared with REDUCEX.  Rotates the low bit of
	| N into the sign of d0: even N goes to NEVEN, odd N falls through
	| to NODD.
	movel	N(%a6),%d0
	rorl	#1,%d0
	tstl	%d0			| ...D0 < 0 IFF N IS ODD
	bge	NEVEN
 
NODD:
|--N IS ODD: SIN(X) = SGN1*COS(R) AND COS(X) = SGN2*SIN(R), WITH
|--SGN1 = (-1)**J1, J1 = BIT 1 OF N, AND SGN2 = (-1)**(J1 EOR 1).
|--ON ENTRY FP0 = R, D0 = N ROTATED RIGHT BY ONE, D1 = USER FPCR VALUE.
|--THE SINE POLYNOMIAL IS BUILT IN FP1 AND THE COSINE ONE IN FP2/FP0.
|--REGISTERS SAVED SO FAR: D0, A0, FP2.
|--NOTE(review): THE LINE ABOVE IS THE ORIGINAL REMARK; NO SUCH SAVES
|--ARE VISIBLE ON THIS PATH - CONFIRM.

fmovex %fp0,RPRIME(%a6)
fmulx %fp0,%fp0 | ...FP0 IS S = R*R
fmoved SINA7,%fp1 | ...A7
fmoved COSB8,%fp2 | ...B8
fmulx %fp0,%fp1 | ...SA7
movel %d2,-(%a7) | ...D2 IS BORROWED AS SCRATCH
movel %d0,%d2
fmulx %fp0,%fp2 | ...SB8
rorl #1,%d2
andil #0x80000000,%d2 | ...BIT 31 OF D2 IS J1

faddd SINA6,%fp1 | ...A6+SA7
eorl %d0,%d2 | ...BIT 31 OF D0 IS 1 HERE (N ODD)
andil #0x80000000,%d2 | ...BIT 31 OF D2 IS J2
faddd COSB7,%fp2 | ...B7+SB8

fmulx %fp0,%fp1 | ...S(A6+SA7)
eorl %d2,RPRIME(%a6) | ...RPRIME IS NOW R' = SGN2*R
movel (%a7)+,%d2 | ...GIVE D2 BACK
fmulx %fp0,%fp2 | ...S(B7+SB8)
rorl #1,%d0
andil #0x80000000,%d0 | ...BIT 31 OF D0 IS J1

faddd SINA5,%fp1 | ...A5+S(A6+SA7)
movel #0x3F800000,POSNEG1(%a6)
eorl %d0,POSNEG1(%a6) | ...POSNEG1 IS SGN1 IN SINGLE
faddd COSB6,%fp2 | ...B6+S(B7+SB8)

fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7))
fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8))
fmovex %fp0,SPRIME(%a6)

faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7))
eorl %d0,SPRIME(%a6) | ...SPRIME IS NOW S' = SGN1*S
faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8))

fmulx %fp0,%fp1 | ...S(A4+...)
fmulx %fp0,%fp2 | ...S(B5+...)

faddd SINA3,%fp1 | ...A3+S(A4+...)
faddd COSB4,%fp2 | ...B4+S(B5+...)

fmulx %fp0,%fp1 | ...S(A3+...)
fmulx %fp0,%fp2 | ...S(B4+...)

faddx SINA2,%fp1 | ...A2+S(A3+...)
faddx COSB3,%fp2 | ...B3+S(B4+...)

fmulx %fp0,%fp1 | ...S(A2+...)
fmulx %fp0,%fp2 | ...S(B3+...)

faddx SINA1,%fp1 | ...A1+S(A2+...)
faddx COSB2,%fp2 | ...B2+S(B3+...)

fmulx %fp0,%fp1 | ...S(A1+...)
fmulx %fp2,%fp0 | ...S(B2+...)


fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...)
fadds COSB1,%fp0 | ...B1+S(B2...)
fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...))

movel %d1,-(%sp) |save users mode, precision & exceptions
andil #0xff,%d1 |mask off all exceptions
fmovel %d1,%FPCR
faddx RPRIME(%a6),%fp1 | ...COS(X)
bsr sto_cos |store cosine result
fmovel (%sp)+,%FPCR |restore users exceptions
fadds POSNEG1(%a6),%fp0 | ...SIN(X)

bra t_frcinx
 
 
NEVEN:
|--REGISTERS SAVED SO FAR: FP2.
|--Even N: the B-polynomial (COSBx coefficients) is accumulated in FP1
|--and becomes COS(X); the A-polynomial (SINAx coefficients) is
|--accumulated in FP2/FP0 and becomes SIN(X). d0 holds N rotated right
|--by one bit on entry; one more rotate brings bit 0 of d0 into the
|--sign position, and that sign is applied to R', S' and POSNEG1.

fmovex %fp0,RPRIME(%a6)
fmulx %fp0,%fp0 | ...FP0 IS S = R*R
fmoved COSB8,%fp1 | ...B8
fmoved SINA7,%fp2 | ...A7
fmulx %fp0,%fp1 | ...SB8
fmovex %fp0,SPRIME(%a6)
fmulx %fp0,%fp2 | ...SA7
rorl #1,%d0 | ...bit 0 of d0 moves to bit 31
andil #0x80000000,%d0 | ...keep only that sign bit
faddd COSB7,%fp1 | ...B7+SB8
faddd SINA6,%fp2 | ...A6+SA7
eorl %d0,RPRIME(%a6) | ...apply sign to R'
eorl %d0,SPRIME(%a6) | ...apply sign to S'
fmulx %fp0,%fp1 | ...S(B7+SB8)
oril #0x3F800000,%d0 | ...single-precision 1.0 with that sign
movel %d0,POSNEG1(%a6)
fmulx %fp0,%fp2 | ...S(A6+SA7)

faddd COSB6,%fp1 | ...B6+S(B7+SB8)
faddd SINA5,%fp2 | ...A5+S(A6+SA7)

fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8))
fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7))

faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8))
faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7))

fmulx %fp0,%fp1 | ...S(B5+...)
fmulx %fp0,%fp2 | ...S(A4+...)

faddd COSB4,%fp1 | ...B4+S(B5+...)
faddd SINA3,%fp2 | ...A3+S(A4+...)

fmulx %fp0,%fp1 | ...S(B4+...)
fmulx %fp0,%fp2 | ...S(A3+...)

faddx COSB3,%fp1 | ...B3+S(B4+...)
faddx SINA2,%fp2 | ...A2+S(A3+...)

fmulx %fp0,%fp1 | ...S(B3+...)
fmulx %fp0,%fp2 | ...S(A2+...)

faddx COSB2,%fp1 | ...B2+S(B3+...)
faddx SINA1,%fp2 | ...A1+S(A2+...)

fmulx %fp0,%fp1 | ...S(B2+...)
fmulx %fp2,%fp0 | ...s(a1+...)


fadds COSB1,%fp1 | ...B1+S(B2...)
fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...)
fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...))

|--The final add for the cosine runs with only the low byte of d1 in
|--the FPCR; the stacked value is reloaded before the final add for
|--the sine. NOTE(review): d1 presumably holds the user's FPCR
|--mode/precision on entry -- confirm against the caller.
movel %d1,-(%sp) |save users mode & precision
andil #0xff,%d1 |mask off all exceptions
fmovel %d1,%FPCR
fadds POSNEG1(%a6),%fp1 | ...COS(X)
bsr sto_cos |store cosine result
fmovel (%sp)+,%FPCR |restore users exceptions
faddx RPRIME(%a6),%fp0 | ...SIN(X)

bra t_frcinx
 
|--NOTE(review): no branch to SCBORS appears in the sincos code shown
|--alongside it; ssincos reaches SCSM directly.
SCBORS:
cmpil #0x3FFF8000,%d0
bgt REDUCEX

|--Tiny |X|: the cosine is computed as 1.0 minus a single-precision
|--constant (0x00800000), with only the low byte of d1 in the FPCR,
|--and stored through sto_cos; the sine result is X itself.
SCSM:
movew #0x0000,XDCARE(%a6)
fmoves #0x3F800000,%fp1

movel %d1,-(%sp) |save users mode & precision
andil #0xff,%d1 |mask off all exceptions
fmovel %d1,%FPCR
fsubs #0x00800000,%fp1
bsr sto_cos |store cosine result
fmovel (%sp)+,%FPCR |restore users exceptions
fmovex X(%a6),%fp0
bra t_frcinx
 
|end
/kernel_ex.S
0,0 → 1,494
|
| kernel_ex.sa 3.3 12/19/90
|
| This file contains routines to force exception status in the
| fpu for exceptional cases detected or reported within the
| transcendental functions. Typically, the t_xx routine will
| set the appropriate bits in the USER_FPSR word on the stack.
| The bits are tested in gen_except.sa to determine if an exceptional
| situation needs to be created on return from the FPSP.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
KERNEL_EX: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
| Default results, each stored as three longwords: the first holds
| the sign/exponent in its upper word, followed by the high and low
| halves of the mantissa.
mns_inf:				| -infinity
	.long	0xffff0000
	.long	0x00000000
	.long	0x00000000
pls_inf:				| +infinity
	.long	0x7fff0000
	.long	0x00000000
	.long	0x00000000
nan:					| default NaN (all mantissa bits set)
	.long	0x7fff0000
	.long	0xffffffff
	.long	0xffffffff
huge:					| exponent 0x7ffe, all mantissa bits set
	.long	0x7ffe0000
	.long	0xffffffff
	.long	0xffffffff
 
|xref ovf_r_k
|xref unf_sub
|xref nrm_set
 
.global t_dz
.global t_dz2
.global t_operr
.global t_unfl
.global t_ovfl
.global t_ovfl2
.global t_inx2
.global t_frcinx
.global t_extdnrm
.global t_resdnrm
.global dst_nan
.global src_nan
|
| DZ exception
|
|
| if dz trap disabled
| store properly signed inf (use sign of etemp) into fp0
| set FPSR exception status dz bit, condition code
| inf bit, and accrued dz bit
| return
| frestore the frame into the machine (done by unimp_hd)
|
| else dz trap enabled
| set exception status bit & accrued bits in FPSR
| set flag to disable sto_res from corrupting fp register
| return
| frestore the frame into the machine (done by unimp_hd)
|
| t_dz2 is used by monadic functions such as flogn (from do_func).
| t_dz is used by monadic functions such as satanh (from the
| transcendental function).
|
| t_dz2: entry whose disabled-trap result is always -inf. N is set in
| FPSR_CC up front; with the dz trap enabled control joins dz_ena_end,
| otherwise it joins m_inf.
t_dz2:
bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR
fmovel #0,%FPSR |clr status bits (Z set)
btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled
bnes dz_ena_end
bras m_inf |flogx always returns -inf
| t_dz: entry that takes the sign of the result from ETEMP.
t_dz:
fmovel #0,%FPSR |clr status bits (Z set)
btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled
bnes dz_ena
|
| dz disabled
|
btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos
beqs p_inf |branch if pos sign

m_inf:
fmovemx mns_inf,%fp0-%fp0 |load -inf
bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR
bras set_fpsr
p_inf:
fmovemx pls_inf,%fp0-%fp0 |load +inf
set_fpsr:
orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ
rts
|
| dz enabled
|
| No result is loaded into fp0 on this path; only the status bits are
| updated and STORE_FLG is set.
dz_ena:
btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos
beqs dz_ena_end
bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR
dz_ena_end:
orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ
st STORE_FLG(%a6) |flag that disables the result store (see header)
rts
|
| OPERR exception
|
| if (operr trap disabled)
| set FPSR exception status operr bit, condition code
| nan bit; Store default NAN into fp0
| frestore the frame into the machine (done by unimp_hd)
|
| else (operr trap enabled)
| set FPSR exception status operr bit, accrued operr bit
| set flag to disable sto_res from corrupting fp register
| frestore the frame into the machine (done by unimp_hd)
|
| t_operr: record an operand error in USER_FPSR. With the operr trap
| enabled only STORE_FLG is set, so the destination is left alone;
| with the trap disabled the default NaN is loaded into fp0.
t_operr:
	orl	#opnan_mask,USER_FPSR(%a6)	|set NaN, OPERR, AIOP
	btstb	#operr_bit,FPCR_ENABLE(%a6)	|operr trap enabled?
	beqs	op_dis				|no: return the default NaN
op_ena:
	st	STORE_FLG(%a6)			|do not corrupt destination
	rts
op_dis:
	fmovemx	nan,%fp0-%fp0			|load default nan
	rts
 
|
| t_unfl --- UNFL exception
|
| This entry point is used by all routines requiring unfl, inex2,
| aunfl, and ainex to be set on exit.
|
| On entry, a0 points to the exceptional operand. The final exceptional
| operand is built in FP_SCR1 and only the sign from the original operand
| is used.
|
| t_unfl: build a signed-zero exceptional operand in FP_SCR1, set the
| underflow/inexact status bits, let unf_sub choose the default result
| for the current rounding precision and mode, and load it into fp0.
t_unfl:
clrl FP_SCR1(%a6) |set exceptional operand to zero
clrl FP_SCR1+4(%a6)
clrl FP_SCR1+8(%a6)
tstb (%a0) |extract sign from caller's exop
bpls unfl_signok
bset #sign_bit,FP_SCR1(%a6)
unfl_signok:
leal FP_SCR1(%a6),%a0 |a0 now points to the operand built above
orl #unfinx_mask,USER_FPSR(%a6)
| ;set UNFL, INEX2, AUNFL, AINEX
unfl_con:
btstb #unfl_bit,FPCR_ENABLE(%a6)
beqs unfl_dis

| Underflow trap enabled: adjust the frame fields below before the
| common code runs.
unfl_ena:
bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0
bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
bsetb #sticky_bit,STICKY(%a6) |set sticky bit

bclrb #E1,E_BYTE(%a6)

unfl_dis:
bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision
bclrb #sign_bit,LOCAL_EX(%a0) |move the sign out of the exponent word ...
sne LOCAL_SGN(%a0) |convert to internal ext format

bsr unf_sub |returns IEEE result at a0
| ;and sets FPSR_CC accordingly
bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
beqs unfl_fin |sign byte was zero: result is positive

bsetb #sign_bit,LOCAL_EX(%a0)
bsetb #sign_bit,FP_SCR1(%a6) |set sign bit of exc operand

unfl_fin:
fmovemx (%a0),%fp0-%fp0 |store result in fp0
rts
 
|
| t_ovfl2 --- OVFL exception (without inex2 returned)
|
| This entry is used by scale to force catastrophic overflow. The
| ovfl, aovfl, and ainex bits are set, but not the inex2 bit.
|
| t_ovfl2: overflow entry that sets inex2 only when the operand in
| ETEMP has non-zero mantissa bits below the current rounding
| precision. ETEMP is copied to FP_SCR1, then control joins t_work.
| d0 is used as scratch.
t_ovfl2:
	orl	#ovfl_inx_mask,USER_FPSR(%a6)
	movel	ETEMP(%a6),FP_SCR1(%a6)		|copy the three longwords
	movel	ETEMP_HI(%a6),FP_SCR1+4(%a6)	|of ETEMP to FP_SCR1
	movel	ETEMP_LO(%a6),FP_SCR1+8(%a6)
|
| Check for single or double round precision. If single, check if
| the lower 40 bits of ETEMP are zero; if not, set inex2. If double,
| check if the lower 11 bits are zero; if not, set inex2.
|
	moveb	FPCR_MODE(%a6),%d0
	andib	#0xc0,%d0		|isolate the rounding precision field
	beq	t_work			|if extended, finish ovfl processing
	cmpib	#0x40,%d0		|test for single
	bnes	t_dbl
t_sgl:
| The lower 40 mantissa bits are all 32 bits of ETEMP_LO plus the low
| 8 bits of ETEMP_HI. The low longword must be tested in full: a
| byte-sized test would look only at its most significant byte and
| miss non-zero bits in the remaining 24.
	tstl	ETEMP_LO(%a6)
	bnes	t_setinx2
	movel	ETEMP_HI(%a6),%d0
	andil	#0xff,%d0		|look at only lower 8 bits
	bnes	t_setinx2
	bra	t_work
t_dbl:
	movel	ETEMP_LO(%a6),%d0
	andil	#0x7ff,%d0		|look at only lower 11 bits
	beq	t_work
t_setinx2:
	orl	#inex2_mask,USER_FPSR(%a6)
	bras	t_work
|
| t_ovfl --- OVFL exception
|
|** Note: the exc operand is returned in ETEMP.
|
| t_ovfl sets the overflow/inexact status bits and continues at
| t_work, which is also the join point used by t_ovfl2. The default
| overflow result is produced by ovf_r_k in ETEMP and loaded into fp0.
t_ovfl:
orl #ovfinx_mask,USER_FPSR(%a6)
t_work:
btstb #ovfl_bit,FPCR_ENABLE(%a6) |test FPCR for ovfl enabled
beqs ovf_dis

| Overflow trap enabled: clear FP_SCR1 and adjust the frame fields
| below, then continue with the disabled-case code.
ovf_ena:
clrl FP_SCR1(%a6) |set exceptional operand
clrl FP_SCR1+4(%a6)
clrl FP_SCR1+8(%a6)

bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0
bclrb #wbtemp15_bit,WB_BYTE(%a6) |clear wbtemp15
bsetb #sticky_bit,STICKY(%a6) |set sticky bit

bclrb #E1,E_BYTE(%a6)
| ;fall through to disabled case

| For disabled overflow call 'ovf_r_k'. This routine loads the
| correct result based on the rounding precision, destination
| format, rounding mode and sign.
|
ovf_dis:
bsr ovf_r_k |returns unsigned ETEMP_EX
| ;and sets FPSR_CC accordingly.
bfclr ETEMP_SGN(%a6){#0:#8} |fix sign
beqs ovf_pos |sign byte was zero: result is positive
bsetb #sign_bit,ETEMP_EX(%a6)
bsetb #sign_bit,FP_SCR1(%a6) |set exceptional operand sign
ovf_pos:
fmovemx ETEMP(%a6),%fp0-%fp0 |move the result to fp0
rts
 
 
|
| INEX2 exception
|
| The inex2 and ainex bits are set.
|
| t_inx2: OR the inexact bits into USER_FPSR and return; no register
| is modified.
t_inx2:
orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX
rts
 
|
| Force Inex2
|
| This routine is called by the transcendental routines to force
| the inex2 exception bits set in the FPSR. If the underflow bit
| is set, but the underflow trap was not taken, the aunfl bit in
| the FPSR must be set.
|
| t_frcinx: set INEX2/AINEX in USER_FPSR; when the unfl status bit is
| set, also set the accrued underflow bit.
t_frcinx:
	orl	#inx2a_mask,USER_FPSR(%a6)	|set INEX2, AINEX
	btstb	#unfl_bit,FPSR_EXCEPT(%a6)	|unfl bit set?
	bnes	frc_aunfl			|yes: accrue it as well
	rts					|no: aunfl is left unchanged
frc_aunfl:
	bsetb	#aunfl_bit,FPSR_AEXCEPT(%a6)
	rts
 
|
| DST_NAN
|
| Determine if the destination nan is signalling or non-signalling,
| and set the FPSR bits accordingly. See the MC68040 User's Manual
| section 3.2.2.5 NOT-A-NUMBERS.
|
| dst_nan: the destination operand (FPTEMP) is a NaN. A non-signalling
| NaN is returned in fp0 unchanged; a signalling NaN is either
| reported (snan trap enabled, nothing stored) or returned in fp0 with
| its signan bit set (trap disabled).
dst_nan:
btstb #sign_bit,FPTEMP_EX(%a6) |test sign of nan
beqs dst_pos |if clr, it was positive
bsetb #neg_bit,FPSR_CC(%a6) |set N bit
dst_pos:
btstb #signan_bit,FPTEMP_HI(%a6) |check if signalling
beqs dst_snan |branch if signalling

fmovel %d1,%fpcr |restore user's rmode/prec
fmovex FPTEMP(%a6),%fp0 |return the non-signalling nan
|
| Check the source nan. If it is signalling, snan will be reported.
|
moveb STAG(%a6),%d0
andib #0xe0,%d0 |keep the top three bits of STAG
cmpib #0x60,%d0 |compared against 0x60; no match: nothing to report
bnes no_snan
btstb #signan_bit,ETEMP_HI(%a6) |check if signalling
bnes no_snan
orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
no_snan:
rts

dst_snan:
btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled
beqs dst_dis |branch if disabled

orb #nan_tag,DTAG(%a6) |set up dtag for nan
st STORE_FLG(%a6) |do not store a result
orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
rts

dst_dis:
bsetb #signan_bit,FPTEMP_HI(%a6) |set SNAN bit in the destination operand
fmovel %d1,%fpcr |restore user's rmode/prec
fmovex FPTEMP(%a6),%fp0 |load non-sign. nan
orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
rts
 
|
| SRC_NAN
|
| Determine if the source nan is signalling or non-signalling,
| and set the FPSR bits accordingly. See the MC68040 User's Manual
| section 3.2.2.5 NOT-A-NUMBERS.
|
| src_nan: the source operand (ETEMP) is a NaN. A non-signalling NaN
| is returned in fp0 unchanged; a signalling NaN is either reported
| (snan trap enabled, nothing stored) or returned in fp0 with its
| signan bit set (trap disabled).
src_nan:
btstb #sign_bit,ETEMP_EX(%a6) |test sign of nan
beqs src_pos |if clr, it was positive
bsetb #neg_bit,FPSR_CC(%a6) |set N bit
src_pos:
btstb #signan_bit,ETEMP_HI(%a6) |check if signalling
beqs src_snan |branch if signalling
fmovel %d1,%fpcr |restore user's rmode/prec
fmovex ETEMP(%a6),%fp0 |return the non-signalling nan
rts

src_snan:
btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled
beqs src_dis |branch if disabled
bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop
orb #norm_tag,DTAG(%a6) |set up dtag for norm
orb #nan_tag,STAG(%a6) |set up stag for nan
st STORE_FLG(%a6) |do not store a result
orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
rts

src_dis:
bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop
fmovel %d1,%fpcr |restore user's rmode/prec
fmovex ETEMP(%a6),%fp0 |load non-sign. nan
orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
rts
 
|
| For all functions that have a denormalized input and that f(x)=x,
| this is the entry point
|
| t_extdnrm / t_resdnrm: a0 points to the extended operand. The two
| entries differ only in the status mask ORed into USER_FPSR; both
| continue at xdnrm_con and return the result in fp0.
t_extdnrm:
orl #unfinx_mask,USER_FPSR(%a6)
| ;set UNFL, INEX2, AUNFL, AINEX
bras xdnrm_con
|
| Entry point for scale with extended denorm. The function does
| not set inex2, aunfl, or ainex.
|
t_resdnrm:
orl #unfl_mask,USER_FPSR(%a6)

xdnrm_con:
btstb #unfl_bit,FPCR_ENABLE(%a6)
beqs xdnrm_dis

|
| If exceptions are enabled, the additional task of setting up WBTEMP
| is needed so that when the underflow exception handler is entered,
| the user perceives no difference between what the 040 provides vs.
| what the FPSP provides.
|
xdnrm_ena:
movel %a0,-(%a7) |keep the caller's operand pointer

| Work on a copy of the operand in FP_SCR1.
movel LOCAL_EX(%a0),FP_SCR1(%a6)
movel LOCAL_HI(%a0),FP_SCR1+4(%a6)
movel LOCAL_LO(%a0),FP_SCR1+8(%a6)

lea FP_SCR1(%a6),%a0

bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0) |convert to internal ext format
tstw LOCAL_EX(%a0) |check if input is denorm
beqs xdnrm_dn |if so, skip nrm_set
bsr nrm_set |normalize the result (exponent
| ;will be negative
xdnrm_dn:
bclrb #sign_bit,LOCAL_EX(%a0) |take off false sign
bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
beqs xdep
bsetb #sign_bit,LOCAL_EX(%a0)
xdep:
bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0
bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
bclrb #sticky_bit,STICKY(%a6) |clear sticky bit
bclrb #E1,E_BYTE(%a6)
movel (%a7)+,%a0 |a0 points to the caller's operand again
xdnrm_dis:
bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision
bnes not_ext |if not round extended, store
| ;IEEE defaults
| Extended rounding precision: the operand itself is the result.
is_ext:
btstb #sign_bit,LOCAL_EX(%a0)
beqs xdnrm_store

bsetb #neg_bit,FPSR_CC(%a6) |set N bit in FPSR_CC

bras xdnrm_store

| Single or double rounding precision: unf_sub supplies the result.
not_ext:
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0) |convert to internal ext format
bsr unf_sub |returns IEEE result pointed by
| ;a0; sets FPSR_CC accordingly
bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
beqs xdnrm_store
bsetb #sign_bit,LOCAL_EX(%a0)
xdnrm_store:
fmovemx (%a0),%fp0-%fp0 |store result in fp0
rts
 
|
| This subroutine is used for dyadic operations that use an extended
| denorm within the kernel. The approach used is to capture the frame,
| fix/restore.
|
.global t_avoid_unsupp
| t_avoid_unsupp: save the FPU state with fsave, normalize a
| denormalized destination (FPTEMP) and/or source (ETEMP) operand in
| the saved frame, then frestore it. An idle frame (second byte of the
| saved frame is zero) is discarded without an frestore. a2 is used as
| the frame pointer so the fpsp.h offsets apply to the local frame.
t_avoid_unsupp:
link %a2,#-LOCAL_SIZE |so that the fpsp.h negative
| ;offsets may be used with a2
fsave -(%a7)
tstb 1(%a7) |check if idle, exit if so
beq idle_end
btstb #E1,E_BYTE(%a2) |check for an E1 exception if
| ;enabled, there is an unsupp
beq end_avun |else, exit
btstb #7,DTAG(%a2) |check for denorm destination
beqs src_den |else, must be a source denorm
|
| handle destination denorm
|
lea FPTEMP(%a2),%a0
btstb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0) |convert to internal ext format
bclrb #7,DTAG(%a2) |set DTAG to norm
bsr nrm_set |normalize result, exponent
| ;will become negative
bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign
bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
beqs ck_src_den |check if source is also denorm
bsetb #sign_bit,LOCAL_EX(%a0)
ck_src_den:
btstb #7,STAG(%a2)
beqs end_avun
|
| handle source denorm
|
src_den:
lea ETEMP(%a2),%a0
btstb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0) |convert to internal ext format
bclrb #7,STAG(%a2) |set STAG to norm
bsr nrm_set |normalize result, exponent
| ;will become negative
bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign
bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format
beqs den_com
bsetb #sign_bit,LOCAL_EX(%a0)
den_com:
moveb #0xfe,CU_SAVEPC(%a2) |set continue frame
clrw NMNEXC(%a2) |clear NMNEXC
bclrb #E1,E_BYTE(%a2)
| fmove.l %FPSR,FPSR_SHADOW(%a2)
| bset.b #SFLAG,E_BYTE(%a2)
| bset.b #XFLAG,T_BYTE(%a2)
end_avun:
frestore (%a7)+
unlk %a2
rts
idle_end:
addl #4,%a7 |drop the 4 bytes at the top of the stack (no frestore)
unlk %a2
rts
|end
/util.S
0,0 → 1,748
|
| util.sa 3.7 7/29/91
|
| This file contains routines used by other programs.
|
| ovf_res: used by overflow to force the correct
| result. ovf_r_k, ovf_r_x2, ovf_r_x3 are
| derivatives of this routine.
| get_fline: get user's opcode word
| g_dfmtou: returns the destination format.
| g_opcls: returns the opclass of the float instruction.
| g_rndpr: returns the rounding precision.
| reg_dest: write byte, word, or long data to Dn
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
|UTIL idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref mem_read
 
.global g_dfmtou
.global g_opcls
.global g_rndpr
.global get_fline
.global reg_dest
 
|
| Final result table for ovf_res. Note that the negative counterparts
| are unnecessary as ovf_res always returns the sign separately from
| the exponent.
| Each entry is four longwords. end_ovfr copies the exponent word and
| the two mantissa longwords of the selected entry to the result.
| ;+inf
EXT_PINF: .long 0x7fff0000,0x00000000,0x00000000,0x00000000
| ;largest +ext
EXT_PLRG: .long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000
| ;largest magnitude +sgl in ext
SGL_PLRG: .long 0x407e0000,0xffffff00,0x00000000,0x00000000
| ;largest magnitude +dbl in ext
DBL_PLRG: .long 0x43fe0000,0xffffffff,0xfffff800,0x00000000
| ;no negative-valued entries are stored in this table

 
| Jump table for ovf_res, indexed by (round precision << 2) | round
| mode. One row per precision value; within a row the columns follow
| the handler suffixes RN, RZ, RM, RP.
tblovfl:
	.long	EXT_RN,EXT_RZ,EXT_RM,EXT_RP	|precision 0
	.long	SGL_RN,SGL_RZ,SGL_RM,SGL_RP	|precision 1
	.long	DBL_RN,DBL_RZ,DBL_RM,DBL_RP	|precision 2
	.long	error,error,error,error		|precision 3
 
 
|
| ovf_r_k --- overflow result calculation
|
| This entry point is used by kernel_ex.
|
| This forces the destination precision to be extended
|
| Input: operand in ETEMP
| Output: a result is in ETEMP (internal extended format)
|
.global ovf_r_k
| Points a0 at ETEMP and moves the sign out of the exponent word into
| ETEMP_SGN. There is no rts: execution continues into ovf_r_x2.
ovf_r_k:
lea ETEMP(%a6),%a0 |a0 points to source operand
bclrb #sign_bit,ETEMP_EX(%a6) |clear the sign bit; Z reflects its old value
sne ETEMP_SGN(%a6) |convert to internal IEEE format
 
|
| ovf_r_x2 --- overflow result calculation
|
| This entry point used by x_ovfl. (opclass 0 and 2)
|
| Input a0 points to an operand in the internal extended format
| Output a0 points to the result in the internal extended format
|
| This sets the round precision according to the user's FPCR unless the
| instruction is fsgldiv or fsglmul or fsadd, fdadd, fsub, fdsub, fsmul,
| fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg.
| If the instruction is fsgldiv or fsglmul, the rounding precision must be
| extended. If the instruction is not fsgldiv or fsglmul but a force-
| precision instruction, the rounding precision is then set to the force
| precision.
 
.global ovf_r_x2
| ovf_r_x2: choose the rounding precision for the overflow result and
| pass it to ovf_res in d0 (0 = extended, 1 = single, 2 = double).
| The precision comes from the command word when it encodes a forced
| single/double operation, is extended for fsglmul/fsgldiv, and is
| otherwise read from FPCR_MODE. ovf_r_k falls through to this entry.
| Input:  a0 points to the operand in internal extended format
| Uses:   d0
ovf_r_x2:
	btstb	#E3,E_BYTE(%a6)		|check for nu exception
	beql	ovf_e1_exc		|it is cu exception
ovf_e3_exc:
	movew	CMDREG3B(%a6),%d0	|get the command word
| The mask must be long-sized: movew leaves the upper word of d0
| untouched, and the long compares below would never match if stale
| bits were left there.
	andil	#0x00000060,%d0		|clear all bits except 6 and 5
	cmpil	#0x00000040,%d0
	beql	ovff_sgl		|force precision is single
	cmpil	#0x00000060,%d0
	beql	ovff_dbl		|force precision is double
	movew	CMDREG3B(%a6),%d0	|get the command word again
	andil	#0x7f,%d0		|clear all except operation
	cmpil	#0x33,%d0
	beql	ovf_fsgl		|fsglmul or fsgldiv
	cmpil	#0x30,%d0
	beql	ovf_fsgl
	bra	ovf_fpcr		|instruction is none of the above
|					;use FPCR
ovf_e1_exc:
	movew	CMDREG1B(%a6),%d0	|get command word
	andil	#0x00000044,%d0		|clear all bits except 6 and 2
	cmpil	#0x00000040,%d0
	beql	ovff_sgl		|the instruction is force single
	cmpil	#0x00000044,%d0
	beql	ovff_dbl		|the instruction is force double
	movew	CMDREG1B(%a6),%d0	|again get the command word
	andil	#0x0000007f,%d0		|clear all except the op code
	cmpil	#0x00000027,%d0
	beql	ovf_fsgl		|fsglmul
	cmpil	#0x00000024,%d0
	beql	ovf_fsgl		|fsgldiv
	bra	ovf_fpcr		|none of the above, use FPCR
|
|
| Inst is either fsgldiv or fsglmul. Force extended precision.
|
ovf_fsgl:
	clrl	%d0
	bras	ovf_res

ovff_sgl:
	movel	#0x00000001,%d0		|set single
	bras	ovf_res
ovff_dbl:
	movel	#0x00000002,%d0		|set double
	bras	ovf_res
|
| The precision is in the fpcr.
|
ovf_fpcr:
	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|set round precision
	bras	ovf_res
|
|
| ovf_r_x3 --- overflow result calculation
|
| This entry point used by x_ovfl. (opclass 3 only)
|
| Input a0 points to an operand in the internal extended format
| Output a0 points to the result in the internal extended format
|
| This sets the round precision according to the destination size.
|
.global ovf_r_x3
| Uses the destination format returned by g_dfmtou as the rounding
| precision. There is no rts: execution continues into ovf_res.
ovf_r_x3:
bsr g_dfmtou |get dest fmt in d0{1:0}
| ;for fmovout, the destination format
| ;is the rounding precision
 
|
| ovf_res --- overflow result calculation
|
| Input:
| a0 points to operand in internal extended format
| Output:
| a0 points to result in internal extended format
|
.global ovf_res
| Dispatch through tblovfl. On entry d0 holds the rounding precision;
| the rounding mode is read from FPCR_MODE. Uses d0, d1 and a1.
ovf_res:
lsll #2,%d0 |move round precision to d0{3:2}
bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode
orl %d1,%d0 |index is fmt:mode in d0{3:0}
leal tblovfl,%a1 |load a1 with table address
movel %a1@(%d0:l:4),%a1 |use d0 as index to the table
jmp (%a1) |go to the correct routine
|
|case DEST_FMT = EXT
|
| Extended-precision handlers. Each one points a1 at the table entry
| to return and updates the condition codes. RN and RZ leave the sign
| to set_sign; RM and RP test LOCAL_SGN(a0) themselves and go straight
| to end_ovfr.
EXT_RN:
leal EXT_PINF,%a1 |answer is +/- infinity
bsetb #inf_bit,FPSR_CC(%a6)
bra set_sign |now go set the sign
EXT_RZ:
leal EXT_PLRG,%a1 |answer is +/- large number
bra set_sign |now go set the sign
EXT_RM:
tstb LOCAL_SGN(%a0) |if negative overflow
beqs e_rm_pos
e_rm_neg:
leal EXT_PINF,%a1 |answer is negative infinity
orl #neginf_mask,USER_FPSR(%a6)
bra end_ovfr
e_rm_pos:
leal EXT_PLRG,%a1 |answer is large positive number
bra end_ovfr
EXT_RP:
tstb LOCAL_SGN(%a0) |if negative overflow
beqs e_rp_pos
e_rp_neg:
leal EXT_PLRG,%a1 |answer is large negative number
bsetb #neg_bit,FPSR_CC(%a6)
bra end_ovfr
e_rp_pos:
leal EXT_PINF,%a1 |answer is positive infinity
bsetb #inf_bit,FPSR_CC(%a6)
bra end_ovfr
|
|case DEST_FMT = DBL
|
| Double-precision handlers. Same structure as the extended handlers,
| with DBL_PLRG as the largest finite value.
DBL_RN:
leal EXT_PINF,%a1 |answer is +/- infinity
bsetb #inf_bit,FPSR_CC(%a6)
bra set_sign
DBL_RZ:
leal DBL_PLRG,%a1 |answer is +/- large number
bra set_sign |now go set the sign
DBL_RM:
tstb LOCAL_SGN(%a0) |if negative overflow
beqs d_rm_pos
d_rm_neg:
leal EXT_PINF,%a1 |answer is negative infinity
orl #neginf_mask,USER_FPSR(%a6)
bra end_ovfr |inf is same for all precisions (ext,dbl,sgl)
d_rm_pos:
leal DBL_PLRG,%a1 |answer is large positive number
bra end_ovfr
DBL_RP:
tstb LOCAL_SGN(%a0) |if negative overflow
beqs d_rp_pos
d_rp_neg:
leal DBL_PLRG,%a1 |answer is large negative number
bsetb #neg_bit,FPSR_CC(%a6)
bra end_ovfr
d_rp_pos:
leal EXT_PINF,%a1 |answer is positive infinity
bsetb #inf_bit,FPSR_CC(%a6)
bra end_ovfr
|
|case DEST_FMT = SGL
|
| Single-precision handlers. Same structure as the extended handlers,
| with SGL_PLRG as the largest finite value.
SGL_RN:
leal EXT_PINF,%a1 |answer is +/- infinity
bsetb #inf_bit,FPSR_CC(%a6)
bras set_sign
SGL_RZ:
leal SGL_PLRG,%a1 |answer is +/- large number
bras set_sign
SGL_RM:
tstb LOCAL_SGN(%a0) |if negative overflow
beqs s_rm_pos
s_rm_neg:
leal EXT_PINF,%a1 |answer is negative infinity
orl #neginf_mask,USER_FPSR(%a6)
bras end_ovfr
s_rm_pos:
leal SGL_PLRG,%a1 |answer is large positive number
bras end_ovfr
SGL_RP:
tstb LOCAL_SGN(%a0) |if negative overflow
beqs s_rp_pos
s_rp_neg:
leal SGL_PLRG,%a1 |answer is large negative number
bsetb #neg_bit,FPSR_CC(%a6)
bras end_ovfr
s_rp_pos:
leal EXT_PINF,%a1 |answer is positive infinity
bsetb #inf_bit,FPSR_CC(%a6)
bras end_ovfr
 
| Common tail of the overflow handlers. set_sign sets the N condition
| bit when the operand's sign byte is non-zero; end_ovfr copies the
| table entry at a1 into the operand at a0.
set_sign:
tstb LOCAL_SGN(%a0) |if negative overflow
beqs end_ovfr
neg_sign:
bsetb #neg_bit,FPSR_CC(%a6)

end_ovfr:
movew LOCAL_EX(%a1),LOCAL_EX(%a0) |do not overwrite sign
movel LOCAL_HI(%a1),LOCAL_HI(%a0)
movel LOCAL_LO(%a1),LOCAL_LO(%a0)
rts
 
 
|
| ERROR
|
| Target of the last four tblovfl entries; returns without changing
| the operand at a0.
error:
rts
|
| get_fline --- get f-line opcode of interrupted instruction
|
| Returns opcode in the low word of d0.
|
| get_fline: fetch the 2-byte opcode at USER_FPIAR through mem_read
| into the low word of a zeroed stack longword, then pop that longword
| into d0 (so the upper word of d0 is zero).
| NOTE(review): a0/a1/d0 look like source, destination and byte count
| for mem_read -- confirm against its definition.
get_fline:
	clrl	-(%a7)			|zeroed longword temporary
	movel	USER_FPIAR(%a6),%a0	|opcode address
	leal	2(%a7),%a1		|point to low word of temporary
	moveql	#2,%d0			|count
	bsrl	mem_read
	movel	(%a7)+,%d0		|d0 = opcode in low word
	rts
|
| g_rndpr --- put rounding precision in d0{1:0}
|
| valid return codes are:
| 00 - extended
| 01 - single
| 10 - double
|
| begin
| get rounding precision (cmdreg3b{6:5})
| begin
| case opclass = 011 (move out)
| get destination format - this is also the rounding precision
|
| case opclass = 0x0
| if E3
| *case RndPr(from cmdreg3b{6:5} = 11 then RND_PREC = DBL
| *case RndPr(from cmdreg3b{6:5} = 10 then RND_PREC = SGL
| case RndPr(from cmdreg3b{6:5} = 00 | 01
| use precision from FPCR{7:6}
| case 00 then RND_PREC = EXT
| case 01 then RND_PREC = SGL
| case 10 then RND_PREC = DBL
| else E1
| use precision in FPCR{7:6}
| case 00 then RND_PREC = EXT
| case 01 then RND_PREC = SGL
| case 10 then RND_PREC = DBL
| end
|
| g_rndpr: return the rounding precision in d0 (0 = extended,
| 1 = single, 2 = double). Opclass 3 uses the destination format from
| g_dfmtou; otherwise the command word (CMDREG3B when E3 is set, else
| CMDREG1B) is examined and, failing a forced precision, the value in
| USER_FPCR is used.
g_rndpr:
bsr g_opcls |get opclass in d0{2:0}
cmpw #0x0003,%d0 |check for opclass 011
bnes op_0x0

|
| For move out instructions (opclass 011) the destination format
| is the same as the rounding precision. Pass results from g_dfmtou.
|
bsr g_dfmtou
rts
op_0x0:
btstb #E3,E_BYTE(%a6)
beql unf_e1_exc |branch to e1 underflow
unf_e3_exc:
movel CMDREG3B(%a6),%d0 |rounding precision in d0{10:9}
bfextu %d0{#9:#2},%d0 |move the rounding prec bits to d0{1:0}
cmpil #0x2,%d0
beql unff_sgl |force precision is single
cmpil #0x3,%d0 |force precision is double
beql unff_dbl
movew CMDREG3B(%a6),%d0 |get the command word again
andil #0x7f,%d0 |clear all except operation
cmpil #0x33,%d0
beql unf_fsgl |fsglmul or fsgldiv
cmpil #0x30,%d0
beql unf_fsgl |fsgldiv or fsglmul
bra unf_fpcr
unf_e1_exc:
movel CMDREG1B(%a6),%d0 |get 32 bits off the stack, 1st 16 bits
| ;are the command word
andil #0x00440000,%d0 |clear all bits except bits 6 and 2
cmpil #0x00400000,%d0
beql unff_sgl |force single
cmpil #0x00440000,%d0 |force double
beql unff_dbl
movel CMDREG1B(%a6),%d0 |get the command word again
andil #0x007f0000,%d0 |clear all bits except the operation
cmpil #0x00270000,%d0
beql unf_fsgl |fsglmul
cmpil #0x00240000,%d0
beql unf_fsgl |fsgldiv
bra unf_fpcr

|
| Convert to return format. The values from cmdreg3b and the return
| values are:
| cmdreg3b return precision
| -------- ------ ---------
| 00,01 0 ext
| 10 1 sgl
| 11 2 dbl
| Force single
|
unff_sgl:
movel #1,%d0 |return 1
rts
|
| Force double
|
unff_dbl:
movel #2,%d0 |return 2
rts
|
| Force extended
|
unf_fsgl:
movel #0,%d0
rts
|
| Get rounding precision set in FPCR{7:6}.
|
unf_fpcr:
movel USER_FPCR(%a6),%d0 |rounding precision bits in d0{7:6}
bfextu %d0{#24:#2},%d0 |move the rounding prec bits to d0{1:0}
rts
|
| g_opcls --- put opclass in d0{2:0}
|
| g_opcls: return the opclass in d0{2:0}. When the E3 bit is set the
| opclass is reported as 0; otherwise it is the top three bits of the
| longword read at CMDREG1B.
g_opcls:
	btstb	#E3,E_BYTE(%a6)
	bnes	opc_3b			|E3 set: opclass is 0
opc_1b:
	movel	CMDREG1B(%a6),%d0
	bfextu	%d0{#0:#3},%d0		|shift opclass bits d0{31:29} to d0{2:0}
	rts
opc_3b:
	clrl	%d0			|if E3, only opclass 0x0 is possible
	rts
|
| g_dfmtou --- put destination format in d0{1:0}
|
| If E1, the format is from cmdreg1b{12:10}
| If E3, the format is extended.
|
| Dest. Fmt.
| extended 010 -> 00
| single 001 -> 01
| double 101 -> 10
|
| g_dfmtou: return the destination format in d0 (0 = extended,
| 1 = single, 2 = double). When the E3 bit is set the format is always
| extended; otherwise the 3-bit format field of the command word read
| at CMDREG1B is decoded (1 -> single, 5 -> double, anything else ->
| extended).
g_dfmtou:
	btstb	#E3,E_BYTE(%a6)
	bnes	dfmt_ext		|if E3, size is always ext
	movel	CMDREG1B(%a6),%d0
	bfextu	%d0{#3:#3},%d0		|dest fmt from cmdreg1b{12:10}
	cmpb	#5,%d0			|check for double
	beqs	dfmt_dbl
	cmpb	#1,%d0			|check for single
	bnes	dfmt_ext		|neither: must be extended
	moveql	#1,%d0
	rts
dfmt_dbl:
	moveql	#2,%d0
	rts
dfmt_ext:
	clrl	%d0
	rts
 
|
|
| Final result table for unf_sub. Note that the negative counterparts
| are unnecessary as unf_sub always returns the sign separately from
| the exponent.
| Each entry is four longwords. end_unfr copies the exponent word and
| the two mantissa longwords of the selected entry to the result. The
| single and double entries carry exponent words 0x3f81 and 0x3c01.
| ;+zero, extended
EXT_PZRO: .long 0x00000000,0x00000000,0x00000000,0x00000000
| ;+zero, single (zero mantissa, exponent word 0x3f81)
SGL_PZRO: .long 0x3f810000,0x00000000,0x00000000,0x00000000
| ;+zero, double (zero mantissa, exponent word 0x3c01)
DBL_PZRO: .long 0x3c010000,0x00000000,0x00000000,0x00000000
| ;smallest +ext denorm
EXT_PSML: .long 0x00000000,0x00000000,0x00000001,0x00000000
| ;smallest +sgl denorm
SGL_PSML: .long 0x3f810000,0x00000100,0x00000000,0x00000000
| ;smallest +dbl denorm
DBL_PSML: .long 0x3c010000,0x00000000,0x00000800,0x00000000
|
| UNF_SUB --- underflow result calculation
|
| Input:
| d0 contains round precision
| a0 points to input operand in the internal extended format
|
| Output:
| a0 points to correct internal extended precision result.
|
 
| Jump table for unf_sub, indexed by (round precision << 2) | round
| mode. One row per precision value; within a row the columns follow
| the handler suffixes RN, RZ, RM, RP. Precision value 3 reuses the
| double-precision handlers.
tblunf:
	.long	uEXT_RN,uEXT_RZ,uEXT_RM,uEXT_RP	|precision 0
	.long	uSGL_RN,uSGL_RZ,uSGL_RM,uSGL_RP	|precision 1
	.long	uDBL_RN,uDBL_RZ,uDBL_RM,uDBL_RP	|precision 2
	.long	uDBL_RN,uDBL_RZ,uDBL_RM,uDBL_RP	|precision 3
 
.global unf_sub
| Dispatch through tblunf. On entry d0 holds the rounding precision;
| the rounding mode is read from FPCR_MODE. Uses d0, d1 and a1.
unf_sub:
lsll #2,%d0 |move round precision to d0{3:2}
bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode
orl %d1,%d0 |index is fmt:mode in d0{3:0}
leal tblunf,%a1 |load a1 with table address
movel %a1@(%d0:l:4),%a1 |use d0 as index to the table
jmp (%a1) |go to the correct routine
|
|case DEST_FMT = EXT
|
| Extended-precision underflow handlers. Each one points a1 at the
| table entry to return and updates the condition codes. RN and RZ
| leave the sign to uset_sign; RM and RP test LOCAL_SGN(a0) themselves
| and go straight to end_unfr.
uEXT_RN:
leal EXT_PZRO,%a1 |answer is +/- zero
bsetb #z_bit,FPSR_CC(%a6)
bra uset_sign |now go set the sign
uEXT_RZ:
leal EXT_PZRO,%a1 |answer is +/- zero
bsetb #z_bit,FPSR_CC(%a6)
bra uset_sign |now go set the sign
uEXT_RM:
tstb LOCAL_SGN(%a0) |if negative underflow
beqs ue_rm_pos
ue_rm_neg:
leal EXT_PSML,%a1 |answer is negative smallest denorm
bsetb #neg_bit,FPSR_CC(%a6)
bra end_unfr
ue_rm_pos:
leal EXT_PZRO,%a1 |answer is positive zero
bsetb #z_bit,FPSR_CC(%a6)
bra end_unfr
uEXT_RP:
tstb LOCAL_SGN(%a0) |if negative underflow
beqs ue_rp_pos
ue_rp_neg:
leal EXT_PZRO,%a1 |answer is negative zero
oril #negz_mask,USER_FPSR(%a6)
bra end_unfr
ue_rp_pos:
leal EXT_PSML,%a1 |answer is positive smallest denorm
bra end_unfr
|
|case DEST_FMT = DBL
|
| Double-precision underflow handlers. Same structure as the extended
| handlers, using DBL_PZRO and DBL_PSML.
uDBL_RN:
leal DBL_PZRO,%a1 |answer is +/- zero
bsetb #z_bit,FPSR_CC(%a6)
bra uset_sign
uDBL_RZ:
leal DBL_PZRO,%a1 |answer is +/- zero
bsetb #z_bit,FPSR_CC(%a6)
bra uset_sign |now go set the sign
uDBL_RM:
tstb LOCAL_SGN(%a0) |if negative underflow
beqs ud_rm_pos
ud_rm_neg:
leal DBL_PSML,%a1 |answer is smallest denormalized negative
bsetb #neg_bit,FPSR_CC(%a6)
bra end_unfr
ud_rm_pos:
leal DBL_PZRO,%a1 |answer is positive zero
bsetb #z_bit,FPSR_CC(%a6)
bra end_unfr
uDBL_RP:
tstb LOCAL_SGN(%a0) |if negative underflow
beqs ud_rp_pos
ud_rp_neg:
leal DBL_PZRO,%a1 |answer is negative zero
oril #negz_mask,USER_FPSR(%a6)
bra end_unfr
ud_rp_pos:
leal DBL_PSML,%a1 |answer is smallest denormalized positive
bra end_unfr
|
|case DEST_FMT = SGL
|
| Single-precision underflow handlers. Same structure as the extended
| handlers, using SGL_PZRO and SGL_PSML.
uSGL_RN:
leal SGL_PZRO,%a1 |answer is +/- zero
bsetb #z_bit,FPSR_CC(%a6)
bras uset_sign
uSGL_RZ:
leal SGL_PZRO,%a1 |answer is +/- zero
bsetb #z_bit,FPSR_CC(%a6)
bras uset_sign
uSGL_RM:
tstb LOCAL_SGN(%a0) |if negative underflow
beqs us_rm_pos
us_rm_neg:
leal SGL_PSML,%a1 |answer is smallest denormalized negative
bsetb #neg_bit,FPSR_CC(%a6)
bras end_unfr
us_rm_pos:
leal SGL_PZRO,%a1 |answer is positive zero
bsetb #z_bit,FPSR_CC(%a6)
bras end_unfr
uSGL_RP:
tstb LOCAL_SGN(%a0) |if negative underflow
beqs us_rp_pos
us_rp_neg:
leal SGL_PZRO,%a1 |answer is negative zero
oril #negz_mask,USER_FPSR(%a6)
bras end_unfr
us_rp_pos:
leal SGL_PSML,%a1 |answer is smallest denormalized positive
bras end_unfr
 
| Common tail of the underflow handlers. uset_sign sets the N
| condition bit when the operand's sign byte is non-zero; end_unfr
| copies the table entry at a1 into the operand at a0.
uset_sign:
tstb LOCAL_SGN(%a0) |if negative underflow
beqs end_unfr
uneg_sign:
bsetb #neg_bit,FPSR_CC(%a6)

end_unfr:
movew LOCAL_EX(%a1),LOCAL_EX(%a0) |be careful not to overwrite sign
movel LOCAL_HI(%a1),LOCAL_HI(%a0)
movel LOCAL_LO(%a1),LOCAL_LO(%a0)
rts
|
| reg_dest --- write byte, word, or long data to Dn
|
|
| Input:
| L_SCR1: Data
| d1: data size and dest register number formatted as:
|
| 32 5 4 3 2 1 0
| -----------------------------------------------
| | 0 | Size | Dest Reg # |
| -----------------------------------------------
|
| Size is:
| 0 - Byte
| 1 - Word
| 2 - Long/Single
|
| Jump table for reg_dest, indexed by d1 = (size << 3) | register
| number. One row per size; within a row the columns are d0 to d7.
pregdst:
	.long	byte_d0,byte_d1,byte_d2,byte_d3,byte_d4,byte_d5,byte_d6,byte_d7
	.long	word_d0,word_d1,word_d2,word_d3,word_d4,word_d5,word_d6,word_d7
	.long	long_d0,long_d1,long_d2,long_d3,long_d4,long_d5,long_d6,long_d7
 
| reg_dest: jump to the store handler selected by d1.
| In:  d1 = Size*8 + Dest Reg #, used as a longword index into pregdst
|      L_SCR1(a6) = data to store (read by the handler)
| Clobbers a0. The handler performs the rts.
reg_dest:
leal pregdst,%a0 |a0 = base of the dispatch table
movel (%a0,%d1.l*4),%a0 |a0 = pregdst[d1]
jmp (%a0) |tail-jump into the handler
 
| Byte-sized stores, one handler per data register.
| d0 and d1 are written into the USER_D0/USER_D1 slots of the a6 frame
| instead of the live registers (presumably those slots are reloaded
| into d0/d1 when the handler exits -- confirm against the epilogue,
| which is not in this excerpt). The +3 addresses the low-order byte
| of the big-endian longword. d2-d7 are written directly.
byte_d0:
moveb L_SCR1(%a6),USER_D0+3(%a6)
rts
byte_d1:
moveb L_SCR1(%a6),USER_D1+3(%a6)
rts
byte_d2:
moveb L_SCR1(%a6),%d2
rts
byte_d3:
moveb L_SCR1(%a6),%d3
rts
byte_d4:
moveb L_SCR1(%a6),%d4
rts
byte_d5:
moveb L_SCR1(%a6),%d5
rts
byte_d6:
moveb L_SCR1(%a6),%d6
rts
byte_d7:
moveb L_SCR1(%a6),%d7
rts
| Word-sized stores, one handler per data register.
| d0 and d1 go to the USER_D0/USER_D1 frame slots (+2 addresses the
| low-order word of the big-endian longword); d2-d7 are written
| directly.
word_d0:
movew L_SCR1(%a6),USER_D0+2(%a6)
rts
word_d1:
movew L_SCR1(%a6),USER_D1+2(%a6)
rts
word_d2:
movew L_SCR1(%a6),%d2
rts
word_d3:
movew L_SCR1(%a6),%d3
rts
word_d4:
movew L_SCR1(%a6),%d4
rts
word_d5:
movew L_SCR1(%a6),%d5
rts
word_d6:
movew L_SCR1(%a6),%d6
rts
word_d7:
movew L_SCR1(%a6),%d7
rts
| Longword-sized stores, one handler per data register.
| d0 and d1 go to the USER_D0/USER_D1 frame slots; d2-d7 are written
| directly.
long_d0:
movel L_SCR1(%a6),USER_D0(%a6)
rts
long_d1:
movel L_SCR1(%a6),USER_D1(%a6)
rts
long_d2:
movel L_SCR1(%a6),%d2
rts
long_d3:
movel L_SCR1(%a6),%d3
rts
long_d4:
movel L_SCR1(%a6),%d4
rts
long_d5:
movel L_SCR1(%a6),%d5
rts
long_d6:
movel L_SCR1(%a6),%d6
rts
long_d7:
movel L_SCR1(%a6),%d7
rts
|end
/do_func.S
0,0 → 1,559
|
| do_func.sa 3.4 2/18/91
|
| Do_func performs the unimplemented operation. The operation
| to be performed is determined from the lower 7 bits of the
| extension word (except in the case of fmovecr and fsincos).
| The opcode and tag bits form an index into a jump table in
| tbldo.sa. Cases of zero, infinity and NaN are handled in
| do_func by forcing the default result. Normalized and
| denormalized (there are no unnormalized numbers at this
| point) are passed onto the emulation code.
|
| CMDREG1B and STAG are extracted from the fsave frame
| and combined to form the table index. The function called
| will start with a0 pointing to the ETEMP operand. Dyadic
| functions can find FPTEMP at -12(a0).
|
| Called functions return their result in fp0. Sincos returns
| sin(x) in fp0 and cos(x) in fp1.
|
 
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
DO_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
|xref t_dz2
|xref t_operr
|xref t_inx2
|xref t_resdnrm
|xref dst_nan
|xref src_nan
|xref nrm_set
|xref sto_cos
 
|xref tblpre
|xref slognp1,slogn,slog10,slog2
|xref slognd,slog10d,slog2d
|xref smod,srem
|xref sscale
|xref smovcr
 
| Default results, each stored as three longwords: sign and exponent
| in the upper word of the first longword, then the 64-bit mantissa.
| They are loaded into fp0/fp1 with fmovex/fmovemx by the routines
| below.
PONE: .long 0x3fff0000,0x80000000,0x00000000 |+1
MONE: .long 0xbfff0000,0x80000000,0x00000000 |-1
PZERO: .long 0x00000000,0x00000000,0x00000000 |+0
MZERO: .long 0x80000000,0x00000000,0x00000000 |-0
PINF: .long 0x7fff0000,0x00000000,0x00000000 |+inf
MINF: .long 0xffff0000,0x00000000,0x00000000 |-inf
QNAN: .long 0x7fff0000,0xffffffff,0xffffffff |non-signaling nan
PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235 |+PI/2
MPIBY2: .long 0xbFFF0000,0xC90FDAA2,0x2168C235 |-PI/2
 
.global do_func
|
| do_func: entry point. fmovecr is sent straight to smovcr; extension
| values of $38 and above go to serror; everything else is dispatched
| through tblpre with (extension * 8 + upper three bits of STAG) as the
| longword index.
| The routine reached through tblpre is entered with:
|   a0 = address of ETEMP (the source operand)
|   d1 = low byte of the user's FPCR
|   FPCR = 0
| Clobbers d0, d1, a0, a1.
|
do_func:
clrb CU_ONLY(%a6)
|
| Check for fmovecr. It does not follow the format of fp gen
| unimplemented instructions. The test is on the upper 6 bits;
| if they are $17, the inst is fmovecr. Call entry smovcr
| directly.
|
bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields
cmpil #0x17,%d0 |if op class and size fields are $17,
| ;it is FMOVECR; if not, continue
bnes not_fmovecr
jmp smovcr |fmovecr; jmp directly to emulation

not_fmovecr:
movew CMDREG1B(%a6),%d0
andl #0x7F,%d0 |keep only the 7-bit extension field
cmpil #0x38,%d0 |if the extension is >= $38,
bges serror |it is illegal
bfextu STAG(%a6){#0:#3},%d1 |d1 = upper three bits of STAG
lsll #3,%d0 |make room for STAG
addl %d1,%d0 |combine for final index into table
leal tblpre,%a1 |start of monster jump table
movel (%a1,%d0.w*4),%a1 |real target address
leal ETEMP(%a6),%a0 |a0 is pointer to src op
movel USER_FPCR(%a6),%d1
andl #0xFF,%d1 | discard all but rounding mode/prec
fmovel #0,%fpcr |clear the FPCR before entering the routine
jmp (%a1)
|
| ERROR
|
.global serror
| serror: reached from do_func for an extension value of $38 or above.
| Sets every bit of the STORE_FLG byte and returns without loading a
| result.
serror:
st STORE_FLG(%a6)
rts
|
| These routines load forced values into fp0. They are called
| by index into tbldo.
|
| Load a signed zero to fp0 and set inex2/ainex
|
.global snzrinx
| snzrinx: load a zero with the sign of the source operand at (a0),
| then exit through t_inx2. The zero loaders are called with bsr so
| that control comes back here before the branch to t_inx2.
snzrinx:
btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand
bnes ld_mzinx |if negative, branch
bsr ld_pzero |bsr so we can return and set inx
bra t_inx2 |now, set the inx for the next inst
ld_mzinx:
bsr ld_mzero |if neg, load neg zero, return here
bra t_inx2 |now, set the inx for the next inst
|
| Load a signed zero to fp0; do not set inex2/ainex
|
.global szero
| szero: return a zero carrying the sign of the source operand at (a0).
| The selected loader does the rts.
szero:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
beq ld_pzero |positive source: load +0
bra ld_mzero |negative source: load -0
|
| Load a signed infinity to fp0; do not set inex2/ainex
|
.global sinf
| sinf: return an infinity carrying the sign of the source operand at
| (a0). The selected loader does the rts.
sinf:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
beq ld_pinf |positive source: load +inf
bra ld_minf |negative source: load -inf
|
| Load a signed one to fp0; do not set inex2/ainex
|
.global sone
| sone: return a one carrying the sign of the source operand at (a0).
| The selected loader does the rts.
sone:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
beq ld_pone |positive source: load +1
bra ld_mone |negative source: load -1
|
| Load a signed pi/2 to fp0. Both loaders (ld_ppi2/ld_mpi2) leave
| through t_inx2, so inex2 handling does take place on this path.
|
.global spi_2
spi_2:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
beq ld_ppi2 |positive source: load +pi/2
bra ld_mpi2 |negative source: load -pi/2
|
| Load +inf for a positive operand, +0 for a negative operand
|
.global szr_inf
szr_inf:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
beq ld_pinf |positive source: load +inf
bra ld_pzero |negative source: load +0
|
| Result is either an operr or +inf for plus/minus operand
| [Used by slogn, slognp1, slog10, and slog2]
|
.global sopr_inf
| sopr_inf: +inf for a positive source operand, operand error for a
| negative one.
sopr_inf:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
beq ld_pinf |positive source: load +inf
bra t_operr |negative source: operand error
|
| FLOGNP1
|
.global sslognp1
| sslognp1: front end for slognp1. Compares the source operand at (a0)
| with -1: greater goes on to slognp1, equal goes to t_dz2, anything
| else falls through to t_operr after the FPSR has been cleared.
sslognp1:
fmovemx (%a0),%fp0-%fp0 |fp0 = source operand
fcmpb #-1,%fp0
fbgt slognp1
fbeq t_dz2 |if = -1, divide by zero exception
fmovel #0,%FPSR |clr N flag
bra t_operr |take care of operands < -1
|
| FETOXM1
|
.global setoxm1i
| setoxm1i: forced result chosen by the sign of the source operand at
| (a0): +inf when positive, -1 when negative.
setoxm1i:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
beq ld_pinf |positive source: load +inf
bra ld_mone |negative source: load -1
|
| FLOGN
|
| Test for 1.0 as an input argument, returning +zero. Also check
| the sign and return operr if negative.
|
.global sslogn
| sslogn: front end for slogn. A negative source goes to t_operr. A
| source equal to exactly +1.0 (exponent $3fff, mantissa $80000000
| 00000000) returns +0 in fp0 directly; everything else goes to slogn.
sslogn:
btstb #sign_bit,LOCAL_EX(%a0)
bne t_operr |take care of operands < 0
cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input
bne slogn
cmpil #0x80000000,LOCAL_HI(%a0) |upper mantissa must be just the j-bit
bne slogn
tstl LOCAL_LO(%a0) |lower mantissa must be zero
bne slogn
fmovex PZERO,%fp0
rts
 
.global sslognd
| sslognd: sign filter in front of slognd. A negative source takes
| the operand error exit; otherwise control passes to slognd.
sslognd:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
bne t_operr |take care of operands < 0
bra slognd
 
|
| FLOG10
|
.global sslog10
| sslog10: front end for slog10. A negative source goes to t_operr. A
| source equal to exactly +1.0 returns +0 in fp0 directly; everything
| else goes to slog10.
sslog10:
btstb #sign_bit,LOCAL_EX(%a0)
bne t_operr |take care of operands < 0
cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input
bne slog10
cmpil #0x80000000,LOCAL_HI(%a0) |upper mantissa must be just the j-bit
bne slog10
tstl LOCAL_LO(%a0) |lower mantissa must be zero
bne slog10
fmovex PZERO,%fp0
rts
 
.global sslog10d
| sslog10d: sign filter in front of slog10d. A negative source takes
| the operand error exit; otherwise control passes to slog10d.
sslog10d:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
bne t_operr |take care of operands < 0
bra slog10d
 
|
| FLOG2
|
.global sslog2
| sslog2: front end for slog2. A negative source goes to t_operr. A
| source equal to exactly +1.0 returns +0 in fp0 directly; everything
| else goes to slog2.
sslog2:
btstb #sign_bit,LOCAL_EX(%a0)
bne t_operr |take care of operands < 0
cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input
bne slog2
cmpil #0x80000000,LOCAL_HI(%a0) |upper mantissa must be just the j-bit
bne slog2
tstl LOCAL_LO(%a0) |lower mantissa must be zero
bne slog2
fmovex PZERO,%fp0
rts
 
.global sslog2d
| sslog2d: sign filter in front of slog2d. A negative source takes
| the operand error exit; otherwise control passes to slog2d.
sslog2d:
btstb #sign_bit,LOCAL_EX(%a0) |Z is set when the sign bit is clear
bne t_operr |take care of operands < 0
bra slog2d
 
|
| FMOD
|
| pmodt: FMOD dispatch table, 16 entries indexed by
| (dtag << 2) | stag with the 2-bit tag values listed in pmod below.
pmodt:
| ;$21 fmod
| ;dtag,stag
.long smod | 00,00 norm,norm = normal
.long smod_oper | 00,01 norm,zero = nan with operr
.long smod_fpn | 00,10 norm,inf = fpn
.long smod_snan | 00,11 norm,nan = nan
.long smod_zro | 01,00 zero,norm = +-zero
.long smod_oper | 01,01 zero,zero = nan with operr
.long smod_zro | 01,10 zero,inf = +-zero
.long smod_snan | 01,11 zero,nan = nan
.long smod_oper | 10,00 inf,norm = nan with operr
.long smod_oper | 10,01 inf,zero = nan with operr
.long smod_oper | 10,10 inf,inf = nan with operr
.long smod_snan | 10,11 inf,nan = nan
.long smod_dnan | 11,00 nan,norm = nan
.long smod_dnan | 11,01 nan,zero = nan
.long smod_dnan | 11,10 nan,inf = nan
.long smod_dnan | 11,11 nan,nan = nan

.global pmod
| pmod: FMOD front end. Clears the quotient byte, builds the table
| index from the two operand tags and jumps through pmodt.
| Clobbers d0, d1, a1.
pmod:
clrb FPSR_QBYTE(%a6) | clear quotient field
bfextu STAG(%a6){#0:#3},%d0 |stag = d0
bfextu DTAG(%a6){#0:#3},%d1 |dtag = d1

|
| Alias extended denorms to norms for the jump table.
|
bclrl #2,%d0
bclrl #2,%d1

lslb #2,%d1
orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag
| ;Tag values:
| ;00 = norm or denorm
| ;01 = zero
| ;10 = inf
| ;11 = nan
lea pmodt,%a1
movel (%a1,%d1.w*4),%a1
jmp (%a1)
 
| Local trampolines so that pmodt can reference the shared exits:
| source NaN, destination NaN and operand error.
smod_snan:
bra src_nan
smod_dnan:
bra dst_nan
smod_oper:
bra t_operr
| smod_zro: zero destination. Sets the quotient sign bit when the two
| operand signs differ, then returns a zero with the destination's sign.
smod_zro:
moveb ETEMP(%a6),%d1 |get sign of src op
moveb FPTEMP(%a6),%d0 |get sign of dst op
eorb %d0,%d1 |get exor of sign bits
btstl #7,%d1 |test for sign
beqs smod_zsn |if clr, do not set sign bit
bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
smod_zsn:
btstl #7,%d0 |test if + or -
beq ld_pzero |if pos then load +0
bra ld_mzero |else neg load -0
| smod_fpn: finite destination with an infinite source; the result is
| the destination operand itself. Sets the quotient sign bit when the
| operand signs differ. A destination whose DTAG has bit 7 set leaves
| through t_resdnrm with a0 pointing at FPTEMP; otherwise FPTEMP is
| loaded into fp0 under the user's FPCR.
smod_fpn:
moveb ETEMP(%a6),%d1 |get sign of src op
moveb FPTEMP(%a6),%d0 |get sign of dst op
eorb %d0,%d1 |get exor of sign bits
btstl #7,%d1 |test for sign
beqs smod_fsn |if clr, do not set sign bit
bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
smod_fsn:
tstb DTAG(%a6) |filter out denormal destination case
bpls smod_nrm |
leal FPTEMP(%a6),%a0 |a0<- addr(FPTEMP)
bra t_resdnrm |force UNFL(but exact) result
smod_nrm:
fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision
fmovex FPTEMP(%a6),%fp0 |return dest to fp0
rts
|
| FREM
|
| premt: FREM dispatch table, 16 entries indexed by
| (dtag << 2) | stag, where each tag is 00 = norm or denorm,
| 01 = zero, 10 = inf, 11 = nan.
premt:
| ;$25 frem
| ;dtag,stag
.long srem | 00,00 norm,norm = normal
.long srem_oper | 00,01 norm,zero = nan with operr
.long srem_fpn | 00,10 norm,inf = fpn
.long srem_snan | 00,11 norm,nan = nan
.long srem_zro | 01,00 zero,norm = +-zero
.long srem_oper | 01,01 zero,zero = nan with operr
.long srem_zro | 01,10 zero,inf = +-zero
.long srem_snan | 01,11 zero,nan = nan
.long srem_oper | 10,00 inf,norm = nan with operr
.long srem_oper | 10,01 inf,zero = nan with operr
.long srem_oper | 10,10 inf,inf = nan with operr
.long srem_snan | 10,11 inf,nan = nan
.long srem_dnan | 11,00 nan,norm = nan
.long srem_dnan | 11,01 nan,zero = nan
.long srem_dnan | 11,10 nan,inf = nan
.long srem_dnan | 11,11 nan,nan = nan
 
.global prem
| prem: FREM front end. Empties the quotient byte, folds the two
| operand tags into a 4-bit index (dtag in bits 3:2, stag in bits 1:0)
| and jumps through premt. Clobbers d0, d1, a1.
prem:
clrb FPSR_QBYTE(%a6) |start with an empty quotient field
bfextu DTAG(%a6){#0:#3},%d1 |d1 = destination tag
bfextu STAG(%a6){#0:#3},%d0 |d0 = source tag
|
| Extended denorms share the table entries of norms.
|
bclrl #2,%d1
bclrl #2,%d0

lslb #2,%d1 |dtag moves up to bits 3:2
orb %d0,%d1 |stag fills bits 1:0
| ;Tag values:
| ;00 = norm or denorm
| ;01 = zero
| ;10 = inf
| ;11 = nan
leal premt,%a1 |a1 = base of the FREM dispatch table
movel (%a1,%d1.w*4),%a1 |fetch the handler address
jmp (%a1)
| Local trampolines so that premt can reference the shared exits:
| source NaN, destination NaN and operand error.
srem_snan:
bra src_nan
srem_dnan:
bra dst_nan
srem_oper:
bra t_operr
| srem_zro: zero destination. Sets the quotient sign bit when the two
| operand signs differ, then returns a zero with the destination's sign.
srem_zro:
moveb ETEMP(%a6),%d1 |get sign of src op
moveb FPTEMP(%a6),%d0 |get sign of dst op
eorb %d0,%d1 |get exor of sign bits
btstl #7,%d1 |test for sign
beqs srem_zsn |if clr, do not set sign bit
bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
srem_zsn:
btstl #7,%d0 |test if + or -
beq ld_pzero |if pos then load +0
bra ld_mzero |else neg load -0
| srem_fpn: finite destination with an infinite source; the result is
| the destination operand itself. Sets the quotient sign bit when the
| operand signs differ. A destination whose DTAG has bit 7 set leaves
| through t_resdnrm with a0 pointing at FPTEMP; otherwise FPTEMP is
| loaded into fp0 under the user's FPCR.
srem_fpn:
moveb ETEMP(%a6),%d1 |get sign of src op
moveb FPTEMP(%a6),%d0 |get sign of dst op
eorb %d0,%d1 |get exor of sign bits
btstl #7,%d1 |test for sign
beqs srem_fsn |if clr, do not set sign bit
bsetb #q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
srem_fsn:
tstb DTAG(%a6) |filter out denormal destination case
bpls srem_nrm |
leal FPTEMP(%a6),%a0 |a0<- addr(FPTEMP)
bra t_resdnrm |force UNFL(but exact) result
srem_nrm:
fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision
fmovex FPTEMP(%a6),%fp0 |return dest to fp0
rts
|
| FSCALE
|
| pscalet: FSCALE dispatch table, 16 entries indexed by
| (dtag << 2) | stag with 2-bit tags (denorm already folded onto norm).
pscalet:
| ;$26 fscale
| ;dtag,stag
.long sscale | 00,00 norm,norm = result
.long sscale | 00,01 norm,zero = fpn
.long scl_opr | 00,10 norm,inf = nan with operr
.long scl_snan | 00,11 norm,nan = nan
.long scl_zro | 01,00 zero,norm = +-zero
.long scl_zro | 01,01 zero,zero = +-zero
.long scl_opr | 01,10 zero,inf = nan with operr
.long scl_snan | 01,11 zero,nan = nan
.long scl_inf | 10,00 inf,norm = +-inf
.long scl_inf | 10,01 inf,zero = +-inf
.long scl_opr | 10,10 inf,inf = nan with operr
.long scl_snan | 10,11 inf,nan = nan
.long scl_dnan | 11,00 nan,norm = nan
.long scl_dnan | 11,01 nan,zero = nan
.long scl_dnan | 11,10 nan,inf = nan
.long scl_dnan | 11,11 nan,nan = nan

.global pscale
| pscale: FSCALE front end. Builds the table index from the two
| operand tags and jumps through pscalet. Clobbers d0, d1, a1.
pscale:
bfextu STAG(%a6){#0:#3},%d0 |stag in d0
bfextu DTAG(%a6){#0:#3},%d1 |dtag in d1
bclrl #2,%d0 |alias denorm into norm
bclrl #2,%d1 |alias denorm into norm
lslb #2,%d1
orb %d0,%d1 |d1{3:2} = dtag, d1{1:0} = stag
| ;Tag values (both operands, after the
| ;denorm bit has been cleared above):
| ;00 = norm or denorm
| ;01 = zero
| ;10 = inf
| ;11 = nan
|
|
leal pscalet,%a1 |load start of jump table
movel (%a1,%d1.w*4),%a1 |load a1 with label depending on tag
jmp (%a1) |go to the routine
 
| FSCALE special-case handlers reached through pscalet.
| scl_opr, scl_dnan and scl_snan are trampolines to the shared exits;
| scl_zro and scl_inf return a zero or an infinity with the sign of
| the destination operand (FPTEMP).
scl_opr:
bra t_operr

scl_dnan:
bra dst_nan

scl_zro:
btstb #sign_bit,FPTEMP_EX(%a6) |test if + or -
beq ld_pzero |if pos then load +0
bra ld_mzero |if neg then load -0
scl_inf:
btstb #sign_bit,FPTEMP_EX(%a6) |test if + or -
beq ld_pinf |if pos then load +inf
bra ld_minf |else neg load -inf
scl_snan:
bra src_nan
|
| FSINCOS
|
.global ssincosz
| ssincosz: FSINCOS of a zero source. fp0 (sine) gets a zero with the
| source's sign, fp1 (cosine) gets +1, and the cosine is stored by
| sto_cos, which also performs the return.
ssincosz:
btstb #sign_bit,ETEMP(%a6) |get sign
beqs sincosp
fmovex MZERO,%fp0
bras sincoscom
sincosp:
fmovex PZERO,%fp0
sincoscom:
fmovemx PONE,%fp1-%fp1 |do not allow FPSR to be affected
bra sto_cos |store cosine result
 
.global ssincosi
| ssincosi: FSINCOS special case (infinite source, going by the label).
| A NaN is stored as the cosine result, a NaN is loaded as the sine
| result in fp0, and the routine leaves through t_operr.
ssincosi:
fmovex QNAN,%fp1 |load NAN
bsr sto_cos |store cosine result
fmovex QNAN,%fp0 |load NAN
bra t_operr
 
.global ssincosnan
| ssincosnan: FSINCOS of a NaN source. Copies ETEMP to FP_SCR1, sets
| signan_bit in the copy's upper mantissa, stores that copy as the
| cosine result, then leaves through src_nan for the sine result.
ssincosnan:
movel ETEMP_EX(%a6),FP_SCR1(%a6)
movel ETEMP_HI(%a6),FP_SCR1+4(%a6)
movel ETEMP_LO(%a6),FP_SCR1+8(%a6)
bsetb #signan_bit,FP_SCR1+4(%a6)
fmovemx FP_SCR1(%a6),%fp1-%fp1
bsr sto_cos
bra src_nan
|
| This code forces default values for the zero, inf, and nan cases
| in the transcendentals code. The CC bits must be set in the
| stacked FPSR to be correctly reported.
|
|**Returns +PI/2
.global ld_ppi2
| ld_ppi2: fp0 = +pi/2; leaves through t_inx2.
ld_ppi2:
fmovex PPIBY2,%fp0 |load +pi/2
bra t_inx2 |set inex2 exc

|**Returns -PI/2
.global ld_mpi2
| ld_mpi2: fp0 = -pi/2, N recorded in the stacked FPSR; leaves
| through t_inx2.
ld_mpi2:
fmovex MPIBY2,%fp0 |load -pi/2
orl #neg_mask,USER_FPSR(%a6) |set N bit
bra t_inx2 |set inex2 exc
 
|**Returns +inf
.global ld_pinf
| ld_pinf: fp0 = +inf, I recorded in the stacked FPSR.
ld_pinf:
fmovex PINF,%fp0 |load +inf
orl #inf_mask,USER_FPSR(%a6) |set I bit
rts

|**Returns -inf
.global ld_minf
| ld_minf: fp0 = -inf, N and I recorded in the stacked FPSR.
ld_minf:
fmovex MINF,%fp0 |load -inf
orl #neg_mask+inf_mask,USER_FPSR(%a6) |set N and I bits
rts
 
|**Returns +1
.global ld_pone
| ld_pone: fp0 = +1; the stacked FPSR is not modified.
ld_pone:
fmovex PONE,%fp0 |load +1
rts

|**Returns -1
.global ld_mone
| ld_mone: fp0 = -1, N recorded in the stacked FPSR.
ld_mone:
fmovex MONE,%fp0 |load -1
orl #neg_mask,USER_FPSR(%a6) |set N bit
rts
 
|**Returns +0
.global ld_pzero
| ld_pzero: fp0 = +0, Z recorded in the stacked FPSR.
ld_pzero:
fmovex PZERO,%fp0 |load +0
orl #z_mask,USER_FPSR(%a6) |set Z bit
rts

|**Returns -0
.global ld_mzero
| ld_mzero: fp0 = -0, N and Z recorded in the stacked FPSR.
ld_mzero:
fmovex MZERO,%fp0 |load -0
orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z bits
rts
 
|end
/get_op.S
0,0 → 1,676
|
| get_op.sa 3.6 5/19/92
|
| get_op.sa 3.5 4/26/91
|
| Description: This routine is called by the unsupported format/data
| type exception handler ('unsupp' - vector 55) and the unimplemented
| instruction exception handler ('unimp' - vector 11). 'get_op'
| determines the opclass (0, 2, or 3) and branches to the
| opclass handler routine. See 68881/2 User's Manual table 4-11
| for a description of the opclasses.
|
| For UNSUPPORTED data/format (exception vector 55) and for
| UNIMPLEMENTED instructions (exception vector 11) the following
| applies:
|
| - For unnormalized numbers (opclass 0, 2, or 3) the
| number(s) is normalized and the operand type tag is updated.
|
| - For a packed number (opclass 2) the number is unpacked and the
| operand type tag is updated.
|
| - For denormalized numbers (opclass 0 or 2) the number(s) is not
| changed but passed to the next module. The next module for
| unimp is do_func, the next module for unsupp is res_func.
|
| For UNSUPPORTED data/format (exception vector 55) only the
| following applies:
|
| - If there is a move out with a packed number (opclass 3) the
| number is packed and written to user memory. For the other
| opclasses the number(s) are written back to the fsave stack
| and the instruction is then restored back into the '040. The
| '040 is then able to complete the instruction.
|
| For example:
| fadd.x fpm,fpn where the fpm contains an unnormalized number.
| The '040 takes an unsupported data trap and gets to this
| routine. The number is normalized, put back on the stack and
| then an frestore is done to restore the instruction back into
| the '040. The '040 then re-executes the fadd.x fpm,fpn with
| a normalized number in the source and the instruction is
| successful.
|
| Next consider if in the process of normalizing the un-
| normalized number it becomes a denormalized number. The
| routine which converts the unnorm to a norm (called mk_norm)
| detects this and tags the number as a denorm. The routine
| res_func sees the denorm tag and converts the denorm to a
| norm. The instruction is then restored back into the '040
| which re_executes the instruction.
|
|
| Copyright (C) Motorola, Inc. 1990
| All Rights Reserved
|
| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
| The copyright notice above does not evidence any
| actual or intended publication of such source code.
 
GET_OP: |idnt 2,1 | Motorola 040 Floating Point Software Package
 
|section 8
 
.include "fpsp.h"
 
.global PIRN,PIRZRM,PIRP
.global SMALRN,SMALRZRM,SMALRP
.global BIGRN,BIGRZRM,BIGRP

| pi as three longwords, one copy per rounding mode (RN = nearest,
| RZRM = zero or minus infinity, RP = plus infinity). Only the last
| mantissa bit differs between the copies.
PIRN:
.long 0x40000000,0xc90fdaa2,0x2168c235 |pi, round to nearest
PIRZRM:
.long 0x40000000,0xc90fdaa2,0x2168c234 |pi, round to zero/minus infinity
PIRP:
.long 0x40000000,0xc90fdaa2,0x2168c235 |pi, round to plus infinity
 
| Small constants: three parallel tables, one per rounding mode, each
| with the same five entries in the same order.
| NOTE(review): some entries are identical across all three tables
| (e.g. log10(e)); the values are reproduced as found and must not be
| "corrected" without checking against the hardware constant ROM.
|round to nearest
SMALRN:
.long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2)
.long 0x40000000,0xadf85458,0xa2bb4a9a |e
.long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e)
.long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e)
.long 0x00000000,0x00000000,0x00000000 |0.0
| round to zero;round to negative infinity
SMALRZRM:
.long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2)
.long 0x40000000,0xadf85458,0xa2bb4a9a |e
.long 0x3fff0000,0xb8aa3b29,0x5c17f0bb |log2(e)
.long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e)
.long 0x00000000,0x00000000,0x00000000 |0.0
| round to positive infinity
SMALRP:
.long 0x3ffd0000,0x9a209a84,0xfbcff799 |log10(2)
.long 0x40000000,0xadf85458,0xa2bb4a9b |e
.long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e)
.long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e)
.long 0x00000000,0x00000000,0x00000000 |0.0
 
| Large constants, round-to-nearest copy: ln(2), ln(10), 10^0, then
| the powers 10^(2^k) for k = 0..12. PTENRN labels the start of the
| powers of ten and sits directly after the 10^0 entry.
|round to nearest
BIGRN:
.long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2)
.long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10)
.long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0

.global PTENRN
PTENRN:
.long 0x40020000,0xA0000000,0x00000000 |10 ^ 1
.long 0x40050000,0xC8000000,0x00000000 |10 ^ 2
.long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4
.long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8
.long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16
.long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32
.long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64
.long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128
.long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256
.long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512
.long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024
.long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048
.long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096
| Large constants, same layout as the round-to-nearest copy.
|round to zero;round to minus infinity
BIGRZRM:
.long 0x3ffe0000,0xb17217f7,0xd1cf79ab |ln(2)
.long 0x40000000,0x935d8ddd,0xaaa8ac16 |ln(10)
.long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0

.global PTENRM
PTENRM:
.long 0x40020000,0xA0000000,0x00000000 |10 ^ 1
.long 0x40050000,0xC8000000,0x00000000 |10 ^ 2
.long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4
.long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8
.long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16
.long 0x40690000,0x9DC5ADA8,0x2B70B59D |10 ^ 32
.long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64
.long 0x41A80000,0x93BA47C9,0x80E98CDF |10 ^ 128
.long 0x43510000,0xAA7EEBFB,0x9DF9DE8D |10 ^ 256
.long 0x46A30000,0xE319A0AE,0xA60E91C6 |10 ^ 512
.long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024
.long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048
.long 0x75250000,0xC4605202,0x8A20979A |10 ^ 4096
| Large constants, same layout as the round-to-nearest copy.
|round to positive infinity
BIGRP:
.long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2)
.long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10)
.long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0

.global PTENRP
PTENRP:
.long 0x40020000,0xA0000000,0x00000000 |10 ^ 1
.long 0x40050000,0xC8000000,0x00000000 |10 ^ 2
.long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4
.long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8
.long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16
.long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32
.long 0x40D30000,0xC2781F49,0xFFCFA6D6 |10 ^ 64
.long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128
.long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256
.long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512
.long 0x4D480000,0xC9767586,0x81750C18 |10 ^ 1024
.long 0x5A920000,0x9E8B3B5D,0xC53D5DE6 |10 ^ 2048
.long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096
 
|xref nrm_zero
|xref decbin
|xref round
 
.global get_op
.global uns_getop
.global uni_getop
| get_op: common entry. Clears the dyadic/monadic flag and selects the
| unimplemented-instruction path (UFLG_TMP zero) or the unsupported
| data type path (UFLG_TMP non-zero).
get_op:
clrb DY_MO_FLG(%a6)
tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state
beqs uni_getop

| uns_getop: unsupported data type path. Move-outs go to opclass3,
| packed sources go to pack_source; otherwise the destination operand
| is examined first (dyadic instructions only), then the source.
uns_getop:
btstb #direction_bit,CMDREG1B(%a6)
bne opclass3 |branch if a fmove out (any kind)
btstb #6,CMDREG1B(%a6)
beqs uns_notpacked

bfextu CMDREG1B(%a6){#3:#3},%d0 |d0 = 3-bit source specifier field
cmpb #3,%d0
beq pack_source |check for a packed src op, branch if so
uns_notpacked:
bsr chk_dy_mo |set the dyadic/monadic flag
tstb DY_MO_FLG(%a6)
beqs src_op_ck |if monadic, go check src op
| ;else, check dst op (fall through)

btstb #7,DTAG(%a6)
beqs src_op_ck |if dst op is norm, check src op
bras dst_ex_dnrm |else, handle destination unnorm/dnrm
 
| uni_getop: unimplemented-instruction path. fmovecr returns at once;
| a packed source goes to pack_source; if neither tag has its top bit
| set there is nothing to fix and the routine returns.
uni_getop:
bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields
cmpil #0x17,%d0 |if op class and size fields are $17,
| ;it is FMOVECR; if not, continue
|
| If the instruction is fmovecr, exit get_op. It is handled
| in do_func and smovecr.sa.
|
bne not_fmovecr |not fmovecr: go examine the operands
rts

not_fmovecr:
btstb #E1,E_BYTE(%a6) |if set, there is a packed operand
bne pack_source |check for packed src op, branch if so

| The following lines are coded to optimize on normalized operands
moveb STAG(%a6),%d0
orb DTAG(%a6),%d0 |check if either of STAG/DTAG msb set
bmis dest_op_ck |if so, some op needs to be fixed
rts
 
| dest_op_ck: examine the destination operand. It is skipped when
| DTAG bit 7 is clear or when the instruction is monadic.
dest_op_ck:
btstb #7,DTAG(%a6) |check for unsupported data types in
beqs src_op_ck |the destination, if not, check src op
bsr chk_dy_mo |set dyadic/monadic flag
tstb DY_MO_FLG(%a6) |
beqs src_op_ck |if monadic, check src op
|
| At this point, destination has an extended denorm or unnorm.
|
dst_ex_dnrm:
movew FPTEMP_EX(%a6),%d0 |get destination exponent
andiw #0x7fff,%d0 |mask sign, check if exp = 0000
beqs src_op_ck |if denorm then check source op.
| ;denorms are taken care of in res_func
| ;(unsupp) or do_func (unimp)
| ;else unnorm fall through
leal FPTEMP(%a6),%a0 |point a0 to dop - used in mk_norm
bsr mk_norm |go normalize - mk_norm returns:
| ;L_SCR1{7:5} = operand tag
| ; (000 = norm, 100 = denorm)
| ;L_SCR1{4} = fpte15 or ete15
| ; 0 = exp > $3fff
| ; 1 = exp <= $3fff
| ;and puts the normalized num back
| ;on the fsave stack
|
moveb L_SCR1(%a6),DTAG(%a6) |write the new tag & fpte15
| ;to the fsave stack and fall
| ;through to check source operand
|
| src_op_ck: examine the source operand. Returns at once when STAG
| bit 7 is clear; STAG bit 5 selects the single/double denorm path.
src_op_ck:
btstb #7,STAG(%a6)
beq end_getop |check for unsupported data types on the
| ;source operand
btstb #5,STAG(%a6)
bnes src_sd_dnrm |if bit 5 set, handle sgl/dbl denorms
|
| At this point only unnorms or extended denorms are possible.
|
src_ex_dnrm:
movew ETEMP_EX(%a6),%d0 |get source exponent
andiw #0x7fff,%d0 |mask sign, check if exp = 0000
beq end_getop |if denorm then exit, denorms are
| ;handled in do_func
leal ETEMP(%a6),%a0 |point a0 to sop - used in mk_norm
bsr mk_norm |go normalize - mk_norm returns:
| ;L_SCR1{7:5} = operand tag
| ; (000 = norm, 100 = denorm)
| ;L_SCR1{4} = fpte15 or ete15
| ; 0 = exp > $3fff
| ; 1 = exp <= $3fff
| ;and puts the normalized num back
| ;on the fsave stack
|
moveb L_SCR1(%a6),STAG(%a6) |write the new tag & ete15
rts |end_getop
 
|
| At this point, only single or double denorms are possible.
| If the inst is not fmove, normalize the source. If it is,
| do nothing to the input.
|
| src_sd_dnrm: single or double denorm source. Writes a fixed exponent
| word for the source format (keeping the sign) into ETEMP_EX, rewrites
| the source specifier in CMDREG1B to extended precision, then
| normalizes the operand with mk_norm and stores the returned tag.
src_sd_dnrm:
btstb #4,CMDREG1B(%a6) |differentiate between sgl/dbl denorm
bnes is_double
is_single:
movew #0x3f81,%d1 |write bias for sgl denorm
bras common |goto the common code
is_double:
movew #0x3c01,%d1 |write the bias for a dbl denorm
common:
btstb #sign_bit,ETEMP_EX(%a6) |grab sign bit of mantissa
beqs pos
bset #15,%d1 |set sign bit because it is negative
pos:
movew %d1,ETEMP_EX(%a6)
| ;put exponent on stack

movew CMDREG1B(%a6),%d1
andw #0xe3ff,%d1 |clear out source specifier
orw #0x0800,%d1 |set source specifier to extended prec
movew %d1,CMDREG1B(%a6) |write back to the command word in stack
| ;this is needed to fix unsupp data stack
leal ETEMP(%a6),%a0 |point a0 to sop
bsr mk_norm |convert sgl/dbl denorm to norm
moveb L_SCR1(%a6),STAG(%a6) |write the returned tag into STAG
rts |end_getop
|
| At this point, the source is definitely packed, whether
| instruction is dyadic or monadic is still unknown
|
| pack_source: packed decimal source. Moves the first longword of the
| packed number from FPTEMP_LO into ETEMP, classifies the instruction
| with chk_dy_mo and converts the operand with unpack. For a dyadic
| instruction the destination register is then written to FPTEMP and
| its tag is brought up to date.
pack_source:
movel FPTEMP_LO(%a6),ETEMP(%a6) |write ms part of packed
| ;number to etemp slot
bsr chk_dy_mo |set dyadic/monadic flag
bsr unpack

tstb DY_MO_FLG(%a6)
beqs end_getop |if monadic, exit
| ;else, fix FPTEMP
pack_dya:
bfextu CMDREG1B(%a6){#6:#3},%d0 |extract dest fp reg
movel #7,%d1
subl %d0,%d1 |d1 = 7 - (dest. reg. no.)
clrl %d0
bsetl %d1,%d0 |set up d0 as a dynamic register mask
fmovemx %d0,FPTEMP(%a6) |write to FPTEMP

btstb #7,DTAG(%a6) |check dest tag for unnorm or denorm
bne dst_ex_dnrm |if set, handle the unnorm or ext denorm
|
| Dest is not denormalized. Check for norm, and set fpte15
| accordingly.
|
moveb DTAG(%a6),%d0
andib #0xf0,%d0 |strip to only dtag:fpte15
tstb %d0 |check for normalized value
bnes end_getop |if inf/nan/zero leave get_op
movew FPTEMP_EX(%a6),%d0
andiw #0x7fff,%d0
cmpiw #0x3fff,%d0 |check if fpte15 needs setting
bges end_getop |if >= $3fff, leave fpte15=0
orb #0x10,DTAG(%a6) |exp < $3fff: set fpte15
bras end_getop
 
|
| At this point, it is either an fmoveout packed, unnorm or denorm
|
| opclass3: move-out. Marks the instruction monadic. A two-bit field
| of CMDREG1B equal to 3 means a packed move-out, for which nothing is
| done here; any other value continues at src_ex_dnrm.
opclass3:
clrb DY_MO_FLG(%a6) |set dyadic/monadic flag to monadic
bfextu CMDREG1B(%a6){#4:#2},%d0
cmpib #3,%d0
bne src_ex_dnrm |if not equal, must be unnorm or denorm
| ;else it is a packed move out
| ;exit
end_getop:
rts
 
|
| Sets the DY_MO_FLG correctly. This is used only on if it is an
| unsupported data type exception. Set if dyadic.
|
| chk_dy_mo: classify the instruction from the command word.
|   bit 5 clear                      -> monadic
|   bit 5 set, bit 4 clear           -> dyadic
|   bits 5 and 4 set, extension $38  -> dyadic
|   bits 5 and 4 set, anything else  -> monadic
| Out: DY_MO_FLG(a6) = $ff when dyadic, $00 when monadic. Clobbers d0.
chk_dy_mo:
movew CMDREG1B(%a6),%d0
btstl #5,%d0 |testing extension command word
beqs set_mon |if bit 5 = 0 then monadic
btstl #4,%d0 |know that bit 5 = 1
beqs set_dya |if bit 4 = 0 then dyadic
andiw #0x007f,%d0 |get rid of all but extension bits {6:0}
cmpiw #0x0038,%d0 |if extension = $38 then fcmp (dyadic)
bnes set_mon
set_dya:
st DY_MO_FLG(%a6) |set the inst flag type to dyadic
rts
set_mon:
clrb DY_MO_FLG(%a6) |set the inst flag type to monadic
rts
|
| MK_NORM
|
| Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl
| exception if denorm.
|
| CASE opclass 0x0 unsupp
| mk_norm till msb set
| set tag = norm
|
| CASE opclass 0x0 unimp
| mk_norm till msb set or exp = 0
| if integer bit = 0
| tag = denorm
| else
| tag = norm
|
| CASE opclass 011 unsupp
| mk_norm till msb set or exp = 0
| if integer bit = 0
| tag = denorm
| set unfl_nmcexe = 1
| else
| tag = norm
|
| if exp <= $3fff
| set ete15 or fpte15 = 1
| else set ete15 or fpte15 = 0
 
| input:
| a0 = points to operand to be normalized
| output:
| L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm)
| L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff)
| the normalized operand is placed back on the fsave stack
| Flow: clear L_SCR1, split the sign out of LOCAL_EX into LOCAL_SGN,
| run the normalizer that matches the exception type and transfer
| direction, set bit 4 of L_SCR1 when the resulting exponent is
| <= $3fff, then fold the sign back into LOCAL_EX.
mk_norm:
clrl L_SCR1(%a6)
bclrb #sign_bit,LOCAL_EX(%a0)
sne LOCAL_SGN(%a0) |transform into internal extended format

cmpib #0x2c,1+EXC_VEC(%a6) |check if unimp
bnes uns_data |branch if unsupp
bsr uni_inst |call if unimp (opclass 0x0)
bras reload
uns_data:
btstb #direction_bit,CMDREG1B(%a6) |check transfer direction
bnes bit_set |branch if set (opclass 011)
bsr uns_opx |call if opclass 0x0
bras reload
bit_set:
bsr uns_op3 |opclass 011
reload:
cmpw #0x3fff,LOCAL_EX(%a0) |if exp > $3fff
bgts end_mk | fpte15/ete15 already set to 0
bsetb #4,L_SCR1(%a6) |else set fpte15/ete15 to 1
| ;calling routine actually sets the
| ;value on the stack (along with the
| ;tag), since this routine doesn't
| ;know if it should set ete15 or fpte15
| ;ie, it doesn't know if this is the
| ;src op or dest op.
end_mk:
bfclr LOCAL_SGN(%a0){#0:#8} |clear sign byte; Z reflects its old value
beqs end_mk_pos |sign byte was zero: operand is positive
bsetb #sign_bit,LOCAL_EX(%a0) |convert back to IEEE format
end_mk_pos:
rts
|
| CASE opclass 011 unsupp
|
| uns_op3: normalize the operand at (a0) for a move-out. When the
| integer bit is still clear after nrm_zero, tag the operand as a
| denorm and record an underflow.
|
| The tag has to land in the byte at L_SCR1: the callers of mk_norm
| fetch it with a byte move from L_SCR1, and mk_norm sets fpte15/ete15
| with a byte-sized bset on that same byte. A word-sized OR of the
| byte-valued dnrm_tag would instead hit the low byte of the word,
| i.e. L_SCR1+1, and the denorm tag would never reach the caller.
| A byte OR is therefore used here, as in uns_opx and uni_inst.
|
uns_op3:
bsr nrm_zero |normalize till msb = 1 or exp = zero
btstb #7,LOCAL_HI(%a0) |if msb = 1
bnes no_unfl |then branch
set_unfl:
orb #dnrm_tag,L_SCR1(%a6) |set denorm tag
bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set unfl exception bit
no_unfl:
rts
|
| CASE opclass 0x0 unsupp
|
| uns_opx: normalize the operand at (a0) with nrm_zero and OR the
| resulting tag into the byte at L_SCR1: norm when the integer bit
| ended up set, denorm otherwise.
uns_opx:
bsr nrm_zero |normalize the number
btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set
beqs uns_den |if clear then now have a denorm
uns_nrm:
orb #norm_tag,L_SCR1(%a6) |set tag to norm
rts
uns_den:
orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm
rts
|
| CASE opclass 0x0 unimp
|
| uni_inst: normalize the operand at (a0) with nrm_zero and OR the
| resulting tag into the byte at L_SCR1: norm when the integer bit
| ended up set, denorm otherwise.
uni_inst:
bsr nrm_zero
btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set
beqs uni_den |if clear then now have a denorm
uni_nrm:
orb #norm_tag,L_SCR1(%a6) |set tag to norm
rts
uni_den:
orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm
rts
 
|
| Decimal to binary conversion
|
| Special cases of inf and NaNs are completed outside of decbin.
| If the input is an snan, the snan bit is not set.
|
| input:
| ETEMP(a6) - points to packed decimal string in memory
| output:
| fp0 - contains packed string converted to extended precision
| ETEMP - same as fp0
| unpack: entry. Command words whose bits selected by mask $3b are all
| zero take the move_unpack path, which also maintains the FPSR
| condition codes (the mask leaves bits 2 and 6 out of the test).
| Otherwise the packed operand in ETEMP is classified here: infinities
| and NaNs are left as they are, zeros are rewritten in try_zero and
| everything else is converted in not_spec.
unpack:
movew CMDREG1B(%a6),%d0 |examine command word, looking for fmove's
andw #0x3b,%d0
beq move_unpack |special handling for fmove: must set FPSR_CC

movew ETEMP(%a6),%d0 |get word with inf information
bfextu %d0{#20:#12},%d1 |get exponent into d1
cmpiw #0x0fff,%d1 |test for inf or NaN
bnes try_zero |if not equal, it is not special
bfextu %d0{#17:#3},%d1 |get SE and y bits into d1
cmpiw #7,%d1 |SE and y bits must be on for special
bnes try_zero |if not on, it is not special
|input is of the special cases of inf and NaN
tstl ETEMP_HI(%a6) |check ms mantissa
bnes fix_nan |if non-zero, it is a NaN
tstl ETEMP_LO(%a6) |check ls mantissa
bnes fix_nan |if non-zero, it is a NaN
bra finish |special already on stack
fix_nan:
btstb #signan_bit,ETEMP_HI(%a6) |test for snan
bne finish |bit set: nothing to record
orl #snaniop_mask,USER_FPSR(%a6) |always set snan if it is so
bra finish
| try_zero: the operand is a zero when the low four bits of the second
| word of ETEMP and both mantissa longwords are all zero. A zero is
| rewritten in place as a signed zero with every other bit cleared;
| anything else goes to not_spec.
try_zero:
movew ETEMP_EX+2(%a6),%d0 |get word 4
andiw #0x000f,%d0 |clear all but last ni(y)bble
tstw %d0 |check for zero.
bne not_spec
tstl ETEMP_HI(%a6) |check words 3 and 2
bne not_spec
tstl ETEMP_LO(%a6) |check words 1 and 0
bne not_spec
tstl ETEMP(%a6) |test sign of the zero
bges pos_zero
movel #0x80000000,ETEMP(%a6) |write neg zero to etemp
clrl ETEMP_HI(%a6)
clrl ETEMP_LO(%a6)
bra finish
pos_zero:
clrl ETEMP(%a6)
clrl ETEMP_HI(%a6)
clrl ETEMP_LO(%a6)
bra finish
 
| not_spec: ordinary packed number. fp0 and fp1 are saved on the stack
| around the call to decbin, the converted value is written to ETEMP,
| and the FPSR is cleared afterwards.
not_spec:
fmovemx %fp0-%fp1,-(%a7) |save fp0 - decbin returns in it
bsr decbin
fmovex %fp0,ETEMP(%a6) |put the unpacked sop in the fsave stack
fmovemx (%a7)+,%fp0-%fp1
fmovel #0,%FPSR |clr fpsr from decbin
bra finish
 
|
| Special handling for packed move in: Same results as all other
| packed cases, but we must set the FPSR condition codes properly.
|
| move_unpack: same classification as unpack, but the condition code
| bits for the operand are also recorded in the stacked FPSR.
move_unpack:
movew ETEMP(%a6),%d0 |get word with inf information
bfextu %d0{#20:#12},%d1 |get exponent into d1
cmpiw #0x0fff,%d1 |test for inf or NaN
bnes mtry_zero |if not equal, it is not special
bfextu %d0{#17:#3},%d1 |get SE and y bits into d1
cmpiw #7,%d1 |SE and y bits must be on for special
bnes mtry_zero |if not on, it is not special
|input is of the special cases of inf and NaN
tstl ETEMP_HI(%a6) |check ms mantissa
bnes mfix_nan |if non-zero, it is a NaN
tstl ETEMP_LO(%a6) |check ls mantissa
bnes mfix_nan |if non-zero, it is a NaN
|input is inf
orl #inf_mask,USER_FPSR(%a6) |set I bit
tstl ETEMP(%a6) |check sign
bge finish
orl #neg_mask,USER_FPSR(%a6) |set N bit
bra finish |special already on stack
| mfix_nan: NaN operand. A NaN with signan_bit clear also records the
| snan exception bit and, unless the snan exception is enabled, has
| signan_bit set in ETEMP.
mfix_nan:
orl #nan_mask,USER_FPSR(%a6) |set NaN bit
moveb #nan_tag,STAG(%a6) |set stag to NaN
btstb #signan_bit,ETEMP_HI(%a6) |test for snan
bnes mn_snan |bit set: skip the snan handling
orl #snaniop_mask,USER_FPSR(%a6) |set snan bit
btstb #snan_bit,FPCR_ENABLE(%a6) |test for snan enabled
bnes mn_snan
bsetb #signan_bit,ETEMP_HI(%a6) |force snans to qnans
mn_snan:
tstl ETEMP(%a6) |check for sign
bge finish |if clr, go on
orl #neg_mask,USER_FPSR(%a6) |set N bit
bra finish
 
mtry_zero:
| Zero screen for the packed move-in path; same test as try_zero, but
| the result class is also recorded in USER_FPSR.  If the low nibble of
| word 4 and both mantissa longwords are zero, ETEMP is overwritten with
| a zero of the original sign, Z (plus N for a negative zero) is ORed
| into USER_FPSR and control goes to finish.  Any non-zero digit sends
| the operand to mnot_spec.  Clobbers d0.
movew ETEMP_EX+2(%a6),%d0 |get word 4
andiw #0x000f,%d0 |clear all but the last nibble
tstw %d0 |check for zero.
bnes mnot_spec |non-zero nibble: not a zero
tstl ETEMP_HI(%a6) |check words 3 and 2
bnes mnot_spec
tstl ETEMP_LO(%a6) |check words 1 and 0
bnes mnot_spec
tstl ETEMP(%a6) |test sign of the zero
bges mpos_zero |sign bit clear: positive zero
orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z
movel #0x80000000,ETEMP(%a6) |write neg zero to etemp
clrl ETEMP_HI(%a6)
clrl ETEMP_LO(%a6)
bras finish
mpos_zero:
orl #z_mask,USER_FPSR(%a6) |set Z
clrl ETEMP(%a6) |write pos zero to etemp
clrl ETEMP_HI(%a6)
clrl ETEMP_LO(%a6)
bras finish
 
mnot_spec:
| General case on the packed move-in path: convert the operand with
| decbin (defined elsewhere) and store the fp0 result over ETEMP in
| extended format.  fp0/fp1 are pushed before the call and popped after
| the store.  There is no branch at the end: execution continues
| straight into finish.
| NOTE(review): unlike not_spec, this path does not zero FPSR after
| decbin -- confirm that is intentional.
fmovemx %fp0-%fp1,-(%a7) |save fp0 ,fp1 - decbin returns in fp0
bsr decbin
fmovex %fp0,ETEMP(%a6)
| ;put the unpacked sop in the fsave stack
fmovemx (%a7)+,%fp0-%fp1 |restore the saved fp0/fp1
 
finish:
| Common exit of the unpack paths in this file section.  Clears bit 10
| of the command word saved at CMDREG1B (mask 0xfbff) and writes it
| back, then continues straight into fix_stag (no branch or return
| here).  Clobbers d0.
movew CMDREG1B(%a6),%d0 |get the command word
andw #0xfbff,%d0 |change the source specifier field to
| ;extended (was packed).
movew %d0,CMDREG1B(%a6) |write command word back to fsave stack
| ;we need to do this so the 040 will
| ;re-execute the inst. without taking
| ;another packed trap.
 
fix_stag:
|
| Classify the extended-precision operand now sitting in ETEMP on the
| fsave stack and record its class as the source tag.
|
|   In:    ETEMP_EX / ETEMP_HI / ETEMP_LO (%a6)
|   Out:   STAG(%a6) = tag byte,  d0 = tag value
|   Uses:  d1 as scratch
|
|   class   exponent (sign stripped)   mantissa    STAG   d0
|   -----   ------------------------   ---------   ----   ----
|   INF     $7fff                      all zero    $40    $40
|   NAN     $7fff                      non-zero    $60    $60
|   ZERO    $0000                      not looked  $30    $20
|   NORM    $0001..$3fff               not looked  $10    $00
|   NORM    $4000..$7ffe               not looked  $00    $00
|
| The $10 in STAG is the etemp_15 bit (just right of the stag field).
| It is set for a zero, and for a norm whose exponent is <= $3fff.
|
movew ETEMP_EX(%a6),%d1 |d1.w = sign and exponent word
andiw #0x7fff,%d1 |drop the sign, keep the exponent
cmpiw #0x7fff,%d1 |maximum exponent?
bnes z_or_nrm |no - it is a zero or a norm
movel ETEMP_HI(%a6),%d1 |any bit set in the upper mantissa ...
bnes is_nan
movel ETEMP_LO(%a6),%d1 |... or in the lower mantissa means NaN
bnes is_nan
is_inf:
| Maximum exponent with an all-zero mantissa.
moveb #0x40,STAG(%a6)
moveql #0x40,%d0
rts
is_nan:
| Maximum exponent with a non-zero mantissa.
moveb #0x60,STAG(%a6)
moveql #0x60,%d0
rts
z_or_nrm:
tstw %d1 |exponent of zero?
bnes is_nrm
is_zro:
| Zero tag ($20) with etemp_15 ($10) set as well.
moveb #0x30,STAG(%a6)
moveql #0x20,%d0
rts
is_nrm:
| Norm: etemp_15 is set only when the exponent is <= $3fff.
cmpiw #0x3fff,%d1
bles set_bit15
clrb STAG(%a6) |exponent > $3fff: tag 0, etemp_15 clear
bras end_is_nrm
set_bit15:
moveb #0x10,STAG(%a6) |tag 0 with etemp_15 set
end_is_nrm:
moveql #0,%d0 |both norm flavours return 0
end_fix:
rts
end_get:
| Bare return point.  Nothing in this part of the file branches here;
| NOTE(review): its users are presumably in the code above this view.
rts
|end

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.