1 |
1275 |
phoenix |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
2 |
|
|
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
|
3 |
|
|
M68000 Hi-Performance Microprocessor Division
|
4 |
|
|
M68060 Software Package
|
5 |
|
|
Production Release P1.00 -- October 10, 1994
|
6 |
|
|
|
7 |
|
|
M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
|
8 |
|
|
|
9 |
|
|
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
|
10 |
|
|
To the maximum extent permitted by applicable law,
|
11 |
|
|
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
|
12 |
|
|
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
|
13 |
|
|
and any warranty against infringement with regard to the SOFTWARE
|
14 |
|
|
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
|
15 |
|
|
|
16 |
|
|
To the maximum extent permitted by applicable law,
|
17 |
|
|
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
|
18 |
|
|
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
|
19 |
|
|
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
|
20 |
|
|
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
|
21 |
|
|
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
|
22 |
|
|
|
23 |
|
|
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
|
24 |
|
|
so long as this entire notice is retained without alteration in any modified and/or
|
25 |
|
|
redistributed versions, and that such modified versions are clearly identified as such.
|
26 |
|
|
No licenses are granted by implication, estoppel or otherwise under any patents
|
27 |
|
|
or trademarks of Motorola, Inc.
|
28 |
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
29 |
|
|
# litop.s:
|
30 |
|
|
# This file is appended to the top of the 060FPLSP package
|
31 |
|
|
# and contains the entry points into the package. The user, in
|
32 |
|
|
# effect, branches to one of the branch table entries located here.
|
33 |
|
|
#
|
34 |
|
|
|
35 |
|
|
# Package entry points.  Every slot is a long branch to one emulation
# routine followed by one zero word of padding.  Users of the package
# branch to a slot in this table.

# 64-bit integer divide
	bra.l		_060LSP__idivs64_
	short		0x0000
	bra.l		_060LSP__idivu64_
	short		0x0000

# 64-bit integer multiply
	bra.l		_060LSP__imuls64_
	short		0x0000
	bra.l		_060LSP__imulu64_
	short		0x0000

# cmp2 with an address register as Rn (byte, word, long bounds)
	bra.l		_060LSP__cmp2_Ab_
	short		0x0000
	bra.l		_060LSP__cmp2_Aw_
	short		0x0000
	bra.l		_060LSP__cmp2_Al_
	short		0x0000

# cmp2 with a data register as Rn (byte, word, long bounds)
	bra.l		_060LSP__cmp2_Db_
	short		0x0000
	bra.l		_060LSP__cmp2_Dw_
	short		0x0000
	bra.l		_060LSP__cmp2_Dl_
	short		0x0000

# leave room for possible future additions.
	align		0x200
|
60 |
|
|
|
61 |
|
|
#########################################################################
|
62 |
|
|
# XDEF **************************************************************** #
|
63 |
|
|
# _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
|
64 |
|
|
# _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
|
65 |
|
|
# #
|
66 |
|
|
# This is the library version which is accessed as a subroutine #
|
67 |
|
|
# and therefore does not work exactly like the 680X0 div{s,u}.l #
|
68 |
|
|
# 64-bit divide instruction. #
|
69 |
|
|
# #
|
70 |
|
|
# XREF **************************************************************** #
|
71 |
|
|
# None. #
|
72 |
|
|
# #
|
73 |
|
|
# INPUT *************************************************************** #
|
74 |
|
|
# 0x4(sp) = divisor #
|
75 |
|
|
# 0x8(sp) = hi(dividend) #
|
76 |
|
|
# 0xc(sp) = lo(dividend) #
|
77 |
|
|
# 0x10(sp) = pointer to location to place quotient/remainder #
|
78 |
|
|
# #
|
79 |
|
|
# OUTPUT ************************************************************** #
|
80 |
|
|
# 0x10(sp) = points to location of remainder/quotient. #
|
81 |
|
|
# remainder is in first longword, quotient is in 2nd. #
|
82 |
|
|
# #
|
83 |
|
|
# ALGORITHM *********************************************************** #
|
84 |
|
|
# If the operands are signed, make them unsigned and save the #
|
85 |
|
|
# sign info for later. Separate out special cases like divide-by-zero #
|
86 |
|
|
# or 32-bit divides if possible. Else, use a special math algorithm #
|
87 |
|
|
# to calculate the result. #
|
88 |
|
|
# Restore sign info if signed instruction. Set the condition #
|
89 |
|
|
# codes before performing the final "rts". If the divisor was equal to #
|
90 |
|
|
# zero, then perform a divide-by-zero using a 16-bit implemented #
|
91 |
|
|
# divide instruction. This way, the operating system can record that #
|
92 |
|
|
# the event occurred even though it may not point to the correct place. #
|
93 |
|
|
# #
|
94 |
|
|
#########################################################################
|
95 |
|
|
|
96 |
|
|
# Local variables of the 64-bit divide routines, as offsets from %a6.
# The routines reserve 16 bytes of frame space with "link.w %a6,&-16".
	set		POSNEG, -1		# byte: set for divs, clear for divu
	set		NDIVISOR, -2		# byte: set if the divisor was negative
	set		NDIVIDEND, -3		# byte: set if the dividend was negative
	set		DDSECOND, -4		# byte: set once the 1st quotient word is done
	set		DDNORMAL, -8		# long: count of normalization shifts
	set		DDQUOTIENT, -12		# long: quotient, built one word at a time
	set		DIV64_CC, -16		# word: condition codes saved at entry
|
103 |
|
|
|
104 |
|
|
##########
|
105 |
|
|
# divs.l #
|
106 |
|
|
##########
|
107 |
|
|
#
# _060LSP__idivs64_(): signed 64/32 divide
# _060LSP__idivu64_(): unsigned 64/32 divide
#
# Both entry points build the same stack frame and then share the code
# at ldiv64_cont.  Once the frame is linked the arguments are at:
#	0x8(%a6)  = divisor
#	0xc(%a6)  = hi(dividend)
#	0x10(%a6) = lo(dividend)
#	0x14(%a6) = pointer to the two-longword result area
# Register use in the shared code:
#	%d7 = divisor
#	%d5 = hi(dividend), later the remainder
#	%d6 = lo(dividend), later the quotient
# d2-d7 are saved and restored.  %d0 is used as scratch without being
# saved.
#
	global		_060LSP__idivs64_
_060LSP__idivs64_:
	link.w		%a6,&-16		# room for the locals
	movm.l		&0x3f00,-(%sp)		# save d2-d7

	mov.w		%cc,DIV64_CC(%a6)	# remember the incoming ccodes
	st		POSNEG(%a6)		# flag: signed divide
	bra.b		ldiv64_cont

	global		_060LSP__idivu64_
_060LSP__idivu64_:
	link.w		%a6,&-16		# room for the locals
	movm.l		&0x3f00,-(%sp)		# save d2-d7

	mov.w		%cc,DIV64_CC(%a6)	# remember the incoming ccodes
	sf		POSNEG(%a6)		# flag: unsigned divide

# common path for both entry points
ldiv64_cont:
	mov.l		0x8(%a6),%d7		# d7 = divisor
	beq.w		ldiv64eq0		# divisor == 0: go raise the trap

	mov.l		0xc(%a6), %d5		# d5 = hi(dividend)
	mov.l		0x10(%a6), %d6		# d6 = lo(dividend)

# an unsigned divide needs no sign bookkeeping
	tst.b		POSNEG(%a6)		# signed?
	beq.b		ldspecialcases		# no

# signed: record the divisor's sign, then work with its magnitude
	tst.l		%d7
	slt		NDIVISOR(%a6)		# remember if divisor < 0
	bpl.b		ldsgndividend
	neg.l		%d7			# divisor = -divisor

# signed: record the dividend's sign, then work with its magnitude
ldsgndividend:
	tst.l		%d5
	slt		NDIVIDEND(%a6)		# remember if dividend < 0
	bpl.b		ldspecialcases

	mov.w		&0x0, %cc		# 'X' must be clear for negx
	negx.l		%d6			# 64-bit negate, lo half first
	negx.l		%d5

# cheap cases that avoid the full algorithm:
#	- dividend == 0
#	- hi(dividend) == 0, which leaves a 32-bit problem
ldspecialcases:
	tst.l		%d5			# hi(dividend) == 0 ?
	bne.b		ldnormaldivide		# no: needs the 64-bit path

	tst.l		%d6			# lo(dividend) == 0 as well?
	beq.w		lddone			# yes: quotient and remainder are 0

	cmp.l		%d7,%d6			# divisor <= lo(dividend) ?
	bls.b		ld32bitdivide		# yes: one 32-bit divide will do

	exg		%d5,%d6			# divisor is larger: q = 0, r = dividend
	bra.w		ldivfinish

ld32bitdivide:
	tdivu.l		%d7, %d5:%d6		# 32/32 divide: r -> d5, q -> d6

	bra.b		ldivfinish

ldnormaldivide:
# if hi(dividend) >= divisor the quotient cannot fit in 32 bits
	cmp.l		%d7,%d5
	bls.b		lddovf			# overflow

	bsr.l		ldclassical		# q -> d6, r -> d5

# the magnitudes are done; apply the signs for a signed divide
ldivfinish:
	tst.b		POSNEG(%a6)
	beq.b		lddone			# unsigned: nothing more to do

	tst.b		NDIVIDEND(%a6)		# the remainder takes the sign
	beq.b		ldcc			# of the dividend
	neg.l		%d5
ldcc:
	mov.b		NDIVISOR(%a6), %d0
	eor.b		%d0, NDIVIDEND(%a6)	# signs differ => negative quotient
	beq.b		ldqpos

# negative quotient: the magnitude may be at most 0x80000000
	cmpi.l		%d6, &0x80000000
	bhi.b		lddovf			# too big for 32 signed bits

	neg.l		%d6			# two's complement the quotient

	bra.b		lddone

# positive quotient: bit 31 must be clear
ldqpos:
	btst		&0x1f, %d6
	bne.b		lddovf			# too big for 32 signed bits

# normal exit: keep the caller's 'X' bit and let the quotient set the
# remaining condition codes.
lddone:
	andi.w		&0x10,DIV64_CC(%a6)	# keep only 'X'
	mov.w		DIV64_CC(%a6),%cc
	tst.l		%d6			# ccodes from the quotient

# store d5 (remainder) and d6 (quotient), in that order, in the result
# area addressed by 0x14(%a6).  movm is used so that the condition
# codes set above are not disturbed.
ldexit:
	movm.l		&0x0060,([0x14,%a6])	# save result

	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6

	rts

# overflow exit: hand back the dividend unchanged and set 'V'
lddovf:
	mov.l		0xc(%a6), %d5		# reload hi(dividend)
	mov.l		0x10(%a6), %d6		# reload lo(dividend)

	andi.w		&0x1c,DIV64_CC(%a6)	# keep 'X','N','Z' of the caller
	ori.w		&0x02,DIV64_CC(%a6)	# set 'V'
	mov.w		DIV64_CC(%a6),%cc

	bra.b		ldexit

# divide-by-zero exit: copy the dividend to the result area, undo the
# frame, then execute a real divide by zero to raise the exception.
ldiv64eq0:
	mov.l		0xc(%a6),([0x14,%a6])
	mov.l		0x10(%a6),([0x14,%a6],0x4)

	mov.w		DIV64_CC(%a6),%cc	# caller's ccodes, unchanged

	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6

	divu.w		&0x0,%d0		# force a divbyzero exception
	rts
|
266 |
|
|
|
267 |
|
|
###########################################################################
|
268 |
|
|
#########################################################################
|
269 |
|
|
# This routine uses the 'classical' Algorithm D from Donald Knuth's #
|
270 |
|
|
# Art of Computer Programming, vol II, Seminumerical Algorithms. #
|
271 |
|
|
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
|
272 |
|
|
# where U,V are words of the quadword dividend and longword divisor, #
|
273 |
|
|
# and U1, V1 are the most significant words. #
|
274 |
|
|
# #
|
275 |
|
|
# The most sig. longword of the 64 bit dividend must be in %d5, least #
|
276 |
|
|
# in %d6. The divisor must be in %d7. All operands are treated as #
|
277 |
|
|
# unsigned; the caller strips and restores signs for signed divides. #
|
278 |
|
|
# The quotient is returned in %d6, remainder in %d5, unless the #
|
279 |
|
|
# v (overflow) bit is set in the saved %ccr. If overflow, the dividend #
|
280 |
|
|
# is unchanged. #
|
281 |
|
|
#########################################################################
|
282 |
|
|
ldclassical:
# Unsigned 64/32 divide.
#	in:  %d5:%d6 = dividend (hi:lo), %d7 = divisor
#	out: %d6 = quotient, %d5 = remainder
# Uses %d0-%d4 and %d7 as scratch.  The lddknuth path also uses the
# DDNORMAL, DDSECOND and DDQUOTIENT locals of the caller's %a6 frame.
#
# A divisor that fits in one word takes the short path below; anything
# larger goes to the full algorithm at lddknuth.

	cmpi.l		%d7, &0xffff
	bhi.b		lddknuth		# divisor needs both words

# Word-sized divisor.  The caller has already rejected the overflow
# case, so hi(dividend) is smaller than the divisor and can serve as
# the running remainder.  Two 32/16 divides then produce the two
# quotient words and the final word remainder.

	clr.l		%d1			# d1 collects the quotient
	swap		%d5			# remainder * 2**16
	swap		%d6			# bring u3 into the low word
	mov.w		%d6, %d5		# r*b + u3

	divu.w		%d7, %d5

	mov.w		%d5, %d1		# upper quotient word
	swap		%d6			# bring u4 into the low word
	mov.w		%d6, %d5		# r*b + u4

	divu.w		%d7, %d5

	swap		%d1
	mov.w		%d5, %d1		# lower quotient word
	clr.w		%d5
	swap		%d5			# d5 = remainder
	mov.l		%d1, %d6		# d6 = quotient

	rts

lddknuth:
# Two-word divisor.  The divisor is divided into the top three words
# of the dividend to produce one quotient word; after the subtraction
# the dividend is shifted up one word and the step is repeated for the
# second quotient word.  Divisor and dividend are first shifted left
# until bit 31 of the divisor is set so that the quotient-word
# estimate is reliable.

	clr.l		DDNORMAL(%a6)		# no normalization shifts yet
	clr.b		DDSECOND(%a6)		# working on the 1st quotient word
	clr.l		%d1			# d1 = trial quotient word
lddnchk:
	btst		&31, %d7		# divisor normalized yet?
	bne.b		lddnormalized		# yes
	addq.l		&0x1, DDNORMAL(%a6)	# count this shift
	lsl.l		&0x1, %d7		# divisor << 1
	lsl.l		&0x1, %d6		# dividend << 1, lo half first
	roxl.l		&0x1, %d5		# carry moves into the hi half
	bra.w		lddnchk
lddnormalized:

# Estimate the next quotient word from the leading words.
	mov.l		%d7, %d3		# divisor
	mov.l		%d5, %d2		# hi longword of dividend
	swap		%d2			# U1 in the low word
	swap		%d3			# V1 in the low word
	cmp.w		%d2, %d3		# V1 == U1 ?
	bne.b		lddqcalc1
	mov.w		&0xffff, %d1		# yes: start with the largest word
	bra.b		lddadj0
lddqcalc1:
	mov.l		%d5, %d1

	divu.w		%d3, %d1		# U1U2 / V1

	andi.l		&0x0000ffff, %d1	# keep the quotient, drop the remainder
lddadj0:

# Check the estimate against the second divisor word and lower it
# while it is too large.
	mov.l		%d6, -(%sp)		# keep lo(dividend) for later
	clr.w		%d6
	swap		%d6			# U3 in the low word
lddadj1:
	mov.l		%d7, %d3
	mov.l		%d1, %d2
	mulu.w		%d7, %d2		# d2 = V2 * q
	swap		%d3
	mulu.w		%d1, %d3		# d3 = V1 * q
	mov.l		%d5, %d4		# U1U2
	sub.l		%d3, %d4		# U1U2 - V1*q

	swap		%d4

	mov.w		%d4,%d0			# upper word of the difference
	mov.w		%d6,%d4			# low word becomes U3

	tst.w		%d0			# upper word non-zero?
	bne.w		lddadjd1		# yes: no adjustment

	cmp.l		%d2, %d4
	bls.b		lddadjd1		# V2*q <= (U1U2 - V1*q):U3, accept q
	subq.l		&0x1, %d1		# q too large: lower it and retest
	bra.b		lddadj1
lddadjd1:
# Multiply the trial word by the whole divisor and subtract the
# product from the current dividend.
	mov.l		%d5, -(%sp)		# keep hi(dividend); lo is already saved
	mov.l		%d1, %d6
	swap		%d6			# q in the upper word
	mov.l		%d7, %d5
	bsr.l		ldmm2			# d5:d6 = divisor * (q << 16)
	mov.l		%d5, %d2		# d2:d3 = that product
	mov.l		%d6, %d3
	mov.l		(%sp)+, %d5		# get the dividend back
	mov.l		(%sp)+, %d6
	sub.l		%d3, %d6
	subx.l		%d2, %d5		# 64-bit subtract
	bcc		ldd2nd			# no borrow: q was right
	subq.l		&0x1, %d1		# borrow: q was one too large
# ...so add the divisor back into the top three words of the dividend.
# According to Knuth this happens only rarely for random operands.
	clr.l		%d2
	mov.l		%d7, %d3
	swap		%d3
	clr.w		%d3			# V2 in the upper word, 0 below
	add.l		%d3, %d6		# lines up with the 3rd dividend word
	addx.l		%d2, %d5		# carry into the hi longword
	mov.l		%d7, %d3
	clr.w		%d3
	swap		%d3			# V1 in the low word, 0 above
	add.l		%d3, %d5		# lines up with the 2nd dividend word
ldd2nd:
	tst.b		DDSECOND(%a6)		# was that the 2nd quotient word?
	bne.b		lddremain		# yes: finish up
# First quotient word finished: store it, move the reduced dividend up
# one word and go around again.
	mov.w		%d1, DDQUOTIENT(%a6)
	clr.l		%d1
	swap		%d5
	swap		%d6
	mov.w		%d6, %d5
	clr.w		%d6
	st		DDSECOND(%a6)		# next pass produces the 2nd word
	bra.w		lddnormalized
lddremain:
# Store the second quotient word, then recover the remainder.
	mov.w		%d1, DDQUOTIENT+2(%a6)
# move the remainder down one word...
	mov.w		%d5, %d6
	swap		%d6
	swap		%d5
# ...and shift out the normalization applied at the start.
	mov.l		DDNORMAL(%a6), %d7	# number of shifts to undo
	beq.b		lddrn			# none
	subq.l		&0x1, %d7		# dbf counts down to -1
lddnlp:
	lsr.l		&0x1, %d5		# 64-bit shift right by one
	roxr.l		&0x1, %d6
	dbf		%d7, lddnlp
lddrn:
	mov.l		%d6, %d5		# d5 = remainder
	mov.l		DDQUOTIENT(%a6), %d6	# d6 = quotient

	rts
|
447 |
|
|
ldmm2:
# 32 x 32 -> 64 bit unsigned multiply.
#	in:  %d5, %d6 = the two factors
#	out: %d5 = upper 32 bits of the product, %d6 = lower 32 bits
# destroys %d2,%d3,%d4.

# form the four 16x16 partial products
	mov.l		%d6, %d2
	mov.l		%d6, %d3
	mov.l		%d5, %d4
	swap		%d3			# msw of the %d6 factor
	swap		%d4			# msw of the %d5 factor
	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
	mulu.w		%d3, %d5		# %d5 <- msw(d6 factor) * lsw(d5 factor)
	mulu.w		%d4, %d2		# %d2 <- msw(d5 factor) * lsw(d6 factor)
	mulu.w		%d4, %d3		# %d3 <- msw*msw

# fold the low words of the two mixed products into the upper word of
# lsw*lsw.  Each carry belongs to the msw*msw product and is added as a
# longword: a word-sized addx would lose the carry whenever the low
# word of msw*msw is 0xffff.
	clr.l		%d4			# zero operand for addx
	swap		%d6
	add.w		%d5, %d6		# msw of l*l + lsw of one m*l product
	addx.l		%d4, %d3		# carry into the m*m product
	add.w		%d2, %d6		# + lsw of the other m*l product
	addx.l		%d4, %d3		# carry into the m*m product
	swap		%d6			# %d6 is low 32 bits of final product

# the low words of the mixed products are used up; add their upper
# words to the m*m product to get the upper longword.
	clr.w		%d5
	clr.w		%d2
	swap		%d5
	swap		%d2
	add.l		%d2, %d5
	add.l		%d3, %d5		# %d5 now ms 32 bits of final product
	rts
|
477 |
|
|
|
478 |
|
|
#########################################################################
|
479 |
|
|
# XDEF **************************************************************** #
|
480 |
|
|
# _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
|
481 |
|
|
# _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
|
482 |
|
|
# #
|
483 |
|
|
# This is the library version which is accessed as a subroutine #
|
484 |
|
|
# and therefore does not work exactly like the 680X0 mul{s,u}.l #
|
485 |
|
|
# 64-bit multiply instruction. #
|
486 |
|
|
# #
|
487 |
|
|
# XREF **************************************************************** #
|
488 |
|
|
# None #
|
489 |
|
|
# #
|
490 |
|
|
# INPUT *************************************************************** #
|
491 |
|
|
# 0x4(sp) = multiplier #
|
492 |
|
|
# 0x8(sp) = multiplicand #
|
493 |
|
|
# 0xc(sp) = pointer to location to place 64-bit result #
|
494 |
|
|
# #
|
495 |
|
|
# OUTPUT ************************************************************** #
|
496 |
|
|
# 0xc(sp) = points to location of 64-bit result #
|
497 |
|
|
# #
|
498 |
|
|
# ALGORITHM *********************************************************** #
|
499 |
|
|
# Perform the multiply in pieces using 16x16->32 unsigned #
|
500 |
|
|
# multiplies and "add" instructions. #
|
501 |
|
|
# Set the condition codes as appropriate before performing an #
|
502 |
|
|
# "rts". #
|
503 |
|
|
# #
|
504 |
|
|
#########################################################################
|
505 |
|
|
|
506 |
|
|
# Frame offset (from %a6) of the word that holds the condition codes
# saved at entry to the 64-bit multiply routines.
	set		MUL64_CC, -4

#
# _060LSP__imulu64_(): unsigned 32 x 32 -> 64 multiply.
# Once the frame is linked the arguments are at:
#	0x8(%a6)  = multiplier
#	0xc(%a6)  = multiplicand
#	0x10(%a6) = pointer to the 64-bit result area
# d2-d4 are saved and restored; %d0 and %d1 are used without saving.
#
	global		_060LSP__imulu64_
_060LSP__imulu64_:

	link.w		%a6,&-4			# room for MUL64_CC
	movm.l		&0x3800,-(%sp)		# save d2-d4

	mov.w		%cc,MUL64_CC(%a6)	# remember the incoming ccodes

	mov.l		0x8(%a6),%d0		# d0 = multiplier
	beq.w		mulu64_zero		# a zero operand gives a zero result

	mov.l		0xc(%a6),%d1		# d1 = multiplicand
	beq.w		mulu64_zero		# a zero operand gives a zero result

# The product is the sum of four 16x16 partial products:
#	[1] lo(mr) * lo(md)	aligned at bit 0
#	[2] hi(mr) * lo(md)	aligned at bit 16
#	[3] lo(mr) * hi(md)	aligned at bit 16
#	[4] hi(mr) * hi(md)	aligned at bit 32
mulu64_alg:
# set up the word operands
	mov.l		%d0,%d2			# d2 = mr
	mov.l		%d0,%d3			# d3 = mr
	mov.l		%d1,%d4			# d4 = md
	swap		%d3			# low word of d3 = hi(mr)
	swap		%d4			# low word of d4 = hi(md)

# the four partial products
	mulu.w		%d1,%d0			# d0 = [1]
	mulu.w		%d3,%d1			# d1 = [2]
	mulu.w		%d4,%d2			# d2 = [3]
	mulu.w		%d4,%d3			# d3 = [4]

# add lo([2]) and lo([3]) to hi([1]); each carry is added to [4].
# lo([1]) already is the lowest word of the result.
	clr.l		%d4			# zero operand for addx
	swap		%d0			# work on hi([1])
	add.w		%d1,%d0			# + lo([2])
	addx.l		%d4,%d3			# carry -> [4]
	add.w		%d2,%d0			# + lo([3])
	addx.l		%d4,%d3			# carry -> [4]
	swap		%d0			# d0 = lo(result)

# add hi([2]) and hi([3]) to [4] to get hi(result)
	clr.w		%d1			# drop lo([2]), already used
	clr.w		%d2			# drop lo([3]), already used
	swap		%d1			# d1 = hi([2])
	swap		%d2			# d2 = hi([3])
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# + [4]: d1 = hi(result)

# build the outgoing condition codes: the caller's 'X', plus 'N' when
# bit 63 of the result is set.
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1
	bpl.b		mulu64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
mulu64_ddone:
	mov.w		%d4,%cc

# store the result, upper longword first, in the area addressed by
# 0x10(%a6).  movm is used so that the condition codes are not
# disturbed.
mulu64_end:
	exg		%d1,%d0			# d0 = hi(result), d1 = lo(result)
	movm.l		&0x0003,([0x10,%a6])	# save result

	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6

	rts

# at least one operand is zero: the result is zero and 'Z' is set.
mulu64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	ori.b		&0x4,%d4		# set 'Z' bit
	mov.w		%d4,%cc

	bra.b		mulu64_end
|
615 |
|
|
|
616 |
|
|
##########
|
617 |
|
|
# muls.l #
|
618 |
|
|
##########
|
619 |
|
|
#
# _060LSP__imuls64_(): signed 32 x 32 -> 64 multiply.
# Once the frame is linked the arguments are at:
#	0x8(%a6)  = multiplier
#	0xc(%a6)  = multiplicand
#	0x10(%a6) = pointer to the 64-bit result area
# d2-d5 are saved and restored; %d0 and %d1 are used without saving.
# The magnitudes are multiplied unsigned and the product is negated
# afterwards when the operand signs differ.
#
	global		_060LSP__imuls64_
_060LSP__imuls64_:

	link.w		%a6,&-4			# room for MUL64_CC
	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.w		%cc,MUL64_CC(%a6)	# remember the incoming ccodes

# Zero operands leave through muls64_zero so that the exit path
# restores the same registers (d2-d5) that were saved above.
	mov.l		0x8(%a6),%d0		# d0 = multiplier
	beq.w		muls64_zero		# a zero operand gives a zero result

	mov.l		0xc(%a6),%d1		# d1 = multiplicand
	beq.w		muls64_zero		# a zero operand gives a zero result

	clr.b		%d5			# d5 = sign tag, 0 = positive result
	tst.l		%d0			# multiplier negative?
	bge.b		muls64_chk_md_sgn	# no
	neg.l		%d0			# use its magnitude

	ori.b		&0x1,%d5		# record the multiplier's sign

# the result sign is the exclusive or of the operand signs.
muls64_chk_md_sgn:
	tst.l		%d1			# multiplicand negative?
	bge.b		muls64_alg		# no
	neg.l		%d1			# use its magnitude

	eori.b		&0x1,%d5		# fold in the multiplicand's sign

# The product is the sum of four 16x16 partial products:
#	[1] lo(mr) * lo(md)	aligned at bit 0
#	[2] hi(mr) * lo(md)	aligned at bit 16
#	[3] lo(mr) * hi(md)	aligned at bit 16
#	[4] hi(mr) * hi(md)	aligned at bit 32
muls64_alg:
# set up the word operands
	mov.l		%d0,%d2			# d2 = mr
	mov.l		%d0,%d3			# d3 = mr
	mov.l		%d1,%d4			# d4 = md
	swap		%d3			# low word of d3 = hi(mr)
	swap		%d4			# low word of d4 = hi(md)

# the four partial products
	mulu.w		%d1,%d0			# d0 = [1]
	mulu.w		%d3,%d1			# d1 = [2]
	mulu.w		%d4,%d2			# d2 = [3]
	mulu.w		%d4,%d3			# d3 = [4]

# add lo([2]) and lo([3]) to hi([1]); each carry is added to [4].
# lo([1]) already is the lowest word of the result.
	clr.l		%d4			# zero operand for addx
	swap		%d0			# work on hi([1])
	add.w		%d1,%d0			# + lo([2])
	addx.l		%d4,%d3			# carry -> [4]
	add.w		%d2,%d0			# + lo([3])
	addx.l		%d4,%d3			# carry -> [4]
	swap		%d0			# d0 = lo(result)

# add hi([2]) and hi([3]) to [4] to get hi(result)
	clr.w		%d1			# drop lo([2]), already used
	clr.w		%d2			# drop lo([3]), already used
	swap		%d1			# d1 = hi([2])
	swap		%d2			# d2 = hi([3])
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# + [4]: d1 = hi(result)

	tst.b		%d5			# is the result negative?
	beq.b		muls64_done		# no

# negative result: two's complement the 64-bit magnitude by inverting
# all bits and adding one.  %d4 is still zero here, so the addx only
# adds the carry out of the low longword.
muls64_neg:
	not.l		%d0			# invert lo(result)
	not.l		%d1			# invert hi(result)
	addq.l		&1,%d0			# + 1
	addx.l		%d4,%d1			# carry into hi(result)

# build the outgoing condition codes: the caller's 'X', plus 'N' when
# the result is negative.
muls64_done:
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1
	bpl.b		muls64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
muls64_ddone:
	mov.w		%d4,%cc

# store the result, upper longword first, in the area addressed by
# 0x10(%a6).  movm is used so that the condition codes are not
# disturbed.
muls64_end:
	exg		%d1,%d0			# d0 = hi(result), d1 = lo(result)
	movm.l		&0x0003,([0x10,%a6])	# save result

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	unlk		%a6

	rts

# at least one operand is zero: the result is zero and 'Z' is set.
muls64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	ori.b		&0x4,%d4		# set 'Z' bit
	mov.w		%d4,%cc

	bra.b		muls64_end
|
752 |
|
|
|
753 |
|
|
#########################################################################
|
754 |
|
|
# XDEF **************************************************************** #
|
755 |
|
|
# _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
|
756 |
|
|
# _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
|
757 |
|
|
# _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
|
758 |
|
|
# _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
|
759 |
|
|
# _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
|
760 |
|
|
# _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
|
761 |
|
|
# #
|
762 |
|
|
# This is the library version which is accessed as a subroutine #
|
763 |
|
|
# and therefore does not work exactly like the 680X0 "cmp2" #
|
764 |
|
|
# instruction. #
|
765 |
|
|
# #
|
766 |
|
|
# XREF **************************************************************** #
|
767 |
|
|
# None #
|
768 |
|
|
# #
|
769 |
|
|
# INPUT *************************************************************** #
|
770 |
|
|
# 0x4(sp) = Rn #
|
771 |
|
|
# 0x8(sp) = pointer to boundary pair #
|
772 |
|
|
# #
|
773 |
|
|
# OUTPUT ************************************************************** #
|
774 |
|
|
# cc = condition codes are set correctly #
|
775 |
|
|
# #
|
776 |
|
|
# ALGORITHM *********************************************************** #
|
777 |
|
|
# In the interest of simplicity, all operands are converted to #
|
778 |
|
|
# longword size whether the operation is byte, word, or long. The #
|
779 |
|
|
# bounds are sign extended accordingly. If Rn is a data register, Rn is #
|
780 |
|
|
# also sign extended. If Rn is an address register, it need not be sign #
|
781 |
|
|
# extended since the full register is always used. #
|
782 |
|
|
# The condition codes are set correctly before the final "rts". #
|
783 |
|
|
# #
|
784 |
|
|
#########################################################################
|
785 |
|
|
|
786 |
|
|
# a6-relative offset of the frame word in which every cmp2 entry point
# saves the ccodes it was called with (the frame is made by "link.w %a6,&-4").
set CMP2_CC, -4
|
787 |
|
|
|
788 |
|
|
# _060LSP__cmp2_Ab_: byte-sized cmp2 against an address register value.
#	0x8(a6) = Rn value, taken as a full longword (not sign extended)
#	0xc(a6) = pointer to the boundary pair: lo byte at +0, hi byte at +1
# Leaves d0 = lo, d1 = hi (both sign extended to long), d2 = Rn and
# branches to l_cmp2_cmp, which computes the ccodes and returns.
	global		_060LSP__cmp2_Ab_
_060LSP__cmp2_Ab_:

# ---- prologue (no fp registers are used, so none are saved) ----
	link.w		%a6,&-4			# one longword of frame scratch
	movm.l		&0x3800,-(%sp)		# push d2-d4

# ccodes must be captured before any mov/ext below overwrites them
	mov.w		%cc,CMP2_CC(%a6)

	mov.b		([0xc,%a6],0x0),%d0	# d0 = lo bound byte
	extb.l		%d0			# byte -> long
	mov.b		([0xc,%a6],0x1),%d1	# d1 = hi bound byte
	extb.l		%d1			# byte -> long
	mov.l		0x8(%a6),%d2		# d2 = Rn

	bra.w		l_cmp2_cmp		# shared compare + epilogue
|
806 |
|
|
|
807 |
|
|
# _060LSP__cmp2_Aw_: word-sized cmp2 against an address register value.
#	0x8(a6) = Rn value, taken as a full longword (not sign extended)
#	0xc(a6) = pointer to the boundary pair: lo word at +0, hi word at +2
# Leaves d0 = lo, d1 = hi (both sign extended to long), d2 = Rn and
# branches to l_cmp2_cmp, which computes the ccodes and returns.
	global		_060LSP__cmp2_Aw_
_060LSP__cmp2_Aw_:

# ---- prologue (no fp registers are used, so none are saved) ----
	link.w		%a6,&-4			# one longword of frame scratch
	movm.l		&0x3800,-(%sp)		# push d2-d4

# ccodes must be captured before any mov/ext below overwrites them
	mov.w		%cc,CMP2_CC(%a6)

	mov.w		([0xc,%a6],0x0),%d0	# d0 = lo bound word
	ext.l		%d0			# word -> long
	mov.w		([0xc,%a6],0x2),%d1	# d1 = hi bound word
	ext.l		%d1			# word -> long
	mov.l		0x8(%a6),%d2		# d2 = Rn

	bra.w		l_cmp2_cmp		# shared compare + epilogue
|
825 |
|
|
|
826 |
|
|
# _060LSP__cmp2_Al_: longword-sized cmp2 against an address register value.
#	0x8(a6) = Rn value
#	0xc(a6) = pointer to the boundary pair: lo long at +0, hi long at +4
# Everything is already longword sized, so nothing is extended.
# Leaves d0 = lo, d1 = hi, d2 = Rn and branches to l_cmp2_cmp, which
# computes the ccodes and returns.
	global		_060LSP__cmp2_Al_
_060LSP__cmp2_Al_:

# ---- prologue (no fp registers are used, so none are saved) ----
	link.w		%a6,&-4			# one longword of frame scratch
	movm.l		&0x3800,-(%sp)		# push d2-d4

# ccodes must be captured before the loads below overwrite them
	mov.w		%cc,CMP2_CC(%a6)

	mov.l		([0xc,%a6],0x0),%d0	# d0 = lo bound
	mov.l		([0xc,%a6],0x4),%d1	# d1 = hi bound
	mov.l		0x8(%a6),%d2		# d2 = Rn

	bra.w		l_cmp2_cmp		# shared compare + epilogue
|
841 |
|
|
|
842 |
|
|
# _060LSP__cmp2_Db_: byte-sized cmp2 against a data register value.
#	0x8(a6) = Rn value; only the low byte takes part in the compare
#	0xc(a6) = pointer to the boundary pair: lo byte at +0, hi byte at +1
# All three operands are sign extended to long so that the shared tail
# can work with plain longword arithmetic.
# Leaves d0 = lo, d1 = hi, d2 = Rn and branches to l_cmp2_cmp, which
# computes the ccodes and returns.
	global		_060LSP__cmp2_Db_
_060LSP__cmp2_Db_:

# ---- prologue (no fp registers are used, so none are saved) ----
	link.w		%a6,&-4			# one longword of frame scratch
	movm.l		&0x3800,-(%sp)		# push d2-d4

# ccodes must be captured before any mov/ext below overwrites them
	mov.w		%cc,CMP2_CC(%a6)

	mov.b		([0xc,%a6],0x0),%d0	# d0 = lo bound byte
	extb.l		%d0			# byte -> long
	mov.b		([0xc,%a6],0x1),%d1	# d1 = hi bound byte
	extb.l		%d1			# byte -> long
	mov.l		0x8(%a6),%d2		# d2 = Rn
	extb.l		%d2			# data register operand: byte -> long

	bra.w		l_cmp2_cmp		# shared compare + epilogue
|
864 |
|
|
|
865 |
|
|
# _060LSP__cmp2_Dw_: word-sized cmp2 against a data register value.
#	0x8(a6) = Rn value; only the low word takes part in the compare
#	0xc(a6) = pointer to the boundary pair: lo word at +0, hi word at +2
# All three operands are sign extended to long so that the shared tail
# can work with plain longword arithmetic.
# Leaves d0 = lo, d1 = hi, d2 = Rn and branches to l_cmp2_cmp, which
# computes the ccodes and returns.
	global		_060LSP__cmp2_Dw_
_060LSP__cmp2_Dw_:

# ---- prologue (no fp registers are used, so none are saved) ----
	link.w		%a6,&-4			# one longword of frame scratch
	movm.l		&0x3800,-(%sp)		# push d2-d4

# ccodes must be captured before any mov/ext below overwrites them
	mov.w		%cc,CMP2_CC(%a6)

	mov.w		([0xc,%a6],0x0),%d0	# d0 = lo bound word
	ext.l		%d0			# word -> long
	mov.w		([0xc,%a6],0x2),%d1	# d1 = hi bound word
	ext.l		%d1			# word -> long
	mov.l		0x8(%a6),%d2		# d2 = Rn
	ext.l		%d2			# data register operand: word -> long

	bra.w		l_cmp2_cmp		# shared compare + epilogue
|
887 |
|
|
|
888 |
|
|
# _060LSP__cmp2_Dl_: longword-sized cmp2 against a data register value.
#	0x8(a6) = Rn value
#	0xc(a6) = pointer to the boundary pair: lo long at +0, hi long at +4
# Everything is already longword sized, so nothing is extended.
# Leaves d0 = lo, d1 = hi, d2 = Rn. There is no branch at the end:
# execution runs straight on into l_cmp2_cmp, which follows this routine.
	global		_060LSP__cmp2_Dl_
_060LSP__cmp2_Dl_:

# ---- prologue (no fp registers are used, so none are saved) ----
	link.w		%a6,&-4			# one longword of frame scratch
	movm.l		&0x3800,-(%sp)		# push d2-d4

# ccodes must be captured before the loads below overwrite them
	mov.w		%cc,CMP2_CC(%a6)

	mov.l		([0xc,%a6],0x0),%d0	# d0 = lo bound
	mov.l		([0xc,%a6],0x4),%d1	# d1 = hi bound
	mov.l		0x8(%a6),%d2		# d2 = Rn
|
902 |
|
|
|
903 |
|
|
#
|
904 |
|
|
# Shared tail of all six cmp2 entry points.
# On entry:
#	d0 = lo bound, d1 = hi bound, d2 = Rn (all longword sized)
#	CMP2_CC(a6) = ccodes as they were when the entry point was called
#	d2-d4 pushed and a6 linked by the entry point
# The returned ccodes are put together from three sources:
#	(1) 'Z' of (Rn - lo)
#	(2) 'Z' and 'C' of the compare between (hi - lo) and (Rn - lo)
#	(3) 'X', 'N' and 'V' exactly as saved in CMP2_CC(a6)
# 'Z' ends up set if either (1) or (2) produced it.
#
l_cmp2_cmp:
	sub.l		%d0,%d1			# d1 = hi - lo
	sub.l		%d0,%d2			# d2 = Rn - lo
	mov.w		%cc,%d3			# ccodes of (Rn - lo)
	andi.b		&0x4,%d3		# (1) isolate 'Z' (bit 2)

	cmp.l		%d1,%d2			# compare (hi - lo) with (Rn - lo)
	mov.w		%cc,%d4			# ccodes of that compare
	andi.b		&0x5,%d4		# (2) isolate 'Z' (bit 2) and 'C' (bit 0)
	or.b		%d4,%d3			# d3 = freshly computed 'Z' and 'C'

	mov.w		CMP2_CC(%a6),%d4	# ccodes from before the call
	andi.b		&0x1a,%d4		# (3) isolate 'X' (0x10), 'N' (0x8), 'V' (0x2)
	or.b		%d3,%d4			# merge old and new bits
	mov.w		%d4,%cc			# install the final ccodes

# ---- epilogue (no fp registers to restore) ----
# movm, unlk and rts leave the ccodes installed above untouched.
	movm.l		(%sp)+,&0x001c		# pop d2-d4
	unlk		%a6

	rts
|