OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [uclinux/] [uClinux-2.0.x/] [arch/] [sparc/] [lib/] [urem.S] - Blame information for rev 1781

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 199 simons
/* $Id: urem.S,v 1.1.1.1 2001-09-10 07:44:03 simons Exp $
2
 * urem.S:      This routine was taken from glibc-1.09 and is covered
3
 *              by the GNU Library General Public License Version 2.
4
 */
5
 
6
/* This file is generated from divrem.m4; DO NOT EDIT! */
7
/*
8
 * Division and remainder, from Appendix E of the Sparc Version 8
9
 * Architecture Manual, with fixes from Gordon Irlam.
10
 */
11
 
12
/*
13
 * Input: dividend and divisor in %o0 and %o1 respectively.
14
 *
15
 * m4 parameters:
16
 *  NAME        name of function to generate (.urem in this file)
17
 *  OP          OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (rem in this file)
18
 *  S           S=true => signed; S=false => unsigned (false in this file)
19
 *
20
 * Algorithm parameters:
21
 *  N           how many bits per iteration we try to get (4)
22
 *  WORDSIZE    total number of bits (32)
23
 *
24
 * Derived constants:
25
 *  TOPBITS     number of bits in the top decade of a number
26
 *
27
 * Important variables:
28
 *  Q           the partial quotient under development (initially 0)
29
 *  R           the remainder so far, initially the dividend
30
 *  ITER        number of main division loop iterations required;
31
 *              equal to ceil(log2(quotient) / N).  Note that this
32
 *              is the log base (2^N) of the quotient.
33
 *  V           the current comparand, initially divisor*2^(ITER*N-1)
34
 *
35
 * Cost:
36
 *  Current estimate for non-large dividend is
37
 *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
38
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
39
 *  different path, as the upper bits of the quotient must be developed
40
 *  one bit at a time.
41
 */
42
 
43
        /*
         * .urem: unsigned 32-bit remainder, %o0 = %o0 % %o1.
         *
         * Register use in this routine:
         *   %o0  dividend on entry; result on return
         *   %o1  divisor (only read; used again by the final fixup)
         *   %o2  Q, the partial quotient
         *   %o3  R, the running remainder (starts as the dividend)
         *   %o4  ITER, number of 4-bit divide passes
         *   %o5  V, the scaled comparand (starts as the divisor)
         *   %g1  threshold 1 << (32 - 4 - 1)
         *   %g7  number of single-bit steps on the large-dividend path
         * The routine returns with retl and issues no save/restore.
         */
        .globl .urem
44
.urem:
45
 
46
        ! Ready to divide.  Compute size of quotient; scale comparand.
47
        orcc    %o1, %g0, %o5           /* V = divisor; Z set if divisor == 0 */
48
        bne     1f                      /* non-zero divisor: skip the trap */
49
        mov     %o0, %o3                /* (delay slot) R = dividend */
50
 
51
                ! Divide by zero trap.  If it returns, return 0 (about as
52
                ! wrong as possible, but that is what SunOS does...).
53
                ta      ST_DIV0
54
                retl
55
                clr     %o0             /* (delay slot) result = 0 */
56
 
57
1:
58
        cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
59
        blu     Lgot_result             ! (and algorithm fails otherwise)
60
        clr     %o2                     /* (delay slot) Q = 0 */
61
        sethi   %hi(1 << (32 - 4 - 1)), %g1     /* %g1 = large-dividend threshold */
62
        cmp     %o3, %g1
63
        blu     Lnot_really_big         /* R below threshold: 4 bits per pass is safe */
64
        clr     %o4                     /* (delay slot) ITER = 0 */
65
 
66
        /*
         * Large-dividend scaling.  First shift V (%o5) left 4 bits at a
         * time, counting passes in ITER (%o4), until V >= %g1.  Then shift
         * V left one bit at a time, counting in %g7, while V < R (%o3).
         * Control leaves this section for Ldo_single_div, either by an
         * explicit branch or by falling through when V > R.
         */
        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
67
        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
68
        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
69
        ! Compute ITER in an unorthodox manner: know we need to shift V into
70
        ! the top decade: so do not even bother to compare to R.
71
        1:
72
                cmp     %o5, %g1
73
                bgeu    3f              /* V >= threshold: 4-bit scaling is done */
74
                mov     1, %g7          /* (delay slot) single-bit step count = 1 */
75
                sll     %o5, 4, %o5     /* V <<= 4 */
76
                b       1b
77
                add     %o4, 1, %o4     /* (delay slot) ITER++ */
78
 
79
        ! Now compute %g7.
80
        2:      addcc   %o5, %o5, %o5   /* V <<= 1; carry = bit shifted out */
81
                bcc     Lnot_too_big    /* no carry: go compare V with R */
82
                add     %g7, 1, %g7     /* (delay slot) count this shift */
83
 
84
                ! We get here if the %o1 overflowed while shifting.
85
                ! This means that %o3 has the high-order bit set.
86
                ! Restore %o5 and subtract from %o3.
87
                sll     %g1, 4, %g1     ! high order bit
88
                srl     %o5, 1, %o5             ! rest of %o5
89
                add     %o5, %g1, %o5   /* V = high bit + (V >> 1) */
90
                b       Ldo_single_div
91
                sub     %g7, 1, %g7     /* (delay slot) uncount the shift just undone */
92
 
93
        Lnot_too_big:
94
        3:      cmp     %o5, %o3
95
                blu     2b              /* V < R: shift V once more */
96
                nop
97
                be      Ldo_single_div  /* V == R */
98
                nop
99
        /*
         * Single-bit divide steps for the large-dividend path.  %g7 holds
         * the number of steps still to run.  The first step subtracts V
         * (%o5) from R (%o3) unconditionally and sets Q (%o2) to 1; each
         * later step shifts Q left, halves V, and then either subtracts V
         * (adding 1 to Q) or adds V (subtracting 1 from Q) depending on
         * the sign of R.  When the count runs out, control goes to
         * Lend_regular_divide.
         */
        /* NB: these are commented out in the V8-Sparc manual as well */
100
        /* (I do not understand this) */
101
        ! %o5 > %o3: went too far: back up 1 step
102
        !       srl     %o5, 1, %o5
103
        !       dec     %g7
104
        ! do single-bit divide steps
105
        !
106
        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
107
        ! first divide step without thinking.  BUT, the others are conditional,
108
        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
109
        ! order bit set in the first step, just falling into the regular
110
        ! division loop will mess up the first time around.
111
        ! So we unroll slightly...
112
        Ldo_single_div:
113
                subcc   %g7, 1, %g7     /* one fewer single-bit step to run */
114
                bl      Lend_regular_divide     /* count went negative: no steps to do */
115
                nop
116
                sub     %o3, %o5, %o3   /* first step: R -= V, unconditionally */
117
                mov     1, %o2          /* Q = 1 */
118
                b       Lend_single_divloop
119
                nop
120
        Lsingle_divloop:
121
                sll     %o2, 1, %o2     /* Q <<= 1 */
122
                bl      1f              /* tests the sign of R set by the tst below */
123
                srl     %o5, 1, %o5     /* (delay slot) V >>= 1 */
124
                ! %o3 >= 0
125
                sub     %o3, %o5, %o3   /* R -= V */
126
                b       2f
127
                add     %o2, 1, %o2     /* (delay slot) Q += 1 */
128
        1:      ! %o3 < 0
129
                add     %o3, %o5, %o3   /* R += V */
130
                sub     %o2, 1, %o2     /* Q -= 1 */
131
        2:
132
        Lend_single_divloop:
133
                subcc   %g7, 1, %g7     /* one fewer single-bit step to run */
134
                bge     Lsingle_divloop
135
                tst     %o3             /* (delay slot) condition codes = sign of R */
136
                b,a     Lend_regular_divide     /* annulled: the following instruction is not run */
137
 
138
/*
 * Lnot_really_big: scaling for dividends below the %g1 threshold.
 * V (%o5) is shifted left 4 bits at a time while V <= R (%o3, unsigned
 * compare); ITER (%o4) is bumped in the branch delay slot on every
 * pass, including the last one, and then decremented once afterwards.
 * Falls through into Ldivloop with the condition codes set from R.
 */
Lnot_really_big:
139
1:
140
        sll     %o5, 4, %o5             /* V <<= 4 */
141
        cmp     %o5, %o3
142
        bleu    1b                      /* V <= R: keep scaling */
143
        addcc   %o4, 1, %o4             /* (delay slot) ITER++, also sets Z for the be below */
144
        be      Lgot_result             /* taken only if the increment left ITER == 0 */
145
        sub     %o4, 1, %o4             /* (delay slot) ITER--: undo the final pass's increment */
146
 
147
        tst     %o3     ! set up for initial iteration
148
/*
 * Ldivloop: one pass develops 4 quotient bits.  Q (%o2) is shifted left
 * by 4, then four steps walk a binary decision tree: each step halves
 * V (%o5) in a branch delay slot and then subtracts V from R (%o3) when
 * R was non-negative, or adds V to R when R was negative.  The addcc /
 * subcc of one step sets the condition codes tested by the next bl.
 *
 * Labels L.<depth>.<n> are the "remainder is negative" targets, where
 * n is 16 plus the accumulated bits noted at the branch.  Each leaf adds
 * its odd signed digit (from -15 to +15) to Q in the delay slot of the
 * branch to label 9.
 *
 * On entry the condition codes must reflect the sign of R; the callers
 * arrange this with tst %o3.
 */
Ldivloop:
149
        sll     %o2, 4, %o2             /* Q <<= 4: make room for this pass's digit */
150
                ! depth 1, accumulated bits 0
151
        bl      L.1.16
152
        srl     %o5,1,%o5               /* (delay slot) V >>= 1 */
153
        ! remainder is positive
154
        subcc   %o3,%o5,%o3
155
                        ! depth 2, accumulated bits 1
156
        bl      L.2.17
157
        srl     %o5,1,%o5
158
        ! remainder is positive
159
        subcc   %o3,%o5,%o3
160
                        ! depth 3, accumulated bits 3
161
        bl      L.3.19
162
        srl     %o5,1,%o5
163
        ! remainder is positive
164
        subcc   %o3,%o5,%o3
165
                        ! depth 4, accumulated bits 7
166
        bl      L.4.23
167
        srl     %o5,1,%o5
168
        ! remainder is positive
169
        subcc   %o3,%o5,%o3
170
                b       9f
171
                add     %o2, (7*2+1), %o2       /* (delay slot) Q += this leaf's digit */
172
 
173
L.4.23:
174
        ! remainder is negative
175
        addcc   %o3,%o5,%o3
176
                b       9f
177
                add     %o2, (7*2-1), %o2
178
 
179
 
180
L.3.19:
181
        ! remainder is negative
182
        addcc   %o3,%o5,%o3
183
                        ! depth 4, accumulated bits 5
184
        bl      L.4.21
185
        srl     %o5,1,%o5
186
        ! remainder is positive
187
        subcc   %o3,%o5,%o3
188
                b       9f
189
                add     %o2, (5*2+1), %o2
190
 
191
L.4.21:
192
        ! remainder is negative
193
        addcc   %o3,%o5,%o3
194
                b       9f
195
                add     %o2, (5*2-1), %o2
196
 
197
 
198
 
199
L.2.17:
200
        ! remainder is negative
201
        addcc   %o3,%o5,%o3
202
                        ! depth 3, accumulated bits 1
203
        bl      L.3.17
204
        srl     %o5,1,%o5
205
        ! remainder is positive
206
        subcc   %o3,%o5,%o3
207
                        ! depth 4, accumulated bits 3
208
        bl      L.4.19
209
        srl     %o5,1,%o5
210
        ! remainder is positive
211
        subcc   %o3,%o5,%o3
212
                b       9f
213
                add     %o2, (3*2+1), %o2
214
 
215
L.4.19:
216
        ! remainder is negative
217
        addcc   %o3,%o5,%o3
218
                b       9f
219
                add     %o2, (3*2-1), %o2
220
 
221
 
222
L.3.17:
223
        ! remainder is negative
224
        addcc   %o3,%o5,%o3
225
                        ! depth 4, accumulated bits 1
226
        bl      L.4.17
227
        srl     %o5,1,%o5
228
        ! remainder is positive
229
        subcc   %o3,%o5,%o3
230
                b       9f
231
                add     %o2, (1*2+1), %o2
232
 
233
L.4.17:
234
        ! remainder is negative
235
        addcc   %o3,%o5,%o3
236
                b       9f
237
                add     %o2, (1*2-1), %o2
238
 
239
 
240
 
241
 
242
L.1.16:
243
        ! remainder is negative
244
        addcc   %o3,%o5,%o3
245
                        ! depth 2, accumulated bits -1
246
        bl      L.2.15
247
        srl     %o5,1,%o5
248
        ! remainder is positive
249
        subcc   %o3,%o5,%o3
250
                        ! depth 3, accumulated bits -1
251
        bl      L.3.15
252
        srl     %o5,1,%o5
253
        ! remainder is positive
254
        subcc   %o3,%o5,%o3
255
                        ! depth 4, accumulated bits -1
256
        bl      L.4.15
257
        srl     %o5,1,%o5
258
        ! remainder is positive
259
        subcc   %o3,%o5,%o3
260
                b       9f
261
                add     %o2, (-1*2+1), %o2
262
 
263
L.4.15:
264
        ! remainder is negative
265
        addcc   %o3,%o5,%o3
266
                b       9f
267
                add     %o2, (-1*2-1), %o2
268
 
269
 
270
L.3.15:
271
        ! remainder is negative
272
        addcc   %o3,%o5,%o3
273
                        ! depth 4, accumulated bits -3
274
        bl      L.4.13
275
        srl     %o5,1,%o5
276
        ! remainder is positive
277
        subcc   %o3,%o5,%o3
278
                b       9f
279
                add     %o2, (-3*2+1), %o2
280
 
281
L.4.13:
282
        ! remainder is negative
283
        addcc   %o3,%o5,%o3
284
                b       9f
285
                add     %o2, (-3*2-1), %o2
286
 
287
 
288
 
289
L.2.15:
290
        ! remainder is negative
291
        addcc   %o3,%o5,%o3
292
                        ! depth 3, accumulated bits -3
293
        bl      L.3.13
294
        srl     %o5,1,%o5
295
        ! remainder is positive
296
        subcc   %o3,%o5,%o3
297
                        ! depth 4, accumulated bits -5
298
        bl      L.4.11
299
        srl     %o5,1,%o5
300
        ! remainder is positive
301
        subcc   %o3,%o5,%o3
302
                b       9f
303
                add     %o2, (-5*2+1), %o2
304
 
305
L.4.11:
306
        ! remainder is negative
307
        addcc   %o3,%o5,%o3
308
                b       9f
309
                add     %o2, (-5*2-1), %o2
310
 
311
 
312
L.3.13:
313
        ! remainder is negative
314
        addcc   %o3,%o5,%o3
315
                        ! depth 4, accumulated bits -7
316
        bl      L.4.9
317
        srl     %o5,1,%o5
318
        ! remainder is positive
319
        subcc   %o3,%o5,%o3
320
                b       9f
321
                add     %o2, (-7*2+1), %o2
322
 
323
L.4.9:
324
        ! remainder is negative
325
        addcc   %o3,%o5,%o3
326
                b       9f
327
                add     %o2, (-7*2-1), %o2
328
 
329
 
330
 
331
 
332
        /*
         * Loop control and exit.  ITER (%o4) is decremented; while it is
         * still >= 0 another Ldivloop pass runs, entered with the condition
         * codes set from R (%o3).  After the last pass a negative R is
         * corrected by adding the original divisor (%o1) once, and R is
         * returned in %o0.
         */
        9:
333
Lend_regular_divide:
334
        subcc   %o4, 1, %o4             /* ITER-- */
335
        bge     Ldivloop                /* passes remain */
336
        tst     %o3                     /* (delay slot) condition codes = sign of R */
337
        bl,a    Lgot_result             /* annulling branch: the add below runs only if R < 0 */
338
        ! non-restoring fixup here (one instruction only!)
339
        add     %o3, %o1, %o3           /* R += divisor */
340
 
341
 
342
Lgot_result:
343
 
344
        retl
345
        mov %o3, %o0                    /* (delay slot) return value = R */

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.