OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [arch/] [sparc/] [lib/] [urem.S] - Blame information for rev 17

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 xianfeng
/* $Id: urem.S,v 1.4 1996/09/30 02:22:42 davem Exp $
2
 * urem.S:      This routine was taken from glibc-1.09 and is covered
3
 *              by the GNU Library General Public License Version 2.
4
 */
5
 
6
/* This file is generated from divrem.m4; DO NOT EDIT! */
7
/*
8
 * Division and remainder, from Appendix E of the Sparc Version 8
9
 * Architecture Manual, with fixes from Gordon Irlam.
10
 */
11
 
12
/*
13
 * Input: dividend and divisor in %o0 and %o1 respectively.
14
 *
15
 * m4 parameters:
16
 *  NAME        name of function to generate (here: .urem)
17
 *  OP          OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (here: OP=rem)
18
 *  S           S=true => signed; S=false => unsigned (here: S=false)
19
 *
20
 * Algorithm parameters:
21
 *  N           how many bits per iteration we try to get (4)
22
 *  WORDSIZE    total number of bits (32)
23
 *
24
 * Derived constants:
25
 *  TOPBITS     number of bits in the top decade of a number
26
 *
27
 * Important variables:
28
 *  Q           the partial quotient under development (initially 0)
29
 *  R           the remainder so far, initially the dividend
30
 *  ITER        number of main division loop iterations required;
31
 *              equal to ceil(log2(quotient) / N).  Note that this
32
 *              is the log base (2^N) of the quotient.
33
 *  V           the current comparand, initially divisor*2^(ITER*N-1)
34
 *
35
 * Cost:
36
 *  Current estimate for non-large dividend is
37
 *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
38
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
39
 *  different path, as the upper bits of the quotient must be developed
40
 *  one bit at a time.
41
 */
42
 
43
        ! .urem / _Urem: unsigned 32-bit remainder, %o0 = %o0 % %o1.
        !   In:   %o0 = dividend, %o1 = divisor
        !   Out:  %o0 = remainder (0 if the divisor is 0 and the trap returns)
        !   Registers written along the way: %o2 (partial quotient Q),
        !   %o3 (remainder R), %o4 (ITER counter), %o5 (comparand V),
        !   %g1, %g7 and the integer condition codes.
        .globl .urem
44
        .globl _Urem
45
.urem:
46
_Urem:  /* needed for export */
47
 
48
        ! Ready to divide.  Compute size of quotient; scale comparand.
49
        orcc    %o1, %g0, %o5           ! %o5 = divisor; Z is set if it is zero
50
        bne     1f                      ! non-zero divisor: skip the trap
51
         mov    %o0, %o3                ! (delay slot) %o3 = dividend
52
 
53
                ! Divide by zero trap.  If it returns, return 0 (about as
54
                ! wrong as possible, but that is what SunOS does...).
55
                ta      ST_DIV0
56
                retl
57
                 clr    %o0
58
 
59
1:
60
        cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
61
        blu     Lgot_result             ! (and algorithm fails otherwise)
62
         clr    %o2                     ! (delay slot) Q = 0
63
 
        ! %g1 = 1 << 27.  Dividends below this value go to Lnot_really_big;
        ! anything at or above it falls through to the careful path.
64
        sethi   %hi(1 << (32 - 4 - 1)), %g1
65
 
66
        cmp     %o3, %g1
67
        blu     Lnot_really_big
68
         clr    %o4                     ! (delay slot) ITER = 0
69
 
70
        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
71
        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
72
        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
73
        ! Compute ITER in an unorthodox manner: know we need to shift V into
74
        ! the top decade: so do not even bother to compare to R.
        !
        ! Loop 1: shift %o5 left 4 bits per pass, counting passes in %o4,
        ! until %o5 >= %g1.  %g7 is (re)set to 1 in the branch delay slot.
75
        1:
76
                cmp     %o5, %g1
77
                bgeu    3f
78
                 mov    1, %g7
79
 
80
                sll     %o5, 4, %o5
81
 
82
                b       1b
83
                 add    %o4, 1, %o4
84
 
85
        ! Now compute %g7.
        ! Loop 2 (entered through 3: below): double %o5 and bump %g7 while
        ! %o5 is still below %o3.
86
        2:
87
                addcc   %o5, %o5, %o5
88
                bcc     Lnot_too_big
89
                 add    %g7, 1, %g7
90
 
91
                ! We get here if %o5 (the scaled copy of %o1) overflowed while shifting.
92
                ! This means that %o3 has the high-order bit set.
93
                ! Restore %o5 and subtract from %o3.
94
                sll     %g1, 4, %g1     ! high order bit
95
                srl     %o5, 1, %o5             ! rest of %o5
96
                add     %o5, %g1, %o5
97
 
98
                b       Ldo_single_div
99
                 sub    %g7, 1, %g7     ! (delay slot) undo the increment made at 2: above
100
 
101
        Lnot_too_big:
102
        3:
103
                cmp     %o5, %o3
104
                blu     2b
105
                 nop
106
 
107
                be      Ldo_single_div
108
                 nop
109
        /* NB: these are commented out in the V8-Sparc manual as well */
110
        /* (I do not understand this) */
111
        ! %o5 > %o3: went too far: back up 1 step
112
        !       srl     %o5, 1, %o5
113
        !       dec     %g7
114
        ! do single-bit divide steps
115
        !
116
        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
117
        ! first divide step without thinking.  BUT, the others are conditional,
118
        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
119
        ! order bit set in the first step, just falling into the regular
120
        ! division loop will mess up the first time around.
121
        ! So we unroll slightly...
122
        Ldo_single_div:
123
                subcc   %g7, 1, %g7
124
                bl      Lend_regular_divide     ! no single-bit steps left
125
                 nop
126
 
                ! First step, done unconditionally: R -= V, Q = 1.
127
                sub     %o3, %o5, %o3
128
                mov     1, %o2
129
 
130
                b       Lend_single_divloop
131
                 nop
132
        Lsingle_divloop:
                ! The bl below tests the flags left by "tst %o3" in the delay
                ! slot of the bge at the bottom of this loop.
133
                sll     %o2, 1, %o2
134
                bl      1f
135
                 srl    %o5, 1, %o5     ! (delay slot) halve the comparand for this step
136
                ! %o3 >= 0
137
                sub     %o3, %o5, %o3
138
                b       2f
139
                 add    %o2, 1, %o2
140
        1:      ! %o3 < 0
141
                add     %o3, %o5, %o3
142
                sub     %o2, 1, %o2
143
        2:
144
        Lend_single_divloop:
145
                subcc   %g7, 1, %g7
146
                bge     Lsingle_divloop
147
                 tst    %o3             ! (delay slot) flags for the next step, or for Lend_regular_divide
148
 
149
                b,a     Lend_regular_divide
150
 
151
! Lnot_really_big: path taken when the dividend is below %g1.
! Shift the comparand %o5 left 4 bits at a time, counting passes in %o4,
! for as long as %o5 <= %o3.
Lnot_really_big:
152
1:
153
        sll     %o5, 4, %o5
154
 
155
        cmp     %o5, %o3
156
        bleu    1b
157
         addcc  %o4, 1, %o4     ! (delay slot) count this pass; also sets the flags tested by "be" below
158
 
        ! Z here comes from the addcc above, so this branch is taken only
        ! if %o4 became 0.  The delay slot takes back the extra count.
159
        be      Lgot_result
160
         sub    %o4, 1, %o4
161
 
162
        tst     %o3     ! set up for initial iteration
163
! Ldivloop: one pass develops 4 quotient bits with a fully unrolled
! non-restoring divide.
!   - Q (%o2) is shifted left 4 bits at the top of the pass.
!   - Each of the four steps halves V (%o5) in a branch delay slot, then
!     subtracts V from R (%o3) if R was non-negative, or adds it if R was
!     negative.  subcc/addcc leave the sign of R in the flags for the next bl.
!   - The first bl tests the flags left by the "tst %o3" executed just
!     before control reaches Ldivloop.
!   - Every leaf ends with "b 9f" and adds an odd value in -15..+15 to Q.
! Labels are named L.<depth>.<16 + accumulated bits on arrival>.
Ldivloop:
164
        sll     %o2, 4, %o2
165
                ! depth 1, accumulated bits 0
166
        bl      L.1.16
167
         srl    %o5,1,%o5
168
        ! remainder is positive
169
        subcc   %o3,%o5,%o3
170
                        ! depth 2, accumulated bits 1
171
        bl      L.2.17
172
         srl    %o5,1,%o5
173
        ! remainder is positive
174
        subcc   %o3,%o5,%o3
175
                        ! depth 3, accumulated bits 3
176
        bl      L.3.19
177
         srl    %o5,1,%o5
178
        ! remainder is positive
179
        subcc   %o3,%o5,%o3
180
                        ! depth 4, accumulated bits 7
181
        bl      L.4.23
182
         srl    %o5,1,%o5
183
        ! remainder is positive
184
        subcc   %o3,%o5,%o3
185
        b       9f
186
         add    %o2, (7*2+1), %o2
187
 
188
L.4.23:
189
        ! remainder is negative
190
        addcc   %o3,%o5,%o3
191
        b       9f
192
         add    %o2, (7*2-1), %o2
193
 
194
L.3.19:
195
        ! remainder is negative
196
        addcc   %o3,%o5,%o3
197
                        ! depth 4, accumulated bits 5
198
        bl      L.4.21
199
         srl    %o5,1,%o5
200
        ! remainder is positive
201
        subcc   %o3,%o5,%o3
202
        b       9f
203
         add    %o2, (5*2+1), %o2
204
 
205
L.4.21:
206
        ! remainder is negative
207
        addcc   %o3,%o5,%o3
208
        b       9f
209
         add    %o2, (5*2-1), %o2
210
 
211
L.2.17:
212
        ! remainder is negative
213
        addcc   %o3,%o5,%o3
214
                        ! depth 3, accumulated bits 1
215
        bl      L.3.17
216
         srl    %o5,1,%o5
217
        ! remainder is positive
218
        subcc   %o3,%o5,%o3
219
                        ! depth 4, accumulated bits 3
220
        bl      L.4.19
221
         srl    %o5,1,%o5
222
        ! remainder is positive
223
        subcc   %o3,%o5,%o3
224
        b       9f
225
         add    %o2, (3*2+1), %o2
226
 
227
L.4.19:
228
        ! remainder is negative
229
        addcc   %o3,%o5,%o3
230
        b       9f
231
         add    %o2, (3*2-1), %o2
232
 
233
L.3.17:
234
        ! remainder is negative
235
        addcc   %o3,%o5,%o3
236
                        ! depth 4, accumulated bits 1
237
        bl      L.4.17
238
         srl    %o5,1,%o5
239
        ! remainder is positive
240
        subcc   %o3,%o5,%o3
241
        b       9f
242
         add    %o2, (1*2+1), %o2
243
 
244
L.4.17:
245
        ! remainder is negative
246
        addcc   %o3,%o5,%o3
247
        b       9f
248
         add    %o2, (1*2-1), %o2
249
 
250
L.1.16:
251
        ! remainder is negative
252
        addcc   %o3,%o5,%o3
253
                        ! depth 2, accumulated bits -1
254
        bl      L.2.15
255
         srl    %o5,1,%o5
256
        ! remainder is positive
257
        subcc   %o3,%o5,%o3
258
                        ! depth 3, accumulated bits -1
259
        bl      L.3.15
260
         srl    %o5,1,%o5
261
        ! remainder is positive
262
        subcc   %o3,%o5,%o3
263
                        ! depth 4, accumulated bits -1
264
        bl      L.4.15
265
         srl    %o5,1,%o5
266
        ! remainder is positive
267
        subcc   %o3,%o5,%o3
268
        b       9f
269
         add    %o2, (-1*2+1), %o2
270
 
271
L.4.15:
272
        ! remainder is negative
273
        addcc   %o3,%o5,%o3
274
        b       9f
275
         add    %o2, (-1*2-1), %o2
276
 
277
L.3.15:
278
        ! remainder is negative
279
        addcc   %o3,%o5,%o3
280
                        ! depth 4, accumulated bits -3
281
        bl      L.4.13
282
         srl    %o5,1,%o5
283
        ! remainder is positive
284
        subcc   %o3,%o5,%o3
285
        b       9f
286
         add    %o2, (-3*2+1), %o2
287
 
288
L.4.13:
289
        ! remainder is negative
290
        addcc   %o3,%o5,%o3
291
        b       9f
292
         add    %o2, (-3*2-1), %o2
293
 
294
L.2.15:
295
        ! remainder is negative
296
        addcc   %o3,%o5,%o3
297
                        ! depth 3, accumulated bits -3
298
        bl      L.3.13
299
         srl    %o5,1,%o5
300
        ! remainder is positive
301
        subcc   %o3,%o5,%o3
302
                        ! depth 4, accumulated bits -5
303
        bl      L.4.11
304
         srl    %o5,1,%o5
305
        ! remainder is positive
306
        subcc   %o3,%o5,%o3
307
        b       9f
308
         add    %o2, (-5*2+1), %o2
309
 
310
L.4.11:
311
        ! remainder is negative
312
        addcc   %o3,%o5,%o3
313
        b       9f
314
         add    %o2, (-5*2-1), %o2
315
 
316
L.3.13:
317
        ! remainder is negative
318
        addcc   %o3,%o5,%o3
319
                        ! depth 4, accumulated bits -7
320
        bl      L.4.9
321
         srl    %o5,1,%o5
322
        ! remainder is positive
323
        subcc   %o3,%o5,%o3
324
        b       9f
325
         add    %o2, (-7*2+1), %o2
326
 
327
L.4.9:
328
        ! remainder is negative
329
        addcc   %o3,%o5,%o3
330
        b       9f
331
         add    %o2, (-7*2-1), %o2
332
 
333
        9:
334
! Lend_regular_divide: loop control for the 4-bit passes, then the final
! remainder fixup and return.
Lend_regular_divide:
335
        subcc   %o4, 1, %o4
336
        bge     Ldivloop        ! another 4-bit pass while the count is still >= 0
337
         tst    %o3             ! (delay slot) sign of R, used by Ldivloop's first bl and by bl,a below
338
 
        ! bl,a is an annulling branch: the add placed after it runs only
        ! when the branch is taken, i.e. when R (%o3) is negative.  It adds
        ! the original divisor (%o1) back to make the remainder non-negative.
339
        bl,a    Lgot_result
340
        ! non-restoring fixup here (one instruction only!)
341
        add     %o3, %o1, %o3
342
 
343
Lgot_result:
344
 
345
        retl
346
         mov %o3, %o0           ! (delay slot) return the remainder in %o0
347
 
348
        ! .urem_patch: remainder computed with the udiv/umul instructions
        ! instead of the software loop:
        !       %o0 = %o0 - (%o0 / %o1) * %o1
        !   In:   %o0 = dividend, %o1 = divisor
        !   Out:  %o0 = remainder; %o2 and %y are overwritten.
        ! NOTE(review): the "_patch" name suggests this sequence is copied
        ! over .urem at run time on CPUs with hardware divide; confirm
        ! against the kernel setup code before changing its length or order.
        .globl  .urem_patch
349
.urem_patch:
350
        wr      %g0, 0x0, %y            ! clear %y
        ! NOTE(review): the three nops presumably cover the delayed write
        ! to %y before udiv reads it as the upper dividend word; confirm
        ! against the SPARC V8 manual.
351
        nop
352
        nop
353
        nop
354
        udiv    %o0, %o1, %o2           ! %o2 = quotient
355
        umul    %o2, %o1, %o2           ! %o2 = quotient * divisor
356
        retl
357
         sub    %o0, %o2, %o0           ! (delay slot) %o0 = dividend - quotient * divisor

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.