OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [config/] [sparc/] [lb1spc.asm] - Blame information for rev 404

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 282 jeremybenn
/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
2
   for the sparc processor.
3
 
4
   These routines are derived from the SPARC Architecture Manual, version 8,
5
   slightly edited to match the desired calling convention, and also to
6
   optimize them for our purposes.  */
7
 
8
/*
 * .umul -- 32-bit integer multiply built from mulscc multiply steps.
 *
 * In:     %o0 = multiplier (copied to %y), %o1 = multiplicand
 * Out:    %o0 = low 32 bits of the product (the long path notes that the
 *         upper 32 bits are not computed correctly and are not returned)
 * Writes: %o4, %o5, %y and the integer condition codes
 *
 * Two paths: when neither operand has a bit set above bit 11 the
 * 12-step mul_shortway sequence is used, otherwise the 32-step one.
 */
#ifdef L_mulsi3
9
.text
10
        .align 4
11
        .global .umul
12
        .proc 4
13
.umul:
14
        or      %o0, %o1, %o4   ! logical or of multiplier and multiplicand
15
        mov     %o0, %y         ! multiplier to Y register
16
        andncc  %o4, 0xfff, %o5 ! mask out lower 12 bits
17
        be      mul_shortway    ! can do it the short way
        ! The andcc below sits in the delay slot of the branch, so the
        ! partial product %o4 is zeroed on both paths.
18
        andcc   %g0, %g0, %o4   ! zero the partial product and clear NV cc
19
        !
20
        ! long multiply
21
        !
22
        mulscc  %o4, %o1, %o4   ! first iteration of 33
23
        mulscc  %o4, %o1, %o4
24
        mulscc  %o4, %o1, %o4
25
        mulscc  %o4, %o1, %o4
26
        mulscc  %o4, %o1, %o4
27
        mulscc  %o4, %o1, %o4
28
        mulscc  %o4, %o1, %o4
29
        mulscc  %o4, %o1, %o4
30
        mulscc  %o4, %o1, %o4
31
        mulscc  %o4, %o1, %o4
32
        mulscc  %o4, %o1, %o4
33
        mulscc  %o4, %o1, %o4
34
        mulscc  %o4, %o1, %o4
35
        mulscc  %o4, %o1, %o4
36
        mulscc  %o4, %o1, %o4
37
        mulscc  %o4, %o1, %o4
38
        mulscc  %o4, %o1, %o4
39
        mulscc  %o4, %o1, %o4
40
        mulscc  %o4, %o1, %o4
41
        mulscc  %o4, %o1, %o4
42
        mulscc  %o4, %o1, %o4
43
        mulscc  %o4, %o1, %o4
44
        mulscc  %o4, %o1, %o4
45
        mulscc  %o4, %o1, %o4
46
        mulscc  %o4, %o1, %o4
47
        mulscc  %o4, %o1, %o4
48
        mulscc  %o4, %o1, %o4
49
        mulscc  %o4, %o1, %o4
50
        mulscc  %o4, %o1, %o4
51
        mulscc  %o4, %o1, %o4
52
        mulscc  %o4, %o1, %o4
53
        mulscc  %o4, %o1, %o4   ! 32nd iteration
54
        mulscc  %o4, %g0, %o4   ! last iteration only shifts
55
        ! the upper 32 bits of product are wrong, but we do not care
56
        retl
        ! Delay slot of retl: the result is read back from %y.
57
        rd      %y, %o0
58
        !
59
        ! short multiply
60
        !
61
mul_shortway:
62
        mulscc  %o4, %o1, %o4   ! first iteration of 13
63
        mulscc  %o4, %o1, %o4
64
        mulscc  %o4, %o1, %o4
65
        mulscc  %o4, %o1, %o4
66
        mulscc  %o4, %o1, %o4
67
        mulscc  %o4, %o1, %o4
68
        mulscc  %o4, %o1, %o4
69
        mulscc  %o4, %o1, %o4
70
        mulscc  %o4, %o1, %o4
71
        mulscc  %o4, %o1, %o4
72
        mulscc  %o4, %o1, %o4
73
        mulscc  %o4, %o1, %o4   ! 12th iteration
74
        mulscc  %o4, %g0, %o4   ! last iteration only shifts
        ! The result is assembled from two pieces: %o4 supplies the
        ! high-order bits and %y (read into %o5) the low-order bits.
75
        rd      %y, %o5
76
        sll     %o4, 12, %o4    ! left shift partial product by 12 bits
77
        srl     %o5, 20, %o5    ! right shift partial product by 20 bits
78
        retl
79
        or      %o5, %o4, %o0   ! merge for true product
80
#endif
81
 
82
#ifdef L_divsi3
83
/*
84
 * Division and remainder, from Appendix E of the SPARC Version 8
85
 * Architecture Manual, with fixes from Gordon Irlam.
86
 */
87
 
88
/*
89
 * Input: dividend and divisor in %o0 and %o1 respectively.
90
 *
91
 * m4 parameters (the values used for this expansion are in parentheses):
92
 *  NAME        name of function to generate (.div)
93
 *  OP          OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (div)
94
 *  S           S=true => signed; S=false => unsigned (true)
95
 *
96
 * Algorithm parameters:
97
 *  N           how many bits per iteration we try to get (4)
98
 *  WORDSIZE    total number of bits (32)
99
 *
100
 * Derived constants:
101
 *  TOPBITS     number of bits in the top decade of a number
102
 *
103
 * Important variables:
104
 *  Q           the partial quotient under development (initially 0)
105
 *  R           the remainder so far, initially the dividend
106
 *  ITER        number of main division loop iterations required;
107
 *              equal to ceil(log2(quotient) / N).  Note that this
108
 *              is the log base (2^N) of the quotient.
109
 *  V           the current comparand, initially divisor*2^(ITER*N-1)
110
 *
111
 * Cost:
112
 *  Current estimate for non-large dividend is
113
 *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
114
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
115
 *  different path, as the upper bits of the quotient must be developed
116
 *  one bit at a time.
117
 */
118
        ! .udiv -- unsigned 32-bit divide entry point: %o0 / %o1.
        ! Clears the sign word %g3 (in the branch delay slot) and joins the
        ! shared division code at ready_to_divide, which returns the
        ! quotient in %o0.
        .global .udiv
119
        .align 4
120
        .proc 4
121
        .text
122
.udiv:
123
         b ready_to_divide
124
         mov 0, %g3             ! result is always positive
125
 
126
        ! .div -- signed 32-bit divide entry point: %o0 / %o1.
        ! Leaves the sign of the result in %g3 (xor of the two operands),
        ! negates whichever operands are negative, and then continues at
        ! ready_to_divide (by branch or by falling through).
        .global .div
127
        .align 4
128
        .proc 4
129
        .text
130
.div:
131
        ! compute sign of result; if neither is negative, no problem
132
        orcc    %o1, %o0, %g0   ! either negative?
133
        bge     ready_to_divide ! no, go do the divide
134
        xor     %o1, %o0, %g3   ! compute sign in any case
135
        tst     %o1
136
        bge     1f
        ! Delay slot: the sign of %o0 is tested on both paths, for the
        ! bge that follows when %o1 turned out to be negative.
137
        tst     %o0
138
        ! %o1 is definitely negative; %o0 might also be negative
139
        bge     ready_to_divide ! if %o0 not negative...
140
        sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
141
1:      ! %o0 is negative, %o1 is nonnegative
142
        sub     %g0, %o0, %o0   ! make %o0 nonnegative
143
 
144
 
145
! ready_to_divide -- common setup shared by .udiv and .div.
! From here on the operands are compared as unsigned values.
! Register roles established here (names from the header comment):
!   %o3  R, the running remainder (starts as the dividend %o0)
!   %o5  V, the comparand (starts as the divisor %o1)
!   %o2  Q, the quotient under development (cleared)
!   %o4  count of 4-bit divloop passes still to run (cleared)
!   %g1  1 << (32 - 4 - 1), the threshold that selects the scaling path
ready_to_divide:
146
 
147
        ! Ready to divide.  Compute size of quotient; scale comparand.
148
        orcc    %o1, %g0, %o5
149
        bne     1f
150
        mov     %o0, %o3
151
 
152
        ! Divide by zero trap.  If it returns, return 0 (about as
153
        ! wrong as possible, but that is what SunOS does...).
154
        ta      0x2             ! ST_DIV0
155
        retl
156
        clr     %o0
157
 
158
1:
159
        cmp     %o3, %o5                ! if %o1 exceeds %o0, done
160
        blu     got_result              ! (and algorithm fails otherwise)
        ! Delay slot: Q is cleared before either path continues, so the
        ! early exit above returns a quotient of zero.
161
        clr     %o2
162
        sethi   %hi(1 << (32 - 4 - 1)), %g1
163
        cmp     %o3, %g1
164
        blu     not_really_big
165
        clr     %o4
166
 
167
        ! Large-dividend path: reached when R (%o3) is not below %g1.
        ! Loop 1 scales V (%o5) by four bits per step, counting the steps in
        ! %o4, until V is no longer below %g1.  Loop 2/3 then doubles V one
        ! bit at a time, counting in %g2 (preset to 1), until V is no longer
        ! below R.  The single-bit steps counted in %g2 are carried out by
        ! do_single_div and single_divloop; control then passes to
        ! end_regular_divide, which runs divloop for the count left in %o4.
        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
168
        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
169
        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
170
        ! Compute ITER in an unorthodox manner: know we need to shift V into
171
        ! the top decade: so do not even bother to compare to R.
172
        1:
173
                cmp     %o5, %g1
174
                bgeu    3f
175
                mov     1, %g2
176
                sll     %o5, 4, %o5
177
                b       1b
178
                add     %o4, 1, %o4
179
 
180
        ! Now compute %g2.
181
        2:      addcc   %o5, %o5, %o5
182
                bcc     not_too_big
183
                add     %g2, 1, %g2
184
 
185
                ! We get here if the %o1 overflowed while shifting.
186
                ! This means that %o3 has the high-order bit set.
187
                ! Restore %o5 and subtract from %o3.
                ! (The value that was shifted is %o5, the scaled copy of
                ! %o1.  The add in the delay slot above has already run,
                ! which is why %g2 is decremented again below.)
188
                sll     %g1, 4, %g1     ! high order bit
189
                srl     %o5, 1, %o5     ! rest of %o5
190
                add     %o5, %g1, %o5
191
                b       do_single_div
192
                sub     %g2, 1, %g2
193
 
194
        not_too_big:
195
        3:      cmp     %o5, %o3
196
                blu     2b
197
                nop
198
                be      do_single_div
199
                nop
200
        /* NB: these are commented out in the V8-SPARC manual as well */
201
        /* (I do not understand this) */
202
        ! %o5 > %o3: went too far: back up 1 step
203
        !       srl     %o5, 1, %o5
204
        !       dec     %g2
205
        ! do single-bit divide steps
206
        !
207
        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
208
        ! first divide step without thinking.  BUT, the others are conditional,
209
        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
210
        ! order bit set in the first step, just falling into the regular
211
        ! division loop will mess up the first time around.
212
        ! So we unroll slightly...
213
        do_single_div:
214
                subcc   %g2, 1, %g2
215
                bl      end_regular_divide
216
                nop
217
                sub     %o3, %o5, %o3
218
                mov     1, %o2
219
                b       end_single_divloop
220
                nop
        ! One quotient bit per pass.  The branch below tests the sign of R
        ! set by the tst %o3 in the delay slot of the loop-closing bge.
221
        single_divloop:
222
                sll     %o2, 1, %o2
223
                bl      1f
224
                srl     %o5, 1, %o5
225
                ! %o3 >= 0
226
                sub     %o3, %o5, %o3
227
                b       2f
228
                add     %o2, 1, %o2
229
        1:      ! %o3 < 0
230
                add     %o3, %o5, %o3
231
                sub     %o2, 1, %o2
232
        2:
233
        end_single_divloop:
234
                subcc   %g2, 1, %g2
235
                bge     single_divloop
236
                tst     %o3
237
                b,a     end_regular_divide
238
 
239
! not_really_big -- scaling path for dividends below %g1.
! V (%o5) is shifted left four bits at a time until it exceeds R (%o3),
! with the shifts counted in %o4.  The addcc sits in the delay slot of
! bleu, so it also runs on the final, untaken pass; be is taken only if
! that addcc produced zero, and the sub in its delay slot takes one count
! back off on either path.
not_really_big:
240
1:
241
        sll     %o5, 4, %o5
242
        cmp     %o5, %o3
243
        bleu    1b
244
        addcc   %o4, 1, %o4
245
        be      got_result
246
        sub     %o4, 1, %o4
247
 
248
        tst     %o3     ! set up for initial iteration
249
! divloop -- one pass develops four quotient bits (N = 4) by non-restoring
! division, fully unrolled as a binary decision tree on the sign of R (%o3).
! At each depth V (%o5) is halved in the delay slot of the bl, then
! subtracted from R if R was non-negative or added to R if it was negative.
! Labels read L<depth>.<k>, where k is 16 plus the "accumulated bits" value
! given in the comment at that node.  Each leaf adds (accumulated*2 +/- 1)
! to Q (%o2) in the delay slot of its b 9f.
! On entry the condition codes must reflect the sign of R (tst %o3).
divloop:
250
        sll     %o2, 4, %o2
251
        ! depth 1, accumulated bits 0
252
        bl      L1.16
253
        srl     %o5,1,%o5
254
        ! remainder is positive
255
        subcc   %o3,%o5,%o3
256
        ! depth 2, accumulated bits 1
257
        bl      L2.17
258
        srl     %o5,1,%o5
259
        ! remainder is positive
260
        subcc   %o3,%o5,%o3
261
        ! depth 3, accumulated bits 3
262
        bl      L3.19
263
        srl     %o5,1,%o5
264
        ! remainder is positive
265
        subcc   %o3,%o5,%o3
266
        ! depth 4, accumulated bits 7
267
        bl      L4.23
268
        srl     %o5,1,%o5
269
        ! remainder is positive
270
        subcc   %o3,%o5,%o3
271
        b       9f
272
        add     %o2, (7*2+1), %o2
273
 
274
L4.23:
275
        ! remainder is negative
276
        addcc   %o3,%o5,%o3
277
        b       9f
278
        add     %o2, (7*2-1), %o2
279
 
280
 
281
L3.19:
282
        ! remainder is negative
283
        addcc   %o3,%o5,%o3
284
        ! depth 4, accumulated bits 5
285
        bl      L4.21
286
        srl     %o5,1,%o5
287
        ! remainder is positive
288
        subcc   %o3,%o5,%o3
289
        b       9f
290
        add     %o2, (5*2+1), %o2
291
 
292
L4.21:
293
        ! remainder is negative
294
        addcc   %o3,%o5,%o3
295
        b       9f
296
        add     %o2, (5*2-1), %o2
297
 
298
L2.17:
299
        ! remainder is negative
300
        addcc   %o3,%o5,%o3
301
        ! depth 3, accumulated bits 1
302
        bl      L3.17
303
        srl     %o5,1,%o5
304
        ! remainder is positive
305
        subcc   %o3,%o5,%o3
306
        ! depth 4, accumulated bits 3
307
        bl      L4.19
308
        srl     %o5,1,%o5
309
        ! remainder is positive
310
        subcc   %o3,%o5,%o3
311
        b       9f
312
        add     %o2, (3*2+1), %o2
313
 
314
L4.19:
315
        ! remainder is negative
316
        addcc   %o3,%o5,%o3
317
        b       9f
318
        add     %o2, (3*2-1), %o2
319
 
320
L3.17:
321
        ! remainder is negative
322
        addcc   %o3,%o5,%o3
323
        ! depth 4, accumulated bits 1
324
        bl      L4.17
325
        srl     %o5,1,%o5
326
        ! remainder is positive
327
        subcc   %o3,%o5,%o3
328
        b       9f
329
        add     %o2, (1*2+1), %o2
330
 
331
L4.17:
332
        ! remainder is negative
333
        addcc   %o3,%o5,%o3
334
        b       9f
335
        add     %o2, (1*2-1), %o2
336
 
337
L1.16:
338
        ! remainder is negative
339
        addcc   %o3,%o5,%o3
340
        ! depth 2, accumulated bits -1
341
        bl      L2.15
342
        srl     %o5,1,%o5
343
        ! remainder is positive
344
        subcc   %o3,%o5,%o3
345
        ! depth 3, accumulated bits -1
346
        bl      L3.15
347
        srl     %o5,1,%o5
348
        ! remainder is positive
349
        subcc   %o3,%o5,%o3
350
        ! depth 4, accumulated bits -1
351
        bl      L4.15
352
        srl     %o5,1,%o5
353
        ! remainder is positive
354
        subcc   %o3,%o5,%o3
355
        b       9f
356
        add     %o2, (-1*2+1), %o2
357
 
358
L4.15:
359
        ! remainder is negative
360
        addcc   %o3,%o5,%o3
361
        b       9f
362
        add     %o2, (-1*2-1), %o2
363
 
364
L3.15:
365
        ! remainder is negative
366
        addcc   %o3,%o5,%o3
367
        ! depth 4, accumulated bits -3
368
        bl      L4.13
369
        srl     %o5,1,%o5
370
        ! remainder is positive
371
        subcc   %o3,%o5,%o3
372
        b       9f
373
        add     %o2, (-3*2+1), %o2
374
 
375
L4.13:
376
        ! remainder is negative
377
        addcc   %o3,%o5,%o3
378
        b       9f
379
        add     %o2, (-3*2-1), %o2
380
 
381
L2.15:
382
        ! remainder is negative
383
        addcc   %o3,%o5,%o3
384
        ! depth 3, accumulated bits -3
385
        bl      L3.13
386
        srl     %o5,1,%o5
387
        ! remainder is positive
388
        subcc   %o3,%o5,%o3
389
        ! depth 4, accumulated bits -5
390
        bl      L4.11
391
        srl     %o5,1,%o5
392
        ! remainder is positive
393
        subcc   %o3,%o5,%o3
394
        b       9f
395
        add     %o2, (-5*2+1), %o2
396
 
397
L4.11:
398
        ! remainder is negative
399
        addcc   %o3,%o5,%o3
400
        b       9f
401
        add     %o2, (-5*2-1), %o2
402
 
403
L3.13:
404
        ! remainder is negative
405
        addcc   %o3,%o5,%o3
406
        ! depth 4, accumulated bits -7
407
        bl      L4.9
408
        srl     %o5,1,%o5
409
        ! remainder is positive
410
        subcc   %o3,%o5,%o3
411
        b       9f
412
        add     %o2, (-7*2+1), %o2
413
 
414
L4.9:
415
        ! remainder is negative
416
        addcc   %o3,%o5,%o3
417
        b       9f
418
        add     %o2, (-7*2-1), %o2
419
 
420
        ! Loop control and result delivery for the quotient routines.
        ! Every divloop leaf lands on 9:.  %o4 is decremented and, while it
        ! is still >= 0, another divloop pass runs; the tst %o3 in the delay
        ! slot supplies the sign of R both for that pass and for the
        ! fixup test that follows once the loop is finished.
        9:
421
end_regular_divide:
422
        subcc   %o4, 1, %o4
423
        bge     divloop
424
        tst     %o3
        ! Annulled branch: the delay-slot sub runs only when the branch is
        ! taken, i.e. only when the final remainder is negative.
425
        bl,a    got_result
426
        ! non-restoring fixup here (one instruction only!)
427
        sub     %o2, 1, %o2
428
 
429
 
        ! got_result: Q is in %o2.  It is negated when the sign word %g3 is
        ! negative (again via an annulled delay slot) and returned in %o0.
430
got_result:
431
        ! check to see if answer should be < 0
432
        tst     %g3
433
        bl,a    1f
434
        sub %g0, %o2, %o2
435
1:
436
        retl
437
        mov %o2, %o0
438
#endif
439
 
440
#ifdef L_modsi3
441
/* This implementation was taken from glibc:
442
 *
443
 * Input: dividend and divisor in %o0 and %o1 respectively.
444
 *
445
 * Algorithm parameters:
446
 *  N           how many bits per iteration we try to get (4)
447
 *  WORDSIZE    total number of bits (32)
448
 *
449
 * Derived constants:
450
 *  TOPBITS     number of bits in the top decade of a number
451
 *
452
 * Important variables:
453
 *  Q           the partial quotient under development (initially 0)
454
 *  R           the remainder so far, initially the dividend
455
 *  ITER        number of main division loop iterations required;
456
 *              equal to ceil(log2(quotient) / N).  Note that this
457
 *              is the log base (2^N) of the quotient.
458
 *  V           the current comparand, initially divisor*2^(ITER*N-1)
459
 *
460
 * Cost:
461
 *  Current estimate for non-large dividend is
462
 *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
463
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
464
 *  different path, as the upper bits of the quotient must be developed
465
 *  one bit at a time.
466
 */
467
! .urem -- unsigned 32-bit remainder entry point: %o0 % %o1.
! Clears the sign word %g3 (in the branch delay slot) and joins the shared
! code at divide, which returns the remainder in %o0.
.text
468
        .align 4
469
        .global .urem
470
        .proc 4
471
.urem:
472
        b       divide
473
        mov     0, %g3          ! result always positive
474
 
475
        ! .rem -- signed 32-bit remainder entry point: %o0 % %o1.
        ! Saves the original dividend in %g3 so that the result can be given
        ! the sign of the dividend, negates whichever operands are negative,
        ! and continues at label 2, just ahead of divide.
        .align 4
476
        .global .rem
477
        .proc 4
478
.rem:
479
        ! compute sign of result; if neither is negative, no problem
480
        orcc    %o1, %o0, %g0   ! either negative?
481
        bge     2f                      ! no, go do the divide
482
        mov     %o0, %g3                ! sign of remainder matches %o0
483
        tst     %o1
484
        bge     1f
        ! Delay slot: the sign of %o0 is tested on both paths, for the
        ! bge that follows when %o1 turned out to be negative.
485
        tst     %o0
486
        ! %o1 is definitely negative; %o0 might also be negative
487
        bge     2f                      ! if %o0 not negative...
488
        sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
489
1:      ! %o0 is negative, %o1 is nonnegative
490
        sub     %g0, %o0, %o0   ! make %o0 nonnegative
491
2:
492
 
493
        ! Ready to divide.  Compute size of quotient; scale comparand.
494
! divide -- common setup shared by .urem and .rem.
! From here on the operands are compared as unsigned values.
! Register roles established here (names from the header comment):
!   %o3  R, the running remainder (starts as the dividend %o0)
!   %o5  V, the comparand (starts as the divisor %o1)
!   %o2  Q, the quotient under development (cleared)
!   %o4  count of 4-bit divloop passes still to run (cleared)
!   %g1  1 << (32 - 4 - 1), the threshold that selects the scaling path
! %o1 itself is left holding the divisor; the final fixup adds it to R.
divide:
495
        orcc    %o1, %g0, %o5
496
        bne     1f
497
        mov     %o0, %o3
498
 
499
                ! Divide by zero trap.  If it returns, return 0 (about as
500
                ! wrong as possible, but that is what SunOS does...).
501
                ta      0x2   !ST_DIV0
502
                retl
503
                clr     %o0
504
 
505
1:
506
        cmp     %o3, %o5                ! if %o1 exceeds %o0, done
507
        blu     got_result              ! (and algorithm fails otherwise)
        ! Delay slot: Q is cleared on both paths.  On the early exit above
        ! R (%o3) still holds the dividend, which is then the remainder.
508
        clr     %o2
509
        sethi   %hi(1 << (32 - 4 - 1)), %g1
510
        cmp     %o3, %g1
511
        blu     not_really_big
512
        clr     %o4
513
 
514
        ! Large-dividend path: reached when R (%o3) is not below %g1.
        ! Loop 1 scales V (%o5) by four bits per step, counting the steps in
        ! %o4, until V is no longer below %g1.  Loop 2/3 then doubles V one
        ! bit at a time, counting in %g2 (preset to 1), until V is no longer
        ! below R.  The single-bit steps counted in %g2 are carried out by
        ! do_single_div and single_divloop; control then passes to
        ! end_regular_divide, which runs divloop for the count left in %o4.
        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
515
        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
516
        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
517
        ! Compute ITER in an unorthodox manner: know we need to shift V into
518
        ! the top decade: so do not even bother to compare to R.
519
        1:
520
                cmp     %o5, %g1
521
                bgeu    3f
522
                mov     1, %g2
523
                sll     %o5, 4, %o5
524
                b       1b
525
                add     %o4, 1, %o4
526
 
527
        ! Now compute %g2.
528
        2:      addcc   %o5, %o5, %o5
529
                bcc     not_too_big
530
                add     %g2, 1, %g2
531
 
532
                ! We get here if the %o1 overflowed while shifting.
533
                ! This means that %o3 has the high-order bit set.
534
                ! Restore %o5 and subtract from %o3.
                ! (The value that was shifted is %o5, the scaled copy of
                ! %o1.  The add in the delay slot above has already run,
                ! which is why %g2 is decremented again below.)
535
                sll     %g1, 4, %g1     ! high order bit
536
                srl     %o5, 1, %o5             ! rest of %o5
537
                add     %o5, %g1, %o5
538
                b       do_single_div
539
                sub     %g2, 1, %g2
540
 
541
        not_too_big:
542
        3:      cmp     %o5, %o3
543
                blu     2b
544
                nop
545
                be      do_single_div
546
                nop
547
        /* NB: these are commented out in the V8-SPARC manual as well */
548
        /* (I do not understand this) */
549
        ! %o5 > %o3: went too far: back up 1 step
550
        !       srl     %o5, 1, %o5
551
        !       dec     %g2
552
        ! do single-bit divide steps
553
        !
554
        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
555
        ! first divide step without thinking.  BUT, the others are conditional,
556
        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
557
        ! order bit set in the first step, just falling into the regular
558
        ! division loop will mess up the first time around.
559
        ! So we unroll slightly...
560
        do_single_div:
561
                subcc   %g2, 1, %g2
562
                bl      end_regular_divide
563
                nop
564
                sub     %o3, %o5, %o3
565
                mov     1, %o2
566
                b       end_single_divloop
567
                nop
        ! One quotient bit per pass.  The branch below tests the sign of R
        ! set by the tst %o3 in the delay slot of the loop-closing bge.
568
        single_divloop:
569
                sll     %o2, 1, %o2
570
                bl      1f
571
                srl     %o5, 1, %o5
572
                ! %o3 >= 0
573
                sub     %o3, %o5, %o3
574
                b       2f
575
                add     %o2, 1, %o2
576
        1:      ! %o3 < 0
577
                add     %o3, %o5, %o3
578
                sub     %o2, 1, %o2
579
        2:
580
        end_single_divloop:
581
                subcc   %g2, 1, %g2
582
                bge     single_divloop
583
                tst     %o3
584
                b,a     end_regular_divide
585
 
586
! not_really_big -- scaling path for dividends below %g1.
! V (%o5) is shifted left four bits at a time until it exceeds R (%o3),
! with the shifts counted in %o4.  The addcc sits in the delay slot of
! bleu, so it also runs on the final, untaken pass; be is taken only if
! that addcc produced zero, and the sub in its delay slot takes one count
! back off on either path.
not_really_big:
587
1:
588
        sll     %o5, 4, %o5
589
        cmp     %o5, %o3
590
        bleu    1b
591
        addcc   %o4, 1, %o4
592
        be      got_result
593
        sub     %o4, 1, %o4
594
 
595
        tst     %o3     ! set up for initial iteration
596
! divloop -- one pass develops four quotient bits (N = 4) by non-restoring
! division, fully unrolled as a binary decision tree on the sign of R (%o3).
! At each depth V (%o5) is halved in the delay slot of the bl, then
! subtracted from R if R was non-negative or added to R if it was negative.
! Labels read L<depth>.<k>, where k is 16 plus the "accumulated bits" value
! given in the comment at that node.  Each leaf adds (accumulated*2 +/- 1)
! to Q (%o2) in the delay slot of its b 9f.
! On entry the condition codes must reflect the sign of R (tst %o3).
! The remainder routines return R, not Q, but Q is still maintained here.
divloop:
597
        sll     %o2, 4, %o2
598
                ! depth 1, accumulated bits 0
599
        bl      L1.16
600
        srl     %o5,1,%o5
601
        ! remainder is positive
602
        subcc   %o3,%o5,%o3
603
        ! depth 2, accumulated bits 1
604
        bl      L2.17
605
        srl     %o5,1,%o5
606
        ! remainder is positive
607
        subcc   %o3,%o5,%o3
608
        ! depth 3, accumulated bits 3
609
        bl      L3.19
610
        srl     %o5,1,%o5
611
        ! remainder is positive
612
        subcc   %o3,%o5,%o3
613
        ! depth 4, accumulated bits 7
614
        bl      L4.23
615
        srl     %o5,1,%o5
616
        ! remainder is positive
617
        subcc   %o3,%o5,%o3
618
        b       9f
619
        add     %o2, (7*2+1), %o2
620
L4.23:
621
        ! remainder is negative
622
        addcc   %o3,%o5,%o3
623
        b       9f
624
        add     %o2, (7*2-1), %o2
625
 
626
L3.19:
627
        ! remainder is negative
628
        addcc   %o3,%o5,%o3
629
        ! depth 4, accumulated bits 5
630
        bl      L4.21
631
        srl     %o5,1,%o5
632
        ! remainder is positive
633
        subcc   %o3,%o5,%o3
634
        b       9f
635
        add     %o2, (5*2+1), %o2
636
 
637
L4.21:
638
        ! remainder is negative
639
        addcc   %o3,%o5,%o3
640
        b       9f
641
        add     %o2, (5*2-1), %o2
642
 
643
L2.17:
644
        ! remainder is negative
645
        addcc   %o3,%o5,%o3
646
        ! depth 3, accumulated bits 1
647
        bl      L3.17
648
        srl     %o5,1,%o5
649
        ! remainder is positive
650
        subcc   %o3,%o5,%o3
651
        ! depth 4, accumulated bits 3
652
        bl      L4.19
653
        srl     %o5,1,%o5
654
        ! remainder is positive
655
        subcc   %o3,%o5,%o3
656
        b       9f
657
        add     %o2, (3*2+1), %o2
658
 
659
L4.19:
660
        ! remainder is negative
661
        addcc   %o3,%o5,%o3
662
        b       9f
663
        add     %o2, (3*2-1), %o2
664
 
665
L3.17:
666
        ! remainder is negative
667
        addcc   %o3,%o5,%o3
668
        ! depth 4, accumulated bits 1
669
        bl      L4.17
670
        srl     %o5,1,%o5
671
        ! remainder is positive
672
        subcc   %o3,%o5,%o3
673
        b       9f
674
        add     %o2, (1*2+1), %o2
675
 
676
L4.17:
677
        ! remainder is negative
678
        addcc   %o3,%o5,%o3
679
        b       9f
680
        add     %o2, (1*2-1), %o2
681
 
682
L1.16:
683
        ! remainder is negative
684
        addcc   %o3,%o5,%o3
685
        ! depth 2, accumulated bits -1
686
        bl      L2.15
687
        srl     %o5,1,%o5
688
        ! remainder is positive
689
        subcc   %o3,%o5,%o3
690
        ! depth 3, accumulated bits -1
691
        bl      L3.15
692
        srl     %o5,1,%o5
693
        ! remainder is positive
694
        subcc   %o3,%o5,%o3
695
        ! depth 4, accumulated bits -1
696
        bl      L4.15
697
        srl     %o5,1,%o5
698
        ! remainder is positive
699
        subcc   %o3,%o5,%o3
700
        b       9f
701
        add     %o2, (-1*2+1), %o2
702
 
703
L4.15:
704
        ! remainder is negative
705
        addcc   %o3,%o5,%o3
706
        b       9f
707
        add     %o2, (-1*2-1), %o2
708
 
709
L3.15:
710
        ! remainder is negative
711
        addcc   %o3,%o5,%o3
712
        ! depth 4, accumulated bits -3
713
        bl      L4.13
714
        srl     %o5,1,%o5
715
        ! remainder is positive
716
        subcc   %o3,%o5,%o3
717
        b       9f
718
        add     %o2, (-3*2+1), %o2
719
 
720
L4.13:
721
        ! remainder is negative
722
        addcc   %o3,%o5,%o3
723
        b       9f
724
        add     %o2, (-3*2-1), %o2
725
 
726
L2.15:
727
        ! remainder is negative
728
        addcc   %o3,%o5,%o3
729
        ! depth 3, accumulated bits -3
730
        bl      L3.13
731
        srl     %o5,1,%o5
732
        ! remainder is positive
733
        subcc   %o3,%o5,%o3
734
        ! depth 4, accumulated bits -5
735
        bl      L4.11
736
        srl     %o5,1,%o5
737
        ! remainder is positive
738
        subcc   %o3,%o5,%o3
739
        b       9f
740
        add     %o2, (-5*2+1), %o2
741
 
742
L4.11:
743
        ! remainder is negative
744
        addcc   %o3,%o5,%o3
745
        b       9f
746
        add     %o2, (-5*2-1), %o2
747
 
748
L3.13:
749
        ! remainder is negative
750
        addcc   %o3,%o5,%o3
751
        ! depth 4, accumulated bits -7
752
        bl      L4.9
753
        srl     %o5,1,%o5
754
        ! remainder is positive
755
        subcc   %o3,%o5,%o3
756
        b       9f
757
        add     %o2, (-7*2+1), %o2
758
 
759
L4.9:
760
        ! remainder is negative
761
        addcc   %o3,%o5,%o3
762
        b       9f
763
        add     %o2, (-7*2-1), %o2
764
 
765
        ! Loop control and result delivery for the remainder routines.
        ! Every divloop leaf lands on 9:.  %o4 is decremented and, while it
        ! is still >= 0, another divloop pass runs; the tst %o3 in the delay
        ! slot supplies the sign of R both for that pass and for the
        ! fixup test that follows once the loop is finished.
        9:
766
end_regular_divide:
767
        subcc   %o4, 1, %o4
768
        bge     divloop
769
        tst     %o3
        ! Annulled branch: the delay-slot add runs only when the branch is
        ! taken, i.e. only when the final remainder is negative; it adds
        ! the divisor (%o1) back onto R.
770
        bl,a    got_result
771
        ! non-restoring fixup here (one instruction only!)
772
        add     %o3, %o1, %o3
773
 
        ! got_result: R is in %o3.  It is negated when %g3 is negative
        ! (again via an annulled delay slot) and returned in %o0.
774
got_result:
775
        ! check to see if answer should be < 0
776
        tst     %g3
777
        bl,a    1f
778
        sub %g0, %o3, %o3
779
1:
780
        retl
781
        mov %o3, %o0
782
 
783
#endif
784
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.