OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [sh/] [lib1funcs.S] - Blame information for rev 734

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 734 jeremybenn
/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2
   2004, 2005, 2006, 2009
3
   Free Software Foundation, Inc.
4
 
5
This file is free software; you can redistribute it and/or modify it
6
under the terms of the GNU General Public License as published by the
7
Free Software Foundation; either version 3, or (at your option) any
8
later version.
9
 
10
This file is distributed in the hope that it will be useful, but
11
WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
General Public License for more details.
14
 
15
Under Section 7 of GPL version 3, you are granted additional
16
permissions described in the GCC Runtime Library Exception, version
17
3.1, as published by the Free Software Foundation.
18
 
19
You should have received a copy of the GNU General Public License and
20
a copy of the GCC Runtime Library Exception along with this program;
21
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
 
25
!! libgcc routines for the Renesas / SuperH SH CPUs.
26
!! Contributed by Steve Chamberlain.
27
!! sac@cygnus.com
28
 
29
!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
30
!! recoded in assembly by Toshiyasu Morita
31
!! tm@netcom.com
32
 
#if defined(__ELF__) && defined(__linux__)
/* Emit an empty .note.GNU-stack section (no flags, so not executable)
   and then return to whatever section was current before it.  */
.section .note.GNU-stack,"",%progbits
.previous
#endif

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

#include "lib1funcs.h"

/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
   so it is more convenient to define NO_FPSCR_VALUES here than to
   define it on the command line.  */
#if defined __vxworks && defined __PIC__
#define NO_FPSCR_VALUES
#endif
50
 
51
#if ! __SH5__
#ifdef L_ashiftrt

!
! GLOBAL(ashiftrt_r4_0) ... GLOBAL(ashiftrt_r4_32)
!
! One entry point per constant shift count N: arithmetic right shift
! of r4 by N, performed in place.
!
! Entry:
!
! r4: Value to shift
!
! Exit:
!
! r4: Value shifted right arithmetically by N
!
! Destroys:
!
! T bit (written by shar, rotcl and subc; ashiftrt_r4_0 leaves it alone)
!
! The entry points are laid out as fall-through chains of single-bit
! shar instructions.  A chain ends either in a final shar (counts
! 1..15) or in a fixed logical shift by 16 or 24 followed by a sign
! extension (counts 16..23 and 24..30).  The instruction after each
! rts sits in its delay slot and still executes before the return.
!

        .global GLOBAL(ashiftrt_r4_0)
        .global GLOBAL(ashiftrt_r4_1)
        .global GLOBAL(ashiftrt_r4_2)
        .global GLOBAL(ashiftrt_r4_3)
        .global GLOBAL(ashiftrt_r4_4)
        .global GLOBAL(ashiftrt_r4_5)
        .global GLOBAL(ashiftrt_r4_6)
        .global GLOBAL(ashiftrt_r4_7)
        .global GLOBAL(ashiftrt_r4_8)
        .global GLOBAL(ashiftrt_r4_9)
        .global GLOBAL(ashiftrt_r4_10)
        .global GLOBAL(ashiftrt_r4_11)
        .global GLOBAL(ashiftrt_r4_12)
        .global GLOBAL(ashiftrt_r4_13)
        .global GLOBAL(ashiftrt_r4_14)
        .global GLOBAL(ashiftrt_r4_15)
        .global GLOBAL(ashiftrt_r4_16)
        .global GLOBAL(ashiftrt_r4_17)
        .global GLOBAL(ashiftrt_r4_18)
        .global GLOBAL(ashiftrt_r4_19)
        .global GLOBAL(ashiftrt_r4_20)
        .global GLOBAL(ashiftrt_r4_21)
        .global GLOBAL(ashiftrt_r4_22)
        .global GLOBAL(ashiftrt_r4_23)
        .global GLOBAL(ashiftrt_r4_24)
        .global GLOBAL(ashiftrt_r4_25)
        .global GLOBAL(ashiftrt_r4_26)
        .global GLOBAL(ashiftrt_r4_27)
        .global GLOBAL(ashiftrt_r4_28)
        .global GLOBAL(ashiftrt_r4_29)
        .global GLOBAL(ashiftrt_r4_30)
        .global GLOBAL(ashiftrt_r4_31)
        .global GLOBAL(ashiftrt_r4_32)

        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))

        .align  1
! Counts 31 and 32 share one body: the result is 0 or -1 depending
! only on the sign bit.
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
        rotcl   r4              ! T = sign bit of r4
        rts
        subc    r4,r4           ! delay slot: r4 = r4 - r4 - T = 0 or -1

! Counts 25..30: one shar per count above 24, then the shift by 24.
GLOBAL(ashiftrt_r4_30):
        shar    r4
GLOBAL(ashiftrt_r4_29):
        shar    r4
GLOBAL(ashiftrt_r4_28):
        shar    r4
GLOBAL(ashiftrt_r4_27):
        shar    r4
GLOBAL(ashiftrt_r4_26):
        shar    r4
GLOBAL(ashiftrt_r4_25):
        shar    r4
GLOBAL(ashiftrt_r4_24):
        shlr16  r4              ! logical shift by 16 + 8 = 24 ...
        shlr8   r4
        rts
        exts.b  r4,r4           ! delay slot: ... then sign-extend the byte left

! Counts 17..23: one shar per count above 16, then the shift by 16.
GLOBAL(ashiftrt_r4_23):
        shar    r4
GLOBAL(ashiftrt_r4_22):
        shar    r4
GLOBAL(ashiftrt_r4_21):
        shar    r4
GLOBAL(ashiftrt_r4_20):
        shar    r4
GLOBAL(ashiftrt_r4_19):
        shar    r4
GLOBAL(ashiftrt_r4_18):
        shar    r4
GLOBAL(ashiftrt_r4_17):
        shar    r4
GLOBAL(ashiftrt_r4_16):
        shlr16  r4              ! logical shift by 16 ...
        rts
        exts.w  r4,r4           ! delay slot: ... then sign-extend the word left

! Counts 1..15: pure shar chain; the last shar sits in the rts delay slot.
GLOBAL(ashiftrt_r4_15):
        shar    r4
GLOBAL(ashiftrt_r4_14):
        shar    r4
GLOBAL(ashiftrt_r4_13):
        shar    r4
GLOBAL(ashiftrt_r4_12):
        shar    r4
GLOBAL(ashiftrt_r4_11):
        shar    r4
GLOBAL(ashiftrt_r4_10):
        shar    r4
GLOBAL(ashiftrt_r4_9):
        shar    r4
GLOBAL(ashiftrt_r4_8):
        shar    r4
GLOBAL(ashiftrt_r4_7):
        shar    r4
GLOBAL(ashiftrt_r4_6):
        shar    r4
GLOBAL(ashiftrt_r4_5):
        shar    r4
GLOBAL(ashiftrt_r4_4):
        shar    r4
GLOBAL(ashiftrt_r4_3):
        shar    r4
GLOBAL(ashiftrt_r4_2):
        shar    r4
GLOBAL(ashiftrt_r4_1):
        rts
        shar    r4              ! delay slot: final single-bit shift

! Count 0: nothing to do.
GLOBAL(ashiftrt_r4_0):
        rts
        nop

        ENDFUNC(GLOBAL(ashiftrt_r4_0))
        ENDFUNC(GLOBAL(ashiftrt_r4_1))
        ENDFUNC(GLOBAL(ashiftrt_r4_2))
        ENDFUNC(GLOBAL(ashiftrt_r4_3))
        ENDFUNC(GLOBAL(ashiftrt_r4_4))
        ENDFUNC(GLOBAL(ashiftrt_r4_5))
        ENDFUNC(GLOBAL(ashiftrt_r4_6))
        ENDFUNC(GLOBAL(ashiftrt_r4_7))
        ENDFUNC(GLOBAL(ashiftrt_r4_8))
        ENDFUNC(GLOBAL(ashiftrt_r4_9))
        ENDFUNC(GLOBAL(ashiftrt_r4_10))
        ENDFUNC(GLOBAL(ashiftrt_r4_11))
        ENDFUNC(GLOBAL(ashiftrt_r4_12))
        ENDFUNC(GLOBAL(ashiftrt_r4_13))
        ENDFUNC(GLOBAL(ashiftrt_r4_14))
        ENDFUNC(GLOBAL(ashiftrt_r4_15))
        ENDFUNC(GLOBAL(ashiftrt_r4_16))
        ENDFUNC(GLOBAL(ashiftrt_r4_17))
        ENDFUNC(GLOBAL(ashiftrt_r4_18))
        ENDFUNC(GLOBAL(ashiftrt_r4_19))
        ENDFUNC(GLOBAL(ashiftrt_r4_20))
        ENDFUNC(GLOBAL(ashiftrt_r4_21))
        ENDFUNC(GLOBAL(ashiftrt_r4_22))
        ENDFUNC(GLOBAL(ashiftrt_r4_23))
        ENDFUNC(GLOBAL(ashiftrt_r4_24))
        ENDFUNC(GLOBAL(ashiftrt_r4_25))
        ENDFUNC(GLOBAL(ashiftrt_r4_26))
        ENDFUNC(GLOBAL(ashiftrt_r4_27))
        ENDFUNC(GLOBAL(ashiftrt_r4_28))
        ENDFUNC(GLOBAL(ashiftrt_r4_29))
        ENDFUNC(GLOBAL(ashiftrt_r4_30))
        ENDFUNC(GLOBAL(ashiftrt_r4_31))
        ENDFUNC(GLOBAL(ashiftrt_r4_32))
#endif
235
 
#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Arithmetic right shift by a run-time count.  Only the low five bits
! of the count are used (it is masked with 31).
!
! Entry:
!
! r4: Value to shift
! r5: Shifts
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the masked count, then with the table offset)
! T bit (written by shar, rotcl and subc on the paths that use them)
!
! Dispatch goes through a table of byte offsets, one per count, each
! measured from the start of the table to the matching entry label.
! On SH1, which lacks braf, the offset is added to the table address
! in r0 and reached with jmp.  Otherwise braf is used; its target is
! the address of the braf plus 4 plus r5, so the table has to start
! directly after the delay slot.  That holds because the six 2-byte
! instructions from the 4-byte aligned entry end on a 4-byte boundary
! and the .align before the table therefore inserts no padding.
!

        .global GLOBAL(ashrsi3)
        HIDDEN_FUNC(GLOBAL(ashrsi3))
        .align  2
GLOBAL(ashrsi3):
        mov     #31,r0
        and     r0,r5           ! r5 = count & 31
        mova    LOCAL(ashrsi3_table),r0
        mov.b   @(r0,r5),r5     ! r5 = offset of the entry for this count
#ifdef __sh1__
        add     r5,r0           ! r0 = absolute address of the entry
        jmp     @r0
#else
        braf    r5
#endif
        mov     r4,r0           ! delay slot: working copy of the value

        .align  2
LOCAL(ashrsi3_table):
        .byte           LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
        .byte           LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

! Count 31: the result is 0 or -1 depending only on the sign bit.
LOCAL(ashrsi3_31):
        rotcl   r0              ! T = sign bit of r0
        rts
        subc    r0,r0           ! delay slot: r0 = r0 - r0 - T = 0 or -1

! Counts 25..30: one shar per count above 24, then the shift by 24.
LOCAL(ashrsi3_30):
        shar    r0
LOCAL(ashrsi3_29):
        shar    r0
LOCAL(ashrsi3_28):
        shar    r0
LOCAL(ashrsi3_27):
        shar    r0
LOCAL(ashrsi3_26):
        shar    r0
LOCAL(ashrsi3_25):
        shar    r0
LOCAL(ashrsi3_24):
        shlr16  r0              ! logical shift by 16 + 8 = 24 ...
        shlr8   r0
        rts
        exts.b  r0,r0           ! delay slot: ... then sign-extend the byte left

! Counts 17..23: one shar per count above 16, then the shift by 16.
LOCAL(ashrsi3_23):
        shar    r0
LOCAL(ashrsi3_22):
        shar    r0
LOCAL(ashrsi3_21):
        shar    r0
LOCAL(ashrsi3_20):
        shar    r0
LOCAL(ashrsi3_19):
        shar    r0
LOCAL(ashrsi3_18):
        shar    r0
LOCAL(ashrsi3_17):
        shar    r0
LOCAL(ashrsi3_16):
        shlr16  r0              ! logical shift by 16 ...
        rts
        exts.w  r0,r0           ! delay slot: ... then sign-extend the word left

! Counts 1..15: pure shar chain; the last shar sits in the rts delay slot.
LOCAL(ashrsi3_15):
        shar    r0
LOCAL(ashrsi3_14):
        shar    r0
LOCAL(ashrsi3_13):
        shar    r0
LOCAL(ashrsi3_12):
        shar    r0
LOCAL(ashrsi3_11):
        shar    r0
LOCAL(ashrsi3_10):
        shar    r0
LOCAL(ashrsi3_9):
        shar    r0
LOCAL(ashrsi3_8):
        shar    r0
LOCAL(ashrsi3_7):
        shar    r0
LOCAL(ashrsi3_6):
        shar    r0
LOCAL(ashrsi3_5):
        shar    r0
LOCAL(ashrsi3_4):
        shar    r0
LOCAL(ashrsi3_3):
        shar    r0
LOCAL(ashrsi3_2):
        shar    r0
LOCAL(ashrsi3_1):
        rts
        shar    r0              ! delay slot: final single-bit shift

! Count 0: r0 already holds the unshifted value.
LOCAL(ashrsi3_0):
        rts
        nop

        ENDFUNC(GLOBAL(ashrsi3))
#endif
386
 
#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
!
! Left shift by a run-time count.  Only the low five bits of the count
! are used (it is masked with 31).
!
! Entry:
!
! r4: Value to shift
! r5: Shifts
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the masked count, then with the table offset)
! T bit (written by the single-bit shll used for odd counts)
!
! Dispatch goes through a table of byte offsets, one per count, each
! measured from the start of the table to the matching entry label.
! On SH1, which lacks braf, the offset is added to the table address
! in r0 and reached with jmp.  Otherwise braf is used; its target is
! the address of the braf plus 4 plus r5, so the table has to start
! directly after the delay slot.  That holds because the six 2-byte
! instructions from the 4-byte aligned entry end on a 4-byte boundary
! and the .align before the table therefore inserts no padding.
!
! The shift bodies are grouped by the fixed tail they share (shll,
! shll2, shll8, shll16 and combinations); within a group each earlier
! entry adds one more shll2 and falls through.
!
        .global GLOBAL(ashlsi3)
        HIDDEN_FUNC(GLOBAL(ashlsi3))
        .align  2
GLOBAL(ashlsi3):
        mov     #31,r0
        and     r0,r5           ! r5 = count & 31
        mova    LOCAL(ashlsi3_table),r0
        mov.b   @(r0,r5),r5     ! r5 = offset of the entry for this count
#ifdef __sh1__
        add     r5,r0           ! r0 = absolute address of the entry
        jmp     @r0
#else
        braf    r5
#endif
        mov     r4,r0           ! delay slot: working copy of the value

        .align  2
LOCAL(ashlsi3_table):
        .byte           LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
        .byte           LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)

! Counts 6, 4, 2: shll2 steps, the last one in the rts delay slot.
LOCAL(ashlsi3_6):
        shll2   r0
LOCAL(ashlsi3_4):
        shll2   r0
LOCAL(ashlsi3_2):
        rts
        shll2   r0

! Counts 7, 5, 3, 1: shll2 steps plus a final shll in the delay slot.
LOCAL(ashlsi3_7):
        shll2   r0
LOCAL(ashlsi3_5):
        shll2   r0
LOCAL(ashlsi3_3):
        shll2   r0
LOCAL(ashlsi3_1):
        rts
        shll    r0

! Counts 14, 12, 10, 8: shll2 steps plus shll8 in the delay slot.
LOCAL(ashlsi3_14):
        shll2   r0
LOCAL(ashlsi3_12):
        shll2   r0
LOCAL(ashlsi3_10):
        shll2   r0
LOCAL(ashlsi3_8):
        rts
        shll8   r0

! Counts 15, 13, 11, 9: shll2 steps, shll8, then shll in the delay slot.
LOCAL(ashlsi3_15):
        shll2   r0
LOCAL(ashlsi3_13):
        shll2   r0
LOCAL(ashlsi3_11):
        shll2   r0
LOCAL(ashlsi3_9):
        shll8   r0
        rts
        shll    r0

! Counts 22, 20, 18, 16: shll2 steps plus shll16 in the delay slot.
LOCAL(ashlsi3_22):
        shll2   r0
LOCAL(ashlsi3_20):
        shll2   r0
LOCAL(ashlsi3_18):
        shll2   r0
LOCAL(ashlsi3_16):
        rts
        shll16  r0

! Counts 23, 21, 19, 17: shll2 steps, shll16, then shll in the delay slot.
LOCAL(ashlsi3_23):
        shll2   r0
LOCAL(ashlsi3_21):
        shll2   r0
LOCAL(ashlsi3_19):
        shll2   r0
LOCAL(ashlsi3_17):
        shll16  r0
        rts
        shll    r0

! Counts 30, 28, 26, 24: shll2 steps, shll16, then shll8 in the delay slot.
LOCAL(ashlsi3_30):
        shll2   r0
LOCAL(ashlsi3_28):
        shll2   r0
LOCAL(ashlsi3_26):
        shll2   r0
LOCAL(ashlsi3_24):
        shll16  r0
        rts
        shll8   r0

! Counts 31, 29, 27, 25: shll2 steps, shll16, shll8, then shll in the
! delay slot.
LOCAL(ashlsi3_31):
        shll2   r0
LOCAL(ashlsi3_29):
        shll2   r0
LOCAL(ashlsi3_27):
        shll2   r0
LOCAL(ashlsi3_25):
        shll16  r0
        shll8   r0
        rts
        shll    r0

! Count 0: r0 already holds the unshifted value.
LOCAL(ashlsi3_0):
        rts
        nop

        ENDFUNC(GLOBAL(ashlsi3))
#endif
545
 
#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
!
! Logical right shift by a run-time count.  Only the low five bits of
! the count are used (it is masked with 31).
!
! Entry:
!
! r4: Value to shift
! r5: Shifts
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the masked count, then with the table offset)
! T bit (written by the single-bit shlr used for odd counts)
!
! Dispatch goes through a table of byte offsets, one per count, each
! measured from the start of the table to the matching entry label.
! On SH1, which lacks braf, the offset is added to the table address
! in r0 and reached with jmp.  Otherwise braf is used; its target is
! the address of the braf plus 4 plus r5, so the table has to start
! directly after the delay slot.  That holds because the six 2-byte
! instructions from the 4-byte aligned entry end on a 4-byte boundary
! and the .align before the table therefore inserts no padding.
!
! The shift bodies are grouped by the fixed tail they share (shlr,
! shlr2, shlr8, shlr16 and combinations); within a group each earlier
! entry adds one more shlr2 and falls through.
!
        .global GLOBAL(lshrsi3)
        HIDDEN_FUNC(GLOBAL(lshrsi3))
        .align  2
GLOBAL(lshrsi3):
        mov     #31,r0
        and     r0,r5           ! r5 = count & 31
        mova    LOCAL(lshrsi3_table),r0
        mov.b   @(r0,r5),r5     ! r5 = offset of the entry for this count
#ifdef __sh1__
        add     r5,r0           ! r0 = absolute address of the entry
        jmp     @r0
#else
        braf    r5
#endif
        mov     r4,r0           ! delay slot: working copy of the value

        .align  2
LOCAL(lshrsi3_table):
        .byte           LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
        .byte           LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)

! Counts 6, 4, 2: shlr2 steps, the last one in the rts delay slot.
LOCAL(lshrsi3_6):
        shlr2   r0
LOCAL(lshrsi3_4):
        shlr2   r0
LOCAL(lshrsi3_2):
        rts
        shlr2   r0

! Counts 7, 5, 3, 1: shlr2 steps plus a final shlr in the delay slot.
LOCAL(lshrsi3_7):
        shlr2   r0
LOCAL(lshrsi3_5):
        shlr2   r0
LOCAL(lshrsi3_3):
        shlr2   r0
LOCAL(lshrsi3_1):
        rts
        shlr    r0

! Counts 14, 12, 10, 8: shlr2 steps plus shlr8 in the delay slot.
LOCAL(lshrsi3_14):
        shlr2   r0
LOCAL(lshrsi3_12):
        shlr2   r0
LOCAL(lshrsi3_10):
        shlr2   r0
LOCAL(lshrsi3_8):
        rts
        shlr8   r0

! Counts 15, 13, 11, 9: shlr2 steps, shlr8, then shlr in the delay slot.
LOCAL(lshrsi3_15):
        shlr2   r0
LOCAL(lshrsi3_13):
        shlr2   r0
LOCAL(lshrsi3_11):
        shlr2   r0
LOCAL(lshrsi3_9):
        shlr8   r0
        rts
        shlr    r0

! Counts 22, 20, 18, 16: shlr2 steps plus shlr16 in the delay slot.
LOCAL(lshrsi3_22):
        shlr2   r0
LOCAL(lshrsi3_20):
        shlr2   r0
LOCAL(lshrsi3_18):
        shlr2   r0
LOCAL(lshrsi3_16):
        rts
        shlr16  r0

! Counts 23, 21, 19, 17: shlr2 steps, shlr16, then shlr in the delay slot.
LOCAL(lshrsi3_23):
        shlr2   r0
LOCAL(lshrsi3_21):
        shlr2   r0
LOCAL(lshrsi3_19):
        shlr2   r0
LOCAL(lshrsi3_17):
        shlr16  r0
        rts
        shlr    r0

! Counts 30, 28, 26, 24: shlr2 steps, shlr16, then shlr8 in the delay slot.
LOCAL(lshrsi3_30):
        shlr2   r0
LOCAL(lshrsi3_28):
        shlr2   r0
LOCAL(lshrsi3_26):
        shlr2   r0
LOCAL(lshrsi3_24):
        shlr16  r0
        rts
        shlr8   r0

! Counts 31, 29, 27, 25: shlr2 steps, shlr16, shlr8, then shlr in the
! delay slot.
LOCAL(lshrsi3_31):
        shlr2   r0
LOCAL(lshrsi3_29):
        shlr2   r0
LOCAL(lshrsi3_27):
        shlr2   r0
LOCAL(lshrsi3_25):
        shlr16  r0
        shlr8   r0
        rts
        shlr    r0

! Count 0: r0 already holds the unshifted value.
LOCAL(lshrsi3_0):
        rts
        nop

        ENDFUNC(GLOBAL(lshrsi3))
#endif
704
 
#ifdef L_movmem
!
! GLOBAL(movmem) and GLOBAL(movmemSI4) ... GLOBAL(movmemSI64)
!
! Longword block copy from the address in r5 to the address in r4,
! using r0 as the transfer register.
!
! movmemSI<n> (n = 4, 8, ..., 64) copies n bytes.  The entry points
! form one fall-through ladder of load/store pairs running from
! offset 60 down to offset 0; entering at movmemSI<n> executes the
! last n/4 pairs and returns.  These entries leave r4 and r5 alone.
!
! movmem is the variable-length entry.  It scales r6 by 4 (shll2) and
! then subtracts 64 for every 64-byte group it copies, advancing r4
! and r5 by 64 per group.  PR is saved on the stack because the ladder
! is entered with bsr and its rts is used to come back into the loop.
! NOTE(review): the encoding of the count passed in r6 is defined by
! the callers, which are not visible here - confirm against the
! compiler patterns before relying on it.
!
! Destroys: r0 and the T bit; movmem also changes r4, r5 and r6.
!
        .text
        .balign 4
        .global GLOBAL(movmem)
        HIDDEN_FUNC(GLOBAL(movmem))
        HIDDEN_ALIAS(movstr,movmem)
        /* This would be a lot simpler if r6 contained the byte count
           minus 64, and we wouldn't be called here for a byte count of 64.  */
GLOBAL(movmem):
        sts.l   pr,@-r15        ! save the return address; bsr overwrites PR
        shll2   r6
        ! Enter the ladder one instruction past movmemSI52: the load
        ! skipped there is done in the delay slot below.  This copies
        ! offsets 48..0 and the ladder rts returns to movmem_loop.
        bsr     GLOBAL(movmemSI52+2)
        mov.l   @(48,r5),r0
        .balign 4
LOCAL(movmem_loop): /* Reached with rts */
        ! Copy offsets 60, 56 and 52 of the current group, interleaved
        ! with the count update and the pointer advance.
        mov.l   @(60,r5),r0
        add     #-64,r6
        mov.l   r0,@(60,r4)
        tst     r6,r6           ! T = (r6 == 0)
        mov.l   @(56,r5),r0
        bt      LOCAL(movmem_done)
        mov.l   r0,@(56,r4)
        cmp/pl  r6              ! T = (r6 > 0)
        mov.l   @(52,r5),r0
        add     #64,r5
        mov.l   r0,@(52,r4)
        add     #64,r4
        ! More than one group left: copy offsets 48..0 of the next
        ! group; PR still points at movmem_loop.
        bt      GLOBAL(movmemSI52)
! done all the large groups, do the remainder
! jump to movmem+
        ! r6 is negative here, so the target lies r6 bytes before the
        ! end of the ladder; each ladder rung is 4 bytes long.
        mova    GLOBAL(movmemSI4)+4,r0
        add     r6,r0
        jmp     @r0
LOCAL(movmem_done): ! share slot insn, works out aligned.
        ! The lds.l is both the delay slot of the jmp above and the
        ! first instruction of the done path: either way PR is restored
        ! so that the next rts returns to the caller of movmem.
        lds.l   @r15+,pr
        mov.l   r0,@(56,r4)
        mov.l   @(52,r5),r0
        rts
        mov.l   r0,@(52,r4)
        .balign 4
! ??? We need aliases movstr* for movmem* for the older libraries.  These
! aliases will be removed at some point in the future.
        .global GLOBAL(movmemSI64)
        HIDDEN_FUNC(GLOBAL(movmemSI64))
        HIDDEN_ALIAS(movstrSI64,movmemSI64)
GLOBAL(movmemSI64):
        mov.l   @(60,r5),r0
        mov.l   r0,@(60,r4)
        .global GLOBAL(movmemSI60)
        HIDDEN_FUNC(GLOBAL(movmemSI60))
        HIDDEN_ALIAS(movstrSI60,movmemSI60)
GLOBAL(movmemSI60):
        mov.l   @(56,r5),r0
        mov.l   r0,@(56,r4)
        .global GLOBAL(movmemSI56)
        HIDDEN_FUNC(GLOBAL(movmemSI56))
        HIDDEN_ALIAS(movstrSI56,movmemSI56)
GLOBAL(movmemSI56):
        mov.l   @(52,r5),r0
        mov.l   r0,@(52,r4)
        .global GLOBAL(movmemSI52)
        HIDDEN_FUNC(GLOBAL(movmemSI52))
        HIDDEN_ALIAS(movstrSI52,movmemSI52)
GLOBAL(movmemSI52):
        mov.l   @(48,r5),r0
        mov.l   r0,@(48,r4)
        .global GLOBAL(movmemSI48)
        HIDDEN_FUNC(GLOBAL(movmemSI48))
        HIDDEN_ALIAS(movstrSI48,movmemSI48)
GLOBAL(movmemSI48):
        mov.l   @(44,r5),r0
        mov.l   r0,@(44,r4)
        .global GLOBAL(movmemSI44)
        HIDDEN_FUNC(GLOBAL(movmemSI44))
        HIDDEN_ALIAS(movstrSI44,movmemSI44)
GLOBAL(movmemSI44):
        mov.l   @(40,r5),r0
        mov.l   r0,@(40,r4)
        .global GLOBAL(movmemSI40)
        HIDDEN_FUNC(GLOBAL(movmemSI40))
        HIDDEN_ALIAS(movstrSI40,movmemSI40)
GLOBAL(movmemSI40):
        mov.l   @(36,r5),r0
        mov.l   r0,@(36,r4)
        .global GLOBAL(movmemSI36)
        HIDDEN_FUNC(GLOBAL(movmemSI36))
        HIDDEN_ALIAS(movstrSI36,movmemSI36)
GLOBAL(movmemSI36):
        mov.l   @(32,r5),r0
        mov.l   r0,@(32,r4)
        .global GLOBAL(movmemSI32)
        HIDDEN_FUNC(GLOBAL(movmemSI32))
        HIDDEN_ALIAS(movstrSI32,movmemSI32)
GLOBAL(movmemSI32):
        mov.l   @(28,r5),r0
        mov.l   r0,@(28,r4)
        .global GLOBAL(movmemSI28)
        HIDDEN_FUNC(GLOBAL(movmemSI28))
        HIDDEN_ALIAS(movstrSI28,movmemSI28)
GLOBAL(movmemSI28):
        mov.l   @(24,r5),r0
        mov.l   r0,@(24,r4)
        .global GLOBAL(movmemSI24)
        HIDDEN_FUNC(GLOBAL(movmemSI24))
        HIDDEN_ALIAS(movstrSI24,movmemSI24)
GLOBAL(movmemSI24):
        mov.l   @(20,r5),r0
        mov.l   r0,@(20,r4)
        .global GLOBAL(movmemSI20)
        HIDDEN_FUNC(GLOBAL(movmemSI20))
        HIDDEN_ALIAS(movstrSI20,movmemSI20)
GLOBAL(movmemSI20):
        mov.l   @(16,r5),r0
        mov.l   r0,@(16,r4)
        .global GLOBAL(movmemSI16)
        HIDDEN_FUNC(GLOBAL(movmemSI16))
        HIDDEN_ALIAS(movstrSI16,movmemSI16)
GLOBAL(movmemSI16):
        mov.l   @(12,r5),r0
        mov.l   r0,@(12,r4)
        .global GLOBAL(movmemSI12)
        HIDDEN_FUNC(GLOBAL(movmemSI12))
        HIDDEN_ALIAS(movstrSI12,movmemSI12)
GLOBAL(movmemSI12):
        mov.l   @(8,r5),r0
        mov.l   r0,@(8,r4)
        .global GLOBAL(movmemSI8)
        HIDDEN_FUNC(GLOBAL(movmemSI8))
        HIDDEN_ALIAS(movstrSI8,movmemSI8)
GLOBAL(movmemSI8):
        mov.l   @(4,r5),r0
        mov.l   r0,@(4,r4)
        .global GLOBAL(movmemSI4)
        HIDDEN_FUNC(GLOBAL(movmemSI4))
        HIDDEN_ALIAS(movstrSI4,movmemSI4)
GLOBAL(movmemSI4):
        mov.l   @(0,r5),r0
        rts
        mov.l   r0,@(0,r4)      ! delay slot: store the last longword

        ENDFUNC(GLOBAL(movmemSI64))
        ENDFUNC(GLOBAL(movmemSI60))
        ENDFUNC(GLOBAL(movmemSI56))
        ENDFUNC(GLOBAL(movmemSI52))
        ENDFUNC(GLOBAL(movmemSI48))
        ENDFUNC(GLOBAL(movmemSI44))
        ENDFUNC(GLOBAL(movmemSI40))
        ENDFUNC(GLOBAL(movmemSI36))
        ENDFUNC(GLOBAL(movmemSI32))
        ENDFUNC(GLOBAL(movmemSI28))
        ENDFUNC(GLOBAL(movmemSI24))
        ENDFUNC(GLOBAL(movmemSI20))
        ENDFUNC(GLOBAL(movmemSI16))
        ENDFUNC(GLOBAL(movmemSI12))
        ENDFUNC(GLOBAL(movmemSI8))
        ENDFUNC(GLOBAL(movmemSI4))
        ENDFUNC(GLOBAL(movmem))
#endif
863
 
#ifdef L_movmem_i4
!
! GLOBAL(movmem_i4_even), GLOBAL(movmem_i4_odd), GLOBAL(movmemSI12_i4)
!
! Longword block copies from the address in r5 to the address in r4.
!
! movmem_i4_even / movmem_i4_odd share one software-pipelined loop
! that reads through r5 with post-increment and writes through r4
! with displacements, moving data through r0..r3.  r6 is the loop
! counter: it is decremented with dt twice per loop iteration, and
! the copy ends when either decrement reaches zero.
! NOTE(review): how r6 relates to the byte count is set by the
! callers, which are not visible here - confirm before relying on it.
!
! movmemSI12_i4 copies exactly 12 bytes through r0..r2 and leaves r4
! and r5 unchanged.
!
! Destroys: r0..r3, r4, r5, r6 and the T bit (even / odd entries);
!           r0..r2 (movmemSI12_i4).
!
        .text
        .global GLOBAL(movmem_i4_even)
        .global GLOBAL(movmem_i4_odd)
        .global GLOBAL(movmemSI12_i4)

        HIDDEN_FUNC(GLOBAL(movmem_i4_even))
        HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
        HIDDEN_FUNC(GLOBAL(movmemSI12_i4))

        HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
        HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
        HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)

        .p2align        5
! Loop exit taken from the first dt in L_movmem_loop: r0 and r1 hold
! the last two longwords loaded; store them and return.
L_movmem_2mod4_end:
        mov.l   r0,@(16,r4)
        rts
        mov.l   r1,@(20,r4)     ! delay slot

        .p2align        2

! Even entry: preload r0 and r1, then join the loop at its midpoint.
GLOBAL(movmem_i4_even):
        mov.l   @r5+,r0
        bra     L_movmem_start_even
        mov.l   @r5+,r1         ! delay slot

! Odd entry: bias r4 by -4 so that displacements 4, 8 and 12 address
! the first three destination longwords, then fall into the loop.
GLOBAL(movmem_i4_odd):
        mov.l   @r5+,r1
        add     #-4,r4
        mov.l   @r5+,r2
        mov.l   @r5+,r3
        mov.l   r1,@(4,r4)
        mov.l   r2,@(8,r4)

L_movmem_loop:
        mov.l   r3,@(12,r4)
        dt      r6              ! r6 -= 1, T = (r6 == 0)
        mov.l   @r5+,r0
        bt/s    L_movmem_2mod4_end
        mov.l   @r5+,r1         ! delay slot: executed whether or not taken
        add     #16,r4
L_movmem_start_even:
        mov.l   @r5+,r2
        mov.l   @r5+,r3
        mov.l   r0,@r4
        dt      r6              ! r6 -= 1, T = (r6 == 0)
        mov.l   r1,@(4,r4)
        bf/s    L_movmem_loop
        mov.l   r2,@(8,r4)      ! delay slot: executed whether or not taken
        rts
        mov.l   r3,@(12,r4)     ! delay slot: store the last longword

        ENDFUNC(GLOBAL(movmem_i4_even))
        ENDFUNC(GLOBAL(movmem_i4_odd))

        .p2align        4
! Fixed 12-byte copy: three loads, then three stores.
GLOBAL(movmemSI12_i4):
        mov.l   @r5,r0
        mov.l   @(4,r5),r1
        mov.l   @(8,r5),r2
        mov.l   r0,@r4
        mov.l   r1,@(4,r4)
        rts
        mov.l   r2,@(8,r4)      ! delay slot

        ENDFUNC(GLOBAL(movmemSI12_i4))
#endif
932
 
#ifdef L_mulsi3


        .global GLOBAL(mulsi3)
        HIDDEN_FUNC(GLOBAL(mulsi3))

! GLOBAL(mulsi3): 32 x 32 -> 32 bit multiply built from 16 x 16
! unsigned multiplies (mulu.w).
!
! r4 =       aabb
! r5 =       ccdd
! r0 = aabb*ccdd  via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!
! The aa*cc term is not computed: it only affects bits above bit 31.
!
! Destroys: r1, r2, r3, macl and the T bit (r1 only on the hiset path)
!

GLOBAL(mulsi3):
        mulu.w  r4,r5           ! multiply the lsws  macl=bb*dd
        mov     r5,r3           ! r3 = ccdd
        swap.w  r4,r2           ! r2 = bbaa
        xtrct   r2,r3           ! r3 = aacc
        tst     r3,r3           ! msws zero ?
        bf      hiset
        rts                     ! yes - then we have the answer
        sts     macl,r0         ! delay slot: r0 = bb*dd

hiset:  sts     macl,r0         ! r0 = bb*dd
        mulu.w  r2,r5           ! brewing macl = aa*dd
        sts     macl,r1
        mulu.w  r3,r4           ! brewing macl = cc*bb
        sts     macl,r2
        add     r1,r2           ! r2 = aa*dd + cc*bb
        shll16  r2              ! scale the cross terms by 65536
        rts
        add     r2,r0           ! delay slot: add them to bb*dd

        ENDFUNC(GLOBAL(mulsi3))
#endif
972
#endif /* ! __SH5__ */
#ifdef L_sdivsi3_i4
        .title "SH DIVIDE"
!! 4 byte integer Divide code for the Renesas SH
!!
!! Both variants below convert the two integer operands to floating
!! point through fpul, divide with fdiv on the dr register pairs, and
!! convert the quotient back with ftrc, leaving it in fpul.
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2

        .global GLOBAL(sdivsi3_i4)
        HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
        lds r4,fpul
        float fpul,dr0          ! dr0 = dividend
        lds r5,fpul
        float fpul,dr2          ! dr2 = divisor
        fdiv dr2,dr0            ! dr0 = dr0 / dr2
        rts
        ftrc dr0,fpul           ! delay slot: fpul = quotient as integer

        ENDFUNC(GLOBAL(sdivsi3_i4))
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2

#if ! __SH5__ || __SH5__ == 32
#if __SH5__
        .mode   SHcompact
#endif
        .global GLOBAL(sdivsi3_i4)
        HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
        sts.l fpscr,@-r15       ! save the caller's fpscr on the stack
        ! Build 0x00080000 (8 with its 16-bit halves swapped) and load
        ! it into fpscr.  Per the SH-4 manual this sets only the PR
        ! (double precision) bit, which the dr-register operations
        ! below need.
        mov #8,r2
        swap.w r2,r2
        lds r2,fpscr
        lds r4,fpul
        float fpul,dr0          ! dr0 = dividend
        lds r5,fpul
        float fpul,dr2          ! dr2 = divisor
        fdiv dr2,dr0            ! dr0 = dr0 / dr2
        ftrc dr0,fpul           ! fpul = quotient as integer
        rts
        lds.l @r15+,fpscr       ! delay slot: restore the caller's fpscr

        ENDFUNC(GLOBAL(sdivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ */
#endif
1018
 
1019
#ifdef L_sdivsi3
1020
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1021
   sh2e/sh3e code.  */
1022
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1023
!!
1024
!! Steve Chamberlain
1025
!! sac@cygnus.com
1026
!!
1027
!!
1028
 
1029
!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1030
 
1031
        .global GLOBAL(sdivsi3)
1032
#if __SHMEDIA__
1033
#if __SH5__ == 32
1034
        .section        .text..SHmedia32,"ax"
1035
#else
1036
        .text
1037
#endif
1038
        .align  2
1039
#if 0
1040
/* The assembly code that follows is a hand-optimized version of the C
1041
   code that follows.  Note that the registers that are modified are
1042
   exactly those listed as clobbered in the patterns divsi3_i1 and
1043
   divsi3_i1_media.
1044
 
1045
int __sdivsi3 (i, j)
1046
     int i, j;
1047
{
1048
  register unsigned long long r18 asm ("r18");
1049
  register unsigned long long r19 asm ("r19");
1050
  register unsigned long long r0 asm ("r0") = 0;
1051
  register unsigned long long r1 asm ("r1") = 1;
1052
  register int r2 asm ("r2") = i >> 31;
1053
  register int r3 asm ("r3") = j >> 31;
1054
 
1055
  r2 = r2 ? r2 : r1;
1056
  r3 = r3 ? r3 : r1;
1057
  r18 = i * r2;
1058
  r19 = j * r3;
1059
  r2 *= r3;
1060
 
1061
  r19 <<= 31;
1062
  r1 <<= 31;
1063
  do
1064
    if (r18 >= r19)
1065
      r0 |= r1, r18 -= r19;
1066
  while (r19 >>= 1, r1 >>= 1);
1067
 
1068
  return r2 * (int)r0;
1069
}
1070
*/
1071
// This variant is compiled out: it sits in the "#if 0" arm opened above.
// Shift-and-subtract signed divide that follows the C model in the
// preceding comment: inputs r4 (i) and r5 (j), result in r0.
GLOBAL(sdivsi3):
1072
        pt/l    LOCAL(sdivsi3_dontadd), tr2
1073
        pt/l    LOCAL(sdivsi3_loop), tr1
1074
        ptabs/l r18, tr0                // tr0 = return target taken from r18
1075
        movi    0, r0                   // r0 = quotient accumulator
1076
        movi    1, r1                   // r1 = 1, later the moving quotient bit
1077
        shari.l r4, 31, r2              // r2 = i >> 31 (0 or -1)
1078
        shari.l r5, 31, r3              // r3 = j >> 31 (0 or -1)
1079
        cmveq   r2, r1, r2              // r2 = r2 ? r2 : 1  -> +1 or -1
1080
        cmveq   r3, r1, r3              // r3 = r3 ? r3 : 1  -> +1 or -1
1081
        muls.l  r4, r2, r18             // r18 = i * r2 (magnitude of i)
1082
        muls.l  r5, r3, r19             // r19 = j * r3 (magnitude of j)
1083
        muls.l  r2, r3, r2              // r2 = sign factor of the quotient
1084
        shlli   r19, 31, r19            // line the divisor up with bit 31
1085
        shlli   r1, 31, r1              // r1 = 1 << 31
1086
LOCAL(sdivsi3_loop):
1087
        bgtu    r19, r18, tr2           // shifted divisor > remainder: skip
1088
        or      r0, r1, r0              // set this quotient bit
1089
        sub     r18, r19, r18           // and reduce the remainder
1090
LOCAL(sdivsi3_dontadd):
1091
        shlri   r1, 1, r1
1092
        shlri   r19, 1, r19
1093
        bnei    r1, 0, tr1              // loop until the bit mask is shifted out
1094
        muls.l  r0, r2, r0              // apply the sign factor
1095
        add.l   r0, r63, r0             // 32-bit add of zero (r63)
1096
        blink   tr0, r63                // return
1097
#elif 0 /* ! 0 */
1098
 // inputs: r4,r5
1099
 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1100
 // result in r0
1101
 // This variant is compiled out: it sits in the "#elif 0" arm above.
 // NOTE(review): the sequence looks like a reciprocal estimate of the
 // divisor magnitude (r20) refined with the mmulfx.w / msub.w multimedia
 // multiplies, followed by three multiply-and-correct quotient steps;
 // the fixed-point formats are not stated in the code - confirm before
 // relying on this description.
GLOBAL(sdivsi3):
1102
 // can create absolute value without extra latency,
1103
 // but dependent on proper sign extension of inputs:
1104
 // shari.l r5,31,r2
1105
 // xor r5,r2,r20
1106
 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1107
 shari.l r5,31,r2
1108
 ori r2,1,r2 // r2 = +1 or -1 according to the sign of r5
1109
 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1110
 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1111
 shari.l r4,31,r3
1112
 nsb r20,r0
1113
 shlld r20,r0,r25
1114
 shlri r25,48,r25
1115
 sub r19,r25,r1
1116
 mmulfx.w r1,r1,r2
1117
 mshflo.w r1,r63,r1
1118
 // If r4 was to be used in-place instead of r21, could use this sequence
1119
 // to compute absolute:
1120
 // sub r63,r4,r19 // compute absolute value of r4
1121
 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1122
 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1123
 ori r3,1,r3 // r3 = +1 or -1 according to the sign of r4
1124
 mmulfx.w r25,r2,r2
1125
 sub r19,r0,r0
1126
 muls.l r4,r3,r21 // r21 = r4 * r3 (magnitude of the dividend)
1127
 msub.w r1,r2,r2
1128
 addi r2,-2,r1
1129
 mulu.l r21,r1,r19
1130
 mmulfx.w r2,r2,r2
1131
 shlli r1,15,r1
1132
 shlrd r19,r0,r19
1133
 mulu.l r19,r20,r3
1134
 mmacnfx.wl r25,r2,r1
1135
 ptabs r18,tr0 // tr0 = return target; r18 is reused as scratch below
1136
 sub r21,r3,r25
1137
 
1138
 mulu.l r25,r1,r2
1139
 addi r0,14,r0
1140
 xor r4,r5,r18
1141
 shlrd r2,r0,r2
1142
 mulu.l r2,r20,r3
1143
 add r19,r2,r19
1144
 shari.l r18,31,r18 // r18 = -1 when the operand signs differ, else 0
1145
 sub r25,r3,r25
1146
 
1147
 mulu.l r25,r1,r2
1148
 sub r25,r20,r25
1149
 add r19,r18,r19
1150
 shlrd r2,r0,r2
1151
 mulu.l r2,r20,r3
1152
 addi r25,1,r25
1153
 add r19,r2,r19
1154
 
1155
 cmpgt r25,r3,r25
1156
 add.l r19,r25,r0
1157
 xor r0,r18,r0 // with the earlier add of r18 this negates when r18 is -1
1158
 blink tr0,r63
1159
#else /* ! 0 && ! 0 */
1160
 
1161
 // inputs: r4,r5
1162
 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1163
 // result in r0
1164
        HIDDEN_FUNC(GLOBAL(sdivsi3_2))
1165
#ifndef __pic__
1166
        FUNC(GLOBAL(sdivsi3))
1167
GLOBAL(sdivsi3): /* this is the shcompact entry point */
1168
 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1169
 // with the SHcompact implementation, which clobbers tr1 / tr2.
1170
 .global GLOBAL(sdivsi3_1)
1171
GLOBAL(sdivsi3_1):
1172
 .global GLOBAL(div_table_internal)
 // Build the address of div_table_internal in r20 from its two 16 bit
 // halves, then fall through into sdivsi3_2.  These entry points exist
 // only when __pic__ is not defined.
1173
 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
1174
 shori GLOBAL(div_table_internal) & 65535, r20
1175
#endif
1176
 .global GLOBAL(sdivsi3_2)
1177
 // div_table in r20
1178
 // clobbered: r1,r18,r19,r21,r25,tr0
 // Dividend in r4, divisor in r5, result in r0.  The divisor is
 // normalized, an inverse estimate is looked up in the table at r20 and
 // refined (see the per-line fixed point annotations), multiplied with
 // the dividend and shifted back by the normalization count.
1179
GLOBAL(sdivsi3_2):
1180
 nsb r5, r1           // r1 = normalization shift count for the divisor
1181
 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
1182
 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
1183
 ldx.ub r20, r21, r19 // u0.8
1184
 shari r25, 32, r25   // normalize to s2.30
1185
 shlli r21, 1, r21    // scale the index for the 16 bit table load below
1186
 muls.l r25, r19, r19 // s2.38
1187
 ldx.w r20, r21, r21  // s2.14
1188
  ptabs r18, tr0      // tr0 = return target; r18 is reused as scratch
1189
 shari r19, 24, r19   // truncate to s2.14
1190
 sub r21, r19, r19    // some 11 bit inverse in s1.14
1191
 muls.l r19, r19, r21 // u0.28
1192
  sub r63, r1, r1
1193
  addi r1, 92, r1     // r1 = 92 - shift count: amount used by shard below
1194
 muls.l r25, r21, r18 // s2.58
1195
 shlli r19, 45, r19   // multiply by two and convert to s2.58
1196
  /* bubble */
1197
 sub r19, r18, r18
1198
 shari r18, 28, r18   // some 22 bit inverse in s1.30
1199
 muls.l r18, r25, r0  // s2.60
1200
  muls.l r18, r4, r25 // s32.30
1201
  /* bubble */
1202
 shari r0, 16, r19   // s-16.44
1203
 muls.l r19, r18, r19 // s-16.74
1204
  shari r25, 63, r0   // r0 = 0 or -1: sign of the product in r25
1205
  shari r4, 14, r18   // s19.-14
1206
 shari r19, 30, r19   // s-16.44
1207
 muls.l r19, r18, r19 // s15.30
1208
  xor r21, r0, r21    // You could also use the constant 1 << 27.
1209
  add r21, r25, r21
1210
 sub r21, r19, r21
1211
 shard r21, r1, r21   // shift by the count prepared in r1
1212
 sub r21, r0, r0      // result = r21 - sign mask
1213
 blink tr0, r63
1214
#ifndef __pic__
1215
        ENDFUNC(GLOBAL(sdivsi3))
1216
#endif
1217
        ENDFUNC(GLOBAL(sdivsi3_2))
1218
#endif
1219
#elif defined __SHMEDIA__
1220
/* m5compact-nofpu */
1221
 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1222
        .mode   SHmedia
1223
        .section        .text..SHmedia32,"ax"
1224
        .align  2
1225
        FUNC(GLOBAL(sdivsi3))
1226
 // Signed 32 bit divide as a short SHmedia loop: r4 / r5, result in r0.
 // Magnitudes are divided one bit per iteration (32 iterations) and the
 // sign is applied at the end.
GLOBAL(sdivsi3):
1227
        pt/l LOCAL(sdivsi3_dontsub), tr0
1228
        pt/l LOCAL(sdivsi3_loop), tr1
1229
        ptabs/l r18,tr2         // tr2 = return target; r18 is reused below
1230
        shari.l r4,31,r18       // r18 = 0 or -1: sign mask of the dividend
1231
        shari.l r5,31,r19       // r19 = 0 or -1: sign mask of the divisor
1232
        xor r4,r18,r20
1233
        xor r5,r19,r21
1234
        sub.l r20,r18,r20       // r20 = (r4 ^ mask) - mask: dividend magnitude
1235
        sub.l r21,r19,r21       // r21 = (r5 ^ mask) - mask: divisor magnitude
1236
        xor r18,r19,r19         // r19 = sign mask of the quotient
1237
        shlli r21,32,r25        // r25 = divisor magnitude << 32
1238
        addi r25,-1,r21         // r21 = (divisor << 32) - 1
1239
        addz.l r20,r63,r20      // zero-extend the dividend magnitude
1240
LOCAL(sdivsi3_loop):
1241
        shlli r20,1,r20         // shift next dividend bit up; bit 0 becomes 0
1242
        bgeu/u r21,r20,tr0      // r21 >= r20: nothing to subtract
1243
        sub r20,r21,r20         // subtract divisor << 32 and set bit 0 (the -1)
1244
LOCAL(sdivsi3_dontsub):
1245
        addi.l r25,-1,r25       // low word of r25 started at 0: counts -1, -2, ...
1246
        bnei r25,-32,tr1        // ... and stops the loop at -32
1247
        xor r20,r19,r20
1248
        sub.l r20,r19,r0        // r0 = low word, negated when r19 is -1
1249
        blink tr2,r63
1250
        ENDFUNC(GLOBAL(sdivsi3))
1251
#else /* ! __SHMEDIA__ */
1252
        FUNC(GLOBAL(sdivsi3))
1253
!! Signed 32 bit divide with the div0s / div1 step instructions:
!! r4 / r5, result in r0.  A zero divisor returns 0 (see div0 below).
GLOBAL(sdivsi3):
1254
        mov     r4,r1           ! r1 = dividend, turns into the quotient
1255
        mov     r5,r0           ! r0 = divisor
1256
 
1257
        tst     r0,r0
1258
        bt      div0            ! divisor == 0: take the early exit
1259
        mov     #0,r2           ! r2 = 0, zero operand for subc / addc
1260
        div0s   r2,r1           ! T = sign bit of the dividend
1261
        subc    r3,r3           ! r3 = -T: 0 or -1, the dividend sign word
1262
        subc    r2,r1           ! r1 = r1 - borrow (minus 1 when negative)
1263
        div0s   r0,r3           ! set up the step flags from divisor and r3
!! 32 unrolled steps follow: each rotcl shifts the previous step result
!! (T) into r1, each div1 performs one division step on r3.
1264
        rotcl   r1
1265
        div1    r0,r3
1266
        rotcl   r1
1267
        div1    r0,r3
1268
        rotcl   r1
1269
        div1    r0,r3
1270
        rotcl   r1
1271
        div1    r0,r3
1272
        rotcl   r1
1273
        div1    r0,r3
1274
        rotcl   r1
1275
        div1    r0,r3
1276
        rotcl   r1
1277
        div1    r0,r3
1278
        rotcl   r1
1279
        div1    r0,r3
1280
        rotcl   r1
1281
        div1    r0,r3
1282
        rotcl   r1
1283
        div1    r0,r3
1284
        rotcl   r1
1285
        div1    r0,r3
1286
        rotcl   r1
1287
        div1    r0,r3
1288
        rotcl   r1
1289
        div1    r0,r3
1290
        rotcl   r1
1291
        div1    r0,r3
1292
        rotcl   r1
1293
        div1    r0,r3
1294
        rotcl   r1
1295
        div1    r0,r3
1296
        rotcl   r1
1297
        div1    r0,r3
1298
        rotcl   r1
1299
        div1    r0,r3
1300
        rotcl   r1
1301
        div1    r0,r3
1302
        rotcl   r1
1303
        div1    r0,r3
1304
        rotcl   r1
1305
        div1    r0,r3
1306
        rotcl   r1
1307
        div1    r0,r3
1308
        rotcl   r1
1309
        div1    r0,r3
1310
        rotcl   r1
1311
        div1    r0,r3
1312
        rotcl   r1
1313
        div1    r0,r3
1314
        rotcl   r1
1315
        div1    r0,r3
1316
        rotcl   r1
1317
        div1    r0,r3
1318
        rotcl   r1
1319
        div1    r0,r3
1320
        rotcl   r1
1321
        div1    r0,r3
1322
        rotcl   r1
1323
        div1    r0,r3
1324
        rotcl   r1
1325
        div1    r0,r3
1326
        rotcl   r1
1327
        div1    r0,r3
1328
        rotcl   r1              ! shift in the result of the last step
1329
        addc    r2,r1           ! r1 += T: final correction
1330
        rts
1331
        mov     r1,r0           ! delay slot: result to r0
1332
 
1333
 
1334
div0:   rts                     ! divide by zero: return 0
1335
        mov     #0,r0           ! delay slot
1336
 
1337
        ENDFUNC(GLOBAL(sdivsi3))
1338
#endif /* ! __SHMEDIA__ */
1339
#endif /* ! __SH4__ */
1340
#endif
1341
#ifdef L_udivsi3_i4
1342
 
1343
        .title "SH DIVIDE"
1344
!! 4 byte integer Divide code for the Renesas SH
1345
#ifdef __SH4__
1346
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1347
!! and t bit
1348
 
1349
        .global GLOBAL(udivsi3_i4)
1350
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1351
!! Unsigned divide r4 / r5 through the FPU, quotient in fpul.
!! The integer to float conversion is signed, so both operands get their
!! top bit flipped (subtracting 2^31) before conversion and 2^31 (the
!! constant at L1) is added back afterwards.
GLOBAL(udivsi3_i4):
1352
        mov #1,r1
1353
        cmp/hi r1,r5            ! T = (r5 > 1), unsigned
1354
        bf trivial              ! divisor is 0 or 1: hand back the dividend
1355
        rotr r1                 ! r1 = 0x80000000
1356
        xor r1,r4               ! flip the top bit of the dividend
1357
        lds r4,fpul
1358
        mova L1,r0              ! r0 = address of the 2^31 constant
1359
#ifdef FMOVD_WORKS
1360
        fmov.d @r0+,dr4         ! dr4 = 2147483648.0
1361
#else
1362
        fmov.s @r0+,DR40        ! load the constant as two single moves
1363
        fmov.s @r0,DR41
1364
#endif
1365
        float fpul,dr0          ! dr0 = biased dividend
1366
        xor r1,r5               ! flip the top bit of the divisor
1367
        lds r5,fpul
1368
        float fpul,dr2          ! dr2 = biased divisor
1369
        fadd dr4,dr0            ! undo the bias: dr0 = unsigned dividend
1370
        fadd dr4,dr2            ! undo the bias: dr2 = unsigned divisor
1371
        fdiv dr2,dr0
1372
        rts
1373
        ftrc dr0,fpul           ! delay slot: fpul = truncated quotient
1374
 
1375
trivial:
1376
        rts
1377
        lds r4,fpul             ! delay slot: fpul = dividend, unchanged
1378
 
1379
        .align 2
1380
#ifdef FMOVD_WORKS
1381
        .align 3        ! make double below 8 byte aligned.
1382
#endif
1383
L1:
1384
        .double 2147483648
1385
 
1386
        ENDFUNC(GLOBAL(udivsi3_i4))
1387
#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1388
#if ! __SH5__ || __SH5__ == 32
1389
!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1390
        .mode   SHmedia
1391
        .global GLOBAL(udivsi3_i4)
1392
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1393
 // Unsigned divide r4 / r5 through the FPU.  The operands are zero
 // extended to 64 bits first, so the 64 bit integer to double conversion
 // sees them as non-negative values.
GLOBAL(udivsi3_i4):
1394
        addz.l  r4,r63,r20      // r20 = zero-extended dividend
1395
        addz.l  r5,r63,r21      // r21 = zero-extended divisor
1396
        fmov.qd r20,dr0
1397
        fmov.qd r21,dr32
1398
        ptabs   r18,tr0         // tr0 = return target
1399
        float.qd dr0,dr0        // dr0 = (double) dividend
1400
        float.qd dr32,dr32      // dr32 = (double) divisor
1401
        fdiv.d  dr0,dr32,dr0    // dr0 = dividend / divisor
1402
        ftrc.dq dr0,dr32        // dr32 = quotient truncated to a 64 bit integer
1403
        fmov.s fr33,fr32        // copy fr33 into fr32, where the result is read
1404
        blink tr0,r63
1405
 
1406
        ENDFUNC(GLOBAL(udivsi3_i4))
1407
#endif /* ! __SH5__ || __SH5__ == 32 */
1408
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1409
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1410
 
1411
        .global GLOBAL(udivsi3_i4)
1412
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1413
!! Unsigned divide r4 / r5 through the FPU, quotient in fpul.
!! Same bias trick as the plain SH4 variant (flip the top bit, convert,
!! add 2^31 back), but fpscr is saved, loaded from the word at L1 for the
!! divide, and restored on return.  The trivial path branches away before
!! fpscr is pushed, so it has nothing to restore.
GLOBAL(udivsi3_i4):
1414
        mov #1,r1
1415
        cmp/hi r1,r5            ! T = (r5 > 1), unsigned
1416
        bf trivial              ! divisor is 0 or 1: hand back the dividend
1417
        sts.l fpscr,@-r15       ! push the incoming fpscr
1418
        mova L1,r0
1419
        lds.l @r0+,fpscr        ! fpscr = word at L1; r0 now points at the double
1420
        rotr r1                 ! r1 = 0x80000000
1421
        xor r1,r4               ! flip the top bit of the dividend
1422
        lds r4,fpul
1423
#ifdef FMOVD_WORKS
1424
        fmov.d @r0+,dr4         ! dr4 = 2147483648.0
1425
#else
1426
        fmov.s @r0+,DR40        ! load the constant as two single moves
1427
        fmov.s @r0,DR41
1428
#endif
1429
        float fpul,dr0          ! dr0 = biased dividend
1430
        xor r1,r5               ! flip the top bit of the divisor
1431
        lds r5,fpul
1432
        float fpul,dr2          ! dr2 = biased divisor
1433
        fadd dr4,dr0            ! undo the bias
1434
        fadd dr4,dr2            ! undo the bias
1435
        fdiv dr2,dr0
1436
        ftrc dr0,fpul           ! fpul = truncated quotient
1437
        rts
1438
        lds.l @r15+,fpscr       ! delay slot: pop the saved fpscr
1439
 
1440
#ifdef FMOVD_WORKS
1441
        .align 3        ! make double below 8 byte aligned.
1442
#endif
1443
trivial:
1444
        rts
1445
        lds r4,fpul             ! delay slot: fpul = dividend, unchanged
1446
 
1447
        .align 2
1448
L1:
!! fpscr value used during the divide: bit 19 set, plus bit 20 when
!! FMOVD_WORKS (needed for the fmov.d load above).
1449
#ifndef FMOVD_WORKS
1450
        .long 0x80000
1451
#else
1452
        .long 0x180000
1453
#endif
1454
        .double 2147483648
1455
 
1456
        ENDFUNC(GLOBAL(udivsi3_i4))
1457
#endif /* ! __SH4__ */
1458
#endif
1459
 
1460
#ifdef L_udivsi3
1461
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1462
   sh2e/sh3e code.  */
1463
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1464
 
1465
!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1466
        .global GLOBAL(udivsi3)
1467
        HIDDEN_FUNC(GLOBAL(udivsi3))
1468
 
1469
#if __SHMEDIA__
1470
#if __SH5__ == 32
1471
        .section        .text..SHmedia32,"ax"
1472
#else
1473
        .text
1474
#endif
1475
        .align  2
1476
#if 0
1477
/* The assembly code that follows is a hand-optimized version of the C
1478
   code that follows.  Note that the registers that are modified are
1479
   exactly those listed as clobbered in the patterns udivsi3_i1 and
1480
   udivsi3_i1_media.
1481
 
1482
unsigned
1483
__udivsi3 (i, j)
1484
    unsigned i, j;
1485
{
1486
  register unsigned long long r0 asm ("r0") = 0;
1487
  register unsigned long long r18 asm ("r18") = 1;
1488
  register unsigned long long r4 asm ("r4") = i;
1489
  register unsigned long long r19 asm ("r19") = j;
1490
 
1491
  r19 <<= 31;
1492
  r18 <<= 31;
1493
  do
1494
    if (r4 >= r19)
1495
      r0 |= r18, r4 -= r19;
1496
  while (r19 >>= 1, r18 >>= 1);
1497
 
1498
  return r0;
1499
}
1500
*/
1501
// This variant is compiled out: it sits in the "#if 0" arm opened above.
// Shift-and-subtract unsigned divide that follows the C model in the
// preceding comment: inputs r4 (i) and r5 (j), result in r0.
GLOBAL(udivsi3):
1502
        pt/l    LOCAL(udivsi3_dontadd), tr2
1503
        pt/l    LOCAL(udivsi3_loop), tr1
1504
        ptabs/l r18, tr0                // tr0 = return target; r18 is reused
1505
        movi    0, r0                   // r0 = quotient accumulator
1506
        movi    1, r18                  // r18 = 1, later the moving quotient bit
1507
        addz.l  r5, r63, r19            // r19 = zero-extended divisor
1508
        addz.l  r4, r63, r4             // r4 = zero-extended dividend
1509
        shlli   r19, 31, r19            // line the divisor up with bit 31
1510
        shlli   r18, 31, r18            // r18 = 1 << 31
1511
LOCAL(udivsi3_loop):
1512
        bgtu    r19, r4, tr2            // shifted divisor > remainder: skip
1513
        or      r0, r18, r0             // set this quotient bit
1514
        sub     r4, r19, r4             // and reduce the remainder
1515
LOCAL(udivsi3_dontadd):
1516
        shlri   r18, 1, r18
1517
        shlri   r19, 1, r19
1518
        bnei    r18, 0, tr1             // loop until the bit mask is shifted out
1519
        blink   tr0, r63                // return
1520
#else
1521
GLOBAL(udivsi3):
1522
 // inputs: r4,r5
1523
 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1524
 // result in r0.
 // NOTE(review): the sequence looks like a reciprocal estimate of the
 // zero-extended divisor (r22) refined with the mmulfx.w / msub.w
 // multimedia multiplies, followed by three multiply-and-correct
 // quotient steps; the fixed-point formats are not stated in the code -
 // confirm before relying on this description.
1525
 addz.l r5,r63,r22 // r22 = zero-extended divisor
1526
 nsb r22,r0
1527
 shlld r22,r0,r25
1528
 shlri r25,48,r25
1529
 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1530
 sub r20,r25,r21
1531
 mmulfx.w r21,r21,r19
1532
 mshflo.w r21,r63,r21
1533
 ptabs r18,tr0 // tr0 = return target; r18 is reused as scratch below
1534
 mmulfx.w r25,r19,r19
1535
 sub r20,r0,r0
1536
 /* bubble */
1537
 msub.w r21,r19,r19
1538
 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1539
                    before the msub.w, but we need a different value for
1540
                    r19 to keep errors under control.  */
1541
 mulu.l r4,r21,r18
1542
 mmulfx.w r19,r19,r19
1543
 shlli r21,15,r21
1544
 shlrd r18,r0,r18
1545
 mulu.l r18,r22,r20
1546
 mmacnfx.wl r25,r19,r21
1547
 /* bubble */
1548
 sub r4,r20,r25
1549
 
1550
 mulu.l r25,r21,r19
1551
 addi r0,14,r0
1552
 /* bubble */
1553
 shlrd r19,r0,r19
1554
 mulu.l r19,r22,r20
1555
 add r18,r19,r18
1556
 /* bubble */
1557
 sub.l r25,r20,r25
1558
 
1559
 mulu.l r25,r21,r19
1560
 addz.l r25,r63,r25
1561
 sub r25,r22,r25
1562
 shlrd r19,r0,r19
1563
 mulu.l r19,r22,r20
1564
 addi r25,1,r25
1565
 add r18,r19,r18
1566
 
1567
 cmpgt r25,r20,r25
1568
 add.l r18,r25,r0
1569
 blink tr0,r63
 // Close the HIDDEN_FUNC(GLOBAL(udivsi3)) opened before the conditional,
 // as the SHcompact arm of this routine already does.
        ENDFUNC(GLOBAL(udivsi3))
1570
#endif
1571
#elif defined (__SHMEDIA__)
1572
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
1573
   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1574
   So use a short shmedia loop.  */
1575
 // clobbered: r20,r21,r25,tr0,tr1,tr2
1576
        .mode   SHmedia
1577
        .section        .text..SHmedia32,"ax"
1578
        .align  2
1579
GLOBAL(udivsi3):
1580
 pt/l LOCAL(udivsi3_dontsub), tr0
1581
 pt/l LOCAL(udivsi3_loop), tr1
1582
 ptabs/l r18,tr2
1583
 shlli r5,32,r25
1584
 addi r25,-1,r21
1585
 addz.l r4,r63,r20
1586
LOCAL(udivsi3_loop):
1587
 shlli r20,1,r20
1588
 bgeu/u r21,r20,tr0
1589
 sub r20,r21,r20
1590
LOCAL(udivsi3_dontsub):
1591
 addi.l r25,-1,r25
1592
 bnei r25,-32,tr1
1593
 add.l r20,r63,r0
1594
 blink tr2,r63
1595
#else /* ! defined (__SHMEDIA__) */
1596
!! Helper: eight div1 steps.  Executes one step and falls into div7.
LOCAL(div8):
1597
 div1 r5,r4
1598
!! Helper: seven div1 steps (six here plus one in the rts delay slot).
LOCAL(div7):
1599
 div1 r5,r4; div1 r5,r4; div1 r5,r4
1600
 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1601
 
!! Helper: four div1 steps with three rotcl r0 in between; the last div1
!! sits in the rts delay slot and its rotcl is issued by the caller.
1602
LOCAL(divx4):
1603
 div1 r5,r4; rotcl r0
1604
 div1 r5,r4; rotcl r0
1605
 div1 r5,r4; rotcl r0
1606
 rts; div1 r5,r4
1607
 
!! Unsigned 32 bit divide r4 / r5, result in r0.  pr is saved on the
!! stack because the helpers above are reached with bsr.  Divisors that
!! fit in 16 bits take the path below (two rounds of 16 div1 steps);
!! all others go to large_divisor.
1608
GLOBAL(udivsi3):
1609
 sts.l pr,@-r15
1610
 extu.w r5,r0
1611
 cmp/eq r5,r0           ! T = divisor equals its own low 16 bits
1612
#ifdef __sh1__
1613
 bf LOCAL(large_divisor)
1614
#else
1615
 bf/s LOCAL(large_divisor)
1616
#endif
1617
 div0u                  ! delay slot of bf/s; the __sh1__ build uses plain bf,
                        ! so there it runs only on fall-through (and again at
                        ! large_divisor)
1618
 swap.w r4,r0
1619
 shlr16 r4              ! r4 = upper 16 bits of the dividend
1620
 bsr LOCAL(div8)
1621
 shll16 r5              ! delay slot: divisor moved to the upper half
1622
 bsr LOCAL(div7)
1623
 div1 r5,r4             ! delay slot: with div8 + div7 this makes 16 steps
1624
 xtrct r4,r0
1625
 xtrct r0,r4
1626
 bsr LOCAL(div8)
1627
 swap.w r4,r4
1628
 bsr LOCAL(div7)
1629
 div1 r5,r4             ! delay slot: second round of 16 steps
1630
 lds.l @r15+,pr         ! restore pr
1631
 xtrct r4,r0
1632
 swap.w r0,r0
1633
 rotcl r0               ! shift in the last quotient bit
1634
 rts
1635
 shlr16 r5              ! delay slot: undo the shll16 on the divisor
1636
 
!! Divisor does not fit in 16 bits: four divx4 calls give 16 div1 steps.
1637
LOCAL(large_divisor):
1638
#ifdef __sh1__
1639
 div0u
1640
#endif
1641
 mov #0,r0
1642
 xtrct r4,r0
1643
 xtrct r0,r4
1644
 bsr LOCAL(divx4)
1645
 rotcl r0               ! delay slot
1646
 bsr LOCAL(divx4)
1647
 rotcl r0               ! delay slot
1648
 bsr LOCAL(divx4)
1649
 rotcl r0               ! delay slot
1650
 bsr LOCAL(divx4)
1651
 rotcl r0               ! delay slot
1652
 lds.l @r15+,pr         ! restore pr
1653
 rts
1654
 rotcl r0               ! delay slot: shift in the last quotient bit
1655
 
1656
        ENDFUNC(GLOBAL(udivsi3))
1657
#endif /* ! __SHMEDIA__ */
1658
#endif /* __SH4__ */
1659
#endif /* L_udivsi3 */
1660
 
1661
#ifdef L_udivdi3
1662
#ifdef __SHMEDIA__
1663
        .mode   SHmedia
1664
        .section        .text..SHmedia32,"ax"
1665
        .align  2
1666
        .global GLOBAL(udivdi3)
1667
        FUNC(GLOBAL(udivdi3))
1668
/* Unsigned 64 bit divide for SHmedia.  Operands arrive in r2 and r3 and
   the result is returned in r2; the remark about testing r3 for divide
   by zero further down indicates that r3 is the divisor.  divdi3 reaches
   this code through the hidden alias udivdi3_internal.
   The divisor is normalized, a reciprocal estimate is refined into r1,
   and the quotient is then built in multiply-and-subtract steps.  Two
   paths exist: the fall-through path for divisors that fit in 32 bits
   and large_divisor for all others.  */
GLOBAL(udivdi3):
1669
        HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1670
        shlri r3,1,r4
1671
        nsb r4,r22 /* r22 = left shift count that normalizes the divisor */
1672
        shlld r3,r22,r6 /* r6 = normalized divisor */
1673
        shlri r6,49,r5
1674
        movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1675
        sub r21,r5,r1
1676
        mmulfx.w r1,r1,r4
1677
        mshflo.w r1,r63,r1
1678
        sub r63,r22,r20 // r63 == 64 % 64
1679
        mmulfx.w r5,r4,r4
1680
        pta LOCAL(large_divisor),tr0
1681
        addi r20,32,r9 /* r9 = 32 - r22 */
1682
        msub.w r1,r4,r1
1683
        madd.w r1,r1,r1
1684
        mmulfx.w r1,r1,r4
1685
        shlri r6,32,r7 /* r7 = upper 32 bits of the normalized divisor */
1686
        bgt/u r9,r63,tr0 // large_divisor
1687
        mmulfx.w r5,r4,r4
1688
        shlri r2,32+14,r19
1689
        addi r22,-31,r0
1690
        msub.w r1,r4,r1
1691
 
1692
        mulu.l r1,r7,r4
1693
        addi r1,-3,r5
1694
        mulu.l r5,r19,r5
1695
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1696
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1697
                         the case may be, %0000000000000000 000.11111111111, still */
1698
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1699
        mulu.l r5,r3,r8
1700
        mshalds.l r1,r21,r1
1701
        shari r4,26,r4
1702
        shlld r8,r0,r8
1703
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1704
        sub r2,r8,r2
1705
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1706
 
1707
        shlri r2,22,r21
1708
        mulu.l r21,r1,r21
1709
        shlld r5,r0,r8 /* r8 = first partial quotient, shifted into place */
1710
        addi r20,30-22,r0
1711
        shlrd r21,r0,r21
1712
        mulu.l r21,r3,r5
1713
        add r8,r21,r8 /* accumulate the second partial quotient */
1714
        mcmpgt.l r21,r63,r21 // See Note 1
1715
        addi r20,30,r0
1716
        mshfhi.l r63,r21,r21
1717
        sub r2,r5,r2
1718
        andc r2,r21,r2
1719
 
1720
        /* small divisor: need a third divide step */
1721
        mulu.l r2,r1,r7
1722
        ptabs r18,tr0 /* tr0 = return target */
1723
        addi r2,1,r2
1724
        shlrd r7,r0,r7
1725
        mulu.l r7,r3,r5
1726
        add r8,r7,r8 /* accumulate the third partial quotient */
1727
        sub r2,r3,r2
1728
        cmpgt r2,r5,r5 /* r5 = 1 when one more divisor still fits */
1729
        add r8,r5,r2 /* result = accumulated quotient + final adjustment */
1730
        /* could test r3 here to check for divide by zero.  */
1731
        blink tr0,r63
1732
 
1733
LOCAL(large_divisor):
1734
        mmulfx.w r5,r4,r4
1735
        shlrd r2,r9,r25
1736
        shlri r25,32,r8
1737
        msub.w r1,r4,r1
1738
 
1739
        mulu.l r1,r7,r4
1740
        addi r1,-3,r5
1741
        mulu.l r5,r8,r5
1742
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1743
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1744
                         the case may be, %0000000000000000 000.11111111111, still */
1745
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1746
        shlri r5,14-1,r8
1747
        mulu.l r8,r7,r5
1748
        mshalds.l r1,r21,r1
1749
        shari r4,26,r4
1750
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1751
        sub r25,r5,r25
1752
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1753
 
1754
        shlri r25,22,r21
1755
        mulu.l r21,r1,r21
1756
        pta LOCAL(no_lo_adj),tr0
1757
        addi r22,32,r0
1758
        shlri r21,40,r21
1759
        mulu.l r21,r7,r5
1760
        add r8,r21,r8
1761
        shlld r2,r0,r2
1762
        sub r25,r5,r25
1763
        bgtu/u r7,r25,tr0 // no_lo_adj
1764
        addi r8,1,r8 /* one more r7 fits: bump the quotient ... */
1765
        sub r25,r7,r25 /* ... and reduce the rest accordingly */
1766
LOCAL(no_lo_adj):
1767
        mextr4 r2,r25,r2
1768
 
1769
        /* large_divisor: only needs a few adjustments.  */
1770
        mulu.l r8,r6,r5
1771
        ptabs r18,tr0 /* tr0 = return target */
1772
        /* bubble */
1773
        cmpgtu r5,r2,r5 /* r5 = 1 when the estimate is one too large */
1774
        sub r8,r5,r2 /* result = estimate - adjustment */
1775
        blink tr0,r63
1776
        ENDFUNC(GLOBAL(udivdi3))
1777
/* Note 1: To shift the result of the second divide stage so that the result
1778
   always fits into 32 bits, yet we still reduce the rest sufficiently
1779
   would require a lot of instructions to do the shifts just right.  Using
1780
   the full 64 bit shift result to multiply with the divisor would require
1781
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1782
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1783
   know that the rest after taking this partial result into account will
1784
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1785
   upper 32 bits of the partial result are nonzero.  */
1786
#endif /* __SHMEDIA__ */
1787
#endif /* L_udivdi3 */
1788
 
1789
#ifdef L_divdi3
1790
#ifdef __SHMEDIA__
1791
        .mode   SHmedia
1792
        .section        .text..SHmedia32,"ax"
1793
        .align  2
1794
        .global GLOBAL(divdi3)
1795
        FUNC(GLOBAL(divdi3))
1796
/* Signed 64 bit divide built on udivdi3_internal: both operands (r2, r3)
   are replaced by their magnitudes, the unsigned routine does the work,
   and the result in r2 is negated when the operand signs differ.  */
GLOBAL(divdi3):
1797
        pta GLOBAL(udivdi3_internal),tr0
1798
        shari r2,63,r22 /* r22 = 0 or -1: sign mask of r2 */
1799
        shari r3,63,r23 /* r23 = 0 or -1: sign mask of r3 */
1800
        xor r2,r22,r2
1801
        xor r3,r23,r3
1802
        sub r2,r22,r2 /* r2 = (r2 ^ mask) - mask: magnitude */
1803
        sub r3,r23,r3 /* r3 = (r3 ^ mask) - mask: magnitude */
1804
        beq/u r22,r23,tr0 /* same sign: jump straight to the unsigned
                             routine, which returns to our caller via r18 */
1805
        ptabs r18,tr1 /* keep our own return target in tr1 */
1806
        blink tr0,r18 /* call the unsigned routine, link in r18 */
1807
        sub r63,r2,r2 /* negate the result */
1808
        blink tr1,r63
1809
        ENDFUNC(GLOBAL(divdi3))
1810
#endif /* __SHMEDIA__ */
1811
#endif /* L_divdi3 */
1812
 
1813
#ifdef L_umoddi3
1814
#ifdef __SHMEDIA__
1815
        .mode   SHmedia
1816
        .section        .text..SHmedia32,"ax"
1817
        .align  2
1818
        .global GLOBAL(umoddi3)
1819
        FUNC(GLOBAL(umoddi3))
1820
/* Unsigned 64 bit remainder for SHmedia.  Operands arrive in r2 and r3
   and the result is returned in r2; the remark about testing r3 for
   divide by zero further down indicates that r3 is the divisor.  moddi3
   reaches this code through the hidden alias umoddi3_internal.
   The instruction stream mirrors udivdi3 (normalize the divisor, refine
   a reciprocal into r1, reduce in steps) but keeps the rest instead of
   accumulating the quotient.  Two paths exist: the fall-through path for
   divisors that fit in 32 bits and large_divisor for all others.  */
GLOBAL(umoddi3):
1821
        HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1822
        shlri r3,1,r4
1823
        nsb r4,r22 /* r22 = left shift count that normalizes the divisor */
1824
        shlld r3,r22,r6 /* r6 = normalized divisor */
1825
        shlri r6,49,r5
1826
        movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
1827
        sub r21,r5,r1
1828
        mmulfx.w r1,r1,r4
1829
        mshflo.w r1,r63,r1
1830
        sub r63,r22,r20 // r63 == 64 % 64
1831
        mmulfx.w r5,r4,r4
1832
        pta LOCAL(large_divisor),tr0
1833
        addi r20,32,r9 /* r9 = 32 - r22 */
1834
        msub.w r1,r4,r1
1835
        madd.w r1,r1,r1
1836
        mmulfx.w r1,r1,r4
1837
        shlri r6,32,r7 /* r7 = upper 32 bits of the normalized divisor */
1838
        bgt/u r9,r63,tr0 // large_divisor
1839
        mmulfx.w r5,r4,r4
1840
        shlri r2,32+14,r19
1841
        addi r22,-31,r0
1842
        msub.w r1,r4,r1
1843
 
1844
        mulu.l r1,r7,r4
1845
        addi r1,-3,r5
1846
        mulu.l r5,r19,r5
1847
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1848
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1849
                         the case may be, %0000000000000000 000.11111111111, still */
1850
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1851
        mulu.l r5,r3,r5
1852
        mshalds.l r1,r21,r1
1853
        shari r4,26,r4
1854
        shlld r5,r0,r5
1855
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1856
        sub r2,r5,r2
1857
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
1858
 
1859
        shlri r2,22,r21
1860
        mulu.l r21,r1,r21
1861
        addi r20,30-22,r0
1862
        /* bubble */ /* could test r3 here to check for divide by zero.  */
1863
        shlrd r21,r0,r21
1864
        mulu.l r21,r3,r5
1865
        mcmpgt.l r21,r63,r21 // See Note 1
1866
        addi r20,30,r0
1867
        mshfhi.l r63,r21,r21
1868
        sub r2,r5,r2
1869
        andc r2,r21,r2
1870
 
1871
        /* small divisor: need a third divide step */
1872
        mulu.l r2,r1,r7
1873
        ptabs r18,tr0 /* tr0 = return target */
1874
        sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1875
        shlrd r7,r0,r7
1876
        mulu.l r7,r3,r5
1877
        /* bubble */
1878
        addi r8,1,r7
1879
        cmpgt r7,r5,r7 /* r7 = 1 when one more divisor still fits */
1880
        cmvne r7,r8,r2 /* in that case continue from rest - r3 */
1881
        sub r2,r5,r2 /* result = rest - (partial quotient * r3) */
1882
        blink tr0,r63
1883
 
1884
LOCAL(large_divisor):
1885
        mmulfx.w r5,r4,r4
1886
        shlrd r2,r9,r25
1887
        shlri r25,32,r8
1888
        msub.w r1,r4,r1
1889
 
1890
        mulu.l r1,r7,r4
1891
        addi r1,-3,r5
1892
        mulu.l r5,r8,r5
1893
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1894
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1895
                         the case may be, %0000000000000000 000.11111111111, still */
1896
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
1897
        shlri r5,14-1,r8
1898
        mulu.l r8,r7,r5
1899
        mshalds.l r1,r21,r1
1900
        shari r4,26,r4
1901
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1902
        sub r25,r5,r25
1903
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
1904
 
1905
        shlri r25,22,r21
1906
        mulu.l r21,r1,r21
1907
        pta LOCAL(no_lo_adj),tr0
1908
        addi r22,32,r0
1909
        shlri r21,40,r21
1910
        mulu.l r21,r7,r5
1911
        add r8,r21,r8
1912
        shlld r2,r0,r2
1913
        sub r25,r5,r25
1914
        bgtu/u r7,r25,tr0 // no_lo_adj
1915
        addi r8,1,r8 /* one more r7 fits: bump the quotient ... */
1916
        sub r25,r7,r25 /* ... and reduce the rest accordingly */
1917
LOCAL(no_lo_adj):
1918
        mextr4 r2,r25,r2
1919
 
1920
        /* large_divisor: only needs a few adjustments.  */
1921
        mulu.l r8,r6,r5
1922
        ptabs r18,tr0 /* tr0 = return target */
1923
        add r2,r6,r7 /* r7 = rest + normalized divisor */
1924
        cmpgtu r5,r2,r8 /* r8 = 1 when the product exceeds the rest */
1925
        cmvne r8,r7,r2 /* in that case continue from rest + divisor */
1926
        sub r2,r5,r2
1927
        shlrd r2,r22,r2 /* undo the normalization shift */
1928
        blink tr0,r63
1929
        ENDFUNC(GLOBAL(umoddi3))
1930
/* Note 1: To shift the result of the second divide stage so that the result
1931
   always fits into 32 bits, yet we still reduce the rest sufficiently
1932
   would require a lot of instructions to do the shifts just right.  Using
1933
   the full 64 bit shift result to multiply with the divisor would require
1934
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1935
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
1936
   know that the rest after taking this partial result into account will
1937
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
1938
   upper 32 bits of the partial result are nonzero.  */
1939
#endif /* __SHMEDIA__ */
1940
#endif /* L_umoddi3 */
1941
 
1942
#ifdef L_moddi3
1943
#ifdef __SHMEDIA__
1944
        .mode   SHmedia
1945
        .section        .text..SHmedia32,"ax"
1946
        .align  2
1947
        .global GLOBAL(moddi3)
1948
        FUNC(GLOBAL(moddi3))
1949
/* Signed 64 bit remainder built on umoddi3_internal: both operands
   (r2, r3) are replaced by their magnitudes, the unsigned routine does
   the work, and the result in r2 is negated when the first operand was
   negative.  Only the sign of r2 decides the negation; the sign of r3
   is used for taking its magnitude and nothing else.  */
GLOBAL(moddi3):
1950
        pta GLOBAL(umoddi3_internal),tr0
1951
        shari r2,63,r22 /* r22 = 0 or -1: sign mask of r2 */
1952
        shari r3,63,r23 /* r23 = 0 or -1: sign mask of r3 */
1953
        xor r2,r22,r2
1954
        xor r3,r23,r3
1955
        sub r2,r22,r2 /* r2 = (r2 ^ mask) - mask: magnitude */
1956
        sub r3,r23,r3 /* r3 = (r3 ^ mask) - mask: magnitude */
1957
        beq/u r22,r63,tr0 /* r2 was not negative: jump straight to the
                             unsigned routine, which returns via r18 */
1958
        ptabs r18,tr1 /* keep our own return target in tr1 */
1959
        blink tr0,r18 /* call the unsigned routine, link in r18 */
1960
        sub r63,r2,r2 /* negate the result */
1961
        blink tr1,r63
1962
        ENDFUNC(GLOBAL(moddi3))
1963
#endif /* __SHMEDIA__ */
1964
#endif /* L_moddi3 */
1965
 
1966
#ifdef L_set_fpscr
1967
#if !defined (__SH2A_NOFPU__)
1968
#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1969
#ifdef __SH5__
1970
        .mode   SHcompact
1971
#endif
1972
        .global GLOBAL(set_fpscr)
1973
        HIDDEN_FUNC(GLOBAL(set_fpscr))
1974
!! Load fpscr from r4 and store two variants of that value into the two
!! words of fpscr_values: one with bit 19 set and one with bits 19 and 20
!! clear (FPSCR.PR and FPSCR.SZ on SH-4).  Which variant goes into which
!! word depends on the target, see the conditionals below.
GLOBAL(set_fpscr):
1975
        lds r4,fpscr
1976
#ifdef __PIC__
1977
        mov.l   r12,@-r15       ! r12 is borrowed as GOT pointer: save it
1978
#ifdef __vxworks
1979
        mov.l   LOCAL(set_fpscr_L0_base),r12
1980
        mov.l   LOCAL(set_fpscr_L0_index),r0
1981
        mov.l   @r12,r12
1982
        mov.l   @(r0,r12),r12   ! r12 = GOT address fetched via base and index
1983
#else
1984
        mova    LOCAL(set_fpscr_L0),r0
1985
        mov.l   LOCAL(set_fpscr_L0),r12
1986
        add     r0,r12          ! r12 = literal address + stored GOT offset
1987
#endif
1988
        mov.l   LOCAL(set_fpscr_L1),r0
1989
        mov.l   @(r0,r12),r1    ! r1 = address of fpscr_values from its GOT slot
1990
        mov.l   @r15+,r12       ! restore r12
1991
#else
1992
        mov.l LOCAL(set_fpscr_L1),r1    ! r1 = address of fpscr_values
1993
#endif
1994
        swap.w r4,r0            ! bring bits 16..23 down into the low byte of r0
1995
        or #24,r0               ! set bits 3 and 4 (bits 19 and 20 after swap)
1996
#ifndef FMOVD_WORKS
1997
        xor #16,r0              ! clear bit 4 again (bit 20 after swap)
1998
#endif
1999
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2000
        swap.w r0,r3
2001
        mov.l r3,@(4,r1)        ! bit 19 set variant -> second word
2002
#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2003
        swap.w r0,r2
2004
        mov.l r2,@r1            ! bit 19 set variant -> first word
2005
#endif
2006
#ifndef FMOVD_WORKS
2007
        xor #8,r0               ! clear bit 3 (bit 19 after swap)
2008
#else
2009
        xor #24,r0              ! clear bits 3 and 4 (bits 19 and 20 after swap)
2010
#endif
2011
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2012
        swap.w r0,r2
2013
        rts
2014
        mov.l r2,@r1            ! delay slot: cleared variant -> first word
2015
#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2016
        swap.w r0,r3
2017
        rts
2018
        mov.l r3,@(4,r1)        ! delay slot: cleared variant -> second word
2019
#endif
2020
        .align 2
!! Literal pool for the address loads above.
2021
#ifdef __PIC__
2022
#ifdef __vxworks
2023
LOCAL(set_fpscr_L0_base):
2024
        .long ___GOTT_BASE__
2025
LOCAL(set_fpscr_L0_index):
2026
        .long ___GOTT_INDEX__
2027
#else
2028
LOCAL(set_fpscr_L0):
2029
        .long _GLOBAL_OFFSET_TABLE_
2030
#endif
2031
LOCAL(set_fpscr_L1):
2032
        .long GLOBAL(fpscr_values@GOT)
2033
#else
2034
LOCAL(set_fpscr_L1):
2035
        .long GLOBAL(fpscr_values)
2036
#endif
2037
 
2038
        ENDFUNC(GLOBAL(set_fpscr))
2039
#ifndef NO_FPSCR_VALUES
2040
#ifdef __ELF__
2041
        .comm   GLOBAL(fpscr_values),8,4
2042
#else
2043
        .comm   GLOBAL(fpscr_values),8
2044
#endif /* ELF */
2045
#endif /* NO_FPSCR_VALUES */
2046
#endif /* SH2E / SH3E / SH4 */
2047
#endif /* __SH2A_NOFPU__ */
2048
#endif /* L_set_fpscr */
2049
#ifdef L_ic_invalidate
2050
#if __SH5__ == 32
2051
        .mode   SHmedia
2052
        .section        .text..SHmedia32,"ax"
2053
        .align  2
2054
        .global GLOBAL(init_trampoline)
2055
        HIDDEN_FUNC(GLOBAL(init_trampoline))
2056
 // Fill in the 16 byte block at r0: a fixed 64 bit pattern at offset 0,
 // r2 at offset 8 and r3 at offset 12.  There is no return instruction
 // here; execution runs on into ic_invalidate, which follows directly
 // and still finds the block address in r0.
 // NOTE(review): the four 16 bit constants are presumably SHcompact
 // instruction words that load the two stored values and jump - confirm
 // against the trampoline layout the compiler expects.
GLOBAL(init_trampoline):
2057
        st.l    r0,8,r2
2058
#ifdef __LITTLE_ENDIAN__
2059
        movi    9,r20
2060
        shori   0x402b,r20
2061
        shori   0xd101,r20
2062
        shori   0xd002,r20
2063
#else
        // Same four halfwords assembled in the opposite order, so that they
        // land at the same byte offsets after the 64 bit store below.
2064
        movi    0xffffffffffffd002,r20
2065
        shori   0xd101,r20
2066
        shori   0x402b,r20
2067
        shori   9,r20
2068
#endif
2069
        st.q    r0,0,r20
2070
        st.l    r0,12,r3
2071
        ENDFUNC(GLOBAL(init_trampoline))
2072
        .global GLOBAL(ic_invalidate)
2073
        HIDDEN_FUNC(GLOBAL(ic_invalidate))
2074
GLOBAL(ic_invalidate):
2075
        ocbwb   r0,0
2076
        synco
2077
        icbi    r0, 0
2078
        ptabs   r18, tr0
2079
        synci
2080
        blink   tr0, r63
2081
        ENDFUNC(GLOBAL(ic_invalidate))
2082
#elif defined(__SH4A__)
2083
        /* ic_invalidate (SH4A): write back the operand-cache block that
           contains the address in r4, wait for that to complete (synco),
           invalidate the matching instruction-cache block, and return.
           No other register is modified.  */
        .global GLOBAL(ic_invalidate)
2084
        HIDDEN_FUNC(GLOBAL(ic_invalidate))
2085
GLOBAL(ic_invalidate):
2086
        ocbwb   @r4
2087
        synco
2088
        icbi    @r4
2089
        rts
2090
          nop
2091
        ENDFUNC(GLOBAL(ic_invalidate))
2092
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2093
        /* For system code, we use ic_invalidate_line_i, but user code
           needs a different mechanism.  A kernel call is generally not
           available, and it would also be slow.  Different SH4 variants use
           different sizes and associativities of the Icache.  We use a small
           bit of dispatch code that can be put hidden in every shared object,
           which calls the actual processor-specific invalidation code in a
           separate module.
           Or if you have operating system support, the OS could mmap the
           processor-specific code from a single page, since it is highly
           repetitive.

           In:    r4 = address whose cache line is to be invalidated.
           Uses:  r0, r1 (and r2 in the PIC variants).  r4 is rewritten to
                  (r4 - ic_invalidate_array) before the jump.
           Exit:  jumps into ic_invalidate_array with
                    r1 = address of ic_invalidate_array,
                    r0 = the word stored at ic_invalidate_array + 4;
                  PR is left untouched, so the rts inside the array code
                  returns to our caller.

           The word at ic_invalidate_array + 8 is used as a mask that
           selects the entry point inside the array.  */
        .global GLOBAL(ic_invalidate)
        HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
        /* Step 1: r1 = address of ic_invalidate_array.  */
#ifdef __pic__
#ifdef __vxworks
        mov.l   1f,r1
        mov.l   2f,r0
        mov.l   @r1,r1
        mov.l   0f,r2
        mov.l   @(r0,r1),r0
#else
        mov.l   1f,r1
        mova    1f,r0
        mov.l   0f,r2
        add     r1,r0
#endif
        /* r0 = GOT base, r2 = GOT offset of the array.  */
        mov.l   @(r0,r2),r1
#else
        mov.l   0f,r1
#endif
        /* Step 2: flush the data side, then compute the entry point
           array + ((r4 - array) & mask).  */
        ocbwb   @r4
        mov.l   @(8,r1),r0
        sub     r1,r4
        and     r4,r0
        add     r1,r0
        jmp     @r0
        /* Delay slot: hand the word at array + 4 to the array code.  */
        mov.l   @(4,r1),r0
        .align  2
#ifndef __pic__
0:      .long   GLOBAL(ic_invalidate_array)
#else /* __pic__ */
        .global GLOBAL(ic_invalidate_array)
0:      .long   GLOBAL(ic_invalidate_array)@GOT
#ifdef __vxworks
1:      .long   ___GOTT_BASE__
2:      .long   ___GOTT_INDEX__
#else
1:      .long   _GLOBAL_OFFSET_TABLE_
#endif
#endif /* __pic__ */
        /* Close the function in both the PIC and the non-PIC build; it
           used to be emitted for PIC only.  */
        ENDFUNC(GLOBAL(ic_invalidate))
2144
#endif /* SH4 */
2145
#endif /* L_ic_invalidate */
2146
 
2147
#ifdef L_ic_invalidate_array
2148
#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2149
        /* SH4A stand-in for the SH4 invalidation array.
           This is needed when an SH4 dso with trampolines is used on SH4A.

           It is entered from the SH4 ic_invalidate dispatcher, which
             - has r1 = address of this array and r4 = (address - r1),
             - reads the word at offset 8 as the entry-point mask,
             - reads the word at offset 4 into r0 (ignored here).
           The mask must therefore be the zero word at offset 8, so that
           the dispatcher always enters at offset 0.  The layout below is
           fixed for that reason:
             offset 0   bra + delay slot (restores r4 to the real address)
             offset 4   two padding nops, never executed
             offset 8   .long 0   (the mask)
             offset 12  synco / icbi / rts
           The start is aligned to 4 bytes because the dispatcher accesses
           these words with mov.l.  */
        .global GLOBAL(ic_invalidate_array)
        .align  2
        FUNC(GLOBAL(ic_invalidate_array))
GLOBAL(ic_invalidate_array):
        bra     1f
          add   r1,r4
        nop
        nop
        .long   0
1:
        synco
        icbi    @r4
        rts
          nop
        ENDFUNC(GLOBAL(ic_invalidate_array))
2162
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2163
        /* SH4 invalidation array.  It is built from 32-byte records, and
           the start is aligned to 32 bytes.  The ic_invalidate dispatcher
           reads two words from the first record: offset 4 (loaded into
           r0) and offset 8 (used as the mask that selects the record to
           jump to).  WAYS and WAY_SIZE default to 4 and 0x4000 when the
           build does not define WAYS.
           NOTE(review): presumably WAYS / WAY_SIZE describe the
           instruction cache associativity and per-way size - confirm
           against the target CPU manual.  */
        .global GLOBAL(ic_invalidate_array)
2163
        .p2align 5
2164
        FUNC(GLOBAL(ic_invalidate_array))
2165
/* This must be aligned to the beginning of a cache line.  */
2166
GLOBAL(ic_invalidate_array):
2167
#ifndef WAYS
2168
#define WAYS 4
2169
#define WAY_SIZE 0x4000
2170
#endif
2171
#if WAYS == 1
2172
        /* One record per 32 bytes: rts/nop, then seven copies of the
           mask word (so offsets 4 and 8 both hold WAY_SIZE - 32).  */
        .rept   WAY_SIZE * WAYS / 32
2173
        rts
2174
        nop
2175
        .rept   7
2176
        .long   WAY_SIZE - 32
2177
        .endr
2178
        .endr
2179
#elif WAYS <= 6
2180
        /* Record layout (32 bytes):
             0   braf r0 / add #-8,r0   first hop; r0 arrives as WAY_SIZE + 8
             4   .long WAY_SIZE + 8     value the dispatcher loads into r0
             8   .long WAY_SIZE - 32    mask the dispatcher applies
             12  (WAYS - 2) x braf r0 / nop
                 (7 - WAYS) x rts / nop
           Each braf lands one slot further into the record that lies
           WAY_SIZE bytes ahead, until an rts is reached.  */
        .rept   WAY_SIZE * WAYS / 32
2181
        braf    r0
2182
        add     #-8,r0
2183
        .long   WAY_SIZE + 8
2184
        .long   WAY_SIZE - 32
2185
        .rept   WAYS-2
2186
        braf    r0
2187
        nop
2188
        .endr
2189
        .rept   7 - WAYS
2190
        rts
2191
        nop
2192
        .endr
2193
        .endr
2194
#else /* WAYS > 6 */
2195
        /* This variant needs two different pages for mmap-ing.  */
2196
        /* First (WAYS - 1) * WAY_SIZE bytes: records of braf/nop followed
           by WAY_SIZE and six copies of WAY_SIZE - 32.  */
        .rept   WAYS-1
2197
        .rept   WAY_SIZE / 32
2198
        braf    r0
2199
        nop
2200
        .long   WAY_SIZE
2201
        .rept 6
2202
        .long   WAY_SIZE - 32
2203
        .endr
2204
        .endr
2205
        .endr
2206
        /* Final WAY_SIZE bytes: records of rts followed by 15 nops.  */
        .rept   WAY_SIZE / 32
2207
        rts
2208
        .rept   15
2209
        nop
2210
        .endr
2211
        .endr
2212
#endif /* WAYS */
2213
        ENDFUNC(GLOBAL(ic_invalidate_array))
2215
#endif /* SH4 */
2216
#endif /* L_ic_invalidate_array */
2217
 
2218
#if defined (__SH5__) && __SH5__ == 32
2219
#ifdef L_shcompact_call_trampoline
2220
        /* Dispatch table for GCC_shcompact_call_trampoline.  It holds 33
           16-bit entries, each the distance from ct_main_label to a
           handler.  The trampoline indexes it with nsb(cookie) - 31, so
           entry 0 is used when bit 31 is the highest set cookie bit,
           entry 31 when bit 0 is, and the last entry when the cookie is
           zero.  */
        .section        .rodata
2221
        .align  1
2222
LOCAL(ct_main_table):
2223
.word   LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2224
.word   LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2225
.word   LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2226
.word   LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2227
.word   LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2228
.word   LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2229
.word   LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2230
.word   LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2231
.word   LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2232
.word   LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2233
.word   LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2234
.word   LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2235
.word   LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2236
.word   LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2237
.word   LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2238
.word   LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2239
.word   LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2240
.word   LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2241
.word   LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2242
.word   LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2243
.word   LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2244
.word   LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2245
.word   LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2246
.word   LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2247
.word   LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2248
.word   LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2249
.word   LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2250
.word   LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2251
.word   LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2252
.word   LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2253
.word   LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2254
.word   LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2255
.word   LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2256
        /* Switch back to SHmedia code for the trampoline itself.  */
        .mode   SHmedia
2257
        .section        .text..SHmedia32, "ax"
2258
        .align  2
2259
 
2260
     /* This function loads 64-bit general-purpose registers from the
2261
        stack, from a memory address contained in them or from an FP
2262
        register, according to a cookie passed in r1.  Its execution
2263
        time is linear on the number of registers that actually have
2264
        to be copied.  See sh.h for details on the actual bit pattern.
2265
 
2266
        The function to be called is passed in r0.  If a 32-bit return
2267
        value is expected, the actual function will be tail-called,
2268
        otherwise the return address will be stored in r10 (that the
2269
        caller should expect to be clobbered) and the return value
2270
        will be expanded into r2/r3 upon return.  */
2271
 
2272
        /* Entry and dispatch loop.
           In:   r0 = function to call, r1 = cookie (low 32 bits used).
           Sets: tr0 = function, tr1 = ct_loop,
                 r0  = ct_main_table - 31 * 2 (biased table base).
           Every handler clears its own cookie bits and branches back to
           ct_loop through tr1, so each pass handles the highest bit that
           is still set.  */
        .global GLOBAL(GCC_shcompact_call_trampoline)
2273
        FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2274
GLOBAL(GCC_shcompact_call_trampoline):
2275
        ptabs/l r0, tr0 /* Prepare to call the actual function.  */
2276
        movi    ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2277
        pt/l    LOCAL(ct_loop), tr1
2278
        /* Zero-extend the cookie so that nsb below sees only its 32 bits.  */
        addz.l  r1, r63, r1
2279
        shori   ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2280
LOCAL(ct_loop):
2281
        /* r28 = 31 + number of leading zero bits in the 32-bit cookie
           (63 when the cookie is zero); the -31 * 2 bias on r0 turns
           that into a table index starting at 0.  */
        nsb     r1, r28
2282
        shlli   r28, 1, r29
2283
        ldx.w   r0, r29, r30
2284
LOCAL(ct_main_label):
2285
        /* Table entries are relative to this instruction.  */
        ptrel/l r30, tr2
2286
        blink   tr2, r63
2287
/* FP-copy handlers for r2..r5.  Each one copies a 64-bit value from a
   double FP register into the integer register, clears that register's
   3-bit cookie field (r2: bits 29-31, r3: 26-28, r4: 23-25, r5: 20-22)
   and returns to ct_loop through tr1.  */
LOCAL(ct_r2_fp):        /* Copy r2 from an FP register.  */
2288
        /* It must be dr0, so just do it.  */
2289
        fmov.dq dr0, r2
2290
        movi    7, r30
2291
        shlli   r30, 29, r31
2292
        andc    r1, r31, r1
2293
        blink   tr1, r63
2294
LOCAL(ct_r3_fp):        /* Copy r3 from an FP register.  */
2295
        /* It is either dr0 or dr2.  */
2296
        movi    7, r30
2297
        /* r32 = the field value, sampled before the field is cleared.  */
        shlri   r1, 26, r32
2298
        shlli   r30, 26, r31
2299
        andc    r1, r31, r1
2300
        fmov.dq dr0, r3
2301
        /* Field value 4 keeps dr0; any other value takes dr2.  */
        beqi/l  r32, 4, tr1
2302
        fmov.dq dr2, r3
2303
        blink   tr1, r63
2304
LOCAL(ct_r4_fp):        /* Copy r4 from an FP register.  */
2305
        /* r33 = (cookie bits 23-24) * 8 = byte offset of an 8-byte entry
           in ct_r4_fp_copy; tr2 is then set to that entry.  */
        shlri   r1, 23 - 3, r34
2306
        andi    r34, 3 << 3, r33
2307
        addi    r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2308
LOCAL(ct_r4_fp_base):
2309
        ptrel/l r32, tr2
2310
        movi    7, r30
2311
        shlli   r30, 23, r31
2312
        andc    r1, r31, r1
2313
        blink   tr2, r63
2314
        /* Three entries: dr0, dr2, dr4.  */
LOCAL(ct_r4_fp_copy):
2315
        fmov.dq dr0, r4
2316
        blink   tr1, r63
2317
        fmov.dq dr2, r4
2318
        blink   tr1, r63
2319
        fmov.dq dr4, r4
2320
        blink   tr1, r63
2321
LOCAL(ct_r5_fp):        /* Copy r5 from an FP register.  */
2322
        /* Same scheme as ct_r4_fp, selecting on cookie bits 20-21.  */
        shlri   r1, 20 - 3, r34
2323
        andi    r34, 3 << 3, r33
2324
        addi    r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2325
LOCAL(ct_r5_fp_base):
2326
        ptrel/l r32, tr2
2327
        movi    7, r30
2328
        shlli   r30, 20, r31
2329
        andc    r1, r31, r1
2330
        blink   tr2, r63
2331
        /* Four entries: dr0, dr2, dr4, dr6.  */
LOCAL(ct_r5_fp_copy):
2332
        fmov.dq dr0, r5
2333
        blink   tr1, r63
2334
        fmov.dq dr2, r5
2335
        blink   tr1, r63
2336
        fmov.dq dr4, r5
2337
        blink   tr1, r63
2338
        fmov.dq dr6, r5
2339
        blink   tr1, r63
2340
/* FP-copy handlers for r6..r9.  These registers have 4-bit cookie fields
   (r6: bits 16-19, r7: 12-15, r8: 8-11, r9: 4-7).  The "fph" handlers
   copy from dr8 or dr10 and clear the whole 4-bit field; the "fpl"
   handlers pick dr0/dr2/dr4/dr6 through an 8-byte-per-entry jump table
   selected by the two low bits of the field, and clear the low three
   bits.  All of them return to ct_loop through tr1.  */
LOCAL(ct_r6_fph):       /* Copy r6 from a high FP register.  */
2341
        /* It must be dr8.  */
2342
        fmov.dq dr8, r6
2343
        movi    15, r30
2344
        shlli   r30, 16, r31
2345
        andc    r1, r31, r1
2346
        blink   tr1, r63
2347
LOCAL(ct_r6_fpl):       /* Copy r6 from a low FP register.  */
2348
        /* r33 = (cookie bits 16-17) * 8, the offset into ct_r6_fp_copy.  */
        shlri   r1, 16 - 3, r34
2349
        andi    r34, 3 << 3, r33
2350
        addi    r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2351
LOCAL(ct_r6_fp_base):
2352
        ptrel/l r32, tr2
2353
        movi    7, r30
2354
        shlli   r30, 16, r31
2355
        andc    r1, r31, r1
2356
        blink   tr2, r63
2357
LOCAL(ct_r6_fp_copy):
2358
        fmov.dq dr0, r6
2359
        blink   tr1, r63
2360
        fmov.dq dr2, r6
2361
        blink   tr1, r63
2362
        fmov.dq dr4, r6
2363
        blink   tr1, r63
2364
        fmov.dq dr6, r6
2365
        blink   tr1, r63
2366
LOCAL(ct_r7_fph):       /* Copy r7 from a high FP register.  */
2367
        /* It is either dr8 or dr10.  */
2368
        movi    15 << 12, r31
2369
        /* r32 = the field value, sampled before the field is cleared.  */
        shlri   r1, 12, r32
2370
        andc    r1, r31, r1
2371
        fmov.dq dr8, r7
2372
        /* Field value 8 keeps dr8; any other value takes dr10.  */
        beqi/l  r32, 8, tr1
2373
        fmov.dq dr10, r7
2374
        blink   tr1, r63
2375
LOCAL(ct_r7_fpl):       /* Copy r7 from a low FP register.  */
2376
        shlri   r1, 12 - 3, r34
2377
        andi    r34, 3 << 3, r33
2378
        addi    r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2379
LOCAL(ct_r7_fp_base):
2380
        ptrel/l r32, tr2
2381
        movi    7 << 12, r31
2382
        andc    r1, r31, r1
2383
        blink   tr2, r63
2384
LOCAL(ct_r7_fp_copy):
2385
        fmov.dq dr0, r7
2386
        blink   tr1, r63
2387
        fmov.dq dr2, r7
2388
        blink   tr1, r63
2389
        fmov.dq dr4, r7
2390
        blink   tr1, r63
2391
        fmov.dq dr6, r7
2392
        blink   tr1, r63
2393
LOCAL(ct_r8_fph):       /* Copy r8 from a high FP register.  */
2394
        /* It is either dr8 or dr10.  */
2395
        movi    15 << 8, r31
2396
        /* Only the lowest bit of the field decides: clear keeps dr8,
           set takes dr10.  */
        andi    r1, 1 << 8, r32
2397
        andc    r1, r31, r1
2398
        fmov.dq dr8, r8
2399
        beq/l   r32, r63, tr1
2400
        fmov.dq dr10, r8
2401
        blink   tr1, r63
2402
LOCAL(ct_r8_fpl):       /* Copy r8 from a low FP register.  */
2403
        shlri   r1, 8 - 3, r34
2404
        andi    r34, 3 << 3, r33
2405
        addi    r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2406
LOCAL(ct_r8_fp_base):
2407
        ptrel/l r32, tr2
2408
        movi    7 << 8, r31
2409
        andc    r1, r31, r1
2410
        blink   tr2, r63
2411
LOCAL(ct_r8_fp_copy):
2412
        fmov.dq dr0, r8
2413
        blink   tr1, r63
2414
        fmov.dq dr2, r8
2415
        blink   tr1, r63
2416
        fmov.dq dr4, r8
2417
        blink   tr1, r63
2418
        fmov.dq dr6, r8
2419
        blink   tr1, r63
2420
LOCAL(ct_r9_fph):       /* Copy r9 from a high FP register.  */
2421
        /* It is either dr8 or dr10.  */
2422
        movi    15 << 4, r31
2423
        /* Same test as ct_r8_fph, on bit 4.  */
        andi    r1, 1 << 4, r32
2424
        andc    r1, r31, r1
2425
        fmov.dq dr8, r9
2426
        beq/l   r32, r63, tr1
2427
        fmov.dq dr10, r9
2428
        blink   tr1, r63
2429
LOCAL(ct_r9_fpl):       /* Copy r9 from a low FP register.  */
2430
        shlri   r1, 4 - 3, r34
2431
        andi    r34, 3 << 3, r33
2432
        addi    r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2433
LOCAL(ct_r9_fp_base):
2434
        ptrel/l r32, tr2
2435
        movi    7 << 4, r31
2436
        andc    r1, r31, r1
2437
        blink   tr2, r63
2438
LOCAL(ct_r9_fp_copy):
2439
        fmov.dq dr0, r9
2440
        blink   tr1, r63
2441
        fmov.dq dr2, r9
2442
        blink   tr1, r63
2443
        fmov.dq dr4, r9
2444
        blink   tr1, r63
2445
        fmov.dq dr6, r9
2446
        blink   tr1, r63
2447
/* Memory-load handlers.  ct_rN_ld replaces rN with the 64-bit value at the
   address held in rN.  It first clears the two low bits of rN's cookie
   field.  If both bits were set, it branches to ct_rN_load, which does
   the load and returns to ct_loop.  Otherwise it puts rN + 8 into the
   next register, loads rN, and falls through into the next register's
   handler, so consecutive registers are filled from consecutive 8-byte
   slots.  ct_r9_ld ends the chain and goes to ct_check_tramp.  */
LOCAL(ct_r2_ld):        /* Copy r2 from a memory address.  */
2448
        pt/l    LOCAL(ct_r2_load), tr2
2449
        movi    3, r30
2450
        shlli   r30, 29, r31
2451
        and     r1, r31, r32
2452
        andc    r1, r31, r1
2453
        beq/l   r31, r32, tr2
2454
        addi.l  r2, 8, r3
2455
        ldx.q   r2, r63, r2
2456
        /* Fall through.  */
2457
LOCAL(ct_r3_ld):        /* Copy r3 from a memory address.  */
2458
        pt/l    LOCAL(ct_r3_load), tr2
2459
        movi    3, r30
2460
        shlli   r30, 26, r31
2461
        and     r1, r31, r32
2462
        andc    r1, r31, r1
2463
        beq/l   r31, r32, tr2
2464
        addi.l  r3, 8, r4
2465
        ldx.q   r3, r63, r3
2466
LOCAL(ct_r4_ld):        /* Copy r4 from a memory address.  */
2467
        pt/l    LOCAL(ct_r4_load), tr2
2468
        movi    3, r30
2469
        shlli   r30, 23, r31
2470
        and     r1, r31, r32
2471
        andc    r1, r31, r1
2472
        beq/l   r31, r32, tr2
2473
        addi.l  r4, 8, r5
2474
        ldx.q   r4, r63, r4
2475
LOCAL(ct_r5_ld):        /* Copy r5 from a memory address.  */
2476
        pt/l    LOCAL(ct_r5_load), tr2
2477
        movi    3, r30
2478
        shlli   r30, 20, r31
2479
        and     r1, r31, r32
2480
        andc    r1, r31, r1
2481
        beq/l   r31, r32, tr2
2482
        addi.l  r5, 8, r6
2483
        ldx.q   r5, r63, r5
2484
LOCAL(ct_r6_ld):        /* Copy r6 from a memory address.  */
2485
        pt/l    LOCAL(ct_r6_load), tr2
2486
        movi    3 << 16, r31
2487
        and     r1, r31, r32
2488
        andc    r1, r31, r1
2489
        beq/l   r31, r32, tr2
2490
        addi.l  r6, 8, r7
2491
        ldx.q   r6, r63, r6
2492
LOCAL(ct_r7_ld):        /* Copy r7 from a memory address.  */
2493
        pt/l    LOCAL(ct_r7_load), tr2
2494
        movi    3 << 12, r31
2495
        and     r1, r31, r32
2496
        andc    r1, r31, r1
2497
        beq/l   r31, r32, tr2
2498
        addi.l  r7, 8, r8
2499
        ldx.q   r7, r63, r7
2500
LOCAL(ct_r8_ld):        /* Copy r8 from a memory address.  */
2501
        pt/l    LOCAL(ct_r8_load), tr2
2502
        movi    3 << 8, r31
2503
        and     r1, r31, r32
2504
        andc    r1, r31, r1
2505
        beq/l   r31, r32, tr2
2506
        addi.l  r8, 8, r9
2507
        ldx.q   r8, r63, r8
2508
LOCAL(ct_r9_ld):        /* Copy r9 from a memory address.  */
2509
        /* Last register: nothing is left to decode except bit 0.  */
        pt/l    LOCAL(ct_check_tramp), tr2
2510
        ldx.q   r9, r63, r9
2511
        blink   tr2, r63
2512
/* Single-register load stubs: load rN from the address in rN and return
   to ct_loop.  */
LOCAL(ct_r2_load):
2513
        ldx.q   r2, r63, r2
2514
        blink   tr1, r63
2515
LOCAL(ct_r3_load):
2516
        ldx.q   r3, r63, r3
2517
        blink   tr1, r63
2518
LOCAL(ct_r4_load):
2519
        ldx.q   r4, r63, r4
2520
        blink   tr1, r63
2521
LOCAL(ct_r5_load):
2522
        ldx.q   r5, r63, r5
2523
        blink   tr1, r63
2524
LOCAL(ct_r6_load):
2525
        ldx.q   r6, r63, r6
2526
        blink   tr1, r63
2527
LOCAL(ct_r7_load):
2528
        ldx.q   r7, r63, r7
2529
        blink   tr1, r63
2530
LOCAL(ct_r8_load):
2531
        ldx.q   r8, r63, r8
2532
        blink   tr1, r63
2533
/* Stack-pop handlers.  ct_rN_pop (N = 2..8) loads rN from the 64-bit slot
   at r15, advances r15 by 8, clears the lowest bit of rN's cookie field
   and returns to ct_loop through tr1.  */
LOCAL(ct_r2_pop):       /* Pop r2 from the stack.  */
2534
        movi    1, r30
2535
        ldx.q   r15, r63, r2
2536
        shlli   r30, 29, r31
2537
        addi.l  r15, 8, r15
2538
        andc    r1, r31, r1
2539
        blink   tr1, r63
2540
LOCAL(ct_r3_pop):       /* Pop r3 from the stack.  */
2541
        movi    1, r30
2542
        ldx.q   r15, r63, r3
2543
        shlli   r30, 26, r31
2544
        addi.l  r15, 8, r15
2545
        andc    r1, r31, r1
2546
        blink   tr1, r63
2547
LOCAL(ct_r4_pop):       /* Pop r4 from the stack.  */
2548
        movi    1, r30
2549
        ldx.q   r15, r63, r4
2550
        shlli   r30, 23, r31
2551
        addi.l  r15, 8, r15
2552
        andc    r1, r31, r1
2553
        blink   tr1, r63
2554
LOCAL(ct_r5_pop):       /* Pop r5 from the stack.  */
2555
        movi    1, r30
2556
        ldx.q   r15, r63, r5
2557
        shlli   r30, 20, r31
2558
        addi.l  r15, 8, r15
2559
        andc    r1, r31, r1
2560
        blink   tr1, r63
2561
LOCAL(ct_r6_pop):       /* Pop r6 from the stack.  */
2562
        movi    1, r30
2563
        ldx.q   r15, r63, r6
2564
        shlli   r30, 16, r31
2565
        addi.l  r15, 8, r15
2566
        andc    r1, r31, r1
2567
        blink   tr1, r63
2568
LOCAL(ct_r7_pop):       /* Pop r7 from the stack.  */
2569
        ldx.q   r15, r63, r7
2570
        movi    1 << 12, r31
2571
        addi.l  r15, 8, r15
2572
        andc    r1, r31, r1
2573
        blink   tr1, r63
2574
LOCAL(ct_r8_pop):       /* Pop r8 from the stack.  */
2575
        ldx.q   r15, r63, r8
2576
        movi    1 << 8, r31
2577
        addi.l  r15, 8, r15
2578
        andc    r1, r31, r1
2579
        blink   tr1, r63
2580
LOCAL(ct_pop_seq):      /* Pop a sequence of registers off the stack.  */
2581
        /* Let n = cookie bits 1-3.  r30 = 2 * n and r31 = 8 * n.  Each pop
           below is two 4-byte instructions, so jumping to
           ct_end_of_pop_seq - 8 * n runs exactly the last n pops, ending
           with r9.  The instruction count between ct_start_of_pop_seq
           and ct_end_of_pop_seq must therefore not change.  */
        andi    r1, 7 << 1, r30
2582
        movi    (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2583
        shlli   r30, 2, r31
2584
        shori   LOCAL(ct_end_of_pop_seq) & 65535, r32
2585
        sub.l   r32, r31, r33
2586
        ptabs/l r33, tr2
2587
        blink   tr2, r63
2588
LOCAL(ct_start_of_pop_seq):     /* Beginning of pop sequence.  */
2589
        ldx.q   r15, r63, r3
2590
        addi.l  r15, 8, r15
2591
        ldx.q   r15, r63, r4
2592
        addi.l  r15, 8, r15
2593
        ldx.q   r15, r63, r5
2594
        addi.l  r15, 8, r15
2595
        ldx.q   r15, r63, r6
2596
        addi.l  r15, 8, r15
2597
        ldx.q   r15, r63, r7
2598
        addi.l  r15, 8, r15
2599
        ldx.q   r15, r63, r8
2600
        addi.l  r15, 8, r15
2601
LOCAL(ct_r9_pop):       /* Pop r9 from the stack.  */
2602
        /* Also a dispatch-table target; control continues into the
           code that follows this pop rather than returning to ct_loop.  */
        ldx.q   r15, r63, r9
2603
        addi.l  r15, 8, r15
2604
/* Final stage: all register fields have been handled.  Cookie bit 0
   chooses between a plain tail call (ct_call_func) and a real call whose
   64-bit result in r2 is split into r2/r3 afterwards (ct_ret_wide).  */
LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
2605
LOCAL(ct_check_tramp):  /* Check whether we need a trampoline.  */
2606
        pt/u    LOCAL(ct_ret_wide), tr2
2607
        andi    r1, 1, r1
2608
        bne/u   r1, r63, tr2
2609
LOCAL(ct_call_func):    /* Just branch to the function.  */
2610
        blink   tr0, r63
2611
LOCAL(ct_ret_wide):     /* Call the function, so that we can unpack its
2612
                           64-bit return value.  */
2613
        /* Keep the caller's return address in r10 across the call, since
           the call itself overwrites r18 with the link value.  */
        add.l   r18, r63, r10
2614
        blink   tr0, r18
2615
        ptabs   r10, tr0
2616
        /* Split r2: one register receives the upper 32 bits (arithmetic
           shift), the other the sign-extended lower 32 bits; which is
           which depends on endianness.  */
#if __LITTLE_ENDIAN__
2617
        shari   r2, 32, r3
2618
        add.l   r2, r63, r2
2619
#else
2620
        add.l   r2, r63, r3
2621
        shari   r2, 32, r2
2622
#endif
2623
        blink   tr0, r63
2624
 
2625
        ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2626
#endif /* L_shcompact_call_trampoline */
2627
 
2628
#ifdef L_shcompact_return_trampoline
2629
     /* This function does the converse of the code in `ret_wide'
2630
        above.  It is tail-called by SHcompact functions returning
2631
        64-bit non-floating-point values, to pack the 32-bit values in
2632
        r2 and r3 into r2.  */
2633
 
2634
        /* GCC_shcompact_return_trampoline
           In:   r2, r3 = the two 32-bit halves of the result,
                 r18    = return address.
           Out:  r2 = 64-bit value combining both halves.
           One half is zero-extended to 64 bits, the other is shifted into
           the upper 32 bits, and the two are OR-ed together.  On
           little-endian r2 is the low half; on big-endian r3 is.
           Modifies r2, r3 and tr0 only.  */
        .mode   SHmedia
2635
        .section        .text..SHmedia32, "ax"
2636
        .align  2
2637
        .global GLOBAL(GCC_shcompact_return_trampoline)
2638
        HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2639
GLOBAL(GCC_shcompact_return_trampoline):
2640
        ptabs/l r18, tr0
2641
#if __LITTLE_ENDIAN__
2642
        addz.l  r2, r63, r2
2643
        shlli   r3, 32, r3
2644
#else
2645
        addz.l  r3, r63, r3
2646
        shlli   r2, 32, r2
2647
#endif
2648
        or      r3, r2, r2
2649
        blink   tr0, r63
2650
 
2651
        ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2652
#endif /* L_shcompact_return_trampoline */
2653
 
2654
#ifdef L_shcompact_incoming_args
2655
        /* Dispatch table for GCC_shcompact_incoming_args.  It holds 33
           16-bit entries, each the distance from ia_main_label to a
           handler, and is indexed with nsb(cookie) - 31 (entry 0 for
           highest set bit 31, the last entry for a zero cookie).
           Positions with no handler hold the value 1, which the original
           comments mark as "Invalid, just loop".  */
        .section        .rodata
2656
        .align  1
2657
LOCAL(ia_main_table):
2658
.word   1 /* Invalid, just loop */
2659
.word   LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2660
.word   LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2661
.word   1 /* Invalid, just loop */
2662
.word   LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2663
.word   LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2664
.word   1 /* Invalid, just loop */
2665
.word   LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2666
.word   LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2667
.word   1 /* Invalid, just loop */
2668
.word   LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2669
.word   LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2670
.word   1 /* Invalid, just loop */
2671
.word   1 /* Invalid, just loop */
2672
.word   LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2673
.word   LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2674
.word   1 /* Invalid, just loop */
2675
.word   1 /* Invalid, just loop */
2676
.word   LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2677
.word   LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2678
.word   1 /* Invalid, just loop */
2679
.word   1 /* Invalid, just loop */
2680
.word   LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2681
.word   LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2682
.word   1 /* Invalid, just loop */
2683
.word   1 /* Invalid, just loop */
2684
.word   LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2685
.word   LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2686
.word   LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2687
.word   LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2688
.word   LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2689
.word   LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2690
.word   LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2691
        /* Switch back to SHmedia code for the function itself.  */
        .mode   SHmedia
2692
        .section        .text..SHmedia32, "ax"
2693
        .align  2
2694
 
2695
     /* This function stores 64-bit general-purpose registers back in
2696
        the stack, and loads the address in which each register
2697
        was stored into itself.  The lower 32 bits of r17 hold the address
2698
        to begin storing, and the upper 32 bits of r17 hold the cookie.
2699
        Its execution time is linear on the
2700
        number of registers that actually have to be copied, and it is
2701
        optimized for structures larger than 64 bits, as opposed to
2702
        individual `long long' arguments.  See sh.h for details on the
2703
        actual bit pattern.  */
2704
 
2705
        /* Entry and dispatch loop.
           In:   r17 = cookie in the upper 32 bits, store address in the
                       lower 32 bits; r18 = return address.
           Sets: tr0 = return target, tr1 = ia_loop, r0 = cookie,
                 r17 = sign-extended store address,
                 r43 = ia_main_table - 31 * 2 (biased table base).
           Every handler clears its own cookie bits and comes back to
           ia_loop through tr1, so each pass handles the highest bit that
           is still set.  */
        .global GLOBAL(GCC_shcompact_incoming_args)
2706
        FUNC(GLOBAL(GCC_shcompact_incoming_args))
2707
GLOBAL(GCC_shcompact_incoming_args):
2708
        ptabs/l r18, tr0        /* Prepare to return.  */
2709
        shlri   r17, 32, r0     /* Load the cookie.  */
2710
        movi    ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2711
        pt/l    LOCAL(ia_loop), tr1
2712
        add.l   r17, r63, r17
2713
        shori   ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2714
LOCAL(ia_loop):
2715
        /* The logical shift above left the upper half of r0 zero, so
           r36 = 31 + number of leading zero bits in the 32-bit cookie.  */
        nsb     r0, r36
2716
        shlli   r36, 1, r37
2717
        ldx.w   r43, r37, r38
2718
LOCAL(ia_main_label):
2719
        /* Table entries are relative to this instruction.  */
        ptrel/l r38, tr2
2720
        blink   tr2, r63
2721
/* Store-and-point handlers.  ia_rN_ld stores rN in the 64-bit slot at
   r17, replaces rN with the address of that slot, advances r17 by 8 and
   clears the two low bits of rN's cookie field.  If both bits were set,
   it returns to ia_loop through tr1.  Otherwise it falls through into
   the next register's handler, so a run of registers is spilled to
   consecutive slots.  ia_r9_ld ends the chain and returns to the caller
   through tr0.  */
LOCAL(ia_r2_ld):        /* Store r2 and load its address.  */
2722
        movi    3, r38
2723
        shlli   r38, 29, r39
2724
        and     r0, r39, r40
2725
        andc    r0, r39, r0
2726
        stx.q   r17, r63, r2
2727
        add.l   r17, r63, r2
2728
        addi.l  r17, 8, r17
2729
        beq/u   r39, r40, tr1
2730
LOCAL(ia_r3_ld):        /* Store r3 and load its address.  */
2731
        movi    3, r38
2732
        shlli   r38, 26, r39
2733
        and     r0, r39, r40
2734
        andc    r0, r39, r0
2735
        stx.q   r17, r63, r3
2736
        add.l   r17, r63, r3
2737
        addi.l  r17, 8, r17
2738
        beq/u   r39, r40, tr1
2739
LOCAL(ia_r4_ld):        /* Store r4 and load its address.  */
2740
        movi    3, r38
2741
        shlli   r38, 23, r39
2742
        and     r0, r39, r40
2743
        andc    r0, r39, r0
2744
        stx.q   r17, r63, r4
2745
        add.l   r17, r63, r4
2746
        addi.l  r17, 8, r17
2747
        beq/u   r39, r40, tr1
2748
LOCAL(ia_r5_ld):        /* Store r5 and load its address.  */
2749
        movi    3, r38
2750
        shlli   r38, 20, r39
2751
        and     r0, r39, r40
2752
        andc    r0, r39, r0
2753
        stx.q   r17, r63, r5
2754
        add.l   r17, r63, r5
2755
        addi.l  r17, 8, r17
2756
        beq/u   r39, r40, tr1
2757
LOCAL(ia_r6_ld):        /* Store r6 and load its address.  */
2758
        movi    3, r38
2759
        shlli   r38, 16, r39
2760
        and     r0, r39, r40
2761
        andc    r0, r39, r0
2762
        stx.q   r17, r63, r6
2763
        add.l   r17, r63, r6
2764
        addi.l  r17, 8, r17
2765
        beq/u   r39, r40, tr1
2766
LOCAL(ia_r7_ld):        /* Store r7 and load its address.  */
2767
        movi    3 << 12, r39
2768
        and     r0, r39, r40
2769
        andc    r0, r39, r0
2770
        stx.q   r17, r63, r7
2771
        add.l   r17, r63, r7
2772
        addi.l  r17, 8, r17
2773
        beq/u   r39, r40, tr1
2774
LOCAL(ia_r8_ld):        /* Store r8 and load its address.  */
2775
        movi    3 << 8, r39
2776
        and     r0, r39, r40
2777
        andc    r0, r39, r0
2778
        stx.q   r17, r63, r8
2779
        add.l   r17, r63, r8
2780
        addi.l  r17, 8, r17
2781
        beq/u   r39, r40, tr1
2782
LOCAL(ia_r9_ld):        /* Store r9 and load its address.  */
2783
        stx.q   r17, r63, r9
2784
        add.l   r17, r63, r9
2785
        blink   tr0, r63
2786
/* Push handlers.  ia_rN_push (N = 2..8) clears the lowest bit of rN's
   cookie field, stores rN in the 64-bit slot at r17, advances r17 by 8
   and returns to ia_loop through tr1.  Unlike the ia_rN_ld handlers,
   rN itself is left unchanged.  */
LOCAL(ia_r2_push):      /* Push r2 onto the stack.  */
2787
        movi    1, r38
2788
        shlli   r38, 29, r39
2789
        andc    r0, r39, r0
2790
        stx.q   r17, r63, r2
2791
        addi.l  r17, 8, r17
2792
        blink   tr1, r63
2793
LOCAL(ia_r3_push):      /* Push r3 onto the stack.  */
2794
        movi    1, r38
2795
        shlli   r38, 26, r39
2796
        andc    r0, r39, r0
2797
        stx.q   r17, r63, r3
2798
        addi.l  r17, 8, r17
2799
        blink   tr1, r63
2800
LOCAL(ia_r4_push):      /* Push r4 onto the stack.  */
2801
        movi    1, r38
2802
        shlli   r38, 23, r39
2803
        andc    r0, r39, r0
2804
        stx.q   r17, r63, r4
2805
        addi.l  r17, 8, r17
2806
        blink   tr1, r63
2807
LOCAL(ia_r5_push):      /* Push r5 onto the stack.  */
2808
        movi    1, r38
2809
        shlli   r38, 20, r39
2810
        andc    r0, r39, r0
2811
        stx.q   r17, r63, r5
2812
        addi.l  r17, 8, r17
2813
        blink   tr1, r63
2814
LOCAL(ia_r6_push):      /* Push r6 onto the stack.  */
2815
        movi    1, r38
2816
        shlli   r38, 16, r39
2817
        andc    r0, r39, r0
2818
        stx.q   r17, r63, r6
2819
        addi.l  r17, 8, r17
2820
        blink   tr1, r63
2821
LOCAL(ia_r7_push):      /* Push r7 onto the stack.  */
2822
        movi    1 << 12, r39
2823
        andc    r0, r39, r0
2824
        stx.q   r17, r63, r7
2825
        addi.l  r17, 8, r17
2826
        blink   tr1, r63
2827
LOCAL(ia_r8_push):      /* Push r8 onto the stack.  */
2828
        movi    1 << 8, r39
2829
        andc    r0, r39, r0
2830
        stx.q   r17, r63, r8
2831
        addi.l  r17, 8, r17
2832
        blink   tr1, r63
2833
/* Sequence push.  Let n = cookie bits 1-3.  r38 = 2 * n and r39 = 8 * n.
   Control jumps to ia_end_of_push_seq - 8 * n.  Counting back from that
   label, the last 8 bytes are the r9 store plus the return branch, and
   each earlier 8 bytes are one store/advance pair, so exactly the last n
   registers ending with r9 are stored before returning.  The instruction
   count between ia_stack_of_push_seq and ia_end_of_push_seq must
   therefore not change.  */
LOCAL(ia_push_seq):     /* Push a sequence of registers onto the stack.  */
2834
        andi    r0, 7 << 1, r38
2835
        movi    (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2836
        shlli   r38, 2, r39
2837
        shori   LOCAL(ia_end_of_push_seq) & 65535, r40
2838
        sub.l   r40, r39, r41
2839
        ptabs/l r41, tr2
2840
        blink   tr2, r63
2841
LOCAL(ia_stack_of_push_seq):     /* Beginning of push sequence.  */
2842
        stx.q   r17, r63, r3
2843
        addi.l  r17, 8, r17
2844
        stx.q   r17, r63, r4
2845
        addi.l  r17, 8, r17
2846
        stx.q   r17, r63, r5
2847
        addi.l  r17, 8, r17
2848
        stx.q   r17, r63, r6
2849
        addi.l  r17, 8, r17
2850
        stx.q   r17, r63, r7
2851
        addi.l  r17, 8, r17
2852
        stx.q   r17, r63, r8
2853
        addi.l  r17, 8, r17
2854
LOCAL(ia_r9_push):      /* Push r9 onto the stack.  */
2855
        /* r17 is not advanced after this last store.  */
        stx.q   r17, r63, r9
2856
LOCAL(ia_return):       /* Return.  */
2857
        blink   tr0, r63
2858
LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
2859
        ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2860
#endif /* L_shcompact_incoming_args */
2861
#endif
2862
#if __SH5__
2863
#ifdef L_nested_trampoline
2864
/* GCC_nested_trampoline: position-independent trampoline body.  It takes
   its own address, loads a branch target from offset 24 and a second
   value from offset 32 (64-bit ABI) or 28 (32-bit ABI) into r1, and then
   branches to the target.  Both values live just past this code, so they
   have to be written there by whoever copies the trampoline.
   NOTE(review): the value left in r1 is presumably the static chain for
   the nested function - confirm against the compiler's trampoline
   initialization.  */
#if __SH5__ == 32
2865
        .section        .text..SHmedia32,"ax"
2866
#else
2867
        .text
2868
#endif
2869
        .align  3 /* It is copied in units of 8 bytes in SHmedia mode.  */
2870
        .global GLOBAL(GCC_nested_trampoline)
2871
        HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2872
GLOBAL(GCC_nested_trampoline):
2873
        .mode   SHmedia
2874
        /* A zero displacement (r63) makes tr0 the address of this very
           instruction; gettr then copies it into r0 as the base for the
           loads below.  */
        ptrel/u r63, tr0
2875
        gettr   tr0, r0
2876
#if __SH5__ == 64
2877
        ld.q    r0, 24, r1
2878
#else
2879
        ld.l    r0, 24, r1
2880
#endif
2881
        ptabs/l r1, tr1
2882
#if __SH5__ == 64
2883
        ld.q    r0, 32, r1
2884
#else
2885
        ld.l    r0, 28, r1
2886
#endif
2887
        blink   tr1, r63
2888
 
2889
        ENDFUNC(GLOBAL(GCC_nested_trampoline))
2890
#endif /* L_nested_trampoline */
2891
#endif /* __SH5__ */
2892
#if __SH5__ == 32
2893
#ifdef L_push_pop_shmedia_regs
/* GCC_push_shmedia_regs / GCC_pop_shmedia_regs (and the _nofpu variants
   built when __SH4_NOFPU__ is defined): save and restore a fixed set of
   registers on the stack addressed by r15.  Both routines take their
   return address from r18.

   Stack layout after a push, in 8-byte slots counted from the new r15:
     slots  0..7    r28..r35
     slots  8..23   r44..r59
     slots 24..26   tr5..tr7 (moved through r60..r62, whose own previous
                    contents are therefore not saved)
     slots 27..40   dr36, dr38, ... dr62 (FPU variant only)  */
2894
        .section        .text..SHmedia32,"ax"
2895
        .mode   SHmedia
2896
        .align  2
2897
#ifndef __SH4_NOFPU__
2898
        .global GLOBAL(GCC_push_shmedia_regs)
2899
        FUNC(GLOBAL(GCC_push_shmedia_regs))
2900
GLOBAL(GCC_push_shmedia_regs):
2901
        /* FPU variant only: reserve 14 slots and store the even-numbered
           double registers dr36..dr62, then fall into the common part.  */
        addi.l  r15, -14*8, r15
2902
        fst.d   r15, 13*8, dr62
2903
        fst.d   r15, 12*8, dr60
2904
        fst.d   r15, 11*8, dr58
2905
        fst.d   r15, 10*8, dr56
2906
        fst.d   r15,  9*8, dr54
2907
        fst.d   r15,  8*8, dr52
2908
        fst.d   r15,  7*8, dr50
2909
        fst.d   r15,  6*8, dr48
2910
        fst.d   r15,  5*8, dr46
2911
        fst.d   r15,  4*8, dr44
2912
        fst.d   r15,  3*8, dr42
2913
        fst.d   r15,  2*8, dr40
2914
        fst.d   r15,  1*8, dr38
2915
        fst.d   r15,  0*8, dr36
2916
#else /* ! __SH4_NOFPU__ */
2917
        .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2918
        FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2919
GLOBAL(GCC_push_shmedia_regs_nofpu):
2920
#endif /* ! __SH4_NOFPU__ */
2921
        /* Common part: return target from r18, reserve 27 slots.  */
        ptabs/l r18, tr0
2922
        addi.l  r15, -27*8, r15
2923
        /* Copy tr5..tr7 into r60..r62 so they can be stored with st.q;
           this overwrites the incoming r60..r62.  */
        gettr   tr7, r62
2924
        gettr   tr6, r61
2925
        gettr   tr5, r60
2926
        st.q    r15, 26*8, r62
2927
        st.q    r15, 25*8, r61
2928
        st.q    r15, 24*8, r60
2929
        st.q    r15, 23*8, r59
2930
        st.q    r15, 22*8, r58
2931
        st.q    r15, 21*8, r57
2932
        st.q    r15, 20*8, r56
2933
        st.q    r15, 19*8, r55
2934
        st.q    r15, 18*8, r54
2935
        st.q    r15, 17*8, r53
2936
        st.q    r15, 16*8, r52
2937
        st.q    r15, 15*8, r51
2938
        st.q    r15, 14*8, r50
2939
        st.q    r15, 13*8, r49
2940
        st.q    r15, 12*8, r48
2941
        st.q    r15, 11*8, r47
2942
        st.q    r15, 10*8, r46
2943
        st.q    r15,  9*8, r45
2944
        st.q    r15,  8*8, r44
2945
        st.q    r15,  7*8, r35
2946
        st.q    r15,  6*8, r34
2947
        st.q    r15,  5*8, r33
2948
        st.q    r15,  4*8, r32
2949
        st.q    r15,  3*8, r31
2950
        st.q    r15,  2*8, r30
2951
        st.q    r15,  1*8, r29
2952
        st.q    r15,  0*8, r28
2953
        blink   tr0, r63
2954
#ifndef __SH4_NOFPU__
2955
        ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2956
#else
2957
        ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2958
#endif
2959
#ifndef __SH4_NOFPU__
2960
        .global GLOBAL(GCC_pop_shmedia_regs)
2961
        FUNC(GLOBAL(GCC_pop_shmedia_regs))
2962
GLOBAL(GCC_pop_shmedia_regs):
2963
        /* FPU variant only: reload dr36..dr62 from slots 27..40, set
           r0 = 41*8 (total bytes to release) and branch to the shared
           tail at .L0, skipping the "movi 27*8" of the no-FPU entry.  */
        pt      .L0, tr1
2964
        movi    41*8, r0
2965
        fld.d   r15, 40*8, dr62
2966
        fld.d   r15, 39*8, dr60
2967
        fld.d   r15, 38*8, dr58
2968
        fld.d   r15, 37*8, dr56
2969
        fld.d   r15, 36*8, dr54
2970
        fld.d   r15, 35*8, dr52
2971
        fld.d   r15, 34*8, dr50
2972
        fld.d   r15, 33*8, dr48
2973
        fld.d   r15, 32*8, dr46
2974
        fld.d   r15, 31*8, dr44
2975
        fld.d   r15, 30*8, dr42
2976
        fld.d   r15, 29*8, dr40
2977
        fld.d   r15, 28*8, dr38
2978
        fld.d   r15, 27*8, dr36
2979
        blink   tr1, r63
2980
#else /* ! __SH4_NOFPU__        */
2981
        .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2982
        FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2983
GLOBAL(GCC_pop_shmedia_regs_nofpu):
2984
#endif /* ! __SH4_NOFPU__       */
2985
        /* r0 = number of bytes to release when only the 27 integer/target
           slots were pushed.  */
        movi    27*8, r0
2986
.L0:
2987
        /* Shared tail: r0 holds the byte count to add back to r15.  */
        ptabs   r18, tr0
2988
        ld.q    r15, 26*8, r62
2989
        ld.q    r15, 25*8, r61
2990
        ld.q    r15, 24*8, r60
2991
        /* Slots 24..26 hold the saved tr5..tr7; move them back into the
           target registers.  */
        ptabs   r62, tr7
2992
        ptabs   r61, tr6
2993
        ptabs   r60, tr5
2994
        ld.q    r15, 23*8, r59
2995
        ld.q    r15, 22*8, r58
2996
        ld.q    r15, 21*8, r57
2997
        ld.q    r15, 20*8, r56
2998
        ld.q    r15, 19*8, r55
2999
        ld.q    r15, 18*8, r54
3000
        ld.q    r15, 17*8, r53
3001
        ld.q    r15, 16*8, r52
3002
        ld.q    r15, 15*8, r51
3003
        ld.q    r15, 14*8, r50
3004
        ld.q    r15, 13*8, r49
3005
        ld.q    r15, 12*8, r48
3006
        ld.q    r15, 11*8, r47
3007
        ld.q    r15, 10*8, r46
3008
        ld.q    r15,  9*8, r45
3009
        ld.q    r15,  8*8, r44
3010
        ld.q    r15,  7*8, r35
3011
        ld.q    r15,  6*8, r34
3012
        ld.q    r15,  5*8, r33
3013
        ld.q    r15,  4*8, r32
3014
        ld.q    r15,  3*8, r31
3015
        ld.q    r15,  2*8, r30
3016
        ld.q    r15,  1*8, r29
3017
        ld.q    r15,  0*8, r28
3018
        add.l   r15, r0, r15
3019
        blink   tr0, r63
3020
 
3021
#ifndef __SH4_NOFPU__
3022
        ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3023
#else
3024
        ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3025
#endif
3026
#endif /* L_push_pop_shmedia_regs */
3027
#endif /* __SH5__ == 32 */
3028
 
3029
#ifdef L_div_table
3030
#if __SH5__
3031
#if defined(__pic__) && defined(__SHMEDIA__)
3032
        .global GLOBAL(sdivsi3)
3033
        FUNC(GLOBAL(sdivsi3))
3034
#if __SH5__ == 32
3035
        .section        .text..SHmedia32,"ax"
3036
#else
3037
        .text
3038
#endif
3039
#if 0
3040
/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3041
   in a text section does not work (at least for shared libraries):
3042
   the linker sets the LSB of the address as if this was SHmedia code.  */
3043
#define TEXT_DATA_BUG
3044
#endif
3045
        .align  2
/* sdivsi3 (SHmedia, PIC build): signed 32-bit division of r4 by r5,
   quotient returned in r0.  The divisor is normalized with nsb/shlld,
   its top bits index the byte part and then the word part of the
   division table to seed a reciprocal estimate, and the following
   multiply/subtract steps refine it (an 11-bit, then a 22-bit inverse
   according to the per-line notes, which also give the fixed-point
   format of each intermediate).
   The return address is taken from r18 by the ptabs below; r18 is then
   reused as scratch.
   NOTE(review): the extra-indented instructions look like independent
   work interleaved to fill pipeline bubbles -- confirm before
   reordering anything.  */
3046
 // inputs: r4,r5
3047
 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3048
 // result in r0
3049
 .global GLOBAL(sdivsi3)
3050
GLOBAL(sdivsi3):
3051
#ifdef TEXT_DATA_BUG
3052
 ptb datalabel Local_div_table,tr0
3053
#else
3054
 ptb GLOBAL(div_table_internal),tr0
3055
#endif
3056
 /* r1 = normalization shift count for the divisor.  */
 nsb r5, r1
3057
 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
3058
 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
3059
 /* bubble */
3060
 /* r20 = table base address, read back out of tr0.  */
 gettr tr0,r20
3061
 ldx.ub r20, r21, r19 // u0.8
3062
 shari r25, 32, r25   // normalize to s2.30
3063
 /* Double the index to address the 16-bit entries.  */
 shlli r21, 1, r21
3064
 muls.l r25, r19, r19 // s2.38
3065
 ldx.w r20, r21, r21  // s2.14
3066
  /* tr0 is free now that the table base is in r20: load the return
     target from r18.  */
  ptabs r18, tr0
3067
 shari r19, 24, r19   // truncate to s2.14
3068
 sub r21, r19, r19    // some 11 bit inverse in s1.14
3069
 muls.l r19, r19, r21 // u0.28
3070
  /* r1 = 92 - (shift count from nsb): the right-shift amount used by
     the final shard.  */
  sub r63, r1, r1
3071
  addi r1, 92, r1
3072
 muls.l r25, r21, r18 // s2.58
3073
 shlli r19, 45, r19   // multiply by two and convert to s2.58
3074
  /* bubble */
3075
 sub r19, r18, r18
3076
 shari r18, 28, r18   // some 22 bit inverse in s1.30
3077
 muls.l r18, r25, r0  // s2.60
3078
  muls.l r18, r4, r25 // s32.30
3079
  /* bubble */
3080
 shari r0, 16, r19   // s-16.44
3081
 muls.l r19, r18, r19 // s-16.74
3082
  /* r0 = 0 or -1 according to the sign of the r18*r4 product; it is
     used by the xor below and by the final subtraction.  */
  shari r25, 63, r0
3083
  shari r4, 14, r18   // s19.-14
3084
 shari r19, 30, r19   // s-16.44
3085
 muls.l r19, r18, r19 // s15.30
3086
  xor r21, r0, r21    // You could also use the constant 1 << 27.
3087
  add r21, r25, r21
3088
 sub r21, r19, r21
3089
 /* Scale down by the count computed in r1.  */
 shard r21, r1, r21
3090
 sub r21, r0, r0
3091
 blink tr0, r63
3092
        ENDFUNC(GLOBAL(sdivsi3))
3093
/* This table has been generated by divtab.c .
3094
Defects for bias -330:
3095
   Max defect: 6.081536e-07 at -1.000000e+00
3096
   Min defect: 2.849516e-08 at 1.030651e+00
3097
   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3098
   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3099
   Defect at 1: 1.238659e-07
3100
   Defect at -2: 1.061708e-07 */
3101
#else /* ! __pic__ || ! __SHMEDIA__ */
3102
        .section        .rodata
3103
#endif /* __pic__ */
3104
#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3105
        .balign 2
3106
        .type   Local_div_table,@object
3107
        .size   Local_div_table,128
3108
/* negative division constants */
3109
        .word   -16638
3110
        .word   -17135
3111
        .word   -17737
3112
        .word   -18433
3113
        .word   -19103
3114
        .word   -19751
3115
        .word   -20583
3116
        .word   -21383
3117
        .word   -22343
3118
        .word   -23353
3119
        .word   -24407
3120
        .word   -25582
3121
        .word   -26863
3122
        .word   -28382
3123
        .word   -29965
3124
        .word   -31800
3125
/* negative division factors */
3126
        .byte   66
3127
        .byte   70
3128
        .byte   75
3129
        .byte   81
3130
        .byte   87
3131
        .byte   93
3132
        .byte   101
3133
        .byte   109
3134
        .byte   119
3135
        .byte   130
3136
        .byte   142
3137
        .byte   156
3138
        .byte   172
3139
        .byte   192
3140
        .byte   214
3141
        .byte   241
3142
        .skip 16
3143
Local_div_table:
3144
        .skip 16
3145
/* positive division factors */
3146
        .byte   241
3147
        .byte   214
3148
        .byte   192
3149
        .byte   172
3150
        .byte   156
3151
        .byte   142
3152
        .byte   130
3153
        .byte   119
3154
        .byte   109
3155
        .byte   101
3156
        .byte   93
3157
        .byte   87
3158
        .byte   81
3159
        .byte   75
3160
        .byte   70
3161
        .byte   66
3162
/* positive division constants */
3163
        .word   31801
3164
        .word   29966
3165
        .word   28383
3166
        .word   26864
3167
        .word   25583
3168
        .word   24408
3169
        .word   23354
3170
        .word   22344
3171
        .word   21384
3172
        .word   20584
3173
        .word   19752
3174
        .word   19104
3175
        .word   18434
3176
        .word   17738
3177
        .word   17136
3178
        .word   16639
3179
        .section        .rodata
3180
#endif /* TEXT_DATA_BUG */
3181
        .balign 2
3182
        .type   GLOBAL(div_table),@object
3183
        .size   GLOBAL(div_table),128
3184
/* negative division constants */
3185
        .word   -16638
3186
        .word   -17135
3187
        .word   -17737
3188
        .word   -18433
3189
        .word   -19103
3190
        .word   -19751
3191
        .word   -20583
3192
        .word   -21383
3193
        .word   -22343
3194
        .word   -23353
3195
        .word   -24407
3196
        .word   -25582
3197
        .word   -26863
3198
        .word   -28382
3199
        .word   -29965
3200
        .word   -31800
3201
/* negative division factors */
3202
        .byte   66
3203
        .byte   70
3204
        .byte   75
3205
        .byte   81
3206
        .byte   87
3207
        .byte   93
3208
        .byte   101
3209
        .byte   109
3210
        .byte   119
3211
        .byte   130
3212
        .byte   142
3213
        .byte   156
3214
        .byte   172
3215
        .byte   192
3216
        .byte   214
3217
        .byte   241
3218
        .skip 16
3219
        .global GLOBAL(div_table)
3220
GLOBAL(div_table):
3221
        HIDDEN_ALIAS(div_table_internal,div_table)
3222
        .skip 16
3223
/* positive division factors */
3224
        .byte   241
3225
        .byte   214
3226
        .byte   192
3227
        .byte   172
3228
        .byte   156
3229
        .byte   142
3230
        .byte   130
3231
        .byte   119
3232
        .byte   109
3233
        .byte   101
3234
        .byte   93
3235
        .byte   87
3236
        .byte   81
3237
        .byte   75
3238
        .byte   70
3239
        .byte   66
3240
/* positive division constants */
3241
        .word   31801
3242
        .word   29966
3243
        .word   28383
3244
        .word   26864
3245
        .word   25583
3246
        .word   24408
3247
        .word   23354
3248
        .word   22344
3249
        .word   21384
3250
        .word   20584
3251
        .word   19752
3252
        .word   19104
3253
        .word   18434
3254
        .word   17738
3255
        .word   17136
3256
        .word   16639
3257
 
3258
#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3259
/* This code uses shld, and thus is not suitable for SH1 / SH2.  */
3260
 
3261
/* Signed / unsigned division without use of FPU, optimized for SH4.
3262
   Uses a lookup table for divisors in the range -128 .. +128, and
3263
   div1 with case distinction for larger divisors in three more ranges.
3264
   The code is lumped together with the table to allow the use of mova.  */
3265
/* Byte offsets, within a 32-bit word in memory, of its least significant
   byte (L_LSB), of the upper byte of its low 16-bit half (L_LSWMSB) and
   of the lower byte of its high 16-bit half (L_MSWLSB), for either
   endianness.  The division code uses them with mov.b to patch single
   bytes of a scratch word on the stack.  */
#ifdef __LITTLE_ENDIAN__
3266
#define L_LSB 0
3267
#define L_LSWMSB 1
3268
#define L_MSWLSB 2
3269
#else
3270
#define L_LSB 3
3271
#define L_LSWMSB 2
3272
#define L_MSWLSB 1
3273
#endif
3274
 
3275
        .balign 4
3276
        .global GLOBAL(udivsi3_i4i)
3277
        FUNC(GLOBAL(udivsi3_i4i))
/* udivsi3_i4i: unsigned 32-bit division, dividend in r4, divisor in r5,
   result returned in r0.  r4 and r5 are pushed on the stack and
   reloaded on every exit path; r1 is scratch, and the table path also
   writes mach/macl.
   Dispatch on the divisor:
     r5 <= 128          -> udiv_le128: multiply by an inverse from
                           div_table_inv, then shift
     128 < r5 < 0x10000 -> fall through, then join udiv_25 (defined in
                           sdivsi3_i4i)
     r5 >= 0x10000      -> udiv_ge64k, which diverts to udiv_r8 when
                           r5 > (r4 >> 8)
   The labels without the "u" prefix (div_le128, div_ge64k, div_r8) are
   entry points for sdivsi3_i4i, which has already pushed r4 and r5.  */
3278
GLOBAL(udivsi3_i4i):
3279
        mov.w LOCAL(c128_w), r1
3280
        div0u
3281
        mov r4,r0
3282
        shlr8 r0
3283
        /* T = (r5 > 128), unsigned.  */
        cmp/hi r1,r5
3284
        extu.w r5,r1
3285
        bf LOCAL(udiv_le128)
3286
        /* T = (r5 equals its own low 16 bits), i.e. r5 < 0x10000.  */
        cmp/eq r5,r1
3287
        bf LOCAL(udiv_ge64k)
3288
        shlr r0
3289
        /* Keep the unshifted divisor in r1 so the original value is what
           gets pushed (and later restored into r5).  */
        mov r5,r1
3290
        shll16 r5
3291
        mov.l r4,@-r15
3292
        div1 r5,r0
3293
        mov.l r1,@-r15
3294
        div1 r5,r0
3295
        div1 r5,r0
3296
        bra LOCAL(udiv_25)
3297
        /* Delay slot: fourth div1 step before joining udiv_25.  */
        div1 r5,r0
3298
 
3299
LOCAL(div_le128):
3300
        /* Signed entry for |divisor| <= 128; operands are already on the
           stack.  r1 = div_table_ix[r5] (loaded in the delay slot).  */
        mova LOCAL(div_table_ix),r0
3301
        bra LOCAL(div_le128_2)
3302
        mov.b @(r0,r5),r1
3303
LOCAL(udiv_le128):
3304
        mov.l r4,@-r15
3305
        mova LOCAL(div_table_ix),r0
3306
        mov.b @(r0,r5),r1
3307
        mov.l r5,@-r15
3308
LOCAL(div_le128_2):
3309
        /* r1 = inverse fetched from div_table_inv at the byte offset just
           read from div_table_ix.  */
        mova LOCAL(div_table_inv),r0
3310
        mov.l @(r0,r1),r1
3311
        mov r5,r0
3312
        /* T = 1 when no bit other than bit 0 is set in r5 (r5 is 0 or 1).  */
        tst #0xfe,r0
3313
        mova LOCAL(div_table_clz),r0
3314
        /* mach:macl = inverse * dividend (64-bit unsigned product).  */
        dmulu.l r1,r4
3315
        /* r1 = shift count for this divisor from div_table_clz.  */
        mov.b @(r0,r5),r1
3316
        bt/s LOCAL(div_by_1)
3317
        /* Delay slot: r0 = dividend, which is the result in the div_by_1
           case.  */
        mov r4,r0
3318
        mov.l @r15+,r5
3319
        sts mach,r0
3320
        /* clrt */
3321
        /* T is already 0 here because the bt/s above was not taken.
           Adding r4 accounts for the implicit leading 1 of the table
           inverse (see the comment above LOCAL(zero_l)); the carry out
           goes to T.  */
        addc r4,r0
3322
        mov.l @r15+,r4
3323
        /* Shift right by one, bringing the carry in at the top.  */
        rotcr r0
3324
        rts
3325
        /* Delay slot: final shift by the table count in r1.  */
        shld r1,r0
3326
 
3327
LOCAL(div_by_1_neg):
3328
        /* Reached from sdivsi3_i4i: the result is the negated dividend.  */
        neg r4,r0
3329
LOCAL(div_by_1):
3330
        /* r0 already holds the result; restore r5 and r4 and return.  */
        mov.l @r15+,r5
3331
        rts
3332
        mov.l @r15+,r4
3333
 
3334
LOCAL(div_ge64k):
3335
        /* Signed entry for divisors >= 0x10000.  T was set by the caller's
           "cmp/hi r0,r5" (r5 > r4 >> 8).  */
        bt/s LOCAL(div_r8)
3336
        div0u
3337
        shll8 r5
3338
        bra LOCAL(div_ge64k_2)
3339
        div1 r5,r0
3340
LOCAL(udiv_ge64k):
3341
        /* r0 is r4 >> 8 here.  T = (r5 > r0), unsigned.  */
        cmp/hi r0,r5
3342
        mov r5,r1
3343
        bt LOCAL(udiv_r8)
3344
        shll8 r5
3345
        mov.l r4,@-r15
3346
        div1 r5,r0
3347
        mov.l r1,@-r15
3348
LOCAL(div_ge64k_2):
3349
        div1 r5,r0
3350
        /* r1 = 0; it is pushed below as a scratch word that receives one
           byte via mov.b and is popped again at div_ge64k_end.  */
        mov.l LOCAL(zero_l),r1
3351
        .rept 4
3352
        div1 r5,r0
3353
        .endr
3354
        mov.l r1,@-r15
3355
        div1 r5,r0
3356
        mov.w LOCAL(m256_w),r1
3357
        div1 r5,r0
3358
        mov.b r0,@(L_LSWMSB,r15)
3359
        /* xor / and / xor with the mask loaded from m256_w: merge bits of
           r4 into r0 under that mask.  */
        xor r4,r0
3360
        and r1,r0
3361
        bra LOCAL(div_ge64k_end)
3362
        xor r4,r0
3363
 
3364
LOCAL(div_r8):
3365
        /* Signed entry for the r5 > (r4 >> 8) case; T is expected to be
           clear here in the same way udiv_r8 clears it explicitly --
           NOTE(review): confirm against the div0u in the caller's delay
           slot.  */
        shll16 r4
3366
        bra LOCAL(div_r8_2)
3367
        shll8 r4
3368
LOCAL(udiv_r8):
3369
        mov.l r4,@-r15
3370
        shll16 r4
3371
        clrt
3372
        shll8 r4
3373
        mov.l r5,@-r15
3374
LOCAL(div_r8_2):
3375
        /* Eight rotcl/div1 pairs follow: r1 is the working register for
           div1 and result bits are rotated into r0.  r4 is reused to hold
           the divisor so that r5 can be restored before the last step.  */
        rotcl r4
3376
        mov r0,r1
3377
        div1 r5,r1
3378
        mov r4,r0
3379
        rotcl r0
3380
        mov r5,r4
3381
        div1 r5,r1
3382
        .rept 5
3383
        rotcl r0; div1 r5,r1
3384
        .endr
3385
        rotcl r0
3386
        mov.l @r15+,r5
3387
        div1 r4,r1
3388
        mov.l @r15+,r4
3389
        rts
3390
        rotcl r0
3391
 
3392
        ENDFUNC(GLOBAL(udivsi3_i4i))
3393
 
3394
        .global GLOBAL(sdivsi3_i4i)
3395
        FUNC(GLOBAL(sdivsi3_i4i))
3396
        /* This is link-compatible with a GLOBAL(sdivsi3) call,
3397
           but we effectively clobber only r1.  */
/* sdivsi3_i4i: signed 32-bit division, dividend in r4, divisor in r5,
   result returned in r0.  The original r4 and r5 are pushed first and
   reloaded on every exit path.  Negative operands are replaced by their
   negation and the magnitudes are divided:
     - operands of equal sign go to pos_result, which shares the
       table / div1 code with udivsi3_i4i;
     - operands of different sign go to neg_result, whose exit paths
       negate the quotient before returning.
   Within each of the two, the divisor magnitude selects the path in the
   same way as in udivsi3_i4i (<= 128, < 0x10000, >= 0x10000).  */
3398
GLOBAL(sdivsi3_i4i):
3399
        mov.l r4,@-r15
3400
        /* T = (r5 >= 0).  */
        cmp/pz r5
3401
        mov.w LOCAL(c128_w), r1
3402
        bt/s LOCAL(pos_divisor)
3403
        /* Delay slot: T = (r4 >= 0), tested by the next bt/s on either
           path.  */
        cmp/pz r4
3404
        mov.l r5,@-r15
3405
        neg r5,r5
3406
        bt/s LOCAL(neg_result)
3407
        /* Delay slot: T = (r5 > 128) for the magnitude of the divisor.  */
        cmp/hi r1,r5
3408
        neg r4,r4
3409
LOCAL(pos_result):
3410
        extu.w r5,r0
3411
        bf LOCAL(div_le128)
3412
        /* T = (r5 < 0x10000).  */
        cmp/eq r5,r0
3413
        mov r4,r0
3414
        shlr8 r0
3415
        bf/s LOCAL(div_ge64k)
3416
        /* Delay slot: T = (r5 > r4 >> 8), consumed at div_ge64k.  */
        cmp/hi r0,r5
3417
        div0u
3418
        shll16 r5
3419
        div1 r5,r0
3420
        div1 r5,r0
3421
        div1 r5,r0
3422
LOCAL(udiv_25):
3423
        /* udivsi3_i4i joins here.  r1 = 0 is pushed as a scratch word;
           bytes of it are filled in with mov.b below and it is popped at
           div_ge64k_end.  */
        mov.l LOCAL(zero_l),r1
3424
        div1 r5,r0
3425
        div1 r5,r0
3426
        mov.l r1,@-r15
3427
        .rept 3
3428
        div1 r5,r0
3429
        .endr
3430
        mov.b r0,@(L_MSWLSB,r15)
3431
        xtrct r4,r0
3432
        swap.w r0,r0
3433
        .rept 8
3434
        div1 r5,r0
3435
        .endr
3436
        mov.b r0,@(L_LSWMSB,r15)
3437
LOCAL(div_ge64k_end):
3438
        .rept 8
3439
        div1 r5,r0
3440
        .endr
3441
        mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3442
        /* Combine the low byte of r0 with the bytes collected in the
           scratch word, restore r5 and r4, and rotate in the last bit in
           the rts delay slot.  */
        extu.b r0,r0
3443
        mov.l @r15+,r5
3444
        or r4,r0
3445
        mov.l @r15+,r4
3446
        rts
3447
        rotcl r0
3448
 
3449
LOCAL(div_le128_neg):
3450
        /* Table path for a negative result.  r0 holds extu.w r5 on entry;
           T = 1 when no bit other than bit 0 is set in it.  */
        tst #0xfe,r0
3451
        mova LOCAL(div_table_ix),r0
3452
        mov.b @(r0,r5),r1
3453
        mova LOCAL(div_table_inv),r0
3454
        bt/s LOCAL(div_by_1_neg)
3455
        mov.l @(r0,r1),r1
3456
        mova LOCAL(div_table_clz),r0
3457
        dmulu.l r1,r4
3458
        mov.b @(r0,r5),r1
3459
        mov.l @r15+,r5
3460
        sts mach,r0
3461
        /* clrt */
3462
        addc r4,r0
3463
        mov.l @r15+,r4
3464
        rotcr r0
3465
        shld r1,r0
3466
        rts
3467
        /* Delay slot: negate the quotient.  */
        neg r0,r0
3468
 
3469
LOCAL(pos_divisor):
3470
        /* Divisor >= 0.  T still holds (r4 >= 0) from the entry delay
           slot.  */
        mov.l r5,@-r15
3471
        bt/s LOCAL(pos_result)
3472
        cmp/hi r1,r5
3473
        neg r4,r4
3474
LOCAL(neg_result):
3475
        extu.w r5,r0
3476
        bf LOCAL(div_le128_neg)
3477
        cmp/eq r5,r0
3478
        mov r4,r0
3479
        shlr8 r0
3480
        bf/s LOCAL(div_ge64k_neg)
3481
        cmp/hi r0,r5
3482
        div0u
3483
        mov.l LOCAL(zero_l),r1
3484
        shll16 r5
3485
        div1 r5,r0
3486
        mov.l r1,@-r15
3487
        .rept 7
3488
        div1 r5,r0
3489
        .endr
3490
        mov.b r0,@(L_MSWLSB,r15)
3491
        xtrct r4,r0
3492
        swap.w r0,r0
3493
        .rept 8
3494
        div1 r5,r0
3495
        .endr
3496
        mov.b r0,@(L_LSWMSB,r15)
3497
LOCAL(div_ge64k_neg_end):
3498
        .rept 8
3499
        div1 r5,r0
3500
        .endr
3501
        mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3502
        /* Same merge as at div_ge64k_end, but built in r1 so that the
           negation can write r0 in the rts delay slot.  */
        extu.b r0,r1
3503
        mov.l @r15+,r5
3504
        or r4,r1
3505
LOCAL(div_r8_neg_end):
3506
        mov.l @r15+,r4
3507
        rotcl r1
3508
        rts
3509
        neg r1,r0
3510
 
3511
LOCAL(div_ge64k_neg):
3512
        /* T = (r5 > r4 >> 8) from the caller's delay slot.  */
        bt/s LOCAL(div_r8_neg)
3513
        div0u
3514
        shll8 r5
3515
        mov.l LOCAL(zero_l),r1
3516
        .rept 6
3517
        div1 r5,r0
3518
        .endr
3519
        mov.l r1,@-r15
3520
        div1 r5,r0
3521
        mov.w LOCAL(m256_w),r1
3522
        div1 r5,r0
3523
        mov.b r0,@(L_LSWMSB,r15)
3524
        xor r4,r0
3525
        and r1,r0
3526
        bra LOCAL(div_ge64k_neg_end)
3527
        xor r4,r0
3528
 
3529
/* Constant 128, loaded with mov.w at both entry points and compared
   against the divisor.  */
LOCAL(c128_w):
3530
        .word 128
3531
 
3532
LOCAL(div_r8_neg):
3533
        /* Negative-result counterpart of div_r8_2: seven rotcl/div1 pairs
           plus a final pair split around the restore of r5, with result
           bits collected in r1.  r4 is reused to hold the divisor for the
           last div1.  */
        clrt
3534
        shll16 r4
3535
        mov r4,r1
3536
        shll8 r1
3537
        mov r5,r4
3538
        .rept 7
3539
        rotcl r1; div1 r5,r0
3540
        .endr
3541
        mov.l @r15+,r5
3542
        rotcl r1
3543
        bra LOCAL(div_r8_neg_end)
3544
        div1 r4,r0
3545
 
3546
/* Mask constant loaded with mov.w in the >= 0x10000 paths.
   NOTE(review): mov.w sign-extends on SH, so the register value is
   presumably 0xffffff00 (-256, matching the label name) -- confirm.  */
LOCAL(m256_w):
3547
        .word 0xff00
3548
/* This table has been generated by divtab-sh4.c.  */
3549
        .balign 4
3550
LOCAL(div_table_clz):
3551
        .byte   0
3552
        .byte   1
3553
        .byte   0
3554
        .byte   -1
3555
        .byte   -1
3556
        .byte   -2
3557
        .byte   -2
3558
        .byte   -2
3559
        .byte   -2
3560
        .byte   -3
3561
        .byte   -3
3562
        .byte   -3
3563
        .byte   -3
3564
        .byte   -3
3565
        .byte   -3
3566
        .byte   -3
3567
        .byte   -3
3568
        .byte   -4
3569
        .byte   -4
3570
        .byte   -4
3571
        .byte   -4
3572
        .byte   -4
3573
        .byte   -4
3574
        .byte   -4
3575
        .byte   -4
3576
        .byte   -4
3577
        .byte   -4
3578
        .byte   -4
3579
        .byte   -4
3580
        .byte   -4
3581
        .byte   -4
3582
        .byte   -4
3583
        .byte   -4
3584
        .byte   -5
3585
        .byte   -5
3586
        .byte   -5
3587
        .byte   -5
3588
        .byte   -5
3589
        .byte   -5
3590
        .byte   -5
3591
        .byte   -5
3592
        .byte   -5
3593
        .byte   -5
3594
        .byte   -5
3595
        .byte   -5
3596
        .byte   -5
3597
        .byte   -5
3598
        .byte   -5
3599
        .byte   -5
3600
        .byte   -5
3601
        .byte   -5
3602
        .byte   -5
3603
        .byte   -5
3604
        .byte   -5
3605
        .byte   -5
3606
        .byte   -5
3607
        .byte   -5
3608
        .byte   -5
3609
        .byte   -5
3610
        .byte   -5
3611
        .byte   -5
3612
        .byte   -5
3613
        .byte   -5
3614
        .byte   -5
3615
        .byte   -5
3616
        .byte   -6
3617
        .byte   -6
3618
        .byte   -6
3619
        .byte   -6
3620
        .byte   -6
3621
        .byte   -6
3622
        .byte   -6
3623
        .byte   -6
3624
        .byte   -6
3625
        .byte   -6
3626
        .byte   -6
3627
        .byte   -6
3628
        .byte   -6
3629
        .byte   -6
3630
        .byte   -6
3631
        .byte   -6
3632
        .byte   -6
3633
        .byte   -6
3634
        .byte   -6
3635
        .byte   -6
3636
        .byte   -6
3637
        .byte   -6
3638
        .byte   -6
3639
        .byte   -6
3640
        .byte   -6
3641
        .byte   -6
3642
        .byte   -6
3643
        .byte   -6
3644
        .byte   -6
3645
        .byte   -6
3646
        .byte   -6
3647
        .byte   -6
3648
        .byte   -6
3649
        .byte   -6
3650
        .byte   -6
3651
        .byte   -6
3652
        .byte   -6
3653
        .byte   -6
3654
        .byte   -6
3655
        .byte   -6
3656
        .byte   -6
3657
        .byte   -6
3658
        .byte   -6
3659
        .byte   -6
3660
        .byte   -6
3661
        .byte   -6
3662
        .byte   -6
3663
        .byte   -6
3664
        .byte   -6
3665
        .byte   -6
3666
        .byte   -6
3667
        .byte   -6
3668
        .byte   -6
3669
        .byte   -6
3670
        .byte   -6
3671
        .byte   -6
3672
        .byte   -6
3673
        .byte   -6
3674
        .byte   -6
3675
        .byte   -6
3676
        .byte   -6
3677
        .byte   -6
3678
        .byte   -6
3679
/* Lookup table translating positive divisor to index into table of
3680
   normalized inverse.  N.B. the '0' entry is also the last entry of the
3681
 previous table, and causes an unaligned access for division by zero.  */
3682
LOCAL(div_table_ix):
3683
        .byte   -6
3684
        .byte   -128
3685
        .byte   -128
3686
        .byte   0
3687
        .byte   -128
3688
        .byte   -64
3689
        .byte   0
3690
        .byte   64
3691
        .byte   -128
3692
        .byte   -96
3693
        .byte   -64
3694
        .byte   -32
3695
        .byte   0
3696
        .byte   32
3697
        .byte   64
3698
        .byte   96
3699
        .byte   -128
3700
        .byte   -112
3701
        .byte   -96
3702
        .byte   -80
3703
        .byte   -64
3704
        .byte   -48
3705
        .byte   -32
3706
        .byte   -16
3707
        .byte   0
3708
        .byte   16
3709
        .byte   32
3710
        .byte   48
3711
        .byte   64
3712
        .byte   80
3713
        .byte   96
3714
        .byte   112
3715
        .byte   -128
3716
        .byte   -120
3717
        .byte   -112
3718
        .byte   -104
3719
        .byte   -96
3720
        .byte   -88
3721
        .byte   -80
3722
        .byte   -72
3723
        .byte   -64
3724
        .byte   -56
3725
        .byte   -48
3726
        .byte   -40
3727
        .byte   -32
3728
        .byte   -24
3729
        .byte   -16
3730
        .byte   -8
3731
        .byte   0
3732
        .byte   8
3733
        .byte   16
3734
        .byte   24
3735
        .byte   32
3736
        .byte   40
3737
        .byte   48
3738
        .byte   56
3739
        .byte   64
3740
        .byte   72
3741
        .byte   80
3742
        .byte   88
3743
        .byte   96
3744
        .byte   104
3745
        .byte   112
3746
        .byte   120
3747
        .byte   -128
3748
        .byte   -124
3749
        .byte   -120
3750
        .byte   -116
3751
        .byte   -112
3752
        .byte   -108
3753
        .byte   -104
3754
        .byte   -100
3755
        .byte   -96
3756
        .byte   -92
3757
        .byte   -88
3758
        .byte   -84
3759
        .byte   -80
3760
        .byte   -76
3761
        .byte   -72
3762
        .byte   -68
3763
        .byte   -64
3764
        .byte   -60
3765
        .byte   -56
3766
        .byte   -52
3767
        .byte   -48
3768
        .byte   -44
3769
        .byte   -40
3770
        .byte   -36
3771
        .byte   -32
3772
        .byte   -28
3773
        .byte   -24
3774
        .byte   -20
3775
        .byte   -16
3776
        .byte   -12
3777
        .byte   -8
3778
        .byte   -4
3779
        .byte   0
3780
        .byte   4
3781
        .byte   8
3782
        .byte   12
3783
        .byte   16
3784
        .byte   20
3785
        .byte   24
3786
        .byte   28
3787
        .byte   32
3788
        .byte   36
3789
        .byte   40
3790
        .byte   44
3791
        .byte   48
3792
        .byte   52
3793
        .byte   56
3794
        .byte   60
3795
        .byte   64
3796
        .byte   68
3797
        .byte   72
3798
        .byte   76
3799
        .byte   80
3800
        .byte   84
3801
        .byte   88
3802
        .byte   92
3803
        .byte   96
3804
        .byte   100
3805
        .byte   104
3806
        .byte   108
3807
        .byte   112
3808
        .byte   116
3809
        .byte   120
3810
        .byte   124
3811
        .byte   -128
3812
/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
3813
        .balign 4
3814
LOCAL(zero_l):
3815
        .long   0x0
3816
        .long   0xF81F81F9
3817
        .long   0xF07C1F08
3818
        .long   0xE9131AC0
3819
        .long   0xE1E1E1E2
3820
        .long   0xDAE6076C
3821
        .long   0xD41D41D5
3822
        .long   0xCD856891
3823
        .long   0xC71C71C8
3824
        .long   0xC0E07039
3825
        .long   0xBACF914D
3826
        .long   0xB4E81B4F
3827
        .long   0xAF286BCB
3828
        .long   0xA98EF607
3829
        .long   0xA41A41A5
3830
        .long   0x9EC8E952
3831
        .long   0x9999999A
3832
        .long   0x948B0FCE
3833
        .long   0x8F9C18FA
3834
        .long   0x8ACB90F7
3835
        .long   0x86186187
3836
        .long   0x81818182
3837
        .long   0x7D05F418
3838
        .long   0x78A4C818
3839
        .long   0x745D1746
3840
        .long   0x702E05C1
3841
        .long   0x6C16C16D
3842
        .long   0x68168169
3843
        .long   0x642C8591
3844
        .long   0x60581606
3845
        .long   0x5C9882BA
3846
        .long   0x58ED2309
3847
LOCAL(div_table_inv):
3848
        .long   0x55555556
3849
        .long   0x51D07EAF
3850
        .long   0x4E5E0A73
3851
        .long   0x4AFD6A06
3852
        .long   0x47AE147B
3853
        .long   0x446F8657
3854
        .long   0x41414142
3855
        .long   0x3E22CBCF
3856
        .long   0x3B13B13C
3857
        .long   0x38138139
3858
        .long   0x3521CFB3
3859
        .long   0x323E34A3
3860
        .long   0x2F684BDB
3861
        .long   0x2C9FB4D9
3862
        .long   0x29E4129F
3863
        .long   0x27350B89
3864
        .long   0x24924925
3865
        .long   0x21FB7813
3866
        .long   0x1F7047DD
3867
        .long   0x1CF06ADB
3868
        .long   0x1A7B9612
3869
        .long   0x18118119
3870
        .long   0x15B1E5F8
3871
        .long   0x135C8114
3872
        .long   0x11111112
3873
        .long   0xECF56BF
3874
        .long   0xC9714FC
3875
        .long   0xA6810A7
3876
        .long   0x8421085
3877
        .long   0x624DD30
3878
        .long   0x4104105
3879
        .long   0x2040811
3880
        /* maximum error: 0.987342 scaled: 0.921875*/
3881
 
3882
        ENDFUNC(GLOBAL(sdivsi3_i4i))
3883
#endif /* SH3 / SH4 */
3884
 
3885
#endif /* L_div_table */
3886
 
3887
#ifdef L_udiv_qrnnd_16
3888
#if !__SHMEDIA__
3889
        HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
/* udiv_qrnnd_16: helper that produces a 16-bit piece of a quotient
   together with the matching remainder.
   Register usage, as read from the comment below (NOTE(review): confirm
   against the callers):
     in:      r0 = n1, r4 = n0, r5 = d, r6 = d1
     out:     r0 = rn (remainder), r1 = qn (quotient piece)
     scratch: r2 (the product read from macl)
   The main path runs 16 div1 steps of r0 by r6, multiplies the resulting
   estimate by r5 and corrects the remainder/quotient by up to two
   decrements.  The .Lots path is taken when r0 > r6 on entry.  */
3890
        /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3891
        /* n1 < d, but n1 might be larger than d1.  */
3892
        .global GLOBAL(udiv_qrnnd_16)
3893
        .balign 8
3894
GLOBAL(udiv_qrnnd_16):
3895
        div0u
3896
        /* T = (r0 > r6), unsigned.  */
        cmp/hi r6,r0
3897
        bt .Lots
3898
        .rept 16
3899
        div1 r6,r0
3900
        .endr
3901
        extu.w r0,r1
3902
        /* T is the flag left by the last div1; when it is clear r6 is
           added back to r0 before continuing.  */
        bt 0f
3903
        add r6,r0
3904
0:      rotcl r1
3905
        /* macl = (low 16 bits of r1) * (low 16 bits of r5).  */
        mulu.w r1,r5
3906
        xtrct r4,r0
3907
        swap.w r0,r0
3908
        sts macl,r2
3909
        /* T = (r0 >= r2), unsigned: when set, the subtraction below does
           not borrow and the estimate in r1 stands.  */
        cmp/hs r2,r0
3910
        sub r2,r0
3911
        bt 0f
3912
        /* First correction: add d back (T is 0 here, so addc adds just
           r5), decrement the quotient; a carry out means done.  */
        addc r5,r0
3913
        add #-1,r1
3914
        bt 0f
3915
        /* Second correction, finished in the rts delay slot.  */
1:      add #-1,r1
3916
        rts
3917
        add r5,r0
3918
        .balign 8
3919
.Lots:
3920
        /* Entry value r0 > r6: start from r1 = 0xffffffff (reduced by
           "shlr16 r1" inside the SL1 below) and form the remainder
           directly.  */
        sub r5,r0
3921
        swap.w r4,r1
3922
        xtrct r0,r1
3923
        clrt
3924
        mov r1,r0
3925
        addc r5,r0
3926
        mov #-1,r1
3927
        /* SL1 is a macro defined outside this chunk; presumably it emits
           "bf 1b" with "shlr16 r1" as its slot instruction -- TODO confirm
           in lib1funcs.h.  */
        SL1(bf, 1b,
3928
        shlr16 r1)
3929
0:      rts
3930
        nop
3931
        ENDFUNC(GLOBAL(udiv_qrnnd_16))
3932
#endif /* !__SHMEDIA__ */
3933
#endif /* L_udiv_qrnnd_16 */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.