OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [gcc/] [config/] [sh/] [lib1funcs.asm] - Blame information for rev 154

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2
   2004, 2005, 2006
3
   Free Software Foundation, Inc.
4
 
5
This file is free software; you can redistribute it and/or modify it
6
under the terms of the GNU General Public License as published by the
7
Free Software Foundation; either version 2, or (at your option) any
8
later version.
9
 
10
In addition to the permissions in the GNU General Public License, the
11
Free Software Foundation gives you unlimited permission to link the
12
compiled version of this file into combinations with other programs,
13
and to distribute those combinations without any restriction coming
14
from the use of this file.  (The General Public License restrictions
15
do apply in other respects; for example, they cover modification of
16
the file, and distribution when not linked into a combine
17
executable.)
18
 
19
This file is distributed in the hope that it will be useful, but
20
WITHOUT ANY WARRANTY; without even the implied warranty of
21
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22
General Public License for more details.
23
 
24
You should have received a copy of the GNU General Public License
25
along with this program; see the file COPYING.  If not, write to
26
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27
Boston, MA 02110-1301, USA.  */
28
 
29
!! libgcc routines for the Renesas / SuperH SH CPUs.
30
!! Contributed by Steve Chamberlain.
31
!! sac@cygnus.com
32
 
33
!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34
!! recoded in assembly by Toshiyasu Morita
35
!! tm@netcom.com
36
 
37
/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38
   ELF local label prefixes by J"orn Rennecke
39
   amylaar@cygnus.com  */
40
 
41
#include "lib1funcs.h"
42
 
43
#if ! __SH5__
44
#ifdef L_ashiftrt
45
        .global GLOBAL(ashiftrt_r4_0)
46
        .global GLOBAL(ashiftrt_r4_1)
47
        .global GLOBAL(ashiftrt_r4_2)
48
        .global GLOBAL(ashiftrt_r4_3)
49
        .global GLOBAL(ashiftrt_r4_4)
50
        .global GLOBAL(ashiftrt_r4_5)
51
        .global GLOBAL(ashiftrt_r4_6)
52
        .global GLOBAL(ashiftrt_r4_7)
53
        .global GLOBAL(ashiftrt_r4_8)
54
        .global GLOBAL(ashiftrt_r4_9)
55
        .global GLOBAL(ashiftrt_r4_10)
56
        .global GLOBAL(ashiftrt_r4_11)
57
        .global GLOBAL(ashiftrt_r4_12)
58
        .global GLOBAL(ashiftrt_r4_13)
59
        .global GLOBAL(ashiftrt_r4_14)
60
        .global GLOBAL(ashiftrt_r4_15)
61
        .global GLOBAL(ashiftrt_r4_16)
62
        .global GLOBAL(ashiftrt_r4_17)
63
        .global GLOBAL(ashiftrt_r4_18)
64
        .global GLOBAL(ashiftrt_r4_19)
65
        .global GLOBAL(ashiftrt_r4_20)
66
        .global GLOBAL(ashiftrt_r4_21)
67
        .global GLOBAL(ashiftrt_r4_22)
68
        .global GLOBAL(ashiftrt_r4_23)
69
        .global GLOBAL(ashiftrt_r4_24)
70
        .global GLOBAL(ashiftrt_r4_25)
71
        .global GLOBAL(ashiftrt_r4_26)
72
        .global GLOBAL(ashiftrt_r4_27)
73
        .global GLOBAL(ashiftrt_r4_28)
74
        .global GLOBAL(ashiftrt_r4_29)
75
        .global GLOBAL(ashiftrt_r4_30)
76
        .global GLOBAL(ashiftrt_r4_31)
77
        .global GLOBAL(ashiftrt_r4_32)
78
 
79
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
80
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
81
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
82
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
83
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
84
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
85
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
86
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
87
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
88
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
89
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
90
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
91
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
92
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
93
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
94
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
95
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
96
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
97
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
98
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
99
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
100
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
101
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
102
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
103
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
104
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
105
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
106
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
107
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
108
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
109
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
110
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
111
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
112
 
113
        .align  1
114
/* Arithmetic right shift of r4 by a constant, in place.  Entry point
   ashiftrt_r4_N shifts r4 right by N bits.  The entry points fall
   through into one another so each count reuses the instructions for
   the smaller counts.  The instruction written after each rts is the
   one that occupies its branch delay slot.  */
GLOBAL(ashiftrt_r4_32):
115
GLOBAL(ashiftrt_r4_31):
116
        rotcl   r4  /* rotate left through T: the old sign bit lands in T */
117
        rts
118
        subc    r4,r4  /* r4 = r4 - r4 - T, i.e. 0 or -1 (sign fill) */
119
 
120
GLOBAL(ashiftrt_r4_30):
121
        shar    r4
122
GLOBAL(ashiftrt_r4_29):
123
        shar    r4
124
GLOBAL(ashiftrt_r4_28):
125
        shar    r4
126
GLOBAL(ashiftrt_r4_27):
127
        shar    r4
128
GLOBAL(ashiftrt_r4_26):
129
        shar    r4
130
GLOBAL(ashiftrt_r4_25):
131
        shar    r4
132
GLOBAL(ashiftrt_r4_24):
133
        shlr16  r4  /* logical shift by 16 + 8 = 24 ... */
134
        shlr8   r4
135
        rts
136
        exts.b  r4,r4  /* ... then sign-extend the remaining byte */
137
 
138
GLOBAL(ashiftrt_r4_23):
139
        shar    r4
140
GLOBAL(ashiftrt_r4_22):
141
        shar    r4
142
GLOBAL(ashiftrt_r4_21):
143
        shar    r4
144
GLOBAL(ashiftrt_r4_20):
145
        shar    r4
146
GLOBAL(ashiftrt_r4_19):
147
        shar    r4
148
GLOBAL(ashiftrt_r4_18):
149
        shar    r4
150
GLOBAL(ashiftrt_r4_17):
151
        shar    r4
152
GLOBAL(ashiftrt_r4_16):
153
        shlr16  r4  /* logical shift by 16 ... */
154
        rts
155
        exts.w  r4,r4  /* ... then sign-extend the remaining 16-bit word */
156
 
157
GLOBAL(ashiftrt_r4_15):
158
        shar    r4
159
GLOBAL(ashiftrt_r4_14):
160
        shar    r4
161
GLOBAL(ashiftrt_r4_13):
162
        shar    r4
163
GLOBAL(ashiftrt_r4_12):
164
        shar    r4
165
GLOBAL(ashiftrt_r4_11):
166
        shar    r4
167
GLOBAL(ashiftrt_r4_10):
168
        shar    r4
169
GLOBAL(ashiftrt_r4_9):
170
        shar    r4
171
GLOBAL(ashiftrt_r4_8):
172
        shar    r4
173
GLOBAL(ashiftrt_r4_7):
174
        shar    r4
175
GLOBAL(ashiftrt_r4_6):
176
        shar    r4
177
GLOBAL(ashiftrt_r4_5):
178
        shar    r4
179
GLOBAL(ashiftrt_r4_4):
180
        shar    r4
181
GLOBAL(ashiftrt_r4_3):
182
        shar    r4
183
GLOBAL(ashiftrt_r4_2):
184
        shar    r4
185
GLOBAL(ashiftrt_r4_1):
186
        rts
187
        shar    r4  /* the final one-bit shift is done in the rts slot */
188
 
189
GLOBAL(ashiftrt_r4_0):
190
        rts
191
        nop  /* shift by zero: r4 is returned unchanged */
192
 
193
        ENDFUNC(GLOBAL(ashiftrt_r4_0))
194
        ENDFUNC(GLOBAL(ashiftrt_r4_1))
195
        ENDFUNC(GLOBAL(ashiftrt_r4_2))
196
        ENDFUNC(GLOBAL(ashiftrt_r4_3))
197
        ENDFUNC(GLOBAL(ashiftrt_r4_4))
198
        ENDFUNC(GLOBAL(ashiftrt_r4_5))
199
        ENDFUNC(GLOBAL(ashiftrt_r4_6))
200
        ENDFUNC(GLOBAL(ashiftrt_r4_7))
201
        ENDFUNC(GLOBAL(ashiftrt_r4_8))
202
        ENDFUNC(GLOBAL(ashiftrt_r4_9))
203
        ENDFUNC(GLOBAL(ashiftrt_r4_10))
204
        ENDFUNC(GLOBAL(ashiftrt_r4_11))
205
        ENDFUNC(GLOBAL(ashiftrt_r4_12))
206
        ENDFUNC(GLOBAL(ashiftrt_r4_13))
207
        ENDFUNC(GLOBAL(ashiftrt_r4_14))
208
        ENDFUNC(GLOBAL(ashiftrt_r4_15))
209
        ENDFUNC(GLOBAL(ashiftrt_r4_16))
210
        ENDFUNC(GLOBAL(ashiftrt_r4_17))
211
        ENDFUNC(GLOBAL(ashiftrt_r4_18))
212
        ENDFUNC(GLOBAL(ashiftrt_r4_19))
213
        ENDFUNC(GLOBAL(ashiftrt_r4_20))
214
        ENDFUNC(GLOBAL(ashiftrt_r4_21))
215
        ENDFUNC(GLOBAL(ashiftrt_r4_22))
216
        ENDFUNC(GLOBAL(ashiftrt_r4_23))
217
        ENDFUNC(GLOBAL(ashiftrt_r4_24))
218
        ENDFUNC(GLOBAL(ashiftrt_r4_25))
219
        ENDFUNC(GLOBAL(ashiftrt_r4_26))
220
        ENDFUNC(GLOBAL(ashiftrt_r4_27))
221
        ENDFUNC(GLOBAL(ashiftrt_r4_28))
222
        ENDFUNC(GLOBAL(ashiftrt_r4_29))
223
        ENDFUNC(GLOBAL(ashiftrt_r4_30))
224
        ENDFUNC(GLOBAL(ashiftrt_r4_31))
225
        ENDFUNC(GLOBAL(ashiftrt_r4_32))
226
#endif
227
 
228
#ifdef L_ashiftrt_n
229
 
230
!
231
! GLOBAL(ashrsi3)
232
!
233
! Entry:
234
!
235
! r4: Value to shift
236
! r5: Shift count (only the low 5 bits are used)
237
!
238
! Exit:
239
!
240
! r0: Result
241
!
242
! Destroys:
243
!
244
! r5, T bit
245
!
246
 
247
        .global GLOBAL(ashrsi3)
248
        HIDDEN_FUNC(GLOBAL(ashrsi3))
249
        .align  2
250
/* ashrsi3: r0 = r4 shifted right arithmetically by (r5 & 31).
   The masked count indexes a table of byte offsets; the code then
   jumps to the matching entry in a fall-through chain of shift
   instructions that operate on r0.  r5 is overwritten with the table
   offset along the way.  */
GLOBAL(ashrsi3):
251
        mov     #31,r0
252
        and     r0,r5  /* keep only the low 5 bits of the count */
253
        mova    LOCAL(ashrsi3_table),r0  /* r0 = address of the offset table */
254
        mov.b   @(r0,r5),r5  /* r5 = table[count] */
255
#ifdef __sh1__
256
        add     r5,r0  /* __sh1__ build: form the absolute target address */
257
        jmp     @r0
258
#else
259
        braf    r5  /* other builds: PC-relative jump by the fetched offset */
260
#endif
261
        mov     r4,r0  /* delay slot: the value to shift is copied into r0 */
262
 
263
        .align  2
264
/* One byte per shift count: distance from the table to the entry label.  */
LOCAL(ashrsi3_table):
265
        .byte           LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
266
        .byte           LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
267
        .byte           LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
268
        .byte           LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
269
        .byte           LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
270
        .byte           LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
271
        .byte           LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
272
        .byte           LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
273
        .byte           LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
274
        .byte           LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
275
        .byte           LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
276
        .byte           LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
277
        .byte           LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
278
        .byte           LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
279
        .byte           LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
280
        .byte           LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
281
        .byte           LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
282
        .byte           LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
283
        .byte           LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
284
        .byte           LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
285
        .byte           LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
286
        .byte           LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
287
        .byte           LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
288
        .byte           LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
289
        .byte           LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
290
        .byte           LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
291
        .byte           LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
292
        .byte           LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
293
        .byte           LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
294
        .byte           LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
295
        .byte           LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
296
        .byte           LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
297
 
298
LOCAL(ashrsi3_31):
299
        rotcl   r0  /* rotate left through T: the old sign bit lands in T */
300
        rts
301
        subc    r0,r0  /* r0 = r0 - r0 - T, i.e. 0 or -1 (sign fill) */
302
 
303
LOCAL(ashrsi3_30):
304
        shar    r0
305
LOCAL(ashrsi3_29):
306
        shar    r0
307
LOCAL(ashrsi3_28):
308
        shar    r0
309
LOCAL(ashrsi3_27):
310
        shar    r0
311
LOCAL(ashrsi3_26):
312
        shar    r0
313
LOCAL(ashrsi3_25):
314
        shar    r0
315
LOCAL(ashrsi3_24):
316
        shlr16  r0  /* logical shift by 16 + 8 = 24 ... */
317
        shlr8   r0
318
        rts
319
        exts.b  r0,r0  /* ... then sign-extend the remaining byte */
320
 
321
LOCAL(ashrsi3_23):
322
        shar    r0
323
LOCAL(ashrsi3_22):
324
        shar    r0
325
LOCAL(ashrsi3_21):
326
        shar    r0
327
LOCAL(ashrsi3_20):
328
        shar    r0
329
LOCAL(ashrsi3_19):
330
        shar    r0
331
LOCAL(ashrsi3_18):
332
        shar    r0
333
LOCAL(ashrsi3_17):
334
        shar    r0
335
LOCAL(ashrsi3_16):
336
        shlr16  r0  /* logical shift by 16 ... */
337
        rts
338
        exts.w  r0,r0  /* ... then sign-extend the remaining 16-bit word */
339
 
340
LOCAL(ashrsi3_15):
341
        shar    r0
342
LOCAL(ashrsi3_14):
343
        shar    r0
344
LOCAL(ashrsi3_13):
345
        shar    r0
346
LOCAL(ashrsi3_12):
347
        shar    r0
348
LOCAL(ashrsi3_11):
349
        shar    r0
350
LOCAL(ashrsi3_10):
351
        shar    r0
352
LOCAL(ashrsi3_9):
353
        shar    r0
354
LOCAL(ashrsi3_8):
355
        shar    r0
356
LOCAL(ashrsi3_7):
357
        shar    r0
358
LOCAL(ashrsi3_6):
359
        shar    r0
360
LOCAL(ashrsi3_5):
361
        shar    r0
362
LOCAL(ashrsi3_4):
363
        shar    r0
364
LOCAL(ashrsi3_3):
365
        shar    r0
366
LOCAL(ashrsi3_2):
367
        shar    r0
368
LOCAL(ashrsi3_1):
369
        rts
370
        shar    r0  /* the final one-bit shift is done in the rts slot */
371
 
372
LOCAL(ashrsi3_0):
373
        rts
374
        nop  /* shift by zero: r0 already holds the input value */
375
 
376
        ENDFUNC(GLOBAL(ashrsi3))
377
#endif
378
 
379
#ifdef L_ashiftlt
380
 
381
!
382
! GLOBAL(ashlsi3)
383
!
384
! Entry:
385
!
386
! r4: Value to shift
387
! r5: Shift count (only the low 5 bits are used)
388
!
389
! Exit:
390
!
391
! r0: Result
392
!
393
! Destroys:
394
!
395
! r5, T bit
396
!
397
        .global GLOBAL(ashlsi3)
398
        HIDDEN_FUNC(GLOBAL(ashlsi3))
399
        .align  2
400
/* ashlsi3: r0 = r4 shifted left by (r5 & 31).
   The masked count indexes a table of byte offsets; the code then
   jumps to the matching entry below.  Each group of entries builds
   its count from 2-bit steps (shll2) that fall through into a common
   tail of shll / shll8 / shll16.  r5 is overwritten with the table
   offset along the way.  */
GLOBAL(ashlsi3):
401
        mov     #31,r0
402
        and     r0,r5  /* keep only the low 5 bits of the count */
403
        mova    LOCAL(ashlsi3_table),r0  /* r0 = address of the offset table */
404
        mov.b   @(r0,r5),r5  /* r5 = table[count] */
405
#ifdef __sh1__
406
        add     r5,r0  /* __sh1__ build: form the absolute target address */
407
        jmp     @r0
408
#else
409
        braf    r5  /* other builds: PC-relative jump by the fetched offset */
410
#endif
411
        mov     r4,r0  /* delay slot: the value to shift is copied into r0 */
412
 
413
        .align  2
414
/* One byte per shift count: distance from the table to the entry label.  */
LOCAL(ashlsi3_table):
415
        .byte           LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
416
        .byte           LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
417
        .byte           LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
418
        .byte           LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
419
        .byte           LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
420
        .byte           LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
421
        .byte           LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
422
        .byte           LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
423
        .byte           LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
424
        .byte           LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
425
        .byte           LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
426
        .byte           LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
427
        .byte           LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
428
        .byte           LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
429
        .byte           LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
430
        .byte           LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
431
        .byte           LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
432
        .byte           LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
433
        .byte           LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
434
        .byte           LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
435
        .byte           LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
436
        .byte           LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
437
        .byte           LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
438
        .byte           LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
439
        .byte           LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
440
        .byte           LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
441
        .byte           LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
442
        .byte           LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
443
        .byte           LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
444
        .byte           LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
445
        .byte           LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
446
        .byte           LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
447
 
448
/* Counts 6, 4, 2: only 2-bit steps, the last one in the rts slot.  */
LOCAL(ashlsi3_6):
449
        shll2   r0
450
LOCAL(ashlsi3_4):
451
        shll2   r0
452
LOCAL(ashlsi3_2):
453
        rts
454
        shll2   r0
455
 
456
/* Counts 7, 5, 3, 1: 2-bit steps plus a final 1-bit shift.  */
LOCAL(ashlsi3_7):
457
        shll2   r0
458
LOCAL(ashlsi3_5):
459
        shll2   r0
460
LOCAL(ashlsi3_3):
461
        shll2   r0
462
LOCAL(ashlsi3_1):
463
        rts
464
        shll    r0
465
 
466
/* Counts 14, 12, 10, 8: 2-bit steps plus a final 8-bit shift.  */
LOCAL(ashlsi3_14):
467
        shll2   r0
468
LOCAL(ashlsi3_12):
469
        shll2   r0
470
LOCAL(ashlsi3_10):
471
        shll2   r0
472
LOCAL(ashlsi3_8):
473
        rts
474
        shll8   r0
475
 
476
/* Counts 15, 13, 11, 9: 2-bit steps, then 8 + 1.  */
LOCAL(ashlsi3_15):
477
        shll2   r0
478
LOCAL(ashlsi3_13):
479
        shll2   r0
480
LOCAL(ashlsi3_11):
481
        shll2   r0
482
LOCAL(ashlsi3_9):
483
        shll8   r0
484
        rts
485
        shll    r0
486
 
487
/* Counts 22, 20, 18, 16: 2-bit steps plus a final 16-bit shift.  */
LOCAL(ashlsi3_22):
488
        shll2   r0
489
LOCAL(ashlsi3_20):
490
        shll2   r0
491
LOCAL(ashlsi3_18):
492
        shll2   r0
493
LOCAL(ashlsi3_16):
494
        rts
495
        shll16  r0
496
 
497
/* Counts 23, 21, 19, 17: 2-bit steps, then 16 + 1.  */
LOCAL(ashlsi3_23):
498
        shll2   r0
499
LOCAL(ashlsi3_21):
500
        shll2   r0
501
LOCAL(ashlsi3_19):
502
        shll2   r0
503
LOCAL(ashlsi3_17):
504
        shll16  r0
505
        rts
506
        shll    r0
507
 
508
/* Counts 30, 28, 26, 24: 2-bit steps, then 16 + 8.  */
LOCAL(ashlsi3_30):
509
        shll2   r0
510
LOCAL(ashlsi3_28):
511
        shll2   r0
512
LOCAL(ashlsi3_26):
513
        shll2   r0
514
LOCAL(ashlsi3_24):
515
        shll16  r0
516
        rts
517
        shll8   r0
518
 
519
/* Counts 31, 29, 27, 25: 2-bit steps, then 16 + 8 + 1.  */
LOCAL(ashlsi3_31):
520
        shll2   r0
521
LOCAL(ashlsi3_29):
522
        shll2   r0
523
LOCAL(ashlsi3_27):
524
        shll2   r0
525
LOCAL(ashlsi3_25):
526
        shll16  r0
527
        shll8   r0
528
        rts
529
        shll    r0
530
 
531
/* Count 0: r0 already holds the input value.  */
LOCAL(ashlsi3_0):
532
        rts
533
        nop
534
 
535
        ENDFUNC(GLOBAL(ashlsi3))
536
#endif
537
 
538
#ifdef L_lshiftrt
539
 
540
!
541
! GLOBAL(lshrsi3)
542
!
543
! Entry:
544
!
545
! r4: Value to shift
546
! r5: Shift count (only the low 5 bits are used)
547
!
548
! Exit:
549
!
550
! r0: Result
551
!
552
! Destroys:
553
!
554
! r5, T bit
555
!
556
        .global GLOBAL(lshrsi3)
557
        HIDDEN_FUNC(GLOBAL(lshrsi3))
558
        .align  2
559
/* lshrsi3: r0 = r4 shifted right logically by (r5 & 31).
   The masked count indexes a table of byte offsets; the code then
   jumps to the matching entry below.  Each group of entries builds
   its count from 2-bit steps (shlr2) that fall through into a common
   tail of shlr / shlr8 / shlr16.  r5 is overwritten with the table
   offset along the way.  */
GLOBAL(lshrsi3):
560
        mov     #31,r0
561
        and     r0,r5  /* keep only the low 5 bits of the count */
562
        mova    LOCAL(lshrsi3_table),r0  /* r0 = address of the offset table */
563
        mov.b   @(r0,r5),r5  /* r5 = table[count] */
564
#ifdef __sh1__
565
        add     r5,r0  /* __sh1__ build: form the absolute target address */
566
        jmp     @r0
567
#else
568
        braf    r5  /* other builds: PC-relative jump by the fetched offset */
569
#endif
570
        mov     r4,r0  /* delay slot: the value to shift is copied into r0 */
571
 
572
        .align  2
573
/* One byte per shift count: distance from the table to the entry label.  */
LOCAL(lshrsi3_table):
574
        .byte           LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
575
        .byte           LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
576
        .byte           LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
577
        .byte           LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
578
        .byte           LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
579
        .byte           LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
580
        .byte           LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
581
        .byte           LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
582
        .byte           LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
583
        .byte           LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
584
        .byte           LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
585
        .byte           LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
586
        .byte           LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
587
        .byte           LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
588
        .byte           LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
589
        .byte           LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
590
        .byte           LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
591
        .byte           LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
592
        .byte           LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
593
        .byte           LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
594
        .byte           LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
595
        .byte           LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
596
        .byte           LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
597
        .byte           LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
598
        .byte           LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
599
        .byte           LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
600
        .byte           LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
601
        .byte           LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
602
        .byte           LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
603
        .byte           LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
604
        .byte           LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
605
        .byte           LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
606
 
607
/* Counts 6, 4, 2: only 2-bit steps, the last one in the rts slot.  */
LOCAL(lshrsi3_6):
608
        shlr2   r0
609
LOCAL(lshrsi3_4):
610
        shlr2   r0
611
LOCAL(lshrsi3_2):
612
        rts
613
        shlr2   r0
614
 
615
/* Counts 7, 5, 3, 1: 2-bit steps plus a final 1-bit shift.  */
LOCAL(lshrsi3_7):
616
        shlr2   r0
617
LOCAL(lshrsi3_5):
618
        shlr2   r0
619
LOCAL(lshrsi3_3):
620
        shlr2   r0
621
LOCAL(lshrsi3_1):
622
        rts
623
        shlr    r0
624
 
625
/* Counts 14, 12, 10, 8: 2-bit steps plus a final 8-bit shift.  */
LOCAL(lshrsi3_14):
626
        shlr2   r0
627
LOCAL(lshrsi3_12):
628
        shlr2   r0
629
LOCAL(lshrsi3_10):
630
        shlr2   r0
631
LOCAL(lshrsi3_8):
632
        rts
633
        shlr8   r0
634
 
635
/* Counts 15, 13, 11, 9: 2-bit steps, then 8 + 1.  */
LOCAL(lshrsi3_15):
636
        shlr2   r0
637
LOCAL(lshrsi3_13):
638
        shlr2   r0
639
LOCAL(lshrsi3_11):
640
        shlr2   r0
641
LOCAL(lshrsi3_9):
642
        shlr8   r0
643
        rts
644
        shlr    r0
645
 
646
/* Counts 22, 20, 18, 16: 2-bit steps plus a final 16-bit shift.  */
LOCAL(lshrsi3_22):
647
        shlr2   r0
648
LOCAL(lshrsi3_20):
649
        shlr2   r0
650
LOCAL(lshrsi3_18):
651
        shlr2   r0
652
LOCAL(lshrsi3_16):
653
        rts
654
        shlr16  r0
655
 
656
/* Counts 23, 21, 19, 17: 2-bit steps, then 16 + 1.  */
LOCAL(lshrsi3_23):
657
        shlr2   r0
658
LOCAL(lshrsi3_21):
659
        shlr2   r0
660
LOCAL(lshrsi3_19):
661
        shlr2   r0
662
LOCAL(lshrsi3_17):
663
        shlr16  r0
664
        rts
665
        shlr    r0
666
 
667
/* Counts 30, 28, 26, 24: 2-bit steps, then 16 + 8.  */
LOCAL(lshrsi3_30):
668
        shlr2   r0
669
LOCAL(lshrsi3_28):
670
        shlr2   r0
671
LOCAL(lshrsi3_26):
672
        shlr2   r0
673
LOCAL(lshrsi3_24):
674
        shlr16  r0
675
        rts
676
        shlr8   r0
677
 
678
/* Counts 31, 29, 27, 25: 2-bit steps, then 16 + 8 + 1.  */
LOCAL(lshrsi3_31):
679
        shlr2   r0
680
LOCAL(lshrsi3_29):
681
        shlr2   r0
682
LOCAL(lshrsi3_27):
683
        shlr2   r0
684
LOCAL(lshrsi3_25):
685
        shlr16  r0
686
        shlr8   r0
687
        rts
688
        shlr    r0
689
 
690
/* Count 0: r0 already holds the input value.  */
LOCAL(lshrsi3_0):
691
        rts
692
        nop
693
 
694
        ENDFUNC(GLOBAL(lshrsi3))
695
#endif
696
 
697
#ifdef L_movmem
698
        .text
699
        .balign 4
700
        .global GLOBAL(movmem)
701
        HIDDEN_FUNC(GLOBAL(movmem))
702
        HIDDEN_ALIAS(movstr,movmem)
703
        /* This would be a lot simpler if r6 contained the byte count
704
           minus 64, and we wouldn't be called here for a byte count of 64.  */
705
/* movmem: block copy in 4-byte words, loading from offsets off r5 and
   storing to the same offsets off r4, with r0 as the scratch register.
   r6 is a caller-supplied count that is scaled by 4 on entry and then
   reduced by 64 per loop pass.
   NOTE(review): the exact encoding of r6 is defined by the caller and
   is not visible here -- confirm against the movmem expander.  */
GLOBAL(movmem):
706
        sts.l   pr,@-r15  /* save the return address; bsr below overwrites pr */
707
        shll2   r6  /* r6 *= 4 */
708
        bsr     GLOBAL(movmemSI52+2)  /* call into the copy chain one instruction past movmemSI52 */
709
        mov.l   @(48,r5),r0  /* delay slot: does the load that the +2 entry skips */
710
        .balign 4
711
LOCAL(movmem_loop): /* Reached with rts */
712
        mov.l   @(60,r5),r0
713
        add     #-64,r6
714
        mov.l   r0,@(60,r4)
715
        tst     r6,r6  /* T = (r6 == 0) */
716
        mov.l   @(56,r5),r0
717
        bt      LOCAL(movmem_done)
718
        mov.l   r0,@(56,r4)
719
        cmp/pl  r6  /* T = (r6 > 0) */
720
        mov.l   @(52,r5),r0
721
        add     #64,r5
722
        mov.l   r0,@(52,r4)
723
        add     #64,r4
724
        bt      GLOBAL(movmemSI52)  /* r6 still positive: plain branch into the chain; pr is untouched, so its rts lands on movmem_loop again */
725
! done all the large groups, do the remainder
726
! jump to movmem+
727
        /* r6 is negative on this path (zero and positive were branched
           away above), so the target is movmemSI4 + 4 + r6, an address
           inside the copy chain.  */
        mova    GLOBAL(movmemSI4)+4,r0
728
        add     r6,r0
729
        jmp     @r0
730
LOCAL(movmem_done): ! share slot insn, works out aligned.
731
        /* The lds.l below is both the delay slot of the jmp above and
           the first instruction of the movmem_done path; either way it
           restores the caller return address into pr.  */
        lds.l   @r15+,pr
732
        mov.l   r0,@(56,r4)
733
        mov.l   @(52,r5),r0
734
        rts
735
        mov.l   r0,@(52,r4)  /* delay slot: last store of the done path */
736
        .balign 4
737
! ??? We need aliases movstr* for movmem* for the older libraries.  These
738
! aliases will be removed at some point in the future.
739
        .global GLOBAL(movmemSI64)
740
        HIDDEN_FUNC(GLOBAL(movmemSI64))
741
        HIDDEN_ALIAS(movstrSI64,movmemSI64)
742
/* Unrolled copy chain.  Each entry movmemSI<N> copies one 4-byte word
   from offset N-4 off r5 to offset N-4 off r4 through r0 and then
   falls into the next lower entry, so entering at movmemSI<N> copies
   N bytes in total, highest offset first.  Only movmemSI4 returns.
   The HIDDEN_ALIAS lines attach the older movstrSI<N> names.  */
GLOBAL(movmemSI64):
743
        mov.l   @(60,r5),r0
744
        mov.l   r0,@(60,r4)
745
        .global GLOBAL(movmemSI60)
746
        HIDDEN_FUNC(GLOBAL(movmemSI60))
747
        HIDDEN_ALIAS(movstrSI60,movmemSI60)
748
GLOBAL(movmemSI60):
749
        mov.l   @(56,r5),r0
750
        mov.l   r0,@(56,r4)
751
        .global GLOBAL(movmemSI56)
752
        HIDDEN_FUNC(GLOBAL(movmemSI56))
753
        HIDDEN_ALIAS(movstrSI56,movmemSI56)
754
GLOBAL(movmemSI56):
755
        mov.l   @(52,r5),r0
756
        mov.l   r0,@(52,r4)
757
        .global GLOBAL(movmemSI52)
758
        HIDDEN_FUNC(GLOBAL(movmemSI52))
759
        HIDDEN_ALIAS(movstrSI52,movmemSI52)
760
GLOBAL(movmemSI52):
761
        mov.l   @(48,r5),r0
762
        mov.l   r0,@(48,r4)
763
        .global GLOBAL(movmemSI48)
764
        HIDDEN_FUNC(GLOBAL(movmemSI48))
765
        HIDDEN_ALIAS(movstrSI48,movmemSI48)
766
GLOBAL(movmemSI48):
767
        mov.l   @(44,r5),r0
768
        mov.l   r0,@(44,r4)
769
        .global GLOBAL(movmemSI44)
770
        HIDDEN_FUNC(GLOBAL(movmemSI44))
771
        HIDDEN_ALIAS(movstrSI44,movmemSI44)
772
GLOBAL(movmemSI44):
773
        mov.l   @(40,r5),r0
774
        mov.l   r0,@(40,r4)
775
        .global GLOBAL(movmemSI40)
776
        HIDDEN_FUNC(GLOBAL(movmemSI40))
777
        HIDDEN_ALIAS(movstrSI40,movmemSI40)
778
GLOBAL(movmemSI40):
779
        mov.l   @(36,r5),r0
780
        mov.l   r0,@(36,r4)
781
        .global GLOBAL(movmemSI36)
782
        HIDDEN_FUNC(GLOBAL(movmemSI36))
783
        HIDDEN_ALIAS(movstrSI36,movmemSI36)
784
GLOBAL(movmemSI36):
785
        mov.l   @(32,r5),r0
786
        mov.l   r0,@(32,r4)
787
        .global GLOBAL(movmemSI32)
788
        HIDDEN_FUNC(GLOBAL(movmemSI32))
789
        HIDDEN_ALIAS(movstrSI32,movmemSI32)
790
GLOBAL(movmemSI32):
791
        mov.l   @(28,r5),r0
792
        mov.l   r0,@(28,r4)
793
        .global GLOBAL(movmemSI28)
794
        HIDDEN_FUNC(GLOBAL(movmemSI28))
795
        HIDDEN_ALIAS(movstrSI28,movmemSI28)
796
GLOBAL(movmemSI28):
797
        mov.l   @(24,r5),r0
798
        mov.l   r0,@(24,r4)
799
        .global GLOBAL(movmemSI24)
800
        HIDDEN_FUNC(GLOBAL(movmemSI24))
801
        HIDDEN_ALIAS(movstrSI24,movmemSI24)
802
GLOBAL(movmemSI24):
803
        mov.l   @(20,r5),r0
804
        mov.l   r0,@(20,r4)
805
        .global GLOBAL(movmemSI20)
806
        HIDDEN_FUNC(GLOBAL(movmemSI20))
807
        HIDDEN_ALIAS(movstrSI20,movmemSI20)
808
GLOBAL(movmemSI20):
809
        mov.l   @(16,r5),r0
810
        mov.l   r0,@(16,r4)
811
        .global GLOBAL(movmemSI16)
812
        HIDDEN_FUNC(GLOBAL(movmemSI16))
813
        HIDDEN_ALIAS(movstrSI16,movmemSI16)
814
GLOBAL(movmemSI16):
815
        mov.l   @(12,r5),r0
816
        mov.l   r0,@(12,r4)
817
        .global GLOBAL(movmemSI12)
818
        HIDDEN_FUNC(GLOBAL(movmemSI12))
819
        HIDDEN_ALIAS(movstrSI12,movmemSI12)
820
GLOBAL(movmemSI12):
821
        mov.l   @(8,r5),r0
822
        mov.l   r0,@(8,r4)
823
        .global GLOBAL(movmemSI8)
824
        HIDDEN_FUNC(GLOBAL(movmemSI8))
825
        HIDDEN_ALIAS(movstrSI8,movmemSI8)
826
GLOBAL(movmemSI8):
827
        mov.l   @(4,r5),r0
828
        mov.l   r0,@(4,r4)
829
        .global GLOBAL(movmemSI4)
830
        HIDDEN_FUNC(GLOBAL(movmemSI4))
831
        HIDDEN_ALIAS(movstrSI4,movmemSI4)
832
GLOBAL(movmemSI4):
833
        mov.l   @(0,r5),r0
834
        rts
835
        mov.l   r0,@(0,r4)  /* delay slot: the last word is stored while returning */
836
 
837
        ENDFUNC(GLOBAL(movmemSI64))
838
        ENDFUNC(GLOBAL(movmemSI60))
839
        ENDFUNC(GLOBAL(movmemSI56))
840
        ENDFUNC(GLOBAL(movmemSI52))
841
        ENDFUNC(GLOBAL(movmemSI48))
842
        ENDFUNC(GLOBAL(movmemSI44))
843
        ENDFUNC(GLOBAL(movmemSI40))
844
        ENDFUNC(GLOBAL(movmemSI36))
845
        ENDFUNC(GLOBAL(movmemSI32))
846
        ENDFUNC(GLOBAL(movmemSI28))
847
        ENDFUNC(GLOBAL(movmemSI24))
848
        ENDFUNC(GLOBAL(movmemSI20))
849
        ENDFUNC(GLOBAL(movmemSI16))
850
        ENDFUNC(GLOBAL(movmemSI12))
851
        ENDFUNC(GLOBAL(movmemSI8))
852
        ENDFUNC(GLOBAL(movmemSI4))
853
        ENDFUNC(GLOBAL(movmem))
854
#endif
855
 
856
#ifdef L_movmem_i4
857
        .text
858
        .global GLOBAL(movmem_i4_even)
859
        .global GLOBAL(movmem_i4_odd)
860
        .global GLOBAL(movmemSI12_i4)
861
 
862
        HIDDEN_FUNC(GLOBAL(movmem_i4_even))
863
        HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
864
        HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
865
 
866
        HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
867
        HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
868
        HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
869
 
870
        .p2align        5
871
/* movmem_i4_even / movmem_i4_odd: software-pipelined word copy.
   Words are loaded from r5 with post-increment into r0-r3 and stored
   at fixed offsets off r4, which advances by 16 per loop pass.  r6 is
   a down-counter stepped by dt; a dt that reaches zero ends the copy.
   NOTE(review): the meaning of the initial r6 value and the even/odd
   distinction are set by the caller -- confirm against the expander.  */
/* Exit taken from the bt/s in the loop: stores the two words just
   loaded.  r4 was not advanced on this path, hence offsets 16 and 20.  */
L_movmem_2mod4_end:
872
        mov.l   r0,@(16,r4)
873
        rts
874
        mov.l   r1,@(20,r4)
875
 
876
        .p2align        2
877
 
878
GLOBAL(movmem_i4_even):
879
        mov.l   @r5+,r0
880
        bra     L_movmem_start_even
881
        mov.l   @r5+,r1  /* delay slot: second word is loaded before entering the loop body */
882
 
883
GLOBAL(movmem_i4_odd):
884
        mov.l   @r5+,r1
885
        add     #-4,r4  /* bias r4 so the first word is stored at the original r4 via offset 4 */
886
        mov.l   @r5+,r2
887
        mov.l   @r5+,r3
888
        mov.l   r1,@(4,r4)
889
        mov.l   r2,@(8,r4)
890
 
891
L_movmem_loop:
892
        mov.l   r3,@(12,r4)
893
        dt      r6  /* decrement r6; T is set when it reaches zero */
894
        mov.l   @r5+,r0
895
        bt/s    L_movmem_2mod4_end  /* delayed branch: the next load still executes */
896
        mov.l   @r5+,r1
897
        add     #16,r4
898
L_movmem_start_even:
899
        mov.l   @r5+,r2
900
        mov.l   @r5+,r3
901
        mov.l   r0,@r4
902
        dt      r6  /* decrement r6; T is set when it reaches zero */
903
        mov.l   r1,@(4,r4)
904
        bf/s    L_movmem_loop  /* delayed branch: the next store still executes */
905
        mov.l   r2,@(8,r4)
906
        rts
907
        mov.l   r3,@(12,r4)  /* delay slot: final store while returning */
908
 
909
        ENDFUNC(GLOBAL(movmem_i4_even))
910
        ENDFUNC(GLOBAL(movmem_i4_odd))
911
 
912
        .p2align        4
913
/* movmemSI12_i4: copy three 4-byte words (12 bytes) from the address
   in r5 to the address in r4.  All three loads are issued before the
   stores; r0, r1 and r2 are overwritten, r4 and r5 are left as is.  */
GLOBAL(movmemSI12_i4):
914
        mov.l   @r5,r0
915
        mov.l   @(4,r5),r1
916
        mov.l   @(8,r5),r2
917
        mov.l   r0,@r4
918
        mov.l   r1,@(4,r4)
919
        rts
920
        mov.l   r2,@(8,r4)  /* delay slot: third store while returning */
921
 
922
        ENDFUNC(GLOBAL(movmemSI12_i4))
923
#endif
924
 
925
#ifdef L_mulsi3
926
 
927
 
928
        .global GLOBAL(mulsi3)
929
        HIDDEN_FUNC(GLOBAL(mulsi3))
930
 
931
! r4 =       aabb
932
! r5 =       ccdd
933
! r0 = aabb*ccdd  via partial products
934
!
935
! if aa == 0 and cc == 0
936
! r0 = bb*dd
937
!
938
! else
939
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
940
!
941
 
942
/* mulsi3: r0 = low 32 bits of r4 * r5, built from 16x16 unsigned
   multiplies (mulu.w).  When both upper halves are zero the single
   product of the lower halves is the answer; otherwise the two cross
   products are added, shifted up by 16 and added to it.
   r1, r2, r3 and macl are overwritten.  */
GLOBAL(mulsi3):
943
        mulu.w  r4,r5           ! multiply the lsws  macl=bb*dd
944
        mov     r5,r3           ! r3 = ccdd
945
        swap.w  r4,r2           ! r2 = bbaa
946
        xtrct   r2,r3           ! r3 = aacc
947
        tst     r3,r3           ! msws zero ?
948
        bf      hiset  /* some upper half is nonzero: take the long path */
949
        rts                     ! yes - then we have the answer
950
        sts     macl,r0  /* delay slot: r0 = bb*dd */
951
 
952
hiset:  sts     macl,r0         ! r0 = bb*dd
953
        mulu.w  r2,r5           ! brewing macl = aa*dd
954
        sts     macl,r1  /* r1 = aa*dd */
955
        mulu.w  r3,r4           ! brewing macl = cc*bb
956
        sts     macl,r2  /* r2 = cc*bb */
957
        add     r1,r2  /* r2 = aa*dd + cc*bb */
958
        shll16  r2  /* move the cross-product sum into the upper half */
959
        rts
960
        add     r2,r0  /* delay slot: r0 = bb*dd + (cross products << 16) */
961
 
962
        ENDFUNC(GLOBAL(mulsi3))
963
#endif
964
#endif /* ! __SH5__ */
965
#ifdef L_sdivsi3_i4
966
        .title "SH DIVIDE"
967
!! 4 byte integer Divide code for the Renesas SH
968
#ifdef __SH4__
969
!! args in r4 and r5, result in fpul, clobber dr0, dr2
970
 
971
        .global GLOBAL(sdivsi3_i4)
972
        HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
973
/* sdivsi3_i4 (__SH4__ build): signed r4 / r5 computed in the FPU.
   Both integers are converted into the dr0 and dr2 registers, divided,
   and the quotient is converted back to an integer left in fpul.
   This variant does not touch fpscr.
   NOTE(review): presumably relies on the FPU already being in
   double-precision mode on entry -- confirm.  */
GLOBAL(sdivsi3_i4):
974
        lds r4,fpul
975
        float fpul,dr0  /* dr0 = dividend as floating point */
976
        lds r5,fpul
977
        float fpul,dr2  /* dr2 = divisor as floating point */
978
        fdiv dr2,dr0  /* dr0 = dr0 / dr2 */
979
        rts
980
        ftrc dr0,fpul  /* delay slot: quotient converted to integer in fpul */
981
 
982
        ENDFUNC(GLOBAL(sdivsi3_i4))
983
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
984
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
985
 
986
#if ! __SH5__ || __SH5__ == 32
987
#if __SH5__
988
        .mode   SHcompact
989
#endif
990
        .global GLOBAL(sdivsi3_i4)
991
        HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
992
/* sdivsi3_i4 (builds where __SH4__ is not defined): signed r4 / r5
   computed in the FPU with the result left in fpul.  Unlike the
   __SH4__ variant, this one saves fpscr on the stack, loads its own
   fpscr value for the duration of the divide, and restores the saved
   value in the rts delay slot.  r2 is used as scratch.  */
GLOBAL(sdivsi3_i4):
993
        sts.l fpscr,@-r15  /* push the caller fpscr */
994
        mov #8,r2
995
        swap.w r2,r2  /* r2 = 8 << 16 = 0x00080000 */
996
        lds r2,fpscr  /* NOTE(review): presumably this sets the double-precision (PR) bit -- confirm against the FPSCR layout */
997
        lds r4,fpul
998
        float fpul,dr0  /* dr0 = dividend as floating point */
999
        lds r5,fpul
1000
        float fpul,dr2  /* dr2 = divisor as floating point */
1001
        fdiv dr2,dr0  /* dr0 = dr0 / dr2 */
1002
        ftrc dr0,fpul  /* quotient converted to integer in fpul */
1003
        rts
1004
        lds.l @r15+,fpscr  /* delay slot: pop the caller fpscr */
1005
 
1006
        ENDFUNC(GLOBAL(sdivsi3_i4))
1007
#endif /* ! __SH5__ || __SH5__ == 32 */
1008
#endif /* ! __SH4__ */
1009
#endif
1010
 
1011
#ifdef L_sdivsi3
1012
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1013
   sh2e/sh3e code.  */
1014
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1015
!!
1016
!! Steve Chamberlain
1017
!! sac@cygnus.com
1018
!!
1019
!!
1020
 
1021
!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1022
 
1023
        .global GLOBAL(sdivsi3)
1024
#if __SHMEDIA__
1025
#if __SH5__ == 32
1026
        .section        .text..SHmedia32,"ax"
1027
#else
1028
        .text
1029
#endif
1030
        .align  2
1031
#if 0
1032
/* NOTE: this variant is enclosed in "#if 0" and is never assembled.  It is
   a plain shift-and-subtract signed divide kept as a reference.  */
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns divsi3_i1 and
   divsi3_i1_media.

int __sdivsi3 (i, j)
     int i, j;
{
  register unsigned long long r18 asm ("r18");
  register unsigned long long r19 asm ("r19");
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r1 asm ("r1") = 1;
  register int r2 asm ("r2") = i >> 31;
  register int r3 asm ("r3") = j >> 31;

  r2 = r2 ? r2 : r1;
  r3 = r3 ? r3 : r1;
  r18 = i * r2;
  r19 = j * r3;
  r2 *= r3;

  r19 <<= 31;
  r1 <<= 31;
  do
    if (r18 >= r19)
      r0 |= r1, r18 -= r19;
  while (r19 >>= 1, r1 >>= 1);

  return r2 * (int)r0;
}
*/
GLOBAL(sdivsi3):
        pt/l    LOCAL(sdivsi3_dontadd), tr2
        pt/l    LOCAL(sdivsi3_loop), tr1
        ptabs/l r18, tr0                // tr0 = return target
        movi    0, r0                   // r0 = quotient accumulator
        movi    1, r1                   // r1 = current quotient bit
        shari.l r4, 31, r2              // r2 = 0 or -1 from the sign of i
        shari.l r5, 31, r3              // r3 = 0 or -1 from the sign of j
        cmveq   r2, r1, r2              // r2 = r2 ? r2 : 1
        cmveq   r3, r1, r3              // r3 = r3 ? r3 : 1
        muls.l  r4, r2, r18             // r18 = i * r2
        muls.l  r5, r3, r19             // r19 = j * r3
        muls.l  r2, r3, r2              // r2 = sign of the result
        shlli   r19, 31, r19
        shlli   r1, 31, r1
LOCAL(sdivsi3_loop):
        bgtu    r19, r18, tr2           // skip when shifted divisor > rest
        or      r0, r1, r0
        sub     r18, r19, r18
LOCAL(sdivsi3_dontadd):
        shlri   r1, 1, r1
        shlri   r19, 1, r19
        bnei    r1, 0, tr1              // loop until the bit has shifted out
        muls.l  r0, r2, r0              // apply the sign
        add.l   r0, r63, r0
        blink   tr0, r63
1089
#elif 0 /* ! 0 */
1090
 // NOTE: this variant is enclosed in #elif 0 and is never assembled.
 // inputs: r4,r5
 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
 // result in r0
GLOBAL(sdivsi3):
 // can create absolute value without extra latency,
 // but dependent on proper sign extension of inputs:
 // shari.l r5,31,r2
 // xor r5,r2,r20
 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
 shari.l r5,31,r2
 ori r2,1,r2
 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
 shari.l r4,31,r3
 nsb r20,r0
 shlld r20,r0,r25
 shlri r25,48,r25
 sub r19,r25,r1
 mmulfx.w r1,r1,r2
 mshflo.w r1,r63,r1
 // If r4 was to be used in-place instead of r21, could use this sequence
 // to compute absolute:
 // sub r63,r4,r19 // compute absolute value of r4
 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
 ori r3,1,r3
 mmulfx.w r25,r2,r2
 sub r19,r0,r0
 muls.l r4,r3,r21
 msub.w r1,r2,r2
 addi r2,-2,r1
 mulu.l r21,r1,r19
 mmulfx.w r2,r2,r2
 shlli r1,15,r1
 shlrd r19,r0,r19
 mulu.l r19,r20,r3
 mmacnfx.wl r25,r2,r1
 ptabs r18,tr0
 sub r21,r3,r25

 mulu.l r25,r1,r2
 addi r0,14,r0
 xor r4,r5,r18 // r18 sign bit = sign of the quotient
 shlrd r2,r0,r2
 mulu.l r2,r20,r3
 add r19,r2,r19
 shari.l r18,31,r18 // r18 = 0 or -1
 sub r25,r3,r25

 mulu.l r25,r1,r2
 sub r25,r20,r25
 add r19,r18,r19
 shlrd r2,r0,r2
 mulu.l r2,r20,r3
 addi r25,1,r25
 add r19,r2,r19

 cmpgt r25,r3,r25
 add.l r19,r25,r0
 xor r0,r18,r0 // with the earlier add of r18 this negates when r18 is -1
 blink tr0,r63
1151
#else /* ! 0 && ! 0 */
1152
 
1153
 // Signed 32-bit division for SHmedia using a table-driven reciprocal
 // that is refined in two steps (see the 11 bit / 22 bit notes below).
 // sdivsi3 / sdivsi3_1 load the address of div_table_internal into r20
 // and fall through into sdivsi3_2, which expects the table in r20.
 // inputs: r4,r5
 // clobbered: r1,r18,r19,r20,r21,r25,tr0
 // result in r0
        HIDDEN_FUNC(GLOBAL(sdivsi3_2))
#ifndef __pic__
        FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3): /* this is the shcompact entry point */
 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
 // with the SHcompact implementation, which clobbers tr1 / tr2.
 .global GLOBAL(sdivsi3_1)
GLOBAL(sdivsi3_1):
 .global GLOBAL(div_table_internal)
 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
 shori GLOBAL(div_table_internal) & 65535, r20
#endif
 .global GLOBAL(sdivsi3_2)
 // div_table in r20
 // clobbered: r1,r18,r19,r21,r25,tr0
GLOBAL(sdivsi3_2):
 nsb r5, r1
 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
 ldx.ub r20, r21, r19 // u0.8
 shari r25, 32, r25   // normalize to s2.30
 shlli r21, 1, r21
 muls.l r25, r19, r19 // s2.38
 ldx.w r20, r21, r21  // s2.14
  ptabs r18, tr0
 shari r19, 24, r19   // truncate to s2.14
 sub r21, r19, r19    // some 11 bit inverse in s1.14
 muls.l r19, r19, r21 // u0.28
  sub r63, r1, r1
  addi r1, 92, r1
 muls.l r25, r21, r18 // s2.58
 shlli r19, 45, r19   // multiply by two and convert to s2.58
  /* bubble */
 sub r19, r18, r18
 shari r18, 28, r18   // some 22 bit inverse in s1.30
 muls.l r18, r25, r0  // s2.60
  muls.l r18, r4, r25 // s32.30
  /* bubble */
 shari r0, 16, r19   // s-16.44
 muls.l r19, r18, r19 // s-16.74
  shari r25, 63, r0
  shari r4, 14, r18   // s19.-14
 shari r19, 30, r19   // s-16.44
 muls.l r19, r18, r19 // s15.30
  xor r21, r0, r21    // You could also use the constant 1 << 27.
  add r21, r25, r21
 sub r21, r19, r21
 shard r21, r1, r21
 sub r21, r0, r0
 blink tr0, r63
#ifndef __pic__
        ENDFUNC(GLOBAL(sdivsi3))
#endif
        ENDFUNC(GLOBAL(sdivsi3_2))
1210
#endif
1211
#elif defined __SHMEDIA__
1212
/* m5compact-nofpu */
/* Signed 32-bit division as a 32-iteration SHmedia shift-and-subtract loop.
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   Both operands are made positive first, and the sign of the result
   (sign of r4 XOR sign of r5) is applied at the end.  */
 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
        .mode   SHmedia
        .section        .text..SHmedia32,"ax"
        .align  2
        FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
        pt/l LOCAL(sdivsi3_dontsub), tr0
        pt/l LOCAL(sdivsi3_loop), tr1
        ptabs/l r18,tr2                 // tr2 = return target
        shari.l r4,31,r18               // r18 = 0 or -1 from dividend sign
        shari.l r5,31,r19               // r19 = 0 or -1 from divisor sign
        xor r4,r18,r20
        xor r5,r19,r21
        sub.l r20,r18,r20               // r20 = abs (dividend)
        sub.l r21,r19,r21               // r21 = abs (divisor)
        xor r18,r19,r19                 // r19 = 0 or -1: sign of the quotient
        shlli r21,32,r25                // r25 = divisor << 32, low word 0
        addi r25,-1,r21                 // r21 = (divisor << 32) - 1
        addz.l r20,r63,r20              // zero-extend the dividend
LOCAL(sdivsi3_loop):
        shlli r20,1,r20
        bgeu/u r21,r20,tr0              // rest too small: no subtraction
        sub r20,r21,r20                 // subtract divisor, set low quotient bit
LOCAL(sdivsi3_dontsub):
        addi.l r25,-1,r25               // r25 doubles as the loop counter
        bnei r25,-32,tr1                // 32 iterations
        xor r20,r19,r20
        sub.l r20,r19,r0                // conditionally negate, 32-bit result
        blink tr2,r63
        ENDFUNC(GLOBAL(sdivsi3))
1243
#else /* ! __SHMEDIA__ */
1244
        FUNC(GLOBAL(sdivsi3))
/* Signed 32-bit division for SHcompact using the div0s / div1 step-divide
   instructions.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient; a zero divisor yields 0.
   Clobbers r1, r2, r3 and the T bit.
   The 32 identical div1 / rotcl steps are generated with .rept, and the
   division-by-zero exit uses a LOCAL() label like the rest of the file,
   so it stays out of the symbol table.  */
GLOBAL(sdivsi3):
        mov     r4,r1           ! r1 = dividend, turns into the quotient
        mov     r5,r0           ! r0 = divisor

        tst     r0,r0
        bt      LOCAL(sdivsi3_div0)
        mov     #0,r2
        div0s   r2,r1           ! T = sign bit of the dividend (r2 is 0)
        subc    r3,r3           ! r3 = -T: dividend sign extended
        subc    r2,r1           ! r1 -= T
        div0s   r0,r3           ! set up the divide from both signs
        rotcl   r1
        ! 32 divide steps; every rotcl shifts the T bit left by div1 into r1
        .rept   32
        div1    r0,r3
        rotcl   r1
        .endr
        addc    r2,r1           ! r1 += T (r2 is 0)
        rts
        mov     r1,r0           ! delay slot: return the quotient


LOCAL(sdivsi3_div0):
        rts
        mov     #0,r0           ! delay slot: division by zero returns 0

        ENDFUNC(GLOBAL(sdivsi3))
1330
#endif /* ! __SHMEDIA__ */
1331
#endif /* ! __SH4__ */
1332
#endif
1333
#ifdef L_udivsi3_i4
1334
 
1335
        .title "SH DIVIDE"
1336
!! 4 byte integer Divide code for the Renesas SH
1337
#ifdef __SH4__
1338
!! udivsi3_i4 (__SH4__ variant): unsigned 32-bit division done on the FPU.
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
!! and t bit
!! The float instruction converts a signed integer, so each operand has
!! its top bit flipped (the value minus 2^31 when read as signed) before
!! the conversion, and 2^31 is added back as a double afterwards.
!! A divisor of 0 or 1 takes the trivial exit and returns the dividend.
!! Internal labels use LOCAL() like the rest of the file.

        .global GLOBAL(udivsi3_i4)
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
        mov #1,r1
        cmp/hi r1,r5            ! T = (divisor > 1), unsigned
        bf LOCAL(udivsi3_i4_trivial)
        rotr r1                 ! r1 = 0x80000000
        xor r1,r4               ! flip the top bit of the dividend
        lds r4,fpul
        mova LOCAL(udivsi3_i4_L1),r0
#ifdef FMOVD_WORKS
        fmov.d @r0+,dr4         ! dr4 = 2147483648.0
#else
        fmov.s @r0+,DR40        ! same constant, loaded as two halves
        fmov.s @r0,DR41
#endif
        float fpul,dr0
        xor r1,r5               ! flip the top bit of the divisor
        lds r5,fpul
        float fpul,dr2
        fadd dr4,dr0            ! dr0 = dividend as unsigned value
        fadd dr4,dr2            ! dr2 = divisor as unsigned value
        fdiv dr2,dr0
        rts
        ftrc dr0,fpul           ! delay slot: fpul = quotient as integer

LOCAL(udivsi3_i4_trivial):
        rts
        lds r4,fpul             ! delay slot: quotient = dividend

        .align 2
#ifdef FMOVD_WORKS
        .align 3        ! make double below 8 byte aligned.
#endif
LOCAL(udivsi3_i4_L1):
        .double 2147483648

        ENDFUNC(GLOBAL(udivsi3_i4))
1379
#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1380
#if ! __SH5__ || __SH5__ == 32
1381
!! udivsi3_i4 (SH5 with FPU): unsigned 32-bit division done on the FPU.
!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
!! Both operands are zero-extended to 64 bits and converted with the
!! 64-bit integer to double conversion, so no sign fix-up is needed.
        .mode   SHmedia
        .global GLOBAL(udivsi3_i4)
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
        addz.l  r4,r63,r20      // r20 = dividend, zero-extended
        addz.l  r5,r63,r21      // r21 = divisor, zero-extended
        fmov.qd r20,dr0
        fmov.qd r21,dr32
        ptabs   r18,tr0         // tr0 = return target
        float.qd dr0,dr0        // dr0 = (double) dividend
        float.qd dr32,dr32      // dr32 = (double) divisor
        fdiv.d  dr0,dr32,dr0    // dr0 = dividend / divisor
        ftrc.dq dr0,dr32        // dr32 = quotient as 64-bit integer
        fmov.s fr33,fr32        // copy one 32-bit half into fr32; presumably the low word lands where fpul is read - TODO confirm
        blink tr0,r63

        ENDFUNC(GLOBAL(udivsi3_i4))
1399
#endif /* ! __SH5__ || __SH5__ == 32 */
1400
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1401
!! udivsi3_i4 (SH4 single-precision-default variants): unsigned 32-bit
!! division done on the FPU with FPSCR switched temporarily.
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
!! The literal pool at the end holds the temporary FPSCR value followed
!! by the double constant 2^31; r0 walks over both with post-increment.
!! A divisor of 0 or 1 takes the trivial exit and returns the dividend.
!! Internal labels use LOCAL() like the rest of the file.

        .global GLOBAL(udivsi3_i4)
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
        mov #1,r1
        cmp/hi r1,r5            ! T = (divisor > 1), unsigned
        bf LOCAL(udivsi3_i4_trivial)
        sts.l fpscr,@-r15       ! push caller FPSCR
        mova LOCAL(udivsi3_i4_L1),r0
        lds.l @r0+,fpscr        ! load temporary FPSCR, r0 -> the double
        rotr r1                 ! r1 = 0x80000000
        xor r1,r4               ! flip the top bit of the dividend
        lds r4,fpul
#ifdef FMOVD_WORKS
        fmov.d @r0+,dr4         ! dr4 = 2147483648.0
#else
        fmov.s @r0+,DR40        ! same constant, loaded as two halves
        fmov.s @r0,DR41
#endif
        float fpul,dr0
        xor r1,r5               ! flip the top bit of the divisor
        lds r5,fpul
        float fpul,dr2
        fadd dr4,dr0            ! dr0 = dividend as unsigned value
        fadd dr4,dr2            ! dr2 = divisor as unsigned value
        fdiv dr2,dr0
        ftrc dr0,fpul           ! fpul = quotient as integer
        rts
        lds.l @r15+,fpscr       ! delay slot: pop caller FPSCR

#ifdef FMOVD_WORKS
        .align 3        ! make double below 8 byte aligned.
#endif
        ! The two instructions below and the .long that follows take 8
        ! bytes together, so aligning here also aligns the .double.
LOCAL(udivsi3_i4_trivial):
        rts
        lds r4,fpul             ! delay slot: quotient = dividend

        .align 2
LOCAL(udivsi3_i4_L1):
#ifndef FMOVD_WORKS
        .long 0x80000
#else
        .long 0x180000
#endif
        .double 2147483648

        ENDFUNC(GLOBAL(udivsi3_i4))
1449
#endif /* ! __SH4__ */
1450
#endif
1451
 
1452
#ifdef L_udivsi3
1453
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1454
   sh2e/sh3e code.  */
1455
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1456
 
1457
!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1458
        .global GLOBAL(udivsi3)
1459
        HIDDEN_FUNC(GLOBAL(udivsi3))
1460
 
1461
#if __SHMEDIA__
1462
#if __SH5__ == 32
1463
        .section        .text..SHmedia32,"ax"
1464
#else
1465
        .text
1466
#endif
1467
        .align  2
1468
#if 0
1469
/* NOTE: this variant is enclosed in "#if 0" and is never assembled.  It is
   a plain shift-and-subtract unsigned divide kept as a reference.  */
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns udivsi3_i1 and
   udivsi3_i1_media.

unsigned
__udivsi3 (i, j)
    unsigned i, j;
{
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r18 asm ("r18") = 1;
  register unsigned long long r4 asm ("r4") = i;
  register unsigned long long r19 asm ("r19") = j;

  r19 <<= 31;
  r18 <<= 31;
  do
    if (r4 >= r19)
      r0 |= r18, r4 -= r19;
  while (r19 >>= 1, r18 >>= 1);

  return r0;
}
*/
GLOBAL(udivsi3):
        pt/l    LOCAL(udivsi3_dontadd), tr2
        pt/l    LOCAL(udivsi3_loop), tr1
        ptabs/l r18, tr0                // tr0 = return target
        movi    0, r0                   // r0 = quotient accumulator
        movi    1, r18                  // r18 = current quotient bit
        addz.l  r5, r63, r19            // r19 = j, zero-extended
        addz.l  r4, r63, r4             // r4 = i, zero-extended
        shlli   r19, 31, r19
        shlli   r18, 31, r18
LOCAL(udivsi3_loop):
        bgtu    r19, r4, tr2            // skip when shifted divisor > rest
        or      r0, r18, r0
        sub     r4, r19, r4
LOCAL(udivsi3_dontadd):
        shlri   r18, 1, r18
        shlri   r19, 1, r19
        bnei    r18, 0, tr1             // loop until the bit has shifted out
        blink   tr0, r63
1512
#else
1513
/* Unsigned 32-bit division for SHmedia.  The divisor is normalized with
   nsb, a reciprocal estimate is built with the mm* fixed-point
   instructions, and the quotient is assembled from three multiply /
   subtract steps followed by a final compare-based adjustment.
   ENDFUNC closes the HIDDEN_FUNC declared ahead of the variants, as the
   SHcompact variant already does.  */
GLOBAL(udivsi3):
 // inputs: r4,r5
 // clobbered: r18,r19,r20,r21,r22,r25,tr0
 // result in r0.
 addz.l r5,r63,r22 // r22 = divisor, zero-extended
 nsb r22,r0
 shlld r22,r0,r25
 shlri r25,48,r25
 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
 sub r20,r25,r21
 mmulfx.w r21,r21,r19
 mshflo.w r21,r63,r21
 ptabs r18,tr0
 mmulfx.w r25,r19,r19
 sub r20,r0,r0
 /* bubble */
 msub.w r21,r19,r19
 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
                    before the msub.w, but we need a different value for
                    r19 to keep errors under control.  */
 mulu.l r4,r21,r18
 mmulfx.w r19,r19,r19
 shlli r21,15,r21
 shlrd r18,r0,r18
 mulu.l r18,r22,r20
 mmacnfx.wl r25,r19,r21
 /* bubble */
 sub r4,r20,r25

 mulu.l r25,r21,r19
 addi r0,14,r0
 /* bubble */
 shlrd r19,r0,r19
 mulu.l r19,r22,r20
 add r18,r19,r18
 /* bubble */
 sub.l r25,r20,r25

 mulu.l r25,r21,r19
 addz.l r25,r63,r25
 sub r25,r22,r25
 shlrd r19,r0,r19
 mulu.l r19,r22,r20
 addi r25,1,r25
 add r18,r19,r18

 cmpgt r25,r20,r25
 add.l r18,r25,r0
 blink tr0,r63
        ENDFUNC(GLOBAL(udivsi3))
1562
#endif
1563
#elif defined (__SHMEDIA__)
1564
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
   So use a short shmedia loop.  */
/* Unsigned 32-bit division: r4 = dividend, r5 = divisor, quotient in r0.
   Each of the 32 iterations shifts the rest left by one and, when it is
   large enough, subtracts (divisor << 32) - 1, which also sets the new
   low quotient bit.  ENDFUNC closes the HIDDEN_FUNC declared ahead of the
   variants, as the SHcompact variant already does.  */
 // clobbered: r20,r21,r25,tr0,tr1,tr2
        .mode   SHmedia
        .section        .text..SHmedia32,"ax"
        .align  2
GLOBAL(udivsi3):
 pt/l LOCAL(udivsi3_dontsub), tr0
 pt/l LOCAL(udivsi3_loop), tr1
 ptabs/l r18,tr2 // tr2 = return target
 shlli r5,32,r25 // r25 = divisor << 32, low word 0
 addi r25,-1,r21 // r21 = (divisor << 32) - 1
 addz.l r4,r63,r20 // r20 = dividend, zero-extended
LOCAL(udivsi3_loop):
 shlli r20,1,r20
 bgeu/u r21,r20,tr0 // rest too small: no subtraction
 sub r20,r21,r20
LOCAL(udivsi3_dontsub):
 addi.l r25,-1,r25 // r25 doubles as the loop counter
 bnei r25,-32,tr1 // 32 iterations
 add.l r20,r63,r0 // quotient = low 32 bits of r20
 blink tr2,r63
        ENDFUNC(GLOBAL(udivsi3))
1587
#else /* ! defined (__SHMEDIA__) */
1588
/* Unsigned 32-bit division for SHcompact built from div1 steps.
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   Per the contract stated ahead of the variants: clobbers r4, pr and T.
   Helpers, all entered with bsr and ending with a div1 in the rts delay
   slot:
     div8  - eight div1 steps (one, then falls into div7)
     div7  - seven div1 steps
     divx4 - four div1 steps with three rotcl r0 in between; the caller
             supplies the fourth rotcl in its bsr delay slot
   A divisor that fits in 16 bits takes the first path, all others go to
   large_divisor.  */
LOCAL(div8):
 div1 r5,r4
LOCAL(div7):
 div1 r5,r4; div1 r5,r4; div1 r5,r4
 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4

LOCAL(divx4):
 div1 r5,r4; rotcl r0
 div1 r5,r4; rotcl r0
 div1 r5,r4; rotcl r0
 rts; div1 r5,r4

GLOBAL(udivsi3):
 sts.l pr,@-r15         ! bsr below overwrites pr
 extu.w r5,r0
 cmp/eq r5,r0           ! T = divisor fits in 16 bits
#ifdef __sh1__
 bf LOCAL(large_divisor)
#else
 bf/s LOCAL(large_divisor)
#endif
 div0u                  ! delay slot of bf/s; plain next insn for __sh1__
 swap.w r4,r0
 shlr16 r4              ! r4 = upper half of the dividend
 bsr LOCAL(div8)
 shll16 r5              ! delay slot: divisor into the upper half
 bsr LOCAL(div7)
 div1 r5,r4
 xtrct r4,r0
 xtrct r0,r4
 bsr LOCAL(div8)
 swap.w r4,r4
 bsr LOCAL(div7)
 div1 r5,r4
 lds.l @r15+,pr
 xtrct r4,r0
 swap.w r0,r0
 rotcl r0
 rts
 shlr16 r5              ! delay slot: undo the earlier shll16 r5

LOCAL(large_divisor):
#ifdef __sh1__
 div0u                  ! bf has no delay slot, so it is issued here
#endif
 mov #0,r0
 xtrct r4,r0
 xtrct r0,r4
 bsr LOCAL(divx4)
 rotcl r0
 bsr LOCAL(divx4)
 rotcl r0
 bsr LOCAL(divx4)
 rotcl r0
 bsr LOCAL(divx4)
 rotcl r0
 lds.l @r15+,pr
 rts
 rotcl r0

        ENDFUNC(GLOBAL(udivsi3))
1649
#endif /* ! __SHMEDIA__ */
1650
#endif /* __SH4__ */
1651
#endif /* L_udivsi3 */
1652
 
1653
#ifdef L_udivdi3
1654
#ifdef __SHMEDIA__
1655
        .mode   SHmedia
        .section        .text..SHmedia32,"ax"
        .align  2
        .global GLOBAL(udivdi3)
        FUNC(GLOBAL(udivdi3))
/* Unsigned 64-bit division for SHmedia.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = quotient.
   The divisor is normalized with nsb, a 31 bit reciprocal is built in r1,
   and the quotient is produced in 32-bit steps.  Depending on the
   normalization shift the code takes either the small divisor path
   (three divide steps) or the large_divisor path (two steps plus
   adjustment).  Division by zero is not checked.
   udivdi3_internal is a hidden alias of this entry point.  */
GLOBAL(udivdi3):
        HIDDEN_ALIAS(udivdi3_internal,udivdi3)
        shlri r3,1,r4
        nsb r4,r22
        shlld r3,r22,r6
        shlri r6,49,r5
        movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
        sub r21,r5,r1
        mmulfx.w r1,r1,r4
        mshflo.w r1,r63,r1
        sub r63,r22,r20 // r63 == 64 % 64
        mmulfx.w r5,r4,r4
        pta LOCAL(large_divisor),tr0
        addi r20,32,r9
        msub.w r1,r4,r1
        madd.w r1,r1,r1
        mmulfx.w r1,r1,r4
        shlri r6,32,r7
        bgt/u r9,r63,tr0 // large_divisor
        mmulfx.w r5,r4,r4
        shlri r2,32+14,r19
        addi r22,-31,r0
        msub.w r1,r4,r1

        mulu.l r1,r7,r4
        addi r1,-3,r5
        mulu.l r5,r19,r5
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
                         the case may be, %0000000000000000 000.11111111111, still */
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
        mulu.l r5,r3,r8
        mshalds.l r1,r21,r1
        shari r4,26,r4
        shlld r8,r0,r8
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
        sub r2,r8,r2
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

        shlri r2,22,r21
        mulu.l r21,r1,r21
        shlld r5,r0,r8
        addi r20,30-22,r0
        shlrd r21,r0,r21
        mulu.l r21,r3,r5
        add r8,r21,r8
        mcmpgt.l r21,r63,r21 // See Note 1
        addi r20,30,r0
        mshfhi.l r63,r21,r21
        sub r2,r5,r2
        andc r2,r21,r2

        /* small divisor: need a third divide step */
        mulu.l r2,r1,r7
        ptabs r18,tr0
        addi r2,1,r2
        shlrd r7,r0,r7
        mulu.l r7,r3,r5
        add r8,r7,r8
        sub r2,r3,r2
        cmpgt r2,r5,r5
        add r8,r5,r2
        /* could test r3 here to check for divide by zero.  */
        blink tr0,r63

LOCAL(large_divisor):
        mmulfx.w r5,r4,r4
        shlrd r2,r9,r25
        shlri r25,32,r8
        msub.w r1,r4,r1

        mulu.l r1,r7,r4
        addi r1,-3,r5
        mulu.l r5,r8,r5
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
                         the case may be, %0000000000000000 000.11111111111, still */
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
        shlri r5,14-1,r8
        mulu.l r8,r7,r5
        mshalds.l r1,r21,r1
        shari r4,26,r4
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
        sub r25,r5,r25
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

        shlri r25,22,r21
        mulu.l r21,r1,r21
        pta LOCAL(no_lo_adj),tr0
        addi r22,32,r0
        shlri r21,40,r21
        mulu.l r21,r7,r5
        add r8,r21,r8
        shlld r2,r0,r2
        sub r25,r5,r25
        bgtu/u r7,r25,tr0 // no_lo_adj
        addi r8,1,r8
        sub r25,r7,r25
LOCAL(no_lo_adj):
        mextr4 r2,r25,r2

        /* large_divisor: only needs a few adjustments.  */
        mulu.l r8,r6,r5
        ptabs r18,tr0
        /* bubble */
        cmpgtu r5,r2,r5
        sub r8,r5,r2
        blink tr0,r63
        ENDFUNC(GLOBAL(udivdi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.  */
1778
#endif /* __SHMEDIA__ */
1779
#endif /* L_udivdi3 */
1780
 
1781
#ifdef L_divdi3
1782
#ifdef __SHMEDIA__
1783
        .mode   SHmedia
        .section        .text..SHmedia32,"ax"
        .align  2
        .global GLOBAL(divdi3)
        FUNC(GLOBAL(divdi3))
/* Signed 64-bit division built on udivdi3_internal.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = quotient.
   Both operands are replaced by their absolute values.  When the signs
   match, the unsigned routine is tail-called; otherwise it is called
   with r18 as link register and its result is negated.  */
GLOBAL(divdi3):
        pta GLOBAL(udivdi3_internal),tr0
        shari r2,63,r22         // r22 = 0 or -1 from the dividend sign
        shari r3,63,r23         // r23 = 0 or -1 from the divisor sign
        xor r2,r22,r2
        xor r3,r23,r3
        sub r2,r22,r2           // r2 = abs (dividend)
        sub r3,r23,r3           // r3 = abs (divisor)
        beq/u r22,r23,tr0       // same sign: tail-call, result is final
        ptabs r18,tr1           // keep our own return target in tr1
        blink tr0,r18           // call udivdi3_internal
        sub r63,r2,r2           // signs differed: negate the quotient
        blink tr1,r63
        ENDFUNC(GLOBAL(divdi3))
1802
#endif /* __SHMEDIA__ */
1803
#endif /* L_divdi3 */
1804
 
1805
#ifdef L_umoddi3
1806
#ifdef __SHMEDIA__
1807
        .mode   SHmedia
        .section        .text..SHmedia32,"ax"
        .align  2
        .global GLOBAL(umoddi3)
        FUNC(GLOBAL(umoddi3))
/* Unsigned 64-bit remainder for SHmedia.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = remainder.
   Same reciprocal-based scheme as udivdi3, but only the rest is tracked:
   a small divisor path with three reduction steps, and a large_divisor
   path that works on the normalized divisor in r6 and shifts the rest
   back down by r22 at the end.  Division by zero is not checked.
   umoddi3_internal is a hidden alias of this entry point.  */
GLOBAL(umoddi3):
        HIDDEN_ALIAS(umoddi3_internal,umoddi3)
        shlri r3,1,r4
        nsb r4,r22
        shlld r3,r22,r6
        shlri r6,49,r5
        movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
        sub r21,r5,r1
        mmulfx.w r1,r1,r4
        mshflo.w r1,r63,r1
        sub r63,r22,r20 // r63 == 64 % 64
        mmulfx.w r5,r4,r4
        pta LOCAL(large_divisor),tr0
        addi r20,32,r9
        msub.w r1,r4,r1
        madd.w r1,r1,r1
        mmulfx.w r1,r1,r4
        shlri r6,32,r7
        bgt/u r9,r63,tr0 // large_divisor
        mmulfx.w r5,r4,r4
        shlri r2,32+14,r19
        addi r22,-31,r0
        msub.w r1,r4,r1

        mulu.l r1,r7,r4
        addi r1,-3,r5
        mulu.l r5,r19,r5
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
                         the case may be, %0000000000000000 000.11111111111, still */
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
        mulu.l r5,r3,r5
        mshalds.l r1,r21,r1
        shari r4,26,r4
        shlld r5,r0,r5
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
        sub r2,r5,r2
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

        shlri r2,22,r21
        mulu.l r21,r1,r21
        addi r20,30-22,r0
        /* bubble */ /* could test r3 here to check for divide by zero.  */
        shlrd r21,r0,r21
        mulu.l r21,r3,r5
        mcmpgt.l r21,r63,r21 // See Note 1
        addi r20,30,r0
        mshfhi.l r63,r21,r21
        sub r2,r5,r2
        andc r2,r21,r2

        /* small divisor: need a third divide step */
        mulu.l r2,r1,r7
        ptabs r18,tr0
        sub r2,r3,r8 /* re-use r8 here for rest - r3 */
        shlrd r7,r0,r7
        mulu.l r7,r3,r5
        /* bubble */
        addi r8,1,r7
        cmpgt r7,r5,r7
        cmvne r7,r8,r2
        sub r2,r5,r2
        blink tr0,r63

LOCAL(large_divisor):
        mmulfx.w r5,r4,r4
        shlrd r2,r9,r25
        shlri r25,32,r8
        msub.w r1,r4,r1

        mulu.l r1,r7,r4
        addi r1,-3,r5
        mulu.l r5,r8,r5
        sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
        shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
                         the case may be, %0000000000000000 000.11111111111, still */
        muls.l r1,r4,r4 /* leaving at least one sign bit.  */
        shlri r5,14-1,r8
        mulu.l r8,r7,r5
        mshalds.l r1,r21,r1
        shari r4,26,r4
        add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
        sub r25,r5,r25
        /* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

        shlri r25,22,r21
        mulu.l r21,r1,r21
        pta LOCAL(no_lo_adj),tr0
        addi r22,32,r0
        shlri r21,40,r21
        mulu.l r21,r7,r5
        add r8,r21,r8
        shlld r2,r0,r2
        sub r25,r5,r25
        bgtu/u r7,r25,tr0 // no_lo_adj
        addi r8,1,r8
        sub r25,r7,r25
LOCAL(no_lo_adj):
        mextr4 r2,r25,r2

        /* large_divisor: only needs a few adjustments.  */
        mulu.l r8,r6,r5
        ptabs r18,tr0
        add r2,r6,r7
        cmpgtu r5,r2,r8
        cmvne r8,r7,r2
        sub r2,r5,r2
        shlrd r2,r22,r2
        blink tr0,r63
        ENDFUNC(GLOBAL(umoddi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.  */
1931
#endif /* __SHMEDIA__ */
1932
#endif /* L_umoddi3 */
1933
 
1934
#ifdef L_moddi3
1935
#ifdef __SHMEDIA__
1936
        .mode   SHmedia
        .section        .text..SHmedia32,"ax"
        .align  2
        .global GLOBAL(moddi3)
        FUNC(GLOBAL(moddi3))
/* Signed 64-bit remainder built on umoddi3_internal.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = remainder.
   Both operands are replaced by their absolute values.  The remainder
   takes the sign of the dividend only: a non-negative dividend
   tail-calls the unsigned routine, a negative one calls it with r18 as
   link register and negates the result.  */
GLOBAL(moddi3):
        pta GLOBAL(umoddi3_internal),tr0
        shari r2,63,r22         // r22 = 0 or -1 from the dividend sign
        shari r3,63,r23         // r23 = 0 or -1 from the divisor sign
        xor r2,r22,r2
        xor r3,r23,r3
        sub r2,r22,r2           // r2 = abs (dividend)
        sub r3,r23,r3           // r3 = abs (divisor)
        beq/u r22,r63,tr0       // dividend >= 0: tail-call, result is final
        ptabs r18,tr1           // keep our own return target in tr1
        blink tr0,r18           // call umoddi3_internal
        sub r63,r2,r2           // dividend was negative: negate the remainder
        blink tr1,r63
        ENDFUNC(GLOBAL(moddi3))
1955
#endif /* __SHMEDIA__ */
1956
#endif /* L_moddi3 */
1957
 
1958
#ifdef L_set_fpscr
1959
#if !defined (__SH2A_NOFPU__)
1960
#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1961
#ifdef __SH5__
1962
        .mode   SHcompact
1963
#endif
1964
        .global GLOBAL(set_fpscr)
        ! set_fpscr: load FPSCR from r4 and refresh the two-word array
        ! fpscr_values with two variants of that value.
        ! The upper half of r4 is swapped into the low half of r0 so that
        ! the 8-bit immediates 8 and 16 address bits 19 and 20 of the real
        ! register (named PR and SZ in the SH-4 manual).
        !   first store:  bit 19 set; bit 20 set only with FMOVD_WORKS
        !   second store: bits 19 and 20 both clear
        ! For __SH4__ / __SH2A_DOUBLE__ the first value goes to
        ! fpscr_values[1] and the second to fpscr_values[0]; for all other
        ! configurations the slots are exchanged.
        ! Clobbers r0, r1, r2, r3.
        HIDDEN_FUNC(GLOBAL(set_fpscr))
GLOBAL(set_fpscr):
        lds r4,fpscr
#ifdef __PIC__
        mov.l   r12,@-r15               ! r12 is used as GOT pointer here
        mova    LOCAL(set_fpscr_L0),r0
        mov.l   LOCAL(set_fpscr_L0),r12
        add     r0,r12                  ! r12 = GOT address
        mov.l   LOCAL(set_fpscr_L1),r0
        mov.l   @(r0,r12),r1            ! r1 = &fpscr_values via the GOT
        mov.l   @r15+,r12
#else
        mov.l LOCAL(set_fpscr_L1),r1    ! r1 = &fpscr_values
#endif
        swap.w r4,r0                    ! bits 16..31 now in the low half
        or #24,r0                       ! set bits 19 and 20
#ifndef FMOVD_WORKS
        xor #16,r0                      ! clear bit 20 again
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
        swap.w r0,r3
        mov.l r3,@(4,r1)
#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
        swap.w r0,r2
        mov.l r2,@r1
#endif
#ifndef FMOVD_WORKS
        xor #8,r0                       ! clear bit 19
#else
        xor #24,r0                      ! clear bits 19 and 20
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
        swap.w r0,r2
        rts
        mov.l r2,@r1                    ! delay slot
#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
        swap.w r0,r3
        rts
        mov.l r3,@(4,r1)                ! delay slot
#endif
        .align 2
#ifdef __PIC__
LOCAL(set_fpscr_L0):
        .long _GLOBAL_OFFSET_TABLE_
LOCAL(set_fpscr_L1):
        .long GLOBAL(fpscr_values@GOT)
#else
LOCAL(set_fpscr_L1):
        .long GLOBAL(fpscr_values)
#endif

        ENDFUNC(GLOBAL(set_fpscr))
2017
#ifndef NO_FPSCR_VALUES
2018
#ifdef __ELF__
2019
        .comm   GLOBAL(fpscr_values),8,4
2020
#else
2021
        .comm   GLOBAL(fpscr_values),8
2022
#endif /* ELF */
2023
#endif /* NO_FPSCR_VALUES */
2024
#endif /* SH2E / SH3E / SH4 */
2025
#endif /* __SH2A_NOFPU__ */
2026
#endif /* L_set_fpscr */
2027
#ifdef L_ic_invalidate
2028
#if __SH5__ == 32
2029
        .mode   SHmedia
2030
        .section        .text..SHmedia32,"ax"
2031
        .align  2
2032
        /* init_trampoline: write a trampoline image at the address in r0.
           r2 is stored at offset 8 and r3 at offset 12; a 64-bit constant
           assembled in r20 is stored at offset 0.  There is no return
           instruction before ENDFUNC, so execution continues into whatever
           code follows this function.  */
        .global GLOBAL(init_trampoline)
2033
        HIDDEN_FUNC(GLOBAL(init_trampoline))
2034
GLOBAL(init_trampoline):
2035
        st.l    r0,8,r2
2036
        /* Build the constant 16 bits at a time (movi, then three shori).
           The two orderings are mirror images of each other, so in either
           byte order the halfwords land in memory as 0xd002, 0xd101, 0x402b,
           0x0009 -- presumably SHcompact opcodes forming the trampoline
           stub; TODO confirm.  */
#ifdef __LITTLE_ENDIAN__
2037
        movi    9,r20
2038
        shori   0x402b,r20
2039
        shori   0xd101,r20
2040
        shori   0xd002,r20
2041
#else
2042
        movi    0xffffffffffffd002,r20
2043
        shori   0xd101,r20
2044
        shori   0x402b,r20
2045
        shori   9,r20
2046
#endif
2047
        st.q    r0,0,r20
2048
        st.l    r0,12,r3
2049
        /* No branch here: falls through past the end of the function.  */
        ENDFUNC(GLOBAL(init_trampoline))
2050
        /* ic_invalidate (SHmedia): for the address in r0, write back the
           data cache block, then invalidate the instruction cache block,
           and return to the address in r18.  */
        .global GLOBAL(ic_invalidate)
2051
        HIDDEN_FUNC(GLOBAL(ic_invalidate))
2052
GLOBAL(ic_invalidate):
2053
        ocbwb   r0,0            /* Write back the data cache block at r0 + 0.  */
2054
        synco
2055
        icbi    r0, 0           /* Invalidate the instruction cache block.  */
2056
        ptabs   r18, tr0        /* tr0 = return address.  */
2057
        synci
2058
        blink   tr0, r63        /* Return; r63 discards the link value.  */
2059
        ENDFUNC(GLOBAL(ic_invalidate))
2060
#elif defined(__SH4A__)
2061
        /* ic_invalidate (SH4A): for the address in r4, write back the data
           cache line and invalidate the instruction cache line.  */
        .global GLOBAL(ic_invalidate)
2062
        HIDDEN_FUNC(GLOBAL(ic_invalidate))
2063
GLOBAL(ic_invalidate):
2064
        ocbwb   @r4             /* Write back the data cache line first.  */
2065
        synco
2066
        rts
2067
        icbi    @r4             /* Executed in the rts delay slot.  */
2068
        ENDFUNC(GLOBAL(ic_invalidate))
2069
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2070
        /* For system code, we use ic_invalidate_line_i, but user code
2071
           needs a different mechanism.  A kernel call is generally not
2072
           available, and it would also be slow.  Different SH4 variants use
2073
           different sizes and associativities of the Icache.  We use a small
2074
           bit of dispatch code that can be put hidden in every shared object,
2075
           which calls the actual processor-specific invalidation code in a
2076
           separate module.
2077
           Or if you have operating system support, the OS could mmap the
2078
           processor-specific code from a single page, since it is highly
2079
           repetitive.  */
2080
        /* ic_invalidate (SH4 dispatch stub).
           In:   r4 = address whose cache line is to be invalidated.
           Uses: r0, r1 (and r2 when __pic__ is defined).
           Writes back the data cache line at r4, then jumps into
           ic_invalidate_array at  base + ((r4 - base) & mask),  where the
           mask is the 32-bit word stored at base + 8.  On arrival r1 holds
           the array base, r4 holds (address - base) and r0 holds the word
           stored at base + 4.  The array code performs the return.  */
        .global GLOBAL(ic_invalidate)
        HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
        mov.l   0f,r1           /* non-PIC: array base; PIC: GOT displacement.  */
#ifdef __pic__
        mova    0f,r0           /* r0 = address of literal 0 below.  */
        mov.l   1f,r2           /* r2 = GOT offset of ic_invalidate_array.  */
        add     r1,r0           /* r0 = GOT address.  */
        mov.l   @(r0,r2),r1     /* r1 = array base, fetched from the GOT.  */
#endif
        ocbwb   @r4
        mov.l   @(8,r1),r0      /* r0 = mask word stored in the array.  */
        sub     r1,r4           /* r4 = address - base.  */
        and     r4,r0
        add     r1,r0           /* r0 = entry point inside the array.  */
        jmp     @r0
        mov.l   @(4,r1),r0      /* Delay slot: r0 = word at base + 4.  */
#ifndef __pic__
0:      .long   GLOBAL(ic_invalidate_array)
#else /* __pic__ */
        .global GLOBAL(ic_invalidate_array)
        /* ??? Why won't the assembler allow to add these two constants?  */
0:      .long   _GLOBAL_OFFSET_TABLE_
1:      .long   GLOBAL(ic_invalidate_array)@GOT
#endif /* __pic__ */
        /* Close the function in both configurations; HIDDEN_FUNC above is
           unconditional, so ENDFUNC must not be limited to the PIC branch.  */
        ENDFUNC(GLOBAL(ic_invalidate))
2106
#endif /* SH4 */
2107
#endif /* L_ic_invalidate */
2108
 
2109
#ifdef L_ic_invalidate_array
2110
#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
2111
        /* ic_invalidate_array (SH4A flavour).
           This is needed when an SH4 dso with trampolines is used on SH4A.
           It is entered from the SH4 ic_invalidate dispatch stub, which
           passes r1 = address of this array and r4 = (address - r1), and
           which reads the 32-bit word at offset 8 as a mask.  That word is
           zero here, so the stub always enters at the first instruction.  */
        .global GLOBAL(ic_invalidate_array)
        FUNC(GLOBAL(ic_invalidate_array))
GLOBAL(ic_invalidate_array):
        add     r1,r4           /* Undo the stub's subtraction: r4 = address.  */
        synco
        rts
        icbi    @r4             /* Delay slot: invalidate the icache line.  */
        .long   0               /* Offset 8: mask word read by the stub.  */
        ENDFUNC(GLOBAL(ic_invalidate_array))
2122
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2123
        /* ic_invalidate_array (SH4 flavour): a block of code laid out in
           32-byte units and aligned to 32 bytes (.p2align 5).  The
           ic_invalidate dispatch stub reads the 32-bit words at offsets 4
           and 8 of the array, so those positions hold data in the first
           unit of every variant below.  WAYS and WAY_SIZE may be predefined
           by the build; the defaults are 4 and 0x4000.  */
        .global GLOBAL(ic_invalidate_array)
2124
        .p2align 5
2125
        FUNC(GLOBAL(ic_invalidate_array))
2126
/* This must be aligned to the beginning of a cache line.  */
2127
GLOBAL(ic_invalidate_array):
2128
#ifndef WAYS
2129
#define WAYS 4
2130
#define WAY_SIZE 0x4000
2131
#endif
2132
#if WAYS == 1
2133
        /* Unit: rts + nop (4 bytes of 16-bit instructions) followed by
           seven words of WAY_SIZE - 32 (28 bytes) = 32 bytes.  */
        .rept   WAY_SIZE * WAYS / 32
2134
        rts
2135
        nop
2136
        .rept   7
2137
        .long   WAY_SIZE - 32
2138
        .endr
2139
        .endr
2140
#elif WAYS <= 6
2141
        /* Unit: braf/add (4 bytes), two data words (8 bytes), WAYS-2
           braf/nop pairs and 7-WAYS rts/nop pairs (4 bytes each)
           = 12 + 4 * 5 = 32 bytes.  Offset 4 holds WAY_SIZE + 8 and
           offset 8 holds WAY_SIZE - 32.  */
        .rept   WAY_SIZE * WAYS / 32
2142
        braf    r0
2143
        add     #-8,r0
2144
        .long   WAY_SIZE + 8
2145
        .long   WAY_SIZE - 32
2146
        .rept   WAYS-2
2147
        braf    r0
2148
        nop
2149
        .endr
2150
        .rept   7 - WAYS
2151
        rts
2152
        nop
2153
        .endr
2154
        .endr
2155
#else /* WAYS > 6 */
2156
        /* This variant needs two different pages for mmap-ing.  */
2157
        /* First WAYS-1 ways: unit = braf/nop (4 bytes), one word WAY_SIZE
           and six words WAY_SIZE - 32 (28 bytes) = 32 bytes.  The last way
           consists of units of rts plus fifteen nops = 32 bytes.  */
        .rept   WAYS-1
2158
        .rept   WAY_SIZE / 32
2159
        braf    r0
2160
        nop
2161
        .long   WAY_SIZE
2162
        .rept 6
2163
        .long   WAY_SIZE - 32
2164
        .endr
2165
        .endr
2166
        .endr
2167
        .rept   WAY_SIZE / 32
2168
        rts
2169
        .rept   15
2170
        nop
2171
        .endr
2172
        .endr
2173
#endif /* WAYS */
2174
        ENDFUNC(GLOBAL(ic_invalidate_array))
2175
#endif /* SH4 */
2176
#endif /* L_ic_invalidate_array */
2177
 
2178
#if defined (__SH5__) && __SH5__ == 32
2179
#ifdef L_shcompact_call_trampoline
2180
        /* Dispatch table for GCC_shcompact_call_trampoline: 33 16-bit
           entries, each the offset of a handler from ct_main_label.  The
           trampoline indexes it with (nsb(cookie) - 31), so entry 0 is
           selected when bit 31 is the highest bit set in the cookie, entry
           31 when only bit 0 is set, and entry 32 when the cookie is zero
           (this assumes NSB yields 62 - k for highest set bit k and 63 for
           zero -- confirm against the SHmedia manual).  */
        .section        .rodata
2181
        .align  1
2182
LOCAL(ct_main_table):
2183
.word   LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2184
.word   LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2185
.word   LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2186
.word   LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2187
.word   LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2188
.word   LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2189
.word   LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2190
.word   LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2191
.word   LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2192
.word   LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2193
.word   LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2194
.word   LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2195
/* From r6 on each register has four entries: fph, fpl, ld, pop.  */
.word   LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2196
.word   LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2197
.word   LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2198
.word   LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2199
.word   LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2200
.word   LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2201
.word   LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2202
.word   LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2203
.word   LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2204
.word   LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2205
.word   LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2206
.word   LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2207
.word   LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2208
.word   LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2209
.word   LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2210
.word   LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2211
/* Final five entries: cookie bits 3, 2, 1, 0 and the all-zero cookie.  */
.word   LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2212
.word   LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2213
.word   LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2214
.word   LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2215
.word   LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2216
        .mode   SHmedia
2217
        .section        .text..SHmedia32, "ax"
2218
        .align  2
2219
 
2220
     /* This function loads 64-bit general-purpose registers from the
2221
        stack, from a memory address contained in them or from an FP
2222
        register, according to a cookie passed in r1.  Its execution
2223
        time is linear on the number of registers that actually have
2224
        to be copied.  See sh.h for details on the actual bit pattern.
2225
 
2226
        The function to be called is passed in r0.  If a 32-bit return
2227
        value is expected, the actual function will be tail-called,
2228
        otherwise the return address will be stored in r10 (that the
2229
        caller should expect to be clobbered) and the return value
2230
        will be expanded into r2/r3 upon return.  */
2231
 
2232
        /* Entry point and dispatch loop.
           In: r0 = function to call, r1 = cookie.
           tr0 is set to the callee and tr1 to ct_loop, so every handler
           returns to the loop with "blink tr1, r63".  r0 is then reused as
           the address of ct_main_table biased by -31 * 2 bytes.  */
        .global GLOBAL(GCC_shcompact_call_trampoline)
2233
        FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2234
GLOBAL(GCC_shcompact_call_trampoline):
2235
        ptabs/l r0, tr0 /* Prepare to call the actual function.  */
2236
        movi    ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2237
        pt/l    LOCAL(ct_loop), tr1
2238
        addz.l  r1, r63, r1     /* Keep only the low 32 bits of the cookie.  */
2239
        shori   ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2240
LOCAL(ct_loop):
2241
        nsb     r1, r28         /* Locate the highest bit still set in r1.  */
2242
        shlli   r28, 1, r29     /* Scale to a byte offset; entries are 2 bytes.  */
2243
        ldx.w   r0, r29, r30    /* r30 = handler offset from ct_main_label.  */
2244
LOCAL(ct_main_label):
2245
        ptrel/l r30, tr2
2246
        blink   tr2, r63        /* Jump to the selected handler.  */
2247
/* FP-register handlers.  Each one copies an argument register from a
   double-precision FP register with fmov.dq, clears that register's field
   in the cookie (r1) with andc, and returns to ct_loop through tr1.  Where
   several source registers are possible, a 2-bit selector taken from the
   cookie is scaled by 8 and used as an offset into a run of fmov.dq/blink
   pairs (8 bytes per pair, assuming 32-bit SHmedia instructions).  */
LOCAL(ct_r2_fp):        /* Copy r2 from an FP register.  */
2248
        /* It must be dr0, so just do it.  */
2249
        fmov.dq dr0, r2
2250
        movi    7, r30
2251
        shlli   r30, 29, r31
2252
        andc    r1, r31, r1
2253
        blink   tr1, r63
2254
LOCAL(ct_r3_fp):        /* Copy r3 from an FP register.  */
2255
        /* It is either dr0 or dr2.  */
2256
        movi    7, r30
2257
        shlri   r1, 26, r32
2258
        shlli   r30, 26, r31
2259
        andc    r1, r31, r1
2260
        fmov.dq dr0, r3
2261
        beqi/l  r32, 4, tr1
2262
        fmov.dq dr2, r3
2263
        blink   tr1, r63
2264
LOCAL(ct_r4_fp):        /* Copy r4 from an FP register.  */
2265
        /* r33 = (cookie bits 23-24) * 8: offset of the chosen pair below.  */
        shlri   r1, 23 - 3, r34
2266
        andi    r34, 3 << 3, r33
2267
        addi    r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2268
LOCAL(ct_r4_fp_base):
2269
        ptrel/l r32, tr2
2270
        movi    7, r30
2271
        shlli   r30, 23, r31
2272
        andc    r1, r31, r1
2273
        blink   tr2, r63
2274
LOCAL(ct_r4_fp_copy):
2275
        fmov.dq dr0, r4
2276
        blink   tr1, r63
2277
        fmov.dq dr2, r4
2278
        blink   tr1, r63
2279
        fmov.dq dr4, r4
2280
        blink   tr1, r63
2281
LOCAL(ct_r5_fp):        /* Copy r5 from an FP register.  */
2282
        shlri   r1, 20 - 3, r34
2283
        andi    r34, 3 << 3, r33
2284
        addi    r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2285
LOCAL(ct_r5_fp_base):
2286
        ptrel/l r32, tr2
2287
        movi    7, r30
2288
        shlli   r30, 20, r31
2289
        andc    r1, r31, r1
2290
        blink   tr2, r63
2291
LOCAL(ct_r5_fp_copy):
2292
        fmov.dq dr0, r5
2293
        blink   tr1, r63
2294
        fmov.dq dr2, r5
2295
        blink   tr1, r63
2296
        fmov.dq dr4, r5
2297
        blink   tr1, r63
2298
        fmov.dq dr6, r5
2299
        blink   tr1, r63
2300
LOCAL(ct_r6_fph):       /* Copy r6 from a high FP register.  */
2301
        /* It must be dr8.  */
2302
        fmov.dq dr8, r6
2303
        movi    15, r30
2304
        shlli   r30, 16, r31
2305
        andc    r1, r31, r1
2306
        blink   tr1, r63
2307
LOCAL(ct_r6_fpl):       /* Copy r6 from a low FP register.  */
2308
        shlri   r1, 16 - 3, r34
2309
        andi    r34, 3 << 3, r33
2310
        addi    r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2311
LOCAL(ct_r6_fp_base):
2312
        ptrel/l r32, tr2
2313
        movi    7, r30
2314
        shlli   r30, 16, r31
2315
        andc    r1, r31, r1
2316
        blink   tr2, r63
2317
LOCAL(ct_r6_fp_copy):
2318
        fmov.dq dr0, r6
2319
        blink   tr1, r63
2320
        fmov.dq dr2, r6
2321
        blink   tr1, r63
2322
        fmov.dq dr4, r6
2323
        blink   tr1, r63
2324
        fmov.dq dr6, r6
2325
        blink   tr1, r63
2326
LOCAL(ct_r7_fph):       /* Copy r7 from a high FP register.  */
2327
        /* It is either dr8 or dr10.  */
2328
        movi    15 << 12, r31
2329
        shlri   r1, 12, r32
2330
        andc    r1, r31, r1
2331
        fmov.dq dr8, r7
2332
        beqi/l  r32, 8, tr1
2333
        fmov.dq dr10, r7
2334
        blink   tr1, r63
2335
LOCAL(ct_r7_fpl):       /* Copy r7 from a low FP register.  */
2336
        shlri   r1, 12 - 3, r34
2337
        andi    r34, 3 << 3, r33
2338
        addi    r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2339
LOCAL(ct_r7_fp_base):
2340
        ptrel/l r32, tr2
2341
        movi    7 << 12, r31
2342
        andc    r1, r31, r1
2343
        blink   tr2, r63
2344
LOCAL(ct_r7_fp_copy):
2345
        fmov.dq dr0, r7
2346
        blink   tr1, r63
2347
        fmov.dq dr2, r7
2348
        blink   tr1, r63
2349
        fmov.dq dr4, r7
2350
        blink   tr1, r63
2351
        fmov.dq dr6, r7
2352
        blink   tr1, r63
2353
LOCAL(ct_r8_fph):       /* Copy r8 from a high FP register.  */
2354
        /* It is either dr8 or dr10.  */
2355
        movi    15 << 8, r31
2356
        andi    r1, 1 << 8, r32
2357
        andc    r1, r31, r1
2358
        fmov.dq dr8, r8
2359
        beq/l   r32, r63, tr1
2360
        fmov.dq dr10, r8
2361
        blink   tr1, r63
2362
LOCAL(ct_r8_fpl):       /* Copy r8 from a low FP register.  */
2363
        shlri   r1, 8 - 3, r34
2364
        andi    r34, 3 << 3, r33
2365
        addi    r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2366
LOCAL(ct_r8_fp_base):
2367
        ptrel/l r32, tr2
2368
        movi    7 << 8, r31
2369
        andc    r1, r31, r1
2370
        blink   tr2, r63
2371
LOCAL(ct_r8_fp_copy):
2372
        fmov.dq dr0, r8
2373
        blink   tr1, r63
2374
        fmov.dq dr2, r8
2375
        blink   tr1, r63
2376
        fmov.dq dr4, r8
2377
        blink   tr1, r63
2378
        fmov.dq dr6, r8
2379
        blink   tr1, r63
2380
LOCAL(ct_r9_fph):       /* Copy r9 from a high FP register.  */
2381
        /* It is either dr8 or dr10.  */
2382
        movi    15 << 4, r31
2383
        andi    r1, 1 << 4, r32
2384
        andc    r1, r31, r1
2385
        fmov.dq dr8, r9
2386
        beq/l   r32, r63, tr1
2387
        fmov.dq dr10, r9
2388
        blink   tr1, r63
2389
LOCAL(ct_r9_fpl):       /* Copy r9 from a low FP register.  */
2390
        shlri   r1, 4 - 3, r34
2391
        andi    r34, 3 << 3, r33
2392
        addi    r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2393
LOCAL(ct_r9_fp_base):
2394
        ptrel/l r32, tr2
2395
        movi    7 << 4, r31
2396
        andc    r1, r31, r1
2397
        blink   tr2, r63
2398
LOCAL(ct_r9_fp_copy):
2399
        fmov.dq dr0, r9
2400
        blink   tr1, r63
2401
        fmov.dq dr2, r9
2402
        blink   tr1, r63
2403
        fmov.dq dr4, r9
2404
        blink   tr1, r63
2405
        fmov.dq dr6, r9
2406
        blink   tr1, r63
2407
/* Memory handlers.  Each ct_rN_ld clears the two low bits of rN's cookie
   field.  If both of those bits were set it branches to ct_rN_load, which
   loads rN from the address held in rN and returns to ct_loop.  Otherwise
   it sets r(N+1) to rN + 8, loads rN from the address it held, and falls
   through into the handler for the next register.  */
LOCAL(ct_r2_ld):        /* Copy r2 from a memory address.  */
2408
        pt/l    LOCAL(ct_r2_load), tr2
2409
        movi    3, r30
2410
        shlli   r30, 29, r31
2411
        and     r1, r31, r32
2412
        andc    r1, r31, r1
2413
        beq/l   r31, r32, tr2
2414
        addi.l  r2, 8, r3
2415
        ldx.q   r2, r63, r2
2416
        /* Fall through.  */
2417
LOCAL(ct_r3_ld):        /* Copy r3 from a memory address.  */
2418
        pt/l    LOCAL(ct_r3_load), tr2
2419
        movi    3, r30
2420
        shlli   r30, 26, r31
2421
        and     r1, r31, r32
2422
        andc    r1, r31, r1
2423
        beq/l   r31, r32, tr2
2424
        addi.l  r3, 8, r4
2425
        ldx.q   r3, r63, r3
2426
LOCAL(ct_r4_ld):        /* Copy r4 from a memory address.  */
2427
        pt/l    LOCAL(ct_r4_load), tr2
2428
        movi    3, r30
2429
        shlli   r30, 23, r31
2430
        and     r1, r31, r32
2431
        andc    r1, r31, r1
2432
        beq/l   r31, r32, tr2
2433
        addi.l  r4, 8, r5
2434
        ldx.q   r4, r63, r4
2435
LOCAL(ct_r5_ld):        /* Copy r5 from a memory address.  */
2436
        pt/l    LOCAL(ct_r5_load), tr2
2437
        movi    3, r30
2438
        shlli   r30, 20, r31
2439
        and     r1, r31, r32
2440
        andc    r1, r31, r1
2441
        beq/l   r31, r32, tr2
2442
        addi.l  r5, 8, r6
2443
        ldx.q   r5, r63, r5
2444
LOCAL(ct_r6_ld):        /* Copy r6 from a memory address.  */
2445
        pt/l    LOCAL(ct_r6_load), tr2
2446
        movi    3 << 16, r31
2447
        and     r1, r31, r32
2448
        andc    r1, r31, r1
2449
        beq/l   r31, r32, tr2
2450
        addi.l  r6, 8, r7
2451
        ldx.q   r6, r63, r6
2452
LOCAL(ct_r7_ld):        /* Copy r7 from a memory address.  */
2453
        pt/l    LOCAL(ct_r7_load), tr2
2454
        movi    3 << 12, r31
2455
        and     r1, r31, r32
2456
        andc    r1, r31, r1
2457
        beq/l   r31, r32, tr2
2458
        addi.l  r7, 8, r8
2459
        ldx.q   r7, r63, r7
2460
LOCAL(ct_r8_ld):        /* Copy r8 from a memory address.  */
2461
        pt/l    LOCAL(ct_r8_load), tr2
2462
        movi    3 << 8, r31
2463
        and     r1, r31, r32
2464
        andc    r1, r31, r1
2465
        beq/l   r31, r32, tr2
2466
        addi.l  r8, 8, r9
2467
        ldx.q   r8, r63, r8
2468
LOCAL(ct_r9_ld):        /* Copy r9 from a memory address.  */
2469
        /* Last register: load it and go straight to the final check
           instead of returning to ct_loop.  */
        pt/l    LOCAL(ct_check_tramp), tr2
2470
        ldx.q   r9, r63, r9
2471
        blink   tr2, r63
2472
/* Single-register loads, reached from the beq/l in the handlers above.  */
LOCAL(ct_r2_load):
2473
        ldx.q   r2, r63, r2
2474
        blink   tr1, r63
2475
LOCAL(ct_r3_load):
2476
        ldx.q   r3, r63, r3
2477
        blink   tr1, r63
2478
LOCAL(ct_r4_load):
2479
        ldx.q   r4, r63, r4
2480
        blink   tr1, r63
2481
LOCAL(ct_r5_load):
2482
        ldx.q   r5, r63, r5
2483
        blink   tr1, r63
2484
LOCAL(ct_r6_load):
2485
        ldx.q   r6, r63, r6
2486
        blink   tr1, r63
2487
LOCAL(ct_r7_load):
2488
        ldx.q   r7, r63, r7
2489
        blink   tr1, r63
2490
LOCAL(ct_r8_load):
2491
        ldx.q   r8, r63, r8
2492
        blink   tr1, r63
2493
/* Pop handlers.  Each loads a 64-bit value from the address in r15 into
   the target register, adds 8 to r15, clears the register's single "pop"
   bit in the cookie (r1), and returns to ct_loop through tr1.  */
LOCAL(ct_r2_pop):       /* Pop r2 from the stack.  */
2494
        movi    1, r30
2495
        ldx.q   r15, r63, r2
2496
        shlli   r30, 29, r31
2497
        addi.l  r15, 8, r15
2498
        andc    r1, r31, r1
2499
        blink   tr1, r63
2500
LOCAL(ct_r3_pop):       /* Pop r3 from the stack.  */
2501
        movi    1, r30
2502
        ldx.q   r15, r63, r3
2503
        shlli   r30, 26, r31
2504
        addi.l  r15, 8, r15
2505
        andc    r1, r31, r1
2506
        blink   tr1, r63
2507
LOCAL(ct_r4_pop):       /* Pop r4 from the stack.  */
2508
        movi    1, r30
2509
        ldx.q   r15, r63, r4
2510
        shlli   r30, 23, r31
2511
        addi.l  r15, 8, r15
2512
        andc    r1, r31, r1
2513
        blink   tr1, r63
2514
LOCAL(ct_r5_pop):       /* Pop r5 from the stack.  */
2515
        movi    1, r30
2516
        ldx.q   r15, r63, r5
2517
        shlli   r30, 20, r31
2518
        addi.l  r15, 8, r15
2519
        andc    r1, r31, r1
2520
        blink   tr1, r63
2521
LOCAL(ct_r6_pop):       /* Pop r6 from the stack.  */
2522
        movi    1, r30
2523
        ldx.q   r15, r63, r6
2524
        shlli   r30, 16, r31
2525
        addi.l  r15, 8, r15
2526
        andc    r1, r31, r1
2527
        blink   tr1, r63
2528
LOCAL(ct_r7_pop):       /* Pop r7 from the stack.  */
2529
        ldx.q   r15, r63, r7
2530
        movi    1 << 12, r31
2531
        addi.l  r15, 8, r15
2532
        andc    r1, r31, r1
2533
        blink   tr1, r63
2534
LOCAL(ct_r8_pop):       /* Pop r8 from the stack.  */
2535
        ldx.q   r15, r63, r8
2536
        movi    1 << 8, r31
2537
        addi.l  r15, 8, r15
2538
        andc    r1, r31, r1
2539
        blink   tr1, r63
2540
LOCAL(ct_pop_seq):      /* Pop a sequence of registers off the stack.  */
2541
        /* r30 = 2 * count, where count is cookie bits 1-3; r31 = 8 * count.
           Jumping to ct_end_of_pop_seq - 8 * count executes the last
           `count' ldx.q/addi.l pairs below, always ending with r9.  */
        andi    r1, 7 << 1, r30
2542
        movi    (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2543
        shlli   r30, 2, r31
2544
        shori   LOCAL(ct_end_of_pop_seq) & 65535, r32
2545
        sub.l   r32, r31, r33
2546
        ptabs/l r33, tr2
2547
        blink   tr2, r63
2548
LOCAL(ct_start_of_pop_seq):     /* Beginning of pop sequence.  */
2549
        ldx.q   r15, r63, r3
2550
        addi.l  r15, 8, r15
2551
        ldx.q   r15, r63, r4
2552
        addi.l  r15, 8, r15
2553
        ldx.q   r15, r63, r5
2554
        addi.l  r15, 8, r15
2555
        ldx.q   r15, r63, r6
2556
        addi.l  r15, 8, r15
2557
        ldx.q   r15, r63, r7
2558
        addi.l  r15, 8, r15
2559
        ldx.q   r15, r63, r8
2560
        addi.l  r15, 8, r15
2561
LOCAL(ct_r9_pop):       /* Pop r9 from the stack.  */
2562
        /* Also a dispatch-table target; falls through to the final check.  */
        ldx.q   r15, r63, r9
2563
        addi.l  r15, 8, r15
2564
LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
2565
/* Final stage: with all argument registers set up, either tail-call the
   function in tr0 (cookie bit 0 clear) or call it and split its 64-bit
   result in r2 into the r2/r3 pair (cookie bit 0 set).  */
LOCAL(ct_check_tramp):  /* Check whether we need a trampoline.  */
2566
        pt/u    LOCAL(ct_ret_wide), tr2
2567
        andi    r1, 1, r1
2568
        bne/u   r1, r63, tr2
2569
LOCAL(ct_call_func):    /* Just branch to the function.  */
2570
        blink   tr0, r63
2571
LOCAL(ct_ret_wide):     /* Call the function, so that we can unpack its
2572
                           64-bit return value.  */
2573
        add.l   r18, r63, r10   /* Save the caller's return address in r10.  */
2574
        blink   tr0, r18        /* Call; the callee returns to the next insn.  */
2575
        ptabs   r10, tr0        /* tr0 = original return address.  */
2576
        /* One register receives the upper 32 bits of r2 (shari by 32) and
           the other the lower 32 bits (add.l); which is which depends on
           the byte order.  */
#if __LITTLE_ENDIAN__
2577
        shari   r2, 32, r3
2578
        add.l   r2, r63, r2
2579
#else
2580
        add.l   r2, r63, r3
2581
        shari   r2, 32, r2
2582
#endif
2583
        blink   tr0, r63
2584
 
2585
        ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2586
#endif /* L_shcompact_call_trampoline */
2587
 
2588
#ifdef L_shcompact_return_trampoline
2589
     /* This function does the converse of the code in `ret_wide'
2590
        above.  It is tail-called by SHcompact functions returning
2591
        64-bit non-floating-point values, to pack the 32-bit values in
2592
        r2 and r3 into r2.  */
2593
 
2594
        .mode   SHmedia
2595
        .section        .text..SHmedia32, "ax"
2596
        .align  2
2597
        /* In:  r2, r3 = the two 32-bit halves of the result, r18 = return
                address.
           Out: r2 = the combined 64-bit value.
           One half is zero-extended (addz.l), the other is shifted into
           bits 32-63 (shlli), and the two are ORed together; the byte
           order selects which register supplies the upper half.  */
        .global GLOBAL(GCC_shcompact_return_trampoline)
2598
        HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2599
GLOBAL(GCC_shcompact_return_trampoline):
2600
        ptabs/l r18, tr0
2601
#if __LITTLE_ENDIAN__
2602
        addz.l  r2, r63, r2
2603
        shlli   r3, 32, r3
2604
#else
2605
        addz.l  r3, r63, r3
2606
        shlli   r2, 32, r2
2607
#endif
2608
        or      r3, r2, r2
2609
        blink   tr0, r63
2610
 
2611
        ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2612
#endif /* L_shcompact_return_trampoline */
2613
 
2614
#ifdef L_shcompact_incoming_args
2615
        /* Dispatch table for GCC_shcompact_incoming_args: 33 16-bit
           entries, each the offset of a handler from ia_main_label,
           indexed by (nsb(cookie) - 31).  Entries marked invalid hold the
           offset 1; NOTE(review): that appears to send control back to
           ia_main_label itself, i.e. an endless loop -- confirm.  */
        .section        .rodata
2616
        .align  1
2617
LOCAL(ia_main_table):
2618
.word   1 /* Invalid, just loop */
2619
.word   LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2620
.word   LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2621
.word   1 /* Invalid, just loop */
2622
.word   LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2623
.word   LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2624
.word   1 /* Invalid, just loop */
2625
.word   LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2626
.word   LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2627
.word   1 /* Invalid, just loop */
2628
.word   LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2629
.word   LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2630
/* From r6 on each register has four entries; the first two are invalid.  */
.word   1 /* Invalid, just loop */
2631
.word   1 /* Invalid, just loop */
2632
.word   LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2633
.word   LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2634
.word   1 /* Invalid, just loop */
2635
.word   1 /* Invalid, just loop */
2636
.word   LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2637
.word   LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2638
.word   1 /* Invalid, just loop */
2639
.word   1 /* Invalid, just loop */
2640
.word   LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2641
.word   LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2642
.word   1 /* Invalid, just loop */
2643
.word   1 /* Invalid, just loop */
2644
.word   LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2645
.word   LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2646
/* Final five entries: cookie bits 3, 2, 1, 0 and the all-zero cookie.  */
.word   LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2647
.word   LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2648
.word   LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2649
.word   LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2650
.word   LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2651
        .mode   SHmedia
2652
        .section        .text..SHmedia32, "ax"
2653
        .align  2
2654
 
2655
     /* This function stores 64-bit general-purpose registers back in
2656
        the stack, and loads the address in which each register
2657
        was stored into itself.  The lower 32 bits of r17 hold the address
2658
        to begin storing, and the upper 32 bits of r17 hold the cookie.
2659
        Its execution time is linear on the
2660
        number of registers that actually have to be copied, and it is
2661
        optimized for structures larger than 64 bits, as opposed to
2662
        individual `long long' arguments.  See sh.h for details on the
2663
        actual bit pattern.  */
2664
 
2665
        /* Entry point and dispatch loop.
           In: r17 = cookie (upper 32 bits) and store address (lower 32
               bits); r18 = return address.
           tr0 is set to the return address and tr1 to ia_loop.  The cookie
           is moved to r0, r17 is reduced to the address, and r43 holds the
           address of ia_main_table biased by -31 * 2 bytes.  */
        .global GLOBAL(GCC_shcompact_incoming_args)
2666
        FUNC(GLOBAL(GCC_shcompact_incoming_args))
2667
GLOBAL(GCC_shcompact_incoming_args):
2668
        ptabs/l r18, tr0        /* Prepare to return.  */
2669
        shlri   r17, 32, r0     /* Load the cookie.  */
2670
        movi    ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2671
        pt/l    LOCAL(ia_loop), tr1
2672
        add.l   r17, r63, r17   /* Keep only the 32-bit address part.  */
2673
        shori   ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2674
LOCAL(ia_loop):
2675
        nsb     r0, r36         /* Locate the highest bit still set in r0.  */
2676
        shlli   r36, 1, r37     /* Scale to a byte offset; entries are 2 bytes.  */
2677
        ldx.w   r43, r37, r38   /* r38 = handler offset from ia_main_label.  */
2678
LOCAL(ia_main_label):
2679
        ptrel/l r38, tr2
2680
        blink   tr2, r63        /* Jump to the selected handler.  */
2681
/* Store-and-load-address handlers.  Each ia_rN_ld clears the two low bits
   of rN's cookie field in r0, stores rN at the address in r17, replaces rN
   with that address, and advances r17 by 8.  If both of those bits were
   set it returns to ia_loop through tr1; otherwise it falls through into
   the handler for the next register.  */
LOCAL(ia_r2_ld):        /* Store r2 and load its address.  */
2682
        movi    3, r38
2683
        shlli   r38, 29, r39
2684
        and     r0, r39, r40
2685
        andc    r0, r39, r0
2686
        stx.q   r17, r63, r2
2687
        add.l   r17, r63, r2
2688
        addi.l  r17, 8, r17
2689
        beq/u   r39, r40, tr1
2690
LOCAL(ia_r3_ld):        /* Store r3 and load its address.  */
2691
        movi    3, r38
2692
        shlli   r38, 26, r39
2693
        and     r0, r39, r40
2694
        andc    r0, r39, r0
2695
        stx.q   r17, r63, r3
2696
        add.l   r17, r63, r3
2697
        addi.l  r17, 8, r17
2698
        beq/u   r39, r40, tr1
2699
LOCAL(ia_r4_ld):        /* Store r4 and load its address.  */
2700
        movi    3, r38
2701
        shlli   r38, 23, r39
2702
        and     r0, r39, r40
2703
        andc    r0, r39, r0
2704
        stx.q   r17, r63, r4
2705
        add.l   r17, r63, r4
2706
        addi.l  r17, 8, r17
2707
        beq/u   r39, r40, tr1
2708
LOCAL(ia_r5_ld):        /* Store r5 and load its address.  */
2709
        movi    3, r38
2710
        shlli   r38, 20, r39
2711
        and     r0, r39, r40
2712
        andc    r0, r39, r0
2713
        stx.q   r17, r63, r5
2714
        add.l   r17, r63, r5
2715
        addi.l  r17, 8, r17
2716
        beq/u   r39, r40, tr1
2717
LOCAL(ia_r6_ld):        /* Store r6 and load its address.  */
2718
        movi    3, r38
2719
        shlli   r38, 16, r39
2720
        and     r0, r39, r40
2721
        andc    r0, r39, r0
2722
        stx.q   r17, r63, r6
2723
        add.l   r17, r63, r6
2724
        addi.l  r17, 8, r17
2725
        beq/u   r39, r40, tr1
2726
LOCAL(ia_r7_ld):        /* Store r7 and load its address.  */
2727
        movi    3 << 12, r39
2728
        and     r0, r39, r40
2729
        andc    r0, r39, r0
2730
        stx.q   r17, r63, r7
2731
        add.l   r17, r63, r7
2732
        addi.l  r17, 8, r17
2733
        beq/u   r39, r40, tr1
2734
LOCAL(ia_r8_ld):        /* Store r8 and load its address.  */
2735
        movi    3 << 8, r39
2736
        and     r0, r39, r40
2737
        andc    r0, r39, r0
2738
        stx.q   r17, r63, r8
2739
        add.l   r17, r63, r8
2740
        addi.l  r17, 8, r17
2741
        beq/u   r39, r40, tr1
2742
LOCAL(ia_r9_ld):        /* Store r9 and load its address.  */
2743
        /* Last register: return to the caller instead of looping.  */
        stx.q   r17, r63, r9
2744
        add.l   r17, r63, r9
2745
        blink   tr0, r63
2746
/* Push handlers.  Each clears the register's single "push" bit in the
   cookie (r0), stores the register at the address in r17, advances r17 by
   8, and returns to ia_loop through tr1.  */
LOCAL(ia_r2_push):      /* Push r2 onto the stack.  */
2747
        movi    1, r38
2748
        shlli   r38, 29, r39
2749
        andc    r0, r39, r0
2750
        stx.q   r17, r63, r2
2751
        addi.l  r17, 8, r17
2752
        blink   tr1, r63
2753
LOCAL(ia_r3_push):      /* Push r3 onto the stack.  */
2754
        movi    1, r38
2755
        shlli   r38, 26, r39
2756
        andc    r0, r39, r0
2757
        stx.q   r17, r63, r3
2758
        addi.l  r17, 8, r17
2759
        blink   tr1, r63
2760
LOCAL(ia_r4_push):      /* Push r4 onto the stack.  */
2761
        movi    1, r38
2762
        shlli   r38, 23, r39
2763
        andc    r0, r39, r0
2764
        stx.q   r17, r63, r4
2765
        addi.l  r17, 8, r17
2766
        blink   tr1, r63
2767
LOCAL(ia_r5_push):      /* Push r5 onto the stack.  */
2768
        movi    1, r38
2769
        shlli   r38, 20, r39
2770
        andc    r0, r39, r0
2771
        stx.q   r17, r63, r5
2772
        addi.l  r17, 8, r17
2773
        blink   tr1, r63
2774
LOCAL(ia_r6_push):      /* Push r6 onto the stack.  */
2775
        movi    1, r38
2776
        shlli   r38, 16, r39
2777
        andc    r0, r39, r0
2778
        stx.q   r17, r63, r6
2779
        addi.l  r17, 8, r17
2780
        blink   tr1, r63
2781
LOCAL(ia_r7_push):      /* Push r7 onto the stack.  */
2782
        movi    1 << 12, r39
2783
        andc    r0, r39, r0
2784
        stx.q   r17, r63, r7
2785
        addi.l  r17, 8, r17
2786
        blink   tr1, r63
2787
LOCAL(ia_r8_push):      /* Push r8 onto the stack.  */
2788
        movi    1 << 8, r39
2789
        andc    r0, r39, r0
2790
        stx.q   r17, r63, r8
2791
        addi.l  r17, 8, r17
2792
        blink   tr1, r63
2793
LOCAL(ia_push_seq):     /* Push a sequence of registers onto the stack.  */
2794
        /* r38 = 2 * count, where count is cookie bits 1-3; r39 = 8 * count.
           Jumping to ia_end_of_push_seq - 8 * count executes the last
           `count' two-instruction groups below; the final group is the
           store of r9 followed by the return.  */
        andi    r0, 7 << 1, r38
2795
        movi    (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2796
        shlli   r38, 2, r39
2797
        shori   LOCAL(ia_end_of_push_seq) & 65535, r40
2798
        sub.l   r40, r39, r41
2799
        ptabs/l r41, tr2
2800
        blink   tr2, r63
2801
/* NOTE(review): the label name below looks like a typo for
   "ia_start_of_push_seq" (compare ct_start_of_pop_seq); nothing in the
   visible code refers to it.  */
LOCAL(ia_stack_of_push_seq):     /* Beginning of push sequence.  */
2802
        stx.q   r17, r63, r3
2803
        addi.l  r17, 8, r17
2804
        stx.q   r17, r63, r4
2805
        addi.l  r17, 8, r17
2806
        stx.q   r17, r63, r5
2807
        addi.l  r17, 8, r17
2808
        stx.q   r17, r63, r6
2809
        addi.l  r17, 8, r17
2810
        stx.q   r17, r63, r7
2811
        addi.l  r17, 8, r17
2812
        stx.q   r17, r63, r8
2813
        addi.l  r17, 8, r17
2814
LOCAL(ia_r9_push):      /* Push r9 onto the stack.  */
2815
        stx.q   r17, r63, r9
2816
LOCAL(ia_return):       /* Return.  */
2817
        blink   tr0, r63
2818
LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
2819
        ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2820
#endif /* L_shcompact_incoming_args */
2821
#endif
2822
#if __SH5__
2823
#ifdef L_nested_trampoline
2824
#if __SH5__ == 32
2825
        .section        .text..SHmedia32,"ax"
2826
#else
2827
        .text
2828
#endif
2829
        .align  3 /* It is copied in units of 8 bytes in SHmedia mode.  */
2830
        /* Position-independent trampoline body.  It obtains an address
           derived from its own PC (ptrel/u with the zero register r63,
           then gettr), loads a branch target from offset 24 relative to
           that address and a second value into r1 from offset 28 (32 when
           __SH5__ == 64), then branches to the target.  Presumably the two
           slots hold the nested function's address and its static chain
           -- TODO confirm against the code that fills in the trampoline.  */
        .global GLOBAL(GCC_nested_trampoline)
2831
        HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2832
GLOBAL(GCC_nested_trampoline):
2833
        .mode   SHmedia
2834
        ptrel/u r63, tr0
2835
        gettr   tr0, r0         /* r0 = base for the loads below.  */
2836
#if __SH5__ == 64
2837
        ld.q    r0, 24, r1
2838
#else
2839
        ld.l    r0, 24, r1
2840
#endif
2841
        ptabs/l r1, tr1         /* tr1 = branch target.  */
2842
#if __SH5__ == 64
2843
        ld.q    r0, 32, r1
2844
#else
2845
        ld.l    r0, 28, r1
2846
#endif
2847
        blink   tr1, r63        /* Jump; r1 carries the second loaded value.  */
2848
 
2849
        ENDFUNC(GLOBAL(GCC_nested_trampoline))
2850
#endif /* L_nested_trampoline */
2851
#endif /* __SH5__ */
2852
#if __SH5__ == 32
2853
#ifdef L_push_pop_shmedia_regs
2854
        .section        .text..SHmedia32,"ax"
2855
        .mode   SHmedia
2856
        .align  2
2857
/* GCC_push_shmedia_regs / GCC_push_shmedia_regs_nofpu:
   Store a fixed set of registers below the current stack pointer (r15)
   and return with r15 pointing at the start of the new save area.

   The FPU variant first reserves 14*8 bytes and stores dr36..dr62 there,
   then falls through into the common integer part, which reserves another
   27*8 bytes; its save area is therefore 41*8 bytes in total.  The nofpu
   variant consists of the common part only (27*8 bytes).

   Layout of the common part, as offsets from the final r15:
     26*8..24*8  tr7, tr6, tr5 (read through r62, r61, r60)
     23*8.. 8*8  r59..r44
      7*8.. 0*8  r35..r28
   The return address is taken from r18.  r60..r62 are overwritten with the
   target-register values and are not themselves saved; tr0 is clobbered.  */
#ifndef __SH4_NOFPU__
2858
        .global GLOBAL(GCC_push_shmedia_regs)
2859
        FUNC(GLOBAL(GCC_push_shmedia_regs))
2860
GLOBAL(GCC_push_shmedia_regs):
2861
        addi.l  r15, -14*8, r15         /* room for the 14 double-precision registers */
2862
        fst.d   r15, 13*8, dr62
2863
        fst.d   r15, 12*8, dr60
2864
        fst.d   r15, 11*8, dr58
2865
        fst.d   r15, 10*8, dr56
2866
        fst.d   r15,  9*8, dr54
2867
        fst.d   r15,  8*8, dr52
2868
        fst.d   r15,  7*8, dr50
2869
        fst.d   r15,  6*8, dr48
2870
        fst.d   r15,  5*8, dr46
2871
        fst.d   r15,  4*8, dr44
2872
        fst.d   r15,  3*8, dr42
2873
        fst.d   r15,  2*8, dr40
2874
        fst.d   r15,  1*8, dr38
2875
        fst.d   r15,  0*8, dr36
2876
#else /* ! __SH4_NOFPU__ */
2877
        .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2878
        FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2879
GLOBAL(GCC_push_shmedia_regs_nofpu):
2880
#endif /* ! __SH4_NOFPU__ */
2881
        /* Common part: executed by both variants.  */
        ptabs/l r18, tr0                /* return target = address in r18 */
2882
        addi.l  r15, -27*8, r15         /* room for 3 target registers + 24 general registers */
2883
        gettr   tr7, r62                /* r60..r62 act as scratch for tr5..tr7 */
2884
        gettr   tr6, r61
2885
        gettr   tr5, r60
2886
        st.q    r15, 26*8, r62
2887
        st.q    r15, 25*8, r61
2888
        st.q    r15, 24*8, r60
2889
        st.q    r15, 23*8, r59
2890
        st.q    r15, 22*8, r58
2891
        st.q    r15, 21*8, r57
2892
        st.q    r15, 20*8, r56
2893
        st.q    r15, 19*8, r55
2894
        st.q    r15, 18*8, r54
2895
        st.q    r15, 17*8, r53
2896
        st.q    r15, 16*8, r52
2897
        st.q    r15, 15*8, r51
2898
        st.q    r15, 14*8, r50
2899
        st.q    r15, 13*8, r49
2900
        st.q    r15, 12*8, r48
2901
        st.q    r15, 11*8, r47
2902
        st.q    r15, 10*8, r46
2903
        st.q    r15,  9*8, r45
2904
        st.q    r15,  8*8, r44
2905
        st.q    r15,  7*8, r35
2906
        st.q    r15,  6*8, r34
2907
        st.q    r15,  5*8, r33
2908
        st.q    r15,  4*8, r32
2909
        st.q    r15,  3*8, r31
2910
        st.q    r15,  2*8, r30
2911
        st.q    r15,  1*8, r29
2912
        st.q    r15,  0*8, r28
2913
        blink   tr0, r63                /* return; r15 stays lowered so the save area persists */
2914
#ifndef __SH4_NOFPU__
2915
        ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2916
#else
2917
        ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2918
#endif
2919
/* GCC_pop_shmedia_regs / GCC_pop_shmedia_regs_nofpu:
   Reload the registers stored by the matching push routine from the save
   area at r15 and release that area.

   The FPU variant reloads dr36..dr62 from offsets 27*8..40*8, sets
   r0 = 41*8 and branches to .L0; the nofpu variant sets r0 = 27*8 and
   falls into .L0.  From .L0 on both share the code that reloads tr5..tr7
   (through r60..r62), r59..r44 and r35..r28, adds r0 to r15 and returns
   to the address in r18.
   Clobbers: r0, r60..r62 (left holding the reloaded target-register
   values), tr0, and tr1 in the FPU variant.  */
#ifndef __SH4_NOFPU__
2920
        .global GLOBAL(GCC_pop_shmedia_regs)
2921
        FUNC(GLOBAL(GCC_pop_shmedia_regs))
2922
GLOBAL(GCC_pop_shmedia_regs):
2923
        pt      .L0, tr1                /* target of the branch into the shared tail */
2924
        movi    41*8, r0                /* total frame size to release: 27*8 + 14*8 */
2925
        fld.d   r15, 40*8, dr62
2926
        fld.d   r15, 39*8, dr60
2927
        fld.d   r15, 38*8, dr58
2928
        fld.d   r15, 37*8, dr56
2929
        fld.d   r15, 36*8, dr54
2930
        fld.d   r15, 35*8, dr52
2931
        fld.d   r15, 34*8, dr50
2932
        fld.d   r15, 33*8, dr48
2933
        fld.d   r15, 32*8, dr46
2934
        fld.d   r15, 31*8, dr44
2935
        fld.d   r15, 30*8, dr42
2936
        fld.d   r15, 29*8, dr40
2937
        fld.d   r15, 28*8, dr38
2938
        fld.d   r15, 27*8, dr36
2939
        blink   tr1, r63                /* skip the nofpu frame-size setup, continue at .L0 */
2940
#else /* ! __SH4_NOFPU__        */
2941
        .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2942
        FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2943
GLOBAL(GCC_pop_shmedia_regs_nofpu):
2944
#endif /* ! __SH4_NOFPU__       */
2945
        movi    27*8, r0                /* frame size when only the integer part was pushed */
2946
.L0:
2947
        ptabs   r18, tr0                /* return target = address in r18 */
2948
        ld.q    r15, 26*8, r62
2949
        ld.q    r15, 25*8, r61
2950
        ld.q    r15, 24*8, r60
2951
        ptabs   r62, tr7                /* restore tr7..tr5 from the values just loaded */
2952
        ptabs   r61, tr6
2953
        ptabs   r60, tr5
2954
        ld.q    r15, 23*8, r59
2955
        ld.q    r15, 22*8, r58
2956
        ld.q    r15, 21*8, r57
2957
        ld.q    r15, 20*8, r56
2958
        ld.q    r15, 19*8, r55
2959
        ld.q    r15, 18*8, r54
2960
        ld.q    r15, 17*8, r53
2961
        ld.q    r15, 16*8, r52
2962
        ld.q    r15, 15*8, r51
2963
        ld.q    r15, 14*8, r50
2964
        ld.q    r15, 13*8, r49
2965
        ld.q    r15, 12*8, r48
2966
        ld.q    r15, 11*8, r47
2967
        ld.q    r15, 10*8, r46
2968
        ld.q    r15,  9*8, r45
2969
        ld.q    r15,  8*8, r44
2970
        ld.q    r15,  7*8, r35
2971
        ld.q    r15,  6*8, r34
2972
        ld.q    r15,  5*8, r33
2973
        ld.q    r15,  4*8, r32
2974
        ld.q    r15,  3*8, r31
2975
        ld.q    r15,  2*8, r30
2976
        ld.q    r15,  1*8, r29
2977
        ld.q    r15,  0*8, r28
2978
        add.l   r15, r0, r15            /* release the save area (27*8 or 41*8 bytes) */
2979
        blink   tr0, r63
2980
 
2981
#ifndef __SH4_NOFPU__
2982
        ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2983
#else
2984
        ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2985
#endif
2986
#endif /* L_push_pop_shmedia_regs */
2987
#endif /* __SH5__ == 32 */
2988
 
2989
#ifdef L_div_table
2990
#if __SH5__
2991
#if defined(__pic__) && defined(__SHMEDIA__)
2992
        .global GLOBAL(sdivsi3)
2993
        FUNC(GLOBAL(sdivsi3))
2994
#if __SH5__ == 32
2995
        .section        .text..SHmedia32,"ax"
2996
#else
2997
        .text
2998
#endif
2999
#if 0
3000
/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3001
   in a text section does not work (at least for shared libraries):
3002
   the linker sets the LSB of the address as if this was SHmedia code.  */
3003
#define TEXT_DATA_BUG
3004
#endif
3005
        .align  2
3006
/* sdivsi3 (SH5, PIC SHmedia build): table-driven division of r4 by r5.
   The divisor is normalised with nsb/shlld, its leading bits index the
   division table relative to the table's middle label (one byte factor via
   ldx.ub and, with the index doubled, one 16-bit constant via ldx.w), and
   the looked-up approximation is refined in the multiply steps annotated
   below with their fixed-point formats.  The return address is expected in
   r18; it is moved to tr0 early because r18 is reused as a scratch
   register afterwards.  */
 // inputs: r4,r5
3007
 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3008
 // result in r0
3009
 .global GLOBAL(sdivsi3)
3010
GLOBAL(sdivsi3):
3011
#ifdef TEXT_DATA_BUG
3012
 ptb datalabel Local_div_table,tr0
3013
#else
3014
 ptb GLOBAL(div_table_internal),tr0
3015
#endif
3016
 nsb r5, r1
3017
 shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
3018
 shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
3019
 /* bubble */
3020
 gettr tr0,r20        // r20 = table base address loaded into tr0 above
3021
 ldx.ub r20, r21, r19 // u0.8
3022
 shari r25, 32, r25   // normalize to s2.30
3023
 shlli r21, 1, r21    // index * 2: byte offset of the 16-bit constant
3024
 muls.l r25, r19, r19 // s2.38
3025
 ldx.w r20, r21, r21  // s2.14
3026
  ptabs r18, tr0      // return target from r18; r18 is scratch from here on
3027
 shari r19, 24, r19   // truncate to s2.14
3028
 sub r21, r19, r19    // some 11 bit inverse in s1.14
3029
 muls.l r19, r19, r21 // u0.28
3030
  sub r63, r1, r1     // r1 = -r1 (negated nsb result)
3031
  addi r1, 92, r1     // r1 = 92 - nsb result: shift count for the final shard
3032
 muls.l r25, r21, r18 // s2.58
3033
 shlli r19, 45, r19   // multiply by two and convert to s2.58
3034
  /* bubble */
3035
 sub r19, r18, r18
3036
 shari r18, 28, r18   // some 22 bit inverse in s1.30
3037
 muls.l r18, r25, r0  // s2.60
3038
  muls.l r18, r4, r25 // s32.30
3039
  /* bubble */
3040
 shari r0, 16, r19   // s-16.44
3041
 muls.l r19, r18, r19 // s-16.74
3042
  shari r25, 63, r0   // r0 = sign mask (0 or -1) of the inverse * dividend product
3043
  shari r4, 14, r18   // s19.-14
3044
 shari r19, 30, r19   // s-16.44
3045
 muls.l r19, r18, r19 // s15.30
3046
  xor r21, r0, r21    // You could also use the constant 1 << 27.
3047
  add r21, r25, r21
3048
 sub r21, r19, r21
3049
 shard r21, r1, r21   // scale back by the count computed in r1
3050
 sub r21, r0, r0      // result = r21 - sign mask (adds 1 when the mask is -1)
3051
 blink tr0, r63
3052
        ENDFUNC(GLOBAL(sdivsi3))
3053
/* This table has been generated by divtab.c .
3054
Defects for bias -330:
3055
   Max defect: 6.081536e-07 at -1.000000e+00
3056
   Min defect: 2.849516e-08 at 1.030651e+00
3057
   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3058
   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3059
   Defect at 1: 1.238659e-07
3060
   Defect at -2: 1.061708e-07 */
3061
#else /* ! __pic__ || ! __SHMEDIA__ */
3062
        .section        .rodata
3063
#endif /* __pic__ */
3064
#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3065
        .balign 2
3066
        .type   Local_div_table,@object
3067
        .size   Local_div_table,128
3068
/* negative division constants */
3069
        .word   -16638
3070
        .word   -17135
3071
        .word   -17737
3072
        .word   -18433
3073
        .word   -19103
3074
        .word   -19751
3075
        .word   -20583
3076
        .word   -21383
3077
        .word   -22343
3078
        .word   -23353
3079
        .word   -24407
3080
        .word   -25582
3081
        .word   -26863
3082
        .word   -28382
3083
        .word   -29965
3084
        .word   -31800
3085
/* negative division factors */
3086
        .byte   66
3087
        .byte   70
3088
        .byte   75
3089
        .byte   81
3090
        .byte   87
3091
        .byte   93
3092
        .byte   101
3093
        .byte   109
3094
        .byte   119
3095
        .byte   130
3096
        .byte   142
3097
        .byte   156
3098
        .byte   172
3099
        .byte   192
3100
        .byte   214
3101
        .byte   241
3102
        .skip 16
3103
Local_div_table:
3104
        .skip 16
3105
/* positive division factors */
3106
        .byte   241
3107
        .byte   214
3108
        .byte   192
3109
        .byte   172
3110
        .byte   156
3111
        .byte   142
3112
        .byte   130
3113
        .byte   119
3114
        .byte   109
3115
        .byte   101
3116
        .byte   93
3117
        .byte   87
3118
        .byte   81
3119
        .byte   75
3120
        .byte   70
3121
        .byte   66
3122
/* positive division constants */
3123
        .word   31801
3124
        .word   29966
3125
        .word   28383
3126
        .word   26864
3127
        .word   25583
3128
        .word   24408
3129
        .word   23354
3130
        .word   22344
3131
        .word   21384
3132
        .word   20584
3133
        .word   19752
3134
        .word   19104
3135
        .word   18434
3136
        .word   17738
3137
        .word   17136
3138
        .word   16639
3139
        .section        .rodata
3140
#endif /* TEXT_DATA_BUG */
3141
        .balign 2
3142
        .type   GLOBAL(div_table),@object
3143
        .size   GLOBAL(div_table),128
3144
/* negative division constants */
3145
        .word   -16638
3146
        .word   -17135
3147
        .word   -17737
3148
        .word   -18433
3149
        .word   -19103
3150
        .word   -19751
3151
        .word   -20583
3152
        .word   -21383
3153
        .word   -22343
3154
        .word   -23353
3155
        .word   -24407
3156
        .word   -25582
3157
        .word   -26863
3158
        .word   -28382
3159
        .word   -29965
3160
        .word   -31800
3161
/* negative division factors */
3162
        .byte   66
3163
        .byte   70
3164
        .byte   75
3165
        .byte   81
3166
        .byte   87
3167
        .byte   93
3168
        .byte   101
3169
        .byte   109
3170
        .byte   119
3171
        .byte   130
3172
        .byte   142
3173
        .byte   156
3174
        .byte   172
3175
        .byte   192
3176
        .byte   214
3177
        .byte   241
3178
        .skip 16
3179
        .global GLOBAL(div_table)
3180
GLOBAL(div_table):
3181
        HIDDEN_ALIAS(div_table_internal,div_table)
3182
        .skip 16
3183
/* positive division factors */
3184
        .byte   241
3185
        .byte   214
3186
        .byte   192
3187
        .byte   172
3188
        .byte   156
3189
        .byte   142
3190
        .byte   130
3191
        .byte   119
3192
        .byte   109
3193
        .byte   101
3194
        .byte   93
3195
        .byte   87
3196
        .byte   81
3197
        .byte   75
3198
        .byte   70
3199
        .byte   66
3200
/* positive division constants */
3201
        .word   31801
3202
        .word   29966
3203
        .word   28383
3204
        .word   26864
3205
        .word   25583
3206
        .word   24408
3207
        .word   23354
3208
        .word   22344
3209
        .word   21384
3210
        .word   20584
3211
        .word   19752
3212
        .word   19104
3213
        .word   18434
3214
        .word   17738
3215
        .word   17136
3216
        .word   16639
3217
 
3218
#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3219
/* This code uses shld, and thus is not suitable for SH1 / SH2.  */
3220
 
3221
/* Signed / unsigned division without use of FPU, optimized for SH4.
3222
   Uses a lookup table for divisors in the range -128 .. +128, and
3223
   div1 with case distinction for larger divisors in three more ranges.
3224
   The code is lumped together with the table to allow the use of mova.  */
3225
#ifdef __LITTLE_ENDIAN__
3226
#define L_LSB 0
3227
#define L_LSWMSB 1
3228
#define L_MSWLSB 2
3229
#else
3230
#define L_LSB 3
3231
#define L_LSWMSB 2
3232
#define L_MSWLSB 1
3233
#endif
3234
 
3235
        .balign 4
3236
        .global GLOBAL(udivsi3_i4i)
3237
        FUNC(GLOBAL(udivsi3_i4i))
3238
/* udivsi3_i4i: unsigned division without the FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are saved on the stack and restored before returning; r1 is
   used as scratch.

   The entry code dispatches on the size of the divisor:
     divisor <= 128            -> udiv_le128: multiply by a tabulated inverse
     128 < divisor < 0x10000   -> falls through, continues at udiv_25
     divisor >= 0x10000        -> udiv_ge64k, which further selects udiv_r8
                                  when the divisor exceeds (dividend >> 8)
   The labels without the 'u' prefix (div_le128, div_by_1_neg, div_ge64k,
   div_r8) are entry points used by the signed routine, which has already
   pushed r4 and r5 itself.  udiv_25 and div_ge64k_end, as well as the
   constants c128_w, m256_w, zero_l and the lookup tables, are defined
   further down in this file section.  */
GLOBAL(udivsi3_i4i):
3239
        mov.w LOCAL(c128_w), r1
3240
        div0u
3241
        mov r4,r0
3242
        shlr8 r0                /* r0 = dividend >> 8 */
3243
        cmp/hi r1,r5            /* T = divisor > 128 (unsigned) */
3244
        extu.w r5,r1
3245
        bf LOCAL(udiv_le128)
3246
        cmp/eq r5,r1            /* T = divisor fits in 16 bits */
3247
        bf LOCAL(udiv_ge64k)
3248
        shlr r0
3249
        mov r5,r1               /* keep the original divisor for the stack save below */
3250
        shll16 r5
3251
        mov.l r4,@-r15
3252
        div1 r5,r0
3253
        mov.l r1,@-r15
3254
        div1 r5,r0
3255
        div1 r5,r0
3256
        bra LOCAL(udiv_25)
3257
        div1 r5,r0              /* delay slot */
3258
 
3259
/* Signed entry for divisors <= 128; r4/r5 were already pushed by the caller
   of this label.  */
LOCAL(div_le128):
3260
        mova LOCAL(div_table_ix),r0
3261
        bra LOCAL(div_le128_2)
3262
        mov.b @(r0,r5),r1       /* delay slot: r1 = div_table_ix[divisor] */
3263
LOCAL(udiv_le128):
3264
        mov.l r4,@-r15
3265
        mova LOCAL(div_table_ix),r0
3266
        mov.b @(r0,r5),r1       /* r1 = div_table_ix[divisor] */
3267
        mov.l r5,@-r15
3268
LOCAL(div_le128_2):
3269
        mova LOCAL(div_table_inv),r0
3270
        mov.l @(r0,r1),r1       /* r1 = tabulated inverse selected by the index byte */
3271
        mov r5,r0
3272
        tst #0xfe,r0            /* T = (divisor & 0xfe) == 0, i.e. divisor is 0 or 1 */
3273
        mova LOCAL(div_table_clz),r0
3274
        dmulu.l r1,r4           /* mach:macl = inverse * dividend */
3275
        mov.b @(r0,r5),r1       /* r1 = div_table_clz[divisor], the final shift count */
3276
        bt/s LOCAL(div_by_1)
3277
        mov r4,r0               /* delay slot: r0 = dividend */
3278
        mov.l @r15+,r5
3279
        sts mach,r0             /* high word of the product */
3280
        /* clrt */
3281
        addc r4,r0              /* add the dividend; T is clear here since bt/s was not taken */
3282
        mov.l @r15+,r4
3283
        rotcr r0                /* shift the carry of the addition back in from the top */
3284
        rts
3285
        shld r1,r0              /* delay slot: shift by the table value */
3286
 
3287
LOCAL(div_by_1_neg):
3288
        neg r4,r0
3289
LOCAL(div_by_1):
3290
        mov.l @r15+,r5
3291
        rts
3292
        mov.l @r15+,r4          /* delay slot */
3293
 
3294
/* Signed entry for divisors >= 0x10000; T is supplied by the compare in the
   delay slot of the branch that leads here.  */
LOCAL(div_ge64k):
3295
        bt/s LOCAL(div_r8)
3296
        div0u                   /* delay slot */
3297
        shll8 r5
3298
        bra LOCAL(div_ge64k_2)
3299
        div1 r5,r0              /* delay slot */
3300
LOCAL(udiv_ge64k):
3301
        cmp/hi r0,r5            /* T = divisor > (dividend >> 8) */
3302
        mov r5,r1
3303
        bt LOCAL(udiv_r8)
3304
        shll8 r5
3305
        mov.l r4,@-r15
3306
        div1 r5,r0
3307
        mov.l r1,@-r15          /* saves the unshifted divisor copied to r1 above */
3308
LOCAL(div_ge64k_2):
3309
        div1 r5,r0
3310
        mov.l LOCAL(zero_l),r1
3311
        .rept 4
3312
        div1 r5,r0
3313
        .endr
3314
        mov.l r1,@-r15          /* scratch word, initially 0, that receives a quotient byte */
3315
        div1 r5,r0
3316
        mov.w LOCAL(m256_w),r1
3317
        div1 r5,r0
3318
        mov.b r0,@(L_LSWMSB,r15)
3319
        xor r4,r0               /* xor / and / xor: take the bits selected by the m256_w */
3320
        and r1,r0               /* mask from r0 and all remaining bits from r4 */
3321
        bra LOCAL(div_ge64k_end)
3322
        xor r4,r0               /* delay slot */
3323
 
3324
LOCAL(div_r8):
3325
        shll16 r4
3326
        bra LOCAL(div_r8_2)
3327
        shll8 r4                /* delay slot */
3328
LOCAL(udiv_r8):
3329
        mov.l r4,@-r15
3330
        shll16 r4
3331
        clrt
3332
        shll8 r4
3333
        mov.l r5,@-r15
3334
LOCAL(div_r8_2):
3335
        rotcl r4
3336
        mov r0,r1
3337
        div1 r5,r1
3338
        mov r4,r0
3339
        rotcl r0
3340
        mov r5,r4               /* r4 takes over as divisor so that r5 can be restored early */
3341
        div1 r5,r1
3342
        .rept 5
3343
        rotcl r0; div1 r5,r1
3344
        .endr
3345
        rotcl r0
3346
        mov.l @r15+,r5
3347
        div1 r4,r1
3348
        mov.l @r15+,r4
3349
        rts
3350
        rotcl r0                /* delay slot: shift in the last quotient bit */
3351
 
3352
        ENDFUNC(GLOBAL(udivsi3_i4i))
3353
 
3354
        .global GLOBAL(sdivsi3_i4i)
3355
        FUNC(GLOBAL(sdivsi3_i4i))
3356
        /* This is link-compatible with a GLOBAL(sdivsi3) call,
3357
           but we effectively clobber only r1.  */
3358
/* sdivsi3_i4i: signed division without the FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on entry and restored before returning.

   Both operands are replaced by their magnitudes and the work is split by
   the sign of the result: pos_result handles a non-negative quotient and
   shares code with the unsigned routine (div_le128, div_ge64k, udiv_25,
   div_ge64k_end), while neg_result and the *_neg labels perform the same
   steps and negate the quotient at the end.  Within each half the divisor
   magnitude selects the method exactly as in the unsigned routine
   (<= 128, < 0x10000, >= 0x10000).  The div1 paths push one extra word
   (loaded from zero_l) that collects quotient bytes through mov.b stores
   and is popped and OR-ed into the result at the end.  */
GLOBAL(sdivsi3_i4i):
3359
        mov.l r4,@-r15
3360
        cmp/pz r5               /* T = divisor >= 0 */
3361
        mov.w LOCAL(c128_w), r1
3362
        bt/s LOCAL(pos_divisor)
3363
        cmp/pz r4               /* delay slot: T = dividend >= 0 */
3364
        mov.l r5,@-r15
3365
        neg r5,r5               /* divisor was negative: use its magnitude */
3366
        bt/s LOCAL(neg_result)  /* dividend >= 0, divisor < 0 -> negative quotient */
3367
        cmp/hi r1,r5            /* delay slot: T = |divisor| > 128 */
3368
        neg r4,r4               /* both operands negative: quotient is non-negative */
3369
LOCAL(pos_result):
3370
        extu.w r5,r0
3371
        bf LOCAL(div_le128)
3372
        cmp/eq r5,r0            /* T = divisor fits in 16 bits */
3373
        mov r4,r0
3374
        shlr8 r0
3375
        bf/s LOCAL(div_ge64k)
3376
        cmp/hi r0,r5            /* delay slot: T = divisor > (dividend >> 8) */
3377
        div0u
3378
        shll16 r5
3379
        div1 r5,r0
3380
        div1 r5,r0
3381
        div1 r5,r0
3382
/* Also reached from the unsigned routine for 128 < divisor < 0x10000.  */
LOCAL(udiv_25):
3383
        mov.l LOCAL(zero_l),r1
3384
        div1 r5,r0
3385
        div1 r5,r0
3386
        mov.l r1,@-r15          /* scratch word (0) that collects quotient bytes */
3387
        .rept 3
3388
        div1 r5,r0
3389
        .endr
3390
        mov.b r0,@(L_MSWLSB,r15)
3391
        xtrct r4,r0
3392
        swap.w r0,r0
3393
        .rept 8
3394
        div1 r5,r0
3395
        .endr
3396
        mov.b r0,@(L_LSWMSB,r15)
3397
LOCAL(div_ge64k_end):
3398
        .rept 8
3399
        div1 r5,r0
3400
        .endr
3401
        mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3402
        extu.b r0,r0
3403
        mov.l @r15+,r5
3404
        or r4,r0                /* merge the bytes collected in the scratch word */
3405
        mov.l @r15+,r4
3406
        rts
3407
        rotcl r0                /* delay slot: shift in the last quotient bit */
3408
 
3409
/* Negative-quotient counterpart of the table-based path for divisors
   <= 128; on entry r0 holds the zero-extended low 16 bits of the divisor.  */
LOCAL(div_le128_neg):
3410
        tst #0xfe,r0            /* T = (r0 & 0xfe) == 0 */
3411
        mova LOCAL(div_table_ix),r0
3412
        mov.b @(r0,r5),r1
3413
        mova LOCAL(div_table_inv),r0
3414
        bt/s LOCAL(div_by_1_neg)
3415
        mov.l @(r0,r1),r1       /* delay slot: r1 = tabulated inverse */
3416
        mova LOCAL(div_table_clz),r0
3417
        dmulu.l r1,r4           /* mach:macl = inverse * dividend magnitude */
3418
        mov.b @(r0,r5),r1       /* r1 = shift count from div_table_clz */
3419
        mov.l @r15+,r5
3420
        sts mach,r0
3421
        /* clrt */
3422
        addc r4,r0
3423
        mov.l @r15+,r4
3424
        rotcr r0
3425
        shld r1,r0
3426
        rts
3427
        neg r0,r0               /* delay slot: apply the negative sign */
3428
 
3429
LOCAL(pos_divisor):
3430
        mov.l r5,@-r15
3431
        bt/s LOCAL(pos_result)  /* dividend >= 0 as well -> non-negative quotient */
3432
        cmp/hi r1,r5            /* delay slot: T = divisor > 128 */
3433
        neg r4,r4               /* dividend was negative: use its magnitude */
3434
LOCAL(neg_result):
3435
        extu.w r5,r0
3436
        bf LOCAL(div_le128_neg)
3437
        cmp/eq r5,r0            /* T = divisor fits in 16 bits */
3438
        mov r4,r0
3439
        shlr8 r0
3440
        bf/s LOCAL(div_ge64k_neg)
3441
        cmp/hi r0,r5            /* delay slot: T = divisor > (dividend >> 8) */
3442
        div0u
3443
        mov.l LOCAL(zero_l),r1
3444
        shll16 r5
3445
        div1 r5,r0
3446
        mov.l r1,@-r15          /* scratch word (0) that collects quotient bytes */
3447
        .rept 7
3448
        div1 r5,r0
3449
        .endr
3450
        mov.b r0,@(L_MSWLSB,r15)
3451
        xtrct r4,r0
3452
        swap.w r0,r0
3453
        .rept 8
3454
        div1 r5,r0
3455
        .endr
3456
        mov.b r0,@(L_LSWMSB,r15)
3457
LOCAL(div_ge64k_neg_end):
3458
        .rept 8
3459
        div1 r5,r0
3460
        .endr
3461
        mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3462
        extu.b r0,r1
3463
        mov.l @r15+,r5
3464
        or r4,r1
3465
LOCAL(div_r8_neg_end):
3466
        mov.l @r15+,r4
3467
        rotcl r1
3468
        rts
3469
        neg r1,r0               /* delay slot: negated quotient is the result */
3470
 
3471
LOCAL(div_ge64k_neg):
3472
        bt/s LOCAL(div_r8_neg)
3473
        div0u                   /* delay slot */
3474
        shll8 r5
3475
        mov.l LOCAL(zero_l),r1
3476
        .rept 6
3477
        div1 r5,r0
3478
        .endr
3479
        mov.l r1,@-r15
3480
        div1 r5,r0
3481
        mov.w LOCAL(m256_w),r1
3482
        div1 r5,r0
3483
        mov.b r0,@(L_LSWMSB,r15)
3484
        xor r4,r0               /* xor / and / xor: take the bits selected by the m256_w */
3485
        and r1,r0               /* mask from r0 and all remaining bits from r4 */
3486
        bra LOCAL(div_ge64k_neg_end)
3487
        xor r4,r0               /* delay slot */
3488
 
3489
/* Threshold used by both entry points to select the table-based path.  */
LOCAL(c128_w):
3490
        .word 128
3491
 
3492
LOCAL(div_r8_neg):
3493
        clrt
3494
        shll16 r4
3495
        mov r4,r1
3496
        shll8 r1
3497
        mov r5,r4               /* r4 takes over as divisor so that r5 can be restored early */
3498
        .rept 7
3499
        rotcl r1; div1 r5,r0
3500
        .endr
3501
        mov.l @r15+,r5
3502
        rotcl r1
3503
        bra LOCAL(div_r8_neg_end)
3504
        div1 r4,r0              /* delay slot */
3505
 
3506
/* 16-bit mask constant loaded with mov.w in the divisor >= 0x10000 paths.  */
LOCAL(m256_w):
3507
        .word 0xff00
3508
/* This table has been generated by divtab-sh4.c.  */
3509
        .balign 4
3510
LOCAL(div_table_clz):
3511
        .byte   0
3512
        .byte   1
3513
        .byte   0
3514
        .byte   -1
3515
        .byte   -1
3516
        .byte   -2
3517
        .byte   -2
3518
        .byte   -2
3519
        .byte   -2
3520
        .byte   -3
3521
        .byte   -3
3522
        .byte   -3
3523
        .byte   -3
3524
        .byte   -3
3525
        .byte   -3
3526
        .byte   -3
3527
        .byte   -3
3528
        .byte   -4
3529
        .byte   -4
3530
        .byte   -4
3531
        .byte   -4
3532
        .byte   -4
3533
        .byte   -4
3534
        .byte   -4
3535
        .byte   -4
3536
        .byte   -4
3537
        .byte   -4
3538
        .byte   -4
3539
        .byte   -4
3540
        .byte   -4
3541
        .byte   -4
3542
        .byte   -4
3543
        .byte   -4
3544
        .byte   -5
3545
        .byte   -5
3546
        .byte   -5
3547
        .byte   -5
3548
        .byte   -5
3549
        .byte   -5
3550
        .byte   -5
3551
        .byte   -5
3552
        .byte   -5
3553
        .byte   -5
3554
        .byte   -5
3555
        .byte   -5
3556
        .byte   -5
3557
        .byte   -5
3558
        .byte   -5
3559
        .byte   -5
3560
        .byte   -5
3561
        .byte   -5
3562
        .byte   -5
3563
        .byte   -5
3564
        .byte   -5
3565
        .byte   -5
3566
        .byte   -5
3567
        .byte   -5
3568
        .byte   -5
3569
        .byte   -5
3570
        .byte   -5
3571
        .byte   -5
3572
        .byte   -5
3573
        .byte   -5
3574
        .byte   -5
3575
        .byte   -5
3576
        .byte   -6
3577
        .byte   -6
3578
        .byte   -6
3579
        .byte   -6
3580
        .byte   -6
3581
        .byte   -6
3582
        .byte   -6
3583
        .byte   -6
3584
        .byte   -6
3585
        .byte   -6
3586
        .byte   -6
3587
        .byte   -6
3588
        .byte   -6
3589
        .byte   -6
3590
        .byte   -6
3591
        .byte   -6
3592
        .byte   -6
3593
        .byte   -6
3594
        .byte   -6
3595
        .byte   -6
3596
        .byte   -6
3597
        .byte   -6
3598
        .byte   -6
3599
        .byte   -6
3600
        .byte   -6
3601
        .byte   -6
3602
        .byte   -6
3603
        .byte   -6
3604
        .byte   -6
3605
        .byte   -6
3606
        .byte   -6
3607
        .byte   -6
3608
        .byte   -6
3609
        .byte   -6
3610
        .byte   -6
3611
        .byte   -6
3612
        .byte   -6
3613
        .byte   -6
3614
        .byte   -6
3615
        .byte   -6
3616
        .byte   -6
3617
        .byte   -6
3618
        .byte   -6
3619
        .byte   -6
3620
        .byte   -6
3621
        .byte   -6
3622
        .byte   -6
3623
        .byte   -6
3624
        .byte   -6
3625
        .byte   -6
3626
        .byte   -6
3627
        .byte   -6
3628
        .byte   -6
3629
        .byte   -6
3630
        .byte   -6
3631
        .byte   -6
3632
        .byte   -6
3633
        .byte   -6
3634
        .byte   -6
3635
        .byte   -6
3636
        .byte   -6
3637
        .byte   -6
3638
        .byte   -6
3639
/* Lookup table translating positive divisor to index into table of
3640
   normalized inverse.  N.B. the '0' entry is also the last entry of the
3641
 previous table, and causes an unaligned access for division by zero.  */
3642
LOCAL(div_table_ix):
3643
        .byte   -6
3644
        .byte   -128
3645
        .byte   -128
3646
        .byte   0
3647
        .byte   -128
3648
        .byte   -64
3649
        .byte   0
3650
        .byte   64
3651
        .byte   -128
3652
        .byte   -96
3653
        .byte   -64
3654
        .byte   -32
3655
        .byte   0
3656
        .byte   32
3657
        .byte   64
3658
        .byte   96
3659
        .byte   -128
3660
        .byte   -112
3661
        .byte   -96
3662
        .byte   -80
3663
        .byte   -64
3664
        .byte   -48
3665
        .byte   -32
3666
        .byte   -16
3667
        .byte   0
3668
        .byte   16
3669
        .byte   32
3670
        .byte   48
3671
        .byte   64
3672
        .byte   80
3673
        .byte   96
3674
        .byte   112
3675
        .byte   -128
3676
        .byte   -120
3677
        .byte   -112
3678
        .byte   -104
3679
        .byte   -96
3680
        .byte   -88
3681
        .byte   -80
3682
        .byte   -72
3683
        .byte   -64
3684
        .byte   -56
3685
        .byte   -48
3686
        .byte   -40
3687
        .byte   -32
3688
        .byte   -24
3689
        .byte   -16
3690
        .byte   -8
3691
        .byte   0
3692
        .byte   8
3693
        .byte   16
3694
        .byte   24
3695
        .byte   32
3696
        .byte   40
3697
        .byte   48
3698
        .byte   56
3699
        .byte   64
3700
        .byte   72
3701
        .byte   80
3702
        .byte   88
3703
        .byte   96
3704
        .byte   104
3705
        .byte   112
3706
        .byte   120
3707
        .byte   -128
3708
        .byte   -124
3709
        .byte   -120
3710
        .byte   -116
3711
        .byte   -112
3712
        .byte   -108
3713
        .byte   -104
3714
        .byte   -100
3715
        .byte   -96
3716
        .byte   -92
3717
        .byte   -88
3718
        .byte   -84
3719
        .byte   -80
3720
        .byte   -76
3721
        .byte   -72
3722
        .byte   -68
3723
        .byte   -64
3724
        .byte   -60
3725
        .byte   -56
3726
        .byte   -52
3727
        .byte   -48
3728
        .byte   -44
3729
        .byte   -40
3730
        .byte   -36
3731
        .byte   -32
3732
        .byte   -28
3733
        .byte   -24
3734
        .byte   -20
3735
        .byte   -16
3736
        .byte   -12
3737
        .byte   -8
3738
        .byte   -4
3739
        .byte   0
3740
        .byte   4
3741
        .byte   8
3742
        .byte   12
3743
        .byte   16
3744
        .byte   20
3745
        .byte   24
3746
        .byte   28
3747
        .byte   32
3748
        .byte   36
3749
        .byte   40
3750
        .byte   44
3751
        .byte   48
3752
        .byte   52
3753
        .byte   56
3754
        .byte   60
3755
        .byte   64
3756
        .byte   68
3757
        .byte   72
3758
        .byte   76
3759
        .byte   80
3760
        .byte   84
3761
        .byte   88
3762
        .byte   92
3763
        .byte   96
3764
        .byte   100
3765
        .byte   104
3766
        .byte   108
3767
        .byte   112
3768
        .byte   116
3769
        .byte   120
3770
        .byte   124
3771
        .byte   -128
3772
/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
3773
        .balign 4
3774
LOCAL(zero_l):
3775
        .long   0x0
3776
        .long   0xF81F81F9
3777
        .long   0xF07C1F08
3778
        .long   0xE9131AC0
3779
        .long   0xE1E1E1E2
3780
        .long   0xDAE6076C
3781
        .long   0xD41D41D5
3782
        .long   0xCD856891
3783
        .long   0xC71C71C8
3784
        .long   0xC0E07039
3785
        .long   0xBACF914D
3786
        .long   0xB4E81B4F
3787
        .long   0xAF286BCB
3788
        .long   0xA98EF607
3789
        .long   0xA41A41A5
3790
        .long   0x9EC8E952
3791
        .long   0x9999999A
3792
        .long   0x948B0FCE
3793
        .long   0x8F9C18FA
3794
        .long   0x8ACB90F7
3795
        .long   0x86186187
3796
        .long   0x81818182
3797
        .long   0x7D05F418
3798
        .long   0x78A4C818
3799
        .long   0x745D1746
3800
        .long   0x702E05C1
3801
        .long   0x6C16C16D
3802
        .long   0x68168169
3803
        .long   0x642C8591
3804
        .long   0x60581606
3805
        .long   0x5C9882BA
3806
        .long   0x58ED2309
3807
LOCAL(div_table_inv):
3808
        .long   0x55555556
3809
        .long   0x51D07EAF
3810
        .long   0x4E5E0A73
3811
        .long   0x4AFD6A06
3812
        .long   0x47AE147B
3813
        .long   0x446F8657
3814
        .long   0x41414142
3815
        .long   0x3E22CBCF
3816
        .long   0x3B13B13C
3817
        .long   0x38138139
3818
        .long   0x3521CFB3
3819
        .long   0x323E34A3
3820
        .long   0x2F684BDB
3821
        .long   0x2C9FB4D9
3822
        .long   0x29E4129F
3823
        .long   0x27350B89
3824
        .long   0x24924925
3825
        .long   0x21FB7813
3826
        .long   0x1F7047DD
3827
        .long   0x1CF06ADB
3828
        .long   0x1A7B9612
3829
        .long   0x18118119
3830
        .long   0x15B1E5F8
3831
        .long   0x135C8114
3832
        .long   0x11111112
3833
        .long   0xECF56BF
3834
        .long   0xC9714FC
3835
        .long   0xA6810A7
3836
        .long   0x8421085
3837
        .long   0x624DD30
3838
        .long   0x4104105
3839
        .long   0x2040811
3840
        /* maximum error: 0.987342 scaled: 0.921875*/
3841
 
3842
        ENDFUNC(GLOBAL(sdivsi3_i4i))
3843
#endif /* SH3 / SH4 */
3844
 
3845
#endif /* L_div_table */
3846
 
3847
#ifdef L_udiv_qrnnd_16
3848
#if !__SHMEDIA__
3849
        HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3850
        /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
3851
        /* n1 < d, but n1 might be larger than d1.  */
3852
        /* Reading of the register list above: on entry r0 = n1, r4 = n0,
           r5 = d and r6 = d1; on return r0 = rn and r1 = qn; r2 is scratch.
           The main path runs 16 div1 steps of r0 by r6, multiplies the
           resulting 16-bit quotient by r5 (mulu.w), subtracts that product
           from the value formed out of r0 and r4, and then corrects the
           quotient downwards by one or two with r5 added back each time.
           .Lots is the separate path taken when r0 > r6 on entry.
           NOTE(review): the names n1/n0/d/d1/rn/qn/__m presumably follow a
           C udiv_qrnnd helper elsewhere in the tree -- confirm there for
           the exact operand widths.  */
        .global GLOBAL(udiv_qrnnd_16)
3853
        .balign 8
3854
GLOBAL(udiv_qrnnd_16):
3855
        div0u
3856
        cmp/hi r6,r0            /* T = r0 > r6 (unsigned) */
3857
        bt .Lots
3858
        .rept 16
3859
        div1 r6,r0
3860
        .endr
3861
        extu.w r0,r1
3862
        bt 0f                   /* T was left by the last div1 step */
3863
        add r6,r0
3864
0:      rotcl r1                /* shift that T bit into the quotient */
3865
        mulu.w r1,r5            /* macl = quotient * r5 (16 x 16 bit) */
3866
        xtrct r4,r0
3867
        swap.w r0,r0
3868
        sts macl,r2
3869
        cmp/hs r2,r0            /* T = r0 >= r2 (unsigned): no correction needed */
3870
        sub r2,r0
3871
        bt 0f                   /* forward to the plain return at the end */
3872
        addc r5,r0              /* first correction; T is clear on entry, carry out -> T */
3873
        add #-1,r1
3874
        bt 0f                   /* carry out: one correction was enough */
3875
1:      add #-1,r1              /* second correction */
3876
        rts
3877
        add r5,r0               /* delay slot */
3878
        .balign 8
3879
.Lots:
3880
        sub r5,r0
3881
        swap.w r4,r1
3882
        xtrct r0,r1
3883
        clrt
3884
        mov r1,r0
3885
        addc r5,r0
3886
        mov #-1,r1
3887
        /* SL1 is a macro defined elsewhere in this file; presumably it
           emits the bf to 1b with the shlr16 as its delay-slot or
           preceding instruction depending on the target -- confirm at the
           macro definition.  */
        SL1(bf, 1b,
3888
        shlr16 r1)
3889
0:      rts
3890
        nop
3891
        ENDFUNC(GLOBAL(udiv_qrnnd_16))
3892
#endif /* !__SHMEDIA__ */
3893
#endif /* L_udiv_qrnnd_16 */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.