/* Copyright 2010, 2011  Free Software Foundation, Inc.
   Contributed by Bernd Schmidt.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

        ;; ABI considerations for the divide functions
        ;; The following registers are call-used:
        ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
        ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
        ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
        ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
        ;;
        ;; In our implementation, divu and remu are leaf functions,
        ;; while both divi and remi call into divu.
        ;; A0 is not clobbered by any of the functions.
        ;; divu does not clobber B2 either, which is taken advantage of
        ;; in remi.
        ;; divi uses B5 to hold the original return address during
        ;; the call to divu.
        ;; remi uses B2 and A5 to hold the input values during the
        ;; call to divu.  It stores B3 on the stack.
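
The comment block above reduces the signed entry points to the unsigned helper: negate negative operands, remember whether the signs differed, and negate the unsigned quotient on return. A hedged C sketch of that flow for __c6xabi_divi follows; the name divi_sketch and the use of plain C division in place of the call to __c6xabi_divu are illustrative assumptions, not part of this file.

int divi_sketch(int a, int b)
{
    unsigned ua = a < 0 ? -(unsigned)a : (unsigned)a;  /* [A1] neg A4, A4 */
    unsigned ub = b < 0 ? -(unsigned)b : (unsigned)b;  /* [B1] neg B4, B4 */
    unsigned uq = ua / ub;              /* stands in for the call to __c6xabi_divu */
    int negate = (a < 0) ^ (b < 0);     /* xor of the two cmpgt results */
    return negate ? -(int)uq : (int)uq; /* conditional neg after divu returns */
}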

#ifdef L_divsi3
.text
.align 2
.global __c6xabi_divi
.hidden __c6xabi_divi
.type __c6xabi_divi, STT_FUNC

__c6xabi_divi:
        call .s2        __c6xabi_divu
||      mv .d2          B3, B5
||      cmpgt .l1       0, A4, A1
||      cmpgt .l2       0, B4, B1

        [A1] neg .l1    A4, A4
||      [B1] neg .l2    B4, B4
||      xor .s1x        A1, B1, A1

#ifdef _TMS320C6400
        [A1] addkpc .s2 1f, B3, 4
#else
        [A1] mvkl .s2   1f, B3
        [A1] mvkh .s2   1f, B3
        nop             2
#endif
1:
        neg .l1         A4, A4
||      mv .l2          B3,B5
||      ret .s2         B5
        nop             5
#endif

#if defined L_modsi3 || defined L_divmodsi4
.align 2
#ifdef L_modsi3
#define MOD_OUTPUT_REG A4
.global __c6xabi_remi
.hidden __c6xabi_remi
.type __c6xabi_remi, STT_FUNC
#else
#define MOD_OUTPUT_REG A5
.global __c6xabi_divremi
.hidden __c6xabi_divremi
.type __c6xabi_divremi, STT_FUNC
__c6xabi_divremi:
#endif

__c6xabi_remi:
        stw .d2t2       B3, *B15--[2]
||      cmpgt .l1       0, A4, A1
||      cmpgt .l2       0, B4, B2
||      mv .s1          A4, A5
||      call .s2        __c6xabi_divu

        [A1] neg .l1    A4, A4
||      [B2] neg .l2    B4, B4
||      xor .s2x        B2, A1, B0
||      mv .d2          B4, B2

#ifdef _TMS320C6400
        [B0] addkpc .s2 1f, B3, 1
        [!B0] addkpc .s2 2f, B3, 1
        nop             2
#else
        [B0] mvkl .s2   1f,B3
        [!B0] mvkl .s2  2f,B3

        [B0] mvkh .s2   1f,B3
        [!B0] mvkh .s2  2f,B3
#endif
1:
        neg .l1         A4, A4
2:
        ldw .d2t2       *++B15[2], B3

#ifdef _TMS320C6400_PLUS
        mpy32 .m1x      A4, B2, A6
        nop             3
        ret .s2         B3
        sub .l1         A5, A6, MOD_OUTPUT_REG
        nop             4
#else
        mpyu .m1x       A4, B2, A1
        nop             1
        mpylhu .m1x     A4, B2, A6
||      mpylhu .m2x     B2, A4, B2
        nop             1
        add .l1x        A6, B2, A6
||      ret .s2         B3
        shl .s1         A6, 16, A6
        add .d1         A6, A1, A6
        sub .l1         A5, A6, MOD_OUTPUT_REG
        nop             2
#endif

#endif
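
The multiply-and-subtract tail above rebuilds the remainder from the quotient returned by __c6xabi_divu: the saved dividend (A5) minus quotient times the saved divisor (B2), with the quotient's sign fixed up first. A hedged C equivalent follows; remi_sketch is an illustrative name and plain C division stands in for the call to __c6xabi_divu.

int remi_sketch(int a, int b)
{
    unsigned ua = a < 0 ? -(unsigned)a : (unsigned)a;
    unsigned ub = b < 0 ? -(unsigned)b : (unsigned)b;
    int q = (int)(ua / ub);             /* result of the __c6xabi_divu call */
    if ((a < 0) ^ (b < 0))
        q = -q;                         /* sign fixup, as at label 1 above */
    return a - q * b;                   /* mpy32/sub tail: remainder keeps the dividend's sign */
}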

#if defined L_udivsi3 || defined L_udivmodsi4
.align 2
#ifdef L_udivsi3
.global __c6xabi_divu
.hidden __c6xabi_divu
.type __c6xabi_divu, STT_FUNC
__c6xabi_divu:
#else
.global __c6xabi_divremu
.hidden __c6xabi_divremu
.type __c6xabi_divremu, STT_FUNC
__c6xabi_divremu:
#endif
        ;; We use a series of up to 31 subc instructions.  First, we find
        ;; out how many leading zero bits there are in the divisor.  This
        ;; gives us both a shift count for aligning (shifting) the divisor
        ;; to the top, and the number of times we have to execute subc.

        ;; At the end, we have both the remainder and most of the quotient
        ;; in A4.  The top bit of the quotient is computed first and is
        ;; placed in A2.

        ;; Return immediately if the dividend is zero.  Setting B4 to 1
        ;; is a trick to allow us to leave the following insns in the jump
        ;; delay slot without affecting the result.
        mv      .s2x    A4, B1

#ifndef _TMS320C6400
[!b1]   mvk     .s2     1, B4
#endif
[b1]    lmbd    .l2     1, B4, B1
||[!b1] b       .s2     B3      ; RETURN A
#ifdef _TMS320C6400
||[!b1] mvk     .d2     1, B4
#endif
#ifdef L_udivmodsi4
||[!b1] zero    .s1     A5
#endif
        mv      .l1x    B1, A6
||      shl     .s2     B4, B1, B4

        ;; The loop performs a maximum of 28 steps, so we do the
        ;; first 3 here.
        cmpltu  .l1x    A4, B4, A2
[!A2]   sub     .l1x    A4, B4, A4
||      shru    .s2     B4, 1, B4
||      xor     .s1     1, A2, A2

        shl     .s1     A2, 31, A2
|| [b1] subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1

        ;; RETURN A may happen here (note: must happen before the next branch)
0:
        cmpgt   .l2     B1, 7, B0
|| [b1] subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
|| [b0] b       .s1     0b
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
        ;; loop backwards branch happens here

        ret     .s2     B3
||      mvk     .s1     32, A1
        sub     .l1     A1, A6, A6
#ifdef L_udivmodsi4
||      extu    .s1     A4, A6, A5
#endif
        shl     .s1     A4, A6, A4
        shru    .s1     A4, 1, A4
||      sub     .l1     A6, 1, A6
        or      .l1     A2, A4, A4
        shru    .s1     A4, A6, A4
        nop

#endif
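
The subc loop above is easier to follow next to a plain C restatement of the same shift-and-subtract scheme: lmbd supplies the shift that left-aligns the divisor, and each step produces one quotient bit. The sketch below is a behavioural model under that reading, not a cycle-accurate rendering of the subc sequence or of the final shl/shru repacking; udivmod_model is an illustrative name, not part of this file.

#include <stdint.h>

/* Behavioural model of __c6xabi_divu / __c6xabi_divremu: left-align the
   divisor under the dividend (the lmbd step), then perform one
   conditional subtract per bit position, collecting quotient bits. */
void udivmod_model(uint32_t n, uint32_t d, uint32_t *quot, uint32_t *rem)
{
    uint32_t q = 0;
    if (d != 0 && n >= d) {
        int shift = __builtin_clz(d) - __builtin_clz(n);  /* alignment distance */
        uint32_t dd = d << shift;
        for (; shift >= 0; shift--, dd >>= 1) {
            q <<= 1;
            if (n >= dd) {          /* one subc-style step */
                n -= dd;
                q |= 1;
            }
        }
    }
    *quot = q;                      /* quotient */
    *rem = n;                       /* what is left of the dividend */
}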

#ifdef L_umodsi3
.align 2
.global __c6xabi_remu
.hidden __c6xabi_remu
.type __c6xabi_remu, STT_FUNC
__c6xabi_remu:
        ;; The ABI seems designed to prevent these functions calling each other,
        ;; so we duplicate most of the divsi3 code here.
        mv      .s2x    A4, B1
#ifndef _TMS320C6400
[!b1]   mvk     .s2     1, B4
#endif
        lmbd    .l2     1, B4, B1
||[!b1] b       .s2     B3      ; RETURN A
#ifdef _TMS320C6400
||[!b1] mvk     .d2     1, B4
#endif

        mv      .l1x    B1, A7
||      shl     .s2     B4, B1, B4

        cmpltu  .l1x    A4, B4, A1
[!a1]   sub     .l1x    A4, B4, A4
        shru    .s2     B4, 1, B4

0:
        cmpgt   .l2     B1, 7, B0
|| [b1] subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
        ;; RETURN A may happen here (note: must happen before the next branch)
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
|| [b0] b       .s1     0b
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
        ;; loop backwards branch happens here

        ret     .s2     B3
[b1]    subc    .l1x    A4,B4,A4
|| [b1] add     .s2     -1, B1, B1
[b1]    subc    .l1x    A4,B4,A4

        extu    .s1     A4, A7, A4
        nop     2
#endif

#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS

.align 2
.global __c6xabi_strasgi_64plus
.hidden __c6xabi_strasgi_64plus
.type __c6xabi_strasgi_64plus, STT_FUNC
__c6xabi_strasgi_64plus:
        shru    .s2x    a6, 2, b31
||      mv      .s1     a4, a30
||      mv      .d2     b4, b30

        add     .s2     -4, b31, b31

        sploopd         1
||      mvc     .s2     b31, ilc
        ldw     .d2t2   *b30++, b31
        nop     4
        mv      .s1x    b31,a31
        spkernel        6, 0
||      stw     .d1t1   a31, *a30++

        ret     .s2     b3
        nop 5
#endif

#ifdef L_strasgi
.global __c6xabi_strasgi
.type __c6xabi_strasgi, STT_FUNC
__c6xabi_strasgi:
        ;; This is essentially memcpy, with alignment known to be at least
        ;; 4, and the size a multiple of 4 greater than or equal to 28.
        ldw     .d2t1   *B4++, A0
||      mvk     .s2     16, B1
        ldw     .d2t1   *B4++, A1
||      mvk     .s2     20, B2
||      sub     .d1     A6, 24, A6
        ldw     .d2t1   *B4++, A5
        ldw     .d2t1   *B4++, A7
||      mv      .l2x    A6, B7
        ldw     .d2t1   *B4++, A8
        ldw     .d2t1   *B4++, A9
||      mv      .s2x    A0, B5
||      cmpltu  .l2     B2, B7, B0

0:
        stw     .d1t2   B5, *A4++
||[b0]  ldw     .d2t1   *B4++, A0
||      mv      .s2x    A1, B5
||      mv      .l2     B7, B6

[b0]    sub     .d2     B6, 24, B7
||[b0]  b       .s2     0b
||      cmpltu  .l2     B1, B6, B0

[b0]    ldw     .d2t1   *B4++, A1
||      stw     .d1t2   B5, *A4++
||      mv      .s2x    A5, B5
||      cmpltu  .l2     12, B6, B0

[b0]    ldw     .d2t1   *B4++, A5
||      stw     .d1t2   B5, *A4++
||      mv      .s2x    A7, B5
||      cmpltu  .l2     8, B6, B0

[b0]    ldw     .d2t1   *B4++, A7
||      stw     .d1t2   B5, *A4++
||      mv      .s2x    A8, B5
||      cmpltu  .l2     4, B6, B0

[b0]    ldw     .d2t1   *B4++, A8
||      stw     .d1t2   B5, *A4++
||      mv      .s2x    A9, B5
||      cmpltu  .l2     0, B6, B0

[b0]    ldw     .d2t1   *B4++, A9
||      stw     .d1t2   B5, *A4++
||      mv      .s2x    A0, B5
||      cmpltu  .l2     B2, B7, B0

        ;; loop back branch happens here

        cmpltu  .l2     B1, B6, B0
||      ret     .s2     b3

[b0]    stw     .d1t1   A1, *A4++
||      cmpltu  .l2     12, B6, B0
[b0]    stw     .d1t1   A5, *A4++
||      cmpltu  .l2     8, B6, B0
[b0]    stw     .d1t1   A7, *A4++
||      cmpltu  .l2     4, B6, B0
[b0]    stw     .d1t1   A8, *A4++
||      cmpltu  .l2     0, B6, B0
[b0]    stw     .d1t1   A9, *A4++

        ;; return happens here

#endif
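
The comment inside __c6xabi_strasgi above states the contract: a structure-assignment helper that behaves like memcpy, with alignment of at least 4 and a size that is a multiple of 4 (at least 28 for the unrolled path). A minimal C model of that contract, covering both the SPLOOP variant and this one; strasgi_model is an illustrative name, not part of this file.

#include <stddef.h>
#include <stdint.h>

/* Behavioural model of __c6xabi_strasgi / __c6xabi_strasgi_64plus:
   copy nbytes from src to dst, where the caller guarantees word
   alignment and that nbytes is a multiple of 4. */
void strasgi_model(uint32_t *dst, const uint32_t *src, size_t nbytes)
{
    for (size_t i = 0; i < nbytes / 4; i++)
        dst[i] = src[i];
}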

#ifdef _TMS320C6400_PLUS
#ifdef L_push_rts
.align 2
.global __c6xabi_push_rts
.hidden __c6xabi_push_rts
.type __c6xabi_push_rts, STT_FUNC
__c6xabi_push_rts:
        stw .d2t2       B14, *B15--[2]
        stdw .d2t1      A15:A14, *B15--
||      b .s2x          A3
        stdw .d2t2      B13:B12, *B15--
        stdw .d2t1      A13:A12, *B15--
        stdw .d2t2      B11:B10, *B15--
        stdw .d2t1      A11:A10, *B15--
        stdw .d2t2      B3:B2, *B15--
#endif

#ifdef L_pop_rts
.align 2
.global __c6xabi_pop_rts
.hidden __c6xabi_pop_rts
.type __c6xabi_pop_rts, STT_FUNC
__c6xabi_pop_rts:
        lddw .d2t2      *++B15, B3:B2
        lddw .d2t1      *++B15, A11:A10
        lddw .d2t2      *++B15, B11:B10
        lddw .d2t1      *++B15, A13:A12
        lddw .d2t2      *++B15, B13:B12
        lddw .d2t1      *++B15, A15:A14
||      b .s2           B3
        ldw .d2t2       *++B15[2], B14
        nop             4
#endif

#ifdef L_call_stub
.align 2
.global __c6xabi_call_stub
.type __c6xabi_call_stub, STT_FUNC
__c6xabi_call_stub:
        stw .d2t1       A2, *B15--[2]
        stdw .d2t1      A7:A6, *B15--
||      call .s2        B31
        stdw .d2t1      A1:A0, *B15--
        stdw .d2t2      B7:B6, *B15--
        stdw .d2t2      B5:B4, *B15--
        stdw .d2t2      B1:B0, *B15--
        stdw .d2t2      B3:B2, *B15--
||      addkpc .s2      1f, B3, 0
1:
        lddw .d2t2      *++B15, B3:B2
        lddw .d2t2      *++B15, B1:B0
        lddw .d2t2      *++B15, B5:B4
        lddw .d2t2      *++B15, B7:B6
        lddw .d2t1      *++B15, A1:A0
        lddw .d2t1      *++B15, A7:A6
||      b .s2           B3
        ldw .d2t1       *++B15[2], A2
        nop             4
#endif

#endif
 
