OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [sh/] [lib1funcs-Os-4-200.S] - Blame information for rev 758

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 734 jeremybenn
/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
2
 
3
This file is free software; you can redistribute it and/or modify it
4
under the terms of the GNU General Public License as published by the
5
Free Software Foundation; either version 3, or (at your option) any
6
later version.
7
 
8
This file is distributed in the hope that it will be useful, but
9
WITHOUT ANY WARRANTY; without even the implied warranty of
10
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
General Public License for more details.
12
 
13
Under Section 7 of GPL version 3, you are granted additional
14
permissions described in the GCC Runtime Library Exception, version
15
3.1, as published by the Free Software Foundation.
16
 
17
You should have received a copy of the GNU General Public License and
18
a copy of the GCC Runtime Library Exception along with this program;
19
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
20
<http://www.gnu.org/licenses/>.  */
21
 
22
/* Moderately Space-optimized libgcc routines for the Renesas SH /
23
   STMicroelectronics ST40 CPUs.
24
   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
25
 
26
#include "lib1funcs.h"
27
 
28
#if !__SHMEDIA__
29
#ifdef L_udivsi3_i4i
30
 
31
/* 88 bytes; sh4-200 cycle counts:
32
   divisor  >= 2G: 11 cycles
33
   dividend <  2G: 48 cycles
34
   dividend >= 2G: divisor != 1: 54 cycles
35
   dividend >= 2G, divisor == 1: 22 cycles */
36
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
37
!! args in r4 and r5, result in r0, clobber r1
!! Unsigned divide r4 / r5 carried out with the double-precision FPU.
!! The main path pushes FPUL, the caller FPSCR, dr0 and dr2 and pops
!! them again before returning; the huge_divisor path never touches
!! FPUL or the FP registers and only restores FPSCR from r1.
38
 
39
        .global GLOBAL(udivsi3_i4i)
40
        FUNC(GLOBAL(udivsi3_i4i))
41
GLOBAL(udivsi3_i4i):
42
        mova L1,r0                      ! r0 = address of the constants at L1
43
        cmp/pz r5                       ! T = 1 when bit 31 of the divisor is clear
44
        sts fpscr,r1                    ! r1 = caller FPSCR
45
        lds.l @r0+,fpscr                ! FPSCR = word at L1; r0 now points at the .double
46
        sts.l fpul,@-r15                ! push caller FPUL
47
        bf LOCAL(huge_divisor)          ! divisor >= 2G: handled with a single compare
48
        mov.l r1,@-r15                  ! push caller FPSCR (frees r1 for scratch use)
49
        lds r4,fpul                     ! FPUL = dividend
50
        cmp/pz r4                       ! T = 1 when bit 31 of the dividend is clear
51
#ifdef FMOVD_WORKS
52
        fmov.d dr0,@-r15                ! push dr0
53
        float fpul,dr0                  ! dr0 = dividend converted as a signed integer
54
        fmov.d dr2,@-r15                ! push dr2
55
        bt LOCAL(dividend_adjusted)     ! dividend < 2G: the signed conversion is exact
56
        mov #1,r1
57
        fmov.d @r0,dr2                  ! dr2 = 4294967296.0 (the .double after L1)
58
        cmp/eq r1,r5                    ! T = 1 when divisor == 1
59
        /* NOTE(review): divisor 1 with dividend >= 2G skips both the
           adjustment and the divide, presumably because the true quotient
           would not fit the signed integer produced by ftrc, whereas
           truncating the unadjusted (negative) value returns the same
           32 bits as the dividend - confirm against the SH-4 manual.  */
        bt LOCAL(div_by_1)
60
        fadd dr2,dr0                    ! dr0 += 2^32: signed conversion -> unsigned value
61
LOCAL(dividend_adjusted):
62
        lds r5,fpul                     ! FPUL = divisor
63
        float fpul,dr2                  ! dr2 = divisor as double (bit 31 is clear here)
64
        fdiv dr2,dr0                    ! dr0 = dividend / divisor
65
LOCAL(div_by_1):
66
        fmov.d @r15+,dr2                ! pop dr2
67
        ftrc dr0,fpul                   ! FPUL = dr0 truncated to an integer
68
        fmov.d @r15+,dr0                ! pop dr0
69
#else /* !FMOVD_WORKS */
70
        /* DR00/DR01/DR20/DR21 are defined outside this view (presumably in
           lib1funcs.h) and presumably name the 32-bit halves of dr0 and dr2
           - TODO confirm.  The halves are pushed and popped one at a time.  */
        fmov.s DR01,@-r15               ! push first half of dr0
71
        mov #1,r1
72
        fmov.s DR00,@-r15               ! push second half of dr0
73
        float fpul,dr0                  ! dr0 = dividend converted as a signed integer
74
        fmov.s DR21,@-r15               ! push first half of dr2
75
        bt/s LOCAL(dividend_adjusted)   ! dividend < 2G: the signed conversion is exact
76
        fmov.s DR20,@-r15               ! delay slot, runs on both paths: push second half of dr2
77
        cmp/eq r1,r5                    ! T = 1 when divisor == 1
78
        bt LOCAL(div_by_1)              ! divisor 1: skip adjustment and divide
79
        fmov.s @r0+,DR20                ! load the 4294967296.0 constant after L1 ...
80
        fmov.s @r0,DR21                 ! ... one 32-bit word at a time into dr2
81
        fadd dr2,dr0                    ! dr0 += 2^32: signed conversion -> unsigned value
82
LOCAL(dividend_adjusted):
83
        lds r5,fpul                     ! FPUL = divisor
84
        float fpul,dr2                  ! dr2 = divisor as double (bit 31 is clear here)
85
        fdiv dr2,dr0                    ! dr0 = dividend / divisor
86
LOCAL(div_by_1):
87
        fmov.s @r15+,DR20               ! pop dr2 halves in reverse push order
88
        fmov.s @r15+,DR21
89
        ftrc dr0,fpul                   ! FPUL = dr0 truncated to an integer
90
        fmov.s @r15+,DR00               ! pop dr0 halves in reverse push order
91
        fmov.s @r15+,DR01
92
#endif /* !FMOVD_WORKS */
93
        lds.l @r15+,fpscr               ! pop caller FPSCR
94
        sts fpul,r0                     ! r0 = quotient
95
        rts
96
        lds.l @r15+,fpul                ! delay slot: pop caller FPUL
97
 
98
#ifdef FMOVD_WORKS
99
        .p2align 3        ! make double below 8 byte aligned.
100
#endif
101
LOCAL(huge_divisor):
102
        lds r1,fpscr                    ! restore caller FPSCR from r1
103
        add #4,r15                      ! drop the pushed FPUL; FPUL itself was not changed
104
        cmp/hs r5,r4                    ! T = 1 when dividend >= divisor (unsigned)
105
        rts
106
        movt r0                         ! delay slot: r0 = T, so the quotient is 0 or 1
107
 
108
        .p2align 2                      ! L1 is a mova target and is read with lds.l
109
L1:
110
#ifndef FMOVD_WORKS
111
        .long 0x80000                   ! FPSCR image: bit 19 set (PR, double precision)
112
#else
113
        .long 0x180000                  ! FPSCR image: bits 19 and 20 set (PR and SZ)
114
#endif
115
        .double 4294967296              ! 2^32, added when the dividend has bit 31 set
116
 
117
        ENDFUNC(GLOBAL(udivsi3_i4i))
118
#elif !defined (__sh1__)  /* !__SH_FPU_DOUBLE__ */
119
 
120
#if 0
121
/* With 36 bytes, the following would probably be the most compact
122
   implementation, but with 139 cycles on an sh4-200, it is extremely slow.  */
123
/* Disabled (inside #if 0) bit-at-a-time unsigned divide: dividend in r4,
   divisor in r5, quotient bits rotated into r0.  r2 and r3 are saved on
   the stack and restored; r1 is used as the div1 working register.  */
GLOBAL(udivsi3_i4i):
124
        mov.l r2,@-r15          ! save r2
125
        mov #0,r1               ! r1 = 0
126
        div0u                   ! clear M, Q and T for an unsigned divide
127
        mov r1,r2               ! r2 = 0
128
        mov.l r3,@-r15          ! save r3
129
        mov r1,r3               ! r3 = 0, the constant compared against below
130
        sett                    ! T = 1: seeds the marker bit rotated through r2
131
        mov r4,r0               ! r0 = dividend
132
LOCAL(loop):
133
        rotcr r2                ! T -> bit 31 of r2, bit 0 of r2 -> T
134
        ;
135
        bt/s LOCAL(end)         ! leave once the rotate has pushed a 1 out of bit 0
136
        cmp/gt r2,r3            ! delay slot: T = (0 > r2 signed) = bit 31 just rotated in
137
        rotcl r0                ! T -> bit 0 of r0, bit 31 of r0 -> T
138
        bra LOCAL(loop)
139
        div1 r5,r1              ! delay slot: one division step of r1 by r5
140
LOCAL(end):
141
        rotcl r0                ! rotate the final T into r0
142
        mov.l @r15+,r3          ! restore r3
143
        rts
144
        mov.l @r15+,r2          ! delay slot: restore r2
145
#endif /* 0 */
146
 
147
/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
148
   sh4-200 run times:
149
   udiv small divisor: 55 cycles
150
   udiv large divisor: 52 cycles
151
   sdiv small divisor, positive result: 59 cycles
152
   sdiv large divisor, positive result: 56 cycles
153
   sdiv small divisor, negative result: 65 cycles (*)
154
   sdiv large divisor, negative result: 62 cycles (*)
155
   (*): r2 is restored in the rts delay slot and has a lingering latency
156
        of two more cycles.  */
157
        .balign 4
158
        .global GLOBAL(udivsi3_i4i)
159
        FUNC(GLOBAL(udivsi3_i4i))
160
        FUNC(GLOBAL(sdivsi3_i4i))
161
/* udivsi3_i4i: unsigned divide of r4 by r5 using div1 steps; the quotient
   is assembled in r0.  r4 and r5 are pushed and popped again.  The return
   address is copied from pr into r1 because the bsr calls below overwrite
   pr; the routine ends with jmp @r1.  sdivsi3_i4i further down reuses the
   sdiv_small_divisor / sdiv_large_divisor entry points of this code.  */
GLOBAL(udivsi3_i4i):
162
        sts pr,r1                       ! r1 = return address
163
        mov.l r4,@-r15                  ! save r4
164
        extu.w r5,r0                    ! r0 = low 16 bits of the divisor
165
        cmp/eq r5,r0                    ! T = 1 when the divisor fits in 16 bits
166
        swap.w r4,r0                    ! r0 = dividend with its 16-bit halves swapped
167
        shlr16 r4                       ! r4 = upper 16 bits of the dividend
168
        bf/s LOCAL(large_divisor)       ! divisor needs more than 16 bits
169
        div0u                           ! delay slot: clear M, Q and T for unsigned div1
170
        mov.l r5,@-r15                  ! save r5
171
        shll16 r5                       ! move the 16-bit divisor into the upper half
172
LOCAL(sdiv_small_divisor):
173
        /* First 16 div1 steps: 1 + (1 + 6) + 1 + (1 + 6).  */
        div1 r5,r4
174
        bsr LOCAL(div6)
175
        div1 r5,r4                      ! delay slot
176
        div1 r5,r4
177
        bsr LOCAL(div6)
178
        div1 r5,r4                      ! delay slot
179
        xtrct r4,r0                     ! r0 = (r4 << 16) | (r0 >> 16)
180
        xtrct r0,r4                     ! r4 = (r0 << 16) | (r4 >> 16)
181
        /* Second 16 div1 steps: 7 + 1 + (1 + 7).  */
        bsr LOCAL(div7)
182
        swap.w r4,r4                    ! delay slot: swap the halves of r4
183
        div1 r5,r4
184
        bsr LOCAL(div7)
185
        div1 r5,r4                      ! delay slot
186
        xtrct r4,r0                     ! r0 = (r4 << 16) | (r0 >> 16)
187
        mov.l @r15+,r5                  ! restore r5
188
        swap.w r0,r0                    ! swap the halves of r0
189
        mov.l @r15+,r4                  ! restore r4
190
        jmp @r1                         ! return (or continue at negate_result for sdiv)
191
        rotcl r0                        ! delay slot: rotate the last T into r0
192
LOCAL(div7):                            ! helper: 7 div1 steps (falls into div6)
193
        div1 r5,r4
194
LOCAL(div6):                            ! helper: 6 div1 steps, the last in the rts delay slot
195
                    div1 r5,r4; div1 r5,r4; div1 r5,r4
196
        div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4
197
 
198
LOCAL(divx3):                           ! helper: rotcl/div1, rotcl/div1, rotcl/div1
199
        rotcl r0                        ! T -> bit 0 of r0, bit 31 of r0 -> T
200
        div1 r5,r4
201
        rotcl r0
202
        div1 r5,r4
203
        rotcl r0
204
        rts
205
        div1 r5,r4                      ! delay slot
206
 
207
LOCAL(large_divisor):
208
        mov.l r5,@-r15                  ! save r5
209
LOCAL(sdiv_large_divisor):
210
        xor r4,r0                       ! clear the low half of r0 (it equals r4 here)
211
        .rept 4                         ! 4 x (1 + 3) = 16 rotcl/div1 pairs in total
212
        rotcl r0
213
        bsr LOCAL(divx3)
214
        div1 r5,r4                      ! delay slot
215
        .endr
216
        mov.l @r15+,r5                  ! restore r5
217
        mov.l @r15+,r4                  ! restore r4
218
        jmp @r1                         ! return (or continue at negate_result for sdiv)
219
        rotcl r0                        ! delay slot: rotate the last T into r0
220
        ENDFUNC(GLOBAL(udivsi3_i4i))
221
 
222
        .global GLOBAL(sdivsi3_i4i)
223
/* sdivsi3_i4i: signed divide of r4 by r5.  Both operands are made
   non-negative and the unsigned code above is reused.  When exactly one
   operand was negative, r1 is pointed at negate_result so the quotient is
   negated on the way out; on that path the caller r2 is parked in macl
   while r2 holds the real return address.  */
GLOBAL(sdivsi3_i4i):
224
        mov.l r4,@-r15                  ! save r4 (original value, before any negation)
225
        cmp/pz r5                       ! T = 1 when the divisor is >= 0
226
        mov.l r5,@-r15                  ! save r5 (original value, before any negation)
227
        bt/s LOCAL(pos_divisor)
228
        cmp/pz r4                       ! delay slot, both paths: T = 1 when dividend >= 0
229
        neg r5,r5                       ! divisor was negative: r5 = -r5
230
        extu.w r5,r0                    ! r0 = low 16 bits of the divisor
231
        bt/s LOCAL(neg_result)          ! dividend >= 0, divisor < 0: negative result
232
        cmp/eq r5,r0                    ! delay slot: T = 1 when the divisor fits in 16 bits
233
        neg r4,r4                       ! both were negative: r4 = -r4
234
LOCAL(pos_result):
235
        swap.w r4,r0                    ! r0 = dividend with its 16-bit halves swapped
236
        bra LOCAL(sdiv_check_divisor)
237
        sts pr,r1                       ! delay slot: r1 = return address
238
LOCAL(pos_divisor):
239
        extu.w r5,r0                    ! r0 = low 16 bits of the divisor
240
        bt/s LOCAL(pos_result)          ! dividend >= 0 as well: positive result
241
        cmp/eq r5,r0                    ! delay slot: T = 1 when the divisor fits in 16 bits
242
        neg r4,r4                       ! dividend was negative: r4 = -r4
243
LOCAL(neg_result):
244
        mova LOCAL(negate_result),r0    ! r0 = address of negate_result
245
        ;
246
        mov r0,r1                       ! the division code will jmp @r1 to negate_result
247
        swap.w r4,r0                    ! r0 = dividend with its 16-bit halves swapped
248
        lds r2,macl                     ! park the caller r2 in macl
249
        sts pr,r2                       ! r2 = real return address
250
LOCAL(sdiv_check_divisor):
251
        shlr16 r4                       ! r4 = upper 16 bits of the dividend
252
        bf/s LOCAL(sdiv_large_divisor)  ! T still holds the cmp/eq result from above
253
        div0u                           ! delay slot: clear M, Q and T for unsigned div1
254
        bra LOCAL(sdiv_small_divisor)
255
        shll16 r5                       ! delay slot: move the divisor into the upper half
256
        .balign 4                       ! negate_result is a mova target
257
LOCAL(negate_result):
258
        neg r0,r0                       ! negate the unsigned quotient
259
        jmp @r2                         ! return to the caller
260
        sts macl,r2                     ! delay slot: restore the caller r2 from macl
261
        ENDFUNC(GLOBAL(sdivsi3_i4i))
262
#endif /* !__SH_FPU_DOUBLE__ */
263
#endif /* L_udivsi3_i4i */
264
 
265
#ifdef L_sdivsi3_i4i
266
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
267
/* 48 bytes, 45 cycles on sh4-200  */
268
!! args in r4 and r5, result in r0, clobber r1
!! Signed divide r4 / r5 carried out with the double-precision FPU.
!! FPSCR, dr0 and dr2 are saved on the stack, FPUL is kept in r1;
!! all of them are restored before returning.
269
 
270
        .global GLOBAL(sdivsi3_i4i)
271
        FUNC(GLOBAL(sdivsi3_i4i))
272
GLOBAL(sdivsi3_i4i):
273
        sts.l fpscr,@-r15               ! push caller FPSCR
274
        sts fpul,r1                     ! r1 = caller FPUL
275
        mova L1,r0                      ! r0 = address of the FPSCR image at L1
276
        lds.l @r0+,fpscr                ! FPSCR = word at L1
277
        lds r4,fpul                     ! FPUL = dividend
278
#ifdef FMOVD_WORKS
279
        fmov.d dr0,@-r15                ! push dr0
280
        float fpul,dr0                  ! dr0 = dividend as double
281
        lds r5,fpul                     ! FPUL = divisor
282
        fmov.d dr2,@-r15                ! push dr2
283
#else
284
        /* DR00/DR01/DR20/DR21 are defined outside this view (presumably in
           lib1funcs.h) and presumably name the 32-bit halves of dr0 and dr2
           - TODO confirm.  */
        fmov.s DR01,@-r15               ! push dr0, one half at a time
285
        fmov.s DR00,@-r15
286
        float fpul,dr0                  ! dr0 = dividend as double
287
        lds r5,fpul                     ! FPUL = divisor
288
        fmov.s DR21,@-r15               ! push dr2, one half at a time
289
        fmov.s DR20,@-r15
290
#endif
291
        float fpul,dr2                  ! dr2 = divisor as double
292
        fdiv dr2,dr0                    ! dr0 = dividend / divisor
293
#ifdef FMOVD_WORKS
294
        fmov.d @r15+,dr2                ! pop dr2
295
#else
296
        fmov.s @r15+,DR20               ! pop dr2 halves in reverse push order
297
        fmov.s @r15+,DR21
298
#endif
299
        ftrc dr0,fpul                   ! FPUL = dr0 truncated to an integer
300
#ifdef FMOVD_WORKS
301
        fmov.d @r15+,dr0                ! pop dr0
302
#else
303
        fmov.s @r15+,DR00               ! pop dr0 halves in reverse push order
304
        fmov.s @r15+,DR01
305
#endif
306
        lds.l @r15+,fpscr               ! pop caller FPSCR
307
        sts fpul,r0                     ! r0 = quotient
308
        rts
309
        lds r1,fpul                     ! delay slot: restore caller FPUL from r1
310
 
311
        .p2align 2                      ! L1 is a mova target and is read with lds.l
312
L1:
313
#ifndef FMOVD_WORKS
314
        .long 0x80000                   ! FPSCR image: bit 19 set (PR, double precision)
315
#else
316
        .long 0x180000                  ! FPSCR image: bits 19 and 20 set (PR and SZ)
317
#endif
318
 
319
        ENDFUNC(GLOBAL(sdivsi3_i4i))
320
#endif /* __SH_FPU_DOUBLE__ */
321
#endif /* L_sdivsi3_i4i */
322
#endif /* !__SHMEDIA__ */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.