OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [config/] [sh/] [lib1funcs-Os-4-200.asm] - Blame information for rev 827

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Copyright (C) 2006 Free Software Foundation, Inc.
2
 
3
This file is free software; you can redistribute it and/or modify it
4
under the terms of the GNU General Public License as published by the
5
Free Software Foundation; either version 2, or (at your option) any
6
later version.
7
 
8
In addition to the permissions in the GNU General Public License, the
9
Free Software Foundation gives you unlimited permission to link the
10
compiled version of this file into combinations with other programs,
11
and to distribute those combinations without any restriction coming
12
from the use of this file.  (The General Public License restrictions
13
do apply in other respects; for example, they cover modification of
14
the file, and distribution when not linked into a combine
15
executable.)
16
 
17
This file is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21
 
22
You should have received a copy of the GNU General Public License
23
along with this program; see the file COPYING.  If not, write to
24
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
25
Boston, MA 02110-1301, USA.  */
26
 
27
/* Moderately Space-optimized libgcc routines for the Renesas SH /
28
   STMicroelectronics ST40 CPUs.
29
   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
30
 
31
#include "lib1funcs.h"
32
 
33
#if !__SHMEDIA__
34
#ifdef L_udivsi3_i4i
35
 
36
/* 88 bytes; sh4-200 cycle counts:
37
   divisor  >= 2G: 11 cycles
38
   dividend <  2G: 48 cycles
39
   dividend >= 2G, divisor != 1: 54 cycles
40
   dividend >= 2G, divisor == 1: 22 cycles */
41
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
42
!! args in r4 and r5, result in r0, clobber r1
43
 
44
        /* udivsi3_i4i, FPU variant: unsigned 32-bit divide performed with a
           single double-precision fdiv.
           In:  r4 = dividend, r5 = divisor.
           Out: r0 = quotient.  r1 is clobbered.
           fpscr, fpul, dr0 and dr2 are saved and restored around the divide.
           A divisor >= 2G is handled without the FPU (quotient is 0 or 1).  */
        .global GLOBAL(udivsi3_i4i)
45
        FUNC(GLOBAL(udivsi3_i4i))
46
GLOBAL(udivsi3_i4i):
47
        mova L1,r0                      /* r0 = address of the constant table at L1 */
48
        cmp/pz r5                       /* T = 1 if the divisor is below 2G (non-negative as signed) */
49
        sts fpscr,r1                    /* r1 = caller fpscr */
50
        lds.l @r0+,fpscr                /* fpscr = first table word; r0 now points at the double constant */
51
        sts.l fpul,@-r15                /* push caller fpul */
52
        bf LOCAL(huge_divisor)          /* divisor >= 2G: take the compare-only path */
53
        mov.l r1,@-r15                  /* push caller fpscr */
54
        lds r4,fpul                     /* fpul = dividend */
55
        cmp/pz r4                       /* T = 1 if the dividend is below 2G */
56
#ifdef FMOVD_WORKS
57
        fmov.d dr0,@-r15                /* save dr0 */
58
        float fpul,dr0                  /* dr0 = dividend converted as a signed integer */
59
        fmov.d dr2,@-r15                /* save dr2 */
60
        bt LOCAL(dividend_adjusted)     /* dividend < 2G: signed conversion is already the right value */
61
        mov #1,r1
62
        fmov.d @r0,dr2                  /* dr2 = 4294967296.0 (2^32) from the table */
63
        cmp/eq r1,r5                    /* T = 1 if divisor == 1 */
64
        bt LOCAL(div_by_1)              /* divisor == 1: skip both the adjustment and the divide */
65
        fadd dr2,dr0                    /* dr0 += 2^32, giving the unsigned value of the dividend */
66
LOCAL(dividend_adjusted):
67
        lds r5,fpul
68
        float fpul,dr2                  /* dr2 = divisor (known to be below 2G here) */
69
        fdiv dr2,dr0                    /* dr0 = dividend / divisor */
70
LOCAL(div_by_1):                        /* on the divisor == 1 path dr0 still holds the signed conversion of the dividend */
71
        fmov.d @r15+,dr2                /* restore dr2 */
72
        ftrc dr0,fpul                   /* fpul = dr0 truncated to an integer */
73
        fmov.d @r15+,dr0                /* restore dr0 */
74
#else /* !FMOVD_WORKS */
75
        fmov.s DR01,@-r15               /* save dr0 as two single-precision words */
76
        mov #1,r1
77
        fmov.s DR00,@-r15
78
        float fpul,dr0                  /* dr0 = dividend converted as a signed integer */
79
        fmov.s DR21,@-r15               /* save dr2 as two single-precision words */
80
        bt/s LOCAL(dividend_adjusted)   /* dividend < 2G: no adjustment needed */
81
        fmov.s DR20,@-r15               /* delay slot: executed on both paths */
82
        cmp/eq r1,r5                    /* T = 1 if divisor == 1 */
83
        bt LOCAL(div_by_1)              /* divisor == 1: skip both the adjustment and the divide */
84
        fmov.s @r0+,DR20                /* load the 2^32 constant into dr2, one word at a time */
85
        fmov.s @r0,DR21
86
        fadd dr2,dr0                    /* dr0 += 2^32, giving the unsigned value of the dividend */
87
LOCAL(dividend_adjusted):
88
        lds r5,fpul
89
        float fpul,dr2                  /* dr2 = divisor (known to be below 2G here) */
90
        fdiv dr2,dr0                    /* dr0 = dividend / divisor */
91
LOCAL(div_by_1):                        /* on the divisor == 1 path dr0 still holds the signed conversion of the dividend */
92
        fmov.s @r15+,DR20               /* restore dr2 (reverse of the push order) */
93
        fmov.s @r15+,DR21
94
        ftrc dr0,fpul                   /* fpul = dr0 truncated to an integer */
95
        fmov.s @r15+,DR00               /* restore dr0 */
96
        fmov.s @r15+,DR01
97
#endif /* !FMOVD_WORKS */
98
        lds.l @r15+,fpscr               /* restore caller fpscr */
99
        sts fpul,r0                     /* r0 = quotient */
100
        rts
101
        lds.l @r15+,fpul                /* delay slot: restore caller fpul */
102
 
103
#ifdef FMOVD_WORKS
104
        .p2align 3        ! make double below 8 byte aligned.
105
#endif
106
LOCAL(huge_divisor):                    /* reached with only fpul pushed and fpul itself unchanged */
107
        lds r1,fpscr                    /* restore caller fpscr from r1 */
108
        add #4,r15                      /* discard the pushed fpul */
109
        cmp/hs r5,r4                    /* T = 1 if dividend >= divisor (unsigned) */
110
        rts
111
        movt r0                         /* delay slot: quotient = T, i.e. 0 or 1 */
112
 
113
        .p2align 2
114
L1:                                     /* table: fpscr word loaded at entry, then the 2^32 adjustment constant */
115
#ifndef FMOVD_WORKS
116
        .long 0x80000                   /* NOTE(review): presumably selects double precision -- confirm against the FPSCR bit layout */
117
#else
118
        .long 0x180000                  /* NOTE(review): presumably double precision plus 64-bit fmov -- confirm against the FPSCR bit layout */
119
#endif
120
        .double 4294967296              /* 2^32, added to a dividend that converted as negative */
121
 
122
        ENDFUNC(GLOBAL(udivsi3_i4i))
123
#elif !defined (__sh1__)  /* !__SH_FPU_DOUBLE__ */
124
 
125
#if 0
126
/* With 36 bytes, the following would probably be the most compact
127
   implementation, but with 139 cycles on an sh4-200, it is extremely slow.  */
128
/* Disabled (inside #if 0) loop-based udivsi3_i4i; never assembled.
   Dividend in r4, divisor in r5, result built up in r0.  r2 and r3 are
   saved on entry and restored on exit; r1 is used as a work register.
   NOTE(review): the loop-exit condition and the role of the cmp/gt in the
   delay slot have not been verified, since this code is dead.  */
GLOBAL(udivsi3_i4i):
129
        mov.l r2,@-r15                  /* save r2 */
130
        mov #0,r1                       /* r1 = 0 */
131
        div0u
132
        mov r1,r2                       /* r2 = 0 */
133
        mov.l r3,@-r15                  /* save r3 */
134
        mov r1,r3                       /* r3 = 0 */
135
        sett                            /* T = 1 */
136
        mov r4,r0                       /* r0 = dividend */
137
LOCAL(loop):
138
        rotcr r2                        /* rotate T into the top of r2; bit 0 of r2 goes to T */
139
        ;
140
        bt/s LOCAL(end)                 /* leave the loop once a 1 bit is rotated out of r2 */
141
        cmp/gt r2,r3                    /* delay slot: executed whether or not the branch is taken */
142
        rotcl r0
143
        bra LOCAL(loop)
144
        div1 r5,r1                      /* delay slot: one division step */
145
LOCAL(end):
146
        rotcl r0
147
        mov.l @r15+,r3                  /* restore r3 */
148
        rts
149
        mov.l @r15+,r2                  /* delay slot: restore r2 */
150
#endif /* 0 */
151
 
152
/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
153
   sh4-200 run times:
154
   udiv small divisor: 55 cycles
155
   udiv large divisor: 52 cycles
156
   sdiv small divisor, positive result: 59 cycles
157
   sdiv large divisor, positive result: 56 cycles
158
   sdiv small divisor, negative result: 65 cycles (*)
159
   sdiv large divisor, negative result: 62 cycles (*)
160
   (*): r2 is restored in the rts delay slot and has a lingering latency
161
        of two more cycles.  */
162
        /* udivsi3_i4i, integer variant: unsigned 32-bit divide built from 32
           div1 steps.
           In:  r4 = dividend, r5 = divisor.
           Out: r0 = quotient.  r1 is clobbered (it holds the return address,
           because pr is overwritten by the bsr calls to the helpers below).
           r4 and r5 are pushed on entry and popped before returning.
           The labels sdiv_small_divisor and sdiv_large_divisor are entry
           points used by sdivsi3_i4i, which arrives with r4/r5 already pushed,
           r0 = dividend with its halves swapped, r4 = high half of the
           dividend, and r1 = address to continue at.  */
        .balign 4
163
        .global GLOBAL(udivsi3_i4i)
164
        FUNC(GLOBAL(udivsi3_i4i))
165
        FUNC(GLOBAL(sdivsi3_i4i))
166
GLOBAL(udivsi3_i4i):
167
        sts pr,r1                       /* r1 = return address */
168
        mov.l r4,@-r15                  /* save dividend */
169
        extu.w r5,r0
170
        cmp/eq r5,r0                    /* T = 1 if the divisor fits in 16 bits */
171
        swap.w r4,r0                    /* r0 = dividend with its 16-bit halves swapped */
172
        shlr16 r4                       /* r4 = high half of the dividend */
173
        bf/s LOCAL(large_divisor)       /* divisor needs more than 16 bits */
174
        div0u                           /* delay slot: initialise for unsigned division steps */
175
        mov.l r5,@-r15                  /* save divisor */
176
        shll16 r5                       /* move the 16-bit divisor into the upper half */
177
LOCAL(sdiv_small_divisor):
178
        div1 r5,r4                      /* first group of 16 div1 steps: 1 + (1 + 6) here ... */
179
        bsr LOCAL(div6)
180
        div1 r5,r4                      /* delay slot */
181
        div1 r5,r4                      /* ... and 1 + (1 + 6) here */
182
        bsr LOCAL(div6)
183
        div1 r5,r4                      /* delay slot */
184
        xtrct r4,r0                     /* NOTE(review): r0 appears to become first-group quotient bits : low half of dividend -- confirm */
185
        xtrct r0,r4                     /* NOTE(review): with the swap below, r4 appears to become partial remainder : low half of dividend -- confirm */
186
        bsr LOCAL(div7)                 /* second group of 16 div1 steps: 7 here ... */
187
        swap.w r4,r4                    /* delay slot */
188
        div1 r5,r4                      /* ... and 1 + (1 + 7) here */
189
        bsr LOCAL(div7)
190
        div1 r5,r4                      /* delay slot */
191
        xtrct r4,r0                     /* merge the second group of quotient bits into r0 */
192
        mov.l @r15+,r5                  /* restore divisor */
193
        swap.w r0,r0                    /* put the two quotient halves in order */
194
        mov.l @r15+,r4                  /* restore dividend */
195
        jmp @r1                         /* return (or continue at the address sdivsi3_i4i put in r1) */
196
        rotcl r0                        /* delay slot: shift in the quotient bit left in T by the last div1 */
197
LOCAL(div7):                            /* helper: 7 div1 steps, returns through pr */
198
        div1 r5,r4
199
LOCAL(div6):                            /* helper: 6 div1 steps, the last one in the rts delay slot */
200
                    div1 r5,r4; div1 r5,r4; div1 r5,r4
201
        div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4
202
 
203
LOCAL(divx3):                           /* helper: 3 rotcl/div1 pairs, the last div1 in the rts delay slot */
204
        rotcl r0
205
        div1 r5,r4
206
        rotcl r0
207
        div1 r5,r4
208
        rotcl r0
209
        rts
210
        div1 r5,r4                      /* delay slot */
211
 
212
LOCAL(large_divisor):
213
        mov.l r5,@-r15                  /* save divisor */
214
LOCAL(sdiv_large_divisor):
215
        xor r4,r0                       /* clears the low half of r0, leaving the low half of the dividend in its upper 16 bits */
216
        .rept 4                         /* 4 x (rotcl + div1 + divx3) = 16 rotcl/div1 pairs */
217
        rotcl r0
218
        bsr LOCAL(divx3)
219
        div1 r5,r4                      /* delay slot */
220
        .endr
221
        mov.l @r15+,r5                  /* restore divisor */
222
        mov.l @r15+,r4                  /* restore dividend */
223
        jmp @r1                         /* return (or continue at the address sdivsi3_i4i put in r1) */
224
        rotcl r0                        /* delay slot: shift in the quotient bit left in T by the last div1 */
225
        ENDFUNC(GLOBAL(udivsi3_i4i))
226
 
227
        /* sdivsi3_i4i, integer variant: signed 32-bit divide.
           In:  r4 = dividend, r5 = divisor.
           Out: r0 = quotient.  r1 is clobbered.
           Negative operands are negated, then control enters the div1 code of
           udivsi3_i4i at sdiv_small_divisor or sdiv_large_divisor; that code
           pops the r4/r5 pushed here and finishes with jmp @r1.
           When the operand signs differ, r1 is pointed at negate_result, the
           real return address is kept in r2, and the caller value of r2 is
           parked in macl (so macl is overwritten on that path).  */
        .global GLOBAL(sdivsi3_i4i)
228
GLOBAL(sdivsi3_i4i):
229
        mov.l r4,@-r15                  /* save dividend */
230
        cmp/pz r5                       /* T = 1 if divisor >= 0 */
231
        mov.l r5,@-r15                  /* save divisor */
232
        bt/s LOCAL(pos_divisor)
233
        cmp/pz r4                       /* delay slot: T = 1 if dividend >= 0 */
234
        neg r5,r5                       /* divisor was negative: negate it */
235
        extu.w r5,r0
236
        bt/s LOCAL(neg_result)          /* dividend >= 0, divisor < 0: quotient must be negated */
237
        cmp/eq r5,r0                    /* delay slot: T = 1 if the negated divisor fits in 16 bits */
238
        neg r4,r4                       /* both operands negative: negate the dividend too */
239
LOCAL(pos_result):
240
        swap.w r4,r0                    /* r0 = dividend with its 16-bit halves swapped */
241
        bra LOCAL(sdiv_check_divisor)
242
        sts pr,r1                       /* delay slot: r1 = return address */
243
LOCAL(pos_divisor):
244
        extu.w r5,r0
245
        bt/s LOCAL(pos_result)          /* both operands non-negative */
246
        cmp/eq r5,r0                    /* delay slot: T = 1 if the divisor fits in 16 bits */
247
        neg r4,r4                       /* dividend < 0, divisor >= 0: negate the dividend */
248
LOCAL(neg_result):
249
        mova LOCAL(negate_result),r0
250
        ;
251
        mov r0,r1                       /* r1 = address of negate_result; the divide code jumps there when done */
252
        swap.w r4,r0                    /* r0 = dividend with its 16-bit halves swapped */
253
        lds r2,macl                     /* park caller r2 in macl */
254
        sts pr,r2                       /* r2 = real return address */
255
LOCAL(sdiv_check_divisor):              /* T still holds the fits-in-16-bits result of the last cmp/eq */
256
        shlr16 r4                       /* r4 = high half of the dividend */
257
        bf/s LOCAL(sdiv_large_divisor)
258
        div0u                           /* delay slot: initialise for unsigned division steps */
259
        bra LOCAL(sdiv_small_divisor)
260
        shll16 r5                       /* delay slot: move the 16-bit divisor into the upper half */
261
        .balign 4                       /* NOTE(review): presumably because mova needs a 4-byte aligned target -- confirm */
262
LOCAL(negate_result):
263
        neg r0,r0                       /* quotient = -quotient */
264
        jmp @r2                         /* return to the caller */
265
        sts macl,r2                     /* delay slot: restore caller r2 */
266
        ENDFUNC(GLOBAL(sdivsi3_i4i))
267
#endif /* !__SH_FPU_DOUBLE__ */
268
#endif /* L_udivsi3_i4i */
269
 
270
#ifdef L_sdivsi3_i4i
271
#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
272
/* 48 bytes, 45 cycles on sh4-200  */
273
!! args in r4 and r5, result in r0, clobber r1
274
 
275
        /* sdivsi3_i4i, FPU variant: signed 32-bit divide performed with a
           single double-precision fdiv.
           In:  r4 = dividend, r5 = divisor.
           Out: r0 = quotient.  r1 is clobbered (it carries the caller fpul).
           fpscr, fpul, dr0 and dr2 are saved and restored around the divide.  */
        .global GLOBAL(sdivsi3_i4i)
276
        FUNC(GLOBAL(sdivsi3_i4i))
277
GLOBAL(sdivsi3_i4i):
278
        sts.l fpscr,@-r15               /* push caller fpscr */
279
        sts fpul,r1                     /* r1 = caller fpul */
280
        mova L1,r0                      /* r0 = address of the fpscr word at L1 */
281
        lds.l @r0+,fpscr                /* fpscr = table word */
282
        lds r4,fpul                     /* fpul = dividend */
283
#ifdef FMOVD_WORKS
284
        fmov.d dr0,@-r15                /* save dr0 */
285
        float fpul,dr0                  /* dr0 = dividend converted as a signed integer */
286
        lds r5,fpul                     /* fpul = divisor */
287
        fmov.d dr2,@-r15                /* save dr2 */
288
#else
289
        fmov.s DR01,@-r15               /* save dr0 as two single-precision words */
290
        fmov.s DR00,@-r15
291
        float fpul,dr0                  /* dr0 = dividend converted as a signed integer */
292
        lds r5,fpul                     /* fpul = divisor */
293
        fmov.s DR21,@-r15               /* save dr2 as two single-precision words */
294
        fmov.s DR20,@-r15
295
#endif
296
        float fpul,dr2                  /* dr2 = divisor converted as a signed integer */
297
        fdiv dr2,dr0                    /* dr0 = dividend / divisor */
298
#ifdef FMOVD_WORKS
299
        fmov.d @r15+,dr2                /* restore dr2 */
300
#else
301
        fmov.s @r15+,DR20               /* restore dr2 (reverse of the push order) */
302
        fmov.s @r15+,DR21
303
#endif
304
        ftrc dr0,fpul                   /* fpul = quotient truncated to an integer */
305
#ifdef FMOVD_WORKS
306
        fmov.d @r15+,dr0                /* restore dr0 */
307
#else
308
        fmov.s @r15+,DR00               /* restore dr0 */
309
        fmov.s @r15+,DR01
310
#endif
311
        lds.l @r15+,fpscr               /* restore caller fpscr */
312
        sts fpul,r0                     /* r0 = quotient */
313
        rts
314
        lds r1,fpul                     /* delay slot: restore caller fpul */
315
 
316
        .p2align 2
317
L1:                                     /* fpscr word loaded at entry */
318
#ifndef FMOVD_WORKS
319
        .long 0x80000                   /* NOTE(review): presumably selects double precision -- confirm against the FPSCR bit layout */
320
#else
321
        .long 0x180000                  /* NOTE(review): presumably double precision plus 64-bit fmov -- confirm against the FPSCR bit layout */
322
#endif
323
 
324
        ENDFUNC(GLOBAL(sdivsi3_i4i))
325
#endif /* __SH_FPU_DOUBLE__ */
326
#endif /* L_sdivsi3_i4i */
327
#endif /* !__SHMEDIA__ */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.