OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [newlib/] [newlib/] [libc/] [machine/] [hppa/] [strcpy.S] - Blame information for rev 39

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 39 lampret
/*
2
 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
3
 *
4
 *  To anyone who acknowledges that this file is provided "AS IS"
5
 *  without any express or implied warranty:
6
 *      permission to use, copy, modify, and distribute this file
7
 *  for any purpose is hereby granted without fee, provided that
8
 *  the above copyright notice and this notice appears in all
9
 *  copies, and that the name of Hewlett-Packard Company not be
10
 *  used in advertising or publicity pertaining to distribution
11
 *  of the software without specific, written prior permission.
12
 *  Hewlett-Packard Company makes no representations about the
13
 *  suitability of this software for any purpose.
14
 */
15
 
16
/*
17
        A faster strcpy.
18
 
19
        by
20
 
21
        Jerry Huck (aligned case)
22
        Daryl Odnert (equal-alignment case)
23
        Edgar Circenis (non-aligned case)
24
*/
25
/*
26
 * strcpy(s1, s2)
27
 *
28
 * Copy string s2, including its terminating null byte, to s1.  s1 must be
 * large enough to hold it.
29
 * Returns s1.
30
 */
31
 
32
#include "DEFS.h"
33
 
34
/*
 * Register aliases used by strcpy.
 *
 *   d_addr, s_addr     target and source pointers (presumably the s1 and s2
 *                      arguments -- confirm against the calling convention).
 *                      Both are advanced as the copy proceeds.
 *   evenside, oddside  the two source words in flight in the word-aligned
 *                      copy loop.  They share r19 / r20 with tmp1 / tmp2,
 *                      which the other alignment cases use as scratch.
 *   tmp3, tmp4, tmp6   scratch: shift quantities, alignment bits, bytes.
 *   tmp5               not referenced by the strcpy code in this file.
 *   save               r1; receives the word most recently tested for a
 *                      null byte.
 */
#define d_addr          r26
35
#define s_addr          r25
36
#define tmp6            r24
37
#define tmp1            r19
38
#define evenside        r19
39
#define tmp2            r20
40
#define oddside         r20
41
#define tmp3            r21
42
#define tmp4            r22
43
#define tmp5            arg3
44
#define save            r1
45
 
46
 
47
ENTRY(strcpy)
48
/* Quick alignment check; take the fast path when both pointers are word
   aligned.  After the extru/dep pair tmp6 holds the low two bits of the
   target address (shifted left by two) combined with the low two bits of
   the source address; case_analysis uses it as a dispatch index. */
49
        extru,<>   s_addr,31,2,tmp6    /*Is source word aligned? */
50
        ldwm       4(0,s_addr),oddside /*Assume yes and guess that it
51
                                          is double-word aligned. */
52
        dep,=      d_addr,29,2,tmp6    /*Is target word aligned? */
53
        b          case_analysis       /* skipped when both are word aligned */
54
        copy       d_addr,ret0         /* return value: the original target */
55
/* Both are aligned.  First source word already loaded assuming that
56
   source was oddword aligned.  Fall through (therefore fastest) code
57
   shuffles the registers to join the main loop */
58
bothaligned:
59
        bb,>=    s_addr,29,twoatatime  /*Branch if source was odd aligned*/
60
        uxor,nbz oddside,r0,save       /* delay slot: save = first word; the
                                          next instruction is nullified unless
                                          that word contains a null byte */
61
 
62
/* Even aligned source.  save holds that operand.
63
   Do one iteration of the main copy loop juggling the registers to avoid
64
   one copy. */
65
        b,n      nullfound             /* first word holds the terminator */
66
        ldwm     4(s_addr),oddside
67
        stwm     save,4(d_addr)        /* store the first word */
68
        uxor,nbz oddside,r0,save
69
        b,n      nullfound
70
        ldwm     4(s_addr),evenside
71
        stwm     oddside,4(d_addr)
72
        uxor,nbz evenside,r0,save
73
        b,n      nullfound
74
        ldwm     4(s_addr),oddside
75
 
76
/* Main loop body.  Entry expects evenside still to be stored, oddside
77
   just loaded. */
78
loop:
79
        stwm     evenside,4(d_addr)
80
        uxor,nbz oddside,r0,save       /* save = oddside; any null byte? */
81
 
82
/* mid loop entry */
83
twoatatime:
84
        b,n      nullfound             /* runs only if the preceding uxor
                                          found a null byte */
85
        ldwm     4(s_addr),evenside
86
        stwm     oddside,4(d_addr)
87
        uxor,sbz evenside,r0,save      /* a null byte in evenside nullifies
                                          the loop branch below */
88
        b        loop
89
        ldwm     4(s_addr),oddside     /* delay slot; also executed when the
                                          branch is nullified */
90
 
91
/* fall through when null found in evenside.  oddside actually loaded */
/* Common tail: save holds the word that contains the terminating null and
   d_addr addresses the place where that word belongs.  Each extru below
   tests one byte of save, starting from the left; the addib that follows
   it is nullified while the tested byte is nonzero. */
92
nullfound:                              /* adjust d_addr and store final word */
93
 
94
        extru,<>        save,7,8,r0         /* pick up leftmost byte */
95
        addib,tr,n      1,d_addr,store_final   /* null is the 1st byte */
96
        extru,<>        save,15,8,r0
97
        addib,tr,n      2,d_addr,store_final   /* null is the 2nd byte */
98
        extru,<>        save,23,8,r0
99
        addib,tr        3,d_addr,store_final2  /* null is the 3rd byte; the bv
                                                  below is its delay slot */
100
        bv              0(rp)               /* null is the 4th byte: return ... */
101
        stw             save,0(d_addr)      /* ... storing the whole word */
102
 
103
store_final:
104
        bv              0(rp)
105
store_final2:
106
        stbys,e         save,0(d_addr)  /* delay slot */
                                        /* partial-word store of the leading
                                           bytes of save; presumably up to and
                                           including the null -- confirm
                                           against the STBYS,e definition */
107
 
108
/* Dispatch on the alignment code built in tmp6 at function entry.  The
   table that follows the blr consists of two-instruction entries (a branch
   and its delay slot), one per TGT/SRC alignment combination, in the order
   given by the comments on each entry.  Do not insert or remove
   instructions inside the table: the entries are reached by index. */
case_analysis:
109
 
110
        blr         tmp6,r0            /* jump to table entry number tmp6 */
111
        nop                            /* delay slot */
112
 
113
        /* NOTE: the delay slots for the non-aligned cases load a   */
114
        /* shift quantity which is TGT-SRC into tmp3.               */
115
        /* Note also, the case for both strings being word aligned  */
116
        /* is already checked before the BLR is executed, so that   */
117
        /* case can never occur.                                    */
118
 
119
                                       /* TGT SRC */
120
        nop                            /* 00  00  can't happen */
121
        nop
122
        b           neg_aligned_copy   /* 00  01  */
123
        ldi         -1,tmp3            /* load shift quantity. delay slot */
124
        b           neg_aligned_copy   /* 00  10  */
125
        ldi         -2,tmp3            /* load shift quantity. delay slot */
126
        b           neg_aligned_copy   /* 00  11  */
127
        ldi         -3,tmp3            /* load shift quantity. delay slot */
128
        b           pos_aligned_copy0  /* 01  00  */
129
        ldi         1,tmp3            /* load shift quantity. delay slot */
130
        b           equal_alignment_1  /* 01  01  */
131
        ldbs,ma     1(s_addr),tmp1     /* delay slot: load first source byte */
132
        b           neg_aligned_copy   /* 01  10  */
133
        ldi         -1,tmp3            /* load shift quantity. delay slot */
134
        b           neg_aligned_copy   /* 01  11  */
135
        ldi         -2,tmp3            /* load shift quantity. delay slot */
136
        b           pos_aligned_copy0  /* 10  00  */
137
        ldi         2,tmp3            /* load shift quantity. delay slot */
138
        b           pos_aligned_copy   /* 10  01  */
139
        ldi         1,tmp3            /* load shift quantity. delay slot */
140
        b           equal_alignment_2  /* 10  10  */
141
        ldhs,ma     2(s_addr),tmp1     /* delay slot: load first halfword */
142
        b           neg_aligned_copy   /* 10  11  */
143
        ldi         -1,tmp3            /* load shift quantity. delay slot */
144
        b           pos_aligned_copy0  /* 11  00  */
145
        ldi         3,tmp3            /* load shift quantity. delay slot */
146
        b           pos_aligned_copy   /* 11  01  */
147
        ldi         2,tmp3            /* load shift quantity. delay slot */
148
        b           pos_aligned_copy   /* 11  10  */
149
        ldi         1,tmp3            /* load shift quantity. delay slot */
        /* Last entry is handled inline: both pointers are one byte short of
           a word boundary, so copy that byte and join the aligned path. */
150
        ldbs,ma     1(s_addr),tmp1     /* 11  11  */
151
        comiclr,<>  r0,tmp1,r0         /* nullify the return if byte is nonzero */
152
        bv          0(rp)              /* return if 1st byte was null */
153
        stbs,ma     tmp1,1(d_addr)     /* store a byte to dst string  */
154
        b           bothaligned       /* can now goto word_aligned   */
155
        ldwm        4(s_addr),oddside     /* load next word of source    */
156
 
157
/* Source and target share the same non-zero alignment.  Copy single bytes
   until both pointers reach a word boundary, returning early if the
   terminating null is among them, then join the word-aligned path.
   On entry tmp1 holds the byte (equal_alignment_1) or halfword
   (equal_alignment_2) loaded in the delay slot of the dispatching branch. */
equal_alignment_1:
158
        comiclr,<>  r0,tmp1,r0      /* nullify next if tmp1 <> 0  */
159
        bv          0(rp)           /* return if null byte found  */
160
        stbs,ma     tmp1,1(d_addr)  /* store a byte to dst string */
161
        ldhs,ma     2(s_addr),tmp1  /* load next halfword         */
162
equal_alignment_2:
163
        extru,<>    tmp1,23,8,tmp6  /* look at left byte of halfword */
164
        bv          0(rp)           /* return if 1st byte was null */
165
        stbs,ma     tmp6,1(d_addr)  /* store left byte (the null when returning) */
166
        extru,<>    tmp1,31,8,r0    /* test right byte of halfword */
167
        bv          0(rp)           /* return if 2nd byte was null */
168
        stbs,ma     tmp1,1(d_addr)  /* store right byte (low 8 bits of tmp1) */
169
        b           bothaligned
170
        ldwm        4(s_addr),oddside  /* load next word              */
171
 
172
/* source and destination are not aligned, so we do it the hard way. */
173
 
174
/* target alignment is greater than source alignment */
/* On entry tmp3 holds the TGT-SRC shift quantity loaded by the dispatch
   table.  pos_aligned_copy0 is the entry used by the table rows whose
   source alignment is 00; it first backs s_addr up by 4, presumably to
   undo the speculative ldwm executed at function entry when the source
   was word aligned -- confirm. */
175
pos_aligned_copy0:
176
        addi            -4,s_addr,s_addr    /* back the source pointer up 4 bytes */
177
pos_aligned_copy:
178
        extru       d_addr,31,2,tmp6   /* Extract low 2 bits of the dest addr */
179
        extru       s_addr,31,2,tmp1   /* Extract low 2 bits of the src addr */
180
        dep         r0,31,2,s_addr     /* Compute word address of the source. */
181
        sh3add          tmp3,r0,tmp4        /* compute shift amt */
                                            /* (tmp4 = tmp3 * 8) */
182
        ldwm            4(0,s_addr),tmp2    /* get 1st source word */
183
        sh3add          tmp1,r0,save        /* setup mask shift amount */
184
        mtctl           save,r11            /* set-up cr11 for mask */
185
        zvdepi          -2,32,save          /* create mask */
186
        or              save,tmp2,tmp2      /* mask unused bytes in src */
187
        ldi             -1,tmp1             /* load tmp1 with 0xffffffff */
188
        mtctl           tmp4,r11            /* shift count -> shift count reg */
189
        vshd            tmp1,tmp2,tmp3      /* position data ! */
190
        uxor,nbz        tmp3,r0,save        /* null in the positioned word? */
191
        b,n             first_null          /* yes: finish byte by byte */
192
        uxor,nbz        tmp2,r0,save        /* null in the source word itself? */
193
        b               nullfound1
194
        mtctl           tmp4,r11            /* re-load shift cnt (delay slot) */
195
        b               loop_entry
196
        ldwm            4(0,s_addr),tmp1    /* get next word. delay slot */
197
 
198
/* Reached from the dispatch table entries whose TGT-SRC shift quantity is
   negative (source alignment greater than target alignment).  On entry
   tmp3 holds that shift quantity. */
neg_aligned_copy:
199
        extru       d_addr,31,2,tmp6   /* Extract low 2 bits of the dest addr */
200
        extru       s_addr,31,2,tmp2   /* Extract low 2 bits of the src addr */
201
        dep         r0,31,2,s_addr     /* Compute word address of the source. */
202
        sh3add          tmp3,r0,tmp4        /* compute shift amt */
                                            /* (tmp4 = tmp3 * 8) */
203
        ldwm            4(0,s_addr),tmp1    /* load first word from source. */
204
/* check to see if next word can be read safely */
205
        sh3add          tmp2,r0,save
206
        mtctl           save,r11            /* shift count -> shift count reg */
207
        zvdepi          -2,32,save          /* build mask for the bytes that
                                               precede the string start */
208
        or              save, tmp1, tmp1    /* make those bytes non-null */
209
        uxor,nbz        tmp1,r0,save        /* any nulls in first word? */
210
        b               first_null0
211
        mtctl           tmp4,r11            /* delay slot; also runs when the
                                               branch above is nullified */
212
        ldwm            4(0,s_addr),tmp2    /* load second word from source */
213
        combt,=         tmp6,r0,chunk1      /* don't mask if whole word valid */
214
        vshd            tmp1,tmp2,tmp3      /* position data ! */
215
        sh3add          tmp6,r0,save        /* setup r1 */
216
        mtctl           save,r11            /* set-up cr11 for mask */
217
        zvdepi          -2,32,save
218
        or              save, tmp3, tmp3
219
        uxor,nbz        tmp3,r0,save        /* null in the positioned word? */
220
        b,n             first_null
221
        uxor,nbz        tmp2,r0,save        /* null in the second source word? */
222
        b               nullfound1
223
        mtctl           tmp4,r11            /* re-load shift cnt (delay slot) */
224
        b               loop_entry
225
        ldwm            4(0,s_addr),tmp1    /* get next word. delay slot */
226
 
227
/* Main loop for the non-aligned cases.  tmp1 and tmp2 alternate as the
   previous / newest source word; vshd combines the pair into tmp3, which
   is then stored to the target.  Several branches here have their delay
   slot under the next label, so the instruction order must not change. */
chunk1:
228
        uxor,nbz        tmp2,r0,save        /* null in the second source word? */
229
        b               nullfound0
230
        vshd            tmp1,tmp2,tmp3      /* delay slot; also runs when the
                                               branch above is nullified */
231
did_mask:
232
        ldwm            4(0,s_addr),tmp1    /* get next word !  */
233
loop_entry:
234
        stbys,b,m       tmp3,4(0,d_addr)    /* store !  */
235
 
236
        uxor,nbz        tmp1, r0, save      /* null in the word just loaded? */
237
        b               nullfound2
238
        vshd            tmp2,tmp1,tmp3      /* position data !  */
239
        ldwm            4(s_addr),tmp2
240
        stwm            tmp3,4(d_addr)
241
        uxor,sbz        tmp2,r0,save        /* a null byte nullifies the loop
                                               branch below */
242
        b               did_mask
243
nullfound0:
244
        vshd            tmp1,tmp2,tmp3      /* delay slot */
245
        uxor,nbz        tmp3,r0,save        /* is the null in the positioned word? */
246
        b,n             nullfound
247
nullfound1:
248
        stbys,b,m       tmp3,4(0,d_addr)
249
        b               nullfound
250
        vshd            tmp2,r0,save        /* delay slot */
251
 
252
nullfound2:
253
        uxor,nbz        tmp3,r0,save        /* is the null in the positioned word? */
254
        b,n             nullfound
255
        stwm            tmp3,4(d_addr)
256
        b               nullfound
257
        /* notice that delay slot is in next routine */
258
 
259
/* The terminating null lies within the first word of a non-aligned copy.
   save holds the data to emit; tmp6 holds the low two bits of the target
   address (extracted by the caller) and selects how many bytes of the
   word remain to be examined.  The remaining bytes are stored one at a
   time until the null has been written. */
first_null0:    /* null found in first word of non-aligned (wrt d_addr) */
260
        vshd            tmp1,r0,save        /* delay slot */
                                            /* (this instruction is also the
                                               delay slot of the branch that
                                               ends nullfound2 just above) */
261
        combt,=         tmp6,r0,check4      /* target word aligned: check all 4 */
262
        extru           save,7,8,tmp4       /* delay slot: tmp4 = leftmost byte */
263
first_null:
264
        addibt,=        -1,tmp6,check3  /* check last 3 bytes of word */
265
        extru           save,15,8,tmp4  /* delay slot: tmp4 = 2nd byte */
266
        addibt,=,n      -1,tmp6,check2  /* check last 2 bytes */
267
        bv              0(rp)           /* null in last byte--store and exit */
268
        stbys,b         save, 0(d_addr)
269
 
270
check4:
271
        combt,=         tmp4,r0,done    /* byte is the null: store it and finish */
272
        stbs,ma         tmp4,1(d_addr)  /* delay slot: store the byte */
273
        extru,<>        save,15,8,tmp4
274
check3:
275
        combt,=         tmp4,r0,done
276
        stbs,ma         tmp4,1(d_addr)  /* delay slot: store the byte */
277
check2:
278
        extru,<>        save,23,8,tmp4  /* nullify the return if 3rd byte nonzero */
279
        bv              0(rp)           /* 3rd byte is the null: return ... */
280
        stbs,ma         tmp4,1(d_addr)  /* ... after storing it (runs either way) */
281
        bv              0(rp)
282
        stbs            r0,0(d_addr)    /* delay slot: store the terminating null */
283
 
284
done:
285
/* EXIT is a macro from DEFS.h, which is not visible here; presumably it
   emits the function return and epilogue -- confirm. */
EXIT(strcpy)

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.