OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [newlib-1.18.0/] [newlib/] [libc/] [machine/] [arm/] [strcmp.c] - Blame information for rev 816

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 207 jeremybenn
/*
2
 * Copyright (c) 2008 ARM Ltd
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 * 3. The name of the company may not be used to endorse or promote
14
 *    products derived from this software without specific prior written
15
 *    permission.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
 */
28
 
29
#include "arm_asm.h"
30
#include <_ansi.h>
31
#include <string.h>
32
 
33
/* Endianness-dependent fragments that are pasted into the assembler
   strings below.  With __ARMEB__ defined (big-endian) SHFT2LSB is "lsl",
   SHFT2MSB is "lsr", MSB is the mask 0x000000ff and LSB is 0xff000000;
   otherwise (little-endian) each one takes the opposite value.
   NOTE(review): "LSB"/"MSB" here seem to mean the byte that comes
   first/last in string (memory) order within a loaded word, not numeric
   significance -- confirm before reusing these names elsewhere.  */
#ifdef __ARMEB__
34
#define SHFT2LSB "lsl"
35
#define SHFT2MSB "lsr"
36
#define MSB "0x000000ff"
37
#define LSB "0xff000000"
38
#else
39
#define SHFT2LSB "lsr"
40
#define SHFT2MSB "lsl"
41
#define MSB "0xff000000"
42
#define LSB "0x000000ff"
43
#endif
44
 
45
/* Operand text for the two constants used by the zero-byte test
   (0x01010101 and 0x80808080).
   For __thumb2__ both macros ignore REG and expand to an immediate.
   Otherwise magic1(REG) expands to the register name itself and
   magic2(REG) to that register followed by ", lsl #7"; the caller is then
   responsible for having loaded 0x01010101 into REG beforehand
   (0x01010101 << 7 == 0x80808080).  */
#ifdef __thumb2__
46
#define magic1(REG) "#0x01010101"
47
#define magic2(REG) "#0x80808080"
48
#else
49
#define magic1(REG) #REG
50
#define magic2(REG) #REG ", lsl #7"
51
#endif
52
 
53
/* strcmp -- compare two NUL-terminated strings.

   The function is declared naked and its whole body is one asm
   statement; the assembly reads the two pointers from r0 (s1) and r1 (s2)
   and leaves the result in r0.  In every variant the result is the
   difference between the first pair of bytes that differ (or the
   terminating bytes), with both bytes treated as unsigned values.

   One of three bodies is selected by the preprocessor:
     - default (not __OPTIMIZE_SIZE__, not PREFER_SIZE_OVER_SPEED, and not
       Thumb-1): word-at-a-time compare when both pointers share the same
       offset within a word; otherwise it branches to strcmp_unaligned.
     - Thumb-1 (__thumb__ without __thumb2__): simple byte loop.
     - otherwise (size-optimised): byte loop using post-indexed loads.

   "optpld" and "RETURN" are not defined in this file.
   NOTE(review): presumably assembler macros supplied by arm_asm.h --
   confirm.  */
int
54
__attribute__((naked)) strcmp (const char* s1, const char* s2)
55
{
56
  asm(
57
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
58
      (defined (__thumb__) && !defined (__thumb2__)))
59
      "optpld   r0\n\t"
60
      "optpld   r1\n\t"
61
      /* r2 = s1 ^ s2; its low two bits are non-zero when the pointers
         sit at different offsets within a word.  */
      "eor      r2, r0, r1\n\t"
62
      "tst      r2, #3\n\t"
63
      /* Strings not at same byte offset from a word boundary.  */
64
      "bne      strcmp_unaligned\n\t"
65
      /* r2 = offset of s1 within its word (Z set when already aligned);
         both pointers are then rounded down to a word boundary.  */
      "ands     r2, r0, #3\n\t"
66
      "bic      r0, r0, #3\n\t"
67
      "bic      r1, r1, #3\n\t"
68
      "ldr      ip, [r0], #4\n\t"
69
      "it       eq\n\t"
70
      "ldreq    r3, [r1], #4\n\t"
71
      "beq      1f\n\t"
72
      /* Although s1 and s2 have identical initial alignment, they are
73
         not currently word aligned.  Rather than comparing bytes,
74
         make sure that any bytes fetched from before the addressed
75
         bytes are forced to 0xff.  Then they will always compare
76
         equal.  */
77
      "eor      r2, r2, #3\n\t"
78
      "lsl      r2, r2, #3\n\t"
79
      "mvn      r3, #"MSB"\n\t"
80
      SHFT2LSB" r2, r3, r2\n\t"
81
      "ldr      r3, [r1], #4\n\t"
82
      "orr      ip, ip, r2\n\t"
83
      "orr      r3, r3, r2\n"
84
 "1:\n\t"
85
#ifndef __thumb2__
86
      /* Load the 'magic' constant 0x01010101.  */
87
      /* r4 is saved on the stack first; it is reloaded just before
         RETURN at the end of this variant.  */
      "str      r4, [sp, #-4]!\n\t"
88
      "mov      r4, #1\n\t"
89
      "orr      r4, r4, r4, lsl #8\n\t"
90
      "orr      r4, r4, r4, lsl #16\n"
91
#endif
92
      ".p2align 2\n"
93
      /* Main loop: ip holds a word of s1, r3 the matching word of s2.
         The loop continues only while the words are equal and
         (ip - 0x01010101) & ~ip & 0x80808080 is zero.  */
 "4:\n\t"
94
      "optpld   r0, #8\n\t"
95
      "optpld   r1, #8\n\t"
96
      "sub      r2, ip, "magic1(r4)"\n\t"
97
      "cmp      ip, r3\n\t"
98
      "itttt    eq\n\t"
99
      /* check for any zero bytes in first word */
100
      "biceq    r2, r2, ip\n\t"
101
      "tsteq    r2, "magic2(r4)"\n\t"
102
      "ldreq    ip, [r0], #4\n\t"
103
      "ldreq    r3, [r1], #4\n\t"
104
      "beq      4b\n"
105
      /* Byte-by-byte scan of the final word pair: each pass isolates one
         byte of ip in r0, stops if it is zero (cmp r0, #1 clears carry)
         or differs from the corresponding byte of r3, and otherwise
         shifts both words on by 8 bits.  */
 "2:\n\t"
106
      /* There's a zero or a different byte in the word */
107
      SHFT2MSB" r0, ip, #24\n\t"
108
      SHFT2LSB" ip, ip, #8\n\t"
109
      "cmp      r0, #1\n\t"
110
      "it       cs\n\t"
111
      "cmpcs    r0, r3, "SHFT2MSB" #24\n\t"
112
      "it       eq\n\t"
113
      SHFT2LSB"eq r3, r3, #8\n\t"
114
      "beq      2b\n\t"
115
      /* On a big-endian machine, r0 contains the desired byte in bits
116
         0-7; on a little-endian machine they are in bits 24-31.  In
117
         both cases the other bits in r0 are all zero.  For r3 the
118
         interesting byte is at the other end of the word, but the
119
         other bits are not necessarily zero.  We need a signed result
120
         representing the difference in the unsigned bytes, so for the
121
         little-endian case we can't just shift the interesting bits
122
         up.  */
123
#ifdef __ARMEB__
124
      "sub      r0, r0, r3, lsr #24\n\t"
125
#else
126
      "and      r3, r3, #255\n\t"
127
#ifdef __thumb2__
128
      /* No RSB instruction in Thumb2 */
129
      "lsr      r0, r0, #24\n\t"
130
      "sub      r0, r0, r3\n\t"
131
#else
132
      "rsb      r0, r3, r0, lsr #24\n\t"
133
#endif
134
#endif
135
#ifndef __thumb2__
136
      /* Restore the r4 saved before the main loop.  */
      "ldr      r4, [sp], #4\n\t"
137
#endif
138
      "RETURN"
139
#elif (defined (__thumb__) && !defined (__thumb2__))
140
      /* Thumb-1 variant: load one byte from each string, advance both
         pointers, stop on a zero byte in s1 or on a mismatch, and return
         r2 - r3.  */
  "1:\n\t"
141
      "ldrb     r2, [r0]\n\t"
142
      "ldrb     r3, [r1]\n\t"
143
      "add      r0, r0, #1\n\t"
144
      "add      r1, r1, #1\n\t"
145
      "cmp      r2, #0\n\t"
146
      "beq      2f\n\t"
147
      "cmp      r2, r3\n\t"
148
      "beq      1b\n\t"
149
  "2:\n\t"
150
      "sub      r0, r2, r3\n\t"
151
      "bx       lr"
152
#else
153
      /* Size-optimised variant: byte loop with post-indexed loads.
         "cmp r2, #1" sets carry only when r2 is non-zero, so the
         conditional compare against r3 (and hence the loop-back on
         equality) happens only for non-terminator bytes.  */
 "3:\n\t"
154
      "ldrb     r2, [r0], #1\n\t"
155
      "ldrb     r3, [r1], #1\n\t"
156
      "cmp      r2, #1\n\t"
157
      "it       cs\n\t"
158
      "cmpcs    r2, r3\n\t"
159
      "beq      3b\n\t"
160
      "sub      r0, r2, r3\n\t"
161
      "RETURN"
162
#endif
163
      );
164
}
165
 
166
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
167
      (defined (__thumb__) && !defined (__thumb2__)))
168
/* strcmp_unaligned -- continuation of strcmp for the case where s1 and
   s2 sit at different byte offsets within a word.

   This is a naked function marked "used"; strcmp reaches it with a plain
   branch to the symbol, so r0/r1 still hold s1/s2 and the RETURN here
   ends the original strcmp call.  The result is left in r0.

   Outline of the assembly:
     1. Compare single bytes until wp1 (r0) is word-aligned.
     2. Save r4 and r5 on the stack, build 0x01010101 in b1, round wp2
        down to a word boundary and load one word from each string.
     3. Dispatch on (wp2 & 3) to one of three loops, which the comments
        below call "3 bytes", "2 bytes" and "1 byte" initial overlap.
        Each loop compares the overlapping parts of the words and tests
        the s1 word for a zero byte.
     4. Label 7 returns 0; label 8 compares the remaining bytes held in
        t1 and w2 one at a time and returns their difference.  Both
        restore r4 and r5 first.

   Register aliases are set up with .req: wp1=r0, wp2=r1, b1=r2, w1=r4,
   w2=r5, t1=ip; r3 is scratch.  The "#if 0" block is a C rendering of
   the algorithm kept for reference only; it is never compiled.  */
static int __attribute__((naked, used))
169
strcmp_unaligned(const char* s1, const char* s2)
170
{
171
#if 0
172
  /* The assembly code below is based on the following algorithm.  */
173
#ifdef __ARMEB__
174
#define RSHIFT <<
175
#define LSHIFT >>
176
#else
177
#define RSHIFT >>
178
#define LSHIFT <<
179
#endif
180
 
181
#define body(shift)                                                     \
182
  mask = 0xffffffffU RSHIFT shift;                                      \
183
  w1 = *wp1++;                                                          \
184
  w2 = *wp2++;                                                          \
185
  do                                                                    \
186
    {                                                                   \
187
      t1 = w1 & mask;                                                   \
188
      if (__builtin_expect(t1 != w2 RSHIFT shift, 0))                    \
189
        {                                                               \
190
          w2 RSHIFT= shift;                                             \
191
          break;                                                        \
192
        }                                                               \
193
      if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0))            \
194
        {                                                               \
195
          /* See comment in assembler below re syndrome on big-endian */\
196
          if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask)                   \
197
            w2 RSHIFT= shift;                                           \
198
          else                                                          \
199
            {                                                           \
200
              w2 = *wp2;                                                \
201
              t1 = w1 RSHIFT (32 - shift);                              \
202
              w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift);        \
203
            }                                                           \
204
          break;                                                        \
205
        }                                                               \
206
      w2 = *wp2++;                                                      \
207
      t1 ^= w1;                                                         \
208
      if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0))             \
209
        {                                                               \
210
          t1 = w1 >> (32 - shift);                                      \
211
          w2 = (w2 << (32 - shift)) RSHIFT (32 - shift);                \
212
          break;                                                        \
213
        }                                                               \
214
      w1 = *wp1++;                                                      \
215
    } while (1)
216
 
217
  const unsigned* wp1;
218
  const unsigned* wp2;
219
  unsigned w1, w2;
220
  unsigned mask;
221
  unsigned shift;
222
  unsigned b1 = 0x01010101;
223
  char c1, c2;
224
  unsigned t1;
225
 
226
  while (((unsigned) s1) & 3)
227
    {
228
      c1 = *s1++;
229
      c2 = *s2++;
230
      if (c1 == 0 || c1 != c2)
231
        return c1 - (int)c2;
232
    }
233
  wp1 = (unsigned*) (((unsigned)s1) & ~3);
234
  wp2 = (unsigned*) (((unsigned)s2) & ~3);
235
  t1 = ((unsigned) s2) & 3;
236
  if (t1 == 1)
237
    {
238
      body(8);
239
    }
240
  else if (t1 == 2)
241
    {
242
      body(16);
243
    }
244
  else
245
    {
246
      body (24);
247
    }
248
 
249
  do
250
    {
251
#ifdef __ARMEB__
252
      c1 = (char) t1 >> 24;
253
      c2 = (char) w2 >> 24;
254
#else
255
      c1 = (char) t1;
256
      c2 = (char) w2;
257
#endif
258
      t1 RSHIFT= 8;
259
      w2 RSHIFT= 8;
260
    } while (c1 != 0 && c1 == c2);
261
  return c1 - c2;
262
#endif
263
 
264
  asm("wp1 .req r0\n\t"
265
      "wp2 .req r1\n\t"
266
      "b1  .req r2\n\t"
267
      "w1  .req r4\n\t"
268
      "w2  .req r5\n\t"
269
      "t1  .req ip\n\t"
270
      "@ r3 is scratch\n"
271
 
272
      /* First of all, compare bytes until wp1(sp1) is word-aligned. */
273
 "1:\n\t"
274
      "tst      wp1, #3\n\t"
275
      "beq      2f\n\t"
276
      "ldrb     r2, [wp1], #1\n\t"
277
      "ldrb     r3, [wp2], #1\n\t"
278
      "cmp      r2, #1\n\t"
279
      "it       cs\n\t"
280
      "cmpcs    r2, r3\n\t"
281
      "beq      1b\n\t"
282
      "sub      r0, r2, r3\n\t"
283
      "RETURN\n"
284
 
285
      /* wp1 is now word-aligned.  Save the callee-used r4/r5 (w1/w2),
         which are restored at labels 7 and 8, and build the constant
         0x01010101 in b1.  */
 "2:\n\t"
286
      "str      r5, [sp, #-4]!\n\t"
287
      "str      r4, [sp, #-4]!\n\t"
288
      //      "stmfd    sp!, {r4, r5}\n\t"
289
      "mov      b1, #1\n\t"
290
      "orr      b1, b1, b1, lsl #8\n\t"
291
      "orr      b1, b1, b1, lsl #16\n\t"
292
 
293
      /* t1 = offset of wp2 within its word; wp2 is rounded down and the
         first word of each string is loaded.  Offset 2 goes to label 2,
         an offset above 2 to label 3, anything else falls through to
         label 1.  */
      "and      t1, wp2, #3\n\t"
294
      "bic      wp2, wp2, #3\n\t"
295
      "ldr      w1, [wp1], #4\n\t"
296
      "ldr      w2, [wp2], #4\n\t"
297
      "cmp      t1, #2\n\t"
298
      "beq      2f\n\t"
299
      "bhi      3f\n"
300
 
301
      /* Critical inner Loop: Block with 3 bytes initial overlap */
302
      ".p2align 2\n"
303
 "1:\n\t"
304
      "bic      t1, w1, #"MSB"\n\t"
305
      "cmp      t1, w2, "SHFT2LSB" #8\n\t"
306
      /* r3 = (w1 - b1) & ~w1; ANDed with b1 << 7 below it is the
         zero-byte syndrome for w1 (same expression as in the C sketch
         above).  */
      "sub      r3, w1, b1\n\t"
307
      "bic      r3, r3, w1\n\t"
308
      "bne      4f\n\t"
309
      "ands     r3, r3, b1, lsl #7\n\t"
310
      "it       eq\n\t"
311
      "ldreq    w2, [wp2], #4\n\t"
312
      "bne      5f\n\t"
313
      "eor      t1, t1, w1\n\t"
314
      "cmp      t1, w2, "SHFT2MSB" #24\n\t"
315
      "bne      6f\n\t"
316
      "ldr      w1, [wp1], #4\n\t"
317
      "b        1b\n"
318
      /* Mismatch within the overlapping bytes: align w2 with t1 and let
         label 8 find the differing byte.  */
 "4:\n\t"
319
      SHFT2LSB" w2, w2, #8\n\t"
320
      "b        8f\n"
321
 
322
      /* Syndrome was non-zero.  If it flags one of the bytes that have
         already compared equal, branch to label 7 (return 0); otherwise
         fetch the one remaining byte of s2 and compare it at label 8.  */
 "5:\n\t"
323
#ifdef __ARMEB__
324
      /* The syndrome value may contain false ones if the string ends
325
         with the bytes 0x01 0x00 */
326
      "tst      w1, #0xff000000\n\t"
327
      "itt      ne\n\t"
328
      "tstne    w1, #0x00ff0000\n\t"
329
      "tstne    w1, #0x0000ff00\n\t"
330
      "beq      7f\n\t"
331
#else
332
      "bics     r3, r3, #0xff000000\n\t"
333
      "bne      7f\n\t"
334
#endif
335
      "ldrb     w2, [wp2]\n\t"
336
      SHFT2LSB" t1, w1, #24\n\t"
337
#ifdef __ARMEB__
338
      "lsl      w2, w2, #24\n\t"
339
#endif
340
      "b        8f\n"
341
 
342
      /* Mismatch between the last byte of w1 and the first byte of the
         newly loaded w2: isolate both and compare at label 8.  */
 "6:\n\t"
343
      SHFT2LSB" t1, w1, #24\n\t"
344
      "and      w2, w2, #"LSB"\n\t"
345
      "b        8f\n"
346
 
347
      /* Critical inner Loop: Block with 2 bytes initial overlap */
348
      ".p2align 2\n"
349
 "2:\n\t"
350
      SHFT2MSB" t1, w1, #16\n\t"
351
      "sub      r3, w1, b1\n\t"
352
      SHFT2LSB" t1, t1, #16\n\t"
353
      "bic      r3, r3, w1\n\t"
354
      "cmp      t1, w2, "SHFT2LSB" #16\n\t"
355
      "bne      4f\n\t"
356
      "ands     r3, r3, b1, lsl #7\n\t"
357
      "it       eq\n\t"
358
      "ldreq    w2, [wp2], #4\n\t"
359
      "bne      5f\n\t"
360
      "eor      t1, t1, w1\n\t"
361
      "cmp      t1, w2, "SHFT2MSB" #16\n\t"
362
      "bne      6f\n\t"
363
      "ldr      w1, [wp1], #4\n\t"
364
      "b        2b\n"
365
 
366
      /* As label 5 of the previous loop, but with two bytes already
         compared and a halfword of s2 still to fetch.  */
 "5:\n\t"
367
#ifdef __ARMEB__
368
      /* The syndrome value may contain false ones if the string ends
369
         with the bytes 0x01 0x00 */
370
      "tst      w1, #0xff000000\n\t"
371
      "it       ne\n\t"
372
      "tstne    w1, #0x00ff0000\n\t"
373
      "beq      7f\n\t"
374
#else
375
      "lsls     r3, r3, #16\n\t"
376
      "bne      7f\n\t"
377
#endif
378
      "ldrh     w2, [wp2]\n\t"
379
      SHFT2LSB" t1, w1, #16\n\t"
380
#ifdef __ARMEB__
381
      "lsl      w2, w2, #16\n\t"
382
#endif
383
      "b        8f\n"
384
 
385
      /* Label 6 deliberately falls through into label 4: w2 is shifted
         one way and then back by 16 bits, which clears the half of w2
         that is not being compared.  */
 "6:\n\t"
386
      SHFT2MSB" w2, w2, #16\n\t"
387
      SHFT2LSB" t1, w1, #16\n\t"
388
 "4:\n\t"
389
      SHFT2LSB" w2, w2, #16\n\t"
390
      "b        8f\n\t"
391
 
392
      /* Critical inner Loop: Block with 1 byte initial overlap */
393
      ".p2align 2\n"
394
 "3:\n\t"
395
      "and      t1, w1, #"LSB"\n\t"
396
      "cmp      t1, w2, "SHFT2LSB" #24\n\t"
397
      "sub      r3, w1, b1\n\t"
398
      "bic      r3, r3, w1\n\t"
399
      "bne      4f\n\t"
400
      "ands     r3, r3, b1, lsl #7\n\t"
401
      "it       eq\n\t"
402
      "ldreq    w2, [wp2], #4\n\t"
403
      "bne      5f\n\t"
404
      "eor      t1, t1, w1\n\t"
405
      "cmp      t1, w2, "SHFT2MSB" #8\n\t"
406
      "bne      6f\n\t"
407
      "ldr      w1, [wp1], #4\n\t"
408
      "b        3b\n"
409
 "4:\n\t"
410
      SHFT2LSB" w2, w2, #24\n\t"
411
      "b        8f\n"
412
 "5:\n\t"
413
      /* The syndrome value may contain false ones if the string ends
414
         with the bytes 0x01 0x00 */
415
      "tst      w1, #"LSB"\n\t"
416
      "beq      7f\n\t"
417
      "ldr      w2, [wp2], #4\n"
418
 "6:\n\t"
419
      SHFT2LSB" t1, w1, #8\n\t"
420
      "bic      w2, w2, #"MSB"\n\t"
421
      "b        8f\n"
422
      /* Strings are equal: return 0 after restoring r4 and r5.  */
 "7:\n\t"
423
      "mov      r0, #0\n\t"
424
      //      "ldmfd    sp!, {r4, r5}\n\t"
425
      "ldr      r4, [sp], #4\n\t"
426
      "ldr      r5, [sp], #4\n\t"
427
      "RETURN\n"
428
      /* Final byte-wise compare of the bytes remaining in t1 (from s1)
         and w2 (from s2).  r2 is reused as a scratch register here, so
         b1 is no longer valid.  The loop stops when the w2 byte is zero
         or the two bytes differ, and the result is r2 - r0.  */
 "8:\n\t"
429
      "and      r2, t1, #"LSB"\n\t"
430
      "and      r0, w2, #"LSB"\n\t"
431
      "cmp      r0, #1\n\t"
432
      "it       cs\n\t"
433
      "cmpcs    r0, r2\n\t"
434
      "itt      eq\n\t"
435
      SHFT2LSB"eq       t1, t1, #8\n\t"
436
      SHFT2LSB"eq       w2, w2, #8\n\t"
437
      "beq      8b\n\t"
438
      "sub      r0, r2, r0\n\t"
439
      //      "ldmfd    sp!, {r4, r5}\n\t"
440
      "ldr      r4, [sp], #4\n\t"
441
      "ldr      r5, [sp], #4\n\t"
442
      "RETURN");
443
}
444
 
445
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.