OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [tags/] [gnu-src/] [newlib-1.18.0/] [newlib-1.18.0-or32-1.0rc1/] [newlib/] [libc/] [machine/] [sh/] [strncpy.S] - Diff between revs 207 and 345

Only display areas with differences | Details | Blame | View Log

Rev 207 Rev 345
/* Copyright 2003 SuperH Ltd.  */
/* Copyright 2003 SuperH Ltd.  */
#include "asm.h"
#include "asm.h"
#ifdef __SH5__
#ifdef __SH5__
#if __SHMEDIA__
#if __SHMEDIA__
/* ZPAD_MASK (src, dst): in this file src is always the result of an
   "mcmpeq.b x, r63, ..." compare, and dst is afterwards ANDed into the data
   register to clear everything after the first zero byte.  The
   little-endian form simply subtracts 1 from src; the other form wraps the
   same subtraction in a pair of byterev instructions.  */
#ifdef __LITTLE_ENDIAN__
#ifdef __LITTLE_ENDIAN__
#define ZPAD_MASK(src, dst) addi src, -1, dst
#define ZPAD_MASK(src, dst) addi src, -1, dst
#else
#else
#define ZPAD_MASK(src, dst) \
#define ZPAD_MASK(src, dst) \
 byterev src, dst; addi dst, -1, dst; byterev dst, dst
 byterev src, dst; addi dst, -1, dst; byterev dst, dst
#endif
#endif
/* We assume that the destination is not in the first 16 bytes of memory.
/* We assume that the destination is not in the first 16 bytes of memory.
   A typical linker script will put the text section first, and as
   A typical linker script will put the text section first, and as
   this code is longer than 16 bytes, you have to go out of your way
   this code is longer than 16 bytes, you have to go out of your way
    to put data there.  */
    to put data there.  */
/* strncpy, SHmedia version.  Going by the register notes at L_small
   further down, r2 is the store address, r3 the read address and r4 the
   size.  This entry sequence fetches the first (possibly partial) source
   quadword, computes the store end address and then dispatches to L_small,
   L_found0, L_end_early or the L_scan0 loop.  ENTRY and SHHI are macros
   from asm.h, which is not shown here.  */
ENTRY(strncpy)
ENTRY(strncpy)
 pt L_small, tr2
 pt L_small, tr2
 // r0: first (possibly partial) source quadword; r1: its zero-byte map.
 ldlo.q r3, 0, r0
 ldlo.q r3, 0, r0
 shlli r3, 3, r19
 shlli r3, 3, r19
 mcmpeq.b r0, r63, r1
 mcmpeq.b r0, r63, r1
 // r7: r1 run through SHHI with r19 (r3 * 8) -- presumably a shift that
 // discards map bytes not belonging to the string; L_small treats a nonzero
 // r7 as "relevant zero found".
 SHHI r1, r19, r7
 SHHI r1, r19, r7
 // r20: store end address (r2 + r4); r5: store end address minus 8.
 add r2, r4, r20
 add r2, r4, r20
 addi r20, -8, r5
 addi r20, -8, r5
 /* If the size is greater than 8, we know we can read beyond the first
 /* If the size is greater than 8, we know we can read beyond the first
    (possibly partial) quadword, and write out a full first and last
    (possibly partial) quadword, and write out a full first and last
    (possibly unaligned and/or overlapping) quadword.  */
    (possibly unaligned and/or overlapping) quadword.  */
 bge/u r2, r5, tr2 // L_small
 bge/u r2, r5, tr2 // L_small
 pt L_found0, tr0
 pt L_found0, tr0
 // r22: store address plus 8, which is what L_found0 expects.
 addi r2, 8, r22
 addi r2, 8, r22
 bnei/u r7, 0, tr0  // L_found0
 bnei/u r7, 0, tr0  // L_found0
 // r38 = (r3 & 7) - 8, so r22 = r2 - r38 is the store address that matches
 // the next 8-byte aligned read address.
 ori r3, -8, r38
 ori r3, -8, r38
 pt L_end_early, tr1
 pt L_end_early, tr1
 sub r2, r38, r22
 sub r2, r38, r22
 // Store the first quadword at the possibly unaligned store address.
 stlo.q r2, 0, r0
 stlo.q r2, 0, r0
 sthi.q r2, 7, r0
 sthi.q r2, 7, r0
 // r6: read address minus store address; r22 + r6 is therefore the source
 // quadword that belongs at store address r22.
 sub r3, r2, r6
 sub r3, r2, r6
 ldx.q r22, r6, r0
 ldx.q r22, r6, r0
 /* Before each iteration, check that we can store in full the next quad we
 /* Before each iteration, check that we can store in full the next quad we
    are about to fetch.  */
    are about to fetch.  */
 // r36: store end address minus 16.
 addi r5, -8, r36
 addi r5, -8, r36
 bgtu/u r22, r36, tr1 // L_end_early
 bgtu/u r22, r36, tr1 // L_end_early
 pt L_scan0, tr1
 pt L_scan0, tr1
L_scan0:
L_scan0:
 // Main copy loop.  r22 is advanced by 8 first, which is why the stores of
 // the quadword in r0 use offsets -8 / -1.  A zero byte in r0 (nonzero r1)
 // leaves for L_found0; otherwise the next source quadword is fetched from
 // r22 + r6 and the loop repeats while r36 >= r22.
 addi r22, 8, r22
 addi r22, 8, r22
 mcmpeq.b r0, r63, r1
 mcmpeq.b r0, r63, r1
 stlo.q r22, -8, r0
 stlo.q r22, -8, r0
 bnei/u r1, 0, tr0   // L_found0
 bnei/u r1, 0, tr0   // L_found0
 sthi.q r22, -1, r0
 sthi.q r22, -1, r0
 ldx.q r22, r6, r0
 ldx.q r22, r6, r0
 bgeu/l r36, r22, tr1 // L_scan0
 bgeu/l r36, r22, tr1 // L_scan0
L_end:
L_end:
 // At end; we might re-read a few bytes when we fetch the last quad.
 // At end; we might re-read a few bytes when we fetch the last quad.
 // branch mispredict, so load is ready now.
 // branch mispredict, so load is ready now.
 // r0 has not been stored yet; a zero byte in it sends us to L_found0 with
 // r22 already advanced past it.
 mcmpeq.b r0, r63, r1
 mcmpeq.b r0, r63, r1
 addi r22, 8, r22
 addi r22, 8, r22
 bnei/u r1, 0, tr0   // L_found0
 bnei/u r1, 0, tr0   // L_found0
 // Fetch the 8 source bytes that end at r3 + r4 as an unaligned
 // ldlo.q / ldhi.q pair; the two halves are ORed together into r1 below.
 add r3, r4, r7
 add r3, r4, r7
 ldlo.q r7, -8, r1
 ldlo.q r7, -8, r1
 ldhi.q r7, -1, r7
 ldhi.q r7, -1, r7
 // tr0 <- r18; the final blink leaves the function through it.
 ptabs r18, tr0
 ptabs r18, tr0
 // The stores of r0 (at r22 - 8) are interleaved with the processing of
 // the last quad.
 stlo.q r22, -8, r0
 stlo.q r22, -8, r0
 or r1, r7, r1
 or r1, r7, r1
 mcmpeq.b r1, r63, r7
 mcmpeq.b r1, r63, r7
 sthi.q r22, -1, r0
 sthi.q r22, -1, r0
 ZPAD_MASK (r7, r7)
 ZPAD_MASK (r7, r7)
 and r1, r7, r1 // mask out non-zero bytes after first zero byte
 and r1, r7, r1 // mask out non-zero bytes after first zero byte
 // Write the last quad so that it ends exactly at the store end address.
 stlo.q r20, -8, r1
 stlo.q r20, -8, r1
 sthi.q r20, -1, r1
 sthi.q r20, -1, r1
 blink tr0, r63
 blink tr0, r63
L_end_early:
L_end_early:
 // Reached from the entry sequence when r22 > r36 (store end minus 16),
 // i.e. before the L_scan0 loop has run at all.
 /* Check if we can store the current quad in full.  */
 /* Check if we can store the current quad in full.  */
 pt L_end, tr1
 pt L_end, tr1
 // r7: read address plus size, used by the last-quad loads below.
 add r3, r4, r7
 add r3, r4, r7
 bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short.
 bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short.
 /* If not, that means we can just proceed to process the last quad.
 /* If not, that means we can just proceed to process the last quad.
    Two pipeline stalls are unavoidable, as we don't have enough ILP.  */
    Two pipeline stalls are unavoidable, as we don't have enough ILP.  */
 // Same last-quad sequence as in L_end, but without storing r0: load the
 // 8 source bytes ending at r3 + r4, mask them, store them ending at r20.
 ldlo.q r7, -8, r1
 ldlo.q r7, -8, r1
 ldhi.q r7, -1, r7
 ldhi.q r7, -1, r7
 ptabs r18, tr0
 ptabs r18, tr0
 or r1, r7, r1
 or r1, r7, r1
 mcmpeq.b r1, r63, r7
 mcmpeq.b r1, r63, r7
 ZPAD_MASK (r7, r7)
 ZPAD_MASK (r7, r7)
 and r1, r7, r1 // mask out non-zero bytes after first zero byte
 and r1, r7, r1 // mask out non-zero bytes after first zero byte
 stlo.q r20, -8, r1
 stlo.q r20, -8, r1
 sthi.q r20, -1, r1
 sthi.q r20, -1, r1
 blink tr0, r63
 blink tr0, r63
L_found0:
L_found0:
 // A zero byte was found in r0: store r0 with everything after that zero
 // cleared, then zero-fill the rest of the destination up to r20.
 // r0: string to store, not yet zero-padding normalized.
 // r0: string to store, not yet zero-padding normalized.
 // r1: result of mcmpeq.b r0, r63, r1.
 // r1: result of mcmpeq.b r0, r63, r1.
 // r22: store address plus 8.  I.e. address where zero padding beyond the
 // r22: store address plus 8.  I.e. address where zero padding beyond the
 //      string in r0 goes.
 //      string in r0 goes.
 // r20: store end address.
 // r20: store end address.
 // r5: store end address minus 8.
 // r5: store end address minus 8.
 pt L_write0_multiquad, tr0
 pt L_write0_multiquad, tr0
 ZPAD_MASK (r1, r1)
 ZPAD_MASK (r1, r1)
 and r0, r1, r0 // mask out non-zero bytes after first zero byte
 and r0, r1, r0 // mask out non-zero bytes after first zero byte
 stlo.q r22, -8, r0
 stlo.q r22, -8, r0
 sthi.q r22, -1, r0
 sthi.q r22, -1, r0
 // r1: r22 rounded down to an 8-byte boundary; L_write0_multiquad keeps
 // using it as its aligned store cursor.
 andi r22, -8, r1 // Check if zeros to write fit in one quad word.
 andi r22, -8, r1 // Check if zeros to write fit in one quad word.
 bgtu/l r5, r1, tr0 // L_write0_multiquad
 bgtu/l r5, r1, tr0 // L_write0_multiquad
 ptabs r18, tr1
 ptabs r18, tr1
 // Single-quad case: r1 = (r20 - r22) * 4, and SHLO (a macro from asm.h)
 // is applied twice, i.e. r0 is shifted by (r20 - r22) * 8 bits in total
 // before its high part is stored ending at r20.
 sub r20, r22, r1
 sub r20, r22, r1
 shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is
 shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is
 SHLO r0, r1, r0 // handled correctly.
 SHLO r0, r1, r0 // handled correctly.
 SHLO r0, r1, r0
 SHLO r0, r1, r0
 sthi.q r20, -1, r0
 sthi.q r20, -1, r0
 blink tr1, r63
 blink tr1, r63
L_write0_multiquad:
L_write0_multiquad:
 // Zero padding that spans more than one quadword.  On entry r1 is r22
 // rounded down to an 8-byte boundary (set up by L_found0).  The unaligned
 // head (from r22) and tail (ending at r20) are zeroed from r63 first;
 // any whole quadwords in between are zeroed by L_write0_loop.
 pt L_write0_loop, tr0
 pt L_write0_loop, tr0
 ptabs r18, tr1
 ptabs r18, tr1
 stlo.q r22, 0, r63
 stlo.q r22, 0, r63
 sthi.q r20, -1, r63
 sthi.q r20, -1, r63
 // Advance r1 to the next quadword boundary; loop only while r1 <= r5
 // (store end address minus 8), so a full st.q never runs past r20.
 addi r1, 8, r1
 addi r1, 8, r1
 bgeu/l r5, r1, tr0 // L_write0_loop
 bgeu/l r5, r1, tr0 // L_write0_loop
 blink tr1, r63
 blink tr1, r63
L_write0_loop:
L_write0_loop:
 // Store one aligned quadword of zeros per iteration.
 st.q r1, 0 ,r63
 st.q r1, 0 ,r63
 addi r1, 8, r1
 addi r1, 8, r1
 bgeu/l r5, r1, tr0 // L_write0_loop
 bgeu/l r5, r1, tr0 // L_write0_loop
 blink tr1, r63
 blink tr1, r63
L_small:
L_small:
 // Short copy (size of at most 8): make sure r0 holds all the source bytes
 // that are needed, then fall through to L_nohi to mask and store them.
 // r0: string to store, not yet zero-padding normalized.
 // r0: string to store, not yet zero-padding normalized.
 // r1: result of mcmpeq.b r0, r63, r1.
 // r1: result of mcmpeq.b r0, r63, r1.
 // r7: nonzero indicates relevant zero found r0.
 // r7: nonzero indicates relevant zero found r0.
 // r2: store address.
 // r2: store address.
 // r3: read address.
 // r3: read address.
 // r4: size, max 8
 // r4: size, max 8
 // r20: store end address.
 // r20: store end address.
 // r5: store end address minus 8.
 // r5: store end address minus 8.
 pt L_nohi, tr0
 pt L_nohi, tr0
 pt L_small_storelong, tr1
 pt L_small_storelong, tr1
 ptabs r18, tr2
 ptabs r18, tr2
 // r23: minus the size; L_small_storelong uses it again.
 sub r63, r4, r23
 sub r63, r4, r23
 bnei/u r7, 0, tr0  // L_nohi
 bnei/u r7, 0, tr0  // L_nohi
 // r7 = (r3 & 7) - 8.  If -size >= r7, i.e. size <= 8 - (r3 & 7), the
 // low part loaded at entry already covers every byte we need.
 ori r3, -8, r7
 ori r3, -8, r7
 bge/l r23, r7, tr0 // L_nohi
 bge/l r23, r7, tr0 // L_nohi
 // Otherwise fetch the high part of the unaligned quadword as well, merge
 // it into r0 and recompute the zero-byte map.
 ldhi.q r3, 7, r1
 ldhi.q r3, 7, r1
 or r0, r1, r0
 or r0, r1, r0
 mcmpeq.b r0, r63, r1
 mcmpeq.b r0, r63, r1
L_nohi:
L_nohi:
 // r0 now holds the source bytes and r1 their zero-byte map: clear
 // whatever follows the first zero byte, then store r4 bytes.
 ZPAD_MASK (r1, r1)
 ZPAD_MASK (r1, r1)
 and r0, r1, r0
 and r0, r1, r0
 // Sizes of 4 and more are written as two longwords by L_small_storelong.
 movi 4, r19
 movi 4, r19
 bge/u r4, r19, tr1 // L_small_storelong
 bge/u r4, r19, tr1 // L_small_storelong
 pt L_small_end, tr0
 pt L_small_end, tr0
 // Sizes 0..3: store one byte at a time, taking each byte from the low end
 // of r0 and shifting r0 right by 8 in between.  When not little-endian,
 // r0 is byte-reversed first.
#ifndef __LITTLE_ENDIAN__
#ifndef __LITTLE_ENDIAN__
 byterev r0, r0
 byterev r0, r0
#endif
#endif
 beqi/u r4, 0, tr0 // L_small_end
 beqi/u r4, 0, tr0 // L_small_end
 st.b r2, 0, r0
 st.b r2, 0, r0
 beqi/u r4, 1, tr0 // L_small_end
 beqi/u r4, 1, tr0 // L_small_end
 shlri r0, 8, r0
 shlri r0, 8, r0
 st.b r2, 1, r0
 st.b r2, 1, r0
 beqi/u r4, 2, tr0 // L_small_end
 beqi/u r4, 2, tr0 // L_small_end
 shlri r0, 8, r0
 shlri r0, 8, r0
 st.b r2, 2, r0
 st.b r2, 2, r0
L_small_end:
L_small_end:
 // tr2 was loaded from r18 at L_small.
 blink tr2, r63
 blink tr2, r63
L_small_storelong:
L_small_storelong:
 // Reached from L_nohi when r4 >= 4.  The result is written as two
 // unaligned longwords: one starting at the store address r2 and one ending
 // at the store end address r20 (they can overlap).
 // r7 = r23 * 8, where L_small set r23 to minus the size; SHHI is a macro
 // from asm.h.
 shlli r23, 3, r7
 shlli r23, 3, r7
 SHHI r0, r7, r1
 SHHI r0, r7, r1
 // Which of the two registers needs the extra 32-bit right shift depends
 // on the endianness.
#ifdef __LITTLE_ENDIAN__
#ifdef __LITTLE_ENDIAN__
 shlri r1, 32, r1
 shlri r1, 32, r1
#else
#else
 shlri r0, 32, r0
 shlri r0, 32, r0
#endif
#endif
 // First longword from r0, last longword from r1.
 stlo.l r2, 0, r0
 stlo.l r2, 0, r0
 sthi.l r2, 3, r0
 sthi.l r2, 3, r0
 stlo.l r20, -4, r1
 stlo.l r20, -4, r1
 sthi.l r20, -1, r1
 sthi.l r20, -1, r1
 blink tr2, r63
 blink tr2, r63
#else /* SHcompact */
#else /* SHcompact */
/* This code is optimized for size.  Instruction selection is SH5 specific.
/* This code is optimized for size.  Instruction selection is SH5 specific.
   SH4 should use a different version.  */
   SH4 should use a different version.  */
/* strncpy, SHcompact version: a byte-at-a-time loop.
   r2: destination; only read here, never written.
   r3: source pointer, post-incremented by every load.
   r4: size on entry; turned into the address of the last byte to store.
   r5: store cursor.   r6: constant 0.   r1: byte being stored.
   Once a zero byte has been stored the load is skipped, so r1 stays 0 and
   the remainder of the destination is filled with zeros.  */
ENTRY(strncpy)
ENTRY(strncpy)
 mov #0, r6
 mov #0, r6
 /* Size 0: nothing to store.  */
 cmp/eq r4, r6
 cmp/eq r4, r6
 bt return
 bt return
 /* r5 = r2 - 1 (incremented before each store); r4 = r2 + size - 1.  */
 mov r2, r5
 mov r2, r5
 add #-1, r5
 add #-1, r5
 add r5, r4
 add r5, r4
loop:
loop:
 /* T set means the previous byte was zero: skip the load.  T is clear on
    the first pass because the size was not 0.  The add sits in the delay
    slot of bt/s, so r5 advances on both paths.  */
 bt/s found0
 bt/s found0
 add #1, r5
 add #1, r5
 mov.b @r3+, r1
 mov.b @r3+, r1
found0:
found0:
 /* T = "this is the last byte to store"; bf/s loops while it is not.  */
 cmp/eq r5,r4
 cmp/eq r5,r4
 mov.b r1, @r5
 mov.b r1, @r5
 bf/s loop
 bf/s loop
 /* Delay slot of bf/s: T = (r1 == 0), tested by the bt/s at loop.  */
 cmp/eq r1, r6
 cmp/eq r1, r6
return:
return:
 rts
 rts
 nop
 nop
#endif /* SHcompact */
#endif /* SHcompact */
#endif /* __SH5__ */
#endif /* __SH5__ */
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.