URL
https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk
Subversion Repositories openrisc_me
[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.18.0/] [newlib/] [libc/] [machine/] [sh/] [strncpy.S] - Rev 323
Go to most recent revision | Compare with Previous | Blame | View Log
/* Copyright 2003 SuperH Ltd. */

/* strncpy for SH5, in two variants selected by the preprocessor:
   an SHmedia version that works a quadword (8 bytes) at a time, and a
   size-optimized SHcompact byte loop.

   Register usage on entry, as used by the code and comments below:
     r2: store (destination) address
     r3: read (source) address
     r4: size in bytes
   Neither variant ever writes r2, so the destination address is still in
   r2 on return (presumably the return value -- confirm against the ABI).
   Every exit path branches to the address held in r18 (SHmedia, via
   ptabs/blink) or executes rts (SHcompact).

   ENTRY, SHHI and SHLO are not defined in this file; presumably they come
   from asm.h, with SHHI/SHLO being endian-dependent shifts -- confirm.  */

#include "asm.h"

#ifdef __SH5__
#if __SHMEDIA__

/* ZPAD_MASK turns an mcmpeq.b-against-zero result in src into a mask in
   dst.  ANDing the data with that mask is what the code below uses to
   "mask out non-zero bytes after first zero byte".  It works by
   subtracting 1; the big-endian variant byte-reverses before and after
   the subtraction.  */
#ifdef __LITTLE_ENDIAN__
#define ZPAD_MASK(src, dst) addi src, -1, dst
#else
#define ZPAD_MASK(src, dst) \
	byterev src, dst; addi dst, -1, dst; byterev dst, dst
#endif

/* We assume that the destination is not in the first 16 bytes of memory.
   A typical linker script will put the text section first, and as
   this code is longer than 16 bytes, you have to go out of your way
   to put data there. */
ENTRY(strncpy)
	pt L_small, tr2
	ldlo.q r3, 0, r0		// r0: first (possibly partial) source quadword
	shlli r3, 3, r19		// r19 = r3 * 8
	mcmpeq.b r0, r63, r1		// r1: per-byte compare of r0 against r63
	SHHI r1, r19, r7		// r7: nonzero indicates relevant zero found in r0
	add r2, r4, r20			// r20: store end address
	addi r20, -8, r5		// r5: store end address minus 8
	/* If the size is greater than 8, we know we can read beyond the first
	   (possibly partial) quadword, and write out a full first and last
	   (possibly unaligned and/or overlapping) quadword. */
	bge/u r2, r5, tr2 // L_small
	pt L_found0, tr0
	addi r2, 8, r22			// r22: store address plus 8, as L_found0 expects
	bnei/u r7, 0, tr0 // L_found0
	ori r3, -8, r38			// r38 = (r3 & 7) - 8
	pt L_end_early, tr1
	sub r2, r38, r22		// r22 = r2 + 8 - (r3 & 7)
	stlo.q r2, 0, r0		// store the first quadword to the
	sthi.q r2, 7, r0		// (possibly unaligned) destination
	sub r3, r2, r6			// r6 = read address minus store address
	ldx.q r22, r6, r0		// next source quadword, from r22 + r6
	/* Before each iteration, check that we can store in full the next quad we
	   are about to fetch. */
	addi r5, -8, r36		// r36: store end address minus 16
	bgtu/u r22, r36, tr1 // L_end_early
	pt L_scan0, tr1
/* Main loop: on each pass, store the quadword held in r0 at r22, then
   fetch the next one.  Leaves for L_found0 when the quadword contains a
   zero byte.  */
L_scan0:
	addi r22, 8, r22
	mcmpeq.b r0, r63, r1
	stlo.q r22, -8, r0
	bnei/u r1, 0, tr0 // L_found0
	sthi.q r22, -1, r0
	ldx.q r22, r6, r0
	bgeu/l r36, r22, tr1 // L_scan0
L_end:
	// At end; we might re-read a few bytes when we fetch the last quad.
	// branch mispredict, so load is ready now.
	mcmpeq.b r0, r63, r1
	addi r22, 8, r22
	bnei/u r1, 0, tr0 // L_found0
	add r3, r4, r7			// r7 = read address plus size (read end address)
	ldlo.q r7, -8, r1		// r1/r7: the two parts of the last 8 source
	ldhi.q r7, -1, r7		// bytes, combined by the "or" below
	ptabs r18, tr0			// tr0: exit address taken from r18
	stlo.q r22, -8, r0
	or r1, r7, r1
	mcmpeq.b r1, r63, r7
	sthi.q r22, -1, r0
	ZPAD_MASK (r7, r7)
	and r1, r7, r1 // mask out non-zero bytes after first zero byte
	stlo.q r20, -8, r1		// store the last quadword so that it ends
	sthi.q r20, -1, r1		// at the store end address
	blink tr0, r63
L_end_early:
	/* Check if we can store the current quad in full. */
	pt L_end, tr1
	add r3, r4, r7			// r7 = read address plus size (read end address)
	bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short.
	/* If not, that means we can just proceed to process the last quad.
	   Two pipeline stalls are unavoidable, as we don't have enough ILP. */
	ldlo.q r7, -8, r1
	ldhi.q r7, -1, r7
	ptabs r18, tr0			// tr0: exit address taken from r18
	or r1, r7, r1
	mcmpeq.b r1, r63, r7
	ZPAD_MASK (r7, r7)
	and r1, r7, r1 // mask out non-zero bytes after first zero byte
	stlo.q r20, -8, r1
	sthi.q r20, -1, r1
	blink tr0, r63
L_found0:
	// r0: string to store, not yet zero-padding normalized.
	// r1: result of mcmpeq.b r0, r63, r1.
	// r22: store address plus 8. I.e. address where zero padding beyond the
	// string in r0 goes.
	// r20: store end address.
	// r5: store end address minus 8.
	pt L_write0_multiquad, tr0
	ZPAD_MASK (r1, r1)
	and r0, r1, r0 // mask out non-zero bytes after first zero byte
	stlo.q r22, -8, r0
	sthi.q r22, -1, r0
	andi r22, -8, r1 // Check if zeros to write fit in one quad word.
	bgtu/l r5, r1, tr0 // L_write0_multiquad
	ptabs r18, tr1			// tr1: exit address taken from r18
	sub r20, r22, r1		// r1 = r20 - r22
	// r1 becomes (r20 - r22) * 4; applying SHLO twice shifts by
	// (r20 - r22) * 8 bits in total.
	shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is
	SHLO r0, r1, r0 // handled correctly.
	SHLO r0, r1, r0
	sthi.q r20, -1, r0
	blink tr1, r63
/* Zero padding that spans more than one quadword.  r63 is the source
   operand of every store here.  */
L_write0_multiquad:
	pt L_write0_loop, tr0
	ptabs r18, tr1			// tr1: exit address taken from r18
	stlo.q r22, 0, r63
	sthi.q r20, -1, r63
	addi r1, 8, r1			// r1 was r22 rounded down to a multiple of 8
	bgeu/l r5, r1, tr0 // L_write0_loop
	blink tr1, r63
L_write0_loop:
	st.q r1, 0 ,r63			// whole quadword store of r63 at r1
	addi r1, 8, r1
	bgeu/l r5, r1, tr0 // L_write0_loop
	blink tr1, r63
L_small:
	// r0: string to store, not yet zero-padding normalized.
	// r1: result of mcmpeq.b r0, r63, r1.
	// r7: nonzero indicates relevant zero found in r0.
	// r2: store address.
	// r3: read address.
	// r4: size, max 8
	// r20: store end address.
	// r5: store end address minus 8.
	pt L_nohi, tr0
	pt L_small_storelong, tr1
	ptabs r18, tr2			// tr2: exit address taken from r18
	sub r63, r4, r23		// r23 = -size
	bnei/u r7, 0, tr0 // L_nohi
	ori r3, -8, r7			// r7 = (r3 & 7) - 8
	bge/l r23, r7, tr0 // L_nohi
	// Here size is greater than 8 - (r3 & 7), so the ldhi.q part of the
	// source quadword is fetched as well and merged into r0.
	ldhi.q r3, 7, r1
	or r0, r1, r0
	mcmpeq.b r0, r63, r1
L_nohi:
	ZPAD_MASK (r1, r1)
	and r0, r1, r0
	movi 4, r19
	bge/u r4, r19, tr1 // L_small_storelong
	// Size is 0..3 here: store the bytes one at a time, shifting r0 right
	// by 8 bits between stores.
	pt L_small_end, tr0
#ifndef __LITTLE_ENDIAN__
	byterev r0, r0
#endif
	beqi/u r4, 0, tr0 // L_small_end
	st.b r2, 0, r0
	beqi/u r4, 1, tr0 // L_small_end
	shlri r0, 8, r0
	st.b r2, 1, r0
	beqi/u r4, 2, tr0 // L_small_end
	shlri r0, 8, r0
	st.b r2, 2, r0
L_small_end:
	blink tr2, r63
/* Size is 4..8: write one longword at the store address and one ending
   at the store end address.  */
L_small_storelong:
	shlli r23, 3, r7		// r7 = -size * 8
	SHHI r0, r7, r1
#ifdef __LITTLE_ENDIAN__
	shlri r1, 32, r1
#else
	shlri r0, 32, r0
#endif
	stlo.l r2, 0, r0
	sthi.l r2, 3, r0
	stlo.l r20, -4, r1
	sthi.l r20, -1, r1
	blink tr2, r63

#else /* SHcompact */

/* This code is optimized for size. Instruction selection is SH5 specific.
   SH4 should use a different version. */
/* Byte-at-a-time loop.
     r5: current store address (pre-incremented in the bt/s delay slot)
     r4: address of the last byte to store (r2 + size - 1)
     r6: constant zero
     r1: byte to store
   Once a zero byte has been seen, T is set when the loop is re-entered, so
   the load is skipped and r1 (still zero) keeps being stored until r5
   reaches r4.  */
ENTRY(strncpy)
	mov #0, r6
	cmp/eq r4, r6			// size == 0: nothing to do
	bt return
	mov r2, r5
	add #-1, r5			// r5 = store address minus 1
	add r5, r4			// r4 = store address plus size minus 1
loop:
	bt/s found0			// T set: zero byte already seen, skip the load
	add #1, r5			// (delay slot) advance store address
	mov.b @r3+, r1			// fetch next source byte
found0:
	cmp/eq r5,r4			// T = this is the last byte to store
	mov.b r1, @r5
	bf/s loop			// loop again unless the last byte was stored
	cmp/eq r1, r6			// (delay slot) T = stored byte was zero
return:
	rts
	nop

#endif /* SHcompact */
#endif /* __SH5__ */
Go to most recent revision | Compare with Previous | Blame | View Log
