OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [tags/] [gnu-src/] [newlib-1.18.0/] [newlib-1.18.0-or32-1.0rc1/] [newlib/] [libc/] [machine/] [hppa/] [strcpy.S] - Diff between revs 207 and 345

Only display areas with differences | Details | Blame | View Log

Rev 207 Rev 345
/*
 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
 *
 *  To anyone who acknowledges that this file is provided "AS IS"
 *  without any express or implied warranty:
 *      permission to use, copy, modify, and distribute this file
 *  for any purpose is hereby granted without fee, provided that
 *  the above copyright notice and this notice appears in all
 *  copies, and that the name of Hewlett-Packard Company not be
 *  used in advertising or publicity pertaining to distribution
 *  of the software without specific, written prior permission.
 *  Hewlett-Packard Company makes no representations about the
 *  suitability of this software for any purpose.
 */
/*
        A faster strcpy.
        by
        Jerry Huck (aligned case)
        Daryl Odnert (equal-alignment case)
        Edgar Circenis (non-aligned case)
*/
/*
 * strcpy(s1, s2)
 *
 * Copy string s2 to s1.  s1 must be large enough.
 * return s1
 */
#include "DEFS.h"

/*
 * Symbolic register names used by strcpy below.  Note that two pairs of
 * names alias the same physical register (tmp1/evenside and tmp2/oddside):
 * the word-aligned path uses the evenside/oddside names, the byte-wise and
 * misaligned paths use tmp1/tmp2.
 */
#define d_addr          r26     /* destination pointer (s1); advanced as data is stored */
#define s_addr          r25     /* source pointer (s2); advanced as data is loaded */
#define tmp6            r24     /* alignment bits / case_analysis dispatch index */
#define tmp1            r19
#define evenside        r19     /* same register as tmp1 */
#define tmp2            r20
#define oddside         r20     /* same register as tmp2 */
#define tmp3            r21     /* shift quantity (TGT-SRC), later the positioned data word */
#define tmp4            r22     /* shift amount in bits (tmp3 * 8); also a byte scratch */
#define tmp5            arg3    /* not referenced by the strcpy code below */
#define save            r1      /* word currently being examined for a null byte */
/*
 * strcpy(s1, s2) -- PA-RISC implementation.
 *
 * In:   d_addr = s1 (destination), s_addr = s2 (source).
 * Out:  ret0   = the original s1 (copied from d_addr before d_addr is
 *                advanced).
 * Uses: tmp1..tmp4, tmp6, save and control register 11 (the shift
 *       amount consumed by vshd / zvdepi) as scratch.
 *
 * Strategy:
 *   - Both pointers word aligned: copy a word at a time, two words per
 *     loop iteration, using uxor to detect a zero byte within a word.
 *   - Both pointers share the same non-zero alignment: copy the leading
 *     1-3 bytes individually, then join the word-aligned path.
 *   - Pointers have different alignments: read aligned source words and
 *     use vshd to shift adjacent words into position for the destination
 *     (pos_aligned_copy / neg_aligned_copy).
 *
 * The code depends on branch delay slots and on conditional nullification
 * of the following instruction, so instruction ORDER IS SIGNIFICANT
 * everywhere; several delay slots deliberately live under the next label.
 */
ENTRY(strcpy)
/* Quick alignment check; the fall-through fast path is taken when both
   pointers are word aligned. */
        extru,<>   s_addr,31,2,tmp6    /*Is source word aligned? */
        ldwm       4(0,s_addr),oddside /*Assume yes and guess that it
                                          is double-word aligned. */
        /* tmp6 becomes (TGT low 2 bits << 2) | (SRC low 2 bits); the branch
           below is nullified when that value is zero (both aligned). */
        dep,=      d_addr,29,2,tmp6    /*Is target word aligned? */
        b          case_analysis
        copy       d_addr,ret0         /* return value = s1; runs on both paths */
/* Both are aligned.  First source word already loaded assuming that
   source was oddword aligned.  Fall through (therefore fastest) code
   shuffles the registers to join the main loop */
bothaligned:
        bb,>=    s_addr,29,twoatatime  /*Branch if source was odd aligned*/
        uxor,nbz oddside,r0,save       /* save = word; skip next insn if no zero byte */
/* Even aligned source.  save holds that operand.
   Do one iteration of the main copy loop juggling the registers to avoid
   one copy. */
        b,n      nullfound
        ldwm     4(s_addr),oddside
        stwm     save,4(d_addr)
        uxor,nbz oddside,r0,save
        b,n      nullfound
        ldwm     4(s_addr),evenside
        stwm     oddside,4(d_addr)
        uxor,nbz evenside,r0,save
        b,n      nullfound
        ldwm     4(s_addr),oddside
/* Main loop body.  Entry expects evenside still to be stored, oddside
   just loaded. */
loop:
        stwm     evenside,4(d_addr)
        uxor,nbz oddside,r0,save
/* mid loop entry */
twoatatime:
        b,n      nullfound
        ldwm     4(s_addr),evenside
        stwm     oddside,4(d_addr)
        uxor,sbz evenside,r0,save      /* skip the loop branch if evenside has a zero byte */
        b        loop
        ldwm     4(s_addr),oddside
/* fall through when null found in evenside.  oddside actually loaded */
/* On entry to nullfound, save holds the word that contains the terminating
   null and d_addr points at where that word belongs.  Each extru tests one
   byte (left to right); d_addr is bumped so that the final store writes
   only the bytes up to and including the null. */
nullfound:                              /* adjust d_addr and store final word */
        extru,<>        save,7,8,r0         /* pick up leftmost byte */
        addib,tr,n      1,d_addr,store_final
        extru,<>        save,15,8,r0
        addib,tr,n      2,d_addr,store_final
        extru,<>        save,23,8,r0
        addib,tr        3,d_addr,store_final2   /* delay slot is the bv below */
        bv              0(rp)
        stw             save,0(d_addr)      /* null is in the last byte: store whole word */
store_final:
        bv              0(rp)
store_final2:
        stbys,e         save,0(d_addr)  /* delay slot */
/* Dispatch on tmp6 through a table of two-instruction entries (a branch
   plus its delay slot), one entry per TGT/SRC alignment combination. */
case_analysis:
        blr         tmp6,r0
        nop
        /* NOTE: the delay slots for the non-aligned cases load a   */
        /* shift quantity which is TGT-SRC into tmp3.               */
        /* Note also, the case for both strings being word aligned  */
        /* is already checked before the BLR is executed, so that   */
        /* case can never occur.                                    */
                                       /* TGT SRC */
        nop                            /* 00  00  can't happen */
        nop
        b           neg_aligned_copy   /* 00  01  */
        ldi         -1,tmp3            /* load shift quantity. delay slot */
        b           neg_aligned_copy   /* 00  10  */
        ldi         -2,tmp3            /* load shift quantity. delay slot */
        b           neg_aligned_copy   /* 00  11  */
        ldi         -3,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy0  /* 01  00  */
        ldi         1,tmp3            /* load shift quantity. delay slot */
        b           equal_alignment_1  /* 01  01  */
        ldbs,ma     1(s_addr),tmp1
        b           neg_aligned_copy   /* 01  10  */
        ldi         -1,tmp3            /* load shift quantity. delay slot */
        b           neg_aligned_copy   /* 01  11  */
        ldi         -2,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy0  /* 10  00  */
        ldi         2,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy   /* 10  01  */
        ldi         1,tmp3            /* load shift quantity. delay slot */
        b           equal_alignment_2  /* 10  10  */
        ldhs,ma     2(s_addr),tmp1
        b           neg_aligned_copy   /* 10  11  */
        ldi         -1,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy0  /* 11  00  */
        ldi         3,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy   /* 11  01  */
        ldi         2,tmp3            /* load shift quantity. delay slot */
        b           pos_aligned_copy   /* 11  10  */
        ldi         1,tmp3            /* load shift quantity. delay slot */
        /* Last table entry is handled in line: one leading byte, then the
           pointers are both word aligned. */
        ldbs,ma     1(s_addr),tmp1     /* 11  11  */
        comiclr,<>  r0,tmp1,r0
        bv          0(rp)              /* return if 1st byte was null */
        stbs,ma     tmp1,1(d_addr)     /* store a byte to dst string  */
        b           bothaligned       /* can now goto word_aligned   */
        ldwm        4(s_addr),oddside     /* load next word of source    */
/* Both pointers are at byte offset 1 within a word: copy one byte, then
   fall into the halfword case. */
equal_alignment_1:
        comiclr,<>  r0,tmp1,r0      /* nullify next if tmp1 <> 0  */
        bv          0(rp)           /* return if null byte found  */
        stbs,ma     tmp1,1(d_addr)  /* store a byte to dst string */
        ldhs,ma     2(s_addr),tmp1  /* load next halfword         */
/* Both pointers are at byte offset 2: tmp1 holds the next halfword; copy
   its two bytes individually, then join the word-aligned path. */
equal_alignment_2:
        extru,<>    tmp1,23,8,tmp6  /* look at left byte of halfword */
        bv          0(rp)           /* return if 1st byte was null */
        stbs,ma     tmp6,1(d_addr)
        extru,<>    tmp1,31,8,r0
        bv          0(rp)           /* return if 2nd byte was null */
        stbs,ma     tmp1,1(d_addr)
        b           bothaligned
        ldwm        4(s_addr),oddside  /* load next word              */
/* source and destination are not aligned, so we do it the hard way. */
/* target alignment is greater than source alignment */
pos_aligned_copy0:
        /* Source was word aligned, so the speculative ldwm at entry already
           advanced s_addr by 4; undo that before reloading. */
        addi            -4,s_addr,s_addr
pos_aligned_copy:
        extru       d_addr,31,2,tmp6   /* Extract low 2 bits of the dest addr */
        extru       s_addr,31,2,tmp1   /* Extract low 2 bits of the src addr */
        dep         r0,31,2,s_addr     /* Compute word address of the source. */
        sh3add          tmp3,r0,tmp4        /* compute shift amt */
        ldwm            4(0,s_addr),tmp2    /* get 1st source word */
        sh3add          tmp1,r0,save        /* setup mask shift amount */
        mtctl           save,r11            /* set-up cr11 for mask */
        zvdepi          -2,32,save          /* create mask */
        or              save,tmp2,tmp2      /* mask unused bytes in src */
        ldi             -1,tmp1             /* load tmp1 with 0xffffffff */
        mtctl           tmp4,r11            /* shift count -> shift count reg */
        vshd            tmp1,tmp2,tmp3      /* position data ! */
        uxor,nbz        tmp3,r0,save
        b,n             first_null
        uxor,nbz        tmp2,r0,save
        b               nullfound1
        mtctl           tmp4,r11            /* re-load shift cnt (delay slot) */
        b               loop_entry
        ldwm            4(0,s_addr),tmp1    /* get next word. delay slot */
/* target alignment is less than source alignment (negative shift quantity) */
neg_aligned_copy:
        extru       d_addr,31,2,tmp6   /* Extract low 2 bits of the dest addr */
        extru       s_addr,31,2,tmp2   /* Extract low 2 bits of the src addr */
        dep         r0,31,2,s_addr     /* Compute word address of the source. */
        sh3add          tmp3,r0,tmp4        /* compute shift amt */
        ldwm            4(0,s_addr),tmp1    /* load first word from source. */
/* check to see if next word can be read safely */
        sh3add          tmp2,r0,save
        mtctl           save,r11            /* shift count -> shift count reg */
        zvdepi          -2,32,save
        or              save, tmp1, tmp1
        uxor,nbz        tmp1,r0,save        /* any nulls in first word? */
        b               first_null0
        mtctl           tmp4,r11
        ldwm            4(0,s_addr),tmp2    /* load second word from source */
        combt,=         tmp6,r0,chunk1      /* don't mask if whole word valid */
        vshd            tmp1,tmp2,tmp3      /* position data ! */
        sh3add          tmp6,r0,save        /* setup r1 */
        mtctl           save,r11            /* set-up cr11 for mask */
        zvdepi          -2,32,save
        or              save, tmp3, tmp3
        uxor,nbz        tmp3,r0,save
        b,n             first_null
        uxor,nbz        tmp2,r0,save
        b               nullfound1
        mtctl           tmp4,r11            /* re-load shift cnt (delay slot) */
        b               loop_entry
        ldwm            4(0,s_addr),tmp1    /* get next word. delay slot */
chunk1:
        uxor,nbz        tmp2,r0,save
        b               nullfound0
        vshd            tmp1,tmp2,tmp3
/* Misaligned main loop: two source words per iteration, alternating which
   of tmp1/tmp2 holds the older word fed to vshd. */
did_mask:
        ldwm            4(0,s_addr),tmp1    /* get next word !  */
loop_entry:
        stbys,b,m       tmp3,4(0,d_addr)    /* store !  */
        uxor,nbz        tmp1, r0, save
        b               nullfound2
        vshd            tmp2,tmp1,tmp3      /* position data !  */
        ldwm            4(s_addr),tmp2
        stwm            tmp3,4(d_addr)
        uxor,sbz        tmp2,r0,save
        b               did_mask
        /* the delay slot of the branch above is the vshd under nullfound0 */
nullfound0:
        vshd            tmp1,tmp2,tmp3      /* delay slot */
        uxor,nbz        tmp3,r0,save
        b,n             nullfound
nullfound1:
        stbys,b,m       tmp3,4(0,d_addr)
        b               nullfound
        vshd            tmp2,r0,save        /* delay slot */
nullfound2:
        uxor,nbz        tmp3,r0,save
        b,n             nullfound
        stwm            tmp3,4(d_addr)
        b               nullfound
        /* notice that delay slot is in next routine */
first_null0:    /* null found in first word of non-aligned (wrt d_addr) */
        vshd            tmp1,r0,save        /* delay slot */
        combt,=         tmp6,r0,check4
        extru           save,7,8,tmp4
/* The null lies within the first (partial) destination word.  tmp6 holds
   the destination byte offset and selects how many trailing bytes of save
   still have to be examined and stored one at a time. */
first_null:
        addibt,=        -1,tmp6,check3  /* check last 3 bytes of word */
        extru           save,15,8,tmp4
        addibt,=,n      -1,tmp6,check2  /* check last 2 bytes */
        bv              0(rp)           /* null in last byte--store and exit */
        stbys,b         save, 0(d_addr)
check4:
        combt,=         tmp4,r0,done
        stbs,ma         tmp4,1(d_addr)
        extru,<>        save,15,8,tmp4
check3:
        combt,=         tmp4,r0,done
        stbs,ma         tmp4,1(d_addr)
check2:
        extru,<>        save,23,8,tmp4
        bv              0(rp)
        stbs,ma         tmp4,1(d_addr)
        bv              0(rp)
        stbs            r0,0(d_addr)
/* Falls into EXIT, which presumably emits the return sequence (macro is
   defined in DEFS.h -- confirm there). */
done:
EXIT(strcpy)
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.