| 1 | 
         207 | 
         jeremybenn | 
         /*
  | 
      
      
         | 2 | 
          | 
          | 
          *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  | 
      
      
         | 3 | 
          | 
          | 
          *
  | 
      
      
         | 4 | 
          | 
          | 
          *  To anyone who acknowledges that this file is provided "AS IS"
  | 
      
      
         | 5 | 
          | 
          | 
          *  without any express or implied warranty:
  | 
      
      
         | 6 | 
          | 
          | 
          *      permission to use, copy, modify, and distribute this file
  | 
      
      
         | 7 | 
          | 
          | 
          *  for any purpose is hereby granted without fee, provided that
  | 
      
      
         | 8 | 
          | 
          | 
          *  the above copyright notice and this notice appears in all
  | 
      
      
         | 9 | 
          | 
          | 
          *  copies, and that the name of Hewlett-Packard Company not be
  | 
      
      
         | 10 | 
          | 
          | 
          *  used in advertising or publicity pertaining to distribution
  | 
      
      
         | 11 | 
          | 
          | 
          *  of the software without specific, written prior permission.
  | 
      
      
         | 12 | 
          | 
          | 
          *  Hewlett-Packard Company makes no representations about the
  | 
      
      
         | 13 | 
          | 
          | 
          *  suitability of this software for any purpose.
  | 
      
      
         | 14 | 
          | 
          | 
          */
  | 
      
      
         | 15 | 
          | 
          | 
          
  | 
      
      
         | 16 | 
          | 
          | 
         /* HPUX_ID:     @(#) $Revision: 1.1 $   */
  | 
      
      
         | 17 | 
          | 
          | 
         /*
  | 
      
      
         | 18 | 
          | 
          | 
          * memcpy(s1, s2, n)
  | 
      
      
         | 19 | 
          | 
          | 
          *
  | 
      
      
         | 20 | 
          | 
          | 
          * Copy n characters from s2 to s1; returns s1.
  | 
      
      
         | 21 | 
          | 
          | 
          */
  | 
      
      
         | 22 | 
          | 
          | 
          
  | 
      
      
         | 23 | 
          | 
          | 
         /*
          * Register aliases used by memcpy below.
          *   d_addr : destination address; its entry value is also copied to ret0.
          *   s_addr : source address.
          *   count  : number of bytes to copy (biased by the word loops).
          *   tmp1-tmp4 : scratch registers holding loaded and shifted words.
          *   tmp5   : scratch; fourth word loaded in the unaligned 16-byte loop.
          *   tmp6   : scratch; shifted result in the unaligned 16-byte loop.
          */
         #define d_addr  arg0
  | 
      
      
         | 24 | 
          | 
          | 
         #define s_addr  arg1
  | 
      
      
         | 25 | 
          | 
          | 
         #define count   arg2
  | 
      
      
         | 26 | 
          | 
          | 
         #define tmp5    arg3
  | 
      
      
         | 27 | 
          | 
          | 
         #define tmp1    r19
  | 
      
      
         | 28 | 
          | 
          | 
         #define tmp2    r20
  | 
      
      
         | 29 | 
          | 
          | 
         #define tmp3    r21
  | 
      
      
         | 30 | 
          | 
          | 
         #define tmp4    r22
  | 
      
      
         | 31 | 
          | 
          | 
         #define tmp6    r31
  | 
      
      
         | 32 | 
          | 
          | 
          
  | 
      
      
         | 33 | 
          | 
          | 
         #include "DEFS.h"
  | 
      
      
         | 34 | 
          | 
          | 
          
  | 
      
      
         | 35 | 
          | 
          | 
         /*
          * memcpy entry: set up the return value and pick a copy strategy.
          *   - count <= 5 goes to byteloop (byte-at-a-time copy).
          *   - otherwise the low two bits of the source and destination
          *     addresses are compared: if they are equal, execution falls
          *     through to the aligned word-copy code, else it branches to
          *     not_aligned.
          * On both word paths s_addr has been rounded down to a word address
          * and count has been increased by the destination's byte offset.
          */
         ENTRY(memcpy)
  | 
      
      
         | 36 | 
          | 
          | 
                 comib,>=  5,count,byteloop     /* If 5 >= count (i.e. count <= 5) don't get fancy: copy byte by byte. */
  | 
      
      
         | 37 | 
          | 
          | 
                 movb,=,n      d_addr,ret0,done    /* The return value is defined to be the value of d_addr. DELAY SLOT of the comib above. */
  | 
      
      
         | 38 | 
          | 
          | 
                                                 /* if d_addr is null then exit */
  | 
      
      
         | 39 | 
          | 
          | 
                 extru       s_addr,31,2,tmp1   /* Extract the low two bits of the source address. */
  | 
      
      
         | 40 | 
          | 
          | 
                 extru       d_addr,31,2,tmp2   /* Extract the low two bits of the destination address. */
  | 
      
      
         | 41 | 
          | 
          | 
                 add         count,tmp2,count   /* pre increment the count by the destination's byte offset (tmp2) to adjust for alignment of s1 */
  | 
      
      
         | 42 | 
          | 
          | 
                 comb,<>       tmp2,tmp1,not_aligned /* see if s1 is aligned w.r.t. s2; branch if the two byte offsets differ. */
  | 
      
      
         | 43 | 
          | 
          | 
                 dep         0,31,2,s_addr      /* Clear the low two bits: compute the word address of the source.  DELAY SLOT. */
  | 
      
      
         | 44 | 
          | 
          | 
          
  | 
      
      
         | 45 | 
          | 
          | 
         /* aligned */
  | 
      
      
         | 46 | 
          | 
          | 
          
  | 
      
      
         | 47 | 
          | 
          | 
         /* We will now begin the 16 byte at a time word move if count >= 16 ! */
  | 
      
      
         | 48 | 
          | 
          | 
         /* Else we will branch to the  4 byte-at-a time word move ! */
  | 
      
      
         | 49 | 
          | 
          | 
          
  | 
      
      
         | 50 | 
          | 
          | 
                 /*
                  * Aligned path, 16 bytes per iteration.  count is kept biased
                  * by -16 while in this loop: the loop repeats as long as the
                  * biased count is still >= 0 after each subtraction.
                  */
                 addibt,<,n -16,count,chekchunk  /* count -= 16; if the result is < 0 then we can't move 16 byte chunks ! */
  | 
      
      
         | 51 | 
          | 
          | 
                                                 /*   actually we can legally move 13 or more bytes on the first loop.  */
  | 
      
      
         | 52 | 
          | 
          | 
                 /* These loads and stores are done so as to prevent processor interlock. */
  | 
      
      
         | 53 | 
          | 
          | 
         chunks:
  | 
      
      
         | 54 | 
          | 
          | 
                 ldwm        16(0,s_addr),tmp1   /* tmp1 = *s_addr   s_addr += 16 */
  | 
      
      
         | 55 | 
          | 
          | 
                 ldw         -12(0,s_addr),tmp2  /* tmp2 = 2nd word */
  | 
      
      
         | 56 | 
          | 
          | 
                 ldw         -8(0,s_addr),tmp3   /* tmp3 = 3rd word */
  | 
      
      
         | 57 | 
          | 
          | 
                 ldw         -4(0,s_addr),tmp4   /* tmp4 = 4th word */
  | 
      
      
         | 58 | 
          | 
          | 
                 /* Now store the results !  */
  | 
      
      
         | 59 | 
          | 
          | 
                 stbys,b,m   tmp1,4(0,d_addr)   /* tmp1 = 1st word stored, d_addr advanced by this store's displacement of 4 (16 over all four stores); also take care of front porch. */
  | 
      
      
         | 60 | 
          | 
          | 
                 stwm        tmp2,4(0,d_addr)    /* tmp2 = 2nd word stored. */
  | 
      
      
         | 61 | 
          | 
          | 
                 stwm        tmp3,4(0,d_addr)   /* tmp3 = 3rd word stored. */
  | 
      
      
         | 62 | 
          | 
          | 
                 addibf,<    -16,count,chunks    /* count -= 16; if the biased count is still >= 0 do another loop. */
  | 
      
      
         | 63 | 
          | 
          | 
                 stwm        tmp4,4(0,d_addr)   /* tmp4 = 4th word stored. DELAY SLOT */
  | 
      
      
         | 64 | 
          | 
          | 
          
  | 
      
      
         | 65 | 
          | 
          | 
         /*
          * Aligned path, one word per iteration.  On entry count is biased by
          * -16; adding 12 leaves it biased by -4, so "count < 0" means fewer
          * than 4 bytes remain.
          */
         chekchunk:
  | 
      
      
         | 66 | 
          | 
          | 
                 addibt,<,n  12,count,back_porch /* since the count is already decremented by -16 we're testing */
  | 
      
      
         | 67 | 
          | 
          | 
                                                 /*   to see if there are at least 4 bytes left; if not, go to back_porch. */
  | 
      
      
         | 68 | 
          | 
          | 
         subchunk:
  | 
      
      
         | 69 | 
          | 
          | 
                 ldws,ma      4(s_addr),tmp1     /* tmp1 = *s_addr++ */
  | 
      
      
         | 70 | 
          | 
          | 
                 addibf,<     -4,count,subchunk  /* count -= 4; loop while the biased count is still >= 0 */
  | 
      
      
         | 71 | 
          | 
          | 
                 stbys,b,m    tmp1,4(d_addr)     /* *d_addr++ = tmp1  (DELAY SLOT) */
  | 
      
      
         | 72 | 
          | 
          | 
          
  | 
      
      
         | 73 | 
          | 
          | 
          
  | 
      
      
         | 74 | 
          | 
          | 
         /*
          * Aligned path tail: store the remaining bytes (fewer than a word),
          * if any, then return directly through r2 rather than through the
          * done label.
          */
         back_porch:
  | 
      
      
         | 75 | 
          | 
          | 
                  addibt,=,n  4,count,done       /* count += 4 removes the -4 bias; if count = 0 we're, of course, done ! */
  | 
      
      
         | 76 | 
          | 
          | 
                  ldws        0(s_addr),tmp1     /* load up the back_porch */
  | 
      
      
         | 77 | 
          | 
          | 
                  add         d_addr,count,d_addr/* final store address is +1 too high (one past the last byte to write) ! */
  | 
      
      
         | 78 | 
          | 
          | 
                  bv             0(r2)           /* return--we're done. */
  | 
      
      
         | 79 | 
          | 
          | 
                  stbys,e     tmp1,0(d_addr)    /* store the trailing bytes in the DELAY SLOT of the return. kerplunk! whew !  */
  | 
      
      
         | 80 | 
          | 
          | 
          
  | 
      
      
         | 81 | 
          | 
          | 
         /* Begin non_aligned code. (no reference to politics) */
  | 
      
      
         | 82 | 
          | 
          | 
         /*
          * Source and destination have different byte offsets within a word.
          * Set up the shift amount register (cr11) from the difference of the
          * two offsets, and preload the first source word into tmp1 when the
          * destination offset is smaller than the source offset.
          * On entry: tmp1 = source byte offset, tmp2 = destination byte offset.
          */
         not_aligned:
  | 
      
      
         | 83 | 
          | 
          | 
                 sub,>=       tmp2,tmp1,tmp3     /* tmp3 = tmp2 - tmp1: compute the shift quantity again and skip the load if tmp2 >= tmp1 (they are never equal here). */
  | 
      
      
         | 84 | 
          | 
          | 
                 ldwm         4(0,s_addr),tmp1   /* load up the first word from the source. tmp1 = *s_addr++ */
  | 
      
      
         | 85 | 
          | 
          | 
                 zdep         tmp3,28,29,tmp4    /* tmp4 = tmp3 << 3: compute the number of bits to shift based on the number of bytes above. */
  | 
      
      
         | 86 | 
          | 
          | 
                 mtctl        tmp4,11            /* load the shift count into cr11 = shift count register. */
  | 
      
      
         | 87 | 
          | 
          | 
          
  | 
      
      
         | 88 | 
          | 
          | 
                 addibt,<,n   -16,count,chkchnk2 /* count -= 16: first step in pre adjustment of count for looping; skip the 16-byte loop if the result is < 0. */
  | 
      
      
         | 89 | 
          | 
          | 
          
  | 
      
      
         | 90 | 
          | 
          | 
         /*
          * Unaligned path, 16 bytes per iteration.  tmp1 holds the source word
          * carried in from before the loop (or from the previous iteration);
          * each vshd combines two adjacent source words, shifted by the amount
          * in cr11, into one destination word.
          */
         chunk2:
  | 
      
      
         | 91 | 
          | 
          | 
                 ldwm            16(0,s_addr),tmp2    /* tmp2 = *s_addr, then s_addr += 16 (first of the four words loaded this iteration) */
  | 
      
      
         | 92 | 
          | 
          | 
                 ldw             -12(s_addr),tmp3    /* tmp3 = 2nd word */
  | 
      
      
         | 93 | 
          | 
          | 
                 ldw             -8(s_addr),tmp4    /* tmp4 = 3rd word */
  | 
      
      
         | 94 | 
          | 
          | 
                 ldw             -4(s_addr),tmp5    /* tmp5 = 4th word */
  | 
      
      
         | 95 | 
          | 
          | 
                 vshd            tmp1,tmp2,tmp6      /* position data: combine carried word and 1st word */
  | 
      
      
         | 96 | 
          | 
          | 
                 stbys,b,m       tmp6,4(0,d_addr)    /* store, advancing d_addr; stbys,b handles a partial first word */
  | 
      
      
         | 97 | 
          | 
          | 
          
  | 
      
      
         | 98 | 
          | 
          | 
                 vshd            tmp2,tmp3,tmp6      /* position data: 1st and 2nd words */
  | 
      
      
         | 99 | 
          | 
          | 
                 stwm            tmp6,4(0,d_addr)    /* store ! */
  | 
      
      
         | 100 | 
          | 
          | 
          
  | 
      
      
         | 101 | 
          | 
          | 
                 vshd            tmp3,tmp4,tmp6      /* position data: 2nd and 3rd words */
  | 
      
      
         | 102 | 
          | 
          | 
                 stwm            tmp6,4(0,d_addr)    /* store ! */
  | 
      
      
         | 103 | 
          | 
          | 
          
  | 
      
      
         | 104 | 
          | 
          | 
                 vshd            tmp4,tmp5,tmp6      /* position data: 3rd and 4th words */
  | 
      
      
         | 105 | 
          | 
          | 
                 stwm            tmp6,4(0,d_addr)    /* store the data ! */
  | 
      
      
         | 106 | 
          | 
          | 
                 addibf,<    -16,count,chunk2    /* count -= 16; if the biased count is still >= 0 do another loop. */
  | 
      
      
         | 107 | 
          | 
          | 
                 copy            tmp5,tmp1    /* carry the 4th word over as the leading word of the next step. DELAY SLOT */
  | 
      
      
         | 108 | 
          | 
          | 
          
  | 
      
      
         | 109 | 
          | 
          | 
          
  | 
      
      
         | 110 | 
          | 
          | 
         /*
          * Unaligned path, one word per step.  count is rebiased from -16 to
          * -4 here.  The loop is unrolled twice so that tmp1 and tmp2 swap
          * roles as "previous word" and "next word" without a register copy.
          */
         chkchnk2:
  | 
      
      
         | 111 | 
          | 
          | 
                 addibt,<,n  12,count,bp_0       /* count += 12; if we don't have 4 bytes left then do the back porch (bp_0) */
  | 
      
      
         | 112 | 
          | 
          | 
          
  | 
      
      
         | 113 | 
          | 
          | 
         subchnk2:
  | 
      
      
         | 114 | 
          | 
          | 
                 ldwm        4(0,s_addr),tmp2    /* get next word ! */
  | 
      
      
         | 115 | 
          | 
          | 
                 vshd        tmp1,tmp2,tmp3      /* position data ! */
  | 
      
      
         | 116 | 
          | 
          | 
                 addibt,<    -4,count,bp_1       /* decrement count and when fewer than 4 bytes remain (biased count < 0) goto back_porch (bp_1) */
  | 
      
      
         | 117 | 
          | 
          | 
                 stbys,b,m   tmp3,4(0,d_addr)    /* store ! (DELAY SLOT) */
  | 
      
      
         | 118 | 
          | 
          | 
          
  | 
      
      
         | 119 | 
          | 
          | 
                 ldwm        4(0,s_addr),tmp1    /* get next word, this time into tmp1 ! */
  | 
      
      
         | 120 | 
          | 
          | 
                 vshd        tmp2,tmp1,tmp3      /* position data ! */
  | 
      
      
         | 121 | 
          | 
          | 
                 addib,>=    -4,count,subchnk2   /* decrement count; loop while the biased count is >= 0, else fall through to the back porch (bp_0) */
  | 
      
      
         | 122 | 
          | 
          | 
                 stbys,b,m   tmp3,4(0,d_addr)    /* store the data ! (DELAY SLOT) */
  | 
      
      
         | 123 | 
          | 
          | 
          
  | 
      
      
         | 124 | 
          | 
          | 
         /*
          * Unaligned path tail.  At bp_1, tmp2 holds the most recently loaded
          * source word; bp_0 is entered when that word is in tmp1 instead.
          * Stores the remaining bytes (if any) and returns directly through r2.
          */
         bp_0:    copy        tmp1,tmp2           /* switch registers used in the shift process. */
  | 
      
      
         | 125 | 
          | 
          | 
         bp_1:    addibt,<=,n  4,count,done        /* count += 4 removes the -4 bias: a biased count of -4 means 0 bytes left -> done  */
  | 
      
      
         | 126 | 
          | 
          | 
                 add         d_addr,count,d_addr /* bump destination address to be +1 too high ! */
  | 
      
      
         | 127 | 
          | 
          | 
                 mfctl           sar,tmp3        /* suppress final ldwm unless result used */
  | 
      
      
         | 128 | 
          | 
          | 
                 extru           tmp3,28,2,tmp3  /* convert bitshift to byteshift */
  | 
      
      
         | 129 | 
          | 
          | 
                 sub,<=          count,tmp3,r0   /* bytes unused if (count-byteshift <= 0): nullify the ldwm below */
  | 
      
      
         | 130 | 
          | 
          | 
          
  | 
      
      
         | 131 | 
          | 
          | 
                 ldwm        4(0,s_addr),tmp1    /* get final word !         */
  | 
      
      
         | 132 | 
          | 
          | 
                 vshd        tmp2,tmp1,tmp3      /* position data ! */
  | 
      
      
         | 133 | 
          | 
          | 
                 bv              0(r2)           /* return */
  | 
      
      
         | 134 | 
          | 
          | 
                 stbys,e     tmp3,0(0,d_addr)    /* store the data ! (DELAY SLOT of the return) */
  | 
      
      
         | 135 | 
          | 
          | 
          
  | 
      
      
         | 136 | 
          | 
          | 
         /* here we do ye old byte-at-a-time moves. */
  | 
      
      
         | 137 | 
          | 
          | 
         /*
          * Byte-at-a-time copy, reached from the entry when count <= 5.
          * Copies one byte per iteration until count reaches zero, then falls
          * into done.
          */
         byteloop:
  | 
      
      
         | 138 | 
          | 
          | 
                 comb,>=,n    0,count,done    /* nothing to copy if 0 >= count */
  | 
      
      
         | 139 | 
          | 
          | 
          
  | 
      
      
         | 140 | 
          | 
          | 
         encore:
  | 
      
      
         | 141 | 
          | 
          | 
                 ldbs,ma     1(s_addr),tmp1    /* tmp1 = next source byte; s_addr += 1 */
  | 
      
      
         | 142 | 
          | 
          | 
                 addibf,=   -1,count,encore    /* count -= 1; loop again unless count is now 0 */
  | 
      
      
         | 143 | 
          | 
          | 
                 stbs,ma     tmp1,1(d_addr)    /* store the byte; d_addr += 1 (DELAY SLOT) */
  | 
      
      
         | 144 | 
          | 
          | 
          
  | 
      
      
         | 145 | 
          | 
          | 
         /* Common exit point; EXIT is a macro defined outside this file (presumably in DEFS.h -- confirm). */
         done:
  | 
      
      
         | 146 | 
          | 
          | 
         EXIT(memcpy)
  |