OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libc/] [machine/] [hppa/] [memcpy.S] - Blame information for rev 407

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
/*
2
 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
3
 *
4
 *  To anyone who acknowledges that this file is provided "AS IS"
5
 *  without any express or implied warranty:
6
 *      permission to use, copy, modify, and distribute this file
7
 *  for any purpose is hereby granted without fee, provided that
8
 *  the above copyright notice and this notice appears in all
9
 *  copies, and that the name of Hewlett-Packard Company not be
10
 *  used in advertising or publicity pertaining to distribution
11
 *  of the software without specific, written prior permission.
12
 *  Hewlett-Packard Company makes no representations about the
13
 *  suitability of this software for any purpose.
14
 */
15
 
16
/* HPUX_ID:     @(#) $Revision: 1.1 $   */
17
/*
18
 * memcpy(s1, s2, n)
19
 *
20
 * Copy n characters from s2 to s1; returns s1.
21
 */
22
 
23
/*
 * Register aliases used by the memcpy code below.
 *   d_addr, s_addr, count - arg0..arg2; named after the destination,
 *                           source and byte count of memcpy(s1, s2, n).
 *   tmp5                  - arg3, used as a temporary for the 4th word
 *                           loaded in the unaligned 16-byte loop.
 *   tmp1..tmp4            - r19..r22, temporaries for loaded/shifted words
 *                           and alignment arithmetic.
 *   tmp6                  - r31, receives the vshd result in the unaligned
 *                           16-byte loop.
 */
#define d_addr  arg0
24
#define s_addr  arg1
25
#define count   arg2
26
#define tmp5    arg3
27
#define tmp1    r19
28
#define tmp2    r20
29
#define tmp3    r21
30
#define tmp4    r22
31
#define tmp6    r31
32
 
33
#include "DEFS.h"
34
 
35
/*
 * Outline of the code below:
 *   - count <= 5: copy one byte at a time (byteloop).
 *   - ret0 is loaded with d_addr; a zero d_addr branches straight to done.
 *   - source and destination with equal low two address bits: word copy,
 *     16 bytes per pass (chunks), then 4 bytes per pass (subchunk), then
 *     a final partial word (back_porch).
 *   - otherwise (not_aligned): word loads from the rounded-down source
 *     address, adjacent words merged with vshd using the shift amount
 *     placed in cr11, with the same chunk / word / back-porch structure.
 * Partial first and last destination words are written with stbys,b and
 * stbys,e respectively.
 */
ENTRY(memcpy)
36
        comib,>=  5,count,byteloop     /* If count is <= 5 (branch taken when 5 >= count) don't get fancy.*/
37
        movb,=,n      d_addr,ret0,done    /* The return value is defined to be the value of d_addr. DELAY SLOT */
38
                                        /* if d_addr is null then exit */
39
        extru       s_addr,31,2,tmp1   /* Extract the low two bits of the source address. */
40
        extru       d_addr,31,2,tmp2   /* Extract the low two bits of the destination address. */
41
        add         count,tmp2,count   /* pre-increment the count by the low two bits of d_addr to adjust for alignment of s1 */
42
        comb,<>       tmp2,tmp1,not_aligned /* see if s1 is aligned w.r.t. s2 (same low two address bits). */
43
        dep         0,31,2,s_addr      /* Compute the word address of the source.  DELAY SLOT. */
44
 
45
/* aligned */
46
 
47
/* We will now begin the 16 byte at a time word move if count >= 16 ! */
48
/* Else we will branch to the  4 byte-at-a time word move ! */
49
 
50
        addibt,<,n -16,count,chekchunk  /* If count < 16 then we can't move 16 byte chunks ! */
51
                                        /*   actually we can legally move 13 or more bytes on the first loop.  */
52
        /* These loads and stores are done so as to prevent processor interlock. */
53
chunks:
54
        ldwm        16(0,s_addr),tmp1   /* tmp1 = *s_addr   s_addr += 16 */
55
        ldw         -12(0,s_addr),tmp2  /* tmp2 = 2nd word */
56
        ldw         -8(0,s_addr),tmp3   /* tmp3 = 3rd word */
57
        ldw         -4(0,s_addr),tmp4   /* tmp4 = 4th word */
58
        /* Now store the results !  */
59
        stbys,b,m   tmp1,4(0,d_addr)   /* tmp1 = 1st word stored (+4 base modify, like the three stores below); also take care of front porch. */
60
        stwm        tmp2,4(0,d_addr)    /* tmp2 = 2nd word stored. */
61
        stwm        tmp3,4(0,d_addr)   /* tmp3 = 3rd word stored. */
62
        addibf,<    -16,count,chunks    /* If count is still >= 16 do another loop. */
63
        stwm        tmp4,4(0,d_addr)   /* tmp4 = 4th word stored. DELAY SLOT */
64
 
65
chekchunk:
66
        addibt,<,n  12,count,back_porch /* since the count is already decremented by -16 we're testing */
67
                                        /*   to see if there are at least 4 bytes left ? */
68
subchunk:
69
        ldws,ma      4(s_addr),tmp1     /* tmp1 = *s_addr++ */
70
        addibf,<     -4,count,subchunk  /* count -= 4; loop while the result is not negative */
71
        stbys,b,m    tmp1,4(d_addr)     /* *d_addr++ = tmp1. DELAY SLOT */
72
 
73
 
74
back_porch:
75
         addibt,=,n  4,count,done       /* if count = 0 we're, of course, done ! */
76
         ldws        0(s_addr),tmp1     /* load up the back_porch */
77
         add         d_addr,count,d_addr/* final store address  is +1 too high ! */
78
         bv             0(r2)           /* return--we're done. */
79
         stbys,e     tmp1,0(d_addr)    /* kerplunk! whew !  DELAY SLOT */
80
 
81
/* Begin non_aligned code. (no reference to politics) */
82
not_aligned:
83
        sub,>=       tmp2,tmp1,tmp3     /* compute the shift quantity again and skip the load if tmp2 > tmp1. */
84
        ldwm         4(0,s_addr),tmp1   /* load up the first word from the source. tmp1 = *s_addr++ */
85
        zdep         tmp3,28,29,tmp4    /* compute the number of bits to shift based on the number of bytes above. */
86
        mtctl        tmp4,11            /* load the shift count into cr11 = shift count register. */
87
 
88
        addibt,<,n   -16,count,chkchnk2 /* first step in pre adjustment of count for looping. */
89
 
90
chunk2:
91
        ldwm            16(0,s_addr),tmp2    /* get either first or second word . tmp2 = *s_addr, s_addr += 16 */
92
        ldw             -12(s_addr),tmp3    /* tmp3 = next word */
93
        ldw             -8(s_addr),tmp4     /* tmp4 = next word */
94
        ldw             -4(s_addr),tmp5     /* tmp5 = last word of this pass */
95
        vshd            tmp1,tmp2,tmp6      /* position data !  */
96
        stbys,b,m       tmp6,4(0,d_addr)    /* store !  */
97
 
98
        vshd            tmp2,tmp3,tmp6      /* position data !  */
99
        stwm            tmp6,4(0,d_addr)    /* store ! */
100
 
101
        vshd            tmp3,tmp4,tmp6      /* position data ! */
102
        stwm            tmp6,4(0,d_addr)    /* store ! */
103
 
104
        vshd            tmp4,tmp5,tmp6      /* position data ! */
105
        stwm            tmp6,4(0,d_addr)    /* store the data ! */
106
        addibf,<    -16,count,chunk2    /* If count is still >= 16 do another loop. */
107
        copy            tmp5,tmp1           /* last word loaded becomes the leading word of the next merge. DELAY SLOT */
108
 
109
 
110
chkchnk2:
111
        addibt,<,n  12,count,bp_0       /* if we don't have 4 bytes left then do the back porch (bp_0) */
112
 
113
subchnk2:
114
        ldwm        4(0,s_addr),tmp2    /* get next word ! */
115
        vshd        tmp1,tmp2,tmp3      /* position data ! */
116
        addibt,<    -4,count,bp_1       /* decrement count and when count < 4 goto back_porch (bp_1) */
117
        stbys,b,m   tmp3,4(0,d_addr)    /* store ! DELAY SLOT */
118
 
119
        ldwm        4(0,s_addr),tmp1    /* get next word ! */
120
        vshd        tmp2,tmp1,tmp3      /* position data ! */
121
        addib,>=    -4,count,subchnk2   /* decrement count; loop while the result is >= 0, else fall into the back porch (bp_0) */
122
        stbys,b,m   tmp3,4(0,d_addr)    /* store the data ! DELAY SLOT */
123
 
124
bp_0:    copy        tmp1,tmp2           /* switch registers used in the shift process. */
125
bp_1:    addibt,<=,n  4,count,done        /* count += 4; if the result is <= 0 nothing is left -> done  */
126
        add         d_addr,count,d_addr /* bump destination address to be +1 too high ! */
127
        mfctl           sar,tmp3        /* suppress final ldwm unless result used */
128
        extru           tmp3,28,2,tmp3  /* convert bitshift to byteshift */
129
        sub,<=          count,tmp3,r0   /* nullify the load below: its bytes are unused if (count - byteshift <= 0) */
130
 
131
        ldwm        4(0,s_addr),tmp1    /* get final word !         */
132
        vshd        tmp2,tmp1,tmp3      /* position data ! */
133
        bv              0(r2)           /* return */
134
        stbys,e     tmp3,0(0,d_addr)    /* store the data ! DELAY SLOT */
135
 
136
/* here we do ye old byte-at-a-time moves. */
137
byteloop:
138
        comb,>=,n    0,count,done       /* nothing to copy when 0 >= count */
139
 
140
encore:
141
        ldbs,ma     1(s_addr),tmp1      /* tmp1 = *s_addr++ */
142
        addibf,=   -1,count,encore      /* count -= 1; loop until it reaches 0 */
143
        stbs,ma     tmp1,1(d_addr)      /* *d_addr++ = tmp1. DELAY SLOT */
144
 
145
done:
146
EXIT(memcpy)

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.