OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.18.0/] [newlib/] [libc/] [machine/] [spu/] [memmove.c] - Blame information for rev 207

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 207 jeremybenn
/*
2
  (C) Copyright 2001,2006,
3
  International Business Machines Corporation,
4
  Sony Computer Entertainment, Incorporated,
5
  Toshiba Corporation,
6
 
7
  All rights reserved.
8
 
9
  Redistribution and use in source and binary forms, with or without
10
  modification, are permitted provided that the following conditions are met:
11
 
12
    * Redistributions of source code must retain the above copyright notice,
13
  this list of conditions and the following disclaimer.
14
    * Redistributions in binary form must reproduce the above copyright
15
  notice, this list of conditions and the following disclaimer in the
16
  documentation and/or other materials provided with the distribution.
17
    * Neither the names of the copyright holders nor the names of their
18
  contributors may be used to endorse or promote products derived from this
19
  software without specific prior written permission.
20
 
21
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31
  POSSIBILITY OF SUCH DAMAGE.
32
*/
33
#include <spu_intrinsics.h>
34
#include <stddef.h>
35
#include "vec_literal.h"
36
 
37
/* Copy n bytes from memory area src to memory area dest.
38
 * Copying is performed as if the n characters pointed to
39
 * by src are first copied into a temporary array that does
40
 * not overlap the src and dest arrays. Then the n characters
41
 * of the temporary array are copied into the destination
42
 * array. The memmove subroutine returns a pointer to dest.
43
 */
44
 
45
void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n)
46
{
47
  int adjust, delta;
48
  unsigned int soffset1, soffset2, doffset1, doffset2;
49
  vec_uchar16 *vSrc, *vDst;
50
  vec_uchar16 sdata1, sdata2, sdata, ddata, shuffle;
51
  vec_uchar16 mask, mask1, mask2, mask3, one = spu_splats((unsigned char)-1);
52
 
53
  soffset1  = (unsigned int)(src) & 15;
54
  doffset1 = (unsigned int)(dest) & 15;
55
  doffset2 = ((unsigned int)(dest) + n) & 15;
56
 
57
  /* Construct a series of masks used to data insert. The masks
58
   * contains 0 bit when the destination word is unchanged, 1 when it
59
   * must be replaced by source bits.
60
   *
61
   * mask1 = mask for leading unchanged bytes
62
   * mask2 = mask for trailing unchange bytes
63
   * mask3 = mask indicating the more than one qword is being changed.
64
   */
65
  mask  = one;
66
  mask1 = spu_rlmaskqwbyte(mask, -doffset1);
67
  mask2 = spu_slqwbyte(mask, 16-doffset2);
68
  mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
69
 
70
  vDst = (vec_uchar16 *)(dest);
71
 
72
  delta  = (int)soffset1 - (int)doffset1;
73
 
74
  /* The follow check only works if the SPU addresses are not
75
   * wrapped. No provisions have been made to correct for this
76
   * limitation.
77
   */
78
  if (((unsigned int)dest - (unsigned int)src) >= (unsigned int)n) {
79
    /* Forward copy. Perform a memcpy.
80
     *
81
     * Handle any leading destination partial quadwords as
82
     * well a very short copy (ie, such that the n characters
83
     * all reside in a single (destination) quadword.
84
     */
85
    vSrc = (vec_uchar16 *)(src);
86
    vDst = (vec_uchar16 *)(dest);
87
 
88
    /* Handle any leading destination partial quadwords as
89
     * well a very short copy (ie, such that the n characters
90
     * all reside in a single (destination) quadword.
91
     */
92
    soffset1 = (unsigned int)(src) & 15;
93
    doffset1 = (unsigned int)(dest) & 15;
94
    doffset2 = ((unsigned int)(dest) + n) & 15;
95
 
96
    /* Compute a shuffle pattern used to align the source string
97
     * with the alignment of the destination string.
98
     */
99
 
100
    adjust = (int)spu_extract(spu_cmpgt(spu_promote(doffset1, 0), spu_promote(soffset1, 0)), 0);
101
    delta  = (int)soffset1 - (int)doffset1;
102
    delta += adjust & 16;
103
 
104
    shuffle = (vec_uchar16)spu_add((vec_uint4)spu_splats((unsigned char)delta),
105
                                   VEC_LITERAL(vec_uint4, 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F));
106
 
107
    vSrc += adjust;
108
 
109
    sdata1 = *vSrc++;
110
    sdata2 = *vSrc++;
111
 
112
    ddata = *vDst;
113
    sdata = spu_shuffle(sdata1, sdata2, shuffle);
114
 
115
    /* Construct a series of masks used to data insert. The masks
116
     * contain 0 when the destination word is unchanged, 1 when it
117
     * must be replaced by source bytes.
118
     *
119
     * mask1 = mask for leading unchanged bytes
120
     * mask2 = mask for trailing unchange bytes
121
     * mask3 = mask indicating the more than one qword is being changed.
122
     */
123
    mask  = one;
124
    mask1 = spu_rlmaskqwbyte(mask, -doffset1);
125
    mask2 = spu_slqwbyte(mask, 16-doffset2);
126
    mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
127
 
128
    *vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3)));
129
 
130
    n += doffset1;
131
 
132
    /* Handle complete destination quadwords
133
     */
134
    while (n > 31) {
135
      sdata1 = sdata2;
136
      sdata2 = *vSrc++;
137
      *vDst++ = spu_shuffle(sdata1, sdata2, shuffle);
138
      n -= 16;
139
    }
140
 
141
    /* Handle any trailing partial (destination) quadwords
142
     */
143
    mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((unsigned int)n), 16), mask2);
144
    *vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask);
145
 
146
  } else {
147
    /* Backward copy.
148
     *
149
     * Handle any leading destination partial quadwords as
150
     * well a very short copy (ie, such that the n characters
151
     * all reside in a single (destination) quadword.
152
     */
153
    vSrc = (vec_uchar16 *)((unsigned int)src  + n-1);
154
    vDst = (vec_uchar16 *)((unsigned int)dest + n-1);
155
 
156
    /* Handle any leading destination partial quadwords as
157
     * well a very short copy (ie, such that the n characters
158
     * all reside in a single (destination) quadword.
159
     */
160
    soffset1 = (unsigned int)(src)  & 15;
161
    soffset2 = (unsigned int)(vSrc) & 15;
162
    doffset1 = (unsigned int)(dest) & 15;
163
    doffset2 = (unsigned int)(vDst) & 15;
164
 
165
    /* Compute a shuffle pattern used to align the source string
166
     * with the alignment of the destination string.
167
     */
168
    adjust = (int)spu_extract(spu_cmpgt(spu_promote(soffset2, 0), spu_promote(doffset2, 0)), 0);
169
    delta  = (int)doffset2 - (int)soffset2;
170
    delta += adjust & 16;
171
 
172
    shuffle = (vec_uchar16)spu_sub(VEC_LITERAL(vec_uint4, 0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F),
173
                                   (vec_uint4)spu_splats((unsigned char)delta));
174
 
175
    vSrc -= adjust;
176
 
177
    sdata2 = *vSrc--;
178
    sdata1 = *vSrc--;
179
 
180
    ddata = *vDst;
181
    sdata = spu_shuffle(sdata1, sdata2, shuffle);
182
 
183
    /* Construct a series of masks used to data insert. The masks
184
     * contain 0 when the destination word is unchanged, 1 when it
185
     * must be replaced by source bytes.
186
     *
187
     * mask1 = mask for leading unchanged bytes
188
     * mask2 = mask for trailing unchange bytes
189
     * mask3 = mask indicating the more than one qword is being changed.
190
     */
191
    mask  = one;
192
    mask1 = spu_rlmaskqwbyte(mask, -doffset1);
193
    mask2 = spu_slqwbyte(mask, 15-doffset2);
194
    mask3 = (vec_uchar16)spu_cmpgt(spu_splats((int)(doffset2 - n)), -2);
195
 
196
    *vDst-- = spu_sel(ddata, sdata, spu_and(mask2, spu_orc(mask1, mask3)));
197
 
198
    n -= doffset2 + 1;
199
 
200
    /* Handle complete destination quadwords
201
     */
202
    while ((int)n > 15) {
203
      sdata2 = sdata1;
204
      sdata1 = *vSrc--;
205
      *vDst-- = spu_shuffle(sdata1, sdata2, shuffle);
206
      n -= 16;
207
    }
208
 
209
    /* Handle any trailing partial (destination) quadwords
210
     */
211
    mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((int)n), 0), mask1);
212
    *vDst = spu_sel(*vDst, spu_shuffle(*vSrc, sdata1, shuffle), mask);
213
  }
214
  return (dest);
215
}
216
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.