OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

/or1k/trunk/linux/linux-2.4/arch/i386/lib/mmx.c - rev 1765

#include <linux/config.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/sched.h>

#include <asm/i387.h>
#include <asm/hardirq.h>


/*
 *      MMX 3DNow! library helper functions
 *
 *      To do:
 *      We can use MMX just for prefetch in IRQ's. This may be a win.
 *              (reported so on K6-III)
 *      We should use a better code-neutral filler for the short jump
 *              leal ebx, [ebx] is apparently best for K6-2, but Cyrix ??
 *      We also want to clobber the filler register so we don't get any
 *              register forwarding stalls on the filler.
 *
 *      Add *user handling. Checksums are not a win with MMX on any CPU
 *      tested so far, for any MMX solution tried.
 *
 *      22/09/2000 - Arjan van de Ven
 *              Improved for non-engineering-sample Athlons
 *
 */
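
/*
 *      A note on the .fixup/__ex_table pairs below: each exception-table
 *      entry maps a fault on the 3DNow! prefetch at label 1 to the fixup
 *      code at label 3, which overwrites the first two bytes at 1b with a
 *      short jmp (0x1AEB is little-endian for "EB 1A") that skips the
 *      remaining 26 bytes of the 28-byte prefetch run, so a faulting
 *      prefetch is simply disabled from then on.  The in-loop variant uses
 *      0x05EB ("EB 05") the same way to step over its single prefetch.
 */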

void *_mmx_memcpy(void *to, const void *from, size_t len)
{
        void *p;
        int i;
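
        /*
         * MMX uses the FPU register file; kernel_fpu_begin()/kernel_fpu_end()
         * save and restore that state, which we do not attempt from interrupt
         * context, so fall back to the plain __memcpy() there.
         */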
        if (in_interrupt())
                return __memcpy(to, from, len);

        p = to;
        i = len >> 6; /* len/64 */

        kernel_fpu_begin();

        __asm__ __volatile__ (
                "1: prefetch (%0)\n"            /* This set is 28 bytes */
                "   prefetch 64(%0)\n"
                "   prefetch 128(%0)\n"
                "   prefetch 192(%0)\n"
                "   prefetch 256(%0)\n"
                "2:  \n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
                "   jmp 2b\n"
                ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
                "       .long 1b, 3b\n"
                ".previous"
                : : "r" (from) );
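
        /*
         * Main copy loop: 64 bytes per iteration through %mm0-%mm3, with a
         * prefetch 320 bytes (five blocks) ahead.  It stops while more than
         * five blocks remain, so the prefetch never reaches past the end of
         * the source; the leftover blocks are copied below without it.
         */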
        for(; i>5; i--)
        {
                __asm__ __volatile__ (
                "1:  prefetch 320(%0)\n"
                "2:  movq (%0), %%mm0\n"
                "  movq 8(%0), %%mm1\n"
                "  movq 16(%0), %%mm2\n"
                "  movq 24(%0), %%mm3\n"
                "  movq %%mm0, (%1)\n"
                "  movq %%mm1, 8(%1)\n"
                "  movq %%mm2, 16(%1)\n"
                "  movq %%mm3, 24(%1)\n"
                "  movq 32(%0), %%mm0\n"
                "  movq 40(%0), %%mm1\n"
                "  movq 48(%0), %%mm2\n"
                "  movq 56(%0), %%mm3\n"
                "  movq %%mm0, 32(%1)\n"
                "  movq %%mm1, 40(%1)\n"
                "  movq %%mm2, 48(%1)\n"
                "  movq %%mm3, 56(%1)\n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
                "   jmp 2b\n"
                ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
                "       .long 1b, 3b\n"
                ".previous"
                : : "r" (from), "r" (to) : "memory");
                from+=64;
                to+=64;
        }
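
        /* Copy the remaining (at most five) 64-byte blocks without prefetch. */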
        for(; i>0; i--)
        {
                __asm__ __volatile__ (
                "  movq (%0), %%mm0\n"
                "  movq 8(%0), %%mm1\n"
                "  movq 16(%0), %%mm2\n"
                "  movq 24(%0), %%mm3\n"
                "  movq %%mm0, (%1)\n"
                "  movq %%mm1, 8(%1)\n"
                "  movq %%mm2, 16(%1)\n"
                "  movq %%mm3, 24(%1)\n"
                "  movq 32(%0), %%mm0\n"
                "  movq 40(%0), %%mm1\n"
                "  movq 48(%0), %%mm2\n"
                "  movq 56(%0), %%mm3\n"
                "  movq %%mm0, 32(%1)\n"
                "  movq %%mm1, 40(%1)\n"
                "  movq %%mm2, 48(%1)\n"
                "  movq %%mm3, 56(%1)\n"
                : : "r" (from), "r" (to) : "memory");
                from+=64;
                to+=64;
        }
        /*
         *      Now do the tail of the block
         */
        __memcpy(to, from, len&63);
        kernel_fpu_end();
        return p;
}

#ifdef CONFIG_MK7

/*
 *      The K7 has streaming cache-bypass load/store.  The Cyrix III, K6 and
 *      other MMX-using processors do not.
 */
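
/*
 *      movntq is a non-temporal ("streaming") store: it goes out through the
 *      write-combining buffers instead of through the cache, so clearing or
 *      copying a whole page does not evict the caller's working set.  The
 *      stores are weakly ordered, hence the sfence once each page is done.
 */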

static void fast_clear_page(void *page)
{
        int i;

        kernel_fpu_begin();

        __asm__ __volatile__ (
                "  pxor %%mm0, %%mm0\n" : :
        );

        for(i=0;i<4096/64;i++)
        {
                __asm__ __volatile__ (
                "  movntq %%mm0, (%0)\n"
                "  movntq %%mm0, 8(%0)\n"
                "  movntq %%mm0, 16(%0)\n"
                "  movntq %%mm0, 24(%0)\n"
                "  movntq %%mm0, 32(%0)\n"
                "  movntq %%mm0, 40(%0)\n"
                "  movntq %%mm0, 48(%0)\n"
                "  movntq %%mm0, 56(%0)\n"
                : : "r" (page) : "memory");
                page+=64;
        }
        /* Since movntq is weakly-ordered, an "sfence" is needed to make the
         * stores ordered again.
         */
        __asm__ __volatile__ (
                "  sfence \n" : :
        );
        kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
        int i;

        kernel_fpu_begin();

        /* maybe the prefetch stuff can go before the expensive fnsave...
         * but that is for later. -AV
         */
        __asm__ __volatile__ (
                "1: prefetch (%0)\n"
                "   prefetch 64(%0)\n"
                "   prefetch 128(%0)\n"
                "   prefetch 192(%0)\n"
                "   prefetch 256(%0)\n"
                "2:  \n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
                "   jmp 2b\n"
                ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
                "       .long 1b, 3b\n"
                ".previous"
                : : "r" (from) );
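
        /*
         * Copy the first 4096-320 bytes with a prefetch 320 bytes ahead;
         * the last five 64-byte blocks are copied below without prefetch,
         * so we never prefetch past the end of the source page.
         */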
        for(i=0; i<(4096-320)/64; i++)
        {
                __asm__ __volatile__ (
                "1: prefetch 320(%0)\n"
                "2: movq (%0), %%mm0\n"
                "   movntq %%mm0, (%1)\n"
                "   movq 8(%0), %%mm1\n"
                "   movntq %%mm1, 8(%1)\n"
                "   movq 16(%0), %%mm2\n"
                "   movntq %%mm2, 16(%1)\n"
                "   movq 24(%0), %%mm3\n"
                "   movntq %%mm3, 24(%1)\n"
                "   movq 32(%0), %%mm4\n"
                "   movntq %%mm4, 32(%1)\n"
                "   movq 40(%0), %%mm5\n"
                "   movntq %%mm5, 40(%1)\n"
                "   movq 48(%0), %%mm6\n"
                "   movntq %%mm6, 48(%1)\n"
                "   movq 56(%0), %%mm7\n"
                "   movntq %%mm7, 56(%1)\n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
                "   jmp 2b\n"
                ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
                "       .long 1b, 3b\n"
                ".previous"
                : : "r" (from), "r" (to) : "memory");
                from+=64;
                to+=64;
        }
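        /* Last 320 bytes: copy without the look-ahead prefetch. */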
        for(i=(4096-320)/64; i<4096/64; i++)
        {
                __asm__ __volatile__ (
                "2: movq (%0), %%mm0\n"
                "   movntq %%mm0, (%1)\n"
                "   movq 8(%0), %%mm1\n"
                "   movntq %%mm1, 8(%1)\n"
                "   movq 16(%0), %%mm2\n"
                "   movntq %%mm2, 16(%1)\n"
                "   movq 24(%0), %%mm3\n"
                "   movntq %%mm3, 24(%1)\n"
                "   movq 32(%0), %%mm4\n"
                "   movntq %%mm4, 32(%1)\n"
                "   movq 40(%0), %%mm5\n"
                "   movntq %%mm5, 40(%1)\n"
                "   movq 48(%0), %%mm6\n"
                "   movntq %%mm6, 48(%1)\n"
                "   movq 56(%0), %%mm7\n"
                "   movntq %%mm7, 56(%1)\n"
                : : "r" (from), "r" (to) : "memory");
                from+=64;
                to+=64;
        }
        /* Since movntq is weakly-ordered, an "sfence" is needed to make the
         * stores ordered again.
         */
        __asm__ __volatile__ (
                "  sfence \n" : :
        );
        kernel_fpu_end();
}

#else

/*
 *      Generic MMX implementation without the K7-specific streaming stores
 */

static void fast_clear_page(void *page)
{
        int i;

        kernel_fpu_begin();

        __asm__ __volatile__ (
                "  pxor %%mm0, %%mm0\n" : :
        );
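
        /*
         * No movntq here: plain movq stores go through the cache, clearing
         * 128 bytes per loop iteration.
         */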
        for(i=0;i<4096/128;i++)
        {
                __asm__ __volatile__ (
                "  movq %%mm0, (%0)\n"
                "  movq %%mm0, 8(%0)\n"
                "  movq %%mm0, 16(%0)\n"
                "  movq %%mm0, 24(%0)\n"
                "  movq %%mm0, 32(%0)\n"
                "  movq %%mm0, 40(%0)\n"
                "  movq %%mm0, 48(%0)\n"
                "  movq %%mm0, 56(%0)\n"
                "  movq %%mm0, 64(%0)\n"
                "  movq %%mm0, 72(%0)\n"
                "  movq %%mm0, 80(%0)\n"
                "  movq %%mm0, 88(%0)\n"
                "  movq %%mm0, 96(%0)\n"
                "  movq %%mm0, 104(%0)\n"
                "  movq %%mm0, 112(%0)\n"
                "  movq %%mm0, 120(%0)\n"
                : : "r" (page) : "memory");
                page+=128;
        }

        kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
        int i;

        kernel_fpu_begin();

        __asm__ __volatile__ (
                "1: prefetch (%0)\n"
                "   prefetch 64(%0)\n"
                "   prefetch 128(%0)\n"
                "   prefetch 192(%0)\n"
                "   prefetch 256(%0)\n"
                "2:  \n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
                "   jmp 2b\n"
                ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
                "       .long 1b, 3b\n"
                ".previous"
                : : "r" (from) );

        for(i=0; i<4096/64; i++)
        {
                __asm__ __volatile__ (
                "1: prefetch 320(%0)\n"
                "2: movq (%0), %%mm0\n"
                "   movq 8(%0), %%mm1\n"
                "   movq 16(%0), %%mm2\n"
                "   movq 24(%0), %%mm3\n"
                "   movq %%mm0, (%1)\n"
                "   movq %%mm1, 8(%1)\n"
                "   movq %%mm2, 16(%1)\n"
                "   movq %%mm3, 24(%1)\n"
                "   movq 32(%0), %%mm0\n"
                "   movq 40(%0), %%mm1\n"
                "   movq 48(%0), %%mm2\n"
                "   movq 56(%0), %%mm3\n"
                "   movq %%mm0, 32(%1)\n"
                "   movq %%mm1, 40(%1)\n"
                "   movq %%mm2, 48(%1)\n"
                "   movq %%mm3, 56(%1)\n"
                ".section .fixup, \"ax\"\n"
                "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
                "   jmp 2b\n"
                ".previous\n"
                ".section __ex_table,\"a\"\n"
                "       .align 4\n"
                "       .long 1b, 3b\n"
                ".previous"
                : : "r" (from), "r" (to) : "memory");
                from+=64;
                to+=64;
        }
        kernel_fpu_end();
}


#endif

/*
 *      Favour MMX for page clear and copy.
 */
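
/*
 *      The slow_* fallbacks below use string instructions (rep stosl /
 *      rep movsl over 1024 dwords, i.e. one 4096-byte page); they are used
 *      from interrupt context, where the MMX paths above are avoided.
 */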

static void slow_zero_page(void * page)
{
        int d0, d1;
        __asm__ __volatile__( \
                "cld\n\t" \
                "rep ; stosl" \
                : "=&c" (d0), "=&D" (d1)
                :"a" (0),"1" (page),"0" (1024)
                :"memory");
}

void mmx_clear_page(void * page)
{
        if(in_interrupt())
                slow_zero_page(page);
        else
                fast_clear_page(page);
}

static void slow_copy_page(void *to, void *from)
{
        int d0, d1, d2;
        __asm__ __volatile__( \
                "cld\n\t" \
                "rep ; movsl" \
                : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
                : "0" (1024),"1" ((long) to),"2" ((long) from) \
                : "memory");
}


void mmx_copy_page(void *to, void *from)
{
        if(in_interrupt())
                slow_copy_page(to, from);
        else
                fast_copy_page(to, from);
}
