/*
 *  linux/arch/alpha/lib/memcpy.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */
 
/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
 
/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8(d,s,n) \
        while (d & 7) { \
                if (n <= 0) return; \
                n--; \
                *(char *) d = *(char *) s; \
                d++; s++; \
        }
 
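/*
 * Beware that ALIGN_DEST_TO8 copies at most seven bytes, and that its
 * "return" exits the enclosing function, not just the macro, if the
 * byte count runs out before the destination reaches 8-byte alignment.
 */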
/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST(d,s,n) \
        while (n > 0) { \
                n--; \
                *(char *) d = *(char *) s; \
                d++; s++; \
        }
 
/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED(d,s,n) DO_REST(d,s,n)
 
/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
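/*
 * The "n -= 8" below turns the loop-exit test into a plain sign check
 * ("bge") instead of a compare against 8, and the "n += 8" afterwards
 * restores the 0..7 leftover bytes for DO_REST.  Note that s is always
 * misaligned inside the loop: this path is only taken when dest and src
 * differ mod 8, and the destination has just been aligned.
 */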
static inline void __memcpy_unaligned(unsigned long d, unsigned long s, long n)
{
        ALIGN_DEST_TO8(d,s,n);
        n -= 8;                 /* to avoid compare against 8 in the loop */
        if (n >= 0) {
                unsigned long low_word, high_word;
                __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
                do {
                        unsigned long tmp;
                        __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
                        n -= 8;
                        __asm__("extql %1,%2,%0"
                                :"=r" (low_word)
                                :"r" (low_word), "r" (s));
                        __asm__("extqh %1,%2,%0"
                                :"=r" (tmp)
                                :"r" (high_word), "r" (s));
                        s += 8;
                        *(unsigned long *) d = low_word | tmp;
                        d += 8;
                        low_word = high_word;
                } while (n >= 0);
        }
        n += 8;
        DO_REST(d,s,n);
}
 
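/*
 * A rough portable-C sketch of what the ldq_u/extql/extqh sequence above
 * computes, for readers without an Alpha manual at hand.  The name
 * __combine_quadwords is purely illustrative (it is not part of this
 * file's interface), and the sketch assumes a little-endian machine with
 * a 64-bit unsigned long, as on the Alpha.
 */
static inline unsigned long __combine_quadwords(unsigned long low_word,
                                                unsigned long high_word,
                                                unsigned long s)
{
        /* extql shifts the first quadword right by (s & 7) bytes... */
        unsigned long shift = 8 * (s & 7);      /* always 1..7 bytes here */
        /* ...and extqh shifts the second one left by 8 - (s & 7) bytes,
           so the OR reconstructs the eight unaligned bytes starting at s. */
        return (low_word >> shift) | (high_word << (64 - shift));
}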
/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer
 * register for the load-store. I don't know why, but using a floating
 * point register for the move seems to slow things down (very small
 * difference, though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned(unsigned long d, unsigned long s, long n)
{
        ALIGN_DEST_TO8(d,s,n);
        n -= 8;
        while (n >= 0) {
                unsigned long tmp;
                __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
                n -= 8;
                s += 8;
                *(unsigned long *) d = tmp;
                d += 8;
        }
        n += 8;
        DO_REST_ALIGNED(d,s,n);
}
 
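/*
 * The XOR below tests whether dest and src are congruent mod 8: if their
 * low three bits agree, then once ALIGN_DEST_TO8 has aligned the
 * destination the source is aligned as well, and the cheap ldq/stq loop
 * can be used.
 */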
void * __memcpy(void * dest, const void *src, size_t n)
{
        if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
                __memcpy_aligned((unsigned long) dest, (unsigned long) src, n);
                return dest;
        }
        __memcpy_unaligned((unsigned long) dest, (unsigned long) src, n);
        return dest;
}
 
/*
 * Broken compiler uses "bcopy" to do internal
 * assignments. Silly OSF/1 BSDism.
 */
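/* Note the BSD argument order: source first, destination second. */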
char * bcopy(const char * src, char * dest, size_t n)
{
        __memcpy(dest, src, n);
        return dest;
}
 
/*
 * gcc-2.7.1 and newer generate calls to memset and memcpy.  So we
 * need to define memcpy here:
 */
#ifdef __ELF__
 asm (".weak memcpy; memcpy = __memcpy");
#else
 asm (".weakext memcpy, __memcpy");
#endif
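/*
 * The asm above makes "memcpy" a weak alias for __memcpy, so a strong
 * memcpy definition elsewhere can still override it at link time.
 */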
