1 |
1275 |
phoenix |
/*
|
2 |
|
|
* Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
|
3 |
|
|
* Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
|
4 |
|
|
* Copyright (C) 2000 SiByte, Inc.
|
5 |
|
|
* Copyright (C) 2002, 2003 Broadcom Corporation
|
6 |
|
|
*
|
7 |
|
|
* Written by Justin Carlson of SiByte, Inc.
|
8 |
|
|
* and Kip Walker of Broadcom Corp.
|
9 |
|
|
*
|
10 |
|
|
*
|
11 |
|
|
* This program is free software; you can redistribute it and/or
|
12 |
|
|
* modify it under the terms of the GNU General Public License
|
13 |
|
|
* as published by the Free Software Foundation; either version 2
|
14 |
|
|
* of the License, or (at your option) any later version.
|
15 |
|
|
*
|
16 |
|
|
* This program is distributed in the hope that it will be useful,
|
17 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19 |
|
|
* GNU General Public License for more details.
|
20 |
|
|
*
|
21 |
|
|
* You should have received a copy of the GNU General Public License
|
22 |
|
|
* along with this program; if not, write to the Free Software
|
23 |
|
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
24 |
|
|
*/
|
25 |
|
|
|
26 |
|
|
#include <linux/config.h>
|
27 |
|
|
#include <linux/sched.h>
|
28 |
|
|
#include <linux/smp.h>
|
29 |
|
|
|
30 |
|
|
#include <asm/io.h>
|
31 |
|
|
#include <asm/sibyte/sb1250.h>
|
32 |
|
|
#include <asm/sibyte/sb1250_regs.h>
|
33 |
|
|
#include <asm/sibyte/sb1250_dma.h>
|
34 |
|
|
#include <asm/sibyte/64bit.h>
|
35 |
|
|
|
36 |
|
|
#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
|
37 |
|
|
#define SB1_PREF_LOAD_STREAMED_HINT "0"
|
38 |
|
|
#define SB1_PREF_STORE_STREAMED_HINT "1"
|
39 |
|
|
#else
|
40 |
|
|
#define SB1_PREF_LOAD_STREAMED_HINT "4"
|
41 |
|
|
#define SB1_PREF_STORE_STREAMED_HINT "5"
|
42 |
|
|
#endif
|
43 |
|
|
|
44 |
|
|
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
static inline void clear_page_cpu(void *page)
#else
void clear_page(void *page)
#endif
{
	/*
	 * Zero one page with the CPU, one 32-byte SB1 cacheline (4 x sd)
	 * per loop iteration.  When the data-mover page ops are configured
	 * this is the fallback path (clear_page_cpu); otherwise it IS
	 * clear_page.
	 *
	 * JDCXXX - This should be bottlenecked by the write buffer, but these
	 * things tend to be mildly unpredictable...should check this on the
	 * performance model
	 *
	 * We prefetch 4 lines ahead. We're also "cheating" slightly here...
	 * since we know we're on an SB1, we force the assembler to take
	 * 64-bit operands to speed things up
	 *
	 * Operand map: %0 = cursor (starts at 'page', written back),
	 * %2 = PAGE_SIZE-32 immediate; $1 (at) holds the last-line address,
	 * so the loop ends after the line at 'page + PAGE_SIZE - 32'.
	 */
	__asm__ __volatile__(
	".set push \n"
	".set noreorder \n"
	".set noat \n"			/* we use $1 ($at) as the end pointer */
	".set mips4 \n"			/* allow 64-bit ops (daddiu/sd) */
	" daddiu $1, %0, %2 \n"  /* Calculate the end of the page to clear */
#ifdef CONFIG_CPU_HAS_PREFETCH
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%0) \n"  /* Prefetch the first 4 lines */
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%0) \n"
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%0) \n"
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%0) \n"
#endif
	"1: sd $0, 0(%0) \n"  /* Throw out a cacheline of 0's */
	" sd $0, 8(%0) \n"
	" sd $0, 16(%0) \n"
	" sd $0, 24(%0) \n"
#ifdef CONFIG_CPU_HAS_PREFETCH
	" pref " SB1_PREF_STORE_STREAMED_HINT ",128(%0) \n" /* Prefetch 4 lines ahead */
#endif
	" bne $1, %0, 1b \n"
	" daddiu %0, %0, 32\n" /* Next cacheline (This instruction better be short piped!) */
	".set pop \n"
	: "=r" (page)
	: "0" (page), "I" (PAGE_SIZE-32)
	: "memory");

}
|
86 |
|
|
|
87 |
|
|
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
static inline void copy_page_cpu(void *to, void *from)
#else
void copy_page(void *to, void *from)
#endif
{
	/*
	 * Copy one page with the CPU, one 32-byte cacheline (8 x lw/sw)
	 * per loop iteration.  When the data-mover page ops are configured
	 * this is the fallback path (copy_page_cpu); otherwise it IS
	 * copy_page.
	 *
	 * This should be optimized in assembly...can't use ld/sd, though,
	 * because the top 32 bits could be nuked if we took an interrupt
	 * during the routine. And this is not a good place to be cli()'ing
	 *
	 * The pref's used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner. A page copy often
	 * ends up copying a lot more data than is commonly used, so this seems
	 * to make sense in terms of reducing cache pollution, but I've no real
	 * performance data to back this up
	 *
	 * NOTE on operands: the matching constraints bind %0 to 'from'
	 * (source, read via lw) and %1 to 'to' (destination, written via
	 * sw) -- i.e. the "=r"(to)/"=r"(from) output names are swapped
	 * relative to the registers, but both are dead after the asm so
	 * only the clobber effect matters.  %4 = PAGE_SIZE-32; $1 ($at)
	 * holds the address of the last source line.
	 */

	__asm__ __volatile__(
	".set push \n"
	".set noreorder \n"
	".set noat \n"			/* $1 ($at) is the end-of-page sentinel */
	".set mips4 \n"			/* allow daddiu on pointers */
	" daddiu $1, %0, %4 \n"  /* Calculate the end of the page to copy */
#ifdef CONFIG_CPU_HAS_PREFETCH
	" pref " SB1_PREF_LOAD_STREAMED_HINT ", 0(%0) \n"  /* Prefetch the first 3 lines */
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%1) \n"
	" pref " SB1_PREF_LOAD_STREAMED_HINT ", 32(%0) \n"
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%1) \n"
	" pref " SB1_PREF_LOAD_STREAMED_HINT ", 64(%0) \n"
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%1) \n"
#endif
	"1: lw $2, 0(%0) \n"  /* Block copy a cacheline */
	" lw $3, 4(%0) \n"
	" lw $4, 8(%0) \n"
	" lw $5, 12(%0) \n"
	" lw $6, 16(%0) \n"
	" lw $7, 20(%0) \n"
	" lw $8, 24(%0) \n"
	" lw $9, 28(%0) \n"
#ifdef CONFIG_CPU_HAS_PREFETCH
	" pref " SB1_PREF_LOAD_STREAMED_HINT ", 96(%0) \n"  /* Prefetch ahead */
	" pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%1) \n"
#endif
	" sw $2, 0(%1) \n"
	" sw $3, 4(%1) \n"
	" sw $4, 8(%1) \n"
	" sw $5, 12(%1) \n"
	" sw $6, 16(%1) \n"
	" sw $7, 20(%1) \n"
	" sw $8, 24(%1) \n"
	" sw $9, 28(%1) \n"
	" daddiu %1, %1, 32 \n" /* Next cacheline */
	" nop \n" /* Force next add to short pipe */
	" nop \n" /* Force next add to short pipe */
	" bne $1, %0, 1b \n"
	" daddiu %0, %0, 32 \n" /* Next cacheline */
	".set pop \n"
	: "=r" (to), "=r" (from)
	: "0" (from), "1" (to), "I" (PAGE_SIZE-32)
	: "$2","$3","$4","$5","$6","$7","$8","$9","memory");
}
|
149 |
|
|
|
150 |
|
|
|
151 |
|
|
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
|
152 |
|
|
|
153 |
|
|
/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
typedef struct dmadscr_s {
	uint64_t dscr_a;	/* data mover descriptor word A (addr + flags) */
	uint64_t dscr_b;	/* data mover descriptor word B (src/length) */
	uint64_t pad_a;		/* pad the struct out to one 32-byte cacheline */
	uint64_t pad_b;
} dmadscr_t;

/* One single-entry descriptor ring per CPU, cacheline-aligned so CPUs
 * never share a line (avoids false sharing / exclusive-ownership races). */
static dmadscr_t page_descr[NR_CPUS] __attribute__((aligned(SMP_CACHE_BYTES)));
|
165 |
|
|
|
166 |
|
|
void sb1_dma_init(void)
|
167 |
|
|
{
|
168 |
|
|
int cpu = smp_processor_id();
|
169 |
|
|
uint64_t base_val = PHYSADDR(&page_descr[cpu]) | V_DM_DSCR_BASE_RINGSZ(1);
|
170 |
|
|
|
171 |
|
|
out64(base_val,
|
172 |
|
|
IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE));
|
173 |
|
|
out64(base_val | M_DM_DSCR_BASE_RESET,
|
174 |
|
|
IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE));
|
175 |
|
|
out64(base_val | M_DM_DSCR_BASE_ENABL,
|
176 |
|
|
IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE));
|
177 |
|
|
}
|
178 |
|
|
|
179 |
|
|
void clear_page(void *page)
|
180 |
|
|
{
|
181 |
|
|
int cpu = smp_processor_id();
|
182 |
|
|
|
183 |
|
|
/* if the page is above Kseg0, use old way */
|
184 |
|
|
if (KSEGX(page) != K0BASE)
|
185 |
|
|
return clear_page_cpu(page);
|
186 |
|
|
|
187 |
|
|
page_descr[cpu].dscr_a = PHYSADDR(page) | M_DM_DSCRA_ZERO_MEM | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
|
188 |
|
|
page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
|
189 |
|
|
out64(1, IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_COUNT));
|
190 |
|
|
|
191 |
|
|
/*
|
192 |
|
|
* Don't really want to do it this way, but there's no
|
193 |
|
|
* reliable way to delay completion detection.
|
194 |
|
|
*/
|
195 |
|
|
while (!(in64(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) & M_DM_DSCR_BASE_INTERRUPT))
|
196 |
|
|
;
|
197 |
|
|
in64(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE));
|
198 |
|
|
}
|
199 |
|
|
|
200 |
|
|
void copy_page(void *to, void *from)
|
201 |
|
|
{
|
202 |
|
|
unsigned long from_phys = PHYSADDR(from);
|
203 |
|
|
unsigned long to_phys = PHYSADDR(to);
|
204 |
|
|
int cpu = smp_processor_id();
|
205 |
|
|
|
206 |
|
|
/* if either page is above Kseg0, use old way */
|
207 |
|
|
if ((KSEGX(to) != K0BASE) || (KSEGX(from) != K0BASE))
|
208 |
|
|
return copy_page_cpu(to, from);
|
209 |
|
|
|
210 |
|
|
page_descr[cpu].dscr_a = PHYSADDR(to_phys) | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
|
211 |
|
|
page_descr[cpu].dscr_b = PHYSADDR(from_phys) | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
|
212 |
|
|
out64(1, IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_COUNT));
|
213 |
|
|
|
214 |
|
|
/*
|
215 |
|
|
* Don't really want to do it this way, but there's no
|
216 |
|
|
* reliable way to delay completion detection.
|
217 |
|
|
*/
|
218 |
|
|
while (!(in64(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) & M_DM_DSCR_BASE_INTERRUPT))
|
219 |
|
|
;
|
220 |
|
|
in64(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE));
|
221 |
|
|
}
|
222 |
|
|
|
223 |
|
|
#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */
|