OpenCores Subversion repository: or1k_soc_on_altera_embedded_dev_kit (trunk)
URL: https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk
File: linux-2.6/linux-2.6.24/arch/sh64/mm/cache.c (rev 3)

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
        /*
         * First, setup some sane values for the I-cache.
         */
        cpu_data->icache.ways           = 4;
        cpu_data->icache.sets           = 256;
        cpu_data->icache.linesz         = L1_CACHE_BYTES;

        /*
         * FIXME: This can probably be cleaned up a bit as well.. for example,
         * do we really need the way shift _and_ the way_step_shift ?? Judging
         * by the existing code, I would guess no.. is there any valid reason
         * why we need to be tracking this around?
         */
        cpu_data->icache.way_shift      = 13;
        cpu_data->icache.entry_shift    = 5;
        cpu_data->icache.set_shift      = 4;
        cpu_data->icache.way_step_shift = 16;
        cpu_data->icache.asid_shift     = 2;

        /*
         * way offset = cache size / associativity, so just don't factor in
         * associativity in the first place..
         */
        cpu_data->icache.way_ofs        = cpu_data->icache.sets *
                                          cpu_data->icache.linesz;

        cpu_data->icache.asid_mask      = 0x3fc;
        cpu_data->icache.idx_mask       = 0x1fe0;
        cpu_data->icache.epn_mask       = 0xffffe000;
        cpu_data->icache.flags          = 0;
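
        /*
         * Worked example (assuming L1_CACHE_BYTES == 32, the SH-5 line size):
         * 4 ways x 256 sets x 32-byte lines gives a 32 KB cache.
         * way_ofs = sets * linesz = 256 * 32 = 8192 = 1 << 13, matching
         * way_shift; addresses that differ by way_ofs index the same set,
         * which is what the purge-by-alloco code further down relies on.
         */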

        /*
         * Next, setup some sane values for the D-cache.
         *
         * On the SH5, these are pretty consistent with the I-cache settings,
         * so we just copy over the existing definitions.. these can be fixed
         * up later, especially if we add runtime CPU probing.
         *
         * Though in the meantime it saves us from having to duplicate all of
         * the above definitions..
         */
        cpu_data->dcache                = cpu_data->icache;

        /*
         * Setup any cache-related flags here
         */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
        set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
        set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

        /*
         * We also need to reserve a slot for the D-cache in the DTLB, so we
         * do this now ..
         */
        dtlb_cache_slot                 = sh64_get_wired_dtlb_entry();

        return 0;
}

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()                                 do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)       do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)            do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)                       do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)                  do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)              do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)        do { } while (0)
#endif

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are:
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use invalidate_all
      more sparingly.

   */

/*##########################################################################
                               SUPPORT FUNCTIONS
  ##########################################################################*/

/****************************************************************************/
/* The following group of functions deal with mapping and unmapping a temporary
   page into the DTLB slot that has been set aside for our exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/

static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
        local_irq_save(slot_own_flags);
        sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
        sh64_teardown_tlb_slot(dtlb_cache_slot);
        local_irq_restore(slot_own_flags);
}
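
/* Typical usage of the pair above (a sketch, mirroring the purge routines
   further down): map the target physical page at a chosen effective address,
   operate on it, then tear the mapping down again:

        sh64_setup_dtlb_cache_slot(eaddr, get_asid(), paddr);
        ... ocbp/ocbwb loop over [eaddr, eaddr + PAGE_SIZE) ...
        sh64_teardown_dtlb_cache_slot();

   The wired slot is a single global resource, which is why the setup call
   takes the interrupt lock that the teardown call releases. */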

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
        unsigned long long addr, flag, data;
        unsigned int flags;

        addr=ICCR0;
        flag=ICCR0_ICI;
        data=0;

        /* Make this a critical section for safety (probably not strictly necessary.) */
        local_irq_save(flags);

        /* Without %1 it gets inexplicably wrong */
        asm volatile("getcfg    %3, 0, %0\n\t"
                        "or     %0, %2, %0\n\t"
                        "putcfg %3, 0, %0\n\t"
                        "synci"
                        : "=&r" (data)
                        : "0" (data), "r" (flag), "r" (addr));

        local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
        /* Invalidate range of addresses [start,end] from the I-cache, where
         * the addresses lie in the kernel superpage. */

        unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
        aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
        aligned_start &= L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
#if (NEFF == 32)
        ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
        while (addr <= ullend) {
                asm __volatile__ ("icbi %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
        /* If we get called, we know that vma->vm_flags contains VM_EXEC.
           Also, eaddr is page-aligned. */

        unsigned long long addr, end_addr;
        unsigned long flags = 0;
        unsigned long running_asid, vma_asid;
        addr = eaddr;
        end_addr = addr + PAGE_SIZE;

        /* Check whether we can use the current ASID for the I-cache
           invalidation.  For example, if we're called via
           access_process_vm->flush_cache_page->here, (e.g. when reading from
           /proc), 'running_asid' will be that of the reader, not of the
           victim.

           Also, note the risk that we might get pre-empted between the ASID
           compare and blocking IRQs, and before we regain control, the
           pid->ASID mapping changes.  However, the whole cache will get
           invalidated when the mapping is renewed, so the worst that can
           happen is that the loop below ends up invalidating somebody else's
           cache entries.
        */

        running_asid = get_asid();
        vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
        if (running_asid != vma_asid) {
                local_irq_save(flags);
                switch_and_save_asid(vma_asid);
        }
        while (addr < end_addr) {
                /* Worth unrolling a little */
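                /* Four icbi's at byte offsets 0/32/64/96, then advance by 128:
                   one cache line per icbi, assuming 32-byte I-cache lines. */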
                asm __volatile__("icbi %0,  0" : : "r" (addr));
                asm __volatile__("icbi %0, 32" : : "r" (addr));
                asm __volatile__("icbi %0, 64" : : "r" (addr));
                asm __volatile__("icbi %0, 96" : : "r" (addr));
                addr += 128;
        }
        if (running_asid != vma_asid) {
                switch_and_save_asid(running_asid);
                local_irq_restore(flags);
        }
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
                          unsigned long start, unsigned long end)
{
        /* Used for invalidating big chunks of I-cache, i.e. assume the range
           is whole pages.  If 'start' or 'end' is not page aligned, the code
           is conservative and invalidates to the ends of the enclosing pages.
           This is functionally OK, just a performance loss. */

        /* See the comments below in sh64_dcache_purge_user_range() regarding
           the choice of algorithm.  However, for the I-cache option (2) isn't
           available because there are no physical tags so aliases can't be
           resolved.  The icbi instruction has to be used through the user
           mapping.   Because icbi is cheaper than ocbp on a cache hit, it
           would be cheaper to use the selective code for a large range than is
           possible with the D-cache.  Just assume 64 for now as a working
           figure.
           */

        int n_pages;

        if (!mm) return;

        n_pages = ((end - start) >> PAGE_SHIFT);
        if (n_pages >= 64) {
                sh64_icache_inv_all();
        } else {
                unsigned long aligned_start;
                unsigned long eaddr;
                unsigned long after_last_page_start;
                unsigned long mm_asid, current_asid;
                unsigned long long flags = 0ULL;

                mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
                current_asid = get_asid();

                if (mm_asid != current_asid) {
                        /* Switch ASID and run the invalidate loop under cli */
                        local_irq_save(flags);
                        switch_and_save_asid(mm_asid);
                }

                aligned_start = start & PAGE_MASK;
                after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

                while (aligned_start < after_last_page_start) {
                        struct vm_area_struct *vma;
                        unsigned long vma_end;
                        vma = find_vma(mm, aligned_start);
                        if (!vma || (aligned_start <= vma->vm_end)) {
                                /* Avoid getting stuck in an error condition */
                                aligned_start += PAGE_SIZE;
                                continue;
                        }
                        vma_end = vma->vm_end;
                        if (vma->vm_flags & VM_EXEC) {
                                /* Executable */
                                eaddr = aligned_start;
                                while (eaddr < vma_end) {
                                        sh64_icache_inv_user_page(vma, eaddr);
                                        eaddr += PAGE_SIZE;
                                }
                        }
                        aligned_start = vma->vm_end; /* Skip to start of next region */
                }
                if (mm_asid != current_asid) {
                        switch_and_save_asid(current_asid);
                        local_irq_restore(flags);
                }
        }
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
                                                unsigned long start, int len)
{

        /* Invalidate a small range of user context I-cache, not necessarily
           page (or even cache-line) aligned. */

        unsigned long long eaddr = start;
        unsigned long long eaddr_end = start + len;
        unsigned long current_asid, mm_asid;
        unsigned long long flags;
        unsigned long long epage_start;

        /* Since this is used inside ptrace, the ASID in the mm context
           typically won't match current_asid.  We'll have to switch ASID to do
           this.  For safety, and given that the range will be small, do all
           this under cli.

           Note, there is a hazard that the ASID in mm->context is no longer
           actually associated with mm, i.e. if the mm->context has started a
           new cycle since mm was last active.  However, this is just a
           performance issue: all that happens is that we invalidate lines
           belonging to another mm, so the owning process has to refill them
           when that mm goes live again.  mm itself can't have any cache
           entries because there will have been a flush_cache_all when the new
           mm->context cycle started. */

        /* Align to start of cache line.  Otherwise, suppose len==8 and start
           was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
        eaddr = start & L1_CACHE_ALIGN_MASK;
        eaddr_end = start + len;

        local_irq_save(flags);
        mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
        current_asid = switch_and_save_asid(mm_asid);

        epage_start = eaddr & PAGE_MASK;

        while (eaddr < eaddr_end)
        {
                asm __volatile__("icbi %0, 0" : : "r" (eaddr));
                eaddr += L1_CACHE_BYTES;
        }
        switch_and_save_asid(current_asid);
        local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
        /* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
           cache hit on the virtual tag the instruction ends there, without a
           TLB lookup. */

        unsigned long long aligned_start;
        unsigned long long ull_end;
        unsigned long long addr;

        ull_end = end;

        /* Just invalidate over the range using the natural addresses.  TLB
           miss handling will be OK (TBC).  Since it's for the current process,
           either we're already in the right ASID context, or the ASIDs have
           been recycled since we were last active in which case we might just
           invalidate another process's I-cache entries : no worries, just a
           performance drop for him. */
        aligned_start = start & L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
        while (addr < ull_end) {
                asm __volatile__ ("icbi %0, 0" : : "r" (addr));
                asm __volatile__ ("nop");
                asm __volatile__ ("nop");
                addr += L1_CACHE_BYTES;
        }
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
        /* Purge all ways in a particular block of sets, specified by the base
           set number and number of sets.  Can handle wrap-around, if that's
           needed.  */

        int dummy_buffer_base_set;
        unsigned long long eaddr, eaddr0, eaddr1;
        int j;
        int set_offset;

        dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
        set_offset = sets_to_purge_base - dummy_buffer_base_set;

        for (j=0; j<n_sets; j++, set_offset++) {
                set_offset &= (cpu_data->dcache.sets - 1);
                eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

                /* Do one alloco which hits the required set per cache way.  For
                   write-back mode, this will purge the #ways resident lines.   There's
                   little point unrolling this loop because the allocos stall more if
                   they're too close together. */
                eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
                for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
                        asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
                        asm __volatile__ ("synco"); /* TAKum03020 */
                }

                eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
                for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
                        /* Load from each address.  Required because alloco is a NOP if
                           the cache is write-through.  Write-through is a config option. */
                        if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
                                *(volatile unsigned char *)(int)eaddr;
                }
        }

        /* Don't use OCBI to invalidate the lines.  That costs cycles directly.
           If the dummy block is just left resident, it will naturally get
           evicted as required.  */

        return;
}
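
/* A note on the stride above (illustrative, assuming the D-cache geometry set
   up in sh64_cache_init()): each inner loop issues one alloco per way,
   stepping the address by way_ofs = 8 KB, so all the target addresses fall in
   the same cache set; issuing 'ways' allocos to that set forces out whatever
   lines it previously held. */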

/****************************************************************************/

static void sh64_dcache_purge_all(void)
{
        /* Purge the entire contents of the dcache.  The most efficient way to
           achieve this is to use alloco instructions on a region of unused
           memory equal in size to the cache, thereby causing the current
           contents to be discarded by natural eviction.  The alternative,
           namely reading every tag, setting up a mapping for the corresponding
           page and doing an OCBP for the line, would be much more expensive.
           */

        sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

        return;

}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
        /* Purge the range of addresses [start,end] from the D-cache.  The
           addresses lie in the superpage mapping.  There's no harm if we
           overpurge at either end - just a small performance loss. */
        unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
        aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
        aligned_start &= L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
#if (NEFF == 32)
        ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
        while (addr <= ullend) {
                asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
        return;
}

/* Assumes this address (and the (2**n_synbits) pages up from it) aren't used
   for anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
        /* Purge the physical page 'paddr' from the cache.  It's known that any
           cache lines requiring attention have the same page colour as the
           address 'eaddr'.

           This relies on the fact that the D-cache matches on physical tags
           when no virtual tag matches.  So we create an alias for the original
           page and purge through that.  (Alternatively, we could have done
           this by switching ASID to match the original mapping and purged
           through that, but that involves ASID switching cost + probably a
           TLBMISS + refill anyway.)
           */

        unsigned long long magic_page_start;
        unsigned long long magic_eaddr, magic_eaddr_end;

        magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

        /* As long as the kernel is not pre-emptible, this doesn't need to be
           under cli/sti. */

        sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

        magic_eaddr = magic_page_start;
        magic_eaddr_end = magic_eaddr + PAGE_SIZE;
        while (magic_eaddr < magic_eaddr_end) {
                /* Little point in unrolling this loop - the OCBPs are blocking
                   and won't go any quicker (i.e. the loop overhead is parallel
                   to part of the OCBP execution.) */
                asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
                magic_eaddr += L1_CACHE_BYTES;
        }

        sh64_teardown_dtlb_cache_slot();
}
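
/* Colour selection above, worked through (assuming 4 KB pages and
   CACHE_OC_N_SYNBITS == 1, so CACHE_OC_SYN_MASK picks out bit 12): an eaddr
   with bit 12 set yields magic_page_start = MAGIC_PAGE0_START + 0x1000, so
   the temporary alias has the same page colour as eaddr and its lines land in
   the same cache sets as the ones being purged. */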

/****************************************************************************/

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
        /* Purge a page given its physical start address, by creating a
           temporary 1 page mapping and purging across that.  Even if we know
           the virtual address (& vma or mm) of the page, the method here is
           more elegant because it avoids issues of coping with page faults on
           the purge instructions (i.e. no special-case code required in the
           critical path in the TLB miss handling). */

        unsigned long long eaddr_start, eaddr, eaddr_end;
        int i;

        /* As long as the kernel is not pre-emptible, this doesn't need to be
           under cli/sti. */

        eaddr_start = MAGIC_PAGE0_START;
        for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
                sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

                eaddr = eaddr_start;
                eaddr_end = eaddr + PAGE_SIZE;
                while (eaddr < eaddr_end) {
                        asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
                        eaddr += L1_CACHE_BYTES;
                }

                sh64_teardown_dtlb_cache_slot();
                eaddr_start += PAGE_SIZE;
        }
}

static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
                                unsigned long addr, unsigned long end)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;
        spinlock_t *ptl;
        unsigned long paddr;

        if (!mm)
                return; /* No way to find physical address of page */

        pgd = pgd_offset(mm, addr);
        if (pgd_bad(*pgd))
                return;

        pmd = pmd_offset(pgd, addr);
        if (pmd_none(*pmd) || pmd_bad(*pmd))
                return;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        do {
                entry = *pte;
                if (pte_none(entry) || !pte_present(entry))
                        continue;
                paddr = pte_val(entry) & PAGE_MASK;
                sh64_dcache_purge_coloured_phy_page(paddr, addr);
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap_unlock(pte - 1, ptl);
}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
                          unsigned long start, unsigned long end)
{
        /* There are at least 5 choices for the implementation of this, with
           pros (+), cons(-), comments(*):

           1. ocbp each line in the range through the original user's ASID
              + no lines spuriously evicted
              - tlbmiss handling (must either handle faults on demand => extra
                special-case code in tlbmiss critical path), or map the page in
                advance (=> flush_tlb_range in advance to avoid multiple hits)
              - ASID switching
              - expensive for large ranges

           2. temporarily map each page in the range to a special effective
              address and ocbp through the temporary mapping; relies on the
              fact that SH-5 OCB* always do TLB lookup and match on ptags (they
              never look at the etags)
              + no spurious evictions
              - expensive for large ranges
              * surely cheaper than (1)

           3. walk all the lines in the cache, check the tags, if a match
              occurs create a page mapping to ocbp the line through
              + no spurious evictions
              - tag inspection overhead
              - (especially for small ranges)
              - potential cost of setting up/tearing down page mapping for
                every line that matches the range
              * cost partly independent of range size

           4. walk all the lines in the cache, check the tags, if a match
              occurs use 4 * alloco to purge the line (+3 other probably
              innocent victims) by natural eviction
              + no tlb mapping overheads
              - spurious evictions
              - tag inspection overhead

           5. implement like flush_cache_all
              + no tag inspection overhead
              - spurious evictions
              - bad for small ranges

           (1) can be ruled out as more expensive than (2).  (2) appears best
           for small ranges.  The choice between (3), (4) and (5) for large
           ranges and the range size for the large/small boundary need
           benchmarking to determine.

           For now use approach (2) for small ranges and (5) for large ones.

           */

        int n_pages;

        n_pages = ((end - start) >> PAGE_SHIFT);
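        /* The PMD_MASK term below forces the purge-all path whenever the
           range straddles a PMD boundary, since sh64_dcache_purge_user_pages()
           only walks a single page-table page. */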
        if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
#if 1
                sh64_dcache_purge_all();
#else
                unsigned long long set, way;
                unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
                for (set = 0; set < cpu_data->dcache.sets; set++) {
                        unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
                        for (way = 0; way < cpu_data->dcache.ways; way++) {
                                unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
                                unsigned long long tag0;
                                unsigned long line_valid;

                                asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
                                line_valid = tag0 & SH_CACHE_VALID;
                                if (line_valid) {
                                        unsigned long cache_asid;
                                        unsigned long epn;

                                        cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
                                        /* The next line needs some
                                           explanation.  The virtual tags
                                           encode bits [31:13] of the virtual
                                           address, bit [12] of the 'tag' being
                                           implied by the cache set index. */
                                        epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

                                        if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
                                                /* TODO : could optimise this
                                                   call by batching multiple
                                                   adjacent sets together. */
                                                sh64_dcache_purge_sets(set, 1);
                                                break; /* Don't waste time inspecting other ways for this set */
                                        }
                                }
                        }
                }
#endif
        } else {
                /* Small range, covered by a single page table page */
                start &= PAGE_MASK;     /* should already be so */
                end = PAGE_ALIGN(end);  /* should already be so */
                sh64_dcache_purge_user_pages(mm, start, end);
        }
        return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
        unsigned long long aligned_start;
        unsigned long long ull_end;
        unsigned long long addr;

        ull_end = end;

        /* Just wback over the range using the natural addresses.  TLB miss
           handling will be OK (TBC) : the range has just been written to by
           the signal frame setup code, so the PTEs must exist.

           Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
           it doesn't matter, even if the pid->ASID mapping changes whilst
           we're away.  In that case the cache will have been flushed when the
           mapping was renewed.  So the writebacks below will be nugatory (and
           we'll doubtless have to fault the TLB entry/ies in again with the
           new ASID), but it's a rare case.
           */
        aligned_start = start & L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
        while (addr < ull_end) {
                asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
}

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
        /* Given a physical address paddr, and a user virtual address
           user_eaddr which will eventually be mapped to it, create a one-off
           kernel-private eaddr mapped to the same paddr.  This is used for
           creating special destination pages for copy_user_page and
           clear_user_page */

        static unsigned long current_pointer = UNIQUE_EADDR_START;
        unsigned long coloured_pointer;

        if (current_pointer == UNIQUE_EADDR_END) {
                sh64_dcache_purge_all();
                current_pointer = UNIQUE_EADDR_START;
        }

        coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
        sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

        current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

        return coloured_pointer;
}
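
/* Rough arithmetic for the pool above (assuming 4 KB pages and
   CACHE_OC_N_SYNBITS == 1): each call consumes PAGE_SIZE << 1 = 8 KB of the
   128 MB window [UNIQUE_EADDR_START, UNIQUE_EADDR_END), so the pointer wraps
   (with the accompanying sh64_dcache_purge_all()) after 16384 calls. */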

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
        void *coloured_to;

        /* Discard any existing cache entries of the wrong colour.  These are
           present quite often, if the kernel has recently used the page
           internally, then given it up, then it's been allocated to the user.
           */
        sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

        coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
        sh64_page_copy(from, coloured_to);

        sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
        void *coloured_to;

        /* Discard any existing kernel-originated lines of the wrong colour (as
           above) */
        sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

        coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
        sh64_page_clear(coloured_to);

        sh64_teardown_dtlb_cache_slot();
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/*##########################################################################
                            EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
        /* Invalidate the entire contents of both caches, after writing back to
           memory any dirty data from the D-cache. */
        sh64_dcache_purge_all();
        sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
        /* Invalidate an entire user-address space from both caches, after
           writing back dirty data (e.g. for shared mmap etc). */

        /* This could be coded selectively by inspecting all the tags then
           doing 4*alloco on any set containing a match (as for
           flush_cache_range), but fork/exit/execve (where this is called from)
           are expensive anyway. */

        /* Have to do a purge here, despite the comments re I-cache below.
           There could be odd-coloured dirty data associated with the mm still
           in the cache - if this gets written out through natural eviction
           after the kernel has reused the page there will be chaos.
           */

        sh64_dcache_purge_all();

        /* The mm being torn down won't ever be active again, so any Icache
           lines tagged with its ASID won't be visible for the rest of the
           lifetime of this ASID cycle.  Before the ASID gets reused, there
           will be a flush_cache_all.  Hence we don't need to touch the
           I-cache.  This is similar to the lack of action needed in
           flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                       unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;

        /* Invalidate (from both caches) the range [start,end) of virtual
           addresses from the user address space specified by mm, after writing
           back any dirty data.

           Note, 'end' is 1 byte beyond the end of the range to flush. */

        sh64_dcache_purge_user_range(mm, start, end);
        sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
{
        /* Invalidate any entries in either cache for the vma within the user
           address space vma->vm_mm for the page starting at virtual address
           'eaddr'.   This seems to be used primarily in breaking COW.  Note,
           the I-cache must be searched too in case the page in question is
           both writable and being executed from (e.g. stack trampolines.)

           Note, this is called with pte lock held.
           */

        sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

        if (vma->vm_flags & VM_EXEC) {
                sh64_icache_inv_user_page(vma, eaddr);
        }
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
{
        /* 'from' and 'to' are kernel virtual addresses (within the superpage
           mapping of the physical RAM).  'address' is the user virtual address
           where the copy 'to' will be mapped after.  This allows a custom
           mapping to be used to ensure that the new copy is placed in the
           right cache sets for the user to see it without having to bounce it
           out via memory.  Note however : the call to flush_page_to_ram in
           (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
           very important case!

           TBD : can we guarantee that on every call, any cache entries for
           'from' are in the same colour sets as 'address' also?  i.e. is this
           always used just to deal with COW?  (I suspect not). */

        /* There are two possibilities here for when the page 'from' was last accessed:
           * by the kernel : this is OK, no purge required.
           * by the/a user (e.g. for break_COW) : need to purge.

           If the potential user mapping at 'address' is the same colour as
           'from' there is no need to purge any cache lines from the 'from'
           page mapped into cache sets of colour 'address'.  (The copy will be
           accessing the page through 'from').
           */

        if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
                sh64_dcache_purge_coloured_phy_page(__pa(from), address);
        }

        if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
                /* No synonym problem on destination */
                sh64_page_copy(from, to);
        } else {
                sh64_copy_user_page_coloured(to, from, address);
        }

        /* Note, don't need to flush 'from' page from the cache again - it's
           done anyway by the generic code */
}
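
/* Illustration of the synonym tests above (assuming 4 KB pages and a single
   synonym bit, so CACHE_OC_SYN_MASK == 0x1000): if 'address' is 0x00403000
   and 'to' is 0xc0001000, both have bit 12 set, the colours match and the
   plain sh64_page_copy() path is taken; had they differed in bit 12, the copy
   would go through the coloured alias set up by sh64_make_unique_eaddr(). */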

void clear_user_page(void *to, unsigned long address, struct page *page)
{
        /* 'to' is a kernel virtual address (within the superpage
           mapping of the physical RAM).  'address' is the user virtual address
           where the 'to' page will be mapped after.  This allows a custom
           mapping to be used to ensure that the new copy is placed in the
           right cache sets for the user to see it without having to bounce it
           out via memory.
        */

        if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
                /* No synonym problem on destination */
                sh64_page_clear(to);
        } else {
                sh64_clear_user_page_coloured(to, address);
        }
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

void flush_dcache_page(struct page *page)
{
        sh64_dcache_purge_phy_page(page_to_phys(page));
        wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
        /* Flush the range [start,end] of kernel virtual address space from
           the I-cache.  The corresponding range must be purged from the
           D-cache also because the SH-5 doesn't have cache snooping between
           the caches.  The addresses will be visible through the superpage
           mapping, therefore it's guaranteed that there are no cache entries
           for the range in cache sets of the wrong colour.

           Primarily used for cohering the I-cache after a module has
           been loaded.  */

        /* We also make sure to purge the same range from the D-cache since
           flush_page_to_ram() won't be doing this for us! */

        sh64_dcache_purge_kernel_range(start, end);
        wmb();
        sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
                        struct page *page, unsigned long addr, int len)
{
        /* Flush the range of user (defined by vma->vm_mm) address space
           starting at 'addr' for 'len' bytes from the cache.  The range does
           not straddle a page boundary, the unique physical page containing
           the range is 'page'.  This seems to be used mainly for invalidating
           an address range following a poke into the program text through the
           ptrace() call from another process (e.g. for BRK instruction
           insertion). */

        sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
        mb();

        if (vma->vm_flags & VM_EXEC) {
                sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
        }
}

/*##########################################################################
                        ARCH/SH64 PRIVATE CALLABLE API.
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
        /* For the address range [start,end), write back the data from the
           D-cache and invalidate the corresponding region of the I-cache for
           the current process.  Used to flush signal trampolines on the stack
           to make them executable. */

        sh64_dcache_wback_current_user_range(start, end);
        wmb();
        sh64_icache_inv_current_user_range(start, end);
}