/*
 *  linux/arch/x86-64/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>              /* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>

/* Page fault error code bits */
#define PF_PROT (1<<0)          /* or no page found */
#define PF_WRITE        (1<<1)
#define PF_USER (1<<2)
#define PF_RSVD (1<<3)
#define PF_INSTR        (1<<4)
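
/*
 * The hardware error code uses the standard x86 page-fault encoding:
 *   PF_PROT  - set: protection violation on a present page,
 *              clear: page not present
 *   PF_WRITE - set: write access, clear: read access
 *   PF_USER  - set: the fault originated in user mode
 *   PF_RSVD  - set: a reserved bit was set in a paging entry
 *   PF_INSTR - set: the fault was an instruction fetch
 */

/*
 * Give kprobes a chance to handle the fault first.  If a kprobe is
 * active on this CPU and its fault handler claims trap 14, the normal
 * page fault handling below is skipped.  Preemption is disabled around
 * kprobe_running() because it relies on smp_processor_id().
 */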
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs)
{
        int ret = 0;

        /* kprobe_running() needs smp_processor_id() */
        if (!user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, 14))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs)
{
        return 0;
}
#endif

/* Sometimes the CPU reports invalid exceptions on prefetch.
   Check that here and ignore.
   Opcode checker based on code by Richard Brunner */
static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
                                unsigned long error_code)
{
        unsigned char *instr;
        int scan_more = 1;
        int prefetch = 0;
        unsigned char *max_instr;

        /* If it was an exec fault, ignore */
        if (error_code & PF_INSTR)
                return 0;

        instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
        max_instr = instr + 15;

        if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
                return 0;

        while (scan_more && instr < max_instr) {
                unsigned char opcode;
                unsigned char instr_hi;
                unsigned char instr_lo;

                if (probe_kernel_address(instr, opcode))
                        break;

                instr_hi = opcode & 0xf0;
                instr_lo = opcode & 0x0f;
                instr++;

                switch (instr_hi) {
                case 0x20:
                case 0x30:
                        /* Values 0x26,0x2E,0x36,0x3E are valid x86
                           prefixes.  In long mode, the CPU will signal
                           invalid opcode if some of these prefixes are
                           present so we will never get here anyway */
                        scan_more = ((instr_lo & 7) == 0x6);
                        break;

                case 0x40:
                        /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes.
                           Need to figure out under what instruction mode the
                           instruction was issued ... */
                        /* Could check the LDT for lm, but for now it's good
                           enough to assume that long mode only uses well known
                           segments or kernel. */
                        scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
                        break;

                case 0x60:
                        /* 0x64 thru 0x67 are valid prefixes in all modes. */
                        scan_more = (instr_lo & 0xC) == 0x4;
                        break;
                case 0xF0:
                        /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
                        scan_more = !instr_lo || (instr_lo>>1) == 1;
                        break;
                case 0x00:
                        /* Prefetch instruction is 0x0F0D or 0x0F18 */
                        scan_more = 0;
                        if (probe_kernel_address(instr, opcode))
                                break;
                        prefetch = (instr_lo == 0xF) &&
                                (opcode == 0x0D || opcode == 0x18);
                        break;
                default:
                        scan_more = 0;
                        break;
                }
        }
        return prefetch;
}
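
/*
 * Return non-zero if the page-table entry at p cannot be read safely,
 * i.e. probing the kernel address faults.
 */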
static int bad_address(void *p)
{
        unsigned long dummy;
        return probe_kernel_address((unsigned long *)p, dummy);
}
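
/*
 * Walk the page tables for 'address' starting from CR3 and print one
 * entry per level (PGD/PUD/PMD/PTE), stopping at a non-present entry,
 * a large page, or an unreadable table.
 */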
void dump_pagetable(unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = (pgd_t *)read_cr3();

        pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
        pgd += pgd_index(address);
        if (bad_address(pgd)) goto bad;
        printk("PGD %lx ", pgd_val(*pgd));
        if (!pgd_present(*pgd)) goto ret;

        pud = pud_offset(pgd, address);
        if (bad_address(pud)) goto bad;
        printk("PUD %lx ", pud_val(*pud));
        if (!pud_present(*pud)) goto ret;

        pmd = pmd_offset(pud, address);
        if (bad_address(pmd)) goto bad;
        printk("PMD %lx ", pmd_val(*pmd));
        if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;

        pte = pte_offset_kernel(pmd, address);
        if (bad_address(pte)) goto bad;
        printk("PTE %lx", pte_val(*pte));
ret:
        printk("\n");
        return;
bad:
        printk("BAD\n");
}

static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";

/* Workaround for K8 erratum #93 & buggy BIOS.
   BIOS SMM functions are required to use a specific workaround
   to avoid corruption of the 64-bit RIP register on C stepping K8.
   A lot of BIOSes that didn't get tested properly miss this.
   The OS sees this as a page fault with the upper 32 bits of RIP cleared.
   Try to work around it here.
   Note we only handle faults in the kernel here. */

static int is_errata93(struct pt_regs *regs, unsigned long address)
{
        static int warned;
        if (address != regs->rip)
                return 0;
        if ((address >> 32) != 0)
                return 0;
        address |= 0xffffffffUL << 32;
        if ((address >= (u64)_stext && address <= (u64)_etext) ||
            (address >= MODULES_VADDR && address <= MODULES_END)) {
                if (!warned) {
                        printk(errata93_warning);
                        warned = 1;
                }
                regs->rip = address;
                return 1;
        }
        return 0;
}
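
/*
 * The hardware reported a reserved bit set in a page-table entry
 * (PF_RSVD): report the corruption, oops, and kill the current task.
 */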
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
                                 unsigned long error_code)
{
        unsigned long flags = oops_begin();
        struct task_struct *tsk;

        printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
               current->comm, address);
        dump_pagetable(address);
        tsk = current;
        tsk->thread.cr2 = address;
        tsk->thread.trap_no = 14;
        tsk->thread.error_code = error_code;
        __die("Bad pagetable", regs, error_code);
        oops_end(flags);
        do_exit(SIGKILL);
}

/*
 * Handle a fault on the vmalloc area
 *
 * This assumes no large pages in there.
 */
static int vmalloc_fault(unsigned long address)
{
        pgd_t *pgd, *pgd_ref;
        pud_t *pud, *pud_ref;
        pmd_t *pmd, *pmd_ref;
        pte_t *pte, *pte_ref;

        /* Copy kernel mappings over when needed. This can also
           happen within a race in page table update. In the latter
           case just flush. */

        pgd = pgd_offset(current->mm ?: &init_mm, address);
        pgd_ref = pgd_offset_k(address);
        if (pgd_none(*pgd_ref))
                return -1;
        if (pgd_none(*pgd))
                set_pgd(pgd, *pgd_ref);
        else
                BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

        /* Below here mismatches are bugs because these lower tables
           are shared */

        pud = pud_offset(pgd, address);
        pud_ref = pud_offset(pgd_ref, address);
        if (pud_none(*pud_ref))
                return -1;
        if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
                BUG();
        pmd = pmd_offset(pud, address);
        pmd_ref = pmd_offset(pud_ref, address);
        if (pmd_none(*pmd_ref))
                return -1;
        if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
                BUG();
        pte_ref = pte_offset_kernel(pmd_ref, address);
        if (!pte_present(*pte_ref))
                return -1;
        pte = pte_offset_kernel(pmd, address);
        /* Don't use pte_page here, because the mappings can point
           outside mem_map, and the NUMA hash lookup cannot handle
           that. */
        if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
                BUG();
        return 0;
}
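
/*
 * Non-zero: print a rate-limited message for user-space faults that end
 * in an unhandled SIGSEGV (see the bad_area path below).
 */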
int show_unhandled_signals = 1;

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                                        unsigned long error_code)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct * vma;
        unsigned long address;
        const struct exception_table_entry *fixup;
        int write, fault;
        unsigned long flags;
        siginfo_t info;

        /*
         * We can fault from pretty much anywhere, with unknown IRQ state.
         */
        trace_hardirqs_fixup();

        tsk = current;
        mm = tsk->mm;
        prefetchw(&mm->mmap_sem);

        /* get the address */
        address = read_cr2();

        info.si_code = SEGV_MAPERR;

        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         *
         * This verifies that the fault happens in kernel space
         * (error_code & 4) == 0, and that the fault was not a
         * protection error (error_code & 9) == 0.
         */
        if (unlikely(address >= TASK_SIZE64)) {
                /*
                 * Don't check for the module range here: its PML4
                 * is always initialized because it's shared with the main
                 * kernel text. Only vmalloc may need PML4 syncups.
                 */
                if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
                      ((address >= VMALLOC_START && address < VMALLOC_END))) {
                        if (vmalloc_fault(address) >= 0)
                                return;
                }
                if (notify_page_fault(regs))
                        return;
                /*
                 * Don't take the mm semaphore here. If we fixup a prefetch
                 * fault we could otherwise deadlock.
                 */
                goto bad_area_nosemaphore;
        }

        if (notify_page_fault(regs))
                return;

        if (likely(regs->eflags & X86_EFLAGS_IF))
                local_irq_enable();

        if (unlikely(error_code & PF_RSVD))
                pgtable_bad(address, regs, error_code);

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (unlikely(in_atomic() || !mm))
                goto bad_area_nosemaphore;

        /*
         * User-mode registers count as a user access even for any
         * potential system fault or CPU buglet.
         */
        if (user_mode_vm(regs))
                error_code |= PF_USER;

 again:
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
         * kernel and should generate an OOPS.  Unfortunately, in the case of an
         * erroneous fault occurring in a code path which already holds mmap_sem
         * we will deadlock attempting to validate the fault against the
         * address space.  Luckily the kernel only validly references user
         * space from well defined areas of code, which are listed in the
         * exceptions table.
         *
         * As the vast majority of faults will be valid we will only perform
         * the source reference check when there is a possibility of a deadlock.
         * Attempt to lock the address space, if we cannot we then validate the
         * source.  If this is invalid we can skip the address space check,
         * thus avoiding the deadlock.
         */
        if (!down_read_trylock(&mm->mmap_sem)) {
                if ((error_code & PF_USER) == 0 &&
                    !search_exception_tables(regs->rip))
                        goto bad_area_nosemaphore;
                down_read(&mm->mmap_sem);
        }

        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;
        if (likely(vma->vm_start <= address))
                goto good_area;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;
        if (error_code & 4) {
                /* Allow userspace just enough access below the stack pointer
                 * to let the 'enter' instruction work.
                 */
                if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
                        goto bad_area;
        }
        if (expand_stack(vma, address))
                goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
        info.si_code = SEGV_ACCERR;
        write = 0;
        switch (error_code & (PF_PROT|PF_WRITE)) {
                default:        /* 3: write, present */
                        /* fall through */
                case PF_WRITE:          /* write, not present */
                        if (!(vma->vm_flags & VM_WRITE))
                                goto bad_area;
                        write++;
                        break;
                case PF_PROT:           /* read, present */
                        goto bad_area;
                case 0:                  /* read, not present */
                        if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                                goto bad_area;
        }

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(mm, vma, address, write);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGBUS)
                        goto do_sigbus;
                BUG();
        }
        if (fault & VM_FAULT_MAJOR)
                tsk->maj_flt++;
        else
                tsk->min_flt++;
        up_read(&mm->mmap_sem);
        return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:
        /* User mode accesses just cause a SIGSEGV */
        if (error_code & PF_USER) {

                /*
                 * It's possible to have interrupts off here.
                 */
                local_irq_enable();

                if (is_prefetch(regs, address, error_code))
                        return;

                /* Work around K8 erratum #100: K8 in compat mode
                   occasionally jumps to illegal addresses >4GB.  We
                   catch this here in the page fault handler because
                   these addresses are not reachable. Just detect this
                   case and return.  Any code segment in LDT is
                   compatibility mode. */
                if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
                    (address >> 32))
                        return;

                if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
                    printk_ratelimit()) {
                        printk(
                       "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
                                        tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
                                        tsk->comm, tsk->pid, address, regs->rip,
                                        regs->rsp, error_code);
                }

                tsk->thread.cr2 = address;
                /* Kernel addresses are always protection faults */
                tsk->thread.error_code = error_code | (address >= TASK_SIZE);
                tsk->thread.trap_no = 14;
                info.si_signo = SIGSEGV;
                info.si_errno = 0;
                /* info.si_code has been set above */
                info.si_addr = (void __user *)address;
                force_sig_info(SIGSEGV, &info, tsk);
                return;
        }

no_context:

        /* Are we prepared to handle this kernel fault?  */
        fixup = search_exception_tables(regs->rip);
        if (fixup) {
                regs->rip = fixup->fixup;
                return;
        }

        /*
         * Hall of shame of CPU/BIOS bugs.
         */

        if (is_prefetch(regs, address, error_code))
                return;

        if (is_errata93(regs, address))
                return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

        flags = oops_begin();

        if (address < PAGE_SIZE)
                printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
        else
                printk(KERN_ALERT "Unable to handle kernel paging request");
        printk(" at %016lx RIP: \n" KERN_ALERT, address);
        printk_address(regs->rip);
        dump_pagetable(address);
        tsk->thread.cr2 = address;
        tsk->thread.trap_no = 14;
        tsk->thread.error_code = error_code;
        __die("Oops", regs, error_code);
        /* Executive summary in case the body of the oops scrolled away */
        printk(KERN_EMERG "CR2: %016lx\n", address);
        oops_end(flags);
        do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
        up_read(&mm->mmap_sem);
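        /* Never OOM-kill the init process: give up the CPU and retry
           the fault instead. */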
        if (is_global_init(current)) {
                yield();
                goto again;
        }
        printk("VM: killing process %s\n", tsk->comm);
        if (error_code & 4)
                do_group_exit(SIGKILL);
        goto no_context;

do_sigbus:
        up_read(&mm->mmap_sem);

        /* Kernel mode? Handle exceptions or die */
        if (!(error_code & PF_USER))
                goto no_context;

        tsk->thread.cr2 = address;
        tsk->thread.error_code = error_code;
        tsk->thread.trap_no = 14;
        info.si_signo = SIGBUS;
        info.si_errno = 0;
        info.si_code = BUS_ADRERR;
        info.si_addr = (void __user *)address;
        force_sig_info(SIGBUS, &info, tsk);
        return;
}
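
/*
 * pgd_lock protects pgd_list, the list of pages backing process
 * page-global directories; vmalloc_sync_all() below walks that list to
 * push kernel PGD entries into every process.
 */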
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);
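
/*
 * Propagate the kernel's PGD entries for the vmalloc area into all
 * process page directories, so that later references do not have to
 * rely on the lazy vmalloc_fault() fixup above.
 */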
void vmalloc_sync_all(void)
{
        /* Note that races in the updates of insync and start aren't
           problematic:
           insync can only get set bits added, and updates to start are only
           improving performance (without affecting correctness if undone). */
        static DECLARE_BITMAP(insync, PTRS_PER_PGD);
        static unsigned long start = VMALLOC_START & PGDIR_MASK;
        unsigned long address;

        for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
                if (!test_bit(pgd_index(address), insync)) {
                        const pgd_t *pgd_ref = pgd_offset_k(address);
                        struct page *page;

                        if (pgd_none(*pgd_ref))
                                continue;
                        spin_lock(&pgd_lock);
                        list_for_each_entry(page, &pgd_list, lru) {
                                pgd_t *pgd;
                                pgd = (pgd_t *)page_address(page) + pgd_index(address);
                                if (pgd_none(*pgd))
                                        set_pgd(pgd, *pgd_ref);
                                else
                                        BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
                        }
                        spin_unlock(&pgd_lock);
                        set_bit(pgd_index(address), insync);
                }
                if (address == start)
                        start = address + PGDIR_SIZE;
        }
        /* Check that there is no need to do the same for the modules area. */
        BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
        BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
                                (__START_KERNEL & PGDIR_MASK)));
}
