OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

or1k/trunk/uclinux/uClinux-2.0.x/mm/memory.c (rev 1765)

/*
 *  linux/mm/memory.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * demand-loading started 01.12.91 - seems it is high on the list of
 * things wanted, and it should be easy to implement. - Linus
 */

/*
 * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
 * pages started 02.12.91, seems to work. - Linus.
 *
 * Tested sharing by executing about 30 /bin/sh: under the old kernel it
 * would have taken more than the 6M I have free, but it worked well as
 * far as I could see.
 *
 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
 */

/*
 * Real VM (paging to/from disk) started 18.12.91. Much more work and
 * thought has to go into this. Oh, well..
 * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
 *              Found it. Everything seems to work now.
 * 20.12.91  -  Ok, making the swap-device changeable like the root.
 */

/*
 * 05.04.94  -  Multi-page memory management added for v1.1.
 *              Idea by Alex Bligh (alex@cconcepts.co.uk)
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>

#include <asm/system.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/string.h>

unsigned long high_memory = 0;

/*
 * We special-case the C-O-W ZERO_PAGE, because it's such
 * a common occurrence (no need to read the page to know
 * that it's zero - better for the cache and memory subsystem).
 */
static inline void copy_page(unsigned long from, unsigned long to)
{
        if (from == ZERO_PAGE) {
                memset((void *) to, 0, PAGE_SIZE);
                return;
        }
        memcpy((void *) to, (void *) from, PAGE_SIZE);
}

#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)

mem_map_t * mem_map = NULL;

/*
 * oom() prints a message (so that the user knows why the process died),
 * and gives the process an untrappable SIGKILL.
 */
void oom(struct task_struct * task)
{
        printk("\nOut of memory for %s.\n", task->comm);
        task->sig->action[SIGKILL-1].sa_handler = NULL;
        task->blocked &= ~(1<<(SIGKILL-1));
        send_sig(SIGKILL,task,1);
}

/*
 * Note: this doesn't free the actual pages themselves. That
 * has been handled earlier when unmapping all the memory regions.
 */
static inline void free_one_pmd(pmd_t * dir)
{
        pte_t * pte;

        if (pmd_none(*dir))
                return;
        if (pmd_bad(*dir)) {
                printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir));
                pmd_clear(dir);
                return;
        }
        pte = pte_offset(dir, 0);
        pmd_clear(dir);
        pte_free(pte);
}

static inline void free_one_pgd(pgd_t * dir)
{
        int j;
        pmd_t * pmd;

        if (pgd_none(*dir))
                return;
        if (pgd_bad(*dir)) {
                printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir));
                pgd_clear(dir);
                return;
        }
        pmd = pmd_offset(dir, 0);
        pgd_clear(dir);
        for (j = 0; j < PTRS_PER_PMD ; j++)
                free_one_pmd(pmd+j);
        pmd_free(pmd);
}

/*
 * This function clears all user-level page tables of a process - this
 * is needed by execve(), so that old pages aren't in the way.
 */
void clear_page_tables(struct task_struct * tsk)
{
        int i;
        pgd_t * page_dir;

        page_dir = tsk->mm->pgd;
        if (!page_dir || page_dir == swapper_pg_dir) {
                printk("%s trying to clear kernel page-directory: not good\n", tsk->comm);
                return;
        }
        flush_cache_mm(tsk->mm);
        for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
                free_one_pgd(page_dir + i);
        flush_tlb_mm(tsk->mm);
}

/*
 * This function frees up all page tables of a process when it exits. It
 * is the same as "clear_page_tables()", except it also changes the process'
 * page table directory to the kernel page tables and then frees the old
 * page table directory.
 */
void free_page_tables(struct mm_struct * mm)
{
        int i;
        pgd_t * page_dir;

        page_dir = mm->pgd;
        if (!page_dir || page_dir == swapper_pg_dir) {
                printk("Trying to free kernel page-directory: not good\n");
                return;
        }
        for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
                free_one_pgd(page_dir + i);
        pgd_free(page_dir);
}

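/*
 * Give a task a fresh page directory: only the kernel portion of the
 * new directory is initialized, by copying the corresponding entries
 * from init_mm.
 */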
int new_page_tables(struct task_struct * tsk)
{
        pgd_t * page_dir, * new_pg;

        if (!(new_pg = pgd_alloc()))
                return -ENOMEM;
        page_dir = pgd_offset(&init_mm, 0);
        flush_cache_mm(tsk->mm);
        memcpy(new_pg + USER_PTRS_PER_PGD, page_dir + USER_PTRS_PER_PGD,
               (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof (pgd_t));
        flush_tlb_mm(tsk->mm);
        SET_PAGE_DIR(tsk, new_pg);
        tsk->mm->pgd = new_pg;
        return 0;
}

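/*
 * Copy one pte from the parent into the child.  Swap entries just get
 * their use count incremented; present (non-reserved) pages are shared
 * by bumping the mem_map count, and both copies are write-protected
 * when "cow" is set.
 */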
static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte, int cow)
{
        pte_t pte = *old_pte;
        unsigned long page_nr;

        if (pte_none(pte))
                return;
        if (!pte_present(pte)) {
                swap_duplicate(pte_val(pte));
                set_pte(new_pte, pte);
                return;
        }
        page_nr = MAP_NR(pte_page(pte));
        if (page_nr >= MAP_NR(high_memory) || PageReserved(mem_map+page_nr)) {
                set_pte(new_pte, pte);
                return;
        }
        if (cow)
                pte = pte_wrprotect(pte);
        if (delete_from_swap_cache(page_nr))
                pte = pte_mkdirty(pte);
        set_pte(new_pte, pte_mkold(pte));
        set_pte(old_pte, pte);
        mem_map[page_nr].count++;
}

static inline int copy_pte_range(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long address, unsigned long size, int cow)
{
        pte_t * src_pte, * dst_pte;
        unsigned long end;

        if (pmd_none(*src_pmd))
                return 0;
        if (pmd_bad(*src_pmd)) {
                printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd));
                pmd_clear(src_pmd);
                return 0;
        }
        src_pte = pte_offset(src_pmd, address);
        if (pmd_none(*dst_pmd)) {
                if (!pte_alloc(dst_pmd, 0))
                        return -ENOMEM;
        }
        dst_pte = pte_offset(dst_pmd, address);
        address &= ~PMD_MASK;
        end = address + size;
        if (end >= PMD_SIZE)
                end = PMD_SIZE;
        do {
                /* I would like to switch arguments here, to make it
                 * consistent with copy_xxx_range and memcpy syntax.
                 */
                copy_one_pte(src_pte++, dst_pte++, cow);
                address += PAGE_SIZE;
        } while (address < end);
        return 0;
}

static inline int copy_pmd_range(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long address, unsigned long size, int cow)
{
        pmd_t * src_pmd, * dst_pmd;
        unsigned long end;
        int error = 0;

        if (pgd_none(*src_pgd))
                return 0;
        if (pgd_bad(*src_pgd)) {
                printk("copy_pmd_range: bad pgd (%08lx)\n", pgd_val(*src_pgd));
                pgd_clear(src_pgd);
                return 0;
        }
        src_pmd = pmd_offset(src_pgd, address);
        if (pgd_none(*dst_pgd)) {
                if (!pmd_alloc(dst_pgd, 0))
                        return -ENOMEM;
        }
        dst_pmd = pmd_offset(dst_pgd, address);
        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        do {
                error = copy_pte_range(dst_pmd++, src_pmd++, address, end - address, cow);
                if (error)
                        break;
                address = (address + PMD_SIZE) & PMD_MASK;
        } while (address < end);
        return error;
}

/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma)
{
        pgd_t * src_pgd, * dst_pgd;
        unsigned long address = vma->vm_start;
        unsigned long end = vma->vm_end;
        int error = 0, cow;

        cow = (vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE;
        src_pgd = pgd_offset(src, address);
        dst_pgd = pgd_offset(dst, address);
        flush_cache_range(src, vma->vm_start, vma->vm_end);
        flush_cache_range(dst, vma->vm_start, vma->vm_end);
        while (address < end) {
                error = copy_pmd_range(dst_pgd++, src_pgd++, address, end - address, cow);
                if (error)
                        break;
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
        }
        /* Note that the src ptes get c-o-w treatment, so they change too. */
        flush_tlb_range(src, vma->vm_start, vma->vm_end);
        flush_tlb_range(dst, vma->vm_start, vma->vm_end);
        return error;
}

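/*
 * Release whatever one pte points to: the page itself (and one rss
 * count) if it is present and not reserved, or its swap entry otherwise.
 */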
static inline void free_pte(pte_t page)
{
        if (pte_present(page)) {
                unsigned long addr = pte_page(page);
                if (addr >= high_memory || PageReserved(mem_map+MAP_NR(addr)))
                        return;
                free_page(addr);
                if (current->mm->rss <= 0)
                        return;
                current->mm->rss--;
                return;
        }
        swap_free(pte_val(page));
}

static inline void forget_pte(pte_t page)
{
        if (!pte_none(page)) {
                printk("forget_pte: old mapping existed!\n");
                free_pte(page);
        }
}

static inline void zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
{
        pte_t * pte;

        if (pmd_none(*pmd))
                return;
        if (pmd_bad(*pmd)) {
                printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
                pmd_clear(pmd);
                return;
        }
        pte = pte_offset(pmd, address);
        address &= ~PMD_MASK;
        if (address + size > PMD_SIZE)
                size = PMD_SIZE - address;
        size >>= PAGE_SHIFT;
        for (;;) {
                pte_t page;
                if (!size)
                        break;
                page = *pte;
                pte++;
                size--;
                if (pte_none(page))
                        continue;
                pte_clear(pte-1);
                free_pte(page);
        }
}

static inline void zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
{
        pmd_t * pmd;
        unsigned long end;

        if (pgd_none(*dir))
                return;
        if (pgd_bad(*dir)) {
                printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
                pgd_clear(dir);
                return;
        }
        pmd = pmd_offset(dir, address);
        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        do {
                zap_pte_range(pmd, address, end - address);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
}

/*
 * remove user pages in a given range.
 */
int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
{
        pgd_t * dir;
        unsigned long end = address + size;

        dir = pgd_offset(mm, address);
        flush_cache_range(mm, end - size, end);
        while (address < end) {
                zap_pmd_range(dir, address, end - address);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        }
        flush_tlb_range(mm, end - size, end);
        return 0;
}

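/*
 * Map a range of addresses to the write-protected ZERO_PAGE, dropping
 * any old mappings; real pages only appear later, through the normal
 * copy-on-write path when the first write faults.
 */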
static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pte_t zero_pte)
{
        unsigned long end;

        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        do {
                pte_t oldpage = *pte;
                set_pte(pte, zero_pte);
                forget_pte(oldpage);
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
}

static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, pte_t zero_pte)
{
        unsigned long end;

        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        do {
                pte_t * pte = pte_alloc(pmd, address);
                if (!pte)
                        return -ENOMEM;
                zeromap_pte_range(pte, address, end - address, zero_pte);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return 0;
}

int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
{
        int error = 0;
        pgd_t * dir;
        unsigned long beg = address;
        unsigned long end = address + size;
        pte_t zero_pte;

        zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot));
        dir = pgd_offset(current->mm, address);
        flush_cache_range(current->mm, beg, end);
        while (address < end) {
                pmd_t *pmd = pmd_alloc(dir, address);
                error = -ENOMEM;
                if (!pmd)
                        break;
                error = zeromap_pmd_range(pmd, address, end - address, zero_pte);
                if (error)
                        break;
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        }
        flush_tlb_range(current->mm, beg, end);
        return error;
}

/*
 * maps a range of physical memory into the requested pages. the old
 * mappings are removed. any references to nonexistent pages results
 * in null mappings (currently treated as "copy-on-access")
 */
static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
        unsigned long offset, pgprot_t prot)
{
        unsigned long end;

        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        do {
                pte_t oldpage = *pte;
                pte_clear(pte);
                if (offset >= high_memory || PageReserved(mem_map+MAP_NR(offset)))
                        set_pte(pte, mk_pte(offset, prot));
                forget_pte(oldpage);
                address += PAGE_SIZE;
                offset += PAGE_SIZE;
                pte++;
        } while (address < end);
}

static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size,
        unsigned long offset, pgprot_t prot)
{
        unsigned long end;

        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        offset -= address;
        do {
                pte_t * pte = pte_alloc(pmd, address);
                if (!pte)
                        return -ENOMEM;
                remap_pte_range(pte, address, end - address, address + offset, prot);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return 0;
}

int remap_page_range(unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot)
{
        int error = 0;
        pgd_t * dir;
        unsigned long beg = from;
        unsigned long end = from + size;

        offset -= from;
        dir = pgd_offset(current->mm, from);
        flush_cache_range(current->mm, beg, end);
        while (from < end) {
                pmd_t *pmd = pmd_alloc(dir, from);
                error = -ENOMEM;
                if (!pmd)
                        break;
                error = remap_pmd_range(pmd, from, end - from, offset + from, prot);
                if (error)
                        break;
                from = (from + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        }
        flush_tlb_range(current->mm, beg, end);
        return error;
}

/*
 * sanity-check function..
 */
static void put_page(pte_t * page_table, pte_t pte)
{
        if (!pte_none(*page_table)) {
                free_page(pte_page(pte));
                return;
        }
/* no need for flush_tlb */
        set_pte(page_table, pte);
}

/*
 * This routine is used to map in a page into an address space: needed by
 * execve() for the initial stack and environment pages.
 */
unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
        pgd_t * pgd;
        pmd_t * pmd;
        pte_t * pte;

        if (page >= high_memory)
                printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
        if (mem_map[MAP_NR(page)].count != 1)
                printk("mem_map disagrees with %08lx at %08lx\n",page,address);
        pgd = pgd_offset(tsk->mm,address);
        pmd = pmd_alloc(pgd, address);
        if (!pmd) {
                free_page(page);
                oom(tsk);
                return 0;
        }
        pte = pte_alloc(pmd, address);
        if (!pte) {
                free_page(page);
                oom(tsk);
                return 0;
        }
        if (!pte_none(*pte)) {
                printk("put_dirty_page: page already exists\n");
                free_page(page);
                return 0;
        }
        flush_page_to_ram(page);
        set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));
/* no need for invalidate */
        return page;
}

/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 */
void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
        unsigned long address, int write_access)
{
        pgd_t *page_dir;
        pmd_t *page_middle;
        pte_t *page_table, pte;
        unsigned long old_page, new_page;

        new_page = __get_free_page(GFP_KERNEL);
        page_dir = pgd_offset(vma->vm_mm, address);
        if (pgd_none(*page_dir))
                goto end_wp_page;
        if (pgd_bad(*page_dir))
                goto bad_wp_pagedir;
        page_middle = pmd_offset(page_dir, address);
        if (pmd_none(*page_middle))
                goto end_wp_page;
        if (pmd_bad(*page_middle))
                goto bad_wp_pagemiddle;
        page_table = pte_offset(page_middle, address);
        pte = *page_table;
        if (!pte_present(pte))
                goto end_wp_page;
        if (pte_write(pte))
                goto end_wp_page;
        old_page = pte_page(pte);
        if (old_page >= high_memory)
                goto bad_wp_page;
        tsk->min_flt++;
        /*
         * Do we need to copy?
         */
        if (mem_map[MAP_NR(old_page)].count != 1) {
                if (new_page) {
                        if (PageReserved(mem_map + MAP_NR(old_page)))
                                ++vma->vm_mm->rss;
                        copy_page(old_page,new_page);
                        flush_page_to_ram(old_page);
                        flush_page_to_ram(new_page);
                        flush_cache_page(vma, address);
                        set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
                        free_page(old_page);
                        flush_tlb_page(vma, address);
                        return;
                }
                flush_cache_page(vma, address);
                set_pte(page_table, BAD_PAGE);
                flush_tlb_page(vma, address);
                free_page(old_page);
                oom(tsk);
                return;
        }
        flush_cache_page(vma, address);
        set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
        flush_tlb_page(vma, address);
        if (new_page)
                free_page(new_page);
        return;
bad_wp_page:
        printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
        send_sig(SIGKILL, tsk, 1);
        goto end_wp_page;
bad_wp_pagemiddle:
        printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle));
        send_sig(SIGKILL, tsk, 1);
        goto end_wp_page;
bad_wp_pagedir:
        printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir));
        send_sig(SIGKILL, tsk, 1);
end_wp_page:
        if (new_page)
                free_page(new_page);
        return;
}

/*
 * Ugly, ugly, but the goto's result in better assembly..
 */
int verify_area(int type, const void * addr, unsigned long size)
{
        struct vm_area_struct * vma;
        unsigned long start = (unsigned long) addr;

        /* If the current user space is mapped to kernel space (for the
         * case where we use a fake user buffer with get_fs/set_fs()) we
         * don't expect to find the address in the user vm map.
         */
        if (!size || get_fs() == KERNEL_DS)
                return 0;

        vma = find_vma(current->mm, start);
        if (!vma)
                goto bad_area;
        if (vma->vm_start > start)
                goto check_stack;

good_area:
        if (type == VERIFY_WRITE)
                goto check_write;
        for (;;) {
                struct vm_area_struct * next;
                if (!(vma->vm_flags & VM_READ))
                        goto bad_area;
                if (vma->vm_end - start >= size)
                        return 0;
                next = vma->vm_next;
                if (!next || vma->vm_end != next->vm_start)
                        goto bad_area;
                vma = next;
        }

check_write:
        if (!(vma->vm_flags & VM_WRITE))
                goto bad_area;
        if (!wp_works_ok)
                goto check_wp_fault_by_hand;
        for (;;) {
                if (vma->vm_end - start >= size)
                        break;
                if (!vma->vm_next || vma->vm_end != vma->vm_next->vm_start)
                        goto bad_area;
                vma = vma->vm_next;
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        }
        return 0;

check_wp_fault_by_hand:
        size--;
        size += start & ~PAGE_MASK;
        size >>= PAGE_SHIFT;
        start &= PAGE_MASK;

        for (;;) {
                do_wp_page(current, vma, start, 1);
                if (!size)
                        break;
                size--;
                start += PAGE_SIZE;
                if (start < vma->vm_end)
                        continue;
                vma = vma->vm_next;
                if (!vma || vma->vm_start != start)
                        goto bad_area;
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;;
        }
        return 0;

check_stack:
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;
        if (expand_stack(vma, start) == 0)
                goto good_area;

bad_area:
        return -EFAULT;
}

/*
 * This function zeroes out partial mmap'ed pages at truncation time..
 */
static void partial_clear(struct vm_area_struct *vma, unsigned long address)
{
        pgd_t *page_dir;
        pmd_t *page_middle;
        pte_t *page_table, pte;

        page_dir = pgd_offset(vma->vm_mm, address);
        if (pgd_none(*page_dir))
                return;
        if (pgd_bad(*page_dir)) {
                printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
                pgd_clear(page_dir);
                return;
        }
        page_middle = pmd_offset(page_dir, address);
        if (pmd_none(*page_middle))
                return;
        if (pmd_bad(*page_middle)) {
                printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
                pmd_clear(page_middle);
                return;
        }
        page_table = pte_offset(page_middle, address);
        pte = *page_table;
        if (!pte_present(pte))
                return;
        flush_cache_page(vma, address);
        address &= ~PAGE_MASK;
        address += pte_page(pte);
        if (address >= high_memory)
                return;
        memset((void *) address, 0, PAGE_SIZE - (address & ~PAGE_MASK));
        flush_page_to_ram(pte_page(pte));
}

/*
 * Handle all mappings that got truncated by a "truncate()"
 * system call.
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 */
void vmtruncate(struct inode * inode, unsigned long offset)
{
        struct vm_area_struct * mpnt;

        truncate_inode_pages(inode, offset);
        if (!inode->i_mmap)
                return;
        mpnt = inode->i_mmap;
        do {
                unsigned long start = mpnt->vm_start;
                unsigned long len = mpnt->vm_end - start;
                unsigned long diff;

                /* mapping wholly truncated? */
                if (mpnt->vm_offset >= offset) {
                        zap_page_range(mpnt->vm_mm, start, len);
                        continue;
                }
                /* mapping wholly unaffected? */
                diff = offset - mpnt->vm_offset;
                if (diff >= len)
                        continue;
                /* Ok, partially affected.. */
                start += diff;
                len = (len - diff) & PAGE_MASK;
                if (start & ~PAGE_MASK) {
                        partial_clear(mpnt, start);
                        start = (start + ~PAGE_MASK) & PAGE_MASK;
                }
                zap_page_range(mpnt->vm_mm, start, len);
        } while ((mpnt = mpnt->vm_next_share) != inode->i_mmap);
}


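/*
 * Bring a swapped-out page back in: either through the generic
 * swap_in(), or through the vma's own swapin() operation.  In the
 * latter case the page is write-protected again if it is shared and
 * the mapping is private.
 */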
static inline void do_swap_page(struct task_struct * tsk,
        struct vm_area_struct * vma, unsigned long address,
        pte_t * page_table, pte_t entry, int write_access)
{
        pte_t page;

        if (!vma->vm_ops || !vma->vm_ops->swapin) {
                swap_in(tsk, vma, page_table, pte_val(entry), write_access);
                flush_page_to_ram(pte_page(*page_table));
                return;
        }
        page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
        if (pte_val(*page_table) != pte_val(entry)) {
                free_page(pte_page(page));
                return;
        }
        if (mem_map[MAP_NR(pte_page(page))].count > 1 && !(vma->vm_flags & VM_SHARED))
                page = pte_wrprotect(page);
        ++vma->vm_mm->rss;
        ++tsk->maj_flt;
        flush_page_to_ram(pte_page(page));
        set_pte(page_table, page);
        return;
}

/*
 * do_no_page() tries to create a new page mapping. It aggressively
 * tries to share with existing pages, but makes a separate copy if
 * the "write_access" parameter is true in order to avoid the next
 * page fault.
 *
 * As this is called only for pages that do not currently exist, we
 * do not need to flush old virtual caches or the TLB.
 */
void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
        unsigned long address, int write_access)
{
        pgd_t * pgd;
        pmd_t * pmd;
        pte_t * page_table;
        pte_t entry;
        unsigned long page;

        pgd = pgd_offset(tsk->mm, address);
        pmd = pmd_alloc(pgd, address);
        if (!pmd)
                goto no_memory;
        page_table = pte_alloc(pmd, address);
        if (!page_table)
                goto no_memory;
        entry = *page_table;
        if (pte_present(entry))
                goto is_present;
        if (!pte_none(entry))
                goto swap_page;
        address &= PAGE_MASK;
        if (!vma->vm_ops || !vma->vm_ops->nopage)
                goto anonymous_page;
        /*
         * The third argument is "no_share", which tells the low-level code
         * to copy, not share the page even if sharing is possible.  It's
         * essentially an early COW detection
         */
        page = vma->vm_ops->nopage(vma, address,
                (vma->vm_flags & VM_SHARED)?0:write_access);
        if (!page)
                goto sigbus;
        ++tsk->maj_flt;
        ++vma->vm_mm->rss;
        /*
         * This silly early PAGE_DIRTY setting removes a race
         * due to the bad i386 page protection. But it's valid
         * for other architectures too.
         *
         * Note that if write_access is true, we either now have
         * a exclusive copy of the page, or this is a shared mapping,
         * so we can make it writable and dirty to avoid having to
         * handle that later.
         */
        flush_page_to_ram(page);
        entry = mk_pte(page, vma->vm_page_prot);
        if (write_access) {
                entry = pte_mkwrite(pte_mkdirty(entry));
        } else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED))
                entry = pte_wrprotect(entry);
        put_page(page_table, entry);
        /* no need to invalidate: a not-present page shouldn't be cached */
        return;

anonymous_page:
        entry = pte_wrprotect(mk_pte(ZERO_PAGE, vma->vm_page_prot));
        if (write_access) {
                unsigned long page = __get_free_page(GFP_KERNEL);
                if (!page)
                        goto sigbus;
                memset((void *) page, 0, PAGE_SIZE);
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
                vma->vm_mm->rss++;
                tsk->min_flt++;
                flush_page_to_ram(page);
        }
        put_page(page_table, entry);
        return;

sigbus:
        force_sig(SIGBUS, current);
        put_page(page_table, BAD_PAGE);
        /* no need to invalidate, wasn't present */
        return;

swap_page:
        do_swap_page(tsk, vma, address, page_table, entry, write_access);
        return;

no_memory:
        oom(tsk);
is_present:
        return;
}

/*
 * The above separate functions for the no-page and wp-page
 * cases will go away (they mostly do the same thing anyway),
 * and we'll instead use only a general "handle_mm_fault()".
 *
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 */
static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
        int write_access, pte_t * pte)
{
        if (!pte_present(*pte)) {
                do_no_page(current, vma, address, write_access);
                return;
        }
        set_pte(pte, pte_mkyoung(*pte));
        flush_tlb_page(vma, address);
        if (!write_access)
                return;
        if (pte_write(*pte)) {
                set_pte(pte, pte_mkdirty(*pte));
                flush_tlb_page(vma, address);
                return;
        }
        do_wp_page(current, vma, address, write_access);
}

void handle_mm_fault(struct vm_area_struct * vma, unsigned long address,
        int write_access)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;

        pgd = pgd_offset(vma->vm_mm, address);
        pmd = pmd_alloc(pgd, address);
        if (!pmd)
                goto no_memory;
        pte = pte_alloc(pmd, address);
        if (!pte)
                goto no_memory;
        handle_pte_fault(vma, address, write_access, pte);
        update_mmu_cache(vma, address, *pte);
        return;
no_memory:
        oom(current);
}
