conts/posix/mm0/mm/fault.c (c0or1k Subversion repository, https://opencores.org/ocsvn/c0or1k/c0or1k/trunk, rev 2)

/*
 * Page fault handling.
 *
 * Copyright (C) 2007, 2008-2010 Bahadir Bilgehan Balban
 */
#include <vm_area.h>
#include <task.h>
#include <mm/alloc_page.h>
#include <malloc/malloc.h>
#include <l4/generic/space.h>
#include <l4/api/errno.h>
#include <string.h>
#include <memory.h>
#include <shm.h>
#include <file.h>
#include <test.h>

#include L4LIB_INC_ARCH(syscalls.h)
#include L4LIB_INC_ARCH(syslib.h)
#include INC_GLUE(memory.h)
#include INC_SUBARCH(mm.h)
#include __INC_ARCH(mm.h)
#include __INC_ARCH(debug.h)

/* Given a page and the vma it is in, returns that page's virtual address */
unsigned long vma_page_to_virtual(struct vm_area *vma, struct page *page)
{
        unsigned long virtual_pfn = vma->pfn_start + page->offset - vma->file_offset;

        /* Page must be contained in vma's pages */
        BUG_ON(vma->file_offset > page->offset);

        return __pfn_to_addr(virtual_pfn);
}

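/*
 * Returns the faulting address's offset, in pages, into the file backing
 * its vma. Illustrative example, assuming 4 KB pages: for a vma with
 * pfn_start = 0x1000 and file_offset = 2, a fault at address 0x1003400
 * has __pfn(address) = 0x1003, so the result is 2 + (0x1003 - 0x1000) = 5.
 */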
unsigned long fault_to_file_offset(struct fault_data *fault)
{
        /* Fault's offset in its vma */
        unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;

        /* Fault's offset in the file */
        unsigned long f_off_pfn = fault->vma->file_offset + vma_off_pfn;

        return f_off_pfn;
}

/*
 * Given a reference to a vm_object link, returns the next link but
 * avoids wrapping around back to head. If next is head, returns 0.
 *
 * vma->link1->link2->link3
 *       |      |      |
 *       V      V      V
 *       vmo1   vmo2   vmo3|vm_file
 *
 * Example:
 * Given a reference to link = vma, head = vma, returns link1.
 * Given a reference to link = link3, head = vma, returns 0.
 */
struct vm_obj_link *vma_next_link(struct link *link,
                                  struct link *head)
{
        BUG_ON(list_empty(link));
        if (link->next == head)
                return 0;
        else
                return link_to_struct(link->next, struct vm_obj_link, list);
}

/* Unlinks the given link from its vma and deletes it, but keeps the object. */
struct vm_object *vma_drop_link(struct vm_obj_link *link)
{
        struct vm_object *dropped;

        /* Remove object link from vma's list */
        list_remove(&link->list);

        /* Unlink the link from object */
        dropped = vm_unlink_object(link);

        /* Delete the original link */
        kfree(link);

        return dropped;
}

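/*
 * Note: vma_drop_link() only severs and frees the link itself; updating
 * shadow bookkeeping and deleting or merging the object once it has lost
 * its last reference is left to the caller (see vma_drop_merge_delete()
 * below).
 */
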
/*
 * Checks whether the page cache pages of the original (lesser) object
 * are a subset of those of the shadow (copier) object.
 *
 * FIXME:
 * Note this only checks the page cache, so if any objects have pages
 * swapped to disk, this function won't work, which is a logic error.
 * It should really count the swapped ones as well.
 */
int vm_object_is_subset(struct vm_object *shadow,
                        struct vm_object *original)
{
        struct page *pc, *pl;

        /* The copier must have at least as many pages as the lesser */
        if (shadow->npages < original->npages)
                return 0;

        /*
         * Do a page-by-page comparison. Every lesser page
         * must be present in the copier.
         */
        list_foreach_struct(pl, &original->page_cache, list)
                if (!(pc = find_page(shadow, pl->offset)))
                        return 0;
        /*
         * Every page of the lesser vmo has a counterpart in the
         * copier vmo, so the lesser is a subset of the copier.
         */
        return 1;
}

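/*
 * Returns nonzero when the object underneath (original) can be dropped:
 * the shadow in front of it holds as many pages, i.e. it completely
 * obscures it, and the underlying object is itself a shadow. Non-shadow
 * objects are never dropped this way.
 */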
static inline int vm_object_is_droppable(struct vm_object *shadow,
                                         struct vm_object *original)
{
        if (shadow->npages == original->npages &&
            (original->flags & VM_OBJ_SHADOW))
                return 1;
        else
                return 0;
}

/*
 * vma_merge_object()
 *
 * FIXME: Currently this is an optimisation that needs to go
 * away when swapping is available. We have this solely because
 * currently a shadow needs to identically mirror the whole
 * object underneath, in order to drop it. A file that is 1MB
 * long would consume 2MB until dropped. When swapping is available,
 * we will go back to identical mirroring instead of merging the
 * last shadow, since most unused pages would be swapped out.
 */

/*
 * When one shadow object is redundant, merges it into the shadow in front of it.
 * Note it must be determined that it is redundant before calling this function.
 *
 * vma --> link1 --> link2 --> link3
 *         |         |         |
 *         v         v         v
 *         Front     Redundant Next
 *         Shadow    Shadow    Object (E.g. shadow or file)
 */
int vma_merge_object(struct vm_object *redundant)
{
        /* The redundant shadow object */
        struct vm_object *front; /* Shadow in front of redundant */
        struct vm_obj_link *last_link;
        struct page *p1, *p2, *n;

        /* Check link and shadow count is really 1 */
        BUG_ON(redundant->nlinks != 1);
        BUG_ON(redundant->shadows != 1);

        /* Get the last shadower object in front */
        front = link_to_struct(redundant->shdw_list.next,
                           struct vm_object, shref);

        /* Move all non-intersecting pages to front shadow. */
        list_foreach_removable_struct(p1, n, &redundant->page_cache, list) {
                /* Page doesn't exist in front, move it there */
                if (!(p2 = find_page(front, p1->offset))) {
                        list_remove_init(&p1->list);
                        spin_lock(&p1->lock);
                        p1->owner = front;
                        spin_unlock(&p1->lock);
                        insert_page_olist(p1, front);
                        front->npages++;
                }
        }

        /* Sort out shadow relationships after the merge: */

        /* Front won't be a shadow of the redundant shadow anymore */
        list_remove_init(&front->shref);

        /* Check that there really was one shadower of redundant left */
        BUG_ON(!list_empty(&redundant->shdw_list));

        /* Redundant won't be a shadow of its next object */
        list_remove_init(&redundant->shref);

        /* Front is now a shadow of redundant's next object */
        list_insert(&front->shref, &redundant->orig_obj->shdw_list);
        front->orig_obj = redundant->orig_obj;

        /* Find last link for the object */
        last_link = link_to_struct(redundant->link_list.next,
                               struct vm_obj_link, linkref);

        /* Drop the last link to the object */
        vma_drop_link(last_link);

        /* Redundant shadow has no shadows anymore */
        BUG_ON(--redundant->shadows < 0);

        /* Delete the redundant shadow along with all its pages. */
        vm_object_delete(redundant);

        return 0;
}

struct vm_obj_link *vm_objlink_create(void)
{
        struct vm_obj_link *vmo_link;

        if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
                return PTR_ERR(-ENOMEM);
        link_init(&vmo_link->list);
        link_init(&vmo_link->linkref);

        return vmo_link;
}

/*
 * Creates a bare vm_object along with its vma link, since
 * the shadow will be immediately used in a vma object list.
 */
struct vm_obj_link *vma_create_shadow(void)
{
        struct vm_object *vmo;
        struct vm_obj_link *vmo_link;

        if (IS_ERR(vmo_link = vm_objlink_create()))
                return 0;

        if (!(vmo = vm_object_create())) {
                kfree(vmo_link);
                return 0;
        }
        vmo->flags = VM_OBJ_SHADOW;

        vm_link_object(vmo_link, vmo);

        return vmo_link;
}

/* Allocates a new page, copies the original onto it and returns it. */
struct page *copy_to_new_page(struct page *orig)
{
        void *paddr = alloc_page(1);

        BUG_ON(!paddr);

        /* Copy the page into new page */
        memcpy(phys_to_virt(paddr), page_to_virt(orig), PAGE_SIZE);

        return phys_to_page(paddr);
}

/* Copies the whole mapped object link stack from vma to new_vma */
int vma_copy_links(struct vm_area *new_vma, struct vm_area *vma)
{
        struct vm_obj_link *vmo_link, *new_link;

        /* Get the first object on the vma */
        BUG_ON(list_empty(&vma->vm_obj_list));
        vmo_link = link_to_struct(vma->vm_obj_list.next,
                              struct vm_obj_link, list);
        do {
                /* Create a new link */
                new_link = vm_objlink_create();

                /* Link object with new link */
                vm_link_object(new_link, vmo_link->obj);

                /* Add the new link to vma in object order */
                list_insert_tail(&new_link->list, &new_vma->vm_obj_list);

        /* Continue traversing links, doing the same copying */
        } while((vmo_link = vma_next_link(&vmo_link->list,
                                          &vma->vm_obj_list)));

        return 0;
}

/*
 * Determine if an object is deletable.
 *
 * Shadows are deleted if nlinks = 0, and
 * merged if they have nlinks = 1, shadows = 1.
 * See below for explanation.
 *
 * vfs-type vmfiles are deleted if their
 * openers = 0, and their nlinks
 * (i.e. mappers) = 0.
 *
 * shm-type vmfiles are deleted if their
 * nlinks = 0, since they only have map count.
 */
int vm_object_is_deletable(struct vm_object *obj)
{
        struct vm_file *f;

        //printf("%s: Checking: ", __FUNCTION__);
        //vm_object_print(obj);

        if (obj->nlinks != 0)
                return 0;

        BUG_ON(obj->shadows != 0);
        BUG_ON(!list_empty(&obj->shref));

        if (obj->flags & VM_OBJ_SHADOW)
                return 1;

        f = vm_object_to_file(obj);

        /* Devzero should probably never have 0 refs left */
        if (f->type == VM_FILE_DEVZERO)
                return 0;
        else if (f->type == VM_FILE_SHM)
                return 1;
        else if (f->type == VM_FILE_VFS) {
                if (f->openers == 0)
                        return 1;
                else
                        return 0;
        }

        /* To make gcc happy */
        BUG();
        return 0;
}

/*
 * An exit-time drop has: !prev, and either next or !next.
 * A shadow drop has: both prev and next.
 */

/*
 * Shadow drops: Dropping a link to a shadow does not mean the shadow's
 * next object has lost a shadow. There may be other links to both. But
 * when the shadow has dropped its last link, and is going to be deleted,
 * it is then true that the shadow is lost by the next object.
 */
int vma_drop_merge_delete(struct vm_area *vma, struct vm_obj_link *link)
{
        struct vm_obj_link *prev, *next;
        struct vm_object *obj;

        /* Get previous and next links, if they exist */
        prev = (link->list.prev == &vma->vm_obj_list) ? 0 :
                link_to_struct(link->list.prev, struct vm_obj_link, list);

        next = (link->list.next == &vma->vm_obj_list) ? 0 :
                link_to_struct(link->list.next, struct vm_obj_link, list);

        /* Drop the link */
        obj = vma_drop_link(link);

        /* If there is an object in front, this is a shadow drop */
        if (prev) {
                BUG_ON(!(prev->obj->flags & VM_OBJ_SHADOW));
                BUG_ON(!(prev->obj->flags & VM_WRITE));
                BUG_ON(--obj->shadows < 0);
                // vm_object_print(obj);

                /* Remove prev from current object's shadow list */
                BUG_ON(list_empty(&prev->obj->shref));
                list_remove_init(&prev->obj->shref);

                /*
                 * We don't allow dropping non-shadow objects yet,
                 * (see ...is_droppable) so there must be a next.
                 */
                BUG_ON(!next);

                /* prev is now shadow of next */
                list_insert(&prev->obj->shref,
                         &next->obj->shdw_list);
                prev->obj->orig_obj = next->obj;

                /*
                 * No referrers left, meaning this object is not
                 * shadowing its original object anymore.
                 */
                if (obj->nlinks == 0) {
                        BUG_ON(obj->orig_obj != next->obj);
                        list_remove_init(&obj->shref);
                } else {
                        /*
                         * Dropped object still has referrers, which
                         * means next has gained a new shadow.
                         * Here's why:
                         *
                         * T1 and T2:           T2: drop-
                         * prev->drop->next              \
                         *              became: T1: prev--- next
                         *
                         * Now we have both prev and current object
                         * in next's shadow list.
                         */
                        next->obj->shadows++;
                }
        /* It's an exit, we check if there's a shadow loss */
        } else {
                if (obj->nlinks == 0) {
                        /* Is it a shadow delete? Sort out next */
                        if (next && obj->flags & VM_OBJ_SHADOW) {
                                BUG_ON(obj->orig_obj != next->obj);
                                BUG_ON(--next->obj->shadows < 0);
                                // vm_object_print(next->obj);
                                list_remove_init(&obj->shref);
                        }
                }
        }

        /* Now deal with the object itself */
        if (vm_object_is_deletable(obj)) {
                dprintf("Deleting object:\n");
                // vm_object_print(obj);
                vm_object_delete(obj);
        } else if ((obj->flags & VM_OBJ_SHADOW) &&
                   obj->nlinks == 1 && obj->shadows == 1) {
                dprintf("Merging object:\n");
                // vm_object_print(obj);
                vma_merge_object(obj);
        }

        mm0_test_global_vm_integrity();
        return 0;
}

/*
 * A scenario that pretty much covers every exit() case.
 *
 * T = vma on a unique task
 * l = link
 * Sobj = Shadow object
 * Fobj = File object
 *
 * Every l links to the object on the nearest
 * row to it and on the same column.
 *
 *      l       l       l       l       l       l               T
 *      Sobj    Sobj
 *
 *                      Sobj    Sobj    Sobj    Fobj
 *
 * Sobj Sobj    Sobj
 * l    l       l       l       l       l       l               T
 *
 * l    l       l       l       l       l       l               T
 * Sobj
 *
 */

/* This version is used when exiting. */
int vma_drop_merge_delete_all(struct vm_area *vma)
{
        struct vm_obj_link *vmo_link, *n;

        /* Vma cannot be empty */
        BUG_ON(list_empty(&vma->vm_obj_list));

        /* Traverse and get rid of all links */
        list_foreach_removable_struct(vmo_link, n, &vma->vm_obj_list, list)
                vma_drop_merge_delete(vma, vmo_link);

        return 0;
}

/* TODO:
 * - Why not allocate a swap descriptor in vma_create_shadow() rather than
 *   a bare vm_object? It will be needed.
 * - Check refcounting of shadows, their references, page refs,
 *   decrements, increments, etc.
 *
 *   This handles copy-on-write semantics in various situations. Returns
 *   the page struct for the copy page available for mapping.
 *
 *   1) Copy-on-write of read-only files. (Creates r/w shadows/adds pages)
 *   2) Copy-on-write of forked RO shadows (Creates r/w shadows/adds pages)
 *   3) Copy-on-write of shm files. (Adds pages to r/w shm file from devzero).
 */
struct page *copy_on_write(struct fault_data *fault)
{
        struct vm_obj_link *vmo_link, *shadow_link;
        struct vm_object *shadow;
        struct page *page, *new_page;
        struct vm_area *vma = fault->vma;
        unsigned long file_offset = fault_to_file_offset(fault);

        /* Get the first object, either original file or a shadow */
        if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
                printf("%s:%s: No vm object in vma!\n",
                       __TASKNAME__, __FUNCTION__);
                BUG();
        }

        /* Is the object read-only? Create a shadow object if so.
         *
         * NOTE: Whenever the topmost object is read-only, a new shadow
         * object must be created. When there are no shadows one is created
         * because it's the original vm_object that is not writeable, and
         * when there are shadows one is created because a fork has just
         * happened, in which case all shadows are rendered read-only.
         */
        if (!(vmo_link->obj->flags & VM_WRITE)) {
                if (!(shadow_link = vma_create_shadow()))
                        return PTR_ERR(-ENOMEM);

                /* Initialise the shadow */
                shadow = shadow_link->obj;
                shadow->orig_obj = vmo_link->obj;
                shadow->flags = VM_OBJ_SHADOW | VM_WRITE;
                shadow->pager = &swap_pager;
                vmo_link->obj->shadows++;
                // vm_object_print(vmo_link->obj);
                dprintf("%s: Created a shadow:\n", __TASKNAME__);
                // vm_object_print(shadow);
                dprintf("%s: Original object:\n", __TASKNAME__);
                // vm_object_print(shadow->orig_obj);

                /*
                 * Add the shadow in front of the original:
                 *
                 * vma->link0->link1
                 *       |      |
                 *       v      v
                 *       shadow original
                 */
                list_insert(&shadow_link->list, &vma->vm_obj_list);

                /* Add object to original's shadower list */
                list_insert(&shadow->shref, &shadow->orig_obj->shdw_list);

                /* Add to global object list */
                global_add_vm_object(shadow);

        } else {
                /* We ought to copy the missing RW page to the top shadow */
                dprintf("No new shadows. Going to add to "
                        "topmost r/w shadow object\n");
                shadow_link = vmo_link;

                /*
                 * FIXME: Here we check for the case that a cloned thread is
                 * doing a duplicate write request on an existing RW shadow
                 * page. If so, we return the existing writable page in the top
                 * shadow. We should find a generic way to detect duplicate
                 * requests and cease IPC at an earlier stage.
                 */
                page = shadow_link->obj->pager->ops.page_in(shadow_link->obj,
                                                            file_offset);
                if (!IS_ERR(page))
                        return page;

                /*
                 * We start the page search on read-only objects. If the first
                 * one was writable, go to the next, which must be read-only.
                 */
                BUG_ON(!(vmo_link = vma_next_link(&vmo_link->list,
                                                  &vma->vm_obj_list)));
                BUG_ON(vmo_link->obj->flags & VM_WRITE);
        }

        /* Traverse the list of read-only vm objects and search for the page */
        while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
                                                               file_offset))) {
                if (!(vmo_link = vma_next_link(&vmo_link->list,
                                               &vma->vm_obj_list))) {
                        printf("%s:%s: Traversed all shadows and the original "
                               "file's vm_object, but could not find the "
                               "faulty page in this vma.\n", __TASKNAME__,
                               __FUNCTION__);
                        BUG();
                }
        }

        /*
         * Copy the page. This traverse and copy is like a page-in operation
         * of a pager, except that the page is moving along vm_objects.
         */
        new_page = copy_to_new_page(page);

        /* Update page details */
        spin_lock(&new_page->lock);
        BUG_ON(!list_empty(&new_page->list));
        new_page->refcnt = 0;
        new_page->owner = shadow_link->obj;
        new_page->offset = file_offset;
        new_page->virtual = 0;
        spin_unlock(&new_page->lock);

        /* Add the page to owner's list of in-memory pages */
        insert_page_olist(new_page, new_page->owner);
        new_page->owner->npages++;

        mm0_test_global_vm_integrity();

        /* Shared faults don't have shadows so we don't look for collapses */
        if (!(vma->flags & VMA_SHARED)) {

                /*
                 * Finished handling the actual fault, now check for possible
                 * shadow collapses. Does the shadow completely shadow the one
                 * underlying it?
                 */
                if (!(vmo_link = vma_next_link(&shadow_link->list,
                                               &vma->vm_obj_list))) {
                        /* Copier must have an object under it */
                        printf("Copier must have had an object under it!\n");
                        BUG();
                }
                if (vm_object_is_droppable(shadow_link->obj, vmo_link->obj))
                        vma_drop_merge_delete(vma, vmo_link);
        }

        return new_page;
}

/*
 * Handles the page fault; all entries here are assumed to be *legal*
 * faults, i.e. do_page_fault() should have already checked
 * for illegal accesses.
 *
 * NOTE:
 * Anon/Shared pages:
 * First access from the first process is COW. All subsequent RW
 * accesses (which are attempts at *sharing*) simply map that
 * page to the faulting processes.
 *
 * Non-anon/shared pages:
 * First access from the first process simply writes to the pages
 * of that file. All subsequent accesses by other processes
 * do so as well.
 *
 * FIXME: Add a VM_DIRTY bit for every page that has write-faulted.
 */

/* Handle read faults */
struct page *page_read_fault(struct fault_data *fault)
{
        struct vm_area *vma = fault->vma;
        struct vm_obj_link *vmo_link;
        unsigned long file_offset;
        struct page *page = 0;

        file_offset = fault_to_file_offset(fault);

        /* Get the first object, either original file or a shadow */
        if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
                printf("%s:%s: No vm object in vma!\n",
                       __TASKNAME__, __FUNCTION__);
                BUG();
        }

        /* Traverse the list of read-only vm objects and search for the page */
        while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
                                                               file_offset))) {
                if (!(vmo_link = vma_next_link(&vmo_link->list,
                                               &vma->vm_obj_list))) {
                        printf("%s:%s: Traversed all shadows and the original "
                               "file's vm_object, but could not find the "
                               "faulty page in this vma.\n", __TASKNAME__,
                               __FUNCTION__);
                        BUG();
                }
        }
        BUG_ON(!page);

        return page;
}

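/*
 * Handle write faults. Private vmas always take the copy-on-write path;
 * shared vmas page the first object in directly, falling back to
 * copy-on-write when the vma is anonymous.
 */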
struct page *page_write_fault(struct fault_data *fault)
{
        unsigned int vma_flags = fault->vma->flags;
        struct vm_area *vma = fault->vma;
        struct vm_obj_link *vmo_link;
        unsigned long file_offset;
        struct page *page = 0;

        /* Copy-on-write. All private vmas are always COW */
        if (vma_flags & VMA_PRIVATE) {
                BUG_ON(IS_ERR(page = copy_on_write(fault)));

        /*
         * This handles shared pages that are both anon and non-anon.
         */
        } else if ((vma_flags & VMA_SHARED)) {
                file_offset = fault_to_file_offset(fault);

                /* Don't traverse, just take the first object */
                BUG_ON(!(vmo_link = vma_next_link(&vma->vm_obj_list,
                                                  &vma->vm_obj_list)));

                /* Get the page from its pager */
                if (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
                                                                    file_offset))) {
                        /*
                         * The writable page does not exist.
                         * If the vma is anonymous, it needs to be
                         * COW'ed; otherwise the file must have
                         * paged in this page, so it's a bug.
                         */
                        if (vma_flags & VMA_ANONYMOUS) {
                                BUG_ON(IS_ERR(page = copy_on_write(fault)));
                                return page;
                        } else {
                                printf("%s: Could not obtain faulty "
                                       "page from regular file.\n",
                                       __TASKNAME__);
                                BUG();
                        }
                }

                /*
                 * Page and object are now dirty. Currently it's
                 * only relevant for file-backed shared objects.
                 */
                page->flags |= VM_DIRTY;
                page->owner->flags |= VM_DIRTY;
        } else
                BUG();

        return page;
}

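/*
 * Fault dispatch summary for the cases handled below:
 *
 *   reason     current pte   action                        mapping
 *   VM_READ    VM_NONE       read fault                    MAP_USR_RO
 *   VM_WRITE   VM_NONE       read fault, then write fault  MAP_USR_RW
 *   VM_EXEC    VM_NONE       read fault                    MAP_USR_RX
 *   VM_EXEC    VM_READ       read fault                    MAP_USR_RX or RWX
 *   VM_WRITE   VM_READ       write fault                   MAP_USR_RW or RWX
 */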
struct page *__do_page_fault(struct fault_data *fault)
{
        unsigned int reason = fault->reason;
        unsigned int pte_flags = fault->pte_flags;
        unsigned int map_flags = 0;
        struct page *page = 0;

        if ((reason & VM_READ) && (pte_flags & VM_NONE)) {
                page = page_read_fault(fault);
                map_flags = MAP_USR_RO;

        } else if ((reason & VM_WRITE) && (pte_flags & VM_NONE)) {
                page = page_read_fault(fault);
                page = page_write_fault(fault);
                map_flags = MAP_USR_RW;

        } else if ((reason & VM_EXEC) && (pte_flags & VM_NONE)) {
                page = page_read_fault(fault);
                map_flags = MAP_USR_RX;

        } else if ((reason & VM_EXEC) && (pte_flags & VM_READ)) {
                /* Retrieve the already paged-in file page */
                page = page_read_fault(fault);
                if (pte_flags & VM_WRITE)
                        map_flags = MAP_USR_RWX;
                else
                        map_flags = MAP_USR_RX;

        } else if ((reason & VM_WRITE) && (pte_flags & VM_READ)) {
                page = page_write_fault(fault);
                if (pte_flags & VM_EXEC)
                        map_flags = MAP_USR_RWX;
                else
                        map_flags = MAP_USR_RW;

        } else {
                printf("mm0: Unhandled page fault.\n");
                BUG();
        }

        BUG_ON(!page);

        /* Map the new page to the faulting task */
        l4_map((void *)page_to_phys(page),
               (void *)page_align(fault->address), 1,
               map_flags, fault->task->tid);
        // vm_object_print(page->owner);

        return page;
}

/*
 * Sets all r/w shadow objects as read-only for the process so that,
 * as expected after a fork() operation, writes to those objects
 * cause copy-on-write events.
 */
int vm_freeze_shadows(struct tcb *task)
{
        unsigned long virtual;
        struct vm_area *vma;
        struct vm_obj_link *vmo_link;
        struct vm_object *vmo;
        struct page *p;

        list_foreach_struct(vma, &task->vm_area_head->list, list) {

                /* Shared vmas don't have shadows */
                if (vma->flags & VMA_SHARED)
                        continue;

                /* Get the first object */
                BUG_ON(list_empty(&vma->vm_obj_list));
                vmo_link = link_to_struct(vma->vm_obj_list.next,
                                      struct vm_obj_link, list);
                vmo = vmo_link->obj;

                /*
                 * Is this a writeable shadow?
                 *
                 * The only R/W shadow in a vma object chain
                 * can be the first one, so we don't check further
                 * objects if the first one is not what we want.
                 */
                if (!((vmo->flags & VM_OBJ_SHADOW) &&
                      (vmo->flags & VM_WRITE)))
                        continue;

                /* Make the object read only */
                vmo->flags &= ~VM_WRITE;
                vmo->flags |= VM_READ;

                /*
                 * Make all pages on it read-only
                 * in the page tables.
                 */
                list_foreach_struct(p, &vmo->page_cache, list) {

                        /* Find virtual address of each page */
                        virtual = vma_page_to_virtual(vma, p);

                        /* Map the page as read-only */
                        l4_map((void *)page_to_phys(p),
                               (void *)virtual, 1,
                               MAP_USR_RO, task->tid);
                }
        }

        return 0;
}

/*
 * Page fault model:
 *
 * A page is anonymous (e.g. stack)
 *  - page needs read access:
 *      action: map the zero page.
 *  - page needs write access:
 *      action: allocate ZI page and map that. Swap file owns the page.
 *  - page is swapped to swap:
 *      action: read back from swap file into new page.
 *
 * A page is file-backed but private (e.g. .data section)
 *  - page needs read access:
 *      action: read the page from its file.
 *  - page is swapped out before being private. (i.e. invalidated)
 *      action: read the page from its file. (original file)
 *  - page is swapped out after being private.
 *      action: read the page from its file. (swap file)
 *  - page needs write access:
 *      action: allocate new page, declare page as private, change its
 *              owner to swap file.
 *
 * A page is file-backed but not private, and read-only. (e.g. .text section)
 *  - page needs read access:
 *     action: read in the page from its file.
 *  - page is swapped out. (i.e. invalidated)
 *     action: read in the page from its file.
 *  - page needs write access:
 *     action: forbidden, kill task?
 *
 * A page is file-backed but not private, and read/write. (e.g. any data file.)
 *  - page needs read access:
 *     action: read in the page from its file.
 *  - page is flushed back to its original file. (i.e. instead of swap)
 *     action: read in the page from its file.
 *  - page needs write access:
 *     action: read the page in, give write access.
 */
struct page *do_page_fault(struct fault_data *fault)
{
        unsigned int vma_flags = (fault->vma) ? fault->vma->flags : VM_NONE;
        unsigned int reason = fault->reason;

        /* vma flags show no access */
        if (vma_flags & VM_NONE) {
                printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
                       fault->task->tid, fault->address, fault->kdata->faulty_pc);
                fault_handle_error(fault);
        }

        /* The access reason is not included in the vma's listed flags */
        if (!(reason & vma_flags)) {
                printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
                       fault->task->tid, fault->address, fault->kdata->faulty_pc);
                fault_handle_error(fault);
        }

        /* Handle legitimate faults */
        return __do_page_fault(fault);
}

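/*
 * Entry point for page faults forwarded by the kernel on behalf of the
 * faulting task: builds a generic fault descriptor, finds the faulting
 * vma and resolves the fault.
 */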
struct page *page_fault_handler(struct tcb *sender, fault_kdata_t *fkdata)
{
        struct fault_data fault = {
                /* Fault data from kernel */
                .kdata = fkdata,
                .task = sender,
        };

        /* Extract fault reason, fault address etc. in generic format */
        set_generic_fault_params(&fault);

        /* Get vma info */
        if (!(fault.vma = find_vma(fault.address,
                                   &fault.task->vm_area_head->list)))
                printf("Hmm. No vma for faulty region. "
                       "Bad things will happen.\n");

        /* Handle the actual fault */
        return do_page_fault(&fault);
}

static inline unsigned int pte_to_map_flags(unsigned int pte_flags)
{
        unsigned int map_flags;

        switch (pte_flags) {
        case VM_READ:
                map_flags = MAP_USR_RO;
                break;
        case (VM_READ | VM_WRITE):
                map_flags = MAP_USR_RW;
                break;
        case (VM_READ | VM_WRITE | VM_EXEC):
                map_flags = MAP_USR_RWX;
                break;
        case (VM_READ | VM_EXEC):
                map_flags = MAP_USR_RX;
                break;
        default:
                BUG();
        }

        return map_flags;
}

/*
 * Prefaults a page of a task. The catch is that the page may already
 * have been faulted in with more permissions than the desired
 * flags would produce in this fault (e.g. read-faulting a
 * copy-on-write'd page).
 *
 * This function detects whether progress is necessary or not by
 * inspecting the vma's vm_object chain state.
 *
 * Generally both read-fault and write-fault paths are repeatable, in
 * the sense that an already faulted page may be safely re-faulted again
 * and again, be it a read-only or copy-on-write'd page.
 *
 * Retrieving the same page repeatedly is safe, but while it may appear
 * harmless, it is unnecessary to downgrade or change the mapping
 * permissions of a page, e.g. to make a copy-on-write'd page read-only
 * again by doing a blind read-fault on it.
 *
 * Hence this function checks whether a fault is necessary and simply
 * returns if it isn't.
 *
 * FIXME: Escalate any page fault errors like a civilized function!
 */
struct page *task_prefault_smart(struct tcb *task, unsigned long address,
                                 unsigned int wanted_flags)
{
        struct vm_obj_link *vmo_link;
        unsigned long file_offset;
        unsigned int vma_flags, pte_flags;
        struct vm_area *vma;
        struct page *page;
        int err;

        struct fault_data fault = {
                .task = task,
                .address = address,
        };

        /* Find the vma */
        if (!(fault.vma = find_vma(fault.address,
                                   &fault.task->vm_area_head->list))) {
                dprintf("%s: Invalid: No vma for given address. %d\n",
                        __FUNCTION__, -EINVAL);
                return PTR_ERR(-EINVAL);
        }

        /* Read fault, repetition-safe */
        if (wanted_flags & VM_READ)
                if (IS_ERR(page = page_read_fault(&fault)))
                        return page;

        /* Write fault, repetition-safe */
        if (wanted_flags & VM_WRITE)
                if (IS_ERR(page = page_write_fault(&fault)))
                        return page;

        /*
         * If we came this far, it means we have more
         * permissions than VM_NONE.
         *
         * Now we _must_ find out what those page
         * protection flags were, and do this without
         * needing to inspect any ptes.
         *
         * We don't want to downgrade a RW page to RO again.
         */
        file_offset = fault_to_file_offset(&fault);
        vma_flags = fault.vma->flags;
        vma = fault.vma;

        /* Get the topmost vm_object */
        if (!(vmo_link = vma_next_link(&vma->vm_obj_list,
                                       &vma->vm_obj_list))) {
                printf("%s:%s: No vm object in vma!\n",
                       __TASKNAME__, __FUNCTION__);
                BUG();
        }

        /* Traverse the list of vm objects and search for the page */
        while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
                                                               file_offset))) {
                if (!(vmo_link = vma_next_link(&vmo_link->list,
                                               &vma->vm_obj_list))) {
                        printf("%s:%s: Traversed all shadows and the original "
                               "file's vm_object, but could not find the "
                               "faulty page in this vma.\n", __TASKNAME__,
                               __FUNCTION__);
                        BUG();
                }
        }

        /* Use flags for the vm_object containing the page */
        if (vmo_link->obj->flags & VM_WRITE)
                pte_flags = VM_WRITE | VM_READ;
        else
                pte_flags = VM_READ;

        /*
         * Now check the vma flags for adding VM_EXEC.
         * The real pte may not have this flag yet, but
         * it is allowed to have it and it doesn't harm.
         */
        if (vma_flags & VM_EXEC)
                pte_flags |= VM_EXEC;

        /* Map the page to the task using these flags */
        if ((err = l4_map((void *)page_to_phys(page),
                          (void *)page_align(fault.address), 1,
                          pte_to_map_flags(pte_flags),
                          fault.task->tid)) < 0) {
                printf("l4_map() failed. err=%d\n", err);
                BUG();
        }

        return page;
}

/*
 * Prefaults the page with the given virtual address, to the given task,
 * with the given reasons. Multiple reasons are allowed; they are
 * handled separately in order.
 */
struct page *task_prefault_page(struct tcb *task, unsigned long address,
                                unsigned int vmflags)
{
        struct page *ret;

        perfmon_reset_start_cyccnt();
        ret = task_prefault_smart(task, address, vmflags);

        debug_record_cycles("task_prefault_smart");

        return ret;

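        /*
         * The disabled block below is the earlier prefault path that drove
         * do_page_fault() directly with synthetic reason/pte_flags values;
         * it is presumably kept for reference only.
         */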
#if 0
        struct page *p;
        struct fault_data fault = {
                .task = task,
                .address = address,
        };

        dprintf("Pre-faulting address 0x%lx, on task %d, with flags: 0x%x\n",
                address, task->tid, vmflags);

        /* Find the vma */
        if (!(fault.vma = find_vma(fault.address,
                                   &fault.task->vm_area_head->list))) {
                dprintf("%s: Invalid: No vma for given address. %d\n",
                        __FUNCTION__, -EINVAL);
                return PTR_ERR(-EINVAL);
        }

        /* Flags may indicate multiple fault reasons. First do the read */
        if (vmflags & VM_READ) {
                fault.pte_flags = VM_NONE;
                fault.reason = VM_READ;
                if (IS_ERR(p = do_page_fault(&fault)))
                        return p;
        }
        /* Now write */
        if (vmflags & VM_WRITE) {
                fault.pte_flags = VM_READ;
                fault.reason = VM_WRITE;
                if (IS_ERR(p = do_page_fault(&fault)))
                        return p;
        }
        /* No exec or any other fault reason allowed. */
        BUG_ON(vmflags & ~(VM_READ | VM_WRITE));

        return p;
#endif
}

int vm_compare_prot_flags(unsigned int current, unsigned int needed)
{
        current &= VM_PROT_MASK;
        needed &= VM_PROT_MASK;

        if (needed & VM_READ)
                if (current & (VM_READ | VM_WRITE))
                        return 1;

        if (needed & VM_WRITE &&
            (current & VM_WRITE))
                return 1;

        return 0;
}
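
/*
 * For example: vm_compare_prot_flags(VM_READ | VM_WRITE, VM_READ) and
 * vm_compare_prot_flags(VM_READ | VM_WRITE, VM_WRITE) both return 1,
 * whereas vm_compare_prot_flags(VM_READ, VM_WRITE) returns 0.
 */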
 
