/*
 * Page fault handling.
 *
 * Copyright (C) 2007, 2008-2010 Bahadir Bilgehan Balban
 */
#include <vm_area.h>
#include <task.h>
#include <mm/alloc_page.h>
#include <malloc/malloc.h>
#include <l4/generic/space.h>
#include <l4/api/errno.h>
#include <string.h>
#include <memory.h>
#include <shm.h>
#include <file.h>
#include <test.h>

#include L4LIB_INC_ARCH(syscalls.h)
#include L4LIB_INC_ARCH(syslib.h)
#include INC_GLUE(memory.h)
#include INC_SUBARCH(mm.h)
#include __INC_ARCH(mm.h)
#include __INC_ARCH(debug.h)

/* Given a page and the vma it is in, returns that page's virtual address */
unsigned long vma_page_to_virtual(struct vm_area *vma, struct page *page)
{
	unsigned long virtual_pfn = vma->pfn_start + page->offset - vma->file_offset;

	/* Page must be contained in vma's pages */
	BUG_ON(vma->file_offset > page->offset);

	return __pfn_to_addr(virtual_pfn);
}

unsigned long fault_to_file_offset(struct fault_data *fault)
{
	/* Fault's offset in its vma */
	unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;

	/* Fault's offset in the file */
	unsigned long f_off_pfn = fault->vma->file_offset + vma_off_pfn;

	return f_off_pfn;
}
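
/*
 * Worked example for the two conversions above (illustrative values
 * only, not from a real run): say vma->pfn_start = 0x100,
 * vma->file_offset = 0x20, and a fault at an address whose pfn is
 * 0x105. Then:
 *
 *   fault_to_file_offset(): vma_off_pfn = 0x105 - 0x100 = 0x5
 *                           f_off_pfn   = 0x20  + 0x5   = 0x25
 *
 *   vma_page_to_virtual() on the page at file offset 0x25:
 *                           virtual_pfn = 0x100 + 0x25 - 0x20 = 0x105
 *
 * i.e. the two functions invert each other for in-vma pages.
 */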

/*
 * Given a reference to a vm_object link, returns the next link but
 * avoids wrapping around back to head. If next is head, returns 0.
 *
 * vma->link1->link2->link3
 *        |      |      |
 *        V      V      V
 *       vmo1   vmo2   vmo3|vm_file
 *
 * Example:
 * Given a reference to link = vma, head = vma, returns link1.
 * Given a reference to link = link3, head = vma, returns 0.
 */
struct vm_obj_link *vma_next_link(struct link *link,
				  struct link *head)
{
	BUG_ON(list_empty(link));
	if (link->next == head)
		return 0;
	else
		return link_to_struct(link->next, struct vm_obj_link, list);
}
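
/*
 * Usage sketch for vma_next_link() (illustrative, compiled out):
 * walking every object link in a vma, topmost shadow first, the
 * same way the fault paths below do. 'vma' stands for any
 * struct vm_area with a populated object chain.
 */
#if 0
	struct vm_obj_link *l;

	l = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list);
	while (l) {
		/* Inspect l->obj here */
		l = vma_next_link(&l->list, &vma->vm_obj_list);
	}
#endif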

/* Unlinks the given link from its vma and deletes it, but keeps the object. */
struct vm_object *vma_drop_link(struct vm_obj_link *link)
{
	struct vm_object *dropped;

	/* Remove object link from vma's list */
	list_remove(&link->list);

	/* Unlink the link from object */
	dropped = vm_unlink_object(link);

	/* Delete the original link */
	kfree(link);

	return dropped;
}

/*
 * Checks whether the page cache pages of the lesser object (original)
 * are a subset of those of the copier (shadow).
 *
 * FIXME:
 * Note this just checks the page cache, so if any objects have pages
 * swapped to disk, this function won't work, which is a logic error.
 * This should really count the swapped ones as well.
 */
int vm_object_is_subset(struct vm_object *shadow,
			struct vm_object *original)
{
	struct page *pc, *pl;

	/* Copier must have equal or more pages to overlap lesser */
	if (shadow->npages < original->npages)
		return 0;

	/*
	 * Do a page by page comparison. Every lesser page
	 * must be in copier for overlap.
	 */
	list_foreach_struct(pl, &original->page_cache, list)
		if (!(pc = find_page(shadow, pl->offset)))
			return 0;

	/*
	 * For all pages of the lesser vmo, there is a page in the
	 * copier vmo. So lesser is a subset of copier.
	 */
	return 1;
}
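
/*
 * Illustrative only: if the shadow caches pages at offsets {0, 1, 2}
 * and the original caches {1, 2}, every original page is found in the
 * shadow and this returns 1; if the original also cached offset 3, it
 * would return 0.
 */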

static inline int vm_object_is_droppable(struct vm_object *shadow,
					 struct vm_object *original)
{
	if (shadow->npages == original->npages &&
	    (original->flags & VM_OBJ_SHADOW))
		return 1;
	else
		return 0;
}

/*
 * vma_merge_object()
 *
 * FIXME: Currently this is an optimisation that needs to go
 * away when swapping is available. We have this solely because
 * currently a shadow needs to identically mirror the whole
 * object underneath in order to drop it. A file that is 1MB
 * long would spend 2MB until dropped. When swapping is available,
 * we will go back to identical mirroring instead of merging the
 * last shadow, since most unused pages would be swapped out.
 */

/*
 * When a shadow object is redundant, merges it into the shadow in front
 * of it. Note it must be determined that it is redundant before calling
 * this function.
 *
 * vma --> link1 --> link2 --> link3
 *           |         |         |
 *           v         v         v
 *         Front    Redundant   Next
 *         Shadow    Shadow     Object (e.g. shadow or file)
 */
int vma_merge_object(struct vm_object *redundant)
{
	struct vm_object *front;	/* Shadow in front of redundant */
	struct vm_obj_link *last_link;
	struct page *p1, *p2, *n;

	/* Check that link and shadow counts are really 1 */
	BUG_ON(redundant->nlinks != 1);
	BUG_ON(redundant->shadows != 1);

	/* Get the last shadower object in front */
	front = link_to_struct(redundant->shdw_list.next,
			       struct vm_object, shref);

	/* Move all non-intersecting pages to the front shadow. */
	list_foreach_removable_struct(p1, n, &redundant->page_cache, list) {
		/* Page doesn't exist in front, move it there */
		if (!(p2 = find_page(front, p1->offset))) {
			list_remove_init(&p1->list);
			spin_lock(&p1->lock);
			p1->owner = front;
			spin_unlock(&p1->lock);
			insert_page_olist(p1, front);
			front->npages++;
		}
	}

	/* Sort out shadow relationships after the merge: */

	/* Front won't be a shadow of the redundant shadow anymore */
	list_remove_init(&front->shref);

	/* Check that there really was one shadower of redundant left */
	BUG_ON(!list_empty(&redundant->shdw_list));

	/* Redundant won't be a shadow of its next object */
	list_remove_init(&redundant->shref);

	/* Front is now a shadow of redundant's next object */
	list_insert(&front->shref, &redundant->orig_obj->shdw_list);
	front->orig_obj = redundant->orig_obj;

	/* Find the last link for the object */
	last_link = link_to_struct(redundant->link_list.next,
				   struct vm_obj_link, linkref);

	/* Drop the last link to the object */
	vma_drop_link(last_link);

	/* Redundant shadow has no shadows anymore */
	BUG_ON(--redundant->shadows < 0);

	/* Delete the redundant shadow along with all its pages. */
	vm_object_delete(redundant);

	return 0;
}
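
/*
 * Illustrative merge (made-up offsets): if the redundant shadow caches
 * pages {0, 1} and the front shadow caches {1}, only page 0 moves, so
 * the front ends up with {0, 1} and keeps its own, newer copy of page 1.
 * The redundant shadow's leftover copy of page 1 is freed along with it
 * by vm_object_delete().
 */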

struct vm_obj_link *vm_objlink_create(void)
{
	struct vm_obj_link *vmo_link;

	if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
		return PTR_ERR(-ENOMEM);
	link_init(&vmo_link->list);
	link_init(&vmo_link->linkref);

	return vmo_link;
}

/*
 * Creates a bare vm_object along with its vma link, since
 * the shadow will be immediately used in a vma object list.
 */
struct vm_obj_link *vma_create_shadow(void)
{
	struct vm_object *vmo;
	struct vm_obj_link *vmo_link;

	if (IS_ERR(vmo_link = vm_objlink_create()))
		return 0;

	if (!(vmo = vm_object_create())) {
		kfree(vmo_link);
		return 0;
	}
	vmo->flags = VM_OBJ_SHADOW;

	vm_link_object(vmo_link, vmo);

	return vmo_link;
}

/* Allocates a new page, copies the original onto it and returns it. */
struct page *copy_to_new_page(struct page *orig)
{
	void *paddr = alloc_page(1);

	BUG_ON(!paddr);

	/* Copy the original page into the new one */
	memcpy(phys_to_virt(paddr), page_to_virt(orig), PAGE_SIZE);

	return phys_to_page(paddr);
}

/* Copy the whole stack of mapped object links from vma to new_vma */
int vma_copy_links(struct vm_area *new_vma, struct vm_area *vma)
{
	struct vm_obj_link *vmo_link, *new_link;

	/* Get the first object on the vma */
	BUG_ON(list_empty(&vma->vm_obj_list));
	vmo_link = link_to_struct(vma->vm_obj_list.next,
				  struct vm_obj_link, list);
	do {
		/* Create a new link, propagating allocation failure */
		if (IS_ERR(new_link = vm_objlink_create()))
			return -ENOMEM;

		/* Link object with new link */
		vm_link_object(new_link, vmo_link->obj);

		/* Add the new link to vma in object order */
		list_insert_tail(&new_link->list, &new_vma->vm_obj_list);

		/* Continue traversing links, doing the same copying */
	} while ((vmo_link = vma_next_link(&vmo_link->list,
					   &vma->vm_obj_list)));

	return 0;
}
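
/*
 * Illustrative chain state for vma_copy_links() (not from a real run):
 *
 *   before:  vma --> link_a --> link_b        new_vma --> (empty)
 *   after:   vma --> link_a --> link_b        new_vma --> link_a' --> link_b'
 *
 * link_a' references the same vm_object as link_a, so each underlying
 * object gains one more link reference rather than being copied.
 */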

/*
 * Determine if an object is deletable.
 *
 * Shadows are deleted if nlinks = 0, and
 * merged if they have nlinks = 1, shadows = 1.
 * See below for an explanation.
 *
 * vfs-type vmfiles are deleted if their
 * openers = 0, and their nlinks
 * (i.e. mappers) = 0.
 *
 * shm-type vmfiles are deleted if their
 * nlinks = 0, since they only have a map count.
 */
int vm_object_is_deletable(struct vm_object *obj)
{
	struct vm_file *f;

	//printf("%s: Checking: ", __FUNCTION__);
	//vm_object_print(obj);

	if (obj->nlinks != 0)
		return 0;

	BUG_ON(obj->shadows != 0);
	BUG_ON(!list_empty(&obj->shref));

	if (obj->flags & VM_OBJ_SHADOW)
		return 1;

	f = vm_object_to_file(obj);

	/* Devzero should probably never have 0 refs left */
	if (f->type == VM_FILE_DEVZERO)
		return 0;
	else if (f->type == VM_FILE_SHM)
		return 1;
	else if (f->type == VM_FILE_VFS) {
		if (f->openers == 0)
			return 1;
		else
			return 0;
	}

	/* To make gcc happy */
	BUG();
	return 0;
}

/*
 * exit has: !prev, (next || !next)
 * shadow drop has: prev, next
 */

/*
 * Shadow drops: Dropping a link to a shadow does not mean the shadow's
 * next object has lost a shadow. There may be other links to both. But
 * when the shadow has dropped its last link, and is going to be deleted,
 * it is then true that the shadow is lost by the next object.
 */
int vma_drop_merge_delete(struct vm_area *vma, struct vm_obj_link *link)
{
	struct vm_obj_link *prev, *next;
	struct vm_object *obj;

	/* Get previous and next links, if they exist */
	prev = (link->list.prev == &vma->vm_obj_list) ? 0 :
	       link_to_struct(link->list.prev, struct vm_obj_link, list);

	next = (link->list.next == &vma->vm_obj_list) ? 0 :
	       link_to_struct(link->list.next, struct vm_obj_link, list);

	/* Drop the link */
	obj = vma_drop_link(link);

	/* If there is an object in front, this is a shadow drop */
	if (prev) {
		BUG_ON(!(prev->obj->flags & VM_OBJ_SHADOW));
		BUG_ON(!(prev->obj->flags & VM_WRITE));
		BUG_ON(--obj->shadows < 0);
		// vm_object_print(obj);

		/* Remove prev from current object's shadow list */
		BUG_ON(list_empty(&prev->obj->shref));
		list_remove_init(&prev->obj->shref);

		/*
		 * We don't allow dropping non-shadow objects yet
		 * (see ...is_droppable), so there must be a next.
		 */
		BUG_ON(!next);

		/* prev is now a shadow of next */
		list_insert(&prev->obj->shref,
			    &next->obj->shdw_list);
		prev->obj->orig_obj = next->obj;

		/*
		 * No referrers left, meaning this object is not
		 * shadowing its original object anymore.
		 */
		if (obj->nlinks == 0) {
			BUG_ON(obj->orig_obj != next->obj);
			list_remove_init(&obj->shref);
		} else {
			/*
			 * Dropped object still has referrers, which
			 * means next has gained a new shadow.
			 * Here's why:
			 *
			 *   T1 and T2:            T2: drop-
			 *   prev->drop->next               \
			 *   became:               T1: prev--- next
			 *
			 * Now we have both prev and the current object
			 * in next's shadow list.
			 */
			next->obj->shadows++;
		}
	/* It's an exit, we check if there's a shadow loss */
	} else {
		if (obj->nlinks == 0) {
			/* Is it a shadow delete? Sort out next */
			if (next && (obj->flags & VM_OBJ_SHADOW)) {
				BUG_ON(obj->orig_obj != next->obj);
				BUG_ON(--next->obj->shadows < 0);
				// vm_object_print(next->obj);
				list_remove_init(&obj->shref);
			}
		}
	}

	/* Now deal with the object itself */
	if (vm_object_is_deletable(obj)) {
		dprintf("Deleting object:\n");
		// vm_object_print(obj);
		vm_object_delete(obj);
	} else if ((obj->flags & VM_OBJ_SHADOW) &&
		   obj->nlinks == 1 && obj->shadows == 1) {
		dprintf("Merging object:\n");
		// vm_object_print(obj);
		vma_merge_object(obj);
	}

	mm0_test_global_vm_integrity();
	return 0;
}

/*
 * A scenario that pretty much covers every exit() case.
 *
 * T = vma on a unique task
 * l = link
 * Sobj = Shadow object
 * Fobj = File object
 *
 * Every l links to the object on the nearest
 * row to it and on the same column.
 *
 * l l l l l l             T
 *  Sobj Sobj
 *
 *   Sobj Sobj Sobj  Fobj
 *
 *  Sobj Sobj Sobj
 * l l l l l l l           T
 *
 * l l l l l l l           T
 *  Sobj
 */

/* This version is used when exiting. */
int vma_drop_merge_delete_all(struct vm_area *vma)
{
	struct vm_obj_link *vmo_link, *n;

	/* Vma cannot be empty */
	BUG_ON(list_empty(&vma->vm_obj_list));

	/* Traverse and get rid of all links */
	list_foreach_removable_struct(vmo_link, n, &vma->vm_obj_list, list)
		vma_drop_merge_delete(vma, vmo_link);

	return 0;
}

/* TODO:
 * - Why not allocate a swap descriptor in vma_create_shadow() rather than
 *   a bare vm_object? It will be needed.
 * - Check refcounting of shadows, their references, page refs,
 *   reductions, increases, etc.
 *
 * This handles copy-on-write semantics in various situations. Returns
 * the page struct for the copy page available for mapping.
 *
 * 1) Copy-on-write of read-only files. (Creates r/w shadows/adds pages)
 * 2) Copy-on-write of forked RO shadows (Creates r/w shadows/adds pages)
 * 3) Copy-on-write of shm files. (Adds pages to r/w shm file from devzero)
 */
struct page *copy_on_write(struct fault_data *fault)
{
	struct vm_obj_link *vmo_link, *shadow_link;
	struct vm_object *shadow;
	struct page *page, *new_page;
	struct vm_area *vma = fault->vma;
	unsigned long file_offset = fault_to_file_offset(fault);

	/* Get the first object, either the original file or a shadow */
	if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
		printf("%s:%s: No vm object in vma!\n",
		       __TASKNAME__, __FUNCTION__);
		BUG();
	}

	/* Is the object read-only? Create a shadow object if so.
	 *
	 * NOTE: Whenever the topmost object is read-only, a new shadow
	 * object must be created. When there are no shadows, one is created
	 * because it's the original vm_object that is not writeable, and
	 * when there are shadows, one is created because a fork has just
	 * happened, in which case all shadows are rendered read-only.
	 */
	if (!(vmo_link->obj->flags & VM_WRITE)) {
		if (!(shadow_link = vma_create_shadow()))
			return PTR_ERR(-ENOMEM);

		/* Initialise the shadow */
		shadow = shadow_link->obj;
		shadow->orig_obj = vmo_link->obj;
		shadow->flags = VM_OBJ_SHADOW | VM_WRITE;
		shadow->pager = &swap_pager;
		vmo_link->obj->shadows++;
		// vm_object_print(vmo_link->obj);
		dprintf("%s: Created a shadow:\n", __TASKNAME__);
		// vm_object_print(shadow);
		dprintf("%s: Original object:\n", __TASKNAME__);
		// vm_object_print(shadow->orig_obj);

		/*
		 * Add the shadow in front of the original:
		 *
		 * vma->link0->link1
		 *        |      |
		 *        v      v
		 *     shadow  original
		 */
		list_insert(&shadow_link->list, &vma->vm_obj_list);

		/* Add object to original's shadower list */
		list_insert(&shadow->shref, &shadow->orig_obj->shdw_list);

		/* Add to global object list */
		global_add_vm_object(shadow);

	} else {
		/* We ought to copy the missing RW page to the top shadow */
		dprintf("No new shadows. Going to add to "
			"topmost r/w shadow object\n");
		shadow_link = vmo_link;

		/*
		 * FIXME: Here we check for the case that a cloned thread is
		 * doing a duplicate write request on an existing RW shadow
		 * page. If so, we return the existing writable page in the top
		 * shadow. We should find a generic way to detect duplicate
		 * requests and cease IPC at an earlier stage.
		 */
		page = shadow_link->obj->pager->ops.page_in(shadow_link->obj,
							    file_offset);
		if (!IS_ERR(page))
			return page;

		/*
		 * We start the page search on read-only objects. If the first
		 * one was writable, go to the next, which must be read-only.
		 */
		BUG_ON(!(vmo_link = vma_next_link(&vmo_link->list,
						  &vma->vm_obj_list)));
		BUG_ON(vmo_link->obj->flags & VM_WRITE);
	}

	/* Traverse the list of read-only vm objects and search for the page */
	while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
							       file_offset))) {
		if (!(vmo_link = vma_next_link(&vmo_link->list,
					       &vma->vm_obj_list))) {
			printf("%s:%s: Traversed all shadows and the original "
			       "file's vm_object, but could not find the "
			       "faulty page in this vma.\n", __TASKNAME__,
			       __FUNCTION__);
			BUG();
		}
	}

	/*
	 * Copy the page. This traverse and copy is like a page-in operation
	 * of a pager, except that the page is moving along vm_objects.
	 */
	new_page = copy_to_new_page(page);

	/* Update page details */
	spin_lock(&new_page->lock);
	BUG_ON(!list_empty(&new_page->list));
	new_page->refcnt = 0;
	new_page->owner = shadow_link->obj;
	new_page->offset = file_offset;
	new_page->virtual = 0;
	spin_unlock(&new_page->lock);

	/* Add the page to its owner's list of in-memory pages */
	insert_page_olist(new_page, new_page->owner);
	new_page->owner->npages++;

	mm0_test_global_vm_integrity();

	/* Shared faults don't have shadows so we don't look for collapses */
	if (!(vma->flags & VMA_SHARED)) {

		/*
		 * Finished handling the actual fault; now check for possible
		 * shadow collapses. Does the shadow completely cover the one
		 * underlying it?
		 */
		if (!(vmo_link = vma_next_link(&shadow_link->list,
					       &vma->vm_obj_list))) {
			/* Copier must have an object under it */
			printf("Copier must have had an object under it!\n");
			BUG();
		}
		if (vm_object_is_droppable(shadow_link->obj, vmo_link->obj))
			vma_drop_merge_delete(vma, vmo_link);
	}

	return new_page;
}

/*
 * Handles the page fault. All entries here are assumed to be *legal*
 * faults, i.e. do_page_fault() should have already checked
 * for illegal accesses.
 *
 * NOTE:
 * Anon/shared pages:
 * The first access from the first process is COW. All subsequent RW
 * accesses (which are attempts at *sharing*) simply map that
 * page to the faulting processes.
 *
 * Non-anon/shared pages:
 * The first access from the first process simply writes to the pages
 * of that file. All subsequent accesses by other processes
 * do so as well.
 *
 * FIXME: Add a VM_DIRTY bit for every page that has write-faulted.
 */

/* Handle read faults */
struct page *page_read_fault(struct fault_data *fault)
{
	struct vm_area *vma = fault->vma;
	struct vm_obj_link *vmo_link;
	unsigned long file_offset;
	struct page *page = 0;

	file_offset = fault_to_file_offset(fault);

	/* Get the first object, either the original file or a shadow */
	if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
		printf("%s:%s: No vm object in vma!\n",
		       __TASKNAME__, __FUNCTION__);
		BUG();
	}

	/* Traverse the list of read-only vm objects and search for the page */
	while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
							       file_offset))) {
		if (!(vmo_link = vma_next_link(&vmo_link->list,
					       &vma->vm_obj_list))) {
			printf("%s:%s: Traversed all shadows and the original "
			       "file's vm_object, but could not find the "
			       "faulty page in this vma.\n", __TASKNAME__,
			       __FUNCTION__);
			BUG();
		}
	}
	BUG_ON(!page);

	return page;
}

struct page *page_write_fault(struct fault_data *fault)
{
	unsigned int vma_flags = fault->vma->flags;
	struct vm_area *vma = fault->vma;
	struct vm_obj_link *vmo_link;
	unsigned long file_offset;
	struct page *page = 0;

	/* Copy-on-write. All private vmas are always COW */
	if (vma_flags & VMA_PRIVATE) {
		BUG_ON(IS_ERR(page = copy_on_write(fault)));

	/*
	 * This handles shared pages that are both anon and non-anon.
	 */
	} else if (vma_flags & VMA_SHARED) {
		file_offset = fault_to_file_offset(fault);

		/* Don't traverse, just take the first object */
		BUG_ON(!(vmo_link = vma_next_link(&vma->vm_obj_list,
						  &vma->vm_obj_list)));

		/* Get the page from its pager */
		if (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
								    file_offset))) {
			/*
			 * The writable page does not exist. If it is
			 * anonymous, it needs to be COW'ed; otherwise
			 * the file must have paged-in this page, so
			 * it's a bug.
			 */
			if (vma_flags & VMA_ANONYMOUS) {
				BUG_ON(IS_ERR(page = copy_on_write(fault)));
				return page;
			} else {
				printf("%s: Could not obtain faulty "
				       "page from regular file.\n",
				       __TASKNAME__);
				BUG();
			}
		}

		/*
		 * Page and object are now dirty. Currently this is
		 * only relevant for file-backed shared objects.
		 */
		page->flags |= VM_DIRTY;
		page->owner->flags |= VM_DIRTY;
	} else
		BUG();

	return page;
}

struct page *__do_page_fault(struct fault_data *fault)
{
	unsigned int reason = fault->reason;
	unsigned int pte_flags = fault->pte_flags;
	unsigned int map_flags = 0;
	struct page *page = 0;

	if ((reason & VM_READ) && (pte_flags & VM_NONE)) {
		page = page_read_fault(fault);
		map_flags = MAP_USR_RO;

	} else if ((reason & VM_WRITE) && (pte_flags & VM_NONE)) {
		page = page_read_fault(fault);
		page = page_write_fault(fault);
		map_flags = MAP_USR_RW;

	} else if ((reason & VM_EXEC) && (pte_flags & VM_NONE)) {
		page = page_read_fault(fault);
		map_flags = MAP_USR_RX;

	} else if ((reason & VM_EXEC) && (pte_flags & VM_READ)) {
		/* Retrieve the already paged-in file page */
		page = page_read_fault(fault);
		if (pte_flags & VM_WRITE)
			map_flags = MAP_USR_RWX;
		else
			map_flags = MAP_USR_RX;

	} else if ((reason & VM_WRITE) && (pte_flags & VM_READ)) {
		page = page_write_fault(fault);
		if (pte_flags & VM_EXEC)
			map_flags = MAP_USR_RWX;
		else
			map_flags = MAP_USR_RW;

	} else {
		printf("mm0: Unhandled page fault.\n");
		BUG();
	}

	BUG_ON(!page);

	/* Map the new page to the faulting task */
	l4_map((void *)page_to_phys(page),
	       (void *)page_align(fault->address), 1,
	       map_flags, fault->task->tid);
	// vm_object_print(page->owner);

	return page;
}
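
/*
 * Case summary for __do_page_fault() above (restating its branches):
 *
 *   reason     pte_flags          mapping granted
 *   VM_READ    VM_NONE            MAP_USR_RO
 *   VM_WRITE   VM_NONE            MAP_USR_RW  (read fault, then write fault)
 *   VM_EXEC    VM_NONE            MAP_USR_RX
 *   VM_EXEC    VM_READ[|WRITE]    MAP_USR_RX or MAP_USR_RWX
 *   VM_WRITE   VM_READ[|EXEC]     MAP_USR_RW or MAP_USR_RWX
 */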

/*
 * Sets all r/w shadow objects as read-only for the process so that,
 * as expected after a fork() operation, writes to those objects
 * cause copy-on-write events.
 */
int vm_freeze_shadows(struct tcb *task)
{
	unsigned long virtual;
	struct vm_area *vma;
	struct vm_obj_link *vmo_link;
	struct vm_object *vmo;
	struct page *p;

	list_foreach_struct(vma, &task->vm_area_head->list, list) {

		/* Shared vmas don't have shadows */
		if (vma->flags & VMA_SHARED)
			continue;

		/* Get the first object */
		BUG_ON(list_empty(&vma->vm_obj_list));
		vmo_link = link_to_struct(vma->vm_obj_list.next,
					  struct vm_obj_link, list);
		vmo = vmo_link->obj;

		/*
		 * Is this a writeable shadow?
		 *
		 * The only R/W shadow in a vma object chain
		 * can be the first one, so we don't check further
		 * objects if the first one is not what we want.
		 */
		if (!((vmo->flags & VM_OBJ_SHADOW) &&
		      (vmo->flags & VM_WRITE)))
			continue;

		/* Make the object read-only */
		vmo->flags &= ~VM_WRITE;
		vmo->flags |= VM_READ;

		/*
		 * Make all pages on it read-only
		 * in the page tables.
		 */
		list_foreach_struct(p, &vmo->page_cache, list) {

			/* Find the virtual address of each page */
			virtual = vma_page_to_virtual(vma, p);

			/* Map the page as read-only */
			l4_map((void *)page_to_phys(p),
			       (void *)virtual, 1,
			       MAP_USR_RO, task->tid);
		}
	}

	return 0;
}
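
/*
 * Post-fork sequence sketch (descriptive, tying the pieces together):
 * after vm_freeze_shadows(), the task's topmost shadow is read-only,
 * so the next write fault in either the parent or the child takes the
 * copy_on_write() path above, which pushes a fresh r/w shadow in front
 * of the frozen one.
 */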

/*
 * Page fault model:
 *
 * A page is anonymous (e.g. stack)
 * - page needs read access:
 *   action: map the zero page.
 * - page needs write access:
 *   action: allocate a ZI page and map that. The swap file owns the page.
 * - page is swapped to swap:
 *   action: read back from the swap file into a new page.
 *
 * A page is file-backed but private (e.g. .data section)
 * - page needs read access:
 *   action: read the page from its file.
 * - page is swapped out before being made private. (i.e. invalidated)
 *   action: read the page from its file. (original file)
 * - page is swapped out after being made private.
 *   action: read the page from its file. (swap file)
 * - page needs write access:
 *   action: allocate a new page, declare the page as private, change its
 *   owner to the swap file.
 *
 * A page is file-backed, not private, and read-only. (e.g. .text section)
 * - page needs read access:
 *   action: read in the page from its file.
 * - page is swapped out. (i.e. invalidated)
 *   action: read in the page from its file.
 * - page needs write access:
 *   action: forbidden, kill the task?
 *
 * A page is file-backed, not private, and read/write. (e.g. any data file)
 * - page needs read access:
 *   action: read in the page from its file.
 * - page is flushed back to its original file. (i.e. instead of swap)
 *   action: read in the page from its file.
 * - page needs write access:
 *   action: read the page in, give write access.
 */
struct page *do_page_fault(struct fault_data *fault)
{
	unsigned int vma_flags = (fault->vma) ? fault->vma->flags : VM_NONE;
	unsigned int reason = fault->reason;

	/* vma flags show no access */
	if (vma_flags & VM_NONE) {
		printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
		       fault->task->tid, fault->address, fault->kdata->faulty_pc);
		fault_handle_error(fault);
	}

	/* The access reason is not included in the vma's listed flags */
	if (!(reason & vma_flags)) {
		printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
		       fault->task->tid, fault->address, fault->kdata->faulty_pc);
		fault_handle_error(fault);
	}

	/* Handle legitimate faults */
	return __do_page_fault(fault);
}

struct page *page_fault_handler(struct tcb *sender, fault_kdata_t *fkdata)
{
	struct fault_data fault = {
		/* Fault data from the kernel */
		.kdata = fkdata,
		.task = sender,
	};

	/* Extract fault reason, fault address, etc. in a generic format */
	set_generic_fault_params(&fault);

	/* Get vma info */
	if (!(fault.vma = find_vma(fault.address,
				   &fault.task->vm_area_head->list)))
		printf("Hmm. No vma for faulty region. "
		       "Bad things will happen.\n");

	/* Handle the actual fault */
	return do_page_fault(&fault);
}

static inline unsigned int pte_to_map_flags(unsigned int pte_flags)
{
	unsigned int map_flags;

	switch (pte_flags) {
	case VM_READ:
		map_flags = MAP_USR_RO;
		break;
	case (VM_READ | VM_WRITE):
		map_flags = MAP_USR_RW;
		break;
	case (VM_READ | VM_WRITE | VM_EXEC):
		map_flags = MAP_USR_RWX;
		break;
	case (VM_READ | VM_EXEC):
		map_flags = MAP_USR_RX;
		break;
	default:
		BUG();
	}

	return map_flags;
}

/*
 * Prefaults a page of a task. The catch is that the page may already
 * have been faulted further along than the desired flags alone would
 * take it (e.g. read-faulting a copy-on-write'd page).
 *
 * This function detects whether progress is necessary or not by
 * inspecting the state of the vma's vm_object chain.
 *
 * Generally both the read-fault and write-fault paths are repeatable,
 * in the sense that an already faulted page may be safely re-faulted
 * again and again, be it a read-only or a copy-on-write'd page.
 *
 * Retrieving the same page repeatedly is thus safe, but while it may
 * also appear safe, it is unnecessary to downgrade or change the
 * mapping permissions of a page, e.g. to make a copy-on-write'd page
 * read-only by doing a blind read-fault on it.
 *
 * Hence this function checks whether a fault is necessary and simply
 * returns if it isn't.
 *
 * FIXME: Escalate any page fault errors like a civilized function!
 */
struct page *task_prefault_smart(struct tcb *task, unsigned long address,
				 unsigned int wanted_flags)
{
	struct vm_obj_link *vmo_link;
	unsigned long file_offset;
	unsigned int vma_flags, pte_flags;
	struct vm_area *vma;
	struct page *page;
	int err;

	struct fault_data fault = {
		.task = task,
		.address = address,
	};

	/* Find the vma */
	if (!(fault.vma = find_vma(fault.address,
				   &fault.task->vm_area_head->list))) {
		dprintf("%s: Invalid: No vma for given address. %d\n",
			__FUNCTION__, -EINVAL);
		return PTR_ERR(-EINVAL);
	}

	/* Read fault, safe to repeat */
	if (wanted_flags & VM_READ)
		if (IS_ERR(page = page_read_fault(&fault)))
			return page;

	/* Write fault, safe to repeat */
	if (wanted_flags & VM_WRITE)
		if (IS_ERR(page = page_write_fault(&fault)))
			return page;

	/*
	 * If we came this far, it means we have more
	 * permissions than VM_NONE.
	 *
	 * Now we _must_ find out what those page
	 * protection flags were, and do this without
	 * needing to inspect any ptes.
	 *
	 * We don't want to downgrade a RW page to RO again.
	 */
	file_offset = fault_to_file_offset(&fault);
	vma_flags = fault.vma->flags;
	vma = fault.vma;

	/* Get the topmost vm_object */
	if (!(vmo_link = vma_next_link(&vma->vm_obj_list,
				       &vma->vm_obj_list))) {
		printf("%s:%s: No vm object in vma!\n",
		       __TASKNAME__, __FUNCTION__);
		BUG();
	}

	/* Traverse the list of vm objects and search for the page */
	while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
							       file_offset))) {
		if (!(vmo_link = vma_next_link(&vmo_link->list,
					       &vma->vm_obj_list))) {
			printf("%s:%s: Traversed all shadows and the original "
			       "file's vm_object, but could not find the "
			       "faulty page in this vma.\n", __TASKNAME__,
			       __FUNCTION__);
			BUG();
		}
	}

	/* Use the flags of the vm_object containing the page */
	if (vmo_link->obj->flags & VM_WRITE)
		pte_flags = VM_WRITE | VM_READ;
	else
		pte_flags = VM_READ;

	/*
	 * Now check the vma flags for adding VM_EXEC.
	 * The real pte may not have this flag yet, but
	 * it is allowed to have it and it doesn't harm.
	 */
	if (vma_flags & VM_EXEC)
		pte_flags |= VM_EXEC;

	/* Map the page to the task using these flags */
	if ((err = l4_map((void *)page_to_phys(page),
			  (void *)page_align(fault.address), 1,
			  pte_to_map_flags(pte_flags),
			  fault.task->tid)) < 0) {
		printf("l4_map() failed. err=%d\n", err);
		BUG();
	}

	return page;
}

/*
 * Prefaults the page at the given virtual address for the given task
 * with the given reasons. Multiple reasons are allowed; they are
 * handled separately, in order.
 */
struct page *task_prefault_page(struct tcb *task, unsigned long address,
				unsigned int vmflags)
{
	struct page *ret;

	perfmon_reset_start_cyccnt();
	ret = task_prefault_smart(task, address, vmflags);

	debug_record_cycles("task_prefault_smart");

	return ret;

#if 0
	struct page *p;
	struct fault_data fault = {
		.task = task,
		.address = address,
	};

	dprintf("Pre-faulting address 0x%lx, on task %d, with flags: 0x%x\n",
		address, task->tid, vmflags);

	/* Find the vma */
	if (!(fault.vma = find_vma(fault.address,
				   &fault.task->vm_area_head->list))) {
		dprintf("%s: Invalid: No vma for given address. %d\n",
			__FUNCTION__, -EINVAL);
		return PTR_ERR(-EINVAL);
	}

	/* Flags may indicate multiple fault reasons. First do the read */
	if (vmflags & VM_READ) {
		fault.pte_flags = VM_NONE;
		fault.reason = VM_READ;
		if (IS_ERR(p = do_page_fault(&fault)))
			return p;
	}
	/* Now write */
	if (vmflags & VM_WRITE) {
		fault.pte_flags = VM_READ;
		fault.reason = VM_WRITE;
		if (IS_ERR(p = do_page_fault(&fault)))
			return p;
	}
	/* No exec or any other fault reason allowed. */
	BUG_ON(vmflags & ~(VM_READ | VM_WRITE));

	return p;
#endif
}

int vm_compare_prot_flags(unsigned int current, unsigned int needed)
{
	current &= VM_PROT_MASK;
	needed &= VM_PROT_MASK;

	if (needed & VM_READ)
		if (current & (VM_READ | VM_WRITE))
			return 1;

	if ((needed & VM_WRITE) &&
	    (current & VM_WRITE))
		return 1;

	return 0;
}
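
/*
 * Illustrative only: vm_compare_prot_flags(VM_READ | VM_WRITE, VM_READ)
 * and vm_compare_prot_flags(VM_WRITE, VM_READ) both return 1, since a
 * writable mapping already satisfies a read; a read-only mapping asked
 * for VM_WRITE returns 0.
 */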