/*
 * Page fault handling.
 *
 * Copyright (C) 2007, 2008-2010 Bahadir Bilgehan Balban
 */
#include <vm_area.h>
#include <task.h>
#include <mm/alloc_page.h>
#include <malloc/malloc.h>
#include <l4/generic/space.h>
#include <l4/api/errno.h>
#include <string.h>
#include <memory.h>
#include <shm.h>
#include <file.h>
#include <test.h>

#include L4LIB_INC_ARCH(syscalls.h)
#include L4LIB_INC_ARCH(syslib.h)
#include INC_GLUE(memory.h)
#include INC_SUBARCH(mm.h)
#include __INC_ARCH(mm.h)
#include __INC_ARCH(debug.h)

/* Given a page and the vma it is in, returns that page's virtual address */
unsigned long vma_page_to_virtual(struct vm_area *vma, struct page *page)
{
	unsigned long virtual_pfn = vma->pfn_start + page->offset - vma->file_offset;

	/* Page must be contained in vma's pages */
	BUG_ON(vma->file_offset > page->offset);

	return __pfn_to_addr(virtual_pfn);
}

unsigned long fault_to_file_offset(struct fault_data *fault)
{
	/* Fault's offset in its vma */
	unsigned long vma_off_pfn = __pfn(fault->address) - fault->vma->pfn_start;

	/* Fault's offset in the file */
	unsigned long f_off_pfn = fault->vma->file_offset + vma_off_pfn;

	return f_off_pfn;
}
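
/*
 * Worked example for the two conversions above (illustrative values
 * only, not from a real run): say vma->pfn_start = 0x100,
 * vma->file_offset = 0x20, and a fault at an address whose pfn is
 * 0x105. Then:
 *
 *   fault_to_file_offset(): vma_off_pfn = 0x105 - 0x100 = 0x5
 *                           f_off_pfn   = 0x20  + 0x5   = 0x25
 *
 *   vma_page_to_virtual() on the page at file offset 0x25:
 *                           virtual_pfn = 0x100 + 0x25 - 0x20 = 0x105
 *
 * i.e. the two functions invert each other for in-vma pages.
 */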

/*
 * Given a reference to a vm_object link, returns the next link but
 * avoids wrapping around back to head. If next is head, returns 0.
 *
 * vma->link1->link2->link3
 *        |      |      |
 *        V      V      V
 *       vmo1   vmo2   vmo3|vm_file
 *
 * Example:
 * Given a reference to link = vma, head = vma, returns link1.
 * Given a reference to link = link3, head = vma, returns 0.
 */
struct vm_obj_link *vma_next_link(struct link *link,
				  struct link *head)
{
	BUG_ON(list_empty(link));
	if (link->next == head)
		return 0;
	else
		return link_to_struct(link->next, struct vm_obj_link, list);
}
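
/*
 * Usage sketch for vma_next_link() (illustrative, compiled out):
 * walking every object link in a vma, topmost shadow first, the
 * same way the fault paths below do. 'vma' stands for any
 * struct vm_area with a populated object chain.
 */
#if 0
	struct vm_obj_link *l;

	l = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list);
	while (l) {
		/* Inspect l->obj here */
		l = vma_next_link(&l->list, &vma->vm_obj_list);
	}
#endif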

/* Unlinks the given link from its vma and deletes it, but keeps the object. */
struct vm_object *vma_drop_link(struct vm_obj_link *link)
{
	struct vm_object *dropped;

	/* Remove object link from vma's list */
	list_remove(&link->list);

	/* Unlink the link from object */
	dropped = vm_unlink_object(link);

	/* Delete the original link */
	kfree(link);

	return dropped;
}

/*
 * Checks whether the page cache pages of the lesser object (original)
 * are a subset of those of the copier (shadow).
 *
 * FIXME:
 * Note this just checks the page cache, so if any objects have pages
 * swapped to disk, this function won't work, which is a logic error.
 * This should really count the swapped ones as well.
 */
int vm_object_is_subset(struct vm_object *shadow,
			struct vm_object *original)
{
	struct page *pc, *pl;

	/* Copier must have equal or more pages to overlap lesser */
	if (shadow->npages < original->npages)
		return 0;

	/*
	 * Do a page by page comparison. Every lesser page
	 * must be in copier for overlap.
	 */
	list_foreach_struct(pl, &original->page_cache, list)
		if (!(pc = find_page(shadow, pl->offset)))
			return 0;

	/*
	 * For all pages of the lesser vmo, there is a page in the
	 * copier vmo. So lesser is a subset of copier.
	 */
	return 1;
}
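
/*
 * Illustrative only: if the shadow caches pages at offsets {0, 1, 2}
 * and the original caches {1, 2}, every original page is found in the
 * shadow and this returns 1; if the original also cached offset 3, it
 * would return 0.
 */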

static inline int vm_object_is_droppable(struct vm_object *shadow,
					 struct vm_object *original)
{
	if (shadow->npages == original->npages &&
	    (original->flags & VM_OBJ_SHADOW))
		return 1;
	else
		return 0;
}

/*
 * vma_merge_object()
 *
 * FIXME: Currently this is an optimisation that needs to go
 * away when swapping is available. We have this solely because
 * currently a shadow needs to identically mirror the whole
 * object underneath in order to drop it. A file that is 1MB
 * long would spend 2MB until dropped. When swapping is available,
 * we will go back to identical mirroring instead of merging the
 * last shadow, since most unused pages would be swapped out.
 */

/*
 * When a shadow object is redundant, merges it into the shadow in front
 * of it. Note it must be determined that it is redundant before calling
 * this function.
 *
 * vma --> link1 --> link2 --> link3
 *           |         |         |
 *           v         v         v
 *         Front    Redundant   Next
 *         Shadow    Shadow     Object (e.g. shadow or file)
 */
int vma_merge_object(struct vm_object *redundant)
{
	struct vm_object *front;	/* Shadow in front of redundant */
	struct vm_obj_link *last_link;
	struct page *p1, *p2, *n;

	/* Check that link and shadow counts are really 1 */
	BUG_ON(redundant->nlinks != 1);
	BUG_ON(redundant->shadows != 1);

	/* Get the last shadower object in front */
	front = link_to_struct(redundant->shdw_list.next,
			       struct vm_object, shref);

	/* Move all non-intersecting pages to the front shadow. */
	list_foreach_removable_struct(p1, n, &redundant->page_cache, list) {
		/* Page doesn't exist in front, move it there */
		if (!(p2 = find_page(front, p1->offset))) {
			list_remove_init(&p1->list);
			spin_lock(&p1->lock);
			p1->owner = front;
			spin_unlock(&p1->lock);
			insert_page_olist(p1, front);
			front->npages++;
		}
	}

	/* Sort out shadow relationships after the merge: */

	/* Front won't be a shadow of the redundant shadow anymore */
	list_remove_init(&front->shref);

	/* Check that there really was one shadower of redundant left */
	BUG_ON(!list_empty(&redundant->shdw_list));

	/* Redundant won't be a shadow of its next object */
	list_remove_init(&redundant->shref);

	/* Front is now a shadow of redundant's next object */
	list_insert(&front->shref, &redundant->orig_obj->shdw_list);
	front->orig_obj = redundant->orig_obj;

	/* Find the last link for the object */
	last_link = link_to_struct(redundant->link_list.next,
				   struct vm_obj_link, linkref);

	/* Drop the last link to the object */
	vma_drop_link(last_link);

	/* Redundant shadow has no shadows anymore */
	BUG_ON(--redundant->shadows < 0);

	/* Delete the redundant shadow along with all its pages. */
	vm_object_delete(redundant);

	return 0;
}
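
/*
 * Illustrative merge (made-up offsets): if the redundant shadow caches
 * pages {0, 1} and the front shadow caches {1}, only page 0 moves, so
 * the front ends up with {0, 1} and keeps its own, newer copy of page 1.
 * The redundant shadow's leftover copy of page 1 is freed along with it
 * by vm_object_delete().
 */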

struct vm_obj_link *vm_objlink_create(void)
{
	struct vm_obj_link *vmo_link;

	if (!(vmo_link = kzalloc(sizeof(*vmo_link))))
		return PTR_ERR(-ENOMEM);
	link_init(&vmo_link->list);
	link_init(&vmo_link->linkref);

	return vmo_link;
}

/*
 * Creates a bare vm_object along with its vma link, since
 * the shadow will be immediately used in a vma object list.
 */
struct vm_obj_link *vma_create_shadow(void)
{
	struct vm_object *vmo;
	struct vm_obj_link *vmo_link;

	if (IS_ERR(vmo_link = vm_objlink_create()))
		return 0;

	if (!(vmo = vm_object_create())) {
		kfree(vmo_link);
		return 0;
	}
	vmo->flags = VM_OBJ_SHADOW;

	vm_link_object(vmo_link, vmo);

	return vmo_link;
}

/* Allocates a new page, copies the original onto it and returns it. */
struct page *copy_to_new_page(struct page *orig)
{
	void *paddr = alloc_page(1);

	BUG_ON(!paddr);

	/* Copy the original page into the new one */
	memcpy(phys_to_virt(paddr), page_to_virt(orig), PAGE_SIZE);

	return phys_to_page(paddr);
}

/* Copy the whole stack of mapped object links from vma to new_vma */
int vma_copy_links(struct vm_area *new_vma, struct vm_area *vma)
{
	struct vm_obj_link *vmo_link, *new_link;

	/* Get the first object on the vma */
	BUG_ON(list_empty(&vma->vm_obj_list));
	vmo_link = link_to_struct(vma->vm_obj_list.next,
				  struct vm_obj_link, list);
	do {
		/* Create a new link, propagating allocation failure */
		if (IS_ERR(new_link = vm_objlink_create()))
			return -ENOMEM;

		/* Link object with new link */
		vm_link_object(new_link, vmo_link->obj);

		/* Add the new link to vma in object order */
		list_insert_tail(&new_link->list, &new_vma->vm_obj_list);

		/* Continue traversing links, doing the same copying */
	} while ((vmo_link = vma_next_link(&vmo_link->list,
					   &vma->vm_obj_list)));

	return 0;
}
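
/*
 * Illustrative chain state for vma_copy_links() (not from a real run):
 *
 *   before:  vma --> link_a --> link_b        new_vma --> (empty)
 *   after:   vma --> link_a --> link_b        new_vma --> link_a' --> link_b'
 *
 * link_a' references the same vm_object as link_a, so each underlying
 * object gains one more link reference rather than being copied.
 */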

/*
 * Determine if an object is deletable.
 *
 * Shadows are deleted if nlinks = 0, and
 * merged if they have nlinks = 1, shadows = 1.
 * See below for an explanation.
 *
 * vfs-type vmfiles are deleted if their
 * openers = 0, and their nlinks
 * (i.e. mappers) = 0.
 *
 * shm-type vmfiles are deleted if their
 * nlinks = 0, since they only have a map count.
 */
int vm_object_is_deletable(struct vm_object *obj)
{
	struct vm_file *f;

	//printf("%s: Checking: ", __FUNCTION__);
	//vm_object_print(obj);

	if (obj->nlinks != 0)
		return 0;

	BUG_ON(obj->shadows != 0);
	BUG_ON(!list_empty(&obj->shref));

	if (obj->flags & VM_OBJ_SHADOW)
		return 1;

	f = vm_object_to_file(obj);

	/* Devzero should probably never have 0 refs left */
	if (f->type == VM_FILE_DEVZERO)
		return 0;
	else if (f->type == VM_FILE_SHM)
		return 1;
	else if (f->type == VM_FILE_VFS) {
		if (f->openers == 0)
			return 1;
		else
			return 0;
	}

	/* To make gcc happy */
	BUG();
	return 0;
}

/*
 * exit has: !prev, (next || !next)
 * shadow drop has: prev, next
 */

/*
 * Shadow drops: Dropping a link to a shadow does not mean the shadow's
 * next object has lost a shadow. There may be other links to both. But
 * when the shadow has dropped its last link, and is going to be deleted,
 * it is then true that the shadow is lost by the next object.
 */
int vma_drop_merge_delete(struct vm_area *vma, struct vm_obj_link *link)
{
	struct vm_obj_link *prev, *next;
	struct vm_object *obj;

	/* Get previous and next links, if they exist */
	prev = (link->list.prev == &vma->vm_obj_list) ? 0 :
	       link_to_struct(link->list.prev, struct vm_obj_link, list);

	next = (link->list.next == &vma->vm_obj_list) ? 0 :
	       link_to_struct(link->list.next, struct vm_obj_link, list);

	/* Drop the link */
	obj = vma_drop_link(link);

	/* If there is an object in front, this is a shadow drop */
	if (prev) {
		BUG_ON(!(prev->obj->flags & VM_OBJ_SHADOW));
		BUG_ON(!(prev->obj->flags & VM_WRITE));
		BUG_ON(--obj->shadows < 0);
		// vm_object_print(obj);

		/* Remove prev from current object's shadow list */
		BUG_ON(list_empty(&prev->obj->shref));
		list_remove_init(&prev->obj->shref);

		/*
		 * We don't allow dropping non-shadow objects yet
		 * (see ...is_droppable), so there must be a next.
		 */
		BUG_ON(!next);

		/* prev is now a shadow of next */
		list_insert(&prev->obj->shref,
			    &next->obj->shdw_list);
		prev->obj->orig_obj = next->obj;

		/*
		 * No referrers left, meaning this object is not
		 * shadowing its original object anymore.
		 */
		if (obj->nlinks == 0) {
			BUG_ON(obj->orig_obj != next->obj);
			list_remove_init(&obj->shref);
		} else {
			/*
			 * Dropped object still has referrers, which
			 * means next has gained a new shadow.
			 * Here's why:
			 *
			 *   T1 and T2:            T2: drop-
			 *   prev->drop->next               \
			 *   became:               T1: prev--- next
			 *
			 * Now we have both prev and the current object
			 * in next's shadow list.
			 */
			next->obj->shadows++;
		}
	/* It's an exit, we check if there's a shadow loss */
	} else {
		if (obj->nlinks == 0) {
			/* Is it a shadow delete? Sort out next */
			if (next && (obj->flags & VM_OBJ_SHADOW)) {
				BUG_ON(obj->orig_obj != next->obj);
				BUG_ON(--next->obj->shadows < 0);
				// vm_object_print(next->obj);
				list_remove_init(&obj->shref);
			}
		}
	}

	/* Now deal with the object itself */
	if (vm_object_is_deletable(obj)) {
		dprintf("Deleting object:\n");
		// vm_object_print(obj);
		vm_object_delete(obj);
	} else if ((obj->flags & VM_OBJ_SHADOW) &&
		   obj->nlinks == 1 && obj->shadows == 1) {
		dprintf("Merging object:\n");
		// vm_object_print(obj);
		vma_merge_object(obj);
	}

	mm0_test_global_vm_integrity();
	return 0;
}

/*
 * A scenario that pretty much covers every exit() case.
 *
 * T = vma on a unique task
 * l = link
 * Sobj = Shadow object
 * Fobj = File object
 *
 * Every l links to the object on the nearest
 * row to it and on the same column.
 *
 * l l l l l l             T
 *  Sobj Sobj
 *
 *   Sobj Sobj Sobj  Fobj
 *
 *  Sobj Sobj Sobj
 * l l l l l l l           T
 *
 * l l l l l l l           T
 *  Sobj
 */

/* This version is used when exiting. */
int vma_drop_merge_delete_all(struct vm_area *vma)
{
	struct vm_obj_link *vmo_link, *n;

	/* Vma cannot be empty */
	BUG_ON(list_empty(&vma->vm_obj_list));

	/* Traverse and get rid of all links */
	list_foreach_removable_struct(vmo_link, n, &vma->vm_obj_list, list)
		vma_drop_merge_delete(vma, vmo_link);

	return 0;
}

/* TODO:
 * - Why not allocate a swap descriptor in vma_create_shadow() rather than
 *   a bare vm_object? It will be needed.
 * - Check refcounting of shadows, their references, page refs,
 *   reductions, increases, etc.
 *
 * This handles copy-on-write semantics in various situations. Returns
 * the page struct for the copy page available for mapping.
 *
 * 1) Copy-on-write of read-only files. (Creates r/w shadows/adds pages)
 * 2) Copy-on-write of forked RO shadows (Creates r/w shadows/adds pages)
 * 3) Copy-on-write of shm files. (Adds pages to r/w shm file from devzero)
 */
struct page *copy_on_write(struct fault_data *fault)
{
	struct vm_obj_link *vmo_link, *shadow_link;
	struct vm_object *shadow;
	struct page *page, *new_page;
	struct vm_area *vma = fault->vma;
	unsigned long file_offset = fault_to_file_offset(fault);

	/* Get the first object, either the original file or a shadow */
	if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
		printf("%s:%s: No vm object in vma!\n",
		       __TASKNAME__, __FUNCTION__);
		BUG();
	}

	/* Is the object read-only? Create a shadow object if so.
	 *
	 * NOTE: Whenever the topmost object is read-only, a new shadow
	 * object must be created. When there are no shadows, one is created
	 * because it's the original vm_object that is not writeable, and
	 * when there are shadows, one is created because a fork has just
	 * happened, in which case all shadows are rendered read-only.
	 */
	if (!(vmo_link->obj->flags & VM_WRITE)) {
		if (!(shadow_link = vma_create_shadow()))
			return PTR_ERR(-ENOMEM);

		/* Initialise the shadow */
		shadow = shadow_link->obj;
		shadow->orig_obj = vmo_link->obj;
		shadow->flags = VM_OBJ_SHADOW | VM_WRITE;
		shadow->pager = &swap_pager;
		vmo_link->obj->shadows++;
		// vm_object_print(vmo_link->obj);
		dprintf("%s: Created a shadow:\n", __TASKNAME__);
		// vm_object_print(shadow);
		dprintf("%s: Original object:\n", __TASKNAME__);
		// vm_object_print(shadow->orig_obj);

		/*
		 * Add the shadow in front of the original:
		 *
		 * vma->link0->link1
		 *        |      |
		 *        v      v
		 *     shadow  original
		 */
		list_insert(&shadow_link->list, &vma->vm_obj_list);

		/* Add object to original's shadower list */
		list_insert(&shadow->shref, &shadow->orig_obj->shdw_list);

		/* Add to global object list */
		global_add_vm_object(shadow);

	} else {
		/* We ought to copy the missing RW page to the top shadow */
		dprintf("No new shadows. Going to add to "
			"topmost r/w shadow object\n");
		shadow_link = vmo_link;

		/*
		 * FIXME: Here we check for the case that a cloned thread is
		 * doing a duplicate write request on an existing RW shadow
		 * page. If so, we return the existing writable page in the top
		 * shadow. We should find a generic way to detect duplicate
		 * requests and cease IPC at an earlier stage.
		 */
		page = shadow_link->obj->pager->ops.page_in(shadow_link->obj,
							    file_offset);
		if (!IS_ERR(page))
			return page;

		/*
		 * We start the page search on read-only objects. If the first
		 * one was writable, go to the next, which must be read-only.
		 */
		BUG_ON(!(vmo_link = vma_next_link(&vmo_link->list,
						  &vma->vm_obj_list)));
		BUG_ON(vmo_link->obj->flags & VM_WRITE);
	}

	/* Traverse the list of read-only vm objects and search for the page */
	while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
							       file_offset))) {
		if (!(vmo_link = vma_next_link(&vmo_link->list,
					       &vma->vm_obj_list))) {
			printf("%s:%s: Traversed all shadows and the original "
			       "file's vm_object, but could not find the "
			       "faulty page in this vma.\n", __TASKNAME__,
			       __FUNCTION__);
			BUG();
		}
	}

	/*
	 * Copy the page. This traverse and copy is like a page-in operation
	 * of a pager, except that the page is moving along vm_objects.
	 */
	new_page = copy_to_new_page(page);

	/* Update page details */
	spin_lock(&new_page->lock);
	BUG_ON(!list_empty(&new_page->list));
	new_page->refcnt = 0;
	new_page->owner = shadow_link->obj;
	new_page->offset = file_offset;
	new_page->virtual = 0;
	spin_unlock(&new_page->lock);

	/* Add the page to its owner's list of in-memory pages */
	insert_page_olist(new_page, new_page->owner);
	new_page->owner->npages++;

	mm0_test_global_vm_integrity();

	/* Shared faults don't have shadows so we don't look for collapses */
	if (!(vma->flags & VMA_SHARED)) {

		/*
		 * Finished handling the actual fault; now check for possible
		 * shadow collapses. Does the shadow completely cover the one
		 * underlying it?
		 */
		if (!(vmo_link = vma_next_link(&shadow_link->list,
					       &vma->vm_obj_list))) {
			/* Copier must have an object under it */
			printf("Copier must have had an object under it!\n");
			BUG();
		}
		if (vm_object_is_droppable(shadow_link->obj, vmo_link->obj))
			vma_drop_merge_delete(vma, vmo_link);
	}

	return new_page;
}

/*
 * Handles the page fault. All entries here are assumed to be *legal*
 * faults, i.e. do_page_fault() should have already checked
 * for illegal accesses.
 *
 * NOTE:
 * Anon/shared pages:
 * The first access from the first process is COW. All subsequent RW
 * accesses (which are attempts at *sharing*) simply map that
 * page to the faulting processes.
 *
 * Non-anon/shared pages:
 * The first access from the first process simply writes to the pages
 * of that file. All subsequent accesses by other processes
 * do so as well.
 *
 * FIXME: Add a VM_DIRTY bit for every page that has write-faulted.
 */

/* Handle read faults */
struct page *page_read_fault(struct fault_data *fault)
{
	struct vm_area *vma = fault->vma;
	struct vm_obj_link *vmo_link;
	unsigned long file_offset;
	struct page *page = 0;

	file_offset = fault_to_file_offset(fault);

	/* Get the first object, either the original file or a shadow */
	if (!(vmo_link = vma_next_link(&vma->vm_obj_list, &vma->vm_obj_list))) {
		printf("%s:%s: No vm object in vma!\n",
		       __TASKNAME__, __FUNCTION__);
		BUG();
	}

	/* Traverse the list of read-only vm objects and search for the page */
	while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
							       file_offset))) {
		if (!(vmo_link = vma_next_link(&vmo_link->list,
					       &vma->vm_obj_list))) {
			printf("%s:%s: Traversed all shadows and the original "
			       "file's vm_object, but could not find the "
			       "faulty page in this vma.\n", __TASKNAME__,
			       __FUNCTION__);
			BUG();
		}
	}
	BUG_ON(!page);

	return page;
}

struct page *page_write_fault(struct fault_data *fault)
{
	unsigned int vma_flags = fault->vma->flags;
	struct vm_area *vma = fault->vma;
	struct vm_obj_link *vmo_link;
	unsigned long file_offset;
	struct page *page = 0;

	/* Copy-on-write. All private vmas are always COW */
	if (vma_flags & VMA_PRIVATE) {
		BUG_ON(IS_ERR(page = copy_on_write(fault)));

	/*
	 * This handles shared pages that are both anon and non-anon.
	 */
	} else if (vma_flags & VMA_SHARED) {
		file_offset = fault_to_file_offset(fault);

		/* Don't traverse, just take the first object */
		BUG_ON(!(vmo_link = vma_next_link(&vma->vm_obj_list,
						  &vma->vm_obj_list)));

		/* Get the page from its pager */
		if (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
								    file_offset))) {
			/*
			 * The writable page does not exist. If it is
			 * anonymous, it needs to be COW'ed; otherwise
			 * the file must have paged-in this page, so
			 * it's a bug.
			 */
			if (vma_flags & VMA_ANONYMOUS) {
				BUG_ON(IS_ERR(page = copy_on_write(fault)));
				return page;
			} else {
				printf("%s: Could not obtain faulty "
				       "page from regular file.\n",
				       __TASKNAME__);
				BUG();
			}
		}

		/*
		 * Page and object are now dirty. Currently this is
		 * only relevant for file-backed shared objects.
		 */
		page->flags |= VM_DIRTY;
		page->owner->flags |= VM_DIRTY;
	} else
		BUG();

	return page;
}

struct page *__do_page_fault(struct fault_data *fault)
{
	unsigned int reason = fault->reason;
	unsigned int pte_flags = fault->pte_flags;
	unsigned int map_flags = 0;
	struct page *page = 0;

	if ((reason & VM_READ) && (pte_flags & VM_NONE)) {
		page = page_read_fault(fault);
		map_flags = MAP_USR_RO;

	} else if ((reason & VM_WRITE) && (pte_flags & VM_NONE)) {
		page = page_read_fault(fault);
		page = page_write_fault(fault);
		map_flags = MAP_USR_RW;

	} else if ((reason & VM_EXEC) && (pte_flags & VM_NONE)) {
		page = page_read_fault(fault);
		map_flags = MAP_USR_RX;

	} else if ((reason & VM_EXEC) && (pte_flags & VM_READ)) {
		/* Retrieve the already paged-in file page */
		page = page_read_fault(fault);
		if (pte_flags & VM_WRITE)
			map_flags = MAP_USR_RWX;
		else
			map_flags = MAP_USR_RX;

	} else if ((reason & VM_WRITE) && (pte_flags & VM_READ)) {
		page = page_write_fault(fault);
		if (pte_flags & VM_EXEC)
			map_flags = MAP_USR_RWX;
		else
			map_flags = MAP_USR_RW;

	} else {
		printf("mm0: Unhandled page fault.\n");
		BUG();
	}

	BUG_ON(!page);

	/* Map the new page to the faulting task */
	l4_map((void *)page_to_phys(page),
	       (void *)page_align(fault->address), 1,
	       map_flags, fault->task->tid);
	// vm_object_print(page->owner);

	return page;
}
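
/*
 * Case summary for __do_page_fault() above (restating its branches):
 *
 *   reason     pte_flags          mapping granted
 *   VM_READ    VM_NONE            MAP_USR_RO
 *   VM_WRITE   VM_NONE            MAP_USR_RW  (read fault, then write fault)
 *   VM_EXEC    VM_NONE            MAP_USR_RX
 *   VM_EXEC    VM_READ[|WRITE]    MAP_USR_RX or MAP_USR_RWX
 *   VM_WRITE   VM_READ[|EXEC]     MAP_USR_RW or MAP_USR_RWX
 */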

/*
 * Sets all r/w shadow objects as read-only for the process so that,
 * as expected after a fork() operation, writes to those objects
 * cause copy-on-write events.
 */
int vm_freeze_shadows(struct tcb *task)
{
	unsigned long virtual;
	struct vm_area *vma;
	struct vm_obj_link *vmo_link;
	struct vm_object *vmo;
	struct page *p;

	list_foreach_struct(vma, &task->vm_area_head->list, list) {

		/* Shared vmas don't have shadows */
		if (vma->flags & VMA_SHARED)
			continue;

		/* Get the first object */
		BUG_ON(list_empty(&vma->vm_obj_list));
		vmo_link = link_to_struct(vma->vm_obj_list.next,
					  struct vm_obj_link, list);
		vmo = vmo_link->obj;

		/*
		 * Is this a writeable shadow?
		 *
		 * The only R/W shadow in a vma object chain
		 * can be the first one, so we don't check further
		 * objects if the first one is not what we want.
		 */
		if (!((vmo->flags & VM_OBJ_SHADOW) &&
		      (vmo->flags & VM_WRITE)))
			continue;

		/* Make the object read-only */
		vmo->flags &= ~VM_WRITE;
		vmo->flags |= VM_READ;

		/*
		 * Make all pages on it read-only
		 * in the page tables.
		 */
		list_foreach_struct(p, &vmo->page_cache, list) {

			/* Find the virtual address of each page */
			virtual = vma_page_to_virtual(vma, p);

			/* Map the page as read-only */
			l4_map((void *)page_to_phys(p),
			       (void *)virtual, 1,
			       MAP_USR_RO, task->tid);
		}
	}

	return 0;
}
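
/*
 * Post-fork sequence sketch (descriptive, tying the pieces together):
 * after vm_freeze_shadows(), the task's topmost shadow is read-only,
 * so the next write fault in either the parent or the child takes the
 * copy_on_write() path above, which pushes a fresh r/w shadow in front
 * of the frozen one.
 */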

/*
 * Page fault model:
 *
 * A page is anonymous (e.g. stack)
 * - page needs read access:
 *   action: map the zero page.
 * - page needs write access:
 *   action: allocate a ZI page and map that. The swap file owns the page.
 * - page is swapped to swap:
 *   action: read back from the swap file into a new page.
 *
 * A page is file-backed but private (e.g. .data section)
 * - page needs read access:
 *   action: read the page from its file.
 * - page is swapped out before being made private. (i.e. invalidated)
 *   action: read the page from its file. (original file)
 * - page is swapped out after being made private.
 *   action: read the page from its file. (swap file)
 * - page needs write access:
 *   action: allocate a new page, declare the page as private, change its
 *   owner to the swap file.
 *
 * A page is file-backed, not private, and read-only. (e.g. .text section)
 * - page needs read access:
 *   action: read in the page from its file.
 * - page is swapped out. (i.e. invalidated)
 *   action: read in the page from its file.
 * - page needs write access:
 *   action: forbidden, kill the task?
 *
 * A page is file-backed, not private, and read/write. (e.g. any data file)
 * - page needs read access:
 *   action: read in the page from its file.
 * - page is flushed back to its original file. (i.e. instead of swap)
 *   action: read in the page from its file.
 * - page needs write access:
 *   action: read the page in, give write access.
 */
struct page *do_page_fault(struct fault_data *fault)
{
	unsigned int vma_flags = (fault->vma) ? fault->vma->flags : VM_NONE;
	unsigned int reason = fault->reason;

	/* vma flags show no access */
	if (vma_flags & VM_NONE) {
		printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
		       fault->task->tid, fault->address, fault->kdata->faulty_pc);
		fault_handle_error(fault);
	}

	/* The access reason is not included in the vma's listed flags */
	if (!(reason & vma_flags)) {
		printf("Illegal access, tid: %d, address: 0x%x, PC @ 0x%x\n",
		       fault->task->tid, fault->address, fault->kdata->faulty_pc);
		fault_handle_error(fault);
	}

	/* Handle legitimate faults */
	return __do_page_fault(fault);
}

struct page *page_fault_handler(struct tcb *sender, fault_kdata_t *fkdata)
{
	struct fault_data fault = {
		/* Fault data from the kernel */
		.kdata = fkdata,
		.task = sender,
	};

	/* Extract fault reason, fault address, etc. in a generic format */
	set_generic_fault_params(&fault);

	/* Get vma info */
	if (!(fault.vma = find_vma(fault.address,
				   &fault.task->vm_area_head->list)))
		printf("Hmm. No vma for faulty region. "
		       "Bad things will happen.\n");

	/* Handle the actual fault */
	return do_page_fault(&fault);
}

static inline unsigned int pte_to_map_flags(unsigned int pte_flags)
{
	unsigned int map_flags;

	switch (pte_flags) {
	case VM_READ:
		map_flags = MAP_USR_RO;
		break;
	case (VM_READ | VM_WRITE):
		map_flags = MAP_USR_RW;
		break;
	case (VM_READ | VM_WRITE | VM_EXEC):
		map_flags = MAP_USR_RWX;
		break;
	case (VM_READ | VM_EXEC):
		map_flags = MAP_USR_RX;
		break;
	default:
		BUG();
	}

	return map_flags;
}

/*
 * Prefaults a page of a task. The catch is that the page may already
 * have been faulted further along than the desired flags alone would
 * take it (e.g. read-faulting a copy-on-write'd page).
 *
 * This function detects whether progress is necessary or not by
 * inspecting the state of the vma's vm_object chain.
 *
 * Generally both the read-fault and write-fault paths are repeatable,
 * in the sense that an already faulted page may be safely re-faulted
 * again and again, be it a read-only or a copy-on-write'd page.
 *
 * Retrieving the same page repeatedly is thus safe, but while it may
 * also appear safe, it is unnecessary to downgrade or change the
 * mapping permissions of a page, e.g. to make a copy-on-write'd page
 * read-only by doing a blind read-fault on it.
 *
 * Hence this function checks whether a fault is necessary and simply
 * returns if it isn't.
 *
 * FIXME: Escalate any page fault errors like a civilized function!
 */
struct page *task_prefault_smart(struct tcb *task, unsigned long address,
				 unsigned int wanted_flags)
{
	struct vm_obj_link *vmo_link;
	unsigned long file_offset;
	unsigned int vma_flags, pte_flags;
	struct vm_area *vma;
	struct page *page;
	int err;

	struct fault_data fault = {
		.task = task,
		.address = address,
	};

	/* Find the vma */
	if (!(fault.vma = find_vma(fault.address,
				   &fault.task->vm_area_head->list))) {
		dprintf("%s: Invalid: No vma for given address. %d\n",
			__FUNCTION__, -EINVAL);
		return PTR_ERR(-EINVAL);
	}

	/* Read fault, safe to repeat */
	if (wanted_flags & VM_READ)
		if (IS_ERR(page = page_read_fault(&fault)))
			return page;

	/* Write fault, safe to repeat */
	if (wanted_flags & VM_WRITE)
		if (IS_ERR(page = page_write_fault(&fault)))
			return page;

	/*
	 * If we came this far, it means we have more
	 * permissions than VM_NONE.
	 *
	 * Now we _must_ find out what those page
	 * protection flags were, and do this without
	 * needing to inspect any ptes.
	 *
	 * We don't want to downgrade a RW page to RO again.
	 */
	file_offset = fault_to_file_offset(&fault);
	vma_flags = fault.vma->flags;
	vma = fault.vma;

	/* Get the topmost vm_object */
	if (!(vmo_link = vma_next_link(&vma->vm_obj_list,
				       &vma->vm_obj_list))) {
		printf("%s:%s: No vm object in vma!\n",
		       __TASKNAME__, __FUNCTION__);
		BUG();
	}

	/* Traverse the list of vm objects and search for the page */
	while (IS_ERR(page = vmo_link->obj->pager->ops.page_in(vmo_link->obj,
							       file_offset))) {
		if (!(vmo_link = vma_next_link(&vmo_link->list,
					       &vma->vm_obj_list))) {
			printf("%s:%s: Traversed all shadows and the original "
			       "file's vm_object, but could not find the "
			       "faulty page in this vma.\n", __TASKNAME__,
			       __FUNCTION__);
			BUG();
		}
	}

	/* Use the flags of the vm_object containing the page */
	if (vmo_link->obj->flags & VM_WRITE)
		pte_flags = VM_WRITE | VM_READ;
	else
		pte_flags = VM_READ;

	/*
	 * Now check the vma flags for adding VM_EXEC.
	 * The real pte may not have this flag yet, but
	 * it is allowed to have it and it doesn't harm.
	 */
	if (vma_flags & VM_EXEC)
		pte_flags |= VM_EXEC;

	/* Map the page to the task using these flags */
	if ((err = l4_map((void *)page_to_phys(page),
			  (void *)page_align(fault.address), 1,
			  pte_to_map_flags(pte_flags),
			  fault.task->tid)) < 0) {
		printf("l4_map() failed. err=%d\n", err);
		BUG();
	}

	return page;
}

/*
 * Prefaults the page at the given virtual address for the given task
 * with the given reasons. Multiple reasons are allowed; they are
 * handled separately, in order.
 */
struct page *task_prefault_page(struct tcb *task, unsigned long address,
				unsigned int vmflags)
{
	struct page *ret;

	perfmon_reset_start_cyccnt();
	ret = task_prefault_smart(task, address, vmflags);

	debug_record_cycles("task_prefault_smart");

	return ret;

#if 0
	struct page *p;
	struct fault_data fault = {
		.task = task,
		.address = address,
	};

	dprintf("Pre-faulting address 0x%lx, on task %d, with flags: 0x%x\n",
		address, task->tid, vmflags);

	/* Find the vma */
	if (!(fault.vma = find_vma(fault.address,
				   &fault.task->vm_area_head->list))) {
		dprintf("%s: Invalid: No vma for given address. %d\n",
			__FUNCTION__, -EINVAL);
		return PTR_ERR(-EINVAL);
	}

	/* Flags may indicate multiple fault reasons. First do the read */
	if (vmflags & VM_READ) {
		fault.pte_flags = VM_NONE;
		fault.reason = VM_READ;
		if (IS_ERR(p = do_page_fault(&fault)))
			return p;
	}
	/* Now write */
	if (vmflags & VM_WRITE) {
		fault.pte_flags = VM_READ;
		fault.reason = VM_WRITE;
		if (IS_ERR(p = do_page_fault(&fault)))
			return p;
	}
	/* No exec or any other fault reason allowed. */
	BUG_ON(vmflags & ~(VM_READ | VM_WRITE));

	return p;
#endif
}

int vm_compare_prot_flags(unsigned int current, unsigned int needed)
{
	current &= VM_PROT_MASK;
	needed &= VM_PROT_MASK;

	if (needed & VM_READ)
		if (current & (VM_READ | VM_WRITE))
			return 1;

	if ((needed & VM_WRITE) &&
	    (current & VM_WRITE))
		return 1;

	return 0;
}
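
/*
 * Illustrative only: vm_compare_prot_flags(VM_READ | VM_WRITE, VM_READ)
 * and vm_compare_prot_flags(VM_WRITE, VM_READ) both return 1, since a
 * writable mapping already satisfies a read; a read-only mapping asked
 * for VM_WRITE returns 0.
 */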