1 |
2 |
drasko |
/*
|
2 |
|
|
* mmap/munmap and friends.
|
3 |
|
|
*
|
4 |
|
|
* Copyright (C) 2007, 2008 Bahadir Balban
|
5 |
|
|
*/
|
6 |
|
|
#include <l4/lib/math.h>
|
7 |
|
|
#include <vm_area.h>
|
8 |
|
|
#include <malloc/malloc.h>
|
9 |
|
|
#include INC_API(errno.h)
|
10 |
|
|
#include <posix/sys/types.h>
|
11 |
|
|
#include L4LIB_INC_ARCH(syscalls.h)
|
12 |
|
|
#include L4LIB_INC_ARCH(syslib.h)
|
13 |
|
|
#include <memory.h>
|
14 |
|
|
#include <task.h>
|
15 |
|
|
#include <mmap.h>
|
16 |
|
|
#include <file.h>
|
17 |
|
|
#include <shm.h>
|
18 |
|
|
#include <syscalls.h>
|
19 |
|
|
#include <user.h>
|
20 |
|
|
#include <shm.h>
|
21 |
|
|
|
22 |
|
|
struct vm_area *vma_new(unsigned long pfn_start, unsigned long npages,
|
23 |
|
|
unsigned int flags, unsigned long file_offset)
|
24 |
|
|
{
|
25 |
|
|
struct vm_area *vma;
|
26 |
|
|
|
27 |
|
|
/* Allocate new area */
|
28 |
|
|
if (!(vma = kzalloc(sizeof(struct vm_area))))
|
29 |
|
|
return 0;
|
30 |
|
|
|
31 |
|
|
link_init(&vma->list);
|
32 |
|
|
link_init(&vma->vm_obj_list);
|
33 |
|
|
|
34 |
|
|
vma->pfn_start = pfn_start;
|
35 |
|
|
vma->pfn_end = pfn_start + npages;
|
36 |
|
|
vma->flags = flags;
|
37 |
|
|
vma->file_offset = file_offset;
|
38 |
|
|
|
39 |
|
|
return vma;
|
40 |
|
|
}
|
41 |
|
|
|
42 |
|
|
/*
|
43 |
|
|
* Inserts a new vma to the ordered vm area list.
|
44 |
|
|
*
|
45 |
|
|
* The new vma is assumed to have been correctly set up not to intersect
|
46 |
|
|
* with any other existing vma.
|
47 |
|
|
*/
|
48 |
|
|
/*
 * Inserts a new vma into the ordered vm area list.
 *
 * The new vma is assumed to have been correctly set up not to intersect
 * with any other existing vma; every insertion point re-checks that
 * assumption with BUG_ON before linking.
 *
 * Returns 0 on success; BUG()s if no legal insertion point is found.
 * NOTE(review): control falls off the end after the final BUG() --
 * presumably BUG() does not return; confirm, since a returning BUG()
 * would leave this non-void function without a return value.
 */
int task_insert_vma(struct vm_area *this, struct link *vma_list)
{
	struct vm_area *before, *after;

	/* Add if list is empty */
	if (list_empty(vma_list)) {
		list_insert_tail(&this->list, vma_list);
		return 0;
	}

	/* Else find the right interval */
	list_foreach_struct(before, vma_list, list) {
		/* Candidate successor: the entry after @before */
		after = link_to_struct(before->list.next, struct vm_area, list);

		/*
		 * If there's only one in list -- more precisely, @before is
		 * the last element, so its ->next wraps to the list head and
		 * @after is not a real vma.
		 */
		if (before->list.next == vma_list) {

			/* Eliminate the possibility of intersection */
			BUG_ON(set_intersection(this->pfn_start, this->pfn_end,
						before->pfn_start,
						before->pfn_end));

			/* Add as next if greater */
			if (this->pfn_start > before->pfn_start)
				list_insert(&this->list, &before->list);
			/* Add as previous if smaller */
			else if (this->pfn_start < before->pfn_start)
				list_insert_tail(&this->list, &before->list);
			else
				BUG();	/* equal start pfns: caller violated the no-overlap contract */

			return 0;
		}

		/* If this page is in-between two other, insert it there */
		if (before->pfn_start < this->pfn_start &&
		    after->pfn_start > this->pfn_start) {

			/* Eliminate possibility of intersection */
			BUG_ON(set_intersection(this->pfn_start, this->pfn_end,
						before->pfn_start,
						before->pfn_end));
			BUG_ON(set_intersection(this->pfn_start, this->pfn_end,
						after->pfn_start,
						after->pfn_end));
			list_insert(&this->list, &before->list);

			return 0;
		}
	}
	/* Ordered-list invariant broken: no slot found for @this */
	BUG();
}
|
100 |
|
|
|
101 |
|
|
/*
|
102 |
|
|
* Search an empty space in the task's mmapable address region.
|
103 |
|
|
*
|
104 |
|
|
* This does a less than O(n) algorithm by starting the estimated region
|
105 |
|
|
* and vma comparison from the beginning, once a vma is not intersected
|
106 |
|
|
* that means it is an available slot. However if vma's and estimated
|
107 |
|
|
* region does not go head-to-head for comparison, individual intersection
|
108 |
|
|
* checks would be meaningless since any other vma could be intersecting.
|
109 |
|
|
* Therefore head-to-head comparison is essential here.
|
110 |
|
|
*/
|
111 |
|
|
/*
 * Search an empty space of @npages pages in @task's mmapable address
 * region [task->map_start, task->map_end).
 *
 * Walks the task's ordered vma list head-to-head against a sliding
 * candidate window: whenever the window intersects the current vma, the
 * window is fast-forwarded past that vma's end; the first vma that does
 * NOT intersect proves the window is free (this relies on the vma list
 * being sorted -- see the comment block above the function).
 *
 * Returns the start address of the free window, or 0 on failure.
 */
unsigned long find_unmapped_area(unsigned long npages, struct tcb *task)
{
	/* Candidate window [pfn_start, pfn_end), starting at map_start */
	unsigned long pfn_start = __pfn(task->map_start);
	unsigned long pfn_end = pfn_start + npages;
	struct vm_area *vma;

	/* Request larger than the whole mmapable region: fail early */
	if (npages > __pfn(task->map_end - task->map_start))
		return 0;

	/* If no vmas, first map slot is available. */
	if (list_empty(&task->vm_area_head->list))
		return task->map_start;

	/* First vma to check our range against */
	vma = link_to_struct(task->vm_area_head->list.next, struct vm_area, list);

	/* Start searching from task's end of data to start of stack */
	while (pfn_end <= __pfn(task->map_end)) {

		/* If intersection, skip the vma and fast-forward to next */
		if (set_intersection(pfn_start, pfn_end,
				     vma->pfn_start, vma->pfn_end)) {

			/* Update interval to next available space */
			pfn_start = vma->pfn_end;
			pfn_end = pfn_start + npages;

			/*
			 * Decision point, no more vmas left to check.
			 * Are we out of task map area?
			 */
			if (vma->list.next == &task->vm_area_head->list) {
				if (pfn_end > __pfn(task->map_end))
					break; /* Yes, fail */
				else /* No, success */
					return __pfn_to_addr(pfn_start);
			}

			/* Otherwise get next vma entry */
			vma = link_to_struct(vma->list.next,
					     struct vm_area, list);
			continue;
		}
		/*
		 * No intersection with the current vma: because the walk is
		 * head-to-head against a sorted list, the window is free.
		 */
		BUG_ON(pfn_start + npages > __pfn(task->map_end));
		return __pfn_to_addr(pfn_start);
	}

	/* Window pushed past map_end: no room */
	return 0;
}
|
160 |
|
|
|
161 |
|
|
/* Validate an address that is a possible candidate for an mmap() region */
|
162 |
|
|
int mmap_address_validate(struct tcb *task, unsigned long map_address,
|
163 |
|
|
unsigned int vm_flags)
|
164 |
|
|
{
|
165 |
|
|
if (map_address == 0)
|
166 |
|
|
return 0;
|
167 |
|
|
|
168 |
|
|
/* Private mappings can only go in task address space */
|
169 |
|
|
if (vm_flags & VMA_PRIVATE) {
|
170 |
|
|
if ((map_address >= task->start &&
|
171 |
|
|
map_address < task->end) ||
|
172 |
|
|
(map_address >= __pfn_to_addr(cont_mem_regions.utcb->start) &&
|
173 |
|
|
map_address < __pfn_to_addr(cont_mem_regions.utcb->end))) {
|
174 |
|
|
return 1;
|
175 |
|
|
} else
|
176 |
|
|
return 0;
|
177 |
|
|
/*
|
178 |
|
|
* Shared mappings can go in task, utcb, and shared
|
179 |
|
|
* memory address space,
|
180 |
|
|
*/
|
181 |
|
|
} else if (vm_flags & VMA_SHARED) {
|
182 |
|
|
if ((map_address >= task->start &&
|
183 |
|
|
map_address < task->end) ||
|
184 |
|
|
(map_address >= __pfn_to_addr(cont_mem_regions.shmem->start) &&
|
185 |
|
|
map_address < __pfn_to_addr(cont_mem_regions.shmem->end)))
|
186 |
|
|
return 1;
|
187 |
|
|
else
|
188 |
|
|
return 0;
|
189 |
|
|
} else
|
190 |
|
|
BUG();
|
191 |
|
|
}
|
192 |
|
|
|
193 |
|
|
/*
|
194 |
|
|
* Returns a suitably mmap'able address. It allocates
|
195 |
|
|
* differently for shared and private areas.
|
196 |
|
|
*/
|
197 |
|
|
unsigned long mmap_new_address(struct tcb *task, unsigned int flags,
|
198 |
|
|
unsigned int npages)
|
199 |
|
|
{
|
200 |
|
|
if (flags & VMA_SHARED)
|
201 |
|
|
return (unsigned long)shm_new_address(npages);
|
202 |
|
|
else
|
203 |
|
|
return find_unmapped_area(npages, task);
|
204 |
|
|
}
|
205 |
|
|
|
206 |
|
|
/*
|
207 |
|
|
* Side note:
|
208 |
|
|
* Why in do_mmap() shm files have devzero mapped behind separately but
|
209 |
|
|
* anonymous files map devzero directly? Because private anonymous files get
|
210 |
|
|
* shadow objects in front when written to. Shm files are not private, so they
|
211 |
|
|
* stay where they are and just grow. Other processes can reach and map them.
|
212 |
|
|
*/
|
213 |
|
|
|
214 |
|
|
/*
|
215 |
|
|
* Maps the given file with given flags at the given page offset to the given
|
216 |
|
|
* task's address space at the specified virtual memory address and length.
|
217 |
|
|
*
|
218 |
|
|
* The actual paging in/out of the file from/into memory pages is handled by
|
219 |
|
|
* the file's pager upon page faults.
|
220 |
|
|
*/
|
221 |
|
|
void *do_mmap(struct vm_file *mapfile, unsigned long file_offset,
|
222 |
|
|
struct tcb *task, unsigned long map_address,
|
223 |
|
|
unsigned int flags, unsigned int npages)
|
224 |
|
|
{
|
225 |
|
|
struct vm_obj_link *vmo_link, *vmo_link2;
|
226 |
|
|
unsigned long file_npages;
|
227 |
|
|
struct vm_area *new;
|
228 |
|
|
int err;
|
229 |
|
|
|
230 |
|
|
/* Set up devzero if none given */
|
231 |
|
|
if (!mapfile) {
|
232 |
|
|
if (flags & VMA_ANONYMOUS) {
|
233 |
|
|
BUG_ON(!(mapfile = get_devzero()));
|
234 |
|
|
file_offset = 0;
|
235 |
|
|
} else
|
236 |
|
|
return PTR_ERR(-EINVAL);
|
237 |
|
|
}
|
238 |
|
|
|
239 |
|
|
/* Get total file pages, check if mapping is within file size */
|
240 |
|
|
file_npages = __pfn(page_align_up(mapfile->length));
|
241 |
|
|
if (npages > file_npages - file_offset) {
|
242 |
|
|
printf("%s: Trying to map %d pages from page %lu, "
|
243 |
|
|
"but file length is %lu\n", __FUNCTION__,
|
244 |
|
|
npages, file_offset, file_npages);
|
245 |
|
|
return PTR_ERR(-EINVAL);
|
246 |
|
|
}
|
247 |
|
|
|
248 |
|
|
/* Check invalid page size */
|
249 |
|
|
if (npages == 0) {
|
250 |
|
|
printf("Trying to map %d pages.\n", npages);
|
251 |
|
|
return PTR_ERR(-EINVAL);
|
252 |
|
|
}
|
253 |
|
|
if (npages > __pfn(TASK_SIZE)) {
|
254 |
|
|
printf("Trying to map too many pages: %d\n", npages);
|
255 |
|
|
return PTR_ERR(-ENOMEM);
|
256 |
|
|
}
|
257 |
|
|
|
258 |
|
|
/* Check invalid map address */
|
259 |
|
|
if (!mmap_address_validate(task, map_address, flags)) {
|
260 |
|
|
if (flags & VMA_FIXED)
|
261 |
|
|
return PTR_ERR(-EINVAL);
|
262 |
|
|
else if (!(map_address = mmap_new_address(task, flags, npages)))
|
263 |
|
|
return PTR_ERR(-ENOMEM);
|
264 |
|
|
}
|
265 |
|
|
|
266 |
|
|
/* Unmap any existing vmas that overlap with the new mapping */
|
267 |
|
|
if ((err = do_munmap(task, map_address, npages)) < 0)
|
268 |
|
|
return PTR_ERR(err);
|
269 |
|
|
|
270 |
|
|
/* For valid regions that aren't allocated by us, create the vma. */
|
271 |
|
|
if (!(new = vma_new(__pfn(map_address), npages, flags,
|
272 |
|
|
__pfn(file_offset))))
|
273 |
|
|
return PTR_ERR(-ENOMEM);
|
274 |
|
|
|
275 |
|
|
/* Attach the file as the first vm object of this vma */
|
276 |
|
|
if (!(vmo_link = vm_objlink_create())) {
|
277 |
|
|
kfree(new);
|
278 |
|
|
return PTR_ERR(-ENOMEM);
|
279 |
|
|
}
|
280 |
|
|
|
281 |
|
|
/* Attach link to object */
|
282 |
|
|
vm_link_object(vmo_link, &mapfile->vm_obj);
|
283 |
|
|
|
284 |
|
|
/* Add link to vma list */
|
285 |
|
|
list_insert_tail(&vmo_link->list, &new->vm_obj_list);
|
286 |
|
|
|
287 |
|
|
/*
|
288 |
|
|
* If the file is a shm file, also map devzero behind it. i.e.
|
289 |
|
|
* vma -> vm_link -> vm_link
|
290 |
|
|
* | |
|
291 |
|
|
* v v
|
292 |
|
|
* shm_file devzero
|
293 |
|
|
*
|
294 |
|
|
* So that faults go through shm file and then devzero, as in
|
295 |
|
|
* the shadow object copy_on_write setup in fault.c
|
296 |
|
|
*/
|
297 |
|
|
if (mapfile->type == VM_FILE_SHM) {
|
298 |
|
|
struct vm_file *dzero = get_devzero();
|
299 |
|
|
|
300 |
|
|
/* Attach the file as the first vm object of this vma */
|
301 |
|
|
if (!(vmo_link2 = vm_objlink_create())) {
|
302 |
|
|
kfree(new);
|
303 |
|
|
kfree(vmo_link);
|
304 |
|
|
return PTR_ERR(-ENOMEM);
|
305 |
|
|
}
|
306 |
|
|
vm_link_object(vmo_link2, &dzero->vm_obj);
|
307 |
|
|
list_insert_tail(&vmo_link2->list, &new->vm_obj_list);
|
308 |
|
|
}
|
309 |
|
|
|
310 |
|
|
/* Finished initialising the vma, add it to task */
|
311 |
|
|
dprintf("%s: Mapping 0x%lx - 0x%lx\n", __FUNCTION__,
|
312 |
|
|
map_address, map_address + __pfn_to_addr(npages));
|
313 |
|
|
task_insert_vma(new, &task->vm_area_head->list);
|
314 |
|
|
|
315 |
|
|
/*
|
316 |
|
|
* If area is going to be used going downwards, (i.e. as a stack)
|
317 |
|
|
* we return the *end* of the area as the start address.
|
318 |
|
|
*/
|
319 |
|
|
if (flags & VMA_GROWSDOWN)
|
320 |
|
|
map_address += __pfn_to_addr(npages);
|
321 |
|
|
|
322 |
|
|
return (void *)map_address;
|
323 |
|
|
}
|
324 |
|
|
|
325 |
|
|
/* mmap system call implementation */
|
326 |
|
|
void *__sys_mmap(struct tcb *task, void *start, size_t length, int prot,
|
327 |
|
|
int flags, int fd, unsigned long file_offset)
|
328 |
|
|
{
|
329 |
|
|
unsigned int vmflags = 0;
|
330 |
|
|
struct vm_file *file = 0;
|
331 |
|
|
|
332 |
|
|
/* Check file validity */
|
333 |
|
|
if (!(flags & MAP_ANONYMOUS))
|
334 |
|
|
if (fd < 0 || fd > TASK_FILES_MAX ||
|
335 |
|
|
!task->files->fd[fd].vmfile)
|
336 |
|
|
return PTR_ERR(-EBADF);
|
337 |
|
|
|
338 |
|
|
/* Check file offset is page aligned */
|
339 |
|
|
if (!is_page_aligned(file_offset))
|
340 |
|
|
return PTR_ERR(-EINVAL);
|
341 |
|
|
|
342 |
|
|
/* TODO:
|
343 |
|
|
* Check that @start does not already have a mapping.
|
344 |
|
|
* Check that pfn + npages range is within the file range.
|
345 |
|
|
* Check that posix flags passed match those defined in vm_area.h
|
346 |
|
|
*/
|
347 |
|
|
if (flags & MAP_ANONYMOUS) {
|
348 |
|
|
file = 0;
|
349 |
|
|
vmflags |= VMA_ANONYMOUS;
|
350 |
|
|
} else {
|
351 |
|
|
file = task->files->fd[fd].vmfile;
|
352 |
|
|
}
|
353 |
|
|
|
354 |
|
|
if (flags & MAP_FIXED)
|
355 |
|
|
vmflags |= VMA_FIXED;
|
356 |
|
|
|
357 |
|
|
if (flags & MAP_PRIVATE)
|
358 |
|
|
/* This means COW, if writeable. */
|
359 |
|
|
vmflags |= VMA_PRIVATE;
|
360 |
|
|
else /* This also means COW, if writeable and anonymous */
|
361 |
|
|
vmflags |= VMA_SHARED;
|
362 |
|
|
|
363 |
|
|
if (flags & MAP_GROWSDOWN)
|
364 |
|
|
vmflags |= VMA_GROWSDOWN;
|
365 |
|
|
|
366 |
|
|
if (prot & PROT_READ)
|
367 |
|
|
vmflags |= VM_READ;
|
368 |
|
|
if (prot & PROT_WRITE)
|
369 |
|
|
vmflags |= VM_WRITE;
|
370 |
|
|
if (prot & PROT_EXEC)
|
371 |
|
|
vmflags |= VM_EXEC;
|
372 |
|
|
|
373 |
|
|
/*
|
374 |
|
|
* Currently MAP_SHARED && MAP_ANONYMOUS mappings use the
|
375 |
|
|
* shm interface to create virtual shared memory files and
|
376 |
|
|
* do_mmap is internally called through this interface.
|
377 |
|
|
*
|
378 |
|
|
* FIXME: A common method of creating virtual shm files
|
379 |
|
|
* should be used by both sys_mmap and sys_shmget. With the
|
380 |
|
|
* current method, a task that guesses the shmid of an
|
381 |
|
|
* anonymous shared mmap can attach to it via shmat.
|
382 |
|
|
*/
|
383 |
|
|
if ((flags & MAP_ANONYMOUS) &&
|
384 |
|
|
(flags & MAP_SHARED)) {
|
385 |
|
|
/* Create a new shared memory virtual file */
|
386 |
|
|
l4id_t shmid = sys_shmget(IPC_PRIVATE,
|
387 |
|
|
page_align_up(length),
|
388 |
|
|
0);
|
389 |
|
|
|
390 |
|
|
/* Find and mmap the file via do_shmat() */
|
391 |
|
|
return sys_shmat(task, shmid, 0, 0);
|
392 |
|
|
}
|
393 |
|
|
|
394 |
|
|
return do_mmap(file, file_offset, task, (unsigned long)start,
|
395 |
|
|
vmflags, __pfn(page_align_up(length)));
|
396 |
|
|
}
|
397 |
|
|
|
398 |
|
|
void *sys_mmap(struct tcb *task, struct sys_mmap_args *args)
|
399 |
|
|
{
|
400 |
|
|
|
401 |
|
|
struct sys_mmap_args *mapped_args;
|
402 |
|
|
void *ret;
|
403 |
|
|
|
404 |
|
|
if (!(mapped_args = pager_get_user_page(task, args,
|
405 |
|
|
sizeof(*args),
|
406 |
|
|
VM_READ | VM_WRITE)))
|
407 |
|
|
return PTR_ERR(-EINVAL);
|
408 |
|
|
|
409 |
|
|
ret = __sys_mmap(task, mapped_args->start, mapped_args->length,
|
410 |
|
|
mapped_args->prot, mapped_args->flags, mapped_args->fd,
|
411 |
|
|
mapped_args->offset);
|
412 |
|
|
|
413 |
|
|
return ret;
|
414 |
|
|
}
|
415 |
|
|
|
416 |
|
|
/* Sets the end of data segment for sender */
|
417 |
|
|
/*
 * Sets the end of data segment for sender.
 *
 * NOTE(review): currently an unimplemented stub -- it ignores @ds_end
 * and always reports success without changing the data segment.
 */
int sys_brk(struct tcb *sender, void *ds_end)
{
	return 0;
}
|
421 |
|
|
|