1 |
199 |
simons |
/*
|
2 |
|
|
* linux/mm/mmap.c
|
3 |
|
|
*
|
4 |
|
|
* Written by obz.
|
5 |
|
|
*/
|
6 |
|
|
#include <linux/stat.h>
|
7 |
|
|
#include <linux/sched.h>
|
8 |
|
|
#include <linux/kernel.h>
|
9 |
|
|
#include <linux/mm.h>
|
10 |
|
|
#include <linux/shm.h>
|
11 |
|
|
#include <linux/errno.h>
|
12 |
|
|
#include <linux/mman.h>
|
13 |
|
|
#include <linux/string.h>
|
14 |
|
|
#include <linux/malloc.h>
|
15 |
|
|
#include <linux/pagemap.h>
|
16 |
|
|
#include <linux/swap.h>
|
17 |
|
|
|
18 |
|
|
#include <asm/segment.h>
|
19 |
|
|
#include <asm/system.h>
|
20 |
|
|
#include <asm/pgtable.h>
|
21 |
|
|
|
22 |
|
|
/*
|
23 |
|
|
* description of effects of mapping type and prot in current implementation.
|
24 |
|
|
* this is due to the limited x86 page protection hardware. The expected
|
25 |
|
|
* behavior is in parens:
|
26 |
|
|
*
|
27 |
|
|
* map_type prot
|
28 |
|
|
* PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
|
29 |
|
|
* MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
|
30 |
|
|
* w: (no) no w: (no) no w: (yes) yes w: (no) no
|
31 |
|
|
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
|
32 |
|
|
*
|
33 |
|
|
* MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
|
34 |
|
|
* w: (no) no w: (no) no w: (copy) copy w: (no) no
|
35 |
|
|
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
|
36 |
|
|
*
|
37 |
|
|
*/
|
38 |
|
|
|
39 |
|
|
/*
 * Map the 16 combinations of (shared bit | PROT_READ | PROT_WRITE |
 * PROT_EXEC) to hardware page protections.  Indices 0-7 are the
 * private (__P) variants, 8-15 the shared (__S) variants; the __Pxxx
 * and __Sxxx values are architecture-defined (see <asm/pgtable.h>).
 * Indexed in do_mmap() via vma->vm_flags & 0x0f.
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
|
43 |
|
|
|
44 |
|
|
/*
|
45 |
|
|
* Check that a process has enough memory to allocate a
|
46 |
|
|
* new virtual mapping.
|
47 |
|
|
*/
|
48 |
|
|
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping of 'pages' pages.  Returns non-zero if the allocation may
 * go ahead, 0 if it should be refused.
 */
int vm_enough_memory(long pages)
{
	/*
	 * stupid algorithm to decide if we have enough memory: while
	 * simple, it hopefully works in most obvious cases.. Easy to
	 * fool it, but this should catch most mistakes.
	 */
	long freepages;

	/* Start with pages that could be reclaimed from the caches. */
	freepages = buffermem >> PAGE_SHIFT;
	freepages += page_cache_size;
	/*
	 * If the caches hold no more than 1/16 of physical memory plus a
	 * small slack, assume only half of them are really reclaimable.
	 */
	if (freepages <= (MAP_NR(high_memory) >> 4) + 48)
		freepages >>= 1;
	/* Add genuinely free RAM and free swap space. */
	freepages += nr_free_pages;
	freepages += nr_swap_pages;
	/* Keep a reserve of 1/16 of physical memory back. */
	freepages -= MAP_NR(high_memory) >> 4;
	return freepages > pages;
}
|
65 |
|
|
|
66 |
|
|
/*
 * brk() system call: move the end of the data segment to 'brk'.
 * Returns the new break on success; on any failure it returns the
 * current (unchanged) mm->brk rather than an error code, per brk
 * syscall convention in this kernel.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
	unsigned long rlim;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;

	/* The break may never be lowered into the code segment. */
	if (brk < mm->end_code)
		return mm->brk;
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	/* Same page: just record the new (unaligned) break value. */
	if (oldbrk == newbrk)
		return mm->brk = brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		do_munmap(newbrk, oldbrk-newbrk);
		return brk;
	}
	/*
	 * Check against rlimit and stack..
	 */
	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim >= RLIM_INFINITY)
		rlim = ~0;	/* unlimited: use the largest unsigned value */
	if (brk - mm->end_code > rlim)
		return mm->brk;

	/*
	 * Check against existing mmap mappings.
	 */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		return mm->brk;

	/*
	 * Check if we have enough memory..
	 */
	if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
		return mm->brk;

	/*
	 * Ok, looks good - let it rip.  Map the new range as anonymous
	 * private rwx memory at exactly oldbrk.
	 */
	if(do_mmap(NULL, oldbrk, newbrk-oldbrk,
	   PROT_READ|PROT_WRITE|PROT_EXEC,
	   MAP_FIXED|MAP_PRIVATE, 0) != oldbrk)
		return mm->brk;
	return mm->brk = brk;
}
|
117 |
|
|
|
118 |
|
|
/*
|
119 |
|
|
* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
|
120 |
|
|
* internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
|
121 |
|
|
* into "VM_xxx".
|
122 |
|
|
*/
|
123 |
|
|
static inline unsigned long vm_flags(unsigned long prot, unsigned long flags)
|
124 |
|
|
{
|
125 |
|
|
#define _trans(x,bit1,bit2) \
|
126 |
|
|
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
|
127 |
|
|
|
128 |
|
|
unsigned long prot_bits, flag_bits;
|
129 |
|
|
prot_bits =
|
130 |
|
|
_trans(prot, PROT_READ, VM_READ) |
|
131 |
|
|
_trans(prot, PROT_WRITE, VM_WRITE) |
|
132 |
|
|
_trans(prot, PROT_EXEC, VM_EXEC);
|
133 |
|
|
flag_bits =
|
134 |
|
|
_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
|
135 |
|
|
_trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
|
136 |
|
|
_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
|
137 |
|
|
return prot_bits | flag_bits;
|
138 |
|
|
#undef _trans
|
139 |
|
|
}
|
140 |
|
|
|
141 |
|
|
/*
 * Create a new memory mapping of 'len' bytes at (or near) 'addr',
 * backed by 'file' at offset 'off', or anonymous if file is NULL.
 * Returns the address of the mapping, or a negative errno.
 */
unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long off)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma;

	/* Zero-length mappings are a successful no-op. */
	if ((len = PAGE_ALIGN(len)) == 0)
		return addr;

	/* Range must fit entirely within the user address space. */
	if (len > MAX_USER_ADDR || addr > MAX_USER_ADDR-len)
		return -EINVAL;

	/* offset overflow? */
	if (off + len < off)
		return -EINVAL;

	/* mlock MCL_FUTURE?  New mappings count against RLIMIT_MEMLOCK. */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
		locked += len;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			return -EAGAIN;
	}

	/*
	 * do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */

	if (file != NULL) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/* Writable shared mapping needs a writable file. */
			if ((prot & PROT_WRITE) && !(file->f_mode & 2))
				return -EACCES;
			/*
			 * make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(file->f_inode))
				return -EAGAIN;
			/* cevans -- whoops another append-only file flaw */
			if (IS_APPEND(file->f_inode) && (file->f_mode & 2))
				return -EACCES;
			/* fall through */
		case MAP_PRIVATE:
			/* Both kinds need the file open for reading. */
			if (!(file->f_mode & 1))
				return -EACCES;
			break;

		default:
			return -EINVAL;
		}
		/* MAP_DENYWRITE fails if anyone has the file open for write. */
		if (flags & MAP_DENYWRITE) {
			if (file->f_inode->i_writecount > 0)
				return -ETXTBSY;
		}
	} else if ((flags & MAP_TYPE) != MAP_PRIVATE)
		return -EINVAL;	/* anonymous mappings must be MAP_PRIVATE */

	/*
	 * obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */

	if (flags & MAP_FIXED) {
		if (addr & ~PAGE_MASK)
			return -EINVAL;
	} else {
		addr = get_unmapped_area(addr, len);
		if (!addr)
			return -ENOMEM;
	}

	/*
	 * determine the object being mapped and call the appropriate
	 * specific mapper.  A file mapping needs a file-specific mmap op.
	 */
	if (file && (!file->f_op || !file->f_op->mmap))
		return -ENODEV;

	vma = (struct vm_area_struct *)kmalloc(sizeof(struct vm_area_struct),
		GFP_KERNEL);
	if (!vma)
		return -ENOMEM;

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	/* def_flags carries per-mm defaults such as VM_LOCKED (mlockall). */
	vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;

	if (file) {
		if (file->f_mode & 1)
			vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
		if (flags & MAP_SHARED) {
			vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
			/*
			 * This looks strange, but when we don't have the file open
			 * for writing, we can demote the shared mapping to a simpler
			 * private mapping. That also takes care of a security hole
			 * with ptrace() writing to a shared mapping without write
			 * permissions.
			 *
			 * We leave the VM_MAYSHARE bit on, just to get correct output
			 * from /proc/xxx/maps..
			 */
			if (!(file->f_mode & 2))
				vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
		}
	} else
		vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	/* Low 4 bits (shared|exec|write|read) index the protection table. */
	vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
	vma->vm_ops = NULL;
	vma->vm_offset = off;
	vma->vm_inode = NULL;
	vma->vm_pte = 0;

	do_munmap(addr, len);	/* Clear old maps */

	/* Check against address space limit. */
	if ((mm->total_vm << PAGE_SHIFT) + len
	    > current->rlim[RLIMIT_AS].rlim_cur) {
		kfree(vma);
		return -ENOMEM;
	}

	/* Private writable mapping? Check memory availability.. */
	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE) {
		if (!vm_enough_memory(len >> PAGE_SHIFT)) {
			kfree(vma);
			return -ENOMEM;
		}
	}

	/* Let the file's mapper set up vm_ops, vm_inode etc. */
	if (file) {
		int error = file->f_op->mmap(file->f_inode, file, vma);

		if (error) {
			kfree(vma);
			return error;
		}
	}

	flags = vma->vm_flags;	/* save before vma may be merged away */
	insert_vm_struct(mm, vma);
	merge_segments(mm, vma->vm_start, vma->vm_end);

	/* merge_segments might have merged our vma, so we can't use it any more */
	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {
		unsigned long start = addr;
		mm->locked_vm += len >> PAGE_SHIFT;
		/* Touch each page once to fault the locked range in now. */
		do {
			char c = get_user((char *) start);
			len -= PAGE_SIZE;
			start += PAGE_SIZE;
			/* keep the compiler from optimizing the read away */
			__asm__ __volatile__("": :"r" (c));
		} while (len > 0);
	}
	return addr;
}
|
302 |
|
|
|
303 |
|
|
/*
|
304 |
|
|
* Get an address range which is currently unmapped.
|
305 |
|
|
* For mmap() without MAP_FIXED and shmat() with addr=0.
|
306 |
|
|
* Return value 0 means ENOMEM.
|
307 |
|
|
*/
|
308 |
|
|
unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
|
309 |
|
|
{
|
310 |
|
|
struct vm_area_struct * vmm;
|
311 |
|
|
|
312 |
|
|
if (len > MAX_USER_ADDR)
|
313 |
|
|
return 0;
|
314 |
|
|
if (!addr)
|
315 |
|
|
addr = MMAP_SEARCH_START;
|
316 |
|
|
addr = PAGE_ALIGN(addr);
|
317 |
|
|
|
318 |
|
|
for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
|
319 |
|
|
/* At this point: (!vmm || addr < vmm->vm_end). */
|
320 |
|
|
if (MAX_USER_ADDR - len < addr)
|
321 |
|
|
return 0;
|
322 |
|
|
if (!vmm || addr + len <= vmm->vm_start)
|
323 |
|
|
return addr;
|
324 |
|
|
addr = vmm->vm_end;
|
325 |
|
|
}
|
326 |
|
|
}
|
327 |
|
|
|
328 |
|
|
/*
|
329 |
|
|
* Searching a VMA in the linear list task->mm->mmap is horribly slow.
|
330 |
|
|
* Use an AVL (Adelson-Velskii and Landis) tree to speed up this search
|
331 |
|
|
* from O(n) to O(log n), where n is the number of VMAs of the task
|
332 |
|
|
* (typically around 6, but may reach 3000 in some cases).
|
333 |
|
|
* Written by Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de>.
|
334 |
|
|
*/
|
335 |
|
|
|
336 |
|
|
/* We keep the list and tree sorted by address. */
|
337 |
|
|
/* We keep the list and tree sorted by address: the AVL key of a vma
 * is its ending address. */
#define vm_avl_key vm_end
#define vm_avl_key_t unsigned long	/* typeof(vma->avl_key) */
|
339 |
|
|
|
340 |
|
|
/*
|
341 |
|
|
* task->mm->mmap_avl is the AVL tree corresponding to task->mm->mmap
|
342 |
|
|
* or, more exactly, its root.
|
343 |
|
|
* A vm_area_struct has the following fields:
|
344 |
|
|
* vm_avl_left left son of a tree node
|
345 |
|
|
* vm_avl_right right son of a tree node
|
346 |
|
|
* vm_avl_height 1+max(heightof(left),heightof(right))
|
347 |
|
|
* The empty tree is represented as NULL.
|
348 |
|
|
*/
|
349 |
|
|
|
350 |
|
|
/* Since the trees are balanced, their height will never be large. */
|
351 |
|
|
/* Since the trees are balanced, their height will never be large:
 * an AVL tree of height h contains at least fib(h+2)-1 nodes, so a
 * height of 41 would already require hundreds of millions of vmas —
 * far more than can exist.  (The original comment left this "as a
 * small exercise".) */
#define avl_maxheight	41
#define heightof(tree)	((tree) == avl_empty ? 0 : (tree)->vm_avl_height)
|
353 |
|
|
/*
|
354 |
|
|
* Consistency and balancing rules:
|
355 |
|
|
* 1. tree->vm_avl_height == 1+max(heightof(tree->vm_avl_left),heightof(tree->vm_avl_right))
|
356 |
|
|
* 2. abs( heightof(tree->vm_avl_left) - heightof(tree->vm_avl_right) ) <= 1
|
357 |
|
|
* 3. foreach node in tree->vm_avl_left: node->vm_avl_key <= tree->vm_avl_key,
|
358 |
|
|
* foreach node in tree->vm_avl_right: node->vm_avl_key >= tree->vm_avl_key.
|
359 |
|
|
*/
|
360 |
|
|
|
361 |
|
|
/* Look up the nodes at the left and at the right of a given node. */
|
362 |
|
|
/*
 * Look up the nodes at the left and at the right of a given node,
 * i.e. its in-order predecessor and successor in 'tree'.  Results are
 * stored through to_the_left / to_the_right (NULL when absent).
 * Also cross-checks the tree against the linked list and printk()s
 * on any inconsistency.
 */
static inline void avl_neighbours (struct vm_area_struct * node, struct vm_area_struct * tree, struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
{
	vm_avl_key_t key = node->vm_avl_key;

	*to_the_left = *to_the_right = NULL;
	/* Descend to the node, remembering the last turn in each direction:
	 * the last left-ancestor is a successor candidate and vice versa. */
	for (;;) {
		if (tree == avl_empty) {
			printk("avl_neighbours: node not found in the tree\n");
			return;
		}
		if (key == tree->vm_avl_key)
			break;
		if (key < tree->vm_avl_key) {
			*to_the_right = tree;
			tree = tree->vm_avl_left;
		} else {
			*to_the_left = tree;
			tree = tree->vm_avl_right;
		}
	}
	if (tree != node) {
		printk("avl_neighbours: node not exactly found in the tree\n");
		return;
	}
	/* A left subtree overrides the ancestor candidate: the predecessor
	 * is then the rightmost node of that subtree. */
	if (tree->vm_avl_left != avl_empty) {
		struct vm_area_struct * node;
		for (node = tree->vm_avl_left; node->vm_avl_right != avl_empty; node = node->vm_avl_right)
			continue;
		*to_the_left = node;
	}
	/* Symmetrically, the successor is the leftmost node of the right
	 * subtree when one exists. */
	if (tree->vm_avl_right != avl_empty) {
		struct vm_area_struct * node;
		for (node = tree->vm_avl_right; node->vm_avl_left != avl_empty; node = node->vm_avl_left)
			continue;
		*to_the_right = node;
	}
	/* Sanity check: neighbours must agree with the vm_next list order. */
	if ((*to_the_left && ((*to_the_left)->vm_next != node)) || (node->vm_next != *to_the_right))
		printk("avl_neighbours: tree inconsistent with list\n");
}
|
401 |
|
|
|
402 |
|
|
/*
|
403 |
|
|
* Rebalance a tree.
|
404 |
|
|
* After inserting or deleting a node of a tree we have a sequence of subtrees
|
405 |
|
|
* nodes[0]..nodes[k-1] such that
|
406 |
|
|
* nodes[0] is the root and nodes[i+1] = nodes[i]->{vm_avl_left|vm_avl_right}.
|
407 |
|
|
*/
|
408 |
|
|
/*
 * Rebalance a tree.
 * After inserting or deleting a node of a tree we have a sequence of subtrees
 * nodes[0]..nodes[k-1] such that
 * nodes[0] is the root and nodes[i+1] = nodes[i]->{vm_avl_left|vm_avl_right}.
 * nodeplaces_ptr points one past the end of that path; this walks it back
 * towards the root, fixing the AVL invariant with single or double rotations
 * and stopping early once a node's height is unchanged.
 */
static inline void avl_rebalance (struct vm_area_struct *** nodeplaces_ptr, int count)
{
	for ( ; count > 0 ; count--) {
		struct vm_area_struct ** nodeplace = *--nodeplaces_ptr;
		struct vm_area_struct * node = *nodeplace;
		struct vm_area_struct * nodeleft = node->vm_avl_left;
		struct vm_area_struct * noderight = node->vm_avl_right;
		int heightleft = heightof(nodeleft);
		int heightright = heightof(noderight);
		if (heightright + 1 < heightleft) {
			/* Left subtree is two levels taller than the right. */
			struct vm_area_struct * nodeleftleft = nodeleft->vm_avl_left;
			struct vm_area_struct * nodeleftright = nodeleft->vm_avl_right;
			int heightleftright = heightof(nodeleftright);
			if (heightof(nodeleftleft) >= heightleftright) {
				/*
				 * Outer (left-left) case: single right rotation.
				 * nodeleft becomes the root of this subtree,
				 * node drops to its right child.
				 */
				node->vm_avl_left = nodeleftright; nodeleft->vm_avl_right = node;
				nodeleft->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightleftright);
				*nodeplace = nodeleft;
			} else {
				/*
				 * Inner (left-right) case: double rotation.
				 * nodeleftright becomes the root of this subtree,
				 * its children L and R are re-hung under
				 * nodeleft and node respectively.
				 */
				nodeleft->vm_avl_right = nodeleftright->vm_avl_left;
				node->vm_avl_left = nodeleftright->vm_avl_right;
				nodeleftright->vm_avl_left = nodeleft;
				nodeleftright->vm_avl_right = node;
				nodeleft->vm_avl_height = node->vm_avl_height = heightleftright;
				nodeleftright->vm_avl_height = heightleft;
				*nodeplace = nodeleftright;
			}
		}
		else if (heightleft + 1 < heightright) {
			/* similar to the above, just interchange 'left' <--> 'right' */
			struct vm_area_struct * noderightright = noderight->vm_avl_right;
			struct vm_area_struct * noderightleft = noderight->vm_avl_left;
			int heightrightleft = heightof(noderightleft);
			if (heightof(noderightright) >= heightrightleft) {
				node->vm_avl_right = noderightleft; noderight->vm_avl_left = node;
				noderight->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightrightleft);
				*nodeplace = noderight;
			} else {
				noderight->vm_avl_left = noderightleft->vm_avl_right;
				node->vm_avl_right = noderightleft->vm_avl_left;
				noderightleft->vm_avl_right = noderight;
				noderightleft->vm_avl_left = node;
				noderight->vm_avl_height = node->vm_avl_height = heightrightleft;
				noderightleft->vm_avl_height = heightright;
				*nodeplace = noderightleft;
			}
		}
		else {
			/* Balanced here: recompute height; if it did not change,
			 * no ancestor can be affected either, so stop. */
			int height = (heightleft<heightright ? heightright : heightleft) + 1;
			if (height == node->vm_avl_height)
				break;
			node->vm_avl_height = height;
		}
	}
}
|
483 |
|
|
|
484 |
|
|
/* Insert a node into a tree. */
|
485 |
|
|
/*
 * Insert a node into a tree.  Standard AVL insertion: descend to the
 * correct leaf position recording the path, attach the node as a leaf
 * of height 1, then rebalance along the recorded path.
 */
static inline void avl_insert (struct vm_area_struct * new_node, struct vm_area_struct ** ptree)
{
	vm_avl_key_t key = new_node->vm_avl_key;
	struct vm_area_struct ** nodeplace = ptree;
	/* Path of parent link slots, deepest last, for avl_rebalance(). */
	struct vm_area_struct ** stack[avl_maxheight];
	int stack_count = 0;
	struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
	for (;;) {
		struct vm_area_struct * node = *nodeplace;
		if (node == avl_empty)
			break;
		*stack_ptr++ = nodeplace; stack_count++;
		if (key < node->vm_avl_key)
			nodeplace = &node->vm_avl_left;
		else
			nodeplace = &node->vm_avl_right;
	}
	new_node->vm_avl_left = avl_empty;
	new_node->vm_avl_right = avl_empty;
	new_node->vm_avl_height = 1;
	*nodeplace = new_node;
	avl_rebalance(stack_ptr,stack_count);
}
|
508 |
|
|
|
509 |
|
|
/* Insert a node into a tree, and
|
510 |
|
|
* return the node to the left of it and the node to the right of it.
|
511 |
|
|
*/
|
512 |
|
|
/* Insert a node into a tree, and
 * return the node to the left of it and the node to the right of it
 * (the in-order predecessor and successor; NULL when absent).
 * Identical to avl_insert() except that the last left/right ancestors
 * passed during the descent are recorded as the neighbours.
 */
static inline void avl_insert_neighbours (struct vm_area_struct * new_node, struct vm_area_struct ** ptree,
	struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
{
	vm_avl_key_t key = new_node->vm_avl_key;
	struct vm_area_struct ** nodeplace = ptree;
	/* Path of parent link slots, deepest last, for avl_rebalance(). */
	struct vm_area_struct ** stack[avl_maxheight];
	int stack_count = 0;
	struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
	*to_the_left = *to_the_right = NULL;
	for (;;) {
		struct vm_area_struct * node = *nodeplace;
		if (node == avl_empty)
			break;
		*stack_ptr++ = nodeplace; stack_count++;
		if (key < node->vm_avl_key) {
			/* turning left: this node is the best successor so far */
			*to_the_right = node;
			nodeplace = &node->vm_avl_left;
		} else {
			/* turning right: this node is the best predecessor so far */
			*to_the_left = node;
			nodeplace = &node->vm_avl_right;
		}
	}
	new_node->vm_avl_left = avl_empty;
	new_node->vm_avl_right = avl_empty;
	new_node->vm_avl_height = 1;
	*nodeplace = new_node;
	avl_rebalance(stack_ptr,stack_count);
}
|
540 |
|
|
|
541 |
|
|
/* Removes a node out of a tree. */
|
542 |
|
|
/* Removes a node out of a tree.  Standard AVL deletion: find the node,
 * splice it out (replacing it by its in-order predecessor when it has
 * a left subtree), then rebalance along the recorded path. */
static inline void avl_remove (struct vm_area_struct * node_to_delete, struct vm_area_struct ** ptree)
{
	vm_avl_key_t key = node_to_delete->vm_avl_key;
	struct vm_area_struct ** nodeplace = ptree;
	/* Path of parent link slots, deepest last, for avl_rebalance(). */
	struct vm_area_struct ** stack[avl_maxheight];
	int stack_count = 0;
	struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
	struct vm_area_struct ** nodeplace_to_delete;
	for (;;) {
		struct vm_area_struct * node = *nodeplace;
		if (node == avl_empty) {
			/* what? node_to_delete not found in tree? */
			printk("avl_remove: node to delete not found in tree\n");
			return;
		}
		*stack_ptr++ = nodeplace; stack_count++;
		if (key == node->vm_avl_key)
			break;
		if (key < node->vm_avl_key)
			nodeplace = &node->vm_avl_left;
		else
			nodeplace = &node->vm_avl_right;
	}
	nodeplace_to_delete = nodeplace;
	/* Have to remove node_to_delete = *nodeplace_to_delete. */
	if (node_to_delete->vm_avl_left == avl_empty) {
		/* No left child: splice in the right subtree directly and
		 * drop the deleted node's slot from the rebalance path. */
		*nodeplace_to_delete = node_to_delete->vm_avl_right;
		stack_ptr--; stack_count--;
	} else {
		/* Replace node_to_delete by its in-order predecessor: the
		 * rightmost node of its left subtree.  The descent to it is
		 * appended to the rebalance path. */
		struct vm_area_struct *** stack_ptr_to_delete = stack_ptr;
		struct vm_area_struct ** nodeplace = &node_to_delete->vm_avl_left;
		struct vm_area_struct * node;
		for (;;) {
			node = *nodeplace;
			if (node->vm_avl_right == avl_empty)
				break;
			*stack_ptr++ = nodeplace; stack_count++;
			nodeplace = &node->vm_avl_right;
		}
		*nodeplace = node->vm_avl_left;
		/* node replaces node_to_delete */
		node->vm_avl_left = node_to_delete->vm_avl_left;
		node->vm_avl_right = node_to_delete->vm_avl_right;
		node->vm_avl_height = node_to_delete->vm_avl_height;
		*nodeplace_to_delete = node; /* replace node_to_delete */
		/* Fix the recorded path entry that pointed into the deleted
		 * node so rebalancing follows the replacement instead. */
		*stack_ptr_to_delete = &node->vm_avl_left; /* replace &node_to_delete->vm_avl_left */
	}
	avl_rebalance(stack_ptr,stack_count);
}
|
591 |
|
|
|
592 |
|
|
#ifdef DEBUG_AVL
|
593 |
|
|
|
594 |
|
|
/* print a list */
|
595 |
|
|
static void printk_list (struct vm_area_struct * vma)
|
596 |
|
|
{
|
597 |
|
|
printk("[");
|
598 |
|
|
while (vma) {
|
599 |
|
|
printk("%08lX-%08lX", vma->vm_start, vma->vm_end);
|
600 |
|
|
vma = vma->vm_next;
|
601 |
|
|
if (!vma)
|
602 |
|
|
break;
|
603 |
|
|
printk(" ");
|
604 |
|
|
}
|
605 |
|
|
printk("]");
|
606 |
|
|
}
|
607 |
|
|
|
608 |
|
|
/* print a tree */
|
609 |
|
|
static void printk_avl (struct vm_area_struct * tree)
|
610 |
|
|
{
|
611 |
|
|
if (tree != avl_empty) {
|
612 |
|
|
printk("(");
|
613 |
|
|
if (tree->vm_avl_left != avl_empty) {
|
614 |
|
|
printk_avl(tree->vm_avl_left);
|
615 |
|
|
printk("<");
|
616 |
|
|
}
|
617 |
|
|
printk("%08lX-%08lX", tree->vm_start, tree->vm_end);
|
618 |
|
|
if (tree->vm_avl_right != avl_empty) {
|
619 |
|
|
printk(">");
|
620 |
|
|
printk_avl(tree->vm_avl_right);
|
621 |
|
|
}
|
622 |
|
|
printk(")");
|
623 |
|
|
}
|
624 |
|
|
}
|
625 |
|
|
|
626 |
|
|
/* Name of the call site being checked; set by avl_check() and used in
 * the diagnostics printed by the avl_check* helpers below. */
static char *avl_check_point = "somewhere";
|
627 |
|
|
|
628 |
|
|
/* check a tree's consistency and balancing */
|
629 |
|
|
static void avl_checkheights (struct vm_area_struct * tree)
|
630 |
|
|
{
|
631 |
|
|
int h, hl, hr;
|
632 |
|
|
|
633 |
|
|
if (tree == avl_empty)
|
634 |
|
|
return;
|
635 |
|
|
avl_checkheights(tree->vm_avl_left);
|
636 |
|
|
avl_checkheights(tree->vm_avl_right);
|
637 |
|
|
h = tree->vm_avl_height;
|
638 |
|
|
hl = heightof(tree->vm_avl_left);
|
639 |
|
|
hr = heightof(tree->vm_avl_right);
|
640 |
|
|
if ((h == hl+1) && (hr <= hl) && (hl <= hr+1))
|
641 |
|
|
return;
|
642 |
|
|
if ((h == hr+1) && (hl <= hr) && (hr <= hl+1))
|
643 |
|
|
return;
|
644 |
|
|
printk("%s: avl_checkheights: heights inconsistent\n",avl_check_point);
|
645 |
|
|
}
|
646 |
|
|
|
647 |
|
|
/* check that all values stored in a tree are < key */
|
648 |
|
|
/* check that all values stored in a tree are < key */
static void avl_checkleft (struct vm_area_struct * tree, vm_avl_key_t key)
{
	if (tree != avl_empty) {
		avl_checkleft(tree->vm_avl_left, key);
		avl_checkleft(tree->vm_avl_right, key);
		if (!(tree->vm_avl_key < key))
			printk("%s: avl_checkleft: left key %lu >= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
	}
}
|
658 |
|
|
|
659 |
|
|
/* check that all values stored in a tree are > key */
|
660 |
|
|
/* check that all values stored in a tree are > key */
static void avl_checkright (struct vm_area_struct * tree, vm_avl_key_t key)
{
	if (tree != avl_empty) {
		avl_checkright(tree->vm_avl_left, key);
		avl_checkright(tree->vm_avl_right, key);
		if (!(tree->vm_avl_key > key))
			printk("%s: avl_checkright: right key %lu <= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
	}
}
|
670 |
|
|
|
671 |
|
|
/* check that all values are properly increasing */
|
672 |
|
|
static void avl_checkorder (struct vm_area_struct * tree)
|
673 |
|
|
{
|
674 |
|
|
if (tree == avl_empty)
|
675 |
|
|
return;
|
676 |
|
|
avl_checkorder(tree->vm_avl_left);
|
677 |
|
|
avl_checkorder(tree->vm_avl_right);
|
678 |
|
|
avl_checkleft(tree->vm_avl_left,tree->vm_avl_key);
|
679 |
|
|
avl_checkright(tree->vm_avl_right,tree->vm_avl_key);
|
680 |
|
|
}
|
681 |
|
|
|
682 |
|
|
/* all checks */
|
683 |
|
|
/* all checks: run the height and ordering validators over a task's
 * mmap AVL tree, tagging any diagnostics with the caller's name. */
static void avl_check (struct task_struct * task, char *caller)
{
	avl_check_point = caller;
/* printk("task \"%s\", %s\n",task->comm,caller); */
/* printk("task \"%s\" list: ",task->comm); printk_list(task->mm->mmap); printk("\n"); */
/* printk("task \"%s\" tree: ",task->comm); printk_avl(task->mm->mmap_avl); printk("\n"); */
	avl_checkheights(task->mm->mmap_avl);
	avl_checkorder(task->mm->mmap_avl);
}
|
692 |
|
|
|
693 |
|
|
#endif
|
694 |
|
|
|
695 |
|
|
|
696 |
|
|
/*
|
697 |
|
|
* Normal function to fix up a mapping
|
698 |
|
|
* This function is the default for when an area has no specific
|
699 |
|
|
* function. This may be used as part of a more specific routine.
|
700 |
|
|
* This function works out what part of an area is affected and
|
701 |
|
|
* adjusts the mapping information. Since the actual page
|
702 |
|
|
* manipulation is done in do_mmap(), none need be done here,
|
703 |
|
|
* though it would probably be more appropriate.
|
704 |
|
|
*
|
705 |
|
|
* By the time this function is called, the area struct has been
|
706 |
|
|
* removed from the process mapping list, so it needs to be
|
707 |
|
|
* reinserted if necessary.
|
708 |
|
|
*
|
709 |
|
|
* The 4 main cases are:
|
710 |
|
|
* Unmapping the whole area
|
711 |
|
|
* Unmapping from the start of the segment to a point in it
|
712 |
|
|
* Unmapping from an intermediate point to the end
|
713 |
|
|
* Unmapping between two intermediate points, making a hole.
|
714 |
|
|
*
|
715 |
|
|
* Case 4 involves the creation of 2 new areas, for each side of
|
716 |
|
|
* the hole.
|
717 |
|
|
*/
|
718 |
|
|
/*
 * Fix up a mapping after unmapping [addr, addr+len) from it.  'area'
 * has already been removed from the process mapping list; this works
 * out which of the 4 cases applies (whole area, head, tail, or hole),
 * adjusts the area accordingly and reinserts what remains.
 */
static void unmap_fixup(struct vm_area_struct *area,
	unsigned long addr, size_t len)
{
	struct vm_area_struct *mpnt;
	unsigned long end = addr + len;

	/* The unmap range must lie entirely within the area. */
	if (addr < area->vm_start || addr >= area->vm_end ||
	    end <= area->vm_start || end > area->vm_end ||
	    end < addr)
	{
		printk("unmap_fixup: area=%lx-%lx, unmap %lx-%lx!!\n",
		       area->vm_start, area->vm_end, addr, end);
		return;
	}
	area->vm_mm->total_vm -= len >> PAGE_SHIFT;
	if (area->vm_flags & VM_LOCKED)
		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;

	/* Unmapping the whole area */
	if (addr == area->vm_start && end == area->vm_end) {
		if (area->vm_ops && area->vm_ops->close)
			area->vm_ops->close(area);
		if (area->vm_inode)
			iput(area->vm_inode);
		return;
	}

	/* Work out to one of the ends */
	if (end == area->vm_end)
		area->vm_end = addr;	/* trim the tail */
	else
	if (addr == area->vm_start) {
		/* trim the head: the file offset shifts forward too */
		area->vm_offset += (end - area->vm_start);
		area->vm_start = end;
	}
	else {
		/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
		/* Add end mapping -- leave beginning for below */
		mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);

		if (!mpnt)
			return;
		*mpnt = *area;
		mpnt->vm_offset += (end - area->vm_start);
		mpnt->vm_start = end;
		if (mpnt->vm_inode)
			mpnt->vm_inode->i_count++;	/* second vma references the inode */
		if (mpnt->vm_ops && mpnt->vm_ops->open)
			mpnt->vm_ops->open(mpnt);
		area->vm_end = addr;	/* Truncate area */
		insert_vm_struct(current->mm, mpnt);
	}

	/* construct whatever mapping is needed: reinsert a fresh copy of
	 * the (now trimmed) area, balancing the open/close callbacks */
	mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
	if (!mpnt)
		return;
	*mpnt = *area;
	if (mpnt->vm_ops && mpnt->vm_ops->open)
		mpnt->vm_ops->open(mpnt);
	if (area->vm_ops && area->vm_ops->close) {
		area->vm_end = area->vm_start;	/* empty range for the close */
		area->vm_ops->close(area);
	}
	insert_vm_struct(current->mm, mpnt);
}
|
784 |
|
|
|
785 |
|
|
/* munmap() system call: thin wrapper around do_munmap(), which does
 * all validation and the actual work. */
asmlinkage int sys_munmap(unsigned long addr, size_t len)
{
	return do_munmap(addr, len);
}
|
789 |
|
|
|
790 |
|
|
/*
|
791 |
|
|
* Munmap is split into 2 main parts -- this part which finds
|
792 |
|
|
* what needs doing, and the areas themselves, which do the
|
793 |
|
|
* work. This now handles partial unmappings.
|
794 |
|
|
* Jeremy Fitzhardine <jeremy@sw.oz.au>
|
795 |
|
|
*/
|
796 |
|
|
int do_munmap(unsigned long addr, size_t len)
|
797 |
|
|
{
|
798 |
|
|
struct vm_area_struct *mpnt, *prev, *next, **npp, *free;
|
799 |
|
|
|
800 |
|
|
if ((addr & ~PAGE_MASK) || addr > MAX_USER_ADDR || len > MAX_USER_ADDR-addr)
|
801 |
|
|
return -EINVAL;
|
802 |
|
|
|
803 |
|
|
if ((len = PAGE_ALIGN(len)) == 0)
|
804 |
|
|
return 0;
|
805 |
|
|
|
806 |
|
|
/*
|
807 |
|
|
* Check if this memory area is ok - put it on the temporary
|
808 |
|
|
* list if so.. The checks here are pretty simple --
|
809 |
|
|
* every area affected in some way (by any overlap) is put
|
810 |
|
|
* on the list. If nothing is put on, nothing is affected.
|
811 |
|
|
*/
|
812 |
|
|
mpnt = find_vma(current->mm, addr);
|
813 |
|
|
if (!mpnt)
|
814 |
|
|
return 0;
|
815 |
|
|
avl_neighbours(mpnt, current->mm->mmap_avl, &prev, &next);
|
816 |
|
|
/* we have prev->vm_next == mpnt && mpnt->vm_next = next */
|
817 |
|
|
/* and addr < mpnt->vm_end */
|
818 |
|
|
|
819 |
|
|
npp = (prev ? &prev->vm_next : ¤t->mm->mmap);
|
820 |
|
|
free = NULL;
|
821 |
|
|
for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
|
822 |
|
|
*npp = mpnt->vm_next;
|
823 |
|
|
mpnt->vm_next = free;
|
824 |
|
|
free = mpnt;
|
825 |
|
|
avl_remove(mpnt, ¤t->mm->mmap_avl);
|
826 |
|
|
}
|
827 |
|
|
|
828 |
|
|
if (free == NULL)
|
829 |
|
|
return 0;
|
830 |
|
|
|
831 |
|
|
/*
|
832 |
|
|
* Ok - we have the memory areas we should free on the 'free' list,
|
833 |
|
|
* so release them, and unmap the page range..
|
834 |
|
|
* If the one of the segments is only being partially unmapped,
|
835 |
|
|
* it will put new vm_area_struct(s) into the address space.
|
836 |
|
|
*/
|
837 |
|
|
do {
|
838 |
|
|
unsigned long st, end;
|
839 |
|
|
|
840 |
|
|
mpnt = free;
|
841 |
|
|
free = free->vm_next;
|
842 |
|
|
|
843 |
|
|
remove_shared_vm_struct(mpnt);
|
844 |
|
|
|
845 |
|
|
st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
|
846 |
|
|
end = addr+len;
|
847 |
|
|
end = end > mpnt->vm_end ? mpnt->vm_end : end;
|
848 |
|
|
|
849 |
|
|
if (mpnt->vm_ops && mpnt->vm_ops->unmap)
|
850 |
|
|
mpnt->vm_ops->unmap(mpnt, st, end-st);
|
851 |
|
|
zap_page_range(current->mm, st, end-st);
|
852 |
|
|
unmap_fixup(mpnt, st, end-st);
|
853 |
|
|
kfree(mpnt);
|
854 |
|
|
} while (free);
|
855 |
|
|
|
856 |
|
|
/* we could zap the page tables here too.. */
|
857 |
|
|
|
858 |
|
|
return 0;
|
859 |
|
|
}
|
860 |
|
|
|
861 |
|
|
/* Build the AVL tree corresponding to the VMA list. */
|
862 |
|
|
void build_mmap_avl(struct mm_struct * mm)
|
863 |
|
|
{
|
864 |
|
|
struct vm_area_struct * vma;
|
865 |
|
|
|
866 |
|
|
mm->mmap_avl = NULL;
|
867 |
|
|
for (vma = mm->mmap; vma; vma = vma->vm_next)
|
868 |
|
|
avl_insert(vma, &mm->mmap_avl);
|
869 |
|
|
}
|
870 |
|
|
|
871 |
|
|
/* Release all mmaps. */
|
872 |
|
|
void exit_mmap(struct mm_struct * mm)
|
873 |
|
|
{
|
874 |
|
|
struct vm_area_struct * mpnt;
|
875 |
|
|
|
876 |
|
|
mpnt = mm->mmap;
|
877 |
|
|
mm->mmap = NULL;
|
878 |
|
|
mm->mmap_avl = NULL;
|
879 |
|
|
mm->rss = 0;
|
880 |
|
|
mm->total_vm = 0;
|
881 |
|
|
mm->locked_vm = 0;
|
882 |
|
|
while (mpnt) {
|
883 |
|
|
struct vm_area_struct * next = mpnt->vm_next;
|
884 |
|
|
if (mpnt->vm_ops) {
|
885 |
|
|
if (mpnt->vm_ops->unmap)
|
886 |
|
|
mpnt->vm_ops->unmap(mpnt, mpnt->vm_start, mpnt->vm_end-mpnt->vm_start);
|
887 |
|
|
if (mpnt->vm_ops->close)
|
888 |
|
|
mpnt->vm_ops->close(mpnt);
|
889 |
|
|
}
|
890 |
|
|
remove_shared_vm_struct(mpnt);
|
891 |
|
|
zap_page_range(mm, mpnt->vm_start, mpnt->vm_end-mpnt->vm_start);
|
892 |
|
|
if (mpnt->vm_inode)
|
893 |
|
|
iput(mpnt->vm_inode);
|
894 |
|
|
kfree(mpnt);
|
895 |
|
|
mpnt = next;
|
896 |
|
|
}
|
897 |
|
|
}
|
898 |
|
|
|
899 |
|
|
/*
 * Insert vm structure into process list sorted by address
 * and into the inode's i_mmap ring.
 *
 * The list insertion is driven by the AVL tree (the #if 0 branch is
 * the equivalent-but-slow linear walk, kept for reference).  If vmp
 * maps an inode, vmp is also spliced into that inode's circular
 * share ring so all mappers of the inode can be found.
 */
void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
{
	struct vm_area_struct *share;
	struct inode * inode;

#if 0 /* equivalent, but slow */
	struct vm_area_struct **p, *mpnt;

	p = &mm->mmap;
	while ((mpnt = *p) != NULL) {
		if (mpnt->vm_start > vmp->vm_start)
			break;
		if (mpnt->vm_end > vmp->vm_start)
			printk("insert_vm_struct: overlapping memory areas\n");
		p = &mpnt->vm_next;
	}
	vmp->vm_next = mpnt;
	*p = vmp;
#else
	struct vm_area_struct * prev, * next;

	/* tree insert also hands back the list neighbours of vmp */
	avl_insert_neighbours(vmp, &mm->mmap_avl, &prev, &next);
	/* sanity check: the linear list must agree with the tree */
	if ((prev ? prev->vm_next : mm->mmap) != next)
		printk("insert_vm_struct: tree inconsistent with list\n");
	if (prev)
		prev->vm_next = vmp;
	else
		mm->mmap = vmp;
	vmp->vm_next = next;
#endif

	inode = vmp->vm_inode;
	if (!inode)
		return;

	/* insert vmp into inode's circular share list */
	if ((share = inode->i_mmap)) {
		/* splice vmp in right after the current head */
		vmp->vm_next_share = share->vm_next_share;
		vmp->vm_next_share->vm_prev_share = vmp;
		share->vm_next_share = vmp;
		vmp->vm_prev_share = share;
	} else
		/* first mapper: vmp is a one-element ring */
		inode->i_mmap = vmp->vm_next_share = vmp->vm_prev_share = vmp;
}
|
947 |
|
|
|
948 |
|
|
/*
|
949 |
|
|
* Remove one vm structure from the inode's i_mmap ring.
|
950 |
|
|
*/
|
951 |
|
|
void remove_shared_vm_struct(struct vm_area_struct *mpnt)
|
952 |
|
|
{
|
953 |
|
|
struct inode * inode = mpnt->vm_inode;
|
954 |
|
|
|
955 |
|
|
if (!inode)
|
956 |
|
|
return;
|
957 |
|
|
|
958 |
|
|
if (mpnt->vm_next_share == mpnt) {
|
959 |
|
|
if (inode->i_mmap != mpnt)
|
960 |
|
|
printk("Inode i_mmap ring corrupted\n");
|
961 |
|
|
inode->i_mmap = NULL;
|
962 |
|
|
return;
|
963 |
|
|
}
|
964 |
|
|
|
965 |
|
|
if (inode->i_mmap == mpnt)
|
966 |
|
|
inode->i_mmap = mpnt->vm_next_share;
|
967 |
|
|
|
968 |
|
|
mpnt->vm_prev_share->vm_next_share = mpnt->vm_next_share;
|
969 |
|
|
mpnt->vm_next_share->vm_prev_share = mpnt->vm_prev_share;
|
970 |
|
|
}
|
971 |
|
|
|
972 |
|
|
/*
 * Merge the list of memory segments if possible.
 * Redundant vm_area_structs are freed.
 * This assumes that the list is ordered by address.
 * We don't need to traverse the entire list, only those segments
 * which intersect or are adjacent to a given interval.
 *
 * Runs under mm->mmap_sem.  Two adjacent areas are merged only when
 * inode, vm_pte, ops and flags all match, they abut exactly, and
 * (for file/SHM mappings) their file offsets are contiguous.
 */
void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
{
	struct vm_area_struct *prev, *mpnt, *next;

	down(&mm->mmap_sem);
	mpnt = find_vma(mm, start_addr);
	if (!mpnt)
		goto no_vma;

	avl_neighbours(mpnt, mm->mmap_avl, &prev, &next);
	/* we have prev->vm_next == mpnt && mpnt->vm_next = next */

	/* no left neighbour: start the pairwise scan one step later */
	if (!prev) {
		prev = mpnt;
		mpnt = next;
	}

	/* prev and mpnt cycle through the list, as long as
	 * start_addr < mpnt->vm_end && prev->vm_start < end_addr
	 */
	for ( ; mpnt && prev->vm_start < end_addr ; prev = mpnt, mpnt = next) {
#if 0
		printk("looping in merge_segments, mpnt=0x%lX\n", (unsigned long) mpnt);
#endif

		/* saved now -- mpnt may be freed before the loop increment */
		next = mpnt->vm_next;

		/*
		 * To share, we must have the same inode, operations..
		 */
		if (mpnt->vm_inode != prev->vm_inode)
			continue;
		if (mpnt->vm_pte != prev->vm_pte)
			continue;
		if (mpnt->vm_ops != prev->vm_ops)
			continue;
		if (mpnt->vm_flags != prev->vm_flags)
			continue;
		/* must abut exactly -- no hole, no overlap */
		if (prev->vm_end != mpnt->vm_start)
			continue;
		/*
		 * and if we have an inode, the offsets must be contiguous..
		 */
		if ((mpnt->vm_inode != NULL) || (mpnt->vm_flags & VM_SHM)) {
			if (prev->vm_offset + prev->vm_end - prev->vm_start != mpnt->vm_offset)
				continue;
		}

		/*
		 * merge prev with mpnt and set up pointers so the new
		 * big segment can possibly merge with the next one.
		 * The old unused mpnt is freed.
		 */
		avl_remove(mpnt, &mm->mmap_avl);
		prev->vm_end = mpnt->vm_end;
		prev->vm_next = mpnt->vm_next;
		if (mpnt->vm_ops && mpnt->vm_ops->close) {
			/* shrink mpnt to an empty area at its own end before
			 * close(), since its range now belongs to prev */
			mpnt->vm_offset += mpnt->vm_end - mpnt->vm_start;
			mpnt->vm_start = mpnt->vm_end;
			mpnt->vm_ops->close(mpnt);
		}
		remove_shared_vm_struct(mpnt);
		/* drop the extra inode reference held by the merged area */
		if (mpnt->vm_inode)
			mpnt->vm_inode->i_count--;
		kfree_s(mpnt, sizeof(*mpnt));
		/* retry the enlarged prev against the following area */
		mpnt = prev;
	}
no_vma:
	up(&mm->mmap_sem);
}
|