/*
 *	linux/mm/mmap.c
 *
 * Written by obz.
 */
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/mount.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>

/*
 * WARNING: the debugging will use recursive algorithms so never enable this
 * unless you know what you are doing.
 */
#undef DEBUG_MM_RB

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type     prot
 *              PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
 * MAP_SHARED   r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 *              w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
 *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 *
 * MAP_PRIVATE  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 *              w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
 *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 *
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
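
/*
 * Note: the index into protection_map is the low four VM_* flag bits
 * of a vma (VM_READ, VM_WRITE, VM_EXEC, VM_SHARED), so the __P row
 * serves private mappings and the __S row shared ones; see the
 * protection_map[vm_flags & 0x0f] lookups in do_mmap_pgoff() and
 * do_brk() below.
 */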

int sysctl_overcommit_memory;
int max_map_count = DEFAULT_MAX_MAP_COUNT;

/* Check that a process has enough memory to allocate a
 * new virtual mapping.
 */
int vm_enough_memory(long pages)
{
	/* Stupid algorithm to decide if we have enough memory: while
	 * simple, it hopefully works in most obvious cases.. Easy to
	 * fool it, but this should catch most mistakes.
	 */
	/* 23/11/98 NJC: Somewhat less stupid version of algorithm,
	 * which tries to do "TheRightThing".  Instead of using half of
	 * (buffers+cache), use the minimum values.  Allow an extra 2%
	 * of num_physpages for safety margin.
	 */

	unsigned long free;

	/* Sometimes we want to use more memory than we have. */
	if (sysctl_overcommit_memory)
		return 1;

	/* The page cache contains buffer pages these days.. */
	free = page_cache_size;
	free += nr_free_pages();
	free += nr_swap_pages;

	/*
	 * This double-counts: the nrpages are both in the page-cache
	 * and in the swapper space. At the same time, this compensates
	 * for the swap-space over-allocation (ie "nr_swap_pages" being
	 * too small).
	 */
	free += swapper_space.nrpages;

	/*
	 * The code below doesn't account for free space in the inode
	 * and dentry slab cache, slab cache fragmentation, inodes and
	 * dentries which will become freeable under VM load, etc.
	 * Lets just hope all these (complex) factors balance out...
	 */
	free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> PAGE_SHIFT;
	free += (inodes_stat.nr_unused * sizeof(struct inode)) >> PAGE_SHIFT;

	return free > pages;
}
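
/*
 * Callers pass the size of the new mapping in pages, e.g.
 * vm_enough_memory(len >> PAGE_SHIFT) as in do_mmap_pgoff() and
 * do_brk() below; a zero return means the request should fail
 * with -ENOMEM.
 */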

/* Remove one vm structure from the inode's i_mapping address space. */
static inline void __remove_shared_vm_struct(struct vm_area_struct *vma)
{
	struct file * file = vma->vm_file;

	if (file) {
		struct inode *inode = file->f_dentry->d_inode;
		if (vma->vm_flags & VM_DENYWRITE)
			atomic_inc(&inode->i_writecount);
		if (vma->vm_next_share)
			vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share;
		*vma->vm_pprev_share = vma->vm_next_share;
	}
}

static inline void remove_shared_vm_struct(struct vm_area_struct *vma)
{
	lock_vma_mappings(vma);
	__remove_shared_vm_struct(vma);
	unlock_vma_mappings(vma);
}

void lock_vma_mappings(struct vm_area_struct *vma)
{
	struct address_space *mapping;

	mapping = NULL;
	if (vma->vm_file)
		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
	if (mapping)
		spin_lock(&mapping->i_shared_lock);
}

void unlock_vma_mappings(struct vm_area_struct *vma)
{
	struct address_space *mapping;

	mapping = NULL;
	if (vma->vm_file)
		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
	if (mapping)
		spin_unlock(&mapping->i_shared_lock);
}
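
/*
 * lock_vma_mappings/unlock_vma_mappings guard the per-address_space
 * i_mmap/i_mmap_shared share lists (via i_shared_lock); anonymous
 * vmas have no vm_file, so both helpers are no-ops for them.
 */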

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;

	down_write(&mm->mmap_sem);

	if (brk < mm->end_code)
		goto out;
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against rlimit.. */
	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
		goto out;

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Check if we have enough memory.. */
	if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
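
/*
 * Shrinking the brk goes through do_munmap() above; growing it is
 * delegated to do_brk() further down, after the rlimit, overlap and
 * memory checks.  Note that mm->brk records the requested (unaligned)
 * value, while the mapping itself is managed page-aligned.
 */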

/* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
{
#define _trans(x,bit1,bit2) \
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

	unsigned long prot_bits, flag_bits;
	prot_bits =
		_trans(prot, PROT_READ, VM_READ) |
		_trans(prot, PROT_WRITE, VM_WRITE) |
		_trans(prot, PROT_EXEC, VM_EXEC);
	flag_bits =
		_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
		_trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
		_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
	return prot_bits | flag_bits;
#undef _trans
}
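
/*
 * Example: calc_vm_flags(PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_GROWSDOWN)
 * yields VM_READ | VM_WRITE | VM_GROWSDOWN.  The MAP_TYPE bits
 * (MAP_SHARED/MAP_PRIVATE) are not translated here; do_mmap_pgoff()
 * handles them separately.
 */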

#ifdef DEBUG_MM_RB
static int browse_rb(rb_node_t * rb_node) {
	int i = 0;
	if (rb_node) {
		i++;
		i += browse_rb(rb_node->rb_left);
		i += browse_rb(rb_node->rb_right);
	}
	return i;
}

static void validate_mm(struct mm_struct * mm) {
	int bug = 0;
	int i = 0;
	struct vm_area_struct * tmp = mm->mmap;
	while (tmp) {
		tmp = tmp->vm_next;
		i++;
	}
	if (i != mm->map_count)
		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
	i = browse_rb(mm->mm_rb.rb_node);
	if (i != mm->map_count)
		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
	if (bug)
		BUG();
}
#else
#define validate_mm(mm) do { } while (0)
#endif

static struct vm_area_struct * find_vma_prepare(struct mm_struct * mm, unsigned long addr,
						struct vm_area_struct ** pprev,
						rb_node_t *** rb_link, rb_node_t ** rb_parent)
{
	struct vm_area_struct * vma;
	rb_node_t ** __rb_link, * __rb_parent, * rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;
	vma = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			vma = vma_tmp;
			if (vma_tmp->vm_start <= addr)
				return vma;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return vma;
}
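
/*
 * find_vma_prepare() behaves like find_vma() (first vma with
 * vm_end > addr, or NULL) but additionally returns the predecessor
 * vma and the rb-tree link/parent for the would-be insertion point,
 * so vma_link() can insert a new vma without a second tree walk.
 */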

static inline void __vma_link_list(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
				   rb_node_t * rb_parent)
{
	if (prev) {
		vma->vm_next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			vma->vm_next = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
		else
			vma->vm_next = NULL;
	}
}

static inline void __vma_link_rb(struct mm_struct * mm, struct vm_area_struct * vma,
				 rb_node_t ** rb_link, rb_node_t * rb_parent)
{
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}

static inline void __vma_link_file(struct vm_area_struct * vma)
{
	struct file * file;

	file = vma->vm_file;
	if (file) {
		struct inode * inode = file->f_dentry->d_inode;
		struct address_space *mapping = inode->i_mapping;
		struct vm_area_struct **head;

		if (vma->vm_flags & VM_DENYWRITE)
			atomic_dec(&inode->i_writecount);

		head = &mapping->i_mmap;
		if (vma->vm_flags & VM_SHARED)
			head = &mapping->i_mmap_shared;

		/* insert vma into inode's share list */
		if ((vma->vm_next_share = *head) != NULL)
			(*head)->vm_pprev_share = &vma->vm_next_share;
		*head = vma;
		vma->vm_pprev_share = head;
	}
}

static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
		       rb_node_t ** rb_link, rb_node_t * rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	__vma_link_file(vma);
}

static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
			    rb_node_t ** rb_link, rb_node_t * rb_parent)
{
	lock_vma_mappings(vma);
	spin_lock(&mm->page_table_lock);
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	spin_unlock(&mm->page_table_lock);
	unlock_vma_mappings(vma);

	mm->map_count++;
	validate_mm(mm);
}
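
/*
 * Locking order used here and in unmap_fixup(): the mapping's
 * i_shared_lock (via lock_vma_mappings) is taken before
 * mm->page_table_lock.  mm->map_count is updated outside the
 * spinlock, which relies on callers holding mm->mmap_sem for writing.
 */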

static int vma_merge(struct mm_struct * mm, struct vm_area_struct * prev,
		     rb_node_t * rb_parent, unsigned long addr, unsigned long end, unsigned long vm_flags)
{
	spinlock_t * lock = &mm->page_table_lock;
	if (!prev) {
		prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
		goto merge_next;
	}
	if (prev->vm_end == addr && can_vma_merge(prev, vm_flags)) {
		struct vm_area_struct * next;

		spin_lock(lock);
		prev->vm_end = end;
		next = prev->vm_next;
		if (next && prev->vm_end == next->vm_start && can_vma_merge(next, vm_flags)) {
			prev->vm_end = next->vm_end;
			__vma_unlink(mm, next, prev);
			spin_unlock(lock);

			mm->map_count--;
			kmem_cache_free(vm_area_cachep, next);
			return 1;
		}
		spin_unlock(lock);
		return 1;
	}

	prev = prev->vm_next;
	if (prev) {
merge_next:
		if (!can_vma_merge(prev, vm_flags))
			return 0;
		if (end == prev->vm_start) {
			spin_lock(lock);
			prev->vm_start = addr;
			spin_unlock(lock);
			return 1;
		}
	}

	return 0;
}
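
/*
 * vma_merge() covers three cases for an anonymous [addr, end) range:
 * extend prev forward when it ends at addr, additionally swallow the
 * following vma when that extension closes the gap completely, or
 * extend the next vma backwards when end meets its vm_start.  It
 * returns 1 if the range was absorbed and no new vma is needed.
 */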

unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long pgoff)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma, * prev;
	unsigned int vm_flags;
	int correct_wcount = 0;
	int error;
	rb_node_t ** rb_link, * rb_parent;

	if (file) {
		if (!file->f_op || !file->f_op->mmap)
			return -ENODEV;

		if ((prot & PROT_EXEC) && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
			return -EPERM;
	}

	if (!len)
		return addr;

	len = PAGE_ALIGN(len);

	if (len > TASK_SIZE || len == 0)
		return -EINVAL;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EINVAL;

	/* Too many mappings? */
	if (mm->map_count > max_map_count)
		return -ENOMEM;

	/* Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (addr & ~PAGE_MASK)
		return addr;

	/* Do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	vm_flags = calc_vm_flags(prot,flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	/* mlock MCL_FUTURE? */
	if (vm_flags & VM_LOCKED) {
		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
		locked += len;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			return -EAGAIN;
	}

	if (file) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			/* Make sure we don't allow writing to an append-only file.. */
			if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/* make sure there are no mandatory locks on the file. */
			if (locks_verify_locked(file->f_dentry->d_inode))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			break;

		default:
			return -EINVAL;
		}
	} else {
		vm_flags |= VM_SHARED | VM_MAYSHARE;
		switch (flags & MAP_TYPE) {
		default:
			return -EINVAL;
		case MAP_PRIVATE:
			vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
			/* fall through */
		case MAP_SHARED:
			break;
		}
	}

	/* Clear old maps */
munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Check against address space limit. */
	if ((mm->total_vm << PAGE_SHIFT) + len
	    > current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;

	/* Private writable mapping? Check memory availability.. */
	if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
	    !(flags & MAP_NORESERVE) &&
	    !vm_enough_memory(len >> PAGE_SHIFT))
		return -ENOMEM;

	/* Can we just expand an old anonymous mapping? */
	if (!file && !(vm_flags & VM_SHARED) && rb_parent)
		if (vma_merge(mm, prev, rb_parent, addr, addr + len, vm_flags))
			goto out;

	/* Determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma)
		return -ENOMEM;

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
	vma->vm_ops = NULL;
	vma->vm_pgoff = pgoff;
	vma->vm_file = NULL;
	vma->vm_private_data = NULL;
	vma->vm_raend = 0;

	if (file) {
		error = -EINVAL;
		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
			goto free_vma;
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
			correct_wcount = 1;
		}
		vma->vm_file = file;
		get_file(file);
		error = file->f_op->mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;
	} else if (flags & MAP_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	}

	/* Can addr have changed??
	 *
	 * Answer: Yes, several device drivers can do it in their
	 *         f_op->mmap method. -DaveM
	 */
	if (addr != vma->vm_start) {
		/*
		 * It is a bit too late to pretend changing the virtual
		 * area of the mapping, we just corrupted userspace
		 * in the do_munmap, so FIXME (not in 2.4 to avoid breaking
		 * the driver API).
		 */
		struct vm_area_struct * stale_vma;
		/* Since addr changed, we rely on the mmap op to prevent
		 * collisions with existing vmas and just use find_vma_prepare
		 * to update the tree pointers.
		 */
		addr = vma->vm_start;
		stale_vma = find_vma_prepare(mm, addr, &prev,
						&rb_link, &rb_parent);
		/*
		 * Make sure the lowlevel driver did its job right.
		 */
		if (unlikely(stale_vma && stale_vma->vm_start < vma->vm_end)) {
			printk(KERN_ERR "buggy mmap operation: [<%p>]\n",
				file ? file->f_op->mmap : NULL);
			BUG();
		}
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	if (correct_wcount)
		atomic_inc(&file->f_dentry->d_inode->i_writecount);

out:
	mm->total_vm += len >> PAGE_SHIFT;
	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);
	}
	return addr;

unmap_and_free_vma:
	if (correct_wcount)
		atomic_inc(&file->f_dentry->d_inode->i_writecount);
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
free_vma:
	kmem_cache_free(vm_area_cachep, vma);
	return error;
}
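
/*
 * do_mmap_pgoff() is called from the architecture mmap system-call
 * stubs and from the do_mmap() helper; callers are expected to hold
 * mm->mmap_sem for writing, which is what makes the munmap_back
 * retry loop above safe.
 */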

/* Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
static inline unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct vm_area_struct *vma;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(current->mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	addr = PAGE_ALIGN(TASK_UNMAPPED_BASE);

	for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr)
			return -ENOMEM;
		if (!vma || addr + len <= vma->vm_start)
			return addr;
		addr = vma->vm_end;
	}
}
#else
extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
#endif

unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
	if (flags & MAP_FIXED) {
		if (addr > TASK_SIZE - len)
			return -ENOMEM;
		if (addr & ~PAGE_MASK)
			return -EINVAL;
		return addr;
	}

	if (file && file->f_op && file->f_op->get_unmapped_area)
		return file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);

	return arch_get_unmapped_area(file, addr, len, pgoff, flags);
}
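
/*
 * MAP_FIXED requests are only range- and alignment-checked here; for
 * the flexible case a file's own f_op->get_unmapped_area (if any) or
 * the generic first-fit search from TASK_UNMAPPED_BASE above picks
 * the address.  Errors come back as non-page-aligned values, which is
 * what the "addr & ~PAGE_MASK" check in do_mmap_pgoff() relies on.
 */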

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
{
	struct vm_area_struct *vma = NULL;

	if (mm) {
		/* Check the cache first. */
		/* (Cache hit rate is typically around 35%.) */
		vma = mm->mmap_cache;
		if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
			rb_node_t * rb_node;

			rb_node = mm->mm_rb.rb_node;
			vma = NULL;

			while (rb_node) {
				struct vm_area_struct * vma_tmp;

				vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

				if (vma_tmp->vm_end > addr) {
					vma = vma_tmp;
					if (vma_tmp->vm_start <= addr)
						break;
					rb_node = rb_node->rb_left;
				} else
					rb_node = rb_node->rb_right;
			}
			if (vma)
				mm->mmap_cache = vma;
		}
	}
	return vma;
}
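
/*
 * find_vma() returns the vma containing addr or, failing that, the
 * closest vma above it, so callers that need containment must also
 * check vma->vm_start <= addr themselves (as find_extend_vma() below
 * does).  The single-entry mmap_cache is only refreshed on a tree
 * lookup; callers are expected to hold mm->mmap_sem.
 */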

/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
				      struct vm_area_struct **pprev)
{
	if (mm) {
		/* Go through the RB tree quickly. */
		struct vm_area_struct * vma;
		rb_node_t * rb_node, * rb_last_right, * rb_prev;

		rb_node = mm->mm_rb.rb_node;
		rb_last_right = rb_prev = NULL;
		vma = NULL;

		while (rb_node) {
			struct vm_area_struct * vma_tmp;

			vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

			if (vma_tmp->vm_end > addr) {
				vma = vma_tmp;
				rb_prev = rb_last_right;
				if (vma_tmp->vm_start <= addr)
					break;
				rb_node = rb_node->rb_left;
			} else {
				rb_last_right = rb_node;
				rb_node = rb_node->rb_right;
			}
		}
		if (vma) {
			if (vma->vm_rb.rb_left) {
				rb_prev = vma->vm_rb.rb_left;
				while (rb_prev->rb_right)
					rb_prev = rb_prev->rb_right;
			}
			*pprev = NULL;
			if (rb_prev)
				*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
			if ((rb_prev ? (*pprev)->vm_next : mm->mmap) != vma)
				BUG();
			return vma;
		}
	}
	*pprev = NULL;
	return NULL;
}

struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
	struct vm_area_struct * vma;
	unsigned long start;

	addr &= PAGE_MASK;
	vma = find_vma(mm,addr);
	if (!vma)
		return NULL;
	if (vma->vm_start <= addr)
		return vma;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;
	start = vma->vm_start;
	if (expand_stack(vma, addr))
		return NULL;
	if (vma->vm_flags & VM_LOCKED) {
		make_pages_present(addr, start);
	}
	return vma;
}
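
/*
 * Unlike find_vma(), an address that falls just below a VM_GROWSDOWN
 * vma is satisfied here by growing that vma downwards with
 * expand_stack() instead of returning NULL, and the newly exposed
 * range is faulted in immediately when the vma is VM_LOCKED.
 */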

/* Normal function to fix up a mapping
 * This function is the default for when an area has no specific
 * function.  This may be used as part of a more specific routine.
 * This function works out what part of an area is affected and
 * adjusts the mapping information.  Since the actual page
 * manipulation is done in do_mmap(), none need be done here,
 * though it would probably be more appropriate.
 *
 * By the time this function is called, the area struct has been
 * removed from the process mapping list, so it needs to be
 * reinserted if necessary.
 *
 * The 4 main cases are:
 *    Unmapping the whole area
 *    Unmapping from the start of the segment to a point in it
 *    Unmapping from an intermediate point to the end
 *    Unmapping between two intermediate points, making a hole.
 *
 * Case 4 involves the creation of 2 new areas, for each side of
 * the hole.  If possible, we reuse the existing area rather than
 * allocate a new one, and the return indicates whether the old
 * area was reused.
 */
static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
	struct vm_area_struct *area, unsigned long addr, size_t len,
	struct vm_area_struct *extra)
{
	struct vm_area_struct *mpnt;
	unsigned long end = addr + len;

	area->vm_mm->total_vm -= len >> PAGE_SHIFT;
	if (area->vm_flags & VM_LOCKED)
		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;

	/* Unmapping the whole area. */
	if (addr == area->vm_start && end == area->vm_end) {
		if (area->vm_ops && area->vm_ops->close)
			area->vm_ops->close(area);
		if (area->vm_file)
			fput(area->vm_file);
		kmem_cache_free(vm_area_cachep, area);
		return extra;
	}

	/* Work out to one of the ends. */
	if (end == area->vm_end) {
		/*
		 * here area isn't visible to the semaphore-less readers
		 * so we don't need to update it under the spinlock.
		 */
		area->vm_end = addr;
		lock_vma_mappings(area);
		spin_lock(&mm->page_table_lock);
	} else if (addr == area->vm_start) {
		area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
		/* same locking considerations of the above case */
		area->vm_start = end;
		lock_vma_mappings(area);
		spin_lock(&mm->page_table_lock);
	} else {
		/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
		/* Add end mapping -- leave beginning for below */
		mpnt = extra;
		extra = NULL;

		mpnt->vm_mm = area->vm_mm;
		mpnt->vm_start = end;
		mpnt->vm_end = area->vm_end;
		mpnt->vm_page_prot = area->vm_page_prot;
		mpnt->vm_flags = area->vm_flags;
		mpnt->vm_raend = 0;
		mpnt->vm_ops = area->vm_ops;
		mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT);
		mpnt->vm_file = area->vm_file;
		mpnt->vm_private_data = area->vm_private_data;
		if (mpnt->vm_file)
			get_file(mpnt->vm_file);
		if (mpnt->vm_ops && mpnt->vm_ops->open)
			mpnt->vm_ops->open(mpnt);
		area->vm_end = addr;	/* Truncate area */

		/* Because mpnt->vm_file == area->vm_file this locks
		 * things correctly.
		 */
		lock_vma_mappings(area);
		spin_lock(&mm->page_table_lock);
		__insert_vm_struct(mm, mpnt);
	}

	__insert_vm_struct(mm, area);
	spin_unlock(&mm->page_table_lock);
	unlock_vma_mappings(area);
	return extra;
}
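
/*
 * Only the "hole" case consumes the preallocated 'extra' vma; the
 * other three cases hand it back unchanged so do_munmap() can either
 * reuse it for the next partially-unmapped area or free it at the end.
 */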

/*
 * Try to free as many page directory entries as we can,
 * without having to work very hard at actually scanning
 * the page tables themselves.
 *
 * Right now we try to free page tables if we have a nice
 * PGDIR-aligned area that got free'd up. We could be more
 * granular if we want to, but this is fast and simple,
 * and covers the bad cases.
 *
 * "prev", if it exists, points to a vma before the one
 * we just free'd - but there's no telling how much before.
 */
static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
	unsigned long start, unsigned long end)
{
	unsigned long first = start & PGDIR_MASK;
	unsigned long last = end + PGDIR_SIZE - 1;
	unsigned long start_index, end_index;

	if (!prev) {
		prev = mm->mmap;
		if (!prev)
			goto no_mmaps;
		if (prev->vm_end > start) {
			if (last > prev->vm_start)
				last = prev->vm_start;
			goto no_mmaps;
		}
	}
	for (;;) {
		struct vm_area_struct *next = prev->vm_next;

		if (next) {
			if (next->vm_start < start) {
				prev = next;
				continue;
			}
			if (last > next->vm_start)
				last = next->vm_start;
		}
		if (prev->vm_end > first)
			first = prev->vm_end + PGDIR_SIZE - 1;
		break;
	}
no_mmaps:
	if (last < first)
		return;
	/*
	 * If the PGD bits are not consecutive in the virtual address, the
	 * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
	 */
	start_index = pgd_index(first);
	end_index = pgd_index(last);
	if (end_index > start_index) {
		clear_page_tables(mm, start_index, end_index - start_index);
		flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
	}
}
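
/*
 * first/last end up bracketing the pgd range that no neighbouring vma
 * still uses; pgd_index() is used for the final comparison so the
 * calculation also works when the PGD bits are not contiguous in the
 * virtual address, as the comment above notes.
 */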

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
	struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;

	if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
		return -EINVAL;

	if ((len = PAGE_ALIGN(len)) == 0)
		return -EINVAL;

	/* Check if this memory area is ok - put it on the temporary
	 * list if so..  The checks here are pretty simple --
	 * every area affected in some way (by any overlap) is put
	 * on the list.  If nothing is put on, nothing is affected.
	 */
	mpnt = find_vma_prev(mm, addr, &prev);
	if (!mpnt)
		return 0;
	/* we have  addr < mpnt->vm_end  */

	if (mpnt->vm_start >= addr+len)
		return 0;

	/* If we'll make "hole", check the vm areas limit */
	if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
	    && mm->map_count >= max_map_count)
		return -ENOMEM;

	/*
	 * We may need one additional vma to fix up the mappings ...
	 * and this is the last chance for an easy error exit.
	 */
	extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!extra)
		return -ENOMEM;

	npp = (prev ? &prev->vm_next : &mm->mmap);
	free = NULL;
	spin_lock(&mm->page_table_lock);
	for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
		*npp = mpnt->vm_next;
		mpnt->vm_next = free;
		free = mpnt;
		rb_erase(&mpnt->vm_rb, &mm->mm_rb);
	}
	mm->mmap_cache = NULL;	/* Kill the cache. */
	spin_unlock(&mm->page_table_lock);

	/* Ok - we have the memory areas we should free on the 'free' list,
	 * so release them, and unmap the page range..
	 * If one of the segments is only being partially unmapped,
	 * it will put new vm_area_struct(s) into the address space.
	 * In that case we have to be careful with VM_DENYWRITE.
	 */
	while ((mpnt = free) != NULL) {
		unsigned long st, end, size;
		struct file *file = NULL;

		free = free->vm_next;

		st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
		end = addr+len;
		end = end > mpnt->vm_end ? mpnt->vm_end : end;
		size = end - st;

		if (mpnt->vm_flags & VM_DENYWRITE &&
		    (st != mpnt->vm_start || end != mpnt->vm_end) &&
		    (file = mpnt->vm_file) != NULL) {
			atomic_dec(&file->f_dentry->d_inode->i_writecount);
		}
		remove_shared_vm_struct(mpnt);
		mm->map_count--;

		zap_page_range(mm, st, size);

		/*
		 * Fix the mapping, and free the old area if it wasn't reused.
		 */
		extra = unmap_fixup(mm, mpnt, st, size, extra);
		if (file)
			atomic_inc(&file->f_dentry->d_inode->i_writecount);
	}
	validate_mm(mm);

	/* Release the extra vma struct if it wasn't used */
	if (extra)
		kmem_cache_free(vm_area_cachep, extra);

	free_pgtables(mm, prev, addr, addr+len);

	return 0;
}
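
/*
 * do_munmap() expects mm->mmap_sem to be held for writing (see
 * sys_munmap() below); mm->page_table_lock is only taken for the
 * brief list and rb-tree manipulation above, not across
 * zap_page_range() or unmap_fixup().
 */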

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
	int ret;
	struct mm_struct *mm = current->mm;

	down_write(&mm->mmap_sem);
	ret = do_munmap(mm, addr, len);
	up_write(&mm->mmap_sem);
	return ret;
}

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma, * prev;
	unsigned long flags;
	rb_node_t ** rb_link, * rb_parent;

	len = PAGE_ALIGN(len);
	if (!len)
		return addr;

	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
		return -EINVAL;

	/*
	 * mlock MCL_FUTURE?
	 */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
		locked += len;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			return -EAGAIN;
	}

	/*
	 * Clear old maps.  this also does some error checking for us
	 */
munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Check against address space limits *after* clearing old maps... */
	if ((mm->total_vm << PAGE_SHIFT) + len
	    > current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;

	if (mm->map_count > max_map_count)
		return -ENOMEM;

	if (!vm_enough_memory(len >> PAGE_SHIFT))
		return -ENOMEM;

	flags = VM_DATA_DEFAULT_FLAGS | mm->def_flags;

	/* Can we just expand an old anonymous mapping? */
	if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, flags))
		goto out;

	/*
	 * create a vma struct for an anonymous mapping
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma)
		return -ENOMEM;

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = flags;
	vma->vm_page_prot = protection_map[flags & 0x0f];
	vma->vm_ops = NULL;
	vma->vm_pgoff = 0;
	vma->vm_file = NULL;
	vma->vm_private_data = NULL;

	vma_link(mm, vma, prev, rb_link, rb_parent);

out:
	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);
	}
	return addr;
}
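
/*
 * Besides sys_brk() above, do_brk() is also used by the executable
 * loaders (e.g. the ELF loader's bss setup) to create anonymous
 * mappings without going through do_mmap_pgoff().
 */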

/* Build the RB tree corresponding to the VMA list. */
void build_mmap_rb(struct mm_struct * mm)
{
	struct vm_area_struct * vma;
	rb_node_t ** rb_link, * rb_parent;

	mm->mm_rb = RB_ROOT;
	rb_link = &mm->mm_rb.rb_node;
	rb_parent = NULL;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		__vma_link_rb(mm, vma, rb_link, rb_parent);
		rb_parent = &vma->vm_rb;
		rb_link = &rb_parent->rb_right;
	}
}

/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
	struct vm_area_struct * mpnt;

	release_segments(mm);
	spin_lock(&mm->page_table_lock);
	mpnt = mm->mmap;
	mm->mmap = mm->mmap_cache = NULL;
	mm->mm_rb = RB_ROOT;
	mm->rss = 0;
	spin_unlock(&mm->page_table_lock);
	mm->total_vm = 0;
	mm->locked_vm = 0;

	flush_cache_mm(mm);
	while (mpnt) {
		struct vm_area_struct * next = mpnt->vm_next;
		unsigned long start = mpnt->vm_start;
		unsigned long end = mpnt->vm_end;
		unsigned long size = end - start;

		if (mpnt->vm_ops) {
			if (mpnt->vm_ops->close)
				mpnt->vm_ops->close(mpnt);
		}
		mm->map_count--;
		remove_shared_vm_struct(mpnt);
		zap_page_range(mm, start, size);
		if (mpnt->vm_file)
			fput(mpnt->vm_file);
		kmem_cache_free(vm_area_cachep, mpnt);
		mpnt = next;
	}

	/* This is just debugging */
	if (mm->map_count)
		BUG();

	clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);

	flush_tlb_mm(mm);
}
1178 |
|
|
|
1179 |
|
|
/* Insert vm structure into process list sorted by address
|
1180 |
|
|
* and into the inode's i_mmap ring. If vm_file is non-NULL
|
1181 |
|
|
* then the i_shared_lock must be held here.
|
1182 |
|
|
*/
|
1183 |
|
|
void __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
|
1184 |
|
|
{
|
1185 |
|
|
struct vm_area_struct * __vma, * prev;
|
1186 |
|
|
rb_node_t ** rb_link, * rb_parent;
|
1187 |
|
|
|
1188 |
|
|
__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
|
1189 |
|
|
if (__vma && __vma->vm_start < vma->vm_end)
|
1190 |
|
|
BUG();
|
1191 |
|
|
__vma_link(mm, vma, prev, rb_link, rb_parent);
|
1192 |
|
|
mm->map_count++;
|
1193 |
|
|
validate_mm(mm);
|
1194 |
|
|
}
|
1195 |
|
|
|
1196 |
|
|
void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
|
1197 |
|
|
{
|
1198 |
|
|
struct vm_area_struct * __vma, * prev;
|
1199 |
|
|
rb_node_t ** rb_link, * rb_parent;
|
1200 |
|
|
|
1201 |
|
|
__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
|
1202 |
|
|
if (__vma && __vma->vm_start < vma->vm_end)
|
1203 |
|
|
BUG();
|
1204 |
|
|
vma_link(mm, vma, prev, rb_link, rb_parent);
|
1205 |
|
|
validate_mm(mm);
|
1206 |
|
|
}
|