/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@redhat.com>
 */
|
8 |
|
|
|
9 |
|
|
#include <linux/slab.h>
|
10 |
|
|
#include <linux/backing-dev.h>
|
11 |
|
|
#include <linux/mm.h>
|
12 |
|
|
#include <linux/shm.h>
|
13 |
|
|
#include <linux/mman.h>
|
14 |
|
|
#include <linux/pagemap.h>
|
15 |
|
|
#include <linux/swap.h>
|
16 |
|
|
#include <linux/syscalls.h>
|
17 |
|
|
#include <linux/capability.h>
|
18 |
|
|
#include <linux/init.h>
|
19 |
|
|
#include <linux/file.h>
|
20 |
|
|
#include <linux/fs.h>
|
21 |
|
|
#include <linux/personality.h>
|
22 |
|
|
#include <linux/security.h>
|
23 |
|
|
#include <linux/hugetlb.h>
|
24 |
|
|
#include <linux/profile.h>
|
25 |
|
|
#include <linux/module.h>
|
26 |
|
|
#include <linux/mount.h>
|
27 |
|
|
#include <linux/mempolicy.h>
|
28 |
|
|
#include <linux/rmap.h>
|
29 |
|
|
|
30 |
|
|
#include <asm/uaccess.h>
|
31 |
|
|
#include <asm/cacheflush.h>
|
32 |
|
|
#include <asm/tlb.h>
|
33 |
|
|
#include <asm/mmu_context.h>
|
34 |
|
|
|
35 |
|
|
/*
 * Fallback arch_mmap_check(): only defined when no definition of the macro
 * is already in effect.  It ignores its arguments and evaluates to 0;
 * do_mmap_pgoff() returns any non-zero result of the check as an error.
 */
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif
|
38 |
|
|
|
39 |
|
|
/* Forward declaration of a file-local helper; its body is not shown here. */
static void unmap_region(struct mm_struct *mm,
			 struct vm_area_struct *vma,
			 struct vm_area_struct *prev,
			 unsigned long start,
			 unsigned long end);
|
42 |
|
|
|
43 |
|
|
/*
 * DEBUG_MM_RB selects the real validate_mm()/browse_rb() consistency
 * checks below instead of the no-op validate_mm() macro.
 *
 * WARNING: the debugging will use recursive algorithms so never enable this
 * unless you know what you are doing.
 */
#undef DEBUG_MM_RB
|
48 |
|
|
|
49 |
|
|
/* description of effects of mapping type and prot in current implementation.
|
50 |
|
|
* this is due to the limited x86 page protection hardware. The expected
|
51 |
|
|
* behavior is in parens:
|
52 |
|
|
*
|
53 |
|
|
* map_type prot
|
54 |
|
|
* PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
|
55 |
|
|
* MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
|
56 |
|
|
* w: (no) no w: (no) no w: (yes) yes w: (no) no
|
57 |
|
|
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
|
58 |
|
|
*
|
59 |
|
|
* MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
|
60 |
|
|
* w: (no) no w: (no) no w: (copy) copy w: (no) no
|
61 |
|
|
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
|
62 |
|
|
*
|
63 |
|
|
*/
|
64 |
|
|
pgprot_t protection_map[16] = {
|
65 |
|
|
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
|
66 |
|
|
__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
|
67 |
|
|
};
|
68 |
|
|
|
69 |
|
|
pgprot_t vm_get_page_prot(unsigned long vm_flags)
|
70 |
|
|
{
|
71 |
|
|
return protection_map[vm_flags &
|
72 |
|
|
(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
|
73 |
|
|
}
|
74 |
|
|
EXPORT_SYMBOL(vm_get_page_prot);
|
75 |
|
|
|
76 |
|
|
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
|
77 |
|
|
int sysctl_overcommit_ratio = 50; /* default is 50% */
|
78 |
|
|
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
|
79 |
|
|
atomic_t vm_committed_space = ATOMIC_INIT(0);
|
80 |
|
|
|
81 |
|
|
/*
|
82 |
|
|
* Check that a process has enough memory to allocate a new virtual
|
83 |
|
|
* mapping. 0 means there is enough memory for the allocation to
|
84 |
|
|
* succeed and -ENOMEM implies there is not.
|
85 |
|
|
*
|
86 |
|
|
* We currently support three overcommit policies, which are set via the
|
87 |
|
|
* vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
|
88 |
|
|
*
|
89 |
|
|
* Strict overcommit modes added 2002 Feb 26 by Alan Cox.
|
90 |
|
|
* Additional code 2002 Jul 20 by Robert Love.
|
91 |
|
|
*
|
92 |
|
|
* cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
|
93 |
|
|
*
|
94 |
|
|
* Note this is a helper function intended to be used by LSMs which
|
95 |
|
|
* wish to use this logic.
|
96 |
|
|
*/
|
97 |
|
|
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
|
98 |
|
|
{
|
99 |
|
|
unsigned long free, allowed;
|
100 |
|
|
|
101 |
|
|
vm_acct_memory(pages);
|
102 |
|
|
|
103 |
|
|
/*
|
104 |
|
|
* Sometimes we want to use more memory than we have
|
105 |
|
|
*/
|
106 |
|
|
if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
|
107 |
|
|
return 0;
|
108 |
|
|
|
109 |
|
|
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
|
110 |
|
|
unsigned long n;
|
111 |
|
|
|
112 |
|
|
free = global_page_state(NR_FILE_PAGES);
|
113 |
|
|
free += nr_swap_pages;
|
114 |
|
|
|
115 |
|
|
/*
|
116 |
|
|
* Any slabs which are created with the
|
117 |
|
|
* SLAB_RECLAIM_ACCOUNT flag claim to have contents
|
118 |
|
|
* which are reclaimable, under pressure. The dentry
|
119 |
|
|
* cache and most inode caches should fall into this
|
120 |
|
|
*/
|
121 |
|
|
free += global_page_state(NR_SLAB_RECLAIMABLE);
|
122 |
|
|
|
123 |
|
|
/*
|
124 |
|
|
* Leave the last 3% for root
|
125 |
|
|
*/
|
126 |
|
|
if (!cap_sys_admin)
|
127 |
|
|
free -= free / 32;
|
128 |
|
|
|
129 |
|
|
if (free > pages)
|
130 |
|
|
return 0;
|
131 |
|
|
|
132 |
|
|
/*
|
133 |
|
|
* nr_free_pages() is very expensive on large systems,
|
134 |
|
|
* only call if we're about to fail.
|
135 |
|
|
*/
|
136 |
|
|
n = nr_free_pages();
|
137 |
|
|
|
138 |
|
|
/*
|
139 |
|
|
* Leave reserved pages. The pages are not for anonymous pages.
|
140 |
|
|
*/
|
141 |
|
|
if (n <= totalreserve_pages)
|
142 |
|
|
goto error;
|
143 |
|
|
else
|
144 |
|
|
n -= totalreserve_pages;
|
145 |
|
|
|
146 |
|
|
/*
|
147 |
|
|
* Leave the last 3% for root
|
148 |
|
|
*/
|
149 |
|
|
if (!cap_sys_admin)
|
150 |
|
|
n -= n / 32;
|
151 |
|
|
free += n;
|
152 |
|
|
|
153 |
|
|
if (free > pages)
|
154 |
|
|
return 0;
|
155 |
|
|
|
156 |
|
|
goto error;
|
157 |
|
|
}
|
158 |
|
|
|
159 |
|
|
allowed = (totalram_pages - hugetlb_total_pages())
|
160 |
|
|
* sysctl_overcommit_ratio / 100;
|
161 |
|
|
/*
|
162 |
|
|
* Leave the last 3% for root
|
163 |
|
|
*/
|
164 |
|
|
if (!cap_sys_admin)
|
165 |
|
|
allowed -= allowed / 32;
|
166 |
|
|
allowed += total_swap_pages;
|
167 |
|
|
|
168 |
|
|
/* Don't let a single process grow too big:
|
169 |
|
|
leave 3% of the size of this process for other processes */
|
170 |
|
|
allowed -= mm->total_vm / 32;
|
171 |
|
|
|
172 |
|
|
/*
|
173 |
|
|
* cast `allowed' as a signed long because vm_committed_space
|
174 |
|
|
* sometimes has a negative value
|
175 |
|
|
*/
|
176 |
|
|
if (atomic_read(&vm_committed_space) < (long)allowed)
|
177 |
|
|
return 0;
|
178 |
|
|
error:
|
179 |
|
|
vm_unacct_memory(pages);
|
180 |
|
|
|
181 |
|
|
return -ENOMEM;
|
182 |
|
|
}
|
183 |
|
|
|
184 |
|
|
/*
|
185 |
|
|
* Requires inode->i_mapping->i_mmap_lock
|
186 |
|
|
*/
|
187 |
|
|
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
|
188 |
|
|
struct file *file, struct address_space *mapping)
|
189 |
|
|
{
|
190 |
|
|
if (vma->vm_flags & VM_DENYWRITE)
|
191 |
|
|
atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
|
192 |
|
|
if (vma->vm_flags & VM_SHARED)
|
193 |
|
|
mapping->i_mmap_writable--;
|
194 |
|
|
|
195 |
|
|
flush_dcache_mmap_lock(mapping);
|
196 |
|
|
if (unlikely(vma->vm_flags & VM_NONLINEAR))
|
197 |
|
|
list_del_init(&vma->shared.vm_set.list);
|
198 |
|
|
else
|
199 |
|
|
vma_prio_tree_remove(vma, &mapping->i_mmap);
|
200 |
|
|
flush_dcache_mmap_unlock(mapping);
|
201 |
|
|
}
|
202 |
|
|
|
203 |
|
|
/*
|
204 |
|
|
* Unlink a file-based vm structure from its prio_tree, to hide
|
205 |
|
|
* vma from rmap and vmtruncate before freeing its page tables.
|
206 |
|
|
*/
|
207 |
|
|
void unlink_file_vma(struct vm_area_struct *vma)
|
208 |
|
|
{
|
209 |
|
|
struct file *file = vma->vm_file;
|
210 |
|
|
|
211 |
|
|
if (file) {
|
212 |
|
|
struct address_space *mapping = file->f_mapping;
|
213 |
|
|
spin_lock(&mapping->i_mmap_lock);
|
214 |
|
|
__remove_shared_vm_struct(vma, file, mapping);
|
215 |
|
|
spin_unlock(&mapping->i_mmap_lock);
|
216 |
|
|
}
|
217 |
|
|
}
|
218 |
|
|
|
219 |
|
|
/*
|
220 |
|
|
* Close a vm structure and free it, returning the next.
|
221 |
|
|
*/
|
222 |
|
|
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
|
223 |
|
|
{
|
224 |
|
|
struct vm_area_struct *next = vma->vm_next;
|
225 |
|
|
|
226 |
|
|
might_sleep();
|
227 |
|
|
if (vma->vm_ops && vma->vm_ops->close)
|
228 |
|
|
vma->vm_ops->close(vma);
|
229 |
|
|
if (vma->vm_file)
|
230 |
|
|
fput(vma->vm_file);
|
231 |
|
|
mpol_free(vma_policy(vma));
|
232 |
|
|
kmem_cache_free(vm_area_cachep, vma);
|
233 |
|
|
return next;
|
234 |
|
|
}
|
235 |
|
|
|
236 |
|
|
asmlinkage unsigned long sys_brk(unsigned long brk)
|
237 |
|
|
{
|
238 |
|
|
unsigned long rlim, retval;
|
239 |
|
|
unsigned long newbrk, oldbrk;
|
240 |
|
|
struct mm_struct *mm = current->mm;
|
241 |
|
|
|
242 |
|
|
down_write(&mm->mmap_sem);
|
243 |
|
|
|
244 |
|
|
if (brk < mm->end_code)
|
245 |
|
|
goto out;
|
246 |
|
|
|
247 |
|
|
/*
|
248 |
|
|
* Check against rlimit here. If this check is done later after the test
|
249 |
|
|
* of oldbrk with newbrk then it can escape the test and let the data
|
250 |
|
|
* segment grow beyond its set limit the in case where the limit is
|
251 |
|
|
* not page aligned -Ram Gupta
|
252 |
|
|
*/
|
253 |
|
|
rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
|
254 |
|
|
if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
|
255 |
|
|
goto out;
|
256 |
|
|
|
257 |
|
|
newbrk = PAGE_ALIGN(brk);
|
258 |
|
|
oldbrk = PAGE_ALIGN(mm->brk);
|
259 |
|
|
if (oldbrk == newbrk)
|
260 |
|
|
goto set_brk;
|
261 |
|
|
|
262 |
|
|
/* Always allow shrinking brk. */
|
263 |
|
|
if (brk <= mm->brk) {
|
264 |
|
|
if (!do_munmap(mm, newbrk, oldbrk-newbrk))
|
265 |
|
|
goto set_brk;
|
266 |
|
|
goto out;
|
267 |
|
|
}
|
268 |
|
|
|
269 |
|
|
/* Check against existing mmap mappings. */
|
270 |
|
|
if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
|
271 |
|
|
goto out;
|
272 |
|
|
|
273 |
|
|
/* Ok, looks good - let it rip. */
|
274 |
|
|
if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
|
275 |
|
|
goto out;
|
276 |
|
|
set_brk:
|
277 |
|
|
mm->brk = brk;
|
278 |
|
|
out:
|
279 |
|
|
retval = mm->brk;
|
280 |
|
|
up_write(&mm->mmap_sem);
|
281 |
|
|
return retval;
|
282 |
|
|
}
|
283 |
|
|
|
284 |
|
|
#ifdef DEBUG_MM_RB
/*
 * Walk the vma rbtree in order and report ordering problems with printk.
 *
 * Returns the number of nodes visited going forwards, or 0 when walking
 * backwards from the last node yields a different count.  An out-of-order
 * vm_start resets the running count, so the result then no longer matches
 * the real number of nodes.
 */
static int browse_rb(struct rb_root *root)
{
	struct rb_node *node, *last = NULL;
	unsigned long prev_start = 0, prev_end = 0;
	int forwards = 0, backwards = 0;

	for (node = rb_first(root); node; node = rb_next(node)) {
		struct vm_area_struct *vma =
			rb_entry(node, struct vm_area_struct, vm_rb);

		if (vma->vm_start < prev_start) {
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev_start);
			forwards = -1;
		}
		if (vma->vm_start < prev_end) {
			printk("vm_start %lx pend %lx\n", vma->vm_start, prev_end);
		}
		if (vma->vm_start > vma->vm_end) {
			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
		}
		forwards++;
		last = node;
		prev_start = vma->vm_start;
		prev_end = vma->vm_end;
	}

	for (node = last; node; node = rb_prev(node))
		backwards++;

	if (forwards != backwards) {
		printk("backwards %d, forwards %d\n", backwards, forwards);
		forwards = 0;
	}
	return forwards;
}

/*
 * Cross-check mm->map_count against both the length of the vma list and
 * the node count reported by browse_rb(); BUG if either disagrees.
 */
void validate_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	int bug = 0;
	int count = 0;

	for (vma = mm->mmap; vma; vma = vma->vm_next)
		count++;
	if (count != mm->map_count) {
		printk("map_count %d vm_next %d\n", mm->map_count, count);
		bug = 1;
	}

	count = browse_rb(&mm->mm_rb);
	if (count != mm->map_count) {
		printk("map_count %d rb %d\n", mm->map_count, count);
		bug = 1;
	}

	BUG_ON(bug);
}
#else
/* Debugging disabled: validate_mm() compiles to nothing. */
#define validate_mm(mm) do { } while (0)
#endif
|
333 |
|
|
|
334 |
|
|
static struct vm_area_struct *
|
335 |
|
|
find_vma_prepare(struct mm_struct *mm, unsigned long addr,
|
336 |
|
|
struct vm_area_struct **pprev, struct rb_node ***rb_link,
|
337 |
|
|
struct rb_node ** rb_parent)
|
338 |
|
|
{
|
339 |
|
|
struct vm_area_struct * vma;
|
340 |
|
|
struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
|
341 |
|
|
|
342 |
|
|
__rb_link = &mm->mm_rb.rb_node;
|
343 |
|
|
rb_prev = __rb_parent = NULL;
|
344 |
|
|
vma = NULL;
|
345 |
|
|
|
346 |
|
|
while (*__rb_link) {
|
347 |
|
|
struct vm_area_struct *vma_tmp;
|
348 |
|
|
|
349 |
|
|
__rb_parent = *__rb_link;
|
350 |
|
|
vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
|
351 |
|
|
|
352 |
|
|
if (vma_tmp->vm_end > addr) {
|
353 |
|
|
vma = vma_tmp;
|
354 |
|
|
if (vma_tmp->vm_start <= addr)
|
355 |
|
|
return vma;
|
356 |
|
|
__rb_link = &__rb_parent->rb_left;
|
357 |
|
|
} else {
|
358 |
|
|
rb_prev = __rb_parent;
|
359 |
|
|
__rb_link = &__rb_parent->rb_right;
|
360 |
|
|
}
|
361 |
|
|
}
|
362 |
|
|
|
363 |
|
|
*pprev = NULL;
|
364 |
|
|
if (rb_prev)
|
365 |
|
|
*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
|
366 |
|
|
*rb_link = __rb_link;
|
367 |
|
|
*rb_parent = __rb_parent;
|
368 |
|
|
return vma;
|
369 |
|
|
}
|
370 |
|
|
|
371 |
|
|
static inline void
|
372 |
|
|
__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
|
373 |
|
|
struct vm_area_struct *prev, struct rb_node *rb_parent)
|
374 |
|
|
{
|
375 |
|
|
if (prev) {
|
376 |
|
|
vma->vm_next = prev->vm_next;
|
377 |
|
|
prev->vm_next = vma;
|
378 |
|
|
} else {
|
379 |
|
|
mm->mmap = vma;
|
380 |
|
|
if (rb_parent)
|
381 |
|
|
vma->vm_next = rb_entry(rb_parent,
|
382 |
|
|
struct vm_area_struct, vm_rb);
|
383 |
|
|
else
|
384 |
|
|
vma->vm_next = NULL;
|
385 |
|
|
}
|
386 |
|
|
}
|
387 |
|
|
|
388 |
|
|
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
|
389 |
|
|
struct rb_node **rb_link, struct rb_node *rb_parent)
|
390 |
|
|
{
|
391 |
|
|
rb_link_node(&vma->vm_rb, rb_parent, rb_link);
|
392 |
|
|
rb_insert_color(&vma->vm_rb, &mm->mm_rb);
|
393 |
|
|
}
|
394 |
|
|
|
395 |
|
|
static inline void __vma_link_file(struct vm_area_struct *vma)
|
396 |
|
|
{
|
397 |
|
|
struct file * file;
|
398 |
|
|
|
399 |
|
|
file = vma->vm_file;
|
400 |
|
|
if (file) {
|
401 |
|
|
struct address_space *mapping = file->f_mapping;
|
402 |
|
|
|
403 |
|
|
if (vma->vm_flags & VM_DENYWRITE)
|
404 |
|
|
atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
|
405 |
|
|
if (vma->vm_flags & VM_SHARED)
|
406 |
|
|
mapping->i_mmap_writable++;
|
407 |
|
|
|
408 |
|
|
flush_dcache_mmap_lock(mapping);
|
409 |
|
|
if (unlikely(vma->vm_flags & VM_NONLINEAR))
|
410 |
|
|
vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
|
411 |
|
|
else
|
412 |
|
|
vma_prio_tree_insert(vma, &mapping->i_mmap);
|
413 |
|
|
flush_dcache_mmap_unlock(mapping);
|
414 |
|
|
}
|
415 |
|
|
}
|
416 |
|
|
|
417 |
|
|
/*
 * Link @vma into the mm's list and rbtree and into its anon_vma.
 * The file side (__vma_link_file) and mm->map_count are left to the caller.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	   struct vm_area_struct *prev, struct rb_node **rb_link,
	   struct rb_node *rb_parent)
{
	/* list first, then tree, then anon_vma */
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	__anon_vma_link(vma);
}
|
426 |
|
|
|
427 |
|
|
static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
|
428 |
|
|
struct vm_area_struct *prev, struct rb_node **rb_link,
|
429 |
|
|
struct rb_node *rb_parent)
|
430 |
|
|
{
|
431 |
|
|
struct address_space *mapping = NULL;
|
432 |
|
|
|
433 |
|
|
if (vma->vm_file)
|
434 |
|
|
mapping = vma->vm_file->f_mapping;
|
435 |
|
|
|
436 |
|
|
if (mapping) {
|
437 |
|
|
spin_lock(&mapping->i_mmap_lock);
|
438 |
|
|
vma->vm_truncate_count = mapping->truncate_count;
|
439 |
|
|
}
|
440 |
|
|
anon_vma_lock(vma);
|
441 |
|
|
|
442 |
|
|
__vma_link(mm, vma, prev, rb_link, rb_parent);
|
443 |
|
|
__vma_link_file(vma);
|
444 |
|
|
|
445 |
|
|
anon_vma_unlock(vma);
|
446 |
|
|
if (mapping)
|
447 |
|
|
spin_unlock(&mapping->i_mmap_lock);
|
448 |
|
|
|
449 |
|
|
mm->map_count++;
|
450 |
|
|
validate_mm(mm);
|
451 |
|
|
}
|
452 |
|
|
|
453 |
|
|
/*
|
454 |
|
|
* Helper for vma_adjust in the split_vma insert case:
|
455 |
|
|
* insert vm structure into list and rbtree and anon_vma,
|
456 |
|
|
* but it has already been inserted into prio_tree earlier.
|
457 |
|
|
*/
|
458 |
|
|
static void
|
459 |
|
|
__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
|
460 |
|
|
{
|
461 |
|
|
struct vm_area_struct * __vma, * prev;
|
462 |
|
|
struct rb_node ** rb_link, * rb_parent;
|
463 |
|
|
|
464 |
|
|
__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
|
465 |
|
|
BUG_ON(__vma && __vma->vm_start < vma->vm_end);
|
466 |
|
|
__vma_link(mm, vma, prev, rb_link, rb_parent);
|
467 |
|
|
mm->map_count++;
|
468 |
|
|
}
|
469 |
|
|
|
470 |
|
|
static inline void
|
471 |
|
|
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
|
472 |
|
|
struct vm_area_struct *prev)
|
473 |
|
|
{
|
474 |
|
|
prev->vm_next = vma->vm_next;
|
475 |
|
|
rb_erase(&vma->vm_rb, &mm->mm_rb);
|
476 |
|
|
if (mm->mmap_cache == vma)
|
477 |
|
|
mm->mmap_cache = prev;
|
478 |
|
|
}
|
479 |
|
|
|
480 |
|
|
/*
|
481 |
|
|
* We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
|
482 |
|
|
* is already present in an i_mmap tree without adjusting the tree.
|
483 |
|
|
* The following helper function should be used when such adjustments
|
484 |
|
|
* are necessary. The "insert" vma (if any) is to be inserted
|
485 |
|
|
* before we drop the necessary locks.
|
486 |
|
|
*/
|
487 |
|
|
void vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
488 |
|
|
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
|
489 |
|
|
{
|
490 |
|
|
struct mm_struct *mm = vma->vm_mm;
|
491 |
|
|
struct vm_area_struct *next = vma->vm_next;
|
492 |
|
|
struct vm_area_struct *importer = NULL;
|
493 |
|
|
struct address_space *mapping = NULL;
|
494 |
|
|
struct prio_tree_root *root = NULL;
|
495 |
|
|
struct file *file = vma->vm_file;
|
496 |
|
|
struct anon_vma *anon_vma = NULL;
|
497 |
|
|
long adjust_next = 0;
|
498 |
|
|
int remove_next = 0;
|
499 |
|
|
|
500 |
|
|
if (next && !insert) {
|
501 |
|
|
if (end >= next->vm_end) {
|
502 |
|
|
/*
|
503 |
|
|
* vma expands, overlapping all the next, and
|
504 |
|
|
* perhaps the one after too (mprotect case 6).
|
505 |
|
|
*/
|
506 |
|
|
again: remove_next = 1 + (end > next->vm_end);
|
507 |
|
|
end = next->vm_end;
|
508 |
|
|
anon_vma = next->anon_vma;
|
509 |
|
|
importer = vma;
|
510 |
|
|
} else if (end > next->vm_start) {
|
511 |
|
|
/*
|
512 |
|
|
* vma expands, overlapping part of the next:
|
513 |
|
|
* mprotect case 5 shifting the boundary up.
|
514 |
|
|
*/
|
515 |
|
|
adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
|
516 |
|
|
anon_vma = next->anon_vma;
|
517 |
|
|
importer = vma;
|
518 |
|
|
} else if (end < vma->vm_end) {
|
519 |
|
|
/*
|
520 |
|
|
* vma shrinks, and !insert tells it's not
|
521 |
|
|
* split_vma inserting another: so it must be
|
522 |
|
|
* mprotect case 4 shifting the boundary down.
|
523 |
|
|
*/
|
524 |
|
|
adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
|
525 |
|
|
anon_vma = next->anon_vma;
|
526 |
|
|
importer = next;
|
527 |
|
|
}
|
528 |
|
|
}
|
529 |
|
|
|
530 |
|
|
if (file) {
|
531 |
|
|
mapping = file->f_mapping;
|
532 |
|
|
if (!(vma->vm_flags & VM_NONLINEAR))
|
533 |
|
|
root = &mapping->i_mmap;
|
534 |
|
|
spin_lock(&mapping->i_mmap_lock);
|
535 |
|
|
if (importer &&
|
536 |
|
|
vma->vm_truncate_count != next->vm_truncate_count) {
|
537 |
|
|
/*
|
538 |
|
|
* unmap_mapping_range might be in progress:
|
539 |
|
|
* ensure that the expanding vma is rescanned.
|
540 |
|
|
*/
|
541 |
|
|
importer->vm_truncate_count = 0;
|
542 |
|
|
}
|
543 |
|
|
if (insert) {
|
544 |
|
|
insert->vm_truncate_count = vma->vm_truncate_count;
|
545 |
|
|
/*
|
546 |
|
|
* Put into prio_tree now, so instantiated pages
|
547 |
|
|
* are visible to arm/parisc __flush_dcache_page
|
548 |
|
|
* throughout; but we cannot insert into address
|
549 |
|
|
* space until vma start or end is updated.
|
550 |
|
|
*/
|
551 |
|
|
__vma_link_file(insert);
|
552 |
|
|
}
|
553 |
|
|
}
|
554 |
|
|
|
555 |
|
|
/*
|
556 |
|
|
* When changing only vma->vm_end, we don't really need
|
557 |
|
|
* anon_vma lock: but is that case worth optimizing out?
|
558 |
|
|
*/
|
559 |
|
|
if (vma->anon_vma)
|
560 |
|
|
anon_vma = vma->anon_vma;
|
561 |
|
|
if (anon_vma) {
|
562 |
|
|
spin_lock(&anon_vma->lock);
|
563 |
|
|
/*
|
564 |
|
|
* Easily overlooked: when mprotect shifts the boundary,
|
565 |
|
|
* make sure the expanding vma has anon_vma set if the
|
566 |
|
|
* shrinking vma had, to cover any anon pages imported.
|
567 |
|
|
*/
|
568 |
|
|
if (importer && !importer->anon_vma) {
|
569 |
|
|
importer->anon_vma = anon_vma;
|
570 |
|
|
__anon_vma_link(importer);
|
571 |
|
|
}
|
572 |
|
|
}
|
573 |
|
|
|
574 |
|
|
if (root) {
|
575 |
|
|
flush_dcache_mmap_lock(mapping);
|
576 |
|
|
vma_prio_tree_remove(vma, root);
|
577 |
|
|
if (adjust_next)
|
578 |
|
|
vma_prio_tree_remove(next, root);
|
579 |
|
|
}
|
580 |
|
|
|
581 |
|
|
vma->vm_start = start;
|
582 |
|
|
vma->vm_end = end;
|
583 |
|
|
vma->vm_pgoff = pgoff;
|
584 |
|
|
if (adjust_next) {
|
585 |
|
|
next->vm_start += adjust_next << PAGE_SHIFT;
|
586 |
|
|
next->vm_pgoff += adjust_next;
|
587 |
|
|
}
|
588 |
|
|
|
589 |
|
|
if (root) {
|
590 |
|
|
if (adjust_next)
|
591 |
|
|
vma_prio_tree_insert(next, root);
|
592 |
|
|
vma_prio_tree_insert(vma, root);
|
593 |
|
|
flush_dcache_mmap_unlock(mapping);
|
594 |
|
|
}
|
595 |
|
|
|
596 |
|
|
if (remove_next) {
|
597 |
|
|
/*
|
598 |
|
|
* vma_merge has merged next into vma, and needs
|
599 |
|
|
* us to remove next before dropping the locks.
|
600 |
|
|
*/
|
601 |
|
|
__vma_unlink(mm, next, vma);
|
602 |
|
|
if (file)
|
603 |
|
|
__remove_shared_vm_struct(next, file, mapping);
|
604 |
|
|
if (next->anon_vma)
|
605 |
|
|
__anon_vma_merge(vma, next);
|
606 |
|
|
} else if (insert) {
|
607 |
|
|
/*
|
608 |
|
|
* split_vma has split insert from vma, and needs
|
609 |
|
|
* us to insert it before dropping the locks
|
610 |
|
|
* (it may either follow vma or precede it).
|
611 |
|
|
*/
|
612 |
|
|
__insert_vm_struct(mm, insert);
|
613 |
|
|
}
|
614 |
|
|
|
615 |
|
|
if (anon_vma)
|
616 |
|
|
spin_unlock(&anon_vma->lock);
|
617 |
|
|
if (mapping)
|
618 |
|
|
spin_unlock(&mapping->i_mmap_lock);
|
619 |
|
|
|
620 |
|
|
if (remove_next) {
|
621 |
|
|
if (file)
|
622 |
|
|
fput(file);
|
623 |
|
|
mm->map_count--;
|
624 |
|
|
mpol_free(vma_policy(next));
|
625 |
|
|
kmem_cache_free(vm_area_cachep, next);
|
626 |
|
|
/*
|
627 |
|
|
* In mprotect's case 6 (see comments on vma_merge),
|
628 |
|
|
* we must remove another next too. It would clutter
|
629 |
|
|
* up the code too much to do both in one go.
|
630 |
|
|
*/
|
631 |
|
|
if (remove_next == 2) {
|
632 |
|
|
next = vma->vm_next;
|
633 |
|
|
goto again;
|
634 |
|
|
}
|
635 |
|
|
}
|
636 |
|
|
|
637 |
|
|
validate_mm(mm);
|
638 |
|
|
}
|
639 |
|
|
|
640 |
|
|
/*
|
641 |
|
|
* If the vma has a ->close operation then the driver probably needs to release
|
642 |
|
|
* per-vma resources, so we don't attempt to merge those.
|
643 |
|
|
*/
|
644 |
|
|
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
|
645 |
|
|
|
646 |
|
|
static inline int is_mergeable_vma(struct vm_area_struct *vma,
|
647 |
|
|
struct file *file, unsigned long vm_flags)
|
648 |
|
|
{
|
649 |
|
|
if (vma->vm_flags != vm_flags)
|
650 |
|
|
return 0;
|
651 |
|
|
if (vma->vm_file != file)
|
652 |
|
|
return 0;
|
653 |
|
|
if (vma->vm_ops && vma->vm_ops->close)
|
654 |
|
|
return 0;
|
655 |
|
|
return 1;
|
656 |
|
|
}
|
657 |
|
|
|
658 |
|
|
/*
 * Two anon_vma pointers are compatible for merging when at least one of
 * them is NULL or when both refer to the same anon_vma.
 */
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	if (!anon_vma1 || !anon_vma2)
		return 1;
	return anon_vma1 == anon_vma2;
}
|
663 |
|
|
|
664 |
|
|
/*
|
665 |
|
|
* Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
|
666 |
|
|
* in front of (at a lower virtual address and file offset than) the vma.
|
667 |
|
|
*
|
668 |
|
|
* We cannot merge two vmas if they have differently assigned (non-NULL)
|
669 |
|
|
* anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
|
670 |
|
|
*
|
671 |
|
|
* We don't check here for the merged mmap wrapping around the end of pagecache
|
672 |
|
|
* indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
|
673 |
|
|
* wrap, nor mmaps which cover the final page at index -1UL.
|
674 |
|
|
*/
|
675 |
|
|
static int
|
676 |
|
|
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
|
677 |
|
|
struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
|
678 |
|
|
{
|
679 |
|
|
if (is_mergeable_vma(vma, file, vm_flags) &&
|
680 |
|
|
is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
|
681 |
|
|
if (vma->vm_pgoff == vm_pgoff)
|
682 |
|
|
return 1;
|
683 |
|
|
}
|
684 |
|
|
return 0;
|
685 |
|
|
}
|
686 |
|
|
|
687 |
|
|
/*
|
688 |
|
|
* Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
|
689 |
|
|
* beyond (at a higher virtual address and file offset than) the vma.
|
690 |
|
|
*
|
691 |
|
|
* We cannot merge two vmas if they have differently assigned (non-NULL)
|
692 |
|
|
* anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
|
693 |
|
|
*/
|
694 |
|
|
static int
|
695 |
|
|
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
|
696 |
|
|
struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
|
697 |
|
|
{
|
698 |
|
|
if (is_mergeable_vma(vma, file, vm_flags) &&
|
699 |
|
|
is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
|
700 |
|
|
pgoff_t vm_pglen;
|
701 |
|
|
vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
|
702 |
|
|
if (vma->vm_pgoff + vm_pglen == vm_pgoff)
|
703 |
|
|
return 1;
|
704 |
|
|
}
|
705 |
|
|
return 0;
|
706 |
|
|
}
|
707 |
|
|
|
708 |
|
|
/*
|
709 |
|
|
* Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
|
710 |
|
|
* whether that can be merged with its predecessor or its successor.
|
711 |
|
|
* Or both (it neatly fills a hole).
|
712 |
|
|
*
|
713 |
|
|
* In most cases - when called for mmap, brk or mremap - [addr,end) is
|
714 |
|
|
* certain not to be mapped by the time vma_merge is called; but when
|
715 |
|
|
* called for mprotect, it is certain to be already mapped (either at
|
716 |
|
|
* an offset within prev, or at the start of next), and the flags of
|
717 |
|
|
* this area are about to be changed to vm_flags - and the no-change
|
718 |
|
|
* case has already been eliminated.
|
719 |
|
|
*
|
720 |
|
|
* The following mprotect cases have to be considered, where AAAA is
|
721 |
|
|
* the area passed down from mprotect_fixup, never extending beyond one
|
722 |
|
|
* vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
|
723 |
|
|
*
|
724 |
|
|
* AAAA AAAA AAAA AAAA
|
725 |
|
|
* PPPPPPNNNNNN PPPPPPNNNNNN PPPPPPNNNNNN PPPPNNNNXXXX
|
726 |
|
|
* cannot merge might become might become might become
|
727 |
|
|
* PPNNNNNNNNNN PPPPPPPPPPNN PPPPPPPPPPPP 6 or
|
728 |
|
|
* mmap, brk or case 4 below case 5 below PPPPPPPPXXXX 7 or
|
729 |
|
|
* mremap move: PPPPNNNNNNNN 8
|
730 |
|
|
* AAAA
|
731 |
|
|
* PPPP NNNN PPPPPPPPPPPP PPPPPPPPNNNN PPPPNNNNNNNN
|
732 |
|
|
* might become case 1 below case 2 below case 3 below
|
733 |
|
|
*
|
734 |
|
|
* Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
|
735 |
|
|
* mprotect_fixup updates vm_flags & vm_page_prot on successful return.
|
736 |
|
|
*/
|
737 |
|
|
struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
738 |
|
|
struct vm_area_struct *prev, unsigned long addr,
|
739 |
|
|
unsigned long end, unsigned long vm_flags,
|
740 |
|
|
struct anon_vma *anon_vma, struct file *file,
|
741 |
|
|
pgoff_t pgoff, struct mempolicy *policy)
|
742 |
|
|
{
|
743 |
|
|
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
|
744 |
|
|
struct vm_area_struct *area, *next;
|
745 |
|
|
|
746 |
|
|
/*
|
747 |
|
|
* We later require that vma->vm_flags == vm_flags,
|
748 |
|
|
* so this tests vma->vm_flags & VM_SPECIAL, too.
|
749 |
|
|
*/
|
750 |
|
|
if (vm_flags & VM_SPECIAL)
|
751 |
|
|
return NULL;
|
752 |
|
|
|
753 |
|
|
if (prev)
|
754 |
|
|
next = prev->vm_next;
|
755 |
|
|
else
|
756 |
|
|
next = mm->mmap;
|
757 |
|
|
area = next;
|
758 |
|
|
if (next && next->vm_end == end) /* cases 6, 7, 8 */
|
759 |
|
|
next = next->vm_next;
|
760 |
|
|
|
761 |
|
|
/*
|
762 |
|
|
* Can it merge with the predecessor?
|
763 |
|
|
*/
|
764 |
|
|
if (prev && prev->vm_end == addr &&
|
765 |
|
|
mpol_equal(vma_policy(prev), policy) &&
|
766 |
|
|
can_vma_merge_after(prev, vm_flags,
|
767 |
|
|
anon_vma, file, pgoff)) {
|
768 |
|
|
/*
|
769 |
|
|
* OK, it can. Can we now merge in the successor as well?
|
770 |
|
|
*/
|
771 |
|
|
if (next && end == next->vm_start &&
|
772 |
|
|
mpol_equal(policy, vma_policy(next)) &&
|
773 |
|
|
can_vma_merge_before(next, vm_flags,
|
774 |
|
|
anon_vma, file, pgoff+pglen) &&
|
775 |
|
|
is_mergeable_anon_vma(prev->anon_vma,
|
776 |
|
|
next->anon_vma)) {
|
777 |
|
|
/* cases 1, 6 */
|
778 |
|
|
vma_adjust(prev, prev->vm_start,
|
779 |
|
|
next->vm_end, prev->vm_pgoff, NULL);
|
780 |
|
|
} else /* cases 2, 5, 7 */
|
781 |
|
|
vma_adjust(prev, prev->vm_start,
|
782 |
|
|
end, prev->vm_pgoff, NULL);
|
783 |
|
|
return prev;
|
784 |
|
|
}
|
785 |
|
|
|
786 |
|
|
/*
|
787 |
|
|
* Can this new request be merged in front of next?
|
788 |
|
|
*/
|
789 |
|
|
if (next && end == next->vm_start &&
|
790 |
|
|
mpol_equal(policy, vma_policy(next)) &&
|
791 |
|
|
can_vma_merge_before(next, vm_flags,
|
792 |
|
|
anon_vma, file, pgoff+pglen)) {
|
793 |
|
|
if (prev && addr < prev->vm_end) /* case 4 */
|
794 |
|
|
vma_adjust(prev, prev->vm_start,
|
795 |
|
|
addr, prev->vm_pgoff, NULL);
|
796 |
|
|
else /* cases 3, 8 */
|
797 |
|
|
vma_adjust(area, addr, next->vm_end,
|
798 |
|
|
next->vm_pgoff - pglen, NULL);
|
799 |
|
|
return area;
|
800 |
|
|
}
|
801 |
|
|
|
802 |
|
|
return NULL;
|
803 |
|
|
}
|
804 |
|
|
|
805 |
|
|
/*
|
806 |
|
|
* find_mergeable_anon_vma is used by anon_vma_prepare, to check
|
807 |
|
|
* neighbouring vmas for a suitable anon_vma, before it goes off
|
808 |
|
|
* to allocate a new anon_vma. It checks because a repetitive
|
809 |
|
|
* sequence of mprotects and faults may otherwise lead to distinct
|
810 |
|
|
* anon_vmas being allocated, preventing vma merge in subsequent
|
811 |
|
|
* mprotect.
|
812 |
|
|
*/
|
813 |
|
|
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
|
814 |
|
|
{
|
815 |
|
|
struct vm_area_struct *near;
|
816 |
|
|
unsigned long vm_flags;
|
817 |
|
|
|
818 |
|
|
near = vma->vm_next;
|
819 |
|
|
if (!near)
|
820 |
|
|
goto try_prev;
|
821 |
|
|
|
822 |
|
|
/*
|
823 |
|
|
* Since only mprotect tries to remerge vmas, match flags
|
824 |
|
|
* which might be mprotected into each other later on.
|
825 |
|
|
* Neither mlock nor madvise tries to remerge at present,
|
826 |
|
|
* so leave their flags as obstructing a merge.
|
827 |
|
|
*/
|
828 |
|
|
vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
|
829 |
|
|
vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
|
830 |
|
|
|
831 |
|
|
if (near->anon_vma && vma->vm_end == near->vm_start &&
|
832 |
|
|
mpol_equal(vma_policy(vma), vma_policy(near)) &&
|
833 |
|
|
can_vma_merge_before(near, vm_flags,
|
834 |
|
|
NULL, vma->vm_file, vma->vm_pgoff +
|
835 |
|
|
((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
|
836 |
|
|
return near->anon_vma;
|
837 |
|
|
try_prev:
|
838 |
|
|
/*
|
839 |
|
|
* It is potentially slow to have to call find_vma_prev here.
|
840 |
|
|
* But it's only on the first write fault on the vma, not
|
841 |
|
|
* every time, and we could devise a way to avoid it later
|
842 |
|
|
* (e.g. stash info in next's anon_vma_node when assigning
|
843 |
|
|
* an anon_vma, or when trying vma_merge). Another time.
|
844 |
|
|
*/
|
845 |
|
|
BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
|
846 |
|
|
if (!near)
|
847 |
|
|
goto none;
|
848 |
|
|
|
849 |
|
|
vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
|
850 |
|
|
vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
|
851 |
|
|
|
852 |
|
|
if (near->anon_vma && near->vm_end == vma->vm_start &&
|
853 |
|
|
mpol_equal(vma_policy(near), vma_policy(vma)) &&
|
854 |
|
|
can_vma_merge_after(near, vm_flags,
|
855 |
|
|
NULL, vma->vm_file, vma->vm_pgoff))
|
856 |
|
|
return near->anon_vma;
|
857 |
|
|
none:
|
858 |
|
|
/*
|
859 |
|
|
* There's no absolute need to look only at touching neighbours:
|
860 |
|
|
* we could search further afield for "compatible" anon_vmas.
|
861 |
|
|
* But it would probably just be a waste of time searching,
|
862 |
|
|
* or lead to too many vmas hanging off the same anon_vma.
|
863 |
|
|
* We're trying to allow mprotect remerging later on,
|
864 |
|
|
* not trying to minimize memory used for anon_vmas.
|
865 |
|
|
*/
|
866 |
|
|
return NULL;
|
867 |
|
|
}
|
868 |
|
|
|
869 |
|
|
#ifdef CONFIG_PROC_FS
|
870 |
|
|
void vm_stat_account(struct mm_struct *mm, unsigned long flags,
|
871 |
|
|
struct file *file, long pages)
|
872 |
|
|
{
|
873 |
|
|
const unsigned long stack_flags
|
874 |
|
|
= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
|
875 |
|
|
|
876 |
|
|
if (file) {
|
877 |
|
|
mm->shared_vm += pages;
|
878 |
|
|
if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
|
879 |
|
|
mm->exec_vm += pages;
|
880 |
|
|
} else if (flags & stack_flags)
|
881 |
|
|
mm->stack_vm += pages;
|
882 |
|
|
if (flags & (VM_RESERVED|VM_IO))
|
883 |
|
|
mm->reserved_vm += pages;
|
884 |
|
|
}
|
885 |
|
|
#endif /* CONFIG_PROC_FS */
|
886 |
|
|
|
887 |
|
|
/*
|
888 |
|
|
* The caller must hold down_write(current->mm->mmap_sem).
|
889 |
|
|
*/
|
890 |
|
|
|
891 |
|
|
unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
|
892 |
|
|
unsigned long len, unsigned long prot,
|
893 |
|
|
unsigned long flags, unsigned long pgoff)
|
894 |
|
|
{
|
895 |
|
|
struct mm_struct * mm = current->mm;
|
896 |
|
|
struct inode *inode;
|
897 |
|
|
unsigned int vm_flags;
|
898 |
|
|
int error;
|
899 |
|
|
int accountable = 1;
|
900 |
|
|
unsigned long reqprot = prot;
|
901 |
|
|
|
902 |
|
|
/*
|
903 |
|
|
* Does the application expect PROT_READ to imply PROT_EXEC?
|
904 |
|
|
*
|
905 |
|
|
* (the exception is when the underlying filesystem is noexec
|
906 |
|
|
* mounted, in which case we dont add PROT_EXEC.)
|
907 |
|
|
*/
|
908 |
|
|
if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
|
909 |
|
|
if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
|
910 |
|
|
prot |= PROT_EXEC;
|
911 |
|
|
|
912 |
|
|
if (!len)
|
913 |
|
|
return -EINVAL;
|
914 |
|
|
|
915 |
|
|
if (!(flags & MAP_FIXED))
|
916 |
|
|
addr = round_hint_to_min(addr);
|
917 |
|
|
|
918 |
|
|
error = arch_mmap_check(addr, len, flags);
|
919 |
|
|
if (error)
|
920 |
|
|
return error;
|
921 |
|
|
|
922 |
|
|
/* Careful about overflows.. */
|
923 |
|
|
len = PAGE_ALIGN(len);
|
924 |
|
|
if (!len || len > TASK_SIZE)
|
925 |
|
|
return -ENOMEM;
|
926 |
|
|
|
927 |
|
|
/* offset overflow? */
|
928 |
|
|
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
|
929 |
|
|
return -EOVERFLOW;
|
930 |
|
|
|
931 |
|
|
/* Too many mappings? */
|
932 |
|
|
if (mm->map_count > sysctl_max_map_count)
|
933 |
|
|
return -ENOMEM;
|
934 |
|
|
|
935 |
|
|
/* Obtain the address to map to. we verify (or select) it and ensure
|
936 |
|
|
* that it represents a valid section of the address space.
|
937 |
|
|
*/
|
938 |
|
|
addr = get_unmapped_area(file, addr, len, pgoff, flags);
|
939 |
|
|
if (addr & ~PAGE_MASK)
|
940 |
|
|
return addr;
|
941 |
|
|
|
942 |
|
|
/* Do simple checking here so the lower-level routines won't have
|
943 |
|
|
* to. we assume access permissions have been handled by the open
|
944 |
|
|
* of the memory object, so we don't do any here.
|
945 |
|
|
*/
|
946 |
|
|
vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
|
947 |
|
|
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
|
948 |
|
|
|
949 |
|
|
if (flags & MAP_LOCKED) {
|
950 |
|
|
if (!can_do_mlock())
|
951 |
|
|
return -EPERM;
|
952 |
|
|
vm_flags |= VM_LOCKED;
|
953 |
|
|
}
|
954 |
|
|
/* mlock MCL_FUTURE? */
|
955 |
|
|
if (vm_flags & VM_LOCKED) {
|
956 |
|
|
unsigned long locked, lock_limit;
|
957 |
|
|
locked = len >> PAGE_SHIFT;
|
958 |
|
|
locked += mm->locked_vm;
|
959 |
|
|
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
|
960 |
|
|
lock_limit >>= PAGE_SHIFT;
|
961 |
|
|
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
|
962 |
|
|
return -EAGAIN;
|
963 |
|
|
}
|
964 |
|
|
|
965 |
|
|
inode = file ? file->f_path.dentry->d_inode : NULL;
|
966 |
|
|
|
967 |
|
|
if (file) {
|
968 |
|
|
switch (flags & MAP_TYPE) {
|
969 |
|
|
case MAP_SHARED:
|
970 |
|
|
if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
|
971 |
|
|
return -EACCES;
|
972 |
|
|
|
973 |
|
|
/*
|
974 |
|
|
* Make sure we don't allow writing to an append-only
|
975 |
|
|
* file..
|
976 |
|
|
*/
|
977 |
|
|
if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
|
978 |
|
|
return -EACCES;
|
979 |
|
|
|
980 |
|
|
/*
|
981 |
|
|
* Make sure there are no mandatory locks on the file.
|
982 |
|
|
*/
|
983 |
|
|
if (locks_verify_locked(inode))
|
984 |
|
|
return -EAGAIN;
|
985 |
|
|
|
986 |
|
|
vm_flags |= VM_SHARED | VM_MAYSHARE;
|
987 |
|
|
if (!(file->f_mode & FMODE_WRITE))
|
988 |
|
|
vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
|
989 |
|
|
|
990 |
|
|
/* fall through */
|
991 |
|
|
case MAP_PRIVATE:
|
992 |
|
|
if (!(file->f_mode & FMODE_READ))
|
993 |
|
|
return -EACCES;
|
994 |
|
|
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
|
995 |
|
|
if (vm_flags & VM_EXEC)
|
996 |
|
|
return -EPERM;
|
997 |
|
|
vm_flags &= ~VM_MAYEXEC;
|
998 |
|
|
}
|
999 |
|
|
if (is_file_hugepages(file))
|
1000 |
|
|
accountable = 0;
|
1001 |
|
|
|
1002 |
|
|
if (!file->f_op || !file->f_op->mmap)
|
1003 |
|
|
return -ENODEV;
|
1004 |
|
|
break;
|
1005 |
|
|
|
1006 |
|
|
default:
|
1007 |
|
|
return -EINVAL;
|
1008 |
|
|
}
|
1009 |
|
|
} else {
|
1010 |
|
|
switch (flags & MAP_TYPE) {
|
1011 |
|
|
case MAP_SHARED:
|
1012 |
|
|
vm_flags |= VM_SHARED | VM_MAYSHARE;
|
1013 |
|
|
break;
|
1014 |
|
|
case MAP_PRIVATE:
|
1015 |
|
|
/*
|
1016 |
|
|
* Set pgoff according to addr for anon_vma.
|
1017 |
|
|
*/
|
1018 |
|
|
pgoff = addr >> PAGE_SHIFT;
|
1019 |
|
|
break;
|
1020 |
|
|
default:
|
1021 |
|
|
return -EINVAL;
|
1022 |
|
|
}
|
1023 |
|
|
}
|
1024 |
|
|
|
1025 |
|
|
error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
|
1026 |
|
|
if (error)
|
1027 |
|
|
return error;
|
1028 |
|
|
|
1029 |
|
|
return mmap_region(file, addr, len, flags, vm_flags, pgoff,
|
1030 |
|
|
accountable);
|
1031 |
|
|
}
|
1032 |
|
|
EXPORT_SYMBOL(do_mmap_pgoff);
|
1033 |
|
|
|
1034 |
|
|
/*
|
1035 |
|
|
* Some shared mappigns will want the pages marked read-only
|
1036 |
|
|
* to track write events. If so, we'll downgrade vm_page_prot
|
1037 |
|
|
* to the private version (using protection_map[] without the
|
1038 |
|
|
* VM_SHARED bit).
|
1039 |
|
|
*/
|
1040 |
|
|
int vma_wants_writenotify(struct vm_area_struct *vma)
|
1041 |
|
|
{
|
1042 |
|
|
unsigned int vm_flags = vma->vm_flags;
|
1043 |
|
|
|
1044 |
|
|
/* If it was private or non-writable, the write bit is already clear */
|
1045 |
|
|
if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
|
1046 |
|
|
return 0;
|
1047 |
|
|
|
1048 |
|
|
/* The backer wishes to know when pages are first written to? */
|
1049 |
|
|
if (vma->vm_ops && vma->vm_ops->page_mkwrite)
|
1050 |
|
|
return 1;
|
1051 |
|
|
|
1052 |
|
|
/* The open routine did something to the protections already? */
|
1053 |
|
|
if (pgprot_val(vma->vm_page_prot) !=
|
1054 |
|
|
pgprot_val(vm_get_page_prot(vm_flags)))
|
1055 |
|
|
return 0;
|
1056 |
|
|
|
1057 |
|
|
/* Specialty mapping? */
|
1058 |
|
|
if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
|
1059 |
|
|
return 0;
|
1060 |
|
|
|
1061 |
|
|
/* Can the mapping track the dirty pages? */
|
1062 |
|
|
return vma->vm_file && vma->vm_file->f_mapping &&
|
1063 |
|
|
mapping_cap_account_dirty(vma->vm_file->f_mapping);
|
1064 |
|
|
}
|
1065 |
|
|
|
1066 |
|
|
|
1067 |
|
|
unsigned long mmap_region(struct file *file, unsigned long addr,
|
1068 |
|
|
unsigned long len, unsigned long flags,
|
1069 |
|
|
unsigned int vm_flags, unsigned long pgoff,
|
1070 |
|
|
int accountable)
|
1071 |
|
|
{
|
1072 |
|
|
struct mm_struct *mm = current->mm;
|
1073 |
|
|
struct vm_area_struct *vma, *prev;
|
1074 |
|
|
int correct_wcount = 0;
|
1075 |
|
|
int error;
|
1076 |
|
|
struct rb_node **rb_link, *rb_parent;
|
1077 |
|
|
unsigned long charged = 0;
|
1078 |
|
|
struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
|
1079 |
|
|
|
1080 |
|
|
/* Clear old maps */
|
1081 |
|
|
error = -ENOMEM;
|
1082 |
|
|
munmap_back:
|
1083 |
|
|
vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
|
1084 |
|
|
if (vma && vma->vm_start < addr + len) {
|
1085 |
|
|
if (do_munmap(mm, addr, len))
|
1086 |
|
|
return -ENOMEM;
|
1087 |
|
|
goto munmap_back;
|
1088 |
|
|
}
|
1089 |
|
|
|
1090 |
|
|
/* Check against address space limit. */
|
1091 |
|
|
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
|
1092 |
|
|
return -ENOMEM;
|
1093 |
|
|
|
1094 |
|
|
if (accountable && (!(flags & MAP_NORESERVE) ||
|
1095 |
|
|
sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
|
1096 |
|
|
if (vm_flags & VM_SHARED) {
|
1097 |
|
|
/* Check memory availability in shmem_file_setup? */
|
1098 |
|
|
vm_flags |= VM_ACCOUNT;
|
1099 |
|
|
} else if (vm_flags & VM_WRITE) {
|
1100 |
|
|
/*
|
1101 |
|
|
* Private writable mapping: check memory availability
|
1102 |
|
|
*/
|
1103 |
|
|
charged = len >> PAGE_SHIFT;
|
1104 |
|
|
if (security_vm_enough_memory(charged))
|
1105 |
|
|
return -ENOMEM;
|
1106 |
|
|
vm_flags |= VM_ACCOUNT;
|
1107 |
|
|
}
|
1108 |
|
|
}
|
1109 |
|
|
|
1110 |
|
|
/*
|
1111 |
|
|
* Can we just expand an old private anonymous mapping?
|
1112 |
|
|
* The VM_SHARED test is necessary because shmem_zero_setup
|
1113 |
|
|
* will create the file object for a shared anonymous map below.
|
1114 |
|
|
*/
|
1115 |
|
|
if (!file && !(vm_flags & VM_SHARED) &&
|
1116 |
|
|
vma_merge(mm, prev, addr, addr + len, vm_flags,
|
1117 |
|
|
NULL, NULL, pgoff, NULL))
|
1118 |
|
|
goto out;
|
1119 |
|
|
|
1120 |
|
|
/*
|
1121 |
|
|
* Determine the object being mapped and call the appropriate
|
1122 |
|
|
* specific mapper. the address has already been validated, but
|
1123 |
|
|
* not unmapped, but the maps are removed from the list.
|
1124 |
|
|
*/
|
1125 |
|
|
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
|
1126 |
|
|
if (!vma) {
|
1127 |
|
|
error = -ENOMEM;
|
1128 |
|
|
goto unacct_error;
|
1129 |
|
|
}
|
1130 |
|
|
|
1131 |
|
|
vma->vm_mm = mm;
|
1132 |
|
|
vma->vm_start = addr;
|
1133 |
|
|
vma->vm_end = addr + len;
|
1134 |
|
|
vma->vm_flags = vm_flags;
|
1135 |
|
|
vma->vm_page_prot = vm_get_page_prot(vm_flags);
|
1136 |
|
|
vma->vm_pgoff = pgoff;
|
1137 |
|
|
|
1138 |
|
|
if (file) {
|
1139 |
|
|
error = -EINVAL;
|
1140 |
|
|
if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
|
1141 |
|
|
goto free_vma;
|
1142 |
|
|
if (vm_flags & VM_DENYWRITE) {
|
1143 |
|
|
error = deny_write_access(file);
|
1144 |
|
|
if (error)
|
1145 |
|
|
goto free_vma;
|
1146 |
|
|
correct_wcount = 1;
|
1147 |
|
|
}
|
1148 |
|
|
vma->vm_file = file;
|
1149 |
|
|
get_file(file);
|
1150 |
|
|
error = file->f_op->mmap(file, vma);
|
1151 |
|
|
if (error)
|
1152 |
|
|
goto unmap_and_free_vma;
|
1153 |
|
|
} else if (vm_flags & VM_SHARED) {
|
1154 |
|
|
error = shmem_zero_setup(vma);
|
1155 |
|
|
if (error)
|
1156 |
|
|
goto free_vma;
|
1157 |
|
|
}
|
1158 |
|
|
|
1159 |
|
|
/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
|
1160 |
|
|
* shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
|
1161 |
|
|
* that memory reservation must be checked; but that reservation
|
1162 |
|
|
* belongs to shared memory object, not to vma: so now clear it.
|
1163 |
|
|
*/
|
1164 |
|
|
if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
|
1165 |
|
|
vma->vm_flags &= ~VM_ACCOUNT;
|
1166 |
|
|
|
1167 |
|
|
/* Can addr have changed??
|
1168 |
|
|
*
|
1169 |
|
|
* Answer: Yes, several device drivers can do it in their
|
1170 |
|
|
* f_op->mmap method. -DaveM
|
1171 |
|
|
*/
|
1172 |
|
|
addr = vma->vm_start;
|
1173 |
|
|
pgoff = vma->vm_pgoff;
|
1174 |
|
|
vm_flags = vma->vm_flags;
|
1175 |
|
|
|
1176 |
|
|
if (vma_wants_writenotify(vma))
|
1177 |
|
|
vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
|
1178 |
|
|
|
1179 |
|
|
if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
|
1180 |
|
|
vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
|
1181 |
|
|
file = vma->vm_file;
|
1182 |
|
|
vma_link(mm, vma, prev, rb_link, rb_parent);
|
1183 |
|
|
if (correct_wcount)
|
1184 |
|
|
atomic_inc(&inode->i_writecount);
|
1185 |
|
|
} else {
|
1186 |
|
|
if (file) {
|
1187 |
|
|
if (correct_wcount)
|
1188 |
|
|
atomic_inc(&inode->i_writecount);
|
1189 |
|
|
fput(file);
|
1190 |
|
|
}
|
1191 |
|
|
mpol_free(vma_policy(vma));
|
1192 |
|
|
kmem_cache_free(vm_area_cachep, vma);
|
1193 |
|
|
}
|
1194 |
|
|
out:
|
1195 |
|
|
mm->total_vm += len >> PAGE_SHIFT;
|
1196 |
|
|
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
|
1197 |
|
|
if (vm_flags & VM_LOCKED) {
|
1198 |
|
|
mm->locked_vm += len >> PAGE_SHIFT;
|
1199 |
|
|
make_pages_present(addr, addr + len);
|
1200 |
|
|
}
|
1201 |
|
|
if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
|
1202 |
|
|
make_pages_present(addr, addr + len);
|
1203 |
|
|
return addr;
|
1204 |
|
|
|
1205 |
|
|
unmap_and_free_vma:
|
1206 |
|
|
if (correct_wcount)
|
1207 |
|
|
atomic_inc(&inode->i_writecount);
|
1208 |
|
|
vma->vm_file = NULL;
|
1209 |
|
|
fput(file);
|
1210 |
|
|
|
1211 |
|
|
/* Undo any partial mapping done by a device driver. */
|
1212 |
|
|
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
|
1213 |
|
|
charged = 0;
|
1214 |
|
|
free_vma:
|
1215 |
|
|
kmem_cache_free(vm_area_cachep, vma);
|
1216 |
|
|
unacct_error:
|
1217 |
|
|
if (charged)
|
1218 |
|
|
vm_unacct_memory(charged);
|
1219 |
|
|
return error;
|
1220 |
|
|
}
|
1221 |
|
|
|
1222 |
|
|
/* Get an address range which is currently unmapped.
|
1223 |
|
|
* For shmat() with addr=0.
|
1224 |
|
|
*
|
1225 |
|
|
* Ugly calling convention alert:
|
1226 |
|
|
* Return value with the low bits set means error value,
|
1227 |
|
|
* ie
|
1228 |
|
|
* if (ret & ~PAGE_MASK)
|
1229 |
|
|
* error = ret;
|
1230 |
|
|
*
|
1231 |
|
|
* This function "knows" that -ENOMEM has the bits set.
|
1232 |
|
|
*/
|
1233 |
|
|
#ifndef HAVE_ARCH_UNMAPPED_AREA
|
1234 |
|
|
unsigned long
|
1235 |
|
|
arch_get_unmapped_area(struct file *filp, unsigned long addr,
|
1236 |
|
|
unsigned long len, unsigned long pgoff, unsigned long flags)
|
1237 |
|
|
{
|
1238 |
|
|
struct mm_struct *mm = current->mm;
|
1239 |
|
|
struct vm_area_struct *vma;
|
1240 |
|
|
unsigned long start_addr;
|
1241 |
|
|
|
1242 |
|
|
if (len > TASK_SIZE)
|
1243 |
|
|
return -ENOMEM;
|
1244 |
|
|
|
1245 |
|
|
if (flags & MAP_FIXED)
|
1246 |
|
|
return addr;
|
1247 |
|
|
|
1248 |
|
|
if (addr) {
|
1249 |
|
|
addr = PAGE_ALIGN(addr);
|
1250 |
|
|
vma = find_vma(mm, addr);
|
1251 |
|
|
if (TASK_SIZE - len >= addr &&
|
1252 |
|
|
(!vma || addr + len <= vma->vm_start))
|
1253 |
|
|
return addr;
|
1254 |
|
|
}
|
1255 |
|
|
if (len > mm->cached_hole_size) {
|
1256 |
|
|
start_addr = addr = mm->free_area_cache;
|
1257 |
|
|
} else {
|
1258 |
|
|
start_addr = addr = TASK_UNMAPPED_BASE;
|
1259 |
|
|
mm->cached_hole_size = 0;
|
1260 |
|
|
}
|
1261 |
|
|
|
1262 |
|
|
full_search:
|
1263 |
|
|
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
|
1264 |
|
|
/* At this point: (!vma || addr < vma->vm_end). */
|
1265 |
|
|
if (TASK_SIZE - len < addr) {
|
1266 |
|
|
/*
|
1267 |
|
|
* Start a new search - just in case we missed
|
1268 |
|
|
* some holes.
|
1269 |
|
|
*/
|
1270 |
|
|
if (start_addr != TASK_UNMAPPED_BASE) {
|
1271 |
|
|
addr = TASK_UNMAPPED_BASE;
|
1272 |
|
|
start_addr = addr;
|
1273 |
|
|
mm->cached_hole_size = 0;
|
1274 |
|
|
goto full_search;
|
1275 |
|
|
}
|
1276 |
|
|
return -ENOMEM;
|
1277 |
|
|
}
|
1278 |
|
|
if (!vma || addr + len <= vma->vm_start) {
|
1279 |
|
|
/*
|
1280 |
|
|
* Remember the place where we stopped the search:
|
1281 |
|
|
*/
|
1282 |
|
|
mm->free_area_cache = addr + len;
|
1283 |
|
|
return addr;
|
1284 |
|
|
}
|
1285 |
|
|
if (addr + mm->cached_hole_size < vma->vm_start)
|
1286 |
|
|
mm->cached_hole_size = vma->vm_start - addr;
|
1287 |
|
|
addr = vma->vm_end;
|
1288 |
|
|
}
|
1289 |
|
|
}
|
1290 |
|
|
#endif
|
1291 |
|
|
|
1292 |
|
|
void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
|
1293 |
|
|
{
|
1294 |
|
|
/*
|
1295 |
|
|
* Is this a new hole at the lowest possible address?
|
1296 |
|
|
*/
|
1297 |
|
|
if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
|
1298 |
|
|
mm->free_area_cache = addr;
|
1299 |
|
|
mm->cached_hole_size = ~0UL;
|
1300 |
|
|
}
|
1301 |
|
|
}
|
1302 |
|
|
|
1303 |
|
|
/*
|
1304 |
|
|
* This mmap-allocator allocates new areas top-down from below the
|
1305 |
|
|
* stack's low limit (the base):
|
1306 |
|
|
*/
|
1307 |
|
|
#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
|
1308 |
|
|
unsigned long
|
1309 |
|
|
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
1310 |
|
|
const unsigned long len, const unsigned long pgoff,
|
1311 |
|
|
const unsigned long flags)
|
1312 |
|
|
{
|
1313 |
|
|
struct vm_area_struct *vma;
|
1314 |
|
|
struct mm_struct *mm = current->mm;
|
1315 |
|
|
unsigned long addr = addr0;
|
1316 |
|
|
|
1317 |
|
|
/* requested length too big for entire address space */
|
1318 |
|
|
if (len > TASK_SIZE)
|
1319 |
|
|
return -ENOMEM;
|
1320 |
|
|
|
1321 |
|
|
if (flags & MAP_FIXED)
|
1322 |
|
|
return addr;
|
1323 |
|
|
|
1324 |
|
|
/* requesting a specific address */
|
1325 |
|
|
if (addr) {
|
1326 |
|
|
addr = PAGE_ALIGN(addr);
|
1327 |
|
|
vma = find_vma(mm, addr);
|
1328 |
|
|
if (TASK_SIZE - len >= addr &&
|
1329 |
|
|
(!vma || addr + len <= vma->vm_start))
|
1330 |
|
|
return addr;
|
1331 |
|
|
}
|
1332 |
|
|
|
1333 |
|
|
/* check if free_area_cache is useful for us */
|
1334 |
|
|
if (len <= mm->cached_hole_size) {
|
1335 |
|
|
mm->cached_hole_size = 0;
|
1336 |
|
|
mm->free_area_cache = mm->mmap_base;
|
1337 |
|
|
}
|
1338 |
|
|
|
1339 |
|
|
/* either no address requested or can't fit in requested address hole */
|
1340 |
|
|
addr = mm->free_area_cache;
|
1341 |
|
|
|
1342 |
|
|
/* make sure it can fit in the remaining address space */
|
1343 |
|
|
if (addr > len) {
|
1344 |
|
|
vma = find_vma(mm, addr-len);
|
1345 |
|
|
if (!vma || addr <= vma->vm_start)
|
1346 |
|
|
/* remember the address as a hint for next time */
|
1347 |
|
|
return (mm->free_area_cache = addr-len);
|
1348 |
|
|
}
|
1349 |
|
|
|
1350 |
|
|
if (mm->mmap_base < len)
|
1351 |
|
|
goto bottomup;
|
1352 |
|
|
|
1353 |
|
|
addr = mm->mmap_base-len;
|
1354 |
|
|
|
1355 |
|
|
do {
|
1356 |
|
|
/*
|
1357 |
|
|
* Lookup failure means no vma is above this address,
|
1358 |
|
|
* else if new region fits below vma->vm_start,
|
1359 |
|
|
* return with success:
|
1360 |
|
|
*/
|
1361 |
|
|
vma = find_vma(mm, addr);
|
1362 |
|
|
if (!vma || addr+len <= vma->vm_start)
|
1363 |
|
|
/* remember the address as a hint for next time */
|
1364 |
|
|
return (mm->free_area_cache = addr);
|
1365 |
|
|
|
1366 |
|
|
/* remember the largest hole we saw so far */
|
1367 |
|
|
if (addr + mm->cached_hole_size < vma->vm_start)
|
1368 |
|
|
mm->cached_hole_size = vma->vm_start - addr;
|
1369 |
|
|
|
1370 |
|
|
/* try just below the current vma->vm_start */
|
1371 |
|
|
addr = vma->vm_start-len;
|
1372 |
|
|
} while (len < vma->vm_start);
|
1373 |
|
|
|
1374 |
|
|
bottomup:
|
1375 |
|
|
/*
|
1376 |
|
|
* A failed mmap() very likely causes application failure,
|
1377 |
|
|
* so fall back to the bottom-up function here. This scenario
|
1378 |
|
|
* can happen with large stack limits and large mmap()
|
1379 |
|
|
* allocations.
|
1380 |
|
|
*/
|
1381 |
|
|
mm->cached_hole_size = ~0UL;
|
1382 |
|
|
mm->free_area_cache = TASK_UNMAPPED_BASE;
|
1383 |
|
|
addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
|
1384 |
|
|
/*
|
1385 |
|
|
* Restore the topdown base:
|
1386 |
|
|
*/
|
1387 |
|
|
mm->free_area_cache = mm->mmap_base;
|
1388 |
|
|
mm->cached_hole_size = ~0UL;
|
1389 |
|
|
|
1390 |
|
|
return addr;
|
1391 |
|
|
}
|
1392 |
|
|
#endif
|
1393 |
|
|
|
1394 |
|
|
void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
|
1395 |
|
|
{
|
1396 |
|
|
/*
|
1397 |
|
|
* Is this a new hole at the highest possible address?
|
1398 |
|
|
*/
|
1399 |
|
|
if (addr > mm->free_area_cache)
|
1400 |
|
|
mm->free_area_cache = addr;
|
1401 |
|
|
|
1402 |
|
|
/* dont allow allocations above current base */
|
1403 |
|
|
if (mm->free_area_cache > mm->mmap_base)
|
1404 |
|
|
mm->free_area_cache = mm->mmap_base;
|
1405 |
|
|
}
|
1406 |
|
|
|
1407 |
|
|
unsigned long
|
1408 |
|
|
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
|
1409 |
|
|
unsigned long pgoff, unsigned long flags)
|
1410 |
|
|
{
|
1411 |
|
|
unsigned long (*get_area)(struct file *, unsigned long,
|
1412 |
|
|
unsigned long, unsigned long, unsigned long);
|
1413 |
|
|
|
1414 |
|
|
get_area = current->mm->get_unmapped_area;
|
1415 |
|
|
if (file && file->f_op && file->f_op->get_unmapped_area)
|
1416 |
|
|
get_area = file->f_op->get_unmapped_area;
|
1417 |
|
|
addr = get_area(file, addr, len, pgoff, flags);
|
1418 |
|
|
if (IS_ERR_VALUE(addr))
|
1419 |
|
|
return addr;
|
1420 |
|
|
|
1421 |
|
|
if (addr > TASK_SIZE - len)
|
1422 |
|
|
return -ENOMEM;
|
1423 |
|
|
if (addr & ~PAGE_MASK)
|
1424 |
|
|
return -EINVAL;
|
1425 |
|
|
|
1426 |
|
|
return addr;
|
1427 |
|
|
}
|
1428 |
|
|
|
1429 |
|
|
EXPORT_SYMBOL(get_unmapped_area);
|
1430 |
|
|
|
1431 |
|
|
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
|
1432 |
|
|
struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
|
1433 |
|
|
{
|
1434 |
|
|
struct vm_area_struct *vma = NULL;
|
1435 |
|
|
|
1436 |
|
|
if (mm) {
|
1437 |
|
|
/* Check the cache first. */
|
1438 |
|
|
/* (Cache hit rate is typically around 35%.) */
|
1439 |
|
|
vma = mm->mmap_cache;
|
1440 |
|
|
if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
|
1441 |
|
|
struct rb_node * rb_node;
|
1442 |
|
|
|
1443 |
|
|
rb_node = mm->mm_rb.rb_node;
|
1444 |
|
|
vma = NULL;
|
1445 |
|
|
|
1446 |
|
|
while (rb_node) {
|
1447 |
|
|
struct vm_area_struct * vma_tmp;
|
1448 |
|
|
|
1449 |
|
|
vma_tmp = rb_entry(rb_node,
|
1450 |
|
|
struct vm_area_struct, vm_rb);
|
1451 |
|
|
|
1452 |
|
|
if (vma_tmp->vm_end > addr) {
|
1453 |
|
|
vma = vma_tmp;
|
1454 |
|
|
if (vma_tmp->vm_start <= addr)
|
1455 |
|
|
break;
|
1456 |
|
|
rb_node = rb_node->rb_left;
|
1457 |
|
|
} else
|
1458 |
|
|
rb_node = rb_node->rb_right;
|
1459 |
|
|
}
|
1460 |
|
|
if (vma)
|
1461 |
|
|
mm->mmap_cache = vma;
|
1462 |
|
|
}
|
1463 |
|
|
}
|
1464 |
|
|
return vma;
|
1465 |
|
|
}
|
1466 |
|
|
|
1467 |
|
|
EXPORT_SYMBOL(find_vma);
|
1468 |
|
|
|
1469 |
|
|
/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
|
1470 |
|
|
struct vm_area_struct *
|
1471 |
|
|
find_vma_prev(struct mm_struct *mm, unsigned long addr,
|
1472 |
|
|
struct vm_area_struct **pprev)
|
1473 |
|
|
{
|
1474 |
|
|
struct vm_area_struct *vma = NULL, *prev = NULL;
|
1475 |
|
|
struct rb_node * rb_node;
|
1476 |
|
|
if (!mm)
|
1477 |
|
|
goto out;
|
1478 |
|
|
|
1479 |
|
|
/* Guard against addr being lower than the first VMA */
|
1480 |
|
|
vma = mm->mmap;
|
1481 |
|
|
|
1482 |
|
|
/* Go through the RB tree quickly. */
|
1483 |
|
|
rb_node = mm->mm_rb.rb_node;
|
1484 |
|
|
|
1485 |
|
|
while (rb_node) {
|
1486 |
|
|
struct vm_area_struct *vma_tmp;
|
1487 |
|
|
vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
|
1488 |
|
|
|
1489 |
|
|
if (addr < vma_tmp->vm_end) {
|
1490 |
|
|
rb_node = rb_node->rb_left;
|
1491 |
|
|
} else {
|
1492 |
|
|
prev = vma_tmp;
|
1493 |
|
|
if (!prev->vm_next || (addr < prev->vm_next->vm_end))
|
1494 |
|
|
break;
|
1495 |
|
|
rb_node = rb_node->rb_right;
|
1496 |
|
|
}
|
1497 |
|
|
}
|
1498 |
|
|
|
1499 |
|
|
out:
|
1500 |
|
|
*pprev = prev;
|
1501 |
|
|
return prev ? prev->vm_next : vma;
|
1502 |
|
|
}
|
1503 |
|
|
|
1504 |
|
|
/*
|
1505 |
|
|
* Verify that the stack growth is acceptable and
|
1506 |
|
|
* update accounting. This is shared with both the
|
1507 |
|
|
* grow-up and grow-down cases.
|
1508 |
|
|
*/
|
1509 |
|
|
static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
|
1510 |
|
|
{
|
1511 |
|
|
struct mm_struct *mm = vma->vm_mm;
|
1512 |
|
|
struct rlimit *rlim = current->signal->rlim;
|
1513 |
|
|
unsigned long new_start;
|
1514 |
|
|
|
1515 |
|
|
/* address space limit tests */
|
1516 |
|
|
if (!may_expand_vm(mm, grow))
|
1517 |
|
|
return -ENOMEM;
|
1518 |
|
|
|
1519 |
|
|
/* Stack limit test */
|
1520 |
|
|
if (size > rlim[RLIMIT_STACK].rlim_cur)
|
1521 |
|
|
return -ENOMEM;
|
1522 |
|
|
|
1523 |
|
|
/* mlock limit tests */
|
1524 |
|
|
if (vma->vm_flags & VM_LOCKED) {
|
1525 |
|
|
unsigned long locked;
|
1526 |
|
|
unsigned long limit;
|
1527 |
|
|
locked = mm->locked_vm + grow;
|
1528 |
|
|
limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
|
1529 |
|
|
if (locked > limit && !capable(CAP_IPC_LOCK))
|
1530 |
|
|
return -ENOMEM;
|
1531 |
|
|
}
|
1532 |
|
|
|
1533 |
|
|
/* Check to ensure the stack will not grow into a hugetlb-only region */
|
1534 |
|
|
new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
|
1535 |
|
|
vma->vm_end - size;
|
1536 |
|
|
if (is_hugepage_only_range(vma->vm_mm, new_start, size))
|
1537 |
|
|
return -EFAULT;
|
1538 |
|
|
|
1539 |
|
|
/*
|
1540 |
|
|
* Overcommit.. This must be the final test, as it will
|
1541 |
|
|
* update security statistics.
|
1542 |
|
|
*/
|
1543 |
|
|
if (security_vm_enough_memory(grow))
|
1544 |
|
|
return -ENOMEM;
|
1545 |
|
|
|
1546 |
|
|
/* Ok, everything looks good - let it rip */
|
1547 |
|
|
mm->total_vm += grow;
|
1548 |
|
|
if (vma->vm_flags & VM_LOCKED)
|
1549 |
|
|
mm->locked_vm += grow;
|
1550 |
|
|
vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
|
1551 |
|
|
return 0;
|
1552 |
|
|
}
|
1553 |
|
|
|
1554 |
|
|
#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
|
1555 |
|
|
/*
|
1556 |
|
|
* PA-RISC uses this for its stack; IA64 for its Register Backing Store.
|
1557 |
|
|
* vma is the last one with address > vma->vm_end. Have to extend vma.
|
1558 |
|
|
*/
|
1559 |
|
|
#ifndef CONFIG_IA64
|
1560 |
|
|
static inline
|
1561 |
|
|
#endif
|
1562 |
|
|
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
|
1563 |
|
|
{
|
1564 |
|
|
int error;
|
1565 |
|
|
|
1566 |
|
|
if (!(vma->vm_flags & VM_GROWSUP))
|
1567 |
|
|
return -EFAULT;
|
1568 |
|
|
|
1569 |
|
|
/*
|
1570 |
|
|
* We must make sure the anon_vma is allocated
|
1571 |
|
|
* so that the anon_vma locking is not a noop.
|
1572 |
|
|
*/
|
1573 |
|
|
if (unlikely(anon_vma_prepare(vma)))
|
1574 |
|
|
return -ENOMEM;
|
1575 |
|
|
anon_vma_lock(vma);
|
1576 |
|
|
|
1577 |
|
|
/*
|
1578 |
|
|
* vma->vm_start/vm_end cannot change under us because the caller
|
1579 |
|
|
* is required to hold the mmap_sem in read mode. We need the
|
1580 |
|
|
* anon_vma lock to serialize against concurrent expand_stacks.
|
1581 |
|
|
* Also guard against wrapping around to address 0.
|
1582 |
|
|
*/
|
1583 |
|
|
if (address < PAGE_ALIGN(address+4))
|
1584 |
|
|
address = PAGE_ALIGN(address+4);
|
1585 |
|
|
else {
|
1586 |
|
|
anon_vma_unlock(vma);
|
1587 |
|
|
return -ENOMEM;
|
1588 |
|
|
}
|
1589 |
|
|
error = 0;
|
1590 |
|
|
|
1591 |
|
|
/* Somebody else might have raced and expanded it already */
|
1592 |
|
|
if (address > vma->vm_end) {
|
1593 |
|
|
unsigned long size, grow;
|
1594 |
|
|
|
1595 |
|
|
size = address - vma->vm_start;
|
1596 |
|
|
grow = (address - vma->vm_end) >> PAGE_SHIFT;
|
1597 |
|
|
|
1598 |
|
|
error = acct_stack_growth(vma, size, grow);
|
1599 |
|
|
if (!error)
|
1600 |
|
|
vma->vm_end = address;
|
1601 |
|
|
}
|
1602 |
|
|
anon_vma_unlock(vma);
|
1603 |
|
|
return error;
|
1604 |
|
|
}
|
1605 |
|
|
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
|
1606 |
|
|
|
1607 |
|
|
/*
|
1608 |
|
|
* vma is the first one with address < vma->vm_start. Have to extend vma.
|
1609 |
|
|
*/
|
1610 |
|
|
static inline int expand_downwards(struct vm_area_struct *vma,
|
1611 |
|
|
unsigned long address)
|
1612 |
|
|
{
|
1613 |
|
|
int error;
|
1614 |
|
|
|
1615 |
|
|
/*
|
1616 |
|
|
* We must make sure the anon_vma is allocated
|
1617 |
|
|
* so that the anon_vma locking is not a noop.
|
1618 |
|
|
*/
|
1619 |
|
|
if (unlikely(anon_vma_prepare(vma)))
|
1620 |
|
|
return -ENOMEM;
|
1621 |
|
|
|
1622 |
|
|
address &= PAGE_MASK;
|
1623 |
|
|
error = security_file_mmap(0, 0, 0, 0, address, 1);
|
1624 |
|
|
if (error)
|
1625 |
|
|
return error;
|
1626 |
|
|
|
1627 |
|
|
anon_vma_lock(vma);
|
1628 |
|
|
|
1629 |
|
|
/*
|
1630 |
|
|
* vma->vm_start/vm_end cannot change under us because the caller
|
1631 |
|
|
* is required to hold the mmap_sem in read mode. We need the
|
1632 |
|
|
* anon_vma lock to serialize against concurrent expand_stacks.
|
1633 |
|
|
*/
|
1634 |
|
|
|
1635 |
|
|
/* Somebody else might have raced and expanded it already */
|
1636 |
|
|
if (address < vma->vm_start) {
|
1637 |
|
|
unsigned long size, grow;
|
1638 |
|
|
|
1639 |
|
|
size = vma->vm_end - address;
|
1640 |
|
|
grow = (vma->vm_start - address) >> PAGE_SHIFT;
|
1641 |
|
|
|
1642 |
|
|
error = acct_stack_growth(vma, size, grow);
|
1643 |
|
|
if (!error) {
|
1644 |
|
|
vma->vm_start = address;
|
1645 |
|
|
vma->vm_pgoff -= grow;
|
1646 |
|
|
}
|
1647 |
|
|
}
|
1648 |
|
|
anon_vma_unlock(vma);
|
1649 |
|
|
return error;
|
1650 |
|
|
}
|
1651 |
|
|
|
1652 |
|
|
/*
 * Non-inline entry point for growing a vma downwards; simply forwards
 * to expand_downwards() and returns its result.
 */
int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_downwards(vma, address);

	return error;
}
|
1656 |
|
|
|
1657 |
|
|
#ifdef CONFIG_STACK_GROWSUP
|
1658 |
|
|
/*
 * CONFIG_STACK_GROWSUP variant: the stack is extended with
 * expand_upwards(), whose result is returned.
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_upwards(vma, address);

	return error;
}
|
1662 |
|
|
|
1663 |
|
|
struct vm_area_struct *
|
1664 |
|
|
find_extend_vma(struct mm_struct *mm, unsigned long addr)
|
1665 |
|
|
{
|
1666 |
|
|
struct vm_area_struct *vma, *prev;
|
1667 |
|
|
|
1668 |
|
|
addr &= PAGE_MASK;
|
1669 |
|
|
vma = find_vma_prev(mm, addr, &prev);
|
1670 |
|
|
if (vma && (vma->vm_start <= addr))
|
1671 |
|
|
return vma;
|
1672 |
|
|
if (!prev || expand_stack(prev, addr))
|
1673 |
|
|
return NULL;
|
1674 |
|
|
if (prev->vm_flags & VM_LOCKED)
|
1675 |
|
|
make_pages_present(addr, prev->vm_end);
|
1676 |
|
|
return prev;
|
1677 |
|
|
}
|
1678 |
|
|
#else
|
1679 |
|
|
/*
 * Variant used without CONFIG_STACK_GROWSUP: the stack is extended with
 * expand_downwards(), whose result is returned.
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	int error = expand_downwards(vma, address);

	return error;
}
|
1683 |
|
|
|
1684 |
|
|
struct vm_area_struct *
|
1685 |
|
|
find_extend_vma(struct mm_struct * mm, unsigned long addr)
|
1686 |
|
|
{
|
1687 |
|
|
struct vm_area_struct * vma;
|
1688 |
|
|
unsigned long start;
|
1689 |
|
|
|
1690 |
|
|
addr &= PAGE_MASK;
|
1691 |
|
|
vma = find_vma(mm,addr);
|
1692 |
|
|
if (!vma)
|
1693 |
|
|
return NULL;
|
1694 |
|
|
if (vma->vm_start <= addr)
|
1695 |
|
|
return vma;
|
1696 |
|
|
if (!(vma->vm_flags & VM_GROWSDOWN))
|
1697 |
|
|
return NULL;
|
1698 |
|
|
start = vma->vm_start;
|
1699 |
|
|
if (expand_stack(vma, addr))
|
1700 |
|
|
return NULL;
|
1701 |
|
|
if (vma->vm_flags & VM_LOCKED)
|
1702 |
|
|
make_pages_present(addr, start);
|
1703 |
|
|
return vma;
|
1704 |
|
|
}
|
1705 |
|
|
#endif
|
1706 |
|
|
|
1707 |
|
|
/*
|
1708 |
|
|
* Ok - we have the memory areas we should free on the vma list,
|
1709 |
|
|
* so release them, and do the vma updates.
|
1710 |
|
|
*
|
1711 |
|
|
* Called with the mm semaphore held.
|
1712 |
|
|
*/
|
1713 |
|
|
static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
|
1714 |
|
|
{
|
1715 |
|
|
/* Update high watermark before we lower total_vm */
|
1716 |
|
|
update_hiwater_vm(mm);
|
1717 |
|
|
do {
|
1718 |
|
|
long nrpages = vma_pages(vma);
|
1719 |
|
|
|
1720 |
|
|
mm->total_vm -= nrpages;
|
1721 |
|
|
if (vma->vm_flags & VM_LOCKED)
|
1722 |
|
|
mm->locked_vm -= nrpages;
|
1723 |
|
|
vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
|
1724 |
|
|
vma = remove_vma(vma);
|
1725 |
|
|
} while (vma);
|
1726 |
|
|
validate_mm(mm);
|
1727 |
|
|
}
|
1728 |
|
|
|
1729 |
|
|
/*
|
1730 |
|
|
* Get rid of page table information in the indicated region.
|
1731 |
|
|
*
|
1732 |
|
|
* Called with the mm semaphore held.
|
1733 |
|
|
*/
|
1734 |
|
|
static void unmap_region(struct mm_struct *mm,
|
1735 |
|
|
struct vm_area_struct *vma, struct vm_area_struct *prev,
|
1736 |
|
|
unsigned long start, unsigned long end)
|
1737 |
|
|
{
|
1738 |
|
|
struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
|
1739 |
|
|
struct mmu_gather *tlb;
|
1740 |
|
|
unsigned long nr_accounted = 0;
|
1741 |
|
|
|
1742 |
|
|
lru_add_drain();
|
1743 |
|
|
tlb = tlb_gather_mmu(mm, 0);
|
1744 |
|
|
update_hiwater_rss(mm);
|
1745 |
|
|
unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
|
1746 |
|
|
vm_unacct_memory(nr_accounted);
|
1747 |
|
|
free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
|
1748 |
|
|
next? next->vm_start: 0);
|
1749 |
|
|
tlb_finish_mmu(tlb, start, end);
|
1750 |
|
|
}
|
1751 |
|
|
|
1752 |
|
|
/*
|
1753 |
|
|
* Create a list of vma's touched by the unmap, removing them from the mm's
|
1754 |
|
|
* vma list as we go..
|
1755 |
|
|
*/
|
1756 |
|
|
static void
|
1757 |
|
|
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
|
1758 |
|
|
struct vm_area_struct *prev, unsigned long end)
|
1759 |
|
|
{
|
1760 |
|
|
struct vm_area_struct **insertion_point;
|
1761 |
|
|
struct vm_area_struct *tail_vma = NULL;
|
1762 |
|
|
unsigned long addr;
|
1763 |
|
|
|
1764 |
|
|
insertion_point = (prev ? &prev->vm_next : &mm->mmap);
|
1765 |
|
|
do {
|
1766 |
|
|
rb_erase(&vma->vm_rb, &mm->mm_rb);
|
1767 |
|
|
mm->map_count--;
|
1768 |
|
|
tail_vma = vma;
|
1769 |
|
|
vma = vma->vm_next;
|
1770 |
|
|
} while (vma && vma->vm_start < end);
|
1771 |
|
|
*insertion_point = vma;
|
1772 |
|
|
tail_vma->vm_next = NULL;
|
1773 |
|
|
if (mm->unmap_area == arch_unmap_area)
|
1774 |
|
|
addr = prev ? prev->vm_end : mm->mmap_base;
|
1775 |
|
|
else
|
1776 |
|
|
addr = vma ? vma->vm_start : mm->mmap_base;
|
1777 |
|
|
mm->unmap_area(mm, addr);
|
1778 |
|
|
mm->mmap_cache = NULL; /* Kill the cache. */
|
1779 |
|
|
}
|
1780 |
|
|
|
1781 |
|
|
/*
|
1782 |
|
|
* Split a vma into two pieces at address 'addr', a new vma is allocated
|
1783 |
|
|
* either for the first part or the tail.
|
1784 |
|
|
*/
|
1785 |
|
|
int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
|
1786 |
|
|
unsigned long addr, int new_below)
|
1787 |
|
|
{
|
1788 |
|
|
struct mempolicy *pol;
|
1789 |
|
|
struct vm_area_struct *new;
|
1790 |
|
|
|
1791 |
|
|
if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
|
1792 |
|
|
return -EINVAL;
|
1793 |
|
|
|
1794 |
|
|
if (mm->map_count >= sysctl_max_map_count)
|
1795 |
|
|
return -ENOMEM;
|
1796 |
|
|
|
1797 |
|
|
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
|
1798 |
|
|
if (!new)
|
1799 |
|
|
return -ENOMEM;
|
1800 |
|
|
|
1801 |
|
|
/* most fields are the same, copy all, and then fixup */
|
1802 |
|
|
*new = *vma;
|
1803 |
|
|
|
1804 |
|
|
if (new_below)
|
1805 |
|
|
new->vm_end = addr;
|
1806 |
|
|
else {
|
1807 |
|
|
new->vm_start = addr;
|
1808 |
|
|
new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
|
1809 |
|
|
}
|
1810 |
|
|
|
1811 |
|
|
pol = mpol_copy(vma_policy(vma));
|
1812 |
|
|
if (IS_ERR(pol)) {
|
1813 |
|
|
kmem_cache_free(vm_area_cachep, new);
|
1814 |
|
|
return PTR_ERR(pol);
|
1815 |
|
|
}
|
1816 |
|
|
vma_set_policy(new, pol);
|
1817 |
|
|
|
1818 |
|
|
if (new->vm_file)
|
1819 |
|
|
get_file(new->vm_file);
|
1820 |
|
|
|
1821 |
|
|
if (new->vm_ops && new->vm_ops->open)
|
1822 |
|
|
new->vm_ops->open(new);
|
1823 |
|
|
|
1824 |
|
|
if (new_below)
|
1825 |
|
|
vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
|
1826 |
|
|
((addr - new->vm_start) >> PAGE_SHIFT), new);
|
1827 |
|
|
else
|
1828 |
|
|
vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
|
1829 |
|
|
|
1830 |
|
|
return 0;
|
1831 |
|
|
}
|
1832 |
|
|
|
1833 |
|
|
/* Munmap is split into 2 main parts -- this part which finds
|
1834 |
|
|
* what needs doing, and the areas themselves, which do the
|
1835 |
|
|
* work. This now handles partial unmappings.
|
1836 |
|
|
* Jeremy Fitzhardinge <jeremy@goop.org>
|
1837 |
|
|
*/
|
1838 |
|
|
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
|
1839 |
|
|
{
|
1840 |
|
|
unsigned long end;
|
1841 |
|
|
struct vm_area_struct *vma, *prev, *last;
|
1842 |
|
|
|
1843 |
|
|
if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
|
1844 |
|
|
return -EINVAL;
|
1845 |
|
|
|
1846 |
|
|
if ((len = PAGE_ALIGN(len)) == 0)
|
1847 |
|
|
return -EINVAL;
|
1848 |
|
|
|
1849 |
|
|
/* Find the first overlapping VMA */
|
1850 |
|
|
vma = find_vma_prev(mm, start, &prev);
|
1851 |
|
|
if (!vma)
|
1852 |
|
|
return 0;
|
1853 |
|
|
/* we have start < vma->vm_end */
|
1854 |
|
|
|
1855 |
|
|
/* if it doesn't overlap, we have nothing.. */
|
1856 |
|
|
end = start + len;
|
1857 |
|
|
if (vma->vm_start >= end)
|
1858 |
|
|
return 0;
|
1859 |
|
|
|
1860 |
|
|
/*
|
1861 |
|
|
* If we need to split any vma, do it now to save pain later.
|
1862 |
|
|
*
|
1863 |
|
|
* Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
|
1864 |
|
|
* unmapped vm_area_struct will remain in use: so lower split_vma
|
1865 |
|
|
* places tmp vma above, and higher split_vma places tmp vma below.
|
1866 |
|
|
*/
|
1867 |
|
|
if (start > vma->vm_start) {
|
1868 |
|
|
int error = split_vma(mm, vma, start, 0);
|
1869 |
|
|
if (error)
|
1870 |
|
|
return error;
|
1871 |
|
|
prev = vma;
|
1872 |
|
|
}
|
1873 |
|
|
|
1874 |
|
|
/* Does it split the last one? */
|
1875 |
|
|
last = find_vma(mm, end);
|
1876 |
|
|
if (last && end > last->vm_start) {
|
1877 |
|
|
int error = split_vma(mm, last, end, 1);
|
1878 |
|
|
if (error)
|
1879 |
|
|
return error;
|
1880 |
|
|
}
|
1881 |
|
|
vma = prev? prev->vm_next: mm->mmap;
|
1882 |
|
|
|
1883 |
|
|
/*
|
1884 |
|
|
* Remove the vma's, and unmap the actual pages
|
1885 |
|
|
*/
|
1886 |
|
|
detach_vmas_to_be_unmapped(mm, vma, prev, end);
|
1887 |
|
|
unmap_region(mm, vma, prev, start, end);
|
1888 |
|
|
|
1889 |
|
|
/* Fix up all other VM information */
|
1890 |
|
|
remove_vma_list(mm, vma);
|
1891 |
|
|
|
1892 |
|
|
return 0;
|
1893 |
|
|
}
|
1894 |
|
|
|
1895 |
|
|
EXPORT_SYMBOL(do_munmap);
|
1896 |
|
|
|
1897 |
|
|
/*
 * munmap entry point: reports the address to profile_munmap(), then runs
 * do_munmap() on the current task's mm with mmap_sem held for writing.
 * Returns whatever do_munmap() returned.
 */
asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
	struct mm_struct *mm = current->mm;
	int rc;

	profile_munmap(addr);

	down_write(&mm->mmap_sem);
	rc = do_munmap(mm, addr, len);
	up_write(&mm->mmap_sem);

	return rc;
}
|
1909 |
|
|
|
1910 |
|
|
/*
 * Debug aid, compiled to nothing unless CONFIG_DEBUG_VM is set.
 *
 * Tries to take mm->mmap_sem for reading without blocking.  If that
 * succeeds, a warning is raised and the read lock just obtained is
 * dropped again.
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (likely(!down_read_trylock(&mm->mmap_sem)))
		return;

	WARN_ON(1);
	up_read(&mm->mmap_sem);
#endif
}
|
1919 |
|
|
|
1920 |
|
|
/*
|
1921 |
|
|
* this is really a simplified "do_mmap". it only handles
|
1922 |
|
|
* anonymous maps. eventually we may be able to do some
|
1923 |
|
|
* brk-specific accounting here.
|
1924 |
|
|
*/
|
1925 |
|
|
unsigned long do_brk(unsigned long addr, unsigned long len)
|
1926 |
|
|
{
|
1927 |
|
|
struct mm_struct * mm = current->mm;
|
1928 |
|
|
struct vm_area_struct * vma, * prev;
|
1929 |
|
|
unsigned long flags;
|
1930 |
|
|
struct rb_node ** rb_link, * rb_parent;
|
1931 |
|
|
pgoff_t pgoff = addr >> PAGE_SHIFT;
|
1932 |
|
|
int error;
|
1933 |
|
|
|
1934 |
|
|
len = PAGE_ALIGN(len);
|
1935 |
|
|
if (!len)
|
1936 |
|
|
return addr;
|
1937 |
|
|
|
1938 |
|
|
if ((addr + len) > TASK_SIZE || (addr + len) < addr)
|
1939 |
|
|
return -EINVAL;
|
1940 |
|
|
|
1941 |
|
|
if (is_hugepage_only_range(mm, addr, len))
|
1942 |
|
|
return -EINVAL;
|
1943 |
|
|
|
1944 |
|
|
error = security_file_mmap(0, 0, 0, 0, addr, 1);
|
1945 |
|
|
if (error)
|
1946 |
|
|
return error;
|
1947 |
|
|
|
1948 |
|
|
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
|
1949 |
|
|
|
1950 |
|
|
error = arch_mmap_check(addr, len, flags);
|
1951 |
|
|
if (error)
|
1952 |
|
|
return error;
|
1953 |
|
|
|
1954 |
|
|
/*
|
1955 |
|
|
* mlock MCL_FUTURE?
|
1956 |
|
|
*/
|
1957 |
|
|
if (mm->def_flags & VM_LOCKED) {
|
1958 |
|
|
unsigned long locked, lock_limit;
|
1959 |
|
|
locked = len >> PAGE_SHIFT;
|
1960 |
|
|
locked += mm->locked_vm;
|
1961 |
|
|
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
|
1962 |
|
|
lock_limit >>= PAGE_SHIFT;
|
1963 |
|
|
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
|
1964 |
|
|
return -EAGAIN;
|
1965 |
|
|
}
|
1966 |
|
|
|
1967 |
|
|
/*
|
1968 |
|
|
* mm->mmap_sem is required to protect against another thread
|
1969 |
|
|
* changing the mappings in case we sleep.
|
1970 |
|
|
*/
|
1971 |
|
|
verify_mm_writelocked(mm);
|
1972 |
|
|
|
1973 |
|
|
/*
|
1974 |
|
|
* Clear old maps. this also does some error checking for us
|
1975 |
|
|
*/
|
1976 |
|
|
munmap_back:
|
1977 |
|
|
vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
|
1978 |
|
|
if (vma && vma->vm_start < addr + len) {
|
1979 |
|
|
if (do_munmap(mm, addr, len))
|
1980 |
|
|
return -ENOMEM;
|
1981 |
|
|
goto munmap_back;
|
1982 |
|
|
}
|
1983 |
|
|
|
1984 |
|
|
/* Check against address space limits *after* clearing old maps... */
|
1985 |
|
|
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
|
1986 |
|
|
return -ENOMEM;
|
1987 |
|
|
|
1988 |
|
|
if (mm->map_count > sysctl_max_map_count)
|
1989 |
|
|
return -ENOMEM;
|
1990 |
|
|
|
1991 |
|
|
if (security_vm_enough_memory(len >> PAGE_SHIFT))
|
1992 |
|
|
return -ENOMEM;
|
1993 |
|
|
|
1994 |
|
|
/* Can we just expand an old private anonymous mapping? */
|
1995 |
|
|
if (vma_merge(mm, prev, addr, addr + len, flags,
|
1996 |
|
|
NULL, NULL, pgoff, NULL))
|
1997 |
|
|
goto out;
|
1998 |
|
|
|
1999 |
|
|
/*
|
2000 |
|
|
* create a vma struct for an anonymous mapping
|
2001 |
|
|
*/
|
2002 |
|
|
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
|
2003 |
|
|
if (!vma) {
|
2004 |
|
|
vm_unacct_memory(len >> PAGE_SHIFT);
|
2005 |
|
|
return -ENOMEM;
|
2006 |
|
|
}
|
2007 |
|
|
|
2008 |
|
|
vma->vm_mm = mm;
|
2009 |
|
|
vma->vm_start = addr;
|
2010 |
|
|
vma->vm_end = addr + len;
|
2011 |
|
|
vma->vm_pgoff = pgoff;
|
2012 |
|
|
vma->vm_flags = flags;
|
2013 |
|
|
vma->vm_page_prot = vm_get_page_prot(flags);
|
2014 |
|
|
vma_link(mm, vma, prev, rb_link, rb_parent);
|
2015 |
|
|
out:
|
2016 |
|
|
mm->total_vm += len >> PAGE_SHIFT;
|
2017 |
|
|
if (flags & VM_LOCKED) {
|
2018 |
|
|
mm->locked_vm += len >> PAGE_SHIFT;
|
2019 |
|
|
make_pages_present(addr, addr + len);
|
2020 |
|
|
}
|
2021 |
|
|
return addr;
|
2022 |
|
|
}
|
2023 |
|
|
|
2024 |
|
|
EXPORT_SYMBOL(do_brk);
|
2025 |
|
|
|
2026 |
|
|
/* Release all mmaps. */
|
2027 |
|
|
void exit_mmap(struct mm_struct *mm)
|
2028 |
|
|
{
|
2029 |
|
|
struct mmu_gather *tlb;
|
2030 |
|
|
struct vm_area_struct *vma = mm->mmap;
|
2031 |
|
|
unsigned long nr_accounted = 0;
|
2032 |
|
|
unsigned long end;
|
2033 |
|
|
|
2034 |
|
|
/* mm's last user has gone, and its about to be pulled down */
|
2035 |
|
|
arch_exit_mmap(mm);
|
2036 |
|
|
|
2037 |
|
|
lru_add_drain();
|
2038 |
|
|
flush_cache_mm(mm);
|
2039 |
|
|
tlb = tlb_gather_mmu(mm, 1);
|
2040 |
|
|
/* Don't update_hiwater_rss(mm) here, do_exit already did */
|
2041 |
|
|
/* Use -1 here to ensure all VMAs in the mm are unmapped */
|
2042 |
|
|
end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
|
2043 |
|
|
vm_unacct_memory(nr_accounted);
|
2044 |
|
|
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
|
2045 |
|
|
tlb_finish_mmu(tlb, 0, end);
|
2046 |
|
|
|
2047 |
|
|
/*
|
2048 |
|
|
* Walk the list again, actually closing and freeing it,
|
2049 |
|
|
* with preemption enabled, without holding any MM locks.
|
2050 |
|
|
*/
|
2051 |
|
|
while (vma)
|
2052 |
|
|
vma = remove_vma(vma);
|
2053 |
|
|
|
2054 |
|
|
BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
|
2055 |
|
|
}
|
2056 |
|
|
|
2057 |
|
|
/* Insert vm structure into process list sorted by address
|
2058 |
|
|
* and into the inode's i_mmap tree. If vm_file is non-NULL
|
2059 |
|
|
* then i_mmap_lock is taken here.
|
2060 |
|
|
*/
|
2061 |
|
|
int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
|
2062 |
|
|
{
|
2063 |
|
|
struct vm_area_struct * __vma, * prev;
|
2064 |
|
|
struct rb_node ** rb_link, * rb_parent;
|
2065 |
|
|
|
2066 |
|
|
/*
|
2067 |
|
|
* The vm_pgoff of a purely anonymous vma should be irrelevant
|
2068 |
|
|
* until its first write fault, when page's anon_vma and index
|
2069 |
|
|
* are set. But now set the vm_pgoff it will almost certainly
|
2070 |
|
|
* end up with (unless mremap moves it elsewhere before that
|
2071 |
|
|
* first wfault), so /proc/pid/maps tells a consistent story.
|
2072 |
|
|
*
|
2073 |
|
|
* By setting it to reflect the virtual start address of the
|
2074 |
|
|
* vma, merges and splits can happen in a seamless way, just
|
2075 |
|
|
* using the existing file pgoff checks and manipulations.
|
2076 |
|
|
* Similarly in do_mmap_pgoff and in do_brk.
|
2077 |
|
|
*/
|
2078 |
|
|
if (!vma->vm_file) {
|
2079 |
|
|
BUG_ON(vma->anon_vma);
|
2080 |
|
|
vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
|
2081 |
|
|
}
|
2082 |
|
|
__vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
|
2083 |
|
|
if (__vma && __vma->vm_start < vma->vm_end)
|
2084 |
|
|
return -ENOMEM;
|
2085 |
|
|
if ((vma->vm_flags & VM_ACCOUNT) &&
|
2086 |
|
|
security_vm_enough_memory_mm(mm, vma_pages(vma)))
|
2087 |
|
|
return -ENOMEM;
|
2088 |
|
|
vma_link(mm, vma, prev, rb_link, rb_parent);
|
2089 |
|
|
return 0;
|
2090 |
|
|
}
|
2091 |
|
|
|
2092 |
|
|
/*
|
2093 |
|
|
* Copy the vma structure to a new location in the same mm,
|
2094 |
|
|
* prior to moving page table entries, to effect an mremap move.
|
2095 |
|
|
*/
|
2096 |
|
|
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
|
2097 |
|
|
unsigned long addr, unsigned long len, pgoff_t pgoff)
|
2098 |
|
|
{
|
2099 |
|
|
struct vm_area_struct *vma = *vmap;
|
2100 |
|
|
unsigned long vma_start = vma->vm_start;
|
2101 |
|
|
struct mm_struct *mm = vma->vm_mm;
|
2102 |
|
|
struct vm_area_struct *new_vma, *prev;
|
2103 |
|
|
struct rb_node **rb_link, *rb_parent;
|
2104 |
|
|
struct mempolicy *pol;
|
2105 |
|
|
|
2106 |
|
|
/*
|
2107 |
|
|
* If anonymous vma has not yet been faulted, update new pgoff
|
2108 |
|
|
* to match new location, to increase its chance of merging.
|
2109 |
|
|
*/
|
2110 |
|
|
if (!vma->vm_file && !vma->anon_vma)
|
2111 |
|
|
pgoff = addr >> PAGE_SHIFT;
|
2112 |
|
|
|
2113 |
|
|
find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
|
2114 |
|
|
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
|
2115 |
|
|
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
|
2116 |
|
|
if (new_vma) {
|
2117 |
|
|
/*
|
2118 |
|
|
* Source vma may have been merged into new_vma
|
2119 |
|
|
*/
|
2120 |
|
|
if (vma_start >= new_vma->vm_start &&
|
2121 |
|
|
vma_start < new_vma->vm_end)
|
2122 |
|
|
*vmap = new_vma;
|
2123 |
|
|
} else {
|
2124 |
|
|
new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
|
2125 |
|
|
if (new_vma) {
|
2126 |
|
|
*new_vma = *vma;
|
2127 |
|
|
pol = mpol_copy(vma_policy(vma));
|
2128 |
|
|
if (IS_ERR(pol)) {
|
2129 |
|
|
kmem_cache_free(vm_area_cachep, new_vma);
|
2130 |
|
|
return NULL;
|
2131 |
|
|
}
|
2132 |
|
|
vma_set_policy(new_vma, pol);
|
2133 |
|
|
new_vma->vm_start = addr;
|
2134 |
|
|
new_vma->vm_end = addr + len;
|
2135 |
|
|
new_vma->vm_pgoff = pgoff;
|
2136 |
|
|
if (new_vma->vm_file)
|
2137 |
|
|
get_file(new_vma->vm_file);
|
2138 |
|
|
if (new_vma->vm_ops && new_vma->vm_ops->open)
|
2139 |
|
|
new_vma->vm_ops->open(new_vma);
|
2140 |
|
|
vma_link(mm, new_vma, prev, rb_link, rb_parent);
|
2141 |
|
|
}
|
2142 |
|
|
}
|
2143 |
|
|
return new_vma;
|
2144 |
|
|
}
|
2145 |
|
|
|
2146 |
|
|
/*
|
2147 |
|
|
* Return true if the calling process may expand its vm space by the passed
|
2148 |
|
|
* number of pages
|
2149 |
|
|
*/
|
2150 |
|
|
int may_expand_vm(struct mm_struct *mm, unsigned long npages)
|
2151 |
|
|
{
|
2152 |
|
|
unsigned long cur = mm->total_vm; /* pages */
|
2153 |
|
|
unsigned long lim;
|
2154 |
|
|
|
2155 |
|
|
lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
|
2156 |
|
|
|
2157 |
|
|
if (cur + npages > lim)
|
2158 |
|
|
return 0;
|
2159 |
|
|
return 1;
|
2160 |
|
|
}
|
2161 |
|
|
|
2162 |
|
|
|
2163 |
|
|
static struct page *special_mapping_nopage(struct vm_area_struct *vma,
|
2164 |
|
|
unsigned long address, int *type)
|
2165 |
|
|
{
|
2166 |
|
|
struct page **pages;
|
2167 |
|
|
|
2168 |
|
|
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
|
2169 |
|
|
|
2170 |
|
|
address -= vma->vm_start;
|
2171 |
|
|
for (pages = vma->vm_private_data; address > 0 && *pages; ++pages)
|
2172 |
|
|
address -= PAGE_SIZE;
|
2173 |
|
|
|
2174 |
|
|
if (*pages) {
|
2175 |
|
|
struct page *page = *pages;
|
2176 |
|
|
get_page(page);
|
2177 |
|
|
return page;
|
2178 |
|
|
}
|
2179 |
|
|
|
2180 |
|
|
return NOPAGE_SIGBUS;
|
2181 |
|
|
}
|
2182 |
|
|
|
2183 |
|
|
/*
 * Having a close hook prevents vma merging regardless of flags.
 * The hook itself has nothing to do.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
	/* Intentionally empty. */
}
|
2189 |
|
|
|
2190 |
|
|
static struct vm_operations_struct special_mapping_vmops = {
|
2191 |
|
|
.close = special_mapping_close,
|
2192 |
|
|
.nopage = special_mapping_nopage,
|
2193 |
|
|
};
|
2194 |
|
|
|
2195 |
|
|
/*
|
2196 |
|
|
* Called with mm->mmap_sem held for writing.
|
2197 |
|
|
* Insert a new vma covering the given region, with the given flags.
|
2198 |
|
|
* Its pages are supplied by the given array of struct page *.
|
2199 |
|
|
* The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
|
2200 |
|
|
* The region past the last page supplied will always produce SIGBUS.
|
2201 |
|
|
* The array pointer and the pages it points to are assumed to stay alive
|
2202 |
|
|
* for as long as this mapping might exist.
|
2203 |
|
|
*/
|
2204 |
|
|
int install_special_mapping(struct mm_struct *mm,
|
2205 |
|
|
unsigned long addr, unsigned long len,
|
2206 |
|
|
unsigned long vm_flags, struct page **pages)
|
2207 |
|
|
{
|
2208 |
|
|
struct vm_area_struct *vma;
|
2209 |
|
|
|
2210 |
|
|
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
|
2211 |
|
|
if (unlikely(vma == NULL))
|
2212 |
|
|
return -ENOMEM;
|
2213 |
|
|
|
2214 |
|
|
vma->vm_mm = mm;
|
2215 |
|
|
vma->vm_start = addr;
|
2216 |
|
|
vma->vm_end = addr + len;
|
2217 |
|
|
|
2218 |
|
|
vma->vm_flags = vm_flags | mm->def_flags;
|
2219 |
|
|
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
|
2220 |
|
|
|
2221 |
|
|
vma->vm_ops = &special_mapping_vmops;
|
2222 |
|
|
vma->vm_private_data = pages;
|
2223 |
|
|
|
2224 |
|
|
if (unlikely(insert_vm_struct(mm, vma))) {
|
2225 |
|
|
kmem_cache_free(vm_area_cachep, vma);
|
2226 |
|
|
return -ENOMEM;
|
2227 |
|
|
}
|
2228 |
|
|
|
2229 |
|
|
mm->total_vm += len >> PAGE_SHIFT;
|
2230 |
|
|
|
2231 |
|
|
return 0;
|
2232 |
|
|
}
|