/*
 *      linux/mm/mmap.c
 *
 * Written by obz.
 */
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/mount.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>

/*
 * WARNING: the debugging will use recursive algorithms so never enable this
 * unless you know what you are doing.
 */
#undef DEBUG_MM_RB

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type     prot
 *              PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
 * MAP_SHARED   r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 *              w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
 *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 *
 * MAP_PRIVATE  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 *              w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
 *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 *
 */
pgprot_t protection_map[16] = {
        __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
        __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
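
/*
 * Example: the table above is indexed by the low four vm_flags bits
 * (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED), which is why the mappers
 * further down compute
 *
 *      vma->vm_page_prot = protection_map[vm_flags & 0x0f];
 *
 * A private PROT_READ|PROT_WRITE mapping, for instance, yields index 3
 * and therefore the __P011 entry.
 */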

int sysctl_overcommit_memory;
int max_map_count = DEFAULT_MAX_MAP_COUNT;

/* Check that a process has enough memory to allocate a
 * new virtual mapping.
 */
int vm_enough_memory(long pages)
{
        /* Stupid algorithm to decide if we have enough memory: while
         * simple, it hopefully works in most obvious cases.. Easy to
         * fool it, but this should catch most mistakes.
         */
        /* 23/11/98 NJC: Somewhat less stupid version of algorithm,
         * which tries to do "TheRightThing".  Instead of using half of
         * (buffers+cache), use the minimum values.  Allow an extra 2%
         * of num_physpages for safety margin.
         */

        unsigned long free;

        /* Sometimes we want to use more memory than we have. */
        if (sysctl_overcommit_memory)
                return 1;

        /* The page cache contains buffer pages these days.. */
        free = page_cache_size;
        free += nr_free_pages();
        free += nr_swap_pages;

        /*
         * This double-counts: the nrpages are both in the page-cache
         * and in the swapper space. At the same time, this compensates
         * for the swap-space over-allocation (ie "nr_swap_pages" being
         * too small).
         */
        free += swapper_space.nrpages;

        /*
         * The code below doesn't account for free space in the inode
         * and dentry slab cache, slab cache fragmentation, inodes and
         * dentries which will become freeable under VM load, etc.
         * Let's just hope all these (complex) factors balance out...
         */
        free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> PAGE_SHIFT;
        free += (inodes_stat.nr_unused * sizeof(struct inode)) >> PAGE_SHIFT;

        return free > pages;
}
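
/*
 * Typical use, as in do_mmap_pgoff() and do_brk() further down: the caller
 * passes the size of the new mapping in pages and bails out on failure,
 *
 *      if (!vm_enough_memory(len >> PAGE_SHIFT))
 *              return -ENOMEM;
 *
 * Note that with sysctl_overcommit_memory set the check always succeeds.
 */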

/* Remove one vm structure from the inode's i_mapping address space. */
static inline void __remove_shared_vm_struct(struct vm_area_struct *vma)
{
        struct file * file = vma->vm_file;

        if (file) {
                struct inode *inode = file->f_dentry->d_inode;
                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_inc(&inode->i_writecount);
                if(vma->vm_next_share)
                        vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share;
                *vma->vm_pprev_share = vma->vm_next_share;
        }
}

static inline void remove_shared_vm_struct(struct vm_area_struct *vma)
{
        lock_vma_mappings(vma);
        __remove_shared_vm_struct(vma);
        unlock_vma_mappings(vma);
}

void lock_vma_mappings(struct vm_area_struct *vma)
{
        struct address_space *mapping;

        mapping = NULL;
        if (vma->vm_file)
                mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
        if (mapping)
                spin_lock(&mapping->i_shared_lock);
}

void unlock_vma_mappings(struct vm_area_struct *vma)
{
        struct address_space *mapping;

        mapping = NULL;
        if (vma->vm_file)
                mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
        if (mapping)
                spin_unlock(&mapping->i_shared_lock);
}

/*
 * sys_brk() for the most part doesn't need the global kernel
 * lock, except when an application is doing something nasty
 * like trying to un-brk an area that has already been mapped
 * to a regular file.  in this case, the unmapping will need
 * to invoke file system routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
        unsigned long rlim, retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;

        down_write(&mm->mmap_sem);

        if (brk < mm->end_code)
                goto out;
        newbrk = PAGE_ALIGN(brk);
        oldbrk = PAGE_ALIGN(mm->brk);
        if (oldbrk == newbrk)
                goto set_brk;

        /* Always allow shrinking brk. */
        if (brk <= mm->brk) {
                if (!do_munmap(mm, newbrk, oldbrk-newbrk))
                        goto set_brk;
                goto out;
        }

        /* Check against rlimit.. */
        rlim = current->rlim[RLIMIT_DATA].rlim_cur;
        if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
                goto out;

        /* Check against existing mmap mappings. */
        if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
                goto out;

        /* Check if we have enough memory.. */
        if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
                goto out;

        /* Ok, looks good - let it rip. */
        if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
                goto out;
set_brk:
        mm->brk = brk;
out:
        retval = mm->brk;
        up_write(&mm->mmap_sem);
        return retval;
}
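
/*
 * Example: growing the break by one page from a page-aligned old break ends
 * up in do_brk(oldbrk, PAGE_SIZE), while shrinking goes through
 * do_munmap(mm, newbrk, oldbrk-newbrk); in both cases mm->brk is only
 * updated once the underlying operation has succeeded.
 */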

/* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
{
#define _trans(x,bit1,bit2) \
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

        unsigned long prot_bits, flag_bits;
        prot_bits =
                _trans(prot, PROT_READ, VM_READ) |
                _trans(prot, PROT_WRITE, VM_WRITE) |
                _trans(prot, PROT_EXEC, VM_EXEC);
        flag_bits =
                _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
                _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
                _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
        return prot_bits | flag_bits;
#undef _trans
}
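
/*
 * Illustration: on i386 the PROT_READ/PROT_WRITE/PROT_EXEC values coincide
 * with VM_READ/VM_WRITE/VM_EXEC, so _trans() degenerates to a plain mask and
 *
 *      calc_vm_flags(PROT_READ|PROT_WRITE, MAP_PRIVATE) == VM_READ|VM_WRITE
 *
 * Architectures where the bit values differ take the conditional branch of
 * the macro instead.
 */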

#ifdef DEBUG_MM_RB
static int browse_rb(rb_node_t * rb_node) {
        int i = 0;
        if (rb_node) {
                i++;
                i += browse_rb(rb_node->rb_left);
                i += browse_rb(rb_node->rb_right);
        }
        return i;
}

static void validate_mm(struct mm_struct * mm) {
        int bug = 0;
        int i = 0;
        struct vm_area_struct * tmp = mm->mmap;
        while (tmp) {
                tmp = tmp->vm_next;
                i++;
        }
        if (i != mm->map_count)
                printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
        i = browse_rb(mm->mm_rb.rb_node);
        if (i != mm->map_count)
                printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
        if (bug)
                BUG();
}
#else
#define validate_mm(mm) do { } while (0)
#endif

static struct vm_area_struct * find_vma_prepare(struct mm_struct * mm, unsigned long addr,
                                                struct vm_area_struct ** pprev,
                                                rb_node_t *** rb_link, rb_node_t ** rb_parent)
{
        struct vm_area_struct * vma;
        rb_node_t ** __rb_link, * __rb_parent, * rb_prev;

        __rb_link = &mm->mm_rb.rb_node;
        rb_prev = __rb_parent = NULL;
        vma = NULL;

        while (*__rb_link) {
                struct vm_area_struct *vma_tmp;

                __rb_parent = *__rb_link;
                vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

                if (vma_tmp->vm_end > addr) {
                        vma = vma_tmp;
                        if (vma_tmp->vm_start <= addr)
                                return vma;
                        __rb_link = &__rb_parent->rb_left;
                } else {
                        rb_prev = __rb_parent;
                        __rb_link = &__rb_parent->rb_right;
                }
        }

        *pprev = NULL;
        if (rb_prev)
                *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
        *rb_link = __rb_link;
        *rb_parent = __rb_parent;
        return vma;
}
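
/*
 * Besides the vma that covers (or follows) addr, find_vma_prepare() hands
 * back the rbtree link/parent pair and the predecessor vma, i.e. everything
 * vma_link() and __insert_vm_struct() need to splice a new vma into both the
 * sorted list and the tree without searching again.
 */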

static inline void __vma_link_list(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
                                   rb_node_t * rb_parent)
{
        if (prev) {
                vma->vm_next = prev->vm_next;
                prev->vm_next = vma;
        } else {
                mm->mmap = vma;
                if (rb_parent)
                        vma->vm_next = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
                else
                        vma->vm_next = NULL;
        }
}

static inline void __vma_link_rb(struct mm_struct * mm, struct vm_area_struct * vma,
                                 rb_node_t ** rb_link, rb_node_t * rb_parent)
{
        rb_link_node(&vma->vm_rb, rb_parent, rb_link);
        rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}

static inline void __vma_link_file(struct vm_area_struct * vma)
{
        struct file * file;

        file = vma->vm_file;
        if (file) {
                struct inode * inode = file->f_dentry->d_inode;
                struct address_space *mapping = inode->i_mapping;
                struct vm_area_struct **head;

                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_dec(&inode->i_writecount);

                head = &mapping->i_mmap;
                if (vma->vm_flags & VM_SHARED)
                        head = &mapping->i_mmap_shared;

                /* insert vma into inode's share list */
                if((vma->vm_next_share = *head) != NULL)
                        (*head)->vm_pprev_share = &vma->vm_next_share;
                *head = vma;
                vma->vm_pprev_share = head;
        }
}

static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
                       rb_node_t ** rb_link, rb_node_t * rb_parent)
{
        __vma_link_list(mm, vma, prev, rb_parent);
        __vma_link_rb(mm, vma, rb_link, rb_parent);
        __vma_link_file(vma);
}

static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
                            rb_node_t ** rb_link, rb_node_t * rb_parent)
{
        lock_vma_mappings(vma);
        spin_lock(&mm->page_table_lock);
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        spin_unlock(&mm->page_table_lock);
        unlock_vma_mappings(vma);

        mm->map_count++;
        validate_mm(mm);
}

static int vma_merge(struct mm_struct * mm, struct vm_area_struct * prev,
                     rb_node_t * rb_parent, unsigned long addr, unsigned long end, unsigned long vm_flags)
{
        spinlock_t * lock = &mm->page_table_lock;
        if (!prev) {
                prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
                goto merge_next;
        }
        if (prev->vm_end == addr && can_vma_merge(prev, vm_flags)) {
                struct vm_area_struct * next;

                spin_lock(lock);
                prev->vm_end = end;
                next = prev->vm_next;
                if (next && prev->vm_end == next->vm_start && can_vma_merge(next, vm_flags)) {
                        prev->vm_end = next->vm_end;
                        __vma_unlink(mm, next, prev);
                        spin_unlock(lock);

                        mm->map_count--;
                        kmem_cache_free(vm_area_cachep, next);
                        return 1;
                }
                spin_unlock(lock);
                return 1;
        }

        prev = prev->vm_next;
        if (prev) {
 merge_next:
                if (!can_vma_merge(prev, vm_flags))
                        return 0;
                if (end == prev->vm_start) {
                        spin_lock(lock);
                        prev->vm_start = addr;
                        spin_unlock(lock);
                        return 1;
                }
        }

        return 0;
}
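
/*
 * Three merge cases are handled above: extend prev forward when it ends
 * exactly at addr, additionally swallow the follower when that extension
 * closes the gap to it, and extend the next vma backwards when end meets
 * its vm_start.  The callers only attempt this for anonymous, non-shared
 * mappings (see do_mmap_pgoff() and do_brk() below).
 */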

unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
        unsigned long prot, unsigned long flags, unsigned long pgoff)
{
        struct mm_struct * mm = current->mm;
        struct vm_area_struct * vma, * prev;
        unsigned int vm_flags;
        int correct_wcount = 0;
        int error;
        rb_node_t ** rb_link, * rb_parent;

        if (file) {
                if (!file->f_op || !file->f_op->mmap)
                        return -ENODEV;

                if ((prot & PROT_EXEC) && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
                        return -EPERM;
        }

        if (!len)
                return addr;

        len = PAGE_ALIGN(len);

        if (len > TASK_SIZE || len == 0)
                return -EINVAL;

        /* offset overflow? */
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
                return -EINVAL;

        /* Too many mappings? */
        if (mm->map_count > max_map_count)
                return -ENOMEM;

        /* Obtain the address to map to. we verify (or select) it and ensure
         * that it represents a valid section of the address space.
         */
        addr = get_unmapped_area(file, addr, len, pgoff, flags);
        if (addr & ~PAGE_MASK)
                return addr;

        /* Do simple checking here so the lower-level routines won't have
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
        vm_flags = calc_vm_flags(prot,flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

        /* mlock MCL_FUTURE? */
        if (vm_flags & VM_LOCKED) {
                unsigned long locked = mm->locked_vm << PAGE_SHIFT;
                locked += len;
                if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
                        return -EAGAIN;
        }

        if (file) {
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
                        if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
                                return -EACCES;

                        /* Make sure we don't allow writing to an append-only file.. */
                        if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
                                return -EACCES;

                        /* make sure there are no mandatory locks on the file. */
                        if (locks_verify_locked(file->f_dentry->d_inode))
                                return -EAGAIN;

                        vm_flags |= VM_SHARED | VM_MAYSHARE;
                        if (!(file->f_mode & FMODE_WRITE))
                                vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

                        /* fall through */
                case MAP_PRIVATE:
                        if (!(file->f_mode & FMODE_READ))
                                return -EACCES;
                        break;

                default:
                        return -EINVAL;
                }
        } else {
                vm_flags |= VM_SHARED | VM_MAYSHARE;
                switch (flags & MAP_TYPE) {
                default:
                        return -EINVAL;
                case MAP_PRIVATE:
                        vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
                        /* fall through */
                case MAP_SHARED:
                        break;
                }
        }

        /* Clear old maps */
munmap_back:
        vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
        if (vma && vma->vm_start < addr + len) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
                goto munmap_back;
        }

        /* Check against address space limit. */
        if ((mm->total_vm << PAGE_SHIFT) + len
            > current->rlim[RLIMIT_AS].rlim_cur)
                return -ENOMEM;

        /* Private writable mapping? Check memory availability.. */
        if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
            !(flags & MAP_NORESERVE) &&
            !vm_enough_memory(len >> PAGE_SHIFT))
                return -ENOMEM;

        /* Can we just expand an old anonymous mapping? */
        if (!file && !(vm_flags & VM_SHARED) && rb_parent)
                if (vma_merge(mm, prev, rb_parent, addr, addr + len, vm_flags))
                        goto out;

        /* Determine the object being mapped and call the appropriate
         * specific mapper. the address has already been validated, but
         * not unmapped, but the maps are removed from the list.
         */
        vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!vma)
                return -ENOMEM;

        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;
        vma->vm_flags = vm_flags;
        vma->vm_page_prot = protection_map[vm_flags & 0x0f];
        vma->vm_ops = NULL;
        vma->vm_pgoff = pgoff;
        vma->vm_file = NULL;
        vma->vm_private_data = NULL;
        vma->vm_raend = 0;

        if (file) {
                error = -EINVAL;
                if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
                        goto free_vma;
                if (vm_flags & VM_DENYWRITE) {
                        error = deny_write_access(file);
                        if (error)
                                goto free_vma;
                        correct_wcount = 1;
                }
                vma->vm_file = file;
                get_file(file);
                error = file->f_op->mmap(file, vma);
                if (error)
                        goto unmap_and_free_vma;
        } else if (flags & MAP_SHARED) {
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_vma;
        }

        /* Can addr have changed??
         *
         * Answer: Yes, several device drivers can do it in their
         *         f_op->mmap method. -DaveM
         */
        if (addr != vma->vm_start) {
                /*
                 * It is a bit too late to pretend changing the virtual
                 * area of the mapping, we just corrupted userspace
                 * in the do_munmap, so FIXME (not in 2.4 to avoid breaking
                 * the driver API).
                 */
                struct vm_area_struct * stale_vma;
                /* Since addr changed, we rely on the mmap op to prevent
                 * collisions with existing vmas and just use find_vma_prepare
                 * to update the tree pointers.
                 */
                addr = vma->vm_start;
                stale_vma = find_vma_prepare(mm, addr, &prev,
                                &rb_link, &rb_parent);
                /*
                 * Make sure the lowlevel driver did its job right.
                 */
                if (unlikely(stale_vma && stale_vma->vm_start < vma->vm_end)) {
                        printk(KERN_ERR "buggy mmap operation: [<%p>]\n",
                                file ? file->f_op->mmap : NULL);
                        BUG();
                }
        }

        vma_link(mm, vma, prev, rb_link, rb_parent);
        if (correct_wcount)
                atomic_inc(&file->f_dentry->d_inode->i_writecount);

out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (vm_flags & VM_LOCKED) {
                mm->locked_vm += len >> PAGE_SHIFT;
                make_pages_present(addr, addr + len);
        }
        return addr;

unmap_and_free_vma:
        if (correct_wcount)
                atomic_inc(&file->f_dentry->d_inode->i_writecount);
        vma->vm_file = NULL;
        fput(file);

        /* Undo any partial mapping done by a device driver. */
        zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
free_vma:
        kmem_cache_free(vm_area_cachep, vma);
        return error;
}
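
/*
 * Note: this is the workhorse behind the mmap(2) family.  The architecture
 * specific sys_mmap/sys_mmap2 wrappers (and the do_mmap() inline in
 * <linux/mm.h>) typically take mm->mmap_sem for writing, convert the byte
 * offset into a page offset and then call do_mmap_pgoff().
 */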

/* Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *      if (ret & ~PAGE_MASK)
 *              error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
static inline unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct vm_area_struct *vma;

        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(current->mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        addr = PAGE_ALIGN(TASK_UNMAPPED_BASE);

        for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
                if (TASK_SIZE - len < addr)
                        return -ENOMEM;
                if (!vma || addr + len <= vma->vm_start)
                        return addr;
                addr = vma->vm_end;
        }
}
#else
extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
#endif

unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
        if (flags & MAP_FIXED) {
                if (addr > TASK_SIZE - len)
                        return -ENOMEM;
                if (addr & ~PAGE_MASK)
                        return -EINVAL;
                return addr;
        }

        if (file && file->f_op && file->f_op->get_unmapped_area)
                return file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);

        return arch_get_unmapped_area(file, addr, len, pgoff, flags);
}

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
{
        struct vm_area_struct *vma = NULL;

        if (mm) {
                /* Check the cache first. */
                /* (Cache hit rate is typically around 35%.) */
                vma = mm->mmap_cache;
                if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
                        rb_node_t * rb_node;

                        rb_node = mm->mm_rb.rb_node;
                        vma = NULL;

                        while (rb_node) {
                                struct vm_area_struct * vma_tmp;

                                vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

                                if (vma_tmp->vm_end > addr) {
                                        vma = vma_tmp;
                                        if (vma_tmp->vm_start <= addr)
                                                break;
                                        rb_node = rb_node->rb_left;
                                } else
                                        rb_node = rb_node->rb_right;
                        }
                        if (vma)
                                mm->mmap_cache = vma;
                }
        }
        return vma;
}
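
/*
 * Beware: the vma returned here only satisfies  addr < vm_end;  the address
 * may still lie below vm_start.  Callers that need the address to fall
 * inside the vma must check vm_start themselves, the way find_extend_vma()
 * below does before deciding whether to grow a VM_GROWSDOWN stack area.
 */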

/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
                                      struct vm_area_struct **pprev)
{
        if (mm) {
                /* Go through the RB tree quickly. */
                struct vm_area_struct * vma;
                rb_node_t * rb_node, * rb_last_right, * rb_prev;

                rb_node = mm->mm_rb.rb_node;
                rb_last_right = rb_prev = NULL;
                vma = NULL;

                while (rb_node) {
                        struct vm_area_struct * vma_tmp;

                        vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

                        if (vma_tmp->vm_end > addr) {
                                vma = vma_tmp;
                                rb_prev = rb_last_right;
                                if (vma_tmp->vm_start <= addr)
                                        break;
                                rb_node = rb_node->rb_left;
                        } else {
                                rb_last_right = rb_node;
                                rb_node = rb_node->rb_right;
                        }
                }
                if (vma) {
                        if (vma->vm_rb.rb_left) {
                                rb_prev = vma->vm_rb.rb_left;
                                while (rb_prev->rb_right)
                                        rb_prev = rb_prev->rb_right;
                        }
                        *pprev = NULL;
                        if (rb_prev)
                                *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
                        if ((rb_prev ? (*pprev)->vm_next : mm->mmap) != vma)
                                BUG();
                        return vma;
                }
        }
        *pprev = NULL;
        return NULL;
}

struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
        struct vm_area_struct * vma;
        unsigned long start;

        addr &= PAGE_MASK;
        vma = find_vma(mm,addr);
        if (!vma)
                return NULL;
        if (vma->vm_start <= addr)
                return vma;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                return NULL;
        start = vma->vm_start;
        if (expand_stack(vma, addr))
                return NULL;
        if (vma->vm_flags & VM_LOCKED) {
                make_pages_present(addr, start);
        }
        return vma;
}

/* Normal function to fix up a mapping
 * This function is the default for when an area has no specific
 * function.  This may be used as part of a more specific routine.
 * This function works out what part of an area is affected and
 * adjusts the mapping information.  Since the actual page
 * manipulation is done in do_mmap(), none need be done here,
 * though it would probably be more appropriate.
 *
 * By the time this function is called, the area struct has been
 * removed from the process mapping list, so it needs to be
 * reinserted if necessary.
 *
 * The 4 main cases are:
 *    Unmapping the whole area
 *    Unmapping from the start of the segment to a point in it
 *    Unmapping from an intermediate point to the end
 *    Unmapping between two intermediate points, making a hole.
 *
 * Case 4 involves the creation of 2 new areas, for each side of
 * the hole.  If possible, we reuse the existing area rather than
 * allocate a new one, and the return indicates whether the old
 * area was reused.
 */
static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
        struct vm_area_struct *area, unsigned long addr, size_t len,
        struct vm_area_struct *extra)
{
        struct vm_area_struct *mpnt;
        unsigned long end = addr + len;

        area->vm_mm->total_vm -= len >> PAGE_SHIFT;
        if (area->vm_flags & VM_LOCKED)
                area->vm_mm->locked_vm -= len >> PAGE_SHIFT;

        /* Unmapping the whole area. */
        if (addr == area->vm_start && end == area->vm_end) {
                if (area->vm_ops && area->vm_ops->close)
                        area->vm_ops->close(area);
                if (area->vm_file)
                        fput(area->vm_file);
                kmem_cache_free(vm_area_cachep, area);
                return extra;
        }

        /* Work out to one of the ends. */
        if (end == area->vm_end) {
                /*
                 * here area isn't visible to the semaphore-less readers
                 * so we don't need to update it under the spinlock.
                 */
                area->vm_end = addr;
                lock_vma_mappings(area);
                spin_lock(&mm->page_table_lock);
        } else if (addr == area->vm_start) {
                area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
                /* same locking considerations of the above case */
                area->vm_start = end;
                lock_vma_mappings(area);
                spin_lock(&mm->page_table_lock);
        } else {
        /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
                /* Add end mapping -- leave beginning for below */
                mpnt = extra;
                extra = NULL;

                mpnt->vm_mm = area->vm_mm;
                mpnt->vm_start = end;
                mpnt->vm_end = area->vm_end;
                mpnt->vm_page_prot = area->vm_page_prot;
                mpnt->vm_flags = area->vm_flags;
                mpnt->vm_raend = 0;
                mpnt->vm_ops = area->vm_ops;
                mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT);
                mpnt->vm_file = area->vm_file;
                mpnt->vm_private_data = area->vm_private_data;
                if (mpnt->vm_file)
                        get_file(mpnt->vm_file);
                if (mpnt->vm_ops && mpnt->vm_ops->open)
                        mpnt->vm_ops->open(mpnt);
                area->vm_end = addr;    /* Truncate area */

                /* Because mpnt->vm_file == area->vm_file this locks
                 * things correctly.
                 */
                lock_vma_mappings(area);
                spin_lock(&mm->page_table_lock);
                __insert_vm_struct(mm, mpnt);
        }

        __insert_vm_struct(mm, area);
        spin_unlock(&mm->page_table_lock);
        unlock_vma_mappings(area);
        return extra;
}
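
/*
 * Return value recap: the "extra" vma passed in is consumed only by the
 * hole-punching case above; in every other case it is handed back so that
 * do_munmap() can reuse it for the next partially unmapped vma or free it
 * once its loop is done.
 */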

/*
 * Try to free as many page directory entries as we can,
 * without having to work very hard at actually scanning
 * the page tables themselves.
 *
 * Right now we try to free page tables if we have a nice
 * PGDIR-aligned area that got free'd up. We could be more
 * granular if we want to, but this is fast and simple,
 * and covers the bad cases.
 *
 * "prev", if it exists, points to a vma before the one
 * we just free'd - but there's no telling how much before.
 */
static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
        unsigned long start, unsigned long end)
{
        unsigned long first = start & PGDIR_MASK;
        unsigned long last = end + PGDIR_SIZE - 1;
        unsigned long start_index, end_index;

        if (!prev) {
                prev = mm->mmap;
                if (!prev)
                        goto no_mmaps;
                if (prev->vm_end > start) {
                        if (last > prev->vm_start)
                                last = prev->vm_start;
                        goto no_mmaps;
                }
        }
        for (;;) {
                struct vm_area_struct *next = prev->vm_next;

                if (next) {
                        if (next->vm_start < start) {
                                prev = next;
                                continue;
                        }
                        if (last > next->vm_start)
                                last = next->vm_start;
                }
                if (prev->vm_end > first)
                        first = prev->vm_end + PGDIR_SIZE - 1;
                break;
        }
no_mmaps:
        if (last < first)
                return;
        /*
         * If the PGD bits are not consecutive in the virtual address, the
         * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
         */
        start_index = pgd_index(first);
        end_index = pgd_index(last);
        if (end_index > start_index) {
                clear_page_tables(mm, start_index, end_index - start_index);
                flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
        }
}

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
        struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;

        if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
                return -EINVAL;

        if ((len = PAGE_ALIGN(len)) == 0)
                return -EINVAL;

        /* Check if this memory area is ok - put it on the temporary
         * list if so..  The checks here are pretty simple --
         * every area affected in some way (by any overlap) is put
         * on the list.  If nothing is put on, nothing is affected.
         */
        mpnt = find_vma_prev(mm, addr, &prev);
        if (!mpnt)
                return 0;
        /* we have  addr < mpnt->vm_end  */

        if (mpnt->vm_start >= addr+len)
                return 0;

        /* If we'll make "hole", check the vm areas limit */
        if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
            && mm->map_count >= max_map_count)
                return -ENOMEM;

        /*
         * We may need one additional vma to fix up the mappings ...
         * and this is the last chance for an easy error exit.
         */
        extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!extra)
                return -ENOMEM;

        npp = (prev ? &prev->vm_next : &mm->mmap);
        free = NULL;
        spin_lock(&mm->page_table_lock);
        for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
                *npp = mpnt->vm_next;
                mpnt->vm_next = free;
                free = mpnt;
                rb_erase(&mpnt->vm_rb, &mm->mm_rb);
        }
        mm->mmap_cache = NULL;  /* Kill the cache. */
        spin_unlock(&mm->page_table_lock);

        /* Ok - we have the memory areas we should free on the 'free' list,
         * so release them, and unmap the page range..
         * If one of the segments is only being partially unmapped,
         * it will put new vm_area_struct(s) into the address space.
         * In that case we have to be careful with VM_DENYWRITE.
         */
        while ((mpnt = free) != NULL) {
                unsigned long st, end, size;
                struct file *file = NULL;

                free = free->vm_next;

                st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
                end = addr+len;
                end = end > mpnt->vm_end ? mpnt->vm_end : end;
                size = end - st;

                if (mpnt->vm_flags & VM_DENYWRITE &&
                    (st != mpnt->vm_start || end != mpnt->vm_end) &&
                    (file = mpnt->vm_file) != NULL) {
                        atomic_dec(&file->f_dentry->d_inode->i_writecount);
                }
                remove_shared_vm_struct(mpnt);
                mm->map_count--;

                zap_page_range(mm, st, size);

                /*
                 * Fix the mapping, and free the old area if it wasn't reused.
                 */
                extra = unmap_fixup(mm, mpnt, st, size, extra);
                if (file)
                        atomic_inc(&file->f_dentry->d_inode->i_writecount);
        }
        validate_mm(mm);

        /* Release the extra vma struct if it wasn't used */
        if (extra)
                kmem_cache_free(vm_area_cachep, extra);

        free_pgtables(mm, prev, addr, addr+len);

        return 0;
}

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
        int ret;
        struct mm_struct *mm = current->mm;

        down_write(&mm->mmap_sem);
        ret = do_munmap(mm, addr, len);
        up_write(&mm->mmap_sem);
        return ret;
}

/*
 * this is really a simplified "do_mmap".  it only handles
 * anonymous maps.  eventually we may be able to do some
 * brk-specific accounting here.
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
        struct mm_struct * mm = current->mm;
        struct vm_area_struct * vma, * prev;
        unsigned long flags;
        rb_node_t ** rb_link, * rb_parent;

        len = PAGE_ALIGN(len);
        if (!len)
                return addr;

        if ((addr + len) > TASK_SIZE || (addr + len) < addr)
                return -EINVAL;

        /*
         * mlock MCL_FUTURE?
         */
        if (mm->def_flags & VM_LOCKED) {
                unsigned long locked = mm->locked_vm << PAGE_SHIFT;
                locked += len;
                if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
                        return -EAGAIN;
        }

        /*
         * Clear old maps.  this also does some error checking for us
         */
 munmap_back:
        vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
        if (vma && vma->vm_start < addr + len) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
                goto munmap_back;
        }

        /* Check against address space limits *after* clearing old maps... */
        if ((mm->total_vm << PAGE_SHIFT) + len
            > current->rlim[RLIMIT_AS].rlim_cur)
                return -ENOMEM;

        if (mm->map_count > max_map_count)
                return -ENOMEM;

        if (!vm_enough_memory(len >> PAGE_SHIFT))
                return -ENOMEM;

        flags = VM_DATA_DEFAULT_FLAGS | mm->def_flags;

        /* Can we just expand an old anonymous mapping? */
        if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, flags))
                goto out;

        /*
         * create a vma struct for an anonymous mapping
         */
        vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!vma)
                return -ENOMEM;

        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;
        vma->vm_flags = flags;
        vma->vm_page_prot = protection_map[flags & 0x0f];
        vma->vm_ops = NULL;
        vma->vm_pgoff = 0;
        vma->vm_file = NULL;
        vma->vm_private_data = NULL;

        vma_link(mm, vma, prev, rb_link, rb_parent);

out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (flags & VM_LOCKED) {
                mm->locked_vm += len >> PAGE_SHIFT;
                make_pages_present(addr, addr + len);
        }
        return addr;
}
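
/*
 * Example: sys_brk() above calls do_brk(oldbrk, newbrk-oldbrk) and treats
 * any return value other than oldbrk as failure, so on success this
 * function must hand back the start address it was given, which it does
 * both in the vma_merge() fast path and after linking a new vma.
 */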

/* Build the RB tree corresponding to the VMA list. */
void build_mmap_rb(struct mm_struct * mm)
{
        struct vm_area_struct * vma;
        rb_node_t ** rb_link, * rb_parent;

        mm->mm_rb = RB_ROOT;
        rb_link = &mm->mm_rb.rb_node;
        rb_parent = NULL;
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                __vma_link_rb(mm, vma, rb_link, rb_parent);
                rb_parent = &vma->vm_rb;
                rb_link = &rb_parent->rb_right;
        }
}

/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
        struct vm_area_struct * mpnt;

        release_segments(mm);
        spin_lock(&mm->page_table_lock);
        mpnt = mm->mmap;
        mm->mmap = mm->mmap_cache = NULL;
        mm->mm_rb = RB_ROOT;
        mm->rss = 0;
        spin_unlock(&mm->page_table_lock);
        mm->total_vm = 0;
        mm->locked_vm = 0;

        flush_cache_mm(mm);
        while (mpnt) {
                struct vm_area_struct * next = mpnt->vm_next;
                unsigned long start = mpnt->vm_start;
                unsigned long end = mpnt->vm_end;
                unsigned long size = end - start;

                if (mpnt->vm_ops) {
                        if (mpnt->vm_ops->close)
                                mpnt->vm_ops->close(mpnt);
                }
                mm->map_count--;
                remove_shared_vm_struct(mpnt);
                zap_page_range(mm, start, size);
                if (mpnt->vm_file)
                        fput(mpnt->vm_file);
                kmem_cache_free(vm_area_cachep, mpnt);
                mpnt = next;
        }

        /* This is just debugging */
        if (mm->map_count)
                BUG();

        clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);

        flush_tlb_mm(mm);
}

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap ring.  If vm_file is non-NULL
 * then the i_shared_lock must be held here.
 */
void __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{
        struct vm_area_struct * __vma, * prev;
        rb_node_t ** rb_link, * rb_parent;

        __vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
        if (__vma && __vma->vm_start < vma->vm_end)
                BUG();
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        mm->map_count++;
        validate_mm(mm);
}

void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{
        struct vm_area_struct * __vma, * prev;
        rb_node_t ** rb_link, * rb_parent;

        __vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
        if (__vma && __vma->vm_start < vma->vm_end)
                BUG();
        vma_link(mm, vma, prev, rb_link, rb_parent);
        validate_mm(mm);
}