/*
 *      linux/mm/mmap.c
 *
 * Written by obz.
 */
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/mount.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>

/*
 * WARNING: the debugging will use recursive algorithms so never enable this
 * unless you know what you are doing.
 */
#undef DEBUG_MM_RB

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type     prot
 *              PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
 * MAP_SHARED   r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 *              w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
 *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 *
 * MAP_PRIVATE  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 *              w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
 *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 *
 */
pgprot_t protection_map[16] = {
        __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
        __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
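/*
 * Illustrative note (editor's addition, not part of the original source):
 * vm_page_prot is looked up as protection_map[vm_flags & 0x0f], where the
 * low four VM_* bits are READ (0x1), WRITE (0x2), EXEC (0x4) and SHARED
 * (0x8).  For example, a MAP_PRIVATE mapping created with
 * PROT_READ|PROT_WRITE has vm_flags & 0x0f == VM_READ|VM_WRITE == 0x3 and
 * therefore uses __P011, while the same protection on a MAP_SHARED mapping
 * indexes entry 0xb, i.e. __S011.
 */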

int sysctl_overcommit_memory;
int max_map_count = DEFAULT_MAX_MAP_COUNT;

/* Check that a process has enough memory to allocate a
 * new virtual mapping.
 */
int vm_enough_memory(long pages)
{
        /* Stupid algorithm to decide if we have enough memory: while
         * simple, it hopefully works in most obvious cases.. Easy to
         * fool it, but this should catch most mistakes.
         */
        /* 23/11/98 NJC: Somewhat less stupid version of algorithm,
         * which tries to do "TheRightThing".  Instead of using half of
         * (buffers+cache), use the minimum values.  Allow an extra 2%
         * of num_physpages for safety margin.
         */

        unsigned long free;

        /* Sometimes we want to use more memory than we have. */
        if (sysctl_overcommit_memory)
            return 1;

        /* The page cache contains buffer pages these days.. */
        free = page_cache_size;
        free += nr_free_pages();
        free += nr_swap_pages;

        /*
         * This double-counts: the nrpages are both in the page-cache
         * and in the swapper space. At the same time, this compensates
         * for the swap-space over-allocation (ie "nr_swap_pages" being
         * too small).
         */
        free += swapper_space.nrpages;

        /*
         * The code below doesn't account for free space in the inode
         * and dentry slab cache, slab cache fragmentation, inodes and
         * dentries which will become freeable under VM load, etc.
         * Let's just hope all these (complex) factors balance out...
         */
        free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> PAGE_SHIFT;
        free += (inodes_stat.nr_unused * sizeof(struct inode)) >> PAGE_SHIFT;

        return free > pages;
}
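/*
 * Worked example (editor's note, not in the original source): with
 * overcommit disabled, a request for, say, 1024 pages succeeds only if
 * page_cache_size + nr_free_pages() + nr_swap_pages + swapper_space.nrpages
 * plus the reclaimable dentry/inode estimate adds up to more than 1024
 * pages.  The check is a heuristic, not a reservation: nothing is set
 * aside, so a later allocation can still fail.
 */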

/* Remove one vm structure from the inode's i_mapping address space. */
static inline void __remove_shared_vm_struct(struct vm_area_struct *vma)
{
        struct file * file = vma->vm_file;

        if (file) {
                struct inode *inode = file->f_dentry->d_inode;
                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_inc(&inode->i_writecount);
                if(vma->vm_next_share)
                        vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share;
                *vma->vm_pprev_share = vma->vm_next_share;
        }
}

static inline void remove_shared_vm_struct(struct vm_area_struct *vma)
{
        lock_vma_mappings(vma);
        __remove_shared_vm_struct(vma);
        unlock_vma_mappings(vma);
}

void lock_vma_mappings(struct vm_area_struct *vma)
{
        struct address_space *mapping;

        mapping = NULL;
        if (vma->vm_file)
                mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
        if (mapping)
                spin_lock(&mapping->i_shared_lock);
}

void unlock_vma_mappings(struct vm_area_struct *vma)
{
        struct address_space *mapping;

        mapping = NULL;
        if (vma->vm_file)
                mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
        if (mapping)
                spin_unlock(&mapping->i_shared_lock);
}

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
        unsigned long rlim, retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;

        down_write(&mm->mmap_sem);

        if (brk < mm->end_code)
                goto out;
        newbrk = PAGE_ALIGN(brk);
        oldbrk = PAGE_ALIGN(mm->brk);
        if (oldbrk == newbrk)
                goto set_brk;

        /* Always allow shrinking brk. */
        if (brk <= mm->brk) {
                if (!do_munmap(mm, newbrk, oldbrk-newbrk))
                        goto set_brk;
                goto out;
        }

        /* Check against rlimit.. */
        rlim = current->rlim[RLIMIT_DATA].rlim_cur;
        if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
                goto out;

        /* Check against existing mmap mappings. */
        if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
                goto out;

        /* Check if we have enough memory.. */
        if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
                goto out;

        /* Ok, looks good - let it rip. */
        if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
                goto out;
set_brk:
        mm->brk = brk;
out:
        retval = mm->brk;
        up_write(&mm->mmap_sem);
        return retval;
}
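/*
 * Example flow (editor's note, not in the original source): growing the
 * heap by one page means newbrk == oldbrk + PAGE_SIZE.  sys_brk() then
 * checks RLIMIT_DATA, verifies with find_vma_intersection() that no
 * existing mapping already occupies [oldbrk, newbrk + PAGE_SIZE), asks
 * vm_enough_memory() for one page, and finally calls
 * do_brk(oldbrk, PAGE_SIZE), which either extends the previous anonymous
 * vma or links a new one.  Shrinking the heap instead goes through
 * do_munmap().
 */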

/* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
{
#define _trans(x,bit1,bit2) \
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

        unsigned long prot_bits, flag_bits;
        prot_bits =
                _trans(prot, PROT_READ, VM_READ) |
                _trans(prot, PROT_WRITE, VM_WRITE) |
                _trans(prot, PROT_EXEC, VM_EXEC);
        flag_bits =
                _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
                _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
                _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
        return prot_bits | flag_bits;
#undef _trans
}
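/*
 * Example (editor's note, not in the original source): the _trans() macro
 * either passes a bit straight through (when the PROT_/MAP_ value happens
 * to equal the corresponding VM_ value, as on most architectures for
 * PROT_READ/VM_READ) or tests it and substitutes the VM_ bit.  So
 * calc_vm_flags(PROT_READ|PROT_WRITE, MAP_PRIVATE) yields VM_READ|VM_WRITE,
 * and adding MAP_GROWSDOWN to the flags would set VM_GROWSDOWN as well.
 */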

#ifdef DEBUG_MM_RB
static int browse_rb(rb_node_t * rb_node) {
        int i = 0;
        if (rb_node) {
                i++;
                i += browse_rb(rb_node->rb_left);
                i += browse_rb(rb_node->rb_right);
        }
        return i;
}

static void validate_mm(struct mm_struct * mm) {
        int bug = 0;
        int i = 0;
        struct vm_area_struct * tmp = mm->mmap;
        while (tmp) {
                tmp = tmp->vm_next;
                i++;
        }
        if (i != mm->map_count)
                printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
        i = browse_rb(mm->mm_rb.rb_node);
        if (i != mm->map_count)
                printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
        if (bug)
                BUG();
}
#else
#define validate_mm(mm) do { } while (0)
#endif

static struct vm_area_struct * find_vma_prepare(struct mm_struct * mm, unsigned long addr,
                                                struct vm_area_struct ** pprev,
                                                rb_node_t *** rb_link, rb_node_t ** rb_parent)
{
        struct vm_area_struct * vma;
        rb_node_t ** __rb_link, * __rb_parent, * rb_prev;

        __rb_link = &mm->mm_rb.rb_node;
        rb_prev = __rb_parent = NULL;
        vma = NULL;

        while (*__rb_link) {
                struct vm_area_struct *vma_tmp;

                __rb_parent = *__rb_link;
                vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

                if (vma_tmp->vm_end > addr) {
                        vma = vma_tmp;
                        if (vma_tmp->vm_start <= addr)
                                return vma;
                        __rb_link = &__rb_parent->rb_left;
                } else {
                        rb_prev = __rb_parent;
                        __rb_link = &__rb_parent->rb_right;
                }
        }

        *pprev = NULL;
        if (rb_prev)
                *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
        *rb_link = __rb_link;
        *rb_parent = __rb_parent;
        return vma;
}

static inline void __vma_link_list(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
                                   rb_node_t * rb_parent)
{
        if (prev) {
                vma->vm_next = prev->vm_next;
                prev->vm_next = vma;
        } else {
                mm->mmap = vma;
                if (rb_parent)
                        vma->vm_next = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
                else
                        vma->vm_next = NULL;
        }
}

static inline void __vma_link_rb(struct mm_struct * mm, struct vm_area_struct * vma,
                                 rb_node_t ** rb_link, rb_node_t * rb_parent)
{
        rb_link_node(&vma->vm_rb, rb_parent, rb_link);
        rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}

static inline void __vma_link_file(struct vm_area_struct * vma)
{
        struct file * file;

        file = vma->vm_file;
        if (file) {
                struct inode * inode = file->f_dentry->d_inode;
                struct address_space *mapping = inode->i_mapping;
                struct vm_area_struct **head;

                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_dec(&inode->i_writecount);

                head = &mapping->i_mmap;
                if (vma->vm_flags & VM_SHARED)
                        head = &mapping->i_mmap_shared;

                /* insert vma into inode's share list */
                if((vma->vm_next_share = *head) != NULL)
                        (*head)->vm_pprev_share = &vma->vm_next_share;
                *head = vma;
                vma->vm_pprev_share = head;
        }
}

static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma,  struct vm_area_struct * prev,
                       rb_node_t ** rb_link, rb_node_t * rb_parent)
{
        __vma_link_list(mm, vma, prev, rb_parent);
        __vma_link_rb(mm, vma, rb_link, rb_parent);
        __vma_link_file(vma);
}

static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
                            rb_node_t ** rb_link, rb_node_t * rb_parent)
{
        lock_vma_mappings(vma);
        spin_lock(&mm->page_table_lock);
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        spin_unlock(&mm->page_table_lock);
        unlock_vma_mappings(vma);

        mm->map_count++;
        validate_mm(mm);
}

static int vma_merge(struct mm_struct * mm, struct vm_area_struct * prev,
                     rb_node_t * rb_parent, unsigned long addr, unsigned long end, unsigned long vm_flags)
{
        spinlock_t * lock = &mm->page_table_lock;
        if (!prev) {
                prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
                goto merge_next;
        }
        if (prev->vm_end == addr && can_vma_merge(prev, vm_flags)) {
                struct vm_area_struct * next;

                spin_lock(lock);
                prev->vm_end = end;
                next = prev->vm_next;
                if (next && prev->vm_end == next->vm_start && can_vma_merge(next, vm_flags)) {
                        prev->vm_end = next->vm_end;
                        __vma_unlink(mm, next, prev);
                        spin_unlock(lock);

                        mm->map_count--;
                        kmem_cache_free(vm_area_cachep, next);
                        return 1;
                }
                spin_unlock(lock);
                return 1;
        }

        prev = prev->vm_next;
        if (prev) {
 merge_next:
                if (!can_vma_merge(prev, vm_flags))
                        return 0;
                if (end == prev->vm_start) {
                        spin_lock(lock);
                        prev->vm_start = addr;
                        spin_unlock(lock);
                        return 1;
                }
        }

        return 0;
}
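/*
 * Example (editor's note, not in the original source): if an anonymous
 * region [A, B) ends exactly where a new anonymous request [B, C) begins
 * and both have compatible flags, vma_merge() simply stretches the
 * existing vma to [A, C) under page_table_lock.  If a further mergeable
 * vma starts at C, the two are fused as well and the second
 * vm_area_struct is unlinked and freed, so the address space keeps one
 * descriptor instead of three.
 */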

unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
        unsigned long prot, unsigned long flags, unsigned long pgoff)
{
        struct mm_struct * mm = current->mm;
        struct vm_area_struct * vma, * prev;
        unsigned int vm_flags;
        int correct_wcount = 0;
        int error;
        rb_node_t ** rb_link, * rb_parent;

        if (file) {
                if (!file->f_op || !file->f_op->mmap)
                        return -ENODEV;

                if ((prot & PROT_EXEC) && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
                        return -EPERM;
        }

        if (!len)
                return addr;

        len = PAGE_ALIGN(len);

        if (len > TASK_SIZE || len == 0)
                return -EINVAL;

        /* offset overflow? */
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
                return -EINVAL;

        /* Too many mappings? */
        if (mm->map_count > max_map_count)
                return -ENOMEM;

        /* Obtain the address to map to. we verify (or select) it and ensure
         * that it represents a valid section of the address space.
         */
        addr = get_unmapped_area(file, addr, len, pgoff, flags);
        if (addr & ~PAGE_MASK)
                return addr;

        /* Do simple checking here so the lower-level routines won't have
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
        vm_flags = calc_vm_flags(prot,flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

        /* mlock MCL_FUTURE? */
        if (vm_flags & VM_LOCKED) {
                unsigned long locked = mm->locked_vm << PAGE_SHIFT;
                locked += len;
                if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
                        return -EAGAIN;
        }

        if (file) {
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
                        if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
                                return -EACCES;

                        /* Make sure we don't allow writing to an append-only file.. */
                        if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
                                return -EACCES;

                        /* make sure there are no mandatory locks on the file. */
                        if (locks_verify_locked(file->f_dentry->d_inode))
                                return -EAGAIN;

                        vm_flags |= VM_SHARED | VM_MAYSHARE;
                        if (!(file->f_mode & FMODE_WRITE))
                                vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

                        /* fall through */
                case MAP_PRIVATE:
                        if (!(file->f_mode & FMODE_READ))
                                return -EACCES;
                        break;

                default:
                        return -EINVAL;
                }
        } else {
                vm_flags |= VM_SHARED | VM_MAYSHARE;
                switch (flags & MAP_TYPE) {
                default:
                        return -EINVAL;
                case MAP_PRIVATE:
                        vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
                        /* fall through */
                case MAP_SHARED:
                        break;
                }
        }

        /* Clear old maps */
munmap_back:
        vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
        if (vma && vma->vm_start < addr + len) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
                goto munmap_back;
        }

        /* Check against address space limit. */
        if ((mm->total_vm << PAGE_SHIFT) + len
            > current->rlim[RLIMIT_AS].rlim_cur)
                return -ENOMEM;

        /* Private writable mapping? Check memory availability.. */
        if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
            !(flags & MAP_NORESERVE)                             &&
            !vm_enough_memory(len >> PAGE_SHIFT))
                return -ENOMEM;

        /* Can we just expand an old anonymous mapping? */
        if (!file && !(vm_flags & VM_SHARED) && rb_parent)
                if (vma_merge(mm, prev, rb_parent, addr, addr + len, vm_flags))
                        goto out;

        /* Determine the object being mapped and call the appropriate
         * specific mapper. the address has already been validated, but
         * not unmapped, but the maps are removed from the list.
         */
        vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!vma)
                return -ENOMEM;

        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;
        vma->vm_flags = vm_flags;
        vma->vm_page_prot = protection_map[vm_flags & 0x0f];
        vma->vm_ops = NULL;
        vma->vm_pgoff = pgoff;
        vma->vm_file = NULL;
        vma->vm_private_data = NULL;
        vma->vm_raend = 0;

        if (file) {
                error = -EINVAL;
                if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
                        goto free_vma;
                if (vm_flags & VM_DENYWRITE) {
                        error = deny_write_access(file);
                        if (error)
                                goto free_vma;
                        correct_wcount = 1;
                }
                vma->vm_file = file;
                get_file(file);
                error = file->f_op->mmap(file, vma);
                if (error)
                        goto unmap_and_free_vma;
        } else if (flags & MAP_SHARED) {
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_vma;
        }

        /* Can addr have changed??
         *
         * Answer: Yes, several device drivers can do it in their
         *         f_op->mmap method. -DaveM
         */
        if (addr != vma->vm_start) {
                /*
                 * It is a bit too late to pretend changing the virtual
                 * area of the mapping, we just corrupted userspace
                 * in the do_munmap, so FIXME (not in 2.4 to avoid breaking
                 * the driver API).
                 */
                struct vm_area_struct * stale_vma;
                /* Since addr changed, we rely on the mmap op to prevent
                 * collisions with existing vmas and just use find_vma_prepare
                 * to update the tree pointers.
                 */
                addr = vma->vm_start;
                stale_vma = find_vma_prepare(mm, addr, &prev,
                                                &rb_link, &rb_parent);
                /*
                 * Make sure the lowlevel driver did its job right.
                 */
                if (unlikely(stale_vma && stale_vma->vm_start < vma->vm_end)) {
                        printk(KERN_ERR "buggy mmap operation: [<%p>]\n",
                                file ? file->f_op->mmap : NULL);
                        BUG();
                }
        }

        vma_link(mm, vma, prev, rb_link, rb_parent);
        if (correct_wcount)
                atomic_inc(&file->f_dentry->d_inode->i_writecount);

out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (vm_flags & VM_LOCKED) {
                mm->locked_vm += len >> PAGE_SHIFT;
                make_pages_present(addr, addr + len);
        }
        return addr;

unmap_and_free_vma:
        if (correct_wcount)
                atomic_inc(&file->f_dentry->d_inode->i_writecount);
        vma->vm_file = NULL;
        fput(file);

        /* Undo any partial mapping done by a device driver. */
        zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
free_vma:
        kmem_cache_free(vm_area_cachep, vma);
        return error;
}

/* Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *      if (ret & ~PAGE_MASK)
 *              error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
static inline unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct vm_area_struct *vma;

        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(current->mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        addr = PAGE_ALIGN(TASK_UNMAPPED_BASE);

        for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
                if (TASK_SIZE - len < addr)
                        return -ENOMEM;
                if (!vma || addr + len <= vma->vm_start)
                        return addr;
                addr = vma->vm_end;
        }
}
#else
extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
#endif

unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
        if (flags & MAP_FIXED) {
                if (addr > TASK_SIZE - len)
                        return -ENOMEM;
                if (addr & ~PAGE_MASK)
                        return -EINVAL;
                return addr;
        }

        if (file && file->f_op && file->f_op->get_unmapped_area)
                return file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);

        return arch_get_unmapped_area(file, addr, len, pgoff, flags);
}
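/*
 * Caller-side example (editor's note, not in the original source): because
 * errors come back in-band as values with low bits set, callers test the
 * result the same way do_mmap_pgoff() does above:
 *
 *      addr = get_unmapped_area(file, addr, len, pgoff, flags);
 *      if (addr & ~PAGE_MASK)
 *              return addr;    (here addr is really a negative errno)
 */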

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
{
        struct vm_area_struct *vma = NULL;

        if (mm) {
                /* Check the cache first. */
                /* (Cache hit rate is typically around 35%.) */
                vma = mm->mmap_cache;
                if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
                        rb_node_t * rb_node;

                        rb_node = mm->mm_rb.rb_node;
                        vma = NULL;

                        while (rb_node) {
                                struct vm_area_struct * vma_tmp;

                                vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

                                if (vma_tmp->vm_end > addr) {
                                        vma = vma_tmp;
                                        if (vma_tmp->vm_start <= addr)
                                                break;
                                        rb_node = rb_node->rb_left;
                                } else
                                        rb_node = rb_node->rb_right;
                        }
                        if (vma)
                                mm->mmap_cache = vma;
                }
        }
        return vma;
}

/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
                                      struct vm_area_struct **pprev)
{
        if (mm) {
                /* Go through the RB tree quickly. */
                struct vm_area_struct * vma;
                rb_node_t * rb_node, * rb_last_right, * rb_prev;

                rb_node = mm->mm_rb.rb_node;
                rb_last_right = rb_prev = NULL;
                vma = NULL;

                while (rb_node) {
                        struct vm_area_struct * vma_tmp;

                        vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

                        if (vma_tmp->vm_end > addr) {
                                vma = vma_tmp;
                                rb_prev = rb_last_right;
                                if (vma_tmp->vm_start <= addr)
                                        break;
                                rb_node = rb_node->rb_left;
                        } else {
                                rb_last_right = rb_node;
                                rb_node = rb_node->rb_right;
                        }
                }
                if (vma) {
                        if (vma->vm_rb.rb_left) {
                                rb_prev = vma->vm_rb.rb_left;
                                while (rb_prev->rb_right)
                                        rb_prev = rb_prev->rb_right;
                        }
                        *pprev = NULL;
                        if (rb_prev)
                                *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
                        if ((rb_prev ? (*pprev)->vm_next : mm->mmap) != vma)
                                BUG();
                        return vma;
                }
        }
        *pprev = NULL;
        return NULL;
}

struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
        struct vm_area_struct * vma;
        unsigned long start;

        addr &= PAGE_MASK;
        vma = find_vma(mm,addr);
        if (!vma)
                return NULL;
        if (vma->vm_start <= addr)
                return vma;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                return NULL;
        start = vma->vm_start;
        if (expand_stack(vma, addr))
                return NULL;
        if (vma->vm_flags & VM_LOCKED) {
                make_pages_present(addr, start);
        }
        return vma;
}

/* Normal function to fix up a mapping
 * This function is the default for when an area has no specific
 * function.  This may be used as part of a more specific routine.
 * This function works out what part of an area is affected and
 * adjusts the mapping information.  Since the actual page
 * manipulation is done in do_mmap(), none need be done here,
 * though it would probably be more appropriate.
 *
 * By the time this function is called, the area struct has been
 * removed from the process mapping list, so it needs to be
 * reinserted if necessary.
 *
 * The 4 main cases are:
 *    Unmapping the whole area
 *    Unmapping from the start of the segment to a point in it
 *    Unmapping from an intermediate point to the end
 *    Unmapping between two intermediate points, making a hole.
 *
 * Case 4 involves the creation of 2 new areas, for each side of
 * the hole.  If possible, we reuse the existing area rather than
 * allocate a new one, and the return indicates whether the old
 * area was reused.
 */
static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
        struct vm_area_struct *area, unsigned long addr, size_t len,
        struct vm_area_struct *extra)
{
        struct vm_area_struct *mpnt;
        unsigned long end = addr + len;

        area->vm_mm->total_vm -= len >> PAGE_SHIFT;
        if (area->vm_flags & VM_LOCKED)
                area->vm_mm->locked_vm -= len >> PAGE_SHIFT;

        /* Unmapping the whole area. */
        if (addr == area->vm_start && end == area->vm_end) {
                if (area->vm_ops && area->vm_ops->close)
                        area->vm_ops->close(area);
                if (area->vm_file)
                        fput(area->vm_file);
                kmem_cache_free(vm_area_cachep, area);
                return extra;
        }

        /* Work out to one of the ends. */
        if (end == area->vm_end) {
                /*
                 * here area isn't visible to the semaphore-less readers
                 * so we don't need to update it under the spinlock.
                 */
                area->vm_end = addr;
                lock_vma_mappings(area);
                spin_lock(&mm->page_table_lock);
        } else if (addr == area->vm_start) {
                area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
                /* same locking considerations of the above case */
                area->vm_start = end;
                lock_vma_mappings(area);
                spin_lock(&mm->page_table_lock);
        } else {
        /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
                /* Add end mapping -- leave beginning for below */
                mpnt = extra;
                extra = NULL;

                mpnt->vm_mm = area->vm_mm;
                mpnt->vm_start = end;
                mpnt->vm_end = area->vm_end;
                mpnt->vm_page_prot = area->vm_page_prot;
                mpnt->vm_flags = area->vm_flags;
                mpnt->vm_raend = 0;
                mpnt->vm_ops = area->vm_ops;
                mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT);
                mpnt->vm_file = area->vm_file;
                mpnt->vm_private_data = area->vm_private_data;
                if (mpnt->vm_file)
                        get_file(mpnt->vm_file);
                if (mpnt->vm_ops && mpnt->vm_ops->open)
                        mpnt->vm_ops->open(mpnt);
                area->vm_end = addr;    /* Truncate area */

                /* Because mpnt->vm_file == area->vm_file this locks
                 * things correctly.
                 */
                lock_vma_mappings(area);
                spin_lock(&mm->page_table_lock);
                __insert_vm_struct(mm, mpnt);
        }

        __insert_vm_struct(mm, area);
        spin_unlock(&mm->page_table_lock);
        unlock_vma_mappings(area);
        return extra;
}
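/*
 * Example of case 4 (editor's note, not in the original source): unmapping
 * the middle page of a three-page vma [A, A+3*PAGE_SIZE) leaves two pieces.
 * The original vm_area_struct is truncated to [A, A+PAGE_SIZE) and the
 * preallocated "extra" vma is consumed to describe
 * [A+2*PAGE_SIZE, A+3*PAGE_SIZE), with vm_pgoff advanced accordingly and an
 * extra reference taken on vm_file.  Returning NULL then tells do_munmap()
 * that "extra" was used up.
 */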

/*
 * Try to free as many page directory entries as we can,
 * without having to work very hard at actually scanning
 * the page tables themselves.
 *
 * Right now we try to free page tables if we have a nice
 * PGDIR-aligned area that got free'd up. We could be more
 * granular if we want to, but this is fast and simple,
 * and covers the bad cases.
 *
 * "prev", if it exists, points to a vma before the one
 * we just free'd - but there's no telling how much before.
 */
static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
        unsigned long start, unsigned long end)
{
        unsigned long first = start & PGDIR_MASK;
        unsigned long last = end + PGDIR_SIZE - 1;
        unsigned long start_index, end_index;

        if (!prev) {
                prev = mm->mmap;
                if (!prev)
                        goto no_mmaps;
                if (prev->vm_end > start) {
                        if (last > prev->vm_start)
                                last = prev->vm_start;
                        goto no_mmaps;
                }
        }
        for (;;) {
                struct vm_area_struct *next = prev->vm_next;

                if (next) {
                        if (next->vm_start < start) {
                                prev = next;
                                continue;
                        }
                        if (last > next->vm_start)
                                last = next->vm_start;
                }
                if (prev->vm_end > first)
                        first = prev->vm_end + PGDIR_SIZE - 1;
                break;
        }
no_mmaps:
        if (last < first)
                return;
        /*
         * If the PGD bits are not consecutive in the virtual address, the
         * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
         */
        start_index = pgd_index(first);
        end_index = pgd_index(last);
        if (end_index > start_index) {
                clear_page_tables(mm, start_index, end_index - start_index);
                flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
        }
}

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardine <jeremy@sw.oz.au>
 */
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
        struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;

        if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
                return -EINVAL;

        if ((len = PAGE_ALIGN(len)) == 0)
                return -EINVAL;

        /* Check if this memory area is ok - put it on the temporary
         * list if so..  The checks here are pretty simple --
         * every area affected in some way (by any overlap) is put
         * on the list.  If nothing is put on, nothing is affected.
         */
        mpnt = find_vma_prev(mm, addr, &prev);
        if (!mpnt)
                return 0;
        /* we have  addr < mpnt->vm_end  */

        if (mpnt->vm_start >= addr+len)
                return 0;

        /* If we'll make "hole", check the vm areas limit */
        if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
            && mm->map_count >= max_map_count)
                return -ENOMEM;

        /*
         * We may need one additional vma to fix up the mappings ...
         * and this is the last chance for an easy error exit.
         */
        extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!extra)
                return -ENOMEM;

        npp = (prev ? &prev->vm_next : &mm->mmap);
        free = NULL;
        spin_lock(&mm->page_table_lock);
        for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
                *npp = mpnt->vm_next;
                mpnt->vm_next = free;
                free = mpnt;
                rb_erase(&mpnt->vm_rb, &mm->mm_rb);
        }
        mm->mmap_cache = NULL;  /* Kill the cache. */
        spin_unlock(&mm->page_table_lock);

        /* Ok - we have the memory areas we should free on the 'free' list,
         * so release them, and unmap the page range..
         * If one of the segments is only being partially unmapped,
         * it will put new vm_area_struct(s) into the address space.
         * In that case we have to be careful with VM_DENYWRITE.
         */
        while ((mpnt = free) != NULL) {
                unsigned long st, end, size;
                struct file *file = NULL;

                free = free->vm_next;

                st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
                end = addr+len;
                end = end > mpnt->vm_end ? mpnt->vm_end : end;
                size = end - st;

                if (mpnt->vm_flags & VM_DENYWRITE &&
                    (st != mpnt->vm_start || end != mpnt->vm_end) &&
                    (file = mpnt->vm_file) != NULL) {
                        atomic_dec(&file->f_dentry->d_inode->i_writecount);
                }
                remove_shared_vm_struct(mpnt);
                mm->map_count--;

                zap_page_range(mm, st, size);

                /*
                 * Fix the mapping, and free the old area if it wasn't reused.
                 */
                extra = unmap_fixup(mm, mpnt, st, size, extra);
                if (file)
                        atomic_inc(&file->f_dentry->d_inode->i_writecount);
        }
        validate_mm(mm);

        /* Release the extra vma struct if it wasn't used */
        if (extra)
                kmem_cache_free(vm_area_cachep, extra);

        free_pgtables(mm, prev, addr, addr+len);

        return 0;
}

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
        int ret;
        struct mm_struct *mm = current->mm;

        down_write(&mm->mmap_sem);
        ret = do_munmap(mm, addr, len);
        up_write(&mm->mmap_sem);
        return ret;
}

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
        struct mm_struct * mm = current->mm;
        struct vm_area_struct * vma, * prev;
        unsigned long flags;
        rb_node_t ** rb_link, * rb_parent;

        len = PAGE_ALIGN(len);
        if (!len)
                return addr;

        if ((addr + len) > TASK_SIZE || (addr + len) < addr)
                return -EINVAL;

        /*
         * mlock MCL_FUTURE?
         */
        if (mm->def_flags & VM_LOCKED) {
                unsigned long locked = mm->locked_vm << PAGE_SHIFT;
                locked += len;
                if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
                        return -EAGAIN;
        }

        /*
         * Clear old maps.  this also does some error checking for us
         */
 munmap_back:
        vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
        if (vma && vma->vm_start < addr + len) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
                goto munmap_back;
        }

        /* Check against address space limits *after* clearing old maps... */
        if ((mm->total_vm << PAGE_SHIFT) + len
            > current->rlim[RLIMIT_AS].rlim_cur)
                return -ENOMEM;

        if (mm->map_count > max_map_count)
                return -ENOMEM;

        if (!vm_enough_memory(len >> PAGE_SHIFT))
                return -ENOMEM;

        flags = VM_DATA_DEFAULT_FLAGS | mm->def_flags;

        /* Can we just expand an old anonymous mapping? */
        if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, flags))
                goto out;

        /*
         * create a vma struct for an anonymous mapping
         */
        vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!vma)
                return -ENOMEM;

        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;
        vma->vm_flags = flags;
        vma->vm_page_prot = protection_map[flags & 0x0f];
        vma->vm_ops = NULL;
        vma->vm_pgoff = 0;
        vma->vm_file = NULL;
        vma->vm_private_data = NULL;

        vma_link(mm, vma, prev, rb_link, rb_parent);

out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (flags & VM_LOCKED) {
                mm->locked_vm += len >> PAGE_SHIFT;
                make_pages_present(addr, addr + len);
        }
        return addr;
}

/* Build the RB tree corresponding to the VMA list. */
void build_mmap_rb(struct mm_struct * mm)
{
        struct vm_area_struct * vma;
        rb_node_t ** rb_link, * rb_parent;

        mm->mm_rb = RB_ROOT;
        rb_link = &mm->mm_rb.rb_node;
        rb_parent = NULL;
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                __vma_link_rb(mm, vma, rb_link, rb_parent);
                rb_parent = &vma->vm_rb;
                rb_link = &rb_parent->rb_right;
        }
}

/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
        struct vm_area_struct * mpnt;

        release_segments(mm);
        spin_lock(&mm->page_table_lock);
        mpnt = mm->mmap;
        mm->mmap = mm->mmap_cache = NULL;
        mm->mm_rb = RB_ROOT;
        mm->rss = 0;
        spin_unlock(&mm->page_table_lock);
        mm->total_vm = 0;
        mm->locked_vm = 0;

        flush_cache_mm(mm);
        while (mpnt) {
                struct vm_area_struct * next = mpnt->vm_next;
                unsigned long start = mpnt->vm_start;
                unsigned long end = mpnt->vm_end;
                unsigned long size = end - start;

                if (mpnt->vm_ops) {
                        if (mpnt->vm_ops->close)
                                mpnt->vm_ops->close(mpnt);
                }
                mm->map_count--;
                remove_shared_vm_struct(mpnt);
                zap_page_range(mm, start, size);
                if (mpnt->vm_file)
                        fput(mpnt->vm_file);
                kmem_cache_free(vm_area_cachep, mpnt);
                mpnt = next;
        }

        /* This is just debugging */
        if (mm->map_count)
                BUG();

        clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);

        flush_tlb_mm(mm);
}

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap ring.  If vm_file is non-NULL
 * then the i_shared_lock must be held here.
 */
void __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{
        struct vm_area_struct * __vma, * prev;
        rb_node_t ** rb_link, * rb_parent;

        __vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
        if (__vma && __vma->vm_start < vma->vm_end)
                BUG();
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        mm->map_count++;
        validate_mm(mm);
}

void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{
        struct vm_area_struct * __vma, * prev;
        rb_node_t ** rb_link, * rb_parent;

        __vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
        if (__vma && __vma->vm_start < vma->vm_end)
                BUG();
        vma_link(mm, vma, prev, rb_link, rb_parent);
        validate_mm(mm);
}
