OpenCores
URL https://opencores.org/ocsvn/test_project/test_project/trunk

Subversion Repositories test_project

test_project/trunk/linux_sd_driver/mm/madvise.c (rev 81)


/*
 *      linux/mm/madvise.c
 *
 * Copyright (C) 1999  Linus Torvalds
 * Copyright (C) 2002  Christoph Hellwig
 */

#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
#include <linux/hugetlb.h>
#include <linux/sched.h>

/*
 * Any behaviour which results in changes to the vma->vm_flags needs to
 * take mmap_sem for writing. Others, which simply traverse vmas, need
 * to only take it for reading.
 */
static int madvise_need_mmap_write(int behavior)
{
        switch (behavior) {
        case MADV_REMOVE:
        case MADV_WILLNEED:
        case MADV_DONTNEED:
                return 0;
        default:
                /* be safe, default to 1. list exceptions explicitly */
                return 1;
        }
}

/*
 * We can potentially split a vm area into separate
 * areas, each area with its own behavior.
 */
static long madvise_behavior(struct vm_area_struct * vma,
                     struct vm_area_struct **prev,
                     unsigned long start, unsigned long end, int behavior)
{
        struct mm_struct * mm = vma->vm_mm;
        int error = 0;
        pgoff_t pgoff;
        int new_flags = vma->vm_flags;

        switch (behavior) {
        case MADV_NORMAL:
                new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
                break;
        case MADV_SEQUENTIAL:
                new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
                break;
        case MADV_RANDOM:
                new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
                break;
        case MADV_DONTFORK:
                new_flags |= VM_DONTCOPY;
                break;
        case MADV_DOFORK:
                new_flags &= ~VM_DONTCOPY;
                break;
        }

        if (new_flags == vma->vm_flags) {
                *prev = vma;
                goto out;
        }

        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
                                vma->vm_file, pgoff, vma_policy(vma));
        if (*prev) {
                vma = *prev;
                goto success;
        }

        *prev = vma;

        if (start != vma->vm_start) {
                error = split_vma(mm, vma, start, 1);
                if (error)
                        goto out;
        }

        if (end != vma->vm_end) {
                error = split_vma(mm, vma, end, 0);
                if (error)
                        goto out;
        }

success:
        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         */
        vma->vm_flags = new_flags;

out:
        if (error == -ENOMEM)
                error = -EAGAIN;
        return error;
}
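
The split logic above is what a partial-range madvise() exercises from userspace. As an illustrative sketch (not part of this file), advising only the middle page of a three-page anonymous mapping forces the vma to be split, which typically shows up as three adjacent entries in /proc/self/maps:

/* Illustrative userspace sketch, not part of madvise.c: advising only the
 * middle page of a three-page anonymous mapping makes the kernel split the
 * original vma so each piece can carry its own vm_flags. */
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        long pagesz = sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, 3 * pagesz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return EXIT_FAILURE;
        }

        /* Only the middle page gets VM_SEQ_READ, so it cannot stay merged
         * with its neighbours. */
        if (madvise(p + pagesz, pagesz, MADV_SEQUENTIAL) != 0)
                perror("madvise(MADV_SEQUENTIAL)");

        /* /proc/self/maps would now typically list three adjacent mappings. */
        munmap(p, 3 * pagesz);
        return 0;
}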

/*
 * Schedule all required I/O operations.  Do not wait for completion.
 */
static long madvise_willneed(struct vm_area_struct * vma,
                             struct vm_area_struct ** prev,
                             unsigned long start, unsigned long end)
{
        struct file *file = vma->vm_file;

        if (!file)
                return -EBADF;

        if (file->f_mapping->a_ops->get_xip_page) {
                /* no bad return value, but ignore advice */
                return 0;
        }

        *prev = vma;
        start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
        if (end > vma->vm_end)
                end = vma->vm_end;
        end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

        force_page_cache_readahead(file->f_mapping,
                        file, start, max_sane_readahead(end - start));
        return 0;
}
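
madvise_willneed() only schedules readahead and returns, so a caller can issue the advice well before touching the pages. A minimal userspace sketch, assuming a placeholder file "data.bin":

/* Illustrative userspace sketch, not part of madvise.c: ask the kernel to
 * start readahead for a file-backed mapping before the pages are touched.
 * The call returns as soon as the I/O is scheduled; "data.bin" is a
 * placeholder file name. */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        int fd = open("data.bin", O_RDONLY);
        struct stat st;
        if (fd < 0 || fstat(fd, &st) != 0) {
                perror("open/fstat");
                return EXIT_FAILURE;
        }

        char *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return EXIT_FAILURE;
        }

        /* Schedule readahead for the whole mapping; does not block on I/O. */
        if (madvise(p, st.st_size, MADV_WILLNEED) != 0)
                perror("madvise(MADV_WILLNEED)");

        /* ... later accesses to p[] are more likely to hit the page cache ... */
        munmap(p, st.st_size);
        close(fd);
        return 0;
}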

/*
 * Application no longer needs these pages.  If the pages are dirty,
 * it's OK to just throw them away.  The app will be more careful about
 * data it wants to keep.  Be sure to free swap resources too.  The
 * zap_page_range call sets things up for refill_inactive to actually free
 * these pages later if no one else has touched them in the meantime,
 * although we could add these pages to a global reuse list for
 * refill_inactive to pick up before reclaiming other pages.
 *
 * NB: This interface discards data rather than pushes it out to swap,
 * as some implementations do.  This has performance implications for
 * applications like large transactional databases which want to discard
 * pages in anonymous maps after committing to backing store the data
 * that was kept in them.  There is no reason to write this data out to
 * the swap area if the application is discarding it.
 *
 * An interface that causes the system to free clean pages and flush
 * dirty pages is already available as msync(MS_INVALIDATE).
 */
static long madvise_dontneed(struct vm_area_struct * vma,
                             struct vm_area_struct ** prev,
                             unsigned long start, unsigned long end)
{
        *prev = vma;
        if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
                return -EINVAL;

        if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
                struct zap_details details = {
                        .nonlinear_vma = vma,
                        .last_index = ULONG_MAX,
                };
                zap_page_range(vma, start, end - start, &details);
        } else
                zap_page_range(vma, start, end - start, NULL);
        return 0;
}
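
The discard semantics described above are easy to observe from userspace: after MADV_DONTNEED on a private anonymous mapping, the old contents are gone and subsequent reads see zero-filled pages. A minimal sketch, not part of this file:

/* Illustrative userspace sketch, not part of madvise.c: MADV_DONTNEED on a
 * private anonymous mapping discards even dirty pages, so a subsequent read
 * sees fresh zero-filled pages rather than the old contents. */
#include <sys/mman.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        long pagesz = sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
                return EXIT_FAILURE;

        memset(p, 0xAA, pagesz);        /* dirty the page */

        if (madvise(p, pagesz, MADV_DONTNEED) != 0)
                return EXIT_FAILURE;

        /* The old data is gone; the page reads back as zeros. */
        assert(p[0] == 0);

        munmap(p, pagesz);
        return 0;
}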

/*
 * Application wants to free up the pages and associated backing store.
 * This is effectively punching a hole into the middle of a file.
 *
 * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
 * Other filesystems return -ENOSYS.
 */
static long madvise_remove(struct vm_area_struct *vma,
                                struct vm_area_struct **prev,
                                unsigned long start, unsigned long end)
{
        struct address_space *mapping;
        loff_t offset, endoff;
        int error;

        *prev = NULL;   /* tell sys_madvise we drop mmap_sem */

        if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
                return -EINVAL;

        if (!vma->vm_file || !vma->vm_file->f_mapping
                || !vma->vm_file->f_mapping->host) {
                        return -EINVAL;
        }

        if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
                return -EACCES;

        mapping = vma->vm_file->f_mapping;

        offset = (loff_t)(start - vma->vm_start)
                        + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
        endoff = (loff_t)(end - vma->vm_start - 1)
                        + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

        /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
        up_read(&current->mm->mmap_sem);
        error = vmtruncate_range(mapping->host, offset, endoff);
        down_read(&current->mm->mmap_sem);
        return error;
}
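
A shared anonymous mapping is shmem-backed, so it passes both the shared/writable check and the tmpfs/shmfs restriction noted above. A minimal sketch, not part of this file, that punches a hole in the middle page of such a mapping:

/* Illustrative userspace sketch, not part of madvise.c: punch a hole in the
 * middle of a shared anonymous (shmem-backed) mapping with MADV_REMOVE. The
 * mapping must be shared and writable, and the backing filesystem must
 * support the operation (tmpfs/shmfs here); otherwise the call fails. */
#include <sys/mman.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        long pagesz = sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, 3 * pagesz, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return EXIT_FAILURE;
        }

        memset(p, 0xAA, 3 * pagesz);

        /* Free the middle page and its backing store; it reads back as zeros. */
        if (madvise(p + pagesz, pagesz, MADV_REMOVE) != 0)
                perror("madvise(MADV_REMOVE)");

        munmap(p, 3 * pagesz);
        return 0;
}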

static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
                unsigned long start, unsigned long end, int behavior)
{
        long error;

        switch (behavior) {
        case MADV_DOFORK:
                if (vma->vm_flags & VM_IO) {
                        error = -EINVAL;
                        break;
                }
                /* fall through */
        case MADV_DONTFORK:
        case MADV_NORMAL:
        case MADV_SEQUENTIAL:
        case MADV_RANDOM:
                error = madvise_behavior(vma, prev, start, end, behavior);
                break;
        case MADV_REMOVE:
                error = madvise_remove(vma, prev, start, end);
                break;

        case MADV_WILLNEED:
                error = madvise_willneed(vma, prev, start, end);
                break;

        case MADV_DONTNEED:
                error = madvise_dontneed(vma, prev, start, end);
                break;

        default:
                error = -EINVAL;
                break;
        }
        return error;
}

/*
 * The madvise(2) system call.
 *
 * Applications can use madvise() to advise the kernel how it should
 * handle paging I/O in this VM area.  The idea is to help the kernel
 * use appropriate read-ahead and caching techniques.  The information
 * provided is advisory only, and can be safely disregarded by the
 * kernel without affecting the correct operation of the application.
 *
 * behavior values:
 *  MADV_NORMAL - the default behavior is to read clusters.  This
 *              results in some read-ahead and read-behind.
 *  MADV_RANDOM - the system should read the minimum amount of data
 *              on any access, since it is unlikely that the appli-
 *              cation will need more than what it asks for.
 *  MADV_SEQUENTIAL - pages in the given range will probably be accessed
 *              once, so they can be aggressively read ahead, and
 *              can be freed soon after they are accessed.
 *  MADV_WILLNEED - the application is notifying the system to read
 *              some pages ahead.
 *  MADV_DONTNEED - the application is finished with the given range,
 *              so the kernel can free resources associated with it.
 *  MADV_REMOVE - the application wants to free up the given range of
 *              pages and associated backing store.
 *
 * return values:
 *  zero    - success
 *  -EINVAL - start + len < 0, start is not page-aligned,
 *              "behavior" is not a valid value, or application
 *              is attempting to release locked or shared pages.
 *  -ENOMEM - addresses in the specified range are not currently
 *              mapped, or are outside the AS of the process.
 *  -EIO    - an I/O error occurred while paging in data.
 *  -EBADF  - map exists, but area maps something that isn't a file.
 *  -EAGAIN - a kernel resource was temporarily unavailable.
 */
asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
{
        unsigned long end, tmp;
        struct vm_area_struct * vma, *prev;
        int unmapped_error = 0;
        int error = -EINVAL;
        int write;
        size_t len;

        write = madvise_need_mmap_write(behavior);
        if (write)
                down_write(&current->mm->mmap_sem);
        else
                down_read(&current->mm->mmap_sem);

        if (start & ~PAGE_MASK)
                goto out;
        len = (len_in + ~PAGE_MASK) & PAGE_MASK;

        /* Check to see whether len was rounded up from small -ve to zero */
        if (len_in && !len)
                goto out;

        end = start + len;
        if (end < start)
                goto out;

        error = 0;
        if (end == start)
                goto out;

        /*
         * If the interval [start,end) covers some unmapped address
         * ranges, just ignore them, but return -ENOMEM at the end.
         * - different from the way of handling in mlock etc.
         */
        vma = find_vma_prev(current->mm, start, &prev);
        if (vma && start > vma->vm_start)
                prev = vma;

        for (;;) {
                /* Still start < end. */
                error = -ENOMEM;
                if (!vma)
                        goto out;

                /* Here start < (end|vma->vm_end). */
                if (start < vma->vm_start) {
                        unmapped_error = -ENOMEM;
                        start = vma->vm_start;
                        if (start >= end)
                                goto out;
                }

                /* Here vma->vm_start <= start < (end|vma->vm_end) */
                tmp = vma->vm_end;
                if (end < tmp)
                        tmp = end;

                /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
                error = madvise_vma(vma, &prev, start, tmp, behavior);
                if (error)
                        goto out;
                start = tmp;
                if (prev && start < prev->vm_end)
                        start = prev->vm_end;
                error = unmapped_error;
                if (start >= end)
                        goto out;
                if (prev)
                        vma = prev->vm_next;
                else    /* madvise_remove dropped mmap_sem */
                        vma = find_vma(current->mm, start);
        }
out:
        if (write)
                up_write(&current->mm->mmap_sem);
        else
                up_read(&current->mm->mmap_sem);

        return error;
}
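
For completeness, a minimal userspace caller of the madvise(2) interface documented above, checking the error values it can return; "input.log" is a placeholder path:

/* Illustrative userspace sketch, not part of madvise.c: advise the kernel
 * that a file will be read once, sequentially, and report any of the
 * documented failure modes (EINVAL, ENOMEM, EIO, EBADF, EAGAIN). */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("input.log", O_RDONLY);
        struct stat st;
        if (fd < 0 || fstat(fd, &st) != 0 || st.st_size == 0) {
                perror("open/fstat");
                return EXIT_FAILURE;
        }

        char *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return EXIT_FAILURE;
        }

        /* One sequential pass over the file: ask for aggressive readahead. */
        if (madvise(p, st.st_size, MADV_SEQUENTIAL) != 0)
                fprintf(stderr, "madvise: %s\n", strerror(errno));

        long sum = 0;
        for (off_t i = 0; i < st.st_size; i++)
                sum += p[i];
        printf("byte sum: %ld\n", sum);

        munmap(p, st.st_size);
        close(fd);
        return 0;
}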
