/*
 *      linux/mm/filemap.c
 *
 * Copyright (C) 1994, 1995  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem does this differently, for example)
 */
#include <linux/config.h> /* CONFIG_READA_SMALL */
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/malloc.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>

/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 */

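/*
 * Global page-cache state: a counter of cached pages and the hash table
 * that page_hash() and __find_page() use below to look a page up by
 * (inode, offset).
 */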
unsigned long page_cache_size = 0;
struct page * page_hash_table[PAGE_HASH_SIZE];

/*
 * Simple routines for both non-shared and shared mappings.
 */

#define release_page(page) __free_page((page))

/*
 * Invalidate the pages of an inode, removing all pages that aren't
 * locked down (those are sure to be up-to-date anyway, so we shouldn't
 * invalidate them).
 */
void invalidate_inode_pages(struct inode * inode)
{
        struct page ** p;
        struct page * page;

        p = &inode->i_pages;
        while ((page = *p) != NULL) {
                if (PageLocked(page)) {
                        p = &page->next;
                        continue;
                }
                inode->i_nrpages--;
                if ((*p = page->next) != NULL)
                        (*p)->prev = page->prev;
                page->dirty = 0;
                page->next = NULL;
                page->prev = NULL;
                remove_page_from_hash_queue(page);
                page->inode = NULL;
                __free_page(page);
                continue;
        }
}

/*
 * Truncate the page cache at a set offset, removing the pages
 * that are beyond that offset (and zeroing out partial pages).
 */
void truncate_inode_pages(struct inode * inode, unsigned long start)
{
        struct page ** p;
        struct page * page;

repeat:
        p = &inode->i_pages;
        while ((page = *p) != NULL) {
                unsigned long offset = page->offset;

                /* page wholly truncated - free it */
                if (offset >= start) {
                        if (PageLocked(page)) {
                                __wait_on_page(page);
                                goto repeat;
                        }
                        inode->i_nrpages--;
                        if ((*p = page->next) != NULL)
                                (*p)->prev = page->prev;
                        page->dirty = 0;
                        page->next = NULL;
                        page->prev = NULL;
                        remove_page_from_hash_queue(page);
                        page->inode = NULL;
                        __free_page(page);
                        continue;
                }
                p = &page->next;
                offset = start - offset;
                /* partial truncate, clear end of page */
                if (offset < PAGE_SIZE) {
                        unsigned long address = page_address(page);
                        memset((void *) (offset + address), 0, PAGE_SIZE - offset);
                        flush_page_to_ram(address);
                }
        }
}

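/*
 * shrink_mmap() sweeps the mem_map array with a simple clock algorithm,
 * looking for page-cache or buffer-cache pages that can be dropped to
 * free memory.  Locked pages (and non-DMA pages when "dma" is set) are
 * skipped, recently referenced pages are aged rather than freed, and
 * shared pages are only marked referenced.  At most one page (or one
 * page's buffers) is freed per call: the return value is 1 if something
 * was freed, 0 otherwise.  The priority argument bounds how many map
 * entries are examined in one pass.
 */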
int shrink_mmap(int priority, int dma, int free_buf)
{
        static int clock = 0;
        struct page * page;
        unsigned long limit = MAP_NR(high_memory);
        struct buffer_head *tmp, *bh;
        int count_max, count_min;

        count_max = (limit<<1) >> (priority>>1);
        count_min = (limit<<1) >> (priority);

        page = mem_map + clock;
        do {
                count_max--;
                if (page->inode || page->buffers)
                        count_min--;

                if (PageLocked(page))
                        goto next;
                if (dma && !PageDMA(page))
                        goto next;
                /* First of all, regenerate the page's referenced bit
                   from any buffers in the page */
                bh = page->buffers;
                if (bh) {
                        tmp = bh;
                        do {
                                if (buffer_touched(tmp)) {
                                        clear_bit(BH_Touched, &tmp->b_state);
                                        set_bit(PG_referenced, &page->flags);
                                }
                                tmp = tmp->b_this_page;
                        } while (tmp != bh);
                }

                /* We can't throw away shared pages, but we do mark
                   them as referenced.  This relies on the fact that
                   no page is currently in both the page cache and the
                   buffer cache; we'd have to modify the following
                   test to allow for that case. */

                switch (page->count) {
                        case 1:
                                /* If it has been referenced recently, don't free it */
                                if (clear_bit(PG_referenced, &page->flags)) {
                                        /* age this page, it was potentially used */
                                        if (priority < 4)
                                                age_page(page);
                                        break;
                                }

                                /* is it a page cache page? */
                                if (page->inode) {
                                        remove_page_from_hash_queue(page);
                                        remove_page_from_inode_queue(page);
                                        __free_page(page);
                                        return 1;
                                }

                                /* is it a buffer cache page? */
                                if (free_buf && bh && try_to_free_buffer(bh, &bh, 6))
                                        return 1;
                                break;

                        default:
                                /* more than one user: we can't throw it away */
                                set_bit(PG_referenced, &page->flags);
                                /* fall through */
                        case 0:
                                /* nothing */
                }
next:
                page++;
                clock++;
                if (clock >= limit) {
                        clock = 0;
                        page = mem_map;
                }
        } while (count_max > 0 && count_min > 0);
        return 0;
}

/*
 * This is called from try_to_swap_out() when we try to get rid of some
 * pages..  If we're unmapping the last occurrence of this page, we also
 * free it from the page hash-queues etc, as we don't want to keep it
 * in-core unnecessarily.
 */
unsigned long page_unuse(unsigned long page)
{
        struct page * p = mem_map + MAP_NR(page);
        int count = p->count;

        if (count != 2)
                return count;
        if (!p->inode)
                return count;
        remove_page_from_hash_queue(p);
        remove_page_from_inode_queue(p);
        free_page(page);
        return 1;
}

/*
 * Update a page cache copy, when we're doing a "write()" system call
 * See also "update_vm_cache()".
 */
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
{
        unsigned long offset, len;

        offset = (pos & ~PAGE_MASK);
        pos = pos & PAGE_MASK;
        len = PAGE_SIZE - offset;
        do {
                struct page * page;

                if (len > count)
                        len = count;
                page = find_page(inode, pos);
                if (page) {
                        wait_on_page(page);
                        memcpy((void *) (offset + page_address(page)), buf, len);
                        release_page(page);
                }
                count -= len;
                buf += len;
                len = PAGE_SIZE;
                offset = 0;
                pos += PAGE_SIZE;
        } while (count);
}

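/*
 * Insert a page into the page cache for the given inode and offset:
 * bump its reference count, clear the uptodate/error bits, and link it
 * into both the inode's page list and the page hash queue.  The caller
 * passes the hash bucket obtained from page_hash().
 */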
static inline void add_to_page_cache(struct page * page,
        struct inode * inode, unsigned long offset,
        struct page **hash)
{
        page->count++;
        page->flags &= ~((1 << PG_uptodate) | (1 << PG_error));
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        __add_page_to_hash_queue(page, hash);
}

/*
 * Try to read ahead in the file. "page_cache" is a potentially free page
 * that we could use for the cache (if it is 0 we can try to create one,
 * this is all overlapped with the IO on the previous page finishing anyway)
 */
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
        struct page * page;
        struct page ** hash;

        offset &= PAGE_MASK;
        switch (page_cache) {
        case 0:
                page_cache = __get_free_page(GFP_KERNEL);
                if (!page_cache)
                        break;
        default:
                if (offset >= inode->i_size)
                        break;
                hash = page_hash(inode, offset);
                page = __find_page(inode, offset, *hash);
                if (!page) {
                        /*
                         * Ok, add the new page to the hash-queues...
                         */
                        page = mem_map + MAP_NR(page_cache);
                        add_to_page_cache(page, inode, offset, hash);
                        inode->i_op->readpage(inode, page);
                        page_cache = 0;
                }
                release_page(page);
        }
        return page_cache;
}

/*
 * Wait for IO to complete on a locked page.
 *
 * This must be called with the caller "holding" the page,
 * ie with increased "page->count" so that the page won't
 * go away during the wait..
 */
void __wait_on_page(struct page *page)
{
        struct wait_queue wait = { current, NULL };

        add_wait_queue(&page->wait, &wait);
repeat:
        run_task_queue(&tq_disk);
        current->state = TASK_UNINTERRUPTIBLE;
        if (PageLocked(page)) {
                schedule();
                goto repeat;
        }
        remove_wait_queue(&page->wait, &wait);
        current->state = TASK_RUNNING;
}

#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
#endif

/*
 * Read-ahead profiling information
 * --------------------------------
 * Every PROFILE_MAXREADCOUNT, the following information is written
 * to the syslog:
 *   Percentage of asynchronous read-ahead.
 *   Average of read-ahead fields context value.
 * If DEBUG_READAHEAD is defined, a snapshot of these fields is written
 * to the syslog.
 */

#ifdef PROFILE_READAHEAD

#define PROFILE_MAXREADCOUNT 1000

static unsigned long total_reada;
static unsigned long total_async;
static unsigned long total_ramax;
static unsigned long total_ralen;
static unsigned long total_rawin;

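/*
 * Accumulate per-call read-ahead statistics and, once more than
 * PROFILE_MAXREADCOUNT samples have been collected, print the averages
 * to the kernel log and reset the counters.  The report-and-reset is
 * done with interrupts disabled (save_flags/cli).
 */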
static void profile_readahead(int async, struct file *filp)
{
        unsigned long flags;

        ++total_reada;
        if (async)
                ++total_async;

        total_ramax     += filp->f_ramax;
        total_ralen     += filp->f_ralen;
        total_rawin     += filp->f_rawin;

        if (total_reada > PROFILE_MAXREADCOUNT) {
                save_flags(flags);
                cli();
                if (!(total_reada > PROFILE_MAXREADCOUNT)) {
                        restore_flags(flags);
                        return;
                }

                printk("Readahead average:  max=%ld, len=%ld, win=%ld, async=%ld%%\n",
                        total_ramax/total_reada,
                        total_ralen/total_reada,
                        total_rawin/total_reada,
                        (total_async*100)/total_reada);
#ifdef DEBUG_READAHEAD
                printk("Readahead snapshot: max=%ld, len=%ld, win=%ld, raend=%ld\n",
                        filp->f_ramax, filp->f_ralen, filp->f_rawin, filp->f_raend);
#endif

                total_reada     = 0;
                total_async     = 0;
                total_ramax     = 0;
                total_ralen     = 0;
                total_rawin     = 0;

                restore_flags(flags);
        }
}
#endif  /* defined PROFILE_READAHEAD */

/*
 * Read-ahead context:
 * -------------------
 * The read ahead context fields of the "struct file" are the following:
 * - f_raend : position of the first byte after the last page we tried to
 *             read ahead.
 * - f_ramax : current read-ahead maximum size.
 * - f_ralen : length of the current IO read block we tried to read-ahead.
 * - f_rawin : length of the current read-ahead window.
 *             if last read-ahead was synchronous then
 *                  f_rawin = f_ralen
 *             otherwise (was asynchronous)
 *                  f_rawin = previous value of f_ralen + f_ralen
 *
 * Read-ahead limits:
 * ------------------
 * MIN_READAHEAD   : minimum read-ahead size when reading ahead.
 * MAX_READAHEAD   : maximum read-ahead size when reading ahead.
 *
 * Synchronous read-ahead benefits:
 * --------------------------------
 * Using a reasonable IO xfer length from peripheral devices increases system
 * performance.
 * Reasonable means, in this context, not too large but not too small.
 * The actual maximum value is:
 *      MAX_READAHEAD + PAGE_SIZE = 76k if CONFIG_READA_SMALL is undefined
 *      and 32K if defined (4K page size assumed).
 *
 * Asynchronous read-ahead benefits:
 * ---------------------------------
 * Overlapping the next read request with user process execution increases
 * system performance.
 *
 * Read-ahead risks:
 * -----------------
 * We have to guess which further data are needed by the user process.
 * If these data are often not really needed, it's bad for system
 * performance.
 * However, we know that files are often accessed sequentially by
 * application programs and it seems that it is possible to have some good
 * strategy in that guessing.
 * We only try to read-ahead files that seem to be read sequentially.
 *
 * Asynchronous read-ahead risks:
 * ------------------------------
 * In order to maximize overlapping, we must start some asynchronous read
 * request from the device, as soon as possible.
 * We must be very careful about:
 * - The number of effective pending IO read requests.
 *   ONE seems to be the only reasonable value.
 * - The total memory pool usage for the file access stream.
 *   This maximum memory usage is implicitly 2 IO read chunks:
 *   2*(MAX_READAHEAD + PAGE_SIZE) = 152K if CONFIG_READA_SMALL is undefined,
 *   64k if defined (4K page size assumed).
 */

#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)

#ifdef CONFIG_READA_SMALL  /* small readahead */
#define MAX_READAHEAD PageAlignSize(4096*7)
#define MIN_READAHEAD PageAlignSize(4096*2)
#else /* large readahead */
#define MAX_READAHEAD PageAlignSize(4096*18)
#define MIN_READAHEAD PageAlignSize(4096*3)
#endif

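/*
 * generic_file_readahead() implements the read-ahead policy described
 * above.  With 4K pages the macros give MAX_READAHEAD = 72K and
 * MIN_READAHEAD = 12K (28K and 8K with CONFIG_READA_SMALL).
 *
 * If the current page is still locked (synchronous case), a new
 * read-ahead window is started at ppos unless the position is still
 * inside the previous read IO request.  If the page is unlocked and we
 * are inside the previous window (asynchronous case), the window is
 * extended past its old end and the disk task queue is run to start the
 * IO.  Whenever pages were read ahead, the context fields (f_raend,
 * f_ralen, f_rawin) are updated and f_ramax is doubled, capped at
 * MAX_READAHEAD.
 */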
static inline unsigned long generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode,
        unsigned long ppos, struct page * page,
        unsigned long page_cache)
{
        unsigned long max_ahead, ahead;
        unsigned long raend;

        raend = filp->f_raend & PAGE_MASK;
        max_ahead = 0;

/*
 * The current page is locked.
 * If the current position is inside the previous read IO request, do not
 * try to reread previously read ahead pages.
 * Otherwise decide whether or not to read ahead some pages synchronously.
 * If we are not going to read ahead, set the read ahead context for this
 * page only.
 */
        if (PageLocked(page)) {
                if (!filp->f_ralen || ppos >= raend || ppos + filp->f_ralen < raend) {
                        raend = ppos;
                        if (raend < inode->i_size)
                                max_ahead = filp->f_ramax;
                        filp->f_rawin = 0;
                        filp->f_ralen = PAGE_SIZE;
                        if (!max_ahead) {
                                filp->f_raend  = ppos + filp->f_ralen;
                                filp->f_rawin += filp->f_ralen;
                        }
                }
        }
/*
 * The current page is not locked.
 * If we were reading ahead and,
 * if the current max read ahead size is not zero and,
 * if the current position is inside the last read-ahead IO request,
 *   it is the moment to try to read ahead asynchronously.
 * We will later force an unplug of the device in order to force asynchronous read IO.
 */
        else if (reada_ok && filp->f_ramax && raend >= PAGE_SIZE &&
                 ppos <= raend && ppos + filp->f_ralen >= raend) {
/*
 * Add ONE page to max_ahead in order to try to have about the same IO max size
 * as synchronous read-ahead (MAX_READAHEAD + 1)*PAGE_SIZE.
 * Compute the position of the last page we have tried to read in order to
 * begin to read ahead just at the next page.
 */
                raend -= PAGE_SIZE;
                if (raend < inode->i_size)
                        max_ahead = filp->f_ramax + PAGE_SIZE;

                if (max_ahead) {
                        filp->f_rawin = filp->f_ralen;
                        filp->f_ralen = 0;
                        reada_ok      = 2;
                }
        }
/*
 * Try to read ahead pages.
 * We hope that ll_rw_blk() plug/unplug, coalescence, request sorting and the
 * scheduler will work well enough for us to avoid too many bad actual IO requests.
 */
        ahead = 0;
        while (ahead < max_ahead) {
                ahead += PAGE_SIZE;
                page_cache = try_to_read_ahead(inode, raend + ahead, page_cache);
        }
/*
 * If we tried to read ahead some pages,
 * If we tried to read ahead asynchronously,
 *   Try to force unplug of the device in order to start an asynchronous
 *   read IO request.
 * Update the read-ahead context.
 * Store the length of the current read-ahead window.
 * Double the current max read ahead size.
 *   That heuristic avoids doing large IO for files that are not really
 *   accessed sequentially.
 */
        if (ahead) {
                if (reada_ok == 2) {
                        run_task_queue(&tq_disk);
                }

                filp->f_ralen += ahead;
                filp->f_rawin += filp->f_ralen;
                filp->f_raend = raend + ahead + PAGE_SIZE;

                filp->f_ramax += filp->f_ramax;

                if (filp->f_ramax > MAX_READAHEAD)
                        filp->f_ramax = MAX_READAHEAD;

#ifdef PROFILE_READAHEAD
                profile_readahead((reada_ok == 2), filp);
#endif
        }

        return page_cache;
}


/*
 * This is a generic file read routine, and uses the
 * inode->i_op->readpage() function for the actual low-level
 * stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */

int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
        int error, read;
        unsigned long pos, ppos, page_cache;
        int reada_ok;

        error = 0;
        read = 0;
        page_cache = 0;

        pos = filp->f_pos;
        ppos = pos & PAGE_MASK;
/*
 * If the current position is outside the previous read-ahead window,
 * we reset the current read-ahead context and set read ahead max to zero
 * (will be set to just needed value later),
 * otherwise, we assume that the file accesses are sequential enough to
 * continue read-ahead.
 */
        if (ppos > filp->f_raend || ppos + filp->f_rawin < filp->f_raend) {
                reada_ok = 0;
                filp->f_raend = 0;
                filp->f_ralen = 0;
                filp->f_ramax = 0;
                filp->f_rawin = 0;
        } else {
                reada_ok = 1;
        }
/*
 * Adjust the current value of read-ahead max.
 * If the read operation stays in the first half page, force no readahead.
 * Otherwise try to increase read ahead max just enough to do the read request.
 * Then, at least MIN_READAHEAD if read ahead is ok,
 * and at most MAX_READAHEAD in all cases.
 */
        if (pos + count <= (PAGE_SIZE >> 1)) {
                filp->f_ramax = 0;
        } else {
                unsigned long needed;

                needed = ((pos + count) & PAGE_MASK) - ppos;

                if (filp->f_ramax < needed)
                        filp->f_ramax = needed;

                if (reada_ok && filp->f_ramax < MIN_READAHEAD)
                                filp->f_ramax = MIN_READAHEAD;
                if (filp->f_ramax > MAX_READAHEAD)
                        filp->f_ramax = MAX_READAHEAD;
        }

        for (;;) {
                struct page *page, **hash;

                if (pos >= inode->i_size)
                        break;

                /*
                 * Try to find the data in the page cache..
                 */
                hash = page_hash(inode, pos & PAGE_MASK);
                page = __find_page(inode, pos & PAGE_MASK, *hash);
                if (!page)
                        goto no_cached_page;

found_page:
/*
 * Try to read ahead only if the current page is filled or being filled.
 * Otherwise, if we were reading ahead, decrease max read ahead size to
 * the minimum value.
 * In this context, that seems to happen only on some read error or if
 * the page has been rewritten.
 */
                if (PageUptodate(page) || PageLocked(page))
                        page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_MASK, page, page_cache);
                else if (reada_ok && filp->f_ramax > MIN_READAHEAD)
                                filp->f_ramax = MIN_READAHEAD;

                wait_on_page(page);

                if (!PageUptodate(page))
                        goto page_read_error;

success:
                /*
                 * Ok, we have the page, it's up-to-date and ok,
                 * so now we can finally copy it to user space...
                 */
        {
                unsigned long offset, nr;
                offset = pos & ~PAGE_MASK;
                nr = PAGE_SIZE - offset;
                if (nr > count)
                        nr = count;

                if (nr > inode->i_size - pos)
                        nr = inode->i_size - pos;
                memcpy_tofs(buf, (void *) (page_address(page) + offset), nr);
                release_page(page);
                buf += nr;
                pos += nr;
                read += nr;
                count -= nr;
                if (count) {
                        /*
                         * to prevent hogging the CPU on well-cached systems,
                         * schedule if needed, it's safe to do it here:
                         */
                        if (need_resched)
                                schedule();
                        continue;
                }
                break;
        }

no_cached_page:
                /*
                 * Ok, it wasn't cached, so we need to create a new
                 * page..
                 */
                if (!page_cache) {
                        page_cache = __get_free_page(GFP_KERNEL);
                        /*
                         * That could have slept, so go around to the
                         * very beginning..
                         */
                        if (page_cache)
                                continue;
                        error = -ENOMEM;
                        break;
                }

                /*
                 * Ok, add the new page to the hash-queues...
                 */
                page = mem_map + MAP_NR(page_cache);
                page_cache = 0;
                add_to_page_cache(page, inode, pos & PAGE_MASK, hash);

                /*
                 * Error handling is tricky. If we get a read error,
                 * the cached page stays in the cache (but uptodate=0),
                 * and the next process that accesses it will try to
                 * re-read it. This is needed for NFS etc, where the
                 * identity of the reader can decide if we can read the
                 * page or not..
                 */
/*
 * We have to read the page.
 * If we were reading ahead, we had previously tried to read this page.
 * That means that the page has probably been removed from the cache before
 * the application process needs it, or has been rewritten.
 * Decrease max readahead size to the minimum value in that situation.
 */
                if (reada_ok && filp->f_ramax > MIN_READAHEAD)
                        filp->f_ramax = MIN_READAHEAD;

                error = inode->i_op->readpage(inode, page);
                if (!error)
                        goto found_page;
                release_page(page);
                break;

page_read_error:
                /*
                 * We found the page, but it wasn't up-to-date.
                 * Try to re-read it _once_. We do this synchronously,
                 * because this happens only if there were errors.
                 */
                error = inode->i_op->readpage(inode, page);
                if (!error) {
                        wait_on_page(page);
                        if (PageUptodate(page) && !PageError(page))
                                goto success;
                        error = -EIO; /* Some unspecified error occurred.. */
                }
                release_page(page);
                break;
        }

        filp->f_pos = pos;
        filp->f_reada = 1;
        if (page_cache)
                free_page(page_cache);
        UPDATE_ATIME(inode)
        if (!read)
                read = error;
        return read;
}

/*
 * Semantics for shared and private memory areas are different past the end
 * of the file. A shared mapping past the last page of the file is an error
 * and results in a SIGBUS, while a private mapping just maps in a zero page.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 */
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
        unsigned long offset;
        struct page * page, **hash;
        struct inode * inode = area->vm_inode;
        unsigned long old_page, new_page;

        new_page = 0;
        offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
        if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
                goto no_page;

        /*
         * Do we have something in the page cache already?
         */
        hash = page_hash(inode, offset);
        page = __find_page(inode, offset, *hash);
        if (!page)
                goto no_cached_page;

found_page:
        /*
         * Ok, found a page in the page cache, now we need to check
         * that it's up-to-date.  First check whether we'll need an
         * extra page -- better to overlap the allocation with the I/O.
         */
        if (no_share && !new_page) {
                new_page = __get_free_page(GFP_KERNEL);
                if (!new_page)
                        goto failure;
        }

        if (PageLocked(page))
                goto page_locked_wait;
        if (!PageUptodate(page))
                goto page_read_error;

success:
        /*
         * Found the page, need to check sharing and possibly
         * copy it over to another page..
         */
        old_page = page_address(page);
        if (!no_share) {
                /*
                 * Ok, we can share the cached page directly.. Get rid
                 * of any potential extra pages.
                 */
                if (new_page)
                        free_page(new_page);

                flush_page_to_ram(old_page);
                return old_page;
        }

        /*
         * No sharing ... copy to the new page.
         */
        memcpy((void *) new_page, (void *) old_page, PAGE_SIZE);
        flush_page_to_ram(new_page);
        release_page(page);
        return new_page;

no_cached_page:
        new_page = __get_free_page(GFP_KERNEL);
        if (!new_page)
                goto no_page;

        /*
         * During getting the above page we might have slept,
         * so we need to re-check the situation with the page
         * cache.. The page we just got may be useful if we
         * can't share, so don't get rid of it here.
         */
        page = find_page(inode, offset);
        if (page)
                goto found_page;

        /*
         * Now, create a new page-cache page from the page we got
         */
        page = mem_map + MAP_NR(new_page);
        new_page = 0;
        add_to_page_cache(page, inode, offset, hash);

        if (inode->i_op->readpage(inode, page) != 0)
                goto failure;

        /*
         * Do a very limited read-ahead if appropriate
         */
        if (PageLocked(page))
                new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
        goto found_page;

page_locked_wait:
        __wait_on_page(page);
        if (PageUptodate(page))
                goto success;

page_read_error:
        /*
         * Umm, take care of errors if the page isn't up-to-date.
         * Try to re-read it _once_. We do this synchronously,
         * because there really aren't any performance issues here
         * and we need to check for errors.
         */
        if (inode->i_op->readpage(inode, page) != 0)
                goto failure;
        wait_on_page(page);
        if (PageError(page))
                goto failure;
        if (PageUptodate(page))
                goto success;

        /*
         * Uhhuh.. Things didn't work out. Return zero to tell the
         * mm layer so, possibly freeing the page cache page first.
         */
failure:
        release_page(page);
        if (new_page)
                free_page(new_page);
no_page:
        return 0;
}

/*
 * Tries to write a shared mapped page to its backing store. May return -EIO
 * if the disk is full.
 */
static inline int do_write_page(struct inode * inode, struct file * file,
        const char * page, unsigned long offset)
{
        int old_fs, retval;
        unsigned long size;

        size = offset + PAGE_SIZE;
        /* refuse to extend file size.. */
        if (S_ISREG(inode->i_mode)) {
                if (size > inode->i_size)
                        size = inode->i_size;
                /* Ho humm.. We should have tested for this earlier */
                if (size < offset)
                        return -EIO;
        }
        size -= offset;
        old_fs = get_fs();
        set_fs(KERNEL_DS);
        retval = -EIO;
        if (size == file->f_op->write(inode, file, (const char *) page, size))
                retval = 0;
        set_fs(old_fs);
        return retval;
}

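/*
 * Write one page of a shared mapping back to the file.  If the page
 * still has buffers attached we simply mark them dirty and let the
 * normal buffer writeback handle them later; otherwise build a
 * temporary struct file using the inode's default file operations and
 * call do_write_page() with the inode semaphore held.
 */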
static int filemap_write_page(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long page)
{
        int result;
        struct file file;
        struct inode * inode;
        struct buffer_head * bh;

        bh = mem_map[MAP_NR(page)].buffers;
        if (bh) {
                /* whee.. just mark the buffer heads dirty */
                struct buffer_head * tmp = bh;
                do {
                        mark_buffer_dirty(tmp, 0);
                        tmp = tmp->b_this_page;
                } while (tmp != bh);
                return 0;
        }

        inode = vma->vm_inode;
        file.f_op = inode->i_op->default_file_ops;
        if (!file.f_op->write)
                return -EIO;
        file.f_mode = 3;
        file.f_flags = 0;
        file.f_count = 1;
        file.f_inode = inode;
        file.f_pos = offset;
        file.f_reada = 0;

        down(&inode->i_sem);
        result = do_write_page(inode, &file, (const char *) page, offset);
        up(&inode->i_sem);
        return result;
}


/*
 * Swapping to a shared file: while we're busy writing out the page
 * (and the page still exists in memory), we save the page information
 * in the page table, so that "filemap_swapin()" can re-use the page
 * immediately if it is called while we're busy swapping it out..
 *
 * Once we've written it all out, we mark the page entry "empty", which
 * will result in a normal page-in (instead of a swap-in) from the now
 * up-to-date disk file.
 */
int filemap_swapout(struct vm_area_struct * vma,
        unsigned long offset,
        pte_t *page_table)
{
        int error;
        unsigned long page = pte_page(*page_table);
        unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

        flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset));
        set_pte(page_table, __pte(entry));
        flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset));
        error = filemap_write_page(vma, offset, page);
        if (pte_val(*page_table) == entry)
                pte_clear(page_table);
        return error;
}

/*
 * filemap_swapin() is called only if we have something in the page
 * tables that is non-zero (but not present), which we know to be the
 * page index of a page that is busy being swapped out (see above).
 * So we just use it directly..
 */
static pte_t filemap_swapin(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long entry)
{
        unsigned long page = SWP_OFFSET(entry);

        mem_map[page].count++;
        page = (page << PAGE_SHIFT) + PAGE_OFFSET;
        return mk_pte(page,vma->vm_page_prot);
}


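/*
 * Sync or invalidate a single pte of a shared mapping.  Without
 * MS_INVALIDATE, a present and dirty page is marked clean and written
 * back via filemap_write_page().  With MS_INVALIDATE the pte is cleared
 * as well, and the page is only written back if it was dirty and more
 * than pure invalidation was requested.
 */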
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
        unsigned long address, unsigned int flags)
{
        pte_t pte = *ptep;
        unsigned long page;
        int error;

        if (pte_none(pte))
                return 0;
        if (!(flags & MS_INVALIDATE)) {
                if (!pte_present(pte))
                        return 0;
                if (!pte_dirty(pte))
                        return 0;
                flush_page_to_ram(pte_page(pte));
                flush_cache_page(vma, address);
                set_pte(ptep, pte_mkclean(pte));
                flush_tlb_page(vma, address);
                page = pte_page(pte);
                mem_map[MAP_NR(page)].count++;
        } else {
                flush_cache_page(vma, address);
                pte_clear(ptep);
                flush_tlb_page(vma, address);
                if (!pte_present(pte)) {
                        swap_free(pte_val(pte));
                        return 0;
                }
                page = pte_page(pte);
                if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
                        free_page(page);
                        return 0;
                }
        }
        error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
        free_page(page);
        return error;
}

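/*
 * Walk the ptes of one pmd entry and apply filemap_sync_pte() to each
 * page in the given range.
 */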
static inline int filemap_sync_pte_range(pmd_t * pmd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
        pte_t * pte;
        unsigned long end;
        int error;

        if (pmd_none(*pmd))
                return 0;
        if (pmd_bad(*pmd)) {
                printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
                pmd_clear(pmd);
                return 0;
        }
        pte = pte_offset(pmd, address);
        offset += address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte(pte, vma, address + offset, flags);
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
        return error;
}

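/*
 * Walk the pmds of one pgd entry, handing each piece of the range to
 * filemap_sync_pte_range().
 */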
static inline int filemap_sync_pmd_range(pgd_t * pgd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned int flags)
{
        pmd_t * pmd;
        unsigned long offset, end;
        int error;

        if (pgd_none(*pgd))
                return 0;
        if (pgd_bad(*pgd)) {
                printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
                pgd_clear(pgd);
                return 0;
        }
        pmd = pmd_offset(pgd, address);
        offset = address & PGDIR_MASK;
        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return error;
}

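/*
 * Sync (or invalidate) an address range of a shared mapping by walking
 * the page directory, flushing the cache and TLB for the range, and
 * OR-ing together the error codes from the lower levels.
 */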
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
        size_t size, unsigned int flags)
{
        pgd_t * dir;
        unsigned long end = address + size;
        int error = 0;

        dir = pgd_offset(vma->vm_mm, address);
        flush_cache_range(vma->vm_mm, end - size, end);
        while (address < end) {
                error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        }
        flush_tlb_range(vma->vm_mm, end - size, end);
        return error;
}

/*
 * This handles (potentially partial) area unmaps..
 */
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
        filemap_sync(vma, start, len, MS_ASYNC);
}

/*
 * Shared mappings need to be able to do the right thing at
 * close/unmap/sync. They will also use the private file as
 * backing-store for swapping..
 */
static struct vm_operations_struct file_shared_mmap = {
        NULL,                   /* no special open */
        NULL,                   /* no special close */
        filemap_unmap,          /* unmap - we need to sync the pages */
        NULL,                   /* no special protect */
        filemap_sync,           /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        filemap_swapout,        /* swapout */
        filemap_swapin,         /* swapin */
};

/*
 * Private mappings just need to be able to load in the map.
 *
 * (This is actually used for shared mappings as well, if we
 * know they can't ever get write permissions..)
 */
static struct vm_operations_struct file_private_mmap = {
        NULL,                   /* open */
        NULL,                   /* close */
        NULL,                   /* unmap */
        NULL,                   /* protect */
        NULL,                   /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        NULL,                   /* swapout */
        NULL,                   /* swapin */
};

/* This is used for a general mmap of a disk file */
int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
{
        struct vm_operations_struct * ops;

        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
                ops = &file_shared_mmap;
                /* share_page() can only guarantee proper page sharing if
                 * the offsets are all page aligned. */
                if (vma->vm_offset & (PAGE_SIZE - 1))
                        return -EINVAL;
        } else {
                ops = &file_private_mmap;
                if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
                        return -EINVAL;
        }
        if (!inode->i_sb || !S_ISREG(inode->i_mode))
                return -EACCES;
        if (!inode->i_op || !inode->i_op->readpage)
                return -ENOEXEC;
        UPDATE_ATIME(inode)
        vma->vm_inode = inode;
        inode->i_count++;
        vma->vm_ops = ops;
        return 0;
}


/*
 * The msync() system call.
 */

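/*
 * Sync one vma for the interval [start, end).  Only mappings backed by
 * an inode and providing a sync operation do any work; with MS_SYNC the
 * inode itself is written out via file_fsync() afterwards.
 */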
static int msync_interval(struct vm_area_struct * vma,
        unsigned long start, unsigned long end, int flags)
{
        if (vma->vm_inode && vma->vm_ops && vma->vm_ops->sync) {
                int error;
                error = vma->vm_ops->sync(vma, start, end-start, flags);
                if (error)
                        return error;
                if (flags & MS_SYNC)
                        return file_fsync(vma->vm_inode, NULL);
                return 0;
        }
        return 0;
}

asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
        unsigned long end;
        struct vm_area_struct * vma;
        int unmapped_error, error;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        len = (len + ~PAGE_MASK) & PAGE_MASK;
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                return -EINVAL;
        if (end == start)
                return 0;
        /*
         * If the interval [start,end) covers some unmapped address ranges,
         * just ignore them, but return -EFAULT at the end.
         */
        vma = find_vma(current->mm, start);
        unmapped_error = 0;
        for (;;) {
                /* Still start < end. */
                if (!vma)
                        return -EFAULT;
                /* Here start < vma->vm_end. */
                if (start < vma->vm_start) {
                        unmapped_error = -EFAULT;
                        start = vma->vm_start;
                }
                /* Here vma->vm_start <= start < vma->vm_end. */
                if (end <= vma->vm_end) {
                        if (start < end) {
                                error = msync_interval(vma, start, end, flags);
                                if (error)
                                        return error;
                        }
                        return unmapped_error;
                }
                /* Here vma->vm_start <= start < vma->vm_end < end. */
                error = msync_interval(vma, start, vma->vm_end, flags);
                if (error)
                        return error;
                start = vma->vm_end;
                vma = vma->vm_next;
        }
}
