/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'buffer.c' implements the buffer-cache functions. Race-conditions have
 * been avoided by NEVER letting an interrupt change a buffer (except for the
 * data, of course), but instead letting the caller do it.
 */

/* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
/* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */

/* Removed a lot of unnecessary code and simplified things now that
   the buffer cache isn't our primary cache - Andrew Tridgell 12/96 */

/*
 * uClinux revisions for memory usage tuning, MAGIC_ROM_PTR,
 * and invalidate_by_block hack
 * Copyright (C) 1998  Kenneth Albanowski <kjahds@kjahds.com>,
 *                     The Silver Hammer Group, Ltd.
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/errno.h>
#include <linux/malloc.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>

#include <asm/system.h>
#include <asm/segment.h>
#include <asm/io.h>
#include <asm/bitops.h>

#define NR_SIZES 5
static char buffersize_index[17] =
{-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};

#define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
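/*
 * BUFSIZE_INDEX maps a block size onto the slot used for free_list and
 * friends by shifting the size down by 9 bits (dividing by 512) and
 * looking the result up in buffersize_index:
 *
 *     BUFSIZE_INDEX(512)  == 0
 *     BUFSIZE_INDEX(1024) == 1
 *     BUFSIZE_INDEX(2048) == 2
 *     BUFSIZE_INDEX(4096) == 3
 *     BUFSIZE_INDEX(8192) == 4
 *
 * The -1 entries mark sizes that are not valid buffer sizes here.
 */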
#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
#ifdef CONFIG_REDUCED_MEMORY
// TJK:
#define MAX_CLEAN_BUFFERS 50 // that's 50 1024-byte buffers
// end TJK
#define MAX_UNUSED_BUFFERS 10
#define HASH_PAGES         1
#else /* !CONFIG_REDUCED_MEMORY */
#define MAX_UNUSED_BUFFERS 30 /* don't ever have more than this number of
                                 unused buffer heads */
#define HASH_PAGES         4  /* number of pages to use for the hash table */
#endif /* !CONFIG_REDUCED_MEMORY */
#define NR_HASH (HASH_PAGES*PAGE_SIZE/sizeof(struct buffer_head *))
#define HASH_MASK (NR_HASH-1)
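/*
 * NR_HASH works out to a power of two (HASH_PAGES, PAGE_SIZE and the
 * pointer size all are), so HASH_MASK can serve as a cheap modulo in
 * _hashfn() below.  As an illustration only: with 4 KiB pages, 4-byte
 * pointers and HASH_PAGES == 1, NR_HASH is 1024 and HASH_MASK is 0x3ff;
 * the exact figures depend on the architecture and configuration.
 */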

static int grow_buffers(int pri, int size);

static struct buffer_head ** hash_table;
static struct buffer_head * lru_list[NR_LIST] = {NULL, };
static struct buffer_head * free_list[NR_SIZES] = {NULL, };

static struct buffer_head * unused_list = NULL;
static struct buffer_head * reuse_list  = NULL;
struct wait_queue *         buffer_wait = NULL;

static int nr_buffers = 0;
static int nr_buffers_type[NR_LIST] = {0,};
static int nr_buffer_heads = 0;
static int nr_unused_buffer_heads = 0;
static int refilled = 0;       /* Set NZ when a buffer freelist is refilled
                                  this is used by the loop device */

/* this is used by some architectures to estimate available memory */
int buffermem = 0;

/* Here is the parameter block for the bdflush process. If you add or
 * remove any of the parameters, make sure to update kernel/sysctl.c.
 */

static void wakeup_bdflush(int);

#define N_PARAM 9

/* the dummy values in this structure are left in there for compatibility
   with old programs that play with the /proc entries */
union bdflush_param{
        struct {
                int nfract;  /* Percentage of buffer cache dirty to
                                activate bdflush */
                int ndirty;  /* Maximum number of dirty blocks to write out per
                                wake-cycle */
                int nrefill; /* Number of clean buffers to try to obtain
                                each time we call refill */
                int nref_dirt; /* Dirty buffer threshold for activating bdflush
                                  when trying to refill buffers. */
                int dummy1;    /* unused */
                int age_buffer;  /* Time for normal buffer to age before
                                    we flush it */
                int age_super;  /* Time for superblock to age before we
                                   flush it */
                int dummy2;    /* unused */
                int dummy3;    /* unused */
        } b_un;
        unsigned int data[N_PARAM];
#ifdef CONFIG_REDUCED_MEMORY
} bdf_prm = {{40, 500, 8, 8, 15, 30*HZ, 5*HZ, 1884, 2}};
                /*Originally {{40, 500, 64, 64, 15, 30*HZ, 5*HZ, 1884, 2}};*/
#else /* !CONFIG_REDUCED_MEMORY */
} bdf_prm = {{40, 500, 64, 64, 15, 30*HZ, 5*HZ, 1884, 2}};
#endif /* !CONFIG_REDUCED_MEMORY */

/* These are the min and max parameter values that we will allow to be assigned */
int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
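/*
 * Because bdflush_param is a union, data[] overlays the b_un struct
 * member for member: data[0] is nfract, data[1] is ndirty, and so on up
 * to data[8].  bdflush_min[] and bdflush_max[] give the corresponding
 * per-parameter bounds for values assigned through the bdflush
 * interface; nfract, for instance, stays within 0..100 since it is a
 * percentage.
 */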

/*
 * Rewrote the wait-routines to use the "new" wait-queue functionality,
 * and getting rid of the cli-sti pairs. The wait-queue routines still
 * need cli-sti, but now it's just a couple of 386 instructions or so.
 *
 * Note that the real wait_on_buffer() is an inline function that checks
 * if 'b_wait' is set before calling this, so that the queues aren't set
 * up unnecessarily.
 */
void __wait_on_buffer(struct buffer_head * bh)
{
        struct wait_queue wait = { current, NULL };

        bh->b_count++;
        add_wait_queue(&bh->b_wait, &wait);
repeat:
        run_task_queue(&tq_disk);
        current->state = TASK_UNINTERRUPTIBLE;
        if (buffer_locked(bh)) {
                schedule();
                goto repeat;
        }
        remove_wait_queue(&bh->b_wait, &wait);
        bh->b_count--;
        current->state = TASK_RUNNING;
}

/* Call sync_buffers with wait!=0 to ensure that the call does not
   return until all buffer writes have completed.  Sync() may return
   before the writes have finished; fsync() may not. */


/* Godamity-damn.  Some buffers (bitmaps for filesystems)
   spontaneously dirty themselves without ever brelse being called.
   We will ultimately want to put these in a separate list, but for
   now we search all of the lists for dirty buffers */

static int sync_buffers(kdev_t dev, int wait)
{
        int i, retry, pass = 0, err = 0;
        struct buffer_head * bh, *next;

        /* One pass for no-wait, three for wait:
           0) write out all dirty, unlocked buffers;
           1) write out all dirty buffers, waiting if locked;
           2) wait for completion by waiting for all buffers to unlock. */
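        /* The pass counter is only advanced by the loop condition at the
           bottom ("while (wait && retry && ++pass<=2)"): with wait == 0 the
           body runs exactly once, with wait != 0 it is repeated for pass
           values 0, 1 and 2 as long as a buffer had to be skipped (retry). */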
        do {
                retry = 0;
repeat:
        /* We search all lists as a failsafe mechanism, not because we expect
           there to be dirty buffers on any of the other lists. */
                bh = lru_list[BUF_DIRTY];
                if (!bh)
                        goto repeat2;
                for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
                        if (bh->b_list != BUF_DIRTY)
                                goto repeat;
                        next = bh->b_next_free;
                        if (!lru_list[BUF_DIRTY])
                                break;
                        if (dev && bh->b_dev != dev)
                                continue;
                        if (buffer_locked(bh)) {
                                /* Buffer is locked; skip it unless wait is
                                   requested AND pass > 0. */
                                if (!wait || !pass) {
                                        retry = 1;
                                        continue;
                                }
                                wait_on_buffer (bh);
                                goto repeat;
                        }
                        /* If an unlocked buffer is not uptodate, there has
                            been an IO error. Skip it. */
                        if (wait && buffer_req(bh) && !buffer_locked(bh) &&
                            !buffer_dirty(bh) && !buffer_uptodate(bh)) {
                                err = 1;
                                continue;
                        }
                        /* Don't write clean buffers.  Don't write ANY buffers
                           on the third pass. */
                        if (!buffer_dirty(bh) || pass >= 2)
                                continue;
                        /* don't bother about locked buffers */
                        if (buffer_locked(bh))
                                continue;
                        bh->b_count++;
                        next->b_count++;
                        bh->b_flushtime = 0;
                        ll_rw_block(WRITE, 1, &bh);
                        bh->b_count--;
                        next->b_count--;
                        retry = 1;
                }

    repeat2:
                bh = lru_list[BUF_LOCKED];
                if (!bh)
                        break;
                for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
                        if (bh->b_list != BUF_LOCKED)
                                goto repeat2;
                        next = bh->b_next_free;
                        if (!lru_list[BUF_LOCKED])
                                break;
                        if (dev && bh->b_dev != dev)
                                continue;
                        if (buffer_locked(bh)) {
                                /* Buffer is locked; skip it unless wait is
                                   requested AND pass > 0. */
                                if (!wait || !pass) {
                                        retry = 1;
                                        continue;
                                }
                                wait_on_buffer (bh);
                                goto repeat2;
                        }
                }

        /* If we are waiting for the sync to succeed, and if any dirty
           blocks were written, then repeat; on the second pass, only
           wait for buffers being written (do not pass to write any
           more buffers on the second pass). */
        } while (wait && retry && ++pass<=2);
        return err;
}

void sync_dev(kdev_t dev)
{
        sync_buffers(dev, 0);
        sync_supers(dev);
        sync_inodes(dev);
        sync_buffers(dev, 0);
        sync_dquots(dev, -1);
}

int fsync_dev(kdev_t dev)
{
        sync_buffers(dev, 0);
        sync_supers(dev);
        sync_inodes(dev);
        sync_dquots(dev, -1);
        return sync_buffers(dev, 1);
}

asmlinkage int sys_sync(void)
{
        fsync_dev(0);
        return 0;
}

int file_fsync (struct inode *inode, struct file *filp)
{
        return fsync_dev(inode->i_dev);
}

asmlinkage int sys_fsync(unsigned int fd)
{
        struct file * file;
        struct inode * inode;

        if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
                return -EBADF;
        if (!file->f_op || !file->f_op->fsync)
                return -EINVAL;
        if (file->f_op->fsync(inode,file))
                return -EIO;
        return 0;
}

asmlinkage int sys_fdatasync(unsigned int fd)
{
        struct file * file;
        struct inode * inode;

        if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
                return -EBADF;
        if (!file->f_op || !file->f_op->fsync)
                return -EINVAL;
        /* this needs further work, at the moment it is identical to fsync() */
        if (file->f_op->fsync(inode,file))
                return -EIO;
        return 0;
}
308
 
309
void invalidate_buffers(kdev_t dev)
310
{
311
        int i;
312
        int nlist;
313
        struct buffer_head * bh;
314
 
315
        for(nlist = 0; nlist < NR_LIST; nlist++) {
316
                bh = lru_list[nlist];
317
                for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
318
                        if (bh->b_dev != dev)
319
                                continue;
320
                        wait_on_buffer(bh);
321
                        if (bh->b_dev != dev)
322
                                continue;
323
                        if (bh->b_count)
324
                                continue;
325
                        bh->b_flushtime = 0;
326
                        clear_bit(BH_Protected, &bh->b_state);
327
                        clear_bit(BH_Uptodate, &bh->b_state);
328
                        clear_bit(BH_Dirty, &bh->b_state);
329
                        clear_bit(BH_Req, &bh->b_state);
330
                }
331
        }
332
}
333
 
334
void invalidate_buffers_by_block(kdev_t dev, unsigned int block, unsigned int count)
335
{
336
        int i;
337
        int nlist;
338
        struct buffer_head * bh;
339
 
340
        for(nlist = 0; nlist < NR_LIST; nlist++) {
341
                bh = lru_list[nlist];
342
                for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
343
                        if (bh->b_dev != dev)
344
                                continue;
345
                        wait_on_buffer(bh);
346
                        if (bh->b_dev != dev)
347
                                continue;
348
                        if (bh->b_count)
349
                                continue;
350
                        if (bh->b_blocknr < block)
351
                                continue;
352
                        if (bh->b_blocknr >= (block+count))
353
                                continue;
354
                        bh->b_flushtime = 0;
355
                        clear_bit(BH_Protected, &bh->b_state);
356
                        clear_bit(BH_Uptodate, &bh->b_state);
357
                        clear_bit(BH_Dirty, &bh->b_state);
358
                        clear_bit(BH_Req, &bh->b_state);
359
                }
360
        }
361
}
362
 
363
#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))&HASH_MASK)
364
#define hash(dev,block) hash_table[_hashfn(dev,block)]
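/*
 * The bucket is picked by XORing the hashed device number with the block
 * number and masking down to a table slot.  Two blocks on the same device
 * whose numbers differ by a multiple of NR_HASH therefore land in the same
 * bucket; such collisions are resolved by chaining through b_next/b_prev.
 */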
365
 
366
static inline void remove_from_hash_queue(struct buffer_head * bh)
367
{
368
        if (bh->b_next)
369
                bh->b_next->b_prev = bh->b_prev;
370
        if (bh->b_prev)
371
                bh->b_prev->b_next = bh->b_next;
372
        if (hash(bh->b_dev,bh->b_blocknr) == bh)
373
                hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
374
        bh->b_next = bh->b_prev = NULL;
375
}
376
 
377
static inline void remove_from_lru_list(struct buffer_head * bh)
378
{
379
        if (!(bh->b_prev_free) || !(bh->b_next_free))
380
                panic("VFS: LRU block list corrupted");
381
        if (bh->b_dev == B_FREE)
382
                panic("LRU list corrupted");
383
        bh->b_prev_free->b_next_free = bh->b_next_free;
384
        bh->b_next_free->b_prev_free = bh->b_prev_free;
385
 
386
        if (lru_list[bh->b_list] == bh)
387
                 lru_list[bh->b_list] = bh->b_next_free;
388
        if (lru_list[bh->b_list] == bh)
389
                 lru_list[bh->b_list] = NULL;
390
        bh->b_next_free = bh->b_prev_free = NULL;
391
}
392
 
393
static inline void remove_from_free_list(struct buffer_head * bh)
394
{
395
        int isize = BUFSIZE_INDEX(bh->b_size);
396
        if (!(bh->b_prev_free) || !(bh->b_next_free))
397
                panic("VFS: Free block list corrupted");
398
        if(bh->b_dev != B_FREE)
399
                panic("Free list corrupted");
400
        if(!free_list[isize])
401
                panic("Free list empty");
402
        if(bh->b_next_free == bh)
403
                 free_list[isize] = NULL;
404
        else {
405
                bh->b_prev_free->b_next_free = bh->b_next_free;
406
                bh->b_next_free->b_prev_free = bh->b_prev_free;
407
                if (free_list[isize] == bh)
408
                         free_list[isize] = bh->b_next_free;
409
        }
410
        bh->b_next_free = bh->b_prev_free = NULL;
411
}
412
 
413
static inline void remove_from_queues(struct buffer_head * bh)
414
{
415
        if(bh->b_dev == B_FREE) {
416
                remove_from_free_list(bh); /* Free list entries should not be
417
                                              in the hash queue */
418
                return;
419
        }
420
        nr_buffers_type[bh->b_list]--;
421
        remove_from_hash_queue(bh);
422
        remove_from_lru_list(bh);
423
}
424
 
425
static inline void put_last_lru(struct buffer_head * bh)
426
{
427
        if (!bh)
428
                return;
429
        if (bh == lru_list[bh->b_list]) {
430
                lru_list[bh->b_list] = bh->b_next_free;
431
                return;
432
        }
433
        if(bh->b_dev == B_FREE)
434
                panic("Wrong block for lru list");
435
        remove_from_lru_list(bh);
436
/* add to back of free list */
437
 
438
        if(!lru_list[bh->b_list]) {
439
                lru_list[bh->b_list] = bh;
440
                lru_list[bh->b_list]->b_prev_free = bh;
441
        }
442
 
443
        bh->b_next_free = lru_list[bh->b_list];
444
        bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
445
        lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
446
        lru_list[bh->b_list]->b_prev_free = bh;
447
}
448
 
449
static inline void put_last_free(struct buffer_head * bh)
450
{
451
        int isize;
452
        if (!bh)
453
                return;
454
 
455
        isize = BUFSIZE_INDEX(bh->b_size);
456
        bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
457
        /* add to back of free list */
458
        if(!free_list[isize]) {
459
                free_list[isize] = bh;
460
                bh->b_prev_free = bh;
461
        }
462
 
463
        bh->b_next_free = free_list[isize];
464
        bh->b_prev_free = free_list[isize]->b_prev_free;
465
        free_list[isize]->b_prev_free->b_next_free = bh;
466
        free_list[isize]->b_prev_free = bh;
467
}
468
 
469
static inline void insert_into_queues(struct buffer_head * bh)
470
{
471
        /* put at end of free list */
472
        if(bh->b_dev == B_FREE) {
473
                put_last_free(bh);
474
                return;
475
        }
476
        if(!lru_list[bh->b_list]) {
477
                lru_list[bh->b_list] = bh;
478
                bh->b_prev_free = bh;
479
        }
480
 
481
        if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
482
        bh->b_next_free = lru_list[bh->b_list];
483
        bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
484
        lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
485
        lru_list[bh->b_list]->b_prev_free = bh;
486
        nr_buffers_type[bh->b_list]++;
487
/* put the buffer in new hash-queue if it has a device */
488
        bh->b_prev = NULL;
489
        bh->b_next = NULL;
490
        if (!(bh->b_dev))
491
                return;
492
        bh->b_next = hash(bh->b_dev,bh->b_blocknr);
493
        hash(bh->b_dev,bh->b_blocknr) = bh;
494
        if (bh->b_next)
495
                bh->b_next->b_prev = bh;
496
}
497
 
498
static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
499
{
500
        struct buffer_head * tmp;
501
 
502
        for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
503
                if (tmp->b_blocknr == block && tmp->b_dev == dev)
504
                        if (tmp->b_size == size)
505
                                return tmp;
506
                        else {
507
                                printk("VFS: Wrong blocksize on device %s\n",
508
                                        kdevname(dev));
509
                                return NULL;
510
                        }
511
        return NULL;
512
}
513
 
514
struct buffer_head *efind_buffer(kdev_t dev, int block, int size)
515
{
516
        return find_buffer(dev, block, size);
517
}
518
 
519
/*
520
 * Why like this, I hear you say... The reason is race-conditions.
521
 * As we don't lock buffers (unless we are reading them, that is),
522
 * something might happen to it while we sleep (ie a read-error
523
 * will force it bad). This shouldn't really happen currently, but
524
 * the code is ready.
525
 */
526
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
527
{
528
        struct buffer_head * bh;
529
 
530
        for (;;) {
531
                if (!(bh=find_buffer(dev,block,size)))
532
                        return NULL;
533
                bh->b_count++;
534
                wait_on_buffer(bh);
535
                if (bh->b_dev == dev && bh->b_blocknr == block
536
                                             && bh->b_size == size)
537
                        return bh;
538
                bh->b_count--;
539
        }
540
}
541
 
542
void set_blocksize(kdev_t dev, int size)
543
{
544
        extern int *blksize_size[];
545
        int i, nlist;
546
        struct buffer_head * bh, *bhnext;
547
 
548
        if (!blksize_size[MAJOR(dev)])
549
                return;
550
 
551
        if (size > PAGE_SIZE)
552
                size = 0;
553
 
554
        switch (size) {
555
                default: panic("Invalid blocksize passed to set_blocksize");
556
                case 512: case 1024: case 2048: case 4096: case 8192: ;
557
        }
558
 
559
        if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
560
                blksize_size[MAJOR(dev)][MINOR(dev)] = size;
561
                return;
562
        }
563
        if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
564
                return;
565
        sync_buffers(dev, 2);
566
        blksize_size[MAJOR(dev)][MINOR(dev)] = size;
567
 
568
  /* We need to be quite careful how we do this - we are moving entries
569
     around on the free list, and we can get in a loop if we are not careful.*/
570
 
571
        for(nlist = 0; nlist < NR_LIST; nlist++) {
572
                bh = lru_list[nlist];
573
                for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
574
                        if(!bh) break;
575
                        bhnext = bh->b_next_free;
576
                        if (bh->b_dev != dev)
577
                                 continue;
578
                        if (bh->b_size == size)
579
                                 continue;
580
 
581
                        bhnext->b_count++;
582
                        wait_on_buffer(bh);
583
                        bhnext->b_count--;
584
                        if (bh->b_dev == dev && bh->b_size != size) {
585
                                clear_bit(BH_Dirty, &bh->b_state);
586
                                clear_bit(BH_Uptodate, &bh->b_state);
587
                                clear_bit(BH_Req, &bh->b_state);
588
                                bh->b_flushtime = 0;
589
                        }
590
                        remove_from_hash_queue(bh);
591
                }
592
        }
593
}
594
 
595
 
596
/* check if a buffer is OK to be reclaimed */
597
static inline int can_reclaim(struct buffer_head *bh, int size)
598
{
599
        if (bh->b_count ||
600
            buffer_protected(bh) ||
601
            buffer_locked(bh) ||
602
            mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
603
            buffer_dirty(bh))
604
                return 0;
605
 
606
        if (bh->b_size != size)
607
                return 0;
608
 
609
        return 1;
610
}
611
 
612
/* find a candidate buffer to be reclaimed */
613
static struct buffer_head *find_candidate(struct buffer_head *bh,
614
                                          int *list_len, int size)
615
{
616
        int lookahead  = 7;
617
 
618
        if (!bh)
619
                goto no_candidate;
620
 
621
        for (; (*list_len) > 0; bh = bh->b_next_free, (*list_len)--) {
622
                if (size != bh->b_size) {
623
                        /* this provides a mechanism for freeing blocks
624
                           of other sizes, this is necessary now that we
625
                           no longer have the lav code. */
626
                        try_to_free_buffer(bh,&bh,1);
627
                        if (!bh)
628
                                break;
629
                        lookahead = 7;
630
                        continue;
631
                }
632
                else if (buffer_locked(bh) &&
633
                         (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) {
634
                        if (!--lookahead) {
635
                                (*list_len) = 0;
636
                                goto no_candidate;
637
                        }
638
                }
639
                else if (can_reclaim(bh,size))
640
                        return bh;
641
        }
642
 
643
no_candidate:
644
        return NULL;
645
}
646
 
647
static void put_unused_buffer_head(struct buffer_head * bh)
648
{
649
        if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
650
                nr_buffer_heads--;
651
                kfree(bh);
652
                return;
653
        }
654
        memset(bh,0,sizeof(*bh));
655
        nr_unused_buffer_heads++;
656
        bh->b_next_free = unused_list;
657
        unused_list = bh;
658
        if (!waitqueue_active(&buffer_wait))
659
                return;
660
        wake_up(&buffer_wait);
661
}
662
 
663
/*
664
 * We can't put completed temporary IO buffer_heads directly onto the
665
 * unused_list when they become unlocked, since the device driver
666
 * end_request routines still expect access to the buffer_head's
667
 * fields after the final unlock.  So, the device driver puts them on
668
 * the reuse_list instead once IO completes, and we recover these to
669
 * the unused_list here.
670
 *
671
 * The reuse_list receives buffers from interrupt routines, so we need
672
 * to be IRQ-safe here (but note that interrupts only _add_ to the
673
 * reuse_list, never take away. So we don't need to worry about the
674
 * reuse_list magically emptying).
675
 */
676
static inline void recover_reusable_buffer_heads(void)
677
{
678
        if (reuse_list) {
679
                struct buffer_head *head;
680
 
681
                head = xchg(&reuse_list, NULL);
682
 
683
                do {
684
                        struct buffer_head *bh = head;
685
                        head = head->b_next_free;
686
                        put_unused_buffer_head(bh);
687
                } while (head);
688
        }
689
}
690
 
691
extern void allow_interrupts(void);
692
 
693
static void refill_freelist(int size)
694
{
695
        struct buffer_head * bh;
696
        struct buffer_head * candidate[BUF_DIRTY];
697
        extern struct task_struct *bdflush_tsk;
698
        unsigned int best_time, winner;
699
        int buffers[BUF_DIRTY];
700
        int i, limit = ((min_free_pages + free_pages_low) >> 1);
701
        int needed;
702
 
703
        refilled = 1;
704
        /* If there are too many dirty buffers, we wake up the update process
705
           now so as to ensure that there are still clean buffers available
706
           for user processes to use (and dirty) */
707
 
708
        if (nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100)
709
                wakeup_bdflush(1);
710
 
711
        /* We are going to try to locate this much memory */
712
        needed = bdf_prm.b_un.nrefill * size;
713
 
714
        while (nr_free_pages > min_free_pages*2 && needed > 0 &&
715
               grow_buffers(GFP_BUFFER, size)) {
716
                needed -= PAGE_SIZE;
717
        }
718
 
719
repeat:
720
        allow_interrupts();
721
        recover_reusable_buffer_heads();
722
        if(needed <= 0)
723
                return;
724
 
725
        /* OK, we cannot grow the buffer cache, now try to get some
726
           from the lru list */
727
 
728
        /* First set the candidate pointers to usable buffers.  This
729
           should be quick nearly all of the time. */
730
 
731
        for(i=0; i<BUF_DIRTY; i++){
732
                buffers[i] = nr_buffers_type[i];
733
                candidate[i] = find_candidate(lru_list[i], &buffers[i], size);
734
        }
735
 
736
        /* Now see which candidate wins the election */
737
 
738
        winner = best_time = UINT_MAX;
739
        for(i=0; i<BUF_DIRTY; i++){
740
                if(!candidate[i]) continue;
741
                if(candidate[i]->b_lru_time < best_time){
742
                        best_time = candidate[i]->b_lru_time;
743
                        winner = i;
744
                }
745
        }
746
 
747
        /* If we have a winner, use it, and then get a new candidate from that list */
748
        if(winner != UINT_MAX) {
749
                i = winner;
750
                while (needed>0 && (bh=candidate[i])) {
751
                        candidate[i] = bh->b_next_free;
752
                        if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
753
                        remove_from_queues(bh);
754
                        bh->b_dev = B_FREE;
755
                        put_last_free(bh);
756
                        needed -= bh->b_size;
757
                        buffers[i]--;
758
                        if(buffers[i] == 0) candidate[i] = NULL;
759
 
760
                        if (candidate[i] && !can_reclaim(candidate[i],size))
761
                                candidate[i] = find_candidate(candidate[i],&buffers[i], size);
762
                }
763
                goto repeat;
764
        }
765
 
766
        /* Too bad, that was not enough. Try a little harder to grow some. */
767
 
768
        if (nr_free_pages > limit) {
769
                if (grow_buffers(GFP_BUFFER, size)) {
770
                        needed -= PAGE_SIZE;
771
                        goto repeat;
772
                };
773
        }
774
 
775
        /* If we are not bdflush we should wake up bdflush and try it again. */
776
 
777
        if (current != bdflush_tsk &&
778
            (buffermem >> PAGE_SHIFT) > (MAP_NR(high_memory) >> 2) &&
779
            nr_buffers_type[BUF_DIRTY] > bdf_prm.b_un.nref_dirt) {
780
                wakeup_bdflush(1);
781
                needed -= PAGE_SIZE;
782
                goto repeat;
783
        }
784
 
785
        /*
786
         * In order to protect our reserved pages,
787
         * return now if we got any buffers.
788
         */
789
        allow_interrupts();
790
        if (free_list[BUFSIZE_INDEX(size)])
791
                return;
792
 
793
        /* and repeat until we find something good */
794
        i = grow_buffers(GFP_BUFFER, size);
795
 
796
        if (current != bdflush_tsk && !i && nr_buffers_type[BUF_DIRTY] > 0)
797
                wakeup_bdflush(1);
798
        else if (!i)
799
                grow_buffers(GFP_IO, size);
800
 
801
        /* decrease needed even if there is no success */
802
        needed -= PAGE_SIZE;
803
        goto repeat;
804
}
805
 
806
/*
807
 * Ok, this is getblk, and it isn't very clear, again to hinder
808
 * race-conditions. Most of the code is seldom used, (ie repeating),
809
 * so it should be much more efficient than it looks.
810
 *
811
 * The algorithm is changed: hopefully better, and an elusive bug removed.
812
 *
813
 * 14.02.92: changed it to sync dirty buffers a bit: better performance
814
 * when the filesystem starts to get full of dirty blocks (I hope).
815
 */
816
struct buffer_head * getblk(kdev_t dev, int block, int size)
817
{
818
        struct buffer_head * bh;
819
        int isize = BUFSIZE_INDEX(size);
820
 
821
        /* If there are too many dirty buffers, we wake up the update process
822
           now so as to ensure that there are still clean buffers available
823
           for user processes to use (and dirty) */
824
repeat:
825
        allow_interrupts();
826
        bh = get_hash_table(dev, block, size);
827
        if (bh) {
828
                if (!buffer_dirty(bh)) {
829
                        if (buffer_uptodate(bh))
830
                                 put_last_lru(bh);
831
                        bh->b_flushtime = 0;
832
                }
833
                set_bit(BH_Touched, &bh->b_state);
834
                return bh;
835
        }
836
 
837
get_free:
838
        bh = free_list[isize];
839
        if (!bh)
840
                goto refill;
841
        remove_from_free_list(bh);
842
 
843
        /* OK, FINALLY we know that this buffer is the only one of its kind,
844
         * and that it's unused (b_count=0), unlocked (buffer_locked=0),
845
         * and clean */
846
        bh->b_count=1;
847
        bh->b_list=BUF_CLEAN;
848
        bh->b_flushtime=0;
849
        bh->b_state=(1<<BH_Touched);
850
        bh->b_dev=dev;
851
        bh->b_blocknr=block;
852
        insert_into_queues(bh);
853
        return bh;
854
 
855
refill:
856
        allow_interrupts();
857
        refill_freelist(size);
858
        if (!find_buffer(dev,block,size))
859
                goto get_free;
860
        goto repeat;
861
}
862
 
863
void set_writetime(struct buffer_head * buf, int flag)
864
{
865
        int newtime;
866
 
867
        if (buffer_dirty(buf)) {
868
                /* Move buffer to dirty list if jiffies is clear */
869
                newtime = jiffies + (flag ? bdf_prm.b_un.age_super :
870
                                     bdf_prm.b_un.age_buffer);
871
                if(!buf->b_flushtime || buf->b_flushtime > newtime)
872
                         buf->b_flushtime = newtime;
873
        } else {
874
                buf->b_flushtime = 0;
875
        }
876
}
877
 
878
 
879
/*
880
 * A buffer may need to be moved from one buffer list to another
881
 * (e.g. in case it is not shared any more). Handle this.
882
 */
883
void refile_buffer(struct buffer_head * buf)
884
{
885
        int dispose;
886
 
887
        if(buf->b_dev == B_FREE) {
888
                printk("Attempt to refile free buffer\n");
889
                return;
890
        }
891
        if (buffer_dirty(buf))
892
                dispose = BUF_DIRTY;
893
        else if (buffer_locked(buf))
894
                dispose = BUF_LOCKED;
895
        else
896
                dispose = BUF_CLEAN;
897
        if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
898
        if(dispose != buf->b_list)  {
899
                if(dispose == BUF_DIRTY)
900
                         buf->b_lru_time = jiffies;
901
                if(dispose == BUF_LOCKED &&
902
                   (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
903
                         dispose = BUF_LOCKED1;
904
                remove_from_queues(buf);
905
                buf->b_list = dispose;
906
#ifdef CONFIG_REDUCED_MEMORY
907
                // TJK: try to prevent the BUF_CLEAN lru_list from growing
908
                // too much.
909
                if (nr_buffers_type[BUF_CLEAN] > MAX_CLEAN_BUFFERS) {
910
                        wakeup_bdflush(0);
911
                }
912
                // end TJK
913
#endif
914
                insert_into_queues(buf);
915
                if (dispose == BUF_DIRTY) {
916
                /* This buffer is dirty, maybe we need to start flushing. */
917
                /* If too high a percentage of the buffers are dirty... */
918
                if (nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100)
919
                         wakeup_bdflush(0);
920
                /* If this is a loop device, and
921
                 * more than half of the buffers are dirty... */
922
                /* (Prevents no-free-buffers deadlock with loop device.) */
923
                if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
924
                    nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
925
                        wakeup_bdflush(1);
926
                }
927
        }
928
}
929
 
930
/*
931
 * Release a buffer head
932
 */
933
void __brelse(struct buffer_head * buf)
934
{
935
        wait_on_buffer(buf);
936
 
937
        /* If dirty, mark the time this buffer should be written back */
938
        set_writetime(buf, 0);
939
        refile_buffer(buf);
940
 
941
        if (buf->b_count) {
942
                buf->b_count--;
943
                return;
944
        }
945
        printk("VFS: brelse: Trying to free free buffer\n");
946
}
947
 
948
/*
949
 * bforget() is like brelse(), except it removes the buffer
950
 * from the hash-queues (so that it won't be re-used if it's
951
 * shared).
952
 */
953
void __bforget(struct buffer_head * buf)
954
{
955
        wait_on_buffer(buf);
956
        mark_buffer_clean(buf);
957
        clear_bit(BH_Protected, &buf->b_state);
958
        buf->b_count--;
959
        remove_from_hash_queue(buf);
960
        buf->b_dev = NODEV;
961
        refile_buffer(buf);
962
}
963
 
964
/*
965
 * bread() reads a specified block and returns the buffer that contains
966
 * it. It returns NULL if the block was unreadable.
967
 */
968
struct buffer_head * bread(kdev_t dev, int block, int size)
969
{
970
        struct buffer_head * bh;
971
 
972
        if (!(bh = getblk(dev, block, size))) {
973
                printk("VFS: bread: impossible error\n");
974
                return NULL;
975
        }
976
        if (buffer_uptodate(bh))
977
                return bh;
978
        ll_rw_block(READ, 1, &bh);
979
        wait_on_buffer(bh);
980
        if (buffer_uptodate(bh))
981
                return bh;
982
        brelse(bh);
983
        return NULL;
984
}
985
 
986
/*
987
 * Ok, breada can be used as bread, but additionally to mark other
988
 * blocks for reading as well. End the argument list with a negative
989
 * number.
990
 */
991
 
992
#define NBUF 16
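/* NBUF caps how many blocks a single breada() call will read ahead. */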
993
 
994
struct buffer_head * breada(kdev_t dev, int block, int bufsize,
995
        unsigned int pos, unsigned int filesize)
996
{
997
        struct buffer_head * bhlist[NBUF];
998
        unsigned int blocks;
999
        struct buffer_head * bh;
1000
        int index;
1001
        int i, j;
1002
 
1003
        if (pos >= filesize)
1004
                return NULL;
1005
 
1006
        if (block < 0 || !(bh = getblk(dev,block,bufsize)))
1007
                return NULL;
1008
 
1009
        index = BUFSIZE_INDEX(bh->b_size);
1010
 
1011
        if (buffer_uptodate(bh))
1012
                return(bh);
1013
        else ll_rw_block(READ, 1, &bh);
1014
 
1015
        blocks = (filesize - pos) >> (9+index);
1016
 
1017
        if (blocks < (read_ahead[MAJOR(dev)] >> index))
1018
                blocks = read_ahead[MAJOR(dev)] >> index;
1019
        if (blocks > NBUF)
1020
                blocks = NBUF;
1021
 
1022
/*      if (blocks) printk("breada (new) %d blocks\n",blocks); */
1023
 
1024
 
1025
        bhlist[0] = bh;
1026
        j = 1;
1027
        for(i=1; i<blocks; i++) {
1028
                bh = getblk(dev,block+i,bufsize);
1029
                if (buffer_uptodate(bh)) {
1030
                        brelse(bh);
1031
                        break;
1032
                }
1033
                else bhlist[j++] = bh;
1034
        }
1035
 
1036
        /* Request the read for these buffers, and then release them */
1037
        if (j>1)
1038
                ll_rw_block(READA, (j-1), bhlist+1);
1039
        for(i=1; i<j; i++)
1040
                brelse(bhlist[i]);
1041
 
1042
        /* Wait for this buffer, and then continue on */
1043
        bh = bhlist[0];
1044
        wait_on_buffer(bh);
1045
        if (buffer_uptodate(bh))
1046
                return bh;
1047
        brelse(bh);
1048
        return NULL;
1049
}
1050
 
1051
static void get_more_buffer_heads(void)
1052
{
1053
        struct wait_queue wait = { current, NULL };
1054
        struct buffer_head * bh;
1055
 
1056
        while (!unused_list) {
1057
                /*
1058
                 * This is critical.  We can't swap out pages to get
1059
                 * more buffer heads, because the swap-out may need
1060
                 * more buffer-heads itself.  Thus GFP_ATOMIC.
1061
                 *
1062
                 * This is no longer true, it is GFP_BUFFER again, the
1063
                 * swapping code now knows not to perform I/O when that
1064
                 * GFP level is specified... -DaveM
1065
                 *
1066
                 * Ouch, another bug!  get_free_page() does not call
1067
                 * try_to_free_page() if priority == GFP_BUFFER.  This
1068
                 * lets kswapd get into a lockup situation if there is
1069
                 * no free space for buffer growth but we need more
1070
                 * memory for a buffer_head for swapping.  If memory is
1071
                 * full of recyclable buffers, we deadlock because
1072
                 * kswapd won't recycle them!  Use GFP_IO instead: it
1073
                 * still won't recurse (GFP_IO sets can_do_io to zero in
1074
                 * try_to_free_page), but it lets us recover those
1075
                 * buffer heads.  --sct
1076
                 */
1077
                /* we now use kmalloc() here instead of gfp as we want
1078
                   to be able to easily release buffer heads - they
1079
                   took up quite a bit of memory (tridge) */
1080
                bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_IO);
1081
                if (bh) {
1082
                        put_unused_buffer_head(bh);
1083
                        nr_buffer_heads++;
1084
                        return;
1085
                }
1086
 
1087
                /*
1088
                 * Uhhuh. We're _really_ low on memory. Now we just
1089
                 * wait for old buffer heads to become free due to
1090
                 * finishing IO..
1091
                 */
1092
                run_task_queue(&tq_disk);
1093
 
1094
                /*
1095
                 * Set our state for sleeping, then check again for buffer heads.
1096
                 * This ensures we won't miss a wake_up from an interrupt.
1097
                 */
1098
                add_wait_queue(&buffer_wait, &wait);
1099
                current->state = TASK_UNINTERRUPTIBLE;
1100
                if (!unused_list && !reuse_list)
1101
                        schedule();
1102
                recover_reusable_buffer_heads();
1103
                remove_wait_queue(&buffer_wait, &wait);
1104
                current->state = TASK_RUNNING;
1105
        }
1106
 
1107
}
1108
 
1109
static struct buffer_head * get_unused_buffer_head(void)
1110
{
1111
        struct buffer_head * bh;
1112
 
1113
        recover_reusable_buffer_heads();
1114
        get_more_buffer_heads();
1115
        if (!unused_list)
1116
                return NULL;
1117
        bh = unused_list;
1118
        unused_list = bh->b_next_free;
1119
        nr_unused_buffer_heads--;
1120
        return bh;
1121
}
1122
 
1123
/*
1124
 * Create the appropriate buffers when given a page for data area and
1125
 * the size of each buffer.. Use the bh->b_this_page linked list to
1126
 * follow the buffers created.  Return NULL if unable to create more
1127
 * buffers.
1128
 */
1129
static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1130
{
1131
        struct buffer_head *bh, *head;
1132
        long offset;
1133
 
1134
        head = NULL;
1135
        offset = PAGE_SIZE;
1136
        while ((offset -= size) >= 0) {
1137
                bh = get_unused_buffer_head();
1138
                if (!bh)
1139
                        goto no_grow;
1140
 
1141
                bh->b_dev = B_FREE;  /* Flag as unused */
1142
                bh->b_this_page = head;
1143
                head = bh;
1144
 
1145
                bh->b_state = 0;
1146
                bh->b_next_free = NULL;
1147
                bh->b_count = 0;
1148
                bh->b_size = size;
1149
 
1150
                bh->b_data = (char *) (page+offset);
1151
                bh->b_list = 0;
1152
        }
1153
        return head;
1154
/*
1155
 * In case anything failed, we just free everything we got.
1156
 */
1157
no_grow:
1158
        bh = head;
1159
        while (bh) {
1160
                head = bh;
1161
                bh = bh->b_this_page;
1162
                put_unused_buffer_head(head);
1163
        }
1164
        return NULL;
1165
}
1166
 
1167
/* Run the hooks that have to be done when a page I/O has completed. */
1168
static inline void after_unlock_page (struct page * page)
1169
{
1170
        if (clear_bit(PG_decr_after, &page->flags))
1171
                atomic_dec(&nr_async_pages);
1172
        if (clear_bit(PG_free_after, &page->flags))
1173
                __free_page(page);
1174
#ifndef NO_MM
1175
        if (clear_bit(PG_swap_unlock_after, &page->flags))
1176
                swap_after_unlock_page(page->swap_unlock_entry);
1177
#endif /*!NO_MM*/
1178
}
1179
 
1180
/*
1181
 * Free all temporary buffers belonging to a page.
1182
 * This needs to be called with interrupts disabled.
1183
 */
1184
static inline void free_async_buffers (struct buffer_head * bh)
1185
{
1186
        struct buffer_head * tmp;
1187
 
1188
        tmp = bh;
1189
        do {
1190
                if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1191
                        printk ("Whoops: unlock_buffer: "
1192
                                "async IO mismatch on page.\n");
1193
                        return;
1194
                }
1195
                tmp->b_next_free = reuse_list;
1196
                reuse_list = tmp;
1197
                clear_bit(BH_FreeOnIO, &tmp->b_state);
1198
                tmp = tmp->b_this_page;
1199
        } while (tmp != bh);
1200
}
1201
 
1202
/*
1203
 * Start I/O on a page.
1204
 * This function expects the page to be locked and may return before I/O is complete.
1205
 * You then have to check page->locked, page->uptodate, and maybe wait on page->wait.
1206
 */
1207
int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
1208
{
1209
        struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1210
        int block, nr;
1211
 
1212
        if (!PageLocked(page))
1213
                panic("brw_page: page not locked for I/O");
1214
        clear_bit(PG_uptodate, &page->flags);
1215
        clear_bit(PG_error, &page->flags);
1216
        /*
1217
         * Allocate buffer heads pointing to this page, just for I/O.
1218
         * They do _not_ show up in the buffer hash table!
1219
         * They are _not_ registered in page->buffers either!
1220
         */
1221
        bh = create_buffers(page_address(page), size);
1222
        if (!bh) {
1223
                clear_bit(PG_locked, &page->flags);
1224
                wake_up(&page->wait);
1225
                return -ENOMEM;
1226
        }
1227
        nr = 0;
1228
        next = bh;
1229
        do {
1230
                struct buffer_head * tmp;
1231
                block = *(b++);
1232
 
1233
                set_bit(BH_FreeOnIO, &next->b_state);
1234
                next->b_list = BUF_CLEAN;
1235
                next->b_dev = dev;
1236
                next->b_blocknr = block;
1237
                next->b_count = 1;
1238
                next->b_flushtime = 0;
1239
                set_bit(BH_Uptodate, &next->b_state);
1240
 
1241
                /*
1242
                 * When we use bmap, we define block zero to represent
1243
                 * a hole.  ll_rw_page, however, may legitimately
1244
                 * access block zero, and we need to distinguish the
1245
                 * two cases.
1246
                 */
1247
                if (bmap && !block) {
1248
                        memset(next->b_data, 0, size);
1249
                        next->b_count--;
1250
                        continue;
1251
                }
1252
                tmp = get_hash_table(dev, block, size);
1253
                if (tmp) {
1254
                        if (!buffer_uptodate(tmp)) {
1255
                                if (rw == READ)
1256
                                        ll_rw_block(READ, 1, &tmp);
1257
                                wait_on_buffer(tmp);
1258
                        }
1259
                        if (rw == READ)
1260
                                memcpy(next->b_data, tmp->b_data, size);
1261
                        else {
1262
                                memcpy(tmp->b_data, next->b_data, size);
1263
                                mark_buffer_dirty(tmp, 0);
1264
                        }
1265
                        brelse(tmp);
1266
                        next->b_count--;
1267
                        continue;
1268
                }
1269
                if (rw == READ)
1270
                        clear_bit(BH_Uptodate, &next->b_state);
1271
                else
1272
                        set_bit(BH_Dirty, &next->b_state);
1273
                arr[nr++] = next;
1274
        } while (prev = next, (next = next->b_this_page) != NULL);
1275
        prev->b_this_page = bh;
1276
 
1277
        if (nr) {
1278
                ll_rw_block(rw, nr, arr);
1279
                /* The rest of the work is done in mark_buffer_uptodate()
1280
                 * and unlock_buffer(). */
1281
        } else {
1282
                unsigned long flags;
1283
                save_flags(flags);
1284
                cli();
1285
                free_async_buffers(bh);
1286
                restore_flags(flags);
1287
                clear_bit(PG_locked, &page->flags);
1288
                set_bit(PG_uptodate, &page->flags);
1289
                wake_up(&page->wait);
1290
                after_unlock_page(page);
1291
                if (waitqueue_active(&buffer_wait))
1292
                        wake_up(&buffer_wait);
1293
        }
1294
        ++current->maj_flt;
1295
        return 0;
1296
}
1297
 
1298
/*
1299
 * This is called by end_request() when I/O has completed.
1300
 */
1301
void mark_buffer_uptodate(struct buffer_head * bh, int on)
1302
{
1303
        if (on) {
1304
                struct buffer_head *tmp = bh;
1305
                set_bit(BH_Uptodate, &bh->b_state);
1306
                /* If a page has buffers and all these buffers are uptodate,
1307
                 * then the page is uptodate. */
1308
                do {
1309
                        if (!test_bit(BH_Uptodate, &tmp->b_state))
1310
                                return;
1311
                        tmp=tmp->b_this_page;
1312
                } while (tmp && tmp != bh);
1313
                set_bit(PG_uptodate, &mem_map[MAP_NR(bh->b_data)].flags);
1314
                return;
1315
        }
1316
        clear_bit(BH_Uptodate, &bh->b_state);
1317
}
1318
 
1319
/*
1320
 * This is called by end_request() when I/O has completed.
1321
 */
1322
void unlock_buffer(struct buffer_head * bh)
1323
{
1324
        unsigned long flags;
1325
        struct buffer_head *tmp;
1326
        struct page *page;
1327
 
1328
        if (!clear_bit(BH_Lock, &bh->b_state))
1329
                printk ("unlock_buffer: already unlocked on %s\n",
1330
                        kdevname(bh->b_dev));
1331
        wake_up(&bh->b_wait);
1332
        if (waitqueue_active(&buffer_wait))
1333
                wake_up(&buffer_wait);
1334
 
1335
        if (!test_bit(BH_FreeOnIO, &bh->b_state))
1336
                return;
1337
        /* This is a temporary buffer used for page I/O. */
1338
        page = mem_map + MAP_NR(bh->b_data);
1339
        if (!PageLocked(page))
1340
                goto not_locked;
1341
        if (bh->b_count != 1)
1342
                goto bad_count;
1343
 
1344
        if (!test_bit(BH_Uptodate, &bh->b_state))
1345
                set_bit(PG_error, &page->flags);
1346
 
1347
        /*
1348
         * Be _very_ careful from here on. Bad things can happen if
1349
         * two buffer heads end IO at almost the same time and both
1350
         * decide that the page is now completely done.
1351
         *
1352
         * Async buffer_heads are here only as labels for IO, and get
1353
         * thrown away once the IO for this page is complete.  IO is
1354
         * deemed complete once all buffers have been visited
1355
         * (b_count==0) and are now unlocked. We must make sure that
1356
         * only the _last_ buffer that decrements its count is the one
1357
         * that free's the page..
1358
         */
1359
        save_flags(flags);
1360
        cli();
1361
        bh->b_count--;
1362
        tmp = bh;
1363
        do {
1364
                if (tmp->b_count)
1365
                        goto still_busy;
1366
                tmp = tmp->b_this_page;
1367
        } while (tmp != bh);
1368
 
1369
        /* OK, the async IO on this page is complete. */
1370
        free_async_buffers(bh);
1371
        restore_flags(flags);
1372
        clear_bit(PG_locked, &page->flags);
1373
        wake_up(&page->wait);
1374
        after_unlock_page(page);
1375
        wake_up(&buffer_wait);
1376
        return;
1377
 
1378
still_busy:
1379
        restore_flags(flags);
1380
        return;
1381
 
1382
not_locked:
1383
        printk ("Whoops: unlock_buffer: async io complete on unlocked page\n");
1384
        return;
1385
 
1386
bad_count:
1387
        printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1388
        return;
1389
}
1390
 
1391
/*
1392
 * Generic "readpage" function for block devices that have the normal
1393
 * bmap functionality. This is most of the block device filesystems.
1394
 * Reads the page asynchronously --- the unlock_buffer() and
1395
 * mark_buffer_uptodate() functions propagate buffer state into the
1396
 * page struct once IO has completed.
1397
 */
1398
int generic_readpage(struct inode * inode, struct page * page)
1399
{
1400
        unsigned long block;
1401
        int *p, nr[PAGE_SIZE/512];
1402
        int i;
1403
 
1404
        page->count++;
1405
        set_bit(PG_locked, &page->flags);
1406
        set_bit(PG_free_after, &page->flags);
1407
 
1408
        i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1409
        block = page->offset >> inode->i_sb->s_blocksize_bits;
1410
        p = nr;
1411
        do {
1412
                *p = inode->i_op->bmap(inode, block);
1413
                i--;
1414
                block++;
1415
                p++;
1416
        } while (i > 0);
1417
 
1418
        /* IO start */
1419
        brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1420
        return 0;
1421
}

/*
 * Try to increase the number of buffers available: the size argument
 * is used to determine what kind of buffers we want.
 */
static int grow_buffers(int pri, int size)
{
        unsigned long page;
        struct buffer_head *bh, *tmp;
        struct buffer_head * insert_point;
        int isize;

        if ((size & 511) || (size > PAGE_SIZE)) {
                printk("VFS: grow_buffers: size = %d\n",size);
                return 0;
        }

        isize = BUFSIZE_INDEX(size);

        if (!(page = __get_free_page(pri)))
                return 0;
        bh = create_buffers(page, size);
        if (!bh) {
                free_page(page);
                return 0;
        }

        insert_point = free_list[isize];

        tmp = bh;
        while (1) {
                if (insert_point) {
                        tmp->b_next_free = insert_point->b_next_free;
                        tmp->b_prev_free = insert_point;
                        insert_point->b_next_free->b_prev_free = tmp;
                        insert_point->b_next_free = tmp;
                } else {
                        tmp->b_prev_free = tmp;
                        tmp->b_next_free = tmp;
                }
                insert_point = tmp;
                ++nr_buffers;
                if (tmp->b_this_page)
                        tmp = tmp->b_this_page;
                else
                        break;
        }
        tmp->b_this_page = bh;
        free_list[isize] = bh;
        mem_map[MAP_NR(page)].buffers = bh;
        buffermem += PAGE_SIZE;
        return 1;
}
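
#if 0
/*
 * Illustrative sketch only: grow_buffers() takes a GFP priority and a
 * buffer size that must be a multiple of 512 bytes and no larger than
 * PAGE_SIZE.  A caller wanting a few more BLOCK_SIZE buffers might do
 * something like this ("grow_some_buffers" is an invented name):
 */
static void grow_some_buffers(void)
{
        int i;

        /* Each successful call adds one page worth of buffers to the
           free list; stop early if no free page could be obtained. */
        for (i = 0; i < 4; i++)
                if (!grow_buffers(GFP_KERNEL, BLOCK_SIZE))
                        break;
}
#endif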


/* =========== Reduce the buffer memory ============= */

static inline int buffer_waiting(struct buffer_head * bh)
{
        return waitqueue_active(&bh->b_wait);
}

/*
 * try_to_free_buffer() checks if all the buffers on this particular page
 * are unused, and frees the page if so.
 */
int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
                       int priority)
{
        unsigned long page;
        struct buffer_head * tmp, * p;

        *bhp = bh;
        page = (unsigned long) bh->b_data;
        page &= PAGE_MASK;
        tmp = bh;
        do {
                if (!tmp)
                        return 0;
                if (tmp->b_count || buffer_protected(tmp) ||
                    buffer_dirty(tmp) || buffer_locked(tmp) ||
                    buffer_waiting(tmp))
                        return 0;
                if (priority && buffer_touched(tmp))
                        return 0;
                tmp = tmp->b_this_page;
        } while (tmp != bh);
        tmp = bh;
        do {
                p = tmp;
                tmp = tmp->b_this_page;
                nr_buffers--;
                if (p == *bhp) {
                        *bhp = p->b_prev_free;
                        if (p == *bhp) /* Was this the last in the list? */
                                *bhp = NULL;
                }
                remove_from_queues(p);
                put_unused_buffer_head(p);
        } while (tmp != bh);
        buffermem -= PAGE_SIZE;
        mem_map[MAP_NR(page)].buffers = NULL;
        free_page(page);
        return !mem_map[MAP_NR(page)].count;
}
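
#if 0
/*
 * Illustrative sketch only: a memory-pressure path can walk the clean LRU
 * list and offer each unreferenced buffer to try_to_free_buffer(), which
 * releases the underlying page once every buffer sharing it is idle.
 * This mirrors the CONFIG_REDUCED_MEMORY trimming done in bdflush() below;
 * "trim_clean_buffers" is an invented name.
 */
static int trim_clean_buffers(int max_pages)
{
        struct buffer_head * bh = lru_list[BUF_CLEAN];
        int freed = 0;

        if (!bh)
                return 0;
        do {
                if (!bh->b_count && try_to_free_buffer(bh, &bh, 0))
                        freed++;
                if (!bh)
                        break;
                bh = bh->b_next_free;
        } while (bh != lru_list[BUF_CLEAN] && freed < max_pages);
        return freed;
}
#endif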

/* ================== Debugging =================== */

void show_buffers(void)
{
        struct buffer_head * bh;
        int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
        int protected = 0;
        int nlist;
        static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","LOCKED1","DIRTY"};

        printk("Buffer memory:   %6dkB\n",buffermem>>10);
        printk("Buffer heads:    %6d\n",nr_buffer_heads);
        printk("Buffer blocks:   %6d\n",nr_buffers);

        for(nlist = 0; nlist < NR_LIST; nlist++) {
          found = locked = dirty = used = lastused = protected = 0;
          bh = lru_list[nlist];
          if(!bh) continue;

          do {
                found++;
                if (buffer_locked(bh))
                        locked++;
                if (buffer_protected(bh))
                        protected++;
                if (buffer_dirty(bh))
                        dirty++;
                if (bh->b_count)
                        used++, lastused = found;
                bh = bh->b_next_free;
          } while (bh != lru_list[nlist]);
          printk("%8s: %d buffers, %d used (last=%d), "
                 "%d locked, %d protected, %d dirty\n",
                 buf_types[nlist], found, used, lastused,
                 locked, protected, dirty);
        }
}

/* ===================== Init ======================= */

/*
 * allocate the hash table and init the free list
 */
void buffer_init(void)
{
#ifdef CONFIG_REDUCED_MEMORY
        hash_table = (struct buffer_head **)__get_free_pages(GFP_KERNEL, 0, 0); /* HACK! - kja */
#else /* !CONFIG_REDUCED_MEMORY */
        hash_table = (struct buffer_head **)vmalloc(NR_HASH*sizeof(struct buffer_head *));
#endif /* !CONFIG_REDUCED_MEMORY */
        if (!hash_table)
                panic("Failed to allocate buffer hash table\n");
        memset(hash_table,0,NR_HASH*sizeof(struct buffer_head *));

        lru_list[BUF_CLEAN] = 0;
        grow_buffers(GFP_KERNEL, BLOCK_SIZE);
}


/* ====================== bdflush support =================== */

/* This is a simple kernel daemon whose job is to provide a dynamic
 * response to dirty buffers.  Once this process is activated, we write back
 * a limited number of buffers to the disks and then go back to sleep again.
 */
struct wait_queue * bdflush_wait = NULL;
struct wait_queue * bdflush_done = NULL;
struct task_struct *bdflush_tsk = 0;

static void wakeup_bdflush(int wait)
{
        if (current == bdflush_tsk)
                return;
        wake_up(&bdflush_wait);
        if (wait) {
                run_task_queue(&tq_disk);
                sleep_on(&bdflush_done);
                recover_reusable_buffer_heads();
        }
}
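
#if 0
/*
 * Illustrative sketch only: producers of dirty buffers poke the daemon
 * through wakeup_bdflush().  Passing wait=1 also blocks the caller until
 * bdflush has made a pass, which throttles writers when too much dirty
 * data has accumulated.  The threshold test below reuses the existing
 * bdf_prm tuning block but is not code copied from this file.
 */
static void balance_dirty_sketch(void)
{
        if (nr_buffers_type[BUF_DIRTY] * 100 >
            nr_buffers * bdf_prm.b_un.nfract)
                wakeup_bdflush(1);      /* wake the daemon and wait for it */
        else
                wakeup_bdflush(0);      /* just wake it, don't block */
}
#endif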


/*
 * Here we attempt to write back old buffers.  We also try to flush inodes
 * and supers, since this function is essentially "update" and otherwise
 * there would be no way of ensuring that these quantities ever get
 * written back.  Ideally, we would have a timestamp on the inodes and
 * superblocks so that we could write back only the old ones.
 */

asmlinkage int sync_old_buffers(void)
{
        int i;
        int ndirty, nwritten;
        int nlist;
        int ncount;
        struct buffer_head * bh, *next;

        sync_supers(0);
        sync_inodes(0);

        ncount = 0;
#ifdef DEBUG
        for(nlist = 0; nlist < NR_LIST; nlist++)
#else
        for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
#endif
        {
                ndirty = 0;
                nwritten = 0;
        repeat:
                allow_interrupts();

                bh = lru_list[nlist];
                if(bh)
                         for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
                                 /* We may have stalled while waiting for I/O to complete. */
                                 if(bh->b_list != nlist) goto repeat;
                                 next = bh->b_next_free;
                                 if(!lru_list[nlist]) {
                                         printk("Dirty list empty %d\n", i);
                                         break;
                                 }

                                 /* Clean buffer on dirty list?  Refile it */
                                 if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh)) {
                                         refile_buffer(bh);
                                         continue;
                                 }

                                 if (buffer_locked(bh) || !buffer_dirty(bh))
                                          continue;
                                 ndirty++;
                                 if(bh->b_flushtime > jiffies) continue;
                                 nwritten++;
                                 next->b_count++;
                                 bh->b_count++;
                                 bh->b_flushtime = 0;
#ifdef DEBUG
                                 if(nlist != BUF_DIRTY) ncount++;
#endif
                                 ll_rw_block(WRITE, 1, &bh);
                                 bh->b_count--;
                                 next->b_count--;
                         }
        }
        run_task_queue(&tq_disk);
#ifdef DEBUG
        if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
        printk("Wrote %d/%d buffers\n", nwritten, ndirty);
#endif
        run_task_queue(&tq_disk);
        return 0;
}


/* This is the interface to bdflush.  As we get more sophisticated, we can
 * pass tuning parameters to this "process", to adjust how it behaves.
 * We would want to verify each parameter, however, to make sure that it
 * is reasonable. */

asmlinkage int sys_bdflush(int func, long data)
{
        if (!suser())
                return -EPERM;

        if (func == 1)
                 return sync_old_buffers();

        /* For func >= 2: even values read a tuning parameter, odd values
           write one.  func 2N+2 reads bdf_prm.data[N] into *data, and
           func 2N+3 sets bdf_prm.data[N] from the value in 'data'. */
        if (func >= 2) {
                int i = (func-2) >> 1;
                if (i < 0 || i >= N_PARAM)
                        return -EINVAL;
                if((func & 1) == 0) {
                        int error = verify_area(VERIFY_WRITE, (int*)data, 4);
                        if (!error)
                                put_user(bdf_prm.data[i], (int*)data);
                        return error;
                }
                if (data < bdflush_min[i] || data > bdflush_max[i])
                        return -EINVAL;
                bdf_prm.data[i] = data;
        }

        /* Func 0 used to launch the actual bdflush and then never
         * return (unless explicitly killed). We return zero here to
         * remain semi-compatible with present update(8) programs.
         */
        return 0;
}
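
/*
 * Illustrative only: an update(8)-style userspace program can drive this
 * interface through the bdflush system call.  A sketch, assuming the libc
 * headers expose __NR_bdflush (parameter index 0 and the value 64 are
 * hypothetical choices used purely for the example):
 *
 *      #include <unistd.h>
 *      #include <sys/syscall.h>
 *
 *      int value;
 *      syscall(__NR_bdflush, 1, 0);              - flush old buffers, like "update"
 *      syscall(__NR_bdflush, 2, (long)&value);   - read tuning parameter 0
 *      syscall(__NR_bdflush, 3, 64);             - write tuning parameter 0
 */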

/* This is the actual bdflush daemon itself. It used to be started from
 * the syscall above, but now we launch it ourselves internally with
 * kernel_thread(...) directly after the first thread in init/main.c */
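
/*
 * For reference, a sketch of the launch described above (assuming the usual
 * 2.0-era init/main.c arrangement):
 *
 *      kernel_thread(bdflush, NULL, 0);
 *
 * The daemon therefore starts with a bare-bones task_struct, which is why
 * bdflush() fills in comm, session and pgrp itself below.
 */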

/* To prevent deadlocks for a loop device:
 * 1) Do non-blocking writes to loop (avoids deadlock with running
 *      out of request blocks).
 * 2) But do a blocking write if the only dirty buffers are loop buffers
 *      (otherwise we go into an infinite busy-loop).
 * 3) Quit writing loop blocks if a freelist went low (avoids deadlock
 *      with running out of free buffers for loop's "real" device).
 */
int bdflush(void * unused)
{
        int i;
        int ndirty;
        int nlist;
        int ncount;
        struct buffer_head * bh, *next;
        int major;
        int wrta_cmd = WRITEA;  /* non-blocking write for LOOP */

        /*
         *      We have a bare-bones task_struct, and really should fill
         *      in a few more things so "top" and /proc/2/{exe,root,cwd}
         *      display semi-sane things. Not real crucial though...
         */

        current->session = 1;
        current->pgrp = 1;
        sprintf(current->comm, "kflushd");
        bdflush_tsk = current;

        /*
         *      As a kernel thread we want to tamper with system buffers
         *      and other internals and thus be subject to the SMP locking
         *      rules. (On a uniprocessor box this does nothing).
         */

#ifdef __SMP__
        lock_kernel();
        syscall_count++;
#endif
        for (;;) {
#ifdef DEBUG
                printk("bdflush() activated...");
#endif

                ncount = 0;

#ifdef CONFIG_REDUCED_MEMORY
// TJK: free up the buffers, if there are too many clean
                ncount = 0;
                if (nr_buffers_type[BUF_CLEAN] > MAX_CLEAN_BUFFERS) {
                        bh = lru_list[BUF_CLEAN];
                        do {
                                if (!bh)
                                        break;
                                if (!bh->b_count && try_to_free_buffer(bh, &bh, 0)) {
                                        ncount++;
                                }
                                bh = bh->b_next_free;
                                // at most free 8 (2 pages)
                        } while (bh != lru_list[BUF_CLEAN] && ncount < 8);
                }
                if (ncount) {
#ifdef DEBUG
                        printk("free()ed %d buffers\n",ncount);
#endif
                }
// TJK
#endif
                ncount = 0;

#ifdef CONFIG_REDUCED_MEMORY
                for(nlist = 0; nlist < NR_LIST; nlist++)
#else
                for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
#endif
                 {
                         ndirty = 0;
                         refilled = 0;
                 repeat:
                         allow_interrupts();

                         bh = lru_list[nlist];
                         if(bh)
                                  for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
                                       bh = next) {
                                          /* We may have stalled while waiting for I/O to complete. */
                                          if(bh->b_list != nlist) goto repeat;
                                          next = bh->b_next_free;
                                          if(!lru_list[nlist]) {
                                                  printk("Dirty list empty %d\n", i);
                                                  break;
                                          }

                                          /* Clean buffer on dirty list?  Refile it */
#ifdef CONFIG_REDUCED_MEMORY
// TJK: still in bdflush()
// this is a slight modification to the conditional, forcing
// it to call refile_buffer() on unlocked buffers sitting on the
// lru_list[BUF_LOCKED]
                                          if ((nlist == BUF_DIRTY || nlist == BUF_LOCKED)
                                                && !buffer_dirty(bh) && !buffer_locked(bh))
#else
                                          if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
#endif
                                           {
                                                   refile_buffer(bh);
                                                   continue;
                                           }

                                          if (buffer_locked(bh) || !buffer_dirty(bh))
                                                   continue;
                                          major = MAJOR(bh->b_dev);
                                          /* Should we write back buffers that are shared or not??
                                             currently dirty buffers are not shared, so it does not matter */
                                          if (refilled && major == LOOP_MAJOR)
                                                   continue;
                                          next->b_count++;
                                          bh->b_count++;
                                          ndirty++;
                                          bh->b_flushtime = 0;
                                          if (major == LOOP_MAJOR) {
                                                  ll_rw_block(wrta_cmd,1, &bh);
                                                  wrta_cmd = WRITEA;
                                                  if (buffer_dirty(bh))
                                                          --ndirty;
                                          }
                                          else
                                                  ll_rw_block(WRITE, 1, &bh);
#ifdef DEBUG
                                          if(nlist != BUF_DIRTY) ncount++;
#endif
                                          bh->b_count--;
                                          next->b_count--;
                                  }
                 }
#ifdef DEBUG
                if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
                printk("sleeping again.\n");
#endif
                /* If we didn't write anything, but there are still
                 * dirty buffers, then make the next write to a
                 * loop device to be a blocking write.
                 * This lets us block--which we _must_ do! */
                if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
                        wrta_cmd = WRITE;
                        continue;
                }
                run_task_queue(&tq_disk);

                /* If there are still a lot of dirty buffers around, skip the sleep
                   and flush some more */
                if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
                        wake_up(&bdflush_done);
                        current->signal = 0;
                        interruptible_sleep_on(&bdflush_wait);
                }
        }
}

#ifdef MAGIC_ROM_PTR
int bromptr(kdev_t dev, struct vm_area_struct * vma)
{
        struct inode inode_fake;
        extern struct file_operations * get_blkfops(unsigned int);
        struct file_operations * fops = get_blkfops(MAJOR(dev));

        /* Not every major has registered block fops, and not every driver
           provides a romptr method; fall back to -ENOSYS in either case. */
        if (fops != NULL && fops->romptr != NULL) {
                inode_fake.i_rdev = dev;
                return fops->romptr(&inode_fake, NULL, vma);
        }
        return -ENOSYS;
}
#endif /* MAGIC_ROM_PTR */

/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only.  This must remain at the end
 * of the file.
 * ---------------------------------------------------------------------------
 * Local variables:
 * c-indent-level: 8
 * c-brace-imaginary-offset: 0
 * c-brace-offset: -8
 * c-argdecl-indent: 8
 * c-label-offset: -8
 * c-continued-statement-offset: 8
 * c-continued-brace-offset: 0
 * End:
 */
