/*
 * linux/fs/buffer.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

/*
 * 'buffer.c' implements the buffer-cache functions. Race-conditions have
 * been avoided by NEVER letting an interrupt change a buffer (except for the
 * data, of course), but instead letting the caller do it.
 */

/* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
/* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */

/* Removed a lot of unnecessary code and simplified things now that
   the buffer cache isn't our primary cache - Andrew Tridgell 12/96 */

/*
 * uClinux revisions for memory usage tuning, MAGIC_ROM_PTR,
 * and invalidate_by_block hack
 * Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
 * The Silver Hammer Group, Ltd.
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/errno.h>
#include <linux/malloc.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>

#include <asm/system.h>
#include <asm/segment.h>
#include <asm/io.h>
#include <asm/bitops.h>

#define NR_SIZES 5
static char buffersize_index[17] =
{-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};

#define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
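/* Note: BUFSIZE_INDEX maps a legal block size to its free_list[] slot:
   BUFSIZE_INDEX(512) == 0, BUFSIZE_INDEX(1024) == 1, BUFSIZE_INDEX(2048) == 2,
   BUFSIZE_INDEX(4096) == 3, BUFSIZE_INDEX(8192) == 4.  Any size that is not
   one of these powers of two indexes a -1 entry in buffersize_index. */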
#ifdef CONFIG_REDUCED_MEMORY
// TJK:
#define MAX_CLEAN_BUFFERS 50 // that's 50 1024-byte buffers
// end TJK
#define MAX_UNUSED_BUFFERS 10
#define HASH_PAGES 1
#else /* !CONFIG_REDUCED_MEMORY */
#define MAX_UNUSED_BUFFERS 30 /* don't ever have more than this number of
				 unused buffer heads */
#define HASH_PAGES 4 /* number of pages to use for the hash table */
#endif /* !CONFIG_REDUCED_MEMORY */
#define NR_HASH (HASH_PAGES*PAGE_SIZE/sizeof(struct buffer_head *))
#define HASH_MASK (NR_HASH-1)
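/* Note: PAGE_SIZE and sizeof(struct buffer_head *) are both powers of two,
   so NR_HASH is a power of two and masking with HASH_MASK is a cheap modulo.
   For example, with 4096-byte pages and 4-byte pointers, HASH_PAGES == 4
   gives NR_HASH == 4096 and HASH_MASK == 0xfff. */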
|
static int grow_buffers(int pri, int size);

static struct buffer_head ** hash_table;
static struct buffer_head * lru_list[NR_LIST] = {NULL, };
static struct buffer_head * free_list[NR_SIZES] = {NULL, };

static struct buffer_head * unused_list = NULL;
static struct buffer_head * reuse_list = NULL;
struct wait_queue * buffer_wait = NULL;

static int nr_buffers = 0;
static int nr_buffers_type[NR_LIST] = {0,};
static int nr_buffer_heads = 0;
static int nr_unused_buffer_heads = 0;
static int refilled = 0;	/* Set NZ when a buffer freelist is refilled
				   this is used by the loop device */
|
/* this is used by some architectures to estimate available memory */
int buffermem = 0;

/* Here is the parameter block for the bdflush process. If you add or
 * remove any of the parameters, make sure to update kernel/sysctl.c.
 */

static void wakeup_bdflush(int);

#define N_PARAM 9

/* the dummy values in this structure are left in there for compatibility
   with old programs that play with the /proc entries */
union bdflush_param{
	struct {
		int nfract;	/* Percentage of buffer cache dirty to
				   activate bdflush */
		int ndirty;	/* Maximum number of dirty blocks to write out per
				   wake-cycle */
		int nrefill;	/* Number of clean buffers to try to obtain
				   each time we call refill */
		int nref_dirt;	/* Dirty buffer threshold for activating bdflush
				   when trying to refill buffers. */
		int dummy1;	/* unused */
		int age_buffer;	/* Time for normal buffer to age before
				   we flush it */
		int age_super;	/* Time for superblock to age before we
				   flush it */
		int dummy2;	/* unused */
		int dummy3;	/* unused */
	} b_un;
	unsigned int data[N_PARAM];
#ifdef CONFIG_REDUCED_MEMORY
} bdf_prm = {{40, 500, 8, 8, 15, 30*HZ, 5*HZ, 1884, 2}};
/*Originally {{40, 500, 64, 64, 15, 30*HZ, 5*HZ, 1884, 2}};*/
#else /* !CONFIG_REDUCED_MEMORY */
} bdf_prm = {{40, 500, 64, 64, 15, 30*HZ, 5*HZ, 1884, 2}};
#endif /* !CONFIG_REDUCED_MEMORY */

/* These are the min and max parameter values that we will allow to be assigned */
int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
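/* Note: because bdflush_param is a union, bdf_prm.data[i] aliases the i'th
   field of b_un - e.g. bdf_prm.data[0] is nfract and bdf_prm.data[1] is
   ndirty - which is what lets /proc and sysctl treat the parameter block
   as a plain array bounded by bdflush_min[] and bdflush_max[]. */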
|
/*
 * Rewrote the wait-routines to use the "new" wait-queue functionality,
 * and getting rid of the cli-sti pairs. The wait-queue routines still
 * need cli-sti, but now it's just a couple of 386 instructions or so.
 *
 * Note that the real wait_on_buffer() is an inline function that checks
 * if 'b_wait' is set before calling this, so that the queues aren't set
 * up unnecessarily.
 */
void __wait_on_buffer(struct buffer_head * bh)
{
	struct wait_queue wait = { current, NULL };

	bh->b_count++;
	add_wait_queue(&bh->b_wait, &wait);
repeat:
	run_task_queue(&tq_disk);
	current->state = TASK_UNINTERRUPTIBLE;
	if (buffer_locked(bh)) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&bh->b_wait, &wait);
	bh->b_count--;
	current->state = TASK_RUNNING;
}
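/* Note the ordering above: the task is put on the wait queue and marked
   TASK_UNINTERRUPTIBLE *before* buffer_locked() is re-tested, so an unlock
   (and its wake_up) racing in between simply makes schedule() return
   immediately instead of the wakeup being lost. */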
|
/* Call sync_buffers with wait!=0 to ensure that the call does not
   return until all buffer writes have completed.  Sync() may return
   before the writes have finished; fsync() may not. */


/* Godamity-damn.  Some buffers (bitmaps for filesystems)
   spontaneously dirty themselves without ever brelse being called.
   We will ultimately want to put these in a separate list, but for
   now we search all of the lists for dirty buffers */
|
static int sync_buffers(kdev_t dev, int wait)
{
	int i, retry, pass = 0, err = 0;
	struct buffer_head * bh, *next;

	/* One pass for no-wait, three for wait:
	   0) write out all dirty, unlocked buffers;
	   1) write out all dirty buffers, waiting if locked;
	   2) wait for completion by waiting for all buffers to unlock. */
	do {
		retry = 0;
	repeat:
		/* We search all lists as a failsafe mechanism, not because we expect
		   there to be dirty buffers on any of the other lists. */
		bh = lru_list[BUF_DIRTY];
		if (!bh)
			goto repeat2;
		for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
			if (bh->b_list != BUF_DIRTY)
				goto repeat;
			next = bh->b_next_free;
			if (!lru_list[BUF_DIRTY])
				break;
			if (dev && bh->b_dev != dev)
				continue;
			if (buffer_locked(bh)) {
				/* Buffer is locked; skip it unless wait is
				   requested AND pass > 0. */
				if (!wait || !pass) {
					retry = 1;
					continue;
				}
				wait_on_buffer (bh);
				goto repeat;
			}
			/* If an unlocked buffer is not uptodate, there has
			   been an IO error. Skip it. */
			if (wait && buffer_req(bh) && !buffer_locked(bh) &&
			    !buffer_dirty(bh) && !buffer_uptodate(bh)) {
				err = 1;
				continue;
			}
			/* Don't write clean buffers.  Don't write ANY buffers
			   on the third pass. */
			if (!buffer_dirty(bh) || pass >= 2)
				continue;
			/* don't bother about locked buffers */
			if (buffer_locked(bh))
				continue;
			bh->b_count++;
			next->b_count++;
			bh->b_flushtime = 0;
			ll_rw_block(WRITE, 1, &bh);
			bh->b_count--;
			next->b_count--;
			retry = 1;
		}

	repeat2:
		bh = lru_list[BUF_LOCKED];
		if (!bh)
			break;
		for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
			if (bh->b_list != BUF_LOCKED)
				goto repeat2;
			next = bh->b_next_free;
			if (!lru_list[BUF_LOCKED])
				break;
			if (dev && bh->b_dev != dev)
				continue;
			if (buffer_locked(bh)) {
				/* Buffer is locked; skip it unless wait is
				   requested AND pass > 0. */
				if (!wait || !pass) {
					retry = 1;
					continue;
				}
				wait_on_buffer (bh);
				goto repeat2;
			}
		}

		/* If we are waiting for the sync to succeed, and if any dirty
		   blocks were written, then repeat; on the second pass, only
		   wait for buffers being written (do not pass to write any
		   more buffers on the second pass). */
	} while (wait && retry && ++pass<=2);
	return err;
}
|
void sync_dev(kdev_t dev)
{
	sync_buffers(dev, 0);
	sync_supers(dev);
	sync_inodes(dev);
	sync_buffers(dev, 0);
	sync_dquots(dev, -1);
}

int fsync_dev(kdev_t dev)
{
	sync_buffers(dev, 0);
	sync_supers(dev);
	sync_inodes(dev);
	sync_dquots(dev, -1);
	return sync_buffers(dev, 1);
}
|
asmlinkage int sys_sync(void)
{
	fsync_dev(0);
	return 0;
}
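/* Note: a dev of 0 matches every buffer in sync_buffers() (the "dev &&"
   test above skips the device comparison), so fsync_dev(0) here flushes
   all devices. */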
|
int file_fsync (struct inode *inode, struct file *filp)
{
	return fsync_dev(inode->i_dev);
}

asmlinkage int sys_fsync(unsigned int fd)
{
	struct file * file;
	struct inode * inode;

	if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
		return -EBADF;
	if (!file->f_op || !file->f_op->fsync)
		return -EINVAL;
	if (file->f_op->fsync(inode,file))
		return -EIO;
	return 0;
}

asmlinkage int sys_fdatasync(unsigned int fd)
{
	struct file * file;
	struct inode * inode;

	if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
		return -EBADF;
	if (!file->f_op || !file->f_op->fsync)
		return -EINVAL;
	/* this needs further work, at the moment it is identical to fsync() */
	if (file->f_op->fsync(inode,file))
		return -EIO;
	return 0;
}
|
void invalidate_buffers(kdev_t dev)
{
	int i;
	int nlist;
	struct buffer_head * bh;

	for(nlist = 0; nlist < NR_LIST; nlist++) {
		bh = lru_list[nlist];
		for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
			if (bh->b_dev != dev)
				continue;
			wait_on_buffer(bh);
			if (bh->b_dev != dev)
				continue;
			if (bh->b_count)
				continue;
			bh->b_flushtime = 0;
			clear_bit(BH_Protected, &bh->b_state);
			clear_bit(BH_Uptodate, &bh->b_state);
			clear_bit(BH_Dirty, &bh->b_state);
			clear_bit(BH_Req, &bh->b_state);
		}
	}
}

void invalidate_buffers_by_block(kdev_t dev, unsigned int block, unsigned int count)
{
	int i;
	int nlist;
	struct buffer_head * bh;

	for(nlist = 0; nlist < NR_LIST; nlist++) {
		bh = lru_list[nlist];
		for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
			if (bh->b_dev != dev)
				continue;
			wait_on_buffer(bh);
			if (bh->b_dev != dev)
				continue;
			if (bh->b_count)
				continue;
			if (bh->b_blocknr < block)
				continue;
			if (bh->b_blocknr >= (block+count))
				continue;
			bh->b_flushtime = 0;
			clear_bit(BH_Protected, &bh->b_state);
			clear_bit(BH_Uptodate, &bh->b_state);
			clear_bit(BH_Dirty, &bh->b_state);
			clear_bit(BH_Req, &bh->b_state);
		}
	}
}
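/* Note: both loops above bound themselves to 2*nr_buffers_type[nlist]
   iterations rather than trusting the ring to stay put: wait_on_buffer()
   can sleep, and the LRU list may be reshuffled underneath us while it
   does. */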
|
#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))&HASH_MASK)
#define hash(dev,block) hash_table[_hashfn(dev,block)]
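/* Note: the hash chain for a (dev,block) pair is picked by XOR-ing the
   device with the block number and masking with HASH_MASK, which only
   works as a modulo because NR_HASH is a power of two; find_buffer()
   below then walks hash(dev,block) via b_next. */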
|
static inline void remove_from_hash_queue(struct buffer_head * bh)
{
	if (bh->b_next)
		bh->b_next->b_prev = bh->b_prev;
	if (bh->b_prev)
		bh->b_prev->b_next = bh->b_next;
	if (hash(bh->b_dev,bh->b_blocknr) == bh)
		hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
	bh->b_next = bh->b_prev = NULL;
}

static inline void remove_from_lru_list(struct buffer_head * bh)
{
	if (!(bh->b_prev_free) || !(bh->b_next_free))
		panic("VFS: LRU block list corrupted");
	if (bh->b_dev == B_FREE)
		panic("LRU list corrupted");
	bh->b_prev_free->b_next_free = bh->b_next_free;
	bh->b_next_free->b_prev_free = bh->b_prev_free;

	if (lru_list[bh->b_list] == bh)
		lru_list[bh->b_list] = bh->b_next_free;
	if (lru_list[bh->b_list] == bh)
		lru_list[bh->b_list] = NULL;
	bh->b_next_free = bh->b_prev_free = NULL;
}

static inline void remove_from_free_list(struct buffer_head * bh)
{
	int isize = BUFSIZE_INDEX(bh->b_size);
	if (!(bh->b_prev_free) || !(bh->b_next_free))
		panic("VFS: Free block list corrupted");
	if(bh->b_dev != B_FREE)
		panic("Free list corrupted");
	if(!free_list[isize])
		panic("Free list empty");
	if(bh->b_next_free == bh)
		free_list[isize] = NULL;
	else {
		bh->b_prev_free->b_next_free = bh->b_next_free;
		bh->b_next_free->b_prev_free = bh->b_prev_free;
		if (free_list[isize] == bh)
			free_list[isize] = bh->b_next_free;
	}
	bh->b_next_free = bh->b_prev_free = NULL;
}

static inline void remove_from_queues(struct buffer_head * bh)
{
	if(bh->b_dev == B_FREE) {
		remove_from_free_list(bh); /* Free list entries should not be
					      in the hash queue */
		return;
	}
	nr_buffers_type[bh->b_list]--;
	remove_from_hash_queue(bh);
	remove_from_lru_list(bh);
}
|
static inline void put_last_lru(struct buffer_head * bh)
{
	if (!bh)
		return;
	if (bh == lru_list[bh->b_list]) {
		lru_list[bh->b_list] = bh->b_next_free;
		return;
	}
	if(bh->b_dev == B_FREE)
		panic("Wrong block for lru list");
	remove_from_lru_list(bh);
	/* add to back of lru list */

	if(!lru_list[bh->b_list]) {
		lru_list[bh->b_list] = bh;
		lru_list[bh->b_list]->b_prev_free = bh;
	}

	bh->b_next_free = lru_list[bh->b_list];
	bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
	lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
	lru_list[bh->b_list]->b_prev_free = bh;
}
|
static inline void put_last_free(struct buffer_head * bh)
{
	int isize;
	if (!bh)
		return;

	isize = BUFSIZE_INDEX(bh->b_size);
	bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
	/* add to back of free list */
	if(!free_list[isize]) {
		free_list[isize] = bh;
		bh->b_prev_free = bh;
	}

	bh->b_next_free = free_list[isize];
	bh->b_prev_free = free_list[isize]->b_prev_free;
	free_list[isize]->b_prev_free->b_next_free = bh;
	free_list[isize]->b_prev_free = bh;
}
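/* Note for both helpers above: the lru and free lists are circular and
   doubly linked through b_next_free/b_prev_free, so "add to back" is just
   an insertion in front of the list head (head->b_prev_free is the tail). */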
|
static inline void insert_into_queues(struct buffer_head * bh)
{
	/* put at end of free list */
	if(bh->b_dev == B_FREE) {
		put_last_free(bh);
		return;
	}
	if(!lru_list[bh->b_list]) {
		lru_list[bh->b_list] = bh;
		bh->b_prev_free = bh;
	}

	if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
	bh->b_next_free = lru_list[bh->b_list];
	bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
	lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
	lru_list[bh->b_list]->b_prev_free = bh;
	nr_buffers_type[bh->b_list]++;
	/* put the buffer in new hash-queue if it has a device */
	bh->b_prev = NULL;
	bh->b_next = NULL;
	if (!(bh->b_dev))
		return;
	bh->b_next = hash(bh->b_dev,bh->b_blocknr);
	hash(bh->b_dev,bh->b_blocknr) = bh;
	if (bh->b_next)
		bh->b_next->b_prev = bh;
}
|
static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
{
	struct buffer_head * tmp;

	for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
		if (tmp->b_blocknr == block && tmp->b_dev == dev)
			if (tmp->b_size == size)
				return tmp;
			else {
				printk("VFS: Wrong blocksize on device %s\n",
				       kdevname(dev));
				return NULL;
			}
	return NULL;
}

struct buffer_head *efind_buffer(kdev_t dev, int block, int size)
{
	return find_buffer(dev, block, size);
}
|
/*
 * Why like this, I hear you say... The reason is race-conditions.
 * As we don't lock buffers (unless we are reading them, that is),
 * something might happen to it while we sleep (ie a read-error
 * will force it bad). This shouldn't really happen currently, but
 * the code is ready.
 */
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
	struct buffer_head * bh;

	for (;;) {
		if (!(bh=find_buffer(dev,block,size)))
			return NULL;
		bh->b_count++;
		wait_on_buffer(bh);
		if (bh->b_dev == dev && bh->b_blocknr == block
		    && bh->b_size == size)
			return bh;
		bh->b_count--;
	}
}
|
void set_blocksize(kdev_t dev, int size)
{
	extern int *blksize_size[];
	int i, nlist;
	struct buffer_head * bh, *bhnext;

	if (!blksize_size[MAJOR(dev)])
		return;

	if (size > PAGE_SIZE)
		size = 0;

	switch (size) {
		default: panic("Invalid blocksize passed to set_blocksize");
		case 512: case 1024: case 2048: case 4096: case 8192: ;
	}

	if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
		blksize_size[MAJOR(dev)][MINOR(dev)] = size;
		return;
	}
	if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
		return;
	sync_buffers(dev, 2);
	blksize_size[MAJOR(dev)][MINOR(dev)] = size;

	/* We need to be quite careful how we do this - we are moving entries
	   around on the free list, and we can get in a loop if we are not careful.*/

	for(nlist = 0; nlist < NR_LIST; nlist++) {
		bh = lru_list[nlist];
		for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
			if(!bh) break;
			bhnext = bh->b_next_free;
			if (bh->b_dev != dev)
				continue;
			if (bh->b_size == size)
				continue;

			bhnext->b_count++;
			wait_on_buffer(bh);
			bhnext->b_count--;
			if (bh->b_dev == dev && bh->b_size != size) {
				clear_bit(BH_Dirty, &bh->b_state);
				clear_bit(BH_Uptodate, &bh->b_state);
				clear_bit(BH_Req, &bh->b_state);
				bh->b_flushtime = 0;
			}
			remove_from_hash_queue(bh);
		}
	}
}
|
/* check if a buffer is OK to be reclaimed */
static inline int can_reclaim(struct buffer_head *bh, int size)
{
	if (bh->b_count ||
	    buffer_protected(bh) ||
	    buffer_locked(bh) ||
	    mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
	    buffer_dirty(bh))
		return 0;

	if (bh->b_size != size)
		return 0;

	return 1;
}
|
/* find a candidate buffer to be reclaimed */
static struct buffer_head *find_candidate(struct buffer_head *bh,
					  int *list_len, int size)
{
	int lookahead = 7;

	if (!bh)
		goto no_candidate;

	for (; (*list_len) > 0; bh = bh->b_next_free, (*list_len)--) {
		if (size != bh->b_size) {
			/* this provides a mechanism for freeing blocks
			   of other sizes, this is necessary now that we
			   no longer have the lav code. */
			try_to_free_buffer(bh,&bh,1);
			if (!bh)
				break;
			lookahead = 7;
			continue;
		}
		else if (buffer_locked(bh) &&
			 (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) {
			if (!--lookahead) {
				(*list_len) = 0;
				goto no_candidate;
			}
		}
		else if (can_reclaim(bh,size))
			return bh;
	}

no_candidate:
	return NULL;
}
|
static void put_unused_buffer_head(struct buffer_head * bh)
{
	if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
		nr_buffer_heads--;
		kfree(bh);
		return;
	}
	memset(bh,0,sizeof(*bh));
	nr_unused_buffer_heads++;
	bh->b_next_free = unused_list;
	unused_list = bh;
	if (!waitqueue_active(&buffer_wait))
		return;
	wake_up(&buffer_wait);
}
|
/*
 * We can't put completed temporary IO buffer_heads directly onto the
 * unused_list when they become unlocked, since the device driver
 * end_request routines still expect access to the buffer_head's
 * fields after the final unlock.  So, the device driver puts them on
 * the reuse_list instead once IO completes, and we recover these to
 * the unused_list here.
 *
 * The reuse_list receives buffers from interrupt routines, so we need
 * to be IRQ-safe here (but note that interrupts only _add_ to the
 * reuse_list, never take away. So we don't need to worry about the
 * reuse_list magically emptying).
 */
static inline void recover_reusable_buffer_heads(void)
{
	if (reuse_list) {
		struct buffer_head *head;

		head = xchg(&reuse_list, NULL);

		do {
			struct buffer_head *bh = head;
			head = head->b_next_free;
			put_unused_buffer_head(bh);
		} while (head);
	}
}
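/* Note: the xchg() above atomically detaches the whole reuse chain, so a
   device interrupt that pushes a fresh buffer_head onto reuse_list while
   we walk the detached chain is never lost - it is simply picked up by
   the next call. */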
|
extern void allow_interrupts(void);

static void refill_freelist(int size)
{
	struct buffer_head * bh;
	struct buffer_head * candidate[BUF_DIRTY];
	extern struct task_struct *bdflush_tsk;
	unsigned int best_time, winner;
	int buffers[BUF_DIRTY];
	int i, limit = ((min_free_pages + free_pages_low) >> 1);
	int needed;

	refilled = 1;
	/* If there are too many dirty buffers, we wake up the update process
	   now so as to ensure that there are still clean buffers available
	   for user processes to use (and dirty) */

	if (nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100)
		wakeup_bdflush(1);

	/* We are going to try to locate this much memory */
	needed = bdf_prm.b_un.nrefill * size;

	while (nr_free_pages > min_free_pages*2 && needed > 0 &&
	       grow_buffers(GFP_BUFFER, size)) {
		needed -= PAGE_SIZE;
	}
|
repeat:
	allow_interrupts();
	recover_reusable_buffer_heads();
	if(needed <= 0)
		return;

	/* OK, we cannot grow the buffer cache, now try to get some
	   from the lru list */

	/* First set the candidate pointers to usable buffers.  This
	   should be quick nearly all of the time. */

	for(i=0; i<BUF_DIRTY; i++){
		buffers[i] = nr_buffers_type[i];
		candidate[i] = find_candidate(lru_list[i], &buffers[i], size);
	}

	/* Now see which candidate wins the election */

	winner = best_time = UINT_MAX;
	for(i=0; i<BUF_DIRTY; i++){
		if(!candidate[i]) continue;
		if(candidate[i]->b_lru_time < best_time){
			best_time = candidate[i]->b_lru_time;
			winner = i;
		}
	}

	/* If we have a winner, use it, and then get a new candidate from that list */
	if(winner != UINT_MAX) {
		i = winner;
		while (needed>0 && (bh=candidate[i])) {
			candidate[i] = bh->b_next_free;
			if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
			remove_from_queues(bh);
			bh->b_dev = B_FREE;
			put_last_free(bh);
			needed -= bh->b_size;
			buffers[i]--;
			if(buffers[i] == 0) candidate[i] = NULL;

			if (candidate[i] && !can_reclaim(candidate[i],size))
				candidate[i] = find_candidate(candidate[i],&buffers[i], size);
		}
		goto repeat;
	}

	/* Too bad, that was not enough. Try a little harder to grow some. */

	if (nr_free_pages > limit) {
		if (grow_buffers(GFP_BUFFER, size)) {
			needed -= PAGE_SIZE;
			goto repeat;
		}
	}

	/* If we are not bdflush we should wake up bdflush and try it again. */

	if (current != bdflush_tsk &&
	    (buffermem >> PAGE_SHIFT) > (MAP_NR(high_memory) >> 2) &&
	    nr_buffers_type[BUF_DIRTY] > bdf_prm.b_un.nref_dirt) {
		wakeup_bdflush(1);
		needed -= PAGE_SIZE;
		goto repeat;
	}

	/*
	 * In order to protect our reserved pages,
	 * return now if we got any buffers.
	 */
	allow_interrupts();
	if (free_list[BUFSIZE_INDEX(size)])
		return;

	/* and repeat until we find something good */
	i = grow_buffers(GFP_BUFFER, size);

	if (current != bdflush_tsk && !i && nr_buffers_type[BUF_DIRTY] > 0)
		wakeup_bdflush(1);
	else if (!i)
		grow_buffers(GFP_IO, size);

	/* decrease needed even if there is no success */
	needed -= PAGE_SIZE;
	goto repeat;
}
|
/*
 * Ok, this is getblk, and it isn't very clear, again to hinder
 * race-conditions. Most of the code is seldom used, (ie repeating),
 * so it should be much more efficient than it looks.
 *
 * The algorithm is changed: hopefully better, and an elusive bug removed.
 *
 * 14.02.92: changed it to sync dirty buffers a bit: better performance
 * when the filesystem starts to get full of dirty blocks (I hope).
 */
struct buffer_head * getblk(kdev_t dev, int block, int size)
{
	struct buffer_head * bh;
	int isize = BUFSIZE_INDEX(size);

	/* If there are too many dirty buffers, we wake up the update process
	   now so as to ensure that there are still clean buffers available
	   for user processes to use (and dirty) */
repeat:
	allow_interrupts();
	bh = get_hash_table(dev, block, size);
	if (bh) {
		if (!buffer_dirty(bh)) {
			if (buffer_uptodate(bh))
				put_last_lru(bh);
			bh->b_flushtime = 0;
		}
		set_bit(BH_Touched, &bh->b_state);
		return bh;
	}

get_free:
	bh = free_list[isize];
	if (!bh)
		goto refill;
	remove_from_free_list(bh);

	/* OK, FINALLY we know that this buffer is the only one of its kind,
	 * and that it's unused (b_count=0), unlocked (buffer_locked=0),
	 * and clean */
	bh->b_count=1;
	bh->b_list=BUF_CLEAN;
	bh->b_flushtime=0;
	bh->b_state=(1<<BH_Touched);
	bh->b_dev=dev;
	bh->b_blocknr=block;
	insert_into_queues(bh);
	return bh;

refill:
	allow_interrupts();
	refill_freelist(size);
	if (!find_buffer(dev,block,size))
		goto get_free;
	goto repeat;
}
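/* Note: getblk() only guarantees a buffer with the right identity, not
   valid contents.  A caller that needs the data does what bread() below
   does: check buffer_uptodate(), else ll_rw_block(READ, 1, &bh) followed
   by wait_on_buffer(bh), and brelse() the buffer when finished. */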
|
void set_writetime(struct buffer_head * buf, int flag)
{
	int newtime;

	if (buffer_dirty(buf)) {
		/* Set the flushtime deadline; never postpone one already set */
		newtime = jiffies + (flag ? bdf_prm.b_un.age_super :
				     bdf_prm.b_un.age_buffer);
		if(!buf->b_flushtime || buf->b_flushtime > newtime)
			buf->b_flushtime = newtime;
	} else {
		buf->b_flushtime = 0;
	}
}
|
/*
 * A buffer may need to be moved from one buffer list to another
 * (e.g. in case it is not shared any more). Handle this.
 */
void refile_buffer(struct buffer_head * buf)
{
	int dispose;

	if(buf->b_dev == B_FREE) {
		printk("Attempt to refile free buffer\n");
		return;
	}
	if (buffer_dirty(buf))
		dispose = BUF_DIRTY;
	else if (buffer_locked(buf))
		dispose = BUF_LOCKED;
	else
		dispose = BUF_CLEAN;
	if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
	if(dispose != buf->b_list) {
		if(dispose == BUF_DIRTY)
			buf->b_lru_time = jiffies;
		if(dispose == BUF_LOCKED &&
		   (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
			dispose = BUF_LOCKED1;
		remove_from_queues(buf);
		buf->b_list = dispose;
#ifdef CONFIG_REDUCED_MEMORY
		// TJK: try to prevent the BUF_CLEAN lru_list from growing
		// too much.
		if (nr_buffers_type[BUF_CLEAN] > MAX_CLEAN_BUFFERS) {
			wakeup_bdflush(0);
		}
		// end TJK
#endif
		insert_into_queues(buf);
		if (dispose == BUF_DIRTY) {
			/* This buffer is dirty, maybe we need to start flushing. */
			/* If too high a percentage of the buffers are dirty... */
			if (nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100)
				wakeup_bdflush(0);
			/* If this is a loop device, and
			 * more than half of the buffers are dirty... */
			/* (Prevents no-free-buffers deadlock with loop device.) */
			if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
			    nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
				wakeup_bdflush(1);
		}
	}
}
|
/*
 * Release a buffer head
 */
void __brelse(struct buffer_head * buf)
{
	wait_on_buffer(buf);

	/* If dirty, mark the time this buffer should be written back */
	set_writetime(buf, 0);
	refile_buffer(buf);

	if (buf->b_count) {
		buf->b_count--;
		return;
	}
	printk("VFS: brelse: Trying to free free buffer\n");
}
|
/*
 * bforget() is like brelse(), except it removes the buffer
 * from the hash-queues (so that it won't be re-used if it's
 * shared).
 */
void __bforget(struct buffer_head * buf)
{
	wait_on_buffer(buf);
	mark_buffer_clean(buf);
	clear_bit(BH_Protected, &buf->b_state);
	buf->b_count--;
	remove_from_hash_queue(buf);
	buf->b_dev = NODEV;
	refile_buffer(buf);
}
|
/*
 * bread() reads a specified block and returns the buffer that contains
 * it. It returns NULL if the block was unreadable.
 */
struct buffer_head * bread(kdev_t dev, int block, int size)
{
	struct buffer_head * bh;

	if (!(bh = getblk(dev, block, size))) {
		printk("VFS: bread: impossible error\n");
		return NULL;
	}
	if (buffer_uptodate(bh))
		return bh;
	ll_rw_block(READ, 1, &bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return bh;
	brelse(bh);
	return NULL;
}
|
/*
 * Ok, breada can be used as bread, but additionally to mark other
 * blocks for reading as well. The read-ahead window is bounded by
 * the blocks remaining in the file, the device's read_ahead setting,
 * and NBUF blocks.
 */

#define NBUF 16
|
struct buffer_head * breada(kdev_t dev, int block, int bufsize,
			    unsigned int pos, unsigned int filesize)
{
	struct buffer_head * bhlist[NBUF];
	unsigned int blocks;
	struct buffer_head * bh;
	int index;
	int i, j;

	if (pos >= filesize)
		return NULL;

	if (block < 0 || !(bh = getblk(dev,block,bufsize)))
		return NULL;

	index = BUFSIZE_INDEX(bh->b_size);

	if (buffer_uptodate(bh))
		return(bh);
	else ll_rw_block(READ, 1, &bh);

	blocks = (filesize - pos) >> (9+index);

	if (blocks < (read_ahead[MAJOR(dev)] >> index))
		blocks = read_ahead[MAJOR(dev)] >> index;
	if (blocks > NBUF)
		blocks = NBUF;

/*	if (blocks) printk("breada (new) %d blocks\n",blocks); */


	bhlist[0] = bh;
	j = 1;
	for(i=1; i<blocks; i++) {
		bh = getblk(dev,block+i,bufsize);
		if (buffer_uptodate(bh)) {
			brelse(bh);
			break;
		}
		else bhlist[j++] = bh;
	}

	/* Request the read for these buffers, and then release them */
	if (j>1)
		ll_rw_block(READA, (j-1), bhlist+1);
	for(i=1; i<j; i++)
		brelse(bhlist[i]);

	/* Wait for this buffer, and then continue on */
	bh = bhlist[0];
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return bh;
	brelse(bh);
	return NULL;
}
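/* Note: a buffer of this size covers (512 << index) bytes, so the
   "(filesize - pos) >> (9+index)" above converts the bytes left in the
   file into whole blocks; the result is then raised to the device's
   read_ahead window and capped at NBUF. */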
|
static void get_more_buffer_heads(void)
{
	struct wait_queue wait = { current, NULL };
	struct buffer_head * bh;

	while (!unused_list) {
		/*
		 * This is critical.  We can't swap out pages to get
		 * more buffer heads, because the swap-out may need
		 * more buffer-heads itself.  Thus GFP_ATOMIC.
		 *
		 * This is no longer true, it is GFP_BUFFER again, the
		 * swapping code now knows not to perform I/O when that
		 * GFP level is specified... -DaveM
		 *
		 * Ouch, another bug!  get_free_page() does not call
		 * try_to_free_page() if priority == GFP_BUFFER.  This
		 * lets kswapd get into a lockup situation if there is
		 * no free space for buffer growth but we need more
		 * memory for a buffer_head for swapping.  If memory is
		 * full of recyclable buffers, we deadlock because
		 * kswapd won't recycle them!  Use GFP_IO instead: it
		 * still won't recurse (GFP_IO sets can_do_io to zero in
		 * try_to_free_page), but it lets us recover those
		 * buffer heads. --sct
		 */
		/* we now use kmalloc() here instead of gfp as we want
		   to be able to easily release buffer heads - they
		   took up quite a bit of memory (tridge) */
		bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_IO);
		if (bh) {
			put_unused_buffer_head(bh);
			nr_buffer_heads++;
			return;
		}

		/*
		 * Uhhuh. We're _really_ low on memory. Now we just
		 * wait for old buffer heads to become free due to
		 * finishing IO..
		 */
		run_task_queue(&tq_disk);

		/*
		 * Set our state for sleeping, then check again for buffer heads.
		 * This ensures we won't miss a wake_up from an interrupt.
		 */
		add_wait_queue(&buffer_wait, &wait);
		current->state = TASK_UNINTERRUPTIBLE;
		if (!unused_list && !reuse_list)
			schedule();
		recover_reusable_buffer_heads();
		remove_wait_queue(&buffer_wait, &wait);
		current->state = TASK_RUNNING;
	}

}
|
|
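/*
 * Added note (illustrative, not part of the original file): the sleep in
 * get_more_buffer_heads() follows the classic "set task state, re-check
 * the condition, then schedule()" idiom.  A minimal sketch of the same
 * pattern, assuming a hypothetical flag `cond` and wait queue `cond_wait`:
 */
#if 0
	struct wait_queue wait = { current, NULL };

	add_wait_queue(&cond_wait, &wait);
	current->state = TASK_UNINTERRUPTIBLE;
	if (!cond)		/* re-check AFTER setting the state: a    */
		schedule();	/* wake_up() between check and schedule() */
				/* just leaves us TASK_RUNNING, so the    */
				/* wakeup is not lost                     */
	remove_wait_queue(&cond_wait, &wait);
	current->state = TASK_RUNNING;
#endif
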
static struct buffer_head * get_unused_buffer_head(void)
{
	struct buffer_head * bh;

	recover_reusable_buffer_heads();
	get_more_buffer_heads();
	if (!unused_list)
		return NULL;
	bh = unused_list;
	unused_list = bh->b_next_free;
	nr_unused_buffer_heads--;
	return bh;
}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.. Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 */
static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
{
	struct buffer_head *bh, *head;
	long offset;

	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) >= 0) {
		bh = get_unused_buffer_head();
		if (!bh)
			goto no_grow;

		bh->b_dev = B_FREE;  /* Flag as unused */
		bh->b_this_page = head;
		head = bh;

		bh->b_state = 0;
		bh->b_next_free = NULL;
		bh->b_count = 0;
		bh->b_size = size;

		bh->b_data = (char *) (page+offset);
		bh->b_list = 0;
	}
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	bh = head;
	while (bh) {
		head = bh;
		bh = bh->b_this_page;
		put_unused_buffer_head(head);
	}
	return NULL;
}

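/*
 * Added note (illustrative, not part of the original file): create_buffers()
 * returns an open, NULL-terminated b_this_page chain; callers such as
 * brw_page() and grow_buffers() below close it into a ring afterwards.
 * Walking the open chain is just:
 */
#if 0
	struct buffer_head * tmp;

	for (tmp = head; tmp != NULL; tmp = tmp->b_this_page)
		;	/* each tmp->b_data points into the same page */
#endif
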
/* Run the hooks that have to be done when a page I/O has completed. */
static inline void after_unlock_page (struct page * page)
{
	if (clear_bit(PG_decr_after, &page->flags))
		atomic_dec(&nr_async_pages);
	if (clear_bit(PG_free_after, &page->flags))
		__free_page(page);
#ifndef NO_MM
	if (clear_bit(PG_swap_unlock_after, &page->flags))
		swap_after_unlock_page(page->swap_unlock_entry);
#endif /*!NO_MM*/
}

/*
 * Free all temporary buffers belonging to a page.
 * This needs to be called with interrupts disabled.
 */
static inline void free_async_buffers (struct buffer_head * bh)
{
	struct buffer_head * tmp;

	tmp = bh;
	do {
		if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
			printk ("Whoops: unlock_buffer: "
				"async IO mismatch on page.\n");
			return;
		}
		tmp->b_next_free = reuse_list;
		reuse_list = tmp;
		clear_bit(BH_FreeOnIO, &tmp->b_state);
		tmp = tmp->b_this_page;
	} while (tmp != bh);
}

/*
 * Start I/O on a page.
 * This function expects the page to be locked and may return before I/O is complete.
 * You then have to check page->locked, page->uptodate, and maybe wait on page->wait.
 */
int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
{
	struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
	int block, nr;

	if (!PageLocked(page))
		panic("brw_page: page not locked for I/O");
	clear_bit(PG_uptodate, &page->flags);
	clear_bit(PG_error, &page->flags);
	/*
	 * Allocate buffer heads pointing to this page, just for I/O.
	 * They do _not_ show up in the buffer hash table!
	 * They are _not_ registered in page->buffers either!
	 */
	bh = create_buffers(page_address(page), size);
	if (!bh) {
		clear_bit(PG_locked, &page->flags);
		wake_up(&page->wait);
		return -ENOMEM;
	}
	nr = 0;
	next = bh;
	do {
		struct buffer_head * tmp;
		block = *(b++);

		set_bit(BH_FreeOnIO, &next->b_state);
		next->b_list = BUF_CLEAN;
		next->b_dev = dev;
		next->b_blocknr = block;
		next->b_count = 1;
		next->b_flushtime = 0;
		set_bit(BH_Uptodate, &next->b_state);

		/*
		 * When we use bmap, we define block zero to represent
		 * a hole.  ll_rw_page, however, may legitimately
		 * access block zero, and we need to distinguish the
		 * two cases.
		 */
		if (bmap && !block) {
			memset(next->b_data, 0, size);
			next->b_count--;
			continue;
		}
		tmp = get_hash_table(dev, block, size);
		if (tmp) {
			if (!buffer_uptodate(tmp)) {
				if (rw == READ)
					ll_rw_block(READ, 1, &tmp);
				wait_on_buffer(tmp);
			}
			if (rw == READ)
				memcpy(next->b_data, tmp->b_data, size);
			else {
				memcpy(tmp->b_data, next->b_data, size);
				mark_buffer_dirty(tmp, 0);
			}
			brelse(tmp);
			next->b_count--;
			continue;
		}
		if (rw == READ)
			clear_bit(BH_Uptodate, &next->b_state);
		else
			set_bit(BH_Dirty, &next->b_state);
		arr[nr++] = next;
	} while (prev = next, (next = next->b_this_page) != NULL);
	prev->b_this_page = bh;

	if (nr) {
		ll_rw_block(rw, nr, arr);
		/* The rest of the work is done in mark_buffer_uptodate()
		 * and unlock_buffer(). */
	} else {
		unsigned long flags;
		save_flags(flags);
		cli();
		free_async_buffers(bh);
		restore_flags(flags);
		clear_bit(PG_locked, &page->flags);
		set_bit(PG_uptodate, &page->flags);
		wake_up(&page->wait);
		after_unlock_page(page);
		if (waitqueue_active(&buffer_wait))
			wake_up(&buffer_wait);
	}
	++current->maj_flt;
	return 0;
}

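/*
 * Added note (commentary, not part of the original file): the buffer heads
 * submitted above are reclaimed from the I/O completion path.  end_request()
 * calls mark_buffer_uptodate() and unlock_buffer() on each head;
 * unlock_buffer() sees BH_FreeOnIO, and the last head whose b_count reaches
 * zero pushes the whole ring onto reuse_list via free_async_buffers() and
 * unlocks the page.
 */
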
/*
 * This is called by end_request() when I/O has completed.
 */
void mark_buffer_uptodate(struct buffer_head * bh, int on)
{
	if (on) {
		struct buffer_head *tmp = bh;
		set_bit(BH_Uptodate, &bh->b_state);
		/* If a page has buffers and all these buffers are uptodate,
		 * then the page is uptodate. */
		do {
			if (!test_bit(BH_Uptodate, &tmp->b_state))
				return;
			tmp=tmp->b_this_page;
		} while (tmp && tmp != bh);
		set_bit(PG_uptodate, &mem_map[MAP_NR(bh->b_data)].flags);
		return;
	}
	clear_bit(BH_Uptodate, &bh->b_state);
}

/*
 * This is called by end_request() when I/O has completed.
 */
void unlock_buffer(struct buffer_head * bh)
{
	unsigned long flags;
	struct buffer_head *tmp;
	struct page *page;

	if (!clear_bit(BH_Lock, &bh->b_state))
		printk ("unlock_buffer: already unlocked on %s\n",
			kdevname(bh->b_dev));
	wake_up(&bh->b_wait);
	if (waitqueue_active(&buffer_wait))
		wake_up(&buffer_wait);

	if (!test_bit(BH_FreeOnIO, &bh->b_state))
		return;
	/* This is a temporary buffer used for page I/O. */
	page = mem_map + MAP_NR(bh->b_data);
	if (!PageLocked(page))
		goto not_locked;
	if (bh->b_count != 1)
		goto bad_count;

	if (!test_bit(BH_Uptodate, &bh->b_state))
		set_bit(PG_error, &page->flags);

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 *
	 * Async buffer_heads are here only as labels for IO, and get
	 * thrown away once the IO for this page is complete.  IO is
	 * deemed complete once all buffers have been visited
	 * (b_count==0) and are now unlocked. We must make sure that
	 * only the _last_ buffer that decrements its count is the one
	 * that frees the page..
	 */
	save_flags(flags);
	cli();
	bh->b_count--;
	tmp = bh;
	do {
		if (tmp->b_count)
			goto still_busy;
		tmp = tmp->b_this_page;
	} while (tmp != bh);

	/* OK, the async IO on this page is complete. */
	free_async_buffers(bh);
	restore_flags(flags);
	clear_bit(PG_locked, &page->flags);
	wake_up(&page->wait);
	after_unlock_page(page);
	wake_up(&buffer_wait);
	return;

still_busy:
	restore_flags(flags);
	return;

not_locked:
	printk ("Whoops: unlock_buffer: async io complete on unlocked page\n");
	return;

bad_count:
	printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
	return;
}

/*
 * Generic "readpage" function for block devices that have the normal
 * bmap functionality. This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
 * mark_buffer_uptodate() functions propagate buffer state into the
 * page struct once IO has completed.
 */
int generic_readpage(struct inode * inode, struct page * page)
{
	unsigned long block;
	int *p, nr[PAGE_SIZE/512];
	int i;

	page->count++;
	set_bit(PG_locked, &page->flags);
	set_bit(PG_free_after, &page->flags);

	i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
	block = page->offset >> inode->i_sb->s_blocksize_bits;
	p = nr;
	do {
		*p = inode->i_op->bmap(inode, block);
		i--;
		block++;
		p++;
	} while (i > 0);

	/* IO start */
	brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
	return 0;
}

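/*
 * Added note (commentary, not part of the original file): the nr[] array
 * passed to brw_page() uses block number 0 to mark a file hole (see the
 * bmap comment inside brw_page); such blocks are zero-filled in memory
 * rather than read from the device.
 */
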
/*
 * Try to increase the number of buffers available: the size argument
 * is used to determine what kind of buffers we want.
 */
static int grow_buffers(int pri, int size)
{
	unsigned long page;
	struct buffer_head *bh, *tmp;
	struct buffer_head * insert_point;
	int isize;

	if ((size & 511) || (size > PAGE_SIZE)) {
		printk("VFS: grow_buffers: size = %d\n",size);
		return 0;
	}

	isize = BUFSIZE_INDEX(size);

	if (!(page = __get_free_page(pri)))
		return 0;
	bh = create_buffers(page, size);
	if (!bh) {
		free_page(page);
		return 0;
	}

	insert_point = free_list[isize];

	tmp = bh;
	while (1) {
		if (insert_point) {
			tmp->b_next_free = insert_point->b_next_free;
			tmp->b_prev_free = insert_point;
			insert_point->b_next_free->b_prev_free = tmp;
			insert_point->b_next_free = tmp;
		} else {
			tmp->b_prev_free = tmp;
			tmp->b_next_free = tmp;
		}
		insert_point = tmp;
		++nr_buffers;
		if (tmp->b_this_page)
			tmp = tmp->b_this_page;
		else
			break;
	}
	tmp->b_this_page = bh;
	free_list[isize] = bh;
	mem_map[MAP_NR(page)].buffers = bh;
	buffermem += PAGE_SIZE;
	return 1;
}


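/*
 * Added note (commentary, not part of the original file): free_list[isize]
 * is a circular doubly-linked list threaded through b_next_free/b_prev_free.
 * The insertion above is the usual four-pointer splice of tmp after
 * insert_point:
 *
 *	tmp->b_next_free = insert_point->b_next_free;
 *	tmp->b_prev_free = insert_point;
 *	insert_point->b_next_free->b_prev_free = tmp;
 *	insert_point->b_next_free = tmp;
 */
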
/* =========== Reduce the buffer memory ============= */

static inline int buffer_waiting(struct buffer_head * bh)
{
	return waitqueue_active(&bh->b_wait);
}

/*
 * try_to_free_buffer() checks if all the buffers on this particular page
 * are unused, and frees the page if so.
 */
int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
		       int priority)
{
	unsigned long page;
	struct buffer_head * tmp, * p;

	*bhp = bh;
	page = (unsigned long) bh->b_data;
	page &= PAGE_MASK;
	tmp = bh;
	do {
		if (!tmp)
			return 0;
		if (tmp->b_count || buffer_protected(tmp) ||
		    buffer_dirty(tmp) || buffer_locked(tmp) ||
		    buffer_waiting(tmp))
			return 0;
		if (priority && buffer_touched(tmp))
			return 0;
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	tmp = bh;
	do {
		p = tmp;
		tmp = tmp->b_this_page;
		nr_buffers--;
		if (p == *bhp)
		{
			*bhp = p->b_prev_free;
			if (p == *bhp) /* Was this the last in the list? */
				*bhp = NULL;
		}
		remove_from_queues(p);
		put_unused_buffer_head(p);
	} while (tmp != bh);
	buffermem -= PAGE_SIZE;
	mem_map[MAP_NR(page)].buffers = NULL;
	free_page(page);
	return !mem_map[MAP_NR(page)].count;
}

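/*
 * Added note (commentary, not part of the original file): try_to_free_buffer()
 * makes two passes around the page's buffer ring.  The first pass proves every
 * buffer is idle (no users, not dirty, locked, protected, or waited on); the
 * second unhashes the heads and returns them to the unused list before the
 * page itself is freed.
 */
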
/* ================== Debugging =================== */

void show_buffers(void)
{
	struct buffer_head * bh;
	int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
	int protected = 0;
	int nlist;
	static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","LOCKED1","DIRTY"};

	printk("Buffer memory: %6dkB\n",buffermem>>10);
	printk("Buffer heads: %6d\n",nr_buffer_heads);
	printk("Buffer blocks: %6d\n",nr_buffers);

	for(nlist = 0; nlist < NR_LIST; nlist++) {
		found = locked = dirty = used = lastused = protected = 0;
		bh = lru_list[nlist];
		if(!bh) continue;

		do {
			found++;
			if (buffer_locked(bh))
				locked++;
			if (buffer_protected(bh))
				protected++;
			if (buffer_dirty(bh))
				dirty++;
			if (bh->b_count)
				used++, lastused = found;
			bh = bh->b_next_free;
		} while (bh != lru_list[nlist]);
		printk("%8s: %d buffers, %d used (last=%d), "
		       "%d locked, %d protected, %d dirty\n",
		       buf_types[nlist], found, used, lastused,
		       locked, protected, dirty);
	}
}

/* ===================== Init ======================= */

/*
 * allocate the hash table and init the free list
 */
void buffer_init(void)
{
#ifdef CONFIG_REDUCED_MEMORY
	hash_table = (struct buffer_head **)__get_free_pages(GFP_KERNEL, 0, 0); /* HACK! - kja */
#else /* !CONFIG_REDUCED_MEMORY */
	hash_table = (struct buffer_head **)vmalloc(NR_HASH*sizeof(struct buffer_head *));
#endif /* !CONFIG_REDUCED_MEMORY */
	if (!hash_table)
		panic("Failed to allocate buffer hash table\n");
	memset(hash_table,0,NR_HASH*sizeof(struct buffer_head *));

	lru_list[BUF_CLEAN] = 0;
	grow_buffers(GFP_KERNEL, BLOCK_SIZE);
}


/* ====================== bdflush support =================== */

/* This is a simple kernel daemon, whose job it is to provide a dynamic
 * response to dirty buffers.  Once this process is activated, we write back
 * a limited number of buffers to the disks and then go back to sleep again.
 */
struct wait_queue * bdflush_wait = NULL;
struct wait_queue * bdflush_done = NULL;
struct task_struct *bdflush_tsk = 0;

static void wakeup_bdflush(int wait)
{
	if (current == bdflush_tsk)
		return;
	wake_up(&bdflush_wait);
	if (wait) {
		run_task_queue(&tq_disk);
		sleep_on(&bdflush_done);
		recover_reusable_buffer_heads();
	}
}

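/*
 * Added note (commentary, not part of the original file): wakeup_bdflush(1)
 * is a synchronous handshake.  The caller wakes the daemon through
 * bdflush_wait, kicks any queued disk I/O, then sleeps on bdflush_done until
 * bdflush has finished a flushing pass.
 */
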
/*
 * Here we attempt to write back old buffers.  We also try to flush inodes
 * and supers as well, since this function is essentially "update", and
 * otherwise there would be no way of ensuring that these quantities ever
 * get written back.  Ideally, we would have a timestamp on the inodes
 * and superblocks so that we could write back only the old ones as well.
 */

asmlinkage int sync_old_buffers(void)
{
	int i;
	int ndirty, nwritten;
	int nlist;
	int ncount;
	struct buffer_head * bh, *next;

	sync_supers(0);
	sync_inodes(0);

	ncount = 0;
#ifdef DEBUG
	for(nlist = 0; nlist < NR_LIST; nlist++)
#else
	for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
#endif
	{
		ndirty = 0;
		nwritten = 0;
	repeat:
		allow_interrupts();

		bh = lru_list[nlist];
		if(bh)
			for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
				/* We may have stalled while waiting for I/O to complete. */
				if(bh->b_list != nlist) goto repeat;
				next = bh->b_next_free;
				if(!lru_list[nlist]) {
					printk("Dirty list empty %d\n", i);
					break;
				}

				/* Clean buffer on dirty list?  Refile it */
				if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
				{
					refile_buffer(bh);
					continue;
				}

				if (buffer_locked(bh) || !buffer_dirty(bh))
					continue;
				ndirty++;
				if(bh->b_flushtime > jiffies) continue;
				nwritten++;
				next->b_count++;
				bh->b_count++;
				bh->b_flushtime = 0;
#ifdef DEBUG
				if(nlist != BUF_DIRTY) ncount++;
#endif
				ll_rw_block(WRITE, 1, &bh);
				bh->b_count--;
				next->b_count--;
			}
	}
	run_task_queue(&tq_disk);
#ifdef DEBUG
	if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
	printk("Wrote %d/%d buffers\n", nwritten, ndirty);
#endif
	run_task_queue(&tq_disk);
	return 0;
}


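/*
 * Added note (commentary, not part of the original file): b_flushtime holds
 * the jiffies value after which a dirty buffer counts as "old".  The loop
 * above skips buffers whose deadline is still in the future
 * (bh->b_flushtime > jiffies) and zeroes the field once the write is issued.
 */
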
/* This is the interface to bdflush.  As we get more sophisticated, we can
 * pass tuning parameters to this "process", to adjust how it behaves.
 * We would want to verify each parameter, however, to make sure that it
 * is reasonable. */

asmlinkage int sys_bdflush(int func, long data)
{
	if (!suser())
		return -EPERM;

	if (func == 1)
		return sync_old_buffers();

	/* Basically func 2 means read param 1, 3 means write param 1, etc:
	 * func 2N reads tuning parameter N, func 2N+1 writes it. */
	if (func >= 2) {
		int i = (func-2) >> 1;
		if (i < 0 || i >= N_PARAM)
			return -EINVAL;
		if((func & 1) == 0) {
			int error = verify_area(VERIFY_WRITE, (int*)data, 4);
			if (!error)
				put_user(bdf_prm.data[i], (int*)data);
			return error;
		}
		if (data < bdflush_min[i] || data > bdflush_max[i])
			return -EINVAL;
		bdf_prm.data[i] = data;
	}

	/* Func 0 used to launch the actual bdflush daemon and then never
	 * return (unless explicitly killed).  We return zero here to
	 * remain semi-compatible with present update(8) programs.
	 */
	return 0;
}

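/*
 * Added note (illustrative, not part of the original file): how an
 * update(8)-style program might drive this interface, assuming the usual
 * user-space bdflush(2) wrapper and that tuning parameter 1 is b_un.ndirty:
 */
#if 0
	int ndirty_val;

	bdflush(1, 0);				/* func 1: flush old buffers */
	bdflush(2, (long) &ndirty_val);		/* func 2: read param 1      */
	bdflush(3, 600);			/* func 3: write param 1     */
#endif
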
/* This is the actual bdflush daemon itself. It used to be started from
 * the syscall above, but now we launch it ourselves internally with
 * kernel_thread(...) directly after the first thread in init/main.c */

/* To prevent deadlocks for a loop device:
 * 1) Do non-blocking writes to loop (avoids deadlock with running
 *    out of request blocks).
 * 2) But do a blocking write if the only dirty buffers are loop buffers
 *    (otherwise we go into an infinite busy-loop).
 * 3) Quit writing loop blocks if a freelist went low (avoids deadlock
 *    with running out of free buffers for loop's "real" device).
 */
int bdflush(void * unused)
{
	int i;
	int ndirty;
	int nlist;
	int ncount;
	struct buffer_head * bh, *next;
	int major;
	int wrta_cmd = WRITEA;	/* non-blocking write for LOOP */

	/*
	 * We have a bare-bones task_struct, and really should fill
	 * in a few more things so "top" and /proc/2/{exe,root,cwd}
	 * display semi-sane things. Not real crucial though...
	 */

	current->session = 1;
	current->pgrp = 1;
	sprintf(current->comm, "kflushd");
	bdflush_tsk = current;

	/*
	 * As a kernel thread we want to tamper with system buffers
	 * and other internals and thus be subject to the SMP locking
	 * rules. (On a uniprocessor box this does nothing).
	 */

#ifdef __SMP__
	lock_kernel();
	syscall_count++;
#endif
	for (;;) {
#ifdef DEBUG
		printk("bdflush() activated...");
#endif

		ncount = 0;

#ifdef CONFIG_REDUCED_MEMORY
		// TJK: free up buffers if there are too many clean ones
		ncount = 0;
		if (nr_buffers_type[BUF_CLEAN] > MAX_CLEAN_BUFFERS) {
			bh = lru_list[BUF_CLEAN];
			do {
				if (!bh)
					break;
				if (!bh->b_count && try_to_free_buffer(bh, &bh, 0)) {
					ncount++;
				}
				bh = bh->b_next_free;
			// free at most 8 buffers (2 pages)
			} while (bh != lru_list[BUF_CLEAN] && ncount < 8);
		}
		if (ncount) {
#ifdef DEBUG
			printk("free()ed %d buffers\n",ncount);
#endif
		}
		// TJK
#endif
		ncount = 0;

#ifdef CONFIG_REDUCED_MEMORY
		for(nlist = 0; nlist < NR_LIST; nlist++)
#else
		for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
#endif
		{
			ndirty = 0;
			refilled = 0;
		repeat:
			allow_interrupts();

			bh = lru_list[nlist];
			if(bh)
				for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
				     bh = next) {
					/* We may have stalled while waiting for I/O to complete. */
					if(bh->b_list != nlist) goto repeat;
					next = bh->b_next_free;
					if(!lru_list[nlist]) {
						printk("Dirty list empty %d\n", i);
						break;
					}

					/* Clean buffer on dirty list?  Refile it */
#ifdef CONFIG_REDUCED_MEMORY
					// TJK: still in bdflush()
					// this is a slight modification to the conditional, forcing
					// it to call refile_buffer() on unlocked buffers sitting on the
					// lru_list[BUF_LOCKED]
					if ((nlist == BUF_DIRTY || nlist == BUF_LOCKED)
					    && !buffer_dirty(bh) && !buffer_locked(bh))
#else
					if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
#endif
					{
						refile_buffer(bh);
						continue;
					}

					if (buffer_locked(bh) || !buffer_dirty(bh))
						continue;
					major = MAJOR(bh->b_dev);
					/* Should we write back buffers that are shared or not??
					   currently dirty buffers are not shared, so it does not matter */
					if (refilled && major == LOOP_MAJOR)
						continue;
					next->b_count++;
					bh->b_count++;
					ndirty++;
					bh->b_flushtime = 0;
					if (major == LOOP_MAJOR) {
						ll_rw_block(wrta_cmd,1, &bh);
						wrta_cmd = WRITEA;
						if (buffer_dirty(bh))
							--ndirty;
					}
					else
						ll_rw_block(WRITE, 1, &bh);
#ifdef DEBUG
					if(nlist != BUF_DIRTY) ncount++;
#endif
					bh->b_count--;
					next->b_count--;
				}
		}
#ifdef DEBUG
		if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
		printk("sleeping again.\n");
#endif
		/* If we didn't write anything, but there are still
		 * dirty buffers, then make the next write to a
		 * loop device to be a blocking write.
		 * This lets us block--which we _must_ do! */
		if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
			wrta_cmd = WRITE;
			continue;
		}
		run_task_queue(&tq_disk);

		/* If there are still a lot of dirty buffers around, skip the sleep
		   and flush some more */
		if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
			wake_up(&bdflush_done);
			current->signal = 0;
			interruptible_sleep_on(&bdflush_wait);
		}
	}
}

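/*
 * Added note (commentary, not part of the original file): the wrta_cmd logic
 * above implements rule 2 of the loop-device comment.  Loop writes normally
 * use non-blocking WRITEA, but if a whole pass wrote nothing while dirty
 * buffers remain, the next pass downgrades to a blocking WRITE so the daemon
 * can block instead of busy-looping.
 */
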
#ifdef MAGIC_ROM_PTR
int bromptr(kdev_t dev, struct vm_area_struct * vma)
{
	struct inode inode_fake;
	extern struct file_operations * get_blkfops(unsigned int);

	if (get_blkfops(MAJOR(dev))->romptr!=NULL)
	{
		inode_fake.i_rdev=dev;
		return get_blkfops(MAJOR(dev))->romptr(&inode_fake, NULL, vma);
	}
	return -ENOSYS;
}
#endif /* MAGIC_ROM_PTR */

/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only.  This must remain at the end
 * of the file.
 * ---------------------------------------------------------------------------
 * Local variables:
 * c-indent-level: 8
 * c-brace-imaginary-offset: 0
 * c-brace-offset: -8
 * c-argdecl-indent: 8
 * c-label-offset: -8
 * c-continued-statement-offset: 8
 * c-continued-brace-offset: 0
 * End:
 */