/*
2
 *  linux/drivers/block/ll_rw_blk.c
3
 *
4
 * Copyright (C) 1991, 1992 Linus Torvalds
5
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
6
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
7
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
8
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000
9
 */
10
 
11
/*
12
 * This handles all read/write requests to block devices
13
 */
14
#include <linux/sched.h>
15
#include <linux/kernel.h>
16
#include <linux/kernel_stat.h>
17
#include <linux/errno.h>
18
#include <linux/string.h>
19
#include <linux/config.h>
20
#include <linux/locks.h>
21
#include <linux/mm.h>
22
#include <linux/swap.h>
23
#include <linux/init.h>
24
#include <linux/smp_lock.h>
25
#include <linux/completion.h>
26
#include <linux/bootmem.h>
27
 
28
#include <asm/system.h>
29
#include <asm/io.h>
30
#include <linux/blk.h>
31
#include <linux/highmem.h>
32
#include <linux/slab.h>
33
#include <linux/module.h>
34
 
35
/*
36
 * MAC Floppy IWM hooks
37
 */
38
 
39
#ifdef CONFIG_MAC_FLOPPY_IWM
40
extern int mac_floppy_init(void);
41
#endif
42
 
43
/*
44
 * For the allocated request tables
45
 */
46
static kmem_cache_t *request_cachep;
47
 
48
/*
49
 * The "disk" task queue is used to start the actual requests
50
 * after a plug
51
 */
52
DECLARE_TASK_QUEUE(tq_disk);
53
 
54
/*
55
 * Protect the request list against multiple users..
56
 *
57
 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
58
 * from the IRQ event side, and almost 100% SMP threaded from the syscall
59
 * side (we still have to protect against block device array operations, and
 * the do_request() side is still not entirely safe. The kernel lock protects
 * this part currently.).
62
 *
63
 * there is a fair chance that things will work just OK if these functions
64
 * are called with no global kernel lock held ...
65
 */
66
spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
67
 
68
/* This specifies how many sectors to read ahead on the disk. */
69
 
70
int read_ahead[MAX_BLKDEV];
71
 
72
/* blk_dev_struct is:
73
 *      *request_fn
74
 *      *current_request
75
 */
76
struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
77
 
78
/*
79
 * blk_size contains the size of all block-devices in units of 1024-byte
 * blocks:
81
 *
82
 * blk_size[MAJOR][MINOR]
83
 *
84
 * if (!blk_size[MAJOR]) then no minor size checking is done.
85
 */
86
int * blk_size[MAX_BLKDEV];
87
 
88
/*
89
 * blksize_size contains the size of all block-devices:
90
 *
91
 * blksize_size[MAJOR][MINOR]
92
 *
93
 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
94
 */
95
int * blksize_size[MAX_BLKDEV];
96
 
97
/*
98
 * hardsect_size contains the size of the hardware sector of a device.
99
 *
100
 * hardsect_size[MAJOR][MINOR]
101
 *
102
 * if (!hardsect_size[MAJOR])
103
 *              then 512 bytes is assumed.
104
 * else
105
 *              sector_size is hardsect_size[MAJOR][MINOR]
106
 * This is currently set by some scsi devices and read by the msdos fs driver.
107
 * Other uses may appear later.
108
 */
109
int * hardsect_size[MAX_BLKDEV];
110
 
111
/*
112
 * The following tunes the read-ahead algorithm in mm/filemap.c
113
 */
114
int * max_readahead[MAX_BLKDEV];
115
 
116
/*
117
 * Max number of sectors per request
118
 */
119
int * max_sectors[MAX_BLKDEV];
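 
/*
 * Editorial example (not part of the original file): a hypothetical driver
 * with major number MY_MAJOR and MY_MINORS minors would typically publish
 * its geometry through the arrays above at initialisation time.  All
 * "MY_*"/"my_*" identifiers below are illustrative only.
 */
#if 0
static int my_kb_sizes[MY_MINORS];      /* device sizes, in 1024-byte blocks */
static int my_blksizes[MY_MINORS];      /* soft block size, in bytes */
static int my_hardsects[MY_MINORS];     /* hardware sector size, in bytes */

static void my_register_sizes(void)
{
        int i;

        for (i = 0; i < MY_MINORS; i++) {
                my_kb_sizes[i] = MY_DEVICE_KB;
                my_blksizes[i] = 1024;
                my_hardsects[i] = 512;
        }
        blk_size[MY_MAJOR] = my_kb_sizes;
        blksize_size[MY_MAJOR] = my_blksizes;
        hardsect_size[MY_MAJOR] = my_hardsects;
        read_ahead[MY_MAJOR] = 8;       /* legacy read-ahead, in sectors */
}
#endif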
120
 
121
unsigned long blk_max_low_pfn, blk_max_pfn;
122
int blk_nohighio = 0;
123
 
124
int block_dump = 0;
125
 
126
static struct timer_list writeback_timer;
127
 
128
static inline int get_max_sectors(kdev_t dev)
129
{
130
        if (!max_sectors[MAJOR(dev)])
131
                return MAX_SECTORS;
132
        return max_sectors[MAJOR(dev)][MINOR(dev)];
133
}
134
 
135
inline request_queue_t *blk_get_queue(kdev_t dev)
136
{
137
        struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
138
 
139
        if (bdev->queue)
140
                return bdev->queue(dev);
141
        else
142
                return &blk_dev[MAJOR(dev)].request_queue;
143
}
144
 
145
static int __blk_cleanup_queue(struct request_list *list)
146
{
147
        struct list_head *head = &list->free;
148
        struct request *rq;
149
        int i = 0;
150
 
151
        while (!list_empty(head)) {
152
                rq = list_entry(head->next, struct request, queue);
153
                list_del(&rq->queue);
154
                kmem_cache_free(request_cachep, rq);
155
                i++;
156
        };
157
 
158
        if (i != list->count)
159
                printk("request list leak!\n");
160
 
161
        list->count = 0;
162
        return i;
163
}
164
 
165
/**
166
 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
167
 * @q:    the request queue to be released
168
 *
169
 * Description:
170
 *     blk_cleanup_queue is the pair to blk_init_queue().  It should
171
 *     be called when a request queue is being released; typically
172
 *     when a block device is being de-registered.  Currently, its
173
 *     primary task is to free all the &struct request structures that
174
 *     were allocated to the queue.
175
 * Caveat:
176
 *     Hopefully the low level driver will have finished any
177
 *     outstanding requests first...
178
 **/
179
void blk_cleanup_queue(request_queue_t * q)
180
{
181
        int count = q->nr_requests;
182
 
183
        count -= __blk_cleanup_queue(&q->rq);
184
 
185
        if (count)
186
                printk("blk_cleanup_queue: leaked requests (%d)\n", count);
187
        if (atomic_read(&q->nr_sectors))
188
                printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
189
 
190
        memset(q, 0, sizeof(*q));
191
}
192
 
193
/**
194
 * blk_queue_headactive - indicate whether head of request queue may be active
195
 * @q:       The queue which this applies to.
196
 * @active:  A flag indicating whether the head of the queue is active.
197
 *
198
 * Description:
199
 *    The driver for a block device may choose to leave the currently active
200
 *    request on the request queue, removing it only when it has completed.
201
 *    The queue handling routines assume this by default for safety reasons
202
 *    and will not involve the head of the request queue in any merging or
203
 *    reordering of requests when the queue is unplugged (and thus may be
204
 *    working on this particular request).
205
 *
206
 *    If a driver removes requests from the queue before processing them, then
207
 *    it may indicate that it does so, thereby allowing the head of the queue
 *    to be involved in merging and reordering.  This is done by calling
209
 *    blk_queue_headactive() with an @active flag of %0.
210
 *
211
 *    If a driver processes several requests at once, it must remove them (or
212
 *    at least all but one of them) from the request queue.
213
 *
214
 *    When a queue is plugged the head will be assumed to be inactive.
215
 **/
216
 
217
void blk_queue_headactive(request_queue_t * q, int active)
218
{
219
        q->head_active = active;
220
}
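 
/*
 * Editorial example (not part of the original file): a hypothetical driver
 * that removes every request from the queue with blkdev_dequeue_request()
 * before starting it can tell the block layer that the queue head is never
 * active, so the head may take part in merging.  "my_queue" and
 * "my_request_fn" are illustrative only.
 */
#if 0
static request_queue_t my_queue;

static void my_setup_queue(void)
{
        blk_init_queue(&my_queue, my_request_fn);
        blk_queue_headactive(&my_queue, 0);
}
#endif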
221
 
222
/**
223
 * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
224
 * @q:       The queue which this applies to.
225
 * @active:  A flag indicating whether you want sector throttling on
226
 *
227
 * Description:
228
 * The sector throttling code allows us to put a limit on the number of
 * sectors pending io to the disk at a given time.  Passing a nonzero
 * @active indicates that you will call blk_started_sectors and
 * blk_finished_sectors, in addition to blk_started_io and blk_finished_io,
 * in order to keep track of the number of sectors in flight.
233
 **/
234
 
235
void blk_queue_throttle_sectors(request_queue_t * q, int active)
236
{
237
        q->can_throttle = active;
238
}
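 
/*
 * Editorial example (not part of the original file): a driver opts in to
 * sector throttling when it sets up its queue.  Drivers using the generic
 * __make_request()/end_that_request_first() paths in this file get the
 * corresponding blk_started_sectors()/blk_finished_sectors() calls made for
 * them; drivers with their own end_request handling must make those calls
 * themselves.  "my_queue" is illustrative only.
 */
#if 0
        blk_queue_throttle_sectors(&my_queue, 1);
#endif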
239
 
240
/**
241
 * blk_queue_make_request - define an alternate make_request function for a device
242
 * @q:  the request queue for the device to be affected
243
 * @mfn: the alternate make_request function
244
 *
245
 * Description:
246
 *    The normal way for &struct buffer_heads to be passed to a device
247
 *    driver is for them to be collected into requests on a request
248
 *    queue, and then to allow the device driver to select requests
249
 *    off that queue when it is ready.  This works well for many block
250
 *    devices. However some block devices (typically virtual devices
251
 *    such as md or lvm) do not benefit from the processing on the
252
 *    request queue, and are served best by having the requests passed
253
 *    directly to them.  This can be achieved by providing a function
254
 *    to blk_queue_make_request().
255
 *
256
 * Caveat:
257
 *    The driver that does this *must* be able to deal appropriately
258
 *    with buffers in "highmemory", either by calling bh_kmap() to get
 *    a kernel mapping, or by calling create_bounce() to create a
 *    buffer in normal memory.
261
 **/
262
 
263
void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
264
{
265
        q->make_request_fn = mfn;
266
}
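 
/*
 * Editorial example (not part of the original file): a hypothetical
 * stacking driver (md/lvm style) that remaps buffer heads itself and never
 * uses a request queue.  Returning nonzero asks generic_make_request() to
 * resubmit the remapped buffer head; returning zero means the driver has
 * taken care of it.  All "my_*"/"MY_*" identifiers are illustrative only.
 */
#if 0
static kdev_t my_lower_dev;
static unsigned long my_start_sector;

static int my_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
{
        bh->b_rdev = my_lower_dev;
        bh->b_rsector += my_start_sector;
        return 1;       /* let generic_make_request() resolve the new target */
}

static void my_init_remap(void)
{
        blk_queue_make_request(BLK_DEFAULT_QUEUE(MY_MAJOR), my_make_request);
}
#endif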
267
 
268
/**
269
 * blk_queue_bounce_limit - set bounce buffer limit for queue
270
 * @q:  the request queue for the device
271
 * @dma_addr:   bus address limit
272
 *
273
 * Description:
274
 *    Different hardware can have different requirements as to what pages
275
 *    it can do I/O directly to. A low level driver can call
276
 *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
277
 *    buffers for doing I/O to pages residing above @dma_addr. By default
278
 *    the block layer sets this to the highest numbered "low" memory page.
279
 **/
280
void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
281
{
282
        unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
283
        unsigned long mb = dma_addr >> 20;
284
        static request_queue_t *old_q;
285
 
286
        /*
287
         * keep this for debugging for now...
288
         */
289
        if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
290
                old_q = q;
291
                printk("blk: queue %p, ", q);
292
                if (dma_addr == BLK_BOUNCE_ANY)
293
                        printk("no I/O memory limit\n");
294
                else
295
                        printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
296
                               (long long) dma_addr);
297
        }
298
 
299
        q->bounce_pfn = bounce_pfn;
300
}
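 
/*
 * Editorial example (not part of the original file): a hypothetical
 * controller that can only DMA below 16MB would ask for bounce buffers for
 * anything above that mark; the literal mask is illustrative, and
 * BLK_BOUNCE_HIGH/BLK_BOUNCE_ANY cover the common cases.
 */
#if 0
        blk_queue_bounce_limit(BLK_DEFAULT_QUEUE(MY_MAJOR), 0x00fffffful);
#endif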
301
 
302
 
303
/*
304
 * can we merge the two segments, or do we need to start a new one?
305
 */
306
inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
307
{
308
        /*
309
         * if bh and nxt are contiguous and don't cross a 4g boundary, it's ok
310
         */
311
        if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
312
                return 1;
313
 
314
        return 0;
315
}
316
 
317
static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
318
{
319
        if (req->nr_segments < max_segments) {
320
                req->nr_segments++;
321
                return 1;
322
        }
323
        return 0;
324
}
325
 
326
static int ll_back_merge_fn(request_queue_t *q, struct request *req,
327
                            struct buffer_head *bh, int max_segments)
328
{
329
        if (blk_seg_merge_ok(req->bhtail, bh))
330
                return 1;
331
 
332
        return ll_new_segment(q, req, max_segments);
333
}
334
 
335
static int ll_front_merge_fn(request_queue_t *q, struct request *req,
336
                             struct buffer_head *bh, int max_segments)
337
{
338
        if (blk_seg_merge_ok(bh, req->bh))
339
                return 1;
340
 
341
        return ll_new_segment(q, req, max_segments);
342
}
343
 
344
static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
345
                                struct request *next, int max_segments)
346
{
347
        int total_segments = req->nr_segments + next->nr_segments;
348
 
349
        if (blk_seg_merge_ok(req->bhtail, next->bh))
350
                total_segments--;
351
 
352
        if (total_segments > max_segments)
353
                return 0;
354
 
355
        req->nr_segments = total_segments;
356
        return 1;
357
}
358
 
359
/*
360
 * "plug" the device if there are no outstanding requests: this will
361
 * force the transfer to start only after we have put all the requests
362
 * on the list.
363
 *
364
 * This is called with interrupts off and no requests on the queue.
365
 * (and with the request spinlock acquired)
366
 */
367
static void generic_plug_device(request_queue_t *q, kdev_t dev)
368
{
369
        /*
370
         * no need to replug device
371
         */
372
        if (!list_empty(&q->queue_head) || q->plugged)
373
                return;
374
 
375
        q->plugged = 1;
376
        queue_task(&q->plug_tq, &tq_disk);
377
}
378
 
379
/*
380
 * remove the plug and let it rip..
381
 */
382
static inline void __generic_unplug_device(request_queue_t *q)
383
{
384
        if (q->plugged) {
385
                q->plugged = 0;
386
                if (!list_empty(&q->queue_head))
387
                        q->request_fn(q);
388
        }
389
}
390
 
391
void generic_unplug_device(void *data)
392
{
393
        request_queue_t *q = (request_queue_t *) data;
394
        unsigned long flags;
395
 
396
        spin_lock_irqsave(&io_request_lock, flags);
397
        __generic_unplug_device(q);
398
        spin_unlock_irqrestore(&io_request_lock, flags);
399
}
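 
/*
 * Editorial note (not part of the original file): plugged queues are
 * restarted when the tq_disk task queue is run, which is what callers such
 * as the buffer cache do when they need the queued I/O to actually start:
 */
#if 0
        run_task_queue(&tq_disk);       /* unplug every plugged queue */
#endif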
400
 
401
/**
 * blk_grow_request_list - grow the free request list of a queue
 * @q: The &request_queue_t
 * @nr_requests: how many requests are desired
 * @max_queue_sectors: maximum number of in-flight sectors allowed on the queue
404
 *
405
 * More free requests are added to the queue's free lists, bringing
406
 * the total number of requests to @nr_requests.
407
 *
408
 * The requests are added equally to the request queue's read
409
 * and write freelists.
410
 *
411
 * This function can sleep.
412
 *
413
 * Returns the (new) number of requests which the queue has available.
414
 */
415
int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
416
{
417
        unsigned long flags;
418
        /* Several broken drivers assume that this function doesn't sleep;
         * this causes system hangs during boot.
420
         * As a temporary fix, make the function non-blocking.
421
         */
422
        spin_lock_irqsave(&io_request_lock, flags);
423
        while (q->nr_requests < nr_requests) {
424
                struct request *rq;
425
 
426
                rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
427
                if (rq == NULL)
428
                        break;
429
                memset(rq, 0, sizeof(*rq));
430
                rq->rq_status = RQ_INACTIVE;
431
                list_add(&rq->queue, &q->rq.free);
432
                q->rq.count++;
433
 
434
                q->nr_requests++;
435
        }
436
 
437
        /*
438
         * Wakeup waiters after both one quarter of the
439
         * max-in-flight queue and one quarter of the requests
440
         * are available again.
441
         */
442
 
443
        q->batch_requests = q->nr_requests / 4;
444
        if (q->batch_requests > 32)
445
                q->batch_requests = 32;
446
        q->batch_sectors = max_queue_sectors / 4;
447
 
448
        q->max_queue_sectors = max_queue_sectors;
449
 
450
        BUG_ON(!q->batch_sectors);
451
        atomic_set(&q->nr_sectors, 0);
452
 
453
        spin_unlock_irqrestore(&io_request_lock, flags);
454
        return q->nr_requests;
455
}
456
 
457
static void blk_init_free_list(request_queue_t *q)
458
{
459
        struct sysinfo si;
460
        int megs;               /* Total memory, in megabytes */
461
        int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
462
 
463
        INIT_LIST_HEAD(&q->rq.free);
464
        q->rq.count = 0;
465
        q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
466
        q->nr_requests = 0;
467
 
468
        si_meminfo(&si);
469
        megs = si.totalram >> (20 - PAGE_SHIFT);
470
        nr_requests = MAX_NR_REQUESTS;
471
        if (megs < 30) {
472
                nr_requests /= 2;
473
                max_queue_sectors /= 2;
474
        }
475
        /* notice early if anybody screwed the defaults */
476
        BUG_ON(!nr_requests);
477
        BUG_ON(!max_queue_sectors);
478
 
479
        blk_grow_request_list(q, nr_requests, max_queue_sectors);
480
 
481
        init_waitqueue_head(&q->wait_for_requests);
482
 
483
        spin_lock_init(&q->queue_lock);
484
}
485
 
486
static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
487
 
488
/**
489
 * blk_init_queue  - prepare a request queue for use with a block device
490
 * @q:    The &request_queue_t to be initialised
491
 * @rfn:  The function to be called to process requests that have been
492
 *        placed on the queue.
493
 *
494
 * Description:
495
 *    If a block device wishes to use the standard request handling procedures,
496
 *    which sorts requests and coalesces adjacent requests, then it must
497
 *    call blk_init_queue().  The function @rfn will be called when there
498
 *    are requests on the queue that need to be processed.  If the device
499
 *    supports plugging, then @rfn may not be called immediately when requests
500
 *    are available on the queue, but may be called at some time later instead.
501
 *    Plugged queues are generally unplugged when a buffer belonging to one
502
 *    of the requests on the queue is needed, or due to memory pressure.
503
 *
504
 *    @rfn is not required, or even expected, to remove all requests off the
505
 *    queue, but only as many as it can handle at a time.  If it does leave
506
 *    requests on the queue, it is responsible for arranging that the requests
507
 *    get dealt with eventually.
508
 *
509
 *    A global spin lock $io_request_lock must be held while manipulating the
510
 *    requests on the request queue.
511
 *
512
 *    The request on the head of the queue is by default assumed to be
513
 *    potentially active, and it is not considered for re-ordering or merging
514
 *    whenever the given queue is unplugged. This behaviour can be changed with
515
 *    blk_queue_headactive().
516
 *
517
 * Note:
518
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
519
 *    when the block device is deactivated (such as at module unload).
520
 **/
521
void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
522
{
523
        INIT_LIST_HEAD(&q->queue_head);
524
        elevator_init(&q->elevator, ELEVATOR_LINUS);
525
        blk_init_free_list(q);
526
        q->request_fn           = rfn;
527
        q->back_merge_fn        = ll_back_merge_fn;
528
        q->front_merge_fn       = ll_front_merge_fn;
529
        q->merge_requests_fn    = ll_merge_requests_fn;
530
        q->make_request_fn      = __make_request;
531
        q->plug_tq.sync         = 0;
532
        q->plug_tq.routine      = &generic_unplug_device;
533
        q->plug_tq.data         = q;
534
        q->plugged              = 0;
535
        q->can_throttle         = 0;
536
 
537
        /*
538
         * These booleans describe the queue properties.  We set the
539
         * default (and most common) values here.  Other drivers can
540
         * use the appropriate functions to alter the queue properties.
541
         * as appropriate.
542
         */
543
        q->plug_device_fn       = generic_plug_device;
544
        q->head_active          = 1;
545
 
546
        blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
547
}
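 
/*
 * Editorial example (not part of the original file): the classic 2.4
 * driver pattern built on blk_init_queue().  The request function runs
 * with io_request_lock held and interrupts disabled, and consumes requests
 * from the head of the queue.  All "my_*"/"MY_*" identifiers are
 * illustrative only.
 */
#if 0
static void my_request_fn(request_queue_t *q)
{
        struct request *req;

        while (!list_empty(&q->queue_head)) {
                req = blkdev_entry_next_request(&q->queue_head);

                /* move req->current_nr_sectors sectors at req->sector,
                 * direction given by req->cmd (READ or WRITE) */
                my_do_transfer(req);

                if (!end_that_request_first(req, 1, "mydisk")) {
                        blkdev_dequeue_request(req);
                        end_that_request_last(req);
                }
        }
}

static int __init my_driver_init(void)
{
        blk_init_queue(BLK_DEFAULT_QUEUE(MY_MAJOR), my_request_fn);
        return 0;
}
#endif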
548
 
549
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
550
/*
551
 * Get a free request. io_request_lock must be held and interrupts
552
 * disabled on the way in.  Returns NULL if there are no free requests.
553
 */
554
static struct request *get_request(request_queue_t *q, int rw)
555
{
556
        struct request *rq = NULL;
557
        struct request_list *rl = &q->rq;
558
 
559
        if (blk_oversized_queue(q)) {
560
                int rlim = q->nr_requests >> 5;
561
 
562
                if (rlim < 4)
563
                        rlim = 4;
564
 
565
                /*
566
                 * if it's a write, or we have more than a handful of reads
567
                 * pending, bail out
568
                 */
569
                if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
570
                        return NULL;
571
                if (blk_oversized_queue_reads(q))
572
                        return NULL;
573
        }
574
 
575
        if (!list_empty(&rl->free)) {
576
                rq = blkdev_free_rq(&rl->free);
577
                list_del(&rq->queue);
578
                rl->count--;
579
                rl->pending[rw]++;
580
                rq->rq_status = RQ_ACTIVE;
581
                rq->cmd = rw;
582
                rq->special = NULL;
583
                rq->q = q;
584
        }
585
 
586
        return rq;
587
}
588
 
589
/*
590
 * Here's the request allocation design, low latency version:
591
 *
592
 * 1: Blocking on request exhaustion is a key part of I/O throttling.
593
 *
594
 * 2: We want to be `fair' to all requesters.  We must avoid starvation, and
595
 *    attempt to ensure that all requesters sleep for a similar duration.  Hence
596
 *    no stealing requests when there are other processes waiting.
597
 *
598
 * There used to be more here, attempting to allow a process to send in a
599
 * number of requests once it has woken up.  But, there's no way to
600
 * tell if a process has just been woken up, or if it is a new process
601
 * coming in to steal requests from the waiters.  So, we give up and force
602
 * everyone to wait fairly.
603
 *
604
 * So here's what we do:
605
 *
606
 *    a) A READA requester fails if free_requests < batch_requests
607
 *
608
 *       We don't want READA requests to prevent sleepers from ever
609
 *       waking.  Note that READA is used extremely rarely - a few
610
 *       filesystems use it for directory readahead.
611
 *
612
 *  When a process wants a new request:
613
 *
614
 *    b) If free_requests == 0, the requester sleeps in FIFO manner, and
615
 *       the queue full condition is set.  The full condition is not
616
 *       cleared until there are no longer any waiters.  Once the full
617
 *       condition is set, all new io must wait, hopefully for a very
618
 *       short period of time.
619
 *
620
 *  When a request is released:
621
 *
622
 *    c) If free_requests < batch_requests, do nothing.
623
 *
624
 *    d) If free_requests >= batch_requests, wake up a single waiter.
625
 *
626
 *   As each waiter gets a request, he wakes another waiter.  We do this
627
 *   to prevent a race where an unplug might get run before a request makes
628
 *   its way onto the queue.  The result is a cascade of wakeups, so delaying
629
 *   the initial wakeup until we've got batch_requests available helps avoid
630
 *   wakeups where there aren't any requests available yet.
631
 */
632
 
633
static struct request *__get_request_wait(request_queue_t *q, int rw)
634
{
635
        register struct request *rq;
636
        DECLARE_WAITQUEUE(wait, current);
637
 
638
        add_wait_queue_exclusive(&q->wait_for_requests, &wait);
639
 
640
        do {
641
                set_current_state(TASK_UNINTERRUPTIBLE);
642
                spin_lock_irq(&io_request_lock);
643
                if (blk_oversized_queue(q) || q->rq.count == 0) {
644
                        __generic_unplug_device(q);
645
                        spin_unlock_irq(&io_request_lock);
646
                        schedule();
647
                        spin_lock_irq(&io_request_lock);
648
                }
649
                rq = get_request(q, rw);
650
                spin_unlock_irq(&io_request_lock);
651
        } while (rq == NULL);
652
        remove_wait_queue(&q->wait_for_requests, &wait);
653
        current->state = TASK_RUNNING;
654
 
655
        return rq;
656
}
657
 
658
static void get_request_wait_wakeup(request_queue_t *q, int rw)
659
{
660
        /*
661
         * avoid losing an unplug if a second __get_request_wait did the
662
         * generic_unplug_device while our __get_request_wait was running
663
         * w/o the queue_lock held and w/ our request out of the queue.
664
         */
665
        if (waitqueue_active(&q->wait_for_requests))
666
                wake_up(&q->wait_for_requests);
667
}
668
 
669
/* RO fail safe mechanism */
670
 
671
static long ro_bits[MAX_BLKDEV][8];
672
 
673
int is_read_only(kdev_t dev)
674
{
675
        int minor,major;
676
 
677
        major = MAJOR(dev);
678
        minor = MINOR(dev);
679
        if (major < 0 || major >= MAX_BLKDEV) return 0;
680
        return ro_bits[major][minor >> 5] & (1 << (minor & 31));
681
}
682
 
683
void set_device_ro(kdev_t dev,int flag)
684
{
685
        int minor,major;
686
 
687
        major = MAJOR(dev);
688
        minor = MINOR(dev);
689
        if (major < 0 || major >= MAX_BLKDEV) return;
690
        if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
691
        else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
692
}
693
 
694
inline void drive_stat_acct (kdev_t dev, int rw,
695
                                unsigned long nr_sectors, int new_io)
696
{
697
        unsigned int major = MAJOR(dev);
698
        unsigned int index;
699
 
700
        index = disk_index(dev);
701
        if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
702
                return;
703
 
704
        kstat.dk_drive[major][index] += new_io;
705
        if (rw == READ) {
706
                kstat.dk_drive_rio[major][index] += new_io;
707
                kstat.dk_drive_rblk[major][index] += nr_sectors;
708
        } else if (rw == WRITE) {
709
                kstat.dk_drive_wio[major][index] += new_io;
710
                kstat.dk_drive_wblk[major][index] += nr_sectors;
711
        } else
712
                printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
713
}
714
 
715
#ifdef CONFIG_BLK_STATS
716
/*
717
 * Return up to two hd_structs on which to do IO accounting for a given
718
 * request.
719
 *
720
 * On a partitioned device, we want to account both against the partition
721
 * and against the whole disk.
722
 */
723
static void locate_hd_struct(struct request *req,
724
                             struct hd_struct **hd1,
725
                             struct hd_struct **hd2)
726
{
727
        struct gendisk *gd;
728
 
729
        *hd1 = NULL;
730
        *hd2 = NULL;
731
 
732
        gd = get_gendisk(req->rq_dev);
733
        if (gd && gd->part) {
734
                /* Mask out the partition bits: account for the entire disk */
735
                int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
736
                int whole_minor = devnr << gd->minor_shift;
737
 
738
                *hd1 = &gd->part[whole_minor];
739
                if (whole_minor != MINOR(req->rq_dev))
740
                        *hd2= &gd->part[MINOR(req->rq_dev)];
741
        }
742
}
743
 
744
/*
745
 * Round off the performance stats on an hd_struct.
746
 *
747
 * The average IO queue length and utilisation statistics are maintained
748
 * by observing the current state of the queue length and the amount of
749
 * time it has been in this state for.
750
 * Normally, that accounting is done on IO completion, but that can result
751
 * in more than a second's worth of IO being accounted for within any one
752
 * second, leading to >100% utilisation.  To deal with that, we do a
753
 * round-off before returning the results when reading /proc/partitions,
754
 * accounting immediately for all queue usage up to the current jiffies and
755
 * restarting the counters again.
756
 */
757
void disk_round_stats(struct hd_struct *hd)
758
{
759
        unsigned long now = jiffies;
760
 
761
        hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
762
        hd->last_queue_change = now;
763
 
764
        if (hd->ios_in_flight)
765
                hd->io_ticks += (now - hd->last_idle_time);
766
        hd->last_idle_time = now;
767
}
768
 
769
static inline void down_ios(struct hd_struct *hd)
770
{
771
        disk_round_stats(hd);
772
        --hd->ios_in_flight;
773
}
774
 
775
static inline void up_ios(struct hd_struct *hd)
776
{
777
        disk_round_stats(hd);
778
        ++hd->ios_in_flight;
779
}
780
 
781
static void account_io_start(struct hd_struct *hd, struct request *req,
782
                             int merge, int sectors)
783
{
784
        switch (req->cmd) {
785
        case READ:
786
                if (merge)
787
                        hd->rd_merges++;
788
                hd->rd_sectors += sectors;
789
                break;
790
        case WRITE:
791
                if (merge)
792
                        hd->wr_merges++;
793
                hd->wr_sectors += sectors;
794
                break;
795
        }
796
        if (!merge)
797
                up_ios(hd);
798
}
799
 
800
static void account_io_end(struct hd_struct *hd, struct request *req)
801
{
802
        unsigned long duration = jiffies - req->start_time;
803
        switch (req->cmd) {
804
        case READ:
805
                hd->rd_ticks += duration;
806
                hd->rd_ios++;
807
                break;
808
        case WRITE:
809
                hd->wr_ticks += duration;
810
                hd->wr_ios++;
811
                break;
812
        }
813
        down_ios(hd);
814
}
815
 
816
void req_new_io(struct request *req, int merge, int sectors)
817
{
818
        struct hd_struct *hd1, *hd2;
819
 
820
        locate_hd_struct(req, &hd1, &hd2);
821
        if (hd1)
822
                account_io_start(hd1, req, merge, sectors);
823
        if (hd2)
824
                account_io_start(hd2, req, merge, sectors);
825
}
826
 
827
void req_merged_io(struct request *req)
828
{
829
        struct hd_struct *hd1, *hd2;
830
 
831
        locate_hd_struct(req, &hd1, &hd2);
832
        if (hd1)
833
                down_ios(hd1);
834
        if (hd2)
835
                down_ios(hd2);
836
}
837
 
838
void req_finished_io(struct request *req)
839
{
840
        struct hd_struct *hd1, *hd2;
841
 
842
        locate_hd_struct(req, &hd1, &hd2);
843
        if (hd1)
844
                account_io_end(hd1, req);
845
        if (hd2)
846
                account_io_end(hd2, req);
847
}
848
EXPORT_SYMBOL(req_finished_io);
849
#endif /* CONFIG_BLK_STATS */
850
 
851
/*
852
 * add-request adds a request to the linked list.
853
 * io_request_lock is held and interrupts disabled, as we muck with the
854
 * request queue list.
855
 *
856
 * By this point, req->cmd is always either READ/WRITE, never READA,
857
 * which is important for drive_stat_acct() above.
858
 */
859
static inline void add_request(request_queue_t * q, struct request * req,
860
                               struct list_head *insert_here)
861
{
862
        drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
863
 
864
        if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
865
                spin_unlock_irq(&io_request_lock);
866
                BUG();
867
        }
868
 
869
        /*
870
         * elevator indicated where it wants this request to be
871
         * inserted at elevator_merge time
872
         */
873
        list_add(&req->queue, insert_here);
874
}
875
 
876
/*
877
 * Must be called with io_request_lock held and interrupts disabled
878
 */
879
void blkdev_release_request(struct request *req)
880
{
881
        request_queue_t *q = req->q;
882
 
883
        req->rq_status = RQ_INACTIVE;
884
        req->q = NULL;
885
 
886
        /*
887
         * Request may not have originated from ll_rw_blk. if not,
888
         * assume it has free buffers and check waiters
889
         */
890
        if (q) {
891
                struct request_list *rl = &q->rq;
892
                int oversized_batch = 0;
893
 
894
                if (q->can_throttle)
895
                        oversized_batch = blk_oversized_queue_batch(q);
896
                rl->count++;
897
                /*
898
                 * paranoia check
899
                 */
900
                if (req->cmd == READ || req->cmd == WRITE)
901
                        rl->pending[req->cmd]--;
902
                if (rl->pending[READ] > q->nr_requests)
903
                        printk("blk: reads: %u\n", rl->pending[READ]);
904
                if (rl->pending[WRITE] > q->nr_requests)
905
                        printk("blk: writes: %u\n", rl->pending[WRITE]);
906
                if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
907
                        printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
908
                list_add(&req->queue, &rl->free);
909
                if (rl->count >= q->batch_requests && !oversized_batch) {
910
                        smp_mb();
911
                        if (waitqueue_active(&q->wait_for_requests))
912
                                wake_up(&q->wait_for_requests);
913
                }
914
        }
915
}
916
 
917
/*
918
 * Has to be called with the request spinlock acquired
919
 */
920
static void attempt_merge(request_queue_t * q,
921
                          struct request *req,
922
                          int max_sectors,
923
                          int max_segments)
924
{
925
        struct request *next;
926
 
927
        next = blkdev_next_request(req);
928
        if (req->sector + req->nr_sectors != next->sector)
929
                return;
930
        if (req->cmd != next->cmd
931
            || req->rq_dev != next->rq_dev
932
            || req->nr_sectors + next->nr_sectors > max_sectors
933
            || next->waiting)
934
                return;
935
        /*
936
         * If we are not allowed to merge these requests, then
937
         * return.  If we are allowed to merge, then the count
938
         * will have been updated to the appropriate number,
939
         * and we shouldn't do it here too.
940
         */
941
        if (!q->merge_requests_fn(q, req, next, max_segments))
942
                return;
943
 
944
        q->elevator.elevator_merge_req_fn(req, next);
945
 
946
        /* At this point we have either done a back merge
947
         * or front merge. We need the smaller start_time of
948
         * the merged requests to be the current request
949
         * for accounting purposes.
950
         */
951
        if (time_after(req->start_time, next->start_time))
952
                req->start_time = next->start_time;
953
 
954
        req->bhtail->b_reqnext = next->bh;
955
        req->bhtail = next->bhtail;
956
        req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
957
        list_del(&next->queue);
958
 
959
        /* One last thing: we have removed a request, so we now have one
960
           less expected IO to complete for accounting purposes. */
961
        req_merged_io(req);
962
 
963
        blkdev_release_request(next);
964
}
965
 
966
static inline void attempt_back_merge(request_queue_t * q,
967
                                      struct request *req,
968
                                      int max_sectors,
969
                                      int max_segments)
970
{
971
        if (&req->queue == q->queue_head.prev)
972
                return;
973
        attempt_merge(q, req, max_sectors, max_segments);
974
}
975
 
976
static inline void attempt_front_merge(request_queue_t * q,
977
                                       struct list_head * head,
978
                                       struct request *req,
979
                                       int max_sectors,
980
                                       int max_segments)
981
{
982
        struct list_head * prev;
983
 
984
        prev = req->queue.prev;
985
        if (head == prev)
986
                return;
987
        attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
988
}
989
 
990
static int __make_request(request_queue_t * q, int rw,
991
                                  struct buffer_head * bh)
992
{
993
        unsigned int sector, count, sync;
994
        int max_segments = MAX_SEGMENTS;
995
        struct request * req, *freereq = NULL;
996
        int rw_ahead, max_sectors, el_ret;
997
        struct list_head *head, *insert_here;
998
        int latency;
999
        elevator_t *elevator = &q->elevator;
1000
        int should_wake = 0;
1001
 
1002
        count = bh->b_size >> 9;
1003
        sector = bh->b_rsector;
1004
        sync = test_and_clear_bit(BH_Sync, &bh->b_state);
1005
 
1006
        rw_ahead = 0;    /* normal case; gets changed below for READA */
1007
        switch (rw) {
1008
                case READA:
1009
#if 0   /* bread() misinterprets failed READA attempts as IO errors on SMP */
1010
                        rw_ahead = 1;
1011
#endif
1012
                        rw = READ;      /* drop into READ */
1013
                case READ:
1014
                case WRITE:
1015
                        latency = elevator_request_latency(elevator, rw);
1016
                        break;
1017
                default:
1018
                        BUG();
1019
                        goto end_io;
1020
        }
1021
 
1022
        /* We'd better have a real physical mapping!
1023
           Check this bit only if the buffer was dirty and just locked
1024
           down by us so at this point flushpage will block and
1025
           won't clear the mapped bit under us. */
1026
        if (!buffer_mapped(bh))
1027
                BUG();
1028
 
1029
        /*
1030
         * Temporary solution - in 2.5 this will be done by the lowlevel
1031
         * driver. Create a bounce buffer if the buffer data points into
1032
         * high memory - keep the original buffer otherwise.
1033
         */
1034
        bh = blk_queue_bounce(q, rw, bh);
1035
 
1036
/* look for a free request. */
1037
        /*
1038
         * Try to coalesce the new request with old requests
1039
         */
1040
        max_sectors = get_max_sectors(bh->b_rdev);
1041
 
1042
        req = NULL;
1043
        head = &q->queue_head;
1044
        /*
1045
         * Now we acquire the request spinlock, we have to be mega careful
1046
         * not to schedule or do something nonatomic
1047
         */
1048
        spin_lock_irq(&io_request_lock);
1049
 
1050
again:
1051
        insert_here = head->prev;
1052
 
1053
        if (list_empty(head)) {
1054
                q->plug_device_fn(q, bh->b_rdev); /* is atomic */
1055
                goto get_rq;
1056
        } else if (q->head_active && !q->plugged)
1057
                head = head->next;
1058
 
1059
        el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
1060
        switch (el_ret) {
1061
 
1062
                case ELEVATOR_BACK_MERGE:
1063
                        if (!q->back_merge_fn(q, req, bh, max_segments)) {
1064
                                insert_here = &req->queue;
1065
                                break;
1066
                        }
1067
                        req->bhtail->b_reqnext = bh;
1068
                        req->bhtail = bh;
1069
                        req->nr_sectors = req->hard_nr_sectors += count;
1070
                        blk_started_io(count);
1071
                        blk_started_sectors(req, count);
1072
                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1073
                        req_new_io(req, 1, count);
1074
                        attempt_back_merge(q, req, max_sectors, max_segments);
1075
                        goto out;
1076
 
1077
                case ELEVATOR_FRONT_MERGE:
1078
                        if (!q->front_merge_fn(q, req, bh, max_segments)) {
1079
                                insert_here = req->queue.prev;
1080
                                break;
1081
                        }
1082
                        bh->b_reqnext = req->bh;
1083
                        req->bh = bh;
1084
                        /*
1085
                         * may not be valid, but queues not having bounce
1086
                         * enabled for highmem pages must not look at
1087
                         * ->buffer anyway
1088
                         */
1089
                        req->buffer = bh->b_data;
1090
                        req->current_nr_sectors = req->hard_cur_sectors = count;
1091
                        req->sector = req->hard_sector = sector;
1092
                        req->nr_sectors = req->hard_nr_sectors += count;
1093
                        blk_started_io(count);
1094
                        blk_started_sectors(req, count);
1095
                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1096
                        req_new_io(req, 1, count);
1097
                        attempt_front_merge(q, head, req, max_sectors, max_segments);
1098
                        goto out;
1099
 
1100
                /*
1101
                 * elevator says don't/can't merge. get new request
1102
                 */
1103
                case ELEVATOR_NO_MERGE:
1104
                        /*
1105
                         * use elevator hints as to where to insert the
1106
                         * request. if no hints, just add it to the back
1107
                         * of the queue
1108
                         */
1109
                        if (req)
1110
                                insert_here = &req->queue;
1111
                        break;
1112
 
1113
                default:
1114
                        printk("elevator returned crap (%d)\n", el_ret);
1115
                        BUG();
1116
        }
1117
 
1118
get_rq:
1119
        if (freereq) {
1120
                req = freereq;
1121
                freereq = NULL;
1122
        } else {
1123
                /*
1124
                 * See description above __get_request_wait()
1125
                 */
1126
                if (rw_ahead) {
1127
                        if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
1128
                                spin_unlock_irq(&io_request_lock);
1129
                                goto end_io;
1130
                        }
1131
                        req = get_request(q, rw);
1132
                        if (req == NULL)
1133
                                BUG();
1134
                } else {
1135
                        req = get_request(q, rw);
1136
                        if (req == NULL) {
1137
                                spin_unlock_irq(&io_request_lock);
1138
                                freereq = __get_request_wait(q, rw);
1139
                                head = &q->queue_head;
1140
                                spin_lock_irq(&io_request_lock);
1141
                                should_wake = 1;
1142
                                goto again;
1143
                        }
1144
                }
1145
        }
1146
 
1147
/* fill up the request-info, and add it to the queue */
1148
        req->elevator_sequence = latency;
1149
        req->cmd = rw;
1150
        req->errors = 0;
1151
        req->hard_sector = req->sector = sector;
1152
        req->hard_nr_sectors = req->nr_sectors = count;
1153
        req->current_nr_sectors = req->hard_cur_sectors = count;
1154
        req->nr_segments = 1; /* Always 1 for a new request. */
1155
        req->nr_hw_segments = 1; /* Always 1 for a new request. */
1156
        req->buffer = bh->b_data;
1157
        req->waiting = NULL;
1158
        req->bh = bh;
1159
        req->bhtail = bh;
1160
        req->rq_dev = bh->b_rdev;
1161
        req->start_time = jiffies;
1162
        req_new_io(req, 0, count);
1163
        blk_started_io(count);
1164
        blk_started_sectors(req, count);
1165
        add_request(q, req, insert_here);
1166
out:
1167
        if (freereq)
1168
                blkdev_release_request(freereq);
1169
        if (should_wake)
1170
                get_request_wait_wakeup(q, rw);
1171
        if (sync)
1172
                __generic_unplug_device(q);
1173
        spin_unlock_irq(&io_request_lock);
1174
        return 0;
1175
end_io:
1176
        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1177
        return 0;
1178
}
1179
 
1180
/**
1181
 * generic_make_request: hand a buffer head to its device driver for I/O
1182
 * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
1183
 * @bh:  The buffer head describing the location in memory and on the device.
1184
 *
1185
 * generic_make_request() is used to make I/O requests of block
1186
 * devices. It is passed a &struct buffer_head and a &rw value.  The
1187
 * %READ and %WRITE options are (hopefully) obvious in meaning.  The
1188
 * %READA value means that a read is required, but that the driver is
1189
 * free to fail the request if, for example, it cannot get needed
1190
 * resources immediately.
1191
 *
1192
 * generic_make_request() does not return any status.  The
1193
 * success/failure status of the request, along with notification of
1194
 * completion, is delivered asynchronously through the bh->b_end_io
1195
 * function described (one day) else where.
1196
 *
1197
 * The caller of generic_make_request must make sure that b_page,
1198
 * b_data, b_size are set to describe the memory buffer, that b_rdev
1199
 * and b_rsector are set to describe the device address, and the
1200
 * b_end_io and optionally b_private are set to describe how
1201
 * completion notification should be signaled.  BH_Mapped should also
1202
 * be set (to confirm that b_dev and b_blocknr are valid).
1203
 *
1204
 * generic_make_request and the drivers it calls may use b_reqnext,
1205
 * and may change b_rdev and b_rsector.  So the values of these fields
1206
 * should NOT be depended on after the call to generic_make_request.
1207
 * Because of this, the caller should record the device address
1208
 * information in b_dev and b_blocknr.
1209
 *
1210
 * Apart from those fields mentioned above, no other fields, and in
1211
 * particular, no other flags, are changed by generic_make_request or
1212
 * any lower level drivers.
1213
 * */
1214
void generic_make_request (int rw, struct buffer_head * bh)
1215
{
1216
        int major = MAJOR(bh->b_rdev);
1217
        int minorsize = 0;
1218
        request_queue_t *q;
1219
 
1220
        if (!bh->b_end_io)
1221
                BUG();
1222
 
1223
        /* Test device size, when known. */
1224
        if (blk_size[major])
1225
                minorsize = blk_size[major][MINOR(bh->b_rdev)];
1226
        if (minorsize) {
1227
                unsigned long maxsector = (minorsize << 1) + 1;
1228
                unsigned long sector = bh->b_rsector;
1229
                unsigned int count = bh->b_size >> 9;
1230
 
1231
                if (maxsector < count || maxsector - count < sector) {
1232
                        /* Yecch */
1233
                        bh->b_state &= ~(1 << BH_Dirty);
1234
 
1235
                        /* This may well happen - the kernel calls bread()
1236
                           without checking the size of the device, e.g.,
1237
                           when mounting a device. */
1238
                        printk(KERN_INFO
1239
                               "attempt to access beyond end of device\n");
1240
                        printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
1241
                               kdevname(bh->b_rdev), rw,
1242
                               (sector + count)>>1, minorsize);
1243
 
1244
                        bh->b_end_io(bh, 0);
1245
                        return;
1246
                }
1247
        }
1248
 
1249
        /*
1250
         * Resolve the mapping until finished. (drivers are
1251
         * still free to implement/resolve their own stacking
1252
         * by explicitly returning 0)
1253
         */
1254
        /* NOTE: we don't repeat the blk_size check for each new device.
1255
         * Stacking drivers are expected to know what they are doing.
1256
         */
1257
        do {
1258
                q = blk_get_queue(bh->b_rdev);
1259
                if (!q) {
1260
                        printk(KERN_ERR
1261
                               "generic_make_request: Trying to access "
1262
                               "nonexistent block-device %s (%ld)\n",
1263
                               kdevname(bh->b_rdev), bh->b_rsector);
1264
                        buffer_IO_error(bh);
1265
                        break;
1266
                }
1267
        } while (q->make_request_fn(q, rw, bh));
1268
}
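 
/*
 * Editorial example (not part of the original file): submitting I/O on a
 * single, already locked buffer head.  Completion is reported only through
 * b_end_io.  "start_sector" and "my_end_io" are illustrative only.
 */
#if 0
        bh->b_rdev = bh->b_dev;
        bh->b_rsector = start_sector;   /* in 512-byte sectors */
        bh->b_end_io = my_end_io;       /* void my_end_io(struct buffer_head *, int uptodate) */
        generic_make_request(READ, bh);
#endif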
1269
 
1270
 
1271
/**
1272
 * submit_bh: submit a buffer_head to the block device layer for I/O
1273
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1274
 * @bh: The &struct buffer_head which describes the I/O
1275
 *
1276
 * submit_bh() is very similar in purpose to generic_make_request(), and
1277
 * uses that function to do most of the work.
1278
 *
1279
 * The extra functionality provided by submit_bh is to determine
1280
 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
1281
 * This is appropriate for IO requests that come from the buffer
1282
 * cache and page cache which (currently) always use aligned blocks.
1283
 */
1284
void submit_bh(int rw, struct buffer_head * bh)
1285
{
1286
        int count = bh->b_size >> 9;
1287
 
1288
        if (!test_bit(BH_Lock, &bh->b_state))
1289
                BUG();
1290
 
1291
        set_bit(BH_Req, &bh->b_state);
1292
        set_bit(BH_Launder, &bh->b_state);
1293
 
1294
        /*
1295
         * First step, 'identity mapping' - RAID or LVM might
1296
         * further remap this.
1297
         */
1298
        bh->b_rdev = bh->b_dev;
1299
        bh->b_rsector = bh->b_blocknr * count;
1300
 
1301
        get_bh(bh);
1302
        generic_make_request(rw, bh);
1303
 
1304
        /* fix race condition with wait_on_buffer() */
1305
        smp_mb(); /* spin_unlock may have inclusive semantics */
1306
        if (waitqueue_active(&bh->b_wait))
1307
                wake_up(&bh->b_wait);
1308
 
1309
        if (block_dump)
1310
                printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev));
1311
 
1312
        put_bh(bh);
1313
        switch (rw) {
1314
                case WRITE:
1315
                        kstat.pgpgout += count;
1316
                        break;
1317
                default:
1318
                        kstat.pgpgin += count;
1319
                        break;
1320
        }
1321
}
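 
/*
 * Editorial note (not part of the original file): for a 4096-byte buffer,
 * count = b_size >> 9 = 8, so a buffer with b_blocknr = 100 is submitted at
 * b_rsector = 100 * 8 = 800; block numbers are scaled from b_size units to
 * 512-byte sectors.
 */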
1322
 
1323
/**
1324
 * ll_rw_block: low-level access to block devices
1325
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
1326
 * @nr: number of &struct buffer_heads in the array
1327
 * @bhs: array of pointers to &struct buffer_head
1328
 *
1329
 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
1330
 * and requests an I/O operation on them, either a %READ or a %WRITE.
1331
 * The third %READA option is described in the documentation for
1332
 * generic_make_request() which ll_rw_block() calls.
1333
 *
1334
 * This function provides extra functionality that is not in
1335
 * generic_make_request() that is relevant to buffers in the buffer
1336
 * cache or page cache.  In particular it drops any buffer that it
1337
 * cannot get a lock on (with the BH_Lock state bit), any buffer that
1338
 * appears to be clean when doing a write request, and any buffer that
1339
 * appears to be up-to-date when doing a read request.  Further it marks
 * as clean buffers that are processed for writing (the buffer cache
 * won't assume that they are actually clean until the buffer gets
1342
 * unlocked).
1343
 *
1344
 * ll_rw_block sets b_end_io to a simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.  Any client that needs a more interesting completion
1347
 * routine should call submit_bh() (or generic_make_request())
1348
 * directly.
1349
 *
1350
 * Caveat:
1351
 *  All of the buffers must be for the same device, and must also be
1352
 *  of the current approved size for the device.  */
1353
 
1354
void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
1355
{
1356
        unsigned int major;
1357
        int correct_size;
1358
        int i;
1359
 
1360
        if (!nr)
1361
                return;
1362
 
1363
        major = MAJOR(bhs[0]->b_dev);
1364
 
1365
        /* Determine correct block size for this device. */
1366
        correct_size = get_hardsect_size(bhs[0]->b_dev);
1367
 
1368
        /* Verify requested block sizes. */
1369
        for (i = 0; i < nr; i++) {
1370
                struct buffer_head *bh = bhs[i];
1371
                if (bh->b_size % correct_size) {
1372
                        printk(KERN_NOTICE "ll_rw_block: device %s: "
1373
                               "only %d-char blocks implemented (%u)\n",
1374
                               kdevname(bhs[0]->b_dev),
1375
                               correct_size, bh->b_size);
1376
                        goto sorry;
1377
                }
1378
        }
1379
 
1380
        if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
1381
                printk(KERN_NOTICE "Can't write to read-only device %s\n",
1382
                       kdevname(bhs[0]->b_dev));
1383
                goto sorry;
1384
        }
1385
 
1386
        for (i = 0; i < nr; i++) {
1387
                struct buffer_head *bh = bhs[i];
1388
 
1389
                lock_buffer(bh);
1390
 
1391
                /* We have the buffer lock */
1392
                atomic_inc(&bh->b_count);
1393
                bh->b_end_io = end_buffer_io_sync;
1394
 
1395
                switch(rw) {
1396
                case WRITE:
1397
                        if (!atomic_set_buffer_clean(bh))
1398
                                /* Hmmph! Nothing to write */
1399
                                goto end_io;
1400
                        __mark_buffer_clean(bh);
1401
                        break;
1402
 
1403
                case READA:
1404
                case READ:
1405
                        if (buffer_uptodate(bh))
1406
                                /* Hmmph! Already have it */
1407
                                goto end_io;
1408
                        break;
1409
                default:
1410
                        BUG();
1411
        end_io:
1412
                        bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1413
                        continue;
1414
                }
1415
 
1416
                submit_bh(rw, bh);
1417
        }
1418
        return;
1419
 
1420
sorry:
1421
        /* Make sure we don't get infinite dirty retries.. */
1422
        for (i = 0; i < nr; i++)
1423
                mark_buffer_clean(bhs[i]);
1424
}
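 
/*
 * Editorial example (not part of the original file): the classic
 * synchronous read built on ll_rw_block(), equivalent to what bread()
 * does.  "dev", "block" and "size" are illustrative only.
 */
#if 0
        struct buffer_head *bh = getblk(dev, block, size);

        if (!buffer_uptodate(bh)) {
                ll_rw_block(READ, 1, &bh);
                wait_on_buffer(bh);
        }
        if (!buffer_uptodate(bh)) {
                brelse(bh);
                return -EIO;
        }
        /* ... use bh->b_data ... */
        brelse(bh);
#endif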
1425
 
1426
#ifdef CONFIG_STRAM_SWAP
1427
extern int stram_device_init (void);
1428
#endif
1429
 
1430
static void blk_writeback_timer(unsigned long data)
1431
{
1432
        wakeup_bdflush();
1433
        wakeup_kupdate();
1434
}
1435
 
1436
/**
1437
 * end_that_request_first - end I/O on one buffer.
1438
 * @req:      the request being processed
1439
 * @uptodate: 0 for I/O error
1440
 * @name:     the name printed for an I/O error
1441
 *
1442
 * Description:
1443
 *     Ends I/O on the first buffer attached to @req, and sets it up
1444
 *     for the next buffer_head (if any) in the cluster.
1445
 *
1446
 * Return:
1447
 *     0 - we are done with this request, call end_that_request_last()
1448
 *     1 - still buffers pending for this request
1449
 *
1450
 * Caveat:
1451
 *     Drivers implementing their own end_request handling must call
1452
 *     blk_finished_io() appropriately.
1453
 **/
1454
 
1455
int end_that_request_first (struct request *req, int uptodate, char *name)
1456
{
1457
        struct buffer_head * bh;
1458
        int nsect;
1459
 
1460
        req->errors = 0;
1461
        if (!uptodate)
1462
                printk("end_request: I/O error, dev %s (%s), sector %lu\n",
1463
                        kdevname(req->rq_dev), name, req->sector);
1464
 
1465
        if ((bh = req->bh) != NULL) {
1466
                nsect = bh->b_size >> 9;
1467
                blk_finished_io(nsect);
1468
                blk_finished_sectors(req, nsect);
1469
                req->bh = bh->b_reqnext;
1470
                bh->b_reqnext = NULL;
1471
                bh->b_end_io(bh, uptodate);
1472
                if ((bh = req->bh) != NULL) {
1473
                        req->hard_sector += nsect;
1474
                        req->hard_nr_sectors -= nsect;
1475
                        req->sector = req->hard_sector;
1476
                        req->nr_sectors = req->hard_nr_sectors;
1477
 
1478
                        req->current_nr_sectors = bh->b_size >> 9;
1479
                        req->hard_cur_sectors = req->current_nr_sectors;
1480
                        if (req->nr_sectors < req->current_nr_sectors) {
1481
                                req->nr_sectors = req->current_nr_sectors;
1482
                                printk("end_request: buffer-list destroyed\n");
1483
                        }
1484
                        req->buffer = bh->b_data;
1485
                        return 1;
1486
                }
1487
        }
1488
        return 0;
1489
}
1490
 
1491
extern int laptop_mode;
1492
 
1493
void end_that_request_last(struct request *req)
1494
{
1495
        struct completion *waiting = req->waiting;
1496
 
1497
        /*
1498
         * schedule the writeout of pending dirty data when the disk is idle
1499
         */
1500
        if (laptop_mode && req->cmd == READ)
1501
                mod_timer(&writeback_timer, jiffies + 5 * HZ);
1502
 
1503
        req_finished_io(req);
1504
        blkdev_release_request(req);
1505
        if (waiting)
1506
                complete(waiting);
1507
}
1508
 
1509
int __init blk_dev_init(void)
1510
{
1511
        struct blk_dev_struct *dev;
1512
 
1513
        request_cachep = kmem_cache_create("blkdev_requests",
1514
                                           sizeof(struct request),
1515
                                           0, SLAB_HWCACHE_ALIGN, NULL, NULL);
1516
 
1517
        if (!request_cachep)
1518
                panic("Can't create request pool slab cache\n");
1519
 
1520
        for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
1521
                dev->queue = NULL;
1522
 
1523
        memset(ro_bits,0,sizeof(ro_bits));
1524
        memset(max_readahead, 0, sizeof(max_readahead));
1525
        memset(max_sectors, 0, sizeof(max_sectors));
1526
 
1527
        blk_max_low_pfn = max_low_pfn - 1;
1528
        blk_max_pfn = max_pfn - 1;
1529
 
1530
        init_timer(&writeback_timer);
1531
        writeback_timer.function = blk_writeback_timer;
1532
 
1533
#ifdef CONFIG_AMIGA_Z2RAM
1534
        z2_init();
1535
#endif
1536
#ifdef CONFIG_STRAM_SWAP
1537
        stram_device_init();
1538
#endif
1539
#ifdef CONFIG_ISP16_CDI
1540
        isp16_init();
1541
#endif
1542
#ifdef CONFIG_BLK_DEV_PS2
1543
        ps2esdi_init();
1544
#endif
1545
#ifdef CONFIG_BLK_DEV_XD
1546
        xd_init();
1547
#endif
1548
#ifdef CONFIG_BLK_DEV_MFM
1549
        mfm_init();
1550
#endif
1551
#ifdef CONFIG_PARIDE
1552
        { extern void paride_init(void); paride_init(); };
1553
#endif
1554
#ifdef CONFIG_MAC_FLOPPY
1555
        swim3_init();
1556
#endif
1557
#ifdef CONFIG_BLK_DEV_SWIM_IOP
1558
        swimiop_init();
1559
#endif
1560
#ifdef CONFIG_AMIGA_FLOPPY
1561
        amiga_floppy_init();
1562
#endif
1563
#ifdef CONFIG_ATARI_FLOPPY
1564
        atari_floppy_init();
1565
#endif
1566
#ifdef CONFIG_BLK_DEV_FD
1567
        floppy_init();
1568
#else
1569
#if defined(__i386__)   /* Do we even need this? */
1570
        outb_p(0xc, 0x3f2);
1571
#endif
1572
#endif
1573
#ifdef CONFIG_CDU31A
1574
        cdu31a_init();
1575
#endif
1576
#ifdef CONFIG_ATARI_ACSI
1577
        acsi_init();
1578
#endif
1579
#ifdef CONFIG_MCD
1580
        mcd_init();
1581
#endif
1582
#ifdef CONFIG_MCDX
1583
        mcdx_init();
1584
#endif
1585
#ifdef CONFIG_SBPCD
1586
        sbpcd_init();
1587
#endif
1588
#ifdef CONFIG_AZTCD
1589
        aztcd_init();
1590
#endif
1591
#ifdef CONFIG_CDU535
1592
        sony535_init();
1593
#endif
1594
#ifdef CONFIG_GSCD
1595
        gscd_init();
1596
#endif
1597
#ifdef CONFIG_CM206
1598
        cm206_init();
1599
#endif
1600
#ifdef CONFIG_OPTCD
1601
        optcd_init();
1602
#endif
1603
#ifdef CONFIG_SJCD
1604
        sjcd_init();
1605
#endif
1606
#ifdef CONFIG_APBLOCK
1607
        ap_init();
1608
#endif
1609
#ifdef CONFIG_DDV
1610
        ddv_init();
1611
#endif
1612
#ifdef CONFIG_MDISK
1613
        mdisk_init();
1614
#endif
1615
#ifdef CONFIG_DASD
1616
        dasd_init();
1617
#endif
1618
#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
1619
        tapeblock_init();
1620
#endif
1621
#ifdef CONFIG_BLK_DEV_XPRAM
1622
        xpram_init();
1623
#endif
1624
 
1625
#ifdef CONFIG_SUN_JSFLASH
1626
        jsfd_init();
1627
#endif
1628
        return 0;
1629
};
1630
 
1631
EXPORT_SYMBOL(io_request_lock);
1632
EXPORT_SYMBOL(end_that_request_first);
1633
EXPORT_SYMBOL(end_that_request_last);
1634
EXPORT_SYMBOL(blk_grow_request_list);
1635
EXPORT_SYMBOL(blk_init_queue);
1636
EXPORT_SYMBOL(blk_get_queue);
1637
EXPORT_SYMBOL(blk_cleanup_queue);
1638
EXPORT_SYMBOL(blk_queue_headactive);
1639
EXPORT_SYMBOL(blk_queue_throttle_sectors);
1640
EXPORT_SYMBOL(blk_queue_make_request);
1641
EXPORT_SYMBOL(generic_make_request);
1642
EXPORT_SYMBOL(blkdev_release_request);
1643
EXPORT_SYMBOL(generic_unplug_device);
1644
EXPORT_SYMBOL(blk_queue_bounce_limit);
1645
EXPORT_SYMBOL(blk_max_low_pfn);
1646
EXPORT_SYMBOL(blk_max_pfn);
1647
EXPORT_SYMBOL(blk_seg_merge_ok);
1648
EXPORT_SYMBOL(blk_nohighio);
