OpenCores: Subversion repository or1k_old
URL: https://opencores.org/ocsvn/or1k_old/or1k_old/trunk

or1k_old/trunk/uclinux/uClinux-2.0.x/drivers/block/raid1.c (rev 1782, last change rev 199 by simons)
/************************************************************************
 * raid1.c : Multiple Devices driver for Linux
 *           Copyright (C) 1996 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * RAID-1 management functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>
#include <linux/locks.h>
#include <linux/malloc.h>
#include <linux/md.h>
#include <linux/raid1.h>
#include <asm/bitops.h>
#include <asm/atomic.h>

#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
#define MD_PERSONALITY

/*
 * The following can be used to debug the driver
 */
/*#define RAID1_DEBUG*/
#ifdef RAID1_DEBUG
#define PRINTK(x)   do { printk x; } while (0)
#else
#define PRINTK(x)   do { ; } while (0)
#endif
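
/*
 * Note: PRINTK takes a fully parenthesized argument list so that a single
 * macro parameter can forward a variable number of printk() arguments,
 * for example:
 *
 *      PRINTK(("read-balancing: switching %d -> %d\n", last_used, next));
 *
 * With RAID1_DEBUG defined this expands to the printk() call; otherwise
 * it compiles away to a no-op. (The do { } while (0) wrapper, without a
 * trailing semicolon, keeps the macro safe inside if/else chains.)
 */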

static struct md_personality raid1_personality;
static struct md_thread *raid1_thread = NULL;
struct buffer_head *raid1_retry_list = NULL;

static int __raid1_map (struct md_dev *mddev, kdev_t *rdev,
                        unsigned long *rsector, unsigned long size)
{
        struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
        int i, n = raid_conf->raid_disks;

        /*
         * Later we will do read balancing on the read side here;
         * for now we use the first available disk.
         */

        PRINTK(("raid1_map().\n"));

        for (i=0; i<n; i++) {
                if (raid_conf->mirrors[i].operational) {
                        *rdev = raid_conf->mirrors[i].dev;
                        return (0);
                }
        }

        printk (KERN_ERR "raid1_map(): huh, no more operational devices?\n");
        return (-1);
}

static int raid1_map (struct md_dev *mddev, kdev_t *rdev,
                      unsigned long *rsector, unsigned long size)
{
        return 0;
}
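
/*
 * Note: __raid1_map() simply picks the first operational mirror; it is
 * used on the raid1d() retry path to redirect a failed read.  The
 * personality's raid1_map() entry point is a no-op because
 * raid1_make_request() queues per-mirror buffer heads itself, so there
 * is nothing left for the generic md mapping step to translate.
 */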

void raid1_reschedule_retry (struct buffer_head *bh)
{
        struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->private_bh);

        PRINTK(("raid1_reschedule_retry().\n"));

        r1_bh->next_retry = raid1_retry_list;
        raid1_retry_list = bh;
        md_wakeup_thread(raid1_thread);
}
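
/*
 * Note: the retry list is an intrusive LIFO: the buffer head is pushed
 * onto the global raid1_retry_list with the link stored in
 * r1_bh->next_retry, and raid1d() pops entries off the same list with
 * interrupts disabled.  Callers run with interrupts off (see
 * raid1_end_request()), which is what makes the unlocked push safe.
 */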

/*
 * raid1_end_buffer_io() is called when we have finished servicing a mirrored
 * operation and are ready to return a success/failure code to the buffer
 * cache layer.
 */
static inline void raid1_end_buffer_io (struct buffer_head *bh, int uptodate)
{
        /*
         * kfree() can sleep and we try to keep this bh operation atomic.
         */
        struct raid1_bh * tmp = (struct raid1_bh *) bh->private_bh;

        clear_bit (BH_MD, &bh->b_state);
        bh->private_bh = NULL;
        bh->personality = NULL;
        mark_buffer_uptodate(bh, uptodate);
        unlock_buffer(bh);
        kfree(tmp);
}

void raid1_end_request (struct buffer_head *bh, int uptodate)
{
        struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->private_bh);
        unsigned long flags;

        save_flags(flags);
        cli();
        PRINTK(("raid1_end_request().\n"));

        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
        if (!uptodate)
                md_error (bh->b_dev, bh->b_rdev);
        else {
                /*
                 * Set BH_Uptodate in our master buffer_head, so that
                 * we will return a good error code to the higher
                 * levels even if IO on some other mirrored buffer fails.
                 *
                 * The 'master' represents the complex operation to
                 * user-side. So if something waits for IO, then it will
                 * wait for the 'master' buffer_head.
                 */
                set_bit (BH_Uptodate, &r1_bh->state);
        }

        /*
         * We split up the read and write side; IMHO they are
         * conceptually different.
         */

        if ( (r1_bh->cmd == READ) || (r1_bh->cmd == READA) ) {

                PRINTK(("raid1_end_request(), read branch.\n"));

                /*
                 * we have only one buffer_head on the read side
                 */
                if (uptodate) {
                        PRINTK(("raid1_end_request(), read branch, uptodate.\n"));
                        raid1_end_buffer_io (bh, uptodate);
                        restore_flags(flags);
                        return;
                }
                /*
                 * oops, read error:
                 */
                printk(KERN_ERR "raid1: %s: rescheduling block %lu\n",
                                 kdevname(bh->b_dev), bh->b_blocknr);
                raid1_reschedule_retry (bh);
                restore_flags(flags);
                return;
        }

        /*
         * WRITE or WRITEA.
         */
        PRINTK(("raid1_end_request(), write branch.\n"));

        /*
         * let's see if all mirrored write operations have finished
         * already [we have irqs off, so we can decrease]:
         */

        if (!--r1_bh->remaining) {
                struct md_dev *mddev = r1_bh->mddev;
                struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
                int i, n = raid_conf->raid_disks;

                PRINTK(("raid1_end_request(), remaining == 0.\n"));

                /*
                 * kfree() can sleep? really? if yes then we are
                 * doomed here ...
                 */
                for ( i=0; i<n; i++) {
                        if (r1_bh->mirror_bh[i]) kfree(r1_bh->mirror_bh[i]);
                }

                /*
                 * the 'master' bh is the one that is used in page IO,
                 * perhaps someone is waiting on it. Let's erase all
                 * signs of mirroring, and let's finish the bh operation:
                 *
                 * In particular, the "uptodate" value which we return
                 * to the higher level represents the entire mirror set.
                 *
                 * Yes, and this is why I want to use the 'master' bh as
                 * a 'representative'. That's why I think it's not clean to
                 * use the master bh for real IO. We mix concepts, which
                 * isn't too good.
                 *
                 * A buffer_head is basically a user-side file buffer.
                 * Normally it has a direct relationship with the physical
                 * device, but as in this case, we have an abstract mapping
                 * between the file buffer and the physical layout. So I've
                 * reverted all changes that do this mixing.
                 *
                 * We 'waste' about 76 bytes for the one more buffer_head,
                 * but note that we will do the mirror bh allocation at once
                 * in the future, so this isn't really a valid point, I think.
                 *
                 * Also I don't like the current way of mixing the user-side buffer
                 * concept with the 'real' physical layout like raid0.c does
                 * now: it increases the size of buffer_head even for nonstriped
                 * devices, etc.
                 *
                 * IMHO, in the future, we should have a lightweight buffer_head
                 * structure, which holds almost no physical device information.
                 *
                 * Abstract relationship between buffers:
                 * ======================================
                 *
                 *           [user]
                 *              |
                 *              |
                 *    ['master' buffer_head] + [private_buffer_head]
                 *                                      |
                 *                                      |
                 *                                      |
                 *                        [additional 'sub'-buffer_heads]
                 *                           |          |           |
                 *                         [dev1]     [dev2]      [dev3]
                 *
                 * In this scheme it's not clean to use the 'master' as one of
                 * the 'sub' buffer_heads. If you think about it, currently we can
                 * do this only because raid0 introduced its own private_buffer_head
                 * structure in buffer_head: rdev,rsector. And raid0 has a 1:1
                 * relationship to the physical device. But this is really just a
                 * special case. Once we have our megafast bh pools running, we could
                 * clean up raid0.c too :))
                 *
                 * Not only is it unclean, it is lethal if in the future we insert our
                 * sub buffer_heads into the global block cache. The master request
                 * should be an IO operation label for the complex operation, nothing
                 * more.
                 *
                 * So we have almost no performance arguments, and a lot of cleanness
                 * arguments.
                 *
                 * Comments? Gonna change it back to your way again if you can convince
                 * me :)) --mingo
                 *
                 */
                raid1_end_buffer_io ( r1_bh->master_bh,
                                test_bit (BH_Uptodate, &r1_bh->state));
        }
        else PRINTK(("raid1_end_request(), remaining == %u.\n", r1_bh->remaining));
        restore_flags(flags);
}
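
/*
 * Note on the write-side accounting: raid1_make_request() sets
 * r1_bh->remaining to the number of mirror buffer heads it actually
 * queued (sum_bhs), and each per-mirror completion decrements it here
 * with interrupts disabled.  With two operational mirrors, the first
 * completion drops remaining from 2 to 1 and only logs; the second
 * drops it to 0, frees the mirror bh's, and completes the master bh
 * with an uptodate value that reflects the whole mirror set.
 */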

/* This routine checks if the underlying device is an md device, and in
 * that case it maps the blocks before putting the request on the queue.
 */
static inline void
map_and_make_request (int rw, struct buffer_head *bh)
{
        if (MAJOR (bh->b_rdev) == MD_MAJOR){
                md_map (MINOR (bh->b_rdev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9);
        }
        make_request (MAJOR (bh->b_rdev), rw, bh);
}
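
/*
 * Note: the MAJOR(bh->b_rdev) == MD_MAJOR check handles stacked md
 * devices (a mirror that is itself an md array): in that case the block
 * is first translated through md_map() so that the request is queued
 * against the underlying component device.
 */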

static int
raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh)
{

        struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
        struct buffer_head *mirror_bh[MD_SB_DISKS];
        struct raid1_bh * r1_bh;
        int n = raid_conf->raid_disks, i, sum_bhs = 0, switch_disks = 0, sectors;
        struct mirror_info *mirror;

        PRINTK(("raid1_make_request().\n"));

/*
 * We put allocations at the beginning, to avoid sleeping while doing
 * atomic operations on buffer heads. This might or might not make much
 * difference, but let's rather be careful.
 *
 * But this has two side effects (probably not harmless):
 *
 *      1.      The buffer will not be locked while we sleep.
 *      2.      The rest of the kernel will see BH_Req without
 *              BH_Lock.
 */
        while (!( /* FIXME: now we are rather fault tolerant than nice */
        r1_bh = kmalloc (sizeof (struct raid1_bh), GFP_KERNEL)
        ) )
                printk ("raid1_make_request(#1): out of memory\n");
        memset (r1_bh, 0, sizeof (struct raid1_bh));
/*
 * make_request() can abort the operation when READA or WRITEA are being
 * used and no empty request is available.
 *
 * Currently, just replace the command with READ/WRITE.
 */
        if (rw == READA) rw = READ;
        if (rw == WRITEA) rw = WRITE;

        if (rw == WRITE || rw == WRITEA)
                mark_buffer_clean(bh);          /* Too early ? */

/*
 * I think the read and write branches should be separated completely, since
 * we want to do read balancing on the read side for example. Comments? :)
 * --mingo
 */

        r1_bh->master_bh=bh;
        r1_bh->mddev=mddev;
        r1_bh->cmd = rw;

        set_bit (BH_MD, &bh->b_state);
        bh->personality  = &raid1_personality;
        bh->private_bh   = (void*)(r1_bh);

        if (rw==READ || rw==READA) {
                int last_used = raid_conf->last_used;
                PRINTK(("raid1_make_request(), read branch.\n"));
                mirror = raid_conf->mirrors + last_used;
                bh->b_rdev = mirror->dev;
                sectors = bh->b_size >> 9;
                if (bh->b_blocknr * sectors == raid_conf->next_sect) {
                        raid_conf->sect_count += sectors;
                        if (raid_conf->sect_count >= mirror->sect_limit)
                                switch_disks = 1;
                } else
                        switch_disks = 1;
                raid_conf->next_sect = (bh->b_blocknr + 1) * sectors;
                if (switch_disks) {
                        PRINTK(("read-balancing: switching %d -> %d (%d sectors)\n", last_used, mirror->next, raid_conf->sect_count));
                        raid_conf->sect_count = 0;
                        raid_conf->last_used = mirror->next;
                }
                PRINTK (("raid1 read queue: %d %d\n", MAJOR (bh->b_rdev), MINOR (bh->b_rdev)));

                clear_bit (BH_Lock, &bh->b_state);
                map_and_make_request (rw, bh);
                return 0;
        }

        /*
         * WRITE or WRITEA.
         */
/*
 * BTW, we have no more master disk. 'slave' is gone too :) [I hate that word :))]
 *
 * We are now using the master bh for a real IO. It seems important that:
 *
 * 1.   lock_buffer() will be called when we start to handle the request,
 *      before we do anything (done by ll_rw_blk.c).
 *
 * 2.   It seems that Linus took great care to set mark_buffer_clean()
 *      atomically with cli() in effect just when the buffer was placed
 *      into the queue. To be compatible with this behavior, it would be
 *      best to lock the buffer *first*, but mark it clean *last*, and to
 *      do this by passing through the exact logic in ll_rw_blk.c.
 *
 * Note: I've reverted this #3 thing, see the big comment in this file.
 *
 * 3.   We are now called from within make_request(), so the real bh
 *      will be automatically handled last when we return, so we only need
 *      to add the rest of the buffers (but remember to include the
 *      master bh in the remaining count).
 */
        PRINTK(("raid1_make_request(n=%d), write branch.\n",n));

        for (i = 0; i < n; i++) {

                if (!raid_conf->mirrors [i].operational) {
                        /*
                         * the r1_bh->mirror_bh[i] pointer remains NULL
                         */
                        mirror_bh[i] = NULL;
                        continue;
                }

        /*
         * We should use a private pool (size depending on NR_REQUEST),
         * to avoid writes filling up the memory with bhs.
         *
         * Such pools are much faster than kmalloc anyway (so we waste almost
         * nothing by not using the master bh when writing, and win a lot of
         * cleanness).
         *
         * But for now we are cool enough. --mingo
         *
         * It's safe to sleep here; buffer heads cannot be used in a shared
         * manner in the write branch. Look at how we lock the buffer at the
         * beginning of this function to grok the difference ;)
         */
                while (!( /* FIXME: now we are rather fault tolerant than nice */
                mirror_bh[i] = kmalloc (sizeof (struct buffer_head), GFP_KERNEL)
                ) )
                        printk ("raid1_make_request(#2): out of memory\n");
                memset (mirror_bh[i], 0, sizeof (struct buffer_head));

        /*
         * prepare mirrored bh (fields ordered for max mem throughput):
         */
                mirror_bh [i]->b_blocknr    = bh->b_blocknr;
                mirror_bh [i]->b_dev        = bh->b_dev;
                mirror_bh [i]->b_rdev       = raid_conf->mirrors [i].dev;
                mirror_bh [i]->b_rsector    = bh->b_rsector;
                mirror_bh [i]->b_state      =   (1<<BH_MD)      | (1<<BH_Req) |
                                                (1<<BH_Touched) | (1<<BH_Dirty);
                mirror_bh [i]->b_count      = 1;
                mirror_bh [i]->b_size       = bh->b_size;
                mirror_bh [i]->b_data       = bh->b_data;
                mirror_bh [i]->b_list       = BUF_LOCKED;
                mirror_bh [i]->personality  = &raid1_personality;
                mirror_bh [i]->private_bh   = (void*)(r1_bh);

                r1_bh->mirror_bh[i] = mirror_bh[i];
                sum_bhs++;
        }

        r1_bh->remaining = sum_bhs;

        PRINTK(("raid1_make_request(), write branch, sum_bhs=%d.\n",sum_bhs));

        /*
         * We have to be a bit careful about the semaphore above, that's why
         * we start the requests separately. Since kmalloc() could fail and
         * sleep, and make_request() can sleep too, this is the safer
         * solution. Imagine end_request decreasing the semaphore before we
         * could have set it up ... We could play tricks with the semaphore
         * (presetting it and correcting at the end if sum_bhs is not 'n'),
         * but we would have to do end_request by hand if all requests
         * finished before we had a chance to set up the semaphore
         * correctly ... lots of races.
         */
        for (i = 0; i < n; i++)
                if (mirror_bh [i] != NULL)
                        map_and_make_request (rw, mirror_bh [i]);

        return (0);
}
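
/*
 * Read-balancing example (sect_limit is set to 128 in raid1_run()): with,
 * say, 1 KB buffers, each read covers 2 sectors.  A sequential reader
 * keeps satisfying the b_blocknr * sectors == next_sect test, so
 * sect_count grows by 2 per request and the same mirror serves 64
 * consecutive requests; at 128 sectors the code switches last_used to
 * mirror->next.  A seeky reader misses the test on every request and
 * switches mirrors each time, spreading the load.
 */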

static int raid1_status (char *page, int minor, struct md_dev *mddev)
{
        struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
        int sz = 0, i;

        sz += sprintf (page+sz, " [%d/%d] [", raid_conf->raid_disks, raid_conf->working_disks);
        for (i = 0; i < raid_conf->raid_disks; i++)
                sz += sprintf (page+sz, "%s", raid_conf->mirrors [i].operational ? "U" : "_");
        sz += sprintf (page+sz, "]");
        return sz;
}
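
/*
 * Sample /proc/mdstat fragment produced by the above for a two-disk set
 * with the second mirror failed:
 *
 *      [2/1] [U_]
 *
 * i.e. 2 raid disks, 1 working; 'U' for each operational mirror and '_'
 * for each failed one.
 */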

static void raid1_fix_links (struct raid1_data *raid_conf, int failed_index)
{
        int disks = raid_conf->raid_disks;
        int j;

        for (j = 0; j < disks; j++)
                if (raid_conf->mirrors [j].next == failed_index)
                        raid_conf->mirrors [j].next = raid_conf->mirrors [failed_index].next;
}
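
/*
 * Note: the mirrors' next fields form the ring used by read balancing.
 * raid1_fix_links() splices a failed disk out of that ring: with three
 * mirrors linked 0 -> 1 -> 2 -> 0, a failure of disk 1 rewrites
 * mirrors[0].next from 1 to 2, leaving 0 -> 2 -> 0.
 */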

static int raid1_error (struct md_dev *mddev, kdev_t dev)
{
        struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
        struct mirror_info *mirror;
        md_superblock_t *sb = mddev->sb;
        int disks = raid_conf->raid_disks;
        int i;

        PRINTK(("raid1_error called\n"));

        if (raid_conf->working_disks == 1) {
                /*
                 * Uh oh, we can do nothing if this is our last disk, but
                 * first check if this is a queued request for a device
                 * which has just failed.
                 */
                for (i = 0, mirror = raid_conf->mirrors; i < disks; i++, mirror++)
                        if (mirror->dev == dev && !mirror->operational)
                                return 0;
                printk (KERN_ALERT "RAID1: only one disk left and IO error.\n");
                return 0;
        }

        /* Mark disk as unusable */
        for (i = 0, mirror = raid_conf->mirrors; i < disks; i++, mirror++) {
                if (mirror->dev == dev && mirror->operational){
                        mirror->operational = 0;
                        raid1_fix_links (raid_conf, i);
                        sb->disks[mirror->number].state |= (1 << MD_FAULTY_DEVICE);
                        sb->disks[mirror->number].state &= ~(1 << MD_SYNC_DEVICE);
                        sb->disks[mirror->number].state &= ~(1 << MD_ACTIVE_DEVICE);
                        sb->active_disks--;
                        sb->working_disks--;
                        sb->failed_disks++;
                        mddev->sb_dirty = 1;
                        md_wakeup_thread(raid1_thread);
                        raid_conf->working_disks--;
                        printk (KERN_ALERT
                                "RAID1: Disk failure on %s, disabling device. "
                                "Operation continuing on %d devices\n",
                                kdevname (dev), raid_conf->working_disks);
                }
        }

        return 0;
}
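
/*
 * Note: besides dropping the mirror from the in-memory configuration,
 * the error path flips the superblock state bits (FAULTY set, SYNC and
 * ACTIVE cleared), marks the superblock dirty, and wakes raid1_thread;
 * raid1d() then writes the updated superblock out via md_update_sb()
 * before retrying any queued reads.
 */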

/*
 * This is a kernel thread which:
 *
 *      1.      Retries failed read operations on working mirrors.
 *      2.      Updates the raid superblock when problems are encountered.
 */
void raid1d (void *data)
{
        struct buffer_head *bh;
        kdev_t dev;
        unsigned long flags;
        struct raid1_bh * r1_bh;
        struct md_dev *mddev;

        PRINTK(("raid1d() active\n"));
        save_flags(flags);
        cli();
        while (raid1_retry_list) {
                bh = raid1_retry_list;
                r1_bh = (struct raid1_bh *)(bh->private_bh);
                raid1_retry_list = r1_bh->next_retry;
                restore_flags(flags);

                mddev = md_dev + MINOR(bh->b_dev);
                if (mddev->sb_dirty) {
                        mddev->sb_dirty = 0;
                        md_update_sb(MINOR(bh->b_dev));
                }
                dev = bh->b_rdev;
                __raid1_map (md_dev + MINOR(bh->b_dev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9);
                if (bh->b_rdev == dev) {
                        printk (KERN_ALERT
                                        "raid1: %s: unrecoverable I/O read error for block %lu\n",
                                                kdevname(bh->b_dev), bh->b_blocknr);
                        raid1_end_buffer_io (bh, 0);
                } else {
                        printk (KERN_ERR "raid1: %s: redirecting sector %lu to another mirror\n",
                                          kdevname(bh->b_dev), bh->b_blocknr);
                        clear_bit (BH_Lock, &bh->b_state);
                        map_and_make_request (r1_bh->cmd, bh);
                }
                cli();
        }
        restore_flags(flags);

}
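
/*
 * Note: the bh->b_rdev == dev comparison after __raid1_map() is how the
 * thread detects that the read cannot be redirected: the failing device
 * was already marked non-operational by raid1_error(), so if the remap
 * still yields the same device there is no other mirror left and the
 * read is failed upward instead of being requeued.
 */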

/*
 * This will catch the scenario in which one of the mirrors was
 * mounted as a normal device rather than as a part of a raid set.
 */
static int check_consistency (struct md_dev *mddev)
{
        struct raid1_data *raid_conf = mddev->private;
        kdev_t dev;
        struct buffer_head *bh = NULL;
        int i, rc = 0;
        char *buffer = NULL;

        for (i = 0; i < raid_conf->raid_disks; i++) {
                if (!raid_conf->mirrors[i].operational)
                        continue;
                dev = raid_conf->mirrors[i].dev;
                set_blocksize(dev, 4096);
                if ((bh = bread(dev, 0, 4096)) == NULL)
                        break;
                if (!buffer) {
                        buffer = (char *) __get_free_page(GFP_KERNEL);
                        if (!buffer)
                                break;
                        memcpy(buffer, bh->b_data, 4096);
                } else if (memcmp(buffer, bh->b_data, 4096)) {
                        rc = 1;
                        break;
                }
                bforget(bh);
                fsync_dev(dev);
                invalidate_buffers(dev);
                bh = NULL;
        }
        if (buffer)
                free_page((unsigned long) buffer);
        if (bh) {
                dev = bh->b_dev;
                bforget(bh);
                fsync_dev(dev);
                invalidate_buffers(dev);
        }
        return rc;
}
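
/*
 * Note: the consistency check reads the first 4 KB block of every
 * operational mirror and memcmp()s it against the first mirror's copy.
 * If any mirror's block 0 differs (for example because it was mounted
 * and written to as a plain device), the function returns 1 and
 * raid1_run() refuses to start the set, asking the admin to run ckraid.
 */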

static int raid1_run (int minor, struct md_dev *mddev)
{
        struct raid1_data *raid_conf;
        int i, j, raid_disk;
        md_superblock_t *sb = mddev->sb;
        md_descriptor_t *descriptor;
        struct real_dev *realdev;

        MOD_INC_USE_COUNT;

        if (sb->level != 1) {
                printk("raid1: %s: raid level not set to mirroring (%d)\n", kdevname(MKDEV(MD_MAJOR, minor)), sb->level);
                MOD_DEC_USE_COUNT;
                return -EIO;
        }
        /****
         * copy the now verified devices into our private RAID1 bookkeeping area:
         *
         * [whatever we allocate in raid1_run(), should be freed in raid1_stop()]
         */

        while (!( /* FIXME: now we are rather fault tolerant than nice */
        mddev->private = kmalloc (sizeof (struct raid1_data), GFP_KERNEL)
        ) )
                printk ("raid1_run(): out of memory\n");
        raid_conf = mddev->private;
        memset(raid_conf, 0, sizeof(*raid_conf));

        PRINTK(("raid1_run(%d) called.\n", minor));

        for (i = 0; i < mddev->nb_dev; i++) {
                realdev = &mddev->devices[i];
                if (!realdev->sb) {
                        printk(KERN_ERR "raid1: disabled mirror %s (couldn't access raid superblock)\n", kdevname(realdev->dev));
                        continue;
                }

                /*
                 * This is important -- we are using the descriptor on
                 * the disk only to get a pointer to the descriptor on
                 * the main superblock, which might be more recent.
                 */
                descriptor = &sb->disks[realdev->sb->descriptor.number];
                if (descriptor->state & (1 << MD_FAULTY_DEVICE)) {
                        printk(KERN_ERR "raid1: disabled mirror %s (errors detected)\n", kdevname(realdev->dev));
                        continue;
                }
                if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) {
                        if (!(descriptor->state & (1 << MD_SYNC_DEVICE))) {
                                printk(KERN_ERR "raid1: disabled mirror %s (not in sync)\n", kdevname(realdev->dev));
                                continue;
                        }
                        raid_disk = descriptor->raid_disk;
                        if (descriptor->number > sb->nr_disks || raid_disk > sb->raid_disks) {
                                printk(KERN_ERR "raid1: disabled mirror %s (inconsistent descriptor)\n", kdevname(realdev->dev));
                                continue;
                        }
                        if (raid_conf->mirrors[raid_disk].operational) {
                                printk(KERN_ERR "raid1: disabled mirror %s (mirror %d already operational)\n", kdevname(realdev->dev), raid_disk);
                                continue;
                        }
                        printk(KERN_INFO "raid1: device %s operational as mirror %d\n", kdevname(realdev->dev), raid_disk);
                        raid_conf->mirrors[raid_disk].number = descriptor->number;
                        raid_conf->mirrors[raid_disk].raid_disk = raid_disk;
                        raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev;
                        raid_conf->mirrors[raid_disk].operational = 1;
                        raid_conf->mirrors[raid_disk].sect_limit = 128;
                        raid_conf->working_disks++;
                }
        }
        if (!raid_conf->working_disks) {
                printk(KERN_ERR "raid1: no operational mirrors for %s\n", kdevname(MKDEV(MD_MAJOR, minor)));
                kfree(raid_conf);
                mddev->private = NULL;
                MOD_DEC_USE_COUNT;
                return -EIO;
        }

        raid_conf->raid_disks = sb->raid_disks;
        raid_conf->mddev = mddev;

        for (j = 0; !raid_conf->mirrors[j].operational; j++);
        raid_conf->last_used = j;
        for (i = raid_conf->raid_disks - 1; i >= 0; i--) {
                if (raid_conf->mirrors[i].operational) {
                        PRINTK(("raid_conf->mirrors[%d].next == %d\n", i, j));
                        raid_conf->mirrors[i].next = j;
                        j = i;
                }
        }

        if (check_consistency(mddev)) {
                printk(KERN_ERR "raid1: detected mirror differences -- run ckraid\n");
                sb->state |= 1 << MD_SB_ERRORS;
                kfree(raid_conf);
                mddev->private = NULL;
                MOD_DEC_USE_COUNT;
                return -EIO;
        }

        /*
         * Regenerate the "device is in sync with the raid set" bit for
         * each device.
         */
        for (i = 0; i < sb->nr_disks ; i++) {
                sb->disks[i].state &= ~(1 << MD_SYNC_DEVICE);
                for (j = 0; j < sb->raid_disks; j++) {
                        if (!raid_conf->mirrors[j].operational)
                                continue;
                        if (sb->disks[i].number == raid_conf->mirrors[j].number)
                                sb->disks[i].state |= 1 << MD_SYNC_DEVICE;
                }
        }
        sb->active_disks = raid_conf->working_disks;

        printk("raid1: raid set %s active with %d out of %d mirrors\n", kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks);
        /* Ok, everything is just fine now */
        return (0);
}
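
/*
 * Example of the read-balancing ring built above: with raid_disks == 3
 * and all mirrors operational, last_used starts at 0 and the backwards
 * pass sets the next pointers to 0 -> 1 -> 2 -> 0.  If only disks 0 and
 * 2 are operational, the ring becomes 0 -> 2 -> 0 and disk 1 is never
 * consulted.
 */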

static int raid1_stop (int minor, struct md_dev *mddev)
{
        struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;

        kfree (raid_conf);
        mddev->private = NULL;
        MOD_DEC_USE_COUNT;
        return 0;
}

static struct md_personality raid1_personality=
{
        "raid1",
        raid1_map,
        raid1_make_request,
        raid1_end_request,
        raid1_run,
        raid1_stop,
        raid1_status,
        NULL,                   /* no ioctls */
        0,
        raid1_error
};
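
/*
 * Note: the positional initializer above fills in, in order, the
 * personality's name, map, make_request, end_request, run, stop and
 * status hooks, a NULL ioctl handler, a zero field, and the error
 * handler.  The exact member names are defined by struct md_personality
 * in <linux/md.h>; the ordering here is inferred from the functions'
 * roles in this file.
 */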

int raid1_init (void)
{
        if ((raid1_thread = md_register_thread(raid1d, NULL)) == NULL)
                return -EBUSY;
        return register_md_personality (RAID1, &raid1_personality);
}

#ifdef MODULE
int init_module (void)
{
        return raid1_init();
}

void cleanup_module (void)
{
        md_unregister_thread (raid1_thread);
        unregister_md_personality (RAID1);
}
#endif
