OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [jbd/] [recovery.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * linux/fs/recovery.c
3
 *
4
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5
 *
6
 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
7
 *
8
 * This file is part of the Linux kernel and is made available under
9
 * the terms of the GNU General Public License, version 2, or at your
10
 * option, any later version, incorporated herein by reference.
11
 *
12
 * Journal recovery routines for the generic filesystem journaling code;
13
 * part of the ext2fs journaling system.
14
 */
15
 
16
#ifndef __KERNEL__
17
#include "jfs_user.h"
18
#else
19
#include <linux/sched.h>
20
#include <linux/fs.h>
21
#include <linux/jbd.h>
22
#include <linux/errno.h>
23
#include <linux/slab.h>
24
#include <linux/locks.h>
25
#endif
26
 
27
/*
28
 * Maintain information about the progress of the recovery job, so that
29
 * the different passes can carry information between them.
30
 */
31
struct recovery_info
32
{
33
        tid_t           start_transaction;
34
        tid_t           end_transaction;
35
 
36
        int             nr_replays;
37
        int             nr_revokes;
38
        int             nr_revoke_hits;
39
};
40
 
41
enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
42
static int do_one_pass(journal_t *journal,
43
                                struct recovery_info *info, enum passtype pass);
44
static int scan_revoke_records(journal_t *, struct buffer_head *,
45
                                tid_t, struct recovery_info *);
46
 
47
#ifdef __KERNEL__
48
 
/*
 * Release readahead buffers after use: calls brelse() on the first n
 * entries of b[], walking from the last entry down to the first.
 */
void journal_brelse_array(struct buffer_head *b[], int n)
{
        int i;

        for (i = n - 1; i >= 0; i--)
                brelse(b[i]);
}
55
 
56
 
57
/*
58
 * When reading from the journal, we are going through the block device
59
 * layer directly and so there is no readahead being done for us.  We
60
 * need to implement any readahead ourselves if we want it to happen at
61
 * all.  Recovery is basically one long sequential read, so make sure we
62
 * do the IO in reasonably large chunks.
63
 *
64
 * This is not so critical that we need to be enormously clever about
65
 * the readahead size, though.  128K is a purely arbitrary, good-enough
66
 * fixed value.
67
 */
68
 
69
#define MAXBUF 8
70
static int do_readahead(journal_t *journal, unsigned int start)
71
{
72
        int err;
73
        unsigned int max, nbufs, next;
74
        unsigned long blocknr;
75
        struct buffer_head *bh;
76
 
77
        struct buffer_head * bufs[MAXBUF];
78
 
79
        /* Do up to 128K of readahead */
80
        max = start + (128 * 1024 / journal->j_blocksize);
81
        if (max > journal->j_maxlen)
82
                max = journal->j_maxlen;
83
 
84
        /* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
85
         * a time to the block device IO layer. */
86
 
87
        nbufs = 0;
88
 
89
        for (next = start; next < max; next++) {
90
                err = journal_bmap(journal, next, &blocknr);
91
 
92
                if (err) {
93
                        printk (KERN_ERR "JBD: bad block at offset %u\n",
94
                                next);
95
                        goto failed;
96
                }
97
 
98
                bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
99
                if (!bh) {
100
                        err = -ENOMEM;
101
                        goto failed;
102
                }
103
 
104
                if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
105
                        bufs[nbufs++] = bh;
106
                        if (nbufs == MAXBUF) {
107
                                ll_rw_block(READ, nbufs, bufs);
108
                                journal_brelse_array(bufs, nbufs);
109
                                nbufs = 0;
110
                        }
111
                } else
112
                        brelse(bh);
113
        }
114
 
115
        if (nbufs)
116
                ll_rw_block(READ, nbufs, bufs);
117
        err = 0;
118
 
119
failed:
120
        if (nbufs)
121
                journal_brelse_array(bufs, nbufs);
122
        return err;
123
}
124
 
125
#endif /* __KERNEL__ */
126
 
127
 
128
/*
129
 * Read a block from the journal
130
 */
131
 
132
static int jread(struct buffer_head **bhp, journal_t *journal,
133
                 unsigned int offset)
134
{
135
        int err;
136
        unsigned long blocknr;
137
        struct buffer_head *bh;
138
 
139
        *bhp = NULL;
140
 
141
        J_ASSERT (offset < journal->j_maxlen);
142
 
143
        err = journal_bmap(journal, offset, &blocknr);
144
 
145
        if (err) {
146
                printk (KERN_ERR "JBD: bad block at offset %u\n",
147
                        offset);
148
                return err;
149
        }
150
 
151
        bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
152
        if (!bh)
153
                return -ENOMEM;
154
 
155
        if (!buffer_uptodate(bh)) {
156
                /* If this is a brand new buffer, start readahead.
157
                   Otherwise, we assume we are already reading it.  */
158
                if (!buffer_req(bh))
159
                        do_readahead(journal, offset);
160
                wait_on_buffer(bh);
161
        }
162
 
163
        if (!buffer_uptodate(bh)) {
164
                printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
165
                        offset);
166
                brelse(bh);
167
                return -EIO;
168
        }
169
 
170
        *bhp = bh;
171
        return 0;
172
}
173
 
174
 
175
/*
176
 * Count the number of in-use tags in a journal descriptor block.
177
 */
178
 
179
static int count_tags(struct buffer_head *bh, int size)
180
{
181
        char *                  tagp;
182
        journal_block_tag_t *   tag;
183
        int                     nr = 0;
184
 
185
        tagp = &bh->b_data[sizeof(journal_header_t)];
186
 
187
        while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
188
                tag = (journal_block_tag_t *) tagp;
189
 
190
                nr++;
191
                tagp += sizeof(journal_block_tag_t);
192
                if (!(tag->t_flags & htonl(JFS_FLAG_SAME_UUID)))
193
                        tagp += 16;
194
 
195
                if (tag->t_flags & htonl(JFS_FLAG_LAST_TAG))
196
                        break;
197
        }
198
 
199
        return nr;
200
}
201
 
202
 
/*
 * Make sure we wrap around the log correctly!
 *
 * If "var" has reached or passed (journal)->j_last, pull it back by the
 * distance between j_first and j_last.  "var" must be a modifiable
 * lvalue; it is parenthesised in the expansion so that any lvalue
 * expression can be passed safely.
 */
#define wrap(journal, var)                                              \
do {                                                                    \
        if ((var) >= (journal)->j_last)                                 \
                (var) -= ((journal)->j_last - (journal)->j_first);      \
} while (0)
209
 
210
/**
211
 * int journal_recover(journal_t *journal) - recovers a on-disk journal
212
 * @journal: the journal to recover
213
 *
214
 * The primary function for recovering the log contents when mounting a
215
 * journaled device.
216
 */
217
int journal_recover(journal_t *journal)
218
{
219
/*
220
 * Recovery is done in three passes.  In the first pass, we look for the
221
 * end of the log.  In the second, we assemble the list of revoke
222
 * blocks.  In the third and final pass, we replay any un-revoked blocks
223
 * in the log.
224
 */
225
 
226
        int                     err;
227
        journal_superblock_t *  sb;
228
 
229
        struct recovery_info    info;
230
 
231
        memset(&info, 0, sizeof(info));
232
        sb = journal->j_superblock;
233
 
234
        /*
235
         * The journal superblock's s_start field (the current log head)
236
         * is always zero if, and only if, the journal was cleanly
237
         * unmounted.
238
         */
239
 
240
        if (!sb->s_start) {
241
                jbd_debug(1, "No recovery required, last transaction %d\n",
242
                          ntohl(sb->s_sequence));
243
                journal->j_transaction_sequence = ntohl(sb->s_sequence) + 1;
244
                return 0;
245
        }
246
 
247
 
248
        err = do_one_pass(journal, &info, PASS_SCAN);
249
        if (!err)
250
                err = do_one_pass(journal, &info, PASS_REVOKE);
251
        if (!err)
252
                err = do_one_pass(journal, &info, PASS_REPLAY);
253
 
254
        jbd_debug(0, "JBD: recovery, exit status %d, "
255
                  "recovered transactions %u to %u\n",
256
                  err, info.start_transaction, info.end_transaction);
257
        jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
258
                  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
259
 
260
        /* Restart the log at the next transaction ID, thus invalidating
261
         * any existing commit records in the log. */
262
        journal->j_transaction_sequence = ++info.end_transaction;
263
 
264
        journal_clear_revoke(journal);
265
        fsync_no_super(journal->j_fs_dev);
266
        return err;
267
}
268
 
269
/**
270
 * int journal_skip_recovery() - Start journal and wipe exiting records
271
 * @journal: journal to startup
272
 *
273
 * Locate any valid recovery information from the journal and set up the
274
 * journal structures in memory to ignore it (presumably because the
275
 * caller has evidence that it is out of date).
276
 * This function does'nt appear to be exorted..
277
 */
278
int journal_skip_recovery(journal_t *journal)
279
{
280
/*
281
 * We perform one pass over the journal to allow us to tell the user how
282
 * much recovery information is being erased, and to let us initialise
283
 * the journal transaction sequence numbers to the next unused ID.
284
 */
285
 
286
        int                     err;
287
        journal_superblock_t *  sb;
288
 
289
        struct recovery_info    info;
290
 
291
        memset (&info, 0, sizeof(info));
292
        sb = journal->j_superblock;
293
 
294
        err = do_one_pass(journal, &info, PASS_SCAN);
295
 
296
        if (err) {
297
                printk(KERN_ERR "JBD: error %d scanning journal\n", err);
298
                ++journal->j_transaction_sequence;
299
        } else {
300
#ifdef CONFIG_JBD_DEBUG
301
                int dropped = info.end_transaction - ntohl(sb->s_sequence);
302
#endif
303
 
304
                jbd_debug(0,
305
                          "JBD: ignoring %d transaction%s from the journal.\n",
306
                          dropped, (dropped == 1) ? "" : "s");
307
                journal->j_transaction_sequence = ++info.end_transaction;
308
        }
309
 
310
        journal->j_tail = 0;
311
 
312
        return err;
313
}
314
 
315
static int do_one_pass(journal_t *journal,
316
                        struct recovery_info *info, enum passtype pass)
317
{
318
 
319
        unsigned int            first_commit_ID, next_commit_ID;
320
        unsigned long           next_log_block;
321
        int                     err, success = 0;
322
        journal_superblock_t *  sb;
323
        journal_header_t *      tmp;
324
        struct buffer_head *    bh;
325
        unsigned int            sequence;
326
        int                     blocktype;
327
 
328
        /* Precompute the maximum metadata descriptors in a descriptor block */
329
        int                     MAX_BLOCKS_PER_DESC;
330
        MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
331
                               / sizeof(journal_block_tag_t));
332
 
333
        /*
334
         * First thing is to establish what we expect to find in the log
335
         * (in terms of transaction IDs), and where (in terms of log
336
         * block offsets): query the superblock.
337
         */
338
 
339
        sb = journal->j_superblock;
340
        next_commit_ID = ntohl(sb->s_sequence);
341
        next_log_block = ntohl(sb->s_start);
342
 
343
        first_commit_ID = next_commit_ID;
344
        if (pass == PASS_SCAN)
345
                info->start_transaction = first_commit_ID;
346
 
347
        jbd_debug(1, "Starting recovery pass %d\n", pass);
348
 
349
        /*
350
         * Now we walk through the log, transaction by transaction,
351
         * making sure that each transaction has a commit block in the
352
         * expected place.  Each complete transaction gets replayed back
353
         * into the main filesystem.
354
         */
355
 
356
        while (1) {
357
                int                     flags;
358
                char *                  tagp;
359
                journal_block_tag_t *   tag;
360
                struct buffer_head *    obh;
361
                struct buffer_head *    nbh;
362
 
363
                /* If we already know where to stop the log traversal,
364
                 * check right now that we haven't gone past the end of
365
                 * the log. */
366
 
367
                if (pass != PASS_SCAN)
368
                        if (tid_geq(next_commit_ID, info->end_transaction))
369
                                break;
370
 
371
                jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
372
                          next_commit_ID, next_log_block, journal->j_last);
373
 
374
                /* Skip over each chunk of the transaction looking
375
                 * either the next descriptor block or the final commit
376
                 * record. */
377
 
378
                jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
379
                err = jread(&bh, journal, next_log_block);
380
                if (err)
381
                        goto failed;
382
 
383
                next_log_block++;
384
                wrap(journal, next_log_block);
385
 
386
                /* What kind of buffer is it?
387
                 *
388
                 * If it is a descriptor block, check that it has the
389
                 * expected sequence number.  Otherwise, we're all done
390
                 * here. */
391
 
392
                tmp = (journal_header_t *)bh->b_data;
393
 
394
                if (tmp->h_magic != htonl(JFS_MAGIC_NUMBER)) {
395
                        brelse(bh);
396
                        break;
397
                }
398
 
399
                blocktype = ntohl(tmp->h_blocktype);
400
                sequence = ntohl(tmp->h_sequence);
401
                jbd_debug(3, "Found magic %d, sequence %d\n",
402
                          blocktype, sequence);
403
 
404
                if (sequence != next_commit_ID) {
405
                        brelse(bh);
406
                        break;
407
                }
408
 
409
                /* OK, we have a valid descriptor block which matches
410
                 * all of the sequence number checks.  What are we going
411
                 * to do with it?  That depends on the pass... */
412
 
413
                switch(blocktype) {
414
                case JFS_DESCRIPTOR_BLOCK:
415
                        /* If it is a valid descriptor block, replay it
416
                         * in pass REPLAY; otherwise, just skip over the
417
                         * blocks it describes. */
418
                        if (pass != PASS_REPLAY) {
419
                                next_log_block +=
420
                                        count_tags(bh, journal->j_blocksize);
421
                                wrap(journal, next_log_block);
422
                                brelse(bh);
423
                                continue;
424
                        }
425
 
426
                        /* A descriptor block: we can now write all of
427
                         * the data blocks.  Yay, useful work is finally
428
                         * getting done here! */
429
 
430
                        tagp = &bh->b_data[sizeof(journal_header_t)];
431
                        while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
432
                               <= journal->j_blocksize) {
433
                                unsigned long io_block;
434
 
435
                                tag = (journal_block_tag_t *) tagp;
436
                                flags = ntohl(tag->t_flags);
437
 
438
                                io_block = next_log_block++;
439
                                wrap(journal, next_log_block);
440
                                err = jread(&obh, journal, io_block);
441
                                if (err) {
442
                                        /* Recover what we can, but
443
                                         * report failure at the end. */
444
                                        success = err;
445
                                        printk (KERN_ERR
446
                                                "JBD: IO error %d recovering "
447
                                                "block %ld in log\n",
448
                                                err, io_block);
449
                                } else {
450
                                        unsigned long blocknr;
451
 
452
                                        J_ASSERT(obh != NULL);
453
                                        blocknr = ntohl(tag->t_blocknr);
454
 
455
                                        /* If the block has been
456
                                         * revoked, then we're all done
457
                                         * here. */
458
                                        if (journal_test_revoke
459
                                            (journal, blocknr,
460
                                             next_commit_ID)) {
461
                                                brelse(obh);
462
                                                ++info->nr_revoke_hits;
463
                                                goto skip_write;
464
                                        }
465
 
466
                                        /* Find a buffer for the new
467
                                         * data being restored */
468
                                        nbh = getblk(journal->j_fs_dev, blocknr,
469
                                                     journal->j_blocksize);
470
                                        if (nbh == NULL) {
471
                                                printk(KERN_ERR
472
                                                       "JBD: Out of memory "
473
                                                       "during recovery.\n");
474
                                                err = -ENOMEM;
475
                                                brelse(bh);
476
                                                brelse(obh);
477
                                                goto failed;
478
                                        }
479
 
480
                                        lock_buffer(nbh);
481
                                        memcpy(nbh->b_data, obh->b_data,
482
                                                        journal->j_blocksize);
483
                                        if (flags & JFS_FLAG_ESCAPE) {
484
                                                *((unsigned int *)bh->b_data) =
485
                                                        htonl(JFS_MAGIC_NUMBER);
486
                                        }
487
 
488
                                        BUFFER_TRACE(nbh, "marking dirty");
489
                                        mark_buffer_dirty(nbh);
490
                                        BUFFER_TRACE(nbh, "marking uptodate");
491
                                        mark_buffer_uptodate(nbh, 1);
492
                                        unlock_buffer(nbh);
493
                                        ++info->nr_replays;
494
                                        /* ll_rw_block(WRITE, 1, &nbh); */
495
                                        brelse(obh);
496
                                        brelse(nbh);
497
                                }
498
 
499
                        skip_write:
500
                                tagp += sizeof(journal_block_tag_t);
501
                                if (!(flags & JFS_FLAG_SAME_UUID))
502
                                        tagp += 16;
503
 
504
                                if (flags & JFS_FLAG_LAST_TAG)
505
                                        break;
506
                        }
507
 
508
                        brelse(bh);
509
                        continue;
510
 
511
                case JFS_COMMIT_BLOCK:
512
                        /* Found an expected commit block: not much to
513
                         * do other than move on to the next sequence
514
                         * number. */
515
                        brelse(bh);
516
                        next_commit_ID++;
517
                        continue;
518
 
519
                case JFS_REVOKE_BLOCK:
520
                        /* If we aren't in the REVOKE pass, then we can
521
                         * just skip over this block. */
522
                        if (pass != PASS_REVOKE) {
523
                                brelse(bh);
524
                                continue;
525
                        }
526
 
527
                        err = scan_revoke_records(journal, bh,
528
                                                  next_commit_ID, info);
529
                        brelse(bh);
530
                        if (err)
531
                                goto failed;
532
                        continue;
533
 
534
                default:
535
                        jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
536
                                  blocktype);
537
                        goto done;
538
                }
539
        }
540
 
541
 done:
542
        /*
543
         * We broke out of the log scan loop: either we came to the
544
         * known end of the log or we found an unexpected block in the
545
         * log.  If the latter happened, then we know that the "current"
546
         * transaction marks the end of the valid log.
547
         */
548
 
549
        if (pass == PASS_SCAN)
550
                info->end_transaction = next_commit_ID;
551
        else {
552
                /* It's really bad news if different passes end up at
553
                 * different places (but possible due to IO errors). */
554
                if (info->end_transaction != next_commit_ID) {
555
                        printk (KERN_ERR "JBD: recovery pass %d ended at "
556
                                "transaction %u, expected %u\n",
557
                                pass, next_commit_ID, info->end_transaction);
558
                        if (!success)
559
                                success = -EIO;
560
                }
561
        }
562
 
563
        return success;
564
 
565
 failed:
566
        return err;
567
}
568
 
569
 
570
/* Scan a revoke record, marking all blocks mentioned as revoked. */
571
 
572
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
573
                               tid_t sequence, struct recovery_info *info)
574
{
575
        journal_revoke_header_t *header;
576
        int offset, max;
577
 
578
        header = (journal_revoke_header_t *) bh->b_data;
579
        offset = sizeof(journal_revoke_header_t);
580
        max = ntohl(header->r_count);
581
 
582
        while (offset < max) {
583
                unsigned long blocknr;
584
                int err;
585
 
586
                blocknr = ntohl(* ((unsigned int *) (bh->b_data+offset)));
587
                offset += 4;
588
                err = journal_set_revoke(journal, blocknr, sequence);
589
                if (err)
590
                        return err;
591
                ++info->nr_revokes;
592
        }
593
        return 0;
594
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.