OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [jbd/] [revoke.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * linux/fs/jbd/revoke.c
3
 *
4
 * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
5
 *
6
 * Copyright 2000 Red Hat corp --- All Rights Reserved
7
 *
8
 * This file is part of the Linux kernel and is made available under
9
 * the terms of the GNU General Public License, version 2, or at your
10
 * option, any later version, incorporated herein by reference.
11
 *
12
 * Journal revoke routines for the generic filesystem journaling code;
13
 * part of the ext2fs journaling system.
14
 *
15
 * Revoke is the mechanism used to prevent old log records for deleted
16
 * metadata from being replayed on top of newer data using the same
17
 * blocks.  The revoke mechanism is used in two separate places:
18
 *
19
 * + Commit: during commit we write the entire list of the current
20
 *   transaction's revoked blocks to the journal
21
 *
22
 * + Recovery: during recovery we record the transaction ID of all
23
 *   revoked blocks.  If there are multiple revoke records in the log
24
 *   for a single block, only the last one counts, and if there is a log
25
 *   entry for a block beyond the last revoke, then that log entry still
26
 *   gets replayed.
27
 *
28
 * We can get interactions between revokes and new log data within a
29
 * single transaction:
30
 *
31
 * Block is revoked and then journaled:
32
 *   The desired end result is the journaling of the new block, so we
33
 *   cancel the revoke before the transaction commits.
34
 *
35
 * Block is journaled and then revoked:
36
 *   The revoke must take precedence over the write of the block, so we
37
 *   need either to cancel the journal entry or to write the revoke
38
 *   later in the log than the log block.  In this case, we choose the
39
 *   latter: journaling a block cancels any revoke record for that block
40
 *   in the current transaction, so any revoke for that block in the
41
 *   transaction must have happened after the block was journaled and so
42
 *   the revoke must take precedence.
43
 *
44
 * Block is revoked and then written as data:
45
 *   The data write is allowed to succeed, but the revoke is _not_
46
 *   cancelled.  We still need to prevent old log records from
47
 *   overwriting the new data.  We don't even need to clear the revoke
48
 *   bit here.
49
 *
50
 * Revoke information on buffers is a tri-state value:
51
 *
52
 * RevokeValid clear:   no cached revoke status, need to look it up
53
 * RevokeValid set, Revoked clear:
54
 *                      buffer has not been revoked, and cancel_revoke
55
 *                      need do nothing.
56
 * RevokeValid set, Revoked set:
57
 *                      buffer has been revoked.
58
 */
59
 
60
#ifndef __KERNEL__
61
#include "jfs_user.h"
62
#else
63
#include <linux/sched.h>
64
#include <linux/fs.h>
65
#include <linux/jbd.h>
66
#include <linux/errno.h>
67
#include <linux/slab.h>
68
#include <linux/locks.h>
69
#include <linux/list.h>
70
#include <linux/smp_lock.h>
71
#include <linux/init.h>
72
#endif
73
 
74
/* Slab cache from which every struct jbd_revoke_record_s is allocated. */
static kmem_cache_t *revoke_record_cache;
/* Slab cache from which each journal's struct jbd_revoke_table_s comes. */
static kmem_cache_t *revoke_table_cache;
76
 
77
/* Each revoke record represents one single revoked block.  During
78
   journal replay, this involves recording the transaction ID of the
79
   last transaction to revoke this block. */
80
 
81
struct jbd_revoke_record_s
82
{
83
        struct list_head  hash;
84
        tid_t             sequence;     /* Used for recovery only */
85
        unsigned long     blocknr;
86
};
87
 
88
 
89
/* The revoke table is just a simple hash table of revoke records. */
90
struct jbd_revoke_table_s
{
        /* Number of buckets.  Must be a power of two, because hash()
         * masks with (hash_size - 1).  It is conceivable that recovery
         * might want a larger table than normal operation. */
        int               hash_size;
        /* log2(hash_size), computed once in journal_init_revoke(). */
        int               hash_shift;
        /* Array of hash_size list heads, one per bucket. */
        struct list_head *hash_table;
};
98
 
99
 
100
#ifdef __KERNEL__
/* Commit-time helpers; both are defined further down in this file. */
static void write_one_revoke_record(journal_t *journal,
                                    transaction_t *transaction,
                                    struct journal_head **descriptorp,
                                    int *offsetp,
                                    struct jbd_revoke_record_s *record);
static void flush_descriptor(journal_t *journal,
                             struct journal_head *descriptor,
                             int offset);
#endif
106
 
107
/* Utility functions to maintain the revoke table */
108
 
109
/* Borrowed from buffer.c: this is a tried and tested block hash function */
110
/*
 * Map a block number onto a bucket index of the journal's revoke table.
 *
 * journal: journal whose j_revoke table supplies hash_shift/hash_size.
 * block:   block number to hash.
 *
 * Returns an index in [0, hash_size - 1]; the final mask relies on
 * hash_size being a power of two (asserted in journal_init_revoke()).
 *
 * The two left-shift counts are derived from hash_shift and become
 * negative for tables with fewer than 64 / 4096 buckets.  Shifting by a
 * negative count is undefined behaviour in C, so the counts are clamped
 * to zero.  The bucket index is only ever used for the in-memory table,
 * so the exact mixing for small tables is free to differ; for
 * hash_shift >= 12 the result is unchanged.
 */
static inline int hash(journal_t *journal, unsigned long block)
{
        struct jbd_revoke_table_s *table = journal->j_revoke;
        int hash_shift = table->hash_shift;
        int shift_hi = hash_shift - 6;
        int shift_lo = hash_shift - 12;

        if (shift_hi < 0)
                shift_hi = 0;
        if (shift_lo < 0)
                shift_lo = 0;

        return ((block << shift_hi) ^
                (block >> 13) ^
                (block << shift_lo)) & (table->hash_size - 1);
}
119
 
120
/*
 * Allocate a revoke record for blocknr, stamp it with seq and add it to
 * the head of the matching hash chain of journal->j_revoke.
 *
 * Returns 0 on success.  When the allocation fails the function either
 * returns -ENOMEM (journal_oom_retry is zero) or yields the CPU and
 * tries again until an allocation succeeds.
 */
int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq)
{
        struct jbd_revoke_record_s *rec;
        struct list_head *bucket;

        for (;;) {
                rec = kmem_cache_alloc(revoke_record_cache, GFP_NOFS);
                if (rec)
                        break;
                if (!journal_oom_retry)
                        return -ENOMEM;
                jbd_debug(1, "ENOMEM in %s, retrying.\n", __FUNCTION__);
                yield();
        }

        rec->sequence = seq;
        rec->blocknr = blocknr;
        bucket = &journal->j_revoke->hash_table[hash(journal, blocknr)];
        list_add(&rec->hash, bucket);
        return 0;
}
143
 
144
/* Find a revoke record in the journal's hash table. */
145
 
146
/*
 * Walk the hash chain selected by blocknr and return the first record
 * whose blocknr matches, or NULL when the chain holds no such record.
 */
static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
                                                      unsigned long blocknr)
{
        struct list_head *bucket;
        struct list_head *pos;

        bucket = &journal->j_revoke->hash_table[hash(journal, blocknr)];

        for (pos = bucket->next; pos != bucket; pos = pos->next) {
                /* 'hash' is the first member of the record, so a chain
                 * node and its record share the same address. */
                struct jbd_revoke_record_s *rec =
                        (struct jbd_revoke_record_s *) pos;

                if (rec->blocknr == blocknr)
                        return rec;
        }
        return NULL;
}
162
 
163
/*
 * Create the two slab caches used by this file: "revoke_record" for
 * individual records and "revoke_table" for per-journal tables.
 *
 * Returns 0 on success or -ENOMEM.  On failure neither cache is left
 * behind: if the second creation fails, the first cache is destroyed
 * and its pointer reset to NULL.
 */
int __init journal_init_revoke_caches(void)
{
        revoke_record_cache =
                kmem_cache_create("revoke_record",
                                  sizeof(struct jbd_revoke_record_s),
                                  0, SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!revoke_record_cache)
                return -ENOMEM;

        revoke_table_cache =
                kmem_cache_create("revoke_table",
                                  sizeof(struct jbd_revoke_table_s),
                                  0, 0, NULL, NULL);
        if (revoke_table_cache)
                return 0;

        /* Second cache failed: undo the first one. */
        kmem_cache_destroy(revoke_record_cache);
        revoke_record_cache = NULL;
        return -ENOMEM;
}
181
 
182
void journal_destroy_revoke_caches(void)
183
{
184
        kmem_cache_destroy(revoke_record_cache);
185
        revoke_record_cache = 0;
186
        kmem_cache_destroy(revoke_table_cache);
187
        revoke_table_cache = 0;
188
}
189
 
190
/* Initialise the revoke table for a given journal to a given size. */
191
 
192
/*
 * Allocate and attach an empty revoke table with hash_size buckets.
 *
 * journal:   must not already own a revoke table (asserted).
 * hash_size: bucket count; asserted to be a power of two.
 *
 * Returns 0 on success.  Returns -ENOMEM if either the table or its
 * bucket array cannot be allocated; journal->j_revoke is NULL in that
 * case.
 */
int journal_init_revoke(journal_t *journal, int hash_size)
{
        struct jbd_revoke_table_s *table;
        int bits = 0;
        int n;

        J_ASSERT (journal->j_revoke == NULL);

        table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
        journal->j_revoke = table;
        if (!table)
                return -ENOMEM;

        /* Check that the hash_size is a power of two */
        J_ASSERT ((hash_size & (hash_size-1)) == 0);

        table->hash_size = hash_size;

        /* hash_shift = log2(hash_size): count the halvings needed to
         * reach zero. */
        for (n = hash_size >> 1; n != 0; n >>= 1)
                bits++;
        table->hash_shift = bits;

        table->hash_table =
                kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
        if (!table->hash_table) {
                kmem_cache_free(revoke_table_cache, table);
                journal->j_revoke = NULL;
                return -ENOMEM;
        }

        /* Every bucket starts out as an empty list. */
        for (n = 0; n < hash_size; n++)
                INIT_LIST_HEAD(&table->hash_table[n]);

        return 0;
}
226
 
227
/* Destroy a journal's revoke table.  The table must already be empty! */
228
 
229
/*
 * Free the journal's revoke table and its bucket array, then clear
 * journal->j_revoke.  Does nothing when the journal has no table.
 * Every bucket is asserted to be empty first; records are not freed
 * here.
 */
void journal_destroy_revoke(journal_t *journal)
{
        struct jbd_revoke_table_s *table = journal->j_revoke;
        struct list_head *hash_list;
        int bucket;

        if (table == NULL)
                return;

        bucket = 0;
        while (bucket < table->hash_size) {
                hash_list = &table->hash_table[bucket];
                J_ASSERT (list_empty(hash_list));
                bucket++;
        }

        kfree(table->hash_table);
        kmem_cache_free(revoke_table_cache, table);
        journal->j_revoke = NULL;
}
248
 
249
 
250
#ifdef __KERNEL__
251
 
252
/*
253
 * journal_revoke: revoke a given buffer_head from the journal.  This
254
 * prevents the block from being replayed during recovery if we take a
255
 * crash after this current transaction commits.  Any subsequent
256
 * metadata writes of the buffer in this transaction cancel the
257
 * revoke.
258
 *
259
 * Note that this call may block --- it is up to the caller to make
260
 * sure that there are no further calls to journal_write_metadata
261
 * before the revoke is complete.  In ext3, this implies calling the
262
 * revoke before clearing the block bitmap when we are deleting
263
 * metadata.
264
 *
265
 * Revoke performs a journal_forget on any buffer_head passed in as a
266
 * parameter, but does _not_ forget the buffer_head if the bh was only
267
 * found implicitly.
268
 *
269
 * bh_in may not be a journalled buffer - it may have come off
270
 * the hash tables without an attached journal_head.
271
 *
272
 * If bh_in is non-zero, journal_revoke() will decrement its b_count
273
 * by one.
274
 */
275
 
276
/*
 * Record a revoke for blocknr in the transaction that handle belongs to.
 *
 * handle:  running handle; its transaction supplies the journal and the
 *          tid stored in the revoke record.
 * blocknr: block being revoked.
 * bh_in:   buffer_head for the block, or NULL.  When non-NULL it is
 *          passed to journal_forget(); when NULL the buffer hash is
 *          searched and any buffer found is released again with
 *          __brelse() once its revoke bits are set.
 *
 * Returns 0 on success, -EINVAL if the revoke feature cannot be enabled
 * on the journal, or the error from insert_revoke_hash().
 */
int journal_revoke(handle_t *handle, unsigned long blocknr,
                   struct buffer_head *bh_in)
{
        struct buffer_head *bh = NULL;
        journal_t *journal;
        kdev_t dev;
        int err;

        if (bh_in)
                BUFFER_TRACE(bh_in, "enter");

        journal = handle->h_transaction->t_journal;
        if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){
                J_ASSERT (!"Cannot set revoke feature!");
                return -EINVAL;
        }

        dev = journal->j_fs_dev;
        bh = bh_in;

        if (!bh) {
                /* No buffer supplied: see whether one is cached. */
                bh = get_hash_table(dev, blocknr, journal->j_blocksize);
                if (bh)
                        BUFFER_TRACE(bh, "found on hash");
        }
#ifdef JBD_EXPENSIVE_CHECKING
        else {
                struct buffer_head *bh2;

                /* If there is a different buffer_head lying around in
                 * memory anywhere... */
                bh2 = get_hash_table(dev, blocknr, journal->j_blocksize);
                if (bh2) {
                        /* ... and it has RevokeValid status... */
                        if ((bh2 != bh) &&
                            test_bit(BH_RevokeValid, &bh2->b_state))
                                /* ...then it better be revoked too,
                                 * since it's illegal to create a revoke
                                 * record against a buffer_head which is
                                 * not marked revoked --- that would
                                 * risk missing a subsequent revoke
                                 * cancel. */
                                J_ASSERT_BH(bh2, test_bit(BH_Revoked, &
                                                          bh2->b_state));
                        __brelse(bh2);
                }
        }
#endif

        /* We really ought not ever to revoke twice in a row without
           first having the revoke cancelled: it's illegal to free a
           block twice without allocating it in between! */
        if (bh) {
                J_ASSERT_BH(bh, !test_bit(BH_Revoked, &bh->b_state));
                set_bit(BH_Revoked, &bh->b_state);
                set_bit(BH_RevokeValid, &bh->b_state);
                if (bh_in) {
                        BUFFER_TRACE(bh_in, "call journal_forget");
                        journal_forget(handle, bh_in);
                } else {
                        /* Drop the reference taken by get_hash_table(). */
                        BUFFER_TRACE(bh, "call brelse");
                        __brelse(bh);
                }
        }

        /* The hash table itself is only modified under the journal lock. */
        lock_journal(journal);
        jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in);
        err = insert_revoke_hash(journal, blocknr,
                                handle->h_transaction->t_tid);
        unlock_journal(journal);

        /* bh_in is allowed to be NULL, so guard the exit trace exactly
         * like the entry trace above rather than handing NULL to the
         * tracing macro. */
        if (bh_in)
                BUFFER_TRACE(bh_in, "exit");
        return err;
}
349
 
350
/*
351
 * Cancel an outstanding revoke.  For use only internally by the
352
 * journaling code (called from journal_get_write_access).
353
 *
354
 * We trust the BH_Revoked bit on the buffer if the buffer is already
355
 * being journaled: if there is no revoke pending on the buffer, then we
356
 * don't do anything here.
357
 *
358
 * This would break if it were possible for a buffer to be revoked and
359
 * discarded, and then reallocated within the same transaction.  In such
360
 * a case we would have lost the revoked bit, but when we arrived here
361
 * the second time we would still have a pending revoke to cancel.  So,
362
 * do not trust the Revoked bit on buffers unless RevokeValid is also
363
 * set.
364
 *
365
 * The caller must have the journal locked.
366
 */
367
/*
 * Remove any pending revoke record for the block behind jh.
 *
 * Returns 1 if a revoke record was found and deleted, 0 otherwise.
 * On return the buffer has BH_RevokeValid set and BH_Revoked clear.
 */
int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
{
        journal_t *journal = handle->h_transaction->t_journal;
        struct buffer_head *bh = jh2bh(jh);
        struct buffer_head *bh2;
        struct jbd_revoke_record_s *record;
        int need_cancel = 1;
        int did_revoke = 0;      /* akpm: debug */

        jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);

        /* If RevokeValid was already set, the cached Revoked bit is
         * authoritative and the table is searched only when it was set.
         * Otherwise the cached bit means nothing: clear it and always
         * search the table. */
        if (test_and_set_bit(BH_RevokeValid, &bh->b_state)) {
                need_cancel = (test_and_clear_bit(BH_Revoked, &bh->b_state));
        } else {
                clear_bit(BH_Revoked, &bh->b_state);
        }

        if (need_cancel) {
                record = find_revoke_record(journal, bh->b_blocknr);
                if (record) {
                        jbd_debug(4, "cancelled existing revoke on "
                                  "blocknr %lu\n", bh->b_blocknr);
                        list_del(&record->hash);
                        kmem_cache_free(revoke_record_cache, record);
                        did_revoke = 1;
                }
        }

#ifdef JBD_EXPENSIVE_CHECKING
        /* There better not be one left behind by now! */
        record = find_revoke_record(journal, bh->b_blocknr);
        J_ASSERT_JH(jh, record == NULL);
#endif

        /* Only an unhashed buffer whose revoke was just cancelled can
         * have a hashed alias that still carries a stale Revoked bit. */
        if (!need_cancel || bh->b_pprev)
                return did_revoke;

        bh2 = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
        if (bh2) {
                clear_bit(BH_Revoked, &bh2->b_state);
                __brelse(bh2);
        }

        return did_revoke;
}
420
 
421
 
422
/*
423
 * Write revoke records to the journal for all entries in the current
424
 * revoke hash, deleting the entries as we go.
425
 *
426
 * Called with the journal lock held.
427
 */
428
 
429
void journal_write_revoke_records(journal_t *journal,
430
                                  transaction_t *transaction)
431
{
432
        struct journal_head *descriptor;
433
        struct jbd_revoke_record_s *record;
434
        struct jbd_revoke_table_s *revoke;
435
        struct list_head *hash_list;
436
        int i, offset, count;
437
 
438
        descriptor = NULL;
439
        offset = 0;
440
        count = 0;
441
        revoke = journal->j_revoke;
442
 
443
        for (i = 0; i < revoke->hash_size; i++) {
444
                hash_list = &revoke->hash_table[i];
445
 
446
                while (!list_empty(hash_list)) {
447
                        record = (struct jbd_revoke_record_s *)
448
                                hash_list->next;
449
                        write_one_revoke_record(journal, transaction,
450
                                                &descriptor, &offset,
451
                                                record);
452
                        count++;
453
                        list_del(&record->hash);
454
                        kmem_cache_free(revoke_record_cache, record);
455
                }
456
        }
457
        if (descriptor)
458
                flush_descriptor(journal, descriptor, offset);
459
        jbd_debug(1, "Wrote %d revoke records\n", count);
460
}
461
 
462
/*
463
 * Write out one revoke record.  We need to create a new descriptor
464
 * block if the old one is full or if we have not already created one.
465
 */
466
 
467
static void write_one_revoke_record(journal_t *journal,
468
                                    transaction_t *transaction,
469
                                    struct journal_head **descriptorp,
470
                                    int *offsetp,
471
                                    struct jbd_revoke_record_s *record)
472
{
473
        struct journal_head *descriptor;
474
        int offset;
475
        journal_header_t *header;
476
 
477
        /* If we are already aborting, this all becomes a noop.  We
478
           still need to go round the loop in
479
           journal_write_revoke_records in order to free all of the
480
           revoke records: only the IO to the journal is omitted. */
481
        if (is_journal_aborted(journal))
482
                return;
483
 
484
        descriptor = *descriptorp;
485
        offset = *offsetp;
486
 
487
        /* Make sure we have a descriptor with space left for the record */
488
        if (descriptor) {
489
                if (offset == journal->j_blocksize) {
490
                        flush_descriptor(journal, descriptor, offset);
491
                        descriptor = NULL;
492
                }
493
        }
494
 
495
        if (!descriptor) {
496
                descriptor = journal_get_descriptor_buffer(journal);
497
                if (!descriptor)
498
                        return;
499
                header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
500
                header->h_magic     = htonl(JFS_MAGIC_NUMBER);
501
                header->h_blocktype = htonl(JFS_REVOKE_BLOCK);
502
                header->h_sequence  = htonl(transaction->t_tid);
503
 
504
                /* Record it so that we can wait for IO completion later */
505
                JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
506
                journal_file_buffer(descriptor, transaction, BJ_LogCtl);
507
 
508
                offset = sizeof(journal_revoke_header_t);
509
                *descriptorp = descriptor;
510
        }
511
 
512
        * ((unsigned int *)(&jh2bh(descriptor)->b_data[offset])) =
513
                htonl(record->blocknr);
514
        offset += 4;
515
        *offsetp = offset;
516
}
517
 
518
/*
519
 * Flush a revoke descriptor out to the journal.  If we are aborting,
520
 * this is a noop; otherwise we are generating a buffer which needs to
521
 * be waited for during commit, so it has to go onto the appropriate
522
 * journal buffer list.
523
 */
524
 
525
/*
 * Finish a revoke descriptor block and submit it for writing.
 *
 * offset is the number of bytes used in the block; it is stored in the
 * header's r_count in network byte order.  If the journal has been
 * aborted, no IO is issued: the buffer is unlocked and released.
 */
static void flush_descriptor(journal_t *journal,
                             struct journal_head *descriptor,
                             int offset)
{
        struct buffer_head *bh = jh2bh(descriptor);
        journal_revoke_header_t *header;

        if (is_journal_aborted(journal)) {
                JBUFFER_TRACE(descriptor, "brelse");
                unlock_buffer(bh);
                __brelse(bh);
                return;
        }

        header = (journal_revoke_header_t *) bh->b_data;
        header->r_count = htonl(offset);
        set_bit(BH_JWrite, &bh->b_state);

        BUFFER_TRACE(bh, "write");
        clear_bit(BH_Dirty, &bh->b_state);
        bh->b_end_io = journal_end_buffer_io_sync;
        submit_bh(WRITE, bh);
}
549
 
550
#endif
551
 
552
/*
553
 * Revoke support for recovery.
554
 *
555
 * Recovery needs to be able to:
556
 *
557
 *  record all revoke records, including the tid of the latest instance
558
 *  of each revoke in the journal
559
 *
560
 *  check whether a given block in a given transaction should be replayed
561
 *  (ie. has not been revoked by a revoke record in that or a subsequent
562
 *  transaction)
563
 *
564
 *  empty the revoke table after recovery.
565
 */
566
 
567
/*
568
 * First, setting revoke records.  We create a new revoke record for
569
 * every block ever revoked in the log as we scan it for recovery, and
570
 * we update the existing records if we find multiple revokes for a
571
 * single block.
572
 */
573
 
574
int journal_set_revoke(journal_t *journal,
575
                       unsigned long blocknr,
576
                       tid_t sequence)
577
{
578
        struct jbd_revoke_record_s *record;
579
 
580
        record = find_revoke_record(journal, blocknr);
581
        if (record) {
582
                /* If we have multiple occurences, only record the
583
                 * latest sequence number in the hashed record */
584
                if (tid_gt(sequence, record->sequence))
585
                        record->sequence = sequence;
586
                return 0;
587
        }
588
        return insert_revoke_hash(journal, blocknr, sequence);
589
}
590
 
591
/*
592
 * Test revoke records.  For a given block referenced in the log, has
593
 * that block been revoked?  A revoke record with a given transaction
594
 * sequence number revokes all blocks in that transaction and earlier
595
 * ones, but later transactions still need to be replayed.
596
 */
597
 
598
int journal_test_revoke(journal_t *journal,
599
                        unsigned long blocknr,
600
                        tid_t sequence)
601
{
602
        struct jbd_revoke_record_s *record;
603
 
604
        record = find_revoke_record(journal, blocknr);
605
        if (!record)
606
                return 0;
607
        if (tid_gt(sequence, record->sequence))
608
                return 0;
609
        return 1;
610
}
611
 
612
/*
613
 * Finally, once recovery is over, we need to clear the revoke table so
614
 * that it can be reused by the running filesystem.
615
 */
616
 
617
/*
 * Unlink and free every record in the journal's revoke table, leaving
 * all buckets empty.  The table itself stays allocated.
 */
void journal_clear_revoke(journal_t *journal)
{
        struct jbd_revoke_table_s *table = journal->j_revoke;
        int bucket;

        for (bucket = 0; bucket < table->hash_size; bucket++) {
                struct list_head *head = &table->hash_table[bucket];

                /* Pop records off the front until the chain is empty. */
                while (!list_empty(head)) {
                        struct jbd_revoke_record_s *rec =
                                (struct jbd_revoke_record_s *) head->next;

                        list_del(&rec->hash);
                        kmem_cache_free(revoke_record_cache, rec);
                }
        }
}
635
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.