/*
 * linux/fs/transaction.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Generic filesystem transaction handling code; part of the ext2fs
 * journaling system.
 *
 * This file manages transactions (compound commits managed by the
 * journaling code) and handles (individual atomic operations by the
 * filesystem).
 */

#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/locks.h>
#include <linux/timer.h>
#include <linux/smp_lock.h>
#include <linux/mm.h>

extern spinlock_t journal_datalist_lock;

/*
 * get_transaction: obtain a new transaction_t object.
 *
 * Simply allocate and initialise a new transaction.  Create it in
 * RUNNING state and add it to the current journal (which should not
 * have an existing running transaction: we only make a new transaction
 * once we have started to commit the old one).
 *
 * Preconditions:
 *      The journal MUST be locked.  We don't perform atomic mallocs on the
 *      new transaction and we can't block without protecting against other
 *      processes trying to touch the journal while it is in transition.
 */

static transaction_t * get_transaction (journal_t * journal, int is_try)
{
        transaction_t * transaction;

        transaction = jbd_kmalloc (sizeof (transaction_t), GFP_NOFS);
        if (!transaction)
                return NULL;

        memset (transaction, 0, sizeof (transaction_t));

        transaction->t_journal = journal;
        transaction->t_state = T_RUNNING;
        transaction->t_tid = journal->j_transaction_sequence++;
        transaction->t_expires = jiffies + journal->j_commit_interval;
        INIT_LIST_HEAD(&transaction->t_jcb);

        if (journal->j_commit_interval) {
                /* Set up the commit timer for the new transaction. */
                J_ASSERT (!journal->j_commit_timer_active);
                journal->j_commit_timer_active = 1;
                journal->j_commit_timer->expires = transaction->t_expires;
                add_timer(journal->j_commit_timer);
        }

        J_ASSERT (journal->j_running_transaction == NULL);
        journal->j_running_transaction = transaction;

        return transaction;
}

/*
 * Handle management.
 *
 * A handle_t is an object which represents a single atomic update to a
 * filesystem, and which tracks all of the modifications which form part
 * of that one update.
 */

/*
 * start_this_handle: Given a handle, deal with any locking or stalling
 * needed to make sure that there is enough journal space for the handle
 * to begin.  Attach the handle to a transaction and set up the
 * transaction's buffer credits.
 */

static int start_this_handle(journal_t *journal, handle_t *handle)
{
        transaction_t *transaction;
        int needed;
        int nblocks = handle->h_buffer_credits;

        if (nblocks > journal->j_max_transaction_buffers) {
                jbd_debug(1, "JBD: %s wants too many credits (%d > %d)\n",
                       current->comm, nblocks,
                       journal->j_max_transaction_buffers);
                return -ENOSPC;
        }

        jbd_debug(3, "New handle %p going live.\n", handle);

repeat:

        lock_journal(journal);

repeat_locked:

        if (is_journal_aborted(journal) ||
            (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
                unlock_journal(journal);
                return -EROFS;
        }

        /* Wait on the journal's transaction barrier if necessary */
        if (journal->j_barrier_count) {
                unlock_journal(journal);
                sleep_on(&journal->j_wait_transaction_locked);
                goto repeat;
        }

        if (!journal->j_running_transaction)
                get_transaction(journal, 0);
        /* @@@ Error? */
        J_ASSERT(journal->j_running_transaction);

        transaction = journal->j_running_transaction;

        /* If the current transaction is locked down for commit, wait
         * for the lock to be released. */

        if (transaction->t_state == T_LOCKED) {
                unlock_journal(journal);
                jbd_debug(3, "Handle %p stalling...\n", handle);
                sleep_on(&journal->j_wait_transaction_locked);
                goto repeat;
        }

        /* If there is not enough space left in the log to write all
         * potential buffers requested by this operation, we need to
         * stall pending a log checkpoint to free some more log
         * space. */

        needed = transaction->t_outstanding_credits + nblocks;

        if (needed > journal->j_max_transaction_buffers) {
                /* If the current transaction is already too large, then
                 * start to commit it: we can then go back and attach
                 * this handle to a new transaction. */

                jbd_debug(2, "Handle %p starting new commit...\n", handle);
                log_start_commit(journal, transaction);
                unlock_journal(journal);
                sleep_on(&journal->j_wait_transaction_locked);
                lock_journal(journal);
                goto repeat_locked;
        }

        /*
         * The commit code assumes that it can get enough log space
         * without forcing a checkpoint.  This is *critical* for
         * correctness: a checkpoint of a buffer which is also
         * associated with a committing transaction creates a deadlock,
         * so commit simply cannot force through checkpoints.
         *
         * We must therefore ensure the necessary space in the journal
         * *before* starting to dirty potentially checkpointed buffers
         * in the new transaction.
         *
         * The worst part is, any transaction currently committing can
         * reduce the free space arbitrarily.  Be careful to account for
         * those buffers when checkpointing.
         */

        /*
         * @@@ AKPM: This seems rather over-defensive.  We're giving commit
         * a _lot_ of headroom: 1/4 of the journal plus the size of
         * the committing transaction.  Really, we only need to give it
         * committing_transaction->t_outstanding_credits plus "enough" for
         * the log control blocks.
         * Also, this test is inconsistent with the matching one in
         * journal_extend().
         */
        needed = journal->j_max_transaction_buffers;
        if (journal->j_committing_transaction)
                needed += journal->j_committing_transaction->
                                        t_outstanding_credits;

        if (log_space_left(journal) < needed) {
                jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
                log_wait_for_space(journal, needed);
                goto repeat_locked;
        }

        /* OK, account for the buffers that this operation expects to
         * use and add the handle to the running transaction. */

        handle->h_transaction = transaction;
        transaction->t_outstanding_credits += nblocks;
        transaction->t_updates++;
        transaction->t_handle_count++;
        jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
                  handle, nblocks, transaction->t_outstanding_credits,
                  log_space_left(journal));

        unlock_journal(journal);

        return 0;
}

/* Allocate a new handle.  This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
        handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
        if (!handle)
                return NULL;
        memset(handle, 0, sizeof (handle_t));
        handle->h_buffer_credits = nblocks;
        handle->h_ref = 1;
        INIT_LIST_HEAD(&handle->h_jcb);

        return handle;
}

/**
 * handle_t *journal_start() - Obtain a new handle.
 * @journal: Journal to start transaction on.
 * @nblocks: number of block buffers we might modify
 *
 * We make sure that the transaction can guarantee at least nblocks of
 * modified buffers in the log.  We block until the log can guarantee
 * that much space.
 *
 * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
 * on failure.
 */
handle_t *journal_start(journal_t *journal, int nblocks)
{
        handle_t *handle = journal_current_handle();
        int err;

        if (!journal)
                return ERR_PTR(-EROFS);

        if (handle) {
                J_ASSERT(handle->h_transaction->t_journal == journal);
                handle->h_ref++;
                return handle;
        }

        handle = new_handle(nblocks);
        if (!handle)
                return ERR_PTR(-ENOMEM);

        current->journal_info = handle;

        err = start_this_handle(journal, handle);
        if (err < 0) {
                kfree(handle);
                current->journal_info = NULL;
                return ERR_PTR(err);
        }

        return handle;
}
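
/*
 * Typical caller pattern (an illustrative sketch, not code from this
 * file; the credit count is the caller's estimate of how many buffers
 * it may modify):
 *
 *      handle_t *handle = journal_start(journal, 3);
 *      if (IS_ERR(handle))
 *              return PTR_ERR(handle);
 *      ...modify up to 3 buffers under this handle...
 *      journal_stop(handle);
 */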

/*
 * Return zero on success
 */
static int try_start_this_handle(journal_t *journal, handle_t *handle)
{
        transaction_t *transaction;
        int needed;
        int nblocks = handle->h_buffer_credits;
        int ret = 0;

        jbd_debug(3, "New handle %p maybe going live.\n", handle);

        lock_journal(journal);

        if (is_journal_aborted(journal) ||
            (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
                ret = -EROFS;
                goto fail_unlock;
        }

        if (journal->j_barrier_count)
                goto fail_unlock;

        if (!journal->j_running_transaction && get_transaction(journal, 1) == 0)
                goto fail_unlock;

        transaction = journal->j_running_transaction;
        if (transaction->t_state == T_LOCKED)
                goto fail_unlock;

        needed = transaction->t_outstanding_credits + nblocks;
        /* We could run log_start_commit here */
        if (needed > journal->j_max_transaction_buffers)
                goto fail_unlock;

        needed = journal->j_max_transaction_buffers;
        if (journal->j_committing_transaction)
                needed += journal->j_committing_transaction->
                                                t_outstanding_credits;

        if (log_space_left(journal) < needed)
                goto fail_unlock;

        handle->h_transaction = transaction;
        transaction->t_outstanding_credits += nblocks;
        transaction->t_updates++;
        jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
                  handle, nblocks, transaction->t_outstanding_credits,
                  log_space_left(journal));
        unlock_journal(journal);
        return 0;

fail_unlock:
        unlock_journal(journal);
        if (ret >= 0)
                ret = -1;
        return ret;
}

/**
 * handle_t *journal_try_start() - Don't block, but try and get a handle
 * @journal: Journal to start transaction on.
 * @nblocks: number of block buffers we might modify
 *
 * Try to start a handle without blocking.  If we weren't able
 * to, return an ERR_PTR value.
 */
handle_t *journal_try_start(journal_t *journal, int nblocks)
{
        handle_t *handle = journal_current_handle();
        int err;

        if (!journal)
                return ERR_PTR(-EROFS);

        if (handle) {
                jbd_debug(4, "h_ref %d -> %d\n",
                                handle->h_ref,
                                handle->h_ref + 1);
                J_ASSERT(handle->h_transaction->t_journal == journal);
                if (is_handle_aborted(handle))
                        return ERR_PTR(-EIO);
                handle->h_ref++;
                return handle;
        } else {
                jbd_debug(4, "no current transaction\n");
        }

        if (is_journal_aborted(journal))
                return ERR_PTR(-EIO);

        handle = new_handle(nblocks);
        if (!handle)
                return ERR_PTR(-ENOMEM);

        current->journal_info = handle;

        err = try_start_this_handle(journal, handle);
        if (err < 0) {
                kfree(handle);
                current->journal_info = NULL;
                return ERR_PTR(err);
        }

        return handle;
}
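
/*
 * Illustrative use (a sketch, not taken from this file): a caller in a
 * context which must not block can attempt the non-blocking start and
 * defer the work on failure:
 *
 *      handle = journal_try_start(journal, nblocks);
 *      if (IS_ERR(handle))
 *              ...defer, or call journal_start() from a context
 *                 which is allowed to sleep...
 */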

/**
 * int journal_extend() - extend buffer credits.
 * @handle:  handle to 'extend'
 * @nblocks: nr blocks to try to extend by.
 *
 * Some transactions, such as large extends and truncates, can be done
 * atomically all at once or in several stages.  The operation requests
 * a credit for a number of buffer modifications in advance, but can
 * extend its credit if it needs more.
 *
 * journal_extend tries to give the running handle more buffer credits.
 * It does not guarantee that the allocation will succeed - this is
 * best-effort only.  The calling process MUST be able to deal cleanly
 * with a failure to extend here.
 *
 * Return 0 on success, non-zero on failure.
 *
 * return code < 0 implies an error
 * return code > 0 implies normal transaction-full status.
 */
int journal_extend (handle_t *handle, int nblocks)
{
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        int result;
        int wanted;

        lock_journal (journal);

        result = -EIO;
        if (is_handle_aborted(handle))
                goto error_out;

        result = 1;

        /* Don't extend a locked-down transaction! */
        if (handle->h_transaction->t_state != T_RUNNING) {
                jbd_debug(3, "denied handle %p %d blocks: "
                          "transaction not running\n", handle, nblocks);
                goto error_out;
        }

        wanted = transaction->t_outstanding_credits + nblocks;

        if (wanted > journal->j_max_transaction_buffers) {
                jbd_debug(3, "denied handle %p %d blocks: "
                          "transaction too large\n", handle, nblocks);
                goto error_out;
        }

        if (wanted > log_space_left(journal)) {
                jbd_debug(3, "denied handle %p %d blocks: "
                          "insufficient log space\n", handle, nblocks);
                goto error_out;
        }

        handle->h_buffer_credits += nblocks;
        transaction->t_outstanding_credits += nblocks;
        result = 0;

        jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);

error_out:
        unlock_journal (journal);
        return result;
}


/**
 * int journal_restart() - restart a handle.
 * @handle:  handle to restart
 * @nblocks: nr credits requested
 *
 * Restart a handle for a multi-transaction filesystem
 * operation.
 *
 * If the journal_extend() call above fails to grant new buffer credits
 * to a running handle, a call to journal_restart will commit the
 * handle's transaction so far and reattach the handle to a new
 * transaction capable of guaranteeing the requested number of
 * credits.
 */

int journal_restart(handle_t *handle, int nblocks)
{
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        int ret;

        /* If we've had an abort of any type, don't even think about
         * actually doing the restart! */
        if (is_handle_aborted(handle))
                return 0;

        /* First unlink the handle from its current transaction, and
         * start the commit on that. */

        J_ASSERT (transaction->t_updates > 0);
        J_ASSERT (journal_current_handle() == handle);

        transaction->t_outstanding_credits -= handle->h_buffer_credits;
        transaction->t_updates--;

        if (!transaction->t_updates)
                wake_up(&journal->j_wait_updates);

        jbd_debug(2, "restarting handle %p\n", handle);
        log_start_commit(journal, transaction);

        handle->h_buffer_credits = nblocks;
        ret = start_this_handle(journal, handle);
        return ret;
}
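
/*
 * The extend/restart idiom the two functions above are designed for
 * (an illustrative sketch):
 *
 *      err = journal_extend(handle, nblocks);
 *      if (err > 0)
 *              err = journal_restart(handle, nblocks);
 *      if (err)
 *              ...fail the operation...
 */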


/**
 * void journal_lock_updates () - establish a transaction barrier.
 * @journal:  Journal to establish a barrier on.
 *
 * This locks out any further updates from being started, and blocks
 * until all existing updates have completed, returning only once the
 * journal is in a quiescent state with no updates running.
 *
 * The journal lock should not be held on entry.
 */
void journal_lock_updates (journal_t *journal)
{
        lock_journal(journal);
        ++journal->j_barrier_count;

        /* Wait until there are no running updates */
        while (1) {
                transaction_t *transaction = journal->j_running_transaction;
                if (!transaction)
                        break;
                if (!transaction->t_updates)
                        break;

                unlock_journal(journal);
                sleep_on(&journal->j_wait_updates);
                lock_journal(journal);
        }

        unlock_journal(journal);

        /* We have now established a barrier against other normal
         * updates, but we also need to barrier against other
         * journal_lock_updates() calls to make sure that we serialise
         * special journal-locked operations too. */
        down(&journal->j_barrier);
}

/**
 * void journal_unlock_updates (journal_t* journal) - release barrier
 * @journal:  Journal to release the barrier on.
 *
 * Release a transaction barrier obtained with journal_lock_updates().
 *
 * Should be called without the journal lock held.
 */
void journal_unlock_updates (journal_t *journal)
{
        lock_journal(journal);

        J_ASSERT (journal->j_barrier_count != 0);

        up(&journal->j_barrier);
        --journal->j_barrier_count;
        wake_up(&journal->j_wait_transaction_locked);
        unlock_journal(journal);
}
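
/*
 * Barrier usage sketch (illustrative only): quiesce all updates around
 * an operation which needs the journal in a stable state:
 *
 *      journal_lock_updates(journal);
 *      ...perform the special journal-locked operation...
 *      journal_unlock_updates(journal);
 */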

/*
 * if the buffer is already part of the current transaction, then there
 * is nothing we need to do.  if it is already part of a prior
 * transaction which we are still committing to disk, then we need to
 * make sure that we do not overwrite the old copy: we do copy-out to
 * preserve the copy going to disk.  we also account the buffer against
 * the handle's metadata buffer credits (unless the buffer is already
 * part of the transaction, that is).
 */
static int
do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
{
        struct buffer_head *bh;
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        int error;
        char *frozen_buffer = NULL;
        int need_copy = 0;
        int locked;

        jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);

        JBUFFER_TRACE(jh, "entry");
repeat:
        bh = jh2bh(jh);

        /* @@@ Need to check for errors here at some point. */

        /*
         * AKPM: we have replaced all the lock_journal_bh_wait() stuff with a
         * simple lock_journal().  This code here will care for locked buffers.
         */
        locked = test_and_set_bit(BH_Lock, &bh->b_state);
        if (locked) {
                /* We can't reliably test the buffer state if we found
                 * it already locked, so just wait for the lock and
                 * retry. */
                unlock_journal(journal);
                __wait_on_buffer(bh);
                lock_journal(journal);
                goto repeat;
        }

        /* We now hold the buffer lock so it is safe to query the buffer
         * state.  Is the buffer dirty?
         *
         * If so, there are two possibilities.  The buffer may be
         * non-journaled, and undergoing a quite legitimate writeback.
         * Otherwise, it is journaled, and we don't expect dirty buffers
         * in that state (the buffers should be marked JBD_Dirty
         * instead.)  So either the IO is being done under our own
         * control and this is a bug, or it's a third party IO such as
         * dump(8) (which may leave the buffer scheduled for read ---
         * ie. locked but not dirty) or tune2fs (which may actually have
         * the buffer dirtied, ugh.)  */

        if (buffer_dirty(bh)) {
                spin_lock(&journal_datalist_lock);
                /* First question: is this buffer already part of the
                 * current transaction or the existing committing
                 * transaction? */
                if (jh->b_transaction) {
                        J_ASSERT_JH(jh, jh->b_transaction == transaction ||
                                    jh->b_transaction == journal->j_committing_transaction);
                        if (jh->b_next_transaction)
                                J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
                        JBUFFER_TRACE(jh, "Unexpected dirty buffer");
                        jbd_unexpected_dirty_buffer(jh);
                }
                spin_unlock(&journal_datalist_lock);
        }

        unlock_buffer(bh);

        error = -EROFS;
        if (is_handle_aborted(handle))
                goto out_unlocked;
        error = 0;

        spin_lock(&journal_datalist_lock);

        /* The buffer is already part of this transaction if
         * b_transaction or b_next_transaction points to it. */

        if (jh->b_transaction == transaction ||
            jh->b_next_transaction == transaction)
                goto done_locked;

        /* If there is already a copy-out version of this buffer, then
         * we don't need to make another one. */

        if (jh->b_frozen_data) {
                JBUFFER_TRACE(jh, "has frozen data");
                J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
                jh->b_next_transaction = transaction;

                J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
                handle->h_buffer_credits--;
                goto done_locked;
        }

        /* Is there data here we need to preserve? */

        if (jh->b_transaction && jh->b_transaction != transaction) {
                JBUFFER_TRACE(jh, "owned by older transaction");
                J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
                J_ASSERT_JH(jh, jh->b_transaction ==
                                        journal->j_committing_transaction);

                /* There is one case we have to be very careful about.
                 * If the committing transaction is currently writing
                 * this buffer out to disk and has NOT made a copy-out,
                 * then we cannot modify the buffer contents at all
                 * right now.  The essence of copy-out is that it is the
                 * extra copy, not the primary copy, which gets
                 * journaled.  If the primary copy is already going to
                 * disk then we cannot do copy-out here. */

                if (jh->b_jlist == BJ_Shadow) {
                        JBUFFER_TRACE(jh, "on shadow: sleep");
                        spin_unlock(&journal_datalist_lock);
                        unlock_journal(journal);
                        /* commit wakes up all shadow buffers after IO */
                        wait_event(jh2bh(jh)->b_wait,
                                                jh->b_jlist != BJ_Shadow);
                        lock_journal(journal);
                        goto repeat;
                }

                /* Only do the copy if the currently-owning transaction
                 * still needs it.  If it is on the Forget list, the
                 * committing transaction is past that stage.  The
                 * buffer had better remain locked during the kmalloc,
                 * but that should be true --- we hold the journal lock
                 * still and the buffer is already on the BUF_JOURNAL
                 * list so won't be flushed.
                 *
                 * Subtle point, though: if this is a get_undo_access,
                 * then we will be relying on the frozen_data to contain
                 * the new value of the committed_data record after the
                 * transaction, so we HAVE to force the frozen_data copy
                 * in that case. */

                if (jh->b_jlist != BJ_Forget || force_copy) {
                        JBUFFER_TRACE(jh, "generate frozen data");
                        if (!frozen_buffer) {
                                JBUFFER_TRACE(jh, "allocate memory for buffer");
                                spin_unlock(&journal_datalist_lock);
                                unlock_journal(journal);
                                frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
                                                            GFP_NOFS);
                                lock_journal(journal);
                                if (!frozen_buffer) {
                                        printk(KERN_EMERG
                                                "%s: OOM for frozen_buffer\n",
                                                __FUNCTION__);
                                        JBUFFER_TRACE(jh, "oom!");
                                        error = -ENOMEM;
                                        spin_lock(&journal_datalist_lock);
                                        goto done_locked;
                                }
                                goto repeat;
                        }

                        jh->b_frozen_data = frozen_buffer;
                        frozen_buffer = NULL;
                        need_copy = 1;
                }
                jh->b_next_transaction = transaction;
        }

        J_ASSERT(handle->h_buffer_credits > 0);
        handle->h_buffer_credits--;

        /* Finally, if the buffer is not journaled right now, we need to
         * make sure it doesn't get written to disk before the caller
         * actually commits the new data. */

        if (!jh->b_transaction) {
                JBUFFER_TRACE(jh, "no transaction");
                J_ASSERT_JH(jh, !jh->b_next_transaction);
                jh->b_transaction = transaction;
                JBUFFER_TRACE(jh, "file as BJ_Reserved");
                __journal_file_buffer(jh, transaction, BJ_Reserved);
        }

done_locked:
        spin_unlock(&journal_datalist_lock);
        if (need_copy) {
                struct page *page;
                int offset;
                char *source;

                J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
                            "Possible IO failure.\n");
                page = jh2bh(jh)->b_page;
                offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
                source = kmap(page);
                memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
                kunmap(page);
        }


        /* If we are about to journal a buffer, then any revoke pending
           on it is no longer valid. */
        journal_cancel_revoke(handle, jh);

out_unlocked:
        if (frozen_buffer)
                kfree(frozen_buffer);

        JBUFFER_TRACE(jh, "exit");
        return error;
}

/**
 * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
 * @handle: transaction to add buffer modifications to
 * @bh:     bh to be used for metadata writes
 *
 * Returns an error code or 0 on success.
 *
 * In full data journalling mode the buffer may be of type BJ_AsyncData,
 * because we're write()ing a buffer which is also part of a shared mapping.
 */

int journal_get_write_access (handle_t *handle, struct buffer_head *bh)
{
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        struct journal_head *jh = journal_add_journal_head(bh);
        int rc;

        /* We do not want to get caught playing with fields which the
         * log thread also manipulates.  Make sure that the buffer
         * completes any outstanding IO before proceeding. */
        lock_journal(journal);
        rc = do_get_write_access(handle, jh, 0);
        journal_unlock_journal_head(jh);
        unlock_journal(journal);
        return rc;
}
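
/*
 * The canonical metadata-update sequence (an illustrative sketch;
 * error handling is the caller's responsibility):
 *
 *      err = journal_get_write_access(handle, bh);
 *      if (err)
 *              goto fail;
 *      ...modify the buffer contents...
 *      err = journal_dirty_metadata(handle, bh);
 */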


/*
 * When the user wants to journal a newly created buffer_head
 * (ie. getblk() returned a new buffer and we are going to populate it
 * manually rather than reading off disk), then we need to keep the
 * buffer_head locked until it has been completely filled with new
 * data.  In this case, we should be able to make the assertion that
 * the bh is not already part of an existing transaction.
 *
 * The buffer should already be locked by the caller by this point.
 * There is no lock ranking violation: it was a newly created,
 * unlocked buffer beforehand. */

/**
 * int journal_get_create_access () - notify intent to use newly created bh
 * @handle: transaction to add the new buffer to
 * @bh: new buffer.
 *
 * Call this if you create a new bh.
 */
int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
{
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        struct journal_head *jh = journal_add_journal_head(bh);
        int err;

        jbd_debug(5, "journal_head %p\n", jh);
        lock_journal(journal);
        err = -EROFS;
        if (is_handle_aborted(handle))
                goto out;
        err = 0;

        JBUFFER_TRACE(jh, "entry");
        /* The buffer may already belong to this transaction due to
         * pre-zeroing in the filesystem's new_block code.  It may also
         * be on the previous, committing transaction's lists, but it
         * HAS to be in Forget state in that case: the transaction must
         * have deleted the buffer for it to be reused here. */
        J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
                         jh->b_transaction == NULL ||
                         (jh->b_transaction == journal->j_committing_transaction &&
                          jh->b_jlist == BJ_Forget)));

        J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
        J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));

        J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
        handle->h_buffer_credits--;

        spin_lock(&journal_datalist_lock);
        if (jh->b_transaction == NULL) {
                jh->b_transaction = transaction;
                JBUFFER_TRACE(jh, "file as BJ_Reserved");
                __journal_file_buffer(jh, transaction, BJ_Reserved);
                JBUFFER_TRACE(jh, "refile");
                refile_buffer(jh2bh(jh));
        } else if (jh->b_transaction == journal->j_committing_transaction) {
                JBUFFER_TRACE(jh, "set next transaction");
                jh->b_next_transaction = transaction;
        }
        spin_unlock(&journal_datalist_lock);

        /*
         * akpm: I added this.  ext3_alloc_branch can pick up new indirect
         * blocks which contain freed but then revoked metadata.  We need
         * to cancel the revoke in case we end up freeing it yet again
         * and then reallocating it as data - this would cause a second
         * revoke, which hits an assertion error.
         */
        JBUFFER_TRACE(jh, "cancelling revoke");
        journal_cancel_revoke(handle, jh);
        journal_unlock_journal_head(jh);
out:
        unlock_journal(journal);
        return err;
}
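
/*
 * Sketch of the newly-created-buffer case (illustrative only; the
 * getblk() parameters are placeholders):
 *
 *      bh = getblk(dev, blocknr, blocksize);
 *      lock_buffer(bh);
 *      err = journal_get_create_access(handle, bh);
 *      ...fill the new buffer, mark it uptodate...
 *      unlock_buffer(bh);
 *      err = journal_dirty_metadata(handle, bh);
 */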



/**
 * int journal_get_undo_access() -  Notify intent to modify metadata with non-rewindable consequences
 * @handle: transaction
 * @bh: buffer to undo
 *
 * Sometimes there is a need to distinguish between metadata which has
 * been committed to disk and that which has not.  The ext3fs code uses
 * this for freeing and allocating space: we have to make sure that we
 * do not reuse freed space until the deallocation has been committed,
 * since if we overwrote that space we would make the delete
 * un-rewindable in case of a crash.
 *
 * To deal with that, journal_get_undo_access requests write access to a
 * buffer for parts of non-rewindable operations such as delete
 * operations on the bitmaps.  The journaling code must keep a copy of
 * the buffer's contents prior to the undo_access call until such time
 * as we know that the buffer has definitely been committed to disk.
 *
 * We never need to know which transaction the committed data is part
 * of: buffers touched here are guaranteed to be dirtied later and so
 * will be committed to a new transaction in due course, at which point
 * we can discard the old committed data pointer.
 *
 * Returns error number or 0 on success.
 */
int journal_get_undo_access (handle_t *handle, struct buffer_head *bh)
{
        journal_t *journal = handle->h_transaction->t_journal;
        int err;
        struct journal_head *jh = journal_add_journal_head(bh);

        JBUFFER_TRACE(jh, "entry");
        lock_journal(journal);

        /* Do this first --- it can drop the journal lock, so we want to
         * make sure that obtaining the committed_data is done
         * atomically wrt. completion of any outstanding commits. */
        err = do_get_write_access (handle, jh, 1);
        if (err)
                goto out;

        if (!jh->b_committed_data) {
                /* Copy out the current buffer contents into the
                 * preserved, committed copy. */
                JBUFFER_TRACE(jh, "generate b_committed data");
                jh->b_committed_data = jbd_kmalloc(jh2bh(jh)->b_size,
                                                   GFP_NOFS);
                if (!jh->b_committed_data) {
                        printk(KERN_EMERG "%s: No memory for committed data!\n",
                                __FUNCTION__);
                        err = -ENOMEM;
                        goto out;
                }

                memcpy (jh->b_committed_data, jh2bh(jh)->b_data,
                                jh2bh(jh)->b_size);
        }

out:
        if (!err)
                J_ASSERT_JH(jh, jh->b_committed_data);
        journal_unlock_journal_head(jh);
        unlock_journal(journal);
        return err;
}
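
/*
 * Example shape (an illustrative sketch; ext3's real callers differ in
 * detail): freeing a block needs undo access to the block bitmap so the
 * pre-deallocation contents are preserved until commit:
 *
 *      err = journal_get_undo_access(handle, bitmap_bh);
 *      if (!err) {
 *              ...clear the freed block's bit in bitmap_bh's data...
 *              err = journal_dirty_metadata(handle, bitmap_bh);
 *      }
 */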

/**
 * int journal_dirty_data() -  mark a buffer as containing dirty data which needs to be flushed before we can commit the current transaction.
 * @handle: transaction
 * @bh: bufferhead to mark
 * @async: nonzero if the caller will initiate the writeback itself
 *
 * The buffer is placed on the transaction's data list and is marked as
 * belonging to the transaction.
 *
 * If `async' is set then the writeback will be initiated by the caller
 * using submit_bh -> end_buffer_io_async.  We put the buffer onto
 * t_async_datalist.
 *
 * Returns error number or 0 on success.
 */
int journal_dirty_data (handle_t *handle, struct buffer_head *bh, int async)
{
/*
 * journal_dirty_data() can be called via page_launder->ext3_writepage
 * by kswapd.  So it cannot block.  Happily, there's nothing here
 * which needs lock_journal if `async' is set.
 *
 * When the buffer is on the current transaction we freely move it
 * between BJ_AsyncData and BJ_SyncData according to who tried to
 * change its state last.
 */
        journal_t *journal = handle->h_transaction->t_journal;
        int need_brelse = 0;
        int wanted_jlist = async ? BJ_AsyncData : BJ_SyncData;
        struct journal_head *jh;

        if (is_handle_aborted(handle))
                return 0;

        jh = journal_add_journal_head(bh);
        JBUFFER_TRACE(jh, "entry");

        /*
         * The buffer could *already* be dirty.  Writeout can start
         * at any time.
         */
        jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);

        /*
         * What if the buffer is already part of a running transaction?
         *
         * There are two cases:
         * 1) It is part of the current running transaction.  Refile it,
         *    just in case we have allocated it as metadata, deallocated
         *    it, then reallocated it as data.
         * 2) It is part of the previous, still-committing transaction.
         *    If all we want to do is to guarantee that the buffer will be
         *    written to disk before this new transaction commits, then
         *    being sure that the *previous* transaction has this same
         *    property is sufficient for us!  Just leave it on its old
         *    transaction.
         *
         * In case (2), the buffer must not already exist as metadata
         * --- that would violate write ordering (a transaction is free
         * to write its data at any point, even before the previous
         * committing transaction has committed).  The caller must
         * never, ever allow this to happen: there's nothing we can do
         * about it in this layer.
         */
        spin_lock(&journal_datalist_lock);
        if (jh->b_transaction) {
                JBUFFER_TRACE(jh, "has transaction");
                if (jh->b_transaction != handle->h_transaction) {
                        JBUFFER_TRACE(jh, "belongs to older transaction");
                        J_ASSERT_JH(jh, jh->b_transaction ==
                                        journal->j_committing_transaction);

                        /* @@@ IS THIS TRUE  ? */
                        /*
                         * Not any more.  Scenario: someone does a write()
                         * in data=journal mode.  The buffer's transaction has
                         * moved into commit.  Then someone does another
                         * write() to the file.  We do the frozen data copyout
                         * and set b_next_transaction to point to j_running_t.
                         * And while we're in that state, someone does a
                         * writepage() in an attempt to pageout the same area
                         * of the file via a shared mapping.  At present that
                         * calls journal_dirty_data(), and we get right here.
                         * It may be too late to journal the data.  Simply
                         * falling through to the next test will suffice: the
                         * data will be dirty and will be checkpointed.  The
                         * ordering comments in the next comment block still
                         * apply.
                         */
                        //J_ASSERT_JH(jh, jh->b_next_transaction == NULL);

                        /*
                         * If we're journalling data, and this buffer was
                         * subject to a write(), it could be metadata, forget
                         * or shadow against the committing transaction.  Now,
                         * someone has dirtied the same darn page via a mapping
                         * and it is being writepage()'d.
                         * We *could* just steal the page from commit, with some
                         * fancy locking there.  Instead, we just skip it -
                         * don't tie the page's buffers to the new transaction
                         * at all.
                         * Implication: if we crash before the writepage() data
                         * is written into the filesystem, recovery will replay
                         * the write() data.
                         */
                        if (jh->b_jlist != BJ_None &&
                                        jh->b_jlist != BJ_SyncData &&
                                        jh->b_jlist != BJ_AsyncData) {
                                JBUFFER_TRACE(jh, "Not stealing");
                                goto no_journal;
                        }

                        /*
                         * This buffer may be undergoing writeout in commit.  We
                         * can't return from here and let the caller dirty it
                         * again because that can cause the write-out loop in
                         * commit to never terminate.
                         */
                        if (!async && buffer_dirty(bh)) {
                                atomic_inc(&bh->b_count);
                                spin_unlock(&journal_datalist_lock);
                                need_brelse = 1;
                                ll_rw_block(WRITE, 1, &bh);
                                wait_on_buffer(bh);
                                spin_lock(&journal_datalist_lock);
                                /* The buffer may become locked again at any
                                   time if it is redirtied */
                        }

                        /* journal_clean_data_list() may have got there first */
                        if (jh->b_transaction != NULL) {
                                JBUFFER_TRACE(jh, "unfile from commit");
                                __journal_unfile_buffer(jh);
                                jh->b_transaction = NULL;
                        }
                        /* The buffer will be refiled below */

                }
                /*
                 * Special case --- the buffer might actually have been
                 * allocated and then immediately deallocated in the previous,
                 * committing transaction, so might still be left on that
                 * transaction's metadata lists.
                 */
                if (jh->b_jlist != wanted_jlist) {
                        JBUFFER_TRACE(jh, "not on correct data list: unfile");
                        J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
                        __journal_unfile_buffer(jh);
                        jh->b_transaction = NULL;
                        JBUFFER_TRACE(jh, "file as data");
                        __journal_file_buffer(jh, handle->h_transaction,
                                                wanted_jlist);
                }
        } else {
                JBUFFER_TRACE(jh, "not on a transaction");
                __journal_file_buffer(jh, handle->h_transaction, wanted_jlist);
        }
no_journal:
        spin_unlock(&journal_datalist_lock);
        if (need_brelse) {
                BUFFER_TRACE(bh, "brelse");
                __brelse(bh);
        }
        JBUFFER_TRACE(jh, "exit");
        journal_unlock_journal_head(jh);
        return 0;
}

/**
 * int journal_dirty_metadata() -  mark a buffer as containing dirty metadata
 * @handle: transaction to add buffer to.
 * @bh: buffer to mark
 *
 * Mark dirty metadata which needs to be journaled as part of the
 * current transaction.
 *
 * The buffer is placed on the transaction's metadata list and is marked
 * as belonging to the transaction.
 *
 * Returns error number or 0 on success.
 */
int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
{
/*
 * Special care needs to be taken if the buffer already belongs to the
 * current committing transaction (in which case we should have frozen
 * data present for that commit).  In that case, we don't relink the
 * buffer: that only gets done when the old transaction finally
 * completes its commit.
 */
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        struct journal_head *jh = bh2jh(bh);

        jbd_debug(5, "journal_head %p\n", jh);
        JBUFFER_TRACE(jh, "entry");
        lock_journal(journal);
        if (is_handle_aborted(handle))
                goto out_unlock;

        spin_lock(&journal_datalist_lock);
        set_bit(BH_JBDDirty, &bh->b_state);

        J_ASSERT_JH(jh, jh->b_transaction != NULL);

        /*
         * Metadata already on the current transaction list doesn't
         * need to be filed.  Metadata on another transaction's list must
         * be committing, and will be refiled once the commit completes:
         * leave it alone for now.
         */

        if (jh->b_transaction != transaction) {
                JBUFFER_TRACE(jh, "already on other transaction");
                J_ASSERT_JH(jh, jh->b_transaction ==
                                        journal->j_committing_transaction);
                J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
                /* And this case is illegal: we can't reuse another
                 * transaction's data buffer, ever. */
                /* FIXME: writepage() should be journalled */
                J_ASSERT_JH(jh, jh->b_jlist != BJ_SyncData);
                goto done_locked;
        }

        /* That test should have eliminated the following case: */
        J_ASSERT_JH(jh, jh->b_frozen_data == 0);

        JBUFFER_TRACE(jh, "file as BJ_Metadata");
        __journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);

done_locked:
        spin_unlock(&journal_datalist_lock);
        JBUFFER_TRACE(jh, "exit");
out_unlock:
        unlock_journal(journal);
        return 0;
}

#if 0
/*
 * journal_release_buffer: undo a get_write_access without any buffer
 * updates, if the update decided in the end that it didn't need access.
 *
 * journal_get_write_access() can block, so it is quite possible for a
 * journaling component to decide after the write access is returned
 * that global state has changed and the update is no longer required.  */

void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
{
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        struct journal_head *jh = bh2jh(bh);

        lock_journal(journal);
        JBUFFER_TRACE(jh, "entry");

        /* If the buffer is reserved but not modified by this
         * transaction, then it is safe to release it.  In all other
         * cases, just leave the buffer as it is. */

        spin_lock(&journal_datalist_lock);
        if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction &&
            !buffer_jdirty(jh2bh(jh))) {
                JBUFFER_TRACE(jh, "unused: refiling it");
                handle->h_buffer_credits++;
                __journal_refile_buffer(jh);
        }
        spin_unlock(&journal_datalist_lock);

        JBUFFER_TRACE(jh, "exit");
        unlock_journal(journal);
}
#endif

/**
 * void journal_forget() - bforget() for potentially-journaled buffers.
 * @handle: transaction handle
 * @bh:     bh to 'forget'
 *
 * We can only do the bforget if there are no commits pending against the
 * buffer.  If the buffer is dirty in the current running transaction we
 * can safely unlink it.
 *
 * bh may not be a journalled buffer at all - it may be a non-JBD
 * buffer which came off the hashtable.  Check for this.
 *
 * Decrements bh->b_count by one.
 *
 * Allow this call even if the handle has aborted --- it may be part of
 * the caller's cleanup after an abort.
 */
void journal_forget (handle_t *handle, struct buffer_head *bh)
{
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
        struct journal_head *jh;

        BUFFER_TRACE(bh, "entry");

        lock_journal(journal);
        spin_lock(&journal_datalist_lock);

        if (!buffer_jbd(bh))
                goto not_jbd;
        jh = bh2jh(bh);

        if (jh->b_transaction == handle->h_transaction) {
                J_ASSERT_JH(jh, !jh->b_frozen_data);

                /* If we are forgetting a buffer which is already part
                 * of this transaction, then we can just drop it from
                 * the transaction immediately. */
                clear_bit(BH_Dirty, &bh->b_state);
                clear_bit(BH_JBDDirty, &bh->b_state);

                JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
                J_ASSERT_JH(jh, !jh->b_committed_data);

                __journal_unfile_buffer(jh);
                jh->b_transaction = 0;

                /*
                 * We are no longer going to journal this buffer.
                 * However, the commit of this transaction is still
                 * important to the buffer: the delete that we are now
                 * processing might obsolete an old log entry, so by
                 * committing, we can satisfy the buffer's checkpoint.
                 *
                 * So, if we have a checkpoint on the buffer, we should
                 * now refile the buffer on our BJ_Forget list so that
                 * we know to remove the checkpoint after we commit.
                 */

                if (jh->b_cp_transaction) {
                        __journal_file_buffer(jh, transaction, BJ_Forget);
                } else {
                        __journal_remove_journal_head(bh);
                        __brelse(bh);
                        if (!buffer_jbd(bh)) {
                                spin_unlock(&journal_datalist_lock);
                                unlock_journal(journal);
                                __bforget(bh);
                                return;
                        }
                }

        } else if (jh->b_transaction) {
                J_ASSERT_JH(jh, (jh->b_transaction ==
                                 journal->j_committing_transaction));
                /* However, if the buffer is still owned by a prior
                 * (committing) transaction, we can't drop it yet... */
                JBUFFER_TRACE(jh, "belongs to older transaction");
                /* ... but we CAN drop it from the new transaction if we
                 * have also modified it since the original commit. */

                if (jh->b_next_transaction) {
                        J_ASSERT(jh->b_next_transaction == transaction);
                        jh->b_next_transaction = NULL;
                }
        }

not_jbd:
        spin_unlock(&journal_datalist_lock);
        unlock_journal(journal);
        __brelse(bh);
        return;
}

#if 0   /* Unused */
/*
 * journal_sync_buffer: flush a potentially-journaled buffer to disk.
 *
 * Used for O_SYNC filesystem operations.  If the buffer is journaled,
 * we need to complete the O_SYNC by waiting for the transaction to
 * complete.  It is an error to call journal_sync_buffer before
 * journal_stop!
 */

void journal_sync_buffer(struct buffer_head *bh)
{
        transaction_t *transaction;
        journal_t *journal;
        long sequence;
        struct journal_head *jh;

        /* If the buffer isn't journaled, this is easy: just sync it to
         * disk.  */
        BUFFER_TRACE(bh, "entry");

        spin_lock(&journal_datalist_lock);
        if (!buffer_jbd(bh)) {
                spin_unlock(&journal_datalist_lock);
                return;
        }
        jh = bh2jh(bh);
        if (jh->b_transaction == NULL) {
                /* If the buffer has already been journaled, then this
                 * is a noop. */
                if (jh->b_cp_transaction == NULL) {
                        spin_unlock(&journal_datalist_lock);
                        return;
                }
                atomic_inc(&bh->b_count);
                spin_unlock(&journal_datalist_lock);
                ll_rw_block (WRITE, 1, &bh);
                wait_on_buffer(bh);
                __brelse(bh);
                goto out;
        }

        /* Otherwise, just wait until the transaction is synced to disk. */
        transaction = jh->b_transaction;
        journal = transaction->t_journal;
        sequence = transaction->t_tid;
        spin_unlock(&journal_datalist_lock);

        jbd_debug(2, "requesting commit for jh %p\n", jh);
        log_start_commit (journal, transaction);

        while (tid_gt(sequence, journal->j_commit_sequence)) {
                wake_up(&journal->j_wait_done_commit);
                sleep_on(&journal->j_wait_done_commit);
        }
        JBUFFER_TRACE(jh, "exit");
out:
        return;
}
#endif

/*
 * Register a callback function for this handle.  The function will be
 * called when the transaction that this handle is part of has been
 * committed to disk with the original callback data struct and the
 * error status of the journal as parameters.  There is no guarantee of
 * ordering between handles within a single transaction, nor between
 * callbacks registered on the same handle.
 *
 * The caller is responsible for allocating the journal_callback struct.
 * This is to allow the caller to add as much extra data to the callback
 * as needed, but reduce the overhead of multiple allocations.  The
 * caller-allocated struct must begin with a struct journal_callback at
 * offset 0, with the caller-specific data following it.
 */
void journal_callback_set(handle_t *handle,
                          void (*func)(struct journal_callback *jcb, int error),
                          struct journal_callback *jcb)
{
        list_add_tail(&jcb->jcb_list, &handle->h_jcb);
        jcb->jcb_func = func;
}
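
/*
 * Illustrative usage sketch (editorial addition, not part of the
 * original file): a caller embeds struct journal_callback at offset 0
 * of its own struct, so one allocation carries both the list linkage
 * and the caller's private data.  The struct, field and helper names
 * below are hypothetical.
 */
#if 0
struct my_commit_cb {
        struct journal_callback jcb;    /* must sit at offset 0 */
        struct inode *inode;            /* caller-private data follows */
};

static void my_commit_done(struct journal_callback *jcb, int error)
{
        /* The cast is valid only because jcb is the first member */
        struct my_commit_cb *cb = (struct my_commit_cb *)jcb;

        if (!error)
                note_commit_for_inode(cb->inode);       /* hypothetical */
        kfree(cb);                      /* caller owns the allocation */
}

static void register_commit_cb(handle_t *handle, struct inode *inode)
{
        struct my_commit_cb *cb = kmalloc(sizeof(*cb), GFP_NOFS);

        if (cb) {
                cb->inode = inode;
                journal_callback_set(handle, my_commit_done, &cb->jcb);
        }
}
#endif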
1391
 
1392
/**
1393
 * int journal_stop() - complete a transaction
1394
 * @handle: tranaction to complete.
1395
 *
1396
 * All done for a particular handle.
1397
 *
1398
 * There is not much action needed here.  We just return any remaining
1399
 * buffer credits to the transaction and remove the handle.  The only
1400
 * complication is that we need to start a commit operation if the
1401
 * filesystem is marked for synchronous update.
1402
 *
1403
 * journal_stop itself will not usually return an error, but it may
1404
 * do so in unusual circumstances.  In particular, expect it to
1405
 * return -EIO if a journal_abort has been executed since the
1406
 * transaction began.
1407
 */
1408
int journal_stop(handle_t *handle)
1409
{
1410
        transaction_t *transaction = handle->h_transaction;
1411
        journal_t *journal = transaction->t_journal;
1412
        int old_handle_count, err;
1413
 
1414
        if (!handle)
1415
                return 0;
1416
 
1417
        J_ASSERT (transaction->t_updates > 0);
1418
        J_ASSERT (journal_current_handle() == handle);
1419
 
1420
        if (is_handle_aborted(handle))
1421
                err = -EIO;
1422
        else
1423
                err = 0;
1424
 
1425
        if (--handle->h_ref > 0) {
1426
                jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1427
                          handle->h_ref);
1428
                return err;
1429
        }
1430
 
1431
        jbd_debug(4, "Handle %p going down\n", handle);
1432
 
1433
        /*
1434
         * Implement synchronous transaction batching.  If the handle
1435
         * was synchronous, don't force a commit immediately.  Let's
1436
         * yield and let another thread piggyback onto this transaction.
1437
         * Keep doing that while new threads continue to arrive.
1438
         * It doesn't cost much - we're about to run a commit and sleep
1439
         * on IO anyway.  Speeds up many-threaded, many-dir operations
1440
         * by 30x or more...
1441
         */
1442
        if (handle->h_sync) {
1443
                do {
1444
                        old_handle_count = transaction->t_handle_count;
1445
                        yield();
1446
                } while (old_handle_count != transaction->t_handle_count);
1447
        }
1448
 
1449
        current->journal_info = NULL;
1450
        transaction->t_outstanding_credits -= handle->h_buffer_credits;
1451
        transaction->t_updates--;
1452
        if (!transaction->t_updates) {
1453
                wake_up(&journal->j_wait_updates);
1454
                if (journal->j_barrier_count)
1455
                        wake_up(&journal->j_wait_transaction_locked);
1456
        }
1457
 
1458
        /* Move callbacks from the handle to the transaction. */
1459
        list_splice(&handle->h_jcb, &transaction->t_jcb);
1460
 
1461
        /*
1462
         * If the handle is marked SYNC, we need to set another commit
1463
         * going!  We also want to force a commit if the current
1464
         * transaction is occupying too much of the log, or if the
1465
         * transaction is too old now.
1466
         */
1467
        if (handle->h_sync ||
1468
                        transaction->t_outstanding_credits >
1469
                                journal->j_max_transaction_buffers ||
1470
                        (journal->j_commit_interval &&
1471
                         time_after_eq(jiffies, transaction->t_expires))) {
1472
                /* Do this even for aborted journals: an abort still
1473
                 * completes the commit thread, it just doesn't write
1474
                 * anything to disk. */
1475
                tid_t tid = transaction->t_tid;
1476
 
1477
                jbd_debug(2, "transaction too old, requesting commit for "
1478
                                        "handle %p\n", handle);
1479
                /* This is non-blocking */
1480
                log_start_commit(journal, transaction);
1481
 
1482
                /*
1483
                 * Special case: JFS_SYNC synchronous updates require us
1484
                 * to wait for the commit to complete.
1485
                 */
1486
                if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1487
                        log_wait_commit(journal, tid);
1488
        }
1489
        kfree(handle);
1490
        return err;
1491
}
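
/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): the usual handle lifecycle around journal_stop.  The function
 * name is hypothetical; journal_start, journal_get_write_access and
 * journal_dirty_metadata are the real JBD entry points.
 */
#if 0
static int example_metadata_update(journal_t *journal,
                                   struct buffer_head *bh)
{
        handle_t *handle = journal_start(journal, 1);   /* 1 credit */
        int err;

        if (IS_ERR(handle))
                return PTR_ERR(handle);

        err = journal_get_write_access(handle, bh);
        if (!err) {
                /* ... modify bh->b_data here ... */
                err = journal_dirty_metadata(handle, bh);
        }
        /* journal_stop returns -EIO if the journal was aborted */
        if (journal_stop(handle) == -EIO && !err)
                err = -EIO;
        return err;
}
#endif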

/**
 * int journal_force_commit() - force any uncommitted transactions
 * @journal: journal to force
 *
 * For synchronous operations: force any uncommitted transactions
 * to disk.  May seem kludgy, but it reuses all the handle batching
 * code in a very simple manner.
 */
int journal_force_commit(journal_t *journal)
{
        handle_t *handle;
        int ret = 0;

        lock_kernel();
        handle = journal_start(journal, 1);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                goto out;
        }
        handle->h_sync = 1;
        journal_stop(handle);
out:
        unlock_kernel();
        return ret;
}

/*
 *
 * List management code snippets: various functions for manipulating the
 * transaction buffer lists.
 *
 */

/*
 * Append a buffer to a transaction list, given the transaction's list head
 * pointer.
 * journal_datalist_lock is held.
 */

static inline void
__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
{
        if (!*list) {
                jh->b_tnext = jh->b_tprev = jh;
                *list = jh;
        } else {
                /* Insert at the tail of the list to preserve order */
                struct journal_head *first = *list, *last = first->b_tprev;
                jh->b_tprev = last;
                jh->b_tnext = first;
                last->b_tnext = first->b_tprev = jh;
        }
}
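
/*
 * Editorial note: the t-lists are circular and doubly linked.  With
 * buffers A, B and C filed in that order, *list points at A and the
 * b_tnext links form the ring A -> B -> C -> A, with b_tprev running
 * the other way.  The tail is therefore always (*list)->b_tprev, which
 * is what makes the tail insertion above O(1).
 */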

/*
 * Remove a buffer from a transaction list, given the transaction's list
 * head pointer.
 *
 * Called with journal_datalist_lock held, and the journal may not
 * be locked.
 */

static inline void
__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
{
        if (*list == jh) {
                *list = jh->b_tnext;
                if (*list == jh)
                        *list = 0;
        }
        jh->b_tprev->b_tnext = jh->b_tnext;
        jh->b_tnext->b_tprev = jh->b_tprev;
}

/*
 * Remove a buffer from the appropriate transaction list.
 *
 * Note that this function can *change* the value of
 * bh->b_transaction->t_sync_datalist, t_async_datalist, t_buffers, t_forget,
 * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
 * is holding onto a copy of one of these pointers, it could go bad.
 * Generally the caller needs to re-read the pointer from the transaction_t.
 *
 * If bh->b_jlist is BJ_SyncData or BJ_AsyncData then we may have been called
 * via journal_try_to_free_buffer() or journal_clean_data_list().  In that
 * case, journal_datalist_lock will be held, and the journal may not be locked.
 */
void __journal_unfile_buffer(struct journal_head *jh)
{
        struct journal_head **list = 0;
        transaction_t * transaction;

        assert_spin_locked(&journal_datalist_lock);
        transaction = jh->b_transaction;

        J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);

        if (jh->b_jlist != BJ_None)
                J_ASSERT_JH(jh, transaction != 0);

        switch (jh->b_jlist) {
        case BJ_None:
                return;
        case BJ_SyncData:
                list = &transaction->t_sync_datalist;
                break;
        case BJ_AsyncData:
                list = &transaction->t_async_datalist;
                break;
        case BJ_Metadata:
                transaction->t_nr_buffers--;
                J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
                list = &transaction->t_buffers;
                break;
        case BJ_Forget:
                list = &transaction->t_forget;
                break;
        case BJ_IO:
                list = &transaction->t_iobuf_list;
                break;
        case BJ_Shadow:
                list = &transaction->t_shadow_list;
                break;
        case BJ_LogCtl:
                list = &transaction->t_log_list;
                break;
        case BJ_Reserved:
                list = &transaction->t_reserved_list;
                break;
        }

        __blist_del_buffer(list, jh);
        jh->b_jlist = BJ_None;
        if (test_and_clear_bit(BH_JBDDirty, &jh2bh(jh)->b_state)) {
                set_bit(BH_Dirty, &jh2bh(jh)->b_state);
        }
}

void journal_unfile_buffer(struct journal_head *jh)
{
        spin_lock(&journal_datalist_lock);
        __journal_unfile_buffer(jh);
        spin_unlock(&journal_datalist_lock);
}

/*
 * Called from journal_try_to_free_buffers().  The journal is not
 * locked. lru_list_lock is not held.
 *
 * Here we see why journal_datalist_lock is global and not per-journal.
 * We cannot get back to this buffer's journal pointer without locking
 * out journal_clean_data_list() in some manner.
 *
 * One could use journal_datalist_lock to get race-free access to a
 * per-journal lock.
 *
 * Called with journal_datalist_lock held.
 *
 * Returns non-zero iff we were able to free the journal_head.
 */
static int __journal_try_to_free_buffer(struct buffer_head *bh,
                                        int *locked_or_dirty)
{
        struct journal_head *jh;

        assert_spin_locked(&journal_datalist_lock);

        jh = bh2jh(bh);

        if (buffer_locked(bh) || buffer_dirty(bh)) {
                *locked_or_dirty = 1;
                goto out;
        }

        if (!buffer_uptodate(bh))
                goto out;

        if (jh->b_next_transaction != 0)
                goto out;

        if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
                if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_AsyncData) {
                        /* A written-back ordered data buffer */
                        JBUFFER_TRACE(jh, "release data");
                        __journal_unfile_buffer(jh);
                        jh->b_transaction = 0;
                        __journal_remove_journal_head(bh);
                        __brelse(bh);
                }
        } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
                /* written-back checkpointed metadata buffer */
                if (jh->b_jlist == BJ_None) {
                        JBUFFER_TRACE(jh, "remove from checkpoint list");
                        __journal_remove_checkpoint(jh);
                        __journal_remove_journal_head(bh);
                        __brelse(bh);
                }
        }
        return !buffer_jbd(bh);

out:
        return 0;
}

/**
 * int journal_try_to_free_buffers() - try to free page buffers.
 * @journal: journal for operation
 * @page: to try and free
 * @gfp_mask: 'IO' mode for try_to_free_buffers()
 *
 * For all the buffers on this page,
 * if they are fully written out ordered data, move them onto BUF_CLEAN
 * so try_to_free_buffers() can reap them.
 *
 * This function returns non-zero if we wish try_to_free_buffers()
 * to be called. We do this if the page is releasable by try_to_free_buffers().
 * We also do it if the page has locked or dirty buffers and the caller wants
 * us to perform sync or async writeout.
 */
int journal_try_to_free_buffers(journal_t *journal,
                                struct page *page, int gfp_mask)
{
/*
 * journal_try_to_free_buffers().  For all the buffers on this page,
 * if they are fully written out ordered data, move them onto BUF_CLEAN
 * so try_to_free_buffers() can reap them.  Called with lru_list_lock
 * not held.  Does its own locking.
 *
 * This complicates JBD locking somewhat.  We aren't protected by the
 * BKL here.  We wish to remove the buffer from its committing or
 * running transaction's ->t_datalist via __journal_unfile_buffer.
 *
 * This may *change* the value of transaction_t->t_datalist, so anyone
 * who looks at t_datalist needs to lock against this function.
 *
 * Even worse, someone may be doing a journal_dirty_data on this
 * buffer.  So we need to lock against that.  journal_dirty_data()
 * will come out of the lock with the buffer dirty, which makes it
 * ineligible for release here.
 *
 * Who else is affected by this?  hmm...  Really the only contender
 * is do_get_write_access() - it could be looking at the buffer while
 * journal_try_to_free_buffer() is changing its state.  But that
 * cannot happen because we never reallocate freed data as metadata
 * while the data is part of a transaction.  Yes?
 */
        struct buffer_head *bh;
        struct buffer_head *tmp;
        int locked_or_dirty = 0;
        int call_ttfb = 1;

        J_ASSERT(PageLocked(page));

        bh = page->buffers;
        tmp = bh;
        spin_lock(&journal_datalist_lock);
        do {
                struct buffer_head *p = tmp;

                tmp = tmp->b_this_page;
                if (buffer_jbd(p))
                        if (!__journal_try_to_free_buffer(p, &locked_or_dirty))
                                call_ttfb = 0;
        } while (tmp != bh);
        spin_unlock(&journal_datalist_lock);

        if (!(gfp_mask & (__GFP_IO|__GFP_WAIT)))
                goto out;
        if (!locked_or_dirty)
                goto out;
        /*
         * The VM wants us to do writeout, or to block on IO, or both.
         * So we allow try_to_free_buffers to be called even if the page
         * still has journalled buffers.
         */
        call_ttfb = 1;
out:
        return call_ttfb;
}
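
/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): how a VM-side caller is expected to combine the test above
 * with try_to_free_buffers().  The function name is hypothetical.
 */
#if 0
static int example_release_page(journal_t *journal, struct page *page,
                                int gfp_mask)
{
        /* Page must be locked, as journal_try_to_free_buffers asserts */
        if (!journal_try_to_free_buffers(journal, page, gfp_mask))
                return 0;
        return try_to_free_buffers(page, gfp_mask);
}
#endif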

/*
 * This buffer is no longer needed.  If it is on an older transaction's
 * checkpoint list we need to record it on this transaction's forget list
 * to pin this buffer (and hence its checkpointing transaction) down until
 * this transaction commits.  If the buffer isn't on a checkpoint list, we
 * release it.
 * Returns non-zero if JBD no longer has an interest in the buffer.
 */
static int dispose_buffer(struct journal_head *jh,
                transaction_t *transaction)
{
        int may_free = 1;
        struct buffer_head *bh = jh2bh(jh);

        spin_lock(&journal_datalist_lock);
        __journal_unfile_buffer(jh);
        jh->b_transaction = 0;

        if (jh->b_cp_transaction) {
                JBUFFER_TRACE(jh, "on running+cp transaction");
                __journal_file_buffer(jh, transaction, BJ_Forget);
                clear_bit(BH_JBDDirty, &bh->b_state);
                may_free = 0;
        } else {
                JBUFFER_TRACE(jh, "on running transaction");
                __journal_remove_journal_head(bh);
                __brelse(bh);
        }
        spin_unlock(&journal_datalist_lock);
        return may_free;
}

/*
 * journal_flushpage
 *
 * This code is tricky.  It has a number of cases to deal with.
 *
 * There are two invariants which this code relies on:
 *
 * i_size must be updated on disk before we start calling flushpage on the
 * data.
 *
 *  This is done in ext3 by defining an ext3_setattr method which
 *  updates i_size before truncate gets going.  By maintaining this
 *  invariant, we can be sure that it is safe to throw away any buffers
 *  attached to the current transaction: once the transaction commits,
 *  we know that the data will not be needed.
 *
 *  Note however that we can *not* throw away data belonging to the
 *  previous, committing transaction!
 *
 * Any disk blocks which *are* part of the previous, committing
 * transaction (and which therefore cannot be discarded immediately) are
 * not going to be reused in the new running transaction.
 *
 *  The bitmap committed_data images guarantee this: any block which is
 *  allocated in one transaction and removed in the next will be marked
 *  as in-use in the committed_data bitmap, so cannot be reused until
 *  the next transaction to delete the block commits.  This means that
 *  leaving committing buffers dirty is quite safe: the disk blocks
 *  cannot be reallocated to a different file and so buffer aliasing is
 *  not possible.
 *
 * The above applies mainly to ordered data mode.  In writeback mode we
 * don't make guarantees about the order in which data hits disk --- in
 * particular we don't guarantee that new dirty data is flushed before
 * transaction commit --- so it is always safe just to discard data
 * immediately in that mode.  --sct
 */

/*
 * The journal_unmap_buffer helper function returns zero if the buffer
 * concerned remains pinned as an anonymous buffer belonging to an older
 * transaction.
 *
 * We're outside-transaction here.  Either or both of j_running_transaction
 * and j_committing_transaction may be NULL.
 */
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
{
        transaction_t *transaction;
        struct journal_head *jh;
        int may_free = 1;

        BUFFER_TRACE(bh, "entry");

        if (!buffer_mapped(bh))
                return 1;

        /* It is safe to proceed here without the
         * journal_datalist_spinlock because the buffers cannot be
         * stolen by try_to_free_buffers as long as we are holding the
         * page lock. --sct */

        if (!buffer_jbd(bh))
                goto zap_buffer;

        jh = bh2jh(bh);
        transaction = jh->b_transaction;
        if (transaction == NULL) {
                /* First case: not on any transaction.  If it
                 * has no checkpoint link, then we can zap it:
                 * it's a writeback-mode buffer so we don't care
                 * if it hits disk safely. */
                if (!jh->b_cp_transaction) {
                        JBUFFER_TRACE(jh, "not on any transaction: zap");
                        goto zap_buffer;
                }

                if (!buffer_dirty(bh)) {
                        /* bdflush has written it.  We can drop it now */
                        goto zap_buffer;
                }

                /* OK, it must be in the journal but still not
                 * written fully to disk: it's metadata or
                 * journaled data... */

                if (journal->j_running_transaction) {
                        /* ... and once the current transaction has
                         * committed, the buffer won't be needed any
                         * longer. */
                        JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
                        return dispose_buffer(jh,
                                        journal->j_running_transaction);
                } else {
                        /* There is no currently-running transaction. So the
                         * orphan record which we wrote for this file must have
                         * passed into commit.  We must attach this buffer to
                         * the committing transaction, if it exists. */
                        if (journal->j_committing_transaction) {
                                JBUFFER_TRACE(jh, "give to committing trans");
                                return dispose_buffer(jh,
                                        journal->j_committing_transaction);
                        } else {
                                /* The orphan record's transaction has
                                 * committed.  We can cleanse this buffer */
                                clear_bit(BH_JBDDirty, &bh->b_state);
                                goto zap_buffer;
                        }
                }
        } else if (transaction == journal->j_committing_transaction) {
                /* If it is committing, we simply cannot touch it.  We
                 * can remove its next_transaction pointer from the
                 * running transaction if that is set, but nothing
                 * else. */
                JBUFFER_TRACE(jh, "on committing transaction");
                set_bit(BH_Freed, &bh->b_state);
                if (jh->b_next_transaction) {
                        J_ASSERT(jh->b_next_transaction ==
                                        journal->j_running_transaction);
                        jh->b_next_transaction = NULL;
                }
                return 0;
        } else {
                /* Good, the buffer belongs to the running transaction.
                 * We are writing our own transaction's data, not any
                 * previous one's, so it is safe to throw it away
                 * (remember that we expect the filesystem to have set
                 * i_size already for this truncate so recovery will not
                 * expose the disk blocks we are discarding here.) */
                J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
                may_free = dispose_buffer(jh, transaction);
        }

zap_buffer:
        if (buffer_dirty(bh))
                mark_buffer_clean(bh);
        J_ASSERT_BH(bh, !buffer_jdirty(bh));
        clear_bit(BH_Uptodate, &bh->b_state);
        clear_bit(BH_Mapped, &bh->b_state);
        clear_bit(BH_Req, &bh->b_state);
        clear_bit(BH_New, &bh->b_state);
        return may_free;
}

/**
 * int journal_flushpage()
 * @journal: journal to use for flush...
 * @page:    page to flush
 * @offset:  offset within the page; buffers lying wholly beyond this
 *           offset are discarded.
 *
 * Reap page buffers containing data after offset in page.
 *
 * Return non-zero if the page's buffers were successfully reaped.
 */
int journal_flushpage(journal_t *journal,
                      struct page *page,
                      unsigned long offset)
{
        struct buffer_head *head, *bh, *next;
        unsigned int curr_off = 0;
        int may_free = 1;

        if (!PageLocked(page))
                BUG();
        if (!page->buffers)
                return 1;

        /* We will potentially be playing with lists other than just the
         * data lists (especially for journaled data mode), so be
         * cautious in our locking. */
        lock_journal(journal);

        head = bh = page->buffers;
        do {
                unsigned int next_off = curr_off + bh->b_size;
                next = bh->b_this_page;

                /* AKPM: doing lock_buffer here may be overly paranoid */
                if (offset <= curr_off) {
                        /* This block is wholly outside the truncation point */
                        lock_buffer(bh);
                        may_free &= journal_unmap_buffer(journal, bh);
                        unlock_buffer(bh);
                }
                curr_off = next_off;
                bh = next;

        } while (bh != head);

        unlock_journal(journal);

        if (!offset) {
                if (!may_free || !try_to_free_buffers(page, 0))
                        return 0;
                J_ASSERT(page->buffers == NULL);
        }
        return 1;
}
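
/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): how a client filesystem might wire journal_flushpage into its
 * flushpage address_space operation during truncate.  The function and
 * the journal-lookup helper are hypothetical.
 */
#if 0
static int example_flushpage(struct page *page, unsigned long offset)
{
        journal_t *journal = page_to_journal(page);     /* hypothetical */

        /* offset == 0 means the whole page is being truncated away */
        return journal_flushpage(journal, page, offset);
}
#endif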

/*
 * File a buffer on the given transaction list.
 */
void __journal_file_buffer(struct journal_head *jh,
                        transaction_t *transaction, int jlist)
{
        struct journal_head **list = 0;
        int was_dirty = 0;

        assert_spin_locked(&journal_datalist_lock);

        J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
        J_ASSERT_JH(jh, jh->b_transaction == transaction ||
                                jh->b_transaction == 0);

        if (jh->b_transaction && jh->b_jlist == jlist)
                return;

        /* The following list of buffer states needs to be consistent
         * with __jbd_unexpected_dirty_buffer()'s handling of dirty
         * state. */

        if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
            jlist == BJ_Shadow || jlist == BJ_Forget) {
                if (atomic_set_buffer_clean(jh2bh(jh)) ||
                    test_and_clear_bit(BH_JBDDirty, &jh2bh(jh)->b_state))
                        was_dirty = 1;
        }

        if (jh->b_transaction)
                __journal_unfile_buffer(jh);
        else
                jh->b_transaction = transaction;

        switch (jlist) {
        case BJ_None:
                J_ASSERT_JH(jh, !jh->b_committed_data);
                J_ASSERT_JH(jh, !jh->b_frozen_data);
                return;
        case BJ_SyncData:
                list = &transaction->t_sync_datalist;
                break;
        case BJ_AsyncData:
                list = &transaction->t_async_datalist;
                break;
        case BJ_Metadata:
                transaction->t_nr_buffers++;
                list = &transaction->t_buffers;
                break;
        case BJ_Forget:
                list = &transaction->t_forget;
                break;
        case BJ_IO:
                list = &transaction->t_iobuf_list;
                break;
        case BJ_Shadow:
                list = &transaction->t_shadow_list;
                break;
        case BJ_LogCtl:
                list = &transaction->t_log_list;
                break;
        case BJ_Reserved:
                list = &transaction->t_reserved_list;
                break;
        }

        __blist_add_buffer(list, jh);
        jh->b_jlist = jlist;

        if (was_dirty)
                set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
}

void journal_file_buffer(struct journal_head *jh,
                                transaction_t *transaction, int jlist)
{
        spin_lock(&journal_datalist_lock);
        __journal_file_buffer(jh, transaction, jlist);
        spin_unlock(&journal_datalist_lock);
}

static void jbd_refile_buffer(struct buffer_head *bh)
{
        if (buffer_dirty(bh) && (bh->b_list != BUF_DIRTY))
                set_buffer_flushtime(bh);
        refile_buffer(bh);
}

/*
 * Remove a buffer from its current buffer list in preparation for
 * dropping it from its current transaction entirely.  If the buffer has
 * already started to be used by a subsequent transaction, refile the
 * buffer on that transaction's metadata list.
 */

void __journal_refile_buffer(struct journal_head *jh)
{
        int was_dirty = 0;

        assert_spin_locked(&journal_datalist_lock);
        /* If the buffer is now unused, just drop it. */
        if (jh->b_next_transaction == NULL) {
                __journal_unfile_buffer(jh);
                jh->b_transaction = NULL;
                /* Onto BUF_DIRTY for writeback */
                jbd_refile_buffer(jh2bh(jh));
                return;
        }

        /* It has been modified by a later transaction: add it to the
         * new transaction's metadata list. */

        if (test_and_clear_bit(BH_JBDDirty, &jh2bh(jh)->b_state))
                was_dirty = 1;

        __journal_unfile_buffer(jh);
        jh->b_transaction = jh->b_next_transaction;
        jh->b_next_transaction = NULL;
        __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata);
        J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

        if (was_dirty)
                set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
}

/*
 * For the unlocked version of this call, also make sure that any
 * hanging journal_head is cleaned up if necessary.
 *
 * __journal_refile_buffer is usually called as part of a single locked
 * operation on a buffer_head, in which the caller is probably going to
 * be hooking the journal_head onto other lists.  In that case it is up
 * to the caller to remove the journal_head if necessary.  For the
 * unlocked journal_refile_buffer call, the caller isn't going to be
 * doing anything else to the buffer so we need to do the cleanup
 * ourselves to avoid a jh leak.
 *
 * *** The journal_head may be freed by this call! ***
 */
void journal_refile_buffer(struct journal_head *jh)
{
        struct buffer_head *bh;

        spin_lock(&journal_datalist_lock);
        bh = jh2bh(jh);

        __journal_refile_buffer(jh);
        __journal_remove_journal_head(bh);

        spin_unlock(&journal_datalist_lock);
        __brelse(bh);
}
