1 |
1275 |
phoenix |
/*
|
2 |
|
|
* linux/fs/transaction.c
|
3 |
|
|
*
|
4 |
|
|
* Written by Stephen C. Tweedie <sct@redhat.com>, 1998
|
5 |
|
|
*
|
6 |
|
|
* Copyright 1998 Red Hat corp --- All Rights Reserved
|
7 |
|
|
*
|
8 |
|
|
* This file is part of the Linux kernel and is made available under
|
9 |
|
|
* the terms of the GNU General Public License, version 2, or at your
|
10 |
|
|
* option, any later version, incorporated herein by reference.
|
11 |
|
|
*
|
12 |
|
|
* Generic filesystem transaction handling code; part of the ext2fs
|
13 |
|
|
* journaling system.
|
14 |
|
|
*
|
15 |
|
|
* This file manages transactions (compound commits managed by the
|
16 |
|
|
* journaling code) and handles (individual atomic operations by the
|
17 |
|
|
* filesystem).
|
18 |
|
|
*/
|
19 |
|
|
|
20 |
|
|
#include <linux/sched.h>
|
21 |
|
|
#include <linux/fs.h>
|
22 |
|
|
#include <linux/jbd.h>
|
23 |
|
|
#include <linux/errno.h>
|
24 |
|
|
#include <linux/slab.h>
|
25 |
|
|
#include <linux/locks.h>
|
26 |
|
|
#include <linux/timer.h>
|
27 |
|
|
#include <linux/smp_lock.h>
|
28 |
|
|
#include <linux/mm.h>
|
29 |
|
|
|
30 |
|
|
extern spinlock_t journal_datalist_lock;
|
31 |
|
|
|
32 |
|
|
/*
|
33 |
|
|
* get_transaction: obtain a new transaction_t object.
|
34 |
|
|
*
|
35 |
|
|
* Simply allocate and initialise a new transaction. Create it in
|
36 |
|
|
* RUNNING state and add it to the current journal (which should not
|
37 |
|
|
* have an existing running transaction: we only make a new transaction
|
38 |
|
|
* once we have started to commit the old one).
|
39 |
|
|
*
|
40 |
|
|
* Preconditions:
|
41 |
|
|
* The journal MUST be locked. We don't perform atomic mallocs on the
|
42 |
|
|
* new transaction and we can't block without protecting against other
|
43 |
|
|
* processes trying to touch the journal while it is in transition.
|
44 |
|
|
*/
|
45 |
|
|
|
46 |
|
|
static transaction_t * get_transaction (journal_t * journal, int is_try)
{
	/*
	 * Build a fresh, zero-filled transaction in T_RUNNING state and
	 * install it as the journal's running transaction.  Returns the
	 * new transaction, or NULL when jbd_kmalloc() fails.  The is_try
	 * argument is accepted but not consulted in this function.
	 */
	transaction_t *txn = jbd_kmalloc(sizeof(*txn), GFP_NOFS);

	if (txn == NULL)
		return NULL;
	memset(txn, 0, sizeof(*txn));

	/* Identity and timing: next sequence number, expiry one commit
	 * interval from now. */
	txn->t_journal = journal;
	txn->t_state = T_RUNNING;
	txn->t_tid = journal->j_transaction_sequence++;
	txn->t_expires = jiffies + journal->j_commit_interval;
	INIT_LIST_HEAD(&txn->t_jcb);

	/* A zero commit interval means no commit timer is armed at all. */
	if (journal->j_commit_interval) {
		J_ASSERT (!journal->j_commit_timer_active);
		journal->j_commit_timer_active = 1;
		journal->j_commit_timer->expires = txn->t_expires;
		add_timer(journal->j_commit_timer);
	}

	/* There must not already be a running transaction to replace. */
	J_ASSERT (journal->j_running_transaction == NULL);
	journal->j_running_transaction = txn;

	return txn;
}
|
75 |
|
|
|
76 |
|
|
/*
|
77 |
|
|
* Handle management.
|
78 |
|
|
*
|
79 |
|
|
* A handle_t is an object which represents a single atomic update to a
|
80 |
|
|
* filesystem, and which tracks all of the modifications which form part
|
81 |
|
|
* of that one update.
|
82 |
|
|
*/
|
83 |
|
|
|
84 |
|
|
/*
|
85 |
|
|
* start_this_handle: Given a handle, deal with any locking or stalling
|
86 |
|
|
* needed to make sure that there is enough journal space for the handle
|
87 |
|
|
* to begin. Attach the handle to a transaction and set up the
|
88 |
|
|
* transaction's buffer credits.
|
89 |
|
|
*/
|
90 |
|
|
|
91 |
|
|
static int start_this_handle(journal_t *journal, handle_t *handle)
{
	/*
	 * Attach @handle to the journal's running transaction, creating
	 * one if needed, and reserve handle->h_buffer_credits credits in
	 * it.  May sleep waiting for a barrier, a locked transaction, a
	 * commit, or log space.
	 *
	 * Returns 0 on success, -ENOSPC if the handle asks for more
	 * credits than a whole transaction may hold, -EROFS if the
	 * journal is aborted or carries an unacknowledged error, and
	 * -ENOMEM if a new transaction could not be allocated.
	 */
	transaction_t *transaction;
	int needed;
	int nblocks = handle->h_buffer_credits;

	if (nblocks > journal->j_max_transaction_buffers) {
		jbd_debug(1, "JBD: %s wants too many credits (%d > %d)\n",
			  current->comm, nblocks,
			  journal->j_max_transaction_buffers);
		return -ENOSPC;
	}

	jbd_debug(3, "New handle %p going live.\n", handle);

repeat:

	lock_journal(journal);

repeat_locked:

	if (is_journal_aborted(journal) ||
	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
		unlock_journal(journal);
		return -EROFS;
	}

	/* Wait on the journal's transaction barrier if necessary */
	if (journal->j_barrier_count) {
		unlock_journal(journal);
		sleep_on(&journal->j_wait_transaction_locked);
		goto repeat;
	}

	/*
	 * get_transaction() returns NULL when its allocation fails.
	 * Report that to the caller instead of tripping the assertion
	 * below with the journal lock still held.
	 */
	if (!journal->j_running_transaction &&
	    !get_transaction(journal, 0)) {
		unlock_journal(journal);
		return -ENOMEM;
	}
	J_ASSERT(journal->j_running_transaction);

	transaction = journal->j_running_transaction;

	/* If the current transaction is locked down for commit, wait
	 * for the lock to be released. */

	if (transaction->t_state == T_LOCKED) {
		unlock_journal(journal);
		jbd_debug(3, "Handle %p stalling...\n", handle);
		sleep_on(&journal->j_wait_transaction_locked);
		goto repeat;
	}

	/* If there is not enough space left in the log to write all
	 * potential buffers requested by this operation, we need to
	 * stall pending a log checkpoint to free some more log
	 * space. */

	needed = transaction->t_outstanding_credits + nblocks;

	if (needed > journal->j_max_transaction_buffers) {
		/* If the current transaction is already too large, then
		 * start to commit it: we can then go back and attach
		 * this handle to a new transaction. */

		jbd_debug(2, "Handle %p starting new commit...\n", handle);
		log_start_commit(journal, transaction);
		unlock_journal(journal);
		sleep_on(&journal->j_wait_transaction_locked);
		lock_journal(journal);
		goto repeat_locked;
	}

	/*
	 * The commit code assumes that it can get enough log space
	 * without forcing a checkpoint.  This is *critical* for
	 * correctness: a checkpoint of a buffer which is also
	 * associated with a committing transaction creates a deadlock,
	 * so commit simply cannot force through checkpoints.
	 *
	 * We must therefore ensure the necessary space in the journal
	 * *before* starting to dirty potentially checkpointed buffers
	 * in the new transaction.
	 *
	 * The worst part is, any transaction currently committing can
	 * reduce the free space arbitrarily.  Be careful to account for
	 * those buffers when checkpointing.
	 */

	/*
	 * @@@ AKPM: This seems rather over-defensive.  We're giving commit
	 * a _lot_ of headroom: 1/4 of the journal plus the size of
	 * the committing transaction.  Really, we only need to give it
	 * committing_transaction->t_outstanding_credits plus "enough" for
	 * the log control blocks.
	 * Also, this test is inconsistent with the matching one in
	 * journal_extend().
	 */
	needed = journal->j_max_transaction_buffers;
	if (journal->j_committing_transaction)
		needed += journal->j_committing_transaction->
					t_outstanding_credits;

	if (log_space_left(journal) < needed) {
		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
		log_wait_for_space(journal, needed);
		goto repeat_locked;
	}

	/* OK, account for the buffers that this operation expects to
	 * use and add the handle to the running transaction. */

	handle->h_transaction = transaction;
	transaction->t_outstanding_credits += nblocks;
	transaction->t_updates++;
	transaction->t_handle_count++;
	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
		  handle, nblocks, transaction->t_outstanding_credits,
		  log_space_left(journal));

	unlock_journal(journal);

	return 0;
}
|
213 |
|
|
|
214 |
|
|
/* Allocate a new handle. This should probably be in a slab... */
|
215 |
|
|
static handle_t *new_handle(int nblocks)
|
216 |
|
|
{
|
217 |
|
|
handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
|
218 |
|
|
if (!handle)
|
219 |
|
|
return NULL;
|
220 |
|
|
memset(handle, 0, sizeof (handle_t));
|
221 |
|
|
handle->h_buffer_credits = nblocks;
|
222 |
|
|
handle->h_ref = 1;
|
223 |
|
|
INIT_LIST_HEAD(&handle->h_jcb);
|
224 |
|
|
|
225 |
|
|
return handle;
|
226 |
|
|
}
|
227 |
|
|
|
228 |
|
|
/**
|
229 |
|
|
* handle_t *journal_start() - Obtain a new handle.
|
230 |
|
|
* @journal: Journal to start transaction on.
|
231 |
|
|
* @nblocks: number of block buffer we might modify
|
232 |
|
|
*
|
233 |
|
|
* We make sure that the transaction can guarantee at least nblocks of
|
234 |
|
|
* modified buffers in the log. We block until the log can guarantee
|
235 |
|
|
* that much space.
|
236 |
|
|
*
|
237 |
|
|
* This function is visible to journal users (like ext3fs), so is not
|
238 |
|
|
* called with the journal already locked.
|
239 |
|
|
*
|
240 |
|
|
* Return a pointer to a newly allocated handle, or an ERR_PTR() value on failure
|
241 |
|
|
*/
|
242 |
|
|
handle_t *journal_start(journal_t *journal, int nblocks)
{
	handle_t *handle = journal_current_handle();
	int rc;

	/* No journal at all is reported as a read-only condition. */
	if (journal == NULL)
		return ERR_PTR(-EROFS);

	/* Nested start: the task already has a handle, so just take
	 * another reference on it. */
	if (handle != NULL) {
		J_ASSERT(handle->h_transaction->t_journal == journal);
		handle->h_ref++;
		return handle;
	}

	handle = new_handle(nblocks);
	if (handle == NULL)
		return ERR_PTR(-ENOMEM);

	/* Publish the handle as the task's current one before it is
	 * attached to a transaction; undo that if attaching fails. */
	current->journal_info = handle;

	rc = start_this_handle(journal, handle);
	if (rc >= 0)
		return handle;

	kfree(handle);
	current->journal_info = NULL;
	return ERR_PTR(rc);
}
|
271 |
|
|
|
272 |
|
|
/*
|
273 |
|
|
* Return zero on success
|
274 |
|
|
*/
|
275 |
|
|
static int try_start_this_handle(journal_t *journal, handle_t *handle)
{
	/*
	 * Non-sleeping counterpart of start_this_handle(): attach @handle
	 * to the running transaction only if that is possible right now.
	 *
	 * Returns 0 on success, -EROFS if the journal is aborted or has
	 * an unacknowledged error, and -1 for every other refusal
	 * (barrier active, no transaction obtainable, transaction locked
	 * or too full, not enough log space).
	 */
	transaction_t *transaction;
	int needed;
	int nblocks = handle->h_buffer_credits;
	int ret = 0;

	jbd_debug(3, "New handle %p maybe going live.\n", handle);

	lock_journal(journal);

	if (is_journal_aborted(journal) ||
	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
		ret = -EROFS;
		goto fail_unlock;
	}

	if (journal->j_barrier_count)
		goto fail_unlock;

	if (!journal->j_running_transaction &&
	    !get_transaction(journal, 1))
		goto fail_unlock;

	transaction = journal->j_running_transaction;
	if (transaction->t_state == T_LOCKED)
		goto fail_unlock;

	needed = transaction->t_outstanding_credits + nblocks;
	/* We could run log_start_commit here */
	if (needed > journal->j_max_transaction_buffers)
		goto fail_unlock;

	/* Same headroom rule as the blocking path: a full transaction's
	 * worth of buffers plus whatever the committing one still owes. */
	needed = journal->j_max_transaction_buffers;
	if (journal->j_committing_transaction)
		needed += journal->j_committing_transaction->
						t_outstanding_credits;

	if (log_space_left(journal) < needed)
		goto fail_unlock;

	handle->h_transaction = transaction;
	transaction->t_outstanding_credits += nblocks;
	transaction->t_updates++;
	/* start_this_handle() counts every handle it attaches in
	 * t_handle_count; keep the same accounting on this path. */
	transaction->t_handle_count++;
	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
		  handle, nblocks, transaction->t_outstanding_credits,
		  log_space_left(journal));
	unlock_journal(journal);
	return 0;

fail_unlock:
	unlock_journal(journal);
	/* Refusals that did not set a specific errno are reported as -1. */
	if (ret >= 0)
		ret = -1;
	return ret;
}
|
330 |
|
|
|
331 |
|
|
/**
|
332 |
|
|
* handle_t *journal_try_start() - Don't block, but try and get a handle
|
333 |
|
|
* @journal: Journal to start transaction on.
|
334 |
|
|
* @nblocks: number of block buffer we might modify
|
335 |
|
|
*
|
336 |
|
|
* Try to start a handle, but non-blockingly. If we weren't able
|
337 |
|
|
* to, return an ERR_PTR value.
|
338 |
|
|
*/
|
339 |
|
|
handle_t *journal_try_start(journal_t *journal, int nblocks)
{
	handle_t *handle = journal_current_handle();
	int rc;

	if (journal == NULL)
		return ERR_PTR(-EROFS);

	if (handle == NULL) {
		jbd_debug(4, "no current transaction\n");
	} else {
		/* Nested start: reuse the task's handle unless it has
		 * been aborted. */
		jbd_debug(4, "h_ref %d -> %d\n",
			  handle->h_ref,
			  handle->h_ref + 1);
		J_ASSERT(handle->h_transaction->t_journal == journal);
		if (is_handle_aborted(handle))
			return ERR_PTR(-EIO);
		handle->h_ref++;
		return handle;
	}

	if (is_journal_aborted(journal))
		return ERR_PTR(-EIO);

	handle = new_handle(nblocks);
	if (handle == NULL)
		return ERR_PTR(-ENOMEM);

	/* Publish the handle first, then try to attach it without
	 * sleeping; roll both steps back on failure. */
	current->journal_info = handle;

	rc = try_start_this_handle(journal, handle);
	if (rc >= 0)
		return handle;

	kfree(handle);
	current->journal_info = NULL;
	return ERR_PTR(rc);
}
|
378 |
|
|
|
379 |
|
|
/**
|
380 |
|
|
* int journal_extend() - extend buffer credits.
|
381 |
|
|
* @handle: handle to 'extend'
|
382 |
|
|
* @nblocks: nr blocks to try to extend by.
|
383 |
|
|
*
|
384 |
|
|
* Some transactions, such as large extends and truncates, can be done
|
385 |
|
|
* atomically all at once or in several stages. The operation requests
|
386 |
|
|
* a credit for a number of buffer modifications in advance, but can
|
387 |
|
|
* extend its credit if it needs more.
|
388 |
|
|
*
|
389 |
|
|
* journal_extend tries to give the running handle more buffer credits.
|
390 |
|
|
* It does not guarantee that allocation - this is a best-effort only.
|
391 |
|
|
* The calling process MUST be able to deal cleanly with a failure to
|
392 |
|
|
* extend here.
|
393 |
|
|
*
|
394 |
|
|
* Return 0 on success, non-zero on failure.
|
395 |
|
|
*
|
396 |
|
|
* return code < 0 implies an error
|
397 |
|
|
* return code > 0 implies normal transaction-full status.
|
398 |
|
|
*/
|
399 |
|
|
int journal_extend (handle_t *handle, int nblocks)
{
	transaction_t *txn = handle->h_transaction;
	journal_t *jnl = txn->t_journal;
	int total;
	int rc;

	lock_journal(jnl);

	if (is_handle_aborted(handle)) {
		rc = -EIO;
		goto unlock;
	}

	/* Every refusal below is the positive "cannot extend" status
	 * rather than an error. */
	rc = 1;

	/* Don't extend a locked-down transaction! */
	if (handle->h_transaction->t_state != T_RUNNING) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction not running\n", handle, nblocks);
		goto unlock;
	}

	/* Credits the transaction would hold if the request is granted. */
	total = txn->t_outstanding_credits + nblocks;

	if (total > jnl->j_max_transaction_buffers) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction too large\n", handle, nblocks);
		goto unlock;
	}

	if (total > log_space_left(jnl)) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "insufficient log space\n", handle, nblocks);
		goto unlock;
	}

	/* Granted: charge both the handle and its transaction. */
	handle->h_buffer_credits += nblocks;
	txn->t_outstanding_credits += nblocks;
	rc = 0;

	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);

unlock:
	unlock_journal(jnl);
	return rc;
}
|
445 |
|
|
|
446 |
|
|
|
447 |
|
|
/**
|
448 |
|
|
* int journal_restart() - restart a handle .
|
449 |
|
|
* @handle: handle to restart
|
450 |
|
|
* @nblocks: nr credits requested
|
451 |
|
|
*
|
452 |
|
|
* Restart a handle for a multi-transaction filesystem
|
453 |
|
|
* operation.
|
454 |
|
|
*
|
455 |
|
|
* If the journal_extend() call above fails to grant new buffer credits
|
456 |
|
|
* to a running handle, a call to journal_restart will commit the
|
457 |
|
|
* handle's transaction so far and reattach the handle to a new
|
458 |
|
|
* transaction capable of guaranteeing the requested number of
|
459 |
|
|
* credits.
|
460 |
|
|
*/
|
461 |
|
|
|
462 |
|
|
int journal_restart(handle_t *handle, int nblocks)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;

	/* An aborted handle is left alone; this is reported as success. */
	if (is_handle_aborted(handle))
		return 0;

	J_ASSERT (transaction->t_updates > 0);
	J_ASSERT (journal_current_handle() == handle);

	/* Detach from the old transaction: return the handle's remaining
	 * credits and drop its update reference, waking anyone waiting
	 * for the update count to reach zero. */
	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	if (--transaction->t_updates == 0)
		wake_up(&journal->j_wait_updates);

	jbd_debug(2, "restarting handle %p\n", handle);
	log_start_commit(journal, transaction);

	/* Reattach with the newly requested credit count. */
	handle->h_buffer_credits = nblocks;
	return start_this_handle(journal, handle);
}
|
492 |
|
|
|
493 |
|
|
|
494 |
|
|
/**
|
495 |
|
|
* void journal_lock_updates () - establish a transaction barrier.
|
496 |
|
|
* @journal: Journal to establish a barrier on.
|
497 |
|
|
*
|
498 |
|
|
* This locks out any further updates from being started, and blocks
|
499 |
|
|
* until all existing updates have completed, returning only once the
|
500 |
|
|
* journal is in a quiescent state with no updates running.
|
501 |
|
|
*
|
502 |
|
|
* The journal lock should not be held on entry.
|
503 |
|
|
*/
|
504 |
|
|
void journal_lock_updates (journal_t *journal)
{
	lock_journal(journal);
	journal->j_barrier_count++;

	/* Sleep until there is either no running transaction or it has
	 * no updates outstanding.  The journal lock is dropped around
	 * each sleep. */
	for (;;) {
		transaction_t *running = journal->j_running_transaction;

		if (running == NULL || running->t_updates == 0)
			break;

		unlock_journal(journal);
		sleep_on(&journal->j_wait_updates);
		lock_journal(journal);
	}

	unlock_journal(journal);

	/* Ordinary updates are now held off by the barrier count; the
	 * j_barrier semaphore additionally serialises concurrent
	 * journal_lock_updates() callers against each other. */
	down(&journal->j_barrier);
}
|
530 |
|
|
|
531 |
|
|
/**
|
532 |
|
|
* void journal_unlock_updates (journal_t* journal) - release barrier
|
533 |
|
|
* @journal: Journal to release the barrier on.
|
534 |
|
|
*
|
535 |
|
|
* Release a transaction barrier obtained with journal_lock_updates().
|
536 |
|
|
*
|
537 |
|
|
* Should be called without the journal lock held.
|
538 |
|
|
*/
|
539 |
|
|
void journal_unlock_updates (journal_t *journal)
{
	lock_journal(journal);

	/* Releasing a barrier that was never taken is a caller bug. */
	J_ASSERT (journal->j_barrier_count != 0);

	/* Let the next journal_lock_updates() caller in, drop our share
	 * of the barrier count, and wake handles that stalled on it. */
	up(&journal->j_barrier);
	journal->j_barrier_count--;
	wake_up(&journal->j_wait_transaction_locked);

	unlock_journal(journal);
}
|
550 |
|
|
|
551 |
|
|
/*
|
552 |
|
|
* if the buffer is already part of the current transaction, then there
|
553 |
|
|
* is nothing we need to do. if it is already part of a prior
|
554 |
|
|
* transaction which we are still committing to disk, then we need to
|
555 |
|
|
* make sure that we do not overwrite the old copy: we do copy-out to
|
556 |
|
|
* preserve the copy going to disk. we also account the buffer against
|
557 |
|
|
* the handle's metadata buffer credits (unless the buffer is already
|
558 |
|
|
* part of the transaction, that is).
|
559 |
|
|
*/
|
560 |
|
|
static int
|
561 |
|
|
do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy)
|
562 |
|
|
{
|
563 |
|
|
struct buffer_head *bh;
|
564 |
|
|
transaction_t *transaction = handle->h_transaction;
|
565 |
|
|
journal_t *journal = transaction->t_journal;
|
566 |
|
|
int error;
|
567 |
|
|
char *frozen_buffer = NULL;
|
568 |
|
|
int need_copy = 0;
|
569 |
|
|
int locked;
|
570 |
|
|
|
571 |
|
|
jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
|
572 |
|
|
|
573 |
|
|
JBUFFER_TRACE(jh, "entry");
|
574 |
|
|
repeat:
|
575 |
|
|
bh = jh2bh(jh);
|
576 |
|
|
|
577 |
|
|
/* @@@ Need to check for errors here at some point. */
|
578 |
|
|
|
579 |
|
|
/*
|
580 |
|
|
* AKPM: we have replaced all the lock_journal_bh_wait() stuff with a
|
581 |
|
|
* simple lock_journal(). This code here will care for locked buffers.
|
582 |
|
|
*/
|
583 |
|
|
locked = test_and_set_bit(BH_Lock, &bh->b_state);
|
584 |
|
|
if (locked) {
|
585 |
|
|
/* We can't reliably test the buffer state if we found
|
586 |
|
|
* it already locked, so just wait for the lock and
|
587 |
|
|
* retry. */
|
588 |
|
|
unlock_journal(journal);
|
589 |
|
|
__wait_on_buffer(bh);
|
590 |
|
|
lock_journal(journal);
|
591 |
|
|
goto repeat;
|
592 |
|
|
}
|
593 |
|
|
|
594 |
|
|
/* We now hold the buffer lock so it is safe to query the buffer
|
595 |
|
|
* state. Is the buffer dirty?
|
596 |
|
|
*
|
597 |
|
|
* If so, there are two possibilities. The buffer may be
|
598 |
|
|
* non-journaled, and undergoing a quite legitimate writeback.
|
599 |
|
|
* Otherwise, it is journaled, and we don't expect dirty buffers
|
600 |
|
|
* in that state (the buffers should be marked JBD_Dirty
|
601 |
|
|
* instead.) So either the IO is being done under our own
|
602 |
|
|
* control and this is a bug, or it's a third party IO such as
|
603 |
|
|
* dump(8) (which may leave the buffer scheduled for read ---
|
604 |
|
|
* ie. locked but not dirty) or tune2fs (which may actually have
|
605 |
|
|
* the buffer dirtied, ugh.) */
|
606 |
|
|
|
607 |
|
|
if (buffer_dirty(bh)) {
|
608 |
|
|
spin_lock(&journal_datalist_lock);
|
609 |
|
|
/* First question: is this buffer already part of the
|
610 |
|
|
* current transaction or the existing committing
|
611 |
|
|
* transaction? */
|
612 |
|
|
if (jh->b_transaction) {
|
613 |
|
|
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
|
614 |
|
|
jh->b_transaction == journal->j_committing_transaction);
|
615 |
|
|
if (jh->b_next_transaction)
|
616 |
|
|
J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
|
617 |
|
|
JBUFFER_TRACE(jh, "Unexpected dirty buffer");
|
618 |
|
|
jbd_unexpected_dirty_buffer(jh);
|
619 |
|
|
}
|
620 |
|
|
spin_unlock(&journal_datalist_lock);
|
621 |
|
|
}
|
622 |
|
|
|
623 |
|
|
unlock_buffer(bh);
|
624 |
|
|
|
625 |
|
|
error = -EROFS;
|
626 |
|
|
if (is_handle_aborted(handle))
|
627 |
|
|
goto out_unlocked;
|
628 |
|
|
error = 0;
|
629 |
|
|
|
630 |
|
|
spin_lock(&journal_datalist_lock);
|
631 |
|
|
|
632 |
|
|
/* The buffer is already part of this transaction if
|
633 |
|
|
* b_transaction or b_next_transaction points to it. */
|
634 |
|
|
|
635 |
|
|
if (jh->b_transaction == transaction ||
|
636 |
|
|
jh->b_next_transaction == transaction)
|
637 |
|
|
goto done_locked;
|
638 |
|
|
|
639 |
|
|
/* If there is already a copy-out version of this buffer, then
|
640 |
|
|
* we don't need to make another one. */
|
641 |
|
|
|
642 |
|
|
if (jh->b_frozen_data) {
|
643 |
|
|
JBUFFER_TRACE(jh, "has frozen data");
|
644 |
|
|
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
|
645 |
|
|
jh->b_next_transaction = transaction;
|
646 |
|
|
|
647 |
|
|
J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
|
648 |
|
|
handle->h_buffer_credits--;
|
649 |
|
|
goto done_locked;
|
650 |
|
|
}
|
651 |
|
|
|
652 |
|
|
/* Is there data here we need to preserve? */
|
653 |
|
|
|
654 |
|
|
if (jh->b_transaction && jh->b_transaction != transaction) {
|
655 |
|
|
JBUFFER_TRACE(jh, "owned by older transaction");
|
656 |
|
|
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
|
657 |
|
|
J_ASSERT_JH(jh, jh->b_transaction ==
|
658 |
|
|
journal->j_committing_transaction);
|
659 |
|
|
|
660 |
|
|
/* There is one case we have to be very careful about.
|
661 |
|
|
* If the committing transaction is currently writing
|
662 |
|
|
* this buffer out to disk and has NOT made a copy-out,
|
663 |
|
|
* then we cannot modify the buffer contents at all
|
664 |
|
|
* right now. The essence of copy-out is that it is the
|
665 |
|
|
* extra copy, not the primary copy, which gets
|
666 |
|
|
* journaled. If the primary copy is already going to
|
667 |
|
|
* disk then we cannot do copy-out here. */
|
668 |
|
|
|
669 |
|
|
if (jh->b_jlist == BJ_Shadow) {
|
670 |
|
|
JBUFFER_TRACE(jh, "on shadow: sleep");
|
671 |
|
|
spin_unlock(&journal_datalist_lock);
|
672 |
|
|
unlock_journal(journal);
|
673 |
|
|
/* commit wakes up all shadow buffers after IO */
|
674 |
|
|
wait_event(jh2bh(jh)->b_wait,
|
675 |
|
|
jh->b_jlist != BJ_Shadow);
|
676 |
|
|
lock_journal(journal);
|
677 |
|
|
goto repeat;
|
678 |
|
|
}
|
679 |
|
|
|
680 |
|
|
/* Only do the copy if the currently-owning transaction
|
681 |
|
|
* still needs it. If it is on the Forget list, the
|
682 |
|
|
* committing transaction is past that stage. The
|
683 |
|
|
* buffer had better remain locked during the kmalloc,
|
684 |
|
|
* but that should be true --- we hold the journal lock
|
685 |
|
|
* still and the buffer is already on the BUF_JOURNAL
|
686 |
|
|
* list so won't be flushed.
|
687 |
|
|
*
|
688 |
|
|
* Subtle point, though: if this is a get_undo_access,
|
689 |
|
|
* then we will be relying on the frozen_data to contain
|
690 |
|
|
* the new value of the committed_data record after the
|
691 |
|
|
* transaction, so we HAVE to force the frozen_data copy
|
692 |
|
|
* in that case. */
|
693 |
|
|
|
694 |
|
|
if (jh->b_jlist != BJ_Forget || force_copy) {
|
695 |
|
|
JBUFFER_TRACE(jh, "generate frozen data");
|
696 |
|
|
if (!frozen_buffer) {
|
697 |
|
|
JBUFFER_TRACE(jh, "allocate memory for buffer");
|
698 |
|
|
spin_unlock(&journal_datalist_lock);
|
699 |
|
|
unlock_journal(journal);
|
700 |
|
|
frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
|
701 |
|
|
GFP_NOFS);
|
702 |
|
|
lock_journal(journal);
|
703 |
|
|
if (!frozen_buffer) {
|
704 |
|
|
printk(KERN_EMERG
|
705 |
|
|
"%s: OOM for frozen_buffer\n",
|
706 |
|
|
__FUNCTION__);
|
707 |
|
|
JBUFFER_TRACE(jh, "oom!");
|
708 |
|
|
error = -ENOMEM;
|
709 |
|
|
spin_lock(&journal_datalist_lock);
|
710 |
|
|
goto done_locked;
|
711 |
|
|
}
|
712 |
|
|
goto repeat;
|
713 |
|
|
}
|
714 |
|
|
|
715 |
|
|
jh->b_frozen_data = frozen_buffer;
|
716 |
|
|
frozen_buffer = NULL;
|
717 |
|
|
need_copy = 1;
|
718 |
|
|
}
|
719 |
|
|
jh->b_next_transaction = transaction;
|
720 |
|
|
}
|
721 |
|
|
|
722 |
|
|
J_ASSERT(handle->h_buffer_credits > 0);
|
723 |
|
|
handle->h_buffer_credits--;
|
724 |
|
|
|
725 |
|
|
/* Finally, if the buffer is not journaled right now, we need to
|
726 |
|
|
* make sure it doesn't get written to disk before the caller
|
727 |
|
|
* actually commits the new data. */
|
728 |
|
|
|
729 |
|
|
if (!jh->b_transaction) {
|
730 |
|
|
JBUFFER_TRACE(jh, "no transaction");
|
731 |
|
|
J_ASSERT_JH(jh, !jh->b_next_transaction);
|
732 |
|
|
jh->b_transaction = transaction;
|
733 |
|
|
JBUFFER_TRACE(jh, "file as BJ_Reserved");
|
734 |
|
|
__journal_file_buffer(jh, transaction, BJ_Reserved);
|
735 |
|
|
}
|
736 |
|
|
|
737 |
|
|
done_locked:
|
738 |
|
|
spin_unlock(&journal_datalist_lock);
|
739 |
|
|
if (need_copy) {
|
740 |
|
|
struct page *page;
|
741 |
|
|
int offset;
|
742 |
|
|
char *source;
|
743 |
|
|
|
744 |
|
|
J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
|
745 |
|
|
"Possible IO failure.\n");
|
746 |
|
|
page = jh2bh(jh)->b_page;
|
747 |
|
|
offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
|
748 |
|
|
source = kmap(page);
|
749 |
|
|
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
|
750 |
|
|
kunmap(page);
|
751 |
|
|
}
|
752 |
|
|
|
753 |
|
|
|
754 |
|
|
/* If we are about to journal a buffer, then any revoke pending
|
755 |
|
|
on it is no longer valid. */
|
756 |
|
|
journal_cancel_revoke(handle, jh);
|
757 |
|
|
|
758 |
|
|
out_unlocked:
|
759 |
|
|
if (frozen_buffer)
|
760 |
|
|
kfree(frozen_buffer);
|
761 |
|
|
|
762 |
|
|
JBUFFER_TRACE(jh, "exit");
|
763 |
|
|
return error;
|
764 |
|
|
}
|
765 |
|
|
|
766 |
|
|
/**
|
767 |
|
|
* int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
|
768 |
|
|
* @handle: transaction to add buffer modifications to
|
769 |
|
|
* @bh: bh to be used for metadata writes
|
770 |
|
|
*
|
771 |
|
|
* Returns an error code or 0 on success.
|
772 |
|
|
*
|
773 |
|
|
* In full data journalling mode the buffer may be of type BJ_AsyncData,
|
774 |
|
|
* because we're write()ing a buffer which is also part of a shared mapping.
|
775 |
|
|
*/
|
776 |
|
|
|
777 |
|
|
int journal_get_write_access (handle_t *handle, struct buffer_head *bh)
{
	journal_t *journal = handle->h_transaction->t_journal;
	struct journal_head *jh;
	int rc;

	jh = journal_add_journal_head(bh);

	/* The real work is done under the journal lock so that fields
	 * the log thread also manipulates are not changed under us. */
	lock_journal(journal);
	rc = do_get_write_access(handle, jh, 0);
	journal_unlock_journal_head(jh);
	unlock_journal(journal);

	return rc;
}
|
793 |
|
|
|
794 |
|
|
|
795 |
|
|
/*
|
796 |
|
|
* When the user wants to journal a newly created buffer_head
|
797 |
|
|
* (ie. getblk() returned a new buffer and we are going to populate it
|
798 |
|
|
* manually rather than reading off disk), then we need to keep the
|
799 |
|
|
* buffer_head locked until it has been completely filled with new
|
800 |
|
|
* data. In this case, we should be able to make the assertion that
|
801 |
|
|
* the bh is not already part of an existing transaction.
|
802 |
|
|
*
|
803 |
|
|
* The buffer should already be locked by the caller by this point.
|
804 |
|
|
* There is no lock ranking violation: it was a newly created,
|
805 |
|
|
* unlocked buffer beforehand. */
|
806 |
|
|
|
807 |
|
|
/**
|
808 |
|
|
* int journal_get_create_access () - notify intent to use newly created bh
|
809 |
|
|
* @handle: transaction to add the new buffer to
|
810 |
|
|
* @bh: new buffer.
|
811 |
|
|
*
|
812 |
|
|
* Call this if you create a new bh.
|
813 |
|
|
*/
|
814 |
|
|
int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
|
815 |
|
|
{
|
816 |
|
|
transaction_t *transaction = handle->h_transaction;
|
817 |
|
|
journal_t *journal = transaction->t_journal;
|
818 |
|
|
struct journal_head *jh = journal_add_journal_head(bh);
|
819 |
|
|
int err;
|
820 |
|
|
|
821 |
|
|
jbd_debug(5, "journal_head %p\n", jh);
|
822 |
|
|
lock_journal(journal);
|
823 |
|
|
err = -EROFS;
|
824 |
|
|
if (is_handle_aborted(handle))
|
825 |
|
|
goto out;
|
826 |
|
|
err = 0;
|
827 |
|
|
|
828 |
|
|
JBUFFER_TRACE(jh, "entry");
|
829 |
|
|
/* The buffer may already belong to this transaction due to
|
830 |
|
|
* pre-zeroing in the filesystem's new_block code. It may also
|
831 |
|
|
* be on the previous, committing transaction's lists, but it
|
832 |
|
|
* HAS to be in Forget state in that case: the transaction must
|
833 |
|
|
* have deleted the buffer for it to be reused here. */
|
834 |
|
|
J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
|
835 |
|
|
jh->b_transaction == NULL ||
|
836 |
|
|
(jh->b_transaction == journal->j_committing_transaction &&
|
837 |
|
|
jh->b_jlist == BJ_Forget)));
|
838 |
|
|
|
839 |
|
|
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
|
840 |
|
|
J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
|
841 |
|
|
|
842 |
|
|
J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
|
843 |
|
|
handle->h_buffer_credits--;
|
844 |
|
|
|
845 |
|
|
spin_lock(&journal_datalist_lock);
|
846 |
|
|
if (jh->b_transaction == NULL) {
|
847 |
|
|
jh->b_transaction = transaction;
|
848 |
|
|
JBUFFER_TRACE(jh, "file as BJ_Reserved");
|
849 |
|
|
__journal_file_buffer(jh, transaction, BJ_Reserved);
|
850 |
|
|
JBUFFER_TRACE(jh, "refile");
|
851 |
|
|
refile_buffer(jh2bh(jh));
|
852 |
|
|
} else if (jh->b_transaction == journal->j_committing_transaction) {
|
853 |
|
|
JBUFFER_TRACE(jh, "set next transaction");
|
854 |
|
|
jh->b_next_transaction = transaction;
|
855 |
|
|
}
|
856 |
|
|
spin_unlock(&journal_datalist_lock);
|
857 |
|
|
|
858 |
|
|
/*
|
859 |
|
|
* akpm: I added this. ext3_alloc_branch can pick up new indirect
|
860 |
|
|
* blocks which contain freed but then revoked metadata. We need
|
861 |
|
|
* to cancel the revoke in case we end up freeing it yet again
|
862 |
|
|
* and the reallocating as data - this would cause a second revoke,
|
863 |
|
|
* which hits an assertion error.
|
864 |
|
|
*/
|
865 |
|
|
JBUFFER_TRACE(jh, "cancelling revoke");
|
866 |
|
|
journal_cancel_revoke(handle, jh);
|
867 |
|
|
journal_unlock_journal_head(jh);
|
868 |
|
|
out:
|
869 |
|
|
unlock_journal(journal);
|
870 |
|
|
return err;
|
871 |
|
|
}
|
872 |
|
|
|
873 |
|
|
|
874 |
|
|
|
875 |
|
|
/**
|
876 |
|
|
* int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
|
877 |
|
|
* @handle: transaction
|
878 |
|
|
* @bh: buffer to undo
|
879 |
|
|
*
|
880 |
|
|
* Sometimes there is a need to distinguish between metadata which has
|
881 |
|
|
* been committed to disk and that which has not. The ext3fs code uses
|
882 |
|
|
* this for freeing and allocating space, we have to make sure that we
|
883 |
|
|
* do not reuse freed space until the deallocation has been committed,
|
884 |
|
|
* since if we overwrote that space we would make the delete
|
885 |
|
|
* un-rewindable in case of a crash.
|
886 |
|
|
*
|
887 |
|
|
* To deal with that, journal_get_undo_access requests write access to a
|
888 |
|
|
* buffer for parts of non-rewindable operations such as delete
|
889 |
|
|
* operations on the bitmaps. The journaling code must keep a copy of
|
890 |
|
|
* the buffer's contents prior to the undo_access call until such time
|
891 |
|
|
* as we know that the buffer has definitely been committed to disk.
|
892 |
|
|
*
|
893 |
|
|
* We never need to know which transaction the committed data is part
|
894 |
|
|
* of, buffers touched here are guaranteed to be dirtied later and so
|
895 |
|
|
* will be committed to a new transaction in due course, at which point
|
896 |
|
|
* we can discard the old committed data pointer.
|
897 |
|
|
*
|
898 |
|
|
* Returns error number or 0 on success.
|
899 |
|
|
*/
|
900 |
|
|
int journal_get_undo_access (handle_t *handle, struct buffer_head *bh)
{
	journal_t *journal = handle->h_transaction->t_journal;
	int err;
	struct journal_head *jh = journal_add_journal_head(bh);

	JBUFFER_TRACE(jh, "entry");
	lock_journal(journal);

	/*
	 * Get write access before looking at b_committed_data: the call
	 * can drop the journal lock, and the committed copy must be taken
	 * atomically with respect to completion of outstanding commits.
	 */
	err = do_get_write_access (handle, jh, 1);
	if (err)
		goto out;

	/* A preserved copy already exists: nothing more to do. */
	if (jh->b_committed_data)
		goto out;

	/* Snapshot the current buffer contents as the committed copy. */
	JBUFFER_TRACE(jh, "generate b_committed data");
	jh->b_committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
	if (!jh->b_committed_data) {
		printk(KERN_EMERG "%s: No memory for committed data!\n",
			__FUNCTION__);
		err = -ENOMEM;
		goto out;
	}

	memcpy (jh->b_committed_data, jh2bh(jh)->b_data, jh2bh(jh)->b_size);

out:
	if (!err) {
		J_ASSERT_JH(jh, jh->b_committed_data);
	}
	journal_unlock_journal_head(jh);
	unlock_journal(journal);
	return err;
}
|
940 |
|
|
|
941 |
|
|
/**
|
942 |
|
|
* int journal_dirty_data() - mark a buffer as containing dirty data which needs to be flushed before we can commit the current transaction.
|
943 |
|
|
* @handle: transaction
|
944 |
|
|
* @bh: bufferhead to mark
|
945 |
|
|
* @async: flag
|
946 |
|
|
*
|
947 |
|
|
* The buffer is placed on the transaction's data list and is marked as
|
948 |
|
|
* belonging to the transaction.
|
949 |
|
|
*
|
950 |
|
|
* If `async' is set then the writeback will be initiated by the caller
|
951 |
|
|
* using submit_bh -> end_buffer_io_async. We put the buffer onto
|
952 |
|
|
* t_async_datalist.
|
953 |
|
|
*
|
954 |
|
|
* Returns error number or 0 on success.
|
955 |
|
|
*/
|
956 |
|
|
int journal_dirty_data (handle_t *handle, struct buffer_head *bh, int async)
{
	/*
	 * This can be reached via page_launder->ext3_writepage from
	 * kswapd, so it must not block.  Nothing below needs lock_journal
	 * when `async' is set.
	 *
	 * A buffer on the current transaction is moved freely between
	 * BJ_AsyncData and BJ_SyncData, following whoever asked last.
	 */
	journal_t *journal = handle->h_transaction->t_journal;
	int drop_ref = 0;
	int target_list = async ? BJ_AsyncData : BJ_SyncData;
	struct journal_head *jh;

	if (is_handle_aborted(handle))
		return 0;

	jh = journal_add_journal_head(bh);
	JBUFFER_TRACE(jh, "entry");

	/*
	 * The buffer could *already* be dirty.  Writeout can start
	 * at any time.
	 */
	jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);

	/*
	 * The buffer may already be attached to a transaction:
	 *
	 * 1) The current running transaction.  Refile it, in case it was
	 *    allocated as metadata, deallocated, then reallocated as data.
	 * 2) The previous, still-committing transaction.  If the buffer is
	 *    guaranteed to reach disk before that transaction commits, it
	 *    is also guaranteed to reach disk before ours does.
	 *
	 * In case (2) the buffer must not already exist as metadata, as
	 * that would violate write ordering (a transaction may write its
	 * data at any point, even before the committing transaction has
	 * committed).  Callers must never allow that; this layer cannot
	 * repair it.
	 */
	spin_lock(&journal_datalist_lock);

	if (!jh->b_transaction) {
		JBUFFER_TRACE(jh, "not on a transaction");
		__journal_file_buffer(jh, handle->h_transaction, target_list);
		goto no_journal;
	}

	JBUFFER_TRACE(jh, "has transaction");
	if (jh->b_transaction != handle->h_transaction) {
		JBUFFER_TRACE(jh, "belongs to older transaction");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);

		/*
		 * b_next_transaction is deliberately not asserted to be
		 * NULL here.  In data=journal mode a write() can leave the
		 * buffer on the committing transaction with
		 * b_next_transaction pointing at the running one, and a
		 * writepage() through a shared mapping can then land here.
		 * It may be too late to journal the data; falling through
		 * is enough, since the data will be dirty and will be
		 * checkpointed.
		 */

		/*
		 * When journalling data, a buffer that was write()n may be
		 * metadata, forget or shadow against the committing
		 * transaction while the same page is being writepage()'d
		 * via a mapping.  Rather than steal it from commit, skip
		 * it: the buffer is not tied to the new transaction at all.
		 * If we crash before the writepage() data reaches the
		 * filesystem, recovery replays the write() data.
		 */
		if (jh->b_jlist != BJ_None &&
				jh->b_jlist != BJ_SyncData &&
				jh->b_jlist != BJ_AsyncData) {
			JBUFFER_TRACE(jh, "Not stealing");
			goto no_journal;
		}

		/*
		 * The buffer may be undergoing writeout in commit.  Letting
		 * the caller redirty it from here could keep the write-out
		 * loop in commit from ever terminating, so for the
		 * synchronous case write it and wait for it now.
		 */
		if (!async && buffer_dirty(bh)) {
			atomic_inc(&bh->b_count);
			spin_unlock(&journal_datalist_lock);
			drop_ref = 1;
			ll_rw_block(WRITE, 1, &bh);
			wait_on_buffer(bh);
			spin_lock(&journal_datalist_lock);
			/* The buffer may become locked again at any
			   time if it is redirtied */
		}

		/* journal_clean_data_list() may have got there first */
		if (jh->b_transaction != NULL) {
			JBUFFER_TRACE(jh, "unfile from commit");
			__journal_unfile_buffer(jh);
			jh->b_transaction = NULL;
		}
		/* The buffer will be refiled below */
	}

	/*
	 * Special case --- the buffer might actually have been
	 * allocated and then immediately deallocated in the previous,
	 * committing transaction, so might still be left on that
	 * transaction's metadata lists.
	 */
	if (jh->b_jlist != target_list) {
		JBUFFER_TRACE(jh, "not on correct data list: unfile");
		J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
		__journal_unfile_buffer(jh);
		jh->b_transaction = NULL;
		JBUFFER_TRACE(jh, "file as data");
		__journal_file_buffer(jh, handle->h_transaction,
					target_list);
	}

no_journal:
	spin_unlock(&journal_datalist_lock);
	if (drop_ref) {
		/* Drop the extra b_count taken around the writeout. */
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
	}
	JBUFFER_TRACE(jh, "exit");
	journal_unlock_journal_head(jh);
	return 0;
}
|
1108 |
|
|
|
1109 |
|
|
/**
|
1110 |
|
|
* int journal_dirty_metadata() - mark a buffer as containing dirty metadata
|
1111 |
|
|
* @handle: transaction to add buffer to.
|
1112 |
|
|
* @bh: buffer to mark
|
1113 |
|
|
*
|
1114 |
|
|
* mark dirty metadata which needs to be journaled as part of the current transaction.
|
1115 |
|
|
*
|
1116 |
|
|
* The buffer is placed on the transaction's metadata list and is marked
|
1117 |
|
|
* as belonging to the transaction.
|
1118 |
|
|
*
|
1119 |
|
|
* Returns error number or 0 on success.
|
1120 |
|
|
*/
|
1121 |
|
|
int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
{
	/*
	 * A buffer that still belongs to the committing transaction
	 * (which should then hold frozen data for that commit) is not
	 * relinked here: relinking only happens once the old transaction
	 * finishes its commit.
	 */
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = bh2jh(bh);

	jbd_debug(5, "journal_head %p\n", jh);
	JBUFFER_TRACE(jh, "entry");
	lock_journal(journal);
	if (is_handle_aborted(handle))
		goto out_unlock;

	spin_lock(&journal_datalist_lock);
	set_bit(BH_JBDDirty, &bh->b_state);

	J_ASSERT_JH(jh, jh->b_transaction != NULL);

	if (jh->b_transaction == transaction) {
		/* Owned by us, so there must be no frozen copy around. */
		J_ASSERT_JH(jh, jh->b_frozen_data == NULL);

		JBUFFER_TRACE(jh, "file as BJ_Metadata");
		__journal_file_buffer(jh, transaction, BJ_Metadata);
	} else {
		/*
		 * Metadata on another transaction's list must be
		 * committing, and will be refiled once the commit
		 * completes: leave it alone for now.
		 */
		JBUFFER_TRACE(jh, "already on other transaction");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);
		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
		/* And this case is illegal: we can't reuse another
		 * transaction's data buffer, ever. */
		/* FIXME: writepage() should be journalled */
		J_ASSERT_JH(jh, jh->b_jlist != BJ_SyncData);
	}

	spin_unlock(&journal_datalist_lock);
	JBUFFER_TRACE(jh, "exit");
out_unlock:
	unlock_journal(journal);
	return 0;
}
|
1178 |
|
|
|
1179 |
|
|
#if 0
|
1180 |
|
|
/*
|
1181 |
|
|
* journal_release_buffer: undo a get_write_access without any buffer
|
1182 |
|
|
* updates, if the update decided in the end that it didn't need access.
|
1183 |
|
|
*
|
1184 |
|
|
* journal_get_write_access() can block, so it is quite possible for a
|
1185 |
|
|
* journaling component to decide after the write access is returned
|
1186 |
|
|
* that global state has changed and the update is no longer required. */
|
1187 |
|
|
|
1188 |
|
|
void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = bh2jh(bh);
	int untouched;

	lock_journal(journal);
	JBUFFER_TRACE(jh, "entry");

	/*
	 * Only a buffer that this transaction reserved but never dirtied
	 * is safe to release; anything else is left exactly as it is.
	 */
	spin_lock(&journal_datalist_lock);
	untouched = jh->b_jlist == BJ_Reserved &&
		    jh->b_transaction == transaction &&
		    !buffer_jdirty(jh2bh(jh));
	if (untouched) {
		JBUFFER_TRACE(jh, "unused: refiling it");
		/* Hand the credit back to the handle. */
		handle->h_buffer_credits++;
		__journal_refile_buffer(jh);
	}
	spin_unlock(&journal_datalist_lock);

	JBUFFER_TRACE(jh, "exit");
	unlock_journal(journal);
}
|
1213 |
|
|
#endif
|
1214 |
|
|
|
1215 |
|
|
/**
|
1216 |
|
|
* void journal_forget() - bforget() for potentially-journaled buffers.
|
1217 |
|
|
* @handle: transaction handle
|
1218 |
|
|
* @bh: bh to 'forget'
|
1219 |
|
|
*
|
1220 |
|
|
* We can only do the bforget if there are no commits pending against the
|
1221 |
|
|
* buffer. If the buffer is dirty in the current running transaction we
|
1222 |
|
|
* can safely unlink it.
|
1223 |
|
|
*
|
1224 |
|
|
* bh may not be a journalled buffer at all - it may be a non-JBD
|
1225 |
|
|
* buffer which came off the hashtable. Check for this.
|
1226 |
|
|
*
|
1227 |
|
|
* Decrements bh->b_count by one.
|
1228 |
|
|
*
|
1229 |
|
|
* Allow this call even if the handle has aborted --- it may be part of
|
1230 |
|
|
* the caller's cleanup after an abort.
|
1231 |
|
|
*/
|
1232 |
|
|
void journal_forget (handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh;

	BUFFER_TRACE(bh, "entry");

	lock_journal(journal);
	spin_lock(&journal_datalist_lock);

	/* A buffer without a journal_head only needs its reference dropped. */
	if (!buffer_jbd(bh))
		goto out_brelse;
	jh = bh2jh(bh);

	if (jh->b_transaction == handle->h_transaction) {
		J_ASSERT_JH(jh, !jh->b_frozen_data);

		/* The buffer is part of this very transaction, so it can
		 * be dropped from the transaction straight away. */
		clear_bit(BH_Dirty, &bh->b_state);
		clear_bit(BH_JBDDirty, &bh->b_state);

		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
		J_ASSERT_JH(jh, !jh->b_committed_data);

		__journal_unfile_buffer(jh);
		jh->b_transaction = NULL;

		/*
		 * The buffer will not be journaled any more, but the
		 * commit of this transaction still matters to it: the
		 * delete being processed may obsolete an old log entry,
		 * so committing can satisfy the buffer's checkpoint.
		 *
		 * Hence a buffer with a checkpoint is refiled on our
		 * BJ_Forget list, so that the checkpoint gets removed
		 * after we commit.
		 */
		if (jh->b_cp_transaction) {
			__journal_file_buffer(jh, transaction, BJ_Forget);
			goto out_brelse;
		}

		__journal_remove_journal_head(bh);
		__brelse(bh);
		if (!buffer_jbd(bh)) {
			/* No journal_head is left on it: really forget it. */
			spin_unlock(&journal_datalist_lock);
			unlock_journal(journal);
			__bforget(bh);
			return;
		}
	} else if (jh->b_transaction) {
		J_ASSERT_JH(jh, (jh->b_transaction ==
				 journal->j_committing_transaction));
		/* A buffer still owned by the prior (committing)
		 * transaction cannot be dropped yet... */
		JBUFFER_TRACE(jh, "belongs to older transaction");
		/* ... but it CAN be dropped from the new transaction if
		 * it has also been modified since the original commit. */
		if (jh->b_next_transaction) {
			J_ASSERT(jh->b_next_transaction == transaction);
			jh->b_next_transaction = NULL;
		}
	}

out_brelse:
	spin_unlock(&journal_datalist_lock);
	unlock_journal(journal);
	__brelse(bh);
	return;
}
|
1308 |
|
|
|
1309 |
|
|
#if 0 /* Unused */
|
1310 |
|
|
/*
|
1311 |
|
|
* journal_sync_buffer: flush a potentially-journaled buffer to disk.
|
1312 |
|
|
*
|
1313 |
|
|
* Used for O_SYNC filesystem operations. If the buffer is journaled,
|
1314 |
|
|
* we need to complete the O_SYNC by waiting for the transaction to
|
1315 |
|
|
* complete. It is an error to call journal_sync_buffer before
|
1316 |
|
|
* journal_stop!
|
1317 |
|
|
*/
|
1318 |
|
|
|
1319 |
|
|
void journal_sync_buffer(struct buffer_head *bh)
|
1320 |
|
|
{
|
1321 |
|
|
transaction_t *transaction;
|
1322 |
|
|
journal_t *journal;
|
1323 |
|
|
long sequence;
|
1324 |
|
|
struct journal_head *jh;
|
1325 |
|
|
|
1326 |
|
|
/* If the buffer isn't journaled, this is easy: just sync it to
|
1327 |
|
|
* disk. */
|
1328 |
|
|
BUFFER_TRACE(bh, "entry");
|
1329 |
|
|
|
1330 |
|
|
spin_lock(&journal_datalist_lock);
|
1331 |
|
|
if (!buffer_jbd(bh)) {
|
1332 |
|
|
spin_unlock(&journal_datalist_lock);
|
1333 |
|
|
return;
|
1334 |
|
|
}
|
1335 |
|
|
jh = bh2jh(bh);
|
1336 |
|
|
if (jh->b_transaction == NULL) {
|
1337 |
|
|
/* If the buffer has already been journaled, then this
|
1338 |
|
|
* is a noop. */
|
1339 |
|
|
if (jh->b_cp_transaction == NULL) {
|
1340 |
|
|
spin_unlock(&journal_datalist_lock);
|
1341 |
|
|
return;
|
1342 |
|
|
}
|
1343 |
|
|
atomic_inc(&bh->b_count);
|
1344 |
|
|
spin_unlock(&journal_datalist_lock);
|
1345 |
|
|
ll_rw_block (WRITE, 1, &bh);
|
1346 |
|
|
wait_on_buffer(bh);
|
1347 |
|
|
__brelse(bh);
|
1348 |
|
|
goto out;
|
1349 |
|
|
}
|
1350 |
|
|
|
1351 |
|
|
/* Otherwise, just wait until the transaction is synced to disk. */
|
1352 |
|
|
transaction = jh->b_transaction;
|
1353 |
|
|
journal = transaction->t_journal;
|
1354 |
|
|
sequence = transaction->t_tid;
|
1355 |
|
|
spin_unlock(&journal_datalist_lock);
|
1356 |
|
|
|
1357 |
|
|
jbd_debug(2, "requesting commit for jh %p\n", jh);
|
1358 |
|
|
log_start_commit (journal, transaction);
|
1359 |
|
|
|
1360 |
|
|
while (tid_gt(sequence, journal->j_commit_sequence)) {
|
1361 |
|
|
wake_up(&journal->j_wait_done_commit);
|
1362 |
|
|
sleep_on(&journal->j_wait_done_commit);
|
1363 |
|
|
}
|
1364 |
|
|
JBUFFER_TRACE(jh, "exit");
|
1365 |
|
|
out:
|
1366 |
|
|
return;
|
1367 |
|
|
}
|
1368 |
|
|
#endif
|
1369 |
|
|
|
1370 |
|
|
/*
|
1371 |
|
|
* Register a callback function for this handle. The function will be
|
1372 |
|
|
* called when the transaction that this handle is part of has been
|
1373 |
|
|
* committed to disk with the original callback data struct and the
|
1374 |
|
|
* error status of the journal as parameters. There is no guarantee of
|
1375 |
|
|
* ordering between handles within a single transaction, nor between
|
1376 |
|
|
* callbacks registered on the same handle.
|
1377 |
|
|
*
|
1378 |
|
|
* The caller is responsible for allocating the journal_callback struct.
|
1379 |
|
|
* This is to allow the caller to add as much extra data to the callback
|
1380 |
|
|
* as needed, but reduce the overhead of multiple allocations. The caller
|
1381 |
|
|
* allocated struct must start with a struct journal_callback at offset 0,
|
1382 |
|
|
* and has the caller-specific data afterwards.
|
1383 |
|
|
*/
|
1384 |
|
|
void journal_callback_set(handle_t *handle,
|
1385 |
|
|
void (*func)(struct journal_callback *jcb, int error),
|
1386 |
|
|
struct journal_callback *jcb)
|
1387 |
|
|
{
|
1388 |
|
|
list_add_tail(&jcb->jcb_list, &handle->h_jcb);
|
1389 |
|
|
jcb->jcb_func = func;
|
1390 |
|
|
}
|
1391 |
|
|
|
1392 |
|
|
/**
|
1393 |
|
|
* int journal_stop() - complete a transaction
|
1394 |
|
|
* @handle: transaction to complete.
|
1395 |
|
|
*
|
1396 |
|
|
* All done for a particular handle.
|
1397 |
|
|
*
|
1398 |
|
|
* There is not much action needed here. We just return any remaining
|
1399 |
|
|
* buffer credits to the transaction and remove the handle. The only
|
1400 |
|
|
* complication is that we need to start a commit operation if the
|
1401 |
|
|
* filesystem is marked for synchronous update.
|
1402 |
|
|
*
|
1403 |
|
|
* journal_stop itself will not usually return an error, but it may
|
1404 |
|
|
* do so in unusual circumstances. In particular, expect it to
|
1405 |
|
|
* return -EIO if a journal_abort has been executed since the
|
1406 |
|
|
* transaction began.
|
1407 |
|
|
*/
|
1408 |
|
|
int journal_stop(handle_t *handle)
{
	transaction_t *transaction;
	journal_t *journal;
	int old_handle_count, err;

	/*
	 * Check for a NULL handle before touching it: the handle must not
	 * be dereferenced until this test has passed.
	 */
	if (!handle)
		return 0;

	transaction = handle->h_transaction;
	journal = transaction->t_journal;

	J_ASSERT (transaction->t_updates > 0);
	J_ASSERT (journal_current_handle() == handle);

	/* An aborted handle is still torn down, but reports -EIO. */
	if (is_handle_aborted(handle))
		err = -EIO;
	else
		err = 0;

	/* Nested use of the handle: just drop one reference. */
	if (--handle->h_ref > 0) {
		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
			  handle->h_ref);
		return err;
	}

	jbd_debug(4, "Handle %p going down\n", handle);

	/*
	 * Implement synchronous transaction batching.  If the handle
	 * was synchronous, don't force a commit immediately.  Let's
	 * yield and let another thread piggyback onto this transaction.
	 * Keep doing that while new threads continue to arrive.
	 * It doesn't cost much - we're about to run a commit and sleep
	 * on IO anyway.  Speeds up many-threaded, many-dir operations
	 * by 30x or more...
	 */
	if (handle->h_sync) {
		do {
			old_handle_count = transaction->t_handle_count;
			yield();
		} while (old_handle_count != transaction->t_handle_count);
	}

	/* Detach the handle from the task and return its unused credits. */
	current->journal_info = NULL;
	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	transaction->t_updates--;
	if (!transaction->t_updates) {
		wake_up(&journal->j_wait_updates);
		if (journal->j_barrier_count)
			wake_up(&journal->j_wait_transaction_locked);
	}

	/* Move callbacks from the handle to the transaction. */
	list_splice(&handle->h_jcb, &transaction->t_jcb);

	/*
	 * If the handle is marked SYNC, we need to set another commit
	 * going!  We also want to force a commit if the current
	 * transaction is occupying too much of the log, or if the
	 * transaction is too old now.
	 */
	if (handle->h_sync ||
			transaction->t_outstanding_credits >
				journal->j_max_transaction_buffers ||
			(journal->j_commit_interval &&
			 time_after_eq(jiffies, transaction->t_expires))) {
		/* Do this even for aborted journals: an abort still
		 * completes the commit thread, it just doesn't write
		 * anything to disk. */
		tid_t tid = transaction->t_tid;

		jbd_debug(2, "transaction too old, requesting commit for "
					"handle %p\n", handle);
		/* This is non-blocking */
		log_start_commit(journal, transaction);

		/*
		 * Special case: JFS_SYNC synchronous updates require us
		 * to wait for the commit to complete.
		 */
		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
			log_wait_commit(journal, tid);
	}
	kfree(handle);
	return err;
}
|
1492 |
|
|
|
1493 |
|
|
/**int journal_force_commit() - force any uncommitted transactions
|
1494 |
|
|
* @journal: journal to force
|
1495 |
|
|
*
|
1496 |
|
|
* For synchronous operations: force any uncommitted transactions
|
1497 |
|
|
* to disk. May seem kludgy, but it reuses all the handle batching
|
1498 |
|
|
* code in a very simple manner.
|
1499 |
|
|
*/
|
1500 |
|
|
int journal_force_commit(journal_t *journal)
|
1501 |
|
|
{
|
1502 |
|
|
handle_t *handle;
|
1503 |
|
|
int ret = 0;
|
1504 |
|
|
|
1505 |
|
|
lock_kernel();
|
1506 |
|
|
handle = journal_start(journal, 1);
|
1507 |
|
|
if (IS_ERR(handle)) {
|
1508 |
|
|
ret = PTR_ERR(handle);
|
1509 |
|
|
goto out;
|
1510 |
|
|
}
|
1511 |
|
|
handle->h_sync = 1;
|
1512 |
|
|
journal_stop(handle);
|
1513 |
|
|
out:
|
1514 |
|
|
unlock_kernel();
|
1515 |
|
|
return ret;
|
1516 |
|
|
}
|
1517 |
|
|
|
1518 |
|
|
/*
|
1519 |
|
|
*
|
1520 |
|
|
* List management code snippets: various functions for manipulating the
|
1521 |
|
|
* transaction buffer lists.
|
1522 |
|
|
*
|
1523 |
|
|
*/
|
1524 |
|
|
|
1525 |
|
|
/*
|
1526 |
|
|
* Append a buffer to a transaction list, given the transaction's list head
|
1527 |
|
|
* pointer.
|
1528 |
|
|
* journal_datalist_lock is held.
|
1529 |
|
|
*/
|
1530 |
|
|
|
1531 |
|
|
static inline void
|
1532 |
|
|
__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
|
1533 |
|
|
{
|
1534 |
|
|
if (!*list) {
|
1535 |
|
|
jh->b_tnext = jh->b_tprev = jh;
|
1536 |
|
|
*list = jh;
|
1537 |
|
|
} else {
|
1538 |
|
|
/* Insert at the tail of the list to preserve order */
|
1539 |
|
|
struct journal_head *first = *list, *last = first->b_tprev;
|
1540 |
|
|
jh->b_tprev = last;
|
1541 |
|
|
jh->b_tnext = first;
|
1542 |
|
|
last->b_tnext = first->b_tprev = jh;
|
1543 |
|
|
}
|
1544 |
|
|
}
|
1545 |
|
|
|
1546 |
|
|
/*
|
1547 |
|
|
* Remove a buffer from a transaction list, given the transaction's list
|
1548 |
|
|
* head pointer.
|
1549 |
|
|
*
|
1550 |
|
|
* Called with journal_datalist_lock held, and the journal may not
|
1551 |
|
|
* be locked.
|
1552 |
|
|
*/
|
1553 |
|
|
|
1554 |
|
|
static inline void
|
1555 |
|
|
__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
|
1556 |
|
|
{
|
1557 |
|
|
if (*list == jh) {
|
1558 |
|
|
*list = jh->b_tnext;
|
1559 |
|
|
if (*list == jh)
|
1560 |
|
|
*list = 0;
|
1561 |
|
|
}
|
1562 |
|
|
jh->b_tprev->b_tnext = jh->b_tnext;
|
1563 |
|
|
jh->b_tnext->b_tprev = jh->b_tprev;
|
1564 |
|
|
}
|
1565 |
|
|
|
1566 |
|
|
/*
|
1567 |
|
|
* Remove a buffer from the appropriate transaction list.
|
1568 |
|
|
*
|
1569 |
|
|
* Note that this function can *change* the value of
|
1570 |
|
|
* bh->b_transaction->t_sync_datalist, t_async_datalist, t_buffers, t_forget,
|
1571 |
|
|
* t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller
|
1572 |
|
|
* is holding onto a copy of one of these pointers, it could go bad.
|
1573 |
|
|
* Generally the caller needs to re-read the pointer from the transaction_t.
|
1574 |
|
|
*
|
1575 |
|
|
* If bh->b_jlist is BJ_SyncData or BJ_AsyncData then we may have been called
|
1576 |
|
|
* via journal_try_to_free_buffer() or journal_clean_data_list(). In that
|
1577 |
|
|
* case, journal_datalist_lock will be held, and the journal may not be locked.
|
1578 |
|
|
*/
|
1579 |
|
|
void __journal_unfile_buffer(struct journal_head *jh)
|
1580 |
|
|
{
|
1581 |
|
|
struct journal_head **list = 0;
|
1582 |
|
|
transaction_t * transaction;
|
1583 |
|
|
|
1584 |
|
|
assert_spin_locked(&journal_datalist_lock);
|
1585 |
|
|
transaction = jh->b_transaction;
|
1586 |
|
|
|
1587 |
|
|
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
|
1588 |
|
|
|
1589 |
|
|
if (jh->b_jlist != BJ_None)
|
1590 |
|
|
J_ASSERT_JH(jh, transaction != 0);
|
1591 |
|
|
|
1592 |
|
|
switch (jh->b_jlist) {
|
1593 |
|
|
case BJ_None:
|
1594 |
|
|
return;
|
1595 |
|
|
case BJ_SyncData:
|
1596 |
|
|
list = &transaction->t_sync_datalist;
|
1597 |
|
|
break;
|
1598 |
|
|
case BJ_AsyncData:
|
1599 |
|
|
list = &transaction->t_async_datalist;
|
1600 |
|
|
break;
|
1601 |
|
|
case BJ_Metadata:
|
1602 |
|
|
transaction->t_nr_buffers--;
|
1603 |
|
|
J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
|
1604 |
|
|
list = &transaction->t_buffers;
|
1605 |
|
|
break;
|
1606 |
|
|
case BJ_Forget:
|
1607 |
|
|
list = &transaction->t_forget;
|
1608 |
|
|
break;
|
1609 |
|
|
case BJ_IO:
|
1610 |
|
|
list = &transaction->t_iobuf_list;
|
1611 |
|
|
break;
|
1612 |
|
|
case BJ_Shadow:
|
1613 |
|
|
list = &transaction->t_shadow_list;
|
1614 |
|
|
break;
|
1615 |
|
|
case BJ_LogCtl:
|
1616 |
|
|
list = &transaction->t_log_list;
|
1617 |
|
|
break;
|
1618 |
|
|
case BJ_Reserved:
|
1619 |
|
|
list = &transaction->t_reserved_list;
|
1620 |
|
|
break;
|
1621 |
|
|
}
|
1622 |
|
|
|
1623 |
|
|
__blist_del_buffer(list, jh);
|
1624 |
|
|
jh->b_jlist = BJ_None;
|
1625 |
|
|
if (test_and_clear_bit(BH_JBDDirty, &jh2bh(jh)->b_state)) {
|
1626 |
|
|
set_bit(BH_Dirty, &jh2bh(jh)->b_state);
|
1627 |
|
|
}
|
1628 |
|
|
}
|
1629 |
|
|
|
1630 |
|
|
void journal_unfile_buffer(struct journal_head *jh)
|
1631 |
|
|
{
|
1632 |
|
|
spin_lock(&journal_datalist_lock);
|
1633 |
|
|
__journal_unfile_buffer(jh);
|
1634 |
|
|
spin_unlock(&journal_datalist_lock);
|
1635 |
|
|
}
|
1636 |
|
|
|
1637 |
|
|
/*
|
1638 |
|
|
* Called from journal_try_to_free_buffers(). The journal is not
|
1639 |
|
|
* locked. lru_list_lock is not held.
|
1640 |
|
|
*
|
1641 |
|
|
* Here we see why journal_datalist_lock is global and not per-journal.
|
1642 |
|
|
* We cannot get back to this buffer's journal pointer without locking
|
1643 |
|
|
* out journal_clean_data_list() in some manner.
|
1644 |
|
|
*
|
1645 |
|
|
* One could use journal_datalist_lock to get unracy access to a
|
1646 |
|
|
* per-journal lock.
|
1647 |
|
|
*
|
1648 |
|
|
* Called with journal_datalist_lock held.
|
1649 |
|
|
*
|
1650 |
|
|
* Returns non-zero iff we were able to free the journal_head.
|
1651 |
|
|
*/
|
1652 |
|
|
static int __journal_try_to_free_buffer(struct buffer_head *bh,
|
1653 |
|
|
int *locked_or_dirty)
|
1654 |
|
|
{
|
1655 |
|
|
struct journal_head *jh;
|
1656 |
|
|
|
1657 |
|
|
assert_spin_locked(&journal_datalist_lock);
|
1658 |
|
|
|
1659 |
|
|
jh = bh2jh(bh);
|
1660 |
|
|
|
1661 |
|
|
if (buffer_locked(bh) || buffer_dirty(bh)) {
|
1662 |
|
|
*locked_or_dirty = 1;
|
1663 |
|
|
goto out;
|
1664 |
|
|
}
|
1665 |
|
|
|
1666 |
|
|
if (!buffer_uptodate(bh))
|
1667 |
|
|
goto out;
|
1668 |
|
|
|
1669 |
|
|
if (jh->b_next_transaction != 0)
|
1670 |
|
|
goto out;
|
1671 |
|
|
|
1672 |
|
|
if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
|
1673 |
|
|
if (jh->b_jlist == BJ_SyncData || jh->b_jlist==BJ_AsyncData) {
|
1674 |
|
|
/* A written-back ordered data buffer */
|
1675 |
|
|
JBUFFER_TRACE(jh, "release data");
|
1676 |
|
|
__journal_unfile_buffer(jh);
|
1677 |
|
|
jh->b_transaction = 0;
|
1678 |
|
|
__journal_remove_journal_head(bh);
|
1679 |
|
|
__brelse(bh);
|
1680 |
|
|
}
|
1681 |
|
|
}
|
1682 |
|
|
else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
|
1683 |
|
|
/* written-back checkpointed metadata buffer */
|
1684 |
|
|
if (jh->b_jlist == BJ_None) {
|
1685 |
|
|
JBUFFER_TRACE(jh, "remove from checkpoint list");
|
1686 |
|
|
__journal_remove_checkpoint(jh);
|
1687 |
|
|
__journal_remove_journal_head(bh);
|
1688 |
|
|
__brelse(bh);
|
1689 |
|
|
}
|
1690 |
|
|
}
|
1691 |
|
|
return !buffer_jbd(bh);
|
1692 |
|
|
|
1693 |
|
|
out:
|
1694 |
|
|
return 0;
|
1695 |
|
|
}
|
1696 |
|
|
|
1697 |
|
|
|
1698 |
|
|
/**
|
1699 |
|
|
* int journal_try_to_free_buffers() - try to free page buffers.
|
1700 |
|
|
* @journal: journal for operation
|
1701 |
|
|
* @page: to try and free
|
1702 |
|
|
* @gfp_mask: 'IO' mode for try_to_free_buffers()
|
1703 |
|
|
*
|
1704 |
|
|
*
|
1705 |
|
|
* For all the buffers on this page,
|
1706 |
|
|
* if they are fully written out ordered data, move them onto BUF_CLEAN
|
1707 |
|
|
* so try_to_free_buffers() can reap them.
|
1708 |
|
|
*
|
1709 |
|
|
* This function returns non-zero if we wish try_to_free_buffers()
|
1710 |
|
|
* to be called. We do this if the page is releasable by try_to_free_buffers().
|
1711 |
|
|
* We also do it if the page has locked or dirty buffers and the caller wants
|
1712 |
|
|
* us to perform sync or async writeout.
|
1713 |
|
|
*/
|
1714 |
|
|
int journal_try_to_free_buffers(journal_t *journal,
|
1715 |
|
|
struct page *page, int gfp_mask)
|
1716 |
|
|
{
|
1717 |
|
|
/*
|
1718 |
|
|
* journal_try_to_free_buffers(). For all the buffers on this page,
|
1719 |
|
|
* if they are fully written out ordered data, move them onto BUF_CLEAN
|
1720 |
|
|
* so try_to_free_buffers() can reap them. Called with lru_list_lock
|
1721 |
|
|
* not held. Does its own locking.
|
1722 |
|
|
*
|
1723 |
|
|
* This complicates JBD locking somewhat. We aren't protected by the
|
1724 |
|
|
* BKL here. We wish to remove the buffer from its committing or
|
1725 |
|
|
* running transaction's ->t_datalist via __journal_unfile_buffer.
|
1726 |
|
|
*
|
1727 |
|
|
* This may *change* the value of transaction_t->t_datalist, so anyone
|
1728 |
|
|
* who looks at t_datalist needs to lock against this function.
|
1729 |
|
|
*
|
1730 |
|
|
* Even worse, someone may be doing a journal_dirty_data on this
|
1731 |
|
|
* buffer. So we need to lock against that. journal_dirty_data()
|
1732 |
|
|
* will come out of the lock with the buffer dirty, which makes it
|
1733 |
|
|
* ineligible for release here.
|
1734 |
|
|
*
|
1735 |
|
|
* Who else is affected by this? hmm... Really the only contender
|
1736 |
|
|
* is do_get_write_access() - it could be looking at the buffer while
|
1737 |
|
|
* journal_try_to_free_buffer() is changing its state. But that
|
1738 |
|
|
* cannot happen because we never reallocate freed data as metadata
|
1739 |
|
|
* while the data is part of a transaction. Yes?
|
1740 |
|
|
*
|
1741 |
|
|
*/
|
1742 |
|
|
struct buffer_head *bh;
|
1743 |
|
|
struct buffer_head *tmp;
|
1744 |
|
|
int locked_or_dirty = 0;
|
1745 |
|
|
int call_ttfb = 1;
|
1746 |
|
|
|
1747 |
|
|
J_ASSERT(PageLocked(page));
|
1748 |
|
|
|
1749 |
|
|
bh = page->buffers;
|
1750 |
|
|
tmp = bh;
|
1751 |
|
|
spin_lock(&journal_datalist_lock);
|
1752 |
|
|
do {
|
1753 |
|
|
struct buffer_head *p = tmp;
|
1754 |
|
|
|
1755 |
|
|
tmp = tmp->b_this_page;
|
1756 |
|
|
if (buffer_jbd(p))
|
1757 |
|
|
if (!__journal_try_to_free_buffer(p, &locked_or_dirty))
|
1758 |
|
|
call_ttfb = 0;
|
1759 |
|
|
} while (tmp != bh);
|
1760 |
|
|
spin_unlock(&journal_datalist_lock);
|
1761 |
|
|
|
1762 |
|
|
if (!(gfp_mask & (__GFP_IO|__GFP_WAIT)))
|
1763 |
|
|
goto out;
|
1764 |
|
|
if (!locked_or_dirty)
|
1765 |
|
|
goto out;
|
1766 |
|
|
/*
|
1767 |
|
|
* The VM wants us to do writeout, or to block on IO, or both.
|
1768 |
|
|
* So we allow try_to_free_buffers to be called even if the page
|
1769 |
|
|
* still has journalled buffers.
|
1770 |
|
|
*/
|
1771 |
|
|
call_ttfb = 1;
|
1772 |
|
|
out:
|
1773 |
|
|
return call_ttfb;
|
1774 |
|
|
}
|
1775 |
|
|
|
1776 |
|
|
/*
|
1777 |
|
|
* This buffer is no longer needed. If it is on an older transaction's
|
1778 |
|
|
* checkpoint list we need to record it on this transaction's forget list
|
1779 |
|
|
* to pin this buffer (and hence its checkpointing transaction) down until
|
1780 |
|
|
* this transaction commits. If the buffer isn't on a checkpoint list, we
|
1781 |
|
|
* release it.
|
1782 |
|
|
* Returns non-zero if JBD no longer has an interest in the buffer.
|
1783 |
|
|
*/
|
1784 |
|
|
static int dispose_buffer(struct journal_head *jh,
|
1785 |
|
|
transaction_t *transaction)
|
1786 |
|
|
{
|
1787 |
|
|
int may_free = 1;
|
1788 |
|
|
struct buffer_head *bh = jh2bh(jh);
|
1789 |
|
|
|
1790 |
|
|
spin_lock(&journal_datalist_lock);
|
1791 |
|
|
__journal_unfile_buffer(jh);
|
1792 |
|
|
jh->b_transaction = 0;
|
1793 |
|
|
|
1794 |
|
|
if (jh->b_cp_transaction) {
|
1795 |
|
|
JBUFFER_TRACE(jh, "on running+cp transaction");
|
1796 |
|
|
__journal_file_buffer(jh, transaction, BJ_Forget);
|
1797 |
|
|
clear_bit(BH_JBDDirty, &bh->b_state);
|
1798 |
|
|
may_free = 0;
|
1799 |
|
|
} else {
|
1800 |
|
|
JBUFFER_TRACE(jh, "on running transaction");
|
1801 |
|
|
__journal_remove_journal_head(bh);
|
1802 |
|
|
__brelse(bh);
|
1803 |
|
|
}
|
1804 |
|
|
spin_unlock(&journal_datalist_lock);
|
1805 |
|
|
return may_free;
|
1806 |
|
|
}
|
1807 |
|
|
|
1808 |
|
|
/*
|
1809 |
|
|
* journal_flushpage
|
1810 |
|
|
*
|
1811 |
|
|
* This code is tricky. It has a number of cases to deal with.
|
1812 |
|
|
*
|
1813 |
|
|
* There are two invariants which this code relies on:
|
1814 |
|
|
*
|
1815 |
|
|
* i_size must be updated on disk before we start calling flushpage on the
|
1816 |
|
|
* data.
|
1817 |
|
|
*
|
1818 |
|
|
* This is done in ext3 by defining an ext3_setattr method which
|
1819 |
|
|
* updates i_size before truncate gets going. By maintaining this
|
1820 |
|
|
* invariant, we can be sure that it is safe to throw away any buffers
|
1821 |
|
|
* attached to the current transaction: once the transaction commits,
|
1822 |
|
|
* we know that the data will not be needed.
|
1823 |
|
|
*
|
1824 |
|
|
* Note however that we can *not* throw away data belonging to the
|
1825 |
|
|
* previous, committing transaction!
|
1826 |
|
|
*
|
1827 |
|
|
* Any disk blocks which *are* part of the previous, committing
|
1828 |
|
|
* transaction (and which therefore cannot be discarded immediately) are
|
1829 |
|
|
* not going to be reused in the new running transaction
|
1830 |
|
|
*
|
1831 |
|
|
* The bitmap committed_data images guarantee this: any block which is
|
1832 |
|
|
* allocated in one transaction and removed in the next will be marked
|
1833 |
|
|
* as in-use in the committed_data bitmap, so cannot be reused until
|
1834 |
|
|
* the next transaction to delete the block commits. This means that
|
1835 |
|
|
* leaving committing buffers dirty is quite safe: the disk blocks
|
1836 |
|
|
* cannot be reallocated to a different file and so buffer aliasing is
|
1837 |
|
|
* not possible.
|
1838 |
|
|
*
|
1839 |
|
|
*
|
1840 |
|
|
* The above applies mainly to ordered data mode. In writeback mode we
|
1841 |
|
|
* don't make guarantees about the order in which data hits disk --- in
|
1842 |
|
|
* particular we don't guarantee that new dirty data is flushed before
|
1843 |
|
|
* transaction commit --- so it is always safe just to discard data
|
1844 |
|
|
* immediately in that mode. --sct
|
1845 |
|
|
*/
|
1846 |
|
|
|
1847 |
|
|
/*
|
1848 |
|
|
* The journal_unmap_buffer helper function returns zero if the buffer
|
1849 |
|
|
* concerned remains pinned as an anonymous buffer belonging to an older
|
1850 |
|
|
* transaction.
|
1851 |
|
|
*
|
1852 |
|
|
* We're outside-transaction here. Either or both of j_running_transaction
|
1853 |
|
|
* and j_committing_transaction may be NULL.
|
1854 |
|
|
*/
|
1855 |
|
|
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
|
1856 |
|
|
{
|
1857 |
|
|
transaction_t *transaction;
|
1858 |
|
|
struct journal_head *jh;
|
1859 |
|
|
int may_free = 1;
|
1860 |
|
|
|
1861 |
|
|
BUFFER_TRACE(bh, "entry");
|
1862 |
|
|
|
1863 |
|
|
if (!buffer_mapped(bh))
|
1864 |
|
|
return 1;
|
1865 |
|
|
|
1866 |
|
|
/* It is safe to proceed here without the
|
1867 |
|
|
* journal_datalist_spinlock because the buffers cannot be
|
1868 |
|
|
* stolen by try_to_free_buffers as long as we are holding the
|
1869 |
|
|
* page lock. --sct */
|
1870 |
|
|
|
1871 |
|
|
if (!buffer_jbd(bh))
|
1872 |
|
|
goto zap_buffer;
|
1873 |
|
|
|
1874 |
|
|
jh = bh2jh(bh);
|
1875 |
|
|
transaction = jh->b_transaction;
|
1876 |
|
|
if (transaction == NULL) {
|
1877 |
|
|
/* First case: not on any transaction. If it
|
1878 |
|
|
* has no checkpoint link, then we can zap it:
|
1879 |
|
|
* it's a writeback-mode buffer so we don't care
|
1880 |
|
|
* if it hits disk safely. */
|
1881 |
|
|
if (!jh->b_cp_transaction) {
|
1882 |
|
|
JBUFFER_TRACE(jh, "not on any transaction: zap");
|
1883 |
|
|
goto zap_buffer;
|
1884 |
|
|
}
|
1885 |
|
|
|
1886 |
|
|
if (!buffer_dirty(bh)) {
|
1887 |
|
|
/* bdflush has written it. We can drop it now */
|
1888 |
|
|
goto zap_buffer;
|
1889 |
|
|
}
|
1890 |
|
|
|
1891 |
|
|
/* OK, it must be in the journal but still not
|
1892 |
|
|
* written fully to disk: it's metadata or
|
1893 |
|
|
* journaled data... */
|
1894 |
|
|
|
1895 |
|
|
if (journal->j_running_transaction) {
|
1896 |
|
|
/* ... and once the current transaction has
|
1897 |
|
|
* committed, the buffer won't be needed any
|
1898 |
|
|
* longer. */
|
1899 |
|
|
JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
|
1900 |
|
|
return dispose_buffer(jh,
|
1901 |
|
|
journal->j_running_transaction);
|
1902 |
|
|
} else {
|
1903 |
|
|
/* There is no currently-running transaction. So the
|
1904 |
|
|
* orphan record which we wrote for this file must have
|
1905 |
|
|
* passed into commit. We must attach this buffer to
|
1906 |
|
|
* the committing transaction, if it exists. */
|
1907 |
|
|
if (journal->j_committing_transaction) {
|
1908 |
|
|
JBUFFER_TRACE(jh, "give to committing trans");
|
1909 |
|
|
return dispose_buffer(jh,
|
1910 |
|
|
journal->j_committing_transaction);
|
1911 |
|
|
} else {
|
1912 |
|
|
/* The orphan record's transaction has
|
1913 |
|
|
* committed. We can cleanse this buffer */
|
1914 |
|
|
clear_bit(BH_JBDDirty, &bh->b_state);
|
1915 |
|
|
goto zap_buffer;
|
1916 |
|
|
}
|
1917 |
|
|
}
|
1918 |
|
|
} else if (transaction == journal->j_committing_transaction) {
|
1919 |
|
|
/* If it is committing, we simply cannot touch it. We
|
1920 |
|
|
* can remove it's next_transaction pointer from the
|
1921 |
|
|
* running transaction if that is set, but nothing
|
1922 |
|
|
* else. */
|
1923 |
|
|
JBUFFER_TRACE(jh, "on committing transaction");
|
1924 |
|
|
set_bit(BH_Freed, &bh->b_state);
|
1925 |
|
|
if (jh->b_next_transaction) {
|
1926 |
|
|
J_ASSERT(jh->b_next_transaction ==
|
1927 |
|
|
journal->j_running_transaction);
|
1928 |
|
|
jh->b_next_transaction = NULL;
|
1929 |
|
|
}
|
1930 |
|
|
return 0;
|
1931 |
|
|
} else {
|
1932 |
|
|
/* Good, the buffer belongs to the running transaction.
|
1933 |
|
|
* We are writing our own transaction's data, not any
|
1934 |
|
|
* previous one's, so it is safe to throw it away
|
1935 |
|
|
* (remember that we expect the filesystem to have set
|
1936 |
|
|
* i_size already for this truncate so recovery will not
|
1937 |
|
|
* expose the disk blocks we are discarding here.) */
|
1938 |
|
|
J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
|
1939 |
|
|
may_free = dispose_buffer(jh, transaction);
|
1940 |
|
|
}
|
1941 |
|
|
|
1942 |
|
|
zap_buffer:
|
1943 |
|
|
if (buffer_dirty(bh))
|
1944 |
|
|
mark_buffer_clean(bh);
|
1945 |
|
|
J_ASSERT_BH(bh, !buffer_jdirty(bh));
|
1946 |
|
|
clear_bit(BH_Uptodate, &bh->b_state);
|
1947 |
|
|
clear_bit(BH_Mapped, &bh->b_state);
|
1948 |
|
|
clear_bit(BH_Req, &bh->b_state);
|
1949 |
|
|
clear_bit(BH_New, &bh->b_state);
|
1950 |
|
|
return may_free;
|
1951 |
|
|
}
|
1952 |
|
|
|
1953 |
|
|
/**
|
1954 |
|
|
* int journal_flushpage()
|
1955 |
|
|
* @journal: journal to use for flush...
|
1956 |
|
|
* @page: page to flush
|
1957 |
|
|
* @offset: length of page to flush.
|
1958 |
|
|
*
|
1959 |
|
|
* Reap page buffers containing data after offset in page.
|
1960 |
|
|
*
|
1961 |
|
|
* Return non-zero if the page's buffers were successfully reaped.
|
1962 |
|
|
*/
|
1963 |
|
|
int journal_flushpage(journal_t *journal,
|
1964 |
|
|
struct page *page,
|
1965 |
|
|
unsigned long offset)
|
1966 |
|
|
{
|
1967 |
|
|
struct buffer_head *head, *bh, *next;
|
1968 |
|
|
unsigned int curr_off = 0;
|
1969 |
|
|
int may_free = 1;
|
1970 |
|
|
|
1971 |
|
|
if (!PageLocked(page))
|
1972 |
|
|
BUG();
|
1973 |
|
|
if (!page->buffers)
|
1974 |
|
|
return 1;
|
1975 |
|
|
|
1976 |
|
|
/* We will potentially be playing with lists other than just the
|
1977 |
|
|
* data lists (especially for journaled data mode), so be
|
1978 |
|
|
* cautious in our locking. */
|
1979 |
|
|
lock_journal(journal);
|
1980 |
|
|
|
1981 |
|
|
head = bh = page->buffers;
|
1982 |
|
|
do {
|
1983 |
|
|
unsigned int next_off = curr_off + bh->b_size;
|
1984 |
|
|
next = bh->b_this_page;
|
1985 |
|
|
|
1986 |
|
|
/* AKPM: doing lock_buffer here may be overly paranoid */
|
1987 |
|
|
if (offset <= curr_off) {
|
1988 |
|
|
/* This block is wholly outside the truncation point */
|
1989 |
|
|
lock_buffer(bh);
|
1990 |
|
|
may_free &= journal_unmap_buffer(journal, bh);
|
1991 |
|
|
unlock_buffer(bh);
|
1992 |
|
|
}
|
1993 |
|
|
curr_off = next_off;
|
1994 |
|
|
bh = next;
|
1995 |
|
|
|
1996 |
|
|
} while (bh != head);
|
1997 |
|
|
|
1998 |
|
|
unlock_journal(journal);
|
1999 |
|
|
|
2000 |
|
|
if (!offset) {
|
2001 |
|
|
if (!may_free || !try_to_free_buffers(page, 0))
|
2002 |
|
|
return 0;
|
2003 |
|
|
J_ASSERT(page->buffers == NULL);
|
2004 |
|
|
}
|
2005 |
|
|
return 1;
|
2006 |
|
|
}
|
2007 |
|
|
|
2008 |
|
|
/*
|
2009 |
|
|
* File a buffer on the given transaction list.
|
2010 |
|
|
*/
|
2011 |
|
|
void __journal_file_buffer(struct journal_head *jh,
|
2012 |
|
|
transaction_t *transaction, int jlist)
|
2013 |
|
|
{
|
2014 |
|
|
struct journal_head **list = 0;
|
2015 |
|
|
int was_dirty = 0;
|
2016 |
|
|
|
2017 |
|
|
assert_spin_locked(&journal_datalist_lock);
|
2018 |
|
|
|
2019 |
|
|
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
|
2020 |
|
|
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
|
2021 |
|
|
jh->b_transaction == 0);
|
2022 |
|
|
|
2023 |
|
|
if (jh->b_transaction && jh->b_jlist == jlist)
|
2024 |
|
|
return;
|
2025 |
|
|
|
2026 |
|
|
/* The following list of buffer states needs to be consistent
|
2027 |
|
|
* with __jbd_unexpected_dirty_buffer()'s handling of dirty
|
2028 |
|
|
* state. */
|
2029 |
|
|
|
2030 |
|
|
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
|
2031 |
|
|
jlist == BJ_Shadow || jlist == BJ_Forget) {
|
2032 |
|
|
if (atomic_set_buffer_clean(jh2bh(jh)) ||
|
2033 |
|
|
test_and_clear_bit(BH_JBDDirty, &jh2bh(jh)->b_state))
|
2034 |
|
|
was_dirty = 1;
|
2035 |
|
|
}
|
2036 |
|
|
|
2037 |
|
|
if (jh->b_transaction)
|
2038 |
|
|
__journal_unfile_buffer(jh);
|
2039 |
|
|
else
|
2040 |
|
|
jh->b_transaction = transaction;
|
2041 |
|
|
|
2042 |
|
|
switch (jlist) {
|
2043 |
|
|
case BJ_None:
|
2044 |
|
|
J_ASSERT_JH(jh, !jh->b_committed_data);
|
2045 |
|
|
J_ASSERT_JH(jh, !jh->b_frozen_data);
|
2046 |
|
|
return;
|
2047 |
|
|
case BJ_SyncData:
|
2048 |
|
|
list = &transaction->t_sync_datalist;
|
2049 |
|
|
break;
|
2050 |
|
|
case BJ_AsyncData:
|
2051 |
|
|
list = &transaction->t_async_datalist;
|
2052 |
|
|
break;
|
2053 |
|
|
case BJ_Metadata:
|
2054 |
|
|
transaction->t_nr_buffers++;
|
2055 |
|
|
list = &transaction->t_buffers;
|
2056 |
|
|
break;
|
2057 |
|
|
case BJ_Forget:
|
2058 |
|
|
list = &transaction->t_forget;
|
2059 |
|
|
break;
|
2060 |
|
|
case BJ_IO:
|
2061 |
|
|
list = &transaction->t_iobuf_list;
|
2062 |
|
|
break;
|
2063 |
|
|
case BJ_Shadow:
|
2064 |
|
|
list = &transaction->t_shadow_list;
|
2065 |
|
|
break;
|
2066 |
|
|
case BJ_LogCtl:
|
2067 |
|
|
list = &transaction->t_log_list;
|
2068 |
|
|
break;
|
2069 |
|
|
case BJ_Reserved:
|
2070 |
|
|
list = &transaction->t_reserved_list;
|
2071 |
|
|
break;
|
2072 |
|
|
}
|
2073 |
|
|
|
2074 |
|
|
__blist_add_buffer(list, jh);
|
2075 |
|
|
jh->b_jlist = jlist;
|
2076 |
|
|
|
2077 |
|
|
if (was_dirty)
|
2078 |
|
|
set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
|
2079 |
|
|
}
|
2080 |
|
|
|
2081 |
|
|
void journal_file_buffer(struct journal_head *jh,
|
2082 |
|
|
transaction_t *transaction, int jlist)
|
2083 |
|
|
{
|
2084 |
|
|
spin_lock(&journal_datalist_lock);
|
2085 |
|
|
__journal_file_buffer(jh, transaction, jlist);
|
2086 |
|
|
spin_unlock(&journal_datalist_lock);
|
2087 |
|
|
}
|
2088 |
|
|
|
2089 |
|
|
static void jbd_refile_buffer(struct buffer_head *bh)
|
2090 |
|
|
{
|
2091 |
|
|
if (buffer_dirty(bh) && (bh->b_list != BUF_DIRTY))
|
2092 |
|
|
set_buffer_flushtime(bh);
|
2093 |
|
|
refile_buffer(bh);
|
2094 |
|
|
}
|
2095 |
|
|
|
2096 |
|
|
/*
|
2097 |
|
|
* Remove a buffer from its current buffer list in preparation for
|
2098 |
|
|
* dropping it from its current transaction entirely. If the buffer has
|
2099 |
|
|
* already started to be used by a subsequent transaction, refile the
|
2100 |
|
|
* buffer on that transaction's metadata list.
|
2101 |
|
|
*/
|
2102 |
|
|
|
2103 |
|
|
void __journal_refile_buffer(struct journal_head *jh)
|
2104 |
|
|
{
|
2105 |
|
|
int was_dirty = 0;
|
2106 |
|
|
|
2107 |
|
|
assert_spin_locked(&journal_datalist_lock);
|
2108 |
|
|
/* If the buffer is now unused, just drop it. */
|
2109 |
|
|
if (jh->b_next_transaction == NULL) {
|
2110 |
|
|
__journal_unfile_buffer(jh);
|
2111 |
|
|
jh->b_transaction = NULL;
|
2112 |
|
|
/* Onto BUF_DIRTY for writeback */
|
2113 |
|
|
jbd_refile_buffer(jh2bh(jh));
|
2114 |
|
|
return;
|
2115 |
|
|
}
|
2116 |
|
|
|
2117 |
|
|
/* It has been modified by a later transaction: add it to the
|
2118 |
|
|
* new transaction's metadata list. */
|
2119 |
|
|
|
2120 |
|
|
if (test_and_clear_bit(BH_JBDDirty, &jh2bh(jh)->b_state))
|
2121 |
|
|
was_dirty = 1;
|
2122 |
|
|
|
2123 |
|
|
__journal_unfile_buffer(jh);
|
2124 |
|
|
jh->b_transaction = jh->b_next_transaction;
|
2125 |
|
|
jh->b_next_transaction = NULL;
|
2126 |
|
|
__journal_file_buffer(jh, jh->b_transaction, BJ_Metadata);
|
2127 |
|
|
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
|
2128 |
|
|
|
2129 |
|
|
if (was_dirty)
|
2130 |
|
|
set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
|
2131 |
|
|
|
2132 |
|
|
}
|
2133 |
|
|
|
2134 |
|
|
/*
|
2135 |
|
|
* For the unlocked version of this call, also make sure that any
|
2136 |
|
|
* hanging journal_head is cleaned up if necessary.
|
2137 |
|
|
*
|
2138 |
|
|
* __journal_refile_buffer is usually called as part of a single locked
|
2139 |
|
|
* operation on a buffer_head, in which the caller is probably going to
|
2140 |
|
|
* be hooking the journal_head onto other lists. In that case it is up
|
2141 |
|
|
* to the caller to remove the journal_head if necessary. For the
|
2142 |
|
|
* unlocked journal_refile_buffer call, the caller isn't going to be
|
2143 |
|
|
* doing anything else to the buffer so we need to do the cleanup
|
2144 |
|
|
* ourselves to avoid a jh leak.
|
2145 |
|
|
*
|
2146 |
|
|
* *** The journal_head may be freed by this call! ***
|
2147 |
|
|
*/
|
2148 |
|
|
void journal_refile_buffer(struct journal_head *jh)
|
2149 |
|
|
{
|
2150 |
|
|
struct buffer_head *bh;
|
2151 |
|
|
|
2152 |
|
|
spin_lock(&journal_datalist_lock);
|
2153 |
|
|
bh = jh2bh(jh);
|
2154 |
|
|
|
2155 |
|
|
__journal_refile_buffer(jh);
|
2156 |
|
|
__journal_remove_journal_head(bh);
|
2157 |
|
|
|
2158 |
|
|
spin_unlock(&journal_datalist_lock);
|
2159 |
|
|
__brelse(bh);
|
2160 |
|
|
}
|