1 |
1275 |
phoenix |
/*
|
2 |
|
|
* linux/fs/journal.c
|
3 |
|
|
*
|
4 |
|
|
* Written by Stephen C. Tweedie <sct@redhat.com>, 1998
|
5 |
|
|
*
|
6 |
|
|
* Copyright 1998 Red Hat corp --- All Rights Reserved
|
7 |
|
|
*
|
8 |
|
|
* This file is part of the Linux kernel and is made available under
|
9 |
|
|
* the terms of the GNU General Public License, version 2, or at your
|
10 |
|
|
* option, any later version, incorporated herein by reference.
|
11 |
|
|
*
|
12 |
|
|
* Generic filesystem journal-writing code; part of the ext2fs
|
13 |
|
|
* journaling system.
|
14 |
|
|
*
|
15 |
|
|
* This file manages journals: areas of disk reserved for logging
|
16 |
|
|
* transactional updates. This includes the kernel journaling thread
|
17 |
|
|
* which is responsible for scheduling updates to the log.
|
18 |
|
|
*
|
19 |
|
|
* We do not actually manage the physical storage of the journal in this
|
20 |
|
|
* file: that is left to a per-journal policy function, which allows us
|
21 |
|
|
* to store the journal within a filesystem-specified area for ext2
|
22 |
|
|
* journaling (ext2 can use a reserved inode for storing the log).
|
23 |
|
|
*/
|
24 |
|
|
|
25 |
|
|
#include <linux/module.h>
|
26 |
|
|
#include <linux/sched.h>
|
27 |
|
|
#include <linux/fs.h>
|
28 |
|
|
#include <linux/jbd.h>
|
29 |
|
|
#include <linux/errno.h>
|
30 |
|
|
#include <linux/slab.h>
|
31 |
|
|
#include <linux/locks.h>
|
32 |
|
|
#include <linux/smp_lock.h>
|
33 |
|
|
#include <linux/sched.h>
|
34 |
|
|
#include <linux/init.h>
|
35 |
|
|
#include <linux/mm.h>
|
36 |
|
|
#include <asm/uaccess.h>
|
37 |
|
|
#include <linux/proc_fs.h>
|
38 |
|
|
|
39 |
|
|
EXPORT_SYMBOL(journal_start);
|
40 |
|
|
EXPORT_SYMBOL(journal_try_start);
|
41 |
|
|
EXPORT_SYMBOL(journal_restart);
|
42 |
|
|
EXPORT_SYMBOL(journal_extend);
|
43 |
|
|
EXPORT_SYMBOL(journal_stop);
|
44 |
|
|
EXPORT_SYMBOL(journal_lock_updates);
|
45 |
|
|
EXPORT_SYMBOL(journal_unlock_updates);
|
46 |
|
|
EXPORT_SYMBOL(journal_get_write_access);
|
47 |
|
|
EXPORT_SYMBOL(journal_get_create_access);
|
48 |
|
|
EXPORT_SYMBOL(journal_get_undo_access);
|
49 |
|
|
EXPORT_SYMBOL(journal_dirty_data);
|
50 |
|
|
EXPORT_SYMBOL(journal_dirty_metadata);
|
51 |
|
|
#if 0
|
52 |
|
|
EXPORT_SYMBOL(journal_release_buffer);
|
53 |
|
|
#endif
|
54 |
|
|
EXPORT_SYMBOL(journal_forget);
|
55 |
|
|
#if 0
|
56 |
|
|
EXPORT_SYMBOL(journal_sync_buffer);
|
57 |
|
|
#endif
|
58 |
|
|
EXPORT_SYMBOL(journal_flush);
|
59 |
|
|
EXPORT_SYMBOL(journal_revoke);
|
60 |
|
|
EXPORT_SYMBOL(journal_callback_set);
|
61 |
|
|
|
62 |
|
|
EXPORT_SYMBOL(journal_init_dev);
|
63 |
|
|
EXPORT_SYMBOL(journal_init_inode);
|
64 |
|
|
EXPORT_SYMBOL(journal_update_format);
|
65 |
|
|
EXPORT_SYMBOL(journal_check_used_features);
|
66 |
|
|
EXPORT_SYMBOL(journal_check_available_features);
|
67 |
|
|
EXPORT_SYMBOL(journal_set_features);
|
68 |
|
|
EXPORT_SYMBOL(journal_create);
|
69 |
|
|
EXPORT_SYMBOL(journal_load);
|
70 |
|
|
EXPORT_SYMBOL(journal_destroy);
|
71 |
|
|
EXPORT_SYMBOL(journal_recover);
|
72 |
|
|
EXPORT_SYMBOL(journal_update_superblock);
|
73 |
|
|
EXPORT_SYMBOL(journal_abort);
|
74 |
|
|
EXPORT_SYMBOL(journal_errno);
|
75 |
|
|
EXPORT_SYMBOL(journal_ack_err);
|
76 |
|
|
EXPORT_SYMBOL(journal_clear_err);
|
77 |
|
|
EXPORT_SYMBOL(log_wait_commit);
|
78 |
|
|
EXPORT_SYMBOL(log_start_commit);
|
79 |
|
|
EXPORT_SYMBOL(journal_wipe);
|
80 |
|
|
EXPORT_SYMBOL(journal_blocks_per_page);
|
81 |
|
|
EXPORT_SYMBOL(journal_flushpage);
|
82 |
|
|
EXPORT_SYMBOL(journal_try_to_free_buffers);
|
83 |
|
|
EXPORT_SYMBOL(journal_bmap);
|
84 |
|
|
EXPORT_SYMBOL(journal_force_commit);
|
85 |
|
|
|
86 |
|
|
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
|
87 |
|
|
|
88 |
|
|
/*
|
89 |
|
|
* journal_datalist_lock is used to protect data buffers:
|
90 |
|
|
*
|
91 |
|
|
* bh->b_transaction
|
92 |
|
|
* bh->b_tprev
|
93 |
|
|
* bh->b_tnext
|
94 |
|
|
*
|
95 |
|
|
* journal_free_buffer() is called from journal_try_to_free_buffer(), and is
|
96 |
|
|
* async wrt everything else.
|
97 |
|
|
*
|
98 |
|
|
* It is also used for checkpoint data, also to protect against
|
99 |
|
|
* journal_try_to_free_buffer():
|
100 |
|
|
*
|
101 |
|
|
* bh->b_cp_transaction
|
102 |
|
|
* bh->b_cpnext
|
103 |
|
|
* bh->b_cpprev
|
104 |
|
|
* transaction->t_checkpoint_list
|
105 |
|
|
* transaction->t_cpnext
|
106 |
|
|
* transaction->t_cpprev
|
107 |
|
|
* journal->j_checkpoint_transactions
|
108 |
|
|
*
|
109 |
|
|
* It is global at this time rather than per-journal because it's
|
110 |
|
|
* impossible for __journal_free_buffer to go from a buffer_head
|
111 |
|
|
* back to a journal_t unracily (well, not true. Fix later)
|
112 |
|
|
*
|
113 |
|
|
*
|
114 |
|
|
* The `datalist' and `checkpoint list' functions are quite
|
115 |
|
|
* separate and we could use two spinlocks here.
|
116 |
|
|
*
|
117 |
|
|
* lru_list_lock nests inside journal_datalist_lock.
|
118 |
|
|
*/
|
119 |
|
|
spinlock_t journal_datalist_lock = SPIN_LOCK_UNLOCKED;
|
120 |
|
|
|
121 |
|
|
/*
|
122 |
|
|
* jh_splice_lock needs explanation.
|
123 |
|
|
*
|
124 |
|
|
* In a number of places we want to do things like:
|
125 |
|
|
*
|
126 |
|
|
* if (buffer_jbd(bh) && bh2jh(bh)->foo)
|
127 |
|
|
*
|
128 |
|
|
* This is racy on SMP, because another CPU could remove the journal_head
|
129 |
|
|
* in the middle of this expression. We need locking.
|
130 |
|
|
*
|
131 |
|
|
* But we can greatly optimise the locking cost by testing BH_JBD
|
132 |
|
|
* outside the lock. So, effectively:
|
133 |
|
|
*
|
134 |
|
|
* ret = 0;
|
135 |
|
|
* if (buffer_jbd(bh)) {
|
136 |
|
|
* spin_lock(&jh_splice_lock);
|
137 |
|
|
* if (buffer_jbd(bh)) { (* Still there? *)
|
138 |
|
|
* ret = bh2jh(bh)->foo;
|
139 |
|
|
* }
|
140 |
|
|
* spin_unlock(&jh_splice_lock);
|
141 |
|
|
* }
|
142 |
|
|
* return ret;
|
143 |
|
|
*
|
144 |
|
|
* Now, that protects us from races where another CPU can remove the
|
145 |
|
|
* journal_head. But it doesn't defend us from the situation where another
|
146 |
|
|
* CPU can *add* a journal_head. This is a correctness issue. But it's not
|
147 |
|
|
* a problem because a) the calling code was *already* racy and b) it often
|
148 |
|
|
* can't happen at the call site and c) the places where we add journal_heads
|
149 |
|
|
* tend to be under external locking.
|
150 |
|
|
*/
|
151 |
|
|
spinlock_t jh_splice_lock = SPIN_LOCK_UNLOCKED;
|
152 |
|
|
|
153 |
|
|
/*
|
154 |
|
|
* List of all journals in the system. Protected by the BKL.
|
155 |
|
|
*/
|
156 |
|
|
static LIST_HEAD(all_journals);
|
157 |
|
|
|
158 |
|
|
/*
|
159 |
|
|
* Helper function used to manage commit timeouts
|
160 |
|
|
*/
|
161 |
|
|
|
162 |
|
|
/*
 * Timer callback.  __data is treated as a pointer to a task_struct, and
 * that task is woken up.
 */
static void commit_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *) __data);
}
|
168 |
|
|
|
169 |
|
|
/* Static check for data structure consistency. There's no code
|
170 |
|
|
* invoked --- we'll just get a linker failure if things aren't right.
|
171 |
|
|
*/
|
172 |
|
|
void __journal_internal_check(void)
|
173 |
|
|
{
|
174 |
|
|
extern void journal_bad_superblock_size(void);
|
175 |
|
|
if (sizeof(struct journal_superblock_s) != 1024)
|
176 |
|
|
journal_bad_superblock_size();
|
177 |
|
|
}
|
178 |
|
|
|
179 |
|
|
/*
|
180 |
|
|
* kjournald: The main thread function used to manage a logging device
|
181 |
|
|
* journal.
|
182 |
|
|
*
|
183 |
|
|
* This kernel thread is responsible for two things:
|
184 |
|
|
*
|
185 |
|
|
* 1) COMMIT: Every so often we need to commit the current state of the
|
186 |
|
|
* filesystem to disk. The journal thread is responsible for writing
|
187 |
|
|
* all of the metadata buffers to disk.
|
188 |
|
|
*
|
189 |
|
|
* 2) CHECKPOINT: We cannot reuse a used section of the log file until all
|
190 |
|
|
* of the data in that part of the log has been rewritten elsewhere on
|
191 |
|
|
* the disk. Flushing these old buffers to reclaim space in the log is
|
192 |
|
|
* known as checkpointing, and this thread is responsible for that job.
|
193 |
|
|
*/
|
194 |
|
|
|
195 |
|
|
journal_t *current_journal; // AKPM: debug
|
196 |
|
|
|
197 |
|
|
int kjournald(void *arg)
|
198 |
|
|
{
|
199 |
|
|
journal_t *journal = (journal_t *) arg;
|
200 |
|
|
transaction_t *transaction;
|
201 |
|
|
struct timer_list timer;
|
202 |
|
|
|
203 |
|
|
current_journal = journal;
|
204 |
|
|
|
205 |
|
|
lock_kernel();
|
206 |
|
|
daemonize();
|
207 |
|
|
reparent_to_init();
|
208 |
|
|
spin_lock_irq(¤t->sigmask_lock);
|
209 |
|
|
sigfillset(¤t->blocked);
|
210 |
|
|
recalc_sigpending(current);
|
211 |
|
|
spin_unlock_irq(¤t->sigmask_lock);
|
212 |
|
|
|
213 |
|
|
sprintf(current->comm, "kjournald");
|
214 |
|
|
|
215 |
|
|
/* Set up an interval timer which can be used to trigger a
|
216 |
|
|
commit wakeup after the commit interval expires */
|
217 |
|
|
init_timer(&timer);
|
218 |
|
|
timer.data = (unsigned long) current;
|
219 |
|
|
timer.function = commit_timeout;
|
220 |
|
|
journal->j_commit_timer = &timer;
|
221 |
|
|
|
222 |
|
|
/* Record that the journal thread is running */
|
223 |
|
|
journal->j_task = current;
|
224 |
|
|
wake_up(&journal->j_wait_done_commit);
|
225 |
|
|
|
226 |
|
|
printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n",
|
227 |
|
|
journal->j_commit_interval / HZ);
|
228 |
|
|
list_add(&journal->j_all_journals, &all_journals);
|
229 |
|
|
|
230 |
|
|
/* And now, wait forever for commit wakeup events. */
|
231 |
|
|
while (1) {
|
232 |
|
|
if (journal->j_flags & JFS_UNMOUNT)
|
233 |
|
|
break;
|
234 |
|
|
|
235 |
|
|
jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
|
236 |
|
|
journal->j_commit_sequence, journal->j_commit_request);
|
237 |
|
|
|
238 |
|
|
if (journal->j_commit_sequence != journal->j_commit_request) {
|
239 |
|
|
jbd_debug(1, "OK, requests differ\n");
|
240 |
|
|
if (journal->j_commit_timer_active) {
|
241 |
|
|
journal->j_commit_timer_active = 0;
|
242 |
|
|
del_timer(journal->j_commit_timer);
|
243 |
|
|
}
|
244 |
|
|
|
245 |
|
|
journal_commit_transaction(journal);
|
246 |
|
|
continue;
|
247 |
|
|
}
|
248 |
|
|
|
249 |
|
|
wake_up(&journal->j_wait_done_commit);
|
250 |
|
|
interruptible_sleep_on(&journal->j_wait_commit);
|
251 |
|
|
|
252 |
|
|
jbd_debug(1, "kjournald wakes\n");
|
253 |
|
|
|
254 |
|
|
/* Were we woken up by a commit wakeup event? */
|
255 |
|
|
if ((transaction = journal->j_running_transaction) != NULL &&
|
256 |
|
|
journal->j_commit_interval &&
|
257 |
|
|
time_after_eq(jiffies, transaction->t_expires)) {
|
258 |
|
|
journal->j_commit_request = transaction->t_tid;
|
259 |
|
|
jbd_debug(1, "woke because of timeout\n");
|
260 |
|
|
}
|
261 |
|
|
}
|
262 |
|
|
|
263 |
|
|
if (journal->j_commit_timer_active) {
|
264 |
|
|
journal->j_commit_timer_active = 0;
|
265 |
|
|
del_timer_sync(journal->j_commit_timer);
|
266 |
|
|
}
|
267 |
|
|
|
268 |
|
|
list_del(&journal->j_all_journals);
|
269 |
|
|
|
270 |
|
|
journal->j_task = NULL;
|
271 |
|
|
wake_up(&journal->j_wait_done_commit);
|
272 |
|
|
unlock_kernel();
|
273 |
|
|
jbd_debug(1, "Journal thread exiting.\n");
|
274 |
|
|
return 0;
|
275 |
|
|
}
|
276 |
|
|
|
277 |
|
|
/*
 * Spawn kjournald for this journal and sleep on j_wait_done_commit until
 * journal->j_task becomes non-NULL.
 */
static void journal_start_thread(journal_t *journal)
{
	kernel_thread(kjournald, (void *) journal,
		      CLONE_VM | CLONE_FS | CLONE_FILES);

	for (;;) {
		if (journal->j_task)
			break;
		sleep_on(&journal->j_wait_done_commit);
	}
}
|
284 |
|
|
|
285 |
|
|
/*
 * Set JFS_UNMOUNT on the journal, then keep waking j_wait_commit and
 * sleeping on j_wait_done_commit until journal->j_task is NULL.
 */
static void journal_kill_thread(journal_t *journal)
{
	journal->j_flags |= JFS_UNMOUNT;

	for (;;) {
		if (!journal->j_task)
			break;
		wake_up(&journal->j_wait_commit);
		sleep_on(&journal->j_wait_done_commit);
	}
}
|
294 |
|
|
|
295 |
|
|
#if 0

/*
 * This is no longer needed - we do it in commit quite efficiently.
 * Note that if this function is resurrected, the loop needs to
 * be reorganised into the next_jh/last_jh algorithm.
 */

/*
 * journal_clean_data_list: cleanup after data IO.
 *
 * Once the IO system has finished writing the buffers on the transaction's
 * data list, we can remove those buffers from the list.  This function
 * scans the list for such buffers and removes them cleanly.
 *
 * We assume that the journal is already locked.
 * We are called with journal_datalist_lock held.
 *
 * AKPM: This function looks inefficient.  Approximately O(n^2)
 * for potentially thousands of buffers.  It no longer shows on profiles
 * because these buffers are mainly dropped in journal_commit_transaction().
 */

void __journal_clean_data_list(transaction_t *transaction)
{
	struct journal_head *jh, *next;

	assert_spin_locked(&journal_datalist_lock);

restart:
	jh = transaction->t_sync_datalist;
	if (!jh)
		return;

	do {
		struct buffer_head *bh = jh2bh(jh);

		next = jh->b_tnext;
		if (!buffer_locked(bh) && !buffer_dirty(bh)) {
			/* Writeout finished: drop it and rescan from the head. */
			BUFFER_TRACE(bh, "data writeout complete: unfile");
			__journal_unfile_buffer(jh);
			jh->b_transaction = NULL;
			__journal_remove_journal_head(bh);
			refile_buffer(bh);
			__brelse(bh);
			goto restart;
		}
		jh = next;
	} while (transaction->t_sync_datalist &&
			jh != transaction->t_sync_datalist);
}
#endif
|
345 |
|
|
|
346 |
|
|
/*
|
347 |
|
|
* journal_write_metadata_buffer: write a metadata buffer to the journal.
|
348 |
|
|
*
|
349 |
|
|
* Writes a metadata buffer to a given disk block. The actual IO is not
|
350 |
|
|
* performed but a new buffer_head is constructed which labels the data
|
351 |
|
|
* to be written with the correct destination disk block.
|
352 |
|
|
*
|
353 |
|
|
* Any magic-number escaping which needs to be done will cause a
|
354 |
|
|
* copy-out here. If the buffer happens to start with the
|
355 |
|
|
* JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
|
356 |
|
|
* magic number is only written to the log for descriptor blocks. In
|
357 |
|
|
* this case, we copy the data and replace the first word with 0, and we
|
358 |
|
|
* return a result code which indicates that this buffer needs to be
|
359 |
|
|
* marked as an escaped buffer in the corresponding log descriptor
|
360 |
|
|
* block. The missing word can then be restored when the block is read
|
361 |
|
|
* during recovery.
|
362 |
|
|
*
|
363 |
|
|
* If the source buffer has already been modified by a new transaction
|
364 |
|
|
* since we took the last commit snapshot, we use the frozen copy of
|
365 |
|
|
* that data for IO. If we end up using the existing buffer_head's data
|
366 |
|
|
* for the write, then we *have* to lock the buffer to prevent anyone
|
367 |
|
|
* else from using and possibly modifying it while the IO is in
|
368 |
|
|
* progress.
|
369 |
|
|
*
|
370 |
|
|
* The function returns a pointer to the buffer_heads to be used for IO.
|
371 |
|
|
*
|
372 |
|
|
* We assume that the journal has already been locked in this function.
|
373 |
|
|
*
|
374 |
|
|
* Return value:
|
375 |
|
|
* <0: Error
|
376 |
|
|
* >=0: Finished OK
|
377 |
|
|
*
|
378 |
|
|
* On success:
|
379 |
|
|
* Bit 0 set == escape performed on the data
|
380 |
|
|
* Bit 1 set == buffer copy-out performed (kfree the data after IO)
|
381 |
|
|
*/
|
382 |
|
|
|
383 |
|
|
static inline unsigned long virt_to_offset(void *p)
|
384 |
|
|
{return ((unsigned long) p) & ~PAGE_MASK;}
|
385 |
|
|
|
386 |
|
|
int journal_write_metadata_buffer(transaction_t *transaction,
				  struct journal_head *jh_in,
				  struct journal_head **jh_out,
				  int blocknr)
{
	struct buffer_head *bh_in = jh2bh(jh_in);
	struct buffer_head *new_bh;
	struct journal_head *new_jh;
	struct page *new_page;
	unsigned int new_offset;
	char *mapped_data;
	int done_copy_out = 0;	/* data for IO lives in b_frozen_data */
	int do_escape = 0;	/* first word must be zeroed before IO */

	/*
	 * The buffer really shouldn't be locked: only the current committing
	 * transaction is allowed to write it, so nobody else is allowed
	 * to do any IO.
	 *
	 * akpm: except if we're journalling data, and write() output is
	 * also part of a shared mapping, and another thread has
	 * decided to launch a writepage() against this buffer.
	 */
	J_ASSERT_JH(jh_in, buffer_jdirty(jh2bh(jh_in)));

	/*
	 * Pick the source of the data: an existing frozen copy if a newer
	 * transaction already made one, otherwise the live buffer.
	 */
	if (jh_in->b_frozen_data) {
		done_copy_out = 1;
		new_page = virt_to_page(jh_in->b_frozen_data);
		new_offset = virt_to_offset(jh_in->b_frozen_data);
	} else {
		new_page = bh_in->b_page;
		new_offset = virt_to_offset(bh_in->b_data);
	}

	mapped_data = ((char *) kmap(new_page)) + new_offset;

	/* A block starting with the journal magic number must be escaped. */
	if (* ((unsigned int *) mapped_data) == htonl(JFS_MAGIC_NUMBER))
		do_escape = 1;

	/*
	 * Escaping modifies the data, so it must happen on a private copy.
	 * Make one now unless a frozen copy already exists.
	 */
	if (do_escape && !done_copy_out) {
		char *copy = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);

		jh_in->b_frozen_data = copy;
		memcpy (copy, mapped_data, bh_in->b_size);

		/* Switch the mapping over to the copy so that the
		   escaped magic number can be cleared below. */
		kunmap(new_page);
		new_page = virt_to_page(copy);
		new_offset = virt_to_offset(copy);
		mapped_data = ((char *) kmap(new_page)) + new_offset;

		done_copy_out = 1;
	}

	/* Build the buffer_head used for the journal write; retry until one is available. */
	while ((new_bh = get_unused_buffer_head(0)) == NULL) {
		printk (KERN_NOTICE "%s: ENOMEM at "
			"get_unused_buffer_head, trying again.\n",
			__FUNCTION__);
		yield();
	}

	/* keep subsequent assertions sane */
	new_bh->b_prev_free = 0;
	new_bh->b_next_free = 0;
	new_bh->b_state = 0;
	init_buffer(new_bh, NULL, NULL);
	atomic_set(&new_bh->b_count, 1);
	new_jh = journal_add_journal_head(new_bh);

	set_bh_page(new_bh, new_page, new_offset);

	new_jh->b_transaction = NULL;
	new_bh->b_size = bh_in->b_size;
	new_bh->b_dev = transaction->t_journal->j_dev;
	new_bh->b_blocknr = blocknr;
	new_bh->b_state |= (1 << BH_Mapped) | (1 << BH_Dirty);

	*jh_out = new_jh;

	/* All copying is finished, so the escape can be applied now. */
	if (do_escape)
		* ((unsigned int *) mapped_data) = 0;
	kunmap(new_page);

	/*
	 * The to-be-written buffer needs to get moved to the io queue,
	 * and the original buffer whose contents we are shadowing or
	 * copying is moved to the transaction's shadow queue.
	 */
	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
	journal_file_buffer(jh_in, transaction, BJ_Shadow);
	JBUFFER_TRACE(new_jh, "file as BJ_IO");
	journal_file_buffer(new_jh, transaction, BJ_IO);

	/* Bit 0: escape performed.  Bit 1: copy-out performed. */
	return do_escape | (done_copy_out << 1);
}
|
508 |
|
|
|
509 |
|
|
/*
|
510 |
|
|
* Allocation code for the journal file. Manage the space left in the
|
511 |
|
|
* journal, so that we can begin checkpointing when appropriate.
|
512 |
|
|
*/
|
513 |
|
|
|
514 |
|
|
/*
|
515 |
|
|
* log_space_left: Return the number of free blocks left in the journal.
|
516 |
|
|
*
|
517 |
|
|
* Called with the journal already locked.
|
518 |
|
|
*/
|
519 |
|
|
|
520 |
|
|
int log_space_left (journal_t *journal)
{
	int avail = journal->j_free;

	/* Be pessimistic here about the number of those free blocks
	 * which might be required for log descriptor control blocks. */

#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */

	avail -= MIN_LOG_RESERVED_BLOCKS;
	if (avail <= 0)
		return 0;

	/* Hold back a further eighth of what remains. */
	return avail - (avail >> 3);
}
|
536 |
|
|
|
537 |
|
|
/*
|
538 |
|
|
* This function must be non-allocating for PF_MEMALLOC tasks
|
539 |
|
|
*/
|
540 |
|
|
tid_t log_start_commit (journal_t *journal, transaction_t *transaction)
{
	tid_t target = journal->j_commit_request;

	lock_kernel(); /* Protect journal->j_running_transaction */

	/*
	 * A NULL transaction asks us to commit the currently running
	 * transaction, if there is one.
	 */
	if (!transaction)
		transaction = journal->j_running_transaction;

	if (transaction) {
		target = transaction->t_tid;

		/*
		 * Only act if the commit already requested is older than
		 * the target.  We mark the request and wake up the commit
		 * thread; we do _not_ do the commit ourselves.
		 */
		if (!tid_geq(journal->j_commit_request, target)) {
			journal->j_commit_request = target;
			jbd_debug(1, "JBD: requesting commit %d/%d\n",
				  journal->j_commit_request,
				  journal->j_commit_sequence);
			wake_up(&journal->j_wait_commit);
		}
	}

	unlock_kernel();
	return target;
}
|
580 |
|
|
|
581 |
|
|
/*
|
582 |
|
|
* Wait for a specified commit to complete.
|
583 |
|
|
* The caller may not hold the journal lock.
|
584 |
|
|
*/
|
585 |
|
|
void log_wait_commit (journal_t *journal, tid_t tid)
{
	lock_kernel();
#ifdef CONFIG_JBD_DEBUG
	/* Debug builds complain if the awaited commit was never requested. */
	lock_journal(journal);
	if (!tid_geq(journal->j_commit_request, tid)) {
		printk(KERN_EMERG "%s: error: j_commit_request=%d, tid=%d\n",
			__FUNCTION__, journal->j_commit_request, tid);
	}
	unlock_journal(journal);
#endif
	/* Wake j_wait_commit and sleep until j_commit_sequence reaches tid. */
	for (;;) {
		if (!tid_gt(tid, journal->j_commit_sequence))
			break;
		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
			  tid, journal->j_commit_sequence);
		wake_up(&journal->j_wait_commit);
		sleep_on(&journal->j_wait_done_commit);
	}
	unlock_kernel();
}
|
604 |
|
|
|
605 |
|
|
/*
|
606 |
|
|
* Log buffer allocation routines:
|
607 |
|
|
*/
|
608 |
|
|
|
609 |
|
|
/*
 * Take the block at journal->j_head, advance the head (wrapping from
 * j_last back to j_first), decrement j_free, and map the taken block
 * through journal_bmap().  The mapped block number is stored in *retp
 * and journal_bmap()'s return value is passed back.
 */
int journal_next_log_block(journal_t *journal, unsigned long *retp)
{
	unsigned long logical;

	J_ASSERT(journal->j_free > 1);

	logical = journal->j_head;
	journal->j_free--;
	if (++journal->j_head == journal->j_last)
		journal->j_head = journal->j_first;

	return journal_bmap(journal, logical, retp);
}
|
622 |
|
|
|
623 |
|
|
/*
|
624 |
|
|
* Conversion of logical to physical block numbers for the journal
|
625 |
|
|
*
|
626 |
|
|
* On external journals the journal blocks are identity-mapped, so
|
627 |
|
|
* this is a no-op. If needed, we can use j_blk_offset - everything is
|
628 |
|
|
* ready.
|
629 |
|
|
*/
|
630 |
|
|
int journal_bmap(journal_t *journal, unsigned long blocknr,
		 unsigned long *retp)
{
	unsigned long phys;

	/* No journal inode: block numbers are used as-is. */
	if (!journal->j_inode) {
		*retp = blocknr; /* +journal->j_blk_offset */
		return 0;
	}

	phys = bmap(journal->j_inode, blocknr);
	if (phys) {
		*retp = phys;
		return 0;
	}

	/* bmap() returned 0: report it and abort the journal with -EIO. */
	printk (KERN_ALERT "%s: journal block not found "
		"at offset %lu on %s\n", __FUNCTION__,
		blocknr, bdevname(journal->j_dev));
	__journal_abort_soft(journal, -EIO);
	return -EIO;
}
|
652 |
|
|
|
653 |
|
|
/*
|
654 |
|
|
* We play buffer_head aliasing tricks to write data/metadata blocks to
|
655 |
|
|
* the journal without copying their contents, but for journal
|
656 |
|
|
* descriptor blocks we do need to generate bona fide buffers.
|
657 |
|
|
*
|
658 |
|
|
* We return a jh whose bh is locked and ready to be populated.
|
659 |
|
|
*/
|
660 |
|
|
|
661 |
|
|
struct journal_head * journal_get_descriptor_buffer(journal_t *journal)
{
	struct buffer_head *bh;
	unsigned long blocknr;

	/* Returns NULL if the next log block cannot be obtained. */
	if (journal_next_log_block(journal, &blocknr))
		return NULL;

	bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
	lock_buffer(bh);
	/* Hand back a zero-filled block. */
	memset(bh->b_data, 0, journal->j_blocksize);
	BUFFER_TRACE(bh, "return this buffer");

	return journal_add_journal_head(bh);
}
|
678 |
|
|
|
679 |
|
|
/*
|
680 |
|
|
* Management for journal control blocks: functions to create and
|
681 |
|
|
* destroy journal_t structures, and to initialise and read existing
|
682 |
|
|
* journal blocks from disk. */
|
683 |
|
|
|
684 |
|
|
/* First: create and setup a journal_t object in memory. We initialise
|
685 |
|
|
* very few fields yet: that has to wait until we have created the
|
686 |
|
|
* journal structures from scratch, or loaded them from disk. */
|
687 |
|
|
|
688 |
|
|
static journal_t * journal_init_common (void)
|
689 |
|
|
{
|
690 |
|
|
journal_t *journal;
|
691 |
|
|
int err;
|
692 |
|
|
|
693 |
|
|
MOD_INC_USE_COUNT;
|
694 |
|
|
|
695 |
|
|
journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
|
696 |
|
|
if (!journal)
|
697 |
|
|
goto fail;
|
698 |
|
|
memset(journal, 0, sizeof(*journal));
|
699 |
|
|
|
700 |
|
|
init_waitqueue_head(&journal->j_wait_transaction_locked);
|
701 |
|
|
init_waitqueue_head(&journal->j_wait_logspace);
|
702 |
|
|
init_waitqueue_head(&journal->j_wait_done_commit);
|
703 |
|
|
init_waitqueue_head(&journal->j_wait_checkpoint);
|
704 |
|
|
init_waitqueue_head(&journal->j_wait_commit);
|
705 |
|
|
init_waitqueue_head(&journal->j_wait_updates);
|
706 |
|
|
init_MUTEX(&journal->j_barrier);
|
707 |
|
|
init_MUTEX(&journal->j_checkpoint_sem);
|
708 |
|
|
init_MUTEX(&journal->j_sem);
|
709 |
|
|
|
710 |
|
|
journal->j_commit_interval = get_buffer_flushtime();
|
711 |
|
|
|
712 |
|
|
/* The journal is marked for error until we succeed with recovery! */
|
713 |
|
|
journal->j_flags = JFS_ABORT;
|
714 |
|
|
|
715 |
|
|
/* Set up a default-sized revoke table for the new mount. */
|
716 |
|
|
err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
|
717 |
|
|
if (err) {
|
718 |
|
|
kfree(journal);
|
719 |
|
|
goto fail;
|
720 |
|
|
}
|
721 |
|
|
return journal;
|
722 |
|
|
fail:
|
723 |
|
|
MOD_DEC_USE_COUNT;
|
724 |
|
|
return NULL;
|
725 |
|
|
}
|
726 |
|
|
|
727 |
|
|
/* journal_init_dev and journal_init_inode:
|
728 |
|
|
*
|
729 |
|
|
* Create a journal structure assigned some fixed set of disk blocks to
|
730 |
|
|
* the journal. We don't actually touch those disk blocks yet, but we
|
731 |
|
|
* need to set up all of the mapping information to tell the journaling
|
732 |
|
|
* system where the journal blocks are.
|
733 |
|
|
*
|
734 |
|
|
*/
|
735 |
|
|
|
736 |
|
|
/**
|
737 |
|
|
* journal_t * journal_init_dev() - creates and initialises a journal structure
|
738 |
|
|
* @dev: Block device on which to create the journal
|
739 |
|
|
* @fs_dev: Device which holds the journalled filesystem for this journal.
|
740 |
|
|
* @start: Block nr Start of journal.
|
741 |
|
|
* @len: Length of the journal in blocks.
|
742 |
|
|
* @blocksize: blocksize of journalling device
|
743 |
|
|
* @returns: a newly created journal_t *
|
744 |
|
|
*
|
745 |
|
|
* journal_init_dev creates a journal which maps a fixed contiguous
|
746 |
|
|
* range of blocks on an arbitrary block device.
|
747 |
|
|
*
|
748 |
|
|
*/
|
749 |
|
|
journal_t * journal_init_dev(kdev_t dev, kdev_t fs_dev,
			int start, int len, int blocksize)
{
	struct buffer_head *bh;
	journal_t *journal;

	journal = journal_init_common();
	if (!journal)
		return NULL;

	/* Record where the journal lives. */
	journal->j_dev = dev;
	journal->j_fs_dev = fs_dev;
	journal->j_blk_offset = start;
	journal->j_maxlen = len;
	journal->j_blocksize = blocksize;

	/* The block at 'start' is used as the journal superblock. */
	bh = getblk(journal->j_dev, start, journal->j_blocksize);
	J_ASSERT(bh != NULL);
	journal->j_superblock = (journal_superblock_t *)bh->b_data;
	journal->j_sb_buffer = bh;

	return journal;
}
|
771 |
|
|
|
772 |
|
|
/**
|
773 |
|
|
* journal_t * journal_init_inode () - creates a journal which maps to an inode.
|
774 |
|
|
* @inode: An inode to create the journal in
|
775 |
|
|
*
|
776 |
|
|
* journal_init_inode creates a journal which maps an on-disk inode as
|
777 |
|
|
* the journal. The inode must exist already, must support bmap() and
|
778 |
|
|
* must have all data blocks preallocated.
|
779 |
|
|
*/
|
780 |
|
|
journal_t * journal_init_inode (struct inode *inode)
|
781 |
|
|
{
|
782 |
|
|
struct buffer_head *bh;
|
783 |
|
|
journal_t *journal = journal_init_common();
|
784 |
|
|
int err;
|
785 |
|
|
unsigned long blocknr;
|
786 |
|
|
|
787 |
|
|
if (!journal)
|
788 |
|
|
return NULL;
|
789 |
|
|
|
790 |
|
|
journal->j_dev = inode->i_dev;
|
791 |
|
|
journal->j_fs_dev = inode->i_dev;
|
792 |
|
|
journal->j_inode = inode;
|
793 |
|
|
jbd_debug(1,
|
794 |
|
|
"journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
|
795 |
|
|
journal, bdevname(inode->i_dev), inode->i_ino,
|
796 |
|
|
(long long) inode->i_size,
|
797 |
|
|
inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
|
798 |
|
|
|
799 |
|
|
journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
|
800 |
|
|
journal->j_blocksize = inode->i_sb->s_blocksize;
|
801 |
|
|
|
802 |
|
|
err = journal_bmap(journal, 0, &blocknr);
|
803 |
|
|
/* If that failed, give up */
|
804 |
|
|
if (err) {
|
805 |
|
|
printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
|
806 |
|
|
__FUNCTION__);
|
807 |
|
|
kfree(journal);
|
808 |
|
|
return NULL;
|
809 |
|
|
}
|
810 |
|
|
|
811 |
|
|
bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
|
812 |
|
|
J_ASSERT(bh != NULL);
|
813 |
|
|
journal->j_sb_buffer = bh;
|
814 |
|
|
journal->j_superblock = (journal_superblock_t *)bh->b_data;
|
815 |
|
|
|
816 |
|
|
return journal;
|
817 |
|
|
}
|
818 |
|
|
|
819 |
|
|
/*
|
820 |
|
|
* If the journal init or create aborts, we need to mark the journal
|
821 |
|
|
* superblock as being NULL to prevent the journal destroy from writing
|
822 |
|
|
* back a bogus superblock.
|
823 |
|
|
*/
|
824 |
|
|
static void journal_fail_superblock (journal_t *journal)
|
825 |
|
|
{
|
826 |
|
|
struct buffer_head *bh = journal->j_sb_buffer;
|
827 |
|
|
brelse(bh);
|
828 |
|
|
journal->j_sb_buffer = NULL;
|
829 |
|
|
}
|
830 |
|
|
|
831 |
|
|
/*
|
832 |
|
|
* Given a journal_t structure, initialise the various fields for
|
833 |
|
|
* startup of a new journaling session. We use this both when creating
|
834 |
|
|
* a journal, and after recovering an old journal to reset it for
|
835 |
|
|
* subsequent use.
|
836 |
|
|
*/
|
837 |
|
|
|
838 |
|
|
static int journal_reset (journal_t *journal)
|
839 |
|
|
{
|
840 |
|
|
journal_superblock_t *sb = journal->j_superblock;
|
841 |
|
|
unsigned int first, last;
|
842 |
|
|
|
843 |
|
|
first = ntohl(sb->s_first);
|
844 |
|
|
last = ntohl(sb->s_maxlen);
|
845 |
|
|
|
846 |
|
|
journal->j_first = first;
|
847 |
|
|
journal->j_last = last;
|
848 |
|
|
|
849 |
|
|
journal->j_head = first;
|
850 |
|
|
journal->j_tail = first;
|
851 |
|
|
journal->j_free = last - first;
|
852 |
|
|
|
853 |
|
|
journal->j_tail_sequence = journal->j_transaction_sequence;
|
854 |
|
|
journal->j_commit_sequence = journal->j_transaction_sequence - 1;
|
855 |
|
|
journal->j_commit_request = journal->j_commit_sequence;
|
856 |
|
|
|
857 |
|
|
journal->j_max_transaction_buffers = journal->j_maxlen / 4;
|
858 |
|
|
|
859 |
|
|
/* Add the dynamic fields and write it to disk. */
|
860 |
|
|
journal_update_superblock(journal, 1);
|
861 |
|
|
|
862 |
|
|
lock_journal(journal);
|
863 |
|
|
journal_start_thread(journal);
|
864 |
|
|
unlock_journal(journal);
|
865 |
|
|
|
866 |
|
|
return 0;
|
867 |
|
|
}
|
868 |
|
|
|
869 |
|
|
/**
|
870 |
|
|
* int journal_create() - Initialise the new journal file
|
871 |
|
|
* @journal: Journal to create. This structure must have been initialised
|
872 |
|
|
*
|
873 |
|
|
* Given a journal_t structure which tells us which disk blocks we can
|
874 |
|
|
* use, create a new journal superblock and initialise all of the
|
875 |
|
|
* journal fields from scratch.
|
876 |
|
|
**/
|
877 |
|
|
int journal_create(journal_t *journal)
|
878 |
|
|
{
|
879 |
|
|
unsigned long blocknr;
|
880 |
|
|
struct buffer_head *bh;
|
881 |
|
|
journal_superblock_t *sb;
|
882 |
|
|
int i, err;
|
883 |
|
|
|
884 |
|
|
if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
|
885 |
|
|
printk (KERN_ERR "Journal length (%d blocks) too short.\n",
|
886 |
|
|
journal->j_maxlen);
|
887 |
|
|
journal_fail_superblock(journal);
|
888 |
|
|
return -EINVAL;
|
889 |
|
|
}
|
890 |
|
|
|
891 |
|
|
if (journal->j_inode == NULL) {
|
892 |
|
|
/*
|
893 |
|
|
* We don't know what block to start at!
|
894 |
|
|
*/
|
895 |
|
|
printk(KERN_EMERG "%s: creation of journal on external "
|
896 |
|
|
"device!\n", __FUNCTION__);
|
897 |
|
|
BUG();
|
898 |
|
|
}
|
899 |
|
|
|
900 |
|
|
/* Zero out the entire journal on disk. We cannot afford to
|
901 |
|
|
have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
|
902 |
|
|
jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
|
903 |
|
|
for (i = 0; i < journal->j_maxlen; i++) {
|
904 |
|
|
err = journal_bmap(journal, i, &blocknr);
|
905 |
|
|
if (err)
|
906 |
|
|
return err;
|
907 |
|
|
bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
|
908 |
|
|
wait_on_buffer(bh);
|
909 |
|
|
memset (bh->b_data, 0, journal->j_blocksize);
|
910 |
|
|
BUFFER_TRACE(bh, "marking dirty");
|
911 |
|
|
mark_buffer_dirty(bh);
|
912 |
|
|
BUFFER_TRACE(bh, "marking uptodate");
|
913 |
|
|
mark_buffer_uptodate(bh, 1);
|
914 |
|
|
__brelse(bh);
|
915 |
|
|
}
|
916 |
|
|
|
917 |
|
|
fsync_no_super(journal->j_dev);
|
918 |
|
|
jbd_debug(1, "JBD: journal cleared.\n");
|
919 |
|
|
|
920 |
|
|
/* OK, fill in the initial static fields in the new superblock */
|
921 |
|
|
sb = journal->j_superblock;
|
922 |
|
|
|
923 |
|
|
sb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER);
|
924 |
|
|
sb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
|
925 |
|
|
|
926 |
|
|
sb->s_blocksize = htonl(journal->j_blocksize);
|
927 |
|
|
sb->s_maxlen = htonl(journal->j_maxlen);
|
928 |
|
|
sb->s_first = htonl(1);
|
929 |
|
|
|
930 |
|
|
journal->j_transaction_sequence = 1;
|
931 |
|
|
|
932 |
|
|
journal->j_flags &= ~JFS_ABORT;
|
933 |
|
|
journal->j_format_version = 2;
|
934 |
|
|
|
935 |
|
|
return journal_reset(journal);
|
936 |
|
|
}
|
937 |
|
|
|
938 |
|
|
/**
|
939 |
|
|
* void journal_update_superblock() - Update journal sb on disk.
|
940 |
|
|
* @journal: The journal to update.
|
941 |
|
|
* @wait: Set to '0' if you don't want to wait for IO completion.
|
942 |
|
|
*
|
943 |
|
|
* Update a journal's dynamic superblock fields and write it to disk,
|
944 |
|
|
* optionally waiting for the IO to complete.
|
945 |
|
|
*/
|
946 |
|
|
void journal_update_superblock(journal_t *journal, int wait)
|
947 |
|
|
{
|
948 |
|
|
journal_superblock_t *sb = journal->j_superblock;
|
949 |
|
|
struct buffer_head *bh = journal->j_sb_buffer;
|
950 |
|
|
|
951 |
|
|
jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
|
952 |
|
|
journal->j_tail, journal->j_tail_sequence, journal->j_errno);
|
953 |
|
|
|
954 |
|
|
sb->s_sequence = htonl(journal->j_tail_sequence);
|
955 |
|
|
sb->s_start = htonl(journal->j_tail);
|
956 |
|
|
sb->s_errno = htonl(journal->j_errno);
|
957 |
|
|
|
958 |
|
|
BUFFER_TRACE(bh, "marking dirty");
|
959 |
|
|
mark_buffer_dirty(bh);
|
960 |
|
|
ll_rw_block(WRITE, 1, &bh);
|
961 |
|
|
if (wait)
|
962 |
|
|
wait_on_buffer(bh);
|
963 |
|
|
|
964 |
|
|
/* If we have just flushed the log (by marking s_start==0), then
|
965 |
|
|
* any future commit will have to be careful to update the
|
966 |
|
|
* superblock again to re-record the true start of the log. */
|
967 |
|
|
|
968 |
|
|
if (sb->s_start)
|
969 |
|
|
journal->j_flags &= ~JFS_FLUSHED;
|
970 |
|
|
else
|
971 |
|
|
journal->j_flags |= JFS_FLUSHED;
|
972 |
|
|
}
|
973 |
|
|
|
974 |
|
|
|
975 |
|
|
/*
|
976 |
|
|
* Read the superblock for a given journal, performing initial
|
977 |
|
|
* validation of the format.
|
978 |
|
|
*/
|
979 |
|
|
|
980 |
|
|
static int journal_get_superblock(journal_t *journal)
|
981 |
|
|
{
|
982 |
|
|
struct buffer_head *bh;
|
983 |
|
|
journal_superblock_t *sb;
|
984 |
|
|
int err = -EIO;
|
985 |
|
|
|
986 |
|
|
bh = journal->j_sb_buffer;
|
987 |
|
|
|
988 |
|
|
J_ASSERT(bh != NULL);
|
989 |
|
|
if (!buffer_uptodate(bh)) {
|
990 |
|
|
ll_rw_block(READ, 1, &bh);
|
991 |
|
|
wait_on_buffer(bh);
|
992 |
|
|
if (!buffer_uptodate(bh)) {
|
993 |
|
|
printk (KERN_ERR
|
994 |
|
|
"JBD: IO error reading journal superblock\n");
|
995 |
|
|
goto out;
|
996 |
|
|
}
|
997 |
|
|
}
|
998 |
|
|
|
999 |
|
|
sb = journal->j_superblock;
|
1000 |
|
|
|
1001 |
|
|
err = -EINVAL;
|
1002 |
|
|
|
1003 |
|
|
if (sb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER) ||
|
1004 |
|
|
sb->s_blocksize != htonl(journal->j_blocksize)) {
|
1005 |
|
|
printk(KERN_WARNING "JBD: no valid journal superblock found\n");
|
1006 |
|
|
goto out;
|
1007 |
|
|
}
|
1008 |
|
|
|
1009 |
|
|
switch(ntohl(sb->s_header.h_blocktype)) {
|
1010 |
|
|
case JFS_SUPERBLOCK_V1:
|
1011 |
|
|
journal->j_format_version = 1;
|
1012 |
|
|
break;
|
1013 |
|
|
case JFS_SUPERBLOCK_V2:
|
1014 |
|
|
journal->j_format_version = 2;
|
1015 |
|
|
break;
|
1016 |
|
|
default:
|
1017 |
|
|
printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
|
1018 |
|
|
goto out;
|
1019 |
|
|
}
|
1020 |
|
|
|
1021 |
|
|
if (ntohl(sb->s_maxlen) < journal->j_maxlen)
|
1022 |
|
|
journal->j_maxlen = ntohl(sb->s_maxlen);
|
1023 |
|
|
else if (ntohl(sb->s_maxlen) > journal->j_maxlen) {
|
1024 |
|
|
printk (KERN_WARNING "JBD: journal file too short\n");
|
1025 |
|
|
goto out;
|
1026 |
|
|
}
|
1027 |
|
|
|
1028 |
|
|
return 0;
|
1029 |
|
|
|
1030 |
|
|
out:
|
1031 |
|
|
journal_fail_superblock(journal);
|
1032 |
|
|
return err;
|
1033 |
|
|
}
|
1034 |
|
|
|
1035 |
|
|
/*
|
1036 |
|
|
* Load the on-disk journal superblock and read the key fields into the
|
1037 |
|
|
* journal_t.
|
1038 |
|
|
*/
|
1039 |
|
|
|
1040 |
|
|
static int load_superblock(journal_t *journal)
|
1041 |
|
|
{
|
1042 |
|
|
int err;
|
1043 |
|
|
journal_superblock_t *sb;
|
1044 |
|
|
|
1045 |
|
|
err = journal_get_superblock(journal);
|
1046 |
|
|
if (err)
|
1047 |
|
|
return err;
|
1048 |
|
|
|
1049 |
|
|
sb = journal->j_superblock;
|
1050 |
|
|
|
1051 |
|
|
journal->j_tail_sequence = ntohl(sb->s_sequence);
|
1052 |
|
|
journal->j_tail = ntohl(sb->s_start);
|
1053 |
|
|
journal->j_first = ntohl(sb->s_first);
|
1054 |
|
|
journal->j_last = ntohl(sb->s_maxlen);
|
1055 |
|
|
journal->j_errno = ntohl(sb->s_errno);
|
1056 |
|
|
|
1057 |
|
|
return 0;
|
1058 |
|
|
}
|
1059 |
|
|
|
1060 |
|
|
|
1061 |
|
|
/**
|
1062 |
|
|
* int journal_load() - Read journal from disk.
|
1063 |
|
|
* @journal: Journal to act on.
|
1064 |
|
|
*
|
1065 |
|
|
* Given a journal_t structure which tells us which disk blocks contain
|
1066 |
|
|
* a journal, read the journal from disk to initialise the in-memory
|
1067 |
|
|
* structures.
|
1068 |
|
|
*/
|
1069 |
|
|
int journal_load(journal_t *journal)
|
1070 |
|
|
{
|
1071 |
|
|
int err;
|
1072 |
|
|
|
1073 |
|
|
err = load_superblock(journal);
|
1074 |
|
|
if (err)
|
1075 |
|
|
return err;
|
1076 |
|
|
|
1077 |
|
|
/* If this is a V2 superblock, then we have to check the
|
1078 |
|
|
* features flags on it. */
|
1079 |
|
|
|
1080 |
|
|
if (journal->j_format_version >= 2) {
|
1081 |
|
|
journal_superblock_t *sb = journal->j_superblock;
|
1082 |
|
|
|
1083 |
|
|
if ((sb->s_feature_ro_compat &
|
1084 |
|
|
~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
|
1085 |
|
|
(sb->s_feature_incompat &
|
1086 |
|
|
~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) {
|
1087 |
|
|
printk (KERN_WARNING
|
1088 |
|
|
"JBD: Unrecognised features on journal\n");
|
1089 |
|
|
return -EINVAL;
|
1090 |
|
|
}
|
1091 |
|
|
}
|
1092 |
|
|
|
1093 |
|
|
/* Let the recovery code check whether it needs to recover any
|
1094 |
|
|
* data from the journal. */
|
1095 |
|
|
if (journal_recover(journal))
|
1096 |
|
|
goto recovery_error;
|
1097 |
|
|
|
1098 |
|
|
/* OK, we've finished with the dynamic journal bits:
|
1099 |
|
|
* reinitialise the dynamic contents of the superblock in memory
|
1100 |
|
|
* and reset them on disk. */
|
1101 |
|
|
if (journal_reset(journal))
|
1102 |
|
|
goto recovery_error;
|
1103 |
|
|
|
1104 |
|
|
journal->j_flags &= ~JFS_ABORT;
|
1105 |
|
|
journal->j_flags |= JFS_LOADED;
|
1106 |
|
|
return 0;
|
1107 |
|
|
|
1108 |
|
|
recovery_error:
|
1109 |
|
|
printk (KERN_WARNING "JBD: recovery failed\n");
|
1110 |
|
|
return -EIO;
|
1111 |
|
|
}
|
1112 |
|
|
|
1113 |
|
|
/**
|
1114 |
|
|
* void journal_destroy() - Release a journal_t structure.
|
1115 |
|
|
* @journal: Journal to act on.
|
1116 |
|
|
*
|
1117 |
|
|
* Release a journal_t structure once it is no longer in use by the
|
1118 |
|
|
* journaled object.
|
1119 |
|
|
*/
|
1120 |
|
|
void journal_destroy (journal_t *journal)
|
1121 |
|
|
{
|
1122 |
|
|
/* Wait for the commit thread to wake up and die. */
|
1123 |
|
|
journal_kill_thread(journal);
|
1124 |
|
|
|
1125 |
|
|
/* Force a final log commit */
|
1126 |
|
|
if (journal->j_running_transaction)
|
1127 |
|
|
journal_commit_transaction(journal);
|
1128 |
|
|
|
1129 |
|
|
/* Force any old transactions to disk */
|
1130 |
|
|
lock_journal(journal);
|
1131 |
|
|
while (journal->j_checkpoint_transactions != NULL)
|
1132 |
|
|
log_do_checkpoint(journal, 1);
|
1133 |
|
|
|
1134 |
|
|
J_ASSERT(journal->j_running_transaction == NULL);
|
1135 |
|
|
J_ASSERT(journal->j_committing_transaction == NULL);
|
1136 |
|
|
J_ASSERT(journal->j_checkpoint_transactions == NULL);
|
1137 |
|
|
|
1138 |
|
|
/* We can now mark the journal as empty. */
|
1139 |
|
|
journal->j_tail = 0;
|
1140 |
|
|
journal->j_tail_sequence = ++journal->j_transaction_sequence;
|
1141 |
|
|
if (journal->j_sb_buffer) {
|
1142 |
|
|
journal_update_superblock(journal, 1);
|
1143 |
|
|
brelse(journal->j_sb_buffer);
|
1144 |
|
|
}
|
1145 |
|
|
|
1146 |
|
|
if (journal->j_inode)
|
1147 |
|
|
iput(journal->j_inode);
|
1148 |
|
|
if (journal->j_revoke)
|
1149 |
|
|
journal_destroy_revoke(journal);
|
1150 |
|
|
|
1151 |
|
|
unlock_journal(journal);
|
1152 |
|
|
kfree(journal);
|
1153 |
|
|
MOD_DEC_USE_COUNT;
|
1154 |
|
|
}
|
1155 |
|
|
|
1156 |
|
|
|
1157 |
|
|
/**
|
1158 |
|
|
*int journal_check_used_features () - Check if features specified are used.
|
1159 |
|
|
*
|
1160 |
|
|
* Check whether the journal uses all of a given set of
|
1161 |
|
|
* features. Return true (non-zero) if it does.
|
1162 |
|
|
**/
|
1163 |
|
|
|
1164 |
|
|
int journal_check_used_features (journal_t *journal, unsigned long compat,
|
1165 |
|
|
unsigned long ro, unsigned long incompat)
|
1166 |
|
|
{
|
1167 |
|
|
journal_superblock_t *sb;
|
1168 |
|
|
|
1169 |
|
|
if (!compat && !ro && !incompat)
|
1170 |
|
|
return 1;
|
1171 |
|
|
if (journal->j_format_version == 1)
|
1172 |
|
|
return 0;
|
1173 |
|
|
|
1174 |
|
|
sb = journal->j_superblock;
|
1175 |
|
|
|
1176 |
|
|
if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
|
1177 |
|
|
((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
|
1178 |
|
|
((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
|
1179 |
|
|
return 1;
|
1180 |
|
|
|
1181 |
|
|
return 0;
|
1182 |
|
|
}
|
1183 |
|
|
|
1184 |
|
|
/**
|
1185 |
|
|
* int journal_check_available_features() - Check feature set in journalling layer
|
1186 |
|
|
*
|
1187 |
|
|
* Check whether the journaling code supports the use of
|
1188 |
|
|
* all of a given set of features on this journal. Return true
|
1189 |
|
|
* (non-zero) if it can. */
|
1190 |
|
|
|
1191 |
|
|
int journal_check_available_features (journal_t *journal, unsigned long compat,
|
1192 |
|
|
unsigned long ro, unsigned long incompat)
|
1193 |
|
|
{
|
1194 |
|
|
journal_superblock_t *sb;
|
1195 |
|
|
|
1196 |
|
|
if (!compat && !ro && !incompat)
|
1197 |
|
|
return 1;
|
1198 |
|
|
|
1199 |
|
|
sb = journal->j_superblock;
|
1200 |
|
|
|
1201 |
|
|
/* We can support any known requested features iff the
|
1202 |
|
|
* superblock is in version 2. Otherwise we fail to support any
|
1203 |
|
|
* extended sb features. */
|
1204 |
|
|
|
1205 |
|
|
if (journal->j_format_version != 2)
|
1206 |
|
|
return 0;
|
1207 |
|
|
|
1208 |
|
|
if ((compat & JFS_KNOWN_COMPAT_FEATURES) == compat &&
|
1209 |
|
|
(ro & JFS_KNOWN_ROCOMPAT_FEATURES) == ro &&
|
1210 |
|
|
(incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat)
|
1211 |
|
|
return 1;
|
1212 |
|
|
|
1213 |
|
|
return 0;
|
1214 |
|
|
}
|
1215 |
|
|
|
1216 |
|
|
/**
|
1217 |
|
|
* int journal_set_features () - Mark a given journal feature in the superblock
|
1218 |
|
|
*
|
1219 |
|
|
* Mark a given journal feature as present on the
|
1220 |
|
|
* superblock. Returns true if the requested features could be set.
|
1221 |
|
|
*
|
1222 |
|
|
*/
|
1223 |
|
|
|
1224 |
|
|
int journal_set_features (journal_t *journal, unsigned long compat,
|
1225 |
|
|
unsigned long ro, unsigned long incompat)
|
1226 |
|
|
{
|
1227 |
|
|
journal_superblock_t *sb;
|
1228 |
|
|
|
1229 |
|
|
if (journal_check_used_features(journal, compat, ro, incompat))
|
1230 |
|
|
return 1;
|
1231 |
|
|
|
1232 |
|
|
if (!journal_check_available_features(journal, compat, ro, incompat))
|
1233 |
|
|
return 0;
|
1234 |
|
|
|
1235 |
|
|
jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
|
1236 |
|
|
compat, ro, incompat);
|
1237 |
|
|
|
1238 |
|
|
sb = journal->j_superblock;
|
1239 |
|
|
|
1240 |
|
|
sb->s_feature_compat |= cpu_to_be32(compat);
|
1241 |
|
|
sb->s_feature_ro_compat |= cpu_to_be32(ro);
|
1242 |
|
|
sb->s_feature_incompat |= cpu_to_be32(incompat);
|
1243 |
|
|
|
1244 |
|
|
return 1;
|
1245 |
|
|
}
|
1246 |
|
|
|
1247 |
|
|
|
1248 |
|
|
/**
|
1249 |
|
|
* int journal_update_format () - Update on-disk journal structure.
|
1250 |
|
|
*
|
1251 |
|
|
* Given an initialised but unloaded journal struct, poke about in the
|
1252 |
|
|
* on-disk structure to update it to the most recent supported version.
|
1253 |
|
|
*/
|
1254 |
|
|
int journal_update_format (journal_t *journal)
|
1255 |
|
|
{
|
1256 |
|
|
journal_superblock_t *sb;
|
1257 |
|
|
int err;
|
1258 |
|
|
|
1259 |
|
|
err = journal_get_superblock(journal);
|
1260 |
|
|
if (err)
|
1261 |
|
|
return err;
|
1262 |
|
|
|
1263 |
|
|
sb = journal->j_superblock;
|
1264 |
|
|
|
1265 |
|
|
switch (ntohl(sb->s_header.h_blocktype)) {
|
1266 |
|
|
case JFS_SUPERBLOCK_V2:
|
1267 |
|
|
return 0;
|
1268 |
|
|
case JFS_SUPERBLOCK_V1:
|
1269 |
|
|
return journal_convert_superblock_v1(journal, sb);
|
1270 |
|
|
default:
|
1271 |
|
|
break;
|
1272 |
|
|
}
|
1273 |
|
|
return -EINVAL;
|
1274 |
|
|
}
|
1275 |
|
|
|
1276 |
|
|
static int journal_convert_superblock_v1(journal_t *journal,
|
1277 |
|
|
journal_superblock_t *sb)
|
1278 |
|
|
{
|
1279 |
|
|
int offset, blocksize;
|
1280 |
|
|
struct buffer_head *bh;
|
1281 |
|
|
|
1282 |
|
|
printk(KERN_WARNING
|
1283 |
|
|
"JBD: Converting superblock from version 1 to 2.\n");
|
1284 |
|
|
|
1285 |
|
|
/* Pre-initialise new fields to zero */
|
1286 |
|
|
offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
|
1287 |
|
|
blocksize = ntohl(sb->s_blocksize);
|
1288 |
|
|
memset(&sb->s_feature_compat, 0, blocksize-offset);
|
1289 |
|
|
|
1290 |
|
|
sb->s_nr_users = cpu_to_be32(1);
|
1291 |
|
|
sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
|
1292 |
|
|
journal->j_format_version = 2;
|
1293 |
|
|
|
1294 |
|
|
bh = journal->j_sb_buffer;
|
1295 |
|
|
BUFFER_TRACE(bh, "marking dirty");
|
1296 |
|
|
mark_buffer_dirty(bh);
|
1297 |
|
|
ll_rw_block(WRITE, 1, &bh);
|
1298 |
|
|
wait_on_buffer(bh);
|
1299 |
|
|
return 0;
|
1300 |
|
|
}
|
1301 |
|
|
|
1302 |
|
|
|
1303 |
|
|
/**
|
1304 |
|
|
* int journal_flush () - Flush journal
|
1305 |
|
|
* @journal: Journal to act on.
|
1306 |
|
|
*
|
1307 |
|
|
* Flush all data for a given journal to disk and empty the journal.
|
1308 |
|
|
* Filesystems can use this when remounting readonly to ensure that
|
1309 |
|
|
* recovery does not need to happen on remount.
|
1310 |
|
|
*/
|
1311 |
|
|
|
1312 |
|
|
int journal_flush (journal_t *journal)
|
1313 |
|
|
{
|
1314 |
|
|
int err = 0;
|
1315 |
|
|
transaction_t *transaction = NULL;
|
1316 |
|
|
unsigned long old_tail;
|
1317 |
|
|
|
1318 |
|
|
lock_kernel();
|
1319 |
|
|
|
1320 |
|
|
/* Force everything buffered to the log... */
|
1321 |
|
|
if (journal->j_running_transaction) {
|
1322 |
|
|
transaction = journal->j_running_transaction;
|
1323 |
|
|
log_start_commit(journal, transaction);
|
1324 |
|
|
} else if (journal->j_committing_transaction)
|
1325 |
|
|
transaction = journal->j_committing_transaction;
|
1326 |
|
|
|
1327 |
|
|
/* Wait for the log commit to complete... */
|
1328 |
|
|
if (transaction)
|
1329 |
|
|
log_wait_commit(journal, transaction->t_tid);
|
1330 |
|
|
|
1331 |
|
|
/* ...and flush everything in the log out to disk. */
|
1332 |
|
|
lock_journal(journal);
|
1333 |
|
|
while (!err && journal->j_checkpoint_transactions != NULL)
|
1334 |
|
|
err = log_do_checkpoint(journal, journal->j_maxlen);
|
1335 |
|
|
cleanup_journal_tail(journal);
|
1336 |
|
|
|
1337 |
|
|
/* Finally, mark the journal as really needing no recovery.
|
1338 |
|
|
* This sets s_start==0 in the underlying superblock, which is
|
1339 |
|
|
* the magic code for a fully-recovered superblock. Any future
|
1340 |
|
|
* commits of data to the journal will restore the current
|
1341 |
|
|
* s_start value. */
|
1342 |
|
|
old_tail = journal->j_tail;
|
1343 |
|
|
journal->j_tail = 0;
|
1344 |
|
|
journal_update_superblock(journal, 1);
|
1345 |
|
|
journal->j_tail = old_tail;
|
1346 |
|
|
|
1347 |
|
|
unlock_journal(journal);
|
1348 |
|
|
|
1349 |
|
|
J_ASSERT(!journal->j_running_transaction);
|
1350 |
|
|
J_ASSERT(!journal->j_committing_transaction);
|
1351 |
|
|
J_ASSERT(!journal->j_checkpoint_transactions);
|
1352 |
|
|
J_ASSERT(journal->j_head == journal->j_tail);
|
1353 |
|
|
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
|
1354 |
|
|
|
1355 |
|
|
unlock_kernel();
|
1356 |
|
|
|
1357 |
|
|
return err;
|
1358 |
|
|
}
|
1359 |
|
|
|
1360 |
|
|
/**
|
1361 |
|
|
* int journal_wipe() - Wipe journal contents
|
1362 |
|
|
* @journal: Journal to act on.
|
1363 |
|
|
* @write: flag (see below)
|
1364 |
|
|
*
|
1365 |
|
|
* Wipe out all of the contents of a journal, safely. This will produce
|
1366 |
|
|
* a warning if the journal contains any valid recovery information.
|
1367 |
|
|
* Must be called between journal_init_*() and journal_load().
|
1368 |
|
|
*
|
1369 |
|
|
* If 'write' is non-zero, then we wipe out the journal on disk; otherwise
|
1370 |
|
|
* we merely suppress recovery.
|
1371 |
|
|
*/
|
1372 |
|
|
|
1373 |
|
|
int journal_wipe (journal_t *journal, int write)
|
1374 |
|
|
{
|
1375 |
|
|
journal_superblock_t *sb;
|
1376 |
|
|
int err = 0;
|
1377 |
|
|
|
1378 |
|
|
J_ASSERT (!(journal->j_flags & JFS_LOADED));
|
1379 |
|
|
|
1380 |
|
|
err = load_superblock(journal);
|
1381 |
|
|
if (err)
|
1382 |
|
|
return err;
|
1383 |
|
|
|
1384 |
|
|
sb = journal->j_superblock;
|
1385 |
|
|
|
1386 |
|
|
if (!journal->j_tail)
|
1387 |
|
|
goto no_recovery;
|
1388 |
|
|
|
1389 |
|
|
printk (KERN_WARNING "JBD: %s recovery information on journal\n",
|
1390 |
|
|
write ? "Clearing" : "Ignoring");
|
1391 |
|
|
|
1392 |
|
|
err = journal_skip_recovery(journal);
|
1393 |
|
|
if (write)
|
1394 |
|
|
journal_update_superblock(journal, 1);
|
1395 |
|
|
|
1396 |
|
|
no_recovery:
|
1397 |
|
|
return err;
|
1398 |
|
|
}
|
1399 |
|
|
|
1400 |
|
|
/*
|
1401 |
|
|
* journal_dev_name: format a character string to describe on what
|
1402 |
|
|
* device this journal is present.
|
1403 |
|
|
*/
|
1404 |
|
|
|
1405 |
|
|
const char * journal_dev_name(journal_t *journal)
|
1406 |
|
|
{
|
1407 |
|
|
kdev_t dev;
|
1408 |
|
|
|
1409 |
|
|
if (journal->j_inode)
|
1410 |
|
|
dev = journal->j_inode->i_dev;
|
1411 |
|
|
else
|
1412 |
|
|
dev = journal->j_dev;
|
1413 |
|
|
|
1414 |
|
|
return bdevname(dev);
|
1415 |
|
|
}
|
1416 |
|
|
|
1417 |
|
|
/*
|
1418 |
|
|
* Journal abort has very specific semantics, which we describe
|
1419 |
|
|
* for journal abort.
|
1420 |
|
|
*
|
1421 |
|
|
 * Two internal functions, which provide abort to the jbd layer
|
1422 |
|
|
* itself are here.
|
1423 |
|
|
*/
|
1424 |
|
|
|
1425 |
|
|
/* Quick version for internal journal use (doesn't lock the journal).
|
1426 |
|
|
* Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
|
1427 |
|
|
* and don't attempt to make any other journal updates. */
|
1428 |
|
|
void __journal_abort_hard (journal_t *journal)
|
1429 |
|
|
{
|
1430 |
|
|
transaction_t *transaction;
|
1431 |
|
|
|
1432 |
|
|
if (journal->j_flags & JFS_ABORT)
|
1433 |
|
|
return;
|
1434 |
|
|
|
1435 |
|
|
printk (KERN_ERR "Aborting journal on device %s.\n",
|
1436 |
|
|
journal_dev_name(journal));
|
1437 |
|
|
|
1438 |
|
|
journal->j_flags |= JFS_ABORT;
|
1439 |
|
|
transaction = journal->j_running_transaction;
|
1440 |
|
|
if (transaction)
|
1441 |
|
|
log_start_commit(journal, transaction);
|
1442 |
|
|
}
|
1443 |
|
|
|
1444 |
|
|
/* Soft abort: record the abort error status in the journal superblock,
|
1445 |
|
|
* but don't do any other IO. */
|
1446 |
|
|
void __journal_abort_soft (journal_t *journal, int errno)
|
1447 |
|
|
{
|
1448 |
|
|
if (journal->j_flags & JFS_ABORT)
|
1449 |
|
|
return;
|
1450 |
|
|
|
1451 |
|
|
if (!journal->j_errno)
|
1452 |
|
|
journal->j_errno = errno;
|
1453 |
|
|
|
1454 |
|
|
__journal_abort_hard(journal);
|
1455 |
|
|
|
1456 |
|
|
if (errno)
|
1457 |
|
|
journal_update_superblock(journal, 1);
|
1458 |
|
|
}
|
1459 |
|
|
|
1460 |
|
|
/**
|
1461 |
|
|
* void journal_abort () - Shutdown the journal immediately.
|
1462 |
|
|
* @journal: the journal to shutdown.
|
1463 |
|
|
* @errno: an error number to record in the journal indicating
|
1464 |
|
|
* the reason for the shutdown.
|
1465 |
|
|
*
|
1466 |
|
|
* Perform a complete, immediate shutdown of the ENTIRE
|
1467 |
|
|
* journal (not of a single transaction). This operation cannot be
|
1468 |
|
|
* undone without closing and reopening the journal.
|
1469 |
|
|
*
|
1470 |
|
|
* The journal_abort function is intended to support higher level error
|
1471 |
|
|
* recovery mechanisms such as the ext2/ext3 remount-readonly error
|
1472 |
|
|
* mode.
|
1473 |
|
|
*
|
1474 |
|
|
* Journal abort has very specific semantics. Any existing dirty,
|
1475 |
|
|
* unjournaled buffers in the main filesystem will still be written to
|
1476 |
|
|
* disk by bdflush, but the journaling mechanism will be suspended
|
1477 |
|
|
* immediately and no further transaction commits will be honoured.
|
1478 |
|
|
*
|
1479 |
|
|
* Any dirty, journaled buffers will be written back to disk without
|
1480 |
|
|
* hitting the journal. Atomicity cannot be guaranteed on an aborted
|
1481 |
|
|
* filesystem, but we _do_ attempt to leave as much data as possible
|
1482 |
|
|
* behind for fsck to use for cleanup.
|
1483 |
|
|
*
|
1484 |
|
|
* Any attempt to get a new transaction handle on a journal which is in
|
1485 |
|
|
* ABORT state will just result in an -EROFS error return. A
|
1486 |
|
|
* journal_stop on an existing handle will return -EIO if we have
|
1487 |
|
|
* entered abort state during the update.
|
1488 |
|
|
*
|
1489 |
|
|
* Recursive transactions are not disturbed by journal abort until the
|
1490 |
|
|
* final journal_stop, which will receive the -EIO error.
|
1491 |
|
|
*
|
1492 |
|
|
* Finally, the journal_abort call allows the caller to supply an errno
|
1493 |
|
|
* which will be recorded (if possible) in the journal superblock. This
|
1494 |
|
|
* allows a client to record failure conditions in the middle of a
|
1495 |
|
|
* transaction without having to complete the transaction to record the
|
1496 |
|
|
* failure to disk. ext3_error, for example, now uses this
|
1497 |
|
|
* functionality.
|
1498 |
|
|
*
|
1499 |
|
|
* Errors which originate from within the journaling layer will NOT
|
1500 |
|
|
* supply an errno; a null errno implies that absolutely no further
|
1501 |
|
|
* writes are done to the journal (unless there are any already in
|
1502 |
|
|
* progress).
|
1503 |
|
|
*
|
1504 |
|
|
*/
|
1505 |
|
|
|
1506 |
|
|
void journal_abort (journal_t *journal, int errno)
|
1507 |
|
|
{
|
1508 |
|
|
lock_journal(journal);
|
1509 |
|
|
__journal_abort_soft(journal, errno);
|
1510 |
|
|
unlock_journal(journal);
|
1511 |
|
|
}
|
1512 |
|
|
|
1513 |
|
|
/**
|
1514 |
|
|
* int journal_errno () - returns the journal's error state.
|
1515 |
|
|
* @journal: journal to examine.
|
1516 |
|
|
*
|
1517 |
|
|
* This is the errno numbet set with journal_abort(), the last
|
1518 |
|
|
* time the journal was mounted - if the journal was stopped
|
1519 |
|
|
* without calling abort this will be 0.
|
1520 |
|
|
*
|
1521 |
|
|
* If the journal has been aborted on this mount time -EROFS will
|
1522 |
|
|
* be returned.
|
1523 |
|
|
*/
|
1524 |
|
|
int journal_errno (journal_t *journal)
|
1525 |
|
|
{
|
1526 |
|
|
int err;
|
1527 |
|
|
|
1528 |
|
|
lock_journal(journal);
|
1529 |
|
|
if (journal->j_flags & JFS_ABORT)
|
1530 |
|
|
err = -EROFS;
|
1531 |
|
|
else
|
1532 |
|
|
err = journal->j_errno;
|
1533 |
|
|
unlock_journal(journal);
|
1534 |
|
|
return err;
|
1535 |
|
|
}
|
1536 |
|
|
|
1537 |
|
|
|
1538 |
|
|
|
1539 |
|
|
/**
|
1540 |
|
|
* int journal_clear_err () - clears the journal's error state
|
1541 |
|
|
*
|
1542 |
|
|
* An error must be cleared or Acked to take a FS out of readonly
|
1543 |
|
|
* mode.
|
1544 |
|
|
*/
|
1545 |
|
|
int journal_clear_err (journal_t *journal)
|
1546 |
|
|
{
|
1547 |
|
|
int err = 0;
|
1548 |
|
|
|
1549 |
|
|
lock_journal(journal);
|
1550 |
|
|
if (journal->j_flags & JFS_ABORT)
|
1551 |
|
|
err = -EROFS;
|
1552 |
|
|
else
|
1553 |
|
|
journal->j_errno = 0;
|
1554 |
|
|
unlock_journal(journal);
|
1555 |
|
|
return err;
|
1556 |
|
|
}
|
1557 |
|
|
|
1558 |
|
|
|
1559 |
|
|
/**
|
1560 |
|
|
* void journal_ack_err() - Ack journal err.
|
1561 |
|
|
*
|
1562 |
|
|
* An error must be cleared or Acked to take a FS out of readonly
|
1563 |
|
|
* mode.
|
1564 |
|
|
*/
|
1565 |
|
|
void journal_ack_err (journal_t *journal)
|
1566 |
|
|
{
|
1567 |
|
|
lock_journal(journal);
|
1568 |
|
|
if (journal->j_errno)
|
1569 |
|
|
journal->j_flags |= JFS_ACK_ERR;
|
1570 |
|
|
unlock_journal(journal);
|
1571 |
|
|
}
|
1572 |
|
|
|
1573 |
|
|
|
1574 |
|
|
/*
|
1575 |
|
|
* Report any unexpected dirty buffers which turn up. Normally those
|
1576 |
|
|
* indicate an error, but they can occur if the user is running (say)
|
1577 |
|
|
* tune2fs to modify the live filesystem, so we need the option of
|
1578 |
|
|
* continuing as gracefully as possible. #
|
1579 |
|
|
*
|
1580 |
|
|
* The caller should already hold the journal lock and
|
1581 |
|
|
* journal_datalist_lock spinlock: most callers will need those anyway
|
1582 |
|
|
* in order to probe the buffer's journaling state safely.
|
1583 |
|
|
*/
|
1584 |
|
|
void __jbd_unexpected_dirty_buffer(char *function, int line,
|
1585 |
|
|
struct journal_head *jh)
|
1586 |
|
|
{
|
1587 |
|
|
struct buffer_head *bh = jh2bh(jh);
|
1588 |
|
|
int jlist;
|
1589 |
|
|
|
1590 |
|
|
if (buffer_dirty(bh)) {
|
1591 |
|
|
printk ("%sUnexpected dirty buffer encountered at "
|
1592 |
|
|
"%s:%d (%s blocknr %lu)\n",
|
1593 |
|
|
KERN_WARNING, function, line,
|
1594 |
|
|
kdevname(bh->b_dev), bh->b_blocknr);
|
1595 |
|
|
#ifdef JBD_PARANOID_WRITES
|
1596 |
|
|
J_ASSERT_BH (bh, !buffer_dirty(bh));
|
1597 |
|
|
#endif
|
1598 |
|
|
|
1599 |
|
|
/* If this buffer is one which might reasonably be dirty
|
1600 |
|
|
* --- ie. data, or not part of this journal --- then
|
1601 |
|
|
* we're OK to leave it alone, but otherwise we need to
|
1602 |
|
|
* move the dirty bit to the journal's own internal
|
1603 |
|
|
* JBDDirty bit. */
|
1604 |
|
|
jlist = jh->b_jlist;
|
1605 |
|
|
|
1606 |
|
|
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
|
1607 |
|
|
jlist == BJ_Shadow || jlist == BJ_Forget) {
|
1608 |
|
|
if (atomic_set_buffer_clean(jh2bh(jh))) {
|
1609 |
|
|
set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
|
1610 |
|
|
}
|
1611 |
|
|
}
|
1612 |
|
|
}
|
1613 |
|
|
}
|
1614 |
|
|
|
1615 |
|
|
|
1616 |
|
|
int journal_blocks_per_page(struct inode *inode)
|
1617 |
|
|
{
|
1618 |
|
|
return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
|
1619 |
|
|
}
|
1620 |
|
|
|
1621 |
|
|
/*
|
1622 |
|
|
* shrink_journal_memory().
|
1623 |
|
|
* Called when we're under memory pressure. Free up all the written-back
|
1624 |
|
|
* checkpointed metadata buffers.
|
1625 |
|
|
*/
|
1626 |
|
|
void shrink_journal_memory(void)
|
1627 |
|
|
{
|
1628 |
|
|
struct list_head *list;
|
1629 |
|
|
|
1630 |
|
|
lock_kernel();
|
1631 |
|
|
list_for_each(list, &all_journals) {
|
1632 |
|
|
journal_t *journal =
|
1633 |
|
|
list_entry(list, journal_t, j_all_journals);
|
1634 |
|
|
spin_lock(&journal_datalist_lock);
|
1635 |
|
|
__journal_clean_checkpoint_list(journal);
|
1636 |
|
|
spin_unlock(&journal_datalist_lock);
|
1637 |
|
|
}
|
1638 |
|
|
unlock_kernel();
|
1639 |
|
|
}
|
1640 |
|
|
|
1641 |
|
|
/*
|
1642 |
|
|
* Simple support for retying memory allocations. Introduced to help to
|
1643 |
|
|
* debug different VM deadlock avoidance strategies.
|
1644 |
|
|
*/
|
1645 |
|
|
/*
|
1646 |
|
|
* Simple support for retying memory allocations. Introduced to help to
|
1647 |
|
|
* debug different VM deadlock avoidance strategies.
|
1648 |
|
|
*/
|
1649 |
|
|
void * __jbd_kmalloc (char *where, size_t size, int flags, int retry)
|
1650 |
|
|
{
|
1651 |
|
|
void *p;
|
1652 |
|
|
static unsigned long last_warning;
|
1653 |
|
|
|
1654 |
|
|
while (1) {
|
1655 |
|
|
p = kmalloc(size, flags);
|
1656 |
|
|
if (p)
|
1657 |
|
|
return p;
|
1658 |
|
|
if (!retry)
|
1659 |
|
|
return NULL;
|
1660 |
|
|
/* Log every retry for debugging. Also log them to the
|
1661 |
|
|
* syslog, but do rate-limiting on the non-debugging
|
1662 |
|
|
* messages. */
|
1663 |
|
|
jbd_debug(1, "ENOMEM in %s, retrying.\n", where);
|
1664 |
|
|
|
1665 |
|
|
if (time_after(jiffies, last_warning + 120*HZ)) {
|
1666 |
|
|
printk(KERN_NOTICE
|
1667 |
|
|
"ENOMEM in %s, retrying.\n", where);
|
1668 |
|
|
last_warning = jiffies;
|
1669 |
|
|
}
|
1670 |
|
|
|
1671 |
|
|
yield();
|
1672 |
|
|
}
|
1673 |
|
|
}
|
1674 |
|
|
|
1675 |
|
|
/*
|
1676 |
|
|
* Journal_head storage management
|
1677 |
|
|
*/
|
1678 |
|
|
static kmem_cache_t *journal_head_cache;
|
1679 |
|
|
#ifdef CONFIG_JBD_DEBUG
|
1680 |
|
|
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
|
1681 |
|
|
#endif
|
1682 |
|
|
|
1683 |
|
|
static int journal_init_journal_head_cache(void)
|
1684 |
|
|
{
|
1685 |
|
|
int retval;
|
1686 |
|
|
|
1687 |
|
|
J_ASSERT(journal_head_cache == 0);
|
1688 |
|
|
journal_head_cache = kmem_cache_create("journal_head",
|
1689 |
|
|
sizeof(struct journal_head),
|
1690 |
|
|
0, /* offset */
|
1691 |
|
|
0, /* flags */
|
1692 |
|
|
NULL, /* ctor */
|
1693 |
|
|
NULL); /* dtor */
|
1694 |
|
|
retval = 0;
|
1695 |
|
|
if (journal_head_cache == 0) {
|
1696 |
|
|
retval = -ENOMEM;
|
1697 |
|
|
printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
|
1698 |
|
|
}
|
1699 |
|
|
return retval;
|
1700 |
|
|
}
|
1701 |
|
|
|
1702 |
|
|
static void journal_destroy_journal_head_cache(void)
|
1703 |
|
|
{
|
1704 |
|
|
J_ASSERT(journal_head_cache != NULL);
|
1705 |
|
|
kmem_cache_destroy(journal_head_cache);
|
1706 |
|
|
journal_head_cache = 0;
|
1707 |
|
|
}
|
1708 |
|
|
|
1709 |
|
|
/*
|
1710 |
|
|
* journal_head splicing and dicing
|
1711 |
|
|
*/
|
1712 |
|
|
static struct journal_head *journal_alloc_journal_head(void)
|
1713 |
|
|
{
|
1714 |
|
|
struct journal_head *ret;
|
1715 |
|
|
static unsigned long last_warning;
|
1716 |
|
|
|
1717 |
|
|
#ifdef CONFIG_JBD_DEBUG
|
1718 |
|
|
atomic_inc(&nr_journal_heads);
|
1719 |
|
|
#endif
|
1720 |
|
|
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
|
1721 |
|
|
if (ret == 0) {
|
1722 |
|
|
jbd_debug(1, "out of memory for journal_head\n");
|
1723 |
|
|
if (time_after(jiffies, last_warning + 5*HZ)) {
|
1724 |
|
|
printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
|
1725 |
|
|
__FUNCTION__);
|
1726 |
|
|
last_warning = jiffies;
|
1727 |
|
|
}
|
1728 |
|
|
while (ret == 0) {
|
1729 |
|
|
yield();
|
1730 |
|
|
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
|
1731 |
|
|
}
|
1732 |
|
|
}
|
1733 |
|
|
return ret;
|
1734 |
|
|
}
|
1735 |
|
|
|
1736 |
|
|
static void journal_free_journal_head(struct journal_head *jh)
|
1737 |
|
|
{
|
1738 |
|
|
#ifdef CONFIG_JBD_DEBUG
|
1739 |
|
|
atomic_dec(&nr_journal_heads);
|
1740 |
|
|
memset(jh, 0x5b, sizeof(*jh));
|
1741 |
|
|
#endif
|
1742 |
|
|
kmem_cache_free(journal_head_cache, jh);
|
1743 |
|
|
}
|
1744 |
|
|
|
1745 |
|
|
/*
|
1746 |
|
|
* A journal_head is attached to a buffer_head whenever JBD has an
|
1747 |
|
|
* interest in the buffer.
|
1748 |
|
|
*
|
1749 |
|
|
* Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
|
1750 |
|
|
* is set. This bit is tested in core kernel code where we need to take
|
1751 |
|
|
* JBD-specific actions. Testing the zeroness of ->b_journal_head is not
|
1752 |
|
|
* reliable there.
|
1753 |
|
|
*
|
1754 |
|
|
* When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
|
1755 |
|
|
*
|
1756 |
|
|
* When a buffer has its BH_JBD bit set it is immune from being released by
|
1757 |
|
|
* core kernel code, mainly via ->b_count.
|
1758 |
|
|
*
|
1759 |
|
|
* A journal_head may be detached from its buffer_head when the journal_head's
|
1760 |
|
|
* b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
|
1761 |
|
|
* Various places in JBD call journal_remove_journal_head() to indicate that the
|
1762 |
|
|
* journal_head can be dropped if needed.
|
1763 |
|
|
*
|
1764 |
|
|
* Various places in the kernel want to attach a journal_head to a buffer_head
|
1765 |
|
|
* _before_ attaching the journal_head to a transaction. To protect the
|
1766 |
|
|
* journal_head in this situation, journal_add_journal_head elevates the
|
1767 |
|
|
* journal_head's b_jcount refcount by one. The caller must call
|
1768 |
|
|
* journal_unlock_journal_head() to undo this.
|
1769 |
|
|
*
|
1770 |
|
|
* So the typical usage would be:
|
1771 |
|
|
*
|
1772 |
|
|
* (Attach a journal_head if needed. Increments b_jcount)
|
1773 |
|
|
* struct journal_head *jh = journal_add_journal_head(bh);
|
1774 |
|
|
* ...
|
1775 |
|
|
* jh->b_transaction = xxx;
|
1776 |
|
|
* journal_unlock_journal_head(jh);
|
1777 |
|
|
*
|
1778 |
|
|
* Now, the journal_head's b_jcount is zero, but it is safe from being released
|
1779 |
|
|
* because it has a non-zero b_transaction.
|
1780 |
|
|
*/
|
1781 |
|
|
|
1782 |
|
|
/*
|
1783 |
|
|
* Give a buffer_head a journal_head.
|
1784 |
|
|
*
|
1785 |
|
|
* Doesn't need the journal lock.
|
1786 |
|
|
* May sleep.
|
1787 |
|
|
* Cannot be called with journal_datalist_lock held.
|
1788 |
|
|
*/
|
1789 |
|
|
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
|
1790 |
|
|
{
|
1791 |
|
|
struct journal_head *jh;
|
1792 |
|
|
|
1793 |
|
|
spin_lock(&journal_datalist_lock);
|
1794 |
|
|
if (buffer_jbd(bh)) {
|
1795 |
|
|
jh = bh2jh(bh);
|
1796 |
|
|
} else {
|
1797 |
|
|
J_ASSERT_BH(bh,
|
1798 |
|
|
(atomic_read(&bh->b_count) > 0) ||
|
1799 |
|
|
(bh->b_page && bh->b_page->mapping));
|
1800 |
|
|
spin_unlock(&journal_datalist_lock);
|
1801 |
|
|
jh = journal_alloc_journal_head();
|
1802 |
|
|
memset(jh, 0, sizeof(*jh));
|
1803 |
|
|
spin_lock(&journal_datalist_lock);
|
1804 |
|
|
|
1805 |
|
|
if (buffer_jbd(bh)) {
|
1806 |
|
|
/* Someone did it for us! */
|
1807 |
|
|
J_ASSERT_BH(bh, bh->b_private != NULL);
|
1808 |
|
|
journal_free_journal_head(jh);
|
1809 |
|
|
jh = bh->b_private;
|
1810 |
|
|
} else {
|
1811 |
|
|
/*
|
1812 |
|
|
* We actually don't need jh_splice_lock when
|
1813 |
|
|
* adding a journal_head - only on removal.
|
1814 |
|
|
*/
|
1815 |
|
|
spin_lock(&jh_splice_lock);
|
1816 |
|
|
set_bit(BH_JBD, &bh->b_state);
|
1817 |
|
|
bh->b_private = jh;
|
1818 |
|
|
jh->b_bh = bh;
|
1819 |
|
|
atomic_inc(&bh->b_count);
|
1820 |
|
|
spin_unlock(&jh_splice_lock);
|
1821 |
|
|
BUFFER_TRACE(bh, "added journal_head");
|
1822 |
|
|
}
|
1823 |
|
|
}
|
1824 |
|
|
jh->b_jcount++;
|
1825 |
|
|
spin_unlock(&journal_datalist_lock);
|
1826 |
|
|
return bh->b_private;
|
1827 |
|
|
}
|
1828 |
|
|
|
1829 |
|
|
/*
|
1830 |
|
|
* journal_remove_journal_head(): if the buffer isn't attached to a transaction
|
1831 |
|
|
* and has a zero b_jcount then remove and release its journal_head. If we did
|
1832 |
|
|
* see that the buffer is not used by any transaction we also "logically"
|
1833 |
|
|
* decrement ->b_count.
|
1834 |
|
|
*
|
1835 |
|
|
* We in fact take an additional increment on ->b_count as a convenience,
|
1836 |
|
|
* because the caller usually wants to do additional things with the bh
|
1837 |
|
|
* after calling here.
|
1838 |
|
|
* The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
|
1839 |
|
|
* time. Once the caller has run __brelse(), the buffer is eligible for
|
1840 |
|
|
* reaping by try_to_free_buffers().
|
1841 |
|
|
*
|
1842 |
|
|
* Requires journal_datalist_lock.
|
1843 |
|
|
*/
|
1844 |
|
|
void __journal_remove_journal_head(struct buffer_head *bh)
|
1845 |
|
|
{
|
1846 |
|
|
struct journal_head *jh = bh2jh(bh);
|
1847 |
|
|
|
1848 |
|
|
assert_spin_locked(&journal_datalist_lock);
|
1849 |
|
|
J_ASSERT_JH(jh, jh->b_jcount >= 0);
|
1850 |
|
|
atomic_inc(&bh->b_count);
|
1851 |
|
|
if (jh->b_jcount == 0) {
|
1852 |
|
|
if (jh->b_transaction == NULL &&
|
1853 |
|
|
jh->b_next_transaction == NULL &&
|
1854 |
|
|
jh->b_cp_transaction == NULL) {
|
1855 |
|
|
J_ASSERT_BH(bh, buffer_jbd(bh));
|
1856 |
|
|
J_ASSERT_BH(bh, jh2bh(jh) == bh);
|
1857 |
|
|
BUFFER_TRACE(bh, "remove journal_head");
|
1858 |
|
|
spin_lock(&jh_splice_lock);
|
1859 |
|
|
bh->b_private = NULL;
|
1860 |
|
|
jh->b_bh = NULL; /* debug, really */
|
1861 |
|
|
clear_bit(BH_JBD, &bh->b_state);
|
1862 |
|
|
__brelse(bh);
|
1863 |
|
|
spin_unlock(&jh_splice_lock);
|
1864 |
|
|
journal_free_journal_head(jh);
|
1865 |
|
|
} else {
|
1866 |
|
|
BUFFER_TRACE(bh, "journal_head was locked");
|
1867 |
|
|
}
|
1868 |
|
|
}
|
1869 |
|
|
}
|
1870 |
|
|
|
1871 |
|
|
void journal_unlock_journal_head(struct journal_head *jh)
|
1872 |
|
|
{
|
1873 |
|
|
spin_lock(&journal_datalist_lock);
|
1874 |
|
|
J_ASSERT_JH(jh, jh->b_jcount > 0);
|
1875 |
|
|
--jh->b_jcount;
|
1876 |
|
|
if (!jh->b_jcount && !jh->b_transaction) {
|
1877 |
|
|
struct buffer_head *bh;
|
1878 |
|
|
bh = jh2bh(jh);
|
1879 |
|
|
__journal_remove_journal_head(bh);
|
1880 |
|
|
__brelse(bh);
|
1881 |
|
|
}
|
1882 |
|
|
|
1883 |
|
|
spin_unlock(&journal_datalist_lock);
|
1884 |
|
|
}
|
1885 |
|
|
|
1886 |
|
|
void journal_remove_journal_head(struct buffer_head *bh)
|
1887 |
|
|
{
|
1888 |
|
|
spin_lock(&journal_datalist_lock);
|
1889 |
|
|
__journal_remove_journal_head(bh);
|
1890 |
|
|
spin_unlock(&journal_datalist_lock);
|
1891 |
|
|
}
|
1892 |
|
|
|
1893 |
|
|
/*
|
1894 |
|
|
* /proc tunables
|
1895 |
|
|
*/
|
1896 |
|
|
#ifdef CONFIG_JBD_DEBUG
/* Debug setting; read and written through the jbd-debug /proc handlers. */
int journal_enable_debug;
EXPORT_SYMBOL(journal_enable_debug);
#endif
|
1900 |
|
|
|
1901 |
|
|
#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
|
1902 |
|
|
|
1903 |
|
|
static struct proc_dir_entry *proc_jbd_debug;
|
1904 |
|
|
|
1905 |
|
|
int read_jbd_debug(char *page, char **start, off_t off,
|
1906 |
|
|
int count, int *eof, void *data)
|
1907 |
|
|
{
|
1908 |
|
|
int ret;
|
1909 |
|
|
|
1910 |
|
|
ret = sprintf(page + off, "%d\n", journal_enable_debug);
|
1911 |
|
|
*eof = 1;
|
1912 |
|
|
return ret;
|
1913 |
|
|
}
|
1914 |
|
|
|
1915 |
|
|
int write_jbd_debug(struct file *file, const char *buffer,
|
1916 |
|
|
unsigned long count, void *data)
|
1917 |
|
|
{
|
1918 |
|
|
char buf[32];
|
1919 |
|
|
|
1920 |
|
|
if (count > ARRAY_SIZE(buf) - 1)
|
1921 |
|
|
count = ARRAY_SIZE(buf) - 1;
|
1922 |
|
|
if (copy_from_user(buf, buffer, count))
|
1923 |
|
|
return -EFAULT;
|
1924 |
|
|
buf[ARRAY_SIZE(buf) - 1] = '\0';
|
1925 |
|
|
journal_enable_debug = simple_strtoul(buf, NULL, 10);
|
1926 |
|
|
return count;
|
1927 |
|
|
}
|
1928 |
|
|
|
1929 |
|
|
#define JBD_PROC_NAME "sys/fs/jbd-debug"
|
1930 |
|
|
|
1931 |
|
|
/*
 * Create the JBD_PROC_NAME /proc entry (mode 0644) and hook up its read
 * and write handlers.  If the entry cannot be created nothing else is
 * done; proc_jbd_debug is left NULL in that case.
 */
static void __init create_jbd_proc_entry(void)
{
	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
	if (proc_jbd_debug == NULL)
		return;

	/* Why is this so hard? */
	proc_jbd_debug->read_proc = read_jbd_debug;
	proc_jbd_debug->write_proc = write_jbd_debug;
}
|
1940 |
|
|
|
1941 |
|
|
/*
 * Remove the JBD_PROC_NAME /proc entry, but only if
 * create_jbd_proc_entry() managed to create it.
 */
static void __exit remove_jbd_proc_entry(void)
{
	if (proc_jbd_debug == NULL)
		return;

	remove_proc_entry(JBD_PROC_NAME, NULL);
}
|
1946 |
|
|
|
1947 |
|
|
#else
|
1948 |
|
|
|
1949 |
|
|
#define create_jbd_proc_entry() do {} while (0)
|
1950 |
|
|
#define remove_jbd_proc_entry() do {} while (0)
|
1951 |
|
|
|
1952 |
|
|
#endif
|
1953 |
|
|
|
1954 |
|
|
/*
|
1955 |
|
|
* Module startup and shutdown
|
1956 |
|
|
*/
|
1957 |
|
|
|
1958 |
|
|
/*
 * Set up the revoke caches first and then the journal_head cache.
 *
 * Stops at the first failure and returns its error code; returns 0 when
 * both succeed.  Nothing is torn down here on failure.
 */
static int __init journal_init_caches(void)
{
	int err = journal_init_revoke_caches();

	if (err != 0)
		return err;

	return journal_init_journal_head_cache();
}
|
1967 |
|
|
|
1968 |
|
|
/*
 * Release the caches set up by journal_init_caches(): the revoke caches
 * first, then the journal_head cache.
 */
static void journal_destroy_caches(void)
{
	journal_destroy_revoke_caches();
	journal_destroy_journal_head_cache();
}
|
1973 |
|
|
|
1974 |
|
|
/*
 * Module entry point: set up the JBD caches and, once that has worked,
 * the debugging /proc entry.
 *
 * Returns 0 on success or the error from journal_init_caches().  On
 * failure the caches are torn down again and the /proc entry is NOT
 * created: registering it and then failing the load would leave an entry
 * behind whose handlers belong to code that never finished initialising.
 */
static int __init journal_init(void)
{
	int ret;

	printk(KERN_INFO "Journalled Block Device driver loaded\n");
	ret = journal_init_caches();
	if (ret != 0) {
		journal_destroy_caches();
		return ret;
	}

	create_jbd_proc_entry();
	return 0;
}
|
1985 |
|
|
|
1986 |
|
|
/*
 * Module exit point: remove the /proc entry and destroy the caches.
 *
 * In CONFIG_JBD_DEBUG builds, a KERN_EMERG message is logged first if the
 * journal_head counter is non-zero, i.e. some journal_heads were
 * allocated but never freed.
 */
static void __exit journal_exit(void)
{
#ifdef CONFIG_JBD_DEBUG
	int leaked = atomic_read(&nr_journal_heads);

	if (leaked != 0)
		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", leaked);
#endif
	remove_jbd_proc_entry();
	journal_destroy_caches();
}
|
1996 |
|
|
|
1997 |
|
|
MODULE_LICENSE("GPL");
|
1998 |
|
|
module_init(journal_init);
|
1999 |
|
|
module_exit(journal_exit);
|
2000 |
|
|
|