OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [jfs/] [jfs_logmgr.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 *   Copyright (c) International Business Machines Corp., 2000-2003
3
 *   Portions Copyright (c) Christoph Hellwig, 2001-2002
4
 *
5
 *   This program is free software;  you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
8
 *   (at your option) any later version.
9
 *
10
 *   This program is distributed in the hope that it will be useful,
11
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13
 *   the GNU General Public License for more details.
14
 *
15
 *   You should have received a copy of the GNU General Public License
16
 *   along with this program;  if not, write to the Free Software
17
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
 */
19
 
20
/*
21
 *      jfs_logmgr.c: log manager
22
 *
23
 * for related information, see transaction manager (jfs_txnmgr.c), and
24
 * recovery manager (jfs_logredo.c).
25
 *
26
 * note: for detail, RTFS.
27
 *
28
 *      log buffer manager:
29
 * special purpose buffer manager supporting log i/o requirements.
30
 * per log serial pageout of logpage
31
 * queuing i/o requests and redrive i/o at iodone
32
 * maintain current logpage buffer
33
 * no caching since append only
34
 * appropriate jfs buffer cache buffers as needed
35
 *
36
 *      group commit:
37
 * transactions which wrote COMMIT records in the same in-memory
38
 * log page during the pageout of previous/current log page(s) are
39
 * committed together by the pageout of the page.
40
 *
41
 *      TBD lazy commit:
42
 * transactions are committed asynchronously when the log page
43
 * containing it COMMIT is paged out when it becomes full;
44
 *
45
 *      serialization:
46
 * . a per log lock serialize log write.
47
 * . a per log lock serialize group commit.
48
 * . a per log lock serialize log open/close;
49
 *
50
 *      TBD log integrity:
51
 * careful-write (ping-pong) of last logpage to recover from crash
52
 * in overwrite.
53
 * detection of split (out-of-order) write of physical sectors
54
 * of last logpage via timestamp at end of each sector
55
 * with its mirror data array at trailer).
56
 *
57
 *      alternatives:
58
 * lsn - 64-bit monotonically increasing integer vs
59
 * 32-bit lspn and page eor.
60
 */
61
 
62
#include <linux/fs.h>
63
#include <linux/locks.h>
64
#include <linux/blkdev.h>
65
#include <linux/interrupt.h>
66
#include <linux/smp_lock.h>
67
#include <linux/completion.h>
68
#include "jfs_incore.h"
69
#include "jfs_filsys.h"
70
#include "jfs_metapage.h"
71
#include "jfs_txnmgr.h"
72
#include "jfs_debug.h"
73
 
74
 
75
/*
76
 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
77
 */
78
static struct lbuf *log_redrive_list;
79
static spinlock_t log_redrive_lock = SPIN_LOCK_UNLOCKED;
80
DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait);
81
 
82
 
83
/*
84
 *      log read/write serialization (per log)
85
 */
86
#define LOG_LOCK_INIT(log)      init_MUTEX(&(log)->loglock)
87
#define LOG_LOCK(log)           down(&((log)->loglock))
88
#define LOG_UNLOCK(log)         up(&((log)->loglock))
89
 
90
 
91
/*
92
 *      log group commit serialization (per log)
93
 */
94
 
95
#define LOGGC_LOCK_INIT(log)    spin_lock_init(&(log)->gclock)
96
#define LOGGC_LOCK(log)         spin_lock_irq(&(log)->gclock)
97
#define LOGGC_UNLOCK(log)       spin_unlock_irq(&(log)->gclock)
98
#define LOGGC_WAKEUP(tblk)      wake_up_all(&(tblk)->gcwait)
99
 
100
/*
101
 *      log sync serialization (per log)
102
 */
103
#define LOGSYNC_DELTA(logsize)          min((logsize)/8, 128*LOGPSIZE)
104
#define LOGSYNC_BARRIER(logsize)        ((logsize)/4)
105
/*
106
#define LOGSYNC_DELTA(logsize)          min((logsize)/4, 256*LOGPSIZE)
107
#define LOGSYNC_BARRIER(logsize)        ((logsize)/2)
108
*/
109
 
110
 
111
/*
112
 *      log buffer cache synchronization
113
 */
114
static spinlock_t jfsLCacheLock = SPIN_LOCK_UNLOCKED;
115
 
116
#define LCACHE_LOCK(flags)      spin_lock_irqsave(&jfsLCacheLock, flags)
117
#define LCACHE_UNLOCK(flags)    spin_unlock_irqrestore(&jfsLCacheLock, flags)
118
 
119
/*
120
 * See __SLEEP_COND in jfs_locks.h
121
 */
122
#define LCACHE_SLEEP_COND(wq, cond, flags)      \
123
do {                                            \
124
        if (cond)                               \
125
                break;                          \
126
        __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
127
} while (0)
128
 
129
#define LCACHE_WAKEUP(event)    wake_up(event)
130
 
131
 
132
/*
133
 *      lbuf buffer cache (lCache) control
134
 */
135
/* log buffer manager pageout control (cumulative, inclusive) */
136
#define lbmREAD         0x0001
137
#define lbmWRITE        0x0002  /* enqueue at tail of write queue;
138
                                 * init pageout if at head of queue;
139
                                 */
140
#define lbmRELEASE      0x0004  /* remove from write queue
141
                                 * at completion of pageout;
142
                                 * do not free/recycle it yet:
143
                                 * caller will free it;
144
                                 */
145
#define lbmSYNC         0x0008  /* do not return to freelist
146
                                 * when removed from write queue;
147
                                 */
148
#define lbmFREE         0x0010  /* return to freelist
149
                                 * at completion of pageout;
150
                                 * the buffer may be recycled;
151
                                 */
152
#define lbmDONE         0x0020
153
#define lbmERROR        0x0040
154
#define lbmGC           0x0080  /* lbmIODone to perform post-GC processing
155
                                 * of log page
156
                                 */
157
#define lbmDIRECT       0x0100
158
 
159
/*
160
 * external references
161
 */
162
extern void txLazyUnlock(struct tblock * tblk);
163
extern int jfs_stop_threads;
164
extern struct completion jfsIOwait;
165
 
166
/*
167
 * forward references
168
 */
169
static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
170
                         struct lrd * lrd, struct tlock * tlck);
171
 
172
static int lmNextPage(struct jfs_log * log);
173
static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate);
174
 
175
static int lbmLogInit(struct jfs_log * log);
176
static void lbmLogShutdown(struct jfs_log * log);
177
static struct lbuf *lbmAllocate(struct jfs_log * log, int);
178
static void lbmFree(struct lbuf * bp);
179
static void lbmfree(struct lbuf * bp);
180
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
181
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
182
                     int cant_block);
183
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
184
static int lbmIOWait(struct lbuf * bp, int flag);
185
static void lbmIODone(struct buffer_head *bh, int);
186
static void lbmStartIO(struct lbuf * bp);
187
static void lmGCwrite(struct jfs_log * log, int cant_block);
188
static int lmLogSync(struct jfs_log * log, int nosyncwait);
189
 
190
 
191
/*
192
 *      statistics
193
 */
194
#ifdef CONFIG_JFS_STATISTICS
195
struct lmStat {
196
        uint commit;            /* # of commit */
197
        uint pagedone;          /* # of page written */
198
        uint submitted;         /* # of pages submitted */
199
        uint full_page;         /* # of full pages submitted */
200
        uint partial_page;      /* # of partial pages submitted */
201
} lmStat;
202
#endif
203
 
204
 
205
/*
206
 * NAME:        lmLog()
207
 *
208
 * FUNCTION:    write a log record;
209
 *
210
 * PARAMETER:
211
 *
212
 * RETURN:      lsn - offset to the next log record to write (end-of-log);
213
 *              -1  - error;
214
 *
215
 * note: todo: log error handler
216
 */
217
int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
          struct tlock * tlck)
{
        int lsn;                /* end-of-log address to return */
        int diffp, difft;       /* page/transaction distance from last syncpt */
        struct metapage *mp = NULL;

        jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
                 log, tblk, lrd, tlck);

        /* serialize all log writes on this log (per-log mutex) */
        LOG_LOCK(log);

        /* log by (out-of-transaction) JFS ?  No tblock: just append record */
        if (tblk == NULL)
                goto writeRecord;

        /* log from page ?  BTROOT tlocks and tlocks without a metapage
         * carry no page recovery state to maintain.
         */
        if (tlck == NULL ||
            tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
                goto writeRecord;

        /*
         *      initialize/update page/transaction recovery lsn
         */
        lsn = log->lsn;

        /* logsynclist and the lsn fields below are shared with the
         * sync-point logic; guard them with the logsync lock.
         */
        LOGSYNC_LOCK(log);

        /*
         * initialize page lsn if first log write of the page
         */
        if (mp->lsn == 0) {
                mp->log = log;
                mp->lsn = lsn;
                log->count++;

                /* insert page at tail of logsynclist */
                list_add_tail(&mp->synclist, &log->synclist);
        }

        /*
         *      initialize/update lsn of tblock of the page
         *
         * transaction inherits oldest lsn of pages associated
         * with allocation/deallocation of resources (their
         * log records are used to reconstruct allocation map
         * at recovery time: inode for inode allocation map,
         * B+-tree index of extent descriptors for block
         * allocation map);
         * allocation map pages inherit transaction lsn at
         * commit time to allow forwarding log syncpt past log
         * records associated with allocation/deallocation of
         * resources only after persistent map of these map pages
         * have been updated and propagated to home.
         */
        /*
         * initialize transaction lsn:
         */
        if (tblk->lsn == 0) {
                /* inherit lsn of its first page logged */
                tblk->lsn = mp->lsn;
                log->count++;

                /* insert tblock after the page on logsynclist */
                list_add(&tblk->synclist, &mp->synclist);
        }
        /*
         * update transaction lsn:
         */
        else {
                /* inherit oldest/smallest lsn of page */
                logdiff(diffp, mp->lsn, log);
                logdiff(difft, tblk->lsn, log);
                if (diffp < difft) {
                        /* update tblock lsn with page lsn */
                        tblk->lsn = mp->lsn;

                        /* move tblock after page on logsynclist */
                        list_del(&tblk->synclist);
                        list_add(&tblk->synclist, &mp->synclist);
                }
        }

        LOGSYNC_UNLOCK(log);

        /*
         *      write the log record
         */
      writeRecord:
        lsn = lmWriteRecord(log, tblk, lrd, tlck);

        /*
         * forward log syncpt if log reached next syncpt trigger
         */
        logdiff(diffp, lsn, log);
        if (diffp >= log->nextsync)
                lsn = lmLogSync(log, 0);

        /* update end-of-log lsn */
        log->lsn = lsn;

        LOG_UNLOCK(log);

        /* return end-of-log address */
        return lsn;
}
323
 
324
 
325
/*
326
 * NAME:        lmWriteRecord()
327
 *
328
 * FUNCTION:    move the log record to current log page
329
 *
330
 * PARAMETER:   cd      - commit descriptor
331
 *
332
 * RETURN:      end-of-log address
333
 *
334
 * serialization: LOG_LOCK() held on entry/exit
335
 */
336
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
              struct tlock * tlck)
{
        int lsn = 0;             /* end-of-log address */
        struct lbuf *bp;        /* dst log page buffer */
        struct logpage *lp;     /* dst log page */
        caddr_t dst;            /* destination address in log page */
        int dstoffset;          /* end-of-log offset in log page */
        int freespace;          /* free space in log page */
        caddr_t p;              /* src meta-data page */
        caddr_t src;
        int srclen;
        int nbytes;             /* number of bytes to move */
        int i;
        int len;
        struct linelock *linelock;
        struct lv *lv;
        struct lvd *lvd;
        int l2linesize;

        len = 0;

        /* retrieve destination log page to write */
        bp = (struct lbuf *) log->bp;
        lp = (struct logpage *) bp->l_ldata;
        dstoffset = log->eor;

        /* any log data to write ?  (no tlock: record only, no vectors) */
        if (tlck == NULL)
                goto moveLrd;

        /*
         *      move log record data
         */
        /* retrieve source meta-data page to log */
        if (tlck->flag & tlckPAGELOCK) {
                p = (caddr_t) (tlck->mp->data);
                linelock = (struct linelock *) & tlck->lock;
        }
        /* retrieve source in-memory inode to log */
        else if (tlck->flag & tlckINODELOCK) {
                if (tlck->type & tlckDTREE)
                        p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
                else
                        p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
                linelock = (struct linelock *) & tlck->lock;
        }
#ifdef  _JFS_WIP
        else if (tlck->flag & tlckINLINELOCK) {

                inlinelock = (struct inlinelock *) & tlck;
                p = (caddr_t) & inlinelock->pxd;
                linelock = (struct linelock *) & tlck;
        }
#endif                          /* _JFS_WIP */
        else {
                jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
                return 0;        /* Probably should trap */
        }
        l2linesize = linelock->l2linesize;

        /* copy each linelock chain's vectors into the log page(s);
         * a vector may straddle a page boundary, in which case the
         * current page is pushed out via lmNextPage() and copying
         * resumes at the header of the freshly allocated page.
         */
      moveData:
        ASSERT(linelock->index <= linelock->maxcnt);

        lv = linelock->lv;
        for (i = 0; i < linelock->index; i++, lv++) {
                if (lv->length == 0)
                        continue;

                /* is page full ? */
                if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
                        /* page become full: move on to next page */
                        lmNextPage(log);

                        bp = log->bp;
                        lp = (struct logpage *) bp->l_ldata;
                        dstoffset = LOGPHDRSIZE;
                }

                /*
                 * move log vector data
                 */
                src = (u8 *) p + (lv->offset << l2linesize);
                srclen = lv->length << l2linesize;
                len += srclen;
                while (srclen > 0) {
                        freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
                        nbytes = min(freespace, srclen);
                        dst = (caddr_t) lp + dstoffset;
                        memcpy(dst, src, nbytes);
                        dstoffset += nbytes;

                        /* is page not full ? */
                        if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
                                break;

                        /* page become full: move on to next page */
                        lmNextPage(log);

                        bp = (struct lbuf *) log->bp;
                        lp = (struct logpage *) bp->l_ldata;
                        dstoffset = LOGPHDRSIZE;

                        srclen -= nbytes;
                        src += nbytes;
                }

                /*
                 * move log vector descriptor (4 bytes, on-disk format)
                 */
                len += 4;
                lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
                lvd->offset = cpu_to_le16(lv->offset);
                lvd->length = cpu_to_le16(lv->length);
                dstoffset += 4;
                jfs_info("lmWriteRecord: lv offset:%d length:%d",
                         lv->offset, lv->length);
        }

        /* follow the linelock chain to the next set of vectors */
        if ((i = linelock->next)) {
                linelock = (struct linelock *) lid_to_tlock(i);
                goto moveData;
        }

        /*
         *      move log record descriptor
         */
      moveLrd:
        lrd->length = cpu_to_le16(len);

        src = (caddr_t) lrd;
        srclen = LOGRDSIZE;

        while (srclen > 0) {
                freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
                nbytes = min(freespace, srclen);
                dst = (caddr_t) lp + dstoffset;
                memcpy(dst, src, nbytes);

                dstoffset += nbytes;
                srclen -= nbytes;

                /* are there more to move than freespace of page ? */
                if (srclen)
                        goto pageFull;

                /*
                 * end of log record descriptor
                 */

                /* update last log record eor */
                log->eor = dstoffset;
                bp->l_eor = dstoffset;
                lsn = (log->page << L2LOGPSIZE) + dstoffset;

                /* COMMIT records additionally enqueue the transaction
                 * for group commit (see lmGroupCommit/lmGCwrite).
                 * NOTE(review): tblk is dereferenced here; callers that
                 * pass tblk == NULL presumably never log COMMIT records
                 * — confirm against callers.
                 */
                if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
                        tblk->clsn = lsn;
                        jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
                                 bp->l_eor);

                        INCREMENT(lmStat.commit);       /* # of commit */

                        /*
                         * enqueue tblock for group commit:
                         *
                         * enqueue tblock of non-trivial/synchronous COMMIT
                         * at tail of group commit queue
                         * (trivial/asynchronous COMMITs are ignored by
                         * group commit.)
                         */
                        LOGGC_LOCK(log);

                        /* init tblock gc state */
                        tblk->flag = tblkGC_QUEUE;
                        tblk->bp = log->bp;
                        tblk->pn = log->page;
                        tblk->eor = log->eor;

                        /* enqueue transaction to commit queue */
                        tblk->cqnext = NULL;
                        if (log->cqueue.head) {
                                log->cqueue.tail->cqnext = tblk;
                                log->cqueue.tail = tblk;
                        } else
                                log->cqueue.head = log->cqueue.tail = tblk;

                        LOGGC_UNLOCK(log);
                }

                jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
                        le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

                /* page not full ? */
                if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
                        return lsn;

              pageFull:
                /* page become full: move on to next page */
                lmNextPage(log);

                bp = (struct lbuf *) log->bp;
                lp = (struct logpage *) bp->l_ldata;
                dstoffset = LOGPHDRSIZE;
                src += nbytes;
        }

        return lsn;
}
545
 
546
 
547
/*
548
 * NAME:        lmNextPage()
549
 *
550
 * FUNCTION:    write current page and allocate next page.
551
 *
552
 * PARAMETER:   log
553
 *
554
 * RETURN:      0
555
 *
556
 * serialization: LOG_LOCK() held on entry/exit
557
 */
558
static int lmNextPage(struct jfs_log * log)
{
        struct logpage *lp;
        int lspn;               /* log sequence page number */
        int pn;                 /* current page number */
        struct lbuf *bp;
        struct lbuf *nextbp;
        struct tblock *tblk;

        /* get current log page number and log sequence page number */
        pn = log->page;
        bp = log->bp;
        lp = (struct logpage *) bp->l_ldata;
        lspn = le32_to_cpu(lp->h.page);

        /* commit-queue and group-commit state below are guarded by
         * the per-log group-commit lock.
         */
        LOGGC_LOCK(log);

        /*
         *      write or queue the full page at the tail of write queue
         */
        /* get the tail tblk on commit queue */
        tblk = log->cqueue.tail;

        /* every tblk who has COMMIT record on the current page,
         * and has not been committed, must be on commit queue
         * since tblk is queued at commit queue at the time
         * of writing its COMMIT record on the page before
         * page becomes full (even though the tblk thread
         * who wrote COMMIT record may have been suspended
         * currently);
         */

        /* is page bound with outstanding tail tblk ? */
        if (tblk && tblk->pn == pn) {
                /* mark tblk for end-of-page */
                tblk->flag |= tblkGC_EOP;

                if (log->cflag & logGC_PAGEOUT) {
                        /* if page is not already on write queue,
                         * just enqueue (no lbmWRITE to prevent redrive)
                         * buffer to wqueue to ensure correct serial order
                         * of the pages since log pages will be added
                         * continuously
                         */
                        if (bp->l_wqnext == NULL)
                                lbmWrite(log, bp, 0, 0);
                } else {
                        /*
                         * No current GC leader, initiate group commit
                         */
                        log->cflag |= logGC_PAGEOUT;
                        lmGCwrite(log, 0);
                }
        }
        /* page is not bound with outstanding tblk:
         * init write or mark it to be redriven (lbmWRITE)
         */
        else {
                /* finalize the page */
                bp->l_ceor = bp->l_eor;
                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
        }
        LOGGC_UNLOCK(log);

        /*
         *      allocate/initialize next page
         */
        /* if log wraps, the first data page of log is 2
         * (0 never used, 1 is superblock).
         */
        log->page = (pn == log->size - 1) ? 2 : pn + 1;
        log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */

        /* allocate/initialize next log page buffer */
        nextbp = lbmAllocate(log, log->page);
        nextbp->l_eor = log->eor;
        log->bp = nextbp;

        /* initialize next log page: both header and trailer carry the
         * log sequence page number and eor for split-write detection.
         */
        lp = (struct logpage *) nextbp->l_ldata;
        lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

        return 0;
}
644
 
645
 
646
/*
647
 * NAME:        lmGroupCommit()
648
 *
649
 * FUNCTION:    group commit
650
 *      initiate pageout of the pages with COMMIT in the order of
651
 *      page number - redrive pageout of the page at the head of
652
 *      pageout queue until full page has been written.
653
 *
654
 * RETURN:
655
 *
656
 * NOTE:
657
 *      LOGGC_LOCK serializes log group commit queue, and
658
 *      transaction blocks on the commit queue.
659
 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
660
 */
661
int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
662
{
663
        int rc = 0;
664
 
665
        LOGGC_LOCK(log);
666
 
667
        /* group committed already ? */
668
        if (tblk->flag & tblkGC_COMMITTED) {
669
                if (tblk->flag & tblkGC_ERROR)
670
                        rc = -EIO;
671
 
672
                LOGGC_UNLOCK(log);
673
                return rc;
674
        }
675
        jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
676
 
677
        if (tblk->xflag & COMMIT_LAZY)
678
                tblk->flag |= tblkGC_LAZY;
679
 
680
        if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head &&
681
            (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag))) {
682
                /*
683
                 * No pageout in progress
684
                 *
685
                 * start group commit as its group leader.
686
                 */
687
                log->cflag |= logGC_PAGEOUT;
688
 
689
                lmGCwrite(log, 0);
690
        }
691
 
692
        if (tblk->xflag & COMMIT_LAZY) {
693
                /*
694
                 * Lazy transactions can leave now
695
                 */
696
                LOGGC_UNLOCK(log);
697
                return 0;
698
        }
699
 
700
        /* lmGCwrite gives up LOGGC_LOCK, check again */
701
 
702
        if (tblk->flag & tblkGC_COMMITTED) {
703
                if (tblk->flag & tblkGC_ERROR)
704
                        rc = -EIO;
705
 
706
                LOGGC_UNLOCK(log);
707
                return rc;
708
        }
709
 
710
        /* upcount transaction waiting for completion
711
         */
712
        log->gcrtc++;
713
        tblk->flag |= tblkGC_READY;
714
 
715
        __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
716
                     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
717
 
718
        /* removed from commit queue */
719
        if (tblk->flag & tblkGC_ERROR)
720
                rc = -EIO;
721
 
722
        LOGGC_UNLOCK(log);
723
        return rc;
724
}
725
 
726
/*
727
 * NAME:        lmGCwrite()
728
 *
729
 * FUNCTION:    group commit write
730
 *      initiate write of log page, building a group of all transactions
731
 *      with commit records on that page.
732
 *
733
 * RETURN:      None
734
 *
735
 * NOTE:
736
 *      LOGGC_LOCK must be held by caller.
737
 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
738
 */
739
static void lmGCwrite(struct jfs_log * log, int cant_write)
740
{
741
        struct lbuf *bp;
742
        struct logpage *lp;
743
        int gcpn;               /* group commit page number */
744
        struct tblock *tblk;
745
        struct tblock *xtblk;
746
 
747
        /*
748
         * build the commit group of a log page
749
         *
750
         * scan commit queue and make a commit group of all
751
         * transactions with COMMIT records on the same log page.
752
         */
753
        /* get the head tblk on the commit queue */
754
        tblk = xtblk = log->cqueue.head;
755
        gcpn = tblk->pn;
756
 
757
        while (tblk && tblk->pn == gcpn) {
758
                xtblk = tblk;
759
 
760
                /* state transition: (QUEUE, READY) -> COMMIT */
761
                tblk->flag |= tblkGC_COMMIT;
762
                tblk = tblk->cqnext;
763
        }
764
        tblk = xtblk;           /* last tblk of the page */
765
 
766
        /*
767
         * pageout to commit transactions on the log page.
768
         */
769
        bp = (struct lbuf *) tblk->bp;
770
        lp = (struct logpage *) bp->l_ldata;
771
        /* is page already full ? */
772
        if (tblk->flag & tblkGC_EOP) {
773
                /* mark page to free at end of group commit of the page */
774
                tblk->flag &= ~tblkGC_EOP;
775
                tblk->flag |= tblkGC_FREE;
776
                bp->l_ceor = bp->l_eor;
777
                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
778
                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
779
                         cant_write);
780
                INCREMENT(lmStat.full_page);
781
        }
782
        /* page is not yet full */
783
        else {
784
                bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
785
                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
786
                lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
787
                INCREMENT(lmStat.partial_page);
788
        }
789
}
790
 
791
/*
792
 * NAME:        lmPostGC()
793
 *
794
 * FUNCTION:    group commit post-processing
795
 *      Processes transactions after their commit records have been written
796
 *      to disk, redriving log I/O if necessary.
797
 *
798
 * RETURN:      None
799
 *
800
 * NOTE:
801
 *      This routine is called a interrupt time by lbmIODone
802
 */
803
static void lmPostGC(struct lbuf * bp)
{
        unsigned long flags;
        struct jfs_log *log = bp->l_log;
        struct logpage *lp;
        struct tblock *tblk;

        /* runs at interrupt time (from lbmIODone), hence irqsave */
        //LOGGC_LOCK(log);
        spin_lock_irqsave(&log->gclock, flags);
        /*
         * current pageout of group commit completed.
         *
         * remove/wakeup transactions from commit queue who were
         * group committed with the current log page
         */
        while ((tblk = log->cqueue.head) && (tblk->flag & tblkGC_COMMIT)) {
                /* if transaction was marked GC_COMMIT then
                 * it has been shipped in the current pageout
                 * and made it to disk - it is committed.
                 */

                /* propagate a log-page write error to the transaction */
                if (bp->l_flag & lbmERROR)
                        tblk->flag |= tblkGC_ERROR;

                /* remove it from the commit queue */
                log->cqueue.head = tblk->cqnext;
                if (log->cqueue.head == NULL)
                        log->cqueue.tail = NULL;
                tblk->flag &= ~tblkGC_QUEUE;
                tblk->cqnext = 0;

                if (tblk == log->flush_tblk) {
                        /* we can stop flushing the log now */
                        clear_bit(log_FLUSH, &log->flag);
                        log->flush_tblk = NULL;
                }

                jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
                         tblk->flag);

                if (!(tblk->xflag & COMMIT_FORCE))
                        /*
                         * Hand tblk over to lazy commit thread
                         */
                        txLazyUnlock(tblk);
                else {
                        /* state transition: COMMIT -> COMMITTED */
                        tblk->flag |= tblkGC_COMMITTED;

                        /* one fewer transaction waiting for group commit */
                        if (tblk->flag & tblkGC_READY)
                                log->gcrtc--;

                        /* wake the committer sleeping on tblk->gcwait */
                        LOGGC_WAKEUP(tblk);
                }

                /* was page full before pageout ?
                 * (and this is the last tblk bound with the page)
                 */
                if (tblk->flag & tblkGC_FREE)
                        lbmFree(bp);
                /* did page become full after pageout ?
                 * (and this is the last tblk bound with the page)
                 */
                else if (tblk->flag & tblkGC_EOP) {
                        /* finalize the page */
                        lp = (struct logpage *) bp->l_ldata;
                        bp->l_ceor = bp->l_eor;
                        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
                        jfs_info("lmPostGC: calling lbmWrite");
                        lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
                                 1);
                }

        }

        /* are there any transactions who have entered lmGroupCommit()
         * (whose COMMITs are after that of the last log page written.
         * They are waiting for new group commit (above at (SLEEP 1))
         * or lazy transactions are on a full (queued) log page,
         * select the latest ready transaction as new group leader and
         * wake her up to lead her group.
         */
        if ((tblk = log->cqueue.head) &&
            ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
             test_bit(log_FLUSH, &log->flag)))
                /*
                 * Call lmGCwrite with new group leader
                 */
                lmGCwrite(log, 1);

        /* no transaction are ready yet (transactions are only just
         * queued (GC_QUEUE) and not entered for group commit yet).
         * the first transaction entering group commit
         * will elect herself as new group leader.
         */
        else
                log->cflag &= ~logGC_PAGEOUT;

        //LOGGC_UNLOCK(log);
        spin_unlock_irqrestore(&log->gclock, flags);
        return;
}
905
 
906
/*
907
 * NAME:        lmLogSync()
908
 *
909
 * FUNCTION:    write log SYNCPT record for specified log
910
 *      if new sync address is available
911
 *      (normally the case if sync() is executed by back-ground
912
 *      process).
913
 *      if not, explicitly run jfs_blogsync() to initiate
914
 *      getting of new sync address.
915
 *      calculate new value of i_nextsync which determines when
916
 *      this code is called again.
917
 *
918
 *      this is called only from lmLog().
919
 *
920
 * PARAMETERS:  log     - log structure
 *              nosyncwait - 1 if called asynchronously (no syncbarrier)
 *
 * RETURN:      lsn - log sequence number of the SYNCPT record written
923
 *
924
 * serialization: LOG_LOCK() held on entry/exit
925
 */
926
static int lmLogSync(struct jfs_log * log, int nosyncwait)
{
        int logsize;
        int written;            /* written since last syncpt */
        int free;               /* free space left available */
        int delta;              /* additional delta to write normally */
        int more;               /* additional write granted */
        struct lrd lrd;
        int lsn;
        struct logsyncblk *lp;

        /*
         *      forward syncpt
         */
        /* if last sync is same as last syncpt,
         * invoke sync point forward processing to update sync.
         */

        if (log->sync == log->syncpt) {
                LOGSYNC_LOCK(log);
                /* ToDo: push dirty metapages out to disk */
//              bmLogSync(log);

                /* advance sync to the lsn of the oldest entry still on
                 * the synclist, or all the way to the current lsn if the
                 * list is empty (nothing left to replay before that point)
                 */
                if (list_empty(&log->synclist))
                        log->sync = log->lsn;
                else {
                        lp = list_entry(log->synclist.next,
                                        struct logsyncblk, synclist);
                        log->sync = lp->lsn;
                }
                LOGSYNC_UNLOCK(log);

        }

        /* if sync is different from last syncpt,
         * write a SYNCPT record with syncpt = sync.
         * reset syncpt = sync
         */
        if (log->sync != log->syncpt) {
                struct super_block *sb = log->sb;
                struct jfs_sb_info *sbi = JFS_SBI(sb);

                /*
                 * We need to make sure all of the "written" metapages
                 * actually make it to disk
                 */
                fsync_inode_data_buffers(sbi->ipbmap);
                fsync_inode_data_buffers(sbi->ipimap);
                fsync_inode_data_buffers(sb->s_bdev->bd_inode);

                lrd.logtid = 0;
                lrd.backchain = 0;
                lrd.type = cpu_to_le16(LOG_SYNCPT);
                lrd.length = 0;
                lrd.log.syncpt.sync = cpu_to_le32(log->sync);
                lsn = lmWriteRecord(log, NULL, &lrd, NULL);

                log->syncpt = log->sync;
        } else
                lsn = log->lsn;

        /*
         *      setup next syncpt trigger (SWAG)
         */
        logsize = log->logsize;

        /* written = circular distance from last syncpt to lsn */
        logdiff(written, lsn, log);
        free = logsize - written;
        delta = LOGSYNC_DELTA(logsize);
        more = min(free / 2, delta);
        if (more < 2 * LOGPSIZE) {
                jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
                /*
                 *      log wrapping
                 *
                 * option 1 - panic ? No.!
                 * option 2 - shutdown file systems
                 *            associated with log ?
                 * option 3 - extend log ?
                 */
                /*
                 * option 4 - second chance
                 *
                 * mark log wrapped, and continue.
                 * when all active transactions are completed,
                 * mark log valid for recovery.
                 * if crashed during invalid state, log state
                 * implies invalid log, forcing fsck().
                 */
                /* mark log state log wrap in log superblock */
                /* log->state = LOGWRAP; */

                /* reset sync point computation */
                log->syncpt = log->sync = lsn;
                log->nextsync = delta;
        } else
                /* next syncpt trigger = written + more */
                log->nextsync = written + more;

        /* return if lmLogSync() from outside of transaction, e.g., sync() */
        if (nosyncwait)
                return lsn;

        /* if number of bytes written from last sync point is more
         * than 1/4 of the log size, stop new transactions from
         * starting until all current transactions are completed
         * by setting syncbarrier flag.
         */
        if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
                set_bit(log_SYNCBARRIER, &log->flag);
                jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
                         log->syncpt);
                /*
                 * We may have to initiate group commit
                 */
                jfs_flush_journal(log, 0);
        }

        return lsn;
}
1046
 
1047
 
1048
/*
1049
 * NAME:        lmLogOpen()
1050
 *
1051
 * FUNCTION:    open the log on first open;
1052
 *      insert filesystem in the active list of the log.
1053
 *
1054
 * PARAMETER:   ipmnt   - file system mount inode
1055
 *              iplog   - log inode (out)
1056
 *
1057
 * RETURN:
1058
 *
1059
 * serialization:
1060
 */
1061
int lmLogOpen(struct super_block *sb, struct jfs_log ** logptr)
{
        int rc;
        struct block_device *bdev;
        struct jfs_log *log;

        /* allocate and zero the per-log descriptor */
        if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL)))
                return -ENOMEM;
        memset(log, 0, sizeof(struct jfs_log));
        init_waitqueue_head(&log->syncwait);

        log->sb = sb;           /* This should be a list */

        if (!(JFS_SBI(sb)->mntflag & JFS_INLINELOG))
                goto externalLog;

        /*
         *      in-line log in host file system
         *
         * file system to log have 1-to-1 relationship;
         */

        set_bit(log_INLINELOG, &log->flag);
        log->bdev = sb->s_bdev;
        log->base = addressPXD(&JFS_SBI(sb)->logpxd);
        /* log size is recorded in units of log pages (LOGPSIZE) */
        log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
            (L2LOGPSIZE - sb->s_blocksize_bits);
        log->l2bsize = sb->s_blocksize_bits;
        ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);

        /*
         * initialize log.
         */
        if ((rc = lmLogInit(log)))
                goto free;
        goto out;

        /*
         *      external log as separate logical volume
         *
         * file systems to log may have n-to-1 relationship;
         */
      externalLog:

        /*
         * TODO: Check for already opened log devices
         */

        if (!(bdev = bdget(kdev_t_to_nr(JFS_SBI(sb)->logdev)))) {
                rc = -ENODEV;
                goto free;
        }

        /* NOTE(review): on failure we skip blkdev_put(); presumably the
         * 2.4 blkdev_get() releases the bdev itself on error - confirm
         */
        if ((rc = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS))) {
                goto free;
        }

        log->bdev = bdev;
        memcpy(log->uuid, JFS_SBI(sb)->loguuid, sizeof(log->uuid));

        /*
         * initialize log:
         */
        if ((rc = lmLogInit(log)))
                goto close;

        /*
         * add file system to log active file system list
         */
        if ((rc = lmLogFileSystem(log, JFS_SBI(sb)->uuid, 1)))
                goto shutdown;

      out:
        *logptr = log;
        return 0;

        /*
         *      unwind on error (labels fall through in reverse order
         *      of resource acquisition)
         */
      shutdown:         /* unwind lbmLogInit() */
        lbmLogShutdown(log);

      close:            /* close external log device */
        blkdev_put(bdev, BDEV_FS);

      free:             /* free log descriptor */
        kfree(log);

        jfs_warn("lmLogOpen: exit(%d)", rc);
        return rc;
}
1152
 
1153
 
1154
/*
1155
 * NAME:        lmLogInit()
1156
 *
1157
 * FUNCTION:    log initialization at first log open.
1158
 *
1159
 *      logredo() (or logformat()) should have been run previously.
1160
 *      initialize the log inode from log superblock.
1161
 *      set the log state in the superblock to LOGMOUNT and
1162
 *      write SYNCPT log record.
1163
 *
1164
 * PARAMETER:   log     - log structure
1165
 *
1166
 * RETURN:      0        - if ok
1167
 *              -EINVAL - bad log magic number or superblock dirty
1168
 *              error returned from logwait()
1169
 *
1170
 * serialization: single first open thread
1171
 */
1172
int lmLogInit(struct jfs_log * log)
1173
{
1174
        int rc = 0;
1175
        struct lrd lrd;
1176
        struct logsuper *logsuper;
1177
        struct lbuf *bpsuper;
1178
        struct lbuf *bp;
1179
        struct logpage *lp;
1180
        int lsn;
1181
 
1182
        jfs_info("lmLogInit: log:0x%p", log);
1183
 
1184
        /*
1185
         * log inode is overlaid on generic inode where
1186
         * dinode have been zeroed out by iRead();
1187
         */
1188
 
1189
        /*
1190
         * initialize log i/o
1191
         */
1192
        if ((rc = lbmLogInit(log)))
1193
                return rc;
1194
 
1195
        /*
1196
         * validate log superblock
1197
         */
1198
 
1199
 
1200
        if (!test_bit(log_INLINELOG, &log->flag))
1201
                log->l2bsize = 12;      /* XXX kludge alert XXX */
1202
        if ((rc = lbmRead(log, 1, &bpsuper)))
1203
                goto errout10;
1204
 
1205
        logsuper = (struct logsuper *) bpsuper->l_ldata;
1206
 
1207
        if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1208
                jfs_warn("*** Log Format Error ! ***");
1209
                rc = -EINVAL;
1210
                goto errout20;
1211
        }
1212
 
1213
        /* logredo() should have been run successfully. */
1214
        if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1215
                jfs_warn("*** Log Is Dirty ! ***");
1216
                rc = -EINVAL;
1217
                goto errout20;
1218
        }
1219
 
1220
        /* initialize log inode from log superblock */
1221
        if (test_bit(log_INLINELOG,&log->flag)) {
1222
                if (log->size != le32_to_cpu(logsuper->size)) {
1223
                        rc = -EINVAL;
1224
                        goto errout20;
1225
                }
1226
                jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1227
                        log, (unsigned long long) log->base, log->size);
1228
        } else {
1229
                if (memcmp(logsuper->uuid, log->uuid, 16)) {
1230
                        jfs_warn("wrong uuid on JFS log device");
1231
                        goto errout20;
1232
                }
1233
                log->size = le32_to_cpu(logsuper->size);
1234
                log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1235
                jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1236
                        log, (unsigned long long) log->base, log->size);
1237
        }
1238
 
1239
        log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1240
        log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1241
 
1242
        /* check for disabled journaling to disk */
1243
        if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
1244
                log->no_integrity = 1;
1245
                log->ni_page = log->page;
1246
                log->ni_eor = log->eor;
1247
        }
1248
        else
1249
                log->no_integrity = 0;
1250
 
1251
        /*
1252
         * initialize for log append write mode
1253
         */
1254
        /* establish current/end-of-log page/buffer */
1255
        if ((rc = lbmRead(log, log->page, &bp)))
1256
                goto errout20;
1257
 
1258
        lp = (struct logpage *) bp->l_ldata;
1259
 
1260
        jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1261
                 le32_to_cpu(logsuper->end), log->page, log->eor,
1262
                 le16_to_cpu(lp->h.eor));
1263
 
1264
//      ASSERT(log->eor == lp->h.eor);
1265
 
1266
        log->bp = bp;
1267
        bp->l_pn = log->page;
1268
        bp->l_eor = log->eor;
1269
 
1270
        /* initialize the group commit serialization lock */
1271
        LOGGC_LOCK_INIT(log);
1272
 
1273
        /* if current page is full, move on to next page */
1274
        if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1275
                lmNextPage(log);
1276
 
1277
        /* allocate/initialize the log write serialization lock */
1278
        LOG_LOCK_INIT(log);
1279
 
1280
        /*
1281
         * initialize log syncpoint
1282
         */
1283
        /*
1284
         * write the first SYNCPT record with syncpoint = 0
1285
         * (i.e., log redo up to HERE !);
1286
         * remove current page from lbm write queue at end of pageout
1287
         * (to write log superblock update), but do not release to freelist;
1288
         */
1289
        lrd.logtid = 0;
1290
        lrd.backchain = 0;
1291
        lrd.type = cpu_to_le16(LOG_SYNCPT);
1292
        lrd.length = 0;
1293
        lrd.log.syncpt.sync = 0;
1294
        lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1295
        bp = log->bp;
1296
        bp->l_ceor = bp->l_eor;
1297
        lp = (struct logpage *) bp->l_ldata;
1298
        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1299
        lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1300
        if ((rc = lbmIOWait(bp, 0)))
1301
                goto errout30;
1302
 
1303
        /* initialize logsync parameters */
1304
        log->logsize = (log->size - 2) << L2LOGPSIZE;
1305
        log->lsn = lsn;
1306
        log->syncpt = lsn;
1307
        log->sync = log->syncpt;
1308
        log->nextsync = LOGSYNC_DELTA(log->logsize);
1309
 
1310
        jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1311
                 log->lsn, log->syncpt, log->sync);
1312
 
1313
        LOGSYNC_LOCK_INIT(log);
1314
 
1315
        INIT_LIST_HEAD(&log->synclist);
1316
 
1317
        log->cqueue.head = log->cqueue.tail = NULL;
1318
        log->flush_tblk = NULL;
1319
 
1320
        log->count = 0;
1321
 
1322
        /*
1323
         * initialize for lazy/group commit
1324
         */
1325
        log->clsn = lsn;
1326
 
1327
        /*
1328
         * update/write superblock
1329
         */
1330
        logsuper->state = cpu_to_le32(LOGMOUNT);
1331
        log->serial = le32_to_cpu(logsuper->serial) + 1;
1332
        logsuper->serial = cpu_to_le32(log->serial);
1333
        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1334
        if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1335
                goto errout30;
1336
 
1337
        return 0;
1338
 
1339
        /*
1340
         *      unwind on error
1341
         */
1342
      errout30:         /* release log page */
1343
        lbmFree(bp);
1344
 
1345
      errout20:         /* release log superblock */
1346
        lbmFree(bpsuper);
1347
 
1348
      errout10:         /* unwind lbmLogInit() */
1349
        lbmLogShutdown(log);
1350
 
1351
        jfs_warn("lmLogInit: exit(%d)", rc);
1352
        return rc;
1353
}
1354
 
1355
 
1356
/*
1357
 * NAME:        lmLogClose()
1358
 *
1359
 * FUNCTION:    remove file system <ipmnt> from active list of log <iplog>
1360
 *              and close it on last close.
1361
 *
1362
 * PARAMETER:   sb      - superblock
1363
 *              log     - log inode
1364
 *
1365
 * RETURN:      errors from subroutines
1366
 *
1367
 * serialization:
1368
 */
1369
int lmLogClose(struct super_block *sb, struct jfs_log * log)
1370
{
1371
        int rc;
1372
 
1373
        jfs_info("lmLogClose: log:0x%p", log);
1374
 
1375
        if (!test_bit(log_INLINELOG, &log->flag))
1376
                goto externalLog;
1377
 
1378
        /*
1379
         *      in-line log in host file system
1380
         */
1381
        rc = lmLogShutdown(log);
1382
        goto out;
1383
 
1384
        /*
1385
         *      external log as separate logical volume
1386
         */
1387
      externalLog:
1388
        lmLogFileSystem(log, JFS_SBI(sb)->uuid, 0);
1389
        rc = lmLogShutdown(log);
1390
        blkdev_put(log->bdev, BDEV_FS);
1391
 
1392
      out:
1393
        jfs_info("lmLogClose: exit(%d)", rc);
1394
        return rc;
1395
}
1396
 
1397
 
1398
/*
1399
 * NAME:        jfs_flush_journal()
1400
 *
1401
 * FUNCTION:    initiate write of any outstanding transactions to the journal
1402
 *              and optionally wait until they are all written to disk
1403
 *
1404
 *              wait == 0  flush until latest txn is committed, don't wait
1405
 *              wait == 1  flush until latest txn is committed, wait
1406
 *              wait > 1   flush until all txn's are complete, wait
1407
 */
1408
void jfs_flush_journal(struct jfs_log *log, int wait)
{
        int i;
        struct tblock *target;

        if (!log)
                /* jfs_write_inode may call us during read-only mount */
                return;

        jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

        LOGGC_LOCK(log);

        /* latest transaction on the commit queue (may be NULL) */
        target = log->cqueue.head;

        if (target) {
                /*
                 * This ensures that we will keep writing to the journal as long
                 * as there are unwritten commit records
                 */

                if (test_bit(log_FLUSH, &log->flag)) {
                        /*
                         * We're already flushing.
                         * if flush_tblk is NULL, we are flushing everything,
                         * so leave it that way.  Otherwise, update it to the
                         * latest transaction
                         */
                        if (log->flush_tblk)
                                log->flush_tblk = target;
                } else {
                        /* Only flush until latest transaction is committed */
                        log->flush_tblk = target;
                        set_bit(log_FLUSH, &log->flag);

                        /*
                         * Initiate I/O on outstanding transactions
                         */
                        if (!(log->cflag & logGC_PAGEOUT)) {
                                log->cflag |= logGC_PAGEOUT;
                                lmGCwrite(log, 0);
                        }
                }
        }
        if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
                /* Flush until all activity complete */
                set_bit(log_FLUSH, &log->flag);
                log->flush_tblk = NULL;
        }

        /* wait == 1/2: sleep until the target transaction's commit
         * record has made it to disk (lmPostGC wakes gcwait)
         */
        if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
                DECLARE_WAITQUEUE(__wait, current);

                add_wait_queue(&target->gcwait, &__wait);
                set_current_state(TASK_UNINTERRUPTIBLE);
                /* drop the lock while sleeping; group commit runs under it */
                LOGGC_UNLOCK(log);
                schedule();
                current->state = TASK_RUNNING;
                LOGGC_LOCK(log);
                remove_wait_queue(&target->gcwait, &__wait);
        }
        LOGGC_UNLOCK(log);

        if (wait < 2)
                return;

        /*
         * If there was recent activity, we may need to wait
         * for the lazycommit thread to catch up
         */
        if (log->cqueue.head || !list_empty(&log->synclist)) {
                for (i = 0; i < 800; i++) {      /* Too much? */
                        /* poll every HZ/4 until queue and synclist drain */
                        current->state = TASK_INTERRUPTIBLE;
                        schedule_timeout(HZ / 4);
                        if ((log->cqueue.head == NULL) &&
                            list_empty(&log->synclist))
                                break;
                }
        }
        assert(log->cqueue.head == NULL);
        assert(list_empty(&log->synclist));
        clear_bit(log_FLUSH, &log->flag);
}
1491
 
1492
/*
1493
 * NAME:        lmLogShutdown()
1494
 *
1495
 * FUNCTION:    log shutdown at last LogClose().
1496
 *
1497
 *              write log syncpt record.
1498
 *              update super block to set redone flag to 0.
1499
 *
1500
 * PARAMETER:   log     - log inode
1501
 *
1502
 * RETURN:      0        - success
1503
 *
1504
 * serialization: single last close thread
1505
 */
1506
int lmLogShutdown(struct jfs_log * log)
{
        int rc;
        struct lrd lrd;
        int lsn;
        struct logsuper *logsuper;
        struct lbuf *bpsuper;
        struct lbuf *bp;
        struct logpage *lp;

        jfs_info("lmLogShutdown: log:0x%p", log);

        /* push all outstanding transactions to disk and wait */
        jfs_flush_journal(log, 2);

        /*
         * We need to make sure all of the "written" metapages
         * actually make it to disk
         */
        fsync_no_super(log->sb->s_dev);

        /*
         * write the last SYNCPT record with syncpoint = 0
         * (i.e., log redo up to HERE !)
         */
        lrd.logtid = 0;
        lrd.backchain = 0;
        lrd.type = cpu_to_le16(LOG_SYNCPT);
        lrd.length = 0;
        lrd.log.syncpt.sync = 0;

        /* check for disabled journaling to disk */
        if (JFS_SBI(log->sb)->flag & JFS_NOINTEGRITY) {
                /* restore the end-of-log position saved at lmLogInit so
                 * the on-disk log is left where it was before the
                 * no-integrity mount
                 */
                log->no_integrity = 0;
                log->page = log->ni_page;
                log->eor = log->ni_eor;
        }

        lsn = lmWriteRecord(log, NULL, &lrd, NULL);
        bp = log->bp;
        lp = (struct logpage *) bp->l_ldata;
        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
        lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
        lbmIOWait(log->bp, lbmFREE);

        /*
         * synchronous update log superblock
         * mark log state as shutdown cleanly
         * (i.e., Log does not need to be replayed).
         */
        if ((rc = lbmRead(log, 1, &bpsuper)))
                goto out;

        logsuper = (struct logsuper *) bpsuper->l_ldata;
        logsuper->state = cpu_to_le32(LOGREDONE);
        logsuper->end = cpu_to_le32(lsn);
        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
        rc = lbmIOWait(bpsuper, lbmFREE);

        jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
                 lsn, log->page, log->eor);

      out:
        /*
         * shutdown per log i/o
         */
        lbmLogShutdown(log);

        if (rc) {
                jfs_warn("lmLogShutdown: exit(%d)", rc);
        }
        return rc;
}
1578
 
1579
 
1580
/*
1581
 * NAME:        lmLogFileSystem()
1582
 *
1583
 * FUNCTION:    insert (<activate> = true)/remove (<activate> = false)
1584
 *      file system into/from log active file system list.
1585
 *
1586
 * PARAMETERS:  log     - pointer to log structure.
 *              uuid    - uuid of the file system to insert/remove.
 *              activate - nonzero to insert, zero to remove.
1590
 *
1591
 * RETURN:      0        - success
1592
 *              errors returned by vms_iowait().
1593
 */
1594
static int lmLogFileSystem(struct jfs_log * log, char *uuid, int activate)
1595
{
1596
        int rc = 0;
1597
        int i;
1598
        struct logsuper *logsuper;
1599
        struct lbuf *bpsuper;
1600
 
1601
        /*
1602
         * insert/remove file system device to log active file system list.
1603
         */
1604
        if ((rc = lbmRead(log, 1, &bpsuper)))
1605
                return rc;
1606
 
1607
        logsuper = (struct logsuper *) bpsuper->l_ldata;
1608
        if (activate) {
1609
                for (i = 0; i < MAX_ACTIVE; i++)
1610
                        if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1611
                                memcpy(logsuper->active[i].uuid, uuid, 16);
1612
                                break;
1613
                        }
1614
                if (i == MAX_ACTIVE) {
1615
                        jfs_warn("Too many file systems sharing journal!");
1616
                        lbmFree(bpsuper);
1617
                        return -EMFILE; /* Is there a better rc? */
1618
                }
1619
        } else {
1620
                for (i = 0; i < MAX_ACTIVE; i++)
1621
                        if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1622
                                memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1623
                                break;
1624
                        }
1625
                if (i == MAX_ACTIVE) {
1626
                        jfs_warn("Somebody stomped on the journal!");
1627
                        lbmFree(bpsuper);
1628
                        return -EIO;
1629
                }
1630
 
1631
        }
1632
 
1633
        /*
1634
         * synchronous write log superblock:
1635
         *
1636
         * write sidestream bypassing write queue:
1637
         * at file system mount, log super block is updated for
1638
         * activation of the file system before any log record
1639
         * (MOUNT record) of the file system, and at file system
1640
         * unmount, all meta data for the file system has been
1641
         * flushed before log super block is updated for deactivation
1642
         * of the file system.
1643
         */
1644
        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1645
        rc = lbmIOWait(bpsuper, lbmFREE);
1646
 
1647
        return rc;
1648
}
1649
 
1650
/*
1651
 *              log buffer manager (lbm)
1652
 *              ------------------------
1653
 *
1654
 * special purpose buffer manager supporting log i/o requirements.
1655
 *
1656
 * per log write queue:
1657
 * log pageout occurs in serial order by fifo write queue and
1658
 * restricting to a single i/o in progress at any one time.
1659
 * a circular singly-linked list
1660
 * (log->wrqueue points to the tail, and buffers are linked via
1661
 * bp->wrqueue field), and
1662
 * maintains log page in pageout or waiting for pageout in serial pageout.
1663
 */
1664
 
1665
/*
1666
 *      lbmLogInit()
1667
 *
1668
 * initialize per log I/O setup at lmLogInit()
1669
 */
1670
static int lbmLogInit(struct jfs_log * log)
1671
{                               /* log inode */
1672
        int i;
1673
        struct lbuf *lbuf;
1674
 
1675
        jfs_info("lbmLogInit: log:0x%p", log);
1676
 
1677
        /* initialize current buffer cursor */
1678
        log->bp = NULL;
1679
 
1680
        /* initialize log device write queue */
1681
        log->wqueue = NULL;
1682
 
1683
        /*
1684
         * Each log has its own buffer pages allocated to it.  These are
1685
         * not managed by the page cache.  This ensures that a transaction
1686
         * writing to the log does not block trying to allocate a page from
1687
         * the page cache (for the log).  This would be bad, since page
1688
         * allocation waits on the kswapd thread that may be committing inodes
1689
         * which would cause log activity.  Was that clear?  I'm trying to
1690
         * avoid deadlock here.
1691
         */
1692
        init_waitqueue_head(&log->free_wait);
1693
 
1694
        log->lbuf_free = NULL;
1695
 
1696
        for (i = 0; i < LOGPAGES; i++) {
1697
                lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1698
                if (lbuf == 0)
1699
                        goto error;
1700
                lbuf->l_bh.b_data = lbuf->l_ldata =
1701
                    (char *) get_zeroed_page(GFP_KERNEL);
1702
                if (lbuf->l_ldata == 0) {
1703
                        kfree(lbuf);
1704
                        goto error;
1705
                }
1706
                lbuf->l_log = log;
1707
                init_waitqueue_head(&lbuf->l_ioevent);
1708
 
1709
                lbuf->l_bh.b_size = LOGPSIZE;
1710
                lbuf->l_bh.b_dev = to_kdev_t(log->bdev->bd_dev);
1711
                lbuf->l_bh.b_end_io = lbmIODone;
1712
                lbuf->l_bh.b_private = lbuf;
1713
                lbuf->l_bh.b_page = virt_to_page(lbuf->l_ldata);
1714
                lbuf->l_bh.b_state = 0;
1715
                init_waitqueue_head(&lbuf->l_bh.b_wait);
1716
 
1717
                lbuf->l_freelist = log->lbuf_free;
1718
                log->lbuf_free = lbuf;
1719
        }
1720
 
1721
        return (0);
1722
 
1723
      error:
1724
        lbmLogShutdown(log);
1725
        return -ENOMEM;
1726
}
1727
 
1728
 
1729
/*
1730
 *      lbmLogShutdown()
1731
 *
1732
 * finalize per log I/O setup at lmLogShutdown()
1733
 */
1734
static void lbmLogShutdown(struct jfs_log * log)
1735
{
1736
        struct lbuf *lbuf;
1737
 
1738
        jfs_info("lbmLogShutdown: log:0x%p", log);
1739
 
1740
        lbuf = log->lbuf_free;
1741
        while (lbuf) {
1742
                struct lbuf *next = lbuf->l_freelist;
1743
                free_page((unsigned long) lbuf->l_ldata);
1744
                kfree(lbuf);
1745
                lbuf = next;
1746
        }
1747
 
1748
        log->bp = NULL;
1749
}
1750
 
1751
 
1752
/*
1753
 *      lbmAllocate()
1754
 *
1755
 * allocate an empty log buffer
1756
 */
1757
static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1758
{
1759
        struct lbuf *bp;
1760
        unsigned long flags;
1761
 
1762
        /*
1763
         * recycle from log buffer freelist if any
1764
         */
1765
        LCACHE_LOCK(flags);
1766
        LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1767
        log->lbuf_free = bp->l_freelist;
1768
        LCACHE_UNLOCK(flags);
1769
 
1770
        bp->l_flag = 0;
1771
 
1772
        bp->l_wqnext = NULL;
1773
        bp->l_freelist = NULL;
1774
 
1775
        bp->l_pn = pn;
1776
        bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1777
        bp->l_bh.b_blocknr = bp->l_blkno;
1778
        bp->l_ceor = 0;
1779
 
1780
        return bp;
1781
}
1782
 
1783
 
1784
/*
1785
 *      lbmFree()
1786
 *
1787
 * release a log buffer to freelist
1788
 */
1789
/*
 *	lbmFree()
 *
 * release a log buffer to the freelist; locked wrapper around lbmfree()
 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long flags;

	LCACHE_LOCK(flags);
	lbmfree(bp);
	LCACHE_UNLOCK(flags);
}
1799
 
1800
static void lbmfree(struct lbuf * bp)
1801
{
1802
        struct jfs_log *log = bp->l_log;
1803
 
1804
        assert(bp->l_wqnext == NULL);
1805
 
1806
        /*
1807
         * return the buffer to head of freelist
1808
         */
1809
        bp->l_freelist = log->lbuf_free;
1810
        log->lbuf_free = bp;
1811
 
1812
        wake_up(&log->free_wait);
1813
        return;
1814
}
1815
 
1816
 
1817
/*
1818
 * NAME:        lbmRedrive
1819
 *
1820
 * FUNCTION:    add a log buffer to the log redrive list
1821
 *
1822
 * PARAMETER:
1823
 *     bp       - log buffer
1824
 *
1825
 * NOTES:
1826
 *      Takes log_redrive_lock.
1827
 */
1828
static inline void lbmRedrive(struct lbuf *bp)
1829
{
1830
        unsigned long flags;
1831
 
1832
        spin_lock_irqsave(&log_redrive_lock, flags);
1833
        bp->l_redrive_next = log_redrive_list;
1834
        log_redrive_list = bp;
1835
        spin_unlock_irqrestore(&log_redrive_lock, flags);
1836
 
1837
        wake_up(&jfs_IO_thread_wait);
1838
}
1839
 
1840
 
1841
/*
1842
 *      lbmRead()
1843
 */
1844
/*
 * lbmRead() - synchronously read log page pn from the log device into a
 * freshly allocated log buffer, returned through *bpp.  Always returns 0;
 * i/o errors are recorded in bp->l_flag (lbmERROR) by lbmIODone().
 */
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
{
	struct lbuf *bp;

	/*
	 * allocate a log buffer
	 */
	*bpp = bp = lbmAllocate(log, pn);
	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);

	/* mark pagein in progress; lbmIODone() clears lbmREAD on completion */
	bp->l_flag |= lbmREAD;
	bp->l_bh.b_reqnext = NULL;
	clear_bit(BH_Uptodate, &bp->l_bh.b_state);
	lock_buffer(&bp->l_bh);
	set_bit(BH_Mapped, &bp->l_bh.b_state);
	set_bit(BH_Req, &bp->l_bh.b_state);
	bp->l_bh.b_rdev = bp->l_bh.b_dev;
	/* device sector address: log block number scaled to 512-byte units */
	bp->l_bh.b_rsector = bp->l_blkno << (log->l2bsize - 9);
	generic_make_request(READ, &bp->l_bh);
	run_task_queue(&tq_disk);

	/* synchronous read: sleep until lbmIODone() clears lbmREAD */
	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

	return 0;
}
1869
 
1870
 
1871
/*
1872
 *      lbmWrite()
1873
 *
1874
 * buffer at head of pageout queue stays after completion of
1875
 * partial-page pageout and redriven by explicit initiation of
1876
 * pageout by caller until full-page pageout is completed and
1877
 * released.
1878
 *
1879
 * device driver i/o done redrives pageout of new buffer at
1880
 * head of pageout queue when current buffer at head of pageout
1881
 * queue is released at the completion of its full-page pageout.
1882
 *
1883
 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
1884
 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
1885
 */
1886
/*
 * lbmWrite() - queue a log buffer for pageout on the log's serial write
 * queue and, when it is at the head of the queue, initiate the i/o.
 * cant_block defers i/o submission to the jfsIO thread via lbmRedrive().
 * Caller holds LOGGC_LOCK (see serialization note above).
 */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
		     int cant_block)
{
	struct lbuf *tail;
	unsigned long flags;

	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);		/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 *	insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue (circular singly-linked list:
		 * log->wqueue points at the tail, tail->l_wqnext is the head)
		 */
		if (tail == NULL) {
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		/* not our turn (or nothing to write) yet: i/o will be
		 * redriven from lbmIODone() when the head buffer completes
		 */
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		/* caller cannot block: hand the i/o to the jfsIO thread */
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		/* drop the group-commit lock around i/o submission */
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
1946
 
1947
 
1948
/*
1949
 *      lbmDirectWrite()
1950
 *
1951
 * initiate pageout bypassing write queue for sidestream
1952
 * (e.g., log superblock) write;
1953
 */
1954
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
1955
{
1956
        jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
1957
                 bp, flag, bp->l_pn);
1958
 
1959
        /*
1960
         * initialize buffer for device driver
1961
         */
1962
        bp->l_flag = flag | lbmDIRECT;
1963
 
1964
        /* map the logical block address to physical block address */
1965
        bp->l_blkno =
1966
            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
1967
 
1968
        /*
1969
         *      initiate pageout of the page
1970
         */
1971
        lbmStartIO(bp);
1972
}
1973
 
1974
 
1975
/*
1976
 * NAME:        lbmStartIO()
1977
 *
1978
 * FUNCTION:    Interface to DD strategy routine
1979
 *
1980
 * RETURN:      none
1981
 *
1982
 * serialization: LCACHE_LOCK() is NOT held during log i/o;
1983
 */
1984
/*
 * lbmStartIO() - hand the log buffer to the block layer for WRITE
 * (or fake immediate completion when the log runs with no_integrity).
 * LCACHE_LOCK is NOT held during log i/o.
 */
static void lbmStartIO(struct lbuf * bp)
{
	jfs_info("lbmStartIO");

	bp->l_bh.b_reqnext = NULL;
	set_bit(BH_Dirty, &bp->l_bh.b_state);
//      lock_buffer(&bp->l_bh);
	/* take the buffer lock bit directly instead of lock_buffer();
	 * lbmIODone() releases it via unlock_buffer()
	 */
	assert(!test_bit(BH_Lock, &bp->l_bh.b_state));
	set_bit(BH_Lock, &bp->l_bh.b_state);

	set_bit(BH_Mapped, &bp->l_bh.b_state);
	set_bit(BH_Req, &bp->l_bh.b_state);
	bp->l_bh.b_rdev = bp->l_bh.b_dev;
	/* device sector address in 512-byte units */
	bp->l_bh.b_rsector = bp->l_blkno << (bp->l_log->l2bsize - 9);

	if (bp->l_log->no_integrity)
		/* don't really do I/O */
		lbmIODone(&bp->l_bh, 1);
	 else
		generic_make_request(WRITE, &bp->l_bh);

	INCREMENT(lmStat.submitted);
	run_task_queue(&tq_disk);
}
2008
 
2009
 
2010
/*
2011
 *      lbmIOWait()
2012
 */
2013
static int lbmIOWait(struct lbuf * bp, int flag)
2014
{
2015
        unsigned long flags;
2016
        int rc = 0;
2017
 
2018
        jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2019
 
2020
        LCACHE_LOCK(flags);             /* disable+lock */
2021
 
2022
        LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2023
 
2024
        rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2025
 
2026
        if (flag & lbmFREE)
2027
                lbmfree(bp);
2028
 
2029
        LCACHE_UNLOCK(flags);   /* unlock+enable */
2030
 
2031
        jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2032
        return rc;
2033
}
2034
 
2035
/*
2036
 *      lbmIODone()
2037
 *
2038
 * executed at INTIODONE level
2039
 */
2040
/*
 * lbmIODone() - i/o completion handler for log buffers (b_end_io),
 * executed at INTIODONE level: wakes pagein waiters, advances the
 * committed lsn on pageout, dequeues released buffers and redrives
 * the next queued pageout, then disposes of the buffer according to
 * its mode flags (lbmSYNC / lbmGC / asynchronous).
 */
static void lbmIODone(struct buffer_head *bh, int uptodate)
{
	struct lbuf *bp = bh->b_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	unlock_buffer(&bp->l_bh);
	bp->l_flag |= lbmDONE;

	if (!uptodate) {
		/* record the failure; lbmIOWait() translates this to -EIO */
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	/*
	 *	pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 *	pageout completion
	 *
	 * the bp at the head of write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from head of pageout queue, and redrive pageout with
	 * the new buffer at head of pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

	/* sidestream (direct) write: never on the write queue */
	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of next page at head of write queue:
			 * redrive next page without any bound tblk
			 * (i.e., page w/o any COMMIT records), or
			 * first page of new group commit which has been
			 * queued after current page (subsequent pageout
			 * is performed synchronously, except page without
			 * any COMMITs) by lmGroupCommit() as indicated
			 * by lbmWRITE flag;
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 *	synchronous pageout:
	 *
	 * buffer has not necessarily been removed from write queue
	 * (e.g., synchronous write of partial-page with COMMIT):
	 * leave buffer for i/o initiator to dispose
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 *	Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 *	asynchronous pageout:
	 *
	 * buffer must have been removed from write queue:
	 * insert buffer at head of freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}
2179
 
2180
/*
 * jfsIOWait() - body of the "jfsIO" kernel thread: drains the global
 * log_redrive_list, submitting each queued buffer via lbmStartIO() in
 * process context, then sleeps on jfs_IO_thread_wait until lbmRedrive()
 * wakes it.  Exits when jfs_stop_threads is set.
 */
int jfsIOWait(void *arg)
{
	struct lbuf *bp;

	lock_kernel();

	/* detach from the spawning process and name the thread */
	daemonize();
	current->tty = NULL;
	strcpy(current->comm, "jfsIO");

	unlock_kernel();

	/* block all signals for this kernel thread */
	spin_lock_irq(&current->sigmask_lock);
	sigfillset(&current->blocked);
	recalc_sigpending(current);
	spin_unlock_irq(&current->sigmask_lock);

	/* signal the spawner that startup is complete */
	complete(&jfsIOwait);

	do {
		DECLARE_WAITQUEUE(wq, current);

		spin_lock_irq(&log_redrive_lock);
		while ((bp = log_redrive_list)) {
			log_redrive_list = bp->l_redrive_next;
			bp->l_redrive_next = NULL;
			/* drop the lock around the (blocking) i/o start */
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(bp);
			spin_lock_irq(&log_redrive_lock);
		}
		/* enqueue on the wait queue before releasing the lock so a
		 * concurrent lbmRedrive() wakeup is not lost
		 */
		add_wait_queue(&jfs_IO_thread_wait, &wq);
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irq(&log_redrive_lock);
		schedule();
		current->state = TASK_RUNNING;
		remove_wait_queue(&jfs_IO_thread_wait, &wq);
	} while (!jfs_stop_threads);

	jfs_info("jfsIOWait being killed!");
	complete_and_exit(&jfsIOwait, 0);
}
2221
 
2222
/*
2223
 * NAME:        lmLogFormat()/jfs_logform()
2224
 *
2225
 * FUNCTION:    format file system log
2226
 *
2227
 * PARAMETERS:
2228
 *      log     - volume log
2229
 *      logAddress - start address of log space in FS block
2230
 *      logSize - length of log space in FS block;
2231
 *
2232
 * RETURN:      0        - success
2233
 *              -EIO    - i/o error
2234
 *
2235
 * XXX: We're synchronously writing one page at a time.  This needs to
2236
 *      be improved by writing multiple pages at once.
2237
 */
2238
/*
 * lmLogFormat() - format the on-device log space: writes the log
 * superblock (page 1), then an initial data page carrying a SYNCPT
 * record, then empty data pages for the remainder of the circular log.
 * Each page is written synchronously via a direct (sidestream) write.
 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi = JFS_SBI(log->sb);
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	/* allocate a log buffer (reused for every page written below) */
	bp = lbmAllocate(log, 1);

	/* total log pages covered by logSize */
	npages = logSize >> sbi->l2nbperpage;

	/*
	 *	log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: A SYNC log record is written
	 *          into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 *	init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	/* synchronous direct write of the superblock to page 1 */
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lpsn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log is maintained as
	 * a circular file for the log records;
	 * lpsn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE. In order for binary search
	 * still work in find log end process, we have to simulate the
	 * log wrap situation at the log format time.
	 * The 1st log page written will have the highest lpsn. Then
	 * the succeeding log pages will have ascending order of
	 * the lspn starting from 0, ... (N-2)
	 */
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize 1st log page to be written: lpsn = N - 1,
	 * write a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	/* place a single SYNCPT record at the start of the page's data */
	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 *	finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}
2353
 
2354
#ifdef CONFIG_JFS_STATISTICS
2355
/*
 * jfs_lmstats_read() - /proc read handler reporting log manager
 * counters from lmStat; follows the standard read_proc truncation
 * protocol (start/offset/length/eof).
 */
int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
		      int *eof, void *data)
{
	off_t begin = offset;
	int len;

	len = sprintf(buffer,
		      "JFS Logmgr stats\n"
		      "================\n"
		      "commits = %d\n"
		      "writes submitted = %d\n"
		      "writes completed = %d\n"
		      "full pages submitted = %d\n"
		      "partial pages submitted = %d\n",
		      lmStat.commit,
		      lmStat.submitted,
		      lmStat.pagedone,
		      lmStat.full_page,
		      lmStat.partial_page);

	/* window the formatted text per the read_proc protocol */
	*start = buffer + begin;
	len -= begin;

	if (len > length) {
		len = length;
	} else {
		*eof = 1;
	}

	if (len < 0)
		len = 0;

	return len;
}
2389
#endif /* CONFIG_JFS_STATISTICS */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.