OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

or1k/trunk/linux/linux-2.4/fs/jfs/jfs_txnmgr.c (rev 1765)

/*
 *   Copyright (C) International Business Machines Corp., 2000-2003
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *      jfs_txnmgr.c: transaction manager
 *
 * notes:
 * transaction starts with txBegin() and ends with txCommit()
 * or txAbort().
 *
 * tlock is acquired at the time of update;
 * (obviates scan at commit time for xtree and dtree)
 * tlock and mp point to each other;
 * (no hashlist for mp -> tlock).
 *
 * special cases:
 * tlock on in-memory inode:
 * in-place tlock in the in-memory inode itself;
 * converted to page lock by iWrite() at commit time.
 *
 * tlock during write()/mmap() under anonymous transaction (tid = 0):
 * transferred (?) to transaction at commit time.
 *
 * use the page itself to update allocation maps
 * (obviates intermediate replication of allocation/deallocation data)
 * hold on to mp+lock thru update of maps
 */
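
/*
 * Usage sketch: the typical life cycle of a journalled update built
 * from the interfaces below, assuming a hypothetical caller that
 * modifies a single inode under one metapage (error handling and the
 * page update itself omitted):
 *
 *	tid = txBegin(ip->i_sb, 0);		// reserve a tid/tblock
 *	tlck = txLock(tid, ip, mp, tlckXTREE);	// tlock at update time
 *	... modify the metapage under the tlock ...
 *	iplist[0] = ip;
 *	rc = txCommit(tid, 1, iplist, 0);	// write log + COMMIT record
 *	txEnd(tid);				// release the tblock
 */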


#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/smp_lock.h>
#include <linux/completion.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dinode.h"
#include "jfs_imap.h"
#include "jfs_dmap.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 *      transaction management structures
 */
static struct {
        /* tblock */
        int freetid;            /* index of a free tid structure */
        wait_queue_head_t freewait;     /* eventlist of free tblock */

        /* tlock */
        int freelock;           /* index of first free lock word */
        wait_queue_head_t freelockwait; /* eventlist of free tlock */
        wait_queue_head_t lowlockwait;  /* eventlist of ample tlocks */
        int tlocksInUse;        /* Number of tlocks in use */
        int TlocksLow;          /* Indicates low number of available tlocks */
        spinlock_t LazyLock;    /* synchronize sync_queue & unlock_queue */
/*      struct tblock *sync_queue; * Transactions waiting for data sync */
        struct tblock *unlock_queue;    /* Txns waiting to be released */
        struct tblock *unlock_tail;     /* Tail of unlock_queue */
        struct list_head anon_list;     /* inodes having anonymous txns */
        struct list_head anon_list2;    /* inodes having anonymous txns
                                           that couldn't be sync'ed */
} TxAnchor;

#ifdef CONFIG_JFS_STATISTICS
struct {
        uint txBegin;
        uint txBegin_barrier;
        uint txBegin_lockslow;
        uint txBegin_freetid;
        uint txBeginAnon;
        uint txBeginAnon_barrier;
        uint txBeginAnon_lockslow;
        uint txLockAlloc;
        uint txLockAlloc_freelock;
} TxStat;
#endif

static int nTxBlock = 512;      /* number of transaction blocks */
struct tblock *TxBlock;         /* transaction block table */

static int nTxLock = 4096;      /* number of transaction locks */
static int TxLockLWM = 4096*.4; /* Low water mark for number of txLocks used */
static int TxLockHWM = 4096*.8; /* High water mark for number of txLocks used */
struct tlock *TxLock;           /* transaction lock table */
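
/*
 * With the defaults above, the watermarks evaluate to
 * TxLockLWM = (int)(4096 * .4) = 1638 and
 * TxLockHWM = (int)(4096 * .8) = 3276.
 */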


/*
 *      transaction management lock
 */
static spinlock_t jfsTxnLock = SPIN_LOCK_UNLOCKED;

#define TXN_LOCK()              spin_lock(&jfsTxnLock)
#define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)

#define LAZY_LOCK_INIT()        spin_lock_init(&TxAnchor.LazyLock);
#define LAZY_LOCK(flags)        spin_lock_irqsave(&TxAnchor.LazyLock, flags)
#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)

DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait);
DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);

/*
 * Retry logic exists outside these macros to protect against spurious wakeups.
 */
static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue(event, &wait);
        set_current_state(TASK_UNINTERRUPTIBLE);
        TXN_UNLOCK();
        schedule();
        current->state = TASK_RUNNING;
        remove_wait_queue(event, &wait);
}

#define TXN_SLEEP(event)\
{\
        TXN_SLEEP_DROP_LOCK(event);\
        TXN_LOCK();\
}

#define TXN_WAKEUP(event) wake_up_all(event)
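
/*
 * Because TXN_SLEEP() can return on a spurious wakeup, callers re-test
 * their condition in a loop; txLockAlloc() below is the canonical
 * example:
 *
 *	while (!(lid = TxAnchor.freelock))
 *		TXN_SLEEP(&TxAnchor.freelockwait);
 */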


/*
 *      statistics
 */
struct {
        tid_t maxtid;           /* 4: biggest tid ever used */
        lid_t maxlid;           /* 4: biggest lid ever used */
        int ntid;               /* 4: # of transactions performed */
        int nlid;               /* 4: # of tlocks acquired */
        int waitlock;           /* 4: # of tlock wait */
} stattx;


/*
 * external references
 */
extern int lmGroupCommit(struct jfs_log *, struct tblock *);
extern void lmSync(struct jfs_log *);
extern int jfs_commit_inode(struct inode *, int);
extern int jfs_stop_threads;

struct task_struct *jfsCommitTask;
extern struct completion jfsIOwait;

/*
 * forward references
 */
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                struct tlock * tlck, struct commit * cd);
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                struct tlock * tlck);
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                struct tlock * tlck);
static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                struct tlock * tlck);
static void txAbortCommit(struct commit * cd);
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
                struct tblock * tblk);
static void txForce(struct tblock * tblk);
static int txLog(struct jfs_log * log, struct tblock * tblk,
                struct commit * cd);
static void txUpdateMap(struct tblock * tblk);
static void txRelease(struct tblock * tblk);
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
           struct tlock * tlck);
static void LogSyncRelease(struct metapage * mp);

/*
 *              transaction block/lock management
 *              ---------------------------------
 */

/*
 * Get a transaction lock from the free list.  If the number in use is
 * greater than the high water mark, wake up the sync daemon.  This should
 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 */
static lid_t txLockAlloc(void)
{
        lid_t lid;

        INCREMENT(TxStat.txLockAlloc);
        if (!TxAnchor.freelock) {
                INCREMENT(TxStat.txLockAlloc_freelock);
        }

        while (!(lid = TxAnchor.freelock))
                TXN_SLEEP(&TxAnchor.freelockwait);
        TxAnchor.freelock = TxLock[lid].next;
        HIGHWATERMARK(stattx.maxlid, lid);
        if ((++TxAnchor.tlocksInUse > TxLockHWM) && (TxAnchor.TlocksLow == 0)) {
                jfs_info("txLockAlloc TlocksLow");
                TxAnchor.TlocksLow = 1;
                wake_up(&jfs_sync_thread_wait);
        }

        return lid;
}

static void txLockFree(lid_t lid)
{
        TxLock[lid].next = TxAnchor.freelock;
        TxAnchor.freelock = lid;
        TxAnchor.tlocksInUse--;
        if (TxAnchor.TlocksLow && (TxAnchor.tlocksInUse < TxLockLWM)) {
                jfs_info("txLockFree TlocksLow no more");
                TxAnchor.TlocksLow = 0;
                TXN_WAKEUP(&TxAnchor.lowlockwait);
        }
        TXN_WAKEUP(&TxAnchor.freelockwait);
}
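
/*
 * Sketch of the calling convention (hypothetical caller): both routines
 * above run with TXN_LOCK held, as txLock() and txUnlock() below do:
 *
 *	TXN_LOCK();
 *	lid = txLockAlloc();	// may sleep; TXN_LOCK held again on return
 *	...
 *	txLockFree(lid);
 *	TXN_UNLOCK();
 */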

/*
 * NAME:        txInit()
 *
 * FUNCTION:    initialize transaction management structures
 *
 * RETURN:
 *
 * serialization: single thread at jfs_init()
 */
int txInit(void)
{
        int k, size;

        /*
         * initialize transaction block (tblock) table
         *
         * transaction id (tid) = tblock index
         * tid = 0 is reserved.
         */
        size = sizeof(struct tblock) * nTxBlock;
        TxBlock = (struct tblock *) vmalloc(size);
        if (TxBlock == NULL)
                return -ENOMEM;

        for (k = 1; k < nTxBlock - 1; k++) {
                TxBlock[k].next = k + 1;
                init_waitqueue_head(&TxBlock[k].gcwait);
                init_waitqueue_head(&TxBlock[k].waitor);
        }
        TxBlock[k].next = 0;
        init_waitqueue_head(&TxBlock[k].gcwait);
        init_waitqueue_head(&TxBlock[k].waitor);

        TxAnchor.freetid = 1;
        init_waitqueue_head(&TxAnchor.freewait);

        stattx.maxtid = 1;      /* statistics */

        /*
         * initialize transaction lock (tlock) table
         *
         * transaction lock id = tlock index
         * tlock id = 0 is reserved.
         */
        size = sizeof(struct tlock) * nTxLock;
        TxLock = (struct tlock *) vmalloc(size);
        if (TxLock == NULL) {
                vfree(TxBlock);
                return -ENOMEM;
        }

        /* initialize tlock table */
        for (k = 1; k < nTxLock - 1; k++)
                TxLock[k].next = k + 1;
        TxLock[k].next = 0;
        init_waitqueue_head(&TxAnchor.freelockwait);
        init_waitqueue_head(&TxAnchor.lowlockwait);

        TxAnchor.freelock = 1;
        TxAnchor.tlocksInUse = 0;
        INIT_LIST_HEAD(&TxAnchor.anon_list);
        INIT_LIST_HEAD(&TxAnchor.anon_list2);

        stattx.maxlid = 1;      /* statistics */

        return 0;
}

/*
 * NAME:        txExit()
 *
 * FUNCTION:    clean up when module is unloaded
 */
void txExit(void)
{
        vfree(TxLock);
        TxLock = 0;
        vfree(TxBlock);
        TxBlock = 0;
}


/*
 * NAME:        txBegin()
 *
 * FUNCTION:    start a transaction.
 *
 * PARAMETER:   sb      - superblock
 *              flag    - force for nested tx;
 *
 * RETURN:      tid     - transaction id
 *
 * note: the force flag allows starting a tx for a nested tx
 * to prevent deadlock on logsync barrier;
 */
tid_t txBegin(struct super_block *sb, int flag)
{
        tid_t t;
        struct tblock *tblk;
        struct jfs_log *log;

        jfs_info("txBegin: flag = 0x%x", flag);
        log = JFS_SBI(sb)->log;

        TXN_LOCK();

        INCREMENT(TxStat.txBegin);

      retry:
        if (!(flag & COMMIT_FORCE)) {
                /*
                 * synchronize with logsync barrier
                 */
                if (test_bit(log_SYNCBARRIER, &log->flag) ||
                    test_bit(log_QUIESCE, &log->flag)) {
                        INCREMENT(TxStat.txBegin_barrier);
                        TXN_SLEEP(&log->syncwait);
                        goto retry;
                }
        }
        if (flag == 0) {
                /*
                 * Don't begin transaction if we're getting starved for tlocks
                 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
                 * free tlocks)
                 */
                if (TxAnchor.TlocksLow) {
                        INCREMENT(TxStat.txBegin_lockslow);
                        TXN_SLEEP(&TxAnchor.lowlockwait);
                        goto retry;
                }
        }

        /*
         * allocate transaction id/block
         */
        if ((t = TxAnchor.freetid) == 0) {
                jfs_info("txBegin: waiting for free tid");
                INCREMENT(TxStat.txBegin_freetid);
                TXN_SLEEP(&TxAnchor.freewait);
                goto retry;
        }

        tblk = tid_to_tblock(t);

        if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
                /* Don't let a non-forced transaction take the last tblk */
                jfs_info("txBegin: waiting for free tid");
                INCREMENT(TxStat.txBegin_freetid);
                TXN_SLEEP(&TxAnchor.freewait);
                goto retry;
        }

        TxAnchor.freetid = tblk->next;

        /*
         * initialize transaction
         */

        /*
         * We can't zero the whole thing or we screw up another thread being
         * awakened after sleeping on tblk->waitor
         *
         * memset(tblk, 0, sizeof(struct tblock));
         */
        tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;

        tblk->sb = sb;
        ++log->logtid;
        tblk->logtid = log->logtid;

        ++log->active;

        HIGHWATERMARK(stattx.maxtid, t);        /* statistics */
        INCREMENT(stattx.ntid); /* statistics */

        TXN_UNLOCK();

        jfs_info("txBegin: returning tid = %d", t);

        return t;
}


/*
 * NAME:        txBeginAnon()
 *
 * FUNCTION:    start an anonymous transaction.
 *              Blocks if logsync or available tlocks are low to prevent
 *              anonymous tlocks from depleting supply.
 *
 * PARAMETER:   sb      - superblock
 *
 * RETURN:      none
 */
void txBeginAnon(struct super_block *sb)
{
        struct jfs_log *log;

        log = JFS_SBI(sb)->log;

        TXN_LOCK();
        INCREMENT(TxStat.txBeginAnon);

      retry:
        /*
         * synchronize with logsync barrier
         */
        if (test_bit(log_SYNCBARRIER, &log->flag) ||
            test_bit(log_QUIESCE, &log->flag)) {
                INCREMENT(TxStat.txBeginAnon_barrier);
                TXN_SLEEP(&log->syncwait);
                goto retry;
        }

        /*
         * Don't begin transaction if we're getting starved for tlocks
         */
        if (TxAnchor.TlocksLow) {
                INCREMENT(TxStat.txBeginAnon_lockslow);
                TXN_SLEEP(&TxAnchor.lowlockwait);
                goto retry;
        }
        TXN_UNLOCK();
}


/*
 *      txEnd()
 *
 * function: free specified transaction block.
 *
 *      logsync barrier processing:
 *
 * serialization:
 */
void txEnd(tid_t tid)
{
        struct tblock *tblk = tid_to_tblock(tid);
        struct jfs_log *log;

        jfs_info("txEnd: tid = %d", tid);
        TXN_LOCK();

        /*
         * wakeup transactions waiting on the page locked
         * by the current transaction
         */
        TXN_WAKEUP(&tblk->waitor);

        log = JFS_SBI(tblk->sb)->log;

        /*
         * Lazy commit thread can't free this guy until we mark it UNLOCKED,
         * otherwise, we would be left with a transaction that may have been
         * reused.
         *
         * Lazy commit thread will turn off tblkGC_LAZY before calling this
         * routine.
         */
        if (tblk->flag & tblkGC_LAZY) {
                jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
                TXN_UNLOCK();

                spin_lock_irq(&log->gclock);    // LOGGC_LOCK
                tblk->flag |= tblkGC_UNLOCKED;
                spin_unlock_irq(&log->gclock);  // LOGGC_UNLOCK
                return;
        }

        jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);

        assert(tblk->next == 0);

        /*
         * insert tblock back on freelist
         */
        tblk->next = TxAnchor.freetid;
        TxAnchor.freetid = tid;

        /*
         * mark the tblock not active
         */
        if (--log->active == 0) {
                clear_bit(log_FLUSH, &log->flag);

                /*
                 * synchronize with logsync barrier
                 */
                if (test_bit(log_SYNCBARRIER, &log->flag)) {
                        /* forward log syncpt */
                        /* lmSync(log); */

                        jfs_info("log barrier off: 0x%x", log->lsn);

                        /* enable new transactions to start */
                        clear_bit(log_SYNCBARRIER, &log->flag);

                        /* wakeup all waiters for logsync barrier */
                        TXN_WAKEUP(&log->syncwait);
                }
        }

        /*
         * wakeup all waiters for a free tblock
         */
        TXN_WAKEUP(&TxAnchor.freewait);

        TXN_UNLOCK();
}


/*
 *      txLock()
 *
 * function: acquire a transaction lock on the specified <mp>
 *
 * parameter:
 *
 * return:      transaction lock id
 *
 * serialization:
 */
struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
                     int type)
{
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
        int dir_xtree = 0;
        lid_t lid;
        tid_t xtid;
        struct tlock *tlck;
        struct xtlock *xtlck;
        struct linelock *linelock;
        xtpage_t *p;
        struct tblock *tblk;

        assert(!test_cflag(COMMIT_Nolink, ip));

        TXN_LOCK();

        if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
            !(mp->xflag & COMMIT_PAGE)) {
                /*
                 * Directory inode is special.  It can have both an xtree tlock
                 * and a dtree tlock associated with it.
                 */
                dir_xtree = 1;
                lid = jfs_ip->xtlid;
        } else
                lid = mp->lid;

        /* is page not locked by a transaction ? */
        if (lid == 0)
                goto allocateLock;

        jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);

        /* is page locked by the requester transaction ? */
        tlck = lid_to_tlock(lid);
        if ((xtid = tlck->tid) == tid)
                goto grantLock;

        /*
         * is page locked by anonymous transaction/lock ?
         *
         * (page update without transaction (i.e., file write) is
         * locked under anonymous transaction tid = 0:
         * anonymous tlocks maintained on anonymous tlock list of
         * the inode of the page and available to all anonymous
         * transactions until txCommit() time at which point
         * they are transferred to the transaction tlock list of
         * the committing transaction of the inode)
         */
        if (xtid == 0) {
                tlck->tid = tid;
                tblk = tid_to_tblock(tid);
                /*
                 * The order of the tlocks in the transaction is important
                 * (during truncate, child xtree pages must be freed before
                 * parent's tlocks change the working map).
                 * Take tlock off anonymous list and add to tail of
                 * transaction list
                 *
                 * Note:  We really need to get rid of the tid & lid and
                 * use list_head's.  This code is getting UGLY!
                 */
                if (jfs_ip->atlhead == lid) {
                        if (jfs_ip->atltail == lid) {
                                /* only anonymous txn.
                                 * Remove from anon_list
                                 */
                                list_del_init(&jfs_ip->anon_inode_list);
                        }
                        jfs_ip->atlhead = tlck->next;
                } else {
                        lid_t last;
                        for (last = jfs_ip->atlhead;
                             lid_to_tlock(last)->next != lid;
                             last = lid_to_tlock(last)->next) {
                                assert(last);
                        }
                        lid_to_tlock(last)->next = tlck->next;
                        if (jfs_ip->atltail == lid)
                                jfs_ip->atltail = last;
                }

                /* insert the tlock at tail of transaction tlock list */

                if (tblk->next)
                        lid_to_tlock(tblk->last)->next = lid;
                else
                        tblk->next = lid;
                tlck->next = 0;
                tblk->last = lid;

                goto grantLock;
        }

        goto waitLock;

        /*
         * allocate a tlock
         */
      allocateLock:
        lid = txLockAlloc();
        tlck = lid_to_tlock(lid);

        /*
         * initialize tlock
         */
        tlck->tid = tid;

        /* mark tlock for meta-data page */
        if (mp->xflag & COMMIT_PAGE) {

                tlck->flag = tlckPAGELOCK;

                /* mark the page dirty and nohomeok */
                mark_metapage_dirty(mp);
                atomic_inc(&mp->nohomeok);

                jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
                         mp, atomic_read(&mp->nohomeok), tid, tlck);

                /* if anonymous transaction, and buffer is on the group
                 * commit synclist, mark inode to show this.  This will
                 * prevent the buffer from being marked nohomeok for too
                 * long a time.
                 */
                if ((tid == 0) && mp->lsn)
                        set_cflag(COMMIT_Synclist, ip);
        }
        /* mark tlock for in-memory inode */
        else
                tlck->flag = tlckINODELOCK;

        tlck->type = 0;

        /* bind the tlock and the page */
        tlck->ip = ip;
        tlck->mp = mp;
        if (dir_xtree)
                jfs_ip->xtlid = lid;
        else
                mp->lid = lid;

        /*
         * enqueue transaction lock to transaction/inode
         */
        /* insert the tlock at tail of transaction tlock list */
        if (tid) {
                tblk = tid_to_tblock(tid);
                if (tblk->next)
                        lid_to_tlock(tblk->last)->next = lid;
                else
                        tblk->next = lid;
                tlck->next = 0;
                tblk->last = lid;
        }
        /* anonymous transaction:
         * insert the tlock at head of inode anonymous tlock list
         */
        else {
                tlck->next = jfs_ip->atlhead;
                jfs_ip->atlhead = lid;
                if (tlck->next == 0) {
                        /* This inode's first anonymous transaction */
                        jfs_ip->atltail = lid;
                        list_add_tail(&jfs_ip->anon_inode_list,
                                      &TxAnchor.anon_list);
                }
        }

        /* initialize type dependent area for linelock */
        linelock = (struct linelock *) & tlck->lock;
        linelock->next = 0;
        linelock->flag = tlckLINELOCK;
        linelock->maxcnt = TLOCKSHORT;
        linelock->index = 0;

        switch (type & tlckTYPE) {
        case tlckDTREE:
                linelock->l2linesize = L2DTSLOTSIZE;
                break;

        case tlckXTREE:
                linelock->l2linesize = L2XTSLOTSIZE;

                xtlck = (struct xtlock *) linelock;
                xtlck->header.offset = 0;
                xtlck->header.length = 2;

                if (type & tlckNEW) {
                        xtlck->lwm.offset = XTENTRYSTART;
                } else {
                        if (mp->xflag & COMMIT_PAGE)
                                p = (xtpage_t *) mp->data;
                        else
                                p = &jfs_ip->i_xtroot;
                        xtlck->lwm.offset =
                            le16_to_cpu(p->header.nextindex);
                }
                xtlck->lwm.length = 0;   /* ! */
                xtlck->twm.offset = 0;
                xtlck->hwm.offset = 0;

                xtlck->index = 2;
                break;

        case tlckINODE:
                linelock->l2linesize = L2INODESLOTSIZE;
                break;

        case tlckDATA:
                linelock->l2linesize = L2DATASLOTSIZE;
                break;

        default:
                jfs_err("UFO tlock:0x%p", tlck);
        }

        /*
         * update tlock vector
         */
      grantLock:
        tlck->type |= type;

        TXN_UNLOCK();

        return tlck;

        /*
         * page is being locked by another transaction:
         */
      waitLock:
        /* Only locks on ipimap or ipaimap should reach here */
        /* assert(jfs_ip->fileset == AGGREGATE_I); */
        if (jfs_ip->fileset != AGGREGATE_I) {
                jfs_err("txLock: trying to lock locked page!");
                dump_mem("ip", ip, sizeof(struct inode));
                dump_mem("mp", mp, sizeof(struct metapage));
                dump_mem("Locker's tblk", tid_to_tblock(tid),
                         sizeof(struct tblock));
                dump_mem("Tlock", tlck, sizeof(struct tlock));
                BUG();
        }
        INCREMENT(stattx.waitlock);     /* statistics */
        release_metapage(mp);

        jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
                 tid, xtid, lid);
        TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
        jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);

        return NULL;
}


/*
 * NAME:        txRelease()
 *
 * FUNCTION:    Release buffers associated with transaction locks, but don't
 *              mark homeok yet.  This allows other transactions to modify
 *              buffers, but won't let them go to disk until the commit record
 *              actually gets written.
 *
 * PARAMETER:
 *              tblk    -
 *
 * RETURN:      Errors from subroutines.
 */
static void txRelease(struct tblock * tblk)
{
        struct metapage *mp;
        lid_t lid;
        struct tlock *tlck;

        TXN_LOCK();

        for (lid = tblk->next; lid; lid = tlck->next) {
                tlck = lid_to_tlock(lid);
                if ((mp = tlck->mp) != NULL &&
                    (tlck->type & tlckBTROOT) == 0) {
                        assert(mp->xflag & COMMIT_PAGE);
                        mp->lid = 0;
                }
        }

        /*
         * wakeup transactions waiting on a page locked
         * by the current transaction
         */
        TXN_WAKEUP(&tblk->waitor);

        TXN_UNLOCK();
}


/*
 * NAME:        txUnlock()
 *
 * FUNCTION:    Initiates pageout of pages modified by tid in journalled
 *              objects and frees their lockwords.
 */
static void txUnlock(struct tblock * tblk)
{
        struct tlock *tlck;
        struct linelock *linelock;
        lid_t lid, next, llid, k;
        struct metapage *mp;
        struct jfs_log *log;
        int difft, diffp;

        jfs_info("txUnlock: tblk = 0x%p", tblk);
        log = JFS_SBI(tblk->sb)->log;

        /*
         * mark page under tlock homeok (its log has been written):
         */
        for (lid = tblk->next; lid; lid = next) {
                tlck = lid_to_tlock(lid);
                next = tlck->next;

                jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);

                /* unbind page from tlock */
                if ((mp = tlck->mp) != NULL &&
                    (tlck->type & tlckBTROOT) == 0) {
                        assert(mp->xflag & COMMIT_PAGE);

                        /* hold buffer
                         *
                         * It's possible that someone else has the metapage.
                         * The only things we're changing are nohomeok, which
                         * is handled atomically, and clsn which is protected
                         * by the LOGSYNC_LOCK.
                         */
                        hold_metapage(mp, 1);

                        assert(atomic_read(&mp->nohomeok) > 0);
                        atomic_dec(&mp->nohomeok);

                        /* inherit younger/larger clsn */
                        LOGSYNC_LOCK(log);
                        if (mp->clsn) {
                                logdiff(difft, tblk->clsn, log);
                                logdiff(diffp, mp->clsn, log);
                                if (difft > diffp)
                                        mp->clsn = tblk->clsn;
                        } else
                                mp->clsn = tblk->clsn;
                        LOGSYNC_UNLOCK(log);

                        assert(!(tlck->flag & tlckFREEPAGE));

                        if (tlck->flag & tlckWRITEPAGE) {
                                write_metapage(mp);
                        } else {
                                /* release page which has been forced */
                                release_metapage(mp);
                        }
                }

                /* insert tlock, and linelock(s) of the tlock if any,
                 * at head of freelist
                 */
                TXN_LOCK();

                llid = ((struct linelock *) & tlck->lock)->next;
                while (llid) {
                        linelock = (struct linelock *) lid_to_tlock(llid);
                        k = linelock->next;
                        txLockFree(llid);
                        llid = k;
                }
                txLockFree(lid);

                TXN_UNLOCK();
        }
        tblk->next = tblk->last = 0;

        /*
         * remove tblock from logsynclist
         * (allocation map pages inherited lsn of tblk and
         * have been inserted in logsync list at txUpdateMap())
         */
        if (tblk->lsn) {
                LOGSYNC_LOCK(log);
                log->count--;
                list_del(&tblk->synclist);
                LOGSYNC_UNLOCK(log);
        }
}


/*
 *      txMaplock()
 *
 * function: allocate a transaction lock for freed page/entry;
 *      for freed page, maplock is used as xtlock/dtlock type;
 */
struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
{
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
        lid_t lid;
        struct tblock *tblk;
        struct tlock *tlck;
        struct maplock *maplock;

        TXN_LOCK();

        /*
         * allocate a tlock
         */
        lid = txLockAlloc();
        tlck = lid_to_tlock(lid);

        /*
         * initialize tlock
         */
        tlck->tid = tid;

        /* bind the tlock and the object */
        tlck->flag = tlckINODELOCK;
        tlck->ip = ip;
        tlck->mp = NULL;

        tlck->type = type;

        /*
         * enqueue transaction lock to transaction/inode
         */
        /* insert the tlock at tail of transaction tlock list */
        if (tid) {
                tblk = tid_to_tblock(tid);
                if (tblk->next)
                        lid_to_tlock(tblk->last)->next = lid;
                else
                        tblk->next = lid;
                tlck->next = 0;
                tblk->last = lid;
        }
        /* anonymous transaction:
         * insert the tlock at head of inode anonymous tlock list
         */
        else {
                tlck->next = jfs_ip->atlhead;
                jfs_ip->atlhead = lid;
                if (tlck->next == 0) {
                        /* This inode's first anonymous transaction */
                        jfs_ip->atltail = lid;
                        list_add_tail(&jfs_ip->anon_inode_list,
                                      &TxAnchor.anon_list);
                }
        }

        TXN_UNLOCK();

        /* initialize type dependent area for maplock */
        maplock = (struct maplock *) & tlck->lock;
        maplock->next = 0;
        maplock->maxcnt = 0;
        maplock->index = 0;

        return tlck;
}


/*
 *      txLinelock()
 *
 * function: allocate a transaction lock for log vector list
 */
struct linelock *txLinelock(struct linelock * tlock)
{
        lid_t lid;
        struct tlock *tlck;
        struct linelock *linelock;

        TXN_LOCK();

        /* allocate a TxLock structure */
        lid = txLockAlloc();
        tlck = lid_to_tlock(lid);

        TXN_UNLOCK();

        /* initialize linelock */
        linelock = (struct linelock *) tlck;
        linelock->next = 0;
        linelock->flag = tlckLINELOCK;
        linelock->maxcnt = TLOCKLONG;
        linelock->index = 0;

        /* append linelock after tlock */
        linelock->next = tlock->next;
        tlock->next = lid;

        return linelock;
}



/*
 *              transaction commit management
 *              -----------------------------
 */

/*
 * NAME:        txCommit()
 *
 * FUNCTION:    commit the changes to the objects specified in
 *              clist.  For journalled segments only the
 *              changes of the caller are committed, i.e., by tid.
 *              for non-journalled segments the data are flushed to
 *              disk and then the change to the disk inode and indirect
 *              blocks committed (so blocks newly allocated to the
 *              segment will be made a part of the segment atomically).
 *
 *              all of the segments specified in clist must be in
 *              one file system. no more than 6 segments are needed
 *              to handle all unix svcs.
 *
 *              if the i_nlink field (i.e. disk inode link count)
 *              is zero, and the type of inode is a regular file or
 *              directory, or symbolic link, the inode is truncated
 *              to zero length. the truncation is committed but the
 *              VM resources are unaffected until it is closed (see
 *              iput and iclose).
 *
 * PARAMETER:
 *
 * RETURN:
 *
 * serialization:
 *              on entry the inode lock on each segment is assumed
 *              to be held.
 *
 * i/o error:
 */
int txCommit(tid_t tid,         /* transaction identifier */
             int nip,           /* number of inodes to commit */
             struct inode **iplist,     /* list of inodes to commit */
             int flag)
{
        int rc = 0;
        struct commit cd;
        struct jfs_log *log;
        struct tblock *tblk;
        struct lrd *lrd;
        int lsn;
        struct inode *ip;
        struct jfs_inode_info *jfs_ip;
        int k, n;
        ino_t top;
        struct super_block *sb;

        jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
        /* is read-only file system ? */
        if (isReadOnly(iplist[0])) {
                rc = -EROFS;
                goto TheEnd;
        }

        sb = cd.sb = iplist[0]->i_sb;
        cd.tid = tid;

        if (tid == 0)
                tid = txBegin(sb, 0);
        tblk = tid_to_tblock(tid);

        /*
         * initialize commit structure
         */
        log = JFS_SBI(sb)->log;
        cd.log = log;

        /* initialize log record descriptor in commit */
        lrd = &cd.lrd;
        lrd->logtid = cpu_to_le32(tblk->logtid);
        lrd->backchain = 0;

        tblk->xflag |= flag;

        if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
                tblk->xflag |= COMMIT_LAZY;
        /*
         *      prepare non-journaled objects for commit
         *
         * flush data pages of non-journaled file
         * to prevent the file from getting non-initialized disk blocks
         * in case of crash.
         * (new blocks - )
         */
        cd.iplist = iplist;
        cd.nip = nip;

        /*
         *      acquire transaction lock on (on-disk) inodes
         *
         * update on-disk inode from in-memory inode
         * acquiring transaction locks for AFTER records
         * on the on-disk inode of file object
         *
         * sort the inodes array by inode number in descending order
         * to prevent deadlock when acquiring transaction lock
         * of on-disk inodes on multiple on-disk inode pages by
         * multiple concurrent transactions
         */
        for (k = 0; k < cd.nip; k++) {
                top = (cd.iplist[k])->i_ino;
                for (n = k + 1; n < cd.nip; n++) {
                        ip = cd.iplist[n];
                        if (ip->i_ino > top) {
                                top = ip->i_ino;
                                cd.iplist[n] = cd.iplist[k];
                                cd.iplist[k] = ip;
                        }
                }

                ip = cd.iplist[k];
                jfs_ip = JFS_IP(ip);

                if (test_and_clear_cflag(COMMIT_Syncdata, ip) &&
                    ((tblk->xflag & COMMIT_DELETE) == 0))
                        fsync_inode_data_buffers(ip);

                /*
                 * Mark inode as not dirty.  It will still be on the dirty
                 * inode list, but we'll know not to commit it again unless
                 * it gets marked dirty again
                 */
                clear_cflag(COMMIT_Dirty, ip);

                /* inherit anonymous tlock(s) of inode */
                if (jfs_ip->atlhead) {
                        lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
                        tblk->next = jfs_ip->atlhead;
                        if (!tblk->last)
                                tblk->last = jfs_ip->atltail;
                        jfs_ip->atlhead = jfs_ip->atltail = 0;
                        TXN_LOCK();
                        list_del_init(&jfs_ip->anon_inode_list);
                        TXN_UNLOCK();
                }

                /*
                 * acquire transaction lock on on-disk inode page
                 * (become first tlock of the tblk's tlock list)
                 */
                if ((rc = diWrite(tid, ip)))
                        goto out;
        }

        /*
         *      write log records from transaction locks
         *
         * txUpdateMap() resets XAD_NEW in XAD.
         */
        if ((rc = txLog(log, tblk, &cd)))
                goto TheEnd;

        /*
         * Ensure that inode isn't reused before
         * lazy commit thread finishes processing
         */
        if (tblk->xflag & (COMMIT_CREATE | COMMIT_DELETE)) {
                atomic_inc(&tblk->ip->i_count);
                /*
                 * Avoid a rare deadlock
                 *
                 * If the inode is locked, we may be blocked in
                 * jfs_commit_inode.  If so, we don't want the
                 * lazy_commit thread doing the last iput() on the inode
                 * since that may block on the locked inode.  Instead,
                 * commit the transaction synchronously, so the last iput
                 * will be done by the calling thread (or later)
                 */
                if (tblk->ip->i_state & I_LOCK)
                        tblk->xflag &= ~COMMIT_LAZY;
        }

        ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
               ((tblk->ip->i_nlink == 0) &&
                !test_cflag(COMMIT_Nolink, tblk->ip)));

        /*
         *      write COMMIT log record
         */
        lrd->type = cpu_to_le16(LOG_COMMIT);
        lrd->length = 0;
        lsn = lmLog(log, tblk, lrd, NULL);

        lmGroupCommit(log, tblk);

        /*
         *      - transaction is now committed -
         */

        /*
         * force pages in careful update
         * (imap addressing structure update)
         */
        if (flag & COMMIT_FORCE)
                txForce(tblk);

        /*
         *      update allocation map.
         *
         * update inode allocation map and inode:
         * free pager lock on memory object of inode if any.
         * update block allocation map.
         *
         * txUpdateMap() resets XAD_NEW in XAD.
         */
        if (tblk->xflag & COMMIT_FORCE)
                txUpdateMap(tblk);

        /*
         *      free transaction locks and pageout/free pages
         */
        txRelease(tblk);

        if ((tblk->flag & tblkGC_LAZY) == 0)
                txUnlock(tblk);


        /*
         *      reset in-memory object state
         */
        for (k = 0; k < cd.nip; k++) {
                ip = cd.iplist[k];
                jfs_ip = JFS_IP(ip);

                /*
                 * reset in-memory inode state
                 */
                jfs_ip->bxflag = 0;
                jfs_ip->blid = 0;
        }

      out:
        if (rc != 0)
                txAbortCommit(&cd);

      TheEnd:
        jfs_info("txCommit: tid = %d, returning %d", tid, rc);
        return rc;
}
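
/*
 * Illustrative caller (hypothetical names; error handling omitted):
 * committing two inodes at once, e.g. a directory and a file.
 * txCommit() itself re-sorts iplist[] by inode number, descending,
 * to avoid deadlock between concurrent transactions:
 *
 *	struct inode *iplist[2];
 *
 *	tid = txBegin(dip->i_sb, 0);
 *	... directory entry and inode updates under tlocks ...
 *	iplist[0] = dip;
 *	iplist[1] = ip;
 *	rc = txCommit(tid, 2, iplist, 0);
 *	txEnd(tid);
 */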


/*
 * NAME:        txLog()
 *
 * FUNCTION:    Writes AFTER log records for all lines modified
 *              by tid for segments specified by inodes in comdata.
 *              Code assumes only WRITELOCKS are recorded in lockwords.
 *
 * PARAMETERS:
 *
 * RETURN:
 */
static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
{
        int rc = 0;
        struct inode *ip;
        lid_t lid;
        struct tlock *tlck;
        struct lrd *lrd = &cd->lrd;

        /*
         * write log record(s) for each tlock of transaction,
         */
        for (lid = tblk->next; lid; lid = tlck->next) {
                tlck = lid_to_tlock(lid);

                tlck->flag |= tlckLOG;

                /* initialize lrd common */
                ip = tlck->ip;
                lrd->aggregate = cpu_to_le32(kdev_t_to_nr(ip->i_dev));
                lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
                lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);

                /* write log record of page from the tlock */
                switch (tlck->type & tlckTYPE) {
                case tlckXTREE:
                        xtLog(log, tblk, lrd, tlck);
                        break;

                case tlckDTREE:
                        dtLog(log, tblk, lrd, tlck);
                        break;

                case tlckINODE:
                        diLog(log, tblk, lrd, tlck, cd);
                        break;

                case tlckMAP:
                        mapLog(log, tblk, lrd, tlck);
                        break;

                case tlckDATA:
                        dataLog(log, tblk, lrd, tlck);
                        break;

                default:
                        jfs_err("UFO tlock:0x%p", tlck);
                }
        }

        return rc;
}


/*
 *      diLog()
 *
 * function:    log inode tlock and format maplock to update bmap;
 */
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
          struct tlock * tlck, struct commit * cd)
{
        int rc = 0;
        struct metapage *mp;
        pxd_t *pxd;
        struct pxd_lock *pxdlock;

        mp = tlck->mp;

        /* initialize as REDOPAGE record format */
        lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
        lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);

        pxd = &lrd->log.redopage.pxd;

        /*
         *      inode after image
         */
        if (tlck->type & tlckENTRY) {
                /* log after-image for logredo(): */
                lrd->type = cpu_to_le16(LOG_REDOPAGE);
//              *pxd = mp->cm_pxd;
                PXDaddress(pxd, mp->index);
                PXDlength(pxd,
                          mp->logical_size >> tblk->sb->s_blocksize_bits);
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

                /* mark page as homeward bound */
                tlck->flag |= tlckWRITEPAGE;
        } else if (tlck->type & tlckFREE) {
                /*
                 *      free inode extent
                 *
                 * (pages of the freed inode extent have been invalidated and
                 * a maplock for free of the extent has been formatted at
                 * txLock() time);
                 *
                 * the tlock had been acquired on the inode allocation map page
                 * (iag) that specifies the freed extent, even though the map
                 * page is not itself logged, to prevent pageout of the map
                 * page before the log;
                 */

                /* log LOG_NOREDOINOEXT of the freed inode extent for
                 * logredo() to start NoRedoPage filters, and to update
                 * imap and bmap for free of the extent;
                 */
                lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
                /*
                 * For the LOG_NOREDOINOEXT record, we need
                 * to pass the IAG number and inode extent
                 * index (within that IAG) from which the
                 * extent is being released.  These have been
                 * passed to us in iplist[1] and iplist[2].
                 */
                lrd->log.noredoinoext.iagnum =
                    cpu_to_le32((u32) (size_t) cd->iplist[1]);
                lrd->log.noredoinoext.inoext_idx =
                    cpu_to_le32((u32) (size_t) cd->iplist[2]);

                pxdlock = (struct pxd_lock *) & tlck->lock;
                *pxd = pxdlock->pxd;
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

                /* update bmap */
                tlck->flag |= tlckUPDATEMAP;

                /* mark page as homeward bound */
                tlck->flag |= tlckWRITEPAGE;
        } else
                jfs_err("diLog: UFO type tlck:0x%p", tlck);
#ifdef  _JFS_WIP
        /*
         *      alloc/free external EA extent
         *
         * a maplock for txUpdateMap() to update bPWMAP for alloc/free
         * of the extent has been formatted at txLock() time;
         */
        else {
                assert(tlck->type & tlckEA);

                /* log LOG_UPDATEMAP for logredo() to update bmap for
                 * alloc of new (and free of old) external EA extent;
                 */
                lrd->type = cpu_to_le16(LOG_UPDATEMAP);
                pxdlock = (struct pxd_lock *) & tlck->lock;
                nlock = pxdlock->index;
                for (i = 0; i < nlock; i++, pxdlock++) {
                        if (pxdlock->flag & mlckALLOCPXD)
                                lrd->log.updatemap.type =
                                    cpu_to_le16(LOG_ALLOCPXD);
                        else
                                lrd->log.updatemap.type =
                                    cpu_to_le16(LOG_FREEPXD);
                        lrd->log.updatemap.nxd = cpu_to_le16(1);
                        lrd->log.updatemap.pxd = pxdlock->pxd;
                        lrd->backchain =
                            cpu_to_le32(lmLog(log, tblk, lrd, NULL));
                }

                /* update bmap */
                tlck->flag |= tlckUPDATEMAP;
        }
#endif                          /* _JFS_WIP */

        return rc;
}


/*
 *      dataLog()
 *
 * function:    log data tlock
 */
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
            struct tlock * tlck)
{
        struct metapage *mp;
        pxd_t *pxd;

        mp = tlck->mp;

        /* initialize as REDOPAGE record format */
        lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
        lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);

        pxd = &lrd->log.redopage.pxd;

        /* log after-image for logredo(): */
        lrd->type = cpu_to_le16(LOG_REDOPAGE);

        if (JFS_IP(tlck->ip)->next_index < MAX_INLINE_DIRTABLE_ENTRY) {
                /*
                 * The table has been truncated; we must have deleted
                 * the last entry, so don't bother logging this
                 */
                mp->lid = 0;
                hold_metapage(mp, 0);
                atomic_dec(&mp->nohomeok);
                discard_metapage(mp);
                tlck->mp = 0;
                return 0;
        }

        PXDaddress(pxd, mp->index);
        PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);

        lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

        /* mark page as homeward bound */
        tlck->flag |= tlckWRITEPAGE;

        return 0;
}


/*
 *      dtLog()
 *
 * function:    log dtree tlock and format maplock to update bmap;
 */
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
           struct tlock * tlck)
{
        struct metapage *mp;
        struct pxd_lock *pxdlock;
        pxd_t *pxd;

        mp = tlck->mp;

        /* initialize as REDOPAGE/NOREDOPAGE record format */
        lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
        lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);

        pxd = &lrd->log.redopage.pxd;

        if (tlck->type & tlckBTROOT)
                lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);

        /*
         *      page extension via relocation: entry insertion;
         *      page extension in-place: entry insertion;
         *      new right page from page split, reinitialized in-line
         *      root from root page split: entry insertion;
         */
        if (tlck->type & (tlckNEW | tlckEXTEND)) {
                /* log after-image of the new page for logredo():
                 * mark log (LOG_NEW) for logredo() to initialize
                 * freelist and update bmap for alloc of the new page;
                 */
                lrd->type = cpu_to_le16(LOG_REDOPAGE);
                if (tlck->type & tlckEXTEND)
                        lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
                else
                        lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
//              *pxd = mp->cm_pxd;
                PXDaddress(pxd, mp->index);
                PXDlength(pxd,
                          mp->logical_size >> tblk->sb->s_blocksize_bits);
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

                /* format a maplock for txUpdateMap() to update bPMAP for
                 * alloc of the new page;
                 */
                if (tlck->type & tlckBTROOT)
                        return;
                tlck->flag |= tlckUPDATEMAP;
                pxdlock = (struct pxd_lock *) & tlck->lock;
                pxdlock->flag = mlckALLOCPXD;
                pxdlock->pxd = *pxd;

                pxdlock->index = 1;

                /* mark page as homeward bound */
                tlck->flag |= tlckWRITEPAGE;
                return;
        }

        /*
         *      entry insertion/deletion,
         *      sibling page link update (old right page before split);
         */
        if (tlck->type & (tlckENTRY | tlckRELINK)) {
                /* log after-image for logredo(): */
                lrd->type = cpu_to_le16(LOG_REDOPAGE);
                PXDaddress(pxd, mp->index);
                PXDlength(pxd,
                          mp->logical_size >> tblk->sb->s_blocksize_bits);
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

                /* mark page as homeward bound */
                tlck->flag |= tlckWRITEPAGE;
                return;
        }

        /*
         *      page deletion: page has been invalidated
         *      page relocation: source extent
         *
         *      a maplock for free of the page has been formatted
         *      at txLock() time;
         */
        if (tlck->type & (tlckFREE | tlckRELOCATE)) {
                /* log LOG_NOREDOPAGE of the deleted page for logredo()
                 * to start NoRedoPage filter and to update bmap for free
                 * of the deleted page
                 */
                lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
                pxdlock = (struct pxd_lock *) & tlck->lock;
                *pxd = pxdlock->pxd;
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

                /* a maplock for txUpdateMap() for free of the page
                 * has been formatted at txLock() time;
                 */
                tlck->flag |= tlckUPDATEMAP;
        }
        return;
}
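
/*
 * Recap of the dtLog() cases above (summary only, no new state):
 *
 *      tlck->type                log record       maplock for txUpdateMap()
 *      -----------------------   --------------   -------------------------
 *      tlckNEW | tlckEXTEND      LOG_REDOPAGE     mlckALLOCPXD (new page)
 *      tlckENTRY | tlckRELINK    LOG_REDOPAGE     none (in-place update)
 *      tlckFREE | tlckRELOCATE   LOG_NOREDOPAGE   preformatted at txLock()
 */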


/*
 *      xtLog()
 *
 * function:    log xtree tlock and format maplock to update bmap;
 */
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
           struct tlock * tlck)
{
        struct inode *ip;
        struct metapage *mp;
        xtpage_t *p;
        struct xtlock *xtlck;
        struct maplock *maplock;
        struct xdlistlock *xadlock;
        struct pxd_lock *pxdlock;
        pxd_t *pxd;
        int next, lwm, hwm;

        ip = tlck->ip;
        mp = tlck->mp;

        /* initialize as REDOPAGE/NOREDOPAGE record format */
        lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
        lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);

        pxd = &lrd->log.redopage.pxd;

        if (tlck->type & tlckBTROOT) {
                lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
                p = &JFS_IP(ip)->i_xtroot;
                if (S_ISDIR(ip->i_mode))
                        lrd->log.redopage.type |=
                            cpu_to_le16(LOG_DIR_XTREE);
        } else
                p = (xtpage_t *) mp->data;
        next = le16_to_cpu(p->header.nextindex);

        xtlck = (struct xtlock *) & tlck->lock;

        maplock = (struct maplock *) & tlck->lock;
        xadlock = (struct xdlistlock *) maplock;

        /*
         *      entry insertion/extension;
         *      sibling page link update (old right page before split);
         */
        if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
                /* log after-image for logredo():
                 * logredo() will update bmap for alloc of new/extended
                 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
                 * after-image of XADlist;
                 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
                 * applying the after-image to the meta-data page.
                 */
                lrd->type = cpu_to_le16(LOG_REDOPAGE);
//              *pxd = mp->cm_pxd;
                PXDaddress(pxd, mp->index);
                PXDlength(pxd,
                          mp->logical_size >> tblk->sb->s_blocksize_bits);
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

                /* format a maplock for txUpdateMap() to update bPMAP
                 * for alloc of new/extended extents of XAD[lwm:next)
                 * from the page itself;
                 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
                 */
                lwm = xtlck->lwm.offset;
                if (lwm == 0)
                        lwm = XTPAGEMAXSLOT;

                if (lwm == next)
                        goto out;
                assert(lwm < next);
                tlck->flag |= tlckUPDATEMAP;
                xadlock->flag = mlckALLOCXADLIST;
                xadlock->count = next - lwm;
                if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
                        int i;
                        /*
                         * Lazy commit may allow xtree to be modified before
                         * txUpdateMap runs.  Copy xad into linelock to
                         * preserve correct data.
                         */
                        xadlock->xdlist = &xtlck->pxdlock;
                        memcpy(xadlock->xdlist, &p->xad[lwm],
                               sizeof(xad_t) * xadlock->count);

                        for (i = 0; i < xadlock->count; i++)
                                p->xad[lwm + i].flag &=
                                    ~(XAD_NEW | XAD_EXTENDED);
                } else {
                        /*
                         * xdlist will point into the inode's xtree, so
                         * ensure that the transaction is not committed
                         * lazily.
                         */
                        xadlock->xdlist = &p->xad[lwm];
                        tblk->xflag &= ~COMMIT_LAZY;
                }
                jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d "
                         "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count);

                maplock->index = 1;

              out:
                /* mark page as homeward bound */
                tlck->flag |= tlckWRITEPAGE;

                return;
        }
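
        /*
         * Why "count <= 2" above: the embedded xtlck->pxdlock area only
         * has room for a couple of xad_t entries (an assumption about
         * the linelock layout), so short lists are snapshotted there
         * while longer ones force the commit synchronous by clearing
         * COMMIT_LAZY.
         */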

        /*
         *      page deletion: file deletion/truncation (ref. xtTruncate())
         *
         * (page will be invalidated after log is written and bmap
         * is updated from the page);
         */
        if (tlck->type & tlckFREE) {
                /* LOG_NOREDOPAGE log for NoRedoPage filter:
                 * if page free from file delete, NoRedoFile filter from
                 * inode image of zero link count will subsume NoRedoPage
                 * filters for each page;
                 * if page free from file truncation, write NoRedoPage
                 * filter;
                 *
                 * update of block allocation map for the page itself:
                 * if page free from deletion and truncation, LOG_UPDATEMAP
                 * log for the page itself is generated from processing
                 * its parent page xad entries;
                 */
                /* if page free from file truncation, log LOG_NOREDOPAGE
                 * of the deleted page for logredo() to start NoRedoPage
                 * filter for the page;
                 */
                if (tblk->xflag & COMMIT_TRUNCATE) {
                        /* write NOREDOPAGE for the page */
                        lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
                        PXDaddress(pxd, mp->index);
                        PXDlength(pxd,
                                  mp->logical_size >> tblk->sb->
                                  s_blocksize_bits);
                        lrd->backchain =
                            cpu_to_le32(lmLog(log, tblk, lrd, NULL));

                        if (tlck->type & tlckBTROOT) {
                                /* Empty xtree must be logged */
                                lrd->type = cpu_to_le16(LOG_REDOPAGE);
                                lrd->backchain =
                                    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
                        }
                }

                /* init LOG_UPDATEMAP of the freed extents
                 * XAD[XTENTRYSTART:hwm) from the deleted page itself
                 * for logredo() to update bmap;
                 */
                lrd->type = cpu_to_le16(LOG_UPDATEMAP);
                lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
                xtlck = (struct xtlock *) & tlck->lock;
                hwm = xtlck->hwm.offset;
                lrd->log.updatemap.nxd =
                    cpu_to_le16(hwm - XTENTRYSTART + 1);
                /* reformat linelock for lmLog() */
                xtlck->header.offset = XTENTRYSTART;
                xtlck->header.length = hwm - XTENTRYSTART + 1;
                xtlck->index = 1;
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

                /* format a maplock for txUpdateMap() to update bmap
                 * to free extents of XAD[XTENTRYSTART:hwm) from the
                 * deleted page itself;
                 */
                tlck->flag |= tlckUPDATEMAP;
                xadlock->flag = mlckFREEXADLIST;
                xadlock->count = hwm - XTENTRYSTART + 1;
                if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
                        /*
                         * Lazy commit may allow xtree to be modified before
                         * txUpdateMap runs.  Copy xad into linelock to
                         * preserve correct data.
                         */
                        xadlock->xdlist = &xtlck->pxdlock;
                        memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART],
                               sizeof(xad_t) * xadlock->count);
                } else {
                        /*
                         * xdlist will point into the inode's xtree, so
                         * ensure that the transaction is not committed
                         * lazily.
                         */
                        xadlock->xdlist = &p->xad[XTENTRYSTART];
                        tblk->xflag &= ~COMMIT_LAZY;
                }
                jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
                         tlck->ip, mp, xadlock->count);

                maplock->index = 1;

                /* mark page as invalid */
                if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
                    && !(tlck->type & tlckBTROOT))
                        tlck->flag |= tlckFREEPAGE;
                /*
                   else (tblk->xflag & COMMIT_PMAP)
                   ? release the page;
                 */
                return;
        }

        /*
         *      page/entry truncation: file truncation (ref. xtTruncate())
         *
         *     |----------+------+------+---------------|
         *                |      |      |
         *                |      |     hwm - hwm before truncation
         *                |     next - truncation point
         *               lwm - lwm before truncation
         * header ?
         */
        if (tlck->type & tlckTRUNCATE) {
                pxd_t tpxd;     /* truncated extent of xad */
                int twm;

                /*
                 * For truncation the entire linelock may be used, so it would
                 * be difficult to store the xad list in the linelock itself.
                 * Therefore, we'll just force the transaction to be committed
                 * synchronously, so that xtree pages won't be changed before
                 * txUpdateMap runs.
                 */
                tblk->xflag &= ~COMMIT_LAZY;
                lwm = xtlck->lwm.offset;
                if (lwm == 0)
                        lwm = XTPAGEMAXSLOT;
                hwm = xtlck->hwm.offset;
                twm = xtlck->twm.offset;

                /*
                 *      write log records
                 */
                /*
                 * allocate entries XAD[lwm:next]:
                 */
                if (lwm < next) {
                        /* log after-image for logredo():
                         * logredo() will update bmap for alloc of new/extended
                         * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
                         * after-image of XADlist;
                         * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
                         * applying the after-image to the meta-data page.
                         */
                        lrd->type = cpu_to_le16(LOG_REDOPAGE);
                        PXDaddress(pxd, mp->index);
                        PXDlength(pxd,
                                  mp->logical_size >> tblk->sb->
                                  s_blocksize_bits);
                        lrd->backchain =
                            cpu_to_le32(lmLog(log, tblk, lrd, tlck));
                }

                /*
                 * truncate entry XAD[twm == next - 1]:
                 */
                if (twm == next - 1) {
                        /* init LOG_UPDATEMAP for logredo() to update bmap for
                         * free of truncated delta extent of the truncated
                         * entry XAD[next - 1]:
                         * (xtlck->pxdlock = truncated delta extent);
                         */
                        pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
                        /* assert(pxdlock->type & tlckTRUNCATE); */
                        lrd->type = cpu_to_le16(LOG_UPDATEMAP);
                        lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
                        lrd->log.updatemap.nxd = cpu_to_le16(1);
                        lrd->log.updatemap.pxd = pxdlock->pxd;
                        tpxd = pxdlock->pxd;    /* save to format maplock */
                        lrd->backchain =
                            cpu_to_le32(lmLog(log, tblk, lrd, NULL));
                }

                /*
                 * free entries XAD[next:hwm]:
                 */
                if (hwm >= next) {
                        /* init LOG_UPDATEMAP of the freed extents
                         * XAD[next:hwm] from the deleted page itself
                         * for logredo() to update bmap;
                         */
                        lrd->type = cpu_to_le16(LOG_UPDATEMAP);
                        lrd->log.updatemap.type =
                            cpu_to_le16(LOG_FREEXADLIST);
                        xtlck = (struct xtlock *) & tlck->lock;
                        hwm = xtlck->hwm.offset;
                        lrd->log.updatemap.nxd =
                            cpu_to_le16(hwm - next + 1);
                        /* reformat linelock for lmLog() */
                        xtlck->header.offset = next;
                        xtlck->header.length = hwm - next + 1;
                        xtlck->index = 1;
                        lrd->backchain =
                            cpu_to_le32(lmLog(log, tblk, lrd, tlck));
                }

                /*
                 *      format maplock(s) for txUpdateMap() to update bmap
                 */
                maplock->index = 0;

                /*
                 * allocate entries XAD[lwm:next):
                 */
                if (lwm < next) {
                        /* format a maplock for txUpdateMap() to update bPMAP
                         * for alloc of new/extended extents of XAD[lwm:next)
                         * from the page itself;
                         * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
                         */
                        tlck->flag |= tlckUPDATEMAP;
                        xadlock->flag = mlckALLOCXADLIST;
                        xadlock->count = next - lwm;
                        xadlock->xdlist = &p->xad[lwm];

                        jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d "
                                 "lwm:%d next:%d",
                                 tlck->ip, mp, xadlock->count, lwm, next);
                        maplock->index++;
                        xadlock++;
                }

                /*
                 * truncate entry XAD[twm == next - 1]:
                 */
                if (twm == next - 1) {
                        struct pxd_lock *pxdlock;

                        /* format a maplock for txUpdateMap() to update bmap
                         * to free truncated delta extent of the truncated
                         * entry XAD[next - 1];
                         * (xtlck->pxdlock = truncated delta extent);
                         */
                        tlck->flag |= tlckUPDATEMAP;
                        pxdlock = (struct pxd_lock *) xadlock;
                        pxdlock->flag = mlckFREEPXD;
                        pxdlock->count = 1;
                        pxdlock->pxd = tpxd;

                        jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
                                 "hwm:%d", ip, mp, pxdlock->count, hwm);
                        maplock->index++;
                        xadlock++;
                }

                /*
                 * free entries XAD[next:hwm]:
                 */
                if (hwm >= next) {
                        /* format a maplock for txUpdateMap() to update bmap
                         * to free extents of XAD[next:hwm] from the deleted
                         * page itself;
                         */
                        tlck->flag |= tlckUPDATEMAP;
                        xadlock->flag = mlckFREEXADLIST;
                        xadlock->count = hwm - next + 1;
                        xadlock->xdlist = &p->xad[next];

                        jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d "
                                 "next:%d hwm:%d",
                                 tlck->ip, mp, xadlock->count, next, hwm);
                        maplock->index++;
                }

                /* mark page as homeward bound */
                tlck->flag |= tlckWRITEPAGE;
        }
        return;
}
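
/*
 * Worked example for the tlckTRUNCATE case above (hypothetical values):
 * with lwm = 4, twm = 6, next = 7, hwm = 9, xtLog() writes a REDOPAGE
 * after-image for the surviving entries XAD[4:7), a LOG_UPDATEMAP
 * (LOG_FREEPXD) record for the delta trimmed off XAD[6], and a
 * LOG_UPDATEMAP (LOG_FREEXADLIST) record for XAD[7:9]; three maplocks
 * are formatted and maplock->index ends up 3 for txUpdateMap().
 */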


/*
 *      mapLog()
 *
 * function:    log from maplock of freed data extents;
 */
void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
            struct tlock * tlck)
{
        struct pxd_lock *pxdlock;
        int i, nlock;
        pxd_t *pxd;

        /*
         *      page relocation: free the source page extent
         *
         * a maplock for txUpdateMap() for free of the page
         * has been formatted at txLock() time saving the src
         * relocated page address;
         */
        if (tlck->type & tlckRELOCATE) {
                /* log LOG_NOREDOPAGE of the old relocated page
                 * for logredo() to start NoRedoPage filter;
                 */
                lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
                pxdlock = (struct pxd_lock *) & tlck->lock;
                pxd = &lrd->log.redopage.pxd;
                *pxd = pxdlock->pxd;
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

                /* (N.B. currently, logredo() does NOT update bmap
                 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
                 * if page free from relocation, LOG_UPDATEMAP log is
                 * specifically generated now for logredo()
                 * to update bmap for free of src relocated page;
                 * (new flag LOG_RELOCATE may be introduced which will
                 * inform logredo() to start NORedoPage filter and also
                 * update block allocation map at the same time, thus
                 * avoiding an extra log write);
                 */
                lrd->type = cpu_to_le16(LOG_UPDATEMAP);
                lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
                lrd->log.updatemap.nxd = cpu_to_le16(1);
                lrd->log.updatemap.pxd = pxdlock->pxd;
                lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

                /* a maplock for txUpdateMap() for free of the page
                 * has been formatted at txLock() time;
                 */
                tlck->flag |= tlckUPDATEMAP;
                return;
        }
        /*
         * Otherwise it's not a relocate request
         */
        else {
                /* log LOG_UPDATEMAP for logredo() to update bmap for
                 * free of truncated/relocated delta extent of the data;
                 * e.g.: external EA extent, relocated/truncated extent
                 * from xtTailgate();
                 */
                lrd->type = cpu_to_le16(LOG_UPDATEMAP);
                pxdlock = (struct pxd_lock *) & tlck->lock;
                nlock = pxdlock->index;
                for (i = 0; i < nlock; i++, pxdlock++) {
                        if (pxdlock->flag & mlckALLOCPXD)
                                lrd->log.updatemap.type =
                                    cpu_to_le16(LOG_ALLOCPXD);
                        else
                                lrd->log.updatemap.type =
                                    cpu_to_le16(LOG_FREEPXD);
                        lrd->log.updatemap.nxd = cpu_to_le16(1);
                        lrd->log.updatemap.pxd = pxdlock->pxd;
                        lrd->backchain =
                            cpu_to_le32(lmLog(log, tblk, lrd, NULL));
                        jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
                                 (ulong) addressPXD(&pxdlock->pxd),
                                 lengthPXD(&pxdlock->pxd));
                }

                /* update bmap */
                tlck->flag |= tlckUPDATEMAP;
        }
}
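
/*
 * Note that the non-relocate path above emits one LOG_UPDATEMAP record
 * per pxd_lock in the array (nlock is taken from the first entry's
 * index field), so a tlckMAP tlock carrying two pxd_locks produces two
 * log records.
 */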


/*
 *      txEA()
 *
 * function:    acquire maplock for EA/ACL extents or
 *              set COMMIT_INLINE flag;
 */
void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
{
        struct tlock *tlck = NULL;
        struct pxd_lock *maplock = NULL, *pxdlock = NULL;

        /*
         * format maplock for alloc of new EA extent
         */
        if (newea) {
                /* Since the newea could be a completely zeroed entry we need to
                 * check for the two flags which indicate we should actually
                 * commit new EA data
                 */
                if (newea->flag & DXD_EXTENT) {
                        tlck = txMaplock(tid, ip, tlckMAP);
                        maplock = (struct pxd_lock *) & tlck->lock;
                        pxdlock = (struct pxd_lock *) maplock;
                        pxdlock->flag = mlckALLOCPXD;
                        PXDaddress(&pxdlock->pxd, addressDXD(newea));
                        PXDlength(&pxdlock->pxd, lengthDXD(newea));
                        pxdlock++;
                        maplock->index = 1;
                } else if (newea->flag & DXD_INLINE) {
                        tlck = NULL;

                        set_cflag(COMMIT_Inlineea, ip);
                }
        }

        /*
         * format maplock for free of old EA extent
         */
        if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
                if (tlck == NULL) {
                        tlck = txMaplock(tid, ip, tlckMAP);
                        maplock = (struct pxd_lock *) & tlck->lock;
                        pxdlock = (struct pxd_lock *) maplock;
                        maplock->index = 0;
                }
                pxdlock->flag = mlckFREEPXD;
                PXDaddress(&pxdlock->pxd, addressDXD(oldea));
                PXDlength(&pxdlock->pxd, lengthDXD(oldea));
                maplock->index++;
        }
}
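
/*
 * E.g., replacing one out-of-line EA with another leaves a single tlock
 * whose maplock area holds two pxd_locks: slot 0 is the mlckALLOCPXD
 * for the new extent, slot 1 the mlckFREEPXD for the old one, with
 * maplock->index == 2, which is exactly the shape the mapLog() loop
 * above walks.
 */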


/*
 *      txForce()
 *
 * function: synchronously write pages locked by transaction
 *              after txLog() but before txUpdateMap();
 */
void txForce(struct tblock * tblk)
{
        struct tlock *tlck;
        lid_t lid, next;
        struct metapage *mp;

        /*
         * reverse the order of transaction tlocks in
         * careful update order of address index pages
         * (right to left, bottom up)
         */
        tlck = lid_to_tlock(tblk->next);
        lid = tlck->next;
        tlck->next = 0;
        while (lid) {
                tlck = lid_to_tlock(lid);
                next = tlck->next;
                tlck->next = tblk->next;
                tblk->next = lid;
                lid = next;
        }
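
        /*
         * The loop above is the standard in-place reversal of a singly
         * linked list, expressed over lid indices, e.g.:
         *
         *      before:  tblk->next -> A -> B -> C -> 0
         *      after:   tblk->next -> C -> B -> A -> 0
         *
         * so the pages are then visited right to left, bottom up.
         */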

        /*
         * synchronously write the page, and
         * hold the page for txUpdateMap();
         */
        for (lid = tblk->next; lid; lid = next) {
                tlck = lid_to_tlock(lid);
                next = tlck->next;

                if ((mp = tlck->mp) != NULL &&
                    (tlck->type & tlckBTROOT) == 0) {
                        assert(mp->xflag & COMMIT_PAGE);

                        if (tlck->flag & tlckWRITEPAGE) {
                                tlck->flag &= ~tlckWRITEPAGE;

                                /* do not release page to freelist */

                                /*
                                 * The "right" thing to do here is to
                                 * synchronously write the metadata.
                                 * With the current implementation this
                                 * is hard since write_metapage requires
                                 * us to kunmap & remap the page.  If we
                                 * have tlocks pointing into the metadata
                                 * pages, we don't want to do this.  I think
                                 * we can get by with synchronously writing
                                 * the pages when they are released.
                                 */
                                assert(atomic_read(&mp->nohomeok));
                                set_bit(META_dirty, &mp->flag);
                                set_bit(META_sync, &mp->flag);
                        }
                }
        }
}


/*
 *      txUpdateMap()
 *
 * function:    update persistent allocation map (and working map
 *              if appropriate);
 *
 * parameter:
 */
static void txUpdateMap(struct tblock * tblk)
{
        struct inode *ip;
        struct inode *ipimap;
        lid_t lid;
        struct tlock *tlck;
        struct maplock *maplock;
        struct pxd_lock pxdlock;
        int maptype;
        int k, nlock;
        struct metapage *mp = 0;

        ipimap = JFS_SBI(tblk->sb)->ipimap;

        maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;

        /*
         *      update block allocation map
         *
         * update allocation state in pmap (and wmap) and
         * update lsn of the pmap page;
         */
        /*
         * scan each tlock/page of transaction for block allocation/free:
         *
         * for each tlock/page of transaction, update map.
         *  ? are there tlocks for pmap and pwmap at the same time ?
         */
        for (lid = tblk->next; lid; lid = tlck->next) {
                tlck = lid_to_tlock(lid);

                if ((tlck->flag & tlckUPDATEMAP) == 0)
                        continue;

                if (tlck->flag & tlckFREEPAGE) {
                        /*
                         * Another thread may attempt to reuse freed space
                         * immediately, so we want to get rid of the metapage
                         * before anyone else has a chance to get it.
                         * Lock metapage, update maps, then invalidate
                         * the metapage.
                         */
                        mp = tlck->mp;
                        ASSERT(mp->xflag & COMMIT_PAGE);
                        hold_metapage(mp, 0);
                }

                /*
                 * extent list:
                 * . in-line PXD list:
                 * . out-of-line XAD list:
                 */
                maplock = (struct maplock *) & tlck->lock;
                nlock = maplock->index;

                for (k = 0; k < nlock; k++, maplock++) {
                        /*
                         * allocate blocks in persistent map:
                         *
                         * blocks have been allocated from wmap at alloc time;
                         */
                        if (maplock->flag & mlckALLOC) {
                                txAllocPMap(ipimap, maplock, tblk);
                        }
                        /*
                         * free blocks in persistent and working map:
                         * blocks will be freed in pmap and then in wmap;
                         *
                         * ? tblock specifies the PMAP/PWMAP based upon
                         * transaction
                         *
                         * free blocks in persistent map:
                         * blocks will be freed from wmap at last reference
                         * release of the object for regular files;
                         *
                         * Always free blocks from both persistent & working
                         * maps for directories
                         */
                        else {  /* (maplock->flag & mlckFREE) */

                                if (S_ISDIR(tlck->ip->i_mode))
                                        txFreeMap(ipimap, maplock,
                                                  tblk, COMMIT_PWMAP);
                                else
                                        txFreeMap(ipimap, maplock,
                                                  tblk, maptype);
                        }
                }
                if (tlck->flag & tlckFREEPAGE) {
                        if (!(tblk->flag & tblkGC_LAZY)) {
                                /* This is equivalent to txRelease */
                                ASSERT(mp->lid == lid);
                                tlck->mp->lid = 0;
                        }
                        assert(atomic_read(&mp->nohomeok) == 1);
                        atomic_dec(&mp->nohomeok);
                        discard_metapage(mp);
                        tlck->mp = 0;
                }
        }
        /*
         *      update inode allocation map
         *
         * update allocation state in pmap and
         * update lsn of the pmap page;
         * update in-memory inode flag/state
         *
         * unlock mapper/write lock
         */
        if (tblk->xflag & COMMIT_CREATE) {
                ip = tblk->ip;

                ASSERT(test_cflag(COMMIT_New, ip));
                clear_cflag(COMMIT_New, ip);

                diUpdatePMap(ipimap, ip->i_ino, FALSE, tblk);
                ipimap->i_state |= I_DIRTY;
                /* update persistent block allocation map
                 * for the allocation of inode extent;
                 */
                pxdlock.flag = mlckALLOCPXD;
                pxdlock.pxd = JFS_IP(ip)->ixpxd;
                pxdlock.index = 1;
                txAllocPMap(ip, (struct maplock *) & pxdlock, tblk);
                iput(ip);
        } else if (tblk->xflag & COMMIT_DELETE) {
                ip = tblk->ip;
                diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk);
                ipimap->i_state |= I_DIRTY;
                iput(ip);
        }
}
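
/*
 * Recap: maptype selects how a free is applied above.  COMMIT_PMAP
 * touches only the persistent map (the working-map free of a deleted
 * regular file is deferred until its last reference is released),
 * while COMMIT_PWMAP frees in both maps at once, which directories
 * always use.
 */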


/*
 *      txAllocPMap()
 *
 * function: allocate from persistent map;
 *
 * parameter:
 *      ipbmap  -
 *      maplock -
 *              xad list:
 *              pxd:
 *
 *      maptype -
 *              allocate from persistent map;
 *              free from persistent map;
 *              (e.g., tmp file - free from working map at release
 *               of last reference);
 *              free from persistent and working map;
 *
 *      lsn     - log sequence number;
 */
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
                        struct tblock * tblk)
{
        struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
        struct xdlistlock *xadlistlock;
        xad_t *xad;
        s64 xaddr;
        int xlen;
        struct pxd_lock *pxdlock;
        struct xdlistlock *pxdlistlock;
        pxd_t *pxd;
        int n;

        /*
         * allocate from persistent map;
         */
        if (maplock->flag & mlckALLOCXADLIST) {
                xadlistlock = (struct xdlistlock *) maplock;
                xad = xadlistlock->xdlist;
                for (n = 0; n < xadlistlock->count; n++, xad++) {
                        if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
                                xaddr = addressXAD(xad);
                                xlen = lengthXAD(xad);
                                dbUpdatePMap(ipbmap, FALSE, xaddr,
                                             (s64) xlen, tblk);
                                xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
                                jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
                                         (ulong) xaddr, xlen);
                        }
                }
        } else if (maplock->flag & mlckALLOCPXD) {
                pxdlock = (struct pxd_lock *) maplock;
                xaddr = addressPXD(&pxdlock->pxd);
                xlen = lengthPXD(&pxdlock->pxd);
                dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk);
                jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
        } else {                /* (maplock->flag & mlckALLOCPXDLIST) */

                pxdlistlock = (struct xdlistlock *) maplock;
                pxd = pxdlistlock->xdlist;
                for (n = 0; n < pxdlistlock->count; n++, pxd++) {
                        xaddr = addressPXD(pxd);
                        xlen = lengthPXD(pxd);
                        dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen,
                                     tblk);
                        jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
                                 (ulong) xaddr, xlen);
                }
        }
}
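
/*
 * N.B. the second argument to dbUpdatePMap() is the free flag: FALSE
 * above marks the extent allocated in the persistent map, while
 * txFreeMap() below passes TRUE to mark it free.
 */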


/*
 *      txFreeMap()
 *
 * function:    free from persistent and/or working map;
 *
 * todo: optimization
 */
void txFreeMap(struct inode *ip,
               struct maplock * maplock, struct tblock * tblk, int maptype)
{
        struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
        struct xdlistlock *xadlistlock;
        xad_t *xad;
        s64 xaddr;
        int xlen;
        struct pxd_lock *pxdlock;
        struct xdlistlock *pxdlistlock;
        pxd_t *pxd;
        int n;

        jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
                 tblk, maplock, maptype);

        /*
         * free from persistent map;
         */
        if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
                if (maplock->flag & mlckFREEXADLIST) {
                        xadlistlock = (struct xdlistlock *) maplock;
                        xad = xadlistlock->xdlist;
                        for (n = 0; n < xadlistlock->count; n++, xad++) {
                                if (!(xad->flag & XAD_NEW)) {
                                        xaddr = addressXAD(xad);
                                        xlen = lengthXAD(xad);
                                        dbUpdatePMap(ipbmap, TRUE, xaddr,
                                                     (s64) xlen, tblk);
                                        jfs_info("freePMap: xaddr:0x%lx "
                                                 "xlen:%d",
                                                 (ulong) xaddr, xlen);
                                }
                        }
                } else if (maplock->flag & mlckFREEPXD) {
                        pxdlock = (struct pxd_lock *) maplock;
                        xaddr = addressPXD(&pxdlock->pxd);
                        xlen = lengthPXD(&pxdlock->pxd);
                        dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen,
                                     tblk);
                        jfs_info("freePMap: xaddr:0x%lx xlen:%d",
                                 (ulong) xaddr, xlen);
                } else {        /* (maplock->flag & mlckFREEPXDLIST) */

                        pxdlistlock = (struct xdlistlock *) maplock;
                        pxd = pxdlistlock->xdlist;
                        for (n = 0; n < pxdlistlock->count; n++, pxd++) {
                                xaddr = addressPXD(pxd);
                                xlen = lengthPXD(pxd);
                                dbUpdatePMap(ipbmap, TRUE, xaddr,
                                             (s64) xlen, tblk);
                                jfs_info("freePMap: xaddr:0x%lx xlen:%d",
                                         (ulong) xaddr, xlen);
                        }
                }
        }

        /*
         * free from working map;
         */
        if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
                if (maplock->flag & mlckFREEXADLIST) {
                        xadlistlock = (struct xdlistlock *) maplock;
                        xad = xadlistlock->xdlist;
                        for (n = 0; n < xadlistlock->count; n++, xad++) {
                                xaddr = addressXAD(xad);
                                xlen = lengthXAD(xad);
                                dbFree(ip, xaddr, (s64) xlen);
                                xad->flag = 0;
                                jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
                                         (ulong) xaddr, xlen);
                        }
                } else if (maplock->flag & mlckFREEPXD) {
                        pxdlock = (struct pxd_lock *) maplock;
                        xaddr = addressPXD(&pxdlock->pxd);
                        xlen = lengthPXD(&pxdlock->pxd);
                        dbFree(ip, xaddr, (s64) xlen);
                        jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
                                 (ulong) xaddr, xlen);
                } else {        /* (maplock->flag & mlckFREEPXDLIST) */

                        pxdlistlock = (struct xdlistlock *) maplock;
                        pxd = pxdlistlock->xdlist;
                        for (n = 0; n < pxdlistlock->count; n++, pxd++) {
                                xaddr = addressPXD(pxd);
                                xlen = lengthPXD(pxd);
                                dbFree(ip, xaddr, (s64) xlen);
                                jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
                                         (ulong) xaddr, xlen);
                        }
                }
        }
}


/*
 *      txFreelock()
 *
 * function:    remove tlock from inode anonymous locklist
 */
void txFreelock(struct inode *ip)
{
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
        struct tlock *xtlck, *tlck;
        lid_t xlid = 0, lid;

        if (!jfs_ip->atlhead)
                return;

        xtlck = (struct tlock *) &jfs_ip->atlhead;

        while ((lid = xtlck->next)) {
                tlck = lid_to_tlock(lid);
                if (tlck->flag & tlckFREELOCK) {
                        xtlck->next = tlck->next;
                        txLockFree(lid);
                } else {
                        xtlck = tlck;
                        xlid = lid;
                }
        }

        if (jfs_ip->atlhead)
                jfs_ip->atltail = xlid;
        else {
                jfs_ip->atltail = 0;
                /*
                 * If inode was on anon_list, remove it
                 */
                TXN_LOCK();
                list_del_init(&jfs_ip->anon_inode_list);
                TXN_UNLOCK();
        }
}
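
/*
 * txFreelock() above is a classic singly-linked-list sweep: xtlck
 * tracks the predecessor (starting at the list head cast as a tlock),
 * entries flagged tlckFREELOCK are unlinked and recycled via
 * txLockFree(), and xlid remembers the last survivor so that atltail
 * can be fixed up afterwards.
 */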


/*
 *      txAbort()
 *
 * function: abort tx before commit;
 *
 * frees line-locks and segment locks for all
 * segments in comdata structure.
 * Optionally sets state of file-system to FM_DIRTY in super-block.
 * lsn of in-memory page-frames touched by the transaction
 * is reset to 0 (to avoid logwrap).
 */
void txAbort(tid_t tid, int dirty)
{
        lid_t lid, next;
        struct metapage *mp;
        struct tblock *tblk = tid_to_tblock(tid);

        jfs_warn("txAbort: tid:%d dirty:0x%x", tid, dirty);

        /*
         * free tlocks of the transaction
         */
        for (lid = tblk->next; lid; lid = next) {
                next = lid_to_tlock(lid)->next;

                mp = lid_to_tlock(lid)->mp;

                if (mp) {
                        mp->lid = 0;

                        /*
                         * reset lsn of page to avoid logwrap:
                         *
                         * (page may have been previously committed by another
                         * transaction(s) but has not been paged, i.e.,
                         * it may be on logsync list even though it has not
                         * been logged for the current tx.)
                         */
                        if (mp->xflag & COMMIT_PAGE && mp->lsn)
                                LogSyncRelease(mp);
                }
                /* insert tlock at head of freelist */
                TXN_LOCK();
                txLockFree(lid);
                TXN_UNLOCK();
        }

        /* caller will free the transaction block */

        tblk->next = tblk->last = 0;

        /*
         * mark filesystem dirty
         */
        if (dirty)
                jfs_error(tblk->sb, "txAbort");

        return;
}


/*
 *      txAbortCommit()
 *
 * function: abort commit.
 *
 * frees tlocks of transaction; line-locks and segment locks for all
 * segments in comdata structure; frees malloc storage;
 * sets state of file-system to FM_DIRTY in super-block;
 * lsn of in-memory page-frames touched by the transaction
 * is reset to 0 (to avoid logwrap).
 */
static void txAbortCommit(struct commit * cd)
{
        struct tblock *tblk;
        tid_t tid;
        lid_t lid, next;
        struct metapage *mp;

        jfs_warn("txAbortCommit: cd:0x%p", cd);

        /*
         * free tlocks of the transaction
         */
        tid = cd->tid;
        tblk = tid_to_tblock(tid);
        for (lid = tblk->next; lid; lid = next) {
                next = lid_to_tlock(lid)->next;

                mp = lid_to_tlock(lid)->mp;
                if (mp) {
                        mp->lid = 0;

                        /*
                         * reset lsn of page to avoid logwrap;
                         */
                        if (mp->xflag & COMMIT_PAGE)
                                LogSyncRelease(mp);
                }

                /* insert tlock at head of freelist */
                TXN_LOCK();
                txLockFree(lid);
                TXN_UNLOCK();
        }

        tblk->next = tblk->last = 0;

        /* free the transaction block */
        txEnd(tid);

        /*
         * mark filesystem dirty
         */
        jfs_error(cd->sb, "txAbortCommit");
}


/*
 *      txLazyCommit(void)
 *
 *      All transactions except those changing ipimap (COMMIT_FORCE) are
 *      processed by this routine.  This ensures that the inode and block
 *      allocation maps are updated in order.  For synchronous transactions,
 *      let the user thread finish processing after txUpdateMap() is called.
 */
static void txLazyCommit(struct tblock * tblk)
{
        struct jfs_log *log;

        while (((tblk->flag & tblkGC_READY) == 0) &&
               ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
                /* We must have gotten ahead of the user thread
                 */
                jfs_info("txLazyCommit: tblk 0x%p not unlocked", tblk);
                yield();
        }

        jfs_info("txLazyCommit: processing tblk 0x%p", tblk);

        txUpdateMap(tblk);

        log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;

        spin_lock_irq(&log->gclock);    // LOGGC_LOCK

        tblk->flag |= tblkGC_COMMITTED;

        if (tblk->flag & tblkGC_READY)
                log->gcrtc--;

        wake_up_all(&tblk->gcwait);     // LOGGC_WAKEUP

        /*
         * Can't release log->gclock until we've tested tblk->flag
         */
        if (tblk->flag & tblkGC_LAZY) {
                spin_unlock_irq(&log->gclock);  // LOGGC_UNLOCK
                txUnlock(tblk);
                tblk->flag &= ~tblkGC_LAZY;
                txEnd(tblk - TxBlock);  /* Convert back to tid */
        } else
                spin_unlock_irq(&log->gclock);  // LOGGC_UNLOCK

        jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
}

/*
 *      jfs_lazycommit(void)
 *
 *      To be run as a kernel daemon.  If lbmIODone is called in an interrupt
 *      context, or where blocking is not wanted, this routine will process
 *      committed transactions from the unlock queue.
 */
int jfs_lazycommit(void *arg)
{
        int WorkDone;
        struct tblock *tblk;
        unsigned long flags;

        lock_kernel();

        daemonize();
        current->tty = NULL;
        strcpy(current->comm, "jfsCommit");

        unlock_kernel();

        jfsCommitTask = current;

        spin_lock_irq(&current->sigmask_lock);
        sigfillset(&current->blocked);
        recalc_sigpending(current);
        spin_unlock_irq(&current->sigmask_lock);

        LAZY_LOCK_INIT();
        TxAnchor.unlock_queue = TxAnchor.unlock_tail = 0;

        complete(&jfsIOwait);

        do {
                DECLARE_WAITQUEUE(wq, current);

                LAZY_LOCK(flags);
restart:
                WorkDone = 0;
                while ((tblk = TxAnchor.unlock_queue)) {
                        /*
                         * We can't get ahead of the user thread.  Spinning is
                         * simpler than blocking/waking.  We shouldn't spin
                         * very long, since the user thread shouldn't be
                         * blocking between lmGroupCommit & txEnd.
                         */
                        WorkDone = 1;

                        /*
                         * Remove first transaction from queue
                         */
                        TxAnchor.unlock_queue = tblk->cqnext;
                        tblk->cqnext = 0;
                        if (TxAnchor.unlock_tail == tblk)
                                TxAnchor.unlock_tail = 0;

                        LAZY_UNLOCK(flags);
                        txLazyCommit(tblk);

                        /*
                         * We can be running indefinitely if other processors
                         * are adding transactions to this list
                         */
                        cond_resched();
                        LAZY_LOCK(flags);
                }

                if (WorkDone)
                        goto restart;

                add_wait_queue(&jfs_commit_thread_wait, &wq);
                set_current_state(TASK_INTERRUPTIBLE);
                LAZY_UNLOCK(flags);
                schedule();
                current->state = TASK_RUNNING;
                remove_wait_queue(&jfs_commit_thread_wait, &wq);
        } while (!jfs_stop_threads);

        if (TxAnchor.unlock_queue)
                jfs_err("jfs_lazycommit being killed w/pending transactions!");
        else
                jfs_info("jfs_lazycommit being killed\n");
        complete_and_exit(&jfsIOwait, 0);
}

void txLazyUnlock(struct tblock * tblk)
{
        unsigned long flags;

        LAZY_LOCK(flags);

        if (TxAnchor.unlock_tail)
                TxAnchor.unlock_tail->cqnext = tblk;
        else
                TxAnchor.unlock_queue = tblk;
        TxAnchor.unlock_tail = tblk;
        tblk->cqnext = 0;
        LAZY_UNLOCK(flags);
        wake_up(&jfs_commit_thread_wait);
}
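
/*
 * txLazyUnlock() is the producer half of the unlock queue: it appends
 * the tblock under LAZY_LOCK (unlock_tail keeps the append O(1)) and
 * wakes jfs_lazycommit() above, which drains the queue as the consumer.
 */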

static void LogSyncRelease(struct metapage * mp)
{
        struct jfs_log *log = mp->log;

        assert(atomic_read(&mp->nohomeok));
        assert(log);
        atomic_dec(&mp->nohomeok);

        if (atomic_read(&mp->nohomeok))
                return;

        hold_metapage(mp, 0);

        LOGSYNC_LOCK(log);
        mp->log = NULL;
        mp->lsn = 0;
        mp->clsn = 0;
        log->count--;
        list_del_init(&mp->synclist);
        LOGSYNC_UNLOCK(log);

        release_metapage(mp);
}

/*
 *      txQuiesce
 *
 *      Block all new transactions and push anonymous transactions to
 *      completion
 *
 *      This does almost the same thing as jfs_sync below.  We don't
 *      worry about deadlocking when TlocksLow is set, since we would
 *      expect jfs_sync to get us out of that jam.
 */
void txQuiesce(struct super_block *sb)
{
        struct inode *ip;
        struct jfs_inode_info *jfs_ip;
        struct jfs_log *log = JFS_SBI(sb)->log;
        tid_t tid;

        set_bit(log_QUIESCE, &log->flag);

        TXN_LOCK();
restart:
        while (!list_empty(&TxAnchor.anon_list)) {
                jfs_ip = list_entry(TxAnchor.anon_list.next,
                                    struct jfs_inode_info,
                                    anon_inode_list);
                ip = jfs_ip->inode;

                /*
                 * inode will be removed from anonymous list
                 * when it is committed
                 */
                TXN_UNLOCK();
                tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
                down(&jfs_ip->commit_sem);
                txCommit(tid, 1, &ip, 0);
                txEnd(tid);
                up(&jfs_ip->commit_sem);
                /*
                 * Just to be safe.  I don't know how
                 * long we can run without blocking
                 */
                cond_resched();
                TXN_LOCK();
        }

        /*
         * If jfs_sync is running in parallel, there could be some inodes
         * on anon_list2.  Let's check.
         */
        if (!list_empty(&TxAnchor.anon_list2)) {
                list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
                INIT_LIST_HEAD(&TxAnchor.anon_list2);
                goto restart;
        }
        TXN_UNLOCK();

        /*
         * We may need to kick off the group commit
         */
        jfs_flush_journal(log, 0);
}
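
/*
 * Typical usage (a sketch only; the actual call sites live elsewhere
 * in the tree): bracket work that needs a stable on-disk state, e.g.
 *
 *      txQuiesce(sb);
 *      ... operate on the quiesced filesystem ...
 *      txResume(sb);
 */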

/*
 * txResume()
 *
 * Allows transactions to start again following txQuiesce
 */
void txResume(struct super_block *sb)
{
        struct jfs_log *log = JFS_SBI(sb)->log;

        clear_bit(log_QUIESCE, &log->flag);
        TXN_WAKEUP(&log->syncwait);
}
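
/*
 * Illustrative sketch (not part of the original source): a caller that
 * needs the on-disk state to stop changing (e.g. before a snapshot or
 * a read-only remount) would bracket the work with the pair above.
 * jfs_example_freeze() is a hypothetical name.
 */
static void jfs_example_freeze(struct super_block *sb)
{
        txQuiesce(sb);          /* block new txns, force anonymous ones */
        /* ... operate on a stable on-disk image here ... */
        txResume(sb);           /* allow transactions to start again */
}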

/*
 *      jfs_sync(void)
 *
 *      To be run as a kernel daemon.  This is awakened when tlocks run low.
 *      We write any inodes that have anonymous tlocks so they will become
 *      available.
 */
int jfs_sync(void *arg)
{
        struct inode *ip;
        struct jfs_inode_info *jfs_ip;
        int rc;
        tid_t tid;

        lock_kernel();

        daemonize();
        current->tty = NULL;
        strcpy(current->comm, "jfsSync");

        unlock_kernel();

        spin_lock_irq(&current->sigmask_lock);
        sigfillset(&current->blocked);
        recalc_sigpending(current);
        spin_unlock_irq(&current->sigmask_lock);

        complete(&jfsIOwait);

        do {
                DECLARE_WAITQUEUE(wq, current);
                /*
                 * write each inode on the anonymous inode list
                 */
                TXN_LOCK();
                while (TxAnchor.TlocksLow && !list_empty(&TxAnchor.anon_list)) {
                        jfs_ip = list_entry(TxAnchor.anon_list.next,
                                            struct jfs_inode_info,
                                            anon_inode_list);
                        ip = jfs_ip->inode;

                        if (! igrab(ip)) {
                                /*
                                 * Inode is being freed
                                 */
                                list_del_init(&jfs_ip->anon_inode_list);
                        } else if (! down_trylock(&jfs_ip->commit_sem)) {
                                /*
                                 * inode will be removed from anonymous list
                                 * when it is committed
                                 */
                                TXN_UNLOCK();
                                tid = txBegin(ip->i_sb, COMMIT_INODE);
                                rc = txCommit(tid, 1, &ip, 0);
                                txEnd(tid);
                                up(&jfs_ip->commit_sem);

                                iput(ip);
                                /*
                                 * Just to be safe.  I don't know how
                                 * long we can run without blocking
                                 */
                                cond_resched();
                                TXN_LOCK();
                        } else {
                                /* We can't get the commit semaphore.  It may
                                 * be held by a thread waiting for tlocks,
                                 * so let's not block here.  Save it to
                                 * put back on the anon_list.
                                 */

                                /* Take off anon_list */
                                list_del(&jfs_ip->anon_inode_list);

                                /* Put on anon_list2 */
                                list_add(&jfs_ip->anon_inode_list,
                                         &TxAnchor.anon_list2);

                                TXN_UNLOCK();
                                iput(ip);
                                TXN_LOCK();
                        }
                }
                /* Add anon_list2 back to anon_list */
                if (!list_empty(&TxAnchor.anon_list2)) {
                        list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
                        INIT_LIST_HEAD(&TxAnchor.anon_list2);
                }
                add_wait_queue(&jfs_sync_thread_wait, &wq);
                set_current_state(TASK_INTERRUPTIBLE);
                TXN_UNLOCK();
                schedule();
                current->state = TASK_RUNNING;
                remove_wait_queue(&jfs_sync_thread_wait, &wq);
        } while (!jfs_stop_threads);

        jfs_info("jfs_sync being killed");
        complete_and_exit(&jfsIOwait, 0);
}
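
/*
 * Illustrative sketch (not part of the original source): how a 2.4-era
 * init path might spawn this daemon.  jfs_sync() signals jfsIOwait via
 * complete() once it has daemonized and blocked signals, so the
 * spawner can wait until it is ready.  start_jfs_sync_thread() is a
 * hypothetical name.
 */
static int start_jfs_sync_thread(void)
{
        int pid;

        pid = kernel_thread(jfs_sync, NULL,
                            CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
        if (pid < 0)
                return pid;
        wait_for_completion(&jfsIOwait);        /* daemon is up */
        return 0;
}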

#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
                      int *eof, void *data)
{
        int len = 0;
        off_t begin;
        char *freewait;
        char *freelockwait;
        char *lowlockwait;

        freewait =
            waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
        freelockwait =
            waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
        lowlockwait =
            waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";

        len += sprintf(buffer,
                       "JFS TxAnchor\n"
                       "============\n"
                       "freetid = %d\n"
                       "freewait = %s\n"
                       "freelock = %d\n"
                       "freelockwait = %s\n"
                       "lowlockwait = %s\n"
                       "tlocksInUse = %d\n"
                       "TlocksLow = %d\n"
                       "unlock_queue = 0x%p\n"
                       "unlock_tail = 0x%p\n",
                       TxAnchor.freetid,
                       freewait,
                       TxAnchor.freelock,
                       freelockwait,
                       lowlockwait,
                       TxAnchor.tlocksInUse,
                       TxAnchor.TlocksLow,
                       TxAnchor.unlock_queue,
                       TxAnchor.unlock_tail);

        begin = offset;
        *start = buffer + begin;
        len -= begin;

        if (len > length)
                len = length;
        else
                *eof = 1;

        if (len < 0)
                len = 0;

        return len;
}
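
/*
 * Illustrative sketch (not part of the original source): wiring the
 * read handler above into procfs with the 2.4 interface (requires
 * <linux/proc_fs.h>).  The "fs/jfs" directory and "TxAnchor" entry
 * names are assumptions; the real registration lives elsewhere in
 * the driver.
 */
static void jfs_example_register_txanchor(void)
{
        struct proc_dir_entry *base;

        base = proc_mkdir("fs/jfs", NULL);
        if (base)
                create_proc_read_entry("TxAnchor", 0, base,
                                       jfs_txanchor_read, NULL);
}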
#endif

#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
int jfs_txstats_read(char *buffer, char **start, off_t offset, int length,
                     int *eof, void *data)
{
        int len = 0;
        off_t begin;

        len += sprintf(buffer,
                       "JFS TxStats\n"
                       "===========\n"
                       "calls to txBegin = %d\n"
                       "txBegin blocked by sync barrier = %d\n"
                       "txBegin blocked by tlocks low = %d\n"
                       "txBegin blocked by no free tid = %d\n"
                       "calls to txBeginAnon = %d\n"
                       "txBeginAnon blocked by sync barrier = %d\n"
                       "txBeginAnon blocked by tlocks low = %d\n"
                       "calls to txLockAlloc = %d\n"
                       "tLockAlloc blocked by no free lock = %d\n",
                       TxStat.txBegin,
                       TxStat.txBegin_barrier,
                       TxStat.txBegin_lockslow,
                       TxStat.txBegin_freetid,
                       TxStat.txBeginAnon,
                       TxStat.txBeginAnon_barrier,
                       TxStat.txBeginAnon_lockslow,
                       TxStat.txLockAlloc,
                       TxStat.txLockAlloc_freelock);

        begin = offset;
        *start = buffer + begin;
        len -= begin;

        if (len > length)
                len = length;
        else
                *eof = 1;

        if (len < 0)
                len = 0;

        return len;
}
#endif
