OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [jfs/] [jfs_xtree.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 *   Copyright (C) International Business Machines Corp., 2000-2003
3
 *
4
 *   This program is free software;  you can redistribute it and/or modify
5
 *   it under the terms of the GNU General Public License as published by
6
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   (at your option) any later version.
8
 *
9
 *   This program is distributed in the hope that it will be useful,
10
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12
 *   the GNU General Public License for more details.
13
 *
14
 *   You should have received a copy of the GNU General Public License
15
 *   along with this program;  if not, write to the Free Software
16
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
 */
18
/*
19
 *      jfs_xtree.c: extent allocation descriptor B+-tree manager
20
 */
21
 
22
#include <linux/fs.h>
23
#include "jfs_incore.h"
24
#include "jfs_filsys.h"
25
#include "jfs_metapage.h"
26
#include "jfs_dmap.h"
27
#include "jfs_dinode.h"
28
#include "jfs_superblock.h"
29
#include "jfs_debug.h"
30
 
31
/*
32
 * xtree local flag
33
 */
34
#define XT_INSERT       0x00000001
35
 
36
/*
37
 *       xtree key/entry comparison: extent offset
38
 *
39
 * return:
40
 *      -1: k < start of extent
41
 *       0: start_of_extent <= k <= end_of_extent
42
 *       1: k > end_of_extent
43
 */
44
#define XT_CMP(CMP, K, X, OFFSET64)\
45
{\
46
        OFFSET64 = offsetXAD(X);\
47
        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
48
              ((K) < OFFSET64) ? -1 : 0;\
49
}
50
 
51
/* write a xad entry */
52
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
53
{\
54
        (XAD)->flag = (FLAG);\
55
        XADoffset((XAD), (OFF));\
56
        XADlength((XAD), (LEN));\
57
        XADaddress((XAD), (ADDR));\
58
}
59
 
60
#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
61
 
62
/* get page buffer for specified block address */
63
/* ToDo: Replace this ugly macro with a function */
64
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
65
{\
66
        BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
67
        if (!(RC))\
68
        {\
69
                if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
70
                    (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
71
                    (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
72
                {\
73
                        jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
74
                        BT_PUTPAGE(MP);\
75
                        MP = NULL;\
76
                        RC = -EIO;\
77
                }\
78
        }\
79
}
80
 
81
/* for consistency */
82
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
83
 
84
#define XT_GETSEARCH(IP, LEAF, BN, MP,  P, INDEX) \
85
        BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
86
/* xtree entry parameter descriptor */
87
struct xtsplit {
88
        struct metapage *mp;
89
        s16 index;
90
        u8 flag;
91
        s64 off;
92
        s64 addr;
93
        int len;
94
        struct pxdlist *pxdlist;
95
};
96
 
97
 
98
/*
99
 *      statistics
100
 */
101
#ifdef CONFIG_JFS_STATISTICS
102
static struct {
103
        uint search;
104
        uint fastSearch;
105
        uint split;
106
} xtStat;
107
#endif
108
 
109
 
110
/*
111
 * forward references
112
 */
113
static int xtSearch(struct inode *ip,
114
                    s64 xoff, int *cmpp, struct btstack * btstack, int flag);
115
 
116
static int xtSplitUp(tid_t tid,
117
                     struct inode *ip,
118
                     struct xtsplit * split, struct btstack * btstack);
119
 
120
static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
121
                       struct metapage ** rmpp, s64 * rbnp);
122
 
123
static int xtSplitRoot(tid_t tid, struct inode *ip,
124
                       struct xtsplit * split, struct metapage ** rmpp);
125
 
126
#ifdef _STILL_TO_PORT
127
static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
128
                      xtpage_t * fp, struct btstack * btstack);
129
 
130
static int xtSearchNode(struct inode *ip,
131
                        xad_t * xad,
132
                        int *cmpp, struct btstack * btstack, int flag);
133
 
134
static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
135
#endif                          /*  _STILL_TO_PORT */
136
 
137
/* External references */
138
 
139
/*
140
 *      debug control
141
 */
142
/*      #define _JFS_DEBUG_XTREE        1 */
143
 
144
 
145
/*
146
 *      xtLookup()
147
 *
148
 * function: map a single page into a physical extent;
149
 */
150
int xtLookup(struct inode *ip, s64 lstart,
151
             s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
152
{
153
        int rc = 0;
154
        struct btstack btstack;
155
        int cmp;
156
        s64 bn;
157
        struct metapage *mp;
158
        xtpage_t *p;
159
        int index;
160
        xad_t *xad;
161
        s64 size, xoff, xend;
162
        int xlen;
163
        s64 xaddr;
164
 
165
        *plen = 0;
166
 
167
        if (!no_check) {
168
                /* is lookup offset beyond eof ? */
169
                size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
170
                    JFS_SBI(ip->i_sb)->l2bsize;
171
                if (lstart >= size) {
172
                        jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
173
                                (ulong) lstart, (ulong) size);
174
                        return 0;
175
                }
176
        }
177
 
178
        /*
179
         * search for the xad entry covering the logical extent
180
         */
181
//search:
182
        if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) {
183
                jfs_err("xtLookup: xtSearch returned %d", rc);
184
                return rc;
185
        }
186
 
187
        /*
188
         *      compute the physical extent covering logical extent
189
         *
190
         * N.B. search may have failed (e.g., hole in sparse file),
191
         * and returned the index of the next entry.
192
         */
193
        /* retrieve search result */
194
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
195
 
196
        /* is xad found covering start of logical extent ?
197
         * lstart is a page start address,
198
         * i.e., lstart cannot start in a hole;
199
         */
200
        if (cmp)
201
                goto out;
202
 
203
        /*
204
         * lxd covered by xad
205
         */
206
        xad = &p->xad[index];
207
        xoff = offsetXAD(xad);
208
        xlen = lengthXAD(xad);
209
        xend = xoff + xlen;
210
        xaddr = addressXAD(xad);
211
 
212
        /* initialize new pxd */
213
        *pflag = xad->flag;
214
        *paddr = xaddr + (lstart - xoff);
215
        /* a page must be fully covered by an xad */
216
        *plen = min(xend - lstart, llen);
217
 
218
      out:
219
        XT_PUTPAGE(mp);
220
 
221
        return rc;
222
}
223
 
224
 
225
/*
226
 *      xtLookupList()
227
 *
228
 * function: map a single logical extent into a list of physical extent;
229
 *
230
 * parameter:
231
 *      struct inode    *ip,
232
 *      struct lxdlist  *lxdlist,       lxd list (in)
233
 *      struct xadlist  *xadlist,       xad list (in/out)
234
 *      int             flag)
235
 *
236
 * coverage of lxd by xad under assumption of
237
 * . lxd's are ordered and disjoint.
238
 * . xad's are ordered and disjoint.
239
 *
240
 * return:
241
 *      0:      success
242
 *
243
 * note: a page being written (even a single byte) is backed fully,
244
 *      except the last page which is only backed with blocks
245
 *      required to cover the last byte;
246
 *      the extent backing a page is fully contained within an xad;
247
 */
248
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
249
                 struct xadlist * xadlist, int flag)
250
{
251
        int rc = 0;
252
        struct btstack btstack;
253
        int cmp;
254
        s64 bn;
255
        struct metapage *mp;
256
        xtpage_t *p;
257
        int index;
258
        lxd_t *lxd;
259
        xad_t *xad, *pxd;
260
        s64 size, lstart, lend, xstart, xend, pstart;
261
        s64 llen, xlen, plen;
262
        s64 xaddr, paddr;
263
        int nlxd, npxd, maxnpxd;
264
 
265
        npxd = xadlist->nxad = 0;
266
        maxnpxd = xadlist->maxnxad;
267
        pxd = xadlist->xad;
268
 
269
        nlxd = lxdlist->nlxd;
270
        lxd = lxdlist->lxd;
271
 
272
        lstart = offsetLXD(lxd);
273
        llen = lengthLXD(lxd);
274
        lend = lstart + llen;
275
 
276
        size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
277
            JFS_SBI(ip->i_sb)->l2bsize;
278
 
279
        /*
280
         * search for the xad entry covering the logical extent
281
         */
282
      search:
283
        if (lstart >= size)
284
                return 0;
285
 
286
        if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0)))
287
                return rc;
288
 
289
        /*
290
         *      compute the physical extent covering logical extent
291
         *
292
         * N.B. search may have failed (e.g., hole in sparse file),
293
         * and returned the index of the next entry.
294
         */
295
//map:
296
        /* retrieve search result */
297
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
298
 
299
        /* is xad on the next sibling page ? */
300
        if (index == le16_to_cpu(p->header.nextindex)) {
301
                if (p->header.flag & BT_ROOT)
302
                        goto mapend;
303
 
304
                if ((bn = le64_to_cpu(p->header.next)) == 0)
305
                        goto mapend;
306
 
307
                XT_PUTPAGE(mp);
308
 
309
                /* get next sibling page */
310
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
311
                if (rc)
312
                        return rc;
313
 
314
                index = XTENTRYSTART;
315
        }
316
 
317
        xad = &p->xad[index];
318
 
319
        /*
320
         * is lxd covered by xad ?
321
         */
322
      compare:
323
        xstart = offsetXAD(xad);
324
        xlen = lengthXAD(xad);
325
        xend = xstart + xlen;
326
        xaddr = addressXAD(xad);
327
 
328
      compare1:
329
        if (xstart < lstart)
330
                goto compare2;
331
 
332
        /* (lstart <= xstart) */
333
 
334
        /* lxd is NOT covered by xad */
335
        if (lend <= xstart) {
336
                /*
337
                 * get next lxd
338
                 */
339
                if (--nlxd == 0)
340
                        goto mapend;
341
                lxd++;
342
 
343
                lstart = offsetLXD(lxd);
344
                llen = lengthLXD(lxd);
345
                lend = lstart + llen;
346
                if (lstart >= size)
347
                        goto mapend;
348
 
349
                /* compare with the current xad  */
350
                goto compare1;
351
        }
352
        /* lxd is covered by xad */
353
        else {                  /* (xstart < lend) */
354
 
355
                /* initialize new pxd */
356
                pstart = xstart;
357
                plen = min(lend - xstart, xlen);
358
                paddr = xaddr;
359
 
360
                goto cover;
361
        }
362
 
363
        /* (xstart < lstart) */
364
      compare2:
365
        /* lxd is covered by xad */
366
        if (lstart < xend) {
367
                /* initialize new pxd */
368
                pstart = lstart;
369
                plen = min(xend - lstart, llen);
370
                paddr = xaddr + (lstart - xstart);
371
 
372
                goto cover;
373
        }
374
        /* lxd is NOT covered by xad */
375
        else {                  /* (xend <= lstart) */
376
 
377
                /*
378
                 * get next xad
379
                 *
380
                 * linear search next xad covering lxd on
381
                 * the current xad page, and then tree search
382
                 */
383
                if (index == le16_to_cpu(p->header.nextindex) - 1) {
384
                        if (p->header.flag & BT_ROOT)
385
                                goto mapend;
386
 
387
                        XT_PUTPAGE(mp);
388
                        goto search;
389
                } else {
390
                        index++;
391
                        xad++;
392
 
393
                        /* compare with new xad */
394
                        goto compare;
395
                }
396
        }
397
 
398
        /*
399
         * lxd is covered by xad and a new pxd has been initialized
400
         * (lstart <= xstart < lend) or (xstart < lstart < xend)
401
         */
402
      cover:
403
        /* finalize pxd corresponding to current xad */
404
        XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);
405
 
406
        if (++npxd >= maxnpxd)
407
                goto mapend;
408
        pxd++;
409
 
410
        /*
411
         * lxd is fully covered by xad
412
         */
413
        if (lend <= xend) {
414
                /*
415
                 * get next lxd
416
                 */
417
                if (--nlxd == 0)
418
                        goto mapend;
419
                lxd++;
420
 
421
                lstart = offsetLXD(lxd);
422
                llen = lengthLXD(lxd);
423
                lend = lstart + llen;
424
                if (lstart >= size)
425
                        goto mapend;
426
 
427
                /*
428
                 * test for old xad covering new lxd
429
                 * (old xstart < new lstart)
430
                 */
431
                goto compare2;
432
        }
433
        /*
434
         * lxd is partially covered by xad
435
         */
436
        else {                  /* (xend < lend)  */
437
 
438
                /*
439
                 * get next xad
440
                 *
441
                 * linear search next xad covering lxd on
442
                 * the current xad page, and then next xad page search
443
                 */
444
                if (index == le16_to_cpu(p->header.nextindex) - 1) {
445
                        if (p->header.flag & BT_ROOT)
446
                                goto mapend;
447
 
448
                        if ((bn = le64_to_cpu(p->header.next)) == 0)
449
                                goto mapend;
450
 
451
                        XT_PUTPAGE(mp);
452
 
453
                        /* get next sibling page */
454
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
455
                        if (rc)
456
                                return rc;
457
 
458
                        index = XTENTRYSTART;
459
                        xad = &p->xad[index];
460
                } else {
461
                        index++;
462
                        xad++;
463
                }
464
 
465
                /*
466
                 * test for new xad covering old lxd
467
                 * (old lstart < new xstart)
468
                 */
469
                goto compare;
470
        }
471
 
472
      mapend:
473
        xadlist->nxad = npxd;
474
 
475
//out:
476
        XT_PUTPAGE(mp);
477
 
478
        return rc;
479
}
480
 
481
 
482
/*
483
 *      xtSearch()
484
 *
485
 * function:    search for the xad entry covering specified offset.
486
 *
487
 * parameters:
488
 *      ip      - file object;
489
 *      xoff    - extent offset;
490
 *      cmpp    - comparison result:
491
 *      btstack - traverse stack;
492
 *      flag    - search process flag (XT_INSERT);
493
 *
494
 * returns:
495
 *      btstack contains (bn, index) of search path traversed to the entry.
496
 *      *cmpp is set to result of comparison with the entry returned.
497
 *      the page containing the entry is pinned at exit.
498
 */
499
static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
500
                    int *cmpp, struct btstack * btstack, int flag)
501
{
502
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
503
        int rc = 0;
504
        int cmp = 1;            /* init for empty page */
505
        s64 bn;                 /* block number */
506
        struct metapage *mp;    /* page buffer */
507
        xtpage_t *p;            /* page */
508
        xad_t *xad;
509
        int base, index, lim, btindex;
510
        struct btframe *btsp;
511
        int nsplit = 0;          /* number of pages to split */
512
        s64 t64;
513
 
514
        INCREMENT(xtStat.search);
515
 
516
        BT_CLR(btstack);
517
 
518
        btstack->nsplit = 0;
519
 
520
        /*
521
         *      search down tree from root:
522
         *
523
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
524
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
525
         *
526
         * if entry with search key K is not found
527
         * internal page search find the entry with largest key Ki
528
         * less than K which point to the child page to search;
529
         * leaf page search find the entry with smallest key Kj
530
         * greater than K so that the returned index is the position of
531
         * the entry to be shifted right for insertion of new entry.
532
         * for empty tree, search key is greater than any key of the tree.
533
         *
534
         * by convention, root bn = 0.
535
         */
536
        for (bn = 0;;) {
537
                /* get/pin the page to search */
538
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
539
                if (rc)
540
                        return rc;
541
 
542
                /* try sequential access heuristics with the previous
543
                 * access entry in target leaf page:
544
                 * once search narrowed down into the target leaf,
545
                 * key must either match an entry in the leaf or
546
                 * key entry does not exist in the tree;
547
                 */
548
//fastSearch:
549
                if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
550
                    (p->header.flag & BT_LEAF) &&
551
                    (index = jfs_ip->btindex) <
552
                    le16_to_cpu(p->header.nextindex)) {
553
                        xad = &p->xad[index];
554
                        t64 = offsetXAD(xad);
555
                        if (xoff < t64 + lengthXAD(xad)) {
556
                                if (xoff >= t64) {
557
                                        *cmpp = 0;
558
                                        goto out;
559
                                }
560
 
561
                                /* stop sequential access heuristics */
562
                                goto binarySearch;
563
                        } else {        /* (t64 + lengthXAD(xad)) <= xoff */
564
 
565
                                /* try next sequential entry */
566
                                index++;
567
                                if (index <
568
                                    le16_to_cpu(p->header.nextindex)) {
569
                                        xad++;
570
                                        t64 = offsetXAD(xad);
571
                                        if (xoff < t64 + lengthXAD(xad)) {
572
                                                if (xoff >= t64) {
573
                                                        *cmpp = 0;
574
                                                        goto out;
575
                                                }
576
 
577
                                                /* miss: key falls between
578
                                                 * previous and this entry
579
                                                 */
580
                                                *cmpp = 1;
581
                                                goto out;
582
                                        }
583
 
584
                                        /* (xoff >= t64 + lengthXAD(xad));
585
                                         * matching entry may be further out:
586
                                         * stop heuristic search
587
                                         */
588
                                        /* stop sequential access heuristics */
589
                                        goto binarySearch;
590
                                }
591
 
592
                                /* (index == p->header.nextindex);
593
                                 * miss: key entry does not exist in
594
                                 * the target leaf/tree
595
                                 */
596
                                *cmpp = 1;
597
                                goto out;
598
                        }
599
 
600
                        /*
601
                         * if hit, return index of the entry found, and
602
                         * if miss, where new entry with search key is
603
                         * to be inserted;
604
                         */
605
                      out:
606
                        /* compute number of pages to split */
607
                        if (flag & XT_INSERT) {
608
                                if (p->header.nextindex ==      /* little-endian */
609
                                    p->header.maxentry)
610
                                        nsplit++;
611
                                else
612
                                        nsplit = 0;
613
                                btstack->nsplit = nsplit;
614
                        }
615
 
616
                        /* save search result */
617
                        btsp = btstack->top;
618
                        btsp->bn = bn;
619
                        btsp->index = index;
620
                        btsp->mp = mp;
621
 
622
                        /* update sequential access heuristics */
623
                        jfs_ip->btindex = index;
624
 
625
                        INCREMENT(xtStat.fastSearch);
626
                        return 0;
627
                }
628
 
629
                /* well, ... full search now */
630
              binarySearch:
631
                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
632
 
633
                /*
634
                 * binary search with search key K on the current page
635
                 */
636
                for (base = XTENTRYSTART; lim; lim >>= 1) {
637
                        index = base + (lim >> 1);
638
 
639
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
640
                        if (cmp == 0) {
641
                                /*
642
                                 *      search hit
643
                                 */
644
                                /* search hit - leaf page:
645
                                 * return the entry found
646
                                 */
647
                                if (p->header.flag & BT_LEAF) {
648
                                        *cmpp = cmp;
649
 
650
                                        /* compute number of pages to split */
651
                                        if (flag & XT_INSERT) {
652
                                                if (p->header.nextindex ==
653
                                                    p->header.maxentry)
654
                                                        nsplit++;
655
                                                else
656
                                                        nsplit = 0;
657
                                                btstack->nsplit = nsplit;
658
                                        }
659
 
660
                                        /* save search result */
661
                                        btsp = btstack->top;
662
                                        btsp->bn = bn;
663
                                        btsp->index = index;
664
                                        btsp->mp = mp;
665
 
666
                                        /* init sequential access heuristics */
667
                                        btindex = jfs_ip->btindex;
668
                                        if (index == btindex ||
669
                                            index == btindex + 1)
670
                                                jfs_ip->btorder = BT_SEQUENTIAL;
671
                                        else
672
                                                jfs_ip->btorder = BT_RANDOM;
673
                                        jfs_ip->btindex = index;
674
 
675
                                        return 0;
676
                                }
677
 
678
                                /* search hit - internal page:
679
                                 * descend/search its child page
680
                                 */
681
                                goto next;
682
                        }
683
 
684
                        if (cmp > 0) {
685
                                base = index + 1;
686
                                --lim;
687
                        }
688
                }
689
 
690
                /*
691
                 *      search miss
692
                 *
693
                 * base is the smallest index with key (Kj) greater than
694
                 * search key (K) and may be zero or maxentry index.
695
                 */
696
                /*
697
                 * search miss - leaf page:
698
                 *
699
                 * return location of entry (base) where new entry with
700
                 * search key K is to be inserted.
701
                 */
702
                if (p->header.flag & BT_LEAF) {
703
                        *cmpp = cmp;
704
 
705
                        /* compute number of pages to split */
706
                        if (flag & XT_INSERT) {
707
                                if (p->header.nextindex ==
708
                                    p->header.maxentry)
709
                                        nsplit++;
710
                                else
711
                                        nsplit = 0;
712
                                btstack->nsplit = nsplit;
713
                        }
714
 
715
                        /* save search result */
716
                        btsp = btstack->top;
717
                        btsp->bn = bn;
718
                        btsp->index = base;
719
                        btsp->mp = mp;
720
 
721
                        /* init sequential access heuristics */
722
                        btindex = jfs_ip->btindex;
723
                        if (base == btindex || base == btindex + 1)
724
                                jfs_ip->btorder = BT_SEQUENTIAL;
725
                        else
726
                                jfs_ip->btorder = BT_RANDOM;
727
                        jfs_ip->btindex = base;
728
 
729
                        return 0;
730
                }
731
 
732
                /*
733
                 * search miss - non-leaf page:
734
                 *
735
                 * if base is non-zero, decrement base by one to get the parent
736
                 * entry of the child page to search.
737
                 */
738
                index = base ? base - 1 : base;
739
 
740
                /*
741
                 * go down to child page
742
                 */
743
              next:
744
                /* update number of pages to split */
745
                if (p->header.nextindex == p->header.maxentry)
746
                        nsplit++;
747
                else
748
                        nsplit = 0;
749
 
750
                /* push (bn, index) of the parent page/entry */
751
                BT_PUSH(btstack, bn, index);
752
 
753
                /* get the child page block number */
754
                bn = addressXAD(&p->xad[index]);
755
 
756
                /* unpin the parent page */
757
                XT_PUTPAGE(mp);
758
        }
759
}
760
 
761
/*
762
 *      xtInsert()
763
 *
764
 * function:
765
 *
766
 * parameter:
767
 *      tid     - transaction id;
768
 *      ip      - file object;
769
 *      xflag   - extent flag (XAD_NOTRECORDED):
770
 *      xoff    - extent offset;
771
 *      xlen    - extent length;
772
 *      xaddrp  - extent address pointer (in/out):
773
 *              if (*xaddrp)
774
 *                      caller allocated data extent at *xaddrp;
775
 *              else
776
 *                      allocate data extent and return its xaddr;
777
 *      flag    -
778
 *
779
 * return:
780
 */
781
int xtInsert(tid_t tid,         /* transaction id */
782
             struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
783
             int flag)
784
{
785
        int rc = 0;
786
        s64 xaddr, hint;
787
        struct metapage *mp;    /* meta-page buffer */
788
        xtpage_t *p;            /* base B+-tree index page */
789
        s64 bn;
790
        int index, nextindex;
791
        struct btstack btstack; /* traverse stack */
792
        struct xtsplit split;   /* split information */
793
        xad_t *xad;
794
        int cmp;
795
        struct tlock *tlck;
796
        struct xtlock *xtlck;
797
 
798
        jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
799
 
800
        /*
801
         *      search for the entry location at which to insert:
802
         *
803
         * xtFastSearch() and xtSearch() both returns (leaf page
804
         * pinned, index at which to insert).
805
         * n.b. xtSearch() may return index of maxentry of
806
         * the full page.
807
         */
808
        if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
809
                return rc;
810
 
811
        /* retrieve search result */
812
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
813
 
814
        /* This test must follow XT_GETSEARCH since mp must be valid if
815
         * we branch to out: */
816
        if (cmp == 0) {
817
                rc = -EEXIST;
818
                goto out;
819
        }
820
 
821
        /*
822
         * allocate data extent requested
823
         *
824
         * allocation hint: last xad
825
         */
826
        if ((xaddr = *xaddrp) == 0) {
827
                if (index > XTENTRYSTART) {
828
                        xad = &p->xad[index - 1];
829
                        hint = addressXAD(xad) + lengthXAD(xad) - 1;
830
                } else
831
                        hint = 0;
832
                if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr)))
833
                        goto out;
834
        }
835
 
836
        /*
837
         *      insert entry for new extent
838
         */
839
        xflag |= XAD_NEW;
840
 
841
        /*
842
         *      if the leaf page is full, split the page and
843
         *      propagate up the router entry for the new page from split
844
         *
845
         * The xtSplitUp() will insert the entry and unpin the leaf page.
846
         */
847
        nextindex = le16_to_cpu(p->header.nextindex);
848
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
849
                split.mp = mp;
850
                split.index = index;
851
                split.flag = xflag;
852
                split.off = xoff;
853
                split.len = xlen;
854
                split.addr = xaddr;
855
                split.pxdlist = NULL;
856
                if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
857
                        /* undo data extent allocation */
858
                        if (*xaddrp == 0)
859
                                dbFree(ip, xaddr, (s64) xlen);
860
                        return rc;
861
                }
862
 
863
                *xaddrp = xaddr;
864
                return 0;
865
        }
866
 
867
        /*
868
         *      insert the new entry into the leaf page
869
         */
870
        /*
871
         * acquire a transaction lock on the leaf page;
872
         *
873
         * action: xad insertion/extension;
874
         */
875
        BT_MARK_DIRTY(mp, ip);
876
 
877
        /* if insert into middle, shift right remaining entries. */
878
        if (index < nextindex)
879
                memmove(&p->xad[index + 1], &p->xad[index],
880
                        (nextindex - index) * sizeof(xad_t));
881
 
882
        /* insert the new entry: mark the entry NEW */
883
        xad = &p->xad[index];
884
        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
885
 
886
        /* advance next available entry index */
887
        p->header.nextindex =
888
            cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
889
 
890
        /* Don't log it if there are no links to the file */
891
        if (!test_cflag(COMMIT_Nolink, ip)) {
892
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
893
                xtlck = (struct xtlock *) & tlck->lock;
894
                xtlck->lwm.offset =
895
                    (xtlck->lwm.offset) ? min(index,
896
                                              (int)xtlck->lwm.offset) : index;
897
                xtlck->lwm.length =
898
                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
899
        }
900
 
901
        *xaddrp = xaddr;
902
 
903
      out:
904
        /* unpin the leaf page */
905
        XT_PUTPAGE(mp);
906
 
907
        return rc;
908
}
909
 
910
 
911
/*
912
 *      xtSplitUp()
913
 *
914
 * function:
915
 *      split full pages as propagating insertion up the tree
916
 *
917
 * parameter:
918
 *      tid     - transaction id;
919
 *      ip      - file object;
920
 *      split   - entry parameter descriptor;
921
 *      btstack - traverse stack from xtSearch()
922
 *
923
 * return:
924
 */
925
static int
926
xtSplitUp(tid_t tid,
927
          struct inode *ip, struct xtsplit * split, struct btstack * btstack)
928
{
929
        int rc = 0;
930
        struct metapage *smp;
931
        xtpage_t *sp;           /* split page */
932
        struct metapage *rmp;
933
        s64 rbn;                /* new right page block number */
934
        struct metapage *rcmp;
935
        xtpage_t *rcp;          /* right child page */
936
        s64 rcbn;               /* right child page block number */
937
        int skip;               /* index of entry of insertion */
938
        int nextindex;          /* next available entry index of p */
939
        struct btframe *parent; /* parent page entry on traverse stack */
940
        xad_t *xad;
941
        s64 xaddr;
942
        int xlen;
943
        int nsplit;             /* number of pages split */
944
        struct pxdlist pxdlist;
945
        pxd_t *pxd;
946
        struct tlock *tlck;
947
        struct xtlock *xtlck;
948
 
949
        smp = split->mp;
950
        sp = XT_PAGE(ip, smp);
951
 
952
        /* is inode xtree root extension/inline EA area free ? */
953
        if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
954
            (sp->header.maxentry < cpu_to_le16(XTROOTMAXSLOT)) &&
955
            (JFS_IP(ip)->mode2 & INLINEEA)) {
956
                sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
957
                JFS_IP(ip)->mode2 &= ~INLINEEA;
958
 
959
                BT_MARK_DIRTY(smp, ip);
960
                /*
961
                 * acquire a transaction lock on the leaf page;
962
                 *
963
                 * action: xad insertion/extension;
964
                 */
965
 
966
                /* if insert into middle, shift right remaining entries. */
967
                skip = split->index;
968
                nextindex = le16_to_cpu(sp->header.nextindex);
969
                if (skip < nextindex)
970
                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
971
                                (nextindex - skip) * sizeof(xad_t));
972
 
973
                /* insert the new entry: mark the entry NEW */
974
                xad = &sp->xad[skip];
975
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
976
                            split->addr);
977
 
978
                /* advance next available entry index */
979
                sp->header.nextindex =
980
                    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);
981
 
982
                /* Don't log it if there are no links to the file */
983
                if (!test_cflag(COMMIT_Nolink, ip)) {
984
                        tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
985
                        xtlck = (struct xtlock *) & tlck->lock;
986
                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
987
                            min(skip, (int)xtlck->lwm.offset) : skip;
988
                        xtlck->lwm.length =
989
                            le16_to_cpu(sp->header.nextindex) -
990
                            xtlck->lwm.offset;
991
                }
992
 
993
                return 0;
994
        }
995
 
996
        /*
997
         * allocate new index blocks to cover index page split(s)
998
         *
999
         * allocation hint: ?
1000
         */
1001
        if (split->pxdlist == NULL) {
1002
                nsplit = btstack->nsplit;
1003
                split->pxdlist = &pxdlist;
1004
                pxdlist.maxnpxd = pxdlist.npxd = 0;
1005
                pxd = &pxdlist.pxd[0];
1006
                xlen = JFS_SBI(ip->i_sb)->nbperpage;
1007
                for (; nsplit > 0; nsplit--, pxd++) {
1008
                        if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
1009
                            == 0) {
1010
                                PXDaddress(pxd, xaddr);
1011
                                PXDlength(pxd, xlen);
1012
 
1013
                                pxdlist.maxnpxd++;
1014
 
1015
                                continue;
1016
                        }
1017
 
1018
                        /* undo allocation */
1019
 
1020
                        XT_PUTPAGE(smp);
1021
                        return rc;
1022
                }
1023
        }
1024
 
1025
        /*
1026
         * Split leaf page <sp> into <sp> and a new right page <rp>.
1027
         *
1028
         * The split routines insert the new entry into the leaf page,
1029
         * and acquire txLock as appropriate.
1030
         * return <rp> pinned and its block number <rpbn>.
1031
         */
1032
        rc = (sp->header.flag & BT_ROOT) ?
1033
            xtSplitRoot(tid, ip, split, &rmp) :
1034
            xtSplitPage(tid, ip, split, &rmp, &rbn);
1035
 
1036
        XT_PUTPAGE(smp);
1037
 
1038
        if (rc)
1039
                return -EIO;
1040
        /*
1041
         * propagate up the router entry for the leaf page just split
1042
         *
1043
         * insert a router entry for the new page into the parent page,
1044
         * propagate the insert/split up the tree by walking back the stack
1045
         * of (bn of parent page, index of child page entry in parent page)
1046
         * that were traversed during the search for the page that split.
1047
         *
1048
         * the propagation of insert/split up the tree stops if the root
1049
         * splits or the page inserted into doesn't have to split to hold
1050
         * the new entry.
1051
         *
1052
         * the parent entry for the split page remains the same, and
1053
         * a new entry is inserted at its right with the first key and
1054
         * block number of the new right page.
1055
         *
1056
         * There are a maximum of 3 pages pinned at any time:
1057
         * right child, left parent and right parent (when the parent splits)
1058
         * to keep the child page pinned while working on the parent.
1059
         * make sure that all pins are released at exit.
1060
         */
1061
        while ((parent = BT_POP(btstack)) != NULL) {
1062
                /* parent page specified by stack frame <parent> */
1063
 
1064
                /* keep current child pages <rcp> pinned */
1065
                rcmp = rmp;
1066
                rcbn = rbn;
1067
                rcp = XT_PAGE(ip, rcmp);
1068
 
1069
                /*
1070
                 * insert router entry in parent for new right child page <rp>
1071
                 */
1072
                /* get/pin the parent page <sp> */
1073
                XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1074
                if (rc)
1075
                        goto errout2;
1076
 
1077
                /*
1078
                 * The new key entry goes ONE AFTER the index of parent entry,
1079
                 * because the split was to the right.
1080
                 */
1081
                skip = parent->index + 1;
1082
 
1083
                /*
1084
                 * split or shift right remaining entries of the parent page
1085
                 */
1086
                nextindex = le16_to_cpu(sp->header.nextindex);
1087
                /*
1088
                 * parent page is full - split the parent page
1089
                 */
1090
                if (nextindex == le16_to_cpu(sp->header.maxentry)) {
1091
                        /* init for parent page split */
1092
                        split->mp = smp;
1093
                        split->index = skip;    /* index at insert */
1094
                        split->flag = XAD_NEW;
1095
                        split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
1096
                        split->len = JFS_SBI(ip->i_sb)->nbperpage;
1097
                        split->addr = rcbn;
1098
 
1099
                        /* unpin previous right child page */
1100
                        XT_PUTPAGE(rcmp);
1101
 
1102
                        /* The split routines insert the new entry,
1103
                         * and acquire txLock as appropriate.
1104
                         * return <rp> pinned and its block number <rpbn>.
1105
                         */
1106
                        rc = (sp->header.flag & BT_ROOT) ?
1107
                            xtSplitRoot(tid, ip, split, &rmp) :
1108
                            xtSplitPage(tid, ip, split, &rmp, &rbn);
1109
                        if (rc)
1110
                                goto errout1;
1111
 
1112
                        XT_PUTPAGE(smp);
1113
                        /* keep new child page <rp> pinned */
1114
                }
1115
                /*
1116
                 * parent page is not full - insert in parent page
1117
                 */
1118
                else {
1119
                        /*
1120
                         * insert router entry in parent for the right child
1121
                         * page from the first entry of the right child page:
1122
                         */
1123
                        /*
1124
                         * acquire a transaction lock on the parent page;
1125
                         *
1126
                         * action: router xad insertion;
1127
                         */
1128
                        BT_MARK_DIRTY(smp, ip);
1129
 
1130
                        /*
1131
                         * if insert into middle, shift right remaining entries
1132
                         */
1133
                        if (skip < nextindex)
1134
                                memmove(&sp->xad[skip + 1], &sp->xad[skip],
1135
                                        (nextindex -
1136
                                         skip) << L2XTSLOTSIZE);
1137
 
1138
                        /* insert the router entry */
1139
                        xad = &sp->xad[skip];
1140
                        XT_PUTENTRY(xad, XAD_NEW,
1141
                                    offsetXAD(&rcp->xad[XTENTRYSTART]),
1142
                                    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
1143
 
1144
                        /* advance next available entry index. */
1145
                        sp->header.nextindex =
1146
                            cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
1147
                                        1);
1148
 
1149
                        /* Don't log it if there are no links to the file */
1150
                        if (!test_cflag(COMMIT_Nolink, ip)) {
1151
                                tlck = txLock(tid, ip, smp,
1152
                                              tlckXTREE | tlckGROW);
1153
                                xtlck = (struct xtlock *) & tlck->lock;
1154
                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
1155
                                    min(skip, (int)xtlck->lwm.offset) : skip;
1156
                                xtlck->lwm.length =
1157
                                    le16_to_cpu(sp->header.nextindex) -
1158
                                    xtlck->lwm.offset;
1159
                        }
1160
 
1161
                        /* unpin parent page */
1162
                        XT_PUTPAGE(smp);
1163
 
1164
                        /* exit propagate up */
1165
                        break;
1166
                }
1167
        }
1168
 
1169
        /* unpin current right page */
1170
        XT_PUTPAGE(rmp);
1171
 
1172
        return 0;
1173
 
1174
        /*
1175
         * If something fails in the above loop we were already walking back
1176
         * up the tree and the tree is now inconsistent.
1177
         * release all pages we're holding.
1178
         */
1179
      errout1:
1180
        XT_PUTPAGE(smp);
1181
 
1182
      errout2:
1183
        XT_PUTPAGE(rcmp);
1184
 
1185
        return rc;
1186
}
1187
 
1188
 
1189
/*
1190
 *      xtSplitPage()
1191
 *
1192
 * function:
1193
 *      split a full non-root page into
1194
 *      original/split/left page and new right page
1195
 *      i.e., the original/split page remains as left page.
1196
 *
1197
 * parameter:
1198
 *      int             tid,
1199
 *      struct inode    *ip,
1200
 *      struct xtsplit  *split,
1201
 *      struct metapage **rmpp,
1202
 *      u64             *rbnp,
1203
 *
1204
 * return:
1205
 *      Pointer to page in which to insert or NULL on error.
1206
 */
1207
static int
1208
xtSplitPage(tid_t tid, struct inode *ip,
1209
            struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
1210
{
1211
        int rc = 0;
1212
        struct metapage *smp;
1213
        xtpage_t *sp;
1214
        struct metapage *rmp;
1215
        xtpage_t *rp;           /* new right page allocated */
1216
        s64 rbn;                /* new right page block number */
1217
        struct metapage *mp;
1218
        xtpage_t *p;
1219
        s64 nextbn;
1220
        int skip, maxentry, middle, righthalf, n;
1221
        xad_t *xad;
1222
        struct pxdlist *pxdlist;
1223
        pxd_t *pxd;
1224
        struct tlock *tlck;
1225
        struct xtlock *sxtlck = 0, *rxtlck = 0;
1226
 
1227
        smp = split->mp;
1228
        sp = XT_PAGE(ip, smp);
1229
 
1230
        INCREMENT(xtStat.split);
1231
 
1232
        /*
1233
         * allocate the new right page for the split
1234
         */
1235
        pxdlist = split->pxdlist;
1236
        pxd = &pxdlist->pxd[pxdlist->npxd];
1237
        pxdlist->npxd++;
1238
        rbn = addressPXD(pxd);
1239
        rmp = get_metapage(ip, rbn, PSIZE, 1);
1240
        if (rmp == NULL)
1241
                return -EIO;
1242
 
1243
        jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1244
 
1245
        BT_MARK_DIRTY(rmp, ip);
1246
        /*
1247
         * action: new page;
1248
         */
1249
 
1250
        rp = (xtpage_t *) rmp->data;
1251
        rp->header.self = *pxd;
1252
        rp->header.flag = sp->header.flag & BT_TYPE;
1253
        rp->header.maxentry = sp->header.maxentry;      /* little-endian */
1254
        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1255
 
1256
        BT_MARK_DIRTY(smp, ip);
1257
        /* Don't log it if there are no links to the file */
1258
        if (!test_cflag(COMMIT_Nolink, ip)) {
1259
                /*
1260
                 * acquire a transaction lock on the new right page;
1261
                 */
1262
                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1263
                rxtlck = (struct xtlock *) & tlck->lock;
1264
                rxtlck->lwm.offset = XTENTRYSTART;
1265
                /*
1266
                 * acquire a transaction lock on the split page
1267
                 */
1268
                tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1269
                sxtlck = (struct xtlock *) & tlck->lock;
1270
        }
1271
 
1272
        /*
1273
         * initialize/update sibling pointers of <sp> and <rp>
1274
         */
1275
        nextbn = le64_to_cpu(sp->header.next);
1276
        rp->header.next = cpu_to_le64(nextbn);
1277
        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1278
        sp->header.next = cpu_to_le64(rbn);
1279
 
1280
        skip = split->index;
1281
 
1282
        /*
1283
         *      sequential append at tail (after last entry of last page)
1284
         *
1285
         * if splitting the last page on a level because of appending
1286
         * a entry to it (skip is maxentry), it's likely that the access is
1287
         * sequential. adding an empty page on the side of the level is less
1288
         * work and can push the fill factor much higher than normal.
1289
         * if we're wrong it's no big deal -  we will do the split the right
1290
         * way next time.
1291
         * (it may look like it's equally easy to do a similar hack for
1292
         * reverse sorted data, that is, split the tree left, but it's not.
1293
         * Be my guest.)
1294
         */
1295
        if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1296
                /*
1297
                 * acquire a transaction lock on the new/right page;
1298
                 *
1299
                 * action: xad insertion;
1300
                 */
1301
                /* insert entry at the first entry of the new right page */
1302
                xad = &rp->xad[XTENTRYSTART];
1303
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1304
                            split->addr);
1305
 
1306
                rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1307
 
1308
                if (!test_cflag(COMMIT_Nolink, ip)) {
1309
                        /* rxtlck->lwm.offset = XTENTRYSTART; */
1310
                        rxtlck->lwm.length = 1;
1311
                }
1312
 
1313
                *rmpp = rmp;
1314
                *rbnp = rbn;
1315
 
1316
                ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
1317
 
1318
                jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1319
                return 0;
1320
        }
1321
 
1322
        /*
1323
         *      non-sequential insert (at possibly middle page)
1324
         */
1325
 
1326
        /*
1327
         * update previous pointer of old next/right page of <sp>
1328
         */
1329
        if (nextbn != 0) {
1330
                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1331
                if (rc) {
1332
                        XT_PUTPAGE(rmp);
1333
                        return rc;
1334
                }
1335
 
1336
                BT_MARK_DIRTY(mp, ip);
1337
                /*
1338
                 * acquire a transaction lock on the next page;
1339
                 *
1340
                 * action:sibling pointer update;
1341
                 */
1342
                if (!test_cflag(COMMIT_Nolink, ip))
1343
                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1344
 
1345
                p->header.prev = cpu_to_le64(rbn);
1346
 
1347
                /* sibling page may have been updated previously, or
1348
                 * it may be updated later;
1349
                 */
1350
 
1351
                XT_PUTPAGE(mp);
1352
        }
1353
 
1354
        /*
1355
         * split the data between the split and new/right pages
1356
         */
1357
        maxentry = le16_to_cpu(sp->header.maxentry);
1358
        middle = maxentry >> 1;
1359
        righthalf = maxentry - middle;
1360
 
1361
        /*
1362
         * skip index in old split/left page - insert into left page:
1363
         */
1364
        if (skip <= middle) {
1365
                /* move right half of split page to the new right page */
1366
                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1367
                        righthalf << L2XTSLOTSIZE);
1368
 
1369
                /* shift right tail of left half to make room for new entry */
1370
                if (skip < middle)
1371
                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
1372
                                (middle - skip) << L2XTSLOTSIZE);
1373
 
1374
                /* insert new entry */
1375
                xad = &sp->xad[skip];
1376
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1377
                            split->addr);
1378
 
1379
                /* update page header */
1380
                sp->header.nextindex = cpu_to_le16(middle + 1);
1381
                if (!test_cflag(COMMIT_Nolink, ip)) {
1382
                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1383
                            min(skip, (int)sxtlck->lwm.offset) : skip;
1384
                }
1385
 
1386
                rp->header.nextindex =
1387
                    cpu_to_le16(XTENTRYSTART + righthalf);
1388
        }
1389
        /*
1390
         * skip index in new right page - insert into right page:
1391
         */
1392
        else {
1393
                /* move left head of right half to right page */
1394
                n = skip - middle;
1395
                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1396
                        n << L2XTSLOTSIZE);
1397
 
1398
                /* insert new entry */
1399
                n += XTENTRYSTART;
1400
                xad = &rp->xad[n];
1401
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1402
                            split->addr);
1403
 
1404
                /* move right tail of right half to right page */
1405
                if (skip < maxentry)
1406
                        memmove(&rp->xad[n + 1], &sp->xad[skip],
1407
                                (maxentry - skip) << L2XTSLOTSIZE);
1408
 
1409
                /* update page header */
1410
                sp->header.nextindex = cpu_to_le16(middle);
1411
                if (!test_cflag(COMMIT_Nolink, ip)) {
1412
                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1413
                            min(middle, (int)sxtlck->lwm.offset) : middle;
1414
                }
1415
 
1416
                rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1417
                                                   righthalf + 1);
1418
        }
1419
 
1420
        if (!test_cflag(COMMIT_Nolink, ip)) {
1421
                sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1422
                    sxtlck->lwm.offset;
1423
 
1424
                /* rxtlck->lwm.offset = XTENTRYSTART; */
1425
                rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1426
                    XTENTRYSTART;
1427
        }
1428
 
1429
        *rmpp = rmp;
1430
        *rbnp = rbn;
1431
 
1432
        ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
1433
 
1434
        jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1435
        return rc;
1436
}
1437
 
1438
 
1439
/*
1440
 *      xtSplitRoot()
1441
 *
1442
 * function:
1443
 *      split the full root page into
1444
 *      original/root/split page and new right page
1445
 *      i.e., root remains fixed in tree anchor (inode) and
1446
 *      the root is copied to a single new right child page
1447
 *      since root page << non-root page, and
1448
 *      the split root page contains a single entry for the
1449
 *      new right child page.
1450
 *
1451
 * parameter:
1452
 *      int             tid,
1453
 *      struct inode    *ip,
1454
 *      struct xtsplit  *split,
1455
 *      struct metapage **rmpp)
1456
 *
1457
 * return:
1458
 *      Pointer to page in which to insert or NULL on error.
1459
 */
1460
static int
1461
xtSplitRoot(tid_t tid,
1462
            struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1463
{
1464
        xtpage_t *sp;
1465
        struct metapage *rmp;
1466
        xtpage_t *rp;
1467
        s64 rbn;
1468
        int skip, nextindex;
1469
        xad_t *xad;
1470
        pxd_t *pxd;
1471
        struct pxdlist *pxdlist;
1472
        struct tlock *tlck;
1473
        struct xtlock *xtlck;
1474
 
1475
        sp = &JFS_IP(ip)->i_xtroot;
1476
 
1477
        INCREMENT(xtStat.split);
1478
 
1479
        /*
1480
         *      allocate a single (right) child page
1481
         */
1482
        pxdlist = split->pxdlist;
1483
        pxd = &pxdlist->pxd[pxdlist->npxd];
1484
        pxdlist->npxd++;
1485
        rbn = addressPXD(pxd);
1486
        rmp = get_metapage(ip, rbn, PSIZE, 1);
1487
        if (rmp == NULL)
1488
                return -EIO;
1489
 
1490
        jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1491
 
1492
        /*
1493
         * acquire a transaction lock on the new right page;
1494
         *
1495
         * action: new page;
1496
         */
1497
        BT_MARK_DIRTY(rmp, ip);
1498
 
1499
        rp = (xtpage_t *) rmp->data;
1500
        rp->header.flag =
1501
            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1502
        rp->header.self = *pxd;
1503
        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1504
        rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1505
 
1506
        /* initialize sibling pointers */
1507
        rp->header.next = 0;
1508
        rp->header.prev = 0;
1509
 
1510
        /*
1511
         * copy the in-line root page into new right page extent
1512
         */
1513
        nextindex = le16_to_cpu(sp->header.maxentry);
1514
        memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1515
                (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1516
 
1517
        /*
1518
         * insert the new entry into the new right/child page
1519
         * (skip index in the new right page will not change)
1520
         */
1521
        skip = split->index;
1522
        /* if insert into middle, shift right remaining entries */
1523
        if (skip != nextindex)
1524
                memmove(&rp->xad[skip + 1], &rp->xad[skip],
1525
                        (nextindex - skip) * sizeof(xad_t));
1526
 
1527
        xad = &rp->xad[skip];
1528
        XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1529
 
1530
        /* update page header */
1531
        rp->header.nextindex = cpu_to_le16(nextindex + 1);
1532
 
1533
        if (!test_cflag(COMMIT_Nolink, ip)) {
1534
                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1535
                xtlck = (struct xtlock *) & tlck->lock;
1536
                xtlck->lwm.offset = XTENTRYSTART;
1537
                xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1538
                    XTENTRYSTART;
1539
        }
1540
 
1541
        /*
1542
         *      reset the root
1543
         *
1544
         * init root with the single entry for the new right page
1545
         * set the 1st entry offset to 0, which force the left-most key
1546
         * at any level of the tree to be less than any search key.
1547
         */
1548
        /*
1549
         * acquire a transaction lock on the root page (in-memory inode);
1550
         *
1551
         * action: root split;
1552
         */
1553
        BT_MARK_DIRTY(split->mp, ip);
1554
 
1555
        xad = &sp->xad[XTENTRYSTART];
1556
        XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1557
 
1558
        /* update page header of root */
1559
        sp->header.flag &= ~BT_LEAF;
1560
        sp->header.flag |= BT_INTERNAL;
1561
 
1562
        sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1563
 
1564
        if (!test_cflag(COMMIT_Nolink, ip)) {
1565
                tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1566
                xtlck = (struct xtlock *) & tlck->lock;
1567
                xtlck->lwm.offset = XTENTRYSTART;
1568
                xtlck->lwm.length = 1;
1569
        }
1570
 
1571
        *rmpp = rmp;
1572
 
1573
        ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
1574
 
1575
        jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1576
        return 0;
1577
}
1578
 
1579
 
1580
/*
1581
 *      xtExtend()
1582
 *
1583
 * function: extend in-place;
1584
 *
1585
 * note: existing extent may or may not have been committed.
1586
 * caller is responsible for pager buffer cache update, and
1587
 * working block allocation map update;
1588
 * update pmap: alloc whole extended extent;
1589
 */
1590
int xtExtend(tid_t tid,         /* transaction id */
1591
             struct inode *ip, s64 xoff,        /* delta extent offset */
1592
             s32 xlen,          /* delta extent length */
1593
             int flag)
1594
{
1595
        int rc = 0;
1596
        int cmp;
1597
        struct metapage *mp;    /* meta-page buffer */
1598
        xtpage_t *p;            /* base B+-tree index page */
1599
        s64 bn;
1600
        int index, nextindex, len;
1601
        struct btstack btstack; /* traverse stack */
1602
        struct xtsplit split;   /* split information */
1603
        xad_t *xad;
1604
        s64 xaddr;
1605
        struct tlock *tlck;
1606
        struct xtlock *xtlck = 0;
1607
        int rootsplit = 0;
1608
 
1609
        jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1610
 
1611
        /* there must exist extent to be extended */
1612
        if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT)))
1613
                return rc;
1614
 
1615
        /* retrieve search result */
1616
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1617
 
1618
        if (cmp != 0) {
1619
                XT_PUTPAGE(mp);
1620
                jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
1621
                return -EIO;
1622
        }
1623
 
1624
        /* extension must be contiguous */
1625
        xad = &p->xad[index];
1626
        if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1627
                XT_PUTPAGE(mp);
1628
                jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
1629
                return -EIO;
1630
        }
1631
 
1632
        /*
1633
         * acquire a transaction lock on the leaf page;
1634
         *
1635
         * action: xad insertion/extension;
1636
         */
1637
        BT_MARK_DIRTY(mp, ip);
1638
        if (!test_cflag(COMMIT_Nolink, ip)) {
1639
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1640
                xtlck = (struct xtlock *) & tlck->lock;
1641
        }
1642
 
1643
        /* extend will overflow extent ? */
1644
        xlen = lengthXAD(xad) + xlen;
1645
        if ((len = xlen - MAXXLEN) <= 0)
1646
                goto extendOld;
1647
 
1648
        /*
1649
         *      extent overflow: insert entry for new extent
1650
         */
1651
//insertNew:
1652
        xoff = offsetXAD(xad) + MAXXLEN;
1653
        xaddr = addressXAD(xad) + MAXXLEN;
1654
        nextindex = le16_to_cpu(p->header.nextindex);
1655
 
1656
        /*
1657
         *      if the leaf page is full, insert the new entry and
1658
         *      propagate up the router entry for the new page from split
1659
         *
1660
         * The xtSplitUp() will insert the entry and unpin the leaf page.
1661
         */
1662
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1663
                rootsplit = p->header.flag & BT_ROOT;
1664
 
1665
                /* xtSpliUp() unpins leaf pages */
1666
                split.mp = mp;
1667
                split.index = index + 1;
1668
                split.flag = XAD_NEW;
1669
                split.off = xoff;       /* split offset */
1670
                split.len = len;
1671
                split.addr = xaddr;
1672
                split.pxdlist = NULL;
1673
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1674
                        return rc;
1675
 
1676
                /*
1677
                 * if leaf root has been split, original root has been
1678
                 * copied to new child page, i.e., original entry now
1679
                 * resides on the new child page;
1680
                 */
1681
                if (rootsplit) {
1682
                        ASSERT(p->header.nextindex ==
1683
                               cpu_to_le16(XTENTRYSTART + 1));
1684
                        xad = &p->xad[XTENTRYSTART];
1685
                        bn = addressXAD(xad);
1686
 
1687
                        /* get new child page */
1688
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1689
                        if (rc)
1690
                                return rc;
1691
 
1692
                        BT_MARK_DIRTY(mp, ip);
1693
                        if (!test_cflag(COMMIT_Nolink, ip)) {
1694
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1695
                                xtlck = (struct xtlock *) & tlck->lock;
1696
                        }
1697
                } else {
1698
                        /* get back old page */
1699
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1700
                        if (rc)
1701
                                return rc;
1702
                }
1703
        }
1704
        /*
1705
         *      insert the new entry into the leaf page
1706
         */
1707
        else {
1708
                /* insert the new entry: mark the entry NEW */
1709
                xad = &p->xad[index + 1];
1710
                XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1711
 
1712
                /* advance next available entry index */
1713
                p->header.nextindex =
1714
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1715
        }
1716
 
1717
        /* get back old entry */
1718
        xad = &p->xad[index];
1719
        xlen = MAXXLEN;
1720
 
1721
        /*
1722
         * extend old extent
1723
         */
1724
      extendOld:
1725
        XADlength(xad, xlen);
1726
        if (!(xad->flag & XAD_NEW))
1727
                xad->flag |= XAD_EXTENDED;
1728
 
1729
        if (!test_cflag(COMMIT_Nolink, ip)) {
1730
                xtlck->lwm.offset =
1731
                    (xtlck->lwm.offset) ? min(index,
1732
                                              (int)xtlck->lwm.offset) : index;
1733
                xtlck->lwm.length =
1734
                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1735
        }
1736
 
1737
        /* unpin the leaf page */
1738
        XT_PUTPAGE(mp);
1739
 
1740
        return rc;
1741
}
1742
 
1743
#ifdef _NOTYET
1744
/*
1745
 *      xtTailgate()
1746
 *
1747
 * function: split existing 'tail' extent
1748
 *      (split offset >= start offset of tail extent), and
1749
 *      relocate and extend the split tail half;
1750
 *
1751
 * note: existing extent may or may not have been committed.
1752
 * caller is responsible for pager buffer cache update, and
1753
 * working block allocation map update;
1754
 * update pmap: free old split tail extent, alloc new extent;
1755
 */
1756
int xtTailgate(tid_t tid,               /* transaction id */
1757
               struct inode *ip, s64 xoff,      /* split/new extent offset */
1758
               s32 xlen,        /* new extent length */
1759
               s64 xaddr,       /* new extent address */
1760
               int flag)
1761
{
1762
        int rc = 0;
1763
        int cmp;
1764
        struct metapage *mp;    /* meta-page buffer */
1765
        xtpage_t *p;            /* base B+-tree index page */
1766
        s64 bn;
1767
        int index, nextindex, llen, rlen;
1768
        struct btstack btstack; /* traverse stack */
1769
        struct xtsplit split;   /* split information */
1770
        xad_t *xad;
1771
        struct tlock *tlck;
1772
        struct xtlock *xtlck = 0;
1773
        struct tlock *mtlck;
1774
        struct maplock *pxdlock;
1775
        int rootsplit = 0;
1776
 
1777
/*
1778
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1779
        (ulong)xoff, xlen, (ulong)xaddr);
1780
*/
1781
 
1782
        /* there must exist extent to be tailgated */
1783
        if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
1784
                return rc;
1785
 
1786
        /* retrieve search result */
1787
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1788
 
1789
        if (cmp != 0) {
1790
                XT_PUTPAGE(mp);
1791
                jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
1792
                return -EIO;
1793
        }
1794
 
1795
        /* entry found must be last entry */
1796
        nextindex = le16_to_cpu(p->header.nextindex);
1797
        if (index != nextindex - 1) {
1798
                XT_PUTPAGE(mp);
1799
                jfs_error(ip->i_sb,
1800
                          "xtTailgate: the entry found is not the last entry");
1801
                return -EIO;
1802
        }
1803
 
1804
        BT_MARK_DIRTY(mp, ip);
1805
        /*
1806
         * acquire tlock of the leaf page containing original entry
1807
         */
1808
        if (!test_cflag(COMMIT_Nolink, ip)) {
1809
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1810
                xtlck = (struct xtlock *) & tlck->lock;
1811
        }
1812
 
1813
        /* completely replace extent ? */
1814
        xad = &p->xad[index];
1815
/*
1816
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1817
        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1818
*/
1819
        if ((llen = xoff - offsetXAD(xad)) == 0)
1820
                goto updateOld;
1821
 
1822
        /*
1823
         *      partially replace extent: insert entry for new extent
1824
         */
1825
//insertNew:
1826
        /*
1827
         *      if the leaf page is full, insert the new entry and
1828
         *      propagate up the router entry for the new page from split
1829
         *
1830
         * The xtSplitUp() will insert the entry and unpin the leaf page.
1831
         */
1832
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1833
                rootsplit = p->header.flag & BT_ROOT;
1834
 
1835
                /* xtSpliUp() unpins leaf pages */
1836
                split.mp = mp;
1837
                split.index = index + 1;
1838
                split.flag = XAD_NEW;
1839
                split.off = xoff;       /* split offset */
1840
                split.len = xlen;
1841
                split.addr = xaddr;
1842
                split.pxdlist = NULL;
1843
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1844
                        return rc;
1845
 
1846
                /*
1847
                 * if leaf root has been split, original root has been
1848
                 * copied to new child page, i.e., original entry now
1849
                 * resides on the new child page;
1850
                 */
1851
                if (rootsplit) {
1852
                        ASSERT(p->header.nextindex ==
1853
                               cpu_to_le16(XTENTRYSTART + 1));
1854
                        xad = &p->xad[XTENTRYSTART];
1855
                        bn = addressXAD(xad);
1856
 
1857
                        /* get new child page */
1858
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1859
                        if (rc)
1860
                                return rc;
1861
 
1862
                        BT_MARK_DIRTY(mp, ip);
1863
                        if (!test_cflag(COMMIT_Nolink, ip)) {
1864
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1865
                                xtlck = (struct xtlock *) & tlck->lock;
1866
                        }
1867
                } else {
1868
                        /* get back old page */
1869
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1870
                        if (rc)
1871
                                return rc;
1872
                }
1873
        }
1874
        /*
1875
         *      insert the new entry into the leaf page
1876
         */
1877
        else {
1878
                /* insert the new entry: mark the entry NEW */
1879
                xad = &p->xad[index + 1];
1880
                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1881
 
1882
                /* advance next available entry index */
1883
                p->header.nextindex =
1884
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1885
        }
1886
 
1887
        /* get back old XAD */
1888
        xad = &p->xad[index];
1889
 
1890
        /*
1891
         * truncate/relocate old extent at split offset
1892
         */
1893
      updateOld:
1894
        /* update dmap for old/committed/truncated extent */
1895
        rlen = lengthXAD(xad) - llen;
1896
        if (!(xad->flag & XAD_NEW)) {
1897
                /* free from PWMAP at commit */
1898
                if (!test_cflag(COMMIT_Nolink, ip)) {
1899
                        mtlck = txMaplock(tid, ip, tlckMAP);
1900
                        pxdlock = (struct maplock *) & mtlck->lock;
1901
                        pxdlock->flag = mlckFREEPXD;
1902
                        PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
1903
                        PXDlength(&pxdlock->pxd, rlen);
1904
                        pxdlock->index = 1;
1905
                }
1906
        } else
1907
                /* free from WMAP */
1908
                dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
1909
 
1910
        if (llen)
1911
                /* truncate */
1912
                XADlength(xad, llen);
1913
        else
1914
                /* replace */
1915
                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1916
 
1917
        if (!test_cflag(COMMIT_Nolink, ip)) {
1918
                xtlck->lwm.offset = (xtlck->lwm.offset) ?
1919
                    min(index, (int)xtlck->lwm.offset) : index;
1920
                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1921
                    xtlck->lwm.offset;
1922
        }
1923
 
1924
        /* unpin the leaf page */
1925
        XT_PUTPAGE(mp);
1926
 
1927
        return rc;
1928
}
1929
#endif /* _NOTYET */
1930
 
1931
/*
1932
 *      xtUpdate()
1933
 *
1934
 * function: update XAD;
1935
 *
1936
 *      update extent for allocated_but_not_recorded or
1937
 *      compressed extent;
1938
 *
1939
 * parameter:
1940
 *      nxad    - new XAD;
1941
 *                logical extent of the specified XAD must be completely
1942
 *                contained by an existing XAD;
1943
 */
1944
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1945
{                               /* new XAD */
1946
        int rc = 0;
1947
        int cmp;
1948
        struct metapage *mp;    /* meta-page buffer */
1949
        xtpage_t *p;            /* base B+-tree index page */
1950
        s64 bn;
1951
        int index0, index, newindex, nextindex;
1952
        struct btstack btstack; /* traverse stack */
1953
        struct xtsplit split;   /* split information */
1954
        xad_t *xad, *lxad, *rxad;
1955
        int xflag;
1956
        s64 nxoff, xoff;
1957
        int nxlen, xlen, lxlen, rxlen;
1958
        s64 nxaddr, xaddr;
1959
        struct tlock *tlck;
1960
        struct xtlock *xtlck = 0;
1961
        int rootsplit = 0, newpage = 0;
1962
 
1963
        /* there must exist extent to be tailgated */
1964
        nxoff = offsetXAD(nxad);
1965
        nxlen = lengthXAD(nxad);
1966
        nxaddr = addressXAD(nxad);
1967
 
1968
        if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
1969
                return rc;
1970
 
1971
        /* retrieve search result */
1972
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1973
 
1974
        if (cmp != 0) {
1975
                XT_PUTPAGE(mp);
1976
                jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
1977
                return -EIO;
1978
        }
1979
 
1980
        BT_MARK_DIRTY(mp, ip);
1981
        /*
1982
         * acquire tlock of the leaf page containing original entry
1983
         */
1984
        if (!test_cflag(COMMIT_Nolink, ip)) {
1985
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1986
                xtlck = (struct xtlock *) & tlck->lock;
1987
        }
1988
 
1989
        xad = &p->xad[index0];
1990
        xflag = xad->flag;
1991
        xoff = offsetXAD(xad);
1992
        xlen = lengthXAD(xad);
1993
        xaddr = addressXAD(xad);
1994
 
1995
        /* nXAD must be completely contained within XAD */
1996
        if ((xoff > nxoff) ||
1997
            (nxoff + nxlen > xoff + xlen)) {
1998
                XT_PUTPAGE(mp);
1999
                jfs_error(ip->i_sb,
2000
                          "xtUpdate: nXAD in not completely contained within XAD");
2001
                return -EIO;
2002
        }
2003
 
2004
        index = index0;
2005
        newindex = index + 1;
2006
        nextindex = le16_to_cpu(p->header.nextindex);
2007
 
2008
#ifdef  _JFS_WIP_NOCOALESCE
2009
        if (xoff < nxoff)
2010
                goto updateRight;
2011
 
2012
        /*
2013
         * replace XAD with nXAD
2014
         */
2015
      replace:                  /* (nxoff == xoff) */
2016
        if (nxlen == xlen) {
2017
                /* replace XAD with nXAD:recorded */
2018
                *xad = *nxad;
2019
                xad->flag = xflag & ~XAD_NOTRECORDED;
2020
 
2021
                goto out;
2022
        } else                  /* (nxlen < xlen) */
2023
                goto updateLeft;
2024
#endif                          /* _JFS_WIP_NOCOALESCE */
2025
 
2026
/* #ifdef _JFS_WIP_COALESCE */
2027
        if (xoff < nxoff)
2028
                goto coalesceRight;
2029
 
2030
        /*
2031
         * coalesce with left XAD
2032
         */
2033
//coalesceLeft: /* (xoff == nxoff) */
2034
        /* is XAD first entry of page ? */
2035
        if (index == XTENTRYSTART)
2036
                goto replace;
2037
 
2038
        /* is nXAD logically and physically contiguous with lXAD ? */
2039
        lxad = &p->xad[index - 1];
2040
        lxlen = lengthXAD(lxad);
2041
        if (!(lxad->flag & XAD_NOTRECORDED) &&
2042
            (nxoff == offsetXAD(lxad) + lxlen) &&
2043
            (nxaddr == addressXAD(lxad) + lxlen) &&
2044
            (lxlen + nxlen < MAXXLEN)) {
2045
                /* extend right lXAD */
2046
                index0 = index - 1;
2047
                XADlength(lxad, lxlen + nxlen);
2048
 
2049
                /* If we just merged two extents together, need to make sure the
2050
                 * right extent gets logged.  If the left one is marked XAD_NEW,
2051
                 * then we know it will be logged.  Otherwise, mark as
2052
                 * XAD_EXTENDED
2053
                 */
2054
                if (!(lxad->flag & XAD_NEW))
2055
                        lxad->flag |= XAD_EXTENDED;
2056
 
2057
                if (xlen > nxlen) {
2058
                        /* truncate XAD */
2059
                        XADoffset(xad, xoff + nxlen);
2060
                        XADlength(xad, xlen - nxlen);
2061
                        XADaddress(xad, xaddr + nxlen);
2062
                        goto out;
2063
                } else {        /* (xlen == nxlen) */
2064
 
2065
                        /* remove XAD */
2066
                        if (index < nextindex - 1)
2067
                                memmove(&p->xad[index], &p->xad[index + 1],
2068
                                        (nextindex - index -
2069
                                         1) << L2XTSLOTSIZE);
2070
 
2071
                        p->header.nextindex =
2072
                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2073
                                        1);
2074
 
2075
                        index = index0;
2076
                        newindex = index + 1;
2077
                        nextindex = le16_to_cpu(p->header.nextindex);
2078
                        xoff = nxoff = offsetXAD(lxad);
2079
                        xlen = nxlen = lxlen + nxlen;
2080
                        xaddr = nxaddr = addressXAD(lxad);
2081
                        goto coalesceRight;
2082
                }
2083
        }
2084
 
2085
        /*
2086
         * replace XAD with nXAD
2087
         */
2088
      replace:                  /* (nxoff == xoff) */
2089
        if (nxlen == xlen) {
2090
                /* replace XAD with nXAD:recorded */
2091
                *xad = *nxad;
2092
                xad->flag = xflag & ~XAD_NOTRECORDED;
2093
 
2094
                goto coalesceRight;
2095
        } else                  /* (nxlen < xlen) */
2096
                goto updateLeft;
2097
 
2098
        /*
2099
         * coalesce with right XAD
2100
         */
2101
      coalesceRight:            /* (xoff <= nxoff) */
2102
        /* is XAD last entry of page ? */
2103
        if (newindex == nextindex) {
2104
                if (xoff == nxoff)
2105
                        goto out;
2106
                goto updateRight;
2107
        }
2108
 
2109
        /* is nXAD logically and physically contiguous with rXAD ? */
2110
        rxad = &p->xad[index + 1];
2111
        rxlen = lengthXAD(rxad);
2112
        if (!(rxad->flag & XAD_NOTRECORDED) &&
2113
            (nxoff + nxlen == offsetXAD(rxad)) &&
2114
            (nxaddr + nxlen == addressXAD(rxad)) &&
2115
            (rxlen + nxlen < MAXXLEN)) {
2116
                /* extend left rXAD */
2117
                XADoffset(rxad, nxoff);
2118
                XADlength(rxad, rxlen + nxlen);
2119
                XADaddress(rxad, nxaddr);
2120
 
2121
                /* If we just merged two extents together, need to make sure
2122
                 * the left extent gets logged.  If the right one is marked
2123
                 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
2124
                 * XAD_EXTENDED
2125
                 */
2126
                if (!(rxad->flag & XAD_NEW))
2127
                        rxad->flag |= XAD_EXTENDED;
2128
 
2129
                if (xlen > nxlen)
2130
                        /* truncate XAD */
2131
                        XADlength(xad, xlen - nxlen);
2132
                else {          /* (xlen == nxlen) */
2133
 
2134
                        /* remove XAD */
2135
                        memmove(&p->xad[index], &p->xad[index + 1],
2136
                                (nextindex - index - 1) << L2XTSLOTSIZE);
2137
 
2138
                        p->header.nextindex =
2139
                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2140
                                        1);
2141
                }
2142
 
2143
                goto out;
2144
        } else if (xoff == nxoff)
2145
                goto out;
2146
 
2147
        if (xoff >= nxoff) {
2148
                XT_PUTPAGE(mp);
2149
                jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
2150
                return -EIO;
2151
        }
2152
/* #endif _JFS_WIP_COALESCE */
2153
 
2154
        /*
2155
         * split XAD into (lXAD, nXAD):
2156
         *
2157
         *          |---nXAD--->
2158
         * --|----------XAD----------|--
2159
         *   |-lXAD-|
2160
         */
2161
      updateRight:              /* (xoff < nxoff) */
2162
        /* truncate old XAD as lXAD:not_recorded */
2163
        xad = &p->xad[index];
2164
        XADlength(xad, nxoff - xoff);
2165
 
2166
        /* insert nXAD:recorded */
2167
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2168
                rootsplit = p->header.flag & BT_ROOT;
2169
 
2170
                /* xtSpliUp() unpins leaf pages */
2171
                split.mp = mp;
2172
                split.index = newindex;
2173
                split.flag = xflag & ~XAD_NOTRECORDED;
2174
                split.off = nxoff;
2175
                split.len = nxlen;
2176
                split.addr = nxaddr;
2177
                split.pxdlist = NULL;
2178
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2179
                        return rc;
2180
 
2181
                /*
2182
                 * if leaf root has been split, original root has been
2183
                 * copied to new child page, i.e., original entry now
2184
                 * resides on the new child page;
2185
                 */
2186
                if (rootsplit) {
2187
                        ASSERT(p->header.nextindex ==
2188
                               cpu_to_le16(XTENTRYSTART + 1));
2189
                        xad = &p->xad[XTENTRYSTART];
2190
                        bn = addressXAD(xad);
2191
 
2192
                        /* get new child page */
2193
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2194
                        if (rc)
2195
                                return rc;
2196
 
2197
                        BT_MARK_DIRTY(mp, ip);
2198
                        if (!test_cflag(COMMIT_Nolink, ip)) {
2199
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2200
                                xtlck = (struct xtlock *) & tlck->lock;
2201
                        }
2202
                } else {
2203
                        /* get back old page */
2204
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2205
                        if (rc)
2206
                                return rc;
2207
 
2208
                        /* is nXAD on new page ? */
2209
                        if (newindex >
2210
                            (le16_to_cpu(p->header.maxentry) >> 1)) {
2211
                                newindex =
2212
                                    newindex -
2213
                                    le16_to_cpu(p->header.nextindex) +
2214
                                    XTENTRYSTART;
2215
                                newpage = 1;
2216
                        }
2217
                }
2218
        } else {
2219
                /* if insert into middle, shift right remaining entries */
2220
                if (newindex < nextindex)
2221
                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
2222
                                (nextindex - newindex) << L2XTSLOTSIZE);
2223
 
2224
                /* insert the entry */
2225
                xad = &p->xad[newindex];
2226
                *xad = *nxad;
2227
                xad->flag = xflag & ~XAD_NOTRECORDED;
2228
 
2229
                /* advance next available entry index. */
2230
                p->header.nextindex =
2231
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2232
        }
2233
 
2234
        /*
2235
         * does nXAD force 3-way split ?
2236
         *
2237
         *          |---nXAD--->|
2238
         * --|----------XAD-------------|--
2239
         *   |-lXAD-|           |-rXAD -|
2240
         */
2241
        if (nxoff + nxlen == xoff + xlen)
2242
                goto out;
2243
 
2244
        /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
2245
        if (newpage) {
2246
                /* close out old page */
2247
                if (!test_cflag(COMMIT_Nolink, ip)) {
2248
                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
2249
                            min(index0, (int)xtlck->lwm.offset) : index0;
2250
                        xtlck->lwm.length =
2251
                            le16_to_cpu(p->header.nextindex) -
2252
                            xtlck->lwm.offset;
2253
                }
2254
 
2255
                bn = le64_to_cpu(p->header.next);
2256
                XT_PUTPAGE(mp);
2257
 
2258
                /* get new right page */
2259
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2260
                if (rc)
2261
                        return rc;
2262
 
2263
                BT_MARK_DIRTY(mp, ip);
2264
                if (!test_cflag(COMMIT_Nolink, ip)) {
2265
                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2266
                        xtlck = (struct xtlock *) & tlck->lock;
2267
                }
2268
 
2269
                index0 = index = newindex;
2270
        } else
2271
                index++;
2272
 
2273
        newindex = index + 1;
2274
        nextindex = le16_to_cpu(p->header.nextindex);
2275
        xlen = xlen - (nxoff - xoff);
2276
        xoff = nxoff;
2277
        xaddr = nxaddr;
2278
 
2279
        /* recompute split pages */
2280
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2281
                XT_PUTPAGE(mp);
2282
 
2283
                if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
2284
                        return rc;
2285
 
2286
                /* retrieve search result */
2287
                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2288
 
2289
                if (cmp != 0) {
2290
                        XT_PUTPAGE(mp);
2291
                        jfs_error(ip->i_sb, "xtUpdate: xtSearch failed");
2292
                        return -EIO;
2293
                }
2294
 
2295
                if (index0 != index) {
2296
                        XT_PUTPAGE(mp);
2297
                        jfs_error(ip->i_sb,
2298
                                  "xtUpdate: unexpected value of index");
2299
                        return -EIO;
2300
                }
2301
        }
2302
 
2303
        /*
2304
         * split XAD into (nXAD, rXAD)
2305
         *
2306
         *          ---nXAD---|
2307
         * --|----------XAD----------|--
2308
         *                    |-rXAD-|
2309
         */
2310
      updateLeft:               /* (nxoff == xoff) && (nxlen < xlen) */
2311
        /* update old XAD with nXAD:recorded */
2312
        xad = &p->xad[index];
2313
        *xad = *nxad;
2314
        xad->flag = xflag & ~XAD_NOTRECORDED;
2315
 
2316
        /* insert rXAD:not_recorded */
2317
        xoff = xoff + nxlen;
2318
        xlen = xlen - nxlen;
2319
        xaddr = xaddr + nxlen;
2320
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2321
                rootsplit = p->header.flag & BT_ROOT;
2322
 
2323
/*
2324
printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2325
*/
2326
                /* xtSpliUp() unpins leaf pages */
2327
                split.mp = mp;
2328
                split.index = newindex;
2329
                split.flag = xflag;
2330
                split.off = xoff;
2331
                split.len = xlen;
2332
                split.addr = xaddr;
2333
                split.pxdlist = NULL;
2334
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2335
                        return rc;
2336
 
2337
                /*
2338
                 * if leaf root has been split, original root has been
2339
                 * copied to new child page, i.e., original entry now
2340
                 * resides on the new child page;
2341
                 */
2342
                if (rootsplit) {
2343
                        ASSERT(p->header.nextindex ==
2344
                               cpu_to_le16(XTENTRYSTART + 1));
2345
                        xad = &p->xad[XTENTRYSTART];
2346
                        bn = addressXAD(xad);
2347
 
2348
                        /* get new child page */
2349
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2350
                        if (rc)
2351
                                return rc;
2352
 
2353
                        BT_MARK_DIRTY(mp, ip);
2354
                        if (!test_cflag(COMMIT_Nolink, ip)) {
2355
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2356
                                xtlck = (struct xtlock *) & tlck->lock;
2357
                        }
2358
                } else {
2359
                        /* get back old page */
2360
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2361
                        if (rc)
2362
                                return rc;
2363
                }
2364
        } else {
2365
                /* if insert into middle, shift right remaining entries */
2366
                if (newindex < nextindex)
2367
                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
2368
                                (nextindex - newindex) << L2XTSLOTSIZE);
2369
 
2370
                /* insert the entry */
2371
                xad = &p->xad[newindex];
2372
                XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2373
 
2374
                /* advance next available entry index. */
2375
                p->header.nextindex =
2376
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2377
        }
2378
 
2379
      out:
2380
        if (!test_cflag(COMMIT_Nolink, ip)) {
2381
                xtlck->lwm.offset = (xtlck->lwm.offset) ?
2382
                    min(index0, (int)xtlck->lwm.offset) : index0;
2383
                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2384
                    xtlck->lwm.offset;
2385
        }
2386
 
2387
        /* unpin the leaf page */
2388
        XT_PUTPAGE(mp);
2389
 
2390
        return rc;
2391
}
2392
 
2393
 
2394
/*
2395
 *      xtAppend()
2396
 *
2397
 * function: grow in append mode from contiguous region specified ;
2398
 *
2399
 * parameter:
2400
 *      tid             - transaction id;
2401
 *      ip              - file object;
2402
 *      xflag           - extent flag:
2403
 *      xoff            - extent offset;
2404
 *      maxblocks       - max extent length;
2405
 *      xlen            - extent length (in/out);
2406
 *      xaddrp          - extent address pointer (in/out):
2407
 *      flag            -
2408
 *
2409
 * return:
2410
 */
2411
int xtAppend(tid_t tid,         /* transaction id */
2412
             struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2413
             s32 * xlenp,       /* (in/out) */
2414
             s64 * xaddrp,      /* (in/out) */
2415
             int flag)
2416
{
2417
        int rc = 0;
2418
        struct metapage *mp;    /* meta-page buffer */
2419
        xtpage_t *p;            /* base B+-tree index page */
2420
        s64 bn, xaddr;
2421
        int index, nextindex;
2422
        struct btstack btstack; /* traverse stack */
2423
        struct xtsplit split;   /* split information */
2424
        xad_t *xad;
2425
        int cmp;
2426
        struct tlock *tlck;
2427
        struct xtlock *xtlck;
2428
        int nsplit, nblocks, xlen;
2429
        struct pxdlist pxdlist;
2430
        pxd_t *pxd;
2431
 
2432
        xaddr = *xaddrp;
2433
        xlen = *xlenp;
2434
        jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2435
                 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2436
 
2437
        /*
2438
         *      search for the entry location at which to insert:
2439
         *
2440
         * xtFastSearch() and xtSearch() both returns (leaf page
2441
         * pinned, index at which to insert).
2442
         * n.b. xtSearch() may return index of maxentry of
2443
         * the full page.
2444
         */
2445
        if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
2446
                return rc;
2447
 
2448
        /* retrieve search result */
2449
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2450
 
2451
        if (cmp == 0) {
2452
                rc = -EEXIST;
2453
                goto out;
2454
        }
2455
//insert:
2456
        /*
2457
         *      insert entry for new extent
2458
         */
2459
        xflag |= XAD_NEW;
2460
 
2461
        /*
2462
         *      if the leaf page is full, split the page and
2463
         *      propagate up the router entry for the new page from split
2464
         *
2465
         * The xtSplitUp() will insert the entry and unpin the leaf page.
2466
         */
2467
        nextindex = le16_to_cpu(p->header.nextindex);
2468
        if (nextindex < le16_to_cpu(p->header.maxentry))
2469
                goto insertLeaf;
2470
 
2471
        /*
2472
         * allocate new index blocks to cover index page split(s)
2473
         */
2474
        nsplit = btstack.nsplit;
2475
        split.pxdlist = &pxdlist;
2476
        pxdlist.maxnpxd = pxdlist.npxd = 0;
2477
        pxd = &pxdlist.pxd[0];
2478
        nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2479
        for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2480
                if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2481
                        PXDaddress(pxd, xaddr);
2482
                        PXDlength(pxd, nblocks);
2483
 
2484
                        pxdlist.maxnpxd++;
2485
 
2486
                        continue;
2487
                }
2488
 
2489
                /* undo allocation */
2490
 
2491
                goto out;
2492
        }
2493
 
2494
        xlen = min(xlen, maxblocks);
2495
 
2496
        /*
2497
         * allocate data extent requested
2498
         */
2499
        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2500
                goto out;
2501
 
2502
        split.mp = mp;
2503
        split.index = index;
2504
        split.flag = xflag;
2505
        split.off = xoff;
2506
        split.len = xlen;
2507
        split.addr = xaddr;
2508
        if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2509
                /* undo data extent allocation */
2510
                dbFree(ip, *xaddrp, (s64) * xlenp);
2511
 
2512
                return rc;
2513
        }
2514
 
2515
        *xaddrp = xaddr;
2516
        *xlenp = xlen;
2517
        return 0;
2518
 
2519
        /*
2520
         *      insert the new entry into the leaf page
2521
         */
2522
      insertLeaf:
2523
        /*
2524
         * allocate data extent requested
2525
         */
2526
        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2527
                goto out;
2528
 
2529
        BT_MARK_DIRTY(mp, ip);
2530
        /*
2531
         * acquire a transaction lock on the leaf page;
2532
         *
2533
         * action: xad insertion/extension;
2534
         */
2535
        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2536
        xtlck = (struct xtlock *) & tlck->lock;
2537
 
2538
        /* insert the new entry: mark the entry NEW */
2539
        xad = &p->xad[index];
2540
        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2541
 
2542
        /* advance next available entry index */
2543
        p->header.nextindex =
2544
            cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2545
 
2546
        xtlck->lwm.offset =
2547
            (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2548
        xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2549
            xtlck->lwm.offset;
2550
 
2551
        *xaddrp = xaddr;
2552
        *xlenp = xlen;
2553
 
2554
      out:
2555
        /* unpin the leaf page */
2556
        XT_PUTPAGE(mp);
2557
 
2558
        return rc;
2559
}
2560
#ifdef _STILL_TO_PORT
2561
 
2562
/* - TBD for defragmentaion/reorganization -
2563
 *
2564
 *      xtDelete()
2565
 *
2566
 * function:
2567
 *      delete the entry with the specified key.
2568
 *
2569
 *      N.B.: whole extent of the entry is assumed to be deleted.
2570
 *
2571
 * parameter:
2572
 *
2573
 * return:
2574
 *       ENOENT: if the entry is not found.
2575
 *
2576
 * exception:
2577
 */
2578
int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2579
{
2580
        int rc = 0;
2581
        struct btstack btstack;
2582
        int cmp;
2583
        s64 bn;
2584
        struct metapage *mp;
2585
        xtpage_t *p;
2586
        int index, nextindex;
2587
        struct tlock *tlck;
2588
        struct xtlock *xtlck;
2589
 
2590
        /*
2591
         * find the matching entry; xtSearch() pins the page
2592
         */
2593
        if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
2594
                return rc;
2595
 
2596
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2597
        if (cmp) {
2598
                /* unpin the leaf page */
2599
                XT_PUTPAGE(mp);
2600
                return -ENOENT;
2601
        }
2602
 
2603
        /*
2604
         * delete the entry from the leaf page
2605
         */
2606
        nextindex = le16_to_cpu(p->header.nextindex);
2607
        p->header.nextindex =
2608
            cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);
2609
 
2610
        /*
2611
         * if the leaf page bocome empty, free the page
2612
         */
2613
        if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
2614
                return (xtDeleteUp(tid, ip, mp, p, &btstack));
2615
 
2616
        BT_MARK_DIRTY(mp, ip);
2617
        /*
2618
         * acquire a transaction lock on the leaf page;
2619
         *
2620
         * action:xad deletion;
2621
         */
2622
        tlck = txLock(tid, ip, mp, tlckXTREE);
2623
        xtlck = (struct xtlock *) & tlck->lock;
2624
        xtlck->lwm.offset =
2625
            (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
2626
 
2627
        /* if delete from middle, shift left/compact the remaining entries */
2628
        if (index < nextindex - 1)
2629
                memmove(&p->xad[index], &p->xad[index + 1],
2630
                        (nextindex - index - 1) * sizeof(xad_t));
2631
 
2632
        XT_PUTPAGE(mp);
2633
 
2634
        return 0;
2635
}
2636
 
2637
 
2638
/* - TBD for defragmentaion/reorganization -
2639
 *
2640
 *      xtDeleteUp()
2641
 *
2642
 * function:
2643
 *      free empty pages as propagating deletion up the tree
2644
 *
2645
 * parameter:
2646
 *
2647
 * return:
2648
 */
2649
static int
2650
xtDeleteUp(tid_t tid, struct inode *ip,
2651
           struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
2652
{
2653
        int rc = 0;
2654
        struct metapage *mp;
2655
        xtpage_t *p;
2656
        int index, nextindex;
2657
        s64 xaddr;
2658
        int xlen;
2659
        struct btframe *parent;
2660
        struct tlock *tlck;
2661
        struct xtlock *xtlck;
2662
 
2663
        /*
2664
         * keep root leaf page which has become empty
2665
         */
2666
        if (fp->header.flag & BT_ROOT) {
2667
                /* keep the root page */
2668
                fp->header.flag &= ~BT_INTERNAL;
2669
                fp->header.flag |= BT_LEAF;
2670
                fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
2671
 
2672
                /* XT_PUTPAGE(fmp); */
2673
 
2674
                return 0;
2675
        }
2676
 
2677
        /*
2678
         * free non-root leaf page
2679
         */
2680
        if ((rc = xtRelink(tid, ip, fp))) {
2681
                XT_PUTPAGE(fmp);
2682
                return rc;
2683
        }
2684
 
2685
        xaddr = addressPXD(&fp->header.self);
2686
        xlen = lengthPXD(&fp->header.self);
2687
        /* free the page extent */
2688
        dbFree(ip, xaddr, (s64) xlen);
2689
 
2690
        /* free the buffer page */
2691
        discard_metapage(fmp);
2692
 
2693
        /*
2694
         * propagate page deletion up the index tree
2695
         *
2696
         * If the delete from the parent page makes it empty,
2697
         * continue all the way up the tree.
2698
         * stop if the root page is reached (which is never deleted) or
2699
         * if the entry deletion does not empty the page.
2700
         */
2701
        while ((parent = BT_POP(btstack)) != NULL) {
2702
                /* get/pin the parent page <sp> */
2703
                XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2704
                if (rc)
2705
                        return rc;
2706
 
2707
                index = parent->index;
2708
 
2709
                /* delete the entry for the freed child page from parent.
2710
                 */
2711
                nextindex = le16_to_cpu(p->header.nextindex);
2712
 
2713
                /*
2714
                 * the parent has the single entry being deleted:
2715
                 * free the parent page which has become empty.
2716
                 */
2717
                if (nextindex == 1) {
2718
                        if (p->header.flag & BT_ROOT) {
2719
                                /* keep the root page */
2720
                                p->header.flag &= ~BT_INTERNAL;
2721
                                p->header.flag |= BT_LEAF;
2722
                                p->header.nextindex =
2723
                                    cpu_to_le16(XTENTRYSTART);
2724
 
2725
                                /* XT_PUTPAGE(mp); */
2726
 
2727
                                break;
2728
                        } else {
2729
                                /* free the parent page */
2730
                                if ((rc = xtRelink(tid, ip, p)))
2731
                                        return rc;
2732
 
2733
                                xaddr = addressPXD(&p->header.self);
2734
                                /* free the page extent */
2735
                                dbFree(ip, xaddr,
2736
                                       (s64) JFS_SBI(ip->i_sb)->nbperpage);
2737
 
2738
                                /* unpin/free the buffer page */
2739
                                discard_metapage(mp);
2740
 
2741
                                /* propagate up */
2742
                                continue;
2743
                        }
2744
                }
2745
                /*
2746
                 * the parent has other entries remaining:
2747
                 * delete the router entry from the parent page.
2748
                 */
2749
                else {
2750
                        BT_MARK_DIRTY(mp, ip);
2751
                        /*
2752
                         * acquire a transaction lock on the leaf page;
2753
                         *
2754
                         * action:xad deletion;
2755
                         */
2756
                        tlck = txLock(tid, ip, mp, tlckXTREE);
2757
                        xtlck = (struct xtlock *) & tlck->lock;
2758
                        xtlck->lwm.offset =
2759
                            (xtlck->lwm.offset) ? min(index,
2760
                                                      xtlck->lwm.
2761
                                                      offset) : index;
2762
 
2763
                        /* if delete from middle,
2764
                         * shift left/compact the remaining entries in the page
2765
                         */
2766
                        if (index < nextindex - 1)
2767
                                memmove(&p->xad[index], &p->xad[index + 1],
2768
                                        (nextindex - index -
2769
                                         1) << L2XTSLOTSIZE);
2770
 
2771
                        p->header.nextindex =
2772
                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2773
                                        1);
2774
                        jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2775
                                 (ulong) parent->bn, index);
2776
                }
2777
 
2778
                /* unpin the parent page */
2779
                XT_PUTPAGE(mp);
2780
 
2781
                /* exit propagation up */
2782
                break;
2783
        }
2784
 
2785
        return 0;
2786
}
2787
 
2788
 
2789
/*
2790
 * NAME:        xtRelocate()
2791
 *
2792
 * FUNCTION:    relocate xtpage or data extent of regular file;
2793
 *              This function is mainly used by defragfs utility.
2794
 *
2795
 * NOTE:        This routine does not have the logic to handle
2796
 *              uncommitted allocated extent. The caller should call
2797
 *              txCommit() to commit all the allocation before call
2798
 *              this routine.
2799
 */
2800
int
2801
xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
2802
           s64 nxaddr,          /* new xaddr */
2803
           int xtype)
2804
{                               /* extent type: XTPAGE or DATAEXT */
2805
        int rc = 0;
2806
        struct tblock *tblk;
2807
        struct tlock *tlck;
2808
        struct xtlock *xtlck;
2809
        struct metapage *mp, *pmp, *lmp, *rmp;  /* meta-page buffer */
2810
        xtpage_t *p, *pp, *rp, *lp;     /* base B+-tree index page */
2811
        xad_t *xad;
2812
        pxd_t *pxd;
2813
        s64 xoff, xsize;
2814
        int xlen;
2815
        s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2816
        cbuf_t *cp;
2817
        s64 offset, nbytes, nbrd, pno;
2818
        int nb, npages, nblks;
2819
        s64 bn;
2820
        int cmp;
2821
        int index;
2822
        struct pxd_lock *pxdlock;
2823
        struct btstack btstack; /* traverse stack */
2824
 
2825
        xtype = xtype & EXTENT_TYPE;
2826
 
2827
        xoff = offsetXAD(oxad);
2828
        oxaddr = addressXAD(oxad);
2829
        xlen = lengthXAD(oxad);
2830
 
2831
        /* validate extent offset */
2832
        offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2833
        if (offset >= ip->i_size)
2834
                return -ESTALE; /* stale extent */
2835
 
2836
        jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2837
                 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2838
 
2839
        /*
2840
         *      1. get and validate the parent xtpage/xad entry
2841
         *      covering the source extent to be relocated;
2842
         */
2843
        if (xtype == DATAEXT) {
2844
                /* search in leaf entry */
2845
                rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
2846
                if (rc)
2847
                        return rc;
2848
 
2849
                /* retrieve search result */
2850
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2851
 
2852
                if (cmp) {
2853
                        XT_PUTPAGE(pmp);
2854
                        return -ESTALE;
2855
                }
2856
 
2857
                /* validate for exact match with a single entry */
2858
                xad = &pp->xad[index];
2859
                if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2860
                        XT_PUTPAGE(pmp);
2861
                        return -ESTALE;
2862
                }
2863
        } else {                /* (xtype == XTPAGE) */
2864
 
2865
                /* search in internal entry */
2866
                rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2867
                if (rc)
2868
                        return rc;
2869
 
2870
                /* retrieve search result */
2871
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2872
 
2873
                if (cmp) {
2874
                        XT_PUTPAGE(pmp);
2875
                        return -ESTALE;
2876
                }
2877
 
2878
                /* xtSearchNode() validated for exact match with a single entry
2879
                 */
2880
                xad = &pp->xad[index];
2881
        }
2882
        jfs_info("xtRelocate: parent xad entry validated.");
2883
 
2884
        /*
2885
         *      2. relocate the extent
2886
         */
2887
        if (xtype == DATAEXT) {
2888
                /* if the extent is allocated-but-not-recorded
2889
                 * there is no real data to be moved in this extent,
2890
                 */
2891
                if (xad->flag & XAD_NOTRECORDED)
2892
                        goto out;
2893
                else
2894
                        /* release xtpage for cmRead()/xtLookup() */
2895
                        XT_PUTPAGE(pmp);
2896
 
2897
                /*
2898
                 *      cmRelocate()
2899
                 *
2900
                 * copy target data pages to be relocated;
2901
                 *
2902
                 * data extent must start at page boundary and
2903
                 * multiple of page size (except the last data extent);
2904
                 * read in each page of the source data extent into cbuf,
2905
                 * update the cbuf extent descriptor of the page to be
2906
                 * homeward bound to new dst data extent
2907
                 * copy the data from the old extent to new extent.
2908
                 * copy is essential for compressed files to avoid problems
2909
                 * that can arise if there was a change in compression
2910
                 * algorithms.
2911
                 * it is a good strategy because it may disrupt cache
2912
                 * policy to keep the pages in memory afterwards.
2913
                 */
2914
                offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2915
                assert((offset & CM_OFFSET) == 0);
2916
                nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2917
                pno = offset >> CM_L2BSIZE;
2918
                npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2919
/*
2920
                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2921
                         (offset >> CM_L2BSIZE) + 1;
2922
*/
2923
                sxaddr = oxaddr;
2924
                dxaddr = nxaddr;
2925
 
2926
                /* process the request one cache buffer at a time */
2927
                for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2928
                     offset += nb, pno++, npages--) {
2929
                        /* compute page size */
2930
                        nb = min(nbytes - nbrd, CM_BSIZE);
2931
 
2932
                        /* get the cache buffer of the page */
2933
                        if (rc = cmRead(ip, offset, npages, &cp))
2934
                                break;
2935
 
2936
                        assert(addressPXD(&cp->cm_pxd) == sxaddr);
2937
                        assert(!cp->cm_modified);
2938
 
2939
                        /* bind buffer with the new extent address */
2940
                        nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
2941
                        cmSetXD(ip, cp, pno, dxaddr, nblks);
2942
 
2943
                        /* release the cbuf, mark it as modified */
2944
                        cmPut(cp, TRUE);
2945
 
2946
                        dxaddr += nblks;
2947
                        sxaddr += nblks;
2948
                }
2949
 
2950
                /* get back parent page */
2951
                if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
2952
                        return rc;
2953
 
2954
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2955
                jfs_info("xtRelocate: target data extent relocated.");
2956
        } else {                /* (xtype  == XTPAGE) */
2957
 
2958
                /*
2959
                 * read in the target xtpage from the source extent;
2960
                 */
2961
                XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2962
                if (rc) {
2963
                        XT_PUTPAGE(pmp);
2964
                        return rc;
2965
                }
2966
 
2967
                /*
2968
                 * read in sibling pages if any to update sibling pointers;
2969
                 */
2970
                rmp = NULL;
2971
                if (p->header.next) {
2972
                        nextbn = le64_to_cpu(p->header.next);
2973
                        XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2974
                        if (rc) {
2975
                                XT_PUTPAGE(pmp);
2976
                                XT_PUTPAGE(mp);
2977
                                return (rc);
2978
                        }
2979
                }
2980
 
2981
                lmp = NULL;
2982
                if (p->header.prev) {
2983
                        prevbn = le64_to_cpu(p->header.prev);
2984
                        XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2985
                        if (rc) {
2986
                                XT_PUTPAGE(pmp);
2987
                                XT_PUTPAGE(mp);
2988
                                if (rmp)
2989
                                        XT_PUTPAGE(rmp);
2990
                                return (rc);
2991
                        }
2992
                }
2993
 
2994
                /* at this point, all xtpages to be updated are in memory */
2995
 
2996
                /*
2997
                 * update sibling pointers of sibling xtpages if any;
2998
                 */
2999
                if (lmp) {
3000
                        BT_MARK_DIRTY(lmp, ip);
3001
                        tlck =
3002
                            txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3003
                        lp->header.next = cpu_to_le64(nxaddr);
3004
                        XT_PUTPAGE(lmp);
3005
                }
3006
 
3007
                if (rmp) {
3008
                        BT_MARK_DIRTY(rmp, ip);
3009
                        tlck =
3010
                            txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3011
                        rp->header.prev = cpu_to_le64(nxaddr);
3012
                        XT_PUTPAGE(rmp);
3013
                }
3014
 
3015
                /*
3016
                 * update the target xtpage to be relocated
3017
                 *
3018
                 * update the self address of the target page
3019
                 * and write to destination extent;
3020
                 * redo image covers the whole xtpage since it is new page
3021
                 * to the destination extent;
3022
                 * update of bmap for the free of source extent
3023
                 * of the target xtpage itself:
3024
                 * update of bmap for the allocation of destination extent
3025
                 * of the target xtpage itself:
3026
                 * update of bmap for the extents covered by xad entries in
3027
                 * the target xtpage is not necessary since they are not
3028
                 * updated;
3029
                 * if not committed before this relocation,
3030
                 * target page may contain XAD_NEW entries which must
3031
                 * be scanned for bmap update (logredo() always
3032
                 * scan xtpage REDOPAGE image for bmap update);
3033
                 * if committed before this relocation (tlckRELOCATE),
3034
                 * scan may be skipped by commit() and logredo();
3035
                 */
3036
                BT_MARK_DIRTY(mp, ip);
3037
                /* tlckNEW init  xtlck->lwm.offset = XTENTRYSTART; */
3038
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
3039
                xtlck = (struct xtlock *) & tlck->lock;
3040
 
3041
                /* update the self address in the xtpage header */
3042
                pxd = &p->header.self;
3043
                PXDaddress(pxd, nxaddr);
3044
 
3045
                /* linelock for the after image of the whole page */
3046
                xtlck->lwm.length =
3047
                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
3048
 
3049
                /* update the buffer extent descriptor of target xtpage */
3050
                xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
3051
                bmSetXD(mp, nxaddr, xsize);
3052
 
3053
                /* unpin the target page to new homeward bound */
3054
                XT_PUTPAGE(mp);
3055
                jfs_info("xtRelocate: target xtpage relocated.");
3056
        }
3057
 
3058
        /*
3059
         *      3. acquire maplock for the source extent to be freed;
3060
         *
3061
         * acquire a maplock saving the src relocated extent address;
3062
         * to free of the extent at commit time;
3063
         */
3064
      out:
3065
        /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
3066
         * free PXD of the source data extent (logredo() will update
3067
         * bmap for free of source data extent), and update bmap for
3068
         * free of the source data extent;
3069
         */
3070
        if (xtype == DATAEXT)
3071
                tlck = txMaplock(tid, ip, tlckMAP);
3072
        /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
3073
         * for the source xtpage (logredo() will init NoRedoPage
3074
         * filter and will also update bmap for free of the source
3075
         * xtpage), and update bmap for free of the source xtpage;
3076
         * N.B. We use tlckMAP instead of tlkcXTREE because there
3077
         *      is no buffer associated with this lock since the buffer
3078
         *      has been redirected to the target location.
3079
         */
3080
        else                    /* (xtype  == XTPAGE) */
3081
                tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
3082
 
3083
        pxdlock = (struct pxd_lock *) & tlck->lock;
3084
        pxdlock->flag = mlckFREEPXD;
3085
        PXDaddress(&pxdlock->pxd, oxaddr);
3086
        PXDlength(&pxdlock->pxd, xlen);
3087
        pxdlock->index = 1;
3088
 
3089
        /*
3090
         *      4. update the parent xad entry for relocation;
3091
         *
3092
         * acquire tlck for the parent entry with XAD_NEW as entry
3093
         * update which will write LOG_REDOPAGE and update bmap for
3094
         * allocation of XAD_NEW destination extent;
3095
         */
3096
        jfs_info("xtRelocate: update parent xad entry.");
3097
        BT_MARK_DIRTY(pmp, ip);
3098
        tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
3099
        xtlck = (struct xtlock *) & tlck->lock;
3100
 
3101
        /* update the XAD with the new destination extent; */
3102
        xad = &pp->xad[index];
3103
        xad->flag |= XAD_NEW;
3104
        XADaddress(xad, nxaddr);
3105
 
3106
        xtlck->lwm.offset = min(index, xtlck->lwm.offset);
3107
        xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
3108
            xtlck->lwm.offset;
3109
 
3110
        /* unpin the parent xtpage */
3111
        XT_PUTPAGE(pmp);
3112
 
3113
        return rc;
3114
}
3115
 
3116
 
3117
/*
3118
 *      xtSearchNode()
3119
 *
3120
 * function:    search for the internal xad entry covering specified extent.
3121
 *              This function is mainly used by defragfs utility.
3122
 *
3123
 * parameters:
3124
 *      ip      - file object;
3125
 *      xad     - extent to find;
3126
 *      cmpp    - comparison result:
3127
 *      btstack - traverse stack;
3128
 *      flag    - search process flag;
3129
 *
3130
 * returns:
3131
 *      btstack contains (bn, index) of search path traversed to the entry.
3132
 *      *cmpp is set to result of comparison with the entry returned.
3133
 *      the page containing the entry is pinned at exit.
3134
 */
3135
static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
3136
                        int *cmpp, struct btstack * btstack, int flag)
3137
{
3138
        int rc = 0;
3139
        s64 xoff, xaddr;
3140
        int xlen;
3141
        int cmp = 1;            /* init for empty page */
3142
        s64 bn;                 /* block number */
3143
        struct metapage *mp;    /* meta-page buffer */
3144
        xtpage_t *p;            /* page */
3145
        int base, index, lim;
3146
        struct btframe *btsp;
3147
        s64 t64;
3148
 
3149
        BT_CLR(btstack);
3150
 
3151
        xoff = offsetXAD(xad);
3152
        xlen = lengthXAD(xad);
3153
        xaddr = addressXAD(xad);
3154
 
3155
        /*
3156
         *      search down tree from root:
3157
         *
3158
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
3159
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
3160
         *
3161
         * if entry with search key K is not found
3162
         * internal page search find the entry with largest key Ki
3163
         * less than K which point to the child page to search;
3164
         * leaf page search find the entry with smallest key Kj
3165
         * greater than K so that the returned index is the position of
3166
         * the entry to be shifted right for insertion of new entry.
3167
         * for empty tree, search key is greater than any key of the tree.
3168
         *
3169
         * by convention, root bn = 0.
3170
         */
3171
        for (bn = 0;;) {
3172
                /* get/pin the page to search */
3173
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3174
                if (rc)
3175
                        return rc;
3176
                if (p->header.flag & BT_LEAF) {
3177
                        XT_PUTPAGE(mp);
3178
                        return -ESTALE;
3179
                }
3180
 
3181
                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3182
 
3183
                /*
3184
                 * binary search with search key K on the current page
3185
                 */
3186
                for (base = XTENTRYSTART; lim; lim >>= 1) {
3187
                        index = base + (lim >> 1);
3188
 
3189
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
3190
                        if (cmp == 0) {
3191
                                /*
3192
                                 *      search hit
3193
                                 *
3194
                                 * verify for exact match;
3195
                                 */
3196
                                if (xaddr == addressXAD(&p->xad[index]) &&
3197
                                    xoff == offsetXAD(&p->xad[index])) {
3198
                                        *cmpp = cmp;
3199
 
3200
                                        /* save search result */
3201
                                        btsp = btstack->top;
3202
                                        btsp->bn = bn;
3203
                                        btsp->index = index;
3204
                                        btsp->mp = mp;
3205
 
3206
                                        return 0;
3207
                                }
3208
 
3209
                                /* descend/search its child page */
3210
                                goto next;
3211
                        }
3212
 
3213
                        if (cmp > 0) {
3214
                                base = index + 1;
3215
                                --lim;
3216
                        }
3217
                }
3218
 
3219
                /*
3220
                 *      search miss - non-leaf page:
3221
                 *
3222
                 * base is the smallest index with key (Kj) greater than
3223
                 * search key (K) and may be zero or maxentry index.
3224
                 * if base is non-zero, decrement base by one to get the parent
3225
                 * entry of the child page to search.
3226
                 */
3227
                index = base ? base - 1 : base;
3228
 
3229
                /*
3230
                 * go down to child page
3231
                 */
3232
              next:
3233
                /* get the child page block number */
3234
                bn = addressXAD(&p->xad[index]);
3235
 
3236
                /* unpin the parent page */
3237
                XT_PUTPAGE(mp);
3238
        }
3239
}
3240
 
3241
 
3242
/*
3243
 *      xtRelink()
3244
 *
3245
 * function:
3246
 *      link around a freed page.
3247
 *
3248
 * Parameter:
3249
 *      int           tid,
3250
 *      struct inode    *ip,
3251
 *      xtpage_t        *p)
3252
 *
3253
 * returns:
3254
 */
3255
static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
3256
{
3257
        int rc = 0;
3258
        struct metapage *mp;
3259
        s64 nextbn, prevbn;
3260
        struct tlock *tlck;
3261
 
3262
        nextbn = le64_to_cpu(p->header.next);
3263
        prevbn = le64_to_cpu(p->header.prev);
3264
 
3265
        /* update prev pointer of the next page */
3266
        if (nextbn != 0) {
3267
                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
3268
                if (rc)
3269
                        return rc;
3270
 
3271
                /*
3272
                 * acquire a transaction lock on the page;
3273
                 *
3274
                 * action: update prev pointer;
3275
                 */
3276
                BT_MARK_DIRTY(mp, ip);
3277
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3278
 
3279
                /* the page may already have been tlock'd */
3280
 
3281
                p->header.prev = cpu_to_le64(prevbn);
3282
 
3283
                XT_PUTPAGE(mp);
3284
        }
3285
 
3286
        /* update next pointer of the previous page */
3287
        if (prevbn != 0) {
3288
                XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
3289
                if (rc)
3290
                        return rc;
3291
 
3292
                /*
3293
                 * acquire a transaction lock on the page;
3294
                 *
3295
                 * action: update next pointer;
3296
                 */
3297
                BT_MARK_DIRTY(mp, ip);
3298
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3299
 
3300
                /* the page may already have been tlock'd */
3301
 
3302
                p->header.next = le64_to_cpu(nextbn);
3303
 
3304
                XT_PUTPAGE(mp);
3305
        }
3306
 
3307
        return 0;
3308
}
3309
#endif                          /*  _STILL_TO_PORT */
3310
 
3311
 
3312
/*
3313
 *      xtInitRoot()
3314
 *
3315
 * initialize file root (inline in inode)
3316
 */
3317
void xtInitRoot(tid_t tid, struct inode *ip)
3318
{
3319
        xtpage_t *p;
3320
 
3321
        /*
3322
         * acquire a transaction lock on the root
3323
         *
3324
         * action:
3325
         */
3326
        txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
3327
                      tlckXTREE | tlckNEW);
3328
        p = &JFS_IP(ip)->i_xtroot;
3329
 
3330
        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
3331
        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3332
 
3333
        if (S_ISDIR(ip->i_mode))
3334
                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
3335
        else {
3336
                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
3337
                ip->i_size = 0;
3338
        }
3339
 
3340
 
3341
        return;
3342
}
3343
 
3344
 
3345
/*
3346
 * We can run into a deadlock truncating a file with a large number of
3347
 * xtree pages (large fragmented file).  A robust fix would entail a
3348
 * reservation system where we would reserve a number of metadata pages
3349
 * and tlocks which we would be guaranteed without a deadlock.  Without
3350
 * this, a partial fix is to limit number of metadata pages we will lock
3351
 * in a single transaction.  Currently we will truncate the file so that
3352
 * no more than 50 leaf pages will be locked.  The caller of xtTruncate
3353
 * will be responsible for ensuring that the current transaction gets
3354
 * committed, and that subsequent transactions are created to truncate
3355
 * the file further if needed.
3356
 */
3357
#define MAX_TRUNCATE_LEAVES 50
3358
 
3359
/*
3360
 *      xtTruncate()
3361
 *
3362
 * function:
3363
 *      traverse for truncation logging backward bottom up;
3364
 *      terminate at the last extent entry at the current subtree
3365
 *      root page covering new down size.
3366
 *      truncation may occur within the last extent entry.
3367
 *
3368
 * parameter:
3369
 *      int           tid,
3370
 *      struct inode    *ip,
3371
 *      s64           newsize,
3372
 *      int           type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3373
 *
3374
 * return:
3375
 *
3376
 * note:
3377
 *      PWMAP:
3378
 *       1. truncate (non-COMMIT_NOLINK file)
3379
 *          by jfs_truncate() or jfs_open(O_TRUNC):
3380
 *          xtree is updated;
3381
 *       2. truncate index table of directory when last entry removed
3382
 *       map update via tlock at commit time;
3383
 *      PMAP:
3384
 *       Call xtTruncate_pmap instead
3385
 *      WMAP:
3386
 *       1. remove (free zero link count) on last reference release
3387
 *          (pmap has been freed at commit zero link count);
3388
 *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3389
 *          xtree is updated;
3390
 *       map update directly at truncation time;
3391
 *
3392
 *      if (DELETE)
3393
 *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3394
 *      else if (TRUNCATE)
3395
 *              must write LOG_NOREDOPAGE for deleted index page;
3396
 *
3397
 * pages may already have been tlocked by anonymous transactions
3398
 * during file growth (i.e., write) before truncation;
3399
 *
3400
 * except last truncated entry, deleted entries remains as is
3401
 * in the page (nextindex is updated) for other use
3402
 * (e.g., log/update allocation map): this avoid copying the page
3403
 * info but delay free of pages;
3404
 *
3405
 */
3406
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3407
{
3408
        int rc = 0;
3409
        s64 teof;
3410
        struct metapage *mp;
3411
        xtpage_t *p;
3412
        s64 bn;
3413
        int index, nextindex;
3414
        xad_t *xad;
3415
        s64 xoff, xaddr;
3416
        int xlen, len, freexlen;
3417
        struct btstack btstack;
3418
        struct btframe *parent;
3419
        struct tblock *tblk = 0;
3420
        struct tlock *tlck = 0;
3421
        struct xtlock *xtlck = 0;
3422
        struct xdlistlock xadlock;      /* maplock for COMMIT_WMAP */
3423
        struct pxd_lock *pxdlock;               /* maplock for COMMIT_WMAP */
3424
        s64 nfreed;
3425
        int freed, log;
3426
        int locked_leaves = 0;
3427
 
3428
        /* save object truncation type */
3429
        if (tid) {
3430
                tblk = tid_to_tblock(tid);
3431
                tblk->xflag |= flag;
3432
        }
3433
 
3434
        nfreed = 0;
3435
 
3436
        flag &= COMMIT_MAP;
3437
        assert(flag != COMMIT_PMAP);
3438
 
3439
        if (flag == COMMIT_PWMAP)
3440
                log = 1;
3441
        else {
3442
                log = 0;
3443
                xadlock.flag = mlckFREEXADLIST;
3444
                xadlock.index = 1;
3445
        }
3446
 
3447
        /*
3448
         * if the newsize is not an integral number of pages,
3449
         * the file between newsize and next page boundary will
3450
         * be cleared.
3451
         * if truncating into a file hole, it will cause
3452
         * a full block to be allocated for the logical block.
3453
         */
3454
 
3455
        /*
3456
         * release page blocks of truncated region <teof, eof>
3457
         *
3458
         * free the data blocks from the leaf index blocks.
3459
         * delete the parent index entries corresponding to
3460
         * the freed child data/index blocks.
3461
         * free the index blocks themselves which aren't needed
3462
         * in new sized file.
3463
         *
3464
         * index blocks are updated only if the blocks are to be
3465
         * retained in the new sized file.
3466
         * if type is PMAP, the data and index pages are NOT
3467
         * freed, and the data and index blocks are NOT freed
3468
         * from  working map.
3469
         * (this will allow continued access of data/index of
3470
         * temporary file (zerolink count file truncated to zero-length)).
3471
         */
3472
        teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
3473
            JFS_SBI(ip->i_sb)->l2bsize;
3474
 
3475
        /* clear stack */
3476
        BT_CLR(&btstack);
3477
 
3478
        /*
3479
         * start with root
3480
         *
3481
         * root resides in the inode
3482
         */
3483
        bn = 0;
3484
 
3485
        /*
3486
         * first access of each page:
3487
         */
3488
      getPage:
3489
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3490
        if (rc)
3491
                return rc;
3492
 
3493
        /* process entries backward from last index */
3494
        index = le16_to_cpu(p->header.nextindex) - 1;
3495
 
3496
        if (p->header.flag & BT_INTERNAL)
3497
                goto getChild;
3498
 
3499
        /*
3500
         *      leaf page
3501
         */
3502
 
3503
        /* Since this is the rightmost leaf, and we may have already freed
3504
         * a page that was formerly to the right, let's make sure that the
3505
         * next pointer is zero.
3506
         */
3507
        p->header.next = 0;
3508
 
3509
        freed = 0;
3510
 
3511
        /* does region covered by leaf page precede Teof ? */
3512
        xad = &p->xad[index];
3513
        xoff = offsetXAD(xad);
3514
        xlen = lengthXAD(xad);
3515
        if (teof >= xoff + xlen) {
3516
                XT_PUTPAGE(mp);
3517
                goto getParent;
3518
        }
3519
 
3520
        /* (re)acquire tlock of the leaf page */
3521
        if (log) {
3522
                if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3523
                        /*
3524
                         * We need to limit the size of the transaction
3525
                         * to avoid exhausting pagecache & tlocks
3526
                         */
3527
                        XT_PUTPAGE(mp);
3528
                        newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3529
                        goto getParent;
3530
                }
3531
                tlck = txLock(tid, ip, mp, tlckXTREE);
3532
                tlck->type = tlckXTREE | tlckTRUNCATE;
3533
                xtlck = (struct xtlock *) & tlck->lock;
3534
                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3535
        }
3536
        BT_MARK_DIRTY(mp, ip);
3537
 
3538
        /*
3539
         * scan backward leaf page entries
3540
         */
3541
        for (; index >= XTENTRYSTART; index--) {
3542
                xad = &p->xad[index];
3543
                xoff = offsetXAD(xad);
3544
                xlen = lengthXAD(xad);
3545
                xaddr = addressXAD(xad);
3546
 
3547
                /*
3548
                 * The "data" for a directory is indexed by the block
3549
                 * device's address space.  This metadata must be invalidated
3550
                 * here
3551
                 */
3552
                if (S_ISDIR(ip->i_mode) && (teof == 0))
3553
                        invalidate_xad_metapages(ip, *xad);
3554
                /*
3555
                 * entry beyond eof: continue scan of current page
3556
                 *          xad
3557
                 * ---|---=======------->
3558
                 *   eof
3559
                 */
3560
                if (teof < xoff) {
3561
                        nfreed += xlen;
3562
                        continue;
3563
                }
3564
 
3565
                /*
3566
                 * (xoff <= teof): last entry to be deleted from page;
3567
                 * If other entries remain in page: keep and update the page.
3568
                 */
3569
 
3570
                /*
3571
                 * eof == entry_start: delete the entry
3572
                 *           xad
3573
                 * -------|=======------->
3574
                 *       eof
3575
                 *
3576
                 */
3577
                if (teof == xoff) {
3578
                        nfreed += xlen;
3579
 
3580
                        if (index == XTENTRYSTART)
3581
                                break;
3582
 
3583
                        nextindex = index;
3584
                }
3585
                /*
3586
                 * eof within the entry: truncate the entry.
3587
                 *          xad
3588
                 * -------===|===------->
3589
                 *          eof
3590
                 */
3591
                else if (teof < xoff + xlen) {
3592
                        /* update truncated entry */
3593
                        len = teof - xoff;
3594
                        freexlen = xlen - len;
3595
                        XADlength(xad, len);
3596
 
3597
                        /* save pxd of truncated extent in tlck */
3598
                        xaddr += len;
3599
                        if (log) {      /* COMMIT_PWMAP */
3600
                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
3601
                                    min(index, (int)xtlck->lwm.offset) : index;
3602
                                xtlck->lwm.length = index + 1 -
3603
                                    xtlck->lwm.offset;
3604
                                xtlck->twm.offset = index;
3605
                                pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
3606
                                pxdlock->flag = mlckFREEPXD;
3607
                                PXDaddress(&pxdlock->pxd, xaddr);
3608
                                PXDlength(&pxdlock->pxd, freexlen);
3609
                        }
3610
                        /* free truncated extent */
3611
                        else {  /* COMMIT_WMAP */
3612
 
3613
                                pxdlock = (struct pxd_lock *) & xadlock;
3614
                                pxdlock->flag = mlckFREEPXD;
3615
                                PXDaddress(&pxdlock->pxd, xaddr);
3616
                                PXDlength(&pxdlock->pxd, freexlen);
3617
                                txFreeMap(ip, pxdlock, 0, COMMIT_WMAP);
3618
 
3619
                                /* reset map lock */
3620
                                xadlock.flag = mlckFREEXADLIST;
3621
                        }
3622
 
3623
                        /* current entry is new last entry; */
3624
                        nextindex = index + 1;
3625
 
3626
                        nfreed += freexlen;
3627
                }
3628
                /*
3629
                 * eof beyond the entry:
3630
                 *          xad
3631
                 * -------=======---|--->
3632
                 *                 eof
3633
                 */
3634
                else {          /* (xoff + xlen < teof) */
3635
 
3636
                        nextindex = index + 1;
3637
                }
3638
 
3639
                if (nextindex < le16_to_cpu(p->header.nextindex)) {
3640
                        if (!log) {     /* COMMIT_WAMP */
3641
                                xadlock.xdlist = &p->xad[nextindex];
3642
                                xadlock.count =
3643
                                    le16_to_cpu(p->header.nextindex) -
3644
                                    nextindex;
3645
                                txFreeMap(ip, (struct maplock *) & xadlock, 0,
3646
                                          COMMIT_WMAP);
3647
                        }
3648
                        p->header.nextindex = cpu_to_le16(nextindex);
3649
                }
3650
 
3651
                XT_PUTPAGE(mp);
3652
 
3653
                /* assert(freed == 0); */
3654
                goto getParent;
3655
        }                       /* end scan of leaf page entries */
3656
 
3657
        freed = 1;
3658
 
3659
        /*
3660
         * leaf page become empty: free the page if type != PMAP
3661
         */
3662
        if (log) {              /* COMMIT_PWMAP */
3663
                /* txCommit() with tlckFREE:
3664
                 * free data extents covered by leaf [XTENTRYSTART:hwm);
3665
                 * invalidate leaf if COMMIT_PWMAP;
3666
                 * if (TRUNCATE), will write LOG_NOREDOPAGE;
3667
                 */
3668
                tlck->type = tlckXTREE | tlckFREE;
3669
        } else {                /* COMMIT_WAMP */
3670
 
3671
                /* free data extents covered by leaf */
3672
                xadlock.xdlist = &p->xad[XTENTRYSTART];
3673
                xadlock.count =
3674
                    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3675
                txFreeMap(ip, (struct maplock *) & xadlock, 0, COMMIT_WMAP);
3676
        }
3677
 
3678
        if (p->header.flag & BT_ROOT) {
3679
                p->header.flag &= ~BT_INTERNAL;
3680
                p->header.flag |= BT_LEAF;
3681
                p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3682
 
3683
                XT_PUTPAGE(mp); /* debug */
3684
                goto out;
3685
        } else {
3686
                if (log) {      /* COMMIT_PWMAP */
3687
                        /* page will be invalidated at tx completion
3688
                         */
3689
                        XT_PUTPAGE(mp);
3690
                } else {        /* COMMIT_WMAP */
3691
 
3692
                        if (mp->lid)
3693
                                lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
3694
 
3695
                        /* invalidate empty leaf page */
3696
                        discard_metapage(mp);
3697
                }
3698
        }
3699
 
3700
        /*
3701
         * the leaf page become empty: delete the parent entry
3702
         * for the leaf page if the parent page is to be kept
3703
         * in the new sized file.
3704
         */
3705
 
3706
        /*
3707
         * go back up to the parent page
3708
         */
3709
      getParent:
3710
        /* pop/restore parent entry for the current child page */
3711
        if ((parent = BT_POP(&btstack)) == NULL)
3712
                /* current page must have been root */
3713
                goto out;
3714
 
3715
        /* get back the parent page */
3716
        bn = parent->bn;
3717
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3718
        if (rc)
3719
                return rc;
3720
 
3721
        index = parent->index;
3722
 
3723
        /*
3724
         * child page was not empty:
3725
         */
3726
        if (freed == 0) {
3727
                /* has any entry deleted from parent ? */
3728
                if (index < le16_to_cpu(p->header.nextindex) - 1) {
3729
                        /* (re)acquire tlock on the parent page */
3730
                        if (log) {      /* COMMIT_PWMAP */
3731
                                /* txCommit() with tlckTRUNCATE:
3732
                                 * free child extents covered by parent [);
3733
                                 */
3734
                                tlck = txLock(tid, ip, mp, tlckXTREE);
3735
                                xtlck = (struct xtlock *) & tlck->lock;
3736
                                if (!(tlck->type & tlckTRUNCATE)) {
3737
                                        xtlck->hwm.offset =
3738
                                            le16_to_cpu(p->header.
3739
                                                        nextindex) - 1;
3740
                                        tlck->type =
3741
                                            tlckXTREE | tlckTRUNCATE;
3742
                                }
3743
                        } else {        /* COMMIT_WMAP */
3744
 
3745
                                /* free child extents covered by parent */
3746
                                xadlock.xdlist = &p->xad[index + 1];
3747
                                xadlock.count =
3748
                                    le16_to_cpu(p->header.nextindex) -
3749
                                    index - 1;
3750
                                txFreeMap(ip, (struct maplock *) & xadlock, 0,
3751
                                          COMMIT_WMAP);
3752
                        }
3753
                        BT_MARK_DIRTY(mp, ip);
3754
 
3755
                        p->header.nextindex = cpu_to_le16(index + 1);
3756
                }
3757
                XT_PUTPAGE(mp);
3758
                goto getParent;
3759
        }
3760
 
3761
        /*
3762
         * child page was empty:
3763
         */
3764
        nfreed += lengthXAD(&p->xad[index]);
3765
 
3766
        /*
3767
         * During working map update, child page's tlock must be handled
3768
         * before parent's.  This is because the parent's tlock will cause
3769
         * the child's disk space to be marked available in the wmap, so
3770
         * it's important that the child page be released by that time.
3771
         *
3772
         * ToDo:  tlocks should be on doubly-linked list, so we can
3773
         * quickly remove it and add it to the end.
3774
         */
3775
 
3776
        /*
3777
         * Move parent page's tlock to the end of the tid's tlock list
3778
         */
3779
        if (log && mp->lid && (tblk->last != mp->lid) &&
3780
            lid_to_tlock(mp->lid)->tid) {
3781
                lid_t lid = mp->lid;
3782
                struct tlock *prev;
3783
 
3784
                tlck = lid_to_tlock(lid);
3785
 
3786
                if (tblk->next == lid)
3787
                        tblk->next = tlck->next;
3788
                else {
3789
                        for (prev = lid_to_tlock(tblk->next);
3790
                             prev->next != lid;
3791
                             prev = lid_to_tlock(prev->next)) {
3792
                                assert(prev->next);
3793
                        }
3794
                        prev->next = tlck->next;
3795
                }
3796
                lid_to_tlock(tblk->last)->next = lid;
3797
                tlck->next = 0;
3798
                tblk->last = lid;
3799
        }
3800
 
3801
        /*
3802
         * parent page become empty: free the page
3803
         */
3804
        if (index == XTENTRYSTART) {
3805
                if (log) {      /* COMMIT_PWMAP */
3806
                        /* txCommit() with tlckFREE:
3807
                         * free child extents covered by parent;
3808
                         * invalidate parent if COMMIT_PWMAP;
3809
                         */
3810
                        tlck = txLock(tid, ip, mp, tlckXTREE);
3811
                        xtlck = (struct xtlock *) & tlck->lock;
3812
                        xtlck->hwm.offset =
3813
                            le16_to_cpu(p->header.nextindex) - 1;
3814
                        tlck->type = tlckXTREE | tlckFREE;
3815
                } else {        /* COMMIT_WMAP */
3816
 
3817
                        /* free child extents covered by parent */
3818
                        xadlock.xdlist = &p->xad[XTENTRYSTART];
3819
                        xadlock.count =
3820
                            le16_to_cpu(p->header.nextindex) -
3821
                            XTENTRYSTART;
3822
                        txFreeMap(ip, (struct maplock *) & xadlock, 0,
3823
                                  COMMIT_WMAP);
3824
                }
3825
                BT_MARK_DIRTY(mp, ip);
3826
 
3827
                if (p->header.flag & BT_ROOT) {
3828
                        p->header.flag &= ~BT_INTERNAL;
3829
                        p->header.flag |= BT_LEAF;
3830
                        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3831
                        if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
3832
                                /*
3833
                                 * Shrink root down to allow inline
3834
                                 * EA (otherwise fsck complains)
3835
                                 */
3836
                                p->header.maxentry =
3837
                                    cpu_to_le16(XTROOTINITSLOT);
3838
                                JFS_IP(ip)->mode2 |= INLINEEA;
3839
                        }
3840
 
3841
                        XT_PUTPAGE(mp); /* debug */
3842
                        goto out;
3843
                } else {
3844
                        if (log) {      /* COMMIT_PWMAP */
3845
                                /* page will be invalidated at tx completion
3846
                                 */
3847
                                XT_PUTPAGE(mp);
3848
                        } else {        /* COMMIT_WMAP */
3849
 
3850
                                if (mp->lid)
3851
                                        lid_to_tlock(mp->lid)->flag |=
3852
                                                tlckFREELOCK;
3853
 
3854
                                /* invalidate parent page */
3855
                                discard_metapage(mp);
3856
                        }
3857
 
3858
                        /* parent has become empty and freed:
3859
                         * go back up to its parent page
3860
                         */
3861
                        /* freed = 1; */
3862
                        goto getParent;
3863
                }
3864
        }
3865
        /*
3866
         * parent page still has entries for front region;
3867
         */
3868
        else {
3869
                /* try truncate region covered by preceding entry
3870
                 * (process backward)
3871
                 */
3872
                index--;
3873
 
3874
                /* go back down to the child page corresponding
3875
                 * to the entry
3876
                 */
3877
                goto getChild;
3878
        }
3879
 
3880
        /*
3881
         *      internal page: go down to child page of current entry
3882
         */
3883
      getChild:
3884
        /* save current parent entry for the child page */
3885
        BT_PUSH(&btstack, bn, index);
3886
 
3887
        /* get child page */
3888
        xad = &p->xad[index];
3889
        bn = addressXAD(xad);
3890
 
3891
        /*
3892
         * first access of each internal entry:
3893
         */
3894
        /* release parent page */
3895
        XT_PUTPAGE(mp);
3896
 
3897
        /* process the child page */
3898
        goto getPage;
3899
 
3900
      out:
3901
        /*
3902
         * update file resource stat
3903
         */
3904
        /* set size
3905
         */
3906
        if (S_ISDIR(ip->i_mode) && !newsize)
3907
                ip->i_size = 1; /* fsck hates zero-length directories */
3908
        else
3909
                ip->i_size = newsize;
3910
 
3911
        /* update nblocks to reflect freed blocks */
3912
        ip->i_blocks -= LBLK2PBLK(ip->i_sb, nfreed);
3913
 
3914
        /*
3915
         * free tlock of invalidated pages
3916
         */
3917
        if (flag == COMMIT_WMAP)
3918
                txFreelock(ip);
3919
 
3920
        return newsize;
3921
}
3922
 
3923
 
3924
/*
3925
 *      xtTruncate_pmap()
3926
 *
3927
 * function:
3928
 *      Perform truncate to zero lenghth for deleted file, leaving the
3929
 *      the xtree and working map untouched.  This allows the file to
3930
 *      be accessed via open file handles, while the delete of the file
3931
 *      is committed to disk.
3932
 *
3933
 * parameter:
3934
 *      tid_t           tid,
3935
 *      struct inode    *ip,
3936
 *      s64             committed_size)
3937
 *
3938
 * return: new committed size
3939
 *
3940
 * note:
3941
 *
3942
 *      To avoid deadlock by holding too many transaction locks, the
3943
 *      truncation may be broken up into multiple transactions.
3944
 *      The committed_size keeps track of part of the file has been
3945
 *      freed from the pmaps.
3946
 */
3947
s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
3948
{
3949
        s64 bn;
3950
        struct btstack btstack;
3951
        int cmp;
3952
        int index;
3953
        int locked_leaves = 0;
3954
        struct metapage *mp;
3955
        xtpage_t *p;
3956
        struct btframe *parent;
3957
        int rc;
3958
        struct tblock *tblk;
3959
        struct tlock *tlck = 0;
3960
        xad_t *xad;
3961
        int xlen;
3962
        s64 xoff;
3963
        struct xtlock *xtlck = 0;
3964
 
3965
        /* save object truncation type */
3966
        tblk = tid_to_tblock(tid);
3967
        tblk->xflag |= COMMIT_PMAP;
3968
 
3969
        /* clear stack */
3970
        BT_CLR(&btstack);
3971
 
3972
        if (committed_size) {
3973
                xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
3974
                rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
3975
                if (rc)
3976
                        return rc;
3977
 
3978
                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3979
 
3980
                if (cmp != 0) {
3981
                        XT_PUTPAGE(mp);
3982
                        jfs_error(ip->i_sb,
3983
                                  "xtTruncate_pmap: did not find extent");
3984
                        return -EIO;
3985
                }
3986
        } else {
3987
                /*
3988
                 * start with root
3989
                 *
3990
                 * root resides in the inode
3991
                 */
3992
                bn = 0;
3993
 
3994
                /*
3995
                 * first access of each page:
3996
                 */
3997
      getPage:
3998
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3999
                if (rc)
4000
                        return rc;
4001
 
4002
                /* process entries backward from last index */
4003
                index = le16_to_cpu(p->header.nextindex) - 1;
4004
 
4005
                if (p->header.flag & BT_INTERNAL)
4006
                        goto getChild;
4007
        }
4008
 
4009
        /*
4010
         *      leaf page
4011
         */
4012
 
4013
        if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
4014
                /*
4015
                 * We need to limit the size of the transaction
4016
                 * to avoid exhausting pagecache & tlocks
4017
                 */
4018
                xad = &p->xad[index];
4019
                xoff = offsetXAD(xad);
4020
                xlen = lengthXAD(xad);
4021
                XT_PUTPAGE(mp);
4022
                return  (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
4023
        }
4024
        tlck = txLock(tid, ip, mp, tlckXTREE);
4025
        tlck->type = tlckXTREE | tlckFREE;
4026
        xtlck = (struct xtlock *) & tlck->lock;
4027
        xtlck->hwm.offset = index;
4028
 
4029
 
4030
        XT_PUTPAGE(mp);
4031
 
4032
        /*
4033
         * go back up to the parent page
4034
         */
4035
      getParent:
4036
        /* pop/restore parent entry for the current child page */
4037
        if ((parent = BT_POP(&btstack)) == NULL)
4038
                /* current page must have been root */
4039
                goto out;
4040
 
4041
        /* get back the parent page */
4042
        bn = parent->bn;
4043
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4044
        if (rc)
4045
                return rc;
4046
 
4047
        index = parent->index;
4048
 
4049
        /*
4050
         * parent page become empty: free the page
4051
         */
4052
        if (index == XTENTRYSTART) {
4053
                /* txCommit() with tlckFREE:
4054
                 * free child extents covered by parent;
4055
                 * invalidate parent if COMMIT_PWMAP;
4056
                 */
4057
                tlck = txLock(tid, ip, mp, tlckXTREE);
4058
                xtlck = (struct xtlock *) & tlck->lock;
4059
                xtlck->hwm.offset =
4060
                    le16_to_cpu(p->header.nextindex) - 1;
4061
                tlck->type = tlckXTREE | tlckFREE;
4062
 
4063
                XT_PUTPAGE(mp);
4064
 
4065
                if (p->header.flag & BT_ROOT) {
4066
 
4067
                        goto out;
4068
                } else {
4069
                        goto getParent;
4070
                }
4071
        }
4072
        /*
4073
         * parent page still has entries for front region;
4074
         */
4075
        else
4076
                index--;
4077
        /*
4078
         *      internal page: go down to child page of current entry
4079
         */
4080
      getChild:
4081
        /* save current parent entry for the child page */
4082
        BT_PUSH(&btstack, bn, index);
4083
 
4084
        /* get child page */
4085
        xad = &p->xad[index];
4086
        bn = addressXAD(xad);
4087
 
4088
        /*
4089
         * first access of each internal entry:
4090
         */
4091
        /* release parent page */
4092
        XT_PUTPAGE(mp);
4093
 
4094
        /* process the child page */
4095
        goto getPage;
4096
 
4097
      out:
4098
 
4099
        return 0;
4100
}
4101
 
4102
 
4103
#ifdef _JFS_DEBUG_XTREE
4104
/*
4105
 *      xtDisplayTree()
4106
 *
4107
 * function: traverse forward
4108
 */
4109
int xtDisplayTree(struct inode *ip)
4110
{
4111
        int rc = 0;
4112
        struct metapage *mp;
4113
        xtpage_t *p;
4114
        s64 bn, pbn;
4115
        int index, lastindex, v, h;
4116
        xad_t *xad;
4117
        struct btstack btstack;
4118
        struct btframe *btsp;
4119
        struct btframe *parent;
4120
 
4121
        printk("display B+-tree.\n");
4122
 
4123
        /* clear stack */
4124
        btsp = btstack.stack;
4125
 
4126
        /*
4127
         * start with root
4128
         *
4129
         * root resides in the inode
4130
         */
4131
        bn = 0;
4132
        v = h = 0;
4133
 
4134
        /*
4135
         * first access of each page:
4136
         */
4137
      getPage:
4138
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4139
        if (rc)
4140
                return rc;
4141
 
4142
        /* process entries forward from first index */
4143
        index = XTENTRYSTART;
4144
        lastindex = le16_to_cpu(p->header.nextindex) - 1;
4145
 
4146
        if (p->header.flag & BT_INTERNAL) {
4147
                /*
4148
                 * first access of each internal page
4149
                 */
4150
                goto getChild;
4151
        } else {                /* (p->header.flag & BT_LEAF) */
4152
 
4153
                /*
4154
                 * first access of each leaf page
4155
                 */
4156
                printf("leaf page ");
4157
                xtDisplayPage(ip, bn, p);
4158
 
4159
                /* unpin the leaf page */
4160
                XT_PUTPAGE(mp);
4161
        }
4162
 
4163
        /*
4164
         * go back up to the parent page
4165
         */
4166
      getParent:
4167
        /* pop/restore parent entry for the current child page */
4168
        if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
4169
                /* current page must have been root */
4170
                return;
4171
 
4172
        /*
4173
         * parent page scan completed
4174
         */
4175
        if ((index = parent->index) == (lastindex = parent->lastindex)) {
4176
                /* go back up to the parent page */
4177
                goto getParent;
4178
        }
4179
 
4180
        /*
4181
         * parent page has entries remaining
4182
         */
4183
        /* get back the parent page */
4184
        bn = parent->bn;
4185
        /* v = parent->level; */
4186
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4187
        if (rc)
4188
                return rc;
4189
 
4190
        /* get next parent entry */
4191
        index++;
4192
 
4193
        /*
4194
         * internal page: go down to child page of current entry
4195
         */
4196
      getChild:
4197
        /* push/save current parent entry for the child page */
4198
        btsp->bn = pbn = bn;
4199
        btsp->index = index;
4200
        btsp->lastindex = lastindex;
4201
        /* btsp->level = v; */
4202
        /* btsp->node = h; */
4203
        ++btsp;
4204
 
4205
        /* get child page */
4206
        xad = &p->xad[index];
4207
        bn = addressXAD(xad);
4208
 
4209
        /*
4210
         * first access of each internal entry:
4211
         */
4212
        /* release parent page */
4213
        XT_PUTPAGE(mp);
4214
 
4215
        printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index,
4216
               (ulong) bn);
4217
        v++;
4218
        h = index;
4219
 
4220
        /* process the child page */
4221
        goto getPage;
4222
}
4223
 
4224
 
4225
/*
4226
 *      xtDisplayPage()
4227
 *
4228
 * function: display page
4229
 */
4230
int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p)
4231
{
4232
        int rc = 0;
4233
        xad_t *xad;
4234
        s64 xaddr, xoff;
4235
        int xlen, i, j;
4236
 
4237
        /* display page control */
4238
        printf("bn:0x%lx flag:0x%x nextindex:%d\n",
4239
               (ulong) bn, p->header.flag,
4240
               le16_to_cpu(p->header.nextindex));
4241
 
4242
        /* display entries */
4243
        xad = &p->xad[XTENTRYSTART];
4244
                for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex);
4245
                     i++, xad++, j++) {
4246
                        xoff = offsetXAD(xad);
4247
                        xaddr = addressXAD(xad);
4248
                        xlen = lengthXAD(xad);
4249
                        printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff,
4250
                               (ulong) xaddr, xlen);
4251
 
4252
                        if (j == 4) {
4253
                                printf("\n");
4254
                                j = 0;
4255
                }
4256
        }
4257
 
4258
        printf("\n");
4259
}
4260
#endif                          /* _JFS_DEBUG_XTREE */
4261
 
4262
 
4263
#ifdef _JFS_WIP
4264
/*
4265
 *      xtGather()
4266
 *
4267
 * function:
4268
 *      traverse for allocation acquiring tlock at commit time
4269
 *      (vs at the time of update) logging backward top down
4270
 *
4271
 * note:
4272
 *      problem - establishing that all new allocation have been
4273
 *      processed both for append and random write in sparse file
4274
 *      at the current entry at the current subtree root page
4275
 *
4276
 */
4277
int xtGather(btree_t *t)
4278
{
4279
        int rc = 0;
4280
        xtpage_t *p;
4281
        u64 bn;
4282
        int index;
4283
        btentry_t *e;
4284
        struct btstack btstack;
4285
        struct btsf *parent;
4286
 
4287
        /* clear stack */
4288
        BT_CLR(&btstack);
4289
 
4290
        /*
4291
         * start with root
4292
         *
4293
         * root resides in the inode
4294
         */
4295
        bn = 0;
4296
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4297
        if (rc)
4298
                return rc;
4299
 
4300
        /* new root is NOT pointed by a new entry
4301
           if (p->header.flag & NEW)
4302
           allocate new page lock;
4303
           write a NEWPAGE log;
4304
         */
4305
 
4306
      dopage:
4307
        /*
4308
         * first access of each page:
4309
         */
4310
        /* process entries backward from last index */
4311
        index = le16_to_cpu(p->header.nextindex) - 1;
4312
 
4313
        if (p->header.flag & BT_LEAF) {
4314
                /*
4315
                 * first access of each leaf page
4316
                 */
4317
                /* process leaf page entries backward */
4318
                for (; index >= XTENTRYSTART; index--) {
4319
                        e = &p->xad[index];
4320
                        /*
4321
                         * if newpage, log NEWPAGE.
4322
                         *
4323
                         if (e->flag & XAD_NEW) {
4324
                         nfound =+ entry->length;
4325
                         update current page lock for the entry;
4326
                         newpage(entry);
4327
                         *
4328
                         * if moved, log move.
4329
                         *
4330
                         } else if (e->flag & XAD_MOVED) {
4331
                         reset flag;
4332
                         update current page lock for the entry;
4333
                         }
4334
                         */
4335
                }
4336
 
4337
                /* unpin the leaf page */
4338
                XT_PUTPAGE(mp);
4339
 
4340
                /*
4341
                 * go back up to the parent page
4342
                 */
4343
              getParent:
4344
                /* restore parent entry for the current child page */
4345
                if ((parent = BT_POP(&btstack)) == NULL)
4346
                        /* current page must have been root */
4347
                        return 0;
4348
 
4349
                if ((index = parent->index) == XTENTRYSTART) {
4350
                        /*
4351
                         * parent page scan completed
4352
                         */
4353
                        /* go back up to the parent page */
4354
                        goto getParent;
4355
                } else {
4356
                        /*
4357
                         * parent page has entries remaining
4358
                         */
4359
                        /* get back the parent page */
4360
                        bn = parent->bn;
4361
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4362
                        if (rc)
4363
                                return -EIO;
4364
 
4365
                        /* first subroot page which
4366
                         * covers all new allocated blocks
4367
                         * itself not new/modified.
4368
                         * (if modified from split of descendent,
4369
                         * go down path of split page)
4370
 
4371
                         if (nfound == nnew &&
4372
                         !(p->header.flag & (NEW | MOD)))
4373
                         exit scan;
4374
                         */
4375
 
4376
                        /* process parent page entries backward */
4377
                        index--;
4378
                }
4379
        } else {
4380
                /*
4381
                 * first access of each internal page
4382
                 */
4383
        }
4384
 
4385
        /*
4386
         * internal page: go down to child page of current entry
4387
         */
4388
 
4389
        /* save current parent entry for the child page */
4390
        BT_PUSH(&btstack, bn, index);
4391
 
4392
        /* get current entry for the child page */
4393
        e = &p->xad[index];
4394
 
4395
        /*
4396
         * first access of each internal entry:
4397
         */
4398
        /*
4399
         * if new entry, log btree_tnewentry.
4400
         *
4401
         if (e->flag & XAD_NEW)
4402
         update parent page lock for the entry;
4403
         */
4404
 
4405
        /* release parent page */
4406
        XT_PUTPAGE(mp);
4407
 
4408
        /* get child page */
4409
        bn = e->bn;
4410
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4411
        if (rc)
4412
                return rc;
4413
 
4414
        /*
4415
         * first access of each non-root page:
4416
         */
4417
        /*
4418
         * if new, log btree_newpage.
4419
         *
4420
         if (p->header.flag & NEW)
4421
         allocate new page lock;
4422
         write a NEWPAGE log (next, prev);
4423
         */
4424
 
4425
        /* process the child page */
4426
        goto dopage;
4427
 
4428
      out:
4429
        return 0;
4430
}
4431
#endif                          /* _JFS_WIP */
4432
 
4433
 
4434
#ifdef CONFIG_JFS_STATISTICS
4435
int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
4436
                    int *eof, void *data)
4437
{
4438
        int len = 0;
4439
        off_t begin;
4440
 
4441
        len += sprintf(buffer,
4442
                       "JFS Xtree statistics\n"
4443
                       "====================\n"
4444
                       "searches = %d\n"
4445
                       "fast searches = %d\n"
4446
                       "splits = %d\n",
4447
                       xtStat.search,
4448
                       xtStat.fastSearch,
4449
                       xtStat.split);
4450
 
4451
        begin = offset;
4452
        *start = buffer + begin;
4453
        len -= begin;
4454
 
4455
        if (len > length)
4456
                len = length;
4457
        else
4458
                *eof = 1;
4459
 
4460
        if (len < 0)
4461
                len = 0;
4462
 
4463
        return len;
4464
}
4465
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.