OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [fs/] [jfs/] [jfs_xtree.c] - Blame information for rev 19

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 xianfeng
/*
2
 *   Copyright (C) International Business Machines Corp., 2000-2005
3
 *
4
 *   This program is free software;  you can redistribute it and/or modify
5
 *   it under the terms of the GNU General Public License as published by
6
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   (at your option) any later version.
8
 *
9
 *   This program is distributed in the hope that it will be useful,
10
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12
 *   the GNU General Public License for more details.
13
 *
14
 *   You should have received a copy of the GNU General Public License
15
 *   along with this program;  if not, write to the Free Software
16
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
 */
18
/*
19
 *      jfs_xtree.c: extent allocation descriptor B+-tree manager
20
 */
21
 
22
#include <linux/fs.h>
23
#include <linux/quotaops.h>
24
#include "jfs_incore.h"
25
#include "jfs_filsys.h"
26
#include "jfs_metapage.h"
27
#include "jfs_dmap.h"
28
#include "jfs_dinode.h"
29
#include "jfs_superblock.h"
30
#include "jfs_debug.h"
31
 
32
/*
33
 * xtree local flag
34
 */
35
#define XT_INSERT       0x00000001
36
 
37
/*
38
 *      xtree key/entry comparison: extent offset
39
 *
40
 * return:
41
 *      -1: k < start of extent
42
 *       0: start_of_extent <= k <= end_of_extent
43
 *       1: k > end_of_extent
44
 */
45
#define XT_CMP(CMP, K, X, OFFSET64)\
46
{\
47
        OFFSET64 = offsetXAD(X);\
48
        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
49
                ((K) < OFFSET64) ? -1 : 0;\
50
}
51
 
52
/* write a xad entry */
53
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
54
{\
55
        (XAD)->flag = (FLAG);\
56
        XADoffset((XAD), (OFF));\
57
        XADlength((XAD), (LEN));\
58
        XADaddress((XAD), (ADDR));\
59
}
60
 
61
#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
62
 
63
/* get page buffer for specified block address */
64
/* ToDo: Replace this ugly macro with a function */
65
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
66
{\
67
        BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
68
        if (!(RC))\
69
        {\
70
                if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
71
                    (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
72
                    (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
73
                {\
74
                        jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
75
                        BT_PUTPAGE(MP);\
76
                        MP = NULL;\
77
                        RC = -EIO;\
78
                }\
79
        }\
80
}
81
 
82
/* for consistency */
83
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
84
 
85
#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
86
        BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
87
/* xtree entry parameter descriptor */
88
struct xtsplit {
89
        struct metapage *mp;
90
        s16 index;
91
        u8 flag;
92
        s64 off;
93
        s64 addr;
94
        int len;
95
        struct pxdlist *pxdlist;
96
};
97
 
98
 
99
/*
100
 *      statistics
101
 */
102
#ifdef CONFIG_JFS_STATISTICS
103
static struct {
104
        uint search;
105
        uint fastSearch;
106
        uint split;
107
} xtStat;
108
#endif
109
 
110
 
111
/*
112
 * forward references
113
 */
114
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
115
                    struct btstack * btstack, int flag);
116
 
117
static int xtSplitUp(tid_t tid,
118
                     struct inode *ip,
119
                     struct xtsplit * split, struct btstack * btstack);
120
 
121
static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
122
                       struct metapage ** rmpp, s64 * rbnp);
123
 
124
static int xtSplitRoot(tid_t tid, struct inode *ip,
125
                       struct xtsplit * split, struct metapage ** rmpp);
126
 
127
#ifdef _STILL_TO_PORT
128
static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
129
                      xtpage_t * fp, struct btstack * btstack);
130
 
131
static int xtSearchNode(struct inode *ip,
132
                        xad_t * xad,
133
                        int *cmpp, struct btstack * btstack, int flag);
134
 
135
static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
136
#endif                          /*  _STILL_TO_PORT */
137
 
138
/*
139
 *      xtLookup()
140
 *
141
 * function: map a single page into a physical extent;
142
 */
143
int xtLookup(struct inode *ip, s64 lstart,
144
             s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
145
{
146
        int rc = 0;
147
        struct btstack btstack;
148
        int cmp;
149
        s64 bn;
150
        struct metapage *mp;
151
        xtpage_t *p;
152
        int index;
153
        xad_t *xad;
154
        s64 next, size, xoff, xend;
155
        int xlen;
156
        s64 xaddr;
157
 
158
        *paddr = 0;
159
        *plen = llen;
160
 
161
        if (!no_check) {
162
                /* is lookup offset beyond eof ? */
163
                size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
164
                    JFS_SBI(ip->i_sb)->l2bsize;
165
                if (lstart >= size) {
166
                        jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
167
                                (ulong) lstart, (ulong) size);
168
                        return 0;
169
                }
170
        }
171
 
172
        /*
173
         * search for the xad entry covering the logical extent
174
         */
175
//search:
176
        if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
177
                jfs_err("xtLookup: xtSearch returned %d", rc);
178
                return rc;
179
        }
180
 
181
        /*
182
         *      compute the physical extent covering logical extent
183
         *
184
         * N.B. search may have failed (e.g., hole in sparse file),
185
         * and returned the index of the next entry.
186
         */
187
        /* retrieve search result */
188
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
189
 
190
        /* is xad found covering start of logical extent ?
191
         * lstart is a page start address,
192
         * i.e., lstart cannot start in a hole;
193
         */
194
        if (cmp) {
195
                if (next)
196
                        *plen = min(next - lstart, llen);
197
                goto out;
198
        }
199
 
200
        /*
201
         * lxd covered by xad
202
         */
203
        xad = &p->xad[index];
204
        xoff = offsetXAD(xad);
205
        xlen = lengthXAD(xad);
206
        xend = xoff + xlen;
207
        xaddr = addressXAD(xad);
208
 
209
        /* initialize new pxd */
210
        *pflag = xad->flag;
211
        *paddr = xaddr + (lstart - xoff);
212
        /* a page must be fully covered by an xad */
213
        *plen = min(xend - lstart, llen);
214
 
215
      out:
216
        XT_PUTPAGE(mp);
217
 
218
        return rc;
219
}
220
 
221
 
222
/*
223
 *      xtLookupList()
224
 *
225
 * function: map a single logical extent into a list of physical extent;
226
 *
227
 * parameter:
228
 *      struct inode    *ip,
229
 *      struct lxdlist  *lxdlist,       lxd list (in)
230
 *      struct xadlist  *xadlist,       xad list (in/out)
231
 *      int             flag)
232
 *
233
 * coverage of lxd by xad under assumption of
234
 * . lxd's are ordered and disjoint.
235
 * . xad's are ordered and disjoint.
236
 *
237
 * return:
238
 *      0:      success
239
 *
240
 * note: a page being written (even a single byte) is backed fully,
241
 *      except the last page which is only backed with blocks
242
 *      required to cover the last byte;
243
 *      the extent backing a page is fully contained within an xad;
244
 */
245
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
246
                 struct xadlist * xadlist, int flag)
247
{
248
        int rc = 0;
249
        struct btstack btstack;
250
        int cmp;
251
        s64 bn;
252
        struct metapage *mp;
253
        xtpage_t *p;
254
        int index;
255
        lxd_t *lxd;
256
        xad_t *xad, *pxd;
257
        s64 size, lstart, lend, xstart, xend, pstart;
258
        s64 llen, xlen, plen;
259
        s64 xaddr, paddr;
260
        int nlxd, npxd, maxnpxd;
261
 
262
        npxd = xadlist->nxad = 0;
263
        maxnpxd = xadlist->maxnxad;
264
        pxd = xadlist->xad;
265
 
266
        nlxd = lxdlist->nlxd;
267
        lxd = lxdlist->lxd;
268
 
269
        lstart = offsetLXD(lxd);
270
        llen = lengthLXD(lxd);
271
        lend = lstart + llen;
272
 
273
        size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
274
            JFS_SBI(ip->i_sb)->l2bsize;
275
 
276
        /*
277
         * search for the xad entry covering the logical extent
278
         */
279
      search:
280
        if (lstart >= size)
281
                return 0;
282
 
283
        if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
284
                return rc;
285
 
286
        /*
287
         *      compute the physical extent covering logical extent
288
         *
289
         * N.B. search may have failed (e.g., hole in sparse file),
290
         * and returned the index of the next entry.
291
         */
292
//map:
293
        /* retrieve search result */
294
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
295
 
296
        /* is xad on the next sibling page ? */
297
        if (index == le16_to_cpu(p->header.nextindex)) {
298
                if (p->header.flag & BT_ROOT)
299
                        goto mapend;
300
 
301
                if ((bn = le64_to_cpu(p->header.next)) == 0)
302
                        goto mapend;
303
 
304
                XT_PUTPAGE(mp);
305
 
306
                /* get next sibling page */
307
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
308
                if (rc)
309
                        return rc;
310
 
311
                index = XTENTRYSTART;
312
        }
313
 
314
        xad = &p->xad[index];
315
 
316
        /*
317
         * is lxd covered by xad ?
318
         */
319
      compare:
320
        xstart = offsetXAD(xad);
321
        xlen = lengthXAD(xad);
322
        xend = xstart + xlen;
323
        xaddr = addressXAD(xad);
324
 
325
      compare1:
326
        if (xstart < lstart)
327
                goto compare2;
328
 
329
        /* (lstart <= xstart) */
330
 
331
        /* lxd is NOT covered by xad */
332
        if (lend <= xstart) {
333
                /*
334
                 * get next lxd
335
                 */
336
                if (--nlxd == 0)
337
                        goto mapend;
338
                lxd++;
339
 
340
                lstart = offsetLXD(lxd);
341
                llen = lengthLXD(lxd);
342
                lend = lstart + llen;
343
                if (lstart >= size)
344
                        goto mapend;
345
 
346
                /* compare with the current xad */
347
                goto compare1;
348
        }
349
        /* lxd is covered by xad */
350
        else {                  /* (xstart < lend) */
351
 
352
                /* initialize new pxd */
353
                pstart = xstart;
354
                plen = min(lend - xstart, xlen);
355
                paddr = xaddr;
356
 
357
                goto cover;
358
        }
359
 
360
        /* (xstart < lstart) */
361
      compare2:
362
        /* lxd is covered by xad */
363
        if (lstart < xend) {
364
                /* initialize new pxd */
365
                pstart = lstart;
366
                plen = min(xend - lstart, llen);
367
                paddr = xaddr + (lstart - xstart);
368
 
369
                goto cover;
370
        }
371
        /* lxd is NOT covered by xad */
372
        else {                  /* (xend <= lstart) */
373
 
374
                /*
375
                 * get next xad
376
                 *
377
                 * linear search next xad covering lxd on
378
                 * the current xad page, and then tree search
379
                 */
380
                if (index == le16_to_cpu(p->header.nextindex) - 1) {
381
                        if (p->header.flag & BT_ROOT)
382
                                goto mapend;
383
 
384
                        XT_PUTPAGE(mp);
385
                        goto search;
386
                } else {
387
                        index++;
388
                        xad++;
389
 
390
                        /* compare with new xad */
391
                        goto compare;
392
                }
393
        }
394
 
395
        /*
396
         * lxd is covered by xad and a new pxd has been initialized
397
         * (lstart <= xstart < lend) or (xstart < lstart < xend)
398
         */
399
      cover:
400
        /* finalize pxd corresponding to current xad */
401
        XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);
402
 
403
        if (++npxd >= maxnpxd)
404
                goto mapend;
405
        pxd++;
406
 
407
        /*
408
         * lxd is fully covered by xad
409
         */
410
        if (lend <= xend) {
411
                /*
412
                 * get next lxd
413
                 */
414
                if (--nlxd == 0)
415
                        goto mapend;
416
                lxd++;
417
 
418
                lstart = offsetLXD(lxd);
419
                llen = lengthLXD(lxd);
420
                lend = lstart + llen;
421
                if (lstart >= size)
422
                        goto mapend;
423
 
424
                /*
425
                 * test for old xad covering new lxd
426
                 * (old xstart < new lstart)
427
                 */
428
                goto compare2;
429
        }
430
        /*
431
         * lxd is partially covered by xad
432
         */
433
        else {                  /* (xend < lend) */
434
 
435
                /*
436
                 * get next xad
437
                 *
438
                 * linear search next xad covering lxd on
439
                 * the current xad page, and then next xad page search
440
                 */
441
                if (index == le16_to_cpu(p->header.nextindex) - 1) {
442
                        if (p->header.flag & BT_ROOT)
443
                                goto mapend;
444
 
445
                        if ((bn = le64_to_cpu(p->header.next)) == 0)
446
                                goto mapend;
447
 
448
                        XT_PUTPAGE(mp);
449
 
450
                        /* get next sibling page */
451
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
452
                        if (rc)
453
                                return rc;
454
 
455
                        index = XTENTRYSTART;
456
                        xad = &p->xad[index];
457
                } else {
458
                        index++;
459
                        xad++;
460
                }
461
 
462
                /*
463
                 * test for new xad covering old lxd
464
                 * (old lstart < new xstart)
465
                 */
466
                goto compare;
467
        }
468
 
469
      mapend:
470
        xadlist->nxad = npxd;
471
 
472
//out:
473
        XT_PUTPAGE(mp);
474
 
475
        return rc;
476
}
477
 
478
 
479
/*
480
 *      xtSearch()
481
 *
482
 * function:    search for the xad entry covering specified offset.
483
 *
484
 * parameters:
485
 *      ip      - file object;
486
 *      xoff    - extent offset;
487
 *      nextp   - address of next extent (if any) for search miss
488
 *      cmpp    - comparison result:
489
 *      btstack - traverse stack;
490
 *      flag    - search process flag (XT_INSERT);
491
 *
492
 * returns:
493
 *      btstack contains (bn, index) of search path traversed to the entry.
494
 *      *cmpp is set to result of comparison with the entry returned.
495
 *      the page containing the entry is pinned at exit.
496
 */
497
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
498
                    int *cmpp, struct btstack * btstack, int flag)
499
{
500
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
501
        int rc = 0;
502
        int cmp = 1;            /* init for empty page */
503
        s64 bn;                 /* block number */
504
        struct metapage *mp;    /* page buffer */
505
        xtpage_t *p;            /* page */
506
        xad_t *xad;
507
        int base, index, lim, btindex;
508
        struct btframe *btsp;
509
        int nsplit = 0;          /* number of pages to split */
510
        s64 t64;
511
        s64 next = 0;
512
 
513
        INCREMENT(xtStat.search);
514
 
515
        BT_CLR(btstack);
516
 
517
        btstack->nsplit = 0;
518
 
519
        /*
520
         *      search down tree from root:
521
         *
522
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
523
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
524
         *
525
         * if entry with search key K is not found
526
         * internal page search find the entry with largest key Ki
527
         * less than K which point to the child page to search;
528
         * leaf page search find the entry with smallest key Kj
529
         * greater than K so that the returned index is the position of
530
         * the entry to be shifted right for insertion of new entry.
531
         * for empty tree, search key is greater than any key of the tree.
532
         *
533
         * by convention, root bn = 0.
534
         */
535
        for (bn = 0;;) {
536
                /* get/pin the page to search */
537
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
538
                if (rc)
539
                        return rc;
540
 
541
                /* try sequential access heuristics with the previous
542
                 * access entry in target leaf page:
543
                 * once search narrowed down into the target leaf,
544
                 * key must either match an entry in the leaf or
545
                 * key entry does not exist in the tree;
546
                 */
547
//fastSearch:
548
                if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
549
                    (p->header.flag & BT_LEAF) &&
550
                    (index = jfs_ip->btindex) <
551
                    le16_to_cpu(p->header.nextindex)) {
552
                        xad = &p->xad[index];
553
                        t64 = offsetXAD(xad);
554
                        if (xoff < t64 + lengthXAD(xad)) {
555
                                if (xoff >= t64) {
556
                                        *cmpp = 0;
557
                                        goto out;
558
                                }
559
 
560
                                /* stop sequential access heuristics */
561
                                goto binarySearch;
562
                        } else {        /* (t64 + lengthXAD(xad)) <= xoff */
563
 
564
                                /* try next sequential entry */
565
                                index++;
566
                                if (index <
567
                                    le16_to_cpu(p->header.nextindex)) {
568
                                        xad++;
569
                                        t64 = offsetXAD(xad);
570
                                        if (xoff < t64 + lengthXAD(xad)) {
571
                                                if (xoff >= t64) {
572
                                                        *cmpp = 0;
573
                                                        goto out;
574
                                                }
575
 
576
                                                /* miss: key falls between
577
                                                 * previous and this entry
578
                                                 */
579
                                                *cmpp = 1;
580
                                                next = t64;
581
                                                goto out;
582
                                        }
583
 
584
                                        /* (xoff >= t64 + lengthXAD(xad));
585
                                         * matching entry may be further out:
586
                                         * stop heuristic search
587
                                         */
588
                                        /* stop sequential access heuristics */
589
                                        goto binarySearch;
590
                                }
591
 
592
                                /* (index == p->header.nextindex);
593
                                 * miss: key entry does not exist in
594
                                 * the target leaf/tree
595
                                 */
596
                                *cmpp = 1;
597
                                goto out;
598
                        }
599
 
600
                        /*
601
                         * if hit, return index of the entry found, and
602
                         * if miss, where new entry with search key is
603
                         * to be inserted;
604
                         */
605
                      out:
606
                        /* compute number of pages to split */
607
                        if (flag & XT_INSERT) {
608
                                if (p->header.nextindex ==      /* little-endian */
609
                                    p->header.maxentry)
610
                                        nsplit++;
611
                                else
612
                                        nsplit = 0;
613
                                btstack->nsplit = nsplit;
614
                        }
615
 
616
                        /* save search result */
617
                        btsp = btstack->top;
618
                        btsp->bn = bn;
619
                        btsp->index = index;
620
                        btsp->mp = mp;
621
 
622
                        /* update sequential access heuristics */
623
                        jfs_ip->btindex = index;
624
 
625
                        if (nextp)
626
                                *nextp = next;
627
 
628
                        INCREMENT(xtStat.fastSearch);
629
                        return 0;
630
                }
631
 
632
                /* well, ... full search now */
633
              binarySearch:
634
                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
635
 
636
                /*
637
                 * binary search with search key K on the current page
638
                 */
639
                for (base = XTENTRYSTART; lim; lim >>= 1) {
640
                        index = base + (lim >> 1);
641
 
642
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
643
                        if (cmp == 0) {
644
                                /*
645
                                 *      search hit
646
                                 */
647
                                /* search hit - leaf page:
648
                                 * return the entry found
649
                                 */
650
                                if (p->header.flag & BT_LEAF) {
651
                                        *cmpp = cmp;
652
 
653
                                        /* compute number of pages to split */
654
                                        if (flag & XT_INSERT) {
655
                                                if (p->header.nextindex ==
656
                                                    p->header.maxentry)
657
                                                        nsplit++;
658
                                                else
659
                                                        nsplit = 0;
660
                                                btstack->nsplit = nsplit;
661
                                        }
662
 
663
                                        /* save search result */
664
                                        btsp = btstack->top;
665
                                        btsp->bn = bn;
666
                                        btsp->index = index;
667
                                        btsp->mp = mp;
668
 
669
                                        /* init sequential access heuristics */
670
                                        btindex = jfs_ip->btindex;
671
                                        if (index == btindex ||
672
                                            index == btindex + 1)
673
                                                jfs_ip->btorder = BT_SEQUENTIAL;
674
                                        else
675
                                                jfs_ip->btorder = BT_RANDOM;
676
                                        jfs_ip->btindex = index;
677
 
678
                                        return 0;
679
                                }
680
                                /* search hit - internal page:
681
                                 * descend/search its child page
682
                                 */
683
                                if (index < le16_to_cpu(p->header.nextindex)-1)
684
                                        next = offsetXAD(&p->xad[index + 1]);
685
                                goto next;
686
                        }
687
 
688
                        if (cmp > 0) {
689
                                base = index + 1;
690
                                --lim;
691
                        }
692
                }
693
 
694
                /*
695
                 *      search miss
696
                 *
697
                 * base is the smallest index with key (Kj) greater than
698
                 * search key (K) and may be zero or maxentry index.
699
                 */
700
                if (base < le16_to_cpu(p->header.nextindex))
701
                        next = offsetXAD(&p->xad[base]);
702
                /*
703
                 * search miss - leaf page:
704
                 *
705
                 * return location of entry (base) where new entry with
706
                 * search key K is to be inserted.
707
                 */
708
                if (p->header.flag & BT_LEAF) {
709
                        *cmpp = cmp;
710
 
711
                        /* compute number of pages to split */
712
                        if (flag & XT_INSERT) {
713
                                if (p->header.nextindex ==
714
                                    p->header.maxentry)
715
                                        nsplit++;
716
                                else
717
                                        nsplit = 0;
718
                                btstack->nsplit = nsplit;
719
                        }
720
 
721
                        /* save search result */
722
                        btsp = btstack->top;
723
                        btsp->bn = bn;
724
                        btsp->index = base;
725
                        btsp->mp = mp;
726
 
727
                        /* init sequential access heuristics */
728
                        btindex = jfs_ip->btindex;
729
                        if (base == btindex || base == btindex + 1)
730
                                jfs_ip->btorder = BT_SEQUENTIAL;
731
                        else
732
                                jfs_ip->btorder = BT_RANDOM;
733
                        jfs_ip->btindex = base;
734
 
735
                        if (nextp)
736
                                *nextp = next;
737
 
738
                        return 0;
739
                }
740
 
741
                /*
742
                 * search miss - non-leaf page:
743
                 *
744
                 * if base is non-zero, decrement base by one to get the parent
745
                 * entry of the child page to search.
746
                 */
747
                index = base ? base - 1 : base;
748
 
749
                /*
750
                 * go down to child page
751
                 */
752
              next:
753
                /* update number of pages to split */
754
                if (p->header.nextindex == p->header.maxentry)
755
                        nsplit++;
756
                else
757
                        nsplit = 0;
758
 
759
                /* push (bn, index) of the parent page/entry */
760
                if (BT_STACK_FULL(btstack)) {
761
                        jfs_error(ip->i_sb, "stack overrun in xtSearch!");
762
                        XT_PUTPAGE(mp);
763
                        return -EIO;
764
                }
765
                BT_PUSH(btstack, bn, index);
766
 
767
                /* get the child page block number */
768
                bn = addressXAD(&p->xad[index]);
769
 
770
                /* unpin the parent page */
771
                XT_PUTPAGE(mp);
772
        }
773
}
774
 
775
/*
776
 *      xtInsert()
777
 *
778
 * function:
779
 *
780
 * parameter:
781
 *      tid     - transaction id;
782
 *      ip      - file object;
783
 *      xflag   - extent flag (XAD_NOTRECORDED):
784
 *      xoff    - extent offset;
785
 *      xlen    - extent length;
786
 *      xaddrp  - extent address pointer (in/out):
787
 *              if (*xaddrp)
788
 *                      caller allocated data extent at *xaddrp;
789
 *              else
790
 *                      allocate data extent and return its xaddr;
791
 *      flag    -
792
 *
793
 * return:
794
 */
795
int xtInsert(tid_t tid,         /* transaction id */
796
             struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
797
             int flag)
798
{
799
        int rc = 0;
800
        s64 xaddr, hint;
801
        struct metapage *mp;    /* meta-page buffer */
802
        xtpage_t *p;            /* base B+-tree index page */
803
        s64 bn;
804
        int index, nextindex;
805
        struct btstack btstack; /* traverse stack */
806
        struct xtsplit split;   /* split information */
807
        xad_t *xad;
808
        int cmp;
809
        s64 next;
810
        struct tlock *tlck;
811
        struct xtlock *xtlck;
812
 
813
        jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
814
 
815
        /*
816
         *      search for the entry location at which to insert:
817
         *
818
         * xtFastSearch() and xtSearch() both returns (leaf page
819
         * pinned, index at which to insert).
820
         * n.b. xtSearch() may return index of maxentry of
821
         * the full page.
822
         */
823
        if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
824
                return rc;
825
 
826
        /* retrieve search result */
827
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
828
 
829
        /* This test must follow XT_GETSEARCH since mp must be valid if
830
         * we branch to out: */
831
        if ((cmp == 0) || (next && (xlen > next - xoff))) {
832
                rc = -EEXIST;
833
                goto out;
834
        }
835
 
836
        /*
837
         * allocate data extent requested
838
         *
839
         * allocation hint: last xad
840
         */
841
        if ((xaddr = *xaddrp) == 0) {
842
                if (index > XTENTRYSTART) {
843
                        xad = &p->xad[index - 1];
844
                        hint = addressXAD(xad) + lengthXAD(xad) - 1;
845
                } else
846
                        hint = 0;
847
                if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
848
                        goto out;
849
                if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
850
                        DQUOT_FREE_BLOCK(ip, xlen);
851
                        goto out;
852
                }
853
        }
854
 
855
        /*
856
         *      insert entry for new extent
857
         */
858
        xflag |= XAD_NEW;
859
 
860
        /*
861
         *      if the leaf page is full, split the page and
862
         *      propagate up the router entry for the new page from split
863
         *
864
         * The xtSplitUp() will insert the entry and unpin the leaf page.
865
         */
866
        nextindex = le16_to_cpu(p->header.nextindex);
867
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
868
                split.mp = mp;
869
                split.index = index;
870
                split.flag = xflag;
871
                split.off = xoff;
872
                split.len = xlen;
873
                split.addr = xaddr;
874
                split.pxdlist = NULL;
875
                if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
876
                        /* undo data extent allocation */
877
                        if (*xaddrp == 0) {
878
                                dbFree(ip, xaddr, (s64) xlen);
879
                                DQUOT_FREE_BLOCK(ip, xlen);
880
                        }
881
                        return rc;
882
                }
883
 
884
                *xaddrp = xaddr;
885
                return 0;
886
        }
887
 
888
        /*
889
         *      insert the new entry into the leaf page
890
         */
891
        /*
892
         * acquire a transaction lock on the leaf page;
893
         *
894
         * action: xad insertion/extension;
895
         */
896
        BT_MARK_DIRTY(mp, ip);
897
 
898
        /* if insert into middle, shift right remaining entries. */
899
        if (index < nextindex)
900
                memmove(&p->xad[index + 1], &p->xad[index],
901
                        (nextindex - index) * sizeof(xad_t));
902
 
903
        /* insert the new entry: mark the entry NEW */
904
        xad = &p->xad[index];
905
        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
906
 
907
        /* advance next available entry index */
908
        p->header.nextindex =
909
            cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
910
 
911
        /* Don't log it if there are no links to the file */
912
        if (!test_cflag(COMMIT_Nolink, ip)) {
913
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
914
                xtlck = (struct xtlock *) & tlck->lock;
915
                xtlck->lwm.offset =
916
                    (xtlck->lwm.offset) ? min(index,
917
                                              (int)xtlck->lwm.offset) : index;
918
                xtlck->lwm.length =
919
                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
920
        }
921
 
922
        *xaddrp = xaddr;
923
 
924
      out:
925
        /* unpin the leaf page */
926
        XT_PUTPAGE(mp);
927
 
928
        return rc;
929
}
930
 
931
 
932
/*
933
 *      xtSplitUp()
934
 *
935
 * function:
936
 *      split full pages as propagating insertion up the tree
937
 *
938
 * parameter:
939
 *      tid     - transaction id;
940
 *      ip      - file object;
941
 *      split   - entry parameter descriptor;
942
 *      btstack - traverse stack from xtSearch()
943
 *
944
 * return:
945
 */
946
static int
947
xtSplitUp(tid_t tid,
948
          struct inode *ip, struct xtsplit * split, struct btstack * btstack)
949
{
950
        int rc = 0;
951
        struct metapage *smp;
952
        xtpage_t *sp;           /* split page */
953
        struct metapage *rmp;
954
        s64 rbn;                /* new right page block number */
955
        struct metapage *rcmp;
956
        xtpage_t *rcp;          /* right child page */
957
        s64 rcbn;               /* right child page block number */
958
        int skip;               /* index of entry of insertion */
959
        int nextindex;          /* next available entry index of p */
960
        struct btframe *parent; /* parent page entry on traverse stack */
961
        xad_t *xad;
962
        s64 xaddr;
963
        int xlen;
964
        int nsplit;             /* number of pages split */
965
        struct pxdlist pxdlist;
966
        pxd_t *pxd;
967
        struct tlock *tlck;
968
        struct xtlock *xtlck;
969
 
970
        smp = split->mp;
971
        sp = XT_PAGE(ip, smp);
972
 
973
        /* is inode xtree root extension/inline EA area free ? */
974
        if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
975
            (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
976
            (JFS_IP(ip)->mode2 & INLINEEA)) {
977
                sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
978
                JFS_IP(ip)->mode2 &= ~INLINEEA;
979
 
980
                BT_MARK_DIRTY(smp, ip);
981
                /*
982
                 * acquire a transaction lock on the leaf page;
983
                 *
984
                 * action: xad insertion/extension;
985
                 */
986
 
987
                /* if insert into middle, shift right remaining entries. */
988
                skip = split->index;
989
                nextindex = le16_to_cpu(sp->header.nextindex);
990
                if (skip < nextindex)
991
                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
992
                                (nextindex - skip) * sizeof(xad_t));
993
 
994
                /* insert the new entry: mark the entry NEW */
995
                xad = &sp->xad[skip];
996
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
997
                            split->addr);
998
 
999
                /* advance next available entry index */
1000
                sp->header.nextindex =
1001
                    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);
1002
 
1003
                /* Don't log it if there are no links to the file */
1004
                if (!test_cflag(COMMIT_Nolink, ip)) {
1005
                        tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1006
                        xtlck = (struct xtlock *) & tlck->lock;
1007
                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
1008
                            min(skip, (int)xtlck->lwm.offset) : skip;
1009
                        xtlck->lwm.length =
1010
                            le16_to_cpu(sp->header.nextindex) -
1011
                            xtlck->lwm.offset;
1012
                }
1013
 
1014
                return 0;
1015
        }
1016
 
1017
        /*
1018
         * allocate new index blocks to cover index page split(s)
1019
         *
1020
         * allocation hint: ?
1021
         */
1022
        if (split->pxdlist == NULL) {
1023
                nsplit = btstack->nsplit;
1024
                split->pxdlist = &pxdlist;
1025
                pxdlist.maxnpxd = pxdlist.npxd = 0;
1026
                pxd = &pxdlist.pxd[0];
1027
                xlen = JFS_SBI(ip->i_sb)->nbperpage;
1028
                for (; nsplit > 0; nsplit--, pxd++) {
1029
                        if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
1030
                            == 0) {
1031
                                PXDaddress(pxd, xaddr);
1032
                                PXDlength(pxd, xlen);
1033
 
1034
                                pxdlist.maxnpxd++;
1035
 
1036
                                continue;
1037
                        }
1038
 
1039
                        /* undo allocation */
1040
 
1041
                        XT_PUTPAGE(smp);
1042
                        return rc;
1043
                }
1044
        }
1045
 
1046
        /*
1047
         * Split leaf page <sp> into <sp> and a new right page <rp>.
1048
         *
1049
         * The split routines insert the new entry into the leaf page,
1050
         * and acquire txLock as appropriate.
1051
         * return <rp> pinned and its block number <rpbn>.
1052
         */
1053
        rc = (sp->header.flag & BT_ROOT) ?
1054
            xtSplitRoot(tid, ip, split, &rmp) :
1055
            xtSplitPage(tid, ip, split, &rmp, &rbn);
1056
 
1057
        XT_PUTPAGE(smp);
1058
 
1059
        if (rc)
1060
                return -EIO;
1061
        /*
1062
         * propagate up the router entry for the leaf page just split
1063
         *
1064
         * insert a router entry for the new page into the parent page,
1065
         * propagate the insert/split up the tree by walking back the stack
1066
         * of (bn of parent page, index of child page entry in parent page)
1067
         * that were traversed during the search for the page that split.
1068
         *
1069
         * the propagation of insert/split up the tree stops if the root
1070
         * splits or the page inserted into doesn't have to split to hold
1071
         * the new entry.
1072
         *
1073
         * the parent entry for the split page remains the same, and
1074
         * a new entry is inserted at its right with the first key and
1075
         * block number of the new right page.
1076
         *
1077
         * There are a maximum of 3 pages pinned at any time:
1078
         * right child, left parent and right parent (when the parent splits)
1079
         * to keep the child page pinned while working on the parent.
1080
         * make sure that all pins are released at exit.
1081
         */
1082
        while ((parent = BT_POP(btstack)) != NULL) {
1083
                /* parent page specified by stack frame <parent> */
1084
 
1085
                /* keep current child pages <rcp> pinned */
1086
                rcmp = rmp;
1087
                rcbn = rbn;
1088
                rcp = XT_PAGE(ip, rcmp);
1089
 
1090
                /*
1091
                 * insert router entry in parent for new right child page <rp>
1092
                 */
1093
                /* get/pin the parent page <sp> */
1094
                XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1095
                if (rc) {
1096
                        XT_PUTPAGE(rcmp);
1097
                        return rc;
1098
                }
1099
 
1100
                /*
1101
                 * The new key entry goes ONE AFTER the index of parent entry,
1102
                 * because the split was to the right.
1103
                 */
1104
                skip = parent->index + 1;
1105
 
1106
                /*
1107
                 * split or shift right remaining entries of the parent page
1108
                 */
1109
                nextindex = le16_to_cpu(sp->header.nextindex);
1110
                /*
1111
                 * parent page is full - split the parent page
1112
                 */
1113
                if (nextindex == le16_to_cpu(sp->header.maxentry)) {
1114
                        /* init for parent page split */
1115
                        split->mp = smp;
1116
                        split->index = skip;    /* index at insert */
1117
                        split->flag = XAD_NEW;
1118
                        split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
1119
                        split->len = JFS_SBI(ip->i_sb)->nbperpage;
1120
                        split->addr = rcbn;
1121
 
1122
                        /* unpin previous right child page */
1123
                        XT_PUTPAGE(rcmp);
1124
 
1125
                        /* The split routines insert the new entry,
1126
                         * and acquire txLock as appropriate.
1127
                         * return <rp> pinned and its block number <rpbn>.
1128
                         */
1129
                        rc = (sp->header.flag & BT_ROOT) ?
1130
                            xtSplitRoot(tid, ip, split, &rmp) :
1131
                            xtSplitPage(tid, ip, split, &rmp, &rbn);
1132
                        if (rc) {
1133
                                XT_PUTPAGE(smp);
1134
                                return rc;
1135
                        }
1136
 
1137
                        XT_PUTPAGE(smp);
1138
                        /* keep new child page <rp> pinned */
1139
                }
1140
                /*
1141
                 * parent page is not full - insert in parent page
1142
                 */
1143
                else {
1144
                        /*
1145
                         * insert router entry in parent for the right child
1146
                         * page from the first entry of the right child page:
1147
                         */
1148
                        /*
1149
                         * acquire a transaction lock on the parent page;
1150
                         *
1151
                         * action: router xad insertion;
1152
                         */
1153
                        BT_MARK_DIRTY(smp, ip);
1154
 
1155
                        /*
1156
                         * if insert into middle, shift right remaining entries
1157
                         */
1158
                        if (skip < nextindex)
1159
                                memmove(&sp->xad[skip + 1], &sp->xad[skip],
1160
                                        (nextindex -
1161
                                         skip) << L2XTSLOTSIZE);
1162
 
1163
                        /* insert the router entry */
1164
                        xad = &sp->xad[skip];
1165
                        XT_PUTENTRY(xad, XAD_NEW,
1166
                                    offsetXAD(&rcp->xad[XTENTRYSTART]),
1167
                                    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
1168
 
1169
                        /* advance next available entry index. */
1170
                        sp->header.nextindex =
1171
                            cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
1172
                                        1);
1173
 
1174
                        /* Don't log it if there are no links to the file */
1175
                        if (!test_cflag(COMMIT_Nolink, ip)) {
1176
                                tlck = txLock(tid, ip, smp,
1177
                                              tlckXTREE | tlckGROW);
1178
                                xtlck = (struct xtlock *) & tlck->lock;
1179
                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
1180
                                    min(skip, (int)xtlck->lwm.offset) : skip;
1181
                                xtlck->lwm.length =
1182
                                    le16_to_cpu(sp->header.nextindex) -
1183
                                    xtlck->lwm.offset;
1184
                        }
1185
 
1186
                        /* unpin parent page */
1187
                        XT_PUTPAGE(smp);
1188
 
1189
                        /* exit propagate up */
1190
                        break;
1191
                }
1192
        }
1193
 
1194
        /* unpin current right page */
1195
        XT_PUTPAGE(rmp);
1196
 
1197
        return 0;
1198
}
1199
 
1200
 
1201
/*
1202
 *      xtSplitPage()
1203
 *
1204
 * function:
1205
 *      split a full non-root page into
1206
 *      original/split/left page and new right page
1207
 *      i.e., the original/split page remains as left page.
1208
 *
1209
 * parameter:
1210
 *      int             tid,
1211
 *      struct inode    *ip,
1212
 *      struct xtsplit  *split,
1213
 *      struct metapage **rmpp,
1214
 *      u64             *rbnp,
1215
 *
1216
 * return:
1217
 *      Pointer to page in which to insert or NULL on error.
1218
 */
1219
static int
1220
xtSplitPage(tid_t tid, struct inode *ip,
1221
            struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
1222
{
1223
        int rc = 0;
1224
        struct metapage *smp;
1225
        xtpage_t *sp;
1226
        struct metapage *rmp;
1227
        xtpage_t *rp;           /* new right page allocated */
1228
        s64 rbn;                /* new right page block number */
1229
        struct metapage *mp;
1230
        xtpage_t *p;
1231
        s64 nextbn;
1232
        int skip, maxentry, middle, righthalf, n;
1233
        xad_t *xad;
1234
        struct pxdlist *pxdlist;
1235
        pxd_t *pxd;
1236
        struct tlock *tlck;
1237
        struct xtlock *sxtlck = NULL, *rxtlck = NULL;
1238
        int quota_allocation = 0;
1239
 
1240
        smp = split->mp;
1241
        sp = XT_PAGE(ip, smp);
1242
 
1243
        INCREMENT(xtStat.split);
1244
 
1245
        pxdlist = split->pxdlist;
1246
        pxd = &pxdlist->pxd[pxdlist->npxd];
1247
        pxdlist->npxd++;
1248
        rbn = addressPXD(pxd);
1249
 
1250
        /* Allocate blocks to quota. */
1251
        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1252
                rc = -EDQUOT;
1253
                goto clean_up;
1254
        }
1255
 
1256
        quota_allocation += lengthPXD(pxd);
1257
 
1258
        /*
1259
         * allocate the new right page for the split
1260
         */
1261
        rmp = get_metapage(ip, rbn, PSIZE, 1);
1262
        if (rmp == NULL) {
1263
                rc = -EIO;
1264
                goto clean_up;
1265
        }
1266
 
1267
        jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1268
 
1269
        BT_MARK_DIRTY(rmp, ip);
1270
        /*
1271
         * action: new page;
1272
         */
1273
 
1274
        rp = (xtpage_t *) rmp->data;
1275
        rp->header.self = *pxd;
1276
        rp->header.flag = sp->header.flag & BT_TYPE;
1277
        rp->header.maxentry = sp->header.maxentry;      /* little-endian */
1278
        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1279
 
1280
        BT_MARK_DIRTY(smp, ip);
1281
        /* Don't log it if there are no links to the file */
1282
        if (!test_cflag(COMMIT_Nolink, ip)) {
1283
                /*
1284
                 * acquire a transaction lock on the new right page;
1285
                 */
1286
                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1287
                rxtlck = (struct xtlock *) & tlck->lock;
1288
                rxtlck->lwm.offset = XTENTRYSTART;
1289
                /*
1290
                 * acquire a transaction lock on the split page
1291
                 */
1292
                tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1293
                sxtlck = (struct xtlock *) & tlck->lock;
1294
        }
1295
 
1296
        /*
1297
         * initialize/update sibling pointers of <sp> and <rp>
1298
         */
1299
        nextbn = le64_to_cpu(sp->header.next);
1300
        rp->header.next = cpu_to_le64(nextbn);
1301
        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1302
        sp->header.next = cpu_to_le64(rbn);
1303
 
1304
        skip = split->index;
1305
 
1306
        /*
1307
         *      sequential append at tail (after last entry of last page)
1308
         *
1309
         * if splitting the last page on a level because of appending
1310
         * a entry to it (skip is maxentry), it's likely that the access is
1311
         * sequential. adding an empty page on the side of the level is less
1312
         * work and can push the fill factor much higher than normal.
1313
         * if we're wrong it's no big deal -  we will do the split the right
1314
         * way next time.
1315
         * (it may look like it's equally easy to do a similar hack for
1316
         * reverse sorted data, that is, split the tree left, but it's not.
1317
         * Be my guest.)
1318
         */
1319
        if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1320
                /*
1321
                 * acquire a transaction lock on the new/right page;
1322
                 *
1323
                 * action: xad insertion;
1324
                 */
1325
                /* insert entry at the first entry of the new right page */
1326
                xad = &rp->xad[XTENTRYSTART];
1327
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1328
                            split->addr);
1329
 
1330
                rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1331
 
1332
                if (!test_cflag(COMMIT_Nolink, ip)) {
1333
                        /* rxtlck->lwm.offset = XTENTRYSTART; */
1334
                        rxtlck->lwm.length = 1;
1335
                }
1336
 
1337
                *rmpp = rmp;
1338
                *rbnp = rbn;
1339
 
1340
                jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1341
                return 0;
1342
        }
1343
 
1344
        /*
1345
         *      non-sequential insert (at possibly middle page)
1346
         */
1347
 
1348
        /*
1349
         * update previous pointer of old next/right page of <sp>
1350
         */
1351
        if (nextbn != 0) {
1352
                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1353
                if (rc) {
1354
                        XT_PUTPAGE(rmp);
1355
                        goto clean_up;
1356
                }
1357
 
1358
                BT_MARK_DIRTY(mp, ip);
1359
                /*
1360
                 * acquire a transaction lock on the next page;
1361
                 *
1362
                 * action:sibling pointer update;
1363
                 */
1364
                if (!test_cflag(COMMIT_Nolink, ip))
1365
                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1366
 
1367
                p->header.prev = cpu_to_le64(rbn);
1368
 
1369
                /* sibling page may have been updated previously, or
1370
                 * it may be updated later;
1371
                 */
1372
 
1373
                XT_PUTPAGE(mp);
1374
        }
1375
 
1376
        /*
1377
         * split the data between the split and new/right pages
1378
         */
1379
        maxentry = le16_to_cpu(sp->header.maxentry);
1380
        middle = maxentry >> 1;
1381
        righthalf = maxentry - middle;
1382
 
1383
        /*
1384
         * skip index in old split/left page - insert into left page:
1385
         */
1386
        if (skip <= middle) {
1387
                /* move right half of split page to the new right page */
1388
                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1389
                        righthalf << L2XTSLOTSIZE);
1390
 
1391
                /* shift right tail of left half to make room for new entry */
1392
                if (skip < middle)
1393
                        memmove(&sp->xad[skip + 1], &sp->xad[skip],
1394
                                (middle - skip) << L2XTSLOTSIZE);
1395
 
1396
                /* insert new entry */
1397
                xad = &sp->xad[skip];
1398
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1399
                            split->addr);
1400
 
1401
                /* update page header */
1402
                sp->header.nextindex = cpu_to_le16(middle + 1);
1403
                if (!test_cflag(COMMIT_Nolink, ip)) {
1404
                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1405
                            min(skip, (int)sxtlck->lwm.offset) : skip;
1406
                }
1407
 
1408
                rp->header.nextindex =
1409
                    cpu_to_le16(XTENTRYSTART + righthalf);
1410
        }
1411
        /*
1412
         * skip index in new right page - insert into right page:
1413
         */
1414
        else {
1415
                /* move left head of right half to right page */
1416
                n = skip - middle;
1417
                memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1418
                        n << L2XTSLOTSIZE);
1419
 
1420
                /* insert new entry */
1421
                n += XTENTRYSTART;
1422
                xad = &rp->xad[n];
1423
                XT_PUTENTRY(xad, split->flag, split->off, split->len,
1424
                            split->addr);
1425
 
1426
                /* move right tail of right half to right page */
1427
                if (skip < maxentry)
1428
                        memmove(&rp->xad[n + 1], &sp->xad[skip],
1429
                                (maxentry - skip) << L2XTSLOTSIZE);
1430
 
1431
                /* update page header */
1432
                sp->header.nextindex = cpu_to_le16(middle);
1433
                if (!test_cflag(COMMIT_Nolink, ip)) {
1434
                        sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1435
                            min(middle, (int)sxtlck->lwm.offset) : middle;
1436
                }
1437
 
1438
                rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1439
                                                   righthalf + 1);
1440
        }
1441
 
1442
        if (!test_cflag(COMMIT_Nolink, ip)) {
1443
                sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1444
                    sxtlck->lwm.offset;
1445
 
1446
                /* rxtlck->lwm.offset = XTENTRYSTART; */
1447
                rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1448
                    XTENTRYSTART;
1449
        }
1450
 
1451
        *rmpp = rmp;
1452
        *rbnp = rbn;
1453
 
1454
        jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1455
        return rc;
1456
 
1457
      clean_up:
1458
 
1459
        /* Rollback quota allocation. */
1460
        if (quota_allocation)
1461
                DQUOT_FREE_BLOCK(ip, quota_allocation);
1462
 
1463
        return (rc);
1464
}
1465
 
1466
 
1467
/*
1468
 *      xtSplitRoot()
1469
 *
1470
 * function:
1471
 *      split the full root page into original/root/split page and new
1472
 *      right page
1473
 *      i.e., the root remains fixed in the tree anchor (inode); its entries
1474
 *      are copied to a single new right child page (the root page is much
1475
 *      smaller than a non-root page), and the split root page is left with
1476
 *      a single entry pointing to the new right child page.
1477
 *
1478
 * parameter:
1479
 *      int             tid,
1480
 *      struct inode    *ip,
1481
 *      struct xtsplit  *split,
1482
 *      struct metapage **rmpp)
1483
 *
1484
 * return:
1485
 *      0 - success, errno - failure; on success *rmpp returns the new right (child) page.
1486
 */
1487
static int
1488
xtSplitRoot(tid_t tid,
1489
            struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1490
{
1491
        xtpage_t *sp;
1492
        struct metapage *rmp;
1493
        xtpage_t *rp;
1494
        s64 rbn;
1495
        int skip, nextindex;
1496
        xad_t *xad;
1497
        pxd_t *pxd;
1498
        struct pxdlist *pxdlist;
1499
        struct tlock *tlck;
1500
        struct xtlock *xtlck;
1501
 
1502
        sp = &JFS_IP(ip)->i_xtroot;
1503
 
1504
        INCREMENT(xtStat.split);
1505
 
1506
        /*
1507
         *      allocate a single (right) child page
1508
         */
1509
        pxdlist = split->pxdlist;
1510
        pxd = &pxdlist->pxd[pxdlist->npxd];
1511
        pxdlist->npxd++;
1512
        rbn = addressPXD(pxd);
1513
        rmp = get_metapage(ip, rbn, PSIZE, 1);
1514
        if (rmp == NULL)
1515
                return -EIO;
1516
 
1517
        /* Allocate blocks to quota. */
1518
        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1519
                release_metapage(rmp);
1520
                return -EDQUOT;
1521
        }
1522
 
1523
        jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1524
 
1525
        /*
1526
         * acquire a transaction lock on the new right page;
1527
         *
1528
         * action: new page;
1529
         */
1530
        BT_MARK_DIRTY(rmp, ip);
1531
 
1532
        rp = (xtpage_t *) rmp->data;
1533
        rp->header.flag =
1534
            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1535
        rp->header.self = *pxd;
1536
        rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1537
        rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
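        /* the external child page holds PSIZE >> L2XTSLOTSIZE slots, far
         * more than the XTROOTMAXSLOT slots of the in-inode root whose
         * entries it is about to absorb.
         */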
1538
 
1539
        /* initialize sibling pointers */
1540
        rp->header.next = 0;
1541
        rp->header.prev = 0;
1542
 
1543
        /*
1544
         * copy the in-line root page into new right page extent
1545
         */
1546
        nextindex = le16_to_cpu(sp->header.maxentry);
1547
        memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1548
                (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1549
 
1550
        /*
1551
         * insert the new entry into the new right/child page
1552
         * (skip index in the new right page will not change)
1553
         */
1554
        skip = split->index;
1555
        /* if insert into middle, shift right remaining entries */
1556
        if (skip != nextindex)
1557
                memmove(&rp->xad[skip + 1], &rp->xad[skip],
1558
                        (nextindex - skip) * sizeof(xad_t));
1559
 
1560
        xad = &rp->xad[skip];
1561
        XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1562
 
1563
        /* update page header */
1564
        rp->header.nextindex = cpu_to_le16(nextindex + 1);
1565
 
1566
        if (!test_cflag(COMMIT_Nolink, ip)) {
1567
                tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1568
                xtlck = (struct xtlock *) & tlck->lock;
1569
                xtlck->lwm.offset = XTENTRYSTART;
1570
                xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1571
                    XTENTRYSTART;
1572
        }
1573
 
1574
        /*
1575
         *      reset the root
1576
         *
1577
         * init root with the single entry for the new right page
1578
         * set the 1st entry offset to 0, which forces the left-most key
1579
         * at any level of the tree to be less than any search key.
1580
         */
1581
        /*
1582
         * acquire a transaction lock on the root page (in-memory inode);
1583
         *
1584
         * action: root split;
1585
         */
1586
        BT_MARK_DIRTY(split->mp, ip);
1587
 
1588
        xad = &sp->xad[XTENTRYSTART];
1589
        XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1590
 
1591
        /* update page header of root */
1592
        sp->header.flag &= ~BT_LEAF;
1593
        sp->header.flag |= BT_INTERNAL;
1594
 
1595
        sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1596
 
1597
        if (!test_cflag(COMMIT_Nolink, ip)) {
1598
                tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1599
                xtlck = (struct xtlock *) & tlck->lock;
1600
                xtlck->lwm.offset = XTENTRYSTART;
1601
                xtlck->lwm.length = 1;
1602
        }
1603
 
1604
        *rmpp = rmp;
1605
 
1606
        jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1607
        return 0;
1608
}
1609
 
1610
 
1611
/*
1612
 *      xtExtend()
1613
 *
1614
 * function: extend in-place;
1615
 *
1616
 * note: existing extent may or may not have been committed.
1617
 * caller is responsible for pager buffer cache update, and
1618
 * working block allocation map update;
1619
 * update pmap: alloc whole extended extent;
1620
 */
1621
int xtExtend(tid_t tid,         /* transaction id */
1622
             struct inode *ip, s64 xoff,        /* delta extent offset */
1623
             s32 xlen,          /* delta extent length */
1624
             int flag)
1625
{
1626
        int rc = 0;
1627
        int cmp;
1628
        struct metapage *mp;    /* meta-page buffer */
1629
        xtpage_t *p;            /* base B+-tree index page */
1630
        s64 bn;
1631
        int index, nextindex, len;
1632
        struct btstack btstack; /* traverse stack */
1633
        struct xtsplit split;   /* split information */
1634
        xad_t *xad;
1635
        s64 xaddr;
1636
        struct tlock *tlck;
1637
        struct xtlock *xtlck = NULL;
1638
 
1639
        jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1640
 
1641
        /* there must exist extent to be extended */
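        /* xoff is the first block past the extent being extended, so the
         * search probes xoff - 1; the contiguity check below verifies that
         * the entry found really ends at xoff.
         */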
1642
        if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1643
                return rc;
1644
 
1645
        /* retrieve search result */
1646
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1647
 
1648
        if (cmp != 0) {
1649
                XT_PUTPAGE(mp);
1650
                jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
1651
                return -EIO;
1652
        }
1653
 
1654
        /* extension must be contiguous */
1655
        xad = &p->xad[index];
1656
        if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1657
                XT_PUTPAGE(mp);
1658
                jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
1659
                return -EIO;
1660
        }
1661
 
1662
        /*
1663
         * acquire a transaction lock on the leaf page;
1664
         *
1665
         * action: xad insertion/extension;
1666
         */
1667
        BT_MARK_DIRTY(mp, ip);
1668
        if (!test_cflag(COMMIT_Nolink, ip)) {
1669
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1670
                xtlck = (struct xtlock *) & tlck->lock;
1671
        }
1672
 
1673
        /* extend will overflow extent ? */
1674
        xlen = lengthXAD(xad) + xlen;
1675
        if ((len = xlen - MAXXLEN) <= 0)
1676
                goto extendOld;
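        /* the combined length exceeds MAXXLEN, the largest length a single
         * xad can describe: the old xad is capped at MAXXLEN below and a
         * second xad is inserted for the remaining 'len' blocks.
         */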
1677
 
1678
        /*
1679
         *      extent overflow: insert entry for new extent
1680
         */
1681
//insertNew:
1682
        xoff = offsetXAD(xad) + MAXXLEN;
1683
        xaddr = addressXAD(xad) + MAXXLEN;
1684
        nextindex = le16_to_cpu(p->header.nextindex);
1685
 
1686
        /*
1687
         *      if the leaf page is full, insert the new entry and
1688
         *      propagate up the router entry for the new page from split
1689
         *
1690
         * The xtSplitUp() will insert the entry and unpin the leaf page.
1691
         */
1692
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1693
                /* xtSplitUp() unpins leaf pages */
1694
                split.mp = mp;
1695
                split.index = index + 1;
1696
                split.flag = XAD_NEW;
1697
                split.off = xoff;       /* split offset */
1698
                split.len = len;
1699
                split.addr = xaddr;
1700
                split.pxdlist = NULL;
1701
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1702
                        return rc;
1703
 
1704
                /* get back old page */
1705
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1706
                if (rc)
1707
                        return rc;
1708
                /*
1709
                 * if leaf root has been split, original root has been
1710
                 * copied to new child page, i.e., original entry now
1711
                 * resides on the new child page;
1712
                 */
1713
                if (p->header.flag & BT_INTERNAL) {
1714
                        ASSERT(p->header.nextindex ==
1715
                               cpu_to_le16(XTENTRYSTART + 1));
1716
                        xad = &p->xad[XTENTRYSTART];
1717
                        bn = addressXAD(xad);
1718
                        XT_PUTPAGE(mp);
1719
 
1720
                        /* get new child page */
1721
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1722
                        if (rc)
1723
                                return rc;
1724
 
1725
                        BT_MARK_DIRTY(mp, ip);
1726
                        if (!test_cflag(COMMIT_Nolink, ip)) {
1727
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1728
                                xtlck = (struct xtlock *) & tlck->lock;
1729
                        }
1730
                }
1731
        }
1732
        /*
1733
         *      insert the new entry into the leaf page
1734
         */
1735
        else {
1736
                /* insert the new entry: mark the entry NEW */
1737
                xad = &p->xad[index + 1];
1738
                XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1739
 
1740
                /* advance next available entry index */
1741
                p->header.nextindex =
1742
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1743
        }
1744
 
1745
        /* get back old entry */
1746
        xad = &p->xad[index];
1747
        xlen = MAXXLEN;
1748
 
1749
        /*
1750
         * extend old extent
1751
         */
1752
      extendOld:
1753
        XADlength(xad, xlen);
1754
        if (!(xad->flag & XAD_NEW))
1755
                xad->flag |= XAD_EXTENDED;
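        /* an extent that is not XAD_NEW must be flagged XAD_EXTENDED so the
         * extension still gets logged at commit; XAD_NEW extents are logged
         * in full anyway.
         */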
1756
 
1757
        if (!test_cflag(COMMIT_Nolink, ip)) {
1758
                xtlck->lwm.offset =
1759
                    (xtlck->lwm.offset) ? min(index,
1760
                                              (int)xtlck->lwm.offset) : index;
1761
                xtlck->lwm.length =
1762
                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1763
        }
1764
 
1765
        /* unpin the leaf page */
1766
        XT_PUTPAGE(mp);
1767
 
1768
        return rc;
1769
}
1770
 
1771
#ifdef _NOTYET
1772
/*
1773
 *      xtTailgate()
1774
 *
1775
 * function: split existing 'tail' extent
1776
 *      (split offset >= start offset of tail extent), and
1777
 *      relocate and extend the split tail half;
1778
 *
1779
 * note: existing extent may or may not have been committed.
1780
 * caller is responsible for pager buffer cache update, and
1781
 * working block allocation map update;
1782
 * update pmap: free old split tail extent, alloc new extent;
1783
 */
1784
int xtTailgate(tid_t tid,               /* transaction id */
1785
               struct inode *ip, s64 xoff,      /* split/new extent offset */
1786
               s32 xlen,        /* new extent length */
1787
               s64 xaddr,       /* new extent address */
1788
               int flag)
1789
{
1790
        int rc = 0;
1791
        int cmp;
1792
        struct metapage *mp;    /* meta-page buffer */
1793
        xtpage_t *p;            /* base B+-tree index page */
1794
        s64 bn;
1795
        int index, nextindex, llen, rlen;
1796
        struct btstack btstack; /* traverse stack */
1797
        struct xtsplit split;   /* split information */
1798
        xad_t *xad;
1799
        struct tlock *tlck;
1800
        struct xtlock *xtlck = NULL;
1801
        struct tlock *mtlck;
1802
        struct maplock *pxdlock;
1803
 
1804
/*
1805
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1806
        (ulong)xoff, xlen, (ulong)xaddr);
1807
*/
1808
 
1809
        /* there must exist extent to be tailgated */
1810
        if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
1811
                return rc;
1812
 
1813
        /* retrieve search result */
1814
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1815
 
1816
        if (cmp != 0) {
1817
                XT_PUTPAGE(mp);
1818
                jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
1819
                return -EIO;
1820
        }
1821
 
1822
        /* entry found must be last entry */
1823
        nextindex = le16_to_cpu(p->header.nextindex);
1824
        if (index != nextindex - 1) {
1825
                XT_PUTPAGE(mp);
1826
                jfs_error(ip->i_sb,
1827
                          "xtTailgate: the entry found is not the last entry");
1828
                return -EIO;
1829
        }
1830
 
1831
        BT_MARK_DIRTY(mp, ip);
1832
        /*
1833
         * acquire tlock of the leaf page containing original entry
1834
         */
1835
        if (!test_cflag(COMMIT_Nolink, ip)) {
1836
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1837
                xtlck = (struct xtlock *) & tlck->lock;
1838
        }
1839
 
1840
        /* completely replace extent ? */
1841
        xad = &p->xad[index];
1842
/*
1843
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1844
        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1845
*/
1846
        if ((llen = xoff - offsetXAD(xad)) == 0)
1847
                goto updateOld;
1848
 
1849
        /*
1850
         *      partially replace extent: insert entry for new extent
1851
         */
1852
//insertNew:
1853
        /*
1854
         *      if the leaf page is full, insert the new entry and
1855
         *      propagate up the router entry for the new page from split
1856
         *
1857
         * The xtSplitUp() will insert the entry and unpin the leaf page.
1858
         */
1859
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
1860
                /* xtSplitUp() unpins leaf pages */
1861
                split.mp = mp;
1862
                split.index = index + 1;
1863
                split.flag = XAD_NEW;
1864
                split.off = xoff;       /* split offset */
1865
                split.len = xlen;
1866
                split.addr = xaddr;
1867
                split.pxdlist = NULL;
1868
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1869
                        return rc;
1870
 
1871
                /* get back old page */
1872
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1873
                if (rc)
1874
                        return rc;
1875
                /*
1876
                 * if leaf root has been split, original root has been
1877
                 * copied to new child page, i.e., original entry now
1878
                 * resides on the new child page;
1879
                 */
1880
                if (p->header.flag & BT_INTERNAL) {
1881
                        ASSERT(p->header.nextindex ==
1882
                               cpu_to_le16(XTENTRYSTART + 1));
1883
                        xad = &p->xad[XTENTRYSTART];
1884
                        bn = addressXAD(xad);
1885
                        XT_PUTPAGE(mp);
1886
 
1887
                        /* get new child page */
1888
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1889
                        if (rc)
1890
                                return rc;
1891
 
1892
                        BT_MARK_DIRTY(mp, ip);
1893
                        if (!test_cflag(COMMIT_Nolink, ip)) {
1894
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1895
                                xtlck = (struct xtlock *) & tlck->lock;
1896
                        }
1897
                }
1898
        }
1899
        /*
1900
         *      insert the new entry into the leaf page
1901
         */
1902
        else {
1903
                /* insert the new entry: mark the entry NEW */
1904
                xad = &p->xad[index + 1];
1905
                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1906
 
1907
                /* advance next available entry index */
1908
                p->header.nextindex =
1909
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1910
        }
1911
 
1912
        /* get back old XAD */
1913
        xad = &p->xad[index];
1914
 
1915
        /*
1916
         * truncate/relocate old extent at split offset
1917
         */
1918
      updateOld:
1919
        /* update dmap for old/committed/truncated extent */
1920
        rlen = lengthXAD(xad) - llen;
1921
        if (!(xad->flag & XAD_NEW)) {
1922
                /* free from PWMAP at commit */
1923
                if (!test_cflag(COMMIT_Nolink, ip)) {
1924
                        mtlck = txMaplock(tid, ip, tlckMAP);
1925
                        pxdlock = (struct maplock *) & mtlck->lock;
1926
                        pxdlock->flag = mlckFREEPXD;
1927
                        PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
1928
                        PXDlength(&pxdlock->pxd, rlen);
1929
                        pxdlock->index = 1;
1930
                }
1931
        } else
1932
                /* free from WMAP */
1933
                dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
1934
 
1935
        if (llen)
1936
                /* truncate */
1937
                XADlength(xad, llen);
1938
        else
1939
                /* replace */
1940
                XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1941
 
1942
        if (!test_cflag(COMMIT_Nolink, ip)) {
1943
                xtlck->lwm.offset = (xtlck->lwm.offset) ?
1944
                    min(index, (int)xtlck->lwm.offset) : index;
1945
                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1946
                    xtlck->lwm.offset;
1947
        }
1948
 
1949
        /* unpin the leaf page */
1950
        XT_PUTPAGE(mp);
1951
 
1952
        return rc;
1953
}
1954
#endif /* _NOTYET */
1955
 
1956
/*
1957
 *      xtUpdate()
1958
 *
1959
 * function: update XAD;
1960
 *
1961
 *      update extent for allocated_but_not_recorded or
1962
 *      compressed extent;
1963
 *
1964
 * parameter:
1965
 *      nxad    - new XAD;
1966
 *              logical extent of the specified XAD must be completely
1967
 *              contained by an existing XAD;
1968
 */
1969
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1970
{                               /* new XAD */
1971
        int rc = 0;
1972
        int cmp;
1973
        struct metapage *mp;    /* meta-page buffer */
1974
        xtpage_t *p;            /* base B+-tree index page */
1975
        s64 bn;
1976
        int index0, index, newindex, nextindex;
1977
        struct btstack btstack; /* traverse stack */
1978
        struct xtsplit split;   /* split information */
1979
        xad_t *xad, *lxad, *rxad;
1980
        int xflag;
1981
        s64 nxoff, xoff;
1982
        int nxlen, xlen, lxlen, rxlen;
1983
        s64 nxaddr, xaddr;
1984
        struct tlock *tlck;
1985
        struct xtlock *xtlck = NULL;
1986
        int newpage = 0;
1987
 
1988
        /* there must exist an extent to be updated */
1989
        nxoff = offsetXAD(nxad);
1990
        nxlen = lengthXAD(nxad);
1991
        nxaddr = addressXAD(nxad);
1992
 
1993
        if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1994
                return rc;
1995
 
1996
        /* retrieve search result */
1997
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1998
 
1999
        if (cmp != 0) {
2000
                XT_PUTPAGE(mp);
2001
                jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
2002
                return -EIO;
2003
        }
2004
 
2005
        BT_MARK_DIRTY(mp, ip);
2006
        /*
2007
         * acquire tlock of the leaf page containing original entry
2008
         */
2009
        if (!test_cflag(COMMIT_Nolink, ip)) {
2010
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2011
                xtlck = (struct xtlock *) & tlck->lock;
2012
        }
2013
 
2014
        xad = &p->xad[index0];
2015
        xflag = xad->flag;
2016
        xoff = offsetXAD(xad);
2017
        xlen = lengthXAD(xad);
2018
        xaddr = addressXAD(xad);
2019
 
2020
        /* nXAD must be completely contained within XAD */
2021
        if ((xoff > nxoff) ||
2022
            (nxoff + nxlen > xoff + xlen)) {
2023
                XT_PUTPAGE(mp);
2024
                jfs_error(ip->i_sb,
2025
                          "xtUpdate: nXAD in not completely contained within XAD");
2026
                return -EIO;
2027
        }
2028
 
2029
        index = index0;
2030
        newindex = index + 1;
2031
        nextindex = le16_to_cpu(p->header.nextindex);
2032
 
2033
#ifdef  _JFS_WIP_NOCOALESCE
2034
        if (xoff < nxoff)
2035
                goto updateRight;
2036
 
2037
        /*
2038
         * replace XAD with nXAD
2039
         */
2040
      replace:                  /* (nxoff == xoff) */
2041
        if (nxlen == xlen) {
2042
                /* replace XAD with nXAD:recorded */
2043
                *xad = *nxad;
2044
                xad->flag = xflag & ~XAD_NOTRECORDED;
2045
 
2046
                goto out;
2047
        } else                  /* (nxlen < xlen) */
2048
                goto updateLeft;
2049
#endif                          /* _JFS_WIP_NOCOALESCE */
2050
 
2051
/* #ifdef _JFS_WIP_COALESCE */
2052
        if (xoff < nxoff)
2053
                goto coalesceRight;
2054
 
2055
        /*
2056
         * coalesce with left XAD
2057
         */
2058
//coalesceLeft: /* (xoff == nxoff) */
2059
        /* is XAD first entry of page ? */
2060
        if (index == XTENTRYSTART)
2061
                goto replace;
2062
 
2063
        /* is nXAD logically and physically contiguous with lXAD ? */
2064
        lxad = &p->xad[index - 1];
2065
        lxlen = lengthXAD(lxad);
2066
        if (!(lxad->flag & XAD_NOTRECORDED) &&
2067
            (nxoff == offsetXAD(lxad) + lxlen) &&
2068
            (nxaddr == addressXAD(lxad) + lxlen) &&
2069
            (lxlen + nxlen < MAXXLEN)) {
2070
                /* extend right lXAD */
2071
                index0 = index - 1;
2072
                XADlength(lxad, lxlen + nxlen);
2073
 
2074
                /* If we just merged two extents together, need to make sure the
2075
                 * right extent gets logged.  If the left one is marked XAD_NEW,
2076
                 * then we know it will be logged.  Otherwise, mark as
2077
                 * XAD_EXTENDED
2078
                 */
2079
                if (!(lxad->flag & XAD_NEW))
2080
                        lxad->flag |= XAD_EXTENDED;
2081
 
2082
                if (xlen > nxlen) {
2083
                        /* truncate XAD */
2084
                        XADoffset(xad, xoff + nxlen);
2085
                        XADlength(xad, xlen - nxlen);
2086
                        XADaddress(xad, xaddr + nxlen);
2087
                        goto out;
2088
                } else {        /* (xlen == nxlen) */
2089
 
2090
                        /* remove XAD */
2091
                        if (index < nextindex - 1)
2092
                                memmove(&p->xad[index], &p->xad[index + 1],
2093
                                        (nextindex - index -
2094
                                         1) << L2XTSLOTSIZE);
2095
 
2096
                        p->header.nextindex =
2097
                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2098
                                        1);
2099
 
2100
                        index = index0;
2101
                        newindex = index + 1;
2102
                        nextindex = le16_to_cpu(p->header.nextindex);
2103
                        xoff = nxoff = offsetXAD(lxad);
2104
                        xlen = nxlen = lxlen + nxlen;
2105
                        xaddr = nxaddr = addressXAD(lxad);
2106
                        goto coalesceRight;
2107
                }
2108
        }
2109
 
2110
        /*
2111
         * replace XAD with nXAD
2112
         */
2113
      replace:                  /* (nxoff == xoff) */
2114
        if (nxlen == xlen) {
2115
                /* replace XAD with nXAD:recorded */
2116
                *xad = *nxad;
2117
                xad->flag = xflag & ~XAD_NOTRECORDED;
2118
 
2119
                goto coalesceRight;
2120
        } else                  /* (nxlen < xlen) */
2121
                goto updateLeft;
2122
 
2123
        /*
2124
         * coalesce with right XAD
2125
         */
2126
      coalesceRight:            /* (xoff <= nxoff) */
2127
        /* is XAD last entry of page ? */
2128
        if (newindex == nextindex) {
2129
                if (xoff == nxoff)
2130
                        goto out;
2131
                goto updateRight;
2132
        }
2133
 
2134
        /* is nXAD logically and physically contiguous with rXAD ? */
2135
        rxad = &p->xad[index + 1];
2136
        rxlen = lengthXAD(rxad);
2137
        if (!(rxad->flag & XAD_NOTRECORDED) &&
2138
            (nxoff + nxlen == offsetXAD(rxad)) &&
2139
            (nxaddr + nxlen == addressXAD(rxad)) &&
2140
            (rxlen + nxlen < MAXXLEN)) {
2141
                /* extend left rXAD */
2142
                XADoffset(rxad, nxoff);
2143
                XADlength(rxad, rxlen + nxlen);
2144
                XADaddress(rxad, nxaddr);
2145
 
2146
                /* If we just merged two extents together, need to make sure
2147
                 * the left extent gets logged.  If the right one is marked
2148
                 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
2149
                 * XAD_EXTENDED
2150
                 */
2151
                if (!(rxad->flag & XAD_NEW))
2152
                        rxad->flag |= XAD_EXTENDED;
2153
 
2154
                if (xlen > nxlen)
2155
                        /* truncate XAD */
2156
                        XADlength(xad, xlen - nxlen);
2157
                else {          /* (xlen == nxlen) */
2158
 
2159
                        /* remove XAD */
2160
                        memmove(&p->xad[index], &p->xad[index + 1],
2161
                                (nextindex - index - 1) << L2XTSLOTSIZE);
2162
 
2163
                        p->header.nextindex =
2164
                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2165
                                        1);
2166
                }
2167
 
2168
                goto out;
2169
        } else if (xoff == nxoff)
2170
                goto out;
2171
 
2172
        if (xoff >= nxoff) {
2173
                XT_PUTPAGE(mp);
2174
                jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
2175
                return -EIO;
2176
        }
2177
/* #endif _JFS_WIP_COALESCE */
2178
 
2179
        /*
2180
         * split XAD into (lXAD, nXAD):
2181
         *
2182
         *          |---nXAD--->
2183
         * --|----------XAD----------|--
2184
         *   |-lXAD-|
2185
         */
2186
      updateRight:              /* (xoff < nxoff) */
2187
        /* truncate old XAD as lXAD:not_recorded */
2188
        xad = &p->xad[index];
2189
        XADlength(xad, nxoff - xoff);
2190
 
2191
        /* insert nXAD:recorded */
2192
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2193
 
2194
                /* xtSplitUp() unpins leaf pages */
2195
                split.mp = mp;
2196
                split.index = newindex;
2197
                split.flag = xflag & ~XAD_NOTRECORDED;
2198
                split.off = nxoff;
2199
                split.len = nxlen;
2200
                split.addr = nxaddr;
2201
                split.pxdlist = NULL;
2202
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2203
                        return rc;
2204
 
2205
                /* get back old page */
2206
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2207
                if (rc)
2208
                        return rc;
2209
                /*
2210
                 * if leaf root has been split, original root has been
2211
                 * copied to new child page, i.e., original entry now
2212
                 * resides on the new child page;
2213
                 */
2214
                if (p->header.flag & BT_INTERNAL) {
2215
                        ASSERT(p->header.nextindex ==
2216
                               cpu_to_le16(XTENTRYSTART + 1));
2217
                        xad = &p->xad[XTENTRYSTART];
2218
                        bn = addressXAD(xad);
2219
                        XT_PUTPAGE(mp);
2220
 
2221
                        /* get new child page */
2222
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2223
                        if (rc)
2224
                                return rc;
2225
 
2226
                        BT_MARK_DIRTY(mp, ip);
2227
                        if (!test_cflag(COMMIT_Nolink, ip)) {
2228
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2229
                                xtlck = (struct xtlock *) & tlck->lock;
2230
                        }
2231
                } else {
2232
                        /* is nXAD on new page ? */
2233
                        if (newindex >
2234
                            (le16_to_cpu(p->header.maxentry) >> 1)) {
2235
                                newindex =
2236
                                    newindex -
2237
                                    le16_to_cpu(p->header.nextindex) +
2238
                                    XTENTRYSTART;
2239
                                newpage = 1;
2240
                        }
2241
                }
2242
        } else {
2243
                /* if insert into middle, shift right remaining entries */
2244
                if (newindex < nextindex)
2245
                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
2246
                                (nextindex - newindex) << L2XTSLOTSIZE);
2247
 
2248
                /* insert the entry */
2249
                xad = &p->xad[newindex];
2250
                *xad = *nxad;
2251
                xad->flag = xflag & ~XAD_NOTRECORDED;
2252
 
2253
                /* advance next available entry index. */
2254
                p->header.nextindex =
2255
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2256
        }
2257
 
2258
        /*
2259
         * does nXAD force 3-way split ?
2260
         *
2261
         *          |---nXAD--->|
2262
         * --|----------XAD-------------|--
2263
         *   |-lXAD-|           |-rXAD -|
2264
         */
2265
        if (nxoff + nxlen == xoff + xlen)
2266
                goto out;
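        /* nXAD ends short of XAD's end: the remainder still has to be
         * split off as rXAD (the 3-way case shown above).
         */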
2267
 
2268
        /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
2269
        if (newpage) {
2270
                /* close out old page */
2271
                if (!test_cflag(COMMIT_Nolink, ip)) {
2272
                        xtlck->lwm.offset = (xtlck->lwm.offset) ?
2273
                            min(index0, (int)xtlck->lwm.offset) : index0;
2274
                        xtlck->lwm.length =
2275
                            le16_to_cpu(p->header.nextindex) -
2276
                            xtlck->lwm.offset;
2277
                }
2278
 
2279
                bn = le64_to_cpu(p->header.next);
2280
                XT_PUTPAGE(mp);
2281
 
2282
                /* get new right page */
2283
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2284
                if (rc)
2285
                        return rc;
2286
 
2287
                BT_MARK_DIRTY(mp, ip);
2288
                if (!test_cflag(COMMIT_Nolink, ip)) {
2289
                        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2290
                        xtlck = (struct xtlock *) & tlck->lock;
2291
                }
2292
 
2293
                index0 = index = newindex;
2294
        } else
2295
                index++;
2296
 
2297
        newindex = index + 1;
2298
        nextindex = le16_to_cpu(p->header.nextindex);
2299
        xlen = xlen - (nxoff - xoff);
2300
        xoff = nxoff;
2301
        xaddr = nxaddr;
2302
 
2303
        /* recompute split pages */
2304
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2305
                XT_PUTPAGE(mp);
2306
 
2307
                if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
2308
                        return rc;
2309
 
2310
                /* retrieve search result */
2311
                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2312
 
2313
                if (cmp != 0) {
2314
                        XT_PUTPAGE(mp);
2315
                        jfs_error(ip->i_sb, "xtUpdate: xtSearch failed");
2316
                        return -EIO;
2317
                }
2318
 
2319
                if (index0 != index) {
2320
                        XT_PUTPAGE(mp);
2321
                        jfs_error(ip->i_sb,
2322
                                  "xtUpdate: unexpected value of index");
2323
                        return -EIO;
2324
                }
2325
        }
2326
 
2327
        /*
2328
         * split XAD into (nXAD, rXAD)
2329
         *
2330
         *          ---nXAD---|
2331
         * --|----------XAD----------|--
2332
         *                    |-rXAD-|
2333
         */
2334
      updateLeft:               /* (nxoff == xoff) && (nxlen < xlen) */
2335
        /* update old XAD with nXAD:recorded */
2336
        xad = &p->xad[index];
2337
        *xad = *nxad;
2338
        xad->flag = xflag & ~XAD_NOTRECORDED;
2339
 
2340
        /* insert rXAD:not_recorded */
2341
        xoff = xoff + nxlen;
2342
        xlen = xlen - nxlen;
2343
        xaddr = xaddr + nxlen;
2344
        if (nextindex == le16_to_cpu(p->header.maxentry)) {
2345
/*
2346
printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2347
*/
2348
                /* xtSplitUp() unpins leaf pages */
2349
                split.mp = mp;
2350
                split.index = newindex;
2351
                split.flag = xflag;
2352
                split.off = xoff;
2353
                split.len = xlen;
2354
                split.addr = xaddr;
2355
                split.pxdlist = NULL;
2356
                if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2357
                        return rc;
2358
 
2359
                /* get back old page */
2360
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2361
                if (rc)
2362
                        return rc;
2363
 
2364
                /*
2365
                 * if leaf root has been split, original root has been
2366
                 * copied to new child page, i.e., original entry now
2367
                 * resides on the new child page;
2368
                 */
2369
                if (p->header.flag & BT_INTERNAL) {
2370
                        ASSERT(p->header.nextindex ==
2371
                               cpu_to_le16(XTENTRYSTART + 1));
2372
                        xad = &p->xad[XTENTRYSTART];
2373
                        bn = addressXAD(xad);
2374
                        XT_PUTPAGE(mp);
2375
 
2376
                        /* get new child page */
2377
                        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2378
                        if (rc)
2379
                                return rc;
2380
 
2381
                        BT_MARK_DIRTY(mp, ip);
2382
                        if (!test_cflag(COMMIT_Nolink, ip)) {
2383
                                tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2384
                                xtlck = (struct xtlock *) & tlck->lock;
2385
                        }
2386
                }
2387
        } else {
2388
                /* if insert into middle, shift right remaining entries */
2389
                if (newindex < nextindex)
2390
                        memmove(&p->xad[newindex + 1], &p->xad[newindex],
2391
                                (nextindex - newindex) << L2XTSLOTSIZE);
2392
 
2393
                /* insert the entry */
2394
                xad = &p->xad[newindex];
2395
                XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2396
 
2397
                /* advance next available entry index. */
2398
                p->header.nextindex =
2399
                    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2400
        }
2401
 
2402
      out:
2403
        if (!test_cflag(COMMIT_Nolink, ip)) {
2404
                xtlck->lwm.offset = (xtlck->lwm.offset) ?
2405
                    min(index0, (int)xtlck->lwm.offset) : index0;
2406
                xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2407
                    xtlck->lwm.offset;
2408
        }
2409
 
2410
        /* unpin the leaf page */
2411
        XT_PUTPAGE(mp);
2412
 
2413
        return rc;
2414
}
2415
 
2416
 
2417
/*
2418
 *      xtAppend()
2419
 *
2420
 * function: grow in append mode from the contiguous region specified;
2421
 *
2422
 * parameter:
2423
 *      tid             - transaction id;
2424
 *      ip              - file object;
2425
 *      xflag           - extent flag:
2426
 *      xoff            - extent offset;
2427
 *      maxblocks       - max extent length;
2428
 *      xlen            - extent length (in/out);
2429
 *      xaddrp          - extent address pointer (in/out):
2430
 *      flag            -
2431
 *
2432
 * return:
2433
 */
2434
int xtAppend(tid_t tid,         /* transaction id */
2435
             struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2436
             s32 * xlenp,       /* (in/out) */
2437
             s64 * xaddrp,      /* (in/out) */
2438
             int flag)
2439
{
2440
        int rc = 0;
2441
        struct metapage *mp;    /* meta-page buffer */
2442
        xtpage_t *p;            /* base B+-tree index page */
2443
        s64 bn, xaddr;
2444
        int index, nextindex;
2445
        struct btstack btstack; /* traverse stack */
2446
        struct xtsplit split;   /* split information */
2447
        xad_t *xad;
2448
        int cmp;
2449
        struct tlock *tlck;
2450
        struct xtlock *xtlck;
2451
        int nsplit, nblocks, xlen;
2452
        struct pxdlist pxdlist;
2453
        pxd_t *pxd;
2454
        s64 next;
2455
 
2456
        xaddr = *xaddrp;
2457
        xlen = *xlenp;
2458
        jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2459
                 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2460
 
2461
        /*
2462
         *      search for the entry location at which to insert:
2463
         *
2464
         * xtFastSearch() and xtSearch() both return (leaf page
2465
         * pinned, index at which to insert).
2466
         * n.b. xtSearch() may return index of maxentry of
2467
         * the full page.
2468
         */
2469
        if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
2470
                return rc;
2471
 
2472
        /* retrieve search result */
2473
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2474
 
2475
        if (cmp == 0) {
2476
                rc = -EEXIST;
2477
                goto out;
2478
        }
2479
 
2480
        if (next)
2481
                xlen = min(xlen, (int)(next - xoff));
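        /* 'next' (when non-zero) is the offset of the following extent;
         * clamp the append so the new extent cannot overlap it.
         */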
2482
//insert:
2483
        /*
2484
         *      insert entry for new extent
2485
         */
2486
        xflag |= XAD_NEW;
2487
 
2488
        /*
2489
         *      if the leaf page is full, split the page and
2490
         *      propagate up the router entry for the new page from split
2491
         *
2492
         * The xtSplitUp() will insert the entry and unpin the leaf page.
2493
         */
2494
        nextindex = le16_to_cpu(p->header.nextindex);
2495
        if (nextindex < le16_to_cpu(p->header.maxentry))
2496
                goto insertLeaf;
2497
 
2498
        /*
2499
         * allocate new index blocks to cover index page split(s)
2500
         */
2501
        nsplit = btstack.nsplit;
2502
        split.pxdlist = &pxdlist;
2503
        pxdlist.maxnpxd = pxdlist.npxd = 0;
2504
        pxd = &pxdlist.pxd[0];
2505
        nblocks = JFS_SBI(ip->i_sb)->nbperpage;
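        /* carve the blocks needed for the nsplit index pages off the front
         * of the caller's contiguous region; xaddr and maxblocks are
         * adjusted so the data extent is allocated from what remains.
         */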
2506
        for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2507
                if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2508
                        PXDaddress(pxd, xaddr);
2509
                        PXDlength(pxd, nblocks);
2510
 
2511
                        pxdlist.maxnpxd++;
2512
 
2513
                        continue;
2514
                }
2515
 
2516
                /* undo allocation */
2517
 
2518
                goto out;
2519
        }
2520
 
2521
        xlen = min(xlen, maxblocks);
2522
 
2523
        /*
2524
         * allocate data extent requested
2525
         */
2526
        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2527
                goto out;
2528
 
2529
        split.mp = mp;
2530
        split.index = index;
2531
        split.flag = xflag;
2532
        split.off = xoff;
2533
        split.len = xlen;
2534
        split.addr = xaddr;
2535
        if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2536
                /* undo data extent allocation */
2537
                dbFree(ip, *xaddrp, (s64) * xlenp);
2538
 
2539
                return rc;
2540
        }
2541
 
2542
        *xaddrp = xaddr;
2543
        *xlenp = xlen;
2544
        return 0;
2545
 
2546
        /*
2547
         *      insert the new entry into the leaf page
2548
         */
2549
      insertLeaf:
2550
        /*
2551
         * allocate data extent requested
2552
         */
2553
        if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2554
                goto out;
2555
 
2556
        BT_MARK_DIRTY(mp, ip);
2557
        /*
2558
         * acquire a transaction lock on the leaf page;
2559
         *
2560
         * action: xad insertion/extension;
2561
         */
2562
        tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2563
        xtlck = (struct xtlock *) & tlck->lock;
2564
 
2565
        /* insert the new entry: mark the entry NEW */
2566
        xad = &p->xad[index];
2567
        XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2568
 
2569
        /* advance next available entry index */
2570
        p->header.nextindex =
2571
            cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2572
 
2573
        xtlck->lwm.offset =
2574
            (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2575
        xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2576
            xtlck->lwm.offset;
2577
 
2578
        *xaddrp = xaddr;
2579
        *xlenp = xlen;
2580
 
2581
      out:
2582
        /* unpin the leaf page */
2583
        XT_PUTPAGE(mp);
2584
 
2585
        return rc;
2586
}
2587
#ifdef _STILL_TO_PORT
2588
 
2589
/* - TBD for defragmentation/reorganization -
2590
 *
2591
 *      xtDelete()
2592
 *
2593
 * function:
2594
 *      delete the entry with the specified key.
2595
 *
2596
 *      N.B.: whole extent of the entry is assumed to be deleted.
2597
 *
2598
 * parameter:
2599
 *
2600
 * return:
2601
 *      ENOENT: if the entry is not found.
2602
 *
2603
 * exception:
2604
 */
2605
int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2606
{
2607
        int rc = 0;
2608
        struct btstack btstack;
2609
        int cmp;
2610
        s64 bn;
2611
        struct metapage *mp;
2612
        xtpage_t *p;
2613
        int index, nextindex;
2614
        struct tlock *tlck;
2615
        struct xtlock *xtlck;
2616
 
2617
        /*
2618
         * find the matching entry; xtSearch() pins the page
2619
         */
2620
        if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2621
                return rc;
2622
 
2623
        XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2624
        if (cmp) {
2625
                /* unpin the leaf page */
2626
                XT_PUTPAGE(mp);
2627
                return -ENOENT;
2628
        }
2629
 
2630
        /*
2631
         * delete the entry from the leaf page
2632
         */
2633
        nextindex = le16_to_cpu(p->header.nextindex);
2634
        p->header.nextindex =
2635
            cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);
2636
 
2637
        /*
2638
         * if the leaf page becomes empty, free the page
2639
         */
2640
        if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
2641
                return (xtDeleteUp(tid, ip, mp, p, &btstack));
2642
 
2643
        BT_MARK_DIRTY(mp, ip);
2644
        /*
2645
         * acquire a transaction lock on the leaf page;
2646
         *
2647
         * action:xad deletion;
2648
         */
2649
        tlck = txLock(tid, ip, mp, tlckXTREE);
2650
        xtlck = (struct xtlock *) & tlck->lock;
2651
        xtlck->lwm.offset =
2652
            (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
2653
 
2654
        /* if delete from middle, shift left/compact the remaining entries */
2655
        if (index < nextindex - 1)
2656
                memmove(&p->xad[index], &p->xad[index + 1],
2657
                        (nextindex - index - 1) * sizeof(xad_t));
2658
 
2659
        XT_PUTPAGE(mp);
2660
 
2661
        return 0;
2662
}
2663
 
2664
 
2665
/* - TBD for defragmentation/reorganization -
2666
 *
2667
 *      xtDeleteUp()
2668
 *
2669
 * function:
2670
 *      free empty pages while propagating deletion up the tree
2671
 *
2672
 * parameter:
2673
 *
2674
 * return:
2675
 */
2676
static int
2677
xtDeleteUp(tid_t tid, struct inode *ip,
2678
           struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
2679
{
2680
        int rc = 0;
2681
        struct metapage *mp;
2682
        xtpage_t *p;
2683
        int index, nextindex;
2684
        s64 xaddr;
2685
        int xlen;
2686
        struct btframe *parent;
2687
        struct tlock *tlck;
2688
        struct xtlock *xtlck;
2689
 
2690
        /*
2691
         * keep root leaf page which has become empty
2692
         */
2693
        if (fp->header.flag & BT_ROOT) {
2694
                /* keep the root page */
2695
                fp->header.flag &= ~BT_INTERNAL;
2696
                fp->header.flag |= BT_LEAF;
2697
                fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
2698
 
2699
                /* XT_PUTPAGE(fmp); */
2700
 
2701
                return 0;
2702
        }
2703
 
2704
        /*
2705
         * free non-root leaf page
2706
         */
2707
        if ((rc = xtRelink(tid, ip, fp))) {
2708
                XT_PUTPAGE(fmp);
2709
                return rc;
2710
        }
2711
 
2712
        xaddr = addressPXD(&fp->header.self);
2713
        xlen = lengthPXD(&fp->header.self);
2714
        /* free the page extent */
2715
        dbFree(ip, xaddr, (s64) xlen);
2716
 
2717
        /* free the buffer page */
2718
        discard_metapage(fmp);
2719
 
2720
        /*
2721
         * propagate page deletion up the index tree
2722
         *
2723
         * If the delete from the parent page makes it empty,
2724
         * continue all the way up the tree.
2725
         * stop if the root page is reached (which is never deleted) or
2726
         * if the entry deletion does not empty the page.
2727
         */
2728
        while ((parent = BT_POP(btstack)) != NULL) {
2729
                /* get/pin the parent page <sp> */
2730
                XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2731
                if (rc)
2732
                        return rc;
2733
 
2734
                index = parent->index;
2735
 
2736
                /* delete the entry for the freed child page from parent.
2737
                 */
2738
                nextindex = le16_to_cpu(p->header.nextindex);
2739
 
2740
                /*
2741
                 * the parent has the single entry being deleted:
2742
                 * free the parent page which has become empty.
2743
                 */
2744
                if (nextindex == 1) {
2745
                        if (p->header.flag & BT_ROOT) {
2746
                                /* keep the root page */
2747
                                p->header.flag &= ~BT_INTERNAL;
2748
                                p->header.flag |= BT_LEAF;
2749
                                p->header.nextindex =
2750
                                    cpu_to_le16(XTENTRYSTART);
2751
 
2752
                                /* XT_PUTPAGE(mp); */
2753
 
2754
                                break;
2755
                        } else {
2756
                                /* free the parent page */
2757
                                if ((rc = xtRelink(tid, ip, p)))
2758
                                        return rc;
2759
 
2760
                                xaddr = addressPXD(&p->header.self);
2761
                                /* free the page extent */
2762
                                dbFree(ip, xaddr,
2763
                                       (s64) JFS_SBI(ip->i_sb)->nbperpage);
2764
 
2765
                                /* unpin/free the buffer page */
2766
                                discard_metapage(mp);
2767
 
2768
                                /* propagate up */
2769
                                continue;
2770
                        }
2771
                }
2772
                /*
2773
                 * the parent has other entries remaining:
2774
                 * delete the router entry from the parent page.
2775
                 */
2776
                else {
2777
                        BT_MARK_DIRTY(mp, ip);
2778
                        /*
2779
                         * acquire a transaction lock on the leaf page;
2780
                         *
2781
                         * action:xad deletion;
2782
                         */
2783
                        tlck = txLock(tid, ip, mp, tlckXTREE);
2784
                        xtlck = (struct xtlock *) & tlck->lock;
2785
                        xtlck->lwm.offset =
2786
                            (xtlck->lwm.offset) ? min(index,
2787
                                                      xtlck->lwm.
2788
                                                      offset) : index;
2789
 
2790
                        /* if delete from middle,
2791
                         * shift left/compact the remaining entries in the page
2792
                         */
2793
                        if (index < nextindex - 1)
2794
                                memmove(&p->xad[index], &p->xad[index + 1],
2795
                                        (nextindex - index -
2796
                                         1) << L2XTSLOTSIZE);
2797
 
2798
                        p->header.nextindex =
2799
                            cpu_to_le16(le16_to_cpu(p->header.nextindex) -
2800
                                        1);
2801
                        jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2802
                                 (ulong) parent->bn, index);
2803
                }
2804
 
2805
                /* unpin the parent page */
2806
                XT_PUTPAGE(mp);
2807
 
2808
                /* exit propagation up */
2809
                break;
2810
        }
2811
 
2812
        return 0;
2813
}
2814
 
2815
 
2816
/*
2817
 * NAME:        xtRelocate()
2818
 *
2819
 * FUNCTION:    relocate xtpage or data extent of regular file;
2820
 *              This function is mainly used by defragfs utility.
2821
 *
2822
 * NOTE:        This routine does not have the logic to handle
2823
 *              uncommitted allocated extent. The caller should call
2824
 *              txCommit() to commit all the allocations before calling
2825
 *              this routine.
2826
 */
2827
int
2828
xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
2829
           s64 nxaddr,          /* new xaddr */
2830
           int xtype)
2831
{                               /* extent type: XTPAGE or DATAEXT */
2832
        int rc = 0;
2833
        struct tblock *tblk;
2834
        struct tlock *tlck;
2835
        struct xtlock *xtlck;
2836
        struct metapage *mp, *pmp, *lmp, *rmp;  /* meta-page buffer */
2837
        xtpage_t *p, *pp, *rp, *lp;     /* base B+-tree index page */
2838
        xad_t *xad;
2839
        pxd_t *pxd;
2840
        s64 xoff, xsize;
2841
        int xlen;
2842
        s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2843
        cbuf_t *cp;
2844
        s64 offset, nbytes, nbrd, pno;
2845
        int nb, npages, nblks;
2846
        s64 bn;
2847
        int cmp;
2848
        int index;
2849
        struct pxd_lock *pxdlock;
2850
        struct btstack btstack; /* traverse stack */
2851
 
2852
        xtype = xtype & EXTENT_TYPE;
2853
 
2854
        xoff = offsetXAD(oxad);
2855
        oxaddr = addressXAD(oxad);
2856
        xlen = lengthXAD(oxad);
2857
 
2858
        /* validate extent offset */
2859
        offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2860
        if (offset >= ip->i_size)
2861
                return -ESTALE; /* stale extent */
2862
 
2863
        jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2864
                 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2865
 
2866
        /*
2867
         *      1. get and validate the parent xtpage/xad entry
2868
         *      covering the source extent to be relocated;
2869
         */
2870
        if (xtype == DATAEXT) {
2871
                /* search in leaf entry */
2872
                rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2873
                if (rc)
2874
                        return rc;
2875
 
2876
                /* retrieve search result */
2877
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2878
 
2879
                if (cmp) {
2880
                        XT_PUTPAGE(pmp);
2881
                        return -ESTALE;
2882
                }
2883
 
2884
                /* validate for exact match with a single entry */
2885
                xad = &pp->xad[index];
2886
                if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2887
                        XT_PUTPAGE(pmp);
2888
                        return -ESTALE;
2889
                }
2890
        } else {                /* (xtype == XTPAGE) */
2891
 
2892
                /* search in internal entry */
2893
                rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2894
                if (rc)
2895
                        return rc;
2896
 
2897
                /* retrieve search result */
2898
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2899
 
2900
                if (cmp) {
2901
                        XT_PUTPAGE(pmp);
2902
                        return -ESTALE;
2903
                }
2904
 
2905
                /* xtSearchNode() validated for exact match with a single entry
2906
                 */
2907
                xad = &pp->xad[index];
2908
        }
2909
        jfs_info("xtRelocate: parent xad entry validated.");
2910
 
2911
        /*
2912
         *      2. relocate the extent
2913
         */
2914
        if (xtype == DATAEXT) {
2915
                /* if the extent is allocated-but-not-recorded
2916
                 * there is no real data to be moved in this extent,
2917
                 */
2918
                if (xad->flag & XAD_NOTRECORDED)
2919
                        goto out;
2920
                else
2921
                        /* release xtpage for cmRead()/xtLookup() */
2922
                        XT_PUTPAGE(pmp);
2923
 
2924
                /*
2925
                 *      cmRelocate()
2926
                 *
2927
                 * copy target data pages to be relocated;
2928
                 *
2929
                 * data extent must start at page boundary and
2930
                 * multiple of page size (except the last data extent);
2931
                 * read in each page of the source data extent into cbuf,
2932
                 * update the cbuf extent descriptor of the page to be
2933
                 * homeward bound to new dst data extent
2934
                 * copy the data from the old extent to new extent.
2935
                 * copy is essential for compressed files to avoid problems
2936
                 * that can arise if there was a change in compression
2937
                 * algorithms.
2938
                 * it is a good strategy because it may disrupt cache
2939
                 * policy to keep the pages in memory afterwards.
2940
                 */
2941
                offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2942
                assert((offset & CM_OFFSET) == 0);
2943
                nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2944
                pno = offset >> CM_L2BSIZE;
2945
                npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2946
/*
2947
                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2948
                          (offset >> CM_L2BSIZE) + 1;
2949
*/
2950
                sxaddr = oxaddr;
2951
                dxaddr = nxaddr;
2952
 
2953
                /* process the request one cache buffer at a time */
2954
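                /* each pass handles at most one cache buffer (CM_BSIZE
                 * bytes): nb is this page's byte count, nbrd the running
                 * total, and sxaddr/dxaddr advance in filesystem blocks.
                 */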
                for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2955
                     offset += nb, pno++, npages--) {
2956
                        /* compute page size */
2957
                        nb = min(nbytes - nbrd, CM_BSIZE);
2958
 
2959
                        /* get the cache buffer of the page */
2960
                        if ((rc = cmRead(ip, offset, npages, &cp)))
2961
                                break;
2962
 
2963
                        assert(addressPXD(&cp->cm_pxd) == sxaddr);
2964
                        assert(!cp->cm_modified);
2965
 
2966
                        /* bind buffer with the new extent address */
2967
                        nblks = nb >> JFS_SBI(ip->i_sb)->l2bsize;
2968
                        cmSetXD(ip, cp, pno, dxaddr, nblks);
2969
 
2970
                        /* release the cbuf, mark it as modified */
2971
                        cmPut(cp, true);
2972
 
2973
                        dxaddr += nblks;
2974
                        sxaddr += nblks;
2975
                }
2976
 
2977
                /* get back parent page */
2978
                if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2979
                        return rc;
2980
 
2981
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2982
                jfs_info("xtRelocate: target data extent relocated.");
2983
        } else {                /* (xtype == XTPAGE) */
2984
 
2985
                /*
2986
                 * read in the target xtpage from the source extent;
2987
                 */
2988
                XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2989
                if (rc) {
2990
                        XT_PUTPAGE(pmp);
2991
                        return rc;
2992
                }
2993
 
2994
                /*
2995
                 * read in sibling pages if any to update sibling pointers;
2996
                 */
2997
                rmp = NULL;
2998
                if (p->header.next) {
2999
                        nextbn = le64_to_cpu(p->header.next);
3000
                        XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
3001
                        if (rc) {
3002
                                XT_PUTPAGE(pmp);
3003
                                XT_PUTPAGE(mp);
3004
                                return (rc);
3005
                        }
3006
                }
3007
 
3008
                lmp = NULL;
3009
                if (p->header.prev) {
3010
                        prevbn = le64_to_cpu(p->header.prev);
3011
                        XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
3012
                        if (rc) {
3013
                                XT_PUTPAGE(pmp);
3014
                                XT_PUTPAGE(mp);
3015
                                if (rmp)
3016
                                        XT_PUTPAGE(rmp);
3017
                                return (rc);
3018
                        }
3019
                }
3020
 
3021
                /* at this point, all xtpages to be updated are in memory */
3022
 
3023
                /*
3024
                 * update sibling pointers of sibling xtpages if any;
3025
                 */
3026
                if (lmp) {
3027
                        BT_MARK_DIRTY(lmp, ip);
3028
                        tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3029
                        lp->header.next = cpu_to_le64(nxaddr);
3030
                        XT_PUTPAGE(lmp);
3031
                }
3032
 
3033
                if (rmp) {
3034
                        BT_MARK_DIRTY(rmp, ip);
3035
                        tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3036
                        rp->header.prev = cpu_to_le64(nxaddr);
3037
                        XT_PUTPAGE(rmp);
3038
                }
3039
 
3040
                /*
3041
                 * update the target xtpage to be relocated
3042
                 *
3043
                 * update the self address of the target page
3044
                 * and write to destination extent;
3045
                 * redo image covers the whole xtpage since it is new page
3046
                 * to the destination extent;
3047
                 * update of bmap for the free of source extent
3048
                 * of the target xtpage itself:
3049
                 * update of bmap for the allocation of destination extent
3050
                 * of the target xtpage itself:
3051
                 * update of bmap for the extents covered by xad entries in
3052
                 * the target xtpage is not necessary since they are not
3053
                 * updated;
3054
                 * if not committed before this relocation,
3055
                 * target page may contain XAD_NEW entries which must
3056
                 * be scanned for bmap update (logredo() always
3057
                 * scans the xtpage REDOPAGE image for bmap update);
3058
                 * if committed before this relocation (tlckRELOCATE),
3059
                 * scan may be skipped by commit() and logredo();
3060
                 */
3061
                BT_MARK_DIRTY(mp, ip);
3062
                /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
3063
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
3064
                xtlck = (struct xtlock *) & tlck->lock;
3065
 
3066
                /* update the self address in the xtpage header */
3067
                pxd = &p->header.self;
3068
                PXDaddress(pxd, nxaddr);
3069
 
3070
                /* linelock for the after image of the whole page */
3071
                xtlck->lwm.length =
3072
                    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
3073
 
3074
                /* update the buffer extent descriptor of target xtpage */
3075
                xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
3076
                bmSetXD(mp, nxaddr, xsize);
3077
 
3078
                /* unpin the target page to new homeward bound */
3079
                XT_PUTPAGE(mp);
3080
                jfs_info("xtRelocate: target xtpage relocated.");
3081
        }
3082
 
3083
        /*
3084
         *      3. acquire maplock for the source extent to be freed;
3085
         *
3086
         * acquire a maplock saving the src relocated extent address;
3087
         * for freeing the extent at commit time;
3088
         */
3089
      out:
3090
        /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
3091
         * free PXD of the source data extent (logredo() will update
3092
         * bmap for free of source data extent), and update bmap for
3093
         * free of the source data extent;
3094
         */
3095
        if (xtype == DATAEXT)
3096
                tlck = txMaplock(tid, ip, tlckMAP);
3097
        /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
3098
         * for the source xtpage (logredo() will init NoRedoPage
3099
         * filter and will also update bmap for free of the source
3100
         * xtpage), and update bmap for free of the source xtpage;
3101
         * N.B. We use tlckMAP instead of tlckXTREE because there
3102
         *      is no buffer associated with this lock since the buffer
3103
         *      has been redirected to the target location.
3104
         */
3105
        else                    /* (xtype == XTPAGE) */
3106
                tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
3107
 
3108
        pxdlock = (struct pxd_lock *) & tlck->lock;
3109
        pxdlock->flag = mlckFREEPXD;
3110
        PXDaddress(&pxdlock->pxd, oxaddr);
3111
        PXDlength(&pxdlock->pxd, xlen);
3112
        pxdlock->index = 1;
3113
 
3114
        /*
3115
         *      4. update the parent xad entry for relocation;
3116
         *
3117
         * acquire tlck for the parent entry with XAD_NEW as entry
3118
         * update which will write LOG_REDOPAGE and update bmap for
3119
         * allocation of XAD_NEW destination extent;
3120
         */
3121
        jfs_info("xtRelocate: update parent xad entry.");
3122
        BT_MARK_DIRTY(pmp, ip);
3123
        tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
3124
        xtlck = (struct xtlock *) & tlck->lock;
3125
 
3126
        /* update the XAD with the new destination extent; */
3127
        xad = &pp->xad[index];
3128
        xad->flag |= XAD_NEW;
3129
        XADaddress(xad, nxaddr);
3130
 
3131
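        /* widen the linelock low-water-mark range so the redo image covers
         * the updated entry through the last live entry in the page.
         */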
        xtlck->lwm.offset = min(index, xtlck->lwm.offset);
3132
        xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
3133
            xtlck->lwm.offset;
3134
 
3135
        /* unpin the parent xtpage */
3136
        XT_PUTPAGE(pmp);
3137
 
3138
        return rc;
3139
}
3140
 
3141
 
3142
/*
3143
 *      xtSearchNode()
3144
 *
3145
 * function:    search for the internal xad entry covering specified extent.
3146
 *              This function is mainly used by defragfs utility.
3147
 *
3148
 * parameters:
3149
 *      ip      - file object;
3150
 *      xad     - extent to find;
3151
 *      cmpp    - comparison result:
3152
 *      btstack - traverse stack;
3153
 *      flag    - search process flag;
3154
 *
3155
 * returns:
3156
 *      btstack contains (bn, index) of search path traversed to the entry.
3157
 *      *cmpp is set to result of comparison with the entry returned.
3158
 *      the page containing the entry is pinned at exit.
3159
 */
3160
static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
3161
                        int *cmpp, struct btstack * btstack, int flag)
3162
{
3163
        int rc = 0;
3164
        s64 xoff, xaddr;
3165
        int xlen;
3166
        int cmp = 1;            /* init for empty page */
3167
        s64 bn;                 /* block number */
3168
        struct metapage *mp;    /* meta-page buffer */
3169
        xtpage_t *p;            /* page */
3170
        int base, index, lim;
3171
        struct btframe *btsp;
3172
        s64 t64;
3173
 
3174
        BT_CLR(btstack);
3175
 
3176
        xoff = offsetXAD(xad);
3177
        xlen = lengthXAD(xad);
3178
        xaddr = addressXAD(xad);
3179
 
3180
        /*
3181
         *      search down tree from root:
3182
         *
3183
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
3184
         * an internal page, child page Pi contains the entry with key K, Ki <= K < Kj.
3185
         *
3186
         * if entry with search key K is not found
3187
         * internal page search finds the entry with the largest key Ki
3188
         * less than K, which points to the child page to search;
3189
         * leaf page search finds the entry with the smallest key Kj
3190
         * greater than K so that the returned index is the position of
3191
         * the entry to be shifted right for insertion of new entry.
3192
         * for empty tree, search key is greater than any key of the tree.
3193
         *
3194
         * by convention, root bn = 0.
3195
         */
3196
        for (bn = 0;;) {
3197
                /* get/pin the page to search */
3198
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3199
                if (rc)
3200
                        return rc;
3201
                if (p->header.flag & BT_LEAF) {
3202
                        XT_PUTPAGE(mp);
3203
                        return -ESTALE;
3204
                }
3205
 
3206
                lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3207
 
3208
                /*
3209
                 * binary search with search key K on the current page
3210
                 */
3211
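                /* when the search key lies beyond the probed entry
                 * (cmp > 0) the lower half is discarded by advancing base;
                 * otherwise lim >>= 1 alone narrows the window from above.
                 */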
                for (base = XTENTRYSTART; lim; lim >>= 1) {
3212
                        index = base + (lim >> 1);
3213
 
3214
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
3215
                        if (cmp == 0) {
3216
                                /*
3217
                                 *      search hit
3218
                                 *
3219
                                 * verify for exact match;
3220
                                 */
3221
                                if (xaddr == addressXAD(&p->xad[index]) &&
3222
                                    xoff == offsetXAD(&p->xad[index])) {
3223
                                        *cmpp = cmp;
3224
 
3225
                                        /* save search result */
3226
                                        btsp = btstack->top;
3227
                                        btsp->bn = bn;
3228
                                        btsp->index = index;
3229
                                        btsp->mp = mp;
3230
 
3231
                                        return 0;
3232
                                }
3233
 
3234
                                /* descend/search its child page */
3235
                                goto next;
3236
                        }
3237
 
3238
                        if (cmp > 0) {
3239
                                base = index + 1;
3240
                                --lim;
3241
                        }
3242
                }
3243
 
3244
                /*
3245
                 *      search miss - non-leaf page:
3246
                 *
3247
                 * base is the smallest index with key (Kj) greater than
3248
                 * search key (K) and may be zero or maxentry index.
3249
                 * if base is non-zero, decrement base by one to get the parent
3250
                 * entry of the child page to search.
3251
                 */
3252
                index = base ? base - 1 : base;
3253
 
3254
                /*
3255
                 * go down to child page
3256
                 */
3257
              next:
3258
                /* get the child page block number */
3259
                bn = addressXAD(&p->xad[index]);
3260
 
3261
                /* unpin the parent page */
3262
                XT_PUTPAGE(mp);
3263
        }
3264
}
3265
 
3266
 
3267
/*
3268
 *      xtRelink()
3269
 *
3270
 * function:
3271
 *      link around a freed page.
3272
 *
3273
 * Parameter:
3274
 *      int             tid,
3275
 *      struct inode    *ip,
3276
 *      xtpage_t        *p)
3277
 *
3278
 * returns:
3279
 */
3280
static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
3281
{
3282
        int rc = 0;
3283
        struct metapage *mp;
3284
        s64 nextbn, prevbn;
3285
        struct tlock *tlck;
3286
 
3287
        nextbn = le64_to_cpu(p->header.next);
3288
        prevbn = le64_to_cpu(p->header.prev);
3289
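        /* note: p is remapped below to each sibling page in turn, so
         * next/prev are captured above before the first XT_GETPAGE.
         */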
 
3290
        /* update prev pointer of the next page */
3291
        if (nextbn != 0) {
3292
                XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
3293
                if (rc)
3294
                        return rc;
3295
 
3296
                /*
3297
                 * acquire a transaction lock on the page;
3298
                 *
3299
                 * action: update prev pointer;
3300
                 */
3301
                BT_MARK_DIRTY(mp, ip);
3302
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3303
 
3304
                /* the page may already have been tlock'd */
3305
 
3306
                p->header.prev = cpu_to_le64(prevbn);
3307
 
3308
                XT_PUTPAGE(mp);
3309
        }
3310
 
3311
        /* update next pointer of the previous page */
3312
        if (prevbn != 0) {
3313
                XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
3314
                if (rc)
3315
                        return rc;
3316
 
3317
                /*
3318
                 * acquire a transaction lock on the page;
3319
                 *
3320
                 * action: update next pointer;
3321
                 */
3322
                BT_MARK_DIRTY(mp, ip);
3323
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3324
 
3325
                /* the page may already have been tlock'd */
3326
 
3327
                p->header.next = cpu_to_le64(nextbn);
3328
 
3329
                XT_PUTPAGE(mp);
3330
        }
3331
 
3332
        return 0;
3333
}
3334
#endif                          /*  _STILL_TO_PORT */
3335
 
3336
 
3337
/*
3338
 *      xtInitRoot()
3339
 *
3340
 * initialize file root (inline in inode)
3341
 */
3342
void xtInitRoot(tid_t tid, struct inode *ip)
3343
{
3344
        xtpage_t *p;
3345
 
3346
        /*
3347
         * acquire a transaction lock on the root
3348
         *
3349
         * action:
3350
         */
3351
        txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
3352
                      tlckXTREE | tlckNEW);
3353
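        /* the root xtpage lives inline in the in-core inode (i_xtroot),
         * so the metapage pointer passed to txLock above refers into the
         * inode itself rather than to a buffer page.
         */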
        p = &JFS_IP(ip)->i_xtroot;
3354
 
3355
        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
3356
        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3357
 
3358
        if (S_ISDIR(ip->i_mode))
3359
                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
3360
        else {
3361
                p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
3362
                ip->i_size = 0;
3363
        }
3364
 
3365
 
3366
        return;
3367
}
3368
 
3369
 
3370
/*
3371
 * We can run into a deadlock truncating a file with a large number of
3372
 * xtree pages (large fragmented file).  A robust fix would entail a
3373
 * reservation system where we would reserve a number of metadata pages
3374
 * and tlocks which we would be guaranteed without a deadlock.  Without
3375
 * this, a partial fix is to limit number of metadata pages we will lock
3376
 * in a single transaction.  Currently we will truncate the file so that
3377
 * no more than 50 leaf pages will be locked.  The caller of xtTruncate
3378
 * will be responsible for ensuring that the current transaction gets
3379
 * committed, and that subsequent transactions are created to truncate
3380
 * the file further if needed.
3381
 */
3382
#define MAX_TRUNCATE_LEAVES 50
3383
 
3384
/*
3385
 *      xtTruncate()
3386
 *
3387
 * function:
3388
 *      traverse for truncation logging backward bottom up;
3389
 *      terminate at the last extent entry at the current subtree
3390
 *      root page covering new down size.
3391
 *      truncation may occur within the last extent entry.
3392
 *
3393
 * parameter:
3394
 *      int             tid,
3395
 *      struct inode    *ip,
3396
 *      s64             newsize,
3397
 *      int             type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3398
 *
3399
 * return:
3400
 *
3401
 * note:
3402
 *      PWMAP:
3403
 *       1. truncate (non-COMMIT_NOLINK file)
3404
 *          by jfs_truncate() or jfs_open(O_TRUNC):
3405
 *          xtree is updated;
3406
 *       2. truncate index table of directory when last entry removed
3407
 *      map update via tlock at commit time;
3408
 *      PMAP:
3409
 *       Call xtTruncate_pmap instead
3410
 *      WMAP:
3411
 *       1. remove (free zero link count) on last reference release
3412
 *          (pmap has been freed at commit zero link count);
3413
 *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3414
 *          xtree is updated;
3415
 *       map update directly at truncation time;
3416
 *
3417
 *      if (DELETE)
3418
 *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3419
 *      else if (TRUNCATE)
3420
 *              must write LOG_NOREDOPAGE for deleted index page;
3421
 *
3422
 * pages may already have been tlocked by anonymous transactions
3423
 * during file growth (i.e., write) before truncation;
3424
 *
3425
 * except for the last truncated entry, deleted entries remain as is
3426
 * in the page (nextindex is updated) for other use
3427
 * (e.g., log/update allocation map): this avoids copying the page
3428
 * info but delays the freeing of pages;
3429
 *
3430
 */
3431
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3432
{
3433
        int rc = 0;
3434
        s64 teof;
3435
        struct metapage *mp;
3436
        xtpage_t *p;
3437
        s64 bn;
3438
        int index, nextindex;
3439
        xad_t *xad;
3440
        s64 xoff, xaddr;
3441
        int xlen, len, freexlen;
3442
        struct btstack btstack;
3443
        struct btframe *parent;
3444
        struct tblock *tblk = NULL;
3445
        struct tlock *tlck = NULL;
3446
        struct xtlock *xtlck = NULL;
3447
        struct xdlistlock xadlock;      /* maplock for COMMIT_WMAP */
3448
        struct pxd_lock *pxdlock;               /* maplock for COMMIT_WMAP */
3449
        s64 nfreed;
3450
        int freed, log;
3451
        int locked_leaves = 0;
3452
 
3453
        /* save object truncation type */
3454
        if (tid) {
3455
                tblk = tid_to_tblock(tid);
3456
                tblk->xflag |= flag;
3457
        }
3458
 
3459
        nfreed = 0;
3460
 
3461
        flag &= COMMIT_MAP;
3462
        assert(flag != COMMIT_PMAP);
3463
 
3464
        if (flag == COMMIT_PWMAP)
3465
                log = 1;
3466
        else {
3467
                log = 0;
3468
                xadlock.flag = mlckFREEXADLIST;
3469
                xadlock.index = 1;
3470
        }
3471
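        /* log != 0 (COMMIT_PWMAP): freed extents are recorded via tlocks
         * and the maps are updated at commit time; log == 0 (COMMIT_WMAP):
         * the working map is updated immediately through txFreeMap().
         */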
 
3472
        /*
3473
         * if the newsize is not an integral number of pages,
3474
         * the file between newsize and next page boundary will
3475
         * be cleared.
3476
         * if truncating into a file hole, it will cause
3477
         * a full block to be allocated for the logical block.
3478
         */
3479
 
3480
        /*
3481
         * release page blocks of truncated region <teof, eof>
3482
         *
3483
         * free the data blocks from the leaf index blocks.
3484
         * delete the parent index entries corresponding to
3485
         * the freed child data/index blocks.
3486
         * free the index blocks themselves which aren't needed
3487
         * in new sized file.
3488
         *
3489
         * index blocks are updated only if the blocks are to be
3490
         * retained in the new sized file.
3491
         * if type is PMAP, the data and index pages are NOT
3492
         * freed, and the data and index blocks are NOT freed
3493
         * from working map.
3494
         * (this will allow continued access of data/index of
3495
         * temporary file (zerolink count file truncated to zero-length)).
3496
         */
3497
        teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
3498
            JFS_SBI(ip->i_sb)->l2bsize;
3499
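        /* teof is the truncation point in filesystem blocks, rounded up;
         * extents that end at or below teof are kept intact.
         */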
 
3500
        /* clear stack */
3501
        BT_CLR(&btstack);
3502
 
3503
        /*
3504
         * start with root
3505
         *
3506
         * root resides in the inode
3507
         */
3508
        bn = 0;
3509
 
3510
        /*
3511
         * first access of each page:
3512
         */
3513
      getPage:
3514
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3515
        if (rc)
3516
                return rc;
3517
 
3518
        /* process entries backward from last index */
3519
        index = le16_to_cpu(p->header.nextindex) - 1;
3520
 
3521
 
3522
        /* Since this is the rightmost page at this level, and we may have
3523
         * already freed a page that was formerly to the right, let's make
3524
         * sure that the next pointer is zero.
3525
         */
3526
        if (p->header.next) {
3527
                if (log)
3528
                        /*
3529
                         * Make sure this change to the header is logged.
3530
                         * If we really truncate this leaf, the flag
3531
                         * will be changed to tlckTRUNCATE
3532
                         */
3533
                        tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
3534
                BT_MARK_DIRTY(mp, ip);
3535
                p->header.next = 0;
3536
        }
3537
 
3538
        if (p->header.flag & BT_INTERNAL)
3539
                goto getChild;
3540
 
3541
        /*
3542
         *      leaf page
3543
         */
3544
        freed = 0;
3545
 
3546
        /* does region covered by leaf page precede Teof ? */
3547
        xad = &p->xad[index];
3548
        xoff = offsetXAD(xad);
3549
        xlen = lengthXAD(xad);
3550
        if (teof >= xoff + xlen) {
3551
                XT_PUTPAGE(mp);
3552
                goto getParent;
3553
        }
3554
 
3555
        /* (re)acquire tlock of the leaf page */
3556
        if (log) {
3557
                if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3558
                        /*
3559
                         * We need to limit the size of the transaction
3560
                         * to avoid exhausting pagecache & tlocks
3561
                         */
3562
                        XT_PUTPAGE(mp);
3563
                        newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3564
                        goto getParent;
3565
                }
3566
                tlck = txLock(tid, ip, mp, tlckXTREE);
3567
                tlck->type = tlckXTREE | tlckTRUNCATE;
3568
                xtlck = (struct xtlock *) & tlck->lock;
3569
                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3570
        }
3571
        BT_MARK_DIRTY(mp, ip);
3572
 
3573
        /*
3574
         * scan backward leaf page entries
3575
         */
3576
        for (; index >= XTENTRYSTART; index--) {
3577
                xad = &p->xad[index];
3578
                xoff = offsetXAD(xad);
3579
                xlen = lengthXAD(xad);
3580
                xaddr = addressXAD(xad);
3581
 
3582
                /*
3583
                 * The "data" for a directory is indexed by the block
3584
                 * device's address space.  This metadata must be invalidated
3585
                 * here
3586
                 */
3587
                if (S_ISDIR(ip->i_mode) && (teof == 0))
3588
                        invalidate_xad_metapages(ip, *xad);
3589
                /*
3590
                 * entry beyond eof: continue scan of current page
3591
                 *          xad
3592
                 * ---|---=======------->
3593
                 *   eof
3594
                 */
3595
                if (teof < xoff) {
3596
                        nfreed += xlen;
3597
                        continue;
3598
                }
3599
 
3600
                /*
3601
                 * (xoff <= teof): last entry to be deleted from page;
3602
                 * If other entries remain in page: keep and update the page.
3603
                 */
3604
 
3605
                /*
3606
                 * eof == entry_start: delete the entry
3607
                 *           xad
3608
                 * -------|=======------->
3609
                 *       eof
3610
                 *
3611
                 */
3612
                if (teof == xoff) {
3613
                        nfreed += xlen;
3614
 
3615
                        if (index == XTENTRYSTART)
3616
                                break;
3617
 
3618
                        nextindex = index;
3619
                }
3620
                /*
3621
                 * eof within the entry: truncate the entry.
3622
                 *          xad
3623
                 * -------===|===------->
3624
                 *          eof
3625
                 */
3626
                else if (teof < xoff + xlen) {
3627
                        /* update truncated entry */
3628
                        len = teof - xoff;
3629
                        freexlen = xlen - len;
3630
                        XADlength(xad, len);
3631
 
3632
                        /* save pxd of truncated extent in tlck */
3633
                        xaddr += len;
3634
                        if (log) {      /* COMMIT_PWMAP */
3635
                                xtlck->lwm.offset = (xtlck->lwm.offset) ?
3636
                                    min(index, (int)xtlck->lwm.offset) : index;
3637
                                xtlck->lwm.length = index + 1 -
3638
                                    xtlck->lwm.offset;
3639
                                xtlck->twm.offset = index;
3640
                                pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
3641
                                pxdlock->flag = mlckFREEPXD;
3642
                                PXDaddress(&pxdlock->pxd, xaddr);
3643
                                PXDlength(&pxdlock->pxd, freexlen);
3644
                        }
3645
                        /* free truncated extent */
3646
                        else {  /* COMMIT_WMAP */
3647
 
3648
                                pxdlock = (struct pxd_lock *) & xadlock;
3649
                                pxdlock->flag = mlckFREEPXD;
3650
                                PXDaddress(&pxdlock->pxd, xaddr);
3651
                                PXDlength(&pxdlock->pxd, freexlen);
3652
                                txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
3653
 
3654
                                /* reset map lock */
3655
                                xadlock.flag = mlckFREEXADLIST;
3656
                        }
3657
 
3658
                        /* current entry is new last entry; */
3659
                        nextindex = index + 1;
3660
 
3661
                        nfreed += freexlen;
3662
                }
3663
                /*
3664
                 * eof beyond the entry:
3665
                 *          xad
3666
                 * -------=======---|--->
3667
                 *                 eof
3668
                 */
3669
                else {          /* (xoff + xlen < teof) */
3670
 
3671
                        nextindex = index + 1;
3672
                }
3673
 
3674
                if (nextindex < le16_to_cpu(p->header.nextindex)) {
3675
                        if (!log) {     /* COMMIT_WMAP */
3676
                                xadlock.xdlist = &p->xad[nextindex];
3677
                                xadlock.count =
3678
                                    le16_to_cpu(p->header.nextindex) -
3679
                                    nextindex;
3680
                                txFreeMap(ip, (struct maplock *) & xadlock,
3681
                                          NULL, COMMIT_WMAP);
3682
                        }
3683
                        p->header.nextindex = cpu_to_le16(nextindex);
3684
                }
3685
 
3686
                XT_PUTPAGE(mp);
3687
 
3688
                /* assert(freed == 0); */
3689
                goto getParent;
3690
        }                       /* end scan of leaf page entries */
3691
 
3692
        freed = 1;
3693
 
3694
        /*
3695
         * leaf page has become empty: free the page if type != PMAP
3696
         */
3697
        if (log) {              /* COMMIT_PWMAP */
3698
                /* txCommit() with tlckFREE:
3699
                 * free data extents covered by leaf [XTENTRYSTART:hwm);
3700
                 * invalidate leaf if COMMIT_PWMAP;
3701
                 * if (TRUNCATE), will write LOG_NOREDOPAGE;
3702
                 */
3703
                tlck->type = tlckXTREE | tlckFREE;
3704
        } else {                /* COMMIT_WMAP */
3705
 
3706
                /* free data extents covered by leaf */
3707
                xadlock.xdlist = &p->xad[XTENTRYSTART];
3708
                xadlock.count =
3709
                    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3710
                txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
3711
        }
3712
 
3713
        if (p->header.flag & BT_ROOT) {
3714
                p->header.flag &= ~BT_INTERNAL;
3715
                p->header.flag |= BT_LEAF;
3716
                p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3717
 
3718
                XT_PUTPAGE(mp); /* debug */
3719
                goto out;
3720
        } else {
3721
                if (log) {      /* COMMIT_PWMAP */
3722
                        /* page will be invalidated at tx completion
3723
                         */
3724
                        XT_PUTPAGE(mp);
3725
                } else {        /* COMMIT_WMAP */
3726
 
3727
                        if (mp->lid)
3728
                                lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
3729
 
3730
                        /* invalidate empty leaf page */
3731
                        discard_metapage(mp);
3732
                }
3733
        }
3734
 
3735
        /*
3736
         * the leaf page has become empty: delete the parent entry
3737
         * for the leaf page if the parent page is to be kept
3738
         * in the new sized file.
3739
         */
3740
 
3741
        /*
3742
         * go back up to the parent page
3743
         */
3744
      getParent:
3745
        /* pop/restore parent entry for the current child page */
3746
        if ((parent = BT_POP(&btstack)) == NULL)
3747
                /* current page must have been root */
3748
                goto out;
3749
 
3750
        /* get back the parent page */
3751
        bn = parent->bn;
3752
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3753
        if (rc)
3754
                return rc;
3755
 
3756
        index = parent->index;
3757
 
3758
        /*
3759
         * child page was not empty:
3760
         */
3761
        if (freed == 0) {
3762
                /* has any entry deleted from parent ? */
3763
                if (index < le16_to_cpu(p->header.nextindex) - 1) {
3764
                        /* (re)acquire tlock on the parent page */
3765
                        if (log) {      /* COMMIT_PWMAP */
3766
                                /* txCommit() with tlckTRUNCATE:
3767
                                 * free child extents covered by parent [);
3768
                                 */
3769
                                tlck = txLock(tid, ip, mp, tlckXTREE);
3770
                                xtlck = (struct xtlock *) & tlck->lock;
3771
                                if (!(tlck->type & tlckTRUNCATE)) {
3772
                                        xtlck->hwm.offset =
3773
                                            le16_to_cpu(p->header.
3774
                                                        nextindex) - 1;
3775
                                        tlck->type =
3776
                                            tlckXTREE | tlckTRUNCATE;
3777
                                }
3778
                        } else {        /* COMMIT_WMAP */
3779
 
3780
                                /* free child extents covered by parent */
3781
                                xadlock.xdlist = &p->xad[index + 1];
3782
                                xadlock.count =
3783
                                    le16_to_cpu(p->header.nextindex) -
3784
                                    index - 1;
3785
                                txFreeMap(ip, (struct maplock *) & xadlock,
3786
                                          NULL, COMMIT_WMAP);
3787
                        }
3788
                        BT_MARK_DIRTY(mp, ip);
3789
 
3790
                        p->header.nextindex = cpu_to_le16(index + 1);
3791
                }
3792
                XT_PUTPAGE(mp);
3793
                goto getParent;
3794
        }
3795
 
3796
        /*
3797
         * child page was empty:
3798
         */
3799
        nfreed += lengthXAD(&p->xad[index]);
3800
 
3801
        /*
3802
         * During working map update, child page's tlock must be handled
3803
         * before parent's.  This is because the parent's tlock will cause
3804
         * the child's disk space to be marked available in the wmap, so
3805
         * it's important that the child page be released by that time.
3806
         *
3807
         * ToDo:  tlocks should be on doubly-linked list, so we can
3808
         * quickly remove it and add it to the end.
3809
         */
3810
 
3811
        /*
3812
         * Move parent page's tlock to the end of the tid's tlock list
3813
         */
3814
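        /* tlocks hang off the tblock in a singly-linked list of lids; the
         * block below unlinks the parent's tlock and re-appends it at
         * tblk->last so the child's tlock is processed first at commit.
         */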
        if (log && mp->lid && (tblk->last != mp->lid) &&
3815
            lid_to_tlock(mp->lid)->tid) {
3816
                lid_t lid = mp->lid;
3817
                struct tlock *prev;
3818
 
3819
                tlck = lid_to_tlock(lid);
3820
 
3821
                if (tblk->next == lid)
3822
                        tblk->next = tlck->next;
3823
                else {
3824
                        for (prev = lid_to_tlock(tblk->next);
3825
                             prev->next != lid;
3826
                             prev = lid_to_tlock(prev->next)) {
3827
                                assert(prev->next);
3828
                        }
3829
                        prev->next = tlck->next;
3830
                }
3831
                lid_to_tlock(tblk->last)->next = lid;
3832
                tlck->next = 0;
3833
                tblk->last = lid;
3834
        }
3835
 
3836
        /*
3837
         * parent page has become empty: free the page
3838
         */
3839
        if (index == XTENTRYSTART) {
3840
                if (log) {      /* COMMIT_PWMAP */
3841
                        /* txCommit() with tlckFREE:
3842
                         * free child extents covered by parent;
3843
                         * invalidate parent if COMMIT_PWMAP;
3844
                         */
3845
                        tlck = txLock(tid, ip, mp, tlckXTREE);
3846
                        xtlck = (struct xtlock *) & tlck->lock;
3847
                        xtlck->hwm.offset =
3848
                            le16_to_cpu(p->header.nextindex) - 1;
3849
                        tlck->type = tlckXTREE | tlckFREE;
3850
                } else {        /* COMMIT_WMAP */
3851
 
3852
                        /* free child extents covered by parent */
3853
                        xadlock.xdlist = &p->xad[XTENTRYSTART];
3854
                        xadlock.count =
3855
                            le16_to_cpu(p->header.nextindex) -
3856
                            XTENTRYSTART;
3857
                        txFreeMap(ip, (struct maplock *) & xadlock, NULL,
3858
                                  COMMIT_WMAP);
3859
                }
3860
                BT_MARK_DIRTY(mp, ip);
3861
 
3862
                if (p->header.flag & BT_ROOT) {
3863
                        p->header.flag &= ~BT_INTERNAL;
3864
                        p->header.flag |= BT_LEAF;
3865
                        p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3866
                        if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
3867
                                /*
3868
                                 * Shrink root down to allow inline
3869
                                 * EA (otherwise fsck complains)
3870
                                 */
3871
                                p->header.maxentry =
3872
                                    cpu_to_le16(XTROOTINITSLOT);
3873
                                JFS_IP(ip)->mode2 |= INLINEEA;
3874
                        }
3875
 
3876
                        XT_PUTPAGE(mp); /* debug */
3877
                        goto out;
3878
                } else {
3879
                        if (log) {      /* COMMIT_PWMAP */
3880
                                /* page will be invalidated at tx completion
3881
                                 */
3882
                                XT_PUTPAGE(mp);
3883
                        } else {        /* COMMIT_WMAP */
3884
 
3885
                                if (mp->lid)
3886
                                        lid_to_tlock(mp->lid)->flag |=
3887
                                                tlckFREELOCK;
3888
 
3889
                                /* invalidate parent page */
3890
                                discard_metapage(mp);
3891
                        }
3892
 
3893
                        /* parent has become empty and freed:
3894
                         * go back up to its parent page
3895
                         */
3896
                        /* freed = 1; */
3897
                        goto getParent;
3898
                }
3899
        }
3900
        /*
3901
         * parent page still has entries for front region;
3902
         */
3903
        else {
3904
                /* try truncate region covered by preceding entry
3905
                 * (process backward)
3906
                 */
3907
                index--;
3908
 
3909
                /* go back down to the child page corresponding
3910
                 * to the entry
3911
                 */
3912
                goto getChild;
3913
        }
3914
 
3915
        /*
3916
         *      internal page: go down to child page of current entry
3917
         */
3918
      getChild:
3919
        /* save current parent entry for the child page */
3920
        if (BT_STACK_FULL(&btstack)) {
3921
                jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
3922
                XT_PUTPAGE(mp);
3923
                return -EIO;
3924
        }
3925
        BT_PUSH(&btstack, bn, index);
3926
 
3927
        /* get child page */
3928
        xad = &p->xad[index];
3929
        bn = addressXAD(xad);
3930
 
3931
        /*
3932
         * first access of each internal entry:
3933
         */
3934
        /* release parent page */
3935
        XT_PUTPAGE(mp);
3936
 
3937
        /* process the child page */
3938
        goto getPage;
3939
 
3940
      out:
3941
        /*
3942
         * update file resource stat
3943
         */
3944
        /* set size
3945
         */
3946
        if (S_ISDIR(ip->i_mode) && !newsize)
3947
                ip->i_size = 1; /* fsck hates zero-length directories */
3948
        else
3949
                ip->i_size = newsize;
3950
 
3951
        /* update quota allocation to reflect freed blocks */
3952
        DQUOT_FREE_BLOCK(ip, nfreed);
3953
 
3954
        /*
3955
         * free tlock of invalidated pages
3956
         */
3957
        if (flag == COMMIT_WMAP)
3958
                txFreelock(ip);
3959
 
3960
        return newsize;
3961
}
3962
 
3963
 
3964
/*
3965
 *      xtTruncate_pmap()
3966
 *
3967
 * function:
3968
 *      Perform truncate to zero length for a deleted file, leaving
3969
 *      the xtree and working map untouched.  This allows the file to
3970
 *      be accessed via open file handles, while the delete of the file
3971
 *      is committed to disk.
3972
 *
3973
 * parameter:
3974
 *      tid_t           tid,
3975
 *      struct inode    *ip,
3976
 *      s64             committed_size)
3977
 *
3978
 * return: new committed size
3979
 *
3980
 * note:
3981
 *
3982
 *      To avoid deadlock by holding too many transaction locks, the
3983
 *      truncation may be broken up into multiple transactions.
3984
 *      The committed_size keeps track of how much of the file has been
3985
 *      freed from the pmaps.
3986
 */
3987
s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
3988
{
3989
        s64 bn;
3990
        struct btstack btstack;
3991
        int cmp;
3992
        int index;
3993
        int locked_leaves = 0;
3994
        struct metapage *mp;
3995
        xtpage_t *p;
3996
        struct btframe *parent;
3997
        int rc;
3998
        struct tblock *tblk;
3999
        struct tlock *tlck = NULL;
4000
        xad_t *xad;
4001
        int xlen;
4002
        s64 xoff;
4003
        struct xtlock *xtlck = NULL;
4004
 
4005
        /* save object truncation type */
4006
        tblk = tid_to_tblock(tid);
4007
        tblk->xflag |= COMMIT_PMAP;
4008
 
4009
        /* clear stack */
4010
        BT_CLR(&btstack);
4011
 
4012
        if (committed_size) {
4013
                xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
4014
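                /* resume a partially-committed truncation: find the leaf
                 * entry covering the last block below committed_size and
                 * continue the backward scan from there.
                 */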
                rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
4015
                if (rc)
4016
                        return rc;
4017
 
4018
                XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4019
 
4020
                if (cmp != 0) {
4021
                        XT_PUTPAGE(mp);
4022
                        jfs_error(ip->i_sb,
4023
                                  "xtTruncate_pmap: did not find extent");
4024
                        return -EIO;
4025
                }
4026
        } else {
4027
                /*
4028
                 * start with root
4029
                 *
4030
                 * root resides in the inode
4031
                 */
4032
                bn = 0;
4033
 
4034
                /*
4035
                 * first access of each page:
4036
                 */
4037
      getPage:
4038
                XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4039
                if (rc)
4040
                        return rc;
4041
 
4042
                /* process entries backward from last index */
4043
                index = le16_to_cpu(p->header.nextindex) - 1;
4044
 
4045
                if (p->header.flag & BT_INTERNAL)
4046
                        goto getChild;
4047
        }
4048
 
4049
        /*
4050
         *      leaf page
4051
         */
4052
 
4053
        if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
4054
                /*
4055
                 * We need to limit the size of the transaction
4056
                 * to avoid exhausting pagecache & tlocks
4057
                 */
4058
                xad = &p->xad[index];
4059
                xoff = offsetXAD(xad);
4060
                xlen = lengthXAD(xad);
4061
                XT_PUTPAGE(mp);
4062
                return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
4063
        }
4064
        tlck = txLock(tid, ip, mp, tlckXTREE);
4065
        tlck->type = tlckXTREE | tlckFREE;
4066
        xtlck = (struct xtlock *) & tlck->lock;
4067
        xtlck->hwm.offset = index;
4068
 
4069
 
4070
        XT_PUTPAGE(mp);
4071
 
4072
        /*
4073
         * go back up to the parent page
4074
         */
4075
      getParent:
4076
        /* pop/restore parent entry for the current child page */
4077
        if ((parent = BT_POP(&btstack)) == NULL)
4078
                /* current page must have been root */
4079
                goto out;
4080
 
4081
        /* get back the parent page */
4082
        bn = parent->bn;
4083
        XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4084
        if (rc)
4085
                return rc;
4086
 
4087
        index = parent->index;
4088
 
4089
        /*
4090
         * parent page has become empty: free the page
4091
         */
4092
        if (index == XTENTRYSTART) {
4093
                /* txCommit() with tlckFREE:
4094
                 * free child extents covered by parent;
4095
                 * invalidate parent if COMMIT_PWMAP;
4096
                 */
4097
                tlck = txLock(tid, ip, mp, tlckXTREE);
4098
                xtlck = (struct xtlock *) & tlck->lock;
4099
                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
4100
                tlck->type = tlckXTREE | tlckFREE;
4101
 
4102
                XT_PUTPAGE(mp);
4103
 
4104
                if (p->header.flag & BT_ROOT) {
4105
 
4106
                        goto out;
4107
                } else {
4108
                        goto getParent;
4109
                }
4110
        }
4111
        /*
4112
         * parent page still has entries for front region;
4113
         */
4114
        else
4115
                index--;
4116
        /*
4117
         *      internal page: go down to child page of current entry
4118
         */
4119
      getChild:
4120
        /* save current parent entry for the child page */
4121
        if (BT_STACK_FULL(&btstack)) {
4122
                jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!");
4123
                XT_PUTPAGE(mp);
4124
                return -EIO;
4125
        }
4126
        BT_PUSH(&btstack, bn, index);
4127
 
4128
        /* get child page */
4129
        xad = &p->xad[index];
4130
        bn = addressXAD(xad);
4131
 
4132
        /*
4133
         * first access of each internal entry:
4134
         */
4135
        /* release parent page */
4136
        XT_PUTPAGE(mp);
4137
 
4138
        /* process the child page */
4139
        goto getPage;
4140
 
4141
      out:
4142
 
4143
        return 0;
4144
}
4145
 
4146
#ifdef CONFIG_JFS_STATISTICS
4147
int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
4148
                    int *eof, void *data)
4149
{
4150
        int len = 0;
4151
        off_t begin;
4152
 
4153
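        /* format the whole report into buffer, then window it with
         * offset/length for the caller (the usual read_proc pattern).
         */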
        len += sprintf(buffer,
4154
                       "JFS Xtree statistics\n"
4155
                       "====================\n"
4156
                       "searches = %d\n"
4157
                       "fast searches = %d\n"
4158
                       "splits = %d\n",
4159
                       xtStat.search,
4160
                       xtStat.fastSearch,
4161
                       xtStat.split);
4162
 
4163
        begin = offset;
4164
        *start = buffer + begin;
4165
        len -= begin;
4166
 
4167
        if (len > length)
4168
                len = length;
4169
        else
4170
                *eof = 1;
4171
 
4172
        if (len < 0)
4173
                len = 0;
4174
 
4175
        return len;
4176
}
4177
#endif
