OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [jfs/] [jfs_dtree.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 *   Copyright (C) International Business Machines Corp., 2000-2003
3
 *
4
 *   This program is free software;  you can redistribute it and/or modify
5
 *   it under the terms of the GNU General Public License as published by
6
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   (at your option) any later version.
8
 *
9
 *   This program is distributed in the hope that it will be useful,
10
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12
 *   the GNU General Public License for more details.
13
 *
14
 *   You should have received a copy of the GNU General Public License
15
 *   along with this program;  if not, write to the Free Software
16
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
 */
18
 
19
/*
20
 *      jfs_dtree.c: directory B+-tree manager
21
 *
22
 * B+-tree with variable length key directory:
23
 *
24
 * each directory page is structured as an array of 32-byte
25
 * directory entry slots initialized as a freelist
26
 * to avoid search/compaction of free space at insertion.
27
 * when an entry is inserted, a number of slots are allocated
28
 * from the freelist as required to store variable length data
29
 * of the entry; when the entry is deleted, slots of the entry
30
 * are returned to freelist.
31
 *
32
 * leaf entry stores full name as key and file serial number
33
 * (aka inode number) as data.
34
 * internal/router entry stores sufffix compressed name
35
 * as key and simple extent descriptor as data.
36
 *
37
 * each directory page maintains a sorted entry index table
38
 * which stores the start slot index of sorted entries
39
 * to allow binary search on the table.
40
 *
41
 * directory starts as a root/leaf page in on-disk inode
42
 * inline data area.
43
 * when it becomes full, it starts a leaf of a external extent
44
 * of length of 1 block. each time the first leaf becomes full,
45
 * it is extended rather than split (its size is doubled),
46
 * until its length becoms 4 KBytes, from then the extent is split
47
 * with new 4 Kbyte extent when it becomes full
48
 * to reduce external fragmentation of small directories.
49
 *
50
 * blah, blah, blah, for linear scan of directory in pieces by
51
 * readdir().
52
 *
53
 *
54
 *      case-insensitive directory file system
55
 *
56
 * names are stored in case-sensitive way in leaf entry.
57
 * but stored, searched and compared in case-insensitive (uppercase) order
58
 * (i.e., both search key and entry key are folded for search/compare):
59
 * (note that case-sensitive order is BROKEN in storage, e.g.,
60
 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
61
 *
62
 *  entries which folds to the same key makes up a equivalent class
63
 *  whose members are stored as contiguous cluster (may cross page boundary)
64
 *  but whose order is arbitrary and acts as duplicate, e.g.,
65
 *  abc, Abc, aBc, abC)
66
 *
67
 * once match is found at leaf, requires scan forward/backward
68
 * either for, in case-insensitive search, duplicate
69
 * or for, in case-sensitive search, for exact match
70
 *
71
 * router entry must be created/stored in case-insensitive way
72
 * in internal entry:
73
 * (right most key of left page and left most key of right page
74
 * are folded, and its suffix compression is propagated as router
75
 * key in parent)
76
 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
77
 * should be made the router key for the split)
78
 *
79
 * case-insensitive search:
80
 *
81
 *      fold search key;
82
 *
83
 *      case-insensitive search of B-tree:
84
 *      for internal entry, router key is already folded;
85
 *      for leaf entry, fold the entry key before comparison.
86
 *
87
 *      if (leaf entry case-insensitive match found)
88
 *              if (next entry satisfies case-insensitive match)
89
 *                      return EDUPLICATE;
90
 *              if (prev entry satisfies case-insensitive match)
91
 *                      return EDUPLICATE;
92
 *              return match;
93
 *      else
94
 *              return no match;
95
 *
96
 *      serialization:
97
 * target directory inode lock is being held on entry/exit
98
 * of all main directory service routines.
99
 *
100
 *      log based recovery:
101
 */
102
 
103
#include <linux/fs.h>
104
#include "jfs_incore.h"
105
#include "jfs_superblock.h"
106
#include "jfs_filsys.h"
107
#include "jfs_metapage.h"
108
#include "jfs_dmap.h"
109
#include "jfs_unicode.h"
110
#include "jfs_debug.h"
111
 
112
/* dtree split parameter */
113
struct dtsplit {
114
        struct metapage *mp;
115
        s16 index;
116
        s16 nslot;
117
        struct component_name *key;
118
        ddata_t *data;
119
        struct pxdlist *pxdlist;
120
};
121
 
122
#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
123
 
124
/* get page buffer for specified block address */
125
#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
126
{\
127
        BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\
128
        if (!(RC))\
129
        {\
130
                if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\
131
                    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\
132
                {\
133
                        BT_PUTPAGE(MP);\
134
                        jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\
135
                        MP = NULL;\
136
                        RC = -EIO;\
137
                }\
138
        }\
139
}
140
 
141
/* for consistency */
142
#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
143
 
144
#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
145
        BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
146
 
147
/*
148
 * forward references
149
 */
150
static int dtSplitUp(tid_t tid, struct inode *ip,
151
                     struct dtsplit * split, struct btstack * btstack);
152
 
153
static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
154
                       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
155
 
156
static int dtExtendPage(tid_t tid, struct inode *ip,
157
                        struct dtsplit * split, struct btstack * btstack);
158
 
159
static int dtSplitRoot(tid_t tid, struct inode *ip,
160
                       struct dtsplit * split, struct metapage ** rmpp);
161
 
162
static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
163
                      dtpage_t * fp, struct btstack * btstack);
164
 
165
static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
166
 
167
static int dtReadFirst(struct inode *ip, struct btstack * btstack);
168
 
169
static int dtReadNext(struct inode *ip,
170
                      loff_t * offset, struct btstack * btstack);
171
 
172
static int dtCompare(struct component_name * key, dtpage_t * p, int si);
173
 
174
static int ciCompare(struct component_name * key, dtpage_t * p, int si,
175
                     int flag);
176
 
177
static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
178
                     int flag);
179
 
180
static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
181
                               int ri, struct component_name * key, int flag);
182
 
183
static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
184
                          ddata_t * data, struct dt_lock **);
185
 
186
static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
187
                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
188
                        int do_index);
189
 
190
static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
191
 
192
static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
193
 
194
static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
195
 
196
#define ciToUpper(c)    UniStrupr((c)->name)
197
 
198
/*
199
 *      read_index_page()
200
 *
201
 *      Reads a page of a directory's index table.
202
 *      Having metadata mapped into the directory inode's address space
203
 *      presents a multitude of problems.  We avoid this by mapping to
204
 *      the absolute address space outside of the *_metapage routines
205
 */
206
static struct metapage *read_index_page(struct inode *inode, s64 blkno)
207
{
208
        int rc;
209
        s64 xaddr;
210
        int xflag;
211
        s32 xlen;
212
 
213
        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
214
        if (rc || (xlen == 0))
215
                return NULL;
216
 
217
        return read_metapage(inode, xaddr, PSIZE, 1);
218
}
219
 
220
/*
221
 *      get_index_page()
222
 *
223
 *      Same as get_index_page(), but get's a new page without reading
224
 */
225
static struct metapage *get_index_page(struct inode *inode, s64 blkno)
226
{
227
        int rc;
228
        s64 xaddr;
229
        int xflag;
230
        s32 xlen;
231
 
232
        rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
233
        if (rc || (xlen == 0))
234
                return NULL;
235
 
236
        return get_metapage(inode, xaddr, PSIZE, 1);
237
}
238
 
239
/*
240
 *      find_index()
241
 *
242
 *      Returns dtree page containing directory table entry for specified
243
 *      index and pointer to its entry.
244
 *
245
 *      mp must be released by caller.
246
 */
247
static struct dir_table_slot *find_index(struct inode *ip, u32 index,
248
                                         struct metapage ** mp, s64 *lblock)
249
{
250
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
251
        s64 blkno;
252
        s64 offset;
253
        int page_offset;
254
        struct dir_table_slot *slot;
255
        static int maxWarnings = 10;
256
 
257
        if (index < 2) {
258
                if (maxWarnings) {
259
                        jfs_warn("find_entry called with index = %d", index);
260
                        maxWarnings--;
261
                }
262
                return 0;
263
        }
264
 
265
        if (index >= jfs_ip->next_index) {
266
                jfs_warn("find_entry called with index >= next_index");
267
                return 0;
268
        }
269
 
270
        if (jfs_ip->next_index <= (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
271
                /*
272
                 * Inline directory table
273
                 */
274
                *mp = 0;
275
                slot = &jfs_ip->i_dirtable[index - 2];
276
        } else {
277
                offset = (index - 2) * sizeof(struct dir_table_slot);
278
                page_offset = offset & (PSIZE - 1);
279
                blkno = ((offset + 1) >> L2PSIZE) <<
280
                    JFS_SBI(ip->i_sb)->l2nbperpage;
281
 
282
                if (*mp && (*lblock != blkno)) {
283
                        release_metapage(*mp);
284
                        *mp = 0;
285
                }
286
                if (*mp == 0) {
287
                        *lblock = blkno;
288
                        *mp = read_index_page(ip, blkno);
289
                }
290
                if (*mp == 0) {
291
                        jfs_err("free_index: error reading directory table");
292
                        return 0;
293
                }
294
 
295
                slot =
296
                    (struct dir_table_slot *) ((char *) (*mp)->data +
297
                                               page_offset);
298
        }
299
        return slot;
300
}
301
 
302
static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
303
                              u32 index)
304
{
305
        struct tlock *tlck;
306
        struct linelock *llck;
307
        struct lv *lv;
308
 
309
        tlck = txLock(tid, ip, mp, tlckDATA);
310
        llck = (struct linelock *) tlck->lock;
311
 
312
        if (llck->index >= llck->maxcnt)
313
                llck = txLinelock(llck);
314
        lv = &llck->lv[llck->index];
315
 
316
        /*
317
         *      Linelock slot size is twice the size of directory table
318
         *      slot size.  512 entries per page.
319
         */
320
        lv->offset = ((index - 2) & 511) >> 1;
321
        lv->length = 1;
322
        llck->index++;
323
}
324
 
325
/*
326
 *      add_index()
327
 *
328
 *      Adds an entry to the directory index table.  This is used to provide
329
 *      each directory entry with a persistent index in which to resume
330
 *      directory traversals
331
 */
332
static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
333
{
334
        struct super_block *sb = ip->i_sb;
335
        struct jfs_sb_info *sbi = JFS_SBI(sb);
336
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
337
        u64 blkno;
338
        struct dir_table_slot *dirtab_slot;
339
        u32 index;
340
        struct linelock *llck;
341
        struct lv *lv;
342
        struct metapage *mp;
343
        s64 offset;
344
        uint page_offset;
345
        int rc;
346
        struct tlock *tlck;
347
        s64 xaddr;
348
 
349
        ASSERT(DO_INDEX(ip));
350
 
351
        if (jfs_ip->next_index < 2) {
352
                jfs_warn("add_index: next_index = %d.  Resetting!",
353
                           jfs_ip->next_index);
354
                jfs_ip->next_index = 2;
355
        }
356
 
357
        index = jfs_ip->next_index++;
358
 
359
        if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
360
                /*
361
                 * i_size reflects size of index table, or 8 bytes per entry.
362
                 */
363
                ip->i_size = (loff_t) (index - 1) << 3;
364
 
365
                /*
366
                 * dir table fits inline within inode
367
                 */
368
                dirtab_slot = &jfs_ip->i_dirtable[index-2];
369
                dirtab_slot->flag = DIR_INDEX_VALID;
370
                dirtab_slot->slot = slot;
371
                DTSaddress(dirtab_slot, bn);
372
 
373
                set_cflag(COMMIT_Dirtable, ip);
374
 
375
                return index;
376
        }
377
        if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
378
                /*
379
                 * It's time to move the inline table to an external
380
                 * page and begin to build the xtree
381
                 */
382
 
383
                /*
384
                 * Save the table, we're going to overwrite it with the
385
                 * xtree root
386
                 */
387
                struct dir_table_slot temp_table[12];
388
                memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
389
 
390
                /*
391
                 * Initialize empty x-tree
392
                 */
393
                xtInitRoot(tid, ip);
394
 
395
                /*
396
                 * Allocate the first block & add it to the xtree
397
                 */
398
                xaddr = 0;
399
                if ((rc =
400
                     xtInsert(tid, ip, 0, 0, sbi->nbperpage,
401
                              &xaddr, 0))) {
402
                        jfs_warn("add_index: xtInsert failed!");
403
                        return -EPERM;
404
                }
405
                ip->i_size = PSIZE;
406
                ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage);
407
 
408
                if ((mp = get_index_page(ip, 0)) == 0) {
409
                        jfs_err("add_index: get_metapage failed!");
410
                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
411
                        return -EPERM;
412
                }
413
                tlck = txLock(tid, ip, mp, tlckDATA);
414
                llck = (struct linelock *) & tlck->lock;
415
                ASSERT(llck->index == 0);
416
                lv = &llck->lv[0];
417
 
418
                lv->offset = 0;
419
                lv->length = 6; /* tlckDATA slot size is 16 bytes */
420
                llck->index++;
421
 
422
                memcpy(mp->data, temp_table, sizeof(temp_table));
423
 
424
                mark_metapage_dirty(mp);
425
                release_metapage(mp);
426
 
427
                /*
428
                 * Logging is now directed by xtree tlocks
429
                 */
430
                clear_cflag(COMMIT_Dirtable, ip);
431
        }
432
 
433
        offset = (index - 2) * sizeof(struct dir_table_slot);
434
        page_offset = offset & (PSIZE - 1);
435
        blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
436
        if (page_offset == 0) {
437
                /*
438
                 * This will be the beginning of a new page
439
                 */
440
                xaddr = 0;
441
                if ((rc =
442
                     xtInsert(tid, ip, 0, blkno, sbi->nbperpage,
443
                              &xaddr, 0))) {
444
                        jfs_warn("add_index: xtInsert failed!");
445
                        jfs_ip->next_index--;
446
                        return -EPERM;
447
                }
448
                ip->i_size += PSIZE;
449
                ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage);
450
 
451
                if ((mp = get_index_page(ip, blkno)))
452
                        memset(mp->data, 0, PSIZE);      /* Just looks better */
453
                else
454
                        xtTruncate(tid, ip, offset, COMMIT_PWMAP);
455
        } else
456
                mp = read_index_page(ip, blkno);
457
 
458
        if (mp == 0) {
459
                jfs_err("add_index: get/read_metapage failed!");
460
                return -EPERM;
461
        }
462
 
463
        lock_index(tid, ip, mp, index);
464
 
465
        dirtab_slot =
466
            (struct dir_table_slot *) ((char *) mp->data + page_offset);
467
        dirtab_slot->flag = DIR_INDEX_VALID;
468
        dirtab_slot->slot = slot;
469
        DTSaddress(dirtab_slot, bn);
470
 
471
        mark_metapage_dirty(mp);
472
        release_metapage(mp);
473
 
474
        return index;
475
}
476
 
477
/*
478
 *      free_index()
479
 *
480
 *      Marks an entry to the directory index table as free.
481
 */
482
static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
483
{
484
        struct dir_table_slot *dirtab_slot;
485
        s64 lblock;
486
        struct metapage *mp = 0;
487
 
488
        dirtab_slot = find_index(ip, index, &mp, &lblock);
489
 
490
        if (dirtab_slot == 0)
491
                return;
492
 
493
        dirtab_slot->flag = DIR_INDEX_FREE;
494
        dirtab_slot->slot = dirtab_slot->addr1 = 0;
495
        dirtab_slot->addr2 = cpu_to_le32(next);
496
 
497
        if (mp) {
498
                lock_index(tid, ip, mp, index);
499
                mark_metapage_dirty(mp);
500
                release_metapage(mp);
501
        } else
502
                set_cflag(COMMIT_Dirtable, ip);
503
}
504
 
505
/*
506
 *      modify_index()
507
 *
508
 *      Changes an entry in the directory index table
509
 */
510
static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
511
                         int slot, struct metapage ** mp, u64 *lblock)
512
{
513
        struct dir_table_slot *dirtab_slot;
514
 
515
        dirtab_slot = find_index(ip, index, mp, lblock);
516
 
517
        if (dirtab_slot == 0)
518
                return;
519
 
520
        DTSaddress(dirtab_slot, bn);
521
        dirtab_slot->slot = slot;
522
 
523
        if (*mp) {
524
                lock_index(tid, ip, *mp, index);
525
                mark_metapage_dirty(*mp);
526
        } else
527
                set_cflag(COMMIT_Dirtable, ip);
528
}
529
 
530
/*
531
 *      read_index()
532
 *
533
 *      reads a directory table slot
534
 */
535
static int read_index(struct inode *ip, u32 index,
536
                     struct dir_table_slot * dirtab_slot)
537
{
538
        s64 lblock;
539
        struct metapage *mp = 0;
540
        struct dir_table_slot *slot;
541
 
542
        slot = find_index(ip, index, &mp, &lblock);
543
        if (slot == 0) {
544
                return -EIO;
545
        }
546
 
547
        memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
548
 
549
        if (mp)
550
                release_metapage(mp);
551
 
552
        return 0;
553
}
554
 
555
/*
556
 *      dtSearch()
557
 *
558
 * function:
559
 *      Search for the entry with specified key
560
 *
561
 * parameter:
562
 *
563
 * return: 0 - search result on stack, leaf page pinned;
564
 *         errno - I/O error
565
 */
566
int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
567
             struct btstack * btstack, int flag)
568
{
569
        int rc = 0;
570
        int cmp = 1;            /* init for empty page */
571
        s64 bn;
572
        struct metapage *mp;
573
        dtpage_t *p;
574
        s8 *stbl;
575
        int base, index, lim;
576
        struct btframe *btsp;
577
        pxd_t *pxd;
578
        int psize = 288;        /* initial in-line directory */
579
        ino_t inumber;
580
        struct component_name ciKey;
581
        struct super_block *sb = ip->i_sb;
582
 
583
        ciKey.name =
584
            (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
585
                                GFP_NOFS);
586
        if (ciKey.name == 0) {
587
                rc = -ENOMEM;
588
                goto dtSearch_Exit2;
589
        }
590
 
591
 
592
        /* uppercase search key for c-i directory */
593
        UniStrcpy(ciKey.name, key->name);
594
        ciKey.namlen = key->namlen;
595
 
596
        /* only uppercase if case-insensitive support is on */
597
        if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
598
                ciToUpper(&ciKey);
599
        }
600
        BT_CLR(btstack);        /* reset stack */
601
 
602
        /* init level count for max pages to split */
603
        btstack->nsplit = 1;
604
 
605
        /*
606
         *      search down tree from root:
607
         *
608
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
609
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
610
         *
611
         * if entry with search key K is not found
612
         * internal page search find the entry with largest key Ki
613
         * less than K which point to the child page to search;
614
         * leaf page search find the entry with smallest key Kj
615
         * greater than K so that the returned index is the position of
616
         * the entry to be shifted right for insertion of new entry.
617
         * for empty tree, search key is greater than any key of the tree.
618
         *
619
         * by convention, root bn = 0.
620
         */
621
        for (bn = 0;;) {
622
                /* get/pin the page to search */
623
                DT_GETPAGE(ip, bn, mp, psize, p, rc);
624
                if (rc)
625
                        goto dtSearch_Exit1;
626
 
627
                /* get sorted entry table of the page */
628
                stbl = DT_GETSTBL(p);
629
 
630
                /*
631
                 * binary search with search key K on the current page.
632
                 */
633
                for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
634
                        index = base + (lim >> 1);
635
 
636
                        if (p->header.flag & BT_LEAF) {
637
                                /* uppercase leaf name to compare */
638
                                cmp =
639
                                    ciCompare(&ciKey, p, stbl[index],
640
                                              JFS_SBI(sb)->mntflag);
641
                        } else {
642
                                /* router key is in uppercase */
643
 
644
                                cmp = dtCompare(&ciKey, p, stbl[index]);
645
 
646
 
647
                        }
648
                        if (cmp == 0) {
649
                                /*
650
                                 *      search hit
651
                                 */
652
                                /* search hit - leaf page:
653
                                 * return the entry found
654
                                 */
655
                                if (p->header.flag & BT_LEAF) {
656
                                        inumber = le32_to_cpu(
657
                        ((struct ldtentry *) & p->slot[stbl[index]])->inumber);
658
 
659
                                        /*
660
                                         * search for JFS_LOOKUP
661
                                         */
662
                                        if (flag == JFS_LOOKUP) {
663
                                                *data = inumber;
664
                                                rc = 0;
665
                                                goto out;
666
                                        }
667
 
668
                                        /*
669
                                         * search for JFS_CREATE
670
                                         */
671
                                        if (flag == JFS_CREATE) {
672
                                                *data = inumber;
673
                                                rc = -EEXIST;
674
                                                goto out;
675
                                        }
676
 
677
                                        /*
678
                                         * search for JFS_REMOVE or JFS_RENAME
679
                                         */
680
                                        if ((flag == JFS_REMOVE ||
681
                                             flag == JFS_RENAME) &&
682
                                            *data != inumber) {
683
                                                rc = -ESTALE;
684
                                                goto out;
685
                                        }
686
 
687
                                        /*
688
                                         * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
689
                                         */
690
                                        /* save search result */
691
                                        *data = inumber;
692
                                        btsp = btstack->top;
693
                                        btsp->bn = bn;
694
                                        btsp->index = index;
695
                                        btsp->mp = mp;
696
 
697
                                        rc = 0;
698
                                        goto dtSearch_Exit1;
699
                                }
700
 
701
                                /* search hit - internal page:
702
                                 * descend/search its child page
703
                                 */
704
                                goto getChild;
705
                        }
706
 
707
                        if (cmp > 0) {
708
                                base = index + 1;
709
                                --lim;
710
                        }
711
                }
712
 
713
                /*
714
                 *      search miss
715
                 *
716
                 * base is the smallest index with key (Kj) greater than
717
                 * search key (K) and may be zero or (maxindex + 1) index.
718
                 */
719
                /*
720
                 * search miss - leaf page
721
                 *
722
                 * return location of entry (base) where new entry with
723
                 * search key K is to be inserted.
724
                 */
725
                if (p->header.flag & BT_LEAF) {
726
                        /*
727
                         * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
728
                         */
729
                        if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
730
                            flag == JFS_RENAME) {
731
                                rc = -ENOENT;
732
                                goto out;
733
                        }
734
 
735
                        /*
736
                         * search for JFS_CREATE|JFS_FINDDIR:
737
                         *
738
                         * save search result
739
                         */
740
                        *data = 0;
741
                        btsp = btstack->top;
742
                        btsp->bn = bn;
743
                        btsp->index = base;
744
                        btsp->mp = mp;
745
 
746
                        rc = 0;
747
                        goto dtSearch_Exit1;
748
                }
749
 
750
                /*
751
                 * search miss - internal page
752
                 *
753
                 * if base is non-zero, decrement base by one to get the parent
754
                 * entry of the child page to search.
755
                 */
756
                index = base ? base - 1 : base;
757
 
758
                /*
759
                 * go down to child page
760
                 */
761
              getChild:
762
                /* update max. number of pages to split */
763
                if (btstack->nsplit >= 8) {
764
                        /* Something's corrupted, mark filesytem dirty so
765
                         * chkdsk will fix it.
766
                         */
767
                        jfs_error(sb, "stack overrun in dtSearch!");
768
                        rc = -EIO;
769
                        goto out;
770
                }
771
                btstack->nsplit++;
772
 
773
                /* push (bn, index) of the parent page/entry */
774
                BT_PUSH(btstack, bn, index);
775
 
776
                /* get the child page block number */
777
                pxd = (pxd_t *) & p->slot[stbl[index]];
778
                bn = addressPXD(pxd);
779
                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
780
 
781
                /* unpin the parent page */
782
                DT_PUTPAGE(mp);
783
        }
784
 
785
      out:
786
        DT_PUTPAGE(mp);
787
 
788
      dtSearch_Exit1:
789
 
790
        kfree(ciKey.name);
791
 
792
      dtSearch_Exit2:
793
 
794
        return rc;
795
}
796
 
797
 
798
/*
799
 *      dtInsert()
800
 *
801
 * function: insert an entry to directory tree
802
 *
803
 * parameter:
804
 *
805
 * return: 0 - success;
806
 *         errno - failure;
807
 */
808
int dtInsert(tid_t tid, struct inode *ip,
809
         struct component_name * name, ino_t * fsn, struct btstack * btstack)
810
{
811
        int rc = 0;
812
        struct metapage *mp;    /* meta-page buffer */
813
        dtpage_t *p;            /* base B+-tree index page */
814
        s64 bn;
815
        int index;
816
        struct dtsplit split;   /* split information */
817
        ddata_t data;
818
        struct dt_lock *dtlck;
819
        int n;
820
        struct tlock *tlck;
821
        struct lv *lv;
822
 
823
        /*
824
         *      retrieve search result
825
         *
826
         * dtSearch() returns (leaf page pinned, index at which to insert).
827
         * n.b. dtSearch() may return index of (maxindex + 1) of
828
         * the full page.
829
         */
830
        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
831
 
832
        /*
833
         *      insert entry for new key
834
         */
835
        if (DO_INDEX(ip)) {
836
                if (JFS_IP(ip)->next_index == DIREND) {
837
                        DT_PUTPAGE(mp);
838
                        return -EMLINK;
839
                }
840
                n = NDTLEAF(name->namlen);
841
                data.leaf.tid = tid;
842
                data.leaf.ip = ip;
843
        } else {
844
                n = NDTLEAF_LEGACY(name->namlen);
845
                data.leaf.ip = 0;        /* signifies legacy directory format */
846
        }
847
        data.leaf.ino = cpu_to_le32(*fsn);
848
 
849
        /*
850
         *      leaf page does not have enough room for new entry:
851
         *
852
         *      extend/split the leaf page;
853
         *
854
         * dtSplitUp() will insert the entry and unpin the leaf page.
855
         */
856
        if (n > p->header.freecnt) {
857
                split.mp = mp;
858
                split.index = index;
859
                split.nslot = n;
860
                split.key = name;
861
                split.data = &data;
862
                rc = dtSplitUp(tid, ip, &split, btstack);
863
                return rc;
864
        }
865
 
866
        /*
867
         *      leaf page does have enough room for new entry:
868
         *
869
         *      insert the new data entry into the leaf page;
870
         */
871
        BT_MARK_DIRTY(mp, ip);
872
        /*
873
         * acquire a transaction lock on the leaf page
874
         */
875
        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
876
        dtlck = (struct dt_lock *) & tlck->lock;
877
        ASSERT(dtlck->index == 0);
878
        lv = & dtlck->lv[0];
879
 
880
        /* linelock header */
881
        lv->offset = 0;
882
        lv->length = 1;
883
        dtlck->index++;
884
 
885
        dtInsertEntry(p, index, name, &data, &dtlck);
886
 
887
        /* linelock stbl of non-root leaf page */
888
        if (!(p->header.flag & BT_ROOT)) {
889
                if (dtlck->index >= dtlck->maxcnt)
890
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
891
                lv = & dtlck->lv[dtlck->index];
892
                n = index >> L2DTSLOTSIZE;
893
                lv->offset = p->header.stblindex + n;
894
                lv->length =
895
                    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
896
                dtlck->index++;
897
        }
898
 
899
        /* unpin the leaf page */
900
        DT_PUTPAGE(mp);
901
 
902
        return 0;
903
}
904
 
905
 
906
/*
907
 *      dtSplitUp()
908
 *
909
 * function: propagate insertion bottom up;
910
 *
911
 * parameter:
912
 *
913
 * return: 0 - success;
914
 *         errno - failure;
915
 *      leaf page unpinned;
916
 */
917
static int dtSplitUp(tid_t tid,
918
          struct inode *ip, struct dtsplit * split, struct btstack * btstack)
919
{
920
        struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
921
        int rc = 0;
922
        struct metapage *smp;
923
        dtpage_t *sp;           /* split page */
924
        struct metapage *rmp;
925
        dtpage_t *rp;           /* new right page split from sp */
926
        pxd_t rpxd;             /* new right page extent descriptor */
927
        struct metapage *lmp;
928
        dtpage_t *lp;           /* left child page */
929
        int skip;               /* index of entry of insertion */
930
        struct btframe *parent; /* parent page entry on traverse stack */
931
        s64 xaddr, nxaddr;
932
        int xlen, xsize;
933
        struct pxdlist pxdlist;
934
        pxd_t *pxd;
935
        struct component_name key = { 0, 0 };
936
        ddata_t *data = split->data;
937
        int n;
938
        struct dt_lock *dtlck;
939
        struct tlock *tlck;
940
        struct lv *lv;
941
 
942
        /* get split page */
943
        smp = split->mp;
944
        sp = DT_PAGE(ip, smp);
945
 
946
        key.name =
947
            (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t),
948
                                GFP_NOFS);
949
        if (key.name == 0) {
950
                DT_PUTPAGE(smp);
951
                rc = -ENOMEM;
952
                goto dtSplitUp_Exit;
953
        }
954
 
955
        /*
956
         *      split leaf page
957
         *
958
         * The split routines insert the new entry, and
959
         * acquire txLock as appropriate.
960
         */
961
        /*
962
         *      split root leaf page:
963
         */
964
        if (sp->header.flag & BT_ROOT) {
965
                /*
966
                 * allocate a single extent child page
967
                 */
968
                xlen = 1;
969
                n = sbi->bsize >> L2DTSLOTSIZE;
970
                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
971
                n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
972
                if (n <= split->nslot)
973
                        xlen++;
974
                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)))
975
                        goto freeKeyName;
976
 
977
                pxdlist.maxnpxd = 1;
978
                pxdlist.npxd = 0;
979
                pxd = &pxdlist.pxd[0];
980
                PXDaddress(pxd, xaddr);
981
                PXDlength(pxd, xlen);
982
                split->pxdlist = &pxdlist;
983
                rc = dtSplitRoot(tid, ip, split, &rmp);
984
 
985
                DT_PUTPAGE(rmp);
986
                DT_PUTPAGE(smp);
987
 
988
                goto freeKeyName;
989
        }
990
 
991
        /*
992
         *      extend first leaf page
993
         *
994
         * extend the 1st extent if less than buffer page size
995
         * (dtExtendPage() reurns leaf page unpinned)
996
         */
997
        pxd = &sp->header.self;
998
        xlen = lengthPXD(pxd);
999
        xsize = xlen << sbi->l2bsize;
1000
        if (xsize < PSIZE) {
1001
                xaddr = addressPXD(pxd);
1002
                n = xsize >> L2DTSLOTSIZE;
1003
                n -= (n + 31) >> L2DTSLOTSIZE;  /* stbl size */
1004
                if ((n + sp->header.freecnt) <= split->nslot)
1005
                        n = xlen + (xlen << 1);
1006
                else
1007
                        n = xlen;
1008
                if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1009
                                    (s64) n, &nxaddr)))
1010
                        goto extendOut;
1011
 
1012
                pxdlist.maxnpxd = 1;
1013
                pxdlist.npxd = 0;
1014
                pxd = &pxdlist.pxd[0];
1015
                PXDaddress(pxd, nxaddr)
1016
                    PXDlength(pxd, xlen + n);
1017
                split->pxdlist = &pxdlist;
1018
                if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1019
                        nxaddr = addressPXD(pxd);
1020
                        if (xaddr != nxaddr) {
1021
                                /* free relocated extent */
1022
                                xlen = lengthPXD(pxd);
1023
                                dbFree(ip, nxaddr, (s64) xlen);
1024
                        } else {
1025
                                /* free extended delta */
1026
                                xlen = lengthPXD(pxd) - n;
1027
                                xaddr = addressPXD(pxd) + xlen;
1028
                                dbFree(ip, xaddr, (s64) n);
1029
                        }
1030
                }
1031
 
1032
              extendOut:
1033
                DT_PUTPAGE(smp);
1034
                goto freeKeyName;
1035
        }
1036
 
1037
        /*
1038
         *      split leaf page <sp> into <sp> and a new right page <rp>.
1039
         *
1040
         * return <rp> pinned and its extent descriptor <rpxd>
1041
         */
1042
        /*
1043
         * allocate new directory page extent and
1044
         * new index page(s) to cover page split(s)
1045
         *
1046
         * allocation hint: ?
1047
         */
1048
        n = btstack->nsplit;
1049
        pxdlist.maxnpxd = pxdlist.npxd = 0;
1050
        xlen = sbi->nbperpage;
1051
        for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1052
                if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1053
                        PXDaddress(pxd, xaddr);
1054
                        PXDlength(pxd, xlen);
1055
                        pxdlist.maxnpxd++;
1056
                        continue;
1057
                }
1058
 
1059
                DT_PUTPAGE(smp);
1060
 
1061
                /* undo allocation */
1062
                goto splitOut;
1063
        }
1064
 
1065
        split->pxdlist = &pxdlist;
1066
        if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1067
                DT_PUTPAGE(smp);
1068
 
1069
                /* undo allocation */
1070
                goto splitOut;
1071
        }
1072
 
1073
        /*
1074
         * propagate up the router entry for the leaf page just split
1075
         *
1076
         * insert a router entry for the new page into the parent page,
1077
         * propagate the insert/split up the tree by walking back the stack
1078
         * of (bn of parent page, index of child page entry in parent page)
1079
         * that were traversed during the search for the page that split.
1080
         *
1081
         * the propagation of insert/split up the tree stops if the root
1082
         * splits or the page inserted into doesn't have to split to hold
1083
         * the new entry.
1084
         *
1085
         * the parent entry for the split page remains the same, and
1086
         * a new entry is inserted at its right with the first key and
1087
         * block number of the new right page.
1088
         *
1089
         * There are a maximum of 4 pages pinned at any time:
1090
         * two children, left parent and right parent (when the parent splits).
1091
         * keep the child pages pinned while working on the parent.
1092
         * make sure that all pins are released at exit.
1093
         */
1094
        while ((parent = BT_POP(btstack)) != NULL) {
1095
                /* parent page specified by stack frame <parent> */
1096
 
1097
                /* keep current child pages (<lp>, <rp>) pinned */
1098
                lmp = smp;
1099
                lp = sp;
1100
 
1101
                /*
1102
                 * insert router entry in parent for new right child page <rp>
1103
                 */
1104
                /* get the parent page <sp> */
1105
                DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1106
                if (rc) {
1107
                        DT_PUTPAGE(lmp);
1108
                        DT_PUTPAGE(rmp);
1109
                        goto splitOut;
1110
                }
1111
 
1112
                /*
1113
                 * The new key entry goes ONE AFTER the index of parent entry,
1114
                 * because the split was to the right.
1115
                 */
1116
                skip = parent->index + 1;
1117
 
1118
                /*
1119
                 * compute the key for the router entry
1120
                 *
1121
                 * key suffix compression:
1122
                 * for internal pages that have leaf pages as children,
1123
                 * retain only what's needed to distinguish between
1124
                 * the new entry and the entry on the page to its left.
1125
                 * If the keys compare equal, retain the entire key.
1126
                 *
1127
                 * note that compression is performed only at computing
1128
                 * router key at the lowest internal level.
1129
                 * further compression of the key between pairs of higher
1130
                 * level internal pages loses too much information and
1131
                 * the search may fail.
1132
                 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1133
                 * results in two adjacent parent entries (a)(xx).
1134
                 * if split occurs between these two entries, and
1135
                 * if compression is applied, the router key of parent entry
1136
                 * of right page (x) will divert search for x into right
1137
                 * subtree and miss x in the left subtree.)
1138
                 *
1139
                 * the entire key must be retained for the next-to-leftmost
1140
                 * internal key at any level of the tree, or search may fail
1141
                 * (e.g., ?)
1142
                 */
1143
                switch (rp->header.flag & BT_TYPE) {
1144
                case BT_LEAF:
1145
                        /*
1146
                         * compute the length of prefix for suffix compression
1147
                         * between last entry of left page and first entry
1148
                         * of right page
1149
                         */
1150
                        if ((sp->header.flag & BT_ROOT && skip > 1) ||
1151
                            sp->header.prev != 0 || skip > 1) {
1152
                                /* compute uppercase router prefix key */
1153
                                ciGetLeafPrefixKey(lp,
1154
                                                   lp->header.nextindex - 1,
1155
                                                   rp, 0, &key, sbi->mntflag);
1156
                        } else {
1157
                                /* next to leftmost entry of
1158
                                   lowest internal level */
1159
 
1160
                                /* compute uppercase router key */
1161
                                dtGetKey(rp, 0, &key, sbi->mntflag);
1162
                                key.name[key.namlen] = 0;
1163
 
1164
                                if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1165
                                        ciToUpper(&key);
1166
                        }
1167
 
1168
                        n = NDTINTERNAL(key.namlen);
1169
                        break;
1170
 
1171
                case BT_INTERNAL:
1172
                        dtGetKey(rp, 0, &key, sbi->mntflag);
1173
                        n = NDTINTERNAL(key.namlen);
1174
                        break;
1175
 
1176
                default:
1177
                        jfs_err("dtSplitUp(): UFO!");
1178
                        break;
1179
                }
1180
 
1181
                /* unpin left child page */
1182
                DT_PUTPAGE(lmp);
1183
 
1184
                /*
1185
                 * compute the data for the router entry
1186
                 */
1187
                data->xd = rpxd;        /* child page xd */
1188
 
1189
                /*
1190
                 * parent page is full - split the parent page
1191
                 */
1192
                if (n > sp->header.freecnt) {
1193
                        /* init for parent page split */
1194
                        split->mp = smp;
1195
                        split->index = skip;    /* index at insert */
1196
                        split->nslot = n;
1197
                        split->key = &key;
1198
                        /* split->data = data; */
1199
 
1200
                        /* unpin right child page */
1201
                        DT_PUTPAGE(rmp);
1202
 
1203
                        /* The split routines insert the new entry,
1204
                         * acquire txLock as appropriate.
1205
                         * return <rp> pinned and its block number <rbn>.
1206
                         */
1207
                        rc = (sp->header.flag & BT_ROOT) ?
1208
                            dtSplitRoot(tid, ip, split, &rmp) :
1209
                            dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1210
                        if (rc) {
1211
                                DT_PUTPAGE(smp);
1212
                                goto splitOut;
1213
                        }
1214
 
1215
                        /* smp and rmp are pinned */
1216
                }
1217
                /*
1218
                 * parent page is not full - insert router entry in parent page
1219
                 */
1220
                else {
1221
                        BT_MARK_DIRTY(smp, ip);
1222
                        /*
1223
                         * acquire a transaction lock on the parent page
1224
                         */
1225
                        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1226
                        dtlck = (struct dt_lock *) & tlck->lock;
1227
                        ASSERT(dtlck->index == 0);
1228
                        lv = & dtlck->lv[0];
1229
 
1230
                        /* linelock header */
1231
                        lv->offset = 0;
1232
                        lv->length = 1;
1233
                        dtlck->index++;
1234
 
1235
                        /* linelock stbl of non-root parent page */
1236
                        if (!(sp->header.flag & BT_ROOT)) {
1237
                                lv++;
1238
                                n = skip >> L2DTSLOTSIZE;
1239
                                lv->offset = sp->header.stblindex + n;
1240
                                lv->length =
1241
                                    ((sp->header.nextindex -
1242
                                      1) >> L2DTSLOTSIZE) - n + 1;
1243
                                dtlck->index++;
1244
                        }
1245
 
1246
                        dtInsertEntry(sp, skip, &key, data, &dtlck);
1247
 
1248
                        /* exit propagate up */
1249
                        break;
1250
                }
1251
        }
1252
 
1253
        /* unpin current split and its right page */
1254
        DT_PUTPAGE(smp);
1255
        DT_PUTPAGE(rmp);
1256
 
1257
        /*
1258
         * free remaining extents allocated for split
1259
         */
1260
      splitOut:
1261
        n = pxdlist.npxd;
1262
        pxd = &pxdlist.pxd[n];
1263
        for (; n < pxdlist.maxnpxd; n++, pxd++)
1264
                dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1265
 
1266
      freeKeyName:
1267
        kfree(key.name);
1268
 
1269
      dtSplitUp_Exit:
1270
 
1271
        return rc;
1272
}
1273
 
1274
 
1275
/*
1276
 *      dtSplitPage()
1277
 *
1278
 * function: Split a non-root page of a btree.
1279
 *
1280
 * parameter:
1281
 *
1282
 * return: 0 - success;
1283
 *         errno - failure;
1284
 *      return split and new page pinned;
1285
 */
1286
static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1287
            struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1288
{
1289
        struct super_block *sb = ip->i_sb;
1290
        int rc = 0;
1291
        struct metapage *smp;
1292
        dtpage_t *sp;
1293
        struct metapage *rmp;
1294
        dtpage_t *rp;           /* new right page allocated */
1295
        s64 rbn;                /* new right page block number */
1296
        struct metapage *mp;
1297
        dtpage_t *p;
1298
        s64 nextbn;
1299
        struct pxdlist *pxdlist;
1300
        pxd_t *pxd;
1301
        int skip, nextindex, half, left, nxt, off, si;
1302
        struct ldtentry *ldtentry;
1303
        struct idtentry *idtentry;
1304
        u8 *stbl;
1305
        struct dtslot *f;
1306
        int fsi, stblsize;
1307
        int n;
1308
        struct dt_lock *sdtlck, *rdtlck;
1309
        struct tlock *tlck;
1310
        struct dt_lock *dtlck;
1311
        struct lv *slv, *rlv, *lv;
1312
 
1313
        /* get split page */
1314
        smp = split->mp;
1315
        sp = DT_PAGE(ip, smp);
1316
 
1317
        /*
1318
         * allocate the new right page for the split
1319
         */
1320
        pxdlist = split->pxdlist;
1321
        pxd = &pxdlist->pxd[pxdlist->npxd];
1322
        pxdlist->npxd++;
1323
        rbn = addressPXD(pxd);
1324
        rmp = get_metapage(ip, rbn, PSIZE, 1);
1325
        if (rmp == NULL)
1326
                return -EIO;
1327
 
1328
        jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1329
 
1330
        BT_MARK_DIRTY(rmp, ip);
1331
        /*
1332
         * acquire a transaction lock on the new right page
1333
         */
1334
        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1335
        rdtlck = (struct dt_lock *) & tlck->lock;
1336
 
1337
        rp = (dtpage_t *) rmp->data;
1338
        *rpp = rp;
1339
        rp->header.self = *pxd;
1340
 
1341
        BT_MARK_DIRTY(smp, ip);
1342
        /*
1343
         * acquire a transaction lock on the split page
1344
         *
1345
         * action:
1346
         */
1347
        tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1348
        sdtlck = (struct dt_lock *) & tlck->lock;
1349
 
1350
        /* linelock header of split page */
1351
        ASSERT(sdtlck->index == 0);
1352
        slv = & sdtlck->lv[0];
1353
        slv->offset = 0;
1354
        slv->length = 1;
1355
        sdtlck->index++;
1356
 
1357
        /*
1358
         * initialize/update sibling pointers between sp and rp
1359
         */
1360
        nextbn = le64_to_cpu(sp->header.next);
1361
        rp->header.next = cpu_to_le64(nextbn);
1362
        rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1363
        sp->header.next = cpu_to_le64(rbn);
1364
 
1365
        /*
1366
         * initialize new right page
1367
         */
1368
        rp->header.flag = sp->header.flag;
1369
 
1370
        /* compute sorted entry table at start of extent data area */
1371
        rp->header.nextindex = 0;
1372
        rp->header.stblindex = 1;
1373
 
1374
        n = PSIZE >> L2DTSLOTSIZE;
1375
        rp->header.maxslot = n;
1376
        stblsize = (n + 31) >> L2DTSLOTSIZE;    /* in unit of slot */
1377
 
1378
        /* init freelist */
1379
        fsi = rp->header.stblindex + stblsize;
1380
        rp->header.freelist = fsi;
1381
        rp->header.freecnt = rp->header.maxslot - fsi;
1382
 
1383
        /*
1384
         *      sequential append at tail: append without split
1385
         *
1386
         * If splitting the last page on a level because of appending
1387
         * a entry to it (skip is maxentry), it's likely that the access is
1388
         * sequential. Adding an empty page on the side of the level is less
1389
         * work and can push the fill factor much higher than normal.
1390
         * If we're wrong it's no big deal, we'll just do the split the right
1391
         * way next time.
1392
         * (It may look like it's equally easy to do a similar hack for
1393
         * reverse sorted data, that is, split the tree left,
1394
         * but it's not. Be my guest.)
1395
         */
1396
        if (nextbn == 0 && split->index == sp->header.nextindex) {
1397
                /* linelock header + stbl (first slot) of new page */
1398
                rlv = & rdtlck->lv[rdtlck->index];
1399
                rlv->offset = 0;
1400
                rlv->length = 2;
1401
                rdtlck->index++;
1402
 
1403
                /*
1404
                 * initialize freelist of new right page
1405
                 */
1406
                f = &rp->slot[fsi];
1407
                for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1408
                        f->next = fsi;
1409
                f->next = -1;
1410
 
1411
                /* insert entry at the first entry of the new right page */
1412
                dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1413
 
1414
                goto out;
1415
        }
1416
 
1417
        /*
1418
         *      non-sequential insert (at possibly middle page)
1419
         */
1420
 
1421
        /*
1422
         * update prev pointer of previous right sibling page;
1423
         */
1424
        if (nextbn != 0) {
1425
                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1426
                if (rc) {
1427
                        discard_metapage(rmp);
1428
                        return rc;
1429
                }
1430
 
1431
                BT_MARK_DIRTY(mp, ip);
1432
                /*
1433
                 * acquire a transaction lock on the next page
1434
                 */
1435
                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1436
                jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1437
                        tlck, ip, mp);
1438
                dtlck = (struct dt_lock *) & tlck->lock;
1439
 
1440
                /* linelock header of previous right sibling page */
1441
                lv = & dtlck->lv[dtlck->index];
1442
                lv->offset = 0;
1443
                lv->length = 1;
1444
                dtlck->index++;
1445
 
1446
                p->header.prev = cpu_to_le64(rbn);
1447
 
1448
                DT_PUTPAGE(mp);
1449
        }
1450
 
1451
        /*
1452
         * split the data between the split and right pages.
1453
         */
1454
        skip = split->index;
1455
        half = (PSIZE >> L2DTSLOTSIZE) >> 1;    /* swag */
1456
        left = 0;
1457
 
1458
        /*
1459
         *      compute fill factor for split pages
1460
         *
1461
         * <nxt> traces the next entry to move to rp
1462
         * <off> traces the next entry to stay in sp
1463
         */
1464
        stbl = (u8 *) & sp->slot[sp->header.stblindex];
1465
        nextindex = sp->header.nextindex;
1466
        for (nxt = off = 0; nxt < nextindex; ++off) {
1467
                if (off == skip)
1468
                        /* check for fill factor with new entry size */
1469
                        n = split->nslot;
1470
                else {
1471
                        si = stbl[nxt];
1472
                        switch (sp->header.flag & BT_TYPE) {
1473
                        case BT_LEAF:
1474
                                ldtentry = (struct ldtentry *) & sp->slot[si];
1475
                                if (DO_INDEX(ip))
1476
                                        n = NDTLEAF(ldtentry->namlen);
1477
                                else
1478
                                        n = NDTLEAF_LEGACY(ldtentry->
1479
                                                           namlen);
1480
                                break;
1481
 
1482
                        case BT_INTERNAL:
1483
                                idtentry = (struct idtentry *) & sp->slot[si];
1484
                                n = NDTINTERNAL(idtentry->namlen);
1485
                                break;
1486
 
1487
                        default:
1488
                                break;
1489
                        }
1490
 
1491
                        ++nxt;  /* advance to next entry to move in sp */
1492
                }
1493
 
1494
                left += n;
1495
                if (left >= half)
1496
                        break;
1497
        }
1498
 
1499
        /* <nxt> poins to the 1st entry to move */
1500
 
1501
        /*
1502
         *      move entries to right page
1503
         *
1504
         * dtMoveEntry() initializes rp and reserves entry for insertion
1505
         *
1506
         * split page moved out entries are linelocked;
1507
         * new/right page moved in entries are linelocked;
1508
         */
1509
        /* linelock header + stbl of new right page */
1510
        rlv = & rdtlck->lv[rdtlck->index];
1511
        rlv->offset = 0;
1512
        rlv->length = 5;
1513
        rdtlck->index++;
1514
 
1515
        dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1516
 
1517
        sp->header.nextindex = nxt;
1518
 
1519
        /*
1520
         * finalize freelist of new right page
1521
         */
1522
        fsi = rp->header.freelist;
1523
        f = &rp->slot[fsi];
1524
        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1525
                f->next = fsi;
1526
        f->next = -1;
1527
 
1528
        /*
1529
         * Update directory index table for entries now in right page
1530
         */
1531
        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1532
                s64 lblock;
1533
 
1534
                mp = 0;
1535
                stbl = DT_GETSTBL(rp);
1536
                for (n = 0; n < rp->header.nextindex; n++) {
1537
                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1538
                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1539
                                     rbn, n, &mp, &lblock);
1540
                }
1541
                if (mp)
1542
                        release_metapage(mp);
1543
        }
1544
 
1545
        /*
1546
         * the skipped index was on the left page,
1547
         */
1548
        if (skip <= off) {
1549
                /* insert the new entry in the split page */
1550
                dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1551
 
1552
                /* linelock stbl of split page */
1553
                if (sdtlck->index >= sdtlck->maxcnt)
1554
                        sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1555
                slv = & sdtlck->lv[sdtlck->index];
1556
                n = skip >> L2DTSLOTSIZE;
1557
                slv->offset = sp->header.stblindex + n;
1558
                slv->length =
1559
                    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1560
                sdtlck->index++;
1561
        }
1562
        /*
1563
         * the skipped index was on the right page,
1564
         */
1565
        else {
1566
                /* adjust the skip index to reflect the new position */
1567
                skip -= nxt;
1568
 
1569
                /* insert the new entry in the right page */
1570
                dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1571
        }
1572
 
1573
      out:
1574
        *rmpp = rmp;
1575
        *rpxdp = *pxd;
1576
 
1577
        ip->i_blocks += LBLK2PBLK(sb, lengthPXD(pxd));
1578
 
1579
        return rc;
1580
}
1581
 
1582
 
1583
/*
1584
 *      dtExtendPage()
1585
 *
1586
 * function: extend 1st/only directory leaf page
1587
 *
1588
 * parameter:
1589
 *
1590
 * return: 0 - success;
1591
 *         errno - failure;
1592
 *      return extended page pinned;
1593
 */
1594
static int dtExtendPage(tid_t tid,
1595
             struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1596
{
1597
        struct super_block *sb = ip->i_sb;
1598
        int rc;
1599
        struct metapage *smp, *pmp, *mp;
1600
        dtpage_t *sp, *pp;
1601
        struct pxdlist *pxdlist;
1602
        pxd_t *pxd, *tpxd;
1603
        int xlen, xsize;
1604
        int newstblindex, newstblsize;
1605
        int oldstblindex, oldstblsize;
1606
        int fsi, last;
1607
        struct dtslot *f;
1608
        struct btframe *parent;
1609
        int n;
1610
        struct dt_lock *dtlck;
1611
        s64 xaddr, txaddr;
1612
        struct tlock *tlck;
1613
        struct pxd_lock *pxdlock;
1614
        struct lv *lv;
1615
        uint type;
1616
        struct ldtentry *ldtentry;
1617
        u8 *stbl;
1618
 
1619
        /* get page to extend */
1620
        smp = split->mp;
1621
        sp = DT_PAGE(ip, smp);
1622
 
1623
        /* get parent/root page */
1624
        parent = BT_POP(btstack);
1625
        DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1626
        if (rc)
1627
                return (rc);
1628
 
1629
        /*
1630
         *      extend the extent
1631
         */
1632
        pxdlist = split->pxdlist;
1633
        pxd = &pxdlist->pxd[pxdlist->npxd];
1634
        pxdlist->npxd++;
1635
 
1636
        xaddr = addressPXD(pxd);
1637
        tpxd = &sp->header.self;
1638
        txaddr = addressPXD(tpxd);
1639
        /* in-place extension */
1640
        if (xaddr == txaddr) {
1641
                type = tlckEXTEND;
1642
        }
1643
        /* relocation */
1644
        else {
1645
                type = tlckNEW;
1646
 
1647
                /* save moved extent descriptor for later free */
1648
                tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1649
                pxdlock = (struct pxd_lock *) & tlck->lock;
1650
                pxdlock->flag = mlckFREEPXD;
1651
                pxdlock->pxd = sp->header.self;
1652
                pxdlock->index = 1;
1653
 
1654
                /*
1655
                 * Update directory index table to reflect new page address
1656
                 */
1657
                if (DO_INDEX(ip)) {
1658
                        s64 lblock;
1659
 
1660
                        mp = 0;
1661
                        stbl = DT_GETSTBL(sp);
1662
                        for (n = 0; n < sp->header.nextindex; n++) {
1663
                                ldtentry =
1664
                                    (struct ldtentry *) & sp->slot[stbl[n]];
1665
                                modify_index(tid, ip,
1666
                                             le32_to_cpu(ldtentry->index),
1667
                                             xaddr, n, &mp, &lblock);
1668
                        }
1669
                        if (mp)
1670
                                release_metapage(mp);
1671
                }
1672
        }
1673
 
1674
        /*
1675
         *      extend the page
1676
         */
1677
        sp->header.self = *pxd;
1678
 
1679
        jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1680
 
1681
        BT_MARK_DIRTY(smp, ip);
1682
        /*
1683
         * acquire a transaction lock on the extended/leaf page
1684
         */
1685
        tlck = txLock(tid, ip, smp, tlckDTREE | type);
1686
        dtlck = (struct dt_lock *) & tlck->lock;
1687
        lv = & dtlck->lv[0];
1688
 
1689
        /* update buffer extent descriptor of extended page */
1690
        xlen = lengthPXD(pxd);
1691
        xsize = xlen << JFS_SBI(sb)->l2bsize;
1692
#ifdef _STILL_TO_PORT
1693
        bmSetXD(smp, xaddr, xsize);
1694
#endif                          /*  _STILL_TO_PORT */
1695
 
1696
        /*
1697
         * copy old stbl to new stbl at start of extended area
1698
         */
1699
        oldstblindex = sp->header.stblindex;
1700
        oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1701
        newstblindex = sp->header.maxslot;
1702
        n = xsize >> L2DTSLOTSIZE;
1703
        newstblsize = (n + 31) >> L2DTSLOTSIZE;
1704
        memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1705
               sp->header.nextindex);
1706
 
1707
        /*
1708
         * in-line extension: linelock old area of extended page
1709
         */
1710
        if (type == tlckEXTEND) {
1711
                /* linelock header */
1712
                lv->offset = 0;
1713
                lv->length = 1;
1714
                dtlck->index++;
1715
                lv++;
1716
 
1717
                /* linelock new stbl of extended page */
1718
                lv->offset = newstblindex;
1719
                lv->length = newstblsize;
1720
        }
1721
        /*
1722
         * relocation: linelock whole relocated area
1723
         */
1724
        else {
1725
                lv->offset = 0;
1726
                lv->length = sp->header.maxslot + newstblsize;
1727
        }
1728
 
1729
        dtlck->index++;
1730
 
1731
        sp->header.maxslot = n;
1732
        sp->header.stblindex = newstblindex;
1733
        /* sp->header.nextindex remains the same */
1734
 
1735
        /*
1736
         * add old stbl region at head of freelist
1737
         */
1738
        fsi = oldstblindex;
1739
        f = &sp->slot[fsi];
1740
        last = sp->header.freelist;
1741
        for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1742
                f->next = last;
1743
                last = fsi;
1744
        }
1745
        sp->header.freelist = last;
1746
        sp->header.freecnt += oldstblsize;
1747
 
1748
        /*
1749
         * append free region of newly extended area at tail of freelist
1750
         */
1751
        /* init free region of newly extended area */
1752
        fsi = n = newstblindex + newstblsize;
1753
        f = &sp->slot[fsi];
1754
        for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1755
                f->next = fsi;
1756
        f->next = -1;
1757
 
1758
        /* append new free region at tail of old freelist */
1759
        fsi = sp->header.freelist;
1760
        if (fsi == -1)
1761
                sp->header.freelist = n;
1762
        else {
1763
                do {
1764
                        f = &sp->slot[fsi];
1765
                        fsi = f->next;
1766
                } while (fsi != -1);
1767
 
1768
                f->next = n;
1769
        }
1770
 
1771
        sp->header.freecnt += sp->header.maxslot - n;
1772
 
1773
        /*
1774
         * insert the new entry
1775
         */
1776
        dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1777
 
1778
        BT_MARK_DIRTY(pmp, ip);
1779
        /*
1780
         * linelock any freeslots residing in old extent
1781
         */
1782
        if (type == tlckEXTEND) {
1783
                n = sp->header.maxslot >> 2;
1784
                if (sp->header.freelist < n)
1785
                        dtLinelockFreelist(sp, n, &dtlck);
1786
        }
1787
 
1788
        /*
1789
         *      update parent entry on the parent/root page
1790
         */
1791
        /*
1792
         * acquire a transaction lock on the parent/root page
1793
         */
1794
        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1795
        dtlck = (struct dt_lock *) & tlck->lock;
1796
        lv = & dtlck->lv[dtlck->index];
1797
 
1798
        /* linelock parent entry - 1st slot */
1799
        lv->offset = 1;
1800
        lv->length = 1;
1801
        dtlck->index++;
1802
 
1803
        /* update the parent pxd for page extension */
1804
        tpxd = (pxd_t *) & pp->slot[1];
1805
        *tpxd = *pxd;
1806
 
1807
        /* Since the directory might have an EA and/or ACL associated with it
1808
         * we need to make sure we take that into account when setting the
1809
         * i_nblocks
1810
         */
1811
        ip->i_blocks = LBLK2PBLK(ip->i_sb, xlen +
1812
                                 ((JFS_IP(ip)->ea.flag & DXD_EXTENT) ?
1813
                                  lengthDXD(&JFS_IP(ip)->ea) : 0) +
1814
                                 ((JFS_IP(ip)->acl.flag & DXD_EXTENT) ?
1815
                                  lengthDXD(&JFS_IP(ip)->acl) : 0));
1816
 
1817
        DT_PUTPAGE(pmp);
1818
        return 0;
1819
}
1820
 
1821
 
1822
/*
1823
 *      dtSplitRoot()
1824
 *
1825
 * function:
1826
 *      split the full root page into
1827
 *      original/root/split page and new right page
1828
 *      i.e., root remains fixed in tree anchor (inode) and
1829
 *      the root is copied to a single new right child page
1830
 *      since root page << non-root page, and
1831
 *      the split root page contains a single entry for the
1832
 *      new right child page.
1833
 *
1834
 * parameter:
1835
 *
1836
 * return: 0 - success;
1837
 *         errno - failure;
1838
 *      return new page pinned;
1839
 */
1840
static int dtSplitRoot(tid_t tid,
1841
            struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1842
{
1843
        struct super_block *sb = ip->i_sb;
1844
        struct metapage *smp;
1845
        dtroot_t *sp;
1846
        struct metapage *rmp;
1847
        dtpage_t *rp;
1848
        s64 rbn;
1849
        int xlen;
1850
        int xsize;
1851
        struct dtslot *f;
1852
        s8 *stbl;
1853
        int fsi, stblsize, n;
1854
        struct idtentry *s;
1855
        pxd_t *ppxd;
1856
        struct pxdlist *pxdlist;
1857
        pxd_t *pxd;
1858
        struct dt_lock *dtlck;
1859
        struct tlock *tlck;
1860
        struct lv *lv;
1861
 
1862
        /* get split root page */
1863
        smp = split->mp;
1864
        sp = &JFS_IP(ip)->i_dtroot;
1865
 
1866
        /*
1867
         *      allocate/initialize a single (right) child page
1868
         *
1869
         * N.B. at first split, a one (or two) block to fit new entry
1870
         * is allocated; at subsequent split, a full page is allocated;
1871
         */
1872
        pxdlist = split->pxdlist;
1873
        pxd = &pxdlist->pxd[pxdlist->npxd];
1874
        pxdlist->npxd++;
1875
        rbn = addressPXD(pxd);
1876
        xlen = lengthPXD(pxd);
1877
        xsize = xlen << JFS_SBI(sb)->l2bsize;
1878
        rmp = get_metapage(ip, rbn, xsize, 1);
1879
        rp = rmp->data;
1880
 
1881
        BT_MARK_DIRTY(rmp, ip);
1882
        /*
1883
         * acquire a transaction lock on the new right page
1884
         */
1885
        tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1886
        dtlck = (struct dt_lock *) & tlck->lock;
1887
 
1888
        rp->header.flag =
1889
            (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1890
        rp->header.self = *pxd;
1891
 
1892
        /* initialize sibling pointers */
1893
        rp->header.next = 0;
1894
        rp->header.prev = 0;
1895
 
1896
        /*
1897
         *      move in-line root page into new right page extent
1898
         */
1899
        /* linelock header + copied entries + new stbl (1st slot) in new page */
1900
        ASSERT(dtlck->index == 0);
1901
        lv = & dtlck->lv[0];
1902
        lv->offset = 0;
1903
        lv->length = 10;        /* 1 + 8 + 1 */
1904
        dtlck->index++;
1905
 
1906
        n = xsize >> L2DTSLOTSIZE;
1907
        rp->header.maxslot = n;
1908
        stblsize = (n + 31) >> L2DTSLOTSIZE;
1909
 
1910
        /* copy old stbl to new stbl at start of extended area */
1911
        rp->header.stblindex = DTROOTMAXSLOT;
1912
        stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1913
        memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1914
        rp->header.nextindex = sp->header.nextindex;
1915
 
1916
        /* copy old data area to start of new data area */
1917
        memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1918
 
1919
        /*
1920
         * append free region of newly extended area at tail of freelist
1921
         */
1922
        /* init free region of newly extended area */
1923
        fsi = n = DTROOTMAXSLOT + stblsize;
1924
        f = &rp->slot[fsi];
1925
        for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1926
                f->next = fsi;
1927
        f->next = -1;
1928
 
1929
        /* append new free region at tail of old freelist */
1930
        fsi = sp->header.freelist;
1931
        if (fsi == -1)
1932
                rp->header.freelist = n;
1933
        else {
1934
                rp->header.freelist = fsi;
1935
 
1936
                do {
1937
                        f = &rp->slot[fsi];
1938
                        fsi = f->next;
1939
                } while (fsi != -1);
1940
 
1941
                f->next = n;
1942
        }
1943
 
1944
        rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1945
 
1946
        /*
1947
         * Update directory index table for entries now in right page
1948
         */
1949
        if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1950
                s64 lblock;
1951
                struct metapage *mp = 0;
1952
                struct ldtentry *ldtentry;
1953
 
1954
                stbl = DT_GETSTBL(rp);
1955
                for (n = 0; n < rp->header.nextindex; n++) {
1956
                        ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1957
                        modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1958
                                     rbn, n, &mp, &lblock);
1959
                }
1960
                if (mp)
1961
                        release_metapage(mp);
1962
        }
1963
        /*
1964
         * insert the new entry into the new right/child page
1965
         * (skip index in the new right page will not change)
1966
         */
1967
        dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
1968
 
1969
        /*
1970
         *      reset parent/root page
1971
         *
1972
         * set the 1st entry offset to 0, which force the left-most key
1973
         * at any level of the tree to be less than any search key.
1974
         *
1975
         * The btree comparison code guarantees that the left-most key on any
1976
         * level of the tree is never used, so it doesn't need to be filled in.
1977
         */
1978
        BT_MARK_DIRTY(smp, ip);
1979
        /*
1980
         * acquire a transaction lock on the root page (in-memory inode)
1981
         */
1982
        tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
1983
        dtlck = (struct dt_lock *) & tlck->lock;
1984
 
1985
        /* linelock root */
1986
        ASSERT(dtlck->index == 0);
1987
        lv = & dtlck->lv[0];
1988
        lv->offset = 0;
1989
        lv->length = DTROOTMAXSLOT;
1990
        dtlck->index++;
1991
 
1992
        /* update page header of root */
1993
        if (sp->header.flag & BT_LEAF) {
1994
                sp->header.flag &= ~BT_LEAF;
1995
                sp->header.flag |= BT_INTERNAL;
1996
        }
1997
 
1998
        /* init the first entry */
1999
        s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2000
        ppxd = (pxd_t *) s;
2001
        *ppxd = *pxd;
2002
        s->next = -1;
2003
        s->namlen = 0;
2004
 
2005
        stbl = sp->header.stbl;
2006
        stbl[0] = DTENTRYSTART;
2007
        sp->header.nextindex = 1;
2008
 
2009
        /* init freelist */
2010
        fsi = DTENTRYSTART + 1;
2011
        f = &sp->slot[fsi];
2012
 
2013
        /* init free region of remaining area */
2014
        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2015
                f->next = fsi;
2016
        f->next = -1;
2017
 
2018
        sp->header.freelist = DTENTRYSTART + 1;
2019
        sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2020
 
2021
        *rmpp = rmp;
2022
 
2023
        ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
2024
        return 0;
2025
}
2026
 
2027
 
2028
/*
2029
 *      dtDelete()
2030
 *
2031
 * function: delete the entry(s) referenced by a key.
2032
 *
2033
 * parameter:
2034
 *
2035
 * return:
2036
 */
2037
int dtDelete(tid_t tid,
2038
         struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2039
{
2040
        int rc = 0;
2041
        s64 bn;
2042
        struct metapage *mp, *imp;
2043
        dtpage_t *p;
2044
        int index;
2045
        struct btstack btstack;
2046
        struct dt_lock *dtlck;
2047
        struct tlock *tlck;
2048
        struct lv *lv;
2049
        int i;
2050
        struct ldtentry *ldtentry;
2051
        u8 *stbl;
2052
        u32 table_index, next_index;
2053
        struct metapage *nmp;
2054
        dtpage_t *np;
2055
 
2056
        /*
2057
         *      search for the entry to delete:
2058
         *
2059
         * dtSearch() returns (leaf page pinned, index at which to delete).
2060
         */
2061
        if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2062
                return rc;
2063
 
2064
        /* retrieve search result */
2065
        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2066
 
2067
        /*
2068
         * We need to find put the index of the next entry into the
2069
         * directory index table in order to resume a readdir from this
2070
         * entry.
2071
         */
2072
        if (DO_INDEX(ip)) {
2073
                stbl = DT_GETSTBL(p);
2074
                ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2075
                table_index = le32_to_cpu(ldtentry->index);
2076
                if (index == (p->header.nextindex - 1)) {
2077
                        /*
2078
                         * Last entry in this leaf page
2079
                         */
2080
                        if ((p->header.flag & BT_ROOT)
2081
                            || (p->header.next == 0))
2082
                                next_index = -1;
2083
                        else {
2084
                                /* Read next leaf page */
2085
                                DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2086
                                           nmp, PSIZE, np, rc);
2087
                                if (rc)
2088
                                        next_index = -1;
2089
                                else {
2090
                                        stbl = DT_GETSTBL(np);
2091
                                        ldtentry =
2092
                                            (struct ldtentry *) & np->
2093
                                            slot[stbl[0]];
2094
                                        next_index =
2095
                                            le32_to_cpu(ldtentry->index);
2096
                                        DT_PUTPAGE(nmp);
2097
                                }
2098
                        }
2099
                } else {
2100
                        ldtentry =
2101
                            (struct ldtentry *) & p->slot[stbl[index + 1]];
2102
                        next_index = le32_to_cpu(ldtentry->index);
2103
                }
2104
                free_index(tid, ip, table_index, next_index);
2105
        }
2106
        /*
2107
         * the leaf page becomes empty, delete the page
2108
         */
2109
        if (p->header.nextindex == 1) {
2110
                /* delete empty page */
2111
                rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2112
        }
2113
        /*
2114
         * the leaf page has other entries remaining:
2115
         *
2116
         * delete the entry from the leaf page.
2117
         */
2118
        else {
2119
                BT_MARK_DIRTY(mp, ip);
2120
                /*
2121
                 * acquire a transaction lock on the leaf page
2122
                 */
2123
                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2124
                dtlck = (struct dt_lock *) & tlck->lock;
2125
 
2126
                /*
2127
                 * Do not assume that dtlck->index will be zero.  During a
2128
                 * rename within a directory, this transaction may have
2129
                 * modified this page already when adding the new entry.
2130
                 */
2131
 
2132
                /* linelock header */
2133
                if (dtlck->index >= dtlck->maxcnt)
2134
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2135
                lv = & dtlck->lv[dtlck->index];
2136
                lv->offset = 0;
2137
                lv->length = 1;
2138
                dtlck->index++;
2139
 
2140
                /* linelock stbl of non-root leaf page */
2141
                if (!(p->header.flag & BT_ROOT)) {
2142
                        if (dtlck->index >= dtlck->maxcnt)
2143
                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2144
                        lv = & dtlck->lv[dtlck->index];
2145
                        i = index >> L2DTSLOTSIZE;
2146
                        lv->offset = p->header.stblindex + i;
2147
                        lv->length =
2148
                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2149
                            i + 1;
2150
                        dtlck->index++;
2151
                }
2152
 
2153
                /* free the leaf entry */
2154
                dtDeleteEntry(p, index, &dtlck);
2155
 
2156
                /*
2157
                 * Update directory index table for entries moved in stbl
2158
                 */
2159
                if (DO_INDEX(ip) && index < p->header.nextindex) {
2160
                        s64 lblock;
2161
 
2162
                        imp = 0;
2163
                        stbl = DT_GETSTBL(p);
2164
                        for (i = index; i < p->header.nextindex; i++) {
2165
                                ldtentry =
2166
                                    (struct ldtentry *) & p->slot[stbl[i]];
2167
                                modify_index(tid, ip,
2168
                                             le32_to_cpu(ldtentry->index),
2169
                                             bn, i, &imp, &lblock);
2170
                        }
2171
                        if (imp)
2172
                                release_metapage(imp);
2173
                }
2174
 
2175
                DT_PUTPAGE(mp);
2176
        }
2177
 
2178
        return rc;
2179
}
2180
 
2181
 
2182
/*
2183
 *      dtDeleteUp()
2184
 *
2185
 * function:
2186
 *      free empty pages as propagating deletion up the tree
2187
 *
2188
 * parameter:
2189
 *
2190
 * return:
2191
 */
2192
static int dtDeleteUp(tid_t tid, struct inode *ip,
2193
           struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2194
{
2195
        int rc = 0;
2196
        struct metapage *mp;
2197
        dtpage_t *p;
2198
        int index, nextindex;
2199
        int xlen;
2200
        struct btframe *parent;
2201
        struct dt_lock *dtlck;
2202
        struct tlock *tlck;
2203
        struct lv *lv;
2204
        struct pxd_lock *pxdlock;
2205
        int i;
2206
 
2207
        /*
2208
         *      keep the root leaf page which has become empty
2209
         */
2210
        if (BT_IS_ROOT(fmp)) {
2211
                /*
2212
                 * reset the root
2213
                 *
2214
                 * dtInitRoot() acquires txlock on the root
2215
                 */
2216
                dtInitRoot(tid, ip, PARENT(ip));
2217
 
2218
                DT_PUTPAGE(fmp);
2219
 
2220
                return 0;
2221
        }
2222
 
2223
        /*
2224
         *      free the non-root leaf page
2225
         */
2226
        /*
2227
         * acquire a transaction lock on the page
2228
         *
2229
         * write FREEXTENT|NOREDOPAGE log record
2230
         * N.B. linelock is overlaid as freed extent descriptor, and
2231
         * the buffer page is freed;
2232
         */
2233
        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2234
        pxdlock = (struct pxd_lock *) & tlck->lock;
2235
        pxdlock->flag = mlckFREEPXD;
2236
        pxdlock->pxd = fp->header.self;
2237
        pxdlock->index = 1;
2238
 
2239
        /* update sibling pointers */
2240
        if ((rc = dtRelink(tid, ip, fp))) {
2241
                BT_PUTPAGE(fmp);
2242
                return rc;
2243
        }
2244
 
2245
        xlen = lengthPXD(&fp->header.self);
2246
        ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen);
2247
 
2248
        /* free/invalidate its buffer page */
2249
        discard_metapage(fmp);
2250
 
2251
        /*
2252
         *      propagate page deletion up the directory tree
2253
         *
2254
         * If the delete from the parent page makes it empty,
2255
         * continue all the way up the tree.
2256
         * stop if the root page is reached (which is never deleted) or
2257
         * if the entry deletion does not empty the page.
2258
         */
2259
        while ((parent = BT_POP(btstack)) != NULL) {
2260
                /* pin the parent page <sp> */
2261
                DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2262
                if (rc)
2263
                        return rc;
2264
 
2265
                /*
2266
                 * free the extent of the child page deleted
2267
                 */
2268
                index = parent->index;
2269
 
2270
                /*
2271
                 * delete the entry for the child page from parent
2272
                 */
2273
                nextindex = p->header.nextindex;
2274
 
2275
                /*
2276
                 * the parent has the single entry being deleted:
2277
                 *
2278
                 * free the parent page which has become empty.
2279
                 */
2280
                if (nextindex == 1) {
2281
                        /*
2282
                         * keep the root internal page which has become empty
2283
                         */
2284
                        if (p->header.flag & BT_ROOT) {
2285
                                /*
2286
                                 * reset the root
2287
                                 *
2288
                                 * dtInitRoot() acquires txlock on the root
2289
                                 */
2290
                                dtInitRoot(tid, ip, PARENT(ip));
2291
 
2292
                                DT_PUTPAGE(mp);
2293
 
2294
                                return 0;
2295
                        }
2296
                        /*
2297
                         * free the parent page
2298
                         */
2299
                        else {
2300
                                /*
2301
                                 * acquire a transaction lock on the page
2302
                                 *
2303
                                 * write FREEXTENT|NOREDOPAGE log record
2304
                                 */
2305
                                tlck =
2306
                                    txMaplock(tid, ip,
2307
                                              tlckDTREE | tlckFREE);
2308
                                pxdlock = (struct pxd_lock *) & tlck->lock;
2309
                                pxdlock->flag = mlckFREEPXD;
2310
                                pxdlock->pxd = p->header.self;
2311
                                pxdlock->index = 1;
2312
 
2313
                                /* update sibling pointers */
2314
                                if ((rc = dtRelink(tid, ip, p))) {
2315
                                        DT_PUTPAGE(mp);
2316
                                        return rc;
2317
                                }
2318
 
2319
                                xlen = lengthPXD(&p->header.self);
2320
                                ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen);
2321
 
2322
                                /* free/invalidate its buffer page */
2323
                                discard_metapage(mp);
2324
 
2325
                                /* propagate up */
2326
                                continue;
2327
                        }
2328
                }
2329
 
2330
                /*
2331
                 * the parent has other entries remaining:
2332
                 *
2333
                 * delete the router entry from the parent page.
2334
                 */
2335
                BT_MARK_DIRTY(mp, ip);
2336
                /*
2337
                 * acquire a transaction lock on the page
2338
                 *
2339
                 * action: router entry deletion
2340
                 */
2341
                tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2342
                dtlck = (struct dt_lock *) & tlck->lock;
2343
 
2344
                /* linelock header */
2345
                if (dtlck->index >= dtlck->maxcnt)
2346
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2347
                lv = & dtlck->lv[dtlck->index];
2348
                lv->offset = 0;
2349
                lv->length = 1;
2350
                dtlck->index++;
2351
 
2352
                /* linelock stbl of non-root leaf page */
2353
                if (!(p->header.flag & BT_ROOT)) {
2354
                        if (dtlck->index < dtlck->maxcnt)
2355
                                lv++;
2356
                        else {
2357
                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2358
                                lv = & dtlck->lv[0];
2359
                        }
2360
                        i = index >> L2DTSLOTSIZE;
2361
                        lv->offset = p->header.stblindex + i;
2362
                        lv->length =
2363
                            ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2364
                            i + 1;
2365
                        dtlck->index++;
2366
                }
2367
 
2368
                /* free the router entry */
2369
                dtDeleteEntry(p, index, &dtlck);
2370
 
2371
                /* reset key of new leftmost entry of level (for consistency) */
2372
                if (index == 0 &&
2373
                    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2374
                        dtTruncateEntry(p, 0, &dtlck);
2375
 
2376
                /* unpin the parent page */
2377
                DT_PUTPAGE(mp);
2378
 
2379
                /* exit propagation up */
2380
                break;
2381
        }
2382
 
2383
        return 0;
2384
}
2385
 
2386
#ifdef _NOTYET
2387
/*
2388
 * NAME:        dtRelocate()
2389
 *
2390
 * FUNCTION:    relocate dtpage (internal or leaf) of directory;
2391
 *              This function is mainly used by defragfs utility.
2392
 */
2393
int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2394
               s64 nxaddr)
2395
{
2396
        int rc = 0;
2397
        struct metapage *mp, *pmp, *lmp, *rmp;
2398
        dtpage_t *p, *pp, *rp = 0, *lp= 0;
2399
        s64 bn;
2400
        int index;
2401
        struct btstack btstack;
2402
        pxd_t *pxd;
2403
        s64 oxaddr, nextbn, prevbn;
2404
        int xlen, xsize;
2405
        struct tlock *tlck;
2406
        struct dt_lock *dtlck;
2407
        struct pxd_lock *pxdlock;
2408
        s8 *stbl;
2409
        struct lv *lv;
2410
 
2411
        oxaddr = addressPXD(opxd);
2412
        xlen = lengthPXD(opxd);
2413
 
2414
        jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
2415
                   (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
2416
                   xlen);
2417
 
2418
        /*
2419
         *      1. get the internal parent dtpage covering
2420
         *      router entry for the tartget page to be relocated;
2421
         */
2422
        rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2423
        if (rc)
2424
                return rc;
2425
 
2426
        /* retrieve search result */
2427
        DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2428
        jfs_info("dtRelocate: parent router entry validated.");
2429
 
2430
        /*
2431
         *      2. relocate the target dtpage
2432
         */
2433
        /* read in the target page from src extent */
2434
        DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2435
        if (rc) {
2436
                /* release the pinned parent page */
2437
                DT_PUTPAGE(pmp);
2438
                return rc;
2439
        }
2440
 
2441
        /*
2442
         * read in sibling pages if any to update sibling pointers;
2443
         */
2444
        rmp = NULL;
2445
        if (p->header.next) {
2446
                nextbn = le64_to_cpu(p->header.next);
2447
                DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2448
                if (rc) {
2449
                        DT_PUTPAGE(mp);
2450
                        DT_PUTPAGE(pmp);
2451
                        return (rc);
2452
                }
2453
        }
2454
 
2455
        lmp = NULL;
2456
        if (p->header.prev) {
2457
                prevbn = le64_to_cpu(p->header.prev);
2458
                DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2459
                if (rc) {
2460
                        DT_PUTPAGE(mp);
2461
                        DT_PUTPAGE(pmp);
2462
                        if (rmp)
2463
                                DT_PUTPAGE(rmp);
2464
                        return (rc);
2465
                }
2466
        }
2467
 
2468
        /* at this point, all xtpages to be updated are in memory */
2469
 
2470
        /*
2471
         * update sibling pointers of sibling dtpages if any;
2472
         */
2473
        if (lmp) {
2474
                tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
2475
                dtlck = (struct dt_lock *) & tlck->lock;
2476
                /* linelock header */
2477
                ASSERT(dtlck->index == 0);
2478
                lv = & dtlck->lv[0];
2479
                lv->offset = 0;
2480
                lv->length = 1;
2481
                dtlck->index++;
2482
 
2483
                lp->header.next = cpu_to_le64(nxaddr);
2484
                DT_PUTPAGE(lmp);
2485
        }
2486
 
2487
        if (rmp) {
2488
                tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
2489
                dtlck = (struct dt_lock *) & tlck->lock;
2490
                /* linelock header */
2491
                ASSERT(dtlck->index == 0);
2492
                lv = & dtlck->lv[0];
2493
                lv->offset = 0;
2494
                lv->length = 1;
2495
                dtlck->index++;
2496
 
2497
                rp->header.prev = cpu_to_le64(nxaddr);
2498
                DT_PUTPAGE(rmp);
2499
        }
2500
 
2501
        /*
2502
         * update the target dtpage to be relocated
2503
         *
2504
         * write LOG_REDOPAGE of LOG_NEW type for dst page
2505
         * for the whole target page (logredo() will apply
2506
         * after image and update bmap for allocation of the
2507
         * dst extent), and update bmap for allocation of
2508
         * the dst extent;
2509
         */
2510
        tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
2511
        dtlck = (struct dt_lock *) & tlck->lock;
2512
        /* linelock header */
2513
        ASSERT(dtlck->index == 0);
2514
        lv = & dtlck->lv[0];
2515
 
2516
        /* update the self address in the dtpage header */
2517
        pxd = &p->header.self;
2518
        PXDaddress(pxd, nxaddr);
2519
 
2520
        /* the dst page is the same as the src page, i.e.,
2521
         * linelock for afterimage of the whole page;
2522
         */
2523
        lv->offset = 0;
2524
        lv->length = p->header.maxslot;
2525
        dtlck->index++;
2526
 
2527
        /* update the buffer extent descriptor of the dtpage */
2528
        xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2529
#ifdef _STILL_TO_PORT
2530
        bmSetXD(mp, nxaddr, xsize);
2531
#endif /* _STILL_TO_PORT */
2532
        /* unpin the relocated page */
2533
        DT_PUTPAGE(mp);
2534
        jfs_info("dtRelocate: target dtpage relocated.");
2535
 
2536
        /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
2537
         * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
2538
         * will also force a bmap update ).
2539
         */
2540
 
2541
        /*
2542
         *      3. acquire maplock for the source extent to be freed;
2543
         */
2544
        /* for dtpage relocation, write a LOG_NOREDOPAGE record
2545
         * for the source dtpage (logredo() will init NoRedoPage
2546
         * filter and will also update bmap for free of the source
2547
         * dtpage), and upadte bmap for free of the source dtpage;
2548
         */
2549
        tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2550
        pxdlock = (struct pxd_lock *) & tlck->lock;
2551
        pxdlock->flag = mlckFREEPXD;
2552
        PXDaddress(&pxdlock->pxd, oxaddr);
2553
        PXDlength(&pxdlock->pxd, xlen);
2554
        pxdlock->index = 1;
2555
 
2556
        /*
2557
         *      4. update the parent router entry for relocation;
2558
         *
2559
         * acquire tlck for the parent entry covering the target dtpage;
2560
         * write LOG_REDOPAGE to apply after image only;
2561
         */
2562
        jfs_info("dtRelocate: update parent router entry.");
2563
        tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
2564
        dtlck = (struct dt_lock *) & tlck->lock;
2565
        lv = & dtlck->lv[dtlck->index];
2566
 
2567
        /* update the PXD with the new address */
2568
        stbl = DT_GETSTBL(pp);
2569
        pxd = (pxd_t *) & pp->slot[stbl[index]];
2570
        PXDaddress(pxd, nxaddr);
2571
        lv->offset = stbl[index];
2572
        lv->length = 1;
2573
        dtlck->index++;
2574
 
2575
        /* unpin the parent dtpage */
2576
        DT_PUTPAGE(pmp);
2577
 
2578
        return rc;
2579
}
2580
 
2581
/*
2582
 * NAME:        dtSearchNode()
2583
 *
2584
 * FUNCTION:    Search for an dtpage containing a specified address
2585
 *              This function is mainly used by defragfs utility.
2586
 *
2587
 * NOTE:        Search result on stack, the found page is pinned at exit.
2588
 *              The result page must be an internal dtpage.
2589
 *              lmxaddr give the address of the left most page of the
2590
 *              dtree level, in which the required dtpage resides.
2591
 */
2592
static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2593
                        struct btstack * btstack)
2594
{
2595
        int rc = 0;
2596
        s64 bn;
2597
        struct metapage *mp;
2598
        dtpage_t *p;
2599
        int psize = 288;        /* initial in-line directory */
2600
        s8 *stbl;
2601
        int i;
2602
        pxd_t *pxd;
2603
        struct btframe *btsp;
2604
 
2605
        BT_CLR(btstack);        /* reset stack */
2606
 
2607
        /*
2608
         *      descend tree to the level with specified leftmost page
2609
         *
2610
         *  by convention, root bn = 0.
2611
         */
2612
        for (bn = 0;;) {
2613
                /* get/pin the page to search */
2614
                DT_GETPAGE(ip, bn, mp, psize, p, rc);
2615
                if (rc)
2616
                        return rc;
2617
 
2618
                /* does the xaddr of leftmost page of the levevl
2619
                 * matches levevl search key ?
2620
                 */
2621
                if (p->header.flag & BT_ROOT) {
2622
                        if (lmxaddr == 0)
2623
                                break;
2624
                } else if (addressPXD(&p->header.self) == lmxaddr)
2625
                        break;
2626
 
2627
                /*
2628
                 * descend down to leftmost child page
2629
                 */
2630
                if (p->header.flag & BT_LEAF) {
2631
                        DT_PUTPAGE(mp);
2632
                        return -ESTALE;
2633
                }
2634
 
2635
                /* get the leftmost entry */
2636
                stbl = DT_GETSTBL(p);
2637
                pxd = (pxd_t *) & p->slot[stbl[0]];
2638
 
2639
                /* get the child page block address */
2640
                bn = addressPXD(pxd);
2641
                psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
2642
                /* unpin the parent page */
2643
                DT_PUTPAGE(mp);
2644
        }
2645
 
2646
        /*
2647
         *      search each page at the current levevl
2648
         */
2649
      loop:
2650
        stbl = DT_GETSTBL(p);
2651
        for (i = 0; i < p->header.nextindex; i++) {
2652
                pxd = (pxd_t *) & p->slot[stbl[i]];
2653
 
2654
                /* found the specified router entry */
2655
                if (addressPXD(pxd) == addressPXD(kpxd) &&
2656
                    lengthPXD(pxd) == lengthPXD(kpxd)) {
2657
                        btsp = btstack->top;
2658
                        btsp->bn = bn;
2659
                        btsp->index = i;
2660
                        btsp->mp = mp;
2661
 
2662
                        return 0;
2663
                }
2664
        }
2665
 
2666
        /* get the right sibling page if any */
2667
        if (p->header.next)
2668
                bn = le64_to_cpu(p->header.next);
2669
        else {
2670
                DT_PUTPAGE(mp);
2671
                return -ESTALE;
2672
        }
2673
 
2674
        /* unpin current page */
2675
        DT_PUTPAGE(mp);
2676
 
2677
        /* get the right sibling page */
2678
        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2679
        if (rc)
2680
                return rc;
2681
 
2682
        goto loop;
2683
}
2684
#endif /* _NOTYET */
2685
 
2686
/*
2687
 *      dtRelink()
2688
 *
2689
 * function:
2690
 *      link around a freed page.
2691
 *
2692
 * parameter:
2693
 *      fp:     page to be freed
2694
 *
2695
 * return:
2696
 */
2697
static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2698
{
2699
        int rc;
2700
        struct metapage *mp;
2701
        s64 nextbn, prevbn;
2702
        struct tlock *tlck;
2703
        struct dt_lock *dtlck;
2704
        struct lv *lv;
2705
 
2706
        nextbn = le64_to_cpu(p->header.next);
2707
        prevbn = le64_to_cpu(p->header.prev);
2708
 
2709
        /* update prev pointer of the next page */
2710
        if (nextbn != 0) {
2711
                DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2712
                if (rc)
2713
                        return rc;
2714
 
2715
                BT_MARK_DIRTY(mp, ip);
2716
                /*
2717
                 * acquire a transaction lock on the next page
2718
                 *
2719
                 * action: update prev pointer;
2720
                 */
2721
                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2722
                jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2723
                        tlck, ip, mp);
2724
                dtlck = (struct dt_lock *) & tlck->lock;
2725
 
2726
                /* linelock header */
2727
                if (dtlck->index >= dtlck->maxcnt)
2728
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2729
                lv = & dtlck->lv[dtlck->index];
2730
                lv->offset = 0;
2731
                lv->length = 1;
2732
                dtlck->index++;
2733
 
2734
                p->header.prev = cpu_to_le64(prevbn);
2735
                DT_PUTPAGE(mp);
2736
        }
2737
 
2738
        /* update next pointer of the previous page */
2739
        if (prevbn != 0) {
2740
                DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2741
                if (rc)
2742
                        return rc;
2743
 
2744
                BT_MARK_DIRTY(mp, ip);
2745
                /*
2746
                 * acquire a transaction lock on the prev page
2747
                 *
2748
                 * action: update next pointer;
2749
                 */
2750
                tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2751
                jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2752
                        tlck, ip, mp);
2753
                dtlck = (struct dt_lock *) & tlck->lock;
2754
 
2755
                /* linelock header */
2756
                if (dtlck->index >= dtlck->maxcnt)
2757
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
2758
                lv = & dtlck->lv[dtlck->index];
2759
                lv->offset = 0;
2760
                lv->length = 1;
2761
                dtlck->index++;
2762
 
2763
                p->header.next = cpu_to_le64(nextbn);
2764
                DT_PUTPAGE(mp);
2765
        }
2766
 
2767
        return 0;
2768
}
2769
 
2770
 
2771
/*
2772
 *      dtInitRoot()
2773
 *
2774
 * initialize directory root (inline in inode)
2775
 */
2776
void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2777
{
2778
        struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2779
        dtroot_t *p;
2780
        int fsi;
2781
        struct dtslot *f;
2782
        struct tlock *tlck;
2783
        struct dt_lock *dtlck;
2784
        struct lv *lv;
2785
        u16 xflag_save;
2786
 
2787
        /*
2788
         * If this was previously an non-empty directory, we need to remove
2789
         * the old directory table.
2790
         */
2791
        if (DO_INDEX(ip)) {
2792
                if (jfs_ip->next_index > (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
2793
                        struct tblock *tblk = tid_to_tblock(tid);
2794
                        /*
2795
                         * We're playing games with the tid's xflag.  If
2796
                         * we're removing a regular file, the file's xtree
2797
                         * is committed with COMMIT_PMAP, but we always
2798
                         * commit the directories xtree with COMMIT_PWMAP.
2799
                         */
2800
                        xflag_save = tblk->xflag;
2801
                        tblk->xflag = 0;
2802
                        /*
2803
                         * xtTruncate isn't guaranteed to fully truncate
2804
                         * the xtree.  The caller needs to check i_size
2805
                         * after committing the transaction to see if
2806
                         * additional truncation is needed.  The
2807
                         * COMMIT_Stale flag tells caller that we
2808
                         * initiated the truncation.
2809
                         */
2810
                        xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2811
                        set_cflag(COMMIT_Stale, ip);
2812
 
2813
                        tblk->xflag = xflag_save;
2814
                } else
2815
                        ip->i_size = 1;
2816
 
2817
                jfs_ip->next_index = 2;
2818
        } else
2819
                ip->i_size = IDATASIZE;
2820
 
2821
        /*
2822
         * acquire a transaction lock on the root
2823
         *
2824
         * action: directory initialization;
2825
         */
2826
        tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2827
                      tlckDTREE | tlckENTRY | tlckBTROOT);
2828
        dtlck = (struct dt_lock *) & tlck->lock;
2829
 
2830
        /* linelock root */
2831
        ASSERT(dtlck->index == 0);
2832
        lv = & dtlck->lv[0];
2833
        lv->offset = 0;
2834
        lv->length = DTROOTMAXSLOT;
2835
        dtlck->index++;
2836
 
2837
        p = &jfs_ip->i_dtroot;
2838
 
2839
        p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2840
 
2841
        p->header.nextindex = 0;
2842
 
2843
        /* init freelist */
2844
        fsi = 1;
2845
        f = &p->slot[fsi];
2846
 
2847
        /* init data area of root */
2848
        for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2849
                f->next = fsi;
2850
        f->next = -1;
2851
 
2852
        p->header.freelist = 1;
2853
        p->header.freecnt = 8;
2854
 
2855
        /* init '..' entry */
2856
        p->header.idotdot = cpu_to_le32(idotdot);
2857
 
2858
#if 0
2859
        ip->i_blocks = LBLK2PBLK(ip->i_sb,
2860
                                 ((jfs_ip->ea.flag & DXD_EXTENT) ?
2861
                                  lengthDXD(&jfs_ip->ea) : 0) +
2862
                                 ((jfs_ip->acl.flag & DXD_EXTENT) ?
2863
                                  lengthDXD(&jfs_ip->acl) : 0));
2864
#endif
2865
 
2866
        return;
2867
}
2868
 
2869
/*
2870
 *      add_missing_indices()
2871
 *
2872
 * function: Fix dtree page in which one or more entries has an invalid index.
2873
 *           fsck.jfs should really fix this, but it currently does not.
2874
 *           Called from jfs_readdir when bad index is detected.
2875
 */
2876
static void add_missing_indices(struct inode *inode, s64 bn)
2877
{
2878
        struct ldtentry *d;
2879
        struct dt_lock *dtlck;
2880
        int i;
2881
        uint index;
2882
        struct lv *lv;
2883
        struct metapage *mp;
2884
        dtpage_t *p;
2885
        int rc;
2886
        s8 *stbl;
2887
        tid_t tid;
2888
        struct tlock *tlck;
2889
 
2890
        tid = txBegin(inode->i_sb, 0);
2891
 
2892
        DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2893
 
2894
        if (rc) {
2895
                printk(KERN_ERR "DT_GETPAGE failed!\n");
2896
                goto end;
2897
        }
2898
        BT_MARK_DIRTY(mp, inode);
2899
 
2900
        ASSERT(p->header.flag & BT_LEAF);
2901
 
2902
        tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2903
        dtlck = (struct dt_lock *) &tlck->lock;
2904
 
2905
        stbl = DT_GETSTBL(p);
2906
        for (i = 0; i < p->header.nextindex; i++) {
2907
                d = (struct ldtentry *) &p->slot[stbl[i]];
2908
                index = le32_to_cpu(d->index);
2909
                if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2910
                        d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2911
                        if (dtlck->index >= dtlck->maxcnt)
2912
                                dtlck = (struct dt_lock *) txLinelock(dtlck);
2913
                        lv = &dtlck->lv[dtlck->index];
2914
                        lv->offset = stbl[i];
2915
                        lv->length = 1;
2916
                        dtlck->index++;
2917
                }
2918
        }
2919
 
2920
        DT_PUTPAGE(mp);
2921
        (void) txCommit(tid, 1, &inode, 0);
2922
end:
2923
        txEnd(tid);
2924
}
2925
 
2926
/*
2927
 * Buffer to hold directory entry info while traversing a dtree page
2928
 * before being fed to the filldir function
2929
 */
2930
struct jfs_dirent {
2931
        loff_t position;
2932
        int ino;
2933
        u16 name_len;
2934
        char name[0];
2935
};
2936
 
2937
/*
2938
 * function to determine next variable-sized jfs_dirent in buffer
2939
 */
2940
static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2941
{
2942
        return (struct jfs_dirent *)
2943
                ((char *)dirent +
2944
                 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2945
                   sizeof (loff_t) - 1) &
2946
                  ~(sizeof (loff_t) - 1)));
2947
}
2948
 
2949
/*
2950
 *      jfs_readdir()
2951
 *
2952
 * function: read directory entries sequentially
2953
 *      from the specified entry offset
2954
 *
2955
 * parameter:
2956
 *
2957
 * return: offset = (pn, index) of start entry
2958
 *      of next jfs_readdir()/dtRead()
2959
 */
2960
int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
2961
{
2962
        struct inode *ip = filp->f_dentry->d_inode;
2963
        struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
2964
        int rc = 0;
2965
        loff_t dtpos;   /* legacy OS/2 style position */
2966
        struct dtoffset {
2967
                s16 pn;
2968
                s16 index;
2969
                s32 unused;
2970
        } *dtoffset = (struct dtoffset *) &dtpos;
2971
        s64 bn;
2972
        struct metapage *mp;
2973
        dtpage_t *p;
2974
        int index;
2975
        s8 *stbl;
2976
        struct btstack btstack;
2977
        int i, next;
2978
        struct ldtentry *d;
2979
        struct dtslot *t;
2980
        int d_namleft, len, outlen;
2981
        unsigned long dirent_buf;
2982
        char *name_ptr;
2983
        u32 dir_index;
2984
        int do_index = 0;
2985
        uint loop_count = 0;
2986
        struct jfs_dirent *jfs_dirent;
2987
        int jfs_dirents;
2988
        int overflow, fix_page, page_fixed = 0;
2989
        static int unique_pos = 2;      /* If we can't fix broken index */
2990
 
2991
        if (filp->f_pos == DIREND)
2992
                return 0;
2993
 
2994
        if (DO_INDEX(ip)) {
2995
                /*
2996
                 * persistent index is stored in directory entries.
2997
                 * Special cases:        0 = .
2998
                 *                       1 = ..
2999
                 *                      -1 = End of directory
3000
                 */
3001
                do_index = 1;
3002
 
3003
                dir_index = (u32) filp->f_pos;
3004
 
3005
                if (dir_index > 1) {
3006
                        struct dir_table_slot dirtab_slot;
3007
 
3008
                        if (dtEmpty(ip) ||
3009
                            (dir_index >= JFS_IP(ip)->next_index)) {
3010
                                /* Stale position.  Directory has shrunk */
3011
                                filp->f_pos = DIREND;
3012
                                return 0;
3013
                        }
3014
                      repeat:
3015
                        rc = read_index(ip, dir_index, &dirtab_slot);
3016
                        if (rc) {
3017
                                filp->f_pos = DIREND;
3018
                                return rc;
3019
                        }
3020
                        if (dirtab_slot.flag == DIR_INDEX_FREE) {
3021
                                if (loop_count++ > JFS_IP(ip)->next_index) {
3022
                                        jfs_err("jfs_readdir detected "
3023
                                                   "infinite loop!");
3024
                                        filp->f_pos = DIREND;
3025
                                        return 0;
3026
                                }
3027
                                dir_index = le32_to_cpu(dirtab_slot.addr2);
3028
                                if (dir_index == -1) {
3029
                                        filp->f_pos = DIREND;
3030
                                        return 0;
3031
                                }
3032
                                goto repeat;
3033
                        }
3034
                        bn = addressDTS(&dirtab_slot);
3035
                        index = dirtab_slot.slot;
3036
                        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3037
                        if (rc) {
3038
                                filp->f_pos = DIREND;
3039
                                return 0;
3040
                        }
3041
                        if (p->header.flag & BT_INTERNAL) {
3042
                                jfs_err("jfs_readdir: bad index table");
3043
                                DT_PUTPAGE(mp);
3044
                                filp->f_pos = -1;
3045
                                return 0;
3046
                        }
3047
                } else {
3048
                        if (dir_index == 0) {
3049
                                /*
3050
                                 * self "."
3051
                                 */
3052
                                filp->f_pos = 0;
3053
                                if (filldir(dirent, ".", 1, 0, ip->i_ino,
3054
                                            DT_DIR))
3055
                                        return 0;
3056
                        }
3057
                        /*
3058
                         * parent ".."
3059
                         */
3060
                        filp->f_pos = 1;
3061
                        if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR))
3062
                                return 0;
3063
 
3064
                        /*
3065
                         * Find first entry of left-most leaf
3066
                         */
3067
                        if (dtEmpty(ip)) {
3068
                                filp->f_pos = DIREND;
3069
                                return 0;
3070
                        }
3071
 
3072
                        if ((rc = dtReadFirst(ip, &btstack)))
3073
                                return rc;
3074
 
3075
                        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3076
                }
3077
        } else {
3078
                /*
3079
                 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3080
                 *
3081
                 * pn = index = 0:      First entry "."
3082
                 * pn = 0; index = 1:   Second entry ".."
3083
                 * pn > 0:              Real entries, pn=1 -> leftmost page
3084
                 * pn = index = -1:     No more entries
3085
                 */
3086
                dtpos = filp->f_pos;
3087
                if (dtpos == 0) {
3088
                        /* build "." entry */
3089
 
3090
                        if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino,
3091
                                    DT_DIR))
3092
                                return 0;
3093
                        dtoffset->index = 1;
3094
                        filp->f_pos = dtpos;
3095
                }
3096
 
3097
                if (dtoffset->pn == 0) {
3098
                        if (dtoffset->index == 1) {
3099
                                /* build ".." entry */
3100
 
3101
                                if (filldir(dirent, "..", 2, filp->f_pos,
3102
                                            PARENT(ip), DT_DIR))
3103
                                        return 0;
3104
                        } else {
3105
                                jfs_err("jfs_readdir called with "
3106
                                        "invalid offset!");
3107
                        }
3108
                        dtoffset->pn = 1;
3109
                        dtoffset->index = 0;
3110
                        filp->f_pos = dtpos;
3111
                }
3112
 
3113
                if (dtEmpty(ip)) {
3114
                        filp->f_pos = DIREND;
3115
                        return 0;
3116
                }
3117
 
3118
                if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) {
3119
                        jfs_err("jfs_readdir: unexpected rc = %d "
3120
                                "from dtReadNext", rc);
3121
                        filp->f_pos = DIREND;
3122
                        return 0;
3123
                }
3124
                /* get start leaf page and index */
3125
                DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3126
 
3127
                /* offset beyond directory eof ? */
3128
                if (bn < 0) {
3129
                        filp->f_pos = DIREND;
3130
                        return 0;
3131
                }
3132
        }
3133
 
3134
        dirent_buf = __get_free_page(GFP_KERNEL);
3135
        if (dirent_buf == 0) {
3136
                DT_PUTPAGE(mp);
3137
                jfs_warn("jfs_readdir: __get_free_page failed!");
3138
                filp->f_pos = DIREND;
3139
                return -ENOMEM;
3140
        }
3141
 
3142
        while (1) {
3143
                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3144
                jfs_dirents = 0;
3145
                overflow = fix_page = 0;
3146
 
3147
                stbl = DT_GETSTBL(p);
3148
 
3149
                for (i = index; i < p->header.nextindex; i++) {
3150
                        d = (struct ldtentry *) & p->slot[stbl[i]];
3151
 
3152
                        if (((long) jfs_dirent + d->namlen + 1) >
3153
                            (dirent_buf + PSIZE)) {
3154
                                /* DBCS codepages could overrun dirent_buf */
3155
                                index = i;
3156
                                overflow = 1;
3157
                                break;
3158
                        }
3159
 
3160
                        d_namleft = d->namlen;
3161
                        name_ptr = jfs_dirent->name;
3162
                        jfs_dirent->ino = le32_to_cpu(d->inumber);
3163
 
3164
                        if (do_index) {
3165
                                len = min(d_namleft, DTLHDRDATALEN);
3166
                                jfs_dirent->position = le32_to_cpu(d->index);
3167
                                /*
3168
                                 * d->index should always be valid, but it
3169
                                 * isn't.  fsck.jfs doesn't create the
3170
                                 * directory index for the lost+found
3171
                                 * directory.  Rather than let it go,
3172
                                 * we can try to fix it.
3173
                                 */
3174
                                if ((jfs_dirent->position < 2) ||
3175
                                    (jfs_dirent->position >=
3176
                                     JFS_IP(ip)->next_index)) {
3177
                                        if (!page_fixed && !isReadOnly(ip)) {
3178
                                                fix_page = 1;
3179
                                                /*
3180
                                                 * setting overflow and setting
3181
                                                 * index to i will cause the
3182
                                                 * same page to be processed
3183
                                                 * again starting here
3184
                                                 */
3185
                                                overflow = 1;
3186
                                                index = i;
3187
                                                break;
3188
                                        }
3189
                                        jfs_dirent->position = unique_pos++;
3190
                                }
3191
                        } else {
3192
                                jfs_dirent->position = dtpos;
3193
                                len = min(d_namleft, DTLHDRDATALEN_LEGACY);
3194
                        }
3195
 
3196
                        /* copy the name of head/only segment */
3197
                        outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
3198
                                                   codepage);
3199
                        jfs_dirent->name_len = outlen;
3200
 
3201
                        /* copy name in the additional segment(s) */
3202
                        next = d->next;
3203
                        while (next >= 0) {
3204
                                t = (struct dtslot *) & p->slot[next];
3205
                                name_ptr += outlen;
3206
                                d_namleft -= len;
3207
                                /* Sanity Check */
3208
                                if (d_namleft == 0) {
3209
                                        jfs_error(ip->i_sb,
3210
                                                  "JFS:Dtree error: ino = "
3211
                                                  "%ld, bn=%Ld, index = %d",
3212
                                                  (long)ip->i_ino,
3213
                                                  (long long)bn,
3214
                                                  i);
3215
                                        goto skip_one;
3216
                                }
3217
                                len = min(d_namleft, DTSLOTDATALEN);
3218
                                outlen = jfs_strfromUCS_le(name_ptr, t->name,
3219
                                                           len, codepage);
3220
                                jfs_dirent->name_len += outlen;
3221
 
3222
                                next = t->next;
3223
                        }
3224
 
3225
                        jfs_dirents++;
3226
                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3227
skip_one:
3228
                        if (!do_index)
3229
                                dtoffset->index++;
3230
                }
3231
 
3232
                if (!overflow) {
3233
                        /* Point to next leaf page */
3234
                        if (p->header.flag & BT_ROOT)
3235
                                bn = 0;
3236
                        else {
3237
                                bn = le64_to_cpu(p->header.next);
3238
                                index = 0;
3239
                                /* update offset (pn:index) for new page */
3240
                                if (!do_index) {
3241
                                        dtoffset->pn++;
3242
                                        dtoffset->index = 0;
3243
                                }
3244
                        }
3245
                        page_fixed = 0;
3246
                }
3247
 
3248
                /* unpin previous leaf page */
3249
                DT_PUTPAGE(mp);
3250
 
3251
                jfs_dirent = (struct jfs_dirent *) dirent_buf;
3252
                while (jfs_dirents--) {
3253
                        filp->f_pos = jfs_dirent->position;
3254
                        if (filldir(dirent, jfs_dirent->name,
3255
                                    jfs_dirent->name_len, filp->f_pos,
3256
                                    jfs_dirent->ino, DT_UNKNOWN))
3257
                                goto out;
3258
                        jfs_dirent = next_jfs_dirent(jfs_dirent);
3259
                }
3260
 
3261
                if (fix_page) {
3262
                        add_missing_indices(ip, bn);
3263
                        page_fixed = 1;
3264
                }
3265
 
3266
                if (!overflow && (bn == 0)) {
3267
                        filp->f_pos = DIREND;
3268
                        break;
3269
                }
3270
 
3271
                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3272
                if (rc) {
3273
                        free_page(dirent_buf);
3274
                        return rc;
3275
                }
3276
        }
3277
 
3278
      out:
3279
        free_page(dirent_buf);
3280
 
3281
        return rc;
3282
}
3283
 
3284
 
3285
/*
3286
 *      dtReadFirst()
3287
 *
3288
 * function: get the leftmost page of the directory
3289
 */
3290
static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3291
{
3292
        int rc = 0;
3293
        s64 bn;
3294
        int psize = 288;        /* initial in-line directory */
3295
        struct metapage *mp;
3296
        dtpage_t *p;
3297
        s8 *stbl;
3298
        struct btframe *btsp;
3299
        pxd_t *xd;
3300
 
3301
        BT_CLR(btstack);        /* reset stack */
3302
 
3303
        /*
3304
         *      descend leftmost path of the tree
3305
         *
3306
         * by convention, root bn = 0.
3307
         */
3308
        for (bn = 0;;) {
3309
                DT_GETPAGE(ip, bn, mp, psize, p, rc);
3310
                if (rc)
3311
                        return rc;
3312
 
3313
                /*
3314
                 * leftmost leaf page
3315
                 */
3316
                if (p->header.flag & BT_LEAF) {
3317
                        /* return leftmost entry */
3318
                        btsp = btstack->top;
3319
                        btsp->bn = bn;
3320
                        btsp->index = 0;
3321
                        btsp->mp = mp;
3322
 
3323
                        return 0;
3324
                }
3325
 
3326
                /*
3327
                 * descend down to leftmost child page
3328
                 */
3329
                /* push (bn, index) of the parent page/entry */
3330
                BT_PUSH(btstack, bn, 0);
3331
 
3332
                /* get the leftmost entry */
3333
                stbl = DT_GETSTBL(p);
3334
                xd = (pxd_t *) & p->slot[stbl[0]];
3335
 
3336
                /* get the child page block address */
3337
                bn = addressPXD(xd);
3338
                psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3339
 
3340
                /* unpin the parent page */
3341
                DT_PUTPAGE(mp);
3342
        }
3343
}
3344
 
3345
 
3346
/*
3347
 *      dtReadNext()
3348
 *
3349
 * function: get the page of the specified offset (pn:index)
3350
 *
3351
 * return: if (offset > eof), bn = -1;
3352
 *
3353
 * note: if index > nextindex of the target leaf page,
3354
 * start with 1st entry of next leaf page;
3355
 */
3356
static int dtReadNext(struct inode *ip, loff_t * offset,
3357
                      struct btstack * btstack)
3358
{
3359
        int rc = 0;
3360
        struct dtoffset {
3361
                s16 pn;
3362
                s16 index;
3363
                s32 unused;
3364
        } *dtoffset = (struct dtoffset *) offset;
3365
        s64 bn;
3366
        struct metapage *mp;
3367
        dtpage_t *p;
3368
        int index;
3369
        int pn;
3370
        s8 *stbl;
3371
        struct btframe *btsp, *parent;
3372
        pxd_t *xd;
3373
 
3374
        /*
3375
         * get leftmost leaf page pinned
3376
         */
3377
        if ((rc = dtReadFirst(ip, btstack)))
3378
                return rc;
3379
 
3380
        /* get leaf page */
3381
        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3382
 
3383
        /* get the start offset (pn:index) */
3384
        pn = dtoffset->pn - 1;  /* Now pn = 0 represents leftmost leaf */
3385
        index = dtoffset->index;
3386
 
3387
        /* start at leftmost page ? */
3388
        if (pn == 0) {
3389
                /* offset beyond eof ? */
3390
                if (index < p->header.nextindex)
3391
                        goto out;
3392
 
3393
                if (p->header.flag & BT_ROOT) {
3394
                        bn = -1;
3395
                        goto out;
3396
                }
3397
 
3398
                /* start with 1st entry of next leaf page */
3399
                dtoffset->pn++;
3400
                dtoffset->index = index = 0;
3401
                goto a;
3402
        }
3403
 
3404
        /* start at non-leftmost page: scan parent pages for large pn */
3405
        if (p->header.flag & BT_ROOT) {
3406
                bn = -1;
3407
                goto out;
3408
        }
3409
 
3410
        /* start after next leaf page ? */
3411
        if (pn > 1)
3412
                goto b;
3413
 
3414
        /* get leaf page pn = 1 */
3415
      a:
3416
        bn = le64_to_cpu(p->header.next);
3417
 
3418
        /* unpin leaf page */
3419
        DT_PUTPAGE(mp);
3420
 
3421
        /* offset beyond eof ? */
3422
        if (bn == 0) {
3423
                bn = -1;
3424
                goto out;
3425
        }
3426
 
3427
        goto c;
3428
 
3429
        /*
3430
         * scan last internal page level to get target leaf page
3431
         */
3432
      b:
3433
        /* unpin leftmost leaf page */
3434
        DT_PUTPAGE(mp);
3435
 
3436
        /* get left most parent page */
3437
        btsp = btstack->top;
3438
        parent = btsp - 1;
3439
        bn = parent->bn;
3440
        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3441
        if (rc)
3442
                return rc;
3443
 
3444
        /* scan parent pages at last internal page level */
3445
        while (pn >= p->header.nextindex) {
3446
                pn -= p->header.nextindex;
3447
 
3448
                /* get next parent page address */
3449
                bn = le64_to_cpu(p->header.next);
3450
 
3451
                /* unpin current parent page */
3452
                DT_PUTPAGE(mp);
3453
 
3454
                /* offset beyond eof ? */
3455
                if (bn == 0) {
3456
                        bn = -1;
3457
                        goto out;
3458
                }
3459
 
3460
                /* get next parent page */
3461
                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3462
                if (rc)
3463
                        return rc;
3464
 
3465
                /* update parent page stack frame */
3466
                parent->bn = bn;
3467
        }
3468
 
3469
        /* get leaf page address */
3470
        stbl = DT_GETSTBL(p);
3471
        xd = (pxd_t *) & p->slot[stbl[pn]];
3472
        bn = addressPXD(xd);
3473
 
3474
        /* unpin parent page */
3475
        DT_PUTPAGE(mp);
3476
 
3477
        /*
3478
         * get target leaf page
3479
         */
3480
      c:
3481
        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3482
        if (rc)
3483
                return rc;
3484
 
3485
        /*
3486
         * leaf page has been completed:
3487
         * start with 1st entry of next leaf page
3488
         */
3489
        if (index >= p->header.nextindex) {
3490
                bn = le64_to_cpu(p->header.next);
3491
 
3492
                /* unpin leaf page */
3493
                DT_PUTPAGE(mp);
3494
 
3495
                /* offset beyond eof ? */
3496
                if (bn == 0) {
3497
                        bn = -1;
3498
                        goto out;
3499
                }
3500
 
3501
                /* get next leaf page */
3502
                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3503
                if (rc)
3504
                        return rc;
3505
 
3506
                /* start with 1st entry of next leaf page */
3507
                dtoffset->pn++;
3508
                dtoffset->index = 0;
3509
        }
3510
 
3511
      out:
3512
        /* return target leaf page pinned */
3513
        btsp = btstack->top;
3514
        btsp->bn = bn;
3515
        btsp->index = dtoffset->index;
3516
        btsp->mp = mp;
3517
 
3518
        return 0;
3519
}
3520
 
3521
 
3522
/*
3523
 *      dtCompare()
3524
 *
3525
 * function: compare search key with an internal entry
3526
 *
3527
 * return:
3528
 *      < 0 if k is < record
3529
 *      = 0 if k is = record
3530
 *      > 0 if k is > record
3531
 */
3532
static int dtCompare(struct component_name * key,       /* search key */
3533
                     dtpage_t * p,      /* directory page */
3534
                     int si)
3535
{                               /* entry slot index */
3536
        wchar_t *kname, *name;
3537
        int klen, namlen, len, rc;
3538
        struct idtentry *ih;
3539
        struct dtslot *t;
3540
 
3541
        /*
3542
         * force the left-most key on internal pages, at any level of
3543
         * the tree, to be less than any search key.
3544
         * this obviates having to update the leftmost key on an internal
3545
         * page when the user inserts a new key in the tree smaller than
3546
         * anything that has been stored.
3547
         *
3548
         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3549
         * at any internal page at any level of the tree,
3550
         * it descends to child of the entry anyway -
3551
         * ? make the entry as min size dummy entry)
3552
         *
3553
         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3554
         * return (1);
3555
         */
3556
 
3557
        kname = key->name;
3558
        klen = key->namlen;
3559
 
3560
        ih = (struct idtentry *) & p->slot[si];
3561
        si = ih->next;
3562
        name = ih->name;
3563
        namlen = ih->namlen;
3564
        len = min(namlen, DTIHDRDATALEN);
3565
 
3566
        /* compare with head/only segment */
3567
        len = min(klen, len);
3568
        if ((rc = UniStrncmp_le(kname, name, len)))
3569
                return rc;
3570
 
3571
        klen -= len;
3572
        namlen -= len;
3573
 
3574
        /* compare with additional segment(s) */
3575
        kname += len;
3576
        while (klen > 0 && namlen > 0) {
3577
                /* compare with next name segment */
3578
                t = (struct dtslot *) & p->slot[si];
3579
                len = min(namlen, DTSLOTDATALEN);
3580
                len = min(klen, len);
3581
                name = t->name;
3582
                if ((rc = UniStrncmp_le(kname, name, len)))
3583
                        return rc;
3584
 
3585
                klen -= len;
3586
                namlen -= len;
3587
                kname += len;
3588
                si = t->next;
3589
        }
3590
 
3591
        return (klen - namlen);
3592
}
3593
 
3594
 
3595
 
3596
 
3597
/*
3598
 *      ciCompare()
3599
 *
3600
 * function: compare search key with an (leaf/internal) entry
3601
 *
3602
 * return:
3603
 *      < 0 if k is < record
3604
 *      = 0 if k is = record
3605
 *      > 0 if k is > record
3606
 */
3607
static int ciCompare(struct component_name * key,       /* search key */
3608
                     dtpage_t * p,      /* directory page */
3609
                     int si,    /* entry slot index */
3610
                     int flag)
3611
{
3612
        wchar_t *kname, *name, x;
3613
        int klen, namlen, len, rc;
3614
        struct ldtentry *lh;
3615
        struct idtentry *ih;
3616
        struct dtslot *t;
3617
        int i;
3618
 
3619
        /*
3620
         * force the left-most key on internal pages, at any level of
3621
         * the tree, to be less than any search key.
3622
         * this obviates having to update the leftmost key on an internal
3623
         * page when the user inserts a new key in the tree smaller than
3624
         * anything that has been stored.
3625
         *
3626
         * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3627
         * at any internal page at any level of the tree,
3628
         * it descends to child of the entry anyway -
3629
         * ? make the entry as min size dummy entry)
3630
         *
3631
         * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3632
         * return (1);
3633
         */
3634
 
3635
        kname = key->name;
3636
        klen = key->namlen;
3637
 
3638
        /*
3639
         * leaf page entry
3640
         */
3641
        if (p->header.flag & BT_LEAF) {
3642
                lh = (struct ldtentry *) & p->slot[si];
3643
                si = lh->next;
3644
                name = lh->name;
3645
                namlen = lh->namlen;
3646
                if (flag & JFS_DIR_INDEX)
3647
                        len = min(namlen, DTLHDRDATALEN);
3648
                else
3649
                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3650
        }
3651
        /*
3652
         * internal page entry
3653
         */
3654
        else {
3655
                ih = (struct idtentry *) & p->slot[si];
3656
                si = ih->next;
3657
                name = ih->name;
3658
                namlen = ih->namlen;
3659
                len = min(namlen, DTIHDRDATALEN);
3660
        }
3661
 
3662
        /* compare with head/only segment */
3663
        len = min(klen, len);
3664
        for (i = 0; i < len; i++, kname++, name++) {
3665
                /* only uppercase if case-insensitive support is on */
3666
                if ((flag & JFS_OS2) == JFS_OS2)
3667
                        x = UniToupper(le16_to_cpu(*name));
3668
                else
3669
                        x = le16_to_cpu(*name);
3670
                if ((rc = *kname - x))
3671
                        return rc;
3672
        }
3673
 
3674
        klen -= len;
3675
        namlen -= len;
3676
 
3677
        /* compare with additional segment(s) */
3678
        while (klen > 0 && namlen > 0) {
3679
                /* compare with next name segment */
3680
                t = (struct dtslot *) & p->slot[si];
3681
                len = min(namlen, DTSLOTDATALEN);
3682
                len = min(klen, len);
3683
                name = t->name;
3684
                for (i = 0; i < len; i++, kname++, name++) {
3685
                        /* only uppercase if case-insensitive support is on */
3686
                        if ((flag & JFS_OS2) == JFS_OS2)
3687
                                x = UniToupper(le16_to_cpu(*name));
3688
                        else
3689
                                x = le16_to_cpu(*name);
3690
 
3691
                        if ((rc = *kname - x))
3692
                                return rc;
3693
                }
3694
 
3695
                klen -= len;
3696
                namlen -= len;
3697
                si = t->next;
3698
        }
3699
 
3700
        return (klen - namlen);
3701
}
3702
 
3703
 
3704
/*
3705
 *      ciGetLeafPrefixKey()
3706
 *
3707
 * function: compute prefix of suffix compression
3708
 *           from two adjacent leaf entries
3709
 *           across page boundary
3710
 *
3711
 * return:
3712
 *      Number of prefix bytes needed to distinguish b from a.
3713
 */
3714
static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3715
                               int ri, struct component_name * key, int flag)
3716
{
3717
        int klen, namlen;
3718
        wchar_t *pl, *pr, *kname;
3719
        wchar_t lname[JFS_NAME_MAX + 1];
3720
        struct component_name lkey = { 0, lname };
3721
        wchar_t rname[JFS_NAME_MAX + 1];
3722
        struct component_name rkey = { 0, rname };
3723
 
3724
        /* get left and right key */
3725
        dtGetKey(lp, li, &lkey, flag);
3726
        lkey.name[lkey.namlen] = 0;
3727
 
3728
        if ((flag & JFS_OS2) == JFS_OS2)
3729
                ciToUpper(&lkey);
3730
 
3731
        dtGetKey(rp, ri, &rkey, flag);
3732
        rkey.name[rkey.namlen] = 0;
3733
 
3734
 
3735
        if ((flag & JFS_OS2) == JFS_OS2)
3736
                ciToUpper(&rkey);
3737
 
3738
        /* compute prefix */
3739
        klen = 0;
3740
        kname = key->name;
3741
        namlen = min(lkey.namlen, rkey.namlen);
3742
        for (pl = lkey.name, pr = rkey.name;
3743
             namlen; pl++, pr++, namlen--, klen++, kname++) {
3744
                *kname = *pr;
3745
                if (*pl != *pr) {
3746
                        key->namlen = klen + 1;
3747
                        return;
3748
                }
3749
        }
3750
 
3751
        /* l->namlen <= r->namlen since l <= r */
3752
        if (lkey.namlen < rkey.namlen) {
3753
                *kname = *pr;
3754
                key->namlen = klen + 1;
3755
        } else                  /* l->namelen == r->namelen */
3756
                key->namlen = klen;
3757
 
3758
        return;
3759
}
3760
 
3761
 
3762
 
3763
/*
3764
 *      dtGetKey()
3765
 *
3766
 * function: get key of the entry
3767
 */
3768
static void dtGetKey(dtpage_t * p, int i,       /* entry index */
3769
                     struct component_name * key, int flag)
3770
{
3771
        int si;
3772
        s8 *stbl;
3773
        struct ldtentry *lh;
3774
        struct idtentry *ih;
3775
        struct dtslot *t;
3776
        int namlen, len;
3777
        wchar_t *name, *kname;
3778
 
3779
        /* get entry */
3780
        stbl = DT_GETSTBL(p);
3781
        si = stbl[i];
3782
        if (p->header.flag & BT_LEAF) {
3783
                lh = (struct ldtentry *) & p->slot[si];
3784
                si = lh->next;
3785
                namlen = lh->namlen;
3786
                name = lh->name;
3787
                if (flag & JFS_DIR_INDEX)
3788
                        len = min(namlen, DTLHDRDATALEN);
3789
                else
3790
                        len = min(namlen, DTLHDRDATALEN_LEGACY);
3791
        } else {
3792
                ih = (struct idtentry *) & p->slot[si];
3793
                si = ih->next;
3794
                namlen = ih->namlen;
3795
                name = ih->name;
3796
                len = min(namlen, DTIHDRDATALEN);
3797
        }
3798
 
3799
        key->namlen = namlen;
3800
        kname = key->name;
3801
 
3802
        /*
3803
         * move head/only segment
3804
         */
3805
        UniStrncpy_le(kname, name, len);
3806
 
3807
        /*
3808
         * move additional segment(s)
3809
         */
3810
        while (si >= 0) {
3811
                /* get next segment */
3812
                t = &p->slot[si];
3813
                kname += len;
3814
                namlen -= len;
3815
                len = min(namlen, DTSLOTDATALEN);
3816
                UniStrncpy_le(kname, t->name, len);
3817
 
3818
                si = t->next;
3819
        }
3820
}
3821
 
3822
 
3823
/*
3824
 *      dtInsertEntry()
3825
 *
3826
 * function: allocate free slot(s) and
3827
 *           write a leaf/internal entry
3828
 *
3829
 * return: entry slot index
3830
 */
3831
static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3832
                          ddata_t * data, struct dt_lock ** dtlock)
3833
{
3834
        struct dtslot *h, *t;
3835
        struct ldtentry *lh = 0;
3836
        struct idtentry *ih = 0;
3837
        int hsi, fsi, klen, len, nextindex;
3838
        wchar_t *kname, *name;
3839
        s8 *stbl;
3840
        pxd_t *xd;
3841
        struct dt_lock *dtlck = *dtlock;
3842
        struct lv *lv;
3843
        int xsi, n;
3844
        s64 bn = 0;
3845
        struct metapage *mp = 0;
3846
 
3847
        klen = key->namlen;
3848
        kname = key->name;
3849
 
3850
        /* allocate a free slot */
3851
        hsi = fsi = p->header.freelist;
3852
        h = &p->slot[fsi];
3853
        p->header.freelist = h->next;
3854
        --p->header.freecnt;
3855
 
3856
        /* open new linelock */
3857
        if (dtlck->index >= dtlck->maxcnt)
3858
                dtlck = (struct dt_lock *) txLinelock(dtlck);
3859
 
3860
        lv = & dtlck->lv[dtlck->index];
3861
        lv->offset = hsi;
3862
 
3863
        /* write head/only segment */
3864
        if (p->header.flag & BT_LEAF) {
3865
                lh = (struct ldtentry *) h;
3866
                lh->next = h->next;
3867
                lh->inumber = data->leaf.ino;   /* little-endian */
3868
                lh->namlen = klen;
3869
                name = lh->name;
3870
                if (data->leaf.ip) {
3871
                        len = min(klen, DTLHDRDATALEN);
3872
                        if (!(p->header.flag & BT_ROOT))
3873
                                bn = addressPXD(&p->header.self);
3874
                        lh->index = cpu_to_le32(add_index(data->leaf.tid,
3875
                                                          data->leaf.ip,
3876
                                                          bn, index));
3877
                } else
3878
                        len = min(klen, DTLHDRDATALEN_LEGACY);
3879
        } else {
3880
                ih = (struct idtentry *) h;
3881
                ih->next = h->next;
3882
                xd = (pxd_t *) ih;
3883
                *xd = data->xd;
3884
                ih->namlen = klen;
3885
                name = ih->name;
3886
                len = min(klen, DTIHDRDATALEN);
3887
        }
3888
 
3889
        UniStrncpy_le(name, kname, len);
3890
 
3891
        n = 1;
3892
        xsi = hsi;
3893
 
3894
        /* write additional segment(s) */
3895
        t = h;
3896
        klen -= len;
3897
        while (klen) {
3898
                /* get free slot */
3899
                fsi = p->header.freelist;
3900
                t = &p->slot[fsi];
3901
                p->header.freelist = t->next;
3902
                --p->header.freecnt;
3903
 
3904
                /* is next slot contiguous ? */
3905
                if (fsi != xsi + 1) {
3906
                        /* close current linelock */
3907
                        lv->length = n;
3908
                        dtlck->index++;
3909
 
3910
                        /* open new linelock */
3911
                        if (dtlck->index < dtlck->maxcnt)
3912
                                lv++;
3913
                        else {
3914
                                dtlck = (struct dt_lock *) txLinelock(dtlck);
3915
                                lv = & dtlck->lv[0];
3916
                        }
3917
 
3918
                        lv->offset = fsi;
3919
                        n = 0;
3920
                }
3921
 
3922
                kname += len;
3923
                len = min(klen, DTSLOTDATALEN);
3924
                UniStrncpy_le(t->name, kname, len);
3925
 
3926
                n++;
3927
                xsi = fsi;
3928
                klen -= len;
3929
        }
3930
 
3931
        /* close current linelock */
3932
        lv->length = n;
3933
        dtlck->index++;
3934
 
3935
        *dtlock = dtlck;
3936
 
3937
        /* terminate last/only segment */
3938
        if (h == t) {
3939
                /* single segment entry */
3940
                if (p->header.flag & BT_LEAF)
3941
                        lh->next = -1;
3942
                else
3943
                        ih->next = -1;
3944
        } else
3945
                /* multi-segment entry */
3946
                t->next = -1;
3947
 
3948
        /* if insert into middle, shift right succeeding entries in stbl */
3949
        stbl = DT_GETSTBL(p);
3950
        nextindex = p->header.nextindex;
3951
        if (index < nextindex) {
3952
                memmove(stbl + index + 1, stbl + index, nextindex - index);
3953
 
3954
                if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
3955
                        s64 lblock;
3956
 
3957
                        /*
3958
                         * Need to update slot number for entries that moved
3959
                         * in the stbl
3960
                         */
3961
                        mp = 0;
3962
                        for (n = index + 1; n <= nextindex; n++) {
3963
                                lh = (struct ldtentry *) & (p->slot[stbl[n]]);
3964
                                modify_index(data->leaf.tid, data->leaf.ip,
3965
                                             le32_to_cpu(lh->index), bn, n,
3966
                                             &mp, &lblock);
3967
                        }
3968
                        if (mp)
3969
                                release_metapage(mp);
3970
                }
3971
        }
3972
 
3973
        stbl[index] = hsi;
3974
 
3975
        /* advance next available entry index of stbl */
3976
        ++p->header.nextindex;
3977
}
3978
 
3979
 
3980
/*
3981
 *      dtMoveEntry()
3982
 *
3983
 * function: move entries from split/left page to new/right page
3984
 *
3985
 *      nextindex of dst page and freelist/freecnt of both pages
3986
 *      are updated.
3987
 */
3988
static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
3989
                        struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
3990
                        int do_index)
3991
{
3992
        int ssi, next;          /* src slot index */
3993
        int di;                 /* dst entry index */
3994
        int dsi;                /* dst slot index */
3995
        s8 *sstbl, *dstbl;      /* sorted entry table */
3996
        int snamlen, len;
3997
        struct ldtentry *slh, *dlh = 0;
3998
        struct idtentry *sih, *dih = 0;
3999
        struct dtslot *h, *s, *d;
4000
        struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
4001
        struct lv *slv, *dlv;
4002
        int xssi, ns, nd;
4003
        int sfsi;
4004
 
4005
        sstbl = (s8 *) & sp->slot[sp->header.stblindex];
4006
        dstbl = (s8 *) & dp->slot[dp->header.stblindex];
4007
 
4008
        dsi = dp->header.freelist;      /* first (whole page) free slot */
4009
        sfsi = sp->header.freelist;
4010
 
4011
        /* linelock destination entry slot */
4012
        dlv = & ddtlck->lv[ddtlck->index];
4013
        dlv->offset = dsi;
4014
 
4015
        /* linelock source entry slot */
4016
        slv = & sdtlck->lv[sdtlck->index];
4017
        slv->offset = sstbl[si];
4018
        xssi = slv->offset - 1;
4019
 
4020
        /*
4021
         * move entries
4022
         */
4023
        ns = nd = 0;
4024
        for (di = 0; si < sp->header.nextindex; si++, di++) {
4025
                ssi = sstbl[si];
4026
                dstbl[di] = dsi;
4027
 
4028
                /* is next slot contiguous ? */
4029
                if (ssi != xssi + 1) {
4030
                        /* close current linelock */
4031
                        slv->length = ns;
4032
                        sdtlck->index++;
4033
 
4034
                        /* open new linelock */
4035
                        if (sdtlck->index < sdtlck->maxcnt)
4036
                                slv++;
4037
                        else {
4038
                                sdtlck = (struct dt_lock *) txLinelock(sdtlck);
4039
                                slv = & sdtlck->lv[0];
4040
                        }
4041
 
4042
                        slv->offset = ssi;
4043
                        ns = 0;
4044
                }
4045
 
4046
                /*
4047
                 * move head/only segment of an entry
4048
                 */
4049
                /* get dst slot */
4050
                h = d = &dp->slot[dsi];
4051
 
4052
                /* get src slot and move */
4053
                s = &sp->slot[ssi];
4054
                if (sp->header.flag & BT_LEAF) {
4055
                        /* get source entry */
4056
                        slh = (struct ldtentry *) s;
4057
                        dlh = (struct ldtentry *) h;
4058
                        snamlen = slh->namlen;
4059
 
4060
                        if (do_index) {
4061
                                len = min(snamlen, DTLHDRDATALEN);
4062
                                dlh->index = slh->index; /* little-endian */
4063
                        } else
4064
                                len = min(snamlen, DTLHDRDATALEN_LEGACY);
4065
 
4066
                        memcpy(dlh, slh, 6 + len * 2);
4067
 
4068
                        next = slh->next;
4069
 
4070
                        /* update dst head/only segment next field */
4071
                        dsi++;
4072
                        dlh->next = dsi;
4073
                } else {
4074
                        sih = (struct idtentry *) s;
4075
                        snamlen = sih->namlen;
4076
 
4077
                        len = min(snamlen, DTIHDRDATALEN);
4078
                        dih = (struct idtentry *) h;
4079
                        memcpy(dih, sih, 10 + len * 2);
4080
                        next = sih->next;
4081
 
4082
                        dsi++;
4083
                        dih->next = dsi;
4084
                }
4085
 
4086
                /* free src head/only segment */
4087
                s->next = sfsi;
4088
                s->cnt = 1;
4089
                sfsi = ssi;
4090
 
4091
                ns++;
4092
                nd++;
4093
                xssi = ssi;
4094
 
4095
                /*
4096
                 * move additional segment(s) of the entry
4097
                 */
4098
                snamlen -= len;
4099
                while ((ssi = next) >= 0) {
4100
                        /* is next slot contiguous ? */
4101
                        if (ssi != xssi + 1) {
4102
                                /* close current linelock */
4103
                                slv->length = ns;
4104
                                sdtlck->index++;
4105
 
4106
                                /* open new linelock */
4107
                                if (sdtlck->index < sdtlck->maxcnt)
4108
                                        slv++;
4109
                                else {
4110
                                        sdtlck =
4111
                                            (struct dt_lock *)
4112
                                            txLinelock(sdtlck);
4113
                                        slv = & sdtlck->lv[0];
4114
                                }
4115
 
4116
                                slv->offset = ssi;
4117
                                ns = 0;
4118
                        }
4119
 
4120
                        /* get next source segment */
4121
                        s = &sp->slot[ssi];
4122
 
4123
                        /* get next destination free slot */
4124
                        d++;
4125
 
4126
                        len = min(snamlen, DTSLOTDATALEN);
4127
                        UniStrncpy(d->name, s->name, len);
4128
 
4129
                        ns++;
4130
                        nd++;
4131
                        xssi = ssi;
4132
 
4133
                        dsi++;
4134
                        d->next = dsi;
4135
 
4136
                        /* free source segment */
4137
                        next = s->next;
4138
                        s->next = sfsi;
4139
                        s->cnt = 1;
4140
                        sfsi = ssi;
4141
 
4142
                        snamlen -= len;
4143
                }               /* end while */
4144
 
4145
                /* terminate dst last/only segment */
4146
                if (h == d) {
4147
                        /* single segment entry */
4148
                        if (dp->header.flag & BT_LEAF)
4149
                                dlh->next = -1;
4150
                        else
4151
                                dih->next = -1;
4152
                } else
4153
                        /* multi-segment entry */
4154
                        d->next = -1;
4155
        }                       /* end for */
4156
 
4157
        /* close current linelock */
4158
        slv->length = ns;
4159
        sdtlck->index++;
4160
        *sdtlock = sdtlck;
4161
 
4162
        dlv->length = nd;
4163
        ddtlck->index++;
4164
        *ddtlock = ddtlck;
4165
 
4166
        /* update source header */
4167
        sp->header.freelist = sfsi;
4168
        sp->header.freecnt += nd;
4169
 
4170
        /* update destination header */
4171
        dp->header.nextindex = di;
4172
 
4173
        dp->header.freelist = dsi;
4174
        dp->header.freecnt -= nd;
4175
}
4176
 
4177
 
4178
/*
4179
 *      dtDeleteEntry()
4180
 *
4181
 * function: free a (leaf/internal) entry
4182
 *
4183
 * log freelist header, stbl, and each segment slot of entry
4184
 * (even though last/only segment next field is modified,
4185
 * physical image logging requires all segment slots of
4186
 * the entry logged to avoid applying previous updates
4187
 * to the same slots)
4188
 */
4189
static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
4190
{
4191
        int fsi;                /* free entry slot index */
4192
        s8 *stbl;
4193
        struct dtslot *t;
4194
        int si, freecnt;
4195
        struct dt_lock *dtlck = *dtlock;
4196
        struct lv *lv;
4197
        int xsi, n;
4198
 
4199
        /* get free entry slot index */
4200
        stbl = DT_GETSTBL(p);
4201
        fsi = stbl[fi];
4202
 
4203
        /* open new linelock */
4204
        if (dtlck->index >= dtlck->maxcnt)
4205
                dtlck = (struct dt_lock *) txLinelock(dtlck);
4206
        lv = & dtlck->lv[dtlck->index];
4207
 
4208
        lv->offset = fsi;
4209
 
4210
        /* get the head/only segment */
4211
        t = &p->slot[fsi];
4212
        if (p->header.flag & BT_LEAF)
4213
                si = ((struct ldtentry *) t)->next;
4214
        else
4215
                si = ((struct idtentry *) t)->next;
4216
        t->next = si;
4217
        t->cnt = 1;
4218
 
4219
        n = freecnt = 1;
4220
        xsi = fsi;
4221
 
4222
        /* find the last/only segment */
4223
        while (si >= 0) {
4224
                /* is next slot contiguous ? */
4225
                if (si != xsi + 1) {
4226
                        /* close current linelock */
4227
                        lv->length = n;
4228
                        dtlck->index++;
4229
 
4230
                        /* open new linelock */
4231
                        if (dtlck->index < dtlck->maxcnt)
4232
                                lv++;
4233
                        else {
4234
                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4235
                                lv = & dtlck->lv[0];
4236
                        }
4237
 
4238
                        lv->offset = si;
4239
                        n = 0;
4240
                }
4241
 
4242
                n++;
4243
                xsi = si;
4244
                freecnt++;
4245
 
4246
                t = &p->slot[si];
4247
                t->cnt = 1;
4248
                si = t->next;
4249
        }
4250
 
4251
        /* close current linelock */
4252
        lv->length = n;
4253
        dtlck->index++;
4254
 
4255
        *dtlock = dtlck;
4256
 
4257
        /* update freelist */
4258
        t->next = p->header.freelist;
4259
        p->header.freelist = fsi;
4260
        p->header.freecnt += freecnt;
4261
 
4262
        /* if delete from middle,
4263
         * shift left the succedding entries in the stbl
4264
         */
4265
        si = p->header.nextindex;
4266
        if (fi < si - 1)
4267
                memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4268
 
4269
        p->header.nextindex--;
4270
}
4271
 
4272
 
4273
/*
4274
 *      dtTruncateEntry()
4275
 *
4276
 * function: truncate a (leaf/internal) entry
4277
 *
4278
 * log freelist header, stbl, and each segment slot of entry
4279
 * (even though last/only segment next field is modified,
4280
 * physical image logging requires all segment slots of
4281
 * the entry logged to avoid applying previous updates
4282
 * to the same slots)
4283
 */
4284
static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4285
{
4286
        int tsi;                /* truncate entry slot index */
4287
        s8 *stbl;
4288
        struct dtslot *t;
4289
        int si, freecnt;
4290
        struct dt_lock *dtlck = *dtlock;
4291
        struct lv *lv;
4292
        int fsi, xsi, n;
4293
 
4294
        /* get free entry slot index */
4295
        stbl = DT_GETSTBL(p);
4296
        tsi = stbl[ti];
4297
 
4298
        /* open new linelock */
4299
        if (dtlck->index >= dtlck->maxcnt)
4300
                dtlck = (struct dt_lock *) txLinelock(dtlck);
4301
        lv = & dtlck->lv[dtlck->index];
4302
 
4303
        lv->offset = tsi;
4304
 
4305
        /* get the head/only segment */
4306
        t = &p->slot[tsi];
4307
        ASSERT(p->header.flag & BT_INTERNAL);
4308
        ((struct idtentry *) t)->namlen = 0;
4309
        si = ((struct idtentry *) t)->next;
4310
        ((struct idtentry *) t)->next = -1;
4311
 
4312
        n = 1;
4313
        freecnt = 0;
4314
        fsi = si;
4315
        xsi = tsi;
4316
 
4317
        /* find the last/only segment */
4318
        while (si >= 0) {
4319
                /* is next slot contiguous ? */
4320
                if (si != xsi + 1) {
4321
                        /* close current linelock */
4322
                        lv->length = n;
4323
                        dtlck->index++;
4324
 
4325
                        /* open new linelock */
4326
                        if (dtlck->index < dtlck->maxcnt)
4327
                                lv++;
4328
                        else {
4329
                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4330
                                lv = & dtlck->lv[0];
4331
                        }
4332
 
4333
                        lv->offset = si;
4334
                        n = 0;
4335
                }
4336
 
4337
                n++;
4338
                xsi = si;
4339
                freecnt++;
4340
 
4341
                t = &p->slot[si];
4342
                t->cnt = 1;
4343
                si = t->next;
4344
        }
4345
 
4346
        /* close current linelock */
4347
        lv->length = n;
4348
        dtlck->index++;
4349
 
4350
        *dtlock = dtlck;
4351
 
4352
        /* update freelist */
4353
        if (freecnt == 0)
4354
                return;
4355
        t->next = p->header.freelist;
4356
        p->header.freelist = fsi;
4357
        p->header.freecnt += freecnt;
4358
}
4359
 
4360
 
4361
/*
4362
 *      dtLinelockFreelist()
4363
 */
4364
static void dtLinelockFreelist(dtpage_t * p,    /* directory page */
4365
                               int m,   /* max slot index */
4366
                               struct dt_lock ** dtlock)
4367
{
4368
        int fsi;                /* free entry slot index */
4369
        struct dtslot *t;
4370
        int si;
4371
        struct dt_lock *dtlck = *dtlock;
4372
        struct lv *lv;
4373
        int xsi, n;
4374
 
4375
        /* get free entry slot index */
4376
        fsi = p->header.freelist;
4377
 
4378
        /* open new linelock */
4379
        if (dtlck->index >= dtlck->maxcnt)
4380
                dtlck = (struct dt_lock *) txLinelock(dtlck);
4381
        lv = & dtlck->lv[dtlck->index];
4382
 
4383
        lv->offset = fsi;
4384
 
4385
        n = 1;
4386
        xsi = fsi;
4387
 
4388
        t = &p->slot[fsi];
4389
        si = t->next;
4390
 
4391
        /* find the last/only segment */
4392
        while (si < m && si >= 0) {
4393
                /* is next slot contiguous ? */
4394
                if (si != xsi + 1) {
4395
                        /* close current linelock */
4396
                        lv->length = n;
4397
                        dtlck->index++;
4398
 
4399
                        /* open new linelock */
4400
                        if (dtlck->index < dtlck->maxcnt)
4401
                                lv++;
4402
                        else {
4403
                                dtlck = (struct dt_lock *) txLinelock(dtlck);
4404
                                lv = & dtlck->lv[0];
4405
                        }
4406
 
4407
                        lv->offset = si;
4408
                        n = 0;
4409
                }
4410
 
4411
                n++;
4412
                xsi = si;
4413
 
4414
                t = &p->slot[si];
4415
                si = t->next;
4416
        }
4417
 
4418
        /* close current linelock */
4419
        lv->length = n;
4420
        dtlck->index++;
4421
 
4422
        *dtlock = dtlck;
4423
}
4424
 
4425
 
4426
/*
4427
 * NAME: dtModify
4428
 *
4429
 * FUNCTION: Modify the inode number part of a directory entry
4430
 *
4431
 * PARAMETERS:
4432
 *      tid     - Transaction id
4433
 *      ip      - Inode of parent directory
4434
 *      key     - Name of entry to be modified
4435
 *      orig_ino        - Original inode number expected in entry
4436
 *      new_ino - New inode number to put into entry
4437
 *      flag    - JFS_RENAME
4438
 *
4439
 * RETURNS:
4440
 *      -ESTALE - If entry found does not match orig_ino passed in
4441
 *      -ENOENT - If no entry can be found to match key
4442
 *      0        - If successfully modified entry
4443
 */
4444
int dtModify(tid_t tid, struct inode *ip,
4445
         struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4446
{
4447
        int rc;
4448
        s64 bn;
4449
        struct metapage *mp;
4450
        dtpage_t *p;
4451
        int index;
4452
        struct btstack btstack;
4453
        struct tlock *tlck;
4454
        struct dt_lock *dtlck;
4455
        struct lv *lv;
4456
        s8 *stbl;
4457
        int entry_si;           /* entry slot index */
4458
        struct ldtentry *entry;
4459
 
4460
        /*
4461
         *      search for the entry to modify:
4462
         *
4463
         * dtSearch() returns (leaf page pinned, index at which to modify).
4464
         */
4465
        if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4466
                return rc;
4467
 
4468
        /* retrieve search result */
4469
        DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4470
 
4471
        BT_MARK_DIRTY(mp, ip);
4472
        /*
4473
         * acquire a transaction lock on the leaf page of named entry
4474
         */
4475
        tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4476
        dtlck = (struct dt_lock *) & tlck->lock;
4477
 
4478
        /* get slot index of the entry */
4479
        stbl = DT_GETSTBL(p);
4480
        entry_si = stbl[index];
4481
 
4482
        /* linelock entry */
4483
        ASSERT(dtlck->index == 0);
4484
        lv = & dtlck->lv[0];
4485
        lv->offset = entry_si;
4486
        lv->length = 1;
4487
        dtlck->index++;
4488
 
4489
        /* get the head/only segment */
4490
        entry = (struct ldtentry *) & p->slot[entry_si];
4491
 
4492
        /* substitute the inode number of the entry */
4493
        entry->inumber = cpu_to_le32(new_ino);
4494
 
4495
        /* unpin the leaf page */
4496
        DT_PUTPAGE(mp);
4497
 
4498
        return 0;
4499
}
4500
 
4501
#ifdef _JFS_DEBUG_DTREE
4502
/*
4503
 *      dtDisplayTree()
4504
 *
4505
 * function: traverse forward
4506
 */
4507
int dtDisplayTree(struct inode *ip)
4508
{
4509
        int rc;
4510
        struct metapage *mp;
4511
        dtpage_t *p;
4512
        s64 bn, pbn;
4513
        int index, lastindex, v, h;
4514
        pxd_t *xd;
4515
        struct btstack btstack;
4516
        struct btframe *btsp;
4517
        struct btframe *parent;
4518
        u8 *stbl;
4519
        int psize = 256;
4520
 
4521
        printk("display B+-tree.\n");
4522
 
4523
        /* clear stack */
4524
        btsp = btstack.stack;
4525
 
4526
        /*
4527
         * start with root
4528
         *
4529
         * root resides in the inode
4530
         */
4531
        bn = 0;
4532
        v = h = 0;
4533
 
4534
        /*
4535
         * first access of each page:
4536
         */
4537
      newPage:
4538
        DT_GETPAGE(ip, bn, mp, psize, p, rc);
4539
        if (rc)
4540
                return rc;
4541
 
4542
        /* process entries forward from first index */
4543
        index = 0;
4544
        lastindex = p->header.nextindex - 1;
4545
 
4546
        if (p->header.flag & BT_INTERNAL) {
4547
                /*
4548
                 * first access of each internal page
4549
                 */
4550
                printf("internal page ");
4551
                dtDisplayPage(ip, bn, p);
4552
 
4553
                goto getChild;
4554
        } else {                /* (p->header.flag & BT_LEAF) */
4555
 
4556
                /*
4557
                 * first access of each leaf page
4558
                 */
4559
                printf("leaf page ");
4560
                dtDisplayPage(ip, bn, p);
4561
 
4562
                /*
4563
                 * process leaf page entries
4564
                 *
4565
                 for ( ; index <= lastindex; index++)
4566
                 {
4567
                 }
4568
                 */
4569
 
4570
                /* unpin the leaf page */
4571
                DT_PUTPAGE(mp);
4572
        }
4573
 
4574
        /*
4575
         * go back up to the parent page
4576
         */
4577
      getParent:
4578
        /* pop/restore parent entry for the current child page */
4579
        if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
4580
                /* current page must have been root */
4581
                return;
4582
 
4583
        /*
4584
         * parent page scan completed
4585
         */
4586
        if ((index = parent->index) == (lastindex = parent->lastindex)) {
4587
                /* go back up to the parent page */
4588
                goto getParent;
4589
        }
4590
 
4591
        /*
4592
         * parent page has entries remaining
4593
         */
4594
        /* get back the parent page */
4595
        bn = parent->bn;
4596
        /* v = parent->level; */
4597
        DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4598
        if (rc)
4599
                return rc;
4600
 
4601
        /* get next parent entry */
4602
        index++;
4603
 
4604
        /*
4605
         * internal page: go down to child page of current entry
4606
         */
4607
      getChild:
4608
        /* push/save current parent entry for the child page */
4609
        btsp->bn = pbn = bn;
4610
        btsp->index = index;
4611
        btsp->lastindex = lastindex;
4612
        /* btsp->level = v; */
4613
        /* btsp->node = h; */
4614
        ++btsp;
4615
 
4616
        /* get current entry for the child page */
4617
        stbl = DT_GETSTBL(p);
4618
        xd = (pxd_t *) & p->slot[stbl[index]];
4619
 
4620
        /*
4621
         * first access of each internal entry:
4622
         */
4623
 
4624
        /* get child page */
4625
        bn = addressPXD(xd);
4626
        psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize;
4627
 
4628
        printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn);
4629
        v++;
4630
        h = index;
4631
 
4632
        /* release parent page */
4633
        DT_PUTPAGE(mp);
4634
 
4635
        /* process the child page */
4636
        goto newPage;
4637
}
4638
 
4639
 
4640
/*
4641
 *      dtDisplayPage()
4642
 *
4643
 * function: display page
4644
 */
4645
int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p)
4646
{
4647
        int rc;
4648
        struct metapage *mp;
4649
        struct ldtentry *lh;
4650
        struct idtentry *ih;
4651
        pxd_t *xd;
4652
        int i, j;
4653
        u8 *stbl;
4654
        wchar_t name[JFS_NAME_MAX + 1];
4655
        struct component_name key = { 0, name };
4656
        int freepage = 0;
4657
 
4658
        if (p == NULL) {
4659
                freepage = 1;
4660
                DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4661
                if (rc)
4662
                        return rc;
4663
        }
4664
 
4665
        /* display page control */
4666
        printk("bn:0x%Lx flag:0x%08x nextindex:%d\n",
4667
               bn, p->header.flag, p->header.nextindex);
4668
 
4669
        /* display entries */
4670
        stbl = DT_GETSTBL(p);
4671
        for (i = 0, j = 1; i < p->header.nextindex; i++, j++) {
4672
                dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag);
4673
                key.name[key.namlen] = '\0';
4674
                if (p->header.flag & BT_LEAF) {
4675
                        lh = (struct ldtentry *) & p->slot[stbl[i]];
4676
                        printf("\t[%d] %s:%d", i, key.name,
4677
                               le32_to_cpu(lh->inumber));
4678
                } else {
4679
                        ih = (struct idtentry *) & p->slot[stbl[i]];
4680
                        xd = (pxd_t *) ih;
4681
                        bn = addressPXD(xd);
4682
                        printf("\t[%d] %s:0x%Lx", i, key.name, bn);
4683
                }
4684
 
4685
                if (j == 4) {
4686
                        printf("\n");
4687
                        j = 0;
4688
                }
4689
        }
4690
 
4691
        printf("\n");
4692
 
4693
        if (freepage)
4694
                DT_PUTPAGE(mp);
4695
 
4696
        return 0;
4697
}
4698
#endif                          /* _JFS_DEBUG_DTREE */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.