OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [tags/] [LINUX_2_4_26_OR32/] [linux/] [linux-2.4/] [fs/] [reiserfs/] [fix_node.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
3
 */
4
 
5
/**
6
 ** old_item_num
7
 ** old_entry_num
8
 ** set_entry_sizes
9
 ** create_virtual_node
10
 ** check_left
11
 ** check_right
12
 ** directory_part_size
13
 ** get_num_ver
14
 ** set_parameters
15
 ** is_leaf_removable
16
 ** are_leaves_removable
17
 ** get_empty_nodes
18
 ** get_lfree
19
 ** get_rfree
20
 ** is_left_neighbor_in_cache
21
 ** decrement_key
22
 ** get_far_parent
23
 ** get_parents
24
 ** can_node_be_removed
25
 ** ip_check_balance
26
 ** dc_check_balance_internal
27
 ** dc_check_balance_leaf
28
 ** dc_check_balance
29
 ** check_balance
30
 ** get_direct_parent
31
 ** get_neighbors
32
 ** fix_nodes
33
 **
34
 **
35
 **/
36
 
37
 
38
#include <linux/config.h>
39
#include <linux/sched.h>
40
#include <linux/string.h>
41
#include <linux/locks.h>
42
#include <linux/reiserfs_fs.h>
43
 
44
 
45
/* To make any changes in the tree we find a node, that contains item
46
   to be changed/deleted or position in the node we insert a new item
47
   to. We call this node S. To do balancing we need to decide what we
48
   will shift to left/right neighbor, or to a new node, where new item
49
   will be etc. To make this analysis simpler we build virtual
50
   node. Virtual node is an array of items, that will replace items of
51
   node S. (For instance if we are going to delete an item, virtual
52
   node does not contain it). Virtual node keeps information about
53
   item sizes and types, mergeability of first and last items, sizes
54
   of all entries in directory item. We use this array of items when
55
   calculating what we can shift to neighbors and how many nodes we
56
   have to have if we do not any shiftings, if we shift to left/right
57
   neighbor or to both. */
58
 
59
 
60
/* taking item number in virtual node, returns number of item, that it has in source buffer */
61
static inline int old_item_num (int new_num, int affected_item_num, int mode)
62
{
63
  if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
64
    return new_num;
65
 
66
  if (mode == M_INSERT) {
67
 
68
    RFALSE( new_num == 0,
69
            "vs-8005: for INSERT mode and item number of inserted item");
70
 
71
    return new_num - 1;
72
  }
73
 
74
  RFALSE( mode != M_DELETE,
75
          "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", mode);
76
  /* delete mode */
77
  return new_num + 1;
78
}
79
 
80
static void create_virtual_node (struct tree_balance * tb, int h)
81
{
82
    struct item_head * ih;
83
    struct virtual_node * vn = tb->tb_vn;
84
    int new_num;
85
    struct buffer_head * Sh;    /* this comes from tb->S[h] */
86
 
87
    Sh = PATH_H_PBUFFER (tb->tb_path, h);
88
 
89
    /* size of changed node */
90
    vn->vn_size = MAX_CHILD_SIZE (Sh) - B_FREE_SPACE (Sh) + tb->insert_size[h];
91
 
92
    /* for internal nodes array if virtual items is not created */
93
    if (h) {
94
        vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
95
        return;
96
    }
97
 
98
    /* number of items in virtual node  */
99
    vn->vn_nr_item = B_NR_ITEMS (Sh) + ((vn->vn_mode == M_INSERT)? 1 : 0) - ((vn->vn_mode == M_DELETE)? 1 : 0);
100
 
101
    /* first virtual item */
102
    vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
103
    memset (vn->vn_vi, 0, vn->vn_nr_item * sizeof (struct virtual_item));
104
    vn->vn_free_ptr += vn->vn_nr_item * sizeof (struct virtual_item);
105
 
106
 
107
    /* first item in the node */
108
    ih = B_N_PITEM_HEAD (Sh, 0);
109
 
110
    /* define the mergeability for 0-th item (if it is not being deleted) */
111
    if (op_is_left_mergeable (&(ih->ih_key), Sh->b_size) && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
112
            vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
113
 
114
    /* go through all items those remain in the virtual node (except for the new (inserted) one) */
115
    for (new_num = 0; new_num < vn->vn_nr_item; new_num ++) {
116
        int j;
117
        struct virtual_item * vi = vn->vn_vi + new_num;
118
        int is_affected = ((new_num != vn->vn_affected_item_num) ? 0 : 1);
119
 
120
 
121
        if (is_affected && vn->vn_mode == M_INSERT)
122
            continue;
123
 
124
        /* get item number in source node */
125
        j = old_item_num (new_num, vn->vn_affected_item_num, vn->vn_mode);
126
 
127
        vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
128
        vi->vi_ih = ih + j;
129
        vi->vi_item = B_I_PITEM (Sh, ih + j);
130
        vi->vi_uarea = vn->vn_free_ptr;
131
 
132
        // FIXME: there is no check, that item operation did not
133
        // consume too much memory
134
        vn->vn_free_ptr += op_create_vi (vn, vi, is_affected, tb->insert_size [0]);
135
        if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
136
            reiserfs_panic (tb->tb_sb, "vs-8030: create_virtual_node: "
137
                            "virtual node space consumed");
138
 
139
        if (!is_affected)
140
            /* this is not being changed */
141
            continue;
142
 
143
        if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
144
            vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
145
            vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
146
        }
147
    }
148
 
149
 
150
    /* virtual inserted item is not defined yet */
151
    if (vn->vn_mode == M_INSERT) {
152
        struct virtual_item * vi = vn->vn_vi + vn->vn_affected_item_num;
153
 
154
        RFALSE( vn->vn_ins_ih == 0,
155
                "vs-8040: item header of inserted item is not specified");
156
        vi->vi_item_len = tb->insert_size[0];
157
        vi->vi_ih = vn->vn_ins_ih;
158
        vi->vi_item = vn->vn_data;
159
        vi->vi_uarea = vn->vn_free_ptr;
160
 
161
        op_create_vi (vn, vi, 0/*not pasted or cut*/, tb->insert_size [0]);
162
    }
163
 
164
    /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
165
    if (tb->CFR[0]) {
166
        struct key * key;
167
 
168
        key = B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]);
169
        if (op_is_left_mergeable (key, Sh->b_size) && (vn->vn_mode != M_DELETE ||
170
                                                       vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1))
171
                vn->vn_vi[vn->vn_nr_item-1].vi_type |= VI_TYPE_RIGHT_MERGEABLE;
172
 
173
#ifdef CONFIG_REISERFS_CHECK
174
        if (op_is_left_mergeable (key, Sh->b_size) &&
175
            !(vn->vn_mode != M_DELETE || vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1) ) {
176
            /* we delete last item and it could be merged with right neighbor's first item */
177
            if (!(B_NR_ITEMS (Sh) == 1 && is_direntry_le_ih (B_N_PITEM_HEAD (Sh, 0)) &&
178
                  I_ENTRY_COUNT (B_N_PITEM_HEAD (Sh, 0)) == 1)) {
179
                /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
180
                print_block (Sh, 0, -1, -1);
181
                reiserfs_panic (tb->tb_sb, "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
182
                                key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE);
183
            } else
184
                /* we can delete directory item, that has only one directory entry in it */
185
                ;
186
        }
187
#endif
188
 
189
    }
190
}
191
 
192
 
193
/* using virtual node check, how many items can be shifted to left
194
   neighbor */
195
static void check_left (struct tree_balance * tb, int h, int cur_free)
196
{
197
    int i;
198
    struct virtual_node * vn = tb->tb_vn;
199
    struct virtual_item * vi;
200
    int d_size, ih_size;
201
 
202
    RFALSE( cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
203
 
204
    /* internal level */
205
    if (h > 0) {
206
        tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
207
        return;
208
    }
209
 
210
    /* leaf level */
211
 
212
    if (!cur_free || !vn->vn_nr_item) {
213
        /* no free space or nothing to move */
214
        tb->lnum[h] = 0;
215
        tb->lbytes = -1;
216
        return;
217
    }
218
 
219
    RFALSE( !PATH_H_PPARENT (tb->tb_path, 0),
220
            "vs-8055: parent does not exist or invalid");
221
 
222
    vi = vn->vn_vi;
223
    if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
224
        /* all contents of S[0] fits into L[0] */
225
 
226
        RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
227
                "vs-8055: invalid mode or balance condition failed");
228
 
229
        tb->lnum[0] = vn->vn_nr_item;
230
        tb->lbytes = -1;
231
        return;
232
    }
233
 
234
 
235
    d_size = 0, ih_size = IH_SIZE;
236
 
237
    /* first item may be merge with last item in left neighbor */
238
    if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
239
        d_size = -((int)IH_SIZE), ih_size = 0;
240
 
241
    tb->lnum[0] = 0;
242
    for (i = 0; i < vn->vn_nr_item; i ++, ih_size = IH_SIZE, d_size = 0, vi ++) {
243
        d_size += vi->vi_item_len;
244
        if (cur_free >= d_size) {
245
            /* the item can be shifted entirely */
246
            cur_free -= d_size;
247
            tb->lnum[0] ++;
248
            continue;
249
        }
250
 
251
        /* the item cannot be shifted entirely, try to split it */
252
        /* check whether L[0] can hold ih and at least one byte of the item body */
253
        if (cur_free <= ih_size) {
254
            /* cannot shift even a part of the current item */
255
            tb->lbytes = -1;
256
            return;
257
        }
258
        cur_free -= ih_size;
259
 
260
        tb->lbytes = op_check_left (vi, cur_free, 0, 0);
261
        if (tb->lbytes != -1)
262
            /* count partially shifted item */
263
            tb->lnum[0] ++;
264
 
265
        break;
266
    }
267
 
268
    return;
269
}
270
 
271
 
272
/* using virtual node check, how many items can be shifted to right
273
   neighbor */
274
static void check_right (struct tree_balance * tb, int h, int cur_free)
275
{
276
    int i;
277
    struct virtual_node * vn = tb->tb_vn;
278
    struct virtual_item * vi;
279
    int d_size, ih_size;
280
 
281
    RFALSE( cur_free < 0, "vs-8070: cur_free < 0");
282
 
283
    /* internal level */
284
    if (h > 0) {
285
        tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
286
        return;
287
    }
288
 
289
    /* leaf level */
290
 
291
    if (!cur_free || !vn->vn_nr_item) {
292
        /* no free space  */
293
        tb->rnum[h] = 0;
294
        tb->rbytes = -1;
295
        return;
296
    }
297
 
298
    RFALSE( !PATH_H_PPARENT (tb->tb_path, 0),
299
            "vs-8075: parent does not exist or invalid");
300
 
301
    vi = vn->vn_vi + vn->vn_nr_item - 1;
302
    if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
303
        /* all contents of S[0] fits into R[0] */
304
 
305
        RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
306
                "vs-8080: invalid mode or balance condition failed");
307
 
308
        tb->rnum[h] = vn->vn_nr_item;
309
        tb->rbytes = -1;
310
        return;
311
    }
312
 
313
    d_size = 0, ih_size = IH_SIZE;
314
 
315
    /* last item may be merge with first item in right neighbor */
316
    if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
317
        d_size = -(int)IH_SIZE, ih_size = 0;
318
 
319
    tb->rnum[0] = 0;
320
    for (i = vn->vn_nr_item - 1; i >= 0; i --, d_size = 0, ih_size = IH_SIZE, vi --) {
321
        d_size += vi->vi_item_len;
322
        if (cur_free >= d_size) {
323
            /* the item can be shifted entirely */
324
            cur_free -= d_size;
325
            tb->rnum[0] ++;
326
            continue;
327
        }
328
 
329
        /* check whether R[0] can hold ih and at least one byte of the item body */
330
        if ( cur_free <= ih_size ) {    /* cannot shift even a part of the current item */
331
            tb->rbytes = -1;
332
            return;
333
        }
334
 
335
        /* R[0] can hold the header of the item and at least one byte of its body */
336
        cur_free -= ih_size;    /* cur_free is still > 0 */
337
 
338
        tb->rbytes = op_check_right (vi, cur_free);
339
        if (tb->rbytes != -1)
340
            /* count partially shifted item */
341
            tb->rnum[0] ++;
342
 
343
        break;
344
    }
345
 
346
  return;
347
}
348
 
349
 
350
/*
351
 * from - number of items, which are shifted to left neighbor entirely
352
 * to - number of item, which are shifted to right neighbor entirely
353
 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
354
 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
355
static int get_num_ver (int mode, struct tree_balance * tb, int h,
356
                        int from, int from_bytes,
357
                        int to,   int to_bytes,
358
                        short * snum012, int flow
359
    )
360
{
361
    int i;
362
    int cur_free;
363
    //    int bytes;
364
    int units;
365
    struct virtual_node * vn = tb->tb_vn;
366
    //    struct virtual_item * vi;
367
 
368
    int total_node_size, max_node_size, current_item_size;
369
    int needed_nodes;
370
    int start_item,     /* position of item we start filling node from */
371
        end_item,       /* position of item we finish filling node by */
372
        start_bytes,/* number of first bytes (entries for directory) of start_item-th item
373
                       we do not include into node that is being filled */
374
        end_bytes;      /* number of last bytes (entries for directory) of end_item-th item
375
                           we do node include into node that is being filled */
376
    int split_item_positions[2]; /* these are positions in virtual item of
377
                                    items, that are split between S[0] and
378
                                    S1new and S1new and S2new */
379
 
380
    split_item_positions[0] = -1;
381
    split_item_positions[1] = -1;
382
 
383
    /* We only create additional nodes if we are in insert or paste mode
384
       or we are in replace mode at the internal level. If h is 0 and
385
       the mode is M_REPLACE then in fix_nodes we change the mode to
386
       paste or insert before we get here in the code.  */
387
    RFALSE( tb->insert_size[h] < 0  || (mode != M_INSERT && mode != M_PASTE),
388
            "vs-8100: insert_size < 0 in overflow");
389
 
390
    max_node_size = MAX_CHILD_SIZE (PATH_H_PBUFFER (tb->tb_path, h));
391
 
392
    /* snum012 [0-2] - number of items, that lay
393
       to S[0], first new node and second new node */
394
    snum012[3] = -1;    /* s1bytes */
395
    snum012[4] = -1;    /* s2bytes */
396
 
397
    /* internal level */
398
    if (h > 0) {
399
        i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
400
        if (i == max_node_size)
401
            return 1;
402
        return (i / max_node_size + 1);
403
    }
404
 
405
    /* leaf level */
406
    needed_nodes = 1;
407
    total_node_size = 0;
408
    cur_free = max_node_size;
409
 
410
    // start from 'from'-th item
411
    start_item = from;
412
    // skip its first 'start_bytes' units
413
    start_bytes = ((from_bytes != -1) ? from_bytes : 0);
414
 
415
    // last included item is the 'end_item'-th one
416
    end_item = vn->vn_nr_item - to - 1;
417
    // do not count last 'end_bytes' units of 'end_item'-th item
418
    end_bytes = (to_bytes != -1) ? to_bytes : 0;
419
 
420
    /* go through all item beginning from the start_item-th item and ending by
421
       the end_item-th item. Do not count first 'start_bytes' units of
422
       'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
423
 
424
    for (i = start_item; i <= end_item; i ++) {
425
        struct virtual_item * vi = vn->vn_vi + i;
426
        int skip_from_end = ((i == end_item) ? end_bytes : 0);
427
 
428
        RFALSE( needed_nodes > 3, "vs-8105: too many nodes are needed");
429
 
430
        /* get size of current item */
431
        current_item_size = vi->vi_item_len;
432
 
433
        /* do not take in calculation head part (from_bytes) of from-th item */
434
        current_item_size -= op_part_size (vi, 0/*from start*/, start_bytes);
435
 
436
        /* do not take in calculation tail part of last item */
437
        current_item_size -= op_part_size (vi, 1/*from end*/, skip_from_end);
438
 
439
        /* if item fits into current node entierly */
440
        if (total_node_size + current_item_size <= max_node_size) {
441
            snum012[needed_nodes - 1] ++;
442
            total_node_size += current_item_size;
443
            start_bytes = 0;
444
            continue;
445
        }
446
 
447
        if (current_item_size > max_node_size) {
448
            /* virtual item length is longer, than max size of item in
449
               a node. It is impossible for direct item */
450
            RFALSE( is_direct_le_ih (vi->vi_ih),
451
                    "vs-8110: "
452
                    "direct item length is %d. It can not be longer than %d",
453
                    current_item_size, max_node_size);
454
            /* we will try to split it */
455
            flow = 1;
456
        }
457
 
458
        if (!flow) {
459
            /* as we do not split items, take new node and continue */
460
            needed_nodes ++; i --; total_node_size = 0;
461
            continue;
462
        }
463
 
464
        // calculate number of item units which fit into node being
465
        // filled
466
        {
467
            int free_space;
468
 
469
            free_space = max_node_size - total_node_size - IH_SIZE;
470
            units = op_check_left (vi, free_space, start_bytes, skip_from_end);
471
            if (units == -1) {
472
                /* nothing fits into current node, take new node and continue */
473
                needed_nodes ++, i--, total_node_size = 0;
474
                continue;
475
            }
476
        }
477
 
478
        /* something fits into the current node */
479
        //if (snum012[3] != -1 || needed_nodes != 1)
480
        //  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
481
        //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
482
        start_bytes += units;
483
        snum012[needed_nodes - 1 + 3] = units;
484
 
485
        if (needed_nodes > 2)
486
            reiserfs_warning (tb->tb_sb, "vs-8111: get_num_ver: split_item_position is out of boundary\n");
487
        snum012[needed_nodes - 1] ++;
488
        split_item_positions[needed_nodes - 1] = i;
489
        needed_nodes ++;
490
        /* continue from the same item with start_bytes != -1 */
491
        start_item = i;
492
        i --;
493
        total_node_size = 0;
494
    }
495
 
496
    // sum012[4] (if it is not -1) contains number of units of which
497
    // are to be in S1new, snum012[3] - to be in S0. They are supposed
498
    // to be S1bytes and S2bytes correspondingly, so recalculate
499
    if (snum012[4] > 0) {
500
        int split_item_num;
501
        int bytes_to_r, bytes_to_l;
502
        int bytes_to_S1new;
503
 
504
        split_item_num = split_item_positions[1];
505
        bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
506
        bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
507
        bytes_to_S1new = ((split_item_positions[0] == split_item_positions[1]) ? snum012[3] : 0);
508
 
509
        // s2bytes
510
        snum012[4] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[4] - bytes_to_r - bytes_to_l - bytes_to_S1new;
511
 
512
        if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY)
513
            reiserfs_warning (tb->tb_sb, "vs-8115: get_num_ver: not directory item\n");
514
    }
515
 
516
    /* now we know S2bytes, calculate S1bytes */
517
    if (snum012[3] > 0) {
518
        int split_item_num;
519
        int bytes_to_r, bytes_to_l;
520
        int bytes_to_S2new;
521
 
522
        split_item_num = split_item_positions[0];
523
        bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
524
        bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
525
        bytes_to_S2new = ((split_item_positions[0] == split_item_positions[1] && snum012[4] != -1) ? snum012[4] : 0);
526
 
527
        // s1bytes
528
        snum012[3] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[3] - bytes_to_r - bytes_to_l - bytes_to_S2new;
529
    }
530
 
531
    return needed_nodes;
532
}
533
 
534
 
535
#ifdef CONFIG_REISERFS_CHECK
536
extern struct tree_balance * cur_tb;
537
#endif
538
 
539
 
540
/* Set parameters for balancing.
541
 * Performs write of results of analysis of balancing into structure tb,
542
 * where it will later be used by the functions that actually do the balancing.
543
 * Parameters:
544
 *      tb      tree_balance structure;
545
 *      h       current level of the node;
546
 *      lnum    number of items from S[h] that must be shifted to L[h];
547
 *      rnum    number of items from S[h] that must be shifted to R[h];
548
 *      blk_num number of blocks that S[h] will be splitted into;
549
 *      s012    number of items that fall into splitted nodes.
550
 *      lbytes  number of bytes which flow to the left neighbor from the item that is not
551
 *              not shifted entirely
552
 *      rbytes  number of bytes which flow to the right neighbor from the item that is not
553
 *              not shifted entirely
554
 *      s1bytes number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
555
 */
556
 
557
static void set_parameters (struct tree_balance * tb, int h, int lnum,
558
                            int rnum, int blk_num, short * s012, int lb, int rb)
559
{
560
 
561
  tb->lnum[h] = lnum;
562
  tb->rnum[h] = rnum;
563
  tb->blknum[h] = blk_num;
564
 
565
  if (h == 0)
566
    {  /* only for leaf level */
567
      if (s012 != NULL)
568
        {
569
          tb->s0num = * s012 ++,
570
          tb->s1num = * s012 ++,
571
          tb->s2num = * s012 ++;
572
          tb->s1bytes = * s012 ++;
573
          tb->s2bytes = * s012;
574
        }
575
      tb->lbytes = lb;
576
      tb->rbytes = rb;
577
    }
578
  PROC_INFO_ADD( tb -> tb_sb, lnum[ h ], lnum );
579
  PROC_INFO_ADD( tb -> tb_sb, rnum[ h ], rnum );
580
 
581
  PROC_INFO_ADD( tb -> tb_sb, lbytes[ h ], lb );
582
  PROC_INFO_ADD( tb -> tb_sb, rbytes[ h ], rb );
583
}
584
 
585
 
586
 
587
/* check, does node disappear if we shift tb->lnum[0] items to left
588
   neighbor and tb->rnum[0] to the right one. */
589
static int is_leaf_removable (struct tree_balance * tb)
590
{
591
  struct virtual_node * vn = tb->tb_vn;
592
  int to_left, to_right;
593
  int size;
594
  int remain_items;
595
 
596
  /* number of items, that will be shifted to left (right) neighbor
597
     entirely */
598
  to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
599
  to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
600
  remain_items = vn->vn_nr_item;
601
 
602
  /* how many items remain in S[0] after shiftings to neighbors */
603
  remain_items -= (to_left + to_right);
604
 
605
  if (remain_items < 1) {
606
    /* all content of node can be shifted to neighbors */
607
    set_parameters (tb, 0, to_left, vn->vn_nr_item - to_left, 0, NULL, -1, -1);
608
    return 1;
609
  }
610
 
611
  if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
612
    /* S[0] is not removable */
613
    return 0;
614
 
615
  /* check, whether we can divide 1 remaining item between neighbors */
616
 
617
  /* get size of remaining item (in item units) */
618
  size = op_unit_num (&(vn->vn_vi[to_left]));
619
 
620
  if (tb->lbytes + tb->rbytes >= size) {
621
    set_parameters (tb, 0, to_left + 1, to_right + 1, 0, NULL, tb->lbytes, -1);
622
    return 1;
623
  }
624
 
625
  return 0;
626
}
627
 
628
 
629
/* check whether L, S, R can be joined in one node */
630
static int are_leaves_removable (struct tree_balance * tb, int lfree, int rfree)
631
{
632
  struct virtual_node * vn = tb->tb_vn;
633
  int ih_size;
634
  struct buffer_head *S0;
635
 
636
  S0 = PATH_H_PBUFFER (tb->tb_path, 0);
637
 
638
  ih_size = 0;
639
  if (vn->vn_nr_item) {
640
    if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
641
      ih_size += IH_SIZE;
642
 
643
        if (vn->vn_vi[vn->vn_nr_item-1].vi_type & VI_TYPE_RIGHT_MERGEABLE)
644
            ih_size += IH_SIZE;
645
    } else {
646
        /* there was only one item and it will be deleted */
647
        struct item_head * ih;
648
 
649
    RFALSE( B_NR_ITEMS (S0) != 1,
650
            "vs-8125: item number must be 1: it is %d", B_NR_ITEMS(S0));
651
 
652
    ih = B_N_PITEM_HEAD (S0, 0);
653
    if (tb->CFR[0] && !comp_short_le_keys (&(ih->ih_key), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0])))
654
        if (is_direntry_le_ih (ih)) {
655
            /* Directory must be in correct state here: that is
656
               somewhere at the left side should exist first directory
657
               item. But the item being deleted can not be that first
658
               one because its right neighbor is item of the same
659
               directory. (But first item always gets deleted in last
660
               turn). So, neighbors of deleted item can be merged, so
661
               we can save ih_size */
662
            ih_size = IH_SIZE;
663
 
664
            /* we might check that left neighbor exists and is of the
665
               same directory */
666
            RFALSE(le_ih_k_offset (ih) == DOT_OFFSET,
667
                "vs-8130: first directory item can not be removed until directory is not empty");
668
      }
669
 
670
  }
671
 
672
  if (MAX_CHILD_SIZE (S0) + vn->vn_size <= rfree + lfree + ih_size) {
673
    set_parameters (tb, 0, -1, -1, -1, NULL, -1, -1);
674
    PROC_INFO_INC( tb -> tb_sb, leaves_removable );
675
    return 1;
676
  }
677
  return 0;
678
 
679
}
680
 
681
 
682
 
683
/* when we do not split item, lnum and rnum are numbers of entire items */
/* Record a shift-left decision.  Wrapped in do { } while (0) so the
   macro behaves as a single statement (safe in un-braced if/else). */
#define SET_PAR_SHIFT_LEFT \
do {\
   if (h)\
   {\
      int to_l;\
      \
      to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
                 (MAX_NR_KEY(Sh) + 1 - lpar);\
                 \
      set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
   }\
   else \
   {\
      if (lset==LEFT_SHIFT_FLOW)\
        set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
                        tb->lbytes, -1);\
      else\
        set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
                        -1, -1);\
   }\
} while (0)
703
 
704
 
705
/* Record a shift-right decision.  Wrapped in do { } while (0) so the
   macro behaves as a single statement (safe in un-braced if/else). */
#define SET_PAR_SHIFT_RIGHT \
do {\
   if (h)\
   {\
      int to_r;\
      \
      to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
      \
      set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
   }\
   else \
   {\
      if (rset==RIGHT_SHIFT_FLOW)\
        set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
                     -1, tb->rbytes);\
      else\
        set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
                     -1, -1);\
   }\
} while (0)
723
 
724
 
725
void free_buffers_in_tb (
726
                       struct tree_balance * p_s_tb
727
                       ) {
728
  int n_counter;
729
 
730
  decrement_counters_in_path(p_s_tb->tb_path);
731
 
732
  for ( n_counter = 0; n_counter < MAX_HEIGHT; n_counter++ ) {
733
    decrement_bcount(p_s_tb->L[n_counter]);
734
    p_s_tb->L[n_counter] = NULL;
735
    decrement_bcount(p_s_tb->R[n_counter]);
736
    p_s_tb->R[n_counter] = NULL;
737
    decrement_bcount(p_s_tb->FL[n_counter]);
738
    p_s_tb->FL[n_counter] = NULL;
739
    decrement_bcount(p_s_tb->FR[n_counter]);
740
    p_s_tb->FR[n_counter] = NULL;
741
    decrement_bcount(p_s_tb->CFL[n_counter]);
742
    p_s_tb->CFL[n_counter] = NULL;
743
    decrement_bcount(p_s_tb->CFR[n_counter]);
744
    p_s_tb->CFR[n_counter] = NULL;
745
  }
746
}
747
 
748
 
749
/* Get new buffers for storing new nodes that are created while balancing.
750
 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
751
 *              CARRY_ON - schedule didn't occur while the function worked;
752
 *              NO_DISK_SPACE - no disk space.
753
 */
754
/* The function is NOT SCHEDULE-SAFE! */
755
static int  get_empty_nodes(
756
              struct tree_balance * p_s_tb,
757
              int n_h
758
            ) {
759
  struct buffer_head  * p_s_new_bh,
760
                      * p_s_Sh = PATH_H_PBUFFER (p_s_tb->tb_path, n_h);
761
  unsigned long       * p_n_blocknr,
762
                        a_n_blocknrs[MAX_AMOUNT_NEEDED] = {0, };
763
  int                   n_counter,
764
                        n_number_of_freeblk,
765
                        n_amount_needed,/* number of needed empty blocks */
766
                        n_retval = CARRY_ON;
767
  struct super_block *  p_s_sb = p_s_tb->tb_sb;
768
 
769
 
770
  /* number_of_freeblk is the number of empty blocks which have been
771
     acquired for use by the balancing algorithm minus the number of
772
     empty blocks used in the previous levels of the analysis,
773
     number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
774
     after empty blocks are acquired, and the balancing analysis is
775
     then restarted, amount_needed is the number needed by this level
776
     (n_h) of the balancing analysis.
777
 
778
     Note that for systems with many processes writing, it would be
779
     more layout optimal to calculate the total number needed by all
780
     levels and then to run reiserfs_new_blocks to get all of them at once.  */
781
 
782
  /* Initiate number_of_freeblk to the amount acquired prior to the restart of
783
     the analysis or 0 if not restarted, then subtract the amount needed
784
     by all of the levels of the tree below n_h. */
785
  /* blknum includes S[n_h], so we subtract 1 in this calculation */
786
  for ( n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; n_counter < n_h; n_counter++ )
787
    n_number_of_freeblk -= ( p_s_tb->blknum[n_counter] ) ? (p_s_tb->blknum[n_counter] - 1) : 0;
788
 
789
  /* Allocate missing empty blocks. */
790
  /* if p_s_Sh == 0  then we are getting a new root */
791
  n_amount_needed = ( p_s_Sh ) ? (p_s_tb->blknum[n_h] - 1) : 1;
792
  /*  Amount_needed = the amount that we need more than the amount that we have. */
793
  if ( n_amount_needed > n_number_of_freeblk )
794
    n_amount_needed -= n_number_of_freeblk;
795
  else /* If we have enough already then there is nothing to do. */
796
    return CARRY_ON;
797
 
798
  if ( reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs,
799
                                   n_amount_needed) == NO_DISK_SPACE )
800
    return NO_DISK_SPACE;
801
 
802
  /* for each blocknumber we just got, get a buffer and stick it on FEB */
803
  for ( p_n_blocknr = a_n_blocknrs, n_counter = 0; n_counter < n_amount_needed;
804
        p_n_blocknr++, n_counter++ ) {
805
 
806
    RFALSE( ! *p_n_blocknr,
807
            "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
808
 
809
    p_s_new_bh = getblk(p_s_sb->s_dev, *p_n_blocknr, p_s_sb->s_blocksize);
810
    if (atomic_read (&(p_s_new_bh->b_count)) > 1) {
811
/*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/
812
/*
813
      reiserfs_warning (p_s_sb, "waiting for buffer %b, iput inode pid = %d, this pid %d, mode %c, %h\n",
814
                        p_s_new_bh, put_inode_pid, current->pid, p_s_tb->tb_vn->vn_mode, p_s_tb->tb_vn->vn_ins_ih);
815
      print_tb (0, 0, 0, p_s_tb, "tb");
816
*/
817
/*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/
818
      if (atomic_read(&(p_s_new_bh->b_count)) > 2 ||
819
          !(buffer_journaled(p_s_new_bh) || buffer_journal_dirty(p_s_new_bh))) {
820
        n_retval = REPEAT_SEARCH ;
821
        free_buffers_in_tb (p_s_tb);
822
        wait_buffer_until_released (p_s_new_bh);
823
      }
824
    }
825
    RFALSE( (atomic_read (&(p_s_new_bh->b_count)) != 1 ||
826
             buffer_dirty (p_s_new_bh)) &&
827
            (atomic_read(&(p_s_new_bh->b_count)) > 2 ||
828
             !(buffer_journaled(p_s_new_bh) ||
829
               buffer_journal_dirty(p_s_new_bh))),
830
            "PAP-8140: not free or dirty buffer %b for the new block",
831
            p_s_new_bh);
832
 
833
    /* Put empty buffers into the array. */
834
    if (p_s_tb->FEB[p_s_tb->cur_blknum])
835
      BUG();
836
 
837
    mark_buffer_journal_new(p_s_new_bh) ;
838
    p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
839
  }
840
 
841
  if ( n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB (p_s_tb) )
842
    n_retval = REPEAT_SEARCH ;
843
 
844
  return n_retval;
845
}
846
 
847
 
848
/* Get free space of the left neighbor, which is stored in the parent
849
 * node of the left neighbor.  */
850
static int get_lfree (struct tree_balance * tb, int h)
851
{
852
    struct buffer_head * l, * f;
853
    int order;
854
 
855
    if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
856
        return 0;
857
 
858
    if (f == l)
859
        order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) - 1;
860
    else {
861
        order = B_NR_ITEMS (l);
862
        f = l;
863
    }
864
 
865
    return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f,order)));
866
}
867
 
868
 
869
/* Get free space of the right neighbor,
870
 * which is stored in the parent node of the right neighbor.
871
 */
872
static int get_rfree (struct tree_balance * tb, int h)
873
{
874
  struct buffer_head * r, * f;
875
  int order;
876
 
877
  if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
878
    return 0;
879
 
880
  if (f == r)
881
      order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) + 1;
882
  else {
883
      order = 0;
884
      f = r;
885
  }
886
 
887
  return (MAX_CHILD_SIZE(f) - dc_size( B_N_CHILD(f,order)));
888
 
889
}
890
 
891
 
892
/* Check whether left neighbor is in memory. */
893
static int  is_left_neighbor_in_cache(
894
              struct tree_balance * p_s_tb,
895
              int                   n_h
896
            ) {
897
  struct buffer_head  * p_s_father, * left;
898
  struct super_block  * p_s_sb = p_s_tb->tb_sb;
899
  unsigned long         n_left_neighbor_blocknr;
900
  int                   n_left_neighbor_position;
901
 
902
  if ( ! p_s_tb->FL[n_h] ) /* Father of the left neighbor does not exist. */
903
    return 0;
904
 
905
  /* Calculate father of the node to be balanced. */
906
  p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);
907
 
908
  RFALSE( ! p_s_father ||
909
          ! B_IS_IN_TREE (p_s_father) ||
910
          ! B_IS_IN_TREE (p_s_tb->FL[n_h]) ||
911
          ! buffer_uptodate (p_s_father) ||
912
          ! buffer_uptodate (p_s_tb->FL[n_h]),
913
          "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
914
          p_s_father, p_s_tb->FL[n_h]);
915
 
916
 
917
  /* Get position of the pointer to the left neighbor into the left father. */
918
  n_left_neighbor_position = ( p_s_father == p_s_tb->FL[n_h] ) ?
919
                      p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]);
920
  /* Get left neighbor block number. */
921
  n_left_neighbor_blocknr = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
922
  /* Look for the left neighbor in the cache. */
923
  if ( (left = sb_get_hash_table(p_s_sb, n_left_neighbor_blocknr)) ) {
924
 
925
    RFALSE( buffer_uptodate (left) && ! B_IS_IN_TREE(left),
926
            "vs-8170: left neighbor (%b %z) is not in the tree", left, left);
927
    put_bh(left) ;
928
    return 1;
929
  }
930
 
931
  return 0;
932
}
933
 
934
 
935
#define LEFT_PARENTS  'l'
936
#define RIGHT_PARENTS 'r'
937
 
938
 
939
/* Decrement p_s_key to the key immediately preceding it in key order.
 * The actual decrement is delegated to the operation vector registered
 * for the item type encoded in the key. */
static void decrement_key (struct cpu_key * p_s_key)
{
    // call item specific function for this key
    item_ops[cpu_key_k_type (p_s_key)]->decrement_key (p_s_key);
}
944
 
945
 
946
 
947
 
948
/* Calculate far left/right parent of the left/right neighbor of the current node, that
 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
 * Calculate left/right common parent of the current node and L[h]/R[h].
 * Calculate left/right delimiting key position.
 * Returns:     REPEAT_SEARCH - path is no longer valid (tree changed under us)
 *                              and the search must be repeated;
 *              IO_ERROR      - search_by_key failed with an I/O error
 *                              (the neighbor-father path is already released);
 *              CARRY_ON      - parents were obtained successfully.
 */
static int  get_far_parent (struct tree_balance *   p_s_tb,
                            int                     n_h,
                            struct buffer_head  **  pp_s_father,
                            struct buffer_head  **  pp_s_com_father,
                            char                    c_lr_par)
{
    struct buffer_head  * p_s_parent;
    INITIALIZE_PATH (s_path_to_neighbor_father);
    struct path * p_s_path = p_s_tb->tb_path;
    struct cpu_key      s_lr_father_key;
    int                   n_counter,
        n_position = INT_MAX,
        n_first_last_position = 0,
        n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);

    /* Starting from F[n_h] go upwards in the tree, and look for the common
      ancestor of F[n_h], and its neighbor l/r, that should be obtained. */

    n_counter = n_path_offset;

    RFALSE( n_counter < FIRST_PATH_ELEMENT_OFFSET,
            "PAP-8180: invalid path length");


    for ( ; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--  )  {
        /* Check whether parent of the current buffer in the path is really parent in the tree. */
        if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)) )
            return REPEAT_SEARCH;
        /* Check whether position in the parent is correct. */
        if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_counter - 1)) > B_NR_ITEMS(p_s_parent) )
            return REPEAT_SEARCH;
        /* Check whether parent at the path really points to the child. */
        if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
             PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr )
            return REPEAT_SEARCH;
        /* Stop at the first ancestor where our subtree is not the
           first (for LEFT_PARENTS) / last (for RIGHT_PARENTS) child:
           that ancestor is the common father we are looking for.
           (n_first_last_position stays 0 for LEFT_PARENTS.) */
        if ( c_lr_par == RIGHT_PARENTS )
            n_first_last_position = B_NR_ITEMS (p_s_parent);
        if ( n_position != n_first_last_position ) {
            *pp_s_com_father = p_s_parent;
            get_bh(*pp_s_com_father) ;
            /*(*pp_s_com_father = p_s_parent)->b_count++;*/
            break;
        }
    }

    /* if we are in the root of the tree, then there is no common father */
    if ( n_counter == FIRST_PATH_ELEMENT_OFFSET ) {
        /* Check whether first buffer in the path is the root of the tree. */
        if ( PATH_OFFSET_PBUFFER(p_s_tb->tb_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
             SB_ROOT_BLOCK (p_s_tb->tb_sb) ) {
            /* Current node is leftmost/rightmost in the whole tree:
               there is no neighbor on that side. */
            *pp_s_father = *pp_s_com_father = NULL;
            return CARRY_ON;
        }
        return REPEAT_SEARCH;
    }

    RFALSE( B_LEVEL (*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
            "PAP-8185: (%b %z) level too small",
            *pp_s_com_father, *pp_s_com_father);

    /* Check whether the common parent is locked. */

    if ( buffer_locked (*pp_s_com_father) ) {
        __wait_on_buffer(*pp_s_com_father);
        if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
            /* Tree changed while we slept: drop the reference taken
               above and retry the search. */
            decrement_bcount(*pp_s_com_father);
            return REPEAT_SEARCH;
        }
    }

    /* So, we got common parent of the current node and its left/right neighbor.
     Now we are geting the parent of the left/right neighbor. */

    /* Form key to get parent of the left/right neighbor.  Note the
       side effect: lkey[n_h - 1] / rkey[n_h - 1] is recorded here. */
    le_key2cpu_key (&s_lr_father_key, B_N_PDELIM_KEY(*pp_s_com_father, ( c_lr_par == LEFT_PARENTS ) ?
                                                     (p_s_tb->lkey[n_h - 1] = n_position - 1) : (p_s_tb->rkey[n_h - 1] = n_position)));


    if ( c_lr_par == LEFT_PARENTS )
        decrement_key(&s_lr_father_key);

    if (search_by_key(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, n_h + 1) == IO_ERROR)
        // path is released by search_by_key on IO_ERROR
        return IO_ERROR;

    if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
        decrement_counters_in_path(&s_path_to_neighbor_father);
        decrement_bcount(*pp_s_com_father);
        return REPEAT_SEARCH;
    }

    *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);

    RFALSE( B_LEVEL (*pp_s_father) != n_h + 1,
            "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
    RFALSE( s_path_to_neighbor_father.path_length < FIRST_PATH_ELEMENT_OFFSET,
            "PAP-8192: path length is too small");

    /* Keep the reference on the last buffer (the returned father) by
       shortening the path before releasing the rest of it. */
    s_path_to_neighbor_father.path_length--;
    decrement_counters_in_path(&s_path_to_neighbor_father);
    return CARRY_ON;
}
1059
 
1060
 
1061
/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
 * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
 * Returns:     CARRY_ON - parents were obtained successfully;
 *              REPEAT_SEARCH / IO_ERROR - propagated from get_far_parent
 *              when the path became invalid or an I/O error occurred.
 */
static int  get_parents (struct tree_balance * p_s_tb, int n_h)
{
    struct path         * p_s_path = p_s_tb->tb_path;
    int                   n_position,
        n_ret_value,
        n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
    struct buffer_head  * p_s_curf,
        * p_s_curcf;

    /* Current node is the root of the tree or will be root of the tree */
    if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) {
        /* The root can not have parents.
           Release nodes which previously were obtained as parents of the current node neighbors. */
        decrement_bcount(p_s_tb->FL[n_h]);
        decrement_bcount(p_s_tb->CFL[n_h]);
        decrement_bcount(p_s_tb->FR[n_h]);
        decrement_bcount(p_s_tb->CFR[n_h]);
        p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = p_s_tb->CFR[n_h] = NULL;
        return CARRY_ON;
    }

    /* Get parent FL[n_path_offset] of L[n_path_offset]. */
    if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) )  {
        /* Current node is not the first child of its parent. */
        /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/
        /* Two references: one will become FL[n_h], one CFL[n_h]. */
        p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
        get_bh(p_s_curf) ;
        get_bh(p_s_curf) ;
        p_s_tb->lkey[n_h] = n_position - 1;
    }
    else  {
        /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
           Calculate current common parent of L[n_path_offset] and the current node. Note that
           CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
           Calculate lkey[n_path_offset]. */
        if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
                                           &p_s_curcf, LEFT_PARENTS)) != CARRY_ON )
            return n_ret_value;
    }

    /* Replace the old FL/CFL references with the fresh ones. */
    decrement_bcount(p_s_tb->FL[n_h]);
    p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */
    decrement_bcount(p_s_tb->CFL[n_h]);
    p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */

    RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) ||
            (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)),
            "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);

/* Get parent FR[n_h] of R[n_h]. */

/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
    if ( n_position == B_NR_ITEMS (PATH_H_PBUFFER(p_s_path, n_h + 1)) ) {
/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
   Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
   not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
        if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,  &p_s_curcf, RIGHT_PARENTS)) != CARRY_ON )
            return n_ret_value;
    }
    else {
/* Current node is not the last child of its parent F[n_h]. */
        /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/
        /* Two references: one will become FR[n_h], one CFR[n_h]. */
        p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
        get_bh(p_s_curf) ;
        get_bh(p_s_curf) ;
        p_s_tb->rkey[n_h] = n_position;
    }

    /* Replace the old FR/CFR references with the fresh ones. */
    decrement_bcount(p_s_tb->FR[n_h]);
    p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */

    decrement_bcount(p_s_tb->CFR[n_h]);
    p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */

    RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) ||
            (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)),
            "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);

    return CARRY_ON;
}
1148
 
1149
 
1150
/* it is possible to remove node as result of shiftings to
   neighbors even when we insert or paste item.

   Returns NO_BALANCING_NEEDED when S[h] cannot be emptied by shifting
   AND the pending change fits into S[h] without any shifting (balance
   parameters are set accordingly); otherwise returns
   !NO_BALANCING_NEEDED, meaning the caller must keep analyzing. */
static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree, struct tree_balance * tb, int h)
{
    struct buffer_head * Sh = PATH_H_PBUFFER (tb->tb_path, h);
    int levbytes = tb->insert_size[h];
    struct item_head * ih;
    struct key * r_key = NULL;

    ih = B_N_PITEM_HEAD (Sh, 0);
    if ( tb->CFR[h] )
        r_key = B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]);

    /* Test whether the combined free space of L, S and R can absorb
       all of S[h] plus the pending insertion (levbytes). */
    if (
        lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
        /* shifting may merge items which might save space */
        - (( ! h && op_is_left_mergeable (&(ih->ih_key), Sh->b_size) ) ? IH_SIZE : 0)
        - (( ! h && r_key && op_is_left_mergeable (r_key, Sh->b_size) ) ? IH_SIZE : 0)
        + (( h ) ? KEY_SIZE : 0))
    {
        /* node can not be removed */
        if (sfree >= levbytes ) { /* new item fits into node S[h] without any shifting */
            if ( ! h )
                tb->s0num = B_NR_ITEMS(Sh) + ((mode == M_INSERT ) ? 1 : 0);
            set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
            return NO_BALANCING_NEEDED;
        }
    }
    PROC_INFO_INC( tb -> tb_sb, can_node_be_removed[ h ] );
    return !NO_BALANCING_NEEDED;
}
1181
 
1182
 
1183
 
1184
/* Check whether current node S[h] is balanced when increasing its size by
 * Inserting or Pasting.
 * Calculate parameters for balancing for current level h.
 * Parameters:
 *      tb      tree_balance structure;
 *      h       current level of the node;
 *      inum    item number in S[h];
 *      mode    i - insert, p - paste;
 * Returns:     NO_BALANCING_NEEDED - change fits without balancing higher levels;
 *              CARRY_ON            - balancing parameters set, continue upward;
 *              REPEAT_SEARCH       - tree changed, search must be repeated;
 *              NO_DISK_SPACE       - no free blocks available.
 */
/* ip means Inserting or Pasting */
static int ip_check_balance (struct tree_balance * tb, int h)
{
    struct virtual_node * vn = tb->tb_vn;
    int levbytes,  /* Number of bytes that must be inserted into (value
                      is negative if bytes are deleted) buffer which
                      contains node being balanced.  The mnemonic is
                      that the attempted change in node space used level
                      is levbytes bytes. */
        n_ret_value;

    int lfree, sfree, rfree /* free space in L, S and R */;

    /* nver is short for number of vertixes, and lnver is the number if
       we shift to the left, rnver is the number if we shift to the
       right, and lrnver is the number if we shift in both directions.
       The goal is to minimize first the number of vertixes, and second,
       the number of vertixes whose contents are changed by shifting,
       and third the number of uncached vertixes whose contents are
       changed by shifting and must be read from disk.  */
    int nver, lnver, rnver, lrnver;

    /* used at leaf level only, S0 = S[0] is the node being balanced,
       sInum [ I = 0,1,2 ] is the number of items that will
       remain in node SI after balancing.  S1 and S2 are new
       nodes that might be created. */

    /* we perform 8 calls to get_num_ver().  For each call we calculate five parameters.
       where 4th parameter is s1bytes and 5th - s2bytes
    */
    short snum012[40] = {0,};    /* s0num, s1num, s2num for 8 cases
                                   0,1 - do not shift and do not shift but bottle
                                   2 - shift only whole item to left
                                   3 - shift to left and bottle as much as possible
                                   4,5 - shift to right (whole items and as much as possible
                                   6,7 - shift to both directions (whole items and as much as possible)
                                */

    /* Sh is the node whose balance is currently being checked */
    struct buffer_head * Sh;

    Sh = PATH_H_PBUFFER (tb->tb_path, h);
    levbytes = tb->insert_size[h];

    /* Calculate balance parameters for creating new root. */
    if ( ! Sh )  {
        if ( ! h )
            reiserfs_panic (tb->tb_sb, "vs-8210: ip_check_balance: S[0] can not be 0");
        switch ( n_ret_value = get_empty_nodes (tb, h) )  {
        case CARRY_ON:
            set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
            return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */

        case NO_DISK_SPACE:
        case REPEAT_SEARCH:
            return n_ret_value;
        default:
            reiserfs_panic(tb->tb_sb, "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
        }
    }

    if ( (n_ret_value = get_parents (tb, h)) != CARRY_ON ) /* get parents of S[h] neighbors. */
        return n_ret_value;

    sfree = B_FREE_SPACE (Sh);

    /* get free space of neighbors */
    rfree = get_rfree (tb, h);
    lfree = get_lfree (tb, h);

    if (can_node_be_removed (vn->vn_mode, lfree, sfree, rfree, tb, h) == NO_BALANCING_NEEDED)
        /* and new item fits into node S[h] without any shifting */
        return NO_BALANCING_NEEDED;

    create_virtual_node (tb, h);

    /*
        determine maximal number of items we can shift to the left neighbor (in tb structure)
        and the maximal number of bytes that can flow to the left neighbor
        from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
    */
    check_left (tb, h, lfree);

    /*
      determine maximal number of items we can shift to the right neighbor (in tb structure)
      and the maximal number of bytes that can flow to the right neighbor
      from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
    */
    check_right (tb, h, rfree);


    /* all contents of internal node S[h] can be moved into its
       neighbors, S[h] will be removed after balancing */
    if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
        int to_r;

        /* Since we are working on internal nodes, and our internal
           nodes have fixed size entries, then we can balance by the
           number of items rather than the space they consume.  In this
           routine we set the left node equal to the right node,
           allowing a difference of less than or equal to 1 child
           pointer. */
        to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
            (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
        set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
        return CARRY_ON;
    }

    /* this checks balance condition, that any two neighboring nodes can not fit in one node */
    RFALSE( h &&
            ( tb->lnum[h] >= vn->vn_nr_item + 1 ||
              tb->rnum[h] >= vn->vn_nr_item + 1),
            "vs-8220: tree is not balanced on internal level");
    RFALSE( ! h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
                    (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1)) ),
            "vs-8225: tree is not balanced on leaf level");

    /* all contents of S[0] can be moved into its neighbors
       S[0] will be removed after balancing. */
    if (!h && is_leaf_removable (tb))
        return CARRY_ON;


    /* why do we perform this check here rather than earlier??
       Answer: we can win 1 node in some cases above. Moreover we
       checked it above, when we checked, that S[0] is not removable
       in principle */
    if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
        if ( ! h )
            tb->s0num = vn->vn_nr_item;
        set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
        return NO_BALANCING_NEEDED;
    }


    {
        int lpar, rpar, nset, lset, rset, lrset;
        /*
         * regular overflowing of the node
         */

        /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
           lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
           nset, lset, rset, lrset - shows, whether flowing items give better packing
        */
#define FLOW 1
#define NO_FLOW 0       /* do not any splitting */

        /* we choose one the following */
#define NOTHING_SHIFT_NO_FLOW   0
#define NOTHING_SHIFT_FLOW      5
#define LEFT_SHIFT_NO_FLOW      10
#define LEFT_SHIFT_FLOW         15
#define RIGHT_SHIFT_NO_FLOW     20
#define RIGHT_SHIFT_FLOW        25
#define LR_SHIFT_NO_FLOW        30
#define LR_SHIFT_FLOW           35


        lpar = tb->lnum[h];
        rpar = tb->rnum[h];


        /* calculate number of blocks S[h] must be split into when
           nothing is shifted to the neighbors,
           as well as number of items in each part of the split node (s012 numbers),
           and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
        nset = NOTHING_SHIFT_NO_FLOW;
        nver = get_num_ver (vn->vn_mode, tb, h,
                            0, -1, h?vn->vn_nr_item:0, -1,
                            snum012, NO_FLOW);

        if (!h)
        {
            int nver1;

            /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
            nver1 = get_num_ver (vn->vn_mode, tb, h,
                                 0, -1, 0, -1,
                                 snum012 + NOTHING_SHIFT_FLOW, FLOW);
            if (nver > nver1)
                nset = NOTHING_SHIFT_FLOW, nver = nver1;
        }


        /* calculate number of blocks S[h] must be split into when
           l_shift_num first items and l_shift_bytes of the right most
           liquid item to be shifted are shifted to the left neighbor,
           as well as number of items in each part of the splitted node (s012 numbers),
           and number of bytes (s1bytes) of the shared drop which flow to S1 if any
        */
        lset = LEFT_SHIFT_NO_FLOW;
        lnver = get_num_ver (vn->vn_mode, tb, h,
                             lpar - (( h || tb->lbytes == -1 ) ? 0 : 1), -1, h ? vn->vn_nr_item:0, -1,
                             snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
        if (!h)
        {
            int lnver1;

            lnver1 = get_num_ver (vn->vn_mode, tb, h,
                                  lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, 0, -1,
                                  snum012 + LEFT_SHIFT_FLOW, FLOW);
            if (lnver > lnver1)
                lset = LEFT_SHIFT_FLOW, lnver = lnver1;
        }


        /* calculate number of blocks S[h] must be split into when
           r_shift_num first items and r_shift_bytes of the left most
           liquid item to be shifted are shifted to the right neighbor,
           as well as number of items in each part of the splitted node (s012 numbers),
           and number of bytes (s1bytes) of the shared drop which flow to S1 if any
        */
        rset = RIGHT_SHIFT_NO_FLOW;
        rnver = get_num_ver (vn->vn_mode, tb, h,
                             0, -1, h ? (vn->vn_nr_item-rpar) : (rpar - (( tb->rbytes != -1 ) ? 1 : 0)), -1,
                             snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
        if (!h)
        {
            int rnver1;

            rnver1 = get_num_ver (vn->vn_mode, tb, h,
                                  0, -1, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
                                  snum012 + RIGHT_SHIFT_FLOW, FLOW);

            if (rnver > rnver1)
                rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
        }


        /* calculate number of blocks S[h] must be split into when
           items are shifted in both directions,
           as well as number of items in each part of the splitted node (s012 numbers),
           and number of bytes (s1bytes) of the shared drop which flow to S1 if any
        */
        lrset = LR_SHIFT_NO_FLOW;
        lrnver = get_num_ver (vn->vn_mode, tb, h,
                              lpar - ((h || tb->lbytes == -1) ? 0 : 1), -1, h ? (vn->vn_nr_item-rpar):(rpar - ((tb->rbytes != -1) ? 1 : 0)), -1,
                              snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
        if (!h)
        {
            int lrnver1;

            lrnver1 = get_num_ver (vn->vn_mode, tb, h,
                                   lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
                                   snum012 + LR_SHIFT_FLOW, FLOW);
            if (lrnver > lrnver1)
                lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
        }



        /* Our general shifting strategy is:
           1) to minimized number of new nodes;
           2) to minimized number of neighbors involved in shifting;
           3) to minimized number of disk reads; */

        /* we can win TWO or ONE nodes by shifting in both directions */
        if (lrnver < lnver && lrnver < rnver)
        {
            RFALSE( h &&
                    (tb->lnum[h] != 1 ||
                     tb->rnum[h] != 1 ||
                     lrnver != 1 || rnver != 2 || lnver != 2 || h != 1),
                    "vs-8230: bad h");
            if (lrset == LR_SHIFT_FLOW)
                set_parameters (tb, h, tb->lnum[h], tb->rnum[h], lrnver, snum012 + lrset,
                                tb->lbytes, tb->rbytes);
            else
                set_parameters (tb, h, tb->lnum[h] - ((tb->lbytes == -1) ? 0 : 1),
                                tb->rnum[h] - ((tb->rbytes == -1) ? 0 : 1), lrnver, snum012 + lrset, -1, -1);

            return CARRY_ON;
        }

        /* if shifting doesn't lead to better packing then don't shift */
        if (nver == lrnver)
        {
            set_parameters (tb, h, 0, 0, nver, snum012 + nset, -1, -1);
            return CARRY_ON;
        }


        /* now we know that for better packing shifting in only one
           direction either to the left or to the right is required */

        /*  if shifting to the left is better than shifting to the right */
        if (lnver < rnver)
        {
            SET_PAR_SHIFT_LEFT;
            return CARRY_ON;
        }

        /* if shifting to the right is better than shifting to the left */
        if (lnver > rnver)
        {
            SET_PAR_SHIFT_RIGHT;
            return CARRY_ON;
        }


        /* now shifting in either direction gives the same number
           of nodes and we can make use of the cached neighbors */
        if (is_left_neighbor_in_cache (tb,h))
        {
            SET_PAR_SHIFT_LEFT;
            return CARRY_ON;
        }

        /* shift to the right independently on whether the right neighbor in cache or not */
        SET_PAR_SHIFT_RIGHT;
        return CARRY_ON;
    }
}
1511
 
1512
 
1513
/* Check whether current node S[h] is balanced when Decreasing its size by
 * Deleting or Cutting for INTERNAL node of S+tree.
 * Calculate parameters for balancing for current level h.
 * Parameters:
 *      tb      tree_balance structure;
 *      h       current level of the node;
 *      inum    item number in S[h];
 *      mode    i - insert, p - paste;
 * Returns:     CARRY_ON       - parameters for this level are set, continue;
 *              NO_BALANCING_NEEDED - node stays balanced, nothing to do here;
 *              or the error/repeat code propagated from get_parents().
 *
 * Note: Items of internal nodes have fixed size, so the balance condition for
 * the internal part of S+tree is as for the B-trees.
 */
static int dc_check_balance_internal (struct tree_balance * tb, int h)
{
  struct virtual_node * vn = tb->tb_vn;

  /* Sh is the node whose balance is currently being checked,
     and Fh is its father.  */
  struct buffer_head * Sh, * Fh;
  int maxsize,
      n_ret_value;
  int lfree, rfree /* free space in L and R */;

  Sh = PATH_H_PBUFFER (tb->tb_path, h);
  Fh = PATH_H_PPARENT (tb->tb_path, h);

  maxsize = MAX_CHILD_SIZE(Sh);

/*   using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
/*   new_nr_item = number of items node would have if operation is */
/*      performed without balancing (new_nr_item); */
  create_virtual_node (tb, h);

  if ( ! Fh )
    {   /* S[h] is the root. */
      if ( vn->vn_nr_item > 0 )
        {
          /* Root still holds items after the operation: keep it as-is. */
          set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
          return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
        }
      /* new_nr_item == 0.
       * Current root will be deleted resulting in
       * decrementing the tree height. */
      set_parameters (tb, h, 0, 0, 0, NULL, -1, -1);
      return CARRY_ON;
    }

  /* Fetch F[h], FL[h], FR[h], CFL[h], CFR[h]; bail out on schedule/IO. */
  if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON )
    return n_ret_value;


  /* get free space of neighbors */
  rfree = get_rfree (tb, h);
  lfree = get_lfree (tb, h);

  /* determine maximal number of items we can fit into neighbors
     (results land in tb->lnum[h] / tb->rnum[h]) */
  check_left (tb, h, lfree);
  check_right (tb, h, rfree);


  if ( vn->vn_nr_item >= MIN_NR_KEY(Sh) )
    { /* Balance condition for the internal node is valid.
       * In this case we balance only if it leads to better packing. */
      if ( vn->vn_nr_item == MIN_NR_KEY(Sh) )
        { /* Here we join S[h] with one of its neighbors,
           * which is impossible with greater values of new_nr_item. */
          if ( tb->lnum[h] >= vn->vn_item + 1 ? 0 : 0, tb->lnum[h] >= vn->vn_nr_item + 1 )
            {
              /* All contents of S[h] can be moved to L[h]. */
              int n;
              int order_L;

              /* order_L: position of L[h]'s delimiting entry in the common
                 parent; wraps to the last item of FL[h] when S[h] is the
                 leftmost child of its parent. */
              order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
              n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE);
              set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
              return CARRY_ON;
            }

          if ( tb->rnum[h] >= vn->vn_nr_item + 1 )
            {
              /* All contents of S[h] can be moved to R[h]. */
              int n;
              int order_R;

              order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : n + 1;
              n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE);
              set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
              return CARRY_ON;
            }
        }

      if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)
        {
          /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
          int to_r;

          /* to_r splits the items between left and right neighbors so the
             two end up roughly equally filled (balancing arithmetic,
             derived from MAX_NR_KEY capacity of each side). */
          to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
            (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
          set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
          return CARRY_ON;
        }

      /* Balancing does not lead to better packing. */
      set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
      return NO_BALANCING_NEEDED;
    }

  /* Current node contain insufficient number of items. Balancing is required. */
  /* Check whether we can merge S[h] with left neighbor. */
  if (tb->lnum[h] >= vn->vn_nr_item + 1)
    if (is_left_neighbor_in_cache (tb,h) || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h])
      {
        int n;
        int order_L;

        order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
        n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE);
        set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
        return CARRY_ON;
      }

  /* Check whether we can merge S[h] with right neighbor. */
  if (tb->rnum[h] >= vn->vn_nr_item + 1)
    {
      int n;
      int order_R;

      order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : (n + 1);
      n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE);
      set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
      return CARRY_ON;
    }

  /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
  if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)
    {
      int to_r;

      to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
        (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
      set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
      return CARRY_ON;
    }

  /* For internal nodes try to borrow item from a neighbor */
  RFALSE( !tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");

  /* Borrow one or two items from caching neighbor */
  if (is_left_neighbor_in_cache (tb,h) || !tb->FR[h])
    {
      int from_l;

      /* take enough from L[h] to equalize L[h] and S[h] (from_l is
         negative of the shift count passed to set_parameters) */
      from_l = (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1) / 2 -  (vn->vn_nr_item + 1);
      set_parameters (tb, h, -from_l, 0, 1, NULL, -1, -1);
      return CARRY_ON;
    }

  /* Otherwise borrow from the right neighbor, same equalizing formula. */
  set_parameters (tb, h, 0, -((MAX_NR_KEY(Sh)+1-tb->rnum[h]+vn->vn_nr_item+1)/2-(vn->vn_nr_item+1)), 1,
                  NULL, -1, -1);
  return CARRY_ON;
}
1678
 
1679
 
1680
/* Check whether current node S[h] is balanced when Decreasing its size by
 * Deleting or Truncating for LEAF node of S+tree.
 * Calculate parameters for balancing for current level h.
 * Parameters:
 *      tb      tree_balance structure;
 *      h       current level of the node (always 0 here: the caller,
 *              dc_check_balance(), dispatches to this function only
 *              when h == 0 — hence the mixed use of [h] and [0] below);
 *      inum    item number in S[h];
 *      mode    i - insert, p - paste;
 * Returns:     CARRY_ON            - balancing parameters set;
 *              NO_BALANCING_NEEDED - node remains balanced;
 *              or the error/repeat code propagated from get_parents().
 */
static int dc_check_balance_leaf (struct tree_balance * tb, int h)
{
  struct virtual_node * vn = tb->tb_vn;

  /* Number of bytes that must be deleted from
     (value is negative if bytes are deleted) buffer which
     contains node being balanced.  The mnemonic is that the
     attempted change in node space used level is levbytes bytes. */
  int levbytes;
  /* the maximal item size */
  int maxsize,
      n_ret_value;
  /* S0 is the node whose balance is currently being checked,
     and F0 is its father.  */
  struct buffer_head * S0, * F0;
  int lfree, rfree /* free space in L and R */;

  S0 = PATH_H_PBUFFER (tb->tb_path, 0);
  F0 = PATH_H_PPARENT (tb->tb_path, 0);

  levbytes = tb->insert_size[h];

  maxsize = MAX_CHILD_SIZE(S0);         /* maximal possible size of an item */

  if ( ! F0 )
    {  /* S[0] is the root now. */

      RFALSE( -levbytes >= maxsize - B_FREE_SPACE (S0),
              "vs-8240: attempt to create empty buffer tree");

      /* Root leaf is never merged away; leave it as-is. */
      set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
      return NO_BALANCING_NEEDED;
    }

  if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON )
    return n_ret_value;

  /* get free space of neighbors */
  rfree = get_rfree (tb, h);
  lfree = get_lfree (tb, h);

  create_virtual_node (tb, h);

  /* if 3 leaves can be merged into one, set parameters and return */
  if (are_leaves_removable (tb, lfree, rfree))
    return CARRY_ON;

  /* determine maximal number of items we can shift to the left/right  neighbor
     and the maximal number of bytes that can flow to the left/right neighbor
     from the left/right most liquid item that cannot be shifted from S[0] entirely
     */
  check_left (tb, h, lfree);
  check_right (tb, h, rfree);

  /* check whether we can merge S with left neighbor.
     lbytes == -1 means the whole boundary item fits, not just part of it. */
  if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
    if (is_left_neighbor_in_cache (tb,h) ||
        ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
        !tb->FR[h]) {

      RFALSE( !tb->FL[h], "vs-8245: dc_check_balance_leaf: FL[h] must exist");

      /* set parameter to merge S[0] with its left neighbor */
      set_parameters (tb, h, -1, 0, 0, NULL, -1, -1);
      return CARRY_ON;
    }

  /* check whether we can merge S[0] with right neighbor. */
  if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
    set_parameters (tb, h, 0, -1, 0, NULL, -1, -1);
    return CARRY_ON;
  }

  /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
  if (is_leaf_removable (tb))
    return CARRY_ON;

  /* Balancing is not required. */
  tb->s0num = vn->vn_nr_item;
  set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
  return NO_BALANCING_NEEDED;
}
1775
 
1776
 
1777
 
1778
/* Check whether current node S[h] is balanced when Decreasing its size by
1779
 * Deleting or Cutting.
1780
 * Calculate parameters for balancing for current level h.
1781
 * Parameters:
1782
 *      tb      tree_balance structure;
1783
 *      h       current level of the node;
1784
 *      inum    item number in S[h];
1785
 *      mode    d - delete, c - cut.
1786
 * Returns:     1 - schedule occurred;
1787
 *              0 - balancing for higher levels needed;
1788
 *             -1 - no balancing for higher levels needed;
1789
 *             -2 - no disk space.
1790
 */
1791
static int dc_check_balance (struct tree_balance * tb, int h)
1792
{
1793
 RFALSE( ! (PATH_H_PBUFFER (tb->tb_path, h)), "vs-8250: S is not initialized");
1794
 
1795
 if ( h )
1796
   return dc_check_balance_internal (tb, h);
1797
 else
1798
   return dc_check_balance_leaf (tb, h);
1799
}
1800
 
1801
 
1802
 
1803
/* Check whether current node S[h] is balanced.
1804
 * Calculate parameters for balancing for current level h.
1805
 * Parameters:
1806
 *
1807
 *      tb      tree_balance structure:
1808
 *
1809
 *              tb is a large structure that must be read about in the header file
1810
 *              at the same time as this procedure if the reader is to successfully
1811
 *              understand this procedure
1812
 *
1813
 *      h       current level of the node;
1814
 *      inum    item number in S[h];
1815
 *      mode    i - insert, p - paste, d - delete, c - cut.
1816
 * Returns:     1 - schedule occurred;
1817
 *              0 - balancing for higher levels needed;
1818
 *             -1 - no balancing for higher levels needed;
1819
 *             -2 - no disk space.
1820
 */
1821
static int check_balance (int mode,
1822
                          struct tree_balance * tb,
1823
                          int h,
1824
                          int inum,
1825
                          int pos_in_item,
1826
                          struct item_head * ins_ih,
1827
                          const void * data
1828
                          )
1829
{
1830
  struct virtual_node * vn;
1831
 
1832
  vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1833
  vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1834
  vn->vn_mode = mode;
1835
  vn->vn_affected_item_num = inum;
1836
  vn->vn_pos_in_item = pos_in_item;
1837
  vn->vn_ins_ih = ins_ih;
1838
  vn->vn_data = data;
1839
 
1840
  RFALSE( mode == M_INSERT && !vn->vn_ins_ih,
1841
          "vs-8255: ins_ih can not be 0 in insert mode");
1842
 
1843
 if ( tb->insert_size[h] > 0 )
1844
   /* Calculate balance parameters when size of node is increasing. */
1845
   return ip_check_balance (tb, h);
1846
 
1847
 /* Calculate balance parameters when  size of node is decreasing. */
1848
 return dc_check_balance (tb, h);
1849
}
1850
 
1851
 
1852
 
1853
/* Check whether parent at the path is the really parent of the current node.
 * Re-validates (and for the root case, re-derives) the parent slot of the
 * path at level n_h after a possible schedule.
 * Returns:  CARRY_ON      - parent in the path is valid and unlocked;
 *           REPEAT_SEARCH - tree changed under us, caller must re-search. */
static int  get_direct_parent(
              struct tree_balance * p_s_tb,
              int                   n_h
            ) {
    struct buffer_head  * p_s_bh;
    struct path         * p_s_path      = p_s_tb->tb_path;
    int                   n_position,
        n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);

    /* We are in the root or in the new root. */
    if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) {

        RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
                "PAP-8260: illegal offset in the path");

        if ( PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
             SB_ROOT_BLOCK (p_s_tb->tb_sb) ) {
            /* Root is not changed: mark "no parent" explicitly in the path. */
            PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
            PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
            return CARRY_ON;
        }
        return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
    }

    if ( ! B_IS_IN_TREE(p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)) )
        return REPEAT_SEARCH; /* Parent in the path is not in the tree. */

    /* Stale position index would point past the parent's item range. */
    if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) > B_NR_ITEMS(p_s_bh) )
        return REPEAT_SEARCH;

    if ( B_N_CHILD_NUM(p_s_bh, n_position) != PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr )
        /* Parent in the path is not parent of the current node in the tree. */
        return REPEAT_SEARCH;

    if ( buffer_locked(p_s_bh) ) {
        /* Waiting may schedule; re-check the filesystem generation after. */
        __wait_on_buffer(p_s_bh);
        if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
            return REPEAT_SEARCH;
    }

    return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node.  */
}
1897
 
1898
 
1899
/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
 * of S[n_h] we
 * need in order to balance S[n_h], and get them if necessary.
 * Reads L[n_h] / R[n_h] from disk (via their parents FL/FR, which must
 * already be set by get_parents) and caches them in the tree_balance.
 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
 *              CARRY_ON - schedule didn't occur while the function worked;
 *              IO_ERROR - a neighbor block could not be read.
 */
static int  get_neighbors(
                    struct tree_balance * p_s_tb,
                    int                   n_h
                  ) {
    int                 n_child_position,
        n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
    unsigned long               n_son_number;
    struct super_block  *       p_s_sb = p_s_tb->tb_sb;
    struct buffer_head  * p_s_bh;


    PROC_INFO_INC( p_s_sb, get_neighbors[ n_h ] );

    if ( p_s_tb->lnum[n_h] ) {
        /* We need left neighbor to balance S[n_h]. */
        PROC_INFO_INC( p_s_sb, need_l_neighbor[ n_h ] );
        p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);

        RFALSE( p_s_bh == p_s_tb->FL[n_h] &&
                ! PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset),
                "PAP-8270: invalid position in the parent");

        /* If S and L share a parent, L is at lkey[n_h]; otherwise L is
           the last child of FL[n_h]. */
        n_child_position = ( p_s_bh == p_s_tb->FL[n_h] ) ? p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]);
        n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
        p_s_bh = reiserfs_bread(p_s_sb, n_son_number, p_s_sb->s_blocksize);
        if (!p_s_bh)
            return IO_ERROR;
        /* bread may have blocked; if the tree changed, drop and retry. */
        if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
            decrement_bcount(p_s_bh);
            PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] );
            return REPEAT_SEARCH;
        }

        RFALSE( ! B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
                n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
                B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) !=
                p_s_bh->b_blocknr, "PAP-8275: invalid parent");
        RFALSE( ! B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child");
        RFALSE( ! n_h &&
                B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FL[0],n_child_position)),
                "PAP-8290: invalid child size of left neighbor");

        /* Release any previously cached left neighbor before replacing it. */
        decrement_bcount(p_s_tb->L[n_h]);
        p_s_tb->L[n_h] = p_s_bh;
    }


    if ( p_s_tb->rnum[n_h] ) { /* We need right neighbor to balance S[n_path_offset]. */
        PROC_INFO_INC( p_s_sb, need_r_neighbor[ n_h ] );
        p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);

        RFALSE( p_s_bh == p_s_tb->FR[n_h] &&
                PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) >= B_NR_ITEMS(p_s_bh),
                "PAP-8295: invalid position in the parent");

        /* If S and R share a parent, R follows rkey[n_h]; otherwise R is
           the first child of FR[n_h]. */
        n_child_position = ( p_s_bh == p_s_tb->FR[n_h] ) ? p_s_tb->rkey[n_h] + 1 : 0;
        n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
        p_s_bh = reiserfs_bread(p_s_sb, n_son_number, p_s_sb->s_blocksize);
        if (!p_s_bh)
            return IO_ERROR;
        if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
            decrement_bcount(p_s_bh);
            PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] );
            return REPEAT_SEARCH;
        }
        decrement_bcount(p_s_tb->R[n_h]);
        p_s_tb->R[n_h] = p_s_bh;

        RFALSE( ! n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position)),
                "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
                B_FREE_SPACE (p_s_bh), MAX_CHILD_SIZE (p_s_bh),
                dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position)));

    }
    return CARRY_ON;
}
1981
 
1982
#ifdef CONFIG_REISERFS_CHECK
1983
/* Debug wrapper around kmalloc: accounts every successful allocation in
 * s_kmallocs and warns each time the running total grows by another
 * ~200000 bytes past the last reported high-water mark.  Returns the
 * allocated block, or NULL on failure (nothing is accounted then). */
void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s)
{
    static size_t malloced;
    void * vp = kmalloc (size, flags);

    if (!vp)
        return vp;

    s->u.reiserfs_sb.s_kmallocs += size;
    if (s->u.reiserfs_sb.s_kmallocs > malloced + 200000) {
        reiserfs_warning (s, "vs-8301: reiserfs_kmalloc: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs);
        malloced = s->u.reiserfs_sb.s_kmallocs;
    }
    return vp;
}
2000
 
2001
/* Debug counterpart of reiserfs_kmalloc: frees vp and subtracts size from
 * the per-superblock allocation counter.  A NULL vp is a no-op (it was
 * never accounted).  Warns if the counter ever goes negative, which would
 * indicate mismatched alloc/free accounting. */
void reiserfs_kfree (const void * vp, size_t size, struct super_block * s)
{
    if (vp) {
        kfree (vp);

        s->u.reiserfs_sb.s_kmallocs -= size;
        if (s->u.reiserfs_sb.s_kmallocs < 0)
            reiserfs_warning (s, "vs-8302: reiserfs_kfree: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs);
    }
}
2012
#endif
2013
 
2014
 
2015
static int get_virtual_node_size (struct super_block * sb, struct buffer_head * bh)
2016
{
2017
    int max_num_of_items;
2018
    int max_num_of_entries;
2019
    unsigned long blocksize = sb->s_blocksize;
2020
 
2021
#define MIN_NAME_LEN 1
2022
 
2023
    max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2024
    max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2025
                         (DEH_SIZE + MIN_NAME_LEN);
2026
 
2027
    return sizeof(struct virtual_node) +
2028
           max(max_num_of_items * sizeof (struct virtual_item),
2029
               sizeof (struct virtual_item) + sizeof(struct direntry_uarea) +
2030
               (max_num_of_entries - 1) * sizeof (__u16));
2031
}
2032
 
2033
 
2034
 
2035
/* maybe we should fail balancing we are going to perform when kmalloc
   fails several times. But now it will loop until kmalloc gets
   required memory */
/* Ensure tb->vn_buf is large enough for the virtual node of the current
 * leaf; (re)allocate it if not.
 * Returns: CARRY_ON      - buffer ready and filesystem unchanged;
 *          REPEAT_SEARCH - allocation blocked/failed or fs changed; the
 *                          caller must restart (and will re-enter here). */
static int get_mem_for_virtual_node (struct tree_balance * tb)
{
    int check_fs = 0;
    int size;
    char * buf;

    size = get_virtual_node_size (tb->tb_sb, PATH_PLAST_BUFFER (tb->tb_path));

    if (size > tb->vn_buf_size) {
        /* we have to allocate more memory for virtual node */
        if (tb->vn_buf) {
            /* free memory allocated before */
            reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
            /* this is not needed if kfree is atomic */
            check_fs = 1;
        }

        /* virtual node requires now more memory */
        tb->vn_buf_size = size;

        /* get memory for virtual item; GFP_ATOMIC first so we do not
           block while still holding the path and resources */
        buf = reiserfs_kmalloc(size, GFP_ATOMIC, tb->tb_sb);
        if ( ! buf ) {
            /* getting memory with GFP_KERNEL priority may involve
               balancing now (due to indirect_to_direct conversion on
               dcache shrinking). So, release path and collected
               resources here */
            free_buffers_in_tb (tb);
            buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb);
            if ( !buf ) {
#ifdef CONFIG_REISERFS_CHECK
                reiserfs_warning (tb->tb_sb, "vs-8345: get_mem_for_virtual_node: "
                                  "kmalloc failed. reiserfs kmalloced %d bytes\n",
                                  tb->tb_sb->u.reiserfs_sb.s_kmallocs);
#endif
                /* reset so the next attempt re-tries the allocation;
                   tb->vn_buf may be set to NULL below */
                tb->vn_buf_size = 0;
            }
            tb->vn_buf = buf;
            schedule() ;
            /* we blocked (and may hold a NULL vn_buf): restart search */
            return REPEAT_SEARCH;
        }

        tb->vn_buf = buf;
    }

    /* freeing above may have scheduled — verify the tree is unchanged */
    if ( check_fs && FILESYSTEM_CHANGED_TB (tb) )
        return REPEAT_SEARCH;

    return CARRY_ON;
}
2088
 
2089
 
2090
#ifdef CONFIG_REISERFS_CHECK
2091
static void tb_buffer_sanity_check (struct super_block * p_s_sb,
2092
                                    struct buffer_head * p_s_bh,
2093
                                    const char *descr, int level) {
2094
  if (p_s_bh) {
2095
    if (atomic_read (&(p_s_bh->b_count)) <= 0) {
2096
 
2097
      reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", descr, level, p_s_bh);
2098
    }
2099
 
2100
    if ( ! buffer_uptodate (p_s_bh) ) {
2101
      reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", descr, level, p_s_bh);
2102
    }
2103
 
2104
    if ( ! B_IS_IN_TREE (p_s_bh) ) {
2105
      reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", descr, level, p_s_bh);
2106
    }
2107
 
2108
    if (p_s_bh->b_dev != p_s_sb->s_dev ||
2109
        p_s_bh->b_size != p_s_sb->s_blocksize ||
2110
        p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2111
      reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): check failed for buffer %s[%d] (%b)\n", descr, level, p_s_bh);
2112
    }
2113
  }
2114
}
2115
#else
2116
/* Sanity checking compiles to a no-op when CONFIG_REISERFS_CHECK is off. */
static void tb_buffer_sanity_check (struct super_block * p_s_sb,
                                    struct buffer_head * p_s_bh,
                                    const char *descr, int level)
{
}
2120
#endif
2121
 
2122
/* "Clear" a buffer's dirty state by handing it to the journal via
 * reiserfs_prepare_for_journal() with wait == 0.
 * NOTE(review): the exact dirty-bit semantics live inside
 * reiserfs_prepare_for_journal() (journal.c) — not visible here. */
static void clear_all_dirty_bits(struct super_block *s,
                                 struct buffer_head *bh) {
  reiserfs_prepare_for_journal(s, bh, 0) ;
}
2126
 
2127
/* Wait until every buffer collected for this balance (path buffers,
 * L/FL/CFL, R/FR/CFR per level, and the FEB free-block list) is unlocked,
 * clearing dirty state on each via clear_all_dirty_bits() along the way.
 * Each time a locked buffer is found, wait on it and rescan from scratch.
 * Returns: CARRY_ON      - everything unlocked, filesystem unchanged;
 *          REPEAT_SEARCH - a wait scheduled and the filesystem changed. */
static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
{
    struct buffer_head * locked;
#ifdef CONFIG_REISERFS_CHECK
    int repeat_counter = 0;
#endif
    int i;

    do {

        locked = NULL;

        /* Pass 1: the buffers on the search path itself. */
        for ( i = p_s_tb->tb_path->path_length; !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i-- ) {
            if ( PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i) ) {
                /* if I understand correctly, we can only be sure the last buffer
                ** in the path is in the tree --clm
                */
#ifdef CONFIG_REISERFS_CHECK
                if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
                    PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
                    tb_buffer_sanity_check (p_s_tb->tb_sb,
                                            PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i),
                                            "S",
                                            p_s_tb->tb_path->path_length - i);
                }
#endif
                clear_all_dirty_bits(p_s_tb->tb_sb,
                                     PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) ;

                if ( buffer_locked (PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) )
                    locked = PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i);
            }
        }

        /* Pass 2: neighbors and their (common) parents at each level the
           balance touches (insert_size[i] != 0 bounds the levels). */
        for ( i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; i++ ) {

            if (p_s_tb->lnum[i] ) {

                if ( p_s_tb->L[i] ) {
                    tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->L[i], "L", i);
                    clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->L[i]) ;
                    if ( buffer_locked (p_s_tb->L[i]) )
                        locked = p_s_tb->L[i];
                }

                if ( !locked && p_s_tb->FL[i] ) {
                    tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FL[i], "FL", i);
                    clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FL[i]) ;
                    if ( buffer_locked (p_s_tb->FL[i]) )
                        locked = p_s_tb->FL[i];
                }

                if ( !locked && p_s_tb->CFL[i] ) {
                    tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFL[i], "CFL", i);
                    clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFL[i]) ;
                    if ( buffer_locked (p_s_tb->CFL[i]) )
                        locked = p_s_tb->CFL[i];
                }

            }

            if ( !locked && (p_s_tb->rnum[i]) ) {

                if ( p_s_tb->R[i] ) {
                    tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->R[i], "R", i);
                    clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->R[i]) ;
                    if ( buffer_locked (p_s_tb->R[i]) )
                        locked = p_s_tb->R[i];
                }


                if ( !locked && p_s_tb->FR[i] ) {
                    tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FR[i], "FR", i);
                    clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FR[i]) ;
                    if ( buffer_locked (p_s_tb->FR[i]) )
                        locked = p_s_tb->FR[i];
                }

                if ( !locked && p_s_tb->CFR[i] ) {
                    tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFR[i], "CFR", i);
                    clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFR[i]) ;
                    if ( buffer_locked (p_s_tb->CFR[i]) )
                        locked = p_s_tb->CFR[i];
                }
            }
        }
        /* as far as I can tell, this is not required.  The FEB list seems
        ** to be full of newly allocated nodes, which will never be locked,
        ** dirty, or anything else.
        ** To be safe, I'm putting in the checks and waits in.  For the moment,
        ** they are needed to keep the code in journal.c from complaining
        ** about the buffer.  That code is inside CONFIG_REISERFS_CHECK as well.
        ** --clm
        */
        for ( i = 0; !locked && i < MAX_FEB_SIZE; i++ ) {
            if ( p_s_tb->FEB[i] ) {
                clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FEB[i]) ;
                if (buffer_locked(p_s_tb->FEB[i])) {
                    locked = p_s_tb->FEB[i] ;
                }
            }
        }

        if (locked) {
#ifdef CONFIG_REISERFS_CHECK
            repeat_counter++;
            if ( (repeat_counter % 10000) == 0) {
                reiserfs_warning (p_s_tb->tb_sb, "wait_tb_buffers_until_released(): too many iterations waiting for buffer to unlock (%b)\n", locked);

                /* Don't loop forever.  Try to recover from possible error. */

                return ( FILESYSTEM_CHANGED_TB (p_s_tb) ) ? REPEAT_SEARCH : CARRY_ON;
            }
#endif
            /* the wait may schedule; if the tree changed, start over */
            __wait_on_buffer (locked);
            if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) {
                return REPEAT_SEARCH;
            }
        }

    } while (locked);

    return CARRY_ON;
}
2251
 
2252
 
2253
/* Prepare for balancing, that is
2254
 *      get all necessary parents, and neighbors;
2255
 *      analyze what and where should be moved;
2256
 *      get sufficient number of new nodes;
2257
 * Balancing will start only after all resources will be collected at a time.
2258
 *
2259
 * When ported to SMP kernels, only at the last moment after all needed nodes
2260
 * are collected in cache, will the resources be locked using the usual
2261
 * textbook ordered lock acquisition algorithms.  Note that ensuring that
2262
 * this code neither write locks what it does not need to write lock nor locks out of order
2263
 * will be a pain in the butt that could have been avoided.  Grumble grumble. -Hans
2264
 *
2265
 * fix is meant in the sense of render unchanging
2266
 *
2267
 * Latency might be improved by first gathering a list of what buffers are needed
2268
 * and then getting as many of them in parallel as possible? -Hans
2269
 *
2270
 * Parameters:
2271
 *      op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
2272
 *      tb      tree_balance structure;
2273
 *      inum    item number in S[h];
2274
 *      pos_in_item - comment this if you can
2275
 *      ins_ih & ins_sd are used when inserting
2276
 * Returns:     1 - schedule occurred while the function worked;
2277
 *              0 - schedule didn't occur while the function worked;
2278
 *             -1 - if no_disk_space
2279
 */
2280
 
2281
 
2282
/* Gather and pin every resource needed for one balancing pass: parents,
   neighbors and empty nodes for each affected level.  Returns CARRY_ON on
   success, REPEAT_SEARCH if the tree changed under us (caller must redo the
   search), or NO_DISK_SPACE.  On any failure path all acquired resources
   are released via the `repeat' label below. */
int fix_nodes (int n_op_mode,
               struct tree_balance *    p_s_tb,
               struct item_head * p_s_ins_ih, // item head of item being inserted
               const void * data // inserted item or data to be pasted
    ) {
    int n_ret_value,
        n_h,                    /* current tree level, 0 = leaf */
        n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
    int n_pos_in_item;

    /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
    ** during wait_tb_buffers_run
    */
    int wait_tb_buffers_run = 0 ;
    int windex ;
    struct buffer_head  * p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);

    /* statistics: count fix_nodes invocations for this superblock */
    ++ p_s_tb -> tb_sb -> u.reiserfs_sb.s_fix_nodes;

    n_pos_in_item = p_s_tb->tb_path->pos_in_item;


    /* snapshot the fs generation; FILESYSTEM_CHANGED_TB compares against it
       to detect that a schedule let somebody else modify the tree */
    p_s_tb->fs_gen = get_generation (p_s_tb->tb_sb);

    /* we prepare and log the super here so it will already be in the
    ** transaction when do_balance needs to change it.
    ** This way do_balance won't have to schedule when trying to prepare
    ** the super for logging
    */
    reiserfs_prepare_for_journal(p_s_tb->tb_sb,
                                 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1) ;
    journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
                       SB_BUFFER_WITH_SB(p_s_tb->tb_sb)) ;
    if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
        return REPEAT_SEARCH;

    /* S[0] may be locked (under I/O), e.g. during an indirect_to_direct
       conversion; wait for it, then recheck that the tree is unchanged */
    if (buffer_locked (p_s_tbS0)) {
        __wait_on_buffer (p_s_tbS0);
        if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
            return REPEAT_SEARCH;
    }

#ifdef CONFIG_REISERFS_CHECK
    /* debug build: a non-NULL cur_tb means a do_balance is still pending */
    if ( cur_tb ) {
        print_cur_tb ("fix_nodes");
        reiserfs_panic(p_s_tb->tb_sb,"PAP-8305: fix_nodes:  there is pending do_balance");
    }

    if (!buffer_uptodate (p_s_tbS0) || !B_IS_IN_TREE (p_s_tbS0)) {
        reiserfs_panic (p_s_tb->tb_sb, "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
                        "at the beginning of fix_nodes or not in tree (mode %c)", p_s_tbS0, p_s_tbS0, n_op_mode);
    }

    /* Check parameters: item number must be a valid position in S[0] for
       the requested operation (insert allows one past the last item). */
    switch (n_op_mode) {
    case M_INSERT:
        if ( n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) )
            reiserfs_panic(p_s_tb->tb_sb,"PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
                           n_item_num, B_NR_ITEMS(p_s_tbS0));
        break;
    case M_PASTE:
    case M_DELETE:
    case M_CUT:
        if ( n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0) ) {
            print_block (p_s_tbS0, 0, -1, -1);
            printk("mode = %c insert_size = %d\n", n_op_mode, p_s_tb->insert_size[0]);
            reiserfs_panic(p_s_tb->tb_sb,"PAP-8335: fix_nodes: Incorrect item number(%d)", n_item_num);
        }
        break;
    default:
        reiserfs_panic(p_s_tb->tb_sb,"PAP-8340: fix_nodes: Incorrect mode of operation");
    }
#endif

    if (get_mem_for_virtual_node (p_s_tb) == REPEAT_SEARCH)
        // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
        return REPEAT_SEARCH;


    /* Starting from the leaf level; for all levels n_h of the tree that
       still have a non-zero insert_size (i.e. still need balancing). */
    for ( n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++ ) {
        if ( (n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON ) {
            goto repeat;
        }

        /* decide what gets shifted where at this level */
        if ( (n_ret_value = check_balance (n_op_mode, p_s_tb, n_h, n_item_num,
                                           n_pos_in_item, p_s_ins_ih, data)) != CARRY_ON ) {
            if ( n_ret_value == NO_BALANCING_NEEDED ) {
                /* No balancing for higher levels needed. */
                if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) {
                    goto repeat;
                }
                if ( n_h != MAX_HEIGHT - 1 )
                    p_s_tb->insert_size[n_h + 1] = 0;
                /* ok, analysis and resource gathering are complete */
                break;
            }
            goto repeat;
        }

        if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) {
            goto repeat;
        }

        if ( (n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON ) {
            goto repeat;        /* No disk space, or schedule occurred and
                                   analysis may be invalid and needs to be redone. */
        }

        if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h) ) {
            /* We have a positive insert size but no nodes exist on this
               level, this means that we are creating a new root. */

            RFALSE( p_s_tb->blknum[n_h] != 1,
                    "PAP-8350: creating new empty root");

            if ( n_h < MAX_HEIGHT - 1 )
                p_s_tb->insert_size[n_h + 1] = 0;
        }
        else
            if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1) ) {
                if ( p_s_tb->blknum[n_h] > 1 ) {
                    /* The tree needs to be grown, so this node S[n_h]
                       which is the root node is split into two nodes,
                       and a new node (S[n_h+1]) will be created to
                       become the root node.  */

                    RFALSE( n_h == MAX_HEIGHT - 1,
                            "PAP-8355: attempt to create too high of a tree");

                    /* new root gets blknum-1 (key,dc) pairs plus one dc */
                    p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + DC_SIZE;
                }
                else
                    if ( n_h < MAX_HEIGHT - 1 )
                        p_s_tb->insert_size[n_h + 1] = 0;
            }
            else
                /* parent exists: it must absorb one (key,dc) pair per
                   extra node produced at this level */
                p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
    }


    /* all analysis done; wait until every gathered buffer is unlocked,
       then confirm nothing changed while we slept */
    windex = push_journal_writer("fix_nodes") ;
    if ((n_ret_value = wait_tb_buffers_until_unlocked (p_s_tb)) == CARRY_ON) {
        pop_journal_writer(windex) ;
        if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
            wait_tb_buffers_run = 1 ;
            n_ret_value = REPEAT_SEARCH ;
            goto repeat;
        } else {
            return CARRY_ON;
        }
    } else {
        wait_tb_buffers_run = 1 ;
        pop_journal_writer(windex) ;
        goto repeat;
    }

 repeat:
    // fix_nodes was unable to perform its calculation due to
    // filesystem got changed under us, lack of free disk space or i/o
    // failure. If the first is the case - the search will be
    // repeated. For now - free all resources acquired so far except
    // for the new allocated nodes
    {
        int i;

        /* Release path buffers. */
        if (wait_tb_buffers_run) {
            /* wait_tb_buffers_until_unlocked ran: prepared buffers must
               have their dirty bits restored as well */
            pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path) ;
        } else {
            pathrelse (p_s_tb->tb_path);
        }
        /* brelse all resources collected for balancing */
        for ( i = 0; i < MAX_HEIGHT; i++ ) {
            if (wait_tb_buffers_run) {
                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->L[i]);
                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->R[i]);
                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FL[i]);
                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FR[i]);
                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFL[i]);
                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFR[i]);
            }

            brelse (p_s_tb->L[i]);p_s_tb->L[i] = 0;
            brelse (p_s_tb->R[i]);p_s_tb->R[i] = 0;
            brelse (p_s_tb->FL[i]);p_s_tb->FL[i] = 0;
            brelse (p_s_tb->FR[i]);p_s_tb->FR[i] = 0;
            brelse (p_s_tb->CFL[i]);p_s_tb->CFL[i] = 0;
            brelse (p_s_tb->CFR[i]);p_s_tb->CFR[i] = 0;
        }

        if (wait_tb_buffers_run) {
            /* newly allocated (FEB) nodes are kept, but un-prepared */
            for ( i = 0; i < MAX_FEB_SIZE; i++ ) {
                if ( p_s_tb->FEB[i] ) {
                    reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
                                                     p_s_tb->FEB[i]) ;
                }
            }
        }
        return n_ret_value;
    }

}
2486
 
2487
 
2488
/* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2489
   wanted to make lines shorter */
2490
void unfix_nodes (struct tree_balance * tb)
2491
{
2492
    int i;
2493
 
2494
    /* Release path buffers. */
2495
    pathrelse_and_restore (tb->tb_sb, tb->tb_path);
2496
 
2497
    /* brelse all resources collected for balancing */
2498
    for ( i = 0; i < MAX_HEIGHT; i++ ) {
2499
        reiserfs_restore_prepared_buffer (tb->tb_sb, tb->L[i]);
2500
        reiserfs_restore_prepared_buffer (tb->tb_sb, tb->R[i]);
2501
        reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FL[i]);
2502
        reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FR[i]);
2503
        reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFL[i]);
2504
        reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFR[i]);
2505
 
2506
        brelse (tb->L[i]);
2507
        brelse (tb->R[i]);
2508
        brelse (tb->FL[i]);
2509
        brelse (tb->FR[i]);
2510
        brelse (tb->CFL[i]);
2511
        brelse (tb->CFR[i]);
2512
    }
2513
 
2514
    /* deal with list of allocated (used and unused) nodes */
2515
    for ( i = 0; i < MAX_FEB_SIZE; i++ ) {
2516
        if ( tb->FEB[i] ) {
2517
            unsigned long blocknr  = tb->FEB[i]->b_blocknr ;
2518
            /* de-allocated block which was not used by balancing and
2519
               bforget about buffer for it */
2520
            brelse (tb->FEB[i]);
2521
            reiserfs_free_block (tb->transaction_handle, blocknr);
2522
        }
2523
        if (tb->used[i]) {
2524
            /* release used as new nodes including a new root */
2525
            brelse (tb->used[i]);
2526
        }
2527
    }
2528
 
2529
    if (tb->vn_buf)
2530
    reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2531
 
2532
}
2533
 
2534
 
2535
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.