OpenCores
URL https://opencores.org/ocsvn/test_project/test_project/trunk

Subversion Repositories test_project

[/] [test_project/] [trunk/] [linux_sd_driver/] [fs/] [reiserfs/] [fix_node.c] - Blame information for rev 62

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 62 marcus.erl
/*
2
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3
 */
4
 
5
/**
6
 ** old_item_num
7
 ** old_entry_num
8
 ** set_entry_sizes
9
 ** create_virtual_node
10
 ** check_left
11
 ** check_right
12
 ** directory_part_size
13
 ** get_num_ver
14
 ** set_parameters
15
 ** is_leaf_removable
16
 ** are_leaves_removable
17
 ** get_empty_nodes
18
 ** get_lfree
19
 ** get_rfree
20
 ** is_left_neighbor_in_cache
21
 ** decrement_key
22
 ** get_far_parent
23
 ** get_parents
24
 ** can_node_be_removed
25
 ** ip_check_balance
26
 ** dc_check_balance_internal
27
 ** dc_check_balance_leaf
28
 ** dc_check_balance
29
 ** check_balance
30
 ** get_direct_parent
31
 ** get_neighbors
32
 ** fix_nodes
33
 **
34
 **
35
 **/
36
 
37
#include <linux/time.h>
38
#include <linux/string.h>
39
#include <linux/reiserfs_fs.h>
40
#include <linux/buffer_head.h>
41
 
42
/* To make any changes in the tree we find a node, that contains item
43
   to be changed/deleted or position in the node we insert a new item
44
   to. We call this node S. To do balancing we need to decide what we
45
   will shift to left/right neighbor, or to a new node, where new item
46
   will be etc. To make this analysis simpler we build virtual
47
   node. Virtual node is an array of items, that will replace items of
48
   node S. (For instance if we are going to delete an item, virtual
49
   node does not contain it). Virtual node keeps information about
50
   item sizes and types, mergeability of first and last items, sizes
51
   of all entries in directory item. We use this array of items when
52
   calculating what we can shift to neighbors and how many nodes we
53
   have to have if we do not any shiftings, if we shift to left/right
54
   neighbor or to both. */
55
 
56
/* taking item number in virtual node, returns number of item, that it has in source buffer */
57
static inline int old_item_num(int new_num, int affected_item_num, int mode)
58
{
59
        if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
60
                return new_num;
61
 
62
        if (mode == M_INSERT) {
63
 
64
                RFALSE(new_num == 0,
65
                       "vs-8005: for INSERT mode and item number of inserted item");
66
 
67
                return new_num - 1;
68
        }
69
 
70
        RFALSE(mode != M_DELETE,
71
               "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
72
               mode);
73
        /* delete mode */
74
        return new_num + 1;
75
}
76
 
77
static void create_virtual_node(struct tree_balance *tb, int h)
78
{
79
        struct item_head *ih;
80
        struct virtual_node *vn = tb->tb_vn;
81
        int new_num;
82
        struct buffer_head *Sh; /* this comes from tb->S[h] */
83
 
84
        Sh = PATH_H_PBUFFER(tb->tb_path, h);
85
 
86
        /* size of changed node */
87
        vn->vn_size =
88
            MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];
89
 
90
        /* for internal nodes array if virtual items is not created */
91
        if (h) {
92
                vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
93
                return;
94
        }
95
 
96
        /* number of items in virtual node  */
97
        vn->vn_nr_item =
98
            B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
99
            ((vn->vn_mode == M_DELETE) ? 1 : 0);
100
 
101
        /* first virtual item */
102
        vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
103
        memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
104
        vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
105
 
106
        /* first item in the node */
107
        ih = B_N_PITEM_HEAD(Sh, 0);
108
 
109
        /* define the mergeability for 0-th item (if it is not being deleted) */
110
        if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
111
            && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
112
                vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
113
 
114
        /* go through all items those remain in the virtual node (except for the new (inserted) one) */
115
        for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
116
                int j;
117
                struct virtual_item *vi = vn->vn_vi + new_num;
118
                int is_affected =
119
                    ((new_num != vn->vn_affected_item_num) ? 0 : 1);
120
 
121
                if (is_affected && vn->vn_mode == M_INSERT)
122
                        continue;
123
 
124
                /* get item number in source node */
125
                j = old_item_num(new_num, vn->vn_affected_item_num,
126
                                 vn->vn_mode);
127
 
128
                vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
129
                vi->vi_ih = ih + j;
130
                vi->vi_item = B_I_PITEM(Sh, ih + j);
131
                vi->vi_uarea = vn->vn_free_ptr;
132
 
133
                // FIXME: there is no check, that item operation did not
134
                // consume too much memory
135
                vn->vn_free_ptr +=
136
                    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
137
                if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
138
                        reiserfs_panic(tb->tb_sb,
139
                                       "vs-8030: create_virtual_node: "
140
                                       "virtual node space consumed");
141
 
142
                if (!is_affected)
143
                        /* this is not being changed */
144
                        continue;
145
 
146
                if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
147
                        vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
148
                        vi->vi_new_data = vn->vn_data;  // pointer to data which is going to be pasted
149
                }
150
        }
151
 
152
        /* virtual inserted item is not defined yet */
153
        if (vn->vn_mode == M_INSERT) {
154
                struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
155
 
156
                RFALSE(vn->vn_ins_ih == 0,
157
                       "vs-8040: item header of inserted item is not specified");
158
                vi->vi_item_len = tb->insert_size[0];
159
                vi->vi_ih = vn->vn_ins_ih;
160
                vi->vi_item = vn->vn_data;
161
                vi->vi_uarea = vn->vn_free_ptr;
162
 
163
                op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
164
                             tb->insert_size[0]);
165
        }
166
 
167
        /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
168
        if (tb->CFR[0]) {
169
                struct reiserfs_key *key;
170
 
171
                key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
172
                if (op_is_left_mergeable(key, Sh->b_size)
173
                    && (vn->vn_mode != M_DELETE
174
                        || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
175
                        vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
176
                            VI_TYPE_RIGHT_MERGEABLE;
177
 
178
#ifdef CONFIG_REISERFS_CHECK
179
                if (op_is_left_mergeable(key, Sh->b_size) &&
180
                    !(vn->vn_mode != M_DELETE
181
                      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
182
                        /* we delete last item and it could be merged with right neighbor's first item */
183
                        if (!
184
                            (B_NR_ITEMS(Sh) == 1
185
                             && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
186
                             && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
187
                                /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
188
                                print_block(Sh, 0, -1, -1);
189
                                reiserfs_panic(tb->tb_sb,
190
                                               "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
191
                                               key, vn->vn_affected_item_num,
192
                                               vn->vn_mode, M_DELETE);
193
                        }
194
                }
195
#endif
196
 
197
        }
198
}
199
 
200
/* using virtual node check, how many items can be shifted to left
201
   neighbor */
202
static void check_left(struct tree_balance *tb, int h, int cur_free)
203
{
204
        int i;
205
        struct virtual_node *vn = tb->tb_vn;
206
        struct virtual_item *vi;
207
        int d_size, ih_size;
208
 
209
        RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
210
 
211
        /* internal level */
212
        if (h > 0) {
213
                tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
214
                return;
215
        }
216
 
217
        /* leaf level */
218
 
219
        if (!cur_free || !vn->vn_nr_item) {
220
                /* no free space or nothing to move */
221
                tb->lnum[h] = 0;
222
                tb->lbytes = -1;
223
                return;
224
        }
225
 
226
        RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
227
               "vs-8055: parent does not exist or invalid");
228
 
229
        vi = vn->vn_vi;
230
        if ((unsigned int)cur_free >=
231
            (vn->vn_size -
232
             ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
233
                /* all contents of S[0] fits into L[0] */
234
 
235
                RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
236
                       "vs-8055: invalid mode or balance condition failed");
237
 
238
                tb->lnum[0] = vn->vn_nr_item;
239
                tb->lbytes = -1;
240
                return;
241
        }
242
 
243
        d_size = 0, ih_size = IH_SIZE;
244
 
245
        /* first item may be merge with last item in left neighbor */
246
        if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
247
                d_size = -((int)IH_SIZE), ih_size = 0;
248
 
249
        tb->lnum[0] = 0;
250
        for (i = 0; i < vn->vn_nr_item;
251
             i++, ih_size = IH_SIZE, d_size = 0, vi++) {
252
                d_size += vi->vi_item_len;
253
                if (cur_free >= d_size) {
254
                        /* the item can be shifted entirely */
255
                        cur_free -= d_size;
256
                        tb->lnum[0]++;
257
                        continue;
258
                }
259
 
260
                /* the item cannot be shifted entirely, try to split it */
261
                /* check whether L[0] can hold ih and at least one byte of the item body */
262
                if (cur_free <= ih_size) {
263
                        /* cannot shift even a part of the current item */
264
                        tb->lbytes = -1;
265
                        return;
266
                }
267
                cur_free -= ih_size;
268
 
269
                tb->lbytes = op_check_left(vi, cur_free, 0, 0);
270
                if (tb->lbytes != -1)
271
                        /* count partially shifted item */
272
                        tb->lnum[0]++;
273
 
274
                break;
275
        }
276
 
277
        return;
278
}
279
 
280
/* using virtual node check, how many items can be shifted to right
281
   neighbor */
282
static void check_right(struct tree_balance *tb, int h, int cur_free)
283
{
284
        int i;
285
        struct virtual_node *vn = tb->tb_vn;
286
        struct virtual_item *vi;
287
        int d_size, ih_size;
288
 
289
        RFALSE(cur_free < 0, "vs-8070: cur_free < 0");
290
 
291
        /* internal level */
292
        if (h > 0) {
293
                tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
294
                return;
295
        }
296
 
297
        /* leaf level */
298
 
299
        if (!cur_free || !vn->vn_nr_item) {
300
                /* no free space  */
301
                tb->rnum[h] = 0;
302
                tb->rbytes = -1;
303
                return;
304
        }
305
 
306
        RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
307
               "vs-8075: parent does not exist or invalid");
308
 
309
        vi = vn->vn_vi + vn->vn_nr_item - 1;
310
        if ((unsigned int)cur_free >=
311
            (vn->vn_size -
312
             ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
313
                /* all contents of S[0] fits into R[0] */
314
 
315
                RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
316
                       "vs-8080: invalid mode or balance condition failed");
317
 
318
                tb->rnum[h] = vn->vn_nr_item;
319
                tb->rbytes = -1;
320
                return;
321
        }
322
 
323
        d_size = 0, ih_size = IH_SIZE;
324
 
325
        /* last item may be merge with first item in right neighbor */
326
        if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
327
                d_size = -(int)IH_SIZE, ih_size = 0;
328
 
329
        tb->rnum[0] = 0;
330
        for (i = vn->vn_nr_item - 1; i >= 0;
331
             i--, d_size = 0, ih_size = IH_SIZE, vi--) {
332
                d_size += vi->vi_item_len;
333
                if (cur_free >= d_size) {
334
                        /* the item can be shifted entirely */
335
                        cur_free -= d_size;
336
                        tb->rnum[0]++;
337
                        continue;
338
                }
339
 
340
                /* check whether R[0] can hold ih and at least one byte of the item body */
341
                if (cur_free <= ih_size) {      /* cannot shift even a part of the current item */
342
                        tb->rbytes = -1;
343
                        return;
344
                }
345
 
346
                /* R[0] can hold the header of the item and at least one byte of its body */
347
                cur_free -= ih_size;    /* cur_free is still > 0 */
348
 
349
                tb->rbytes = op_check_right(vi, cur_free);
350
                if (tb->rbytes != -1)
351
                        /* count partially shifted item */
352
                        tb->rnum[0]++;
353
 
354
                break;
355
        }
356
 
357
        return;
358
}
359
 
360
/*
361
 * from - number of items, which are shifted to left neighbor entirely
362
 * to - number of item, which are shifted to right neighbor entirely
363
 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
364
 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
365
static int get_num_ver(int mode, struct tree_balance *tb, int h,
366
                       int from, int from_bytes,
367
                       int to, int to_bytes, short *snum012, int flow)
368
{
369
        int i;
370
        int cur_free;
371
        //    int bytes;
372
        int units;
373
        struct virtual_node *vn = tb->tb_vn;
374
        //    struct virtual_item * vi;
375
 
376
        int total_node_size, max_node_size, current_item_size;
377
        int needed_nodes;
378
        int start_item,         /* position of item we start filling node from */
379
         end_item,              /* position of item we finish filling node by */
380
         start_bytes,           /* number of first bytes (entries for directory) of start_item-th item
381
                                   we do not include into node that is being filled */
382
         end_bytes;             /* number of last bytes (entries for directory) of end_item-th item
383
                                   we do node include into node that is being filled */
384
        int split_item_positions[2];    /* these are positions in virtual item of
385
                                           items, that are split between S[0] and
386
                                           S1new and S1new and S2new */
387
 
388
        split_item_positions[0] = -1;
389
        split_item_positions[1] = -1;
390
 
391
        /* We only create additional nodes if we are in insert or paste mode
392
           or we are in replace mode at the internal level. If h is 0 and
393
           the mode is M_REPLACE then in fix_nodes we change the mode to
394
           paste or insert before we get here in the code.  */
395
        RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
396
               "vs-8100: insert_size < 0 in overflow");
397
 
398
        max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
399
 
400
        /* snum012 [0-2] - number of items, that lay
401
           to S[0], first new node and second new node */
402
        snum012[3] = -1;        /* s1bytes */
403
        snum012[4] = -1;        /* s2bytes */
404
 
405
        /* internal level */
406
        if (h > 0) {
407
                i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
408
                if (i == max_node_size)
409
                        return 1;
410
                return (i / max_node_size + 1);
411
        }
412
 
413
        /* leaf level */
414
        needed_nodes = 1;
415
        total_node_size = 0;
416
        cur_free = max_node_size;
417
 
418
        // start from 'from'-th item
419
        start_item = from;
420
        // skip its first 'start_bytes' units
421
        start_bytes = ((from_bytes != -1) ? from_bytes : 0);
422
 
423
        // last included item is the 'end_item'-th one
424
        end_item = vn->vn_nr_item - to - 1;
425
        // do not count last 'end_bytes' units of 'end_item'-th item
426
        end_bytes = (to_bytes != -1) ? to_bytes : 0;
427
 
428
        /* go through all item beginning from the start_item-th item and ending by
429
           the end_item-th item. Do not count first 'start_bytes' units of
430
           'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
431
 
432
        for (i = start_item; i <= end_item; i++) {
433
                struct virtual_item *vi = vn->vn_vi + i;
434
                int skip_from_end = ((i == end_item) ? end_bytes : 0);
435
 
436
                RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");
437
 
438
                /* get size of current item */
439
                current_item_size = vi->vi_item_len;
440
 
441
                /* do not take in calculation head part (from_bytes) of from-th item */
442
                current_item_size -=
443
                    op_part_size(vi, 0 /*from start */ , start_bytes);
444
 
445
                /* do not take in calculation tail part of last item */
446
                current_item_size -=
447
                    op_part_size(vi, 1 /*from end */ , skip_from_end);
448
 
449
                /* if item fits into current node entierly */
450
                if (total_node_size + current_item_size <= max_node_size) {
451
                        snum012[needed_nodes - 1]++;
452
                        total_node_size += current_item_size;
453
                        start_bytes = 0;
454
                        continue;
455
                }
456
 
457
                if (current_item_size > max_node_size) {
458
                        /* virtual item length is longer, than max size of item in
459
                           a node. It is impossible for direct item */
460
                        RFALSE(is_direct_le_ih(vi->vi_ih),
461
                               "vs-8110: "
462
                               "direct item length is %d. It can not be longer than %d",
463
                               current_item_size, max_node_size);
464
                        /* we will try to split it */
465
                        flow = 1;
466
                }
467
 
468
                if (!flow) {
469
                        /* as we do not split items, take new node and continue */
470
                        needed_nodes++;
471
                        i--;
472
                        total_node_size = 0;
473
                        continue;
474
                }
475
                // calculate number of item units which fit into node being
476
                // filled
477
                {
478
                        int free_space;
479
 
480
                        free_space = max_node_size - total_node_size - IH_SIZE;
481
                        units =
482
                            op_check_left(vi, free_space, start_bytes,
483
                                          skip_from_end);
484
                        if (units == -1) {
485
                                /* nothing fits into current node, take new node and continue */
486
                                needed_nodes++, i--, total_node_size = 0;
487
                                continue;
488
                        }
489
                }
490
 
491
                /* something fits into the current node */
492
                //if (snum012[3] != -1 || needed_nodes != 1)
493
                //  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
494
                //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
495
                start_bytes += units;
496
                snum012[needed_nodes - 1 + 3] = units;
497
 
498
                if (needed_nodes > 2)
499
                        reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
500
                                         "split_item_position is out of boundary");
501
                snum012[needed_nodes - 1]++;
502
                split_item_positions[needed_nodes - 1] = i;
503
                needed_nodes++;
504
                /* continue from the same item with start_bytes != -1 */
505
                start_item = i;
506
                i--;
507
                total_node_size = 0;
508
        }
509
 
510
        // sum012[4] (if it is not -1) contains number of units of which
511
        // are to be in S1new, snum012[3] - to be in S0. They are supposed
512
        // to be S1bytes and S2bytes correspondingly, so recalculate
513
        if (snum012[4] > 0) {
514
                int split_item_num;
515
                int bytes_to_r, bytes_to_l;
516
                int bytes_to_S1new;
517
 
518
                split_item_num = split_item_positions[1];
519
                bytes_to_l =
520
                    ((from == split_item_num
521
                      && from_bytes != -1) ? from_bytes : 0);
522
                bytes_to_r =
523
                    ((end_item == split_item_num
524
                      && end_bytes != -1) ? end_bytes : 0);
525
                bytes_to_S1new =
526
                    ((split_item_positions[0] ==
527
                      split_item_positions[1]) ? snum012[3] : 0);
528
 
529
                // s2bytes
530
                snum012[4] =
531
                    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
532
                    bytes_to_r - bytes_to_l - bytes_to_S1new;
533
 
534
                if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
535
                    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
536
                        reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
537
                                         "directory or indirect item");
538
        }
539
 
540
        /* now we know S2bytes, calculate S1bytes */
541
        if (snum012[3] > 0) {
542
                int split_item_num;
543
                int bytes_to_r, bytes_to_l;
544
                int bytes_to_S2new;
545
 
546
                split_item_num = split_item_positions[0];
547
                bytes_to_l =
548
                    ((from == split_item_num
549
                      && from_bytes != -1) ? from_bytes : 0);
550
                bytes_to_r =
551
                    ((end_item == split_item_num
552
                      && end_bytes != -1) ? end_bytes : 0);
553
                bytes_to_S2new =
554
                    ((split_item_positions[0] == split_item_positions[1]
555
                      && snum012[4] != -1) ? snum012[4] : 0);
556
 
557
                // s1bytes
558
                snum012[3] =
559
                    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
560
                    bytes_to_r - bytes_to_l - bytes_to_S2new;
561
        }
562
 
563
        return needed_nodes;
564
}
565
 
566
#ifdef CONFIG_REISERFS_CHECK
567
extern struct tree_balance *cur_tb;
568
#endif
569
 
570
/* Set parameters for balancing.
571
 * Performs write of results of analysis of balancing into structure tb,
572
 * where it will later be used by the functions that actually do the balancing.
573
 * Parameters:
574
 *      tb      tree_balance structure;
575
 *      h       current level of the node;
576
 *      lnum    number of items from S[h] that must be shifted to L[h];
577
 *      rnum    number of items from S[h] that must be shifted to R[h];
578
 *      blk_num number of blocks that S[h] will be splitted into;
579
 *      s012    number of items that fall into splitted nodes.
580
 *      lbytes  number of bytes which flow to the left neighbor from the item that is not
581
 *              not shifted entirely
582
 *      rbytes  number of bytes which flow to the right neighbor from the item that is not
583
 *              not shifted entirely
584
 *      s1bytes number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
585
 */
586
 
587
static void set_parameters(struct tree_balance *tb, int h, int lnum,
588
                           int rnum, int blk_num, short *s012, int lb, int rb)
589
{
590
 
591
        tb->lnum[h] = lnum;
592
        tb->rnum[h] = rnum;
593
        tb->blknum[h] = blk_num;
594
 
595
        if (h == 0) {            /* only for leaf level */
596
                if (s012 != NULL) {
597
                        tb->s0num = *s012++,
598
                            tb->s1num = *s012++, tb->s2num = *s012++;
599
                        tb->s1bytes = *s012++;
600
                        tb->s2bytes = *s012;
601
                }
602
                tb->lbytes = lb;
603
                tb->rbytes = rb;
604
        }
605
        PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
606
        PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
607
 
608
        PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
609
        PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
610
}
611
 
612
/* check, does node disappear if we shift tb->lnum[0] items to left
613
   neighbor and tb->rnum[0] to the right one. */
614
static int is_leaf_removable(struct tree_balance *tb)
615
{
616
        struct virtual_node *vn = tb->tb_vn;
617
        int to_left, to_right;
618
        int size;
619
        int remain_items;
620
 
621
        /* number of items, that will be shifted to left (right) neighbor
622
           entirely */
623
        to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
624
        to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
625
        remain_items = vn->vn_nr_item;
626
 
627
        /* how many items remain in S[0] after shiftings to neighbors */
628
        remain_items -= (to_left + to_right);
629
 
630
        if (remain_items < 1) {
631
                /* all content of node can be shifted to neighbors */
632
                set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
633
                               NULL, -1, -1);
634
                return 1;
635
        }
636
 
637
        if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
638
                /* S[0] is not removable */
639
                return 0;
640
 
641
        /* check, whether we can divide 1 remaining item between neighbors */
642
 
643
        /* get size of remaining item (in item units) */
644
        size = op_unit_num(&(vn->vn_vi[to_left]));
645
 
646
        if (tb->lbytes + tb->rbytes >= size) {
647
                set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
648
                               tb->lbytes, -1);
649
                return 1;
650
        }
651
 
652
        return 0;
653
}
654
 
655
/* check whether L, S, R can be joined in one node */
656
static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
657
{
658
        struct virtual_node *vn = tb->tb_vn;
659
        int ih_size;
660
        struct buffer_head *S0;
661
 
662
        S0 = PATH_H_PBUFFER(tb->tb_path, 0);
663
 
664
        ih_size = 0;
665
        if (vn->vn_nr_item) {
666
                if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
667
                        ih_size += IH_SIZE;
668
 
669
                if (vn->vn_vi[vn->vn_nr_item - 1].
670
                    vi_type & VI_TYPE_RIGHT_MERGEABLE)
671
                        ih_size += IH_SIZE;
672
        } else {
673
                /* there was only one item and it will be deleted */
674
                struct item_head *ih;
675
 
676
                RFALSE(B_NR_ITEMS(S0) != 1,
677
                       "vs-8125: item number must be 1: it is %d",
678
                       B_NR_ITEMS(S0));
679
 
680
                ih = B_N_PITEM_HEAD(S0, 0);
681
                if (tb->CFR[0]
682
                    && !comp_short_le_keys(&(ih->ih_key),
683
                                           B_N_PDELIM_KEY(tb->CFR[0],
684
                                                          tb->rkey[0])))
685
                        if (is_direntry_le_ih(ih)) {
686
                                /* Directory must be in correct state here: that is
687
                                   somewhere at the left side should exist first directory
688
                                   item. But the item being deleted can not be that first
689
                                   one because its right neighbor is item of the same
690
                                   directory. (But first item always gets deleted in last
691
                                   turn). So, neighbors of deleted item can be merged, so
692
                                   we can save ih_size */
693
                                ih_size = IH_SIZE;
694
 
695
                                /* we might check that left neighbor exists and is of the
696
                                   same directory */
697
                                RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
698
                                       "vs-8130: first directory item can not be removed until directory is not empty");
699
                        }
700
 
701
        }
702
 
703
        if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
704
                set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
705
                PROC_INFO_INC(tb->tb_sb, leaves_removable);
706
                return 1;
707
        }
708
        return 0;
709
 
710
}
711
 
712
/* when we do not split item, lnum and rnum are numbers of entire items */
713
#define SET_PAR_SHIFT_LEFT \
714
if (h)\
715
{\
716
   int to_l;\
717
   \
718
   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
719
              (MAX_NR_KEY(Sh) + 1 - lpar);\
720
              \
721
              set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
722
}\
723
else \
724
{\
725
   if (lset==LEFT_SHIFT_FLOW)\
726
     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
727
                     tb->lbytes, -1);\
728
   else\
729
     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
730
                     -1, -1);\
731
}
732
 
733
#define SET_PAR_SHIFT_RIGHT \
734
if (h)\
735
{\
736
   int to_r;\
737
   \
738
   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
739
   \
740
   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
741
}\
742
else \
743
{\
744
   if (rset==RIGHT_SHIFT_FLOW)\
745
     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
746
                  -1, tb->rbytes);\
747
   else\
748
     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
749
                  -1, -1);\
750
}
751
 
752
static void free_buffers_in_tb(struct tree_balance *p_s_tb)
753
{
754
        int n_counter;
755
 
756
        decrement_counters_in_path(p_s_tb->tb_path);
757
 
758
        for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
759
                decrement_bcount(p_s_tb->L[n_counter]);
760
                p_s_tb->L[n_counter] = NULL;
761
                decrement_bcount(p_s_tb->R[n_counter]);
762
                p_s_tb->R[n_counter] = NULL;
763
                decrement_bcount(p_s_tb->FL[n_counter]);
764
                p_s_tb->FL[n_counter] = NULL;
765
                decrement_bcount(p_s_tb->FR[n_counter]);
766
                p_s_tb->FR[n_counter] = NULL;
767
                decrement_bcount(p_s_tb->CFL[n_counter]);
768
                p_s_tb->CFL[n_counter] = NULL;
769
                decrement_bcount(p_s_tb->CFR[n_counter]);
770
                p_s_tb->CFR[n_counter] = NULL;
771
        }
772
}
773
 
774
/* Get new buffers for storing new nodes that are created while balancing.
775
 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
776
 *              CARRY_ON - schedule didn't occur while the function worked;
777
 *              NO_DISK_SPACE - no disk space.
778
 */
779
/* The function is NOT SCHEDULE-SAFE! */
780
static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
781
{
782
        struct buffer_head *p_s_new_bh,
783
            *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
784
        b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
785
        int n_counter, n_number_of_freeblk, n_amount_needed,    /* number of needed empty blocks */
786
         n_retval = CARRY_ON;
787
        struct super_block *p_s_sb = p_s_tb->tb_sb;
788
 
789
        /* number_of_freeblk is the number of empty blocks which have been
790
           acquired for use by the balancing algorithm minus the number of
791
           empty blocks used in the previous levels of the analysis,
792
           number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
793
           after empty blocks are acquired, and the balancing analysis is
794
           then restarted, amount_needed is the number needed by this level
795
           (n_h) of the balancing analysis.
796
 
797
           Note that for systems with many processes writing, it would be
798
           more layout optimal to calculate the total number needed by all
799
           levels and then to run reiserfs_new_blocks to get all of them at once.  */
800
 
801
        /* Initiate number_of_freeblk to the amount acquired prior to the restart of
802
           the analysis or 0 if not restarted, then subtract the amount needed
803
           by all of the levels of the tree below n_h. */
804
        /* blknum includes S[n_h], so we subtract 1 in this calculation */
805
        for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
806
             n_counter < n_h; n_counter++)
807
                n_number_of_freeblk -=
808
                    (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
809
                                                   1) : 0;
810
 
811
        /* Allocate missing empty blocks. */
812
        /* if p_s_Sh == 0  then we are getting a new root */
813
        n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
814
        /*  Amount_needed = the amount that we need more than the amount that we have. */
815
        if (n_amount_needed > n_number_of_freeblk)
816
                n_amount_needed -= n_number_of_freeblk;
817
        else                    /* If we have enough already then there is nothing to do. */
818
                return CARRY_ON;
819
 
820
        /* No need to check quota - is not allocated for blocks used for formatted nodes */
821
        if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
822
                                       n_amount_needed) == NO_DISK_SPACE)
823
                return NO_DISK_SPACE;
824
 
825
        /* for each blocknumber we just got, get a buffer and stick it on FEB */
826
        for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
827
             n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {
828
 
829
                RFALSE(!*p_n_blocknr,
830
                       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
831
 
832
                p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
833
                RFALSE(buffer_dirty(p_s_new_bh) ||
834
                       buffer_journaled(p_s_new_bh) ||
835
                       buffer_journal_dirty(p_s_new_bh),
836
                       "PAP-8140: journlaled or dirty buffer %b for the new block",
837
                       p_s_new_bh);
838
 
839
                /* Put empty buffers into the array. */
840
                RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
841
                       "PAP-8141: busy slot for new buffer");
842
 
843
                set_buffer_journal_new(p_s_new_bh);
844
                p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
845
        }
846
 
847
        if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
848
                n_retval = REPEAT_SEARCH;
849
 
850
        return n_retval;
851
}
852
 
853
/* Get free space of the left neighbor, which is stored in the parent
854
 * node of the left neighbor.  */
855
static int get_lfree(struct tree_balance *tb, int h)
856
{
857
        struct buffer_head *l, *f;
858
        int order;
859
 
860
        if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
861
                return 0;
862
 
863
        if (f == l)
864
                order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
865
        else {
866
                order = B_NR_ITEMS(l);
867
                f = l;
868
        }
869
 
870
        return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
871
}
872
 
873
/* Get free space of the right neighbor,
874
 * which is stored in the parent node of the right neighbor.
875
 */
876
static int get_rfree(struct tree_balance *tb, int h)
877
{
878
        struct buffer_head *r, *f;
879
        int order;
880
 
881
        if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
882
                return 0;
883
 
884
        if (f == r)
885
                order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
886
        else {
887
                order = 0;
888
                f = r;
889
        }
890
 
891
        return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
892
 
893
}
894
 
895
/* Check whether left neighbor is in memory. */
896
static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
897
{
898
        struct buffer_head *p_s_father, *left;
899
        struct super_block *p_s_sb = p_s_tb->tb_sb;
900
        b_blocknr_t n_left_neighbor_blocknr;
901
        int n_left_neighbor_position;
902
 
903
        if (!p_s_tb->FL[n_h])   /* Father of the left neighbor does not exist. */
904
                return 0;
905
 
906
        /* Calculate father of the node to be balanced. */
907
        p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);
908
 
909
        RFALSE(!p_s_father ||
910
               !B_IS_IN_TREE(p_s_father) ||
911
               !B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
912
               !buffer_uptodate(p_s_father) ||
913
               !buffer_uptodate(p_s_tb->FL[n_h]),
914
               "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
915
               p_s_father, p_s_tb->FL[n_h]);
916
 
917
        /* Get position of the pointer to the left neighbor into the left father. */
918
        n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
919
            p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
920
        /* Get left neighbor block number. */
921
        n_left_neighbor_blocknr =
922
            B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
923
        /* Look for the left neighbor in the cache. */
924
        if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {
925
 
926
                RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
927
                       "vs-8170: left neighbor (%b %z) is not in the tree",
928
                       left, left);
929
                put_bh(left);
930
                return 1;
931
        }
932
 
933
        return 0;
934
}
935
 
936
#define LEFT_PARENTS  'l'
937
#define RIGHT_PARENTS 'r'
938
 
939
static void decrement_key(struct cpu_key *p_s_key)
940
{
941
        // call item specific function for this key
942
        item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
943
}
944
 
945
/* Calculate far left/right parent of the left/right neighbor of the current node, that
946
 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
947
 * Calculate left/right common parent of the current node and L[h]/R[h].
948
 * Calculate left/right delimiting key position.
949
 * Returns:     PATH_INCORRECT   - path in the tree is not correct;
950
                SCHEDULE_OCCURRED - schedule occurred while the function worked;
951
 *              CARRY_ON         - schedule didn't occur while the function worked;
952
 */
953
static int get_far_parent(struct tree_balance *p_s_tb,
954
                          int n_h,
955
                          struct buffer_head **pp_s_father,
956
                          struct buffer_head **pp_s_com_father, char c_lr_par)
957
{
958
        struct buffer_head *p_s_parent;
959
        INITIALIZE_PATH(s_path_to_neighbor_father);
960
        struct treepath *p_s_path = p_s_tb->tb_path;
961
        struct cpu_key s_lr_father_key;
962
        int n_counter,
963
            n_position = INT_MAX,
964
            n_first_last_position = 0,
965
            n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);
966
 
967
        /* Starting from F[n_h] go upwards in the tree, and look for the common
968
           ancestor of F[n_h], and its neighbor l/r, that should be obtained. */
969
 
970
        n_counter = n_path_offset;
971
 
972
        RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
973
               "PAP-8180: invalid path length");
974
 
975
        for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
976
                /* Check whether parent of the current buffer in the path is really parent in the tree. */
977
                if (!B_IS_IN_TREE
978
                    (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
979
                        return REPEAT_SEARCH;
980
                /* Check whether position in the parent is correct. */
981
                if ((n_position =
982
                     PATH_OFFSET_POSITION(p_s_path,
983
                                          n_counter - 1)) >
984
                    B_NR_ITEMS(p_s_parent))
985
                        return REPEAT_SEARCH;
986
                /* Check whether parent at the path really points to the child. */
987
                if (B_N_CHILD_NUM(p_s_parent, n_position) !=
988
                    PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
989
                        return REPEAT_SEARCH;
990
                /* Return delimiting key if position in the parent is not equal to first/last one. */
991
                if (c_lr_par == RIGHT_PARENTS)
992
                        n_first_last_position = B_NR_ITEMS(p_s_parent);
993
                if (n_position != n_first_last_position) {
994
                        *pp_s_com_father = p_s_parent;
995
                        get_bh(*pp_s_com_father);
996
                        /*(*pp_s_com_father = p_s_parent)->b_count++; */
997
                        break;
998
                }
999
        }
1000
 
1001
        /* if we are in the root of the tree, then there is no common father */
1002
        if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
1003
                /* Check whether first buffer in the path is the root of the tree. */
1004
                if (PATH_OFFSET_PBUFFER
1005
                    (p_s_tb->tb_path,
1006
                     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
1007
                    SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
1008
                        *pp_s_father = *pp_s_com_father = NULL;
1009
                        return CARRY_ON;
1010
                }
1011
                return REPEAT_SEARCH;
1012
        }
1013
 
1014
        RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
1015
               "PAP-8185: (%b %z) level too small",
1016
               *pp_s_com_father, *pp_s_com_father);
1017
 
1018
        /* Check whether the common parent is locked. */
1019
 
1020
        if (buffer_locked(*pp_s_com_father)) {
1021
                __wait_on_buffer(*pp_s_com_father);
1022
                if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1023
                        decrement_bcount(*pp_s_com_father);
1024
                        return REPEAT_SEARCH;
1025
                }
1026
        }
1027
 
1028
        /* So, we got common parent of the current node and its left/right neighbor.
1029
           Now we are geting the parent of the left/right neighbor. */
1030
 
1031
        /* Form key to get parent of the left/right neighbor. */
1032
        le_key2cpu_key(&s_lr_father_key,
1033
                       B_N_PDELIM_KEY(*pp_s_com_father,
1034
                                      (c_lr_par ==
1035
                                       LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
1036
                                                        n_position -
1037
                                                        1) : (p_s_tb->rkey[n_h -
1038
                                                                           1] =
1039
                                                              n_position)));
1040
 
1041
        if (c_lr_par == LEFT_PARENTS)
1042
                decrement_key(&s_lr_father_key);
1043
 
1044
        if (search_by_key
1045
            (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
1046
             n_h + 1) == IO_ERROR)
1047
                // path is released
1048
                return IO_ERROR;
1049
 
1050
        if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1051
                decrement_counters_in_path(&s_path_to_neighbor_father);
1052
                decrement_bcount(*pp_s_com_father);
1053
                return REPEAT_SEARCH;
1054
        }
1055
 
1056
        *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
1057
 
1058
        RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
1059
               "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
1060
        RFALSE(s_path_to_neighbor_father.path_length <
1061
               FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
1062
 
1063
        s_path_to_neighbor_father.path_length--;
1064
        decrement_counters_in_path(&s_path_to_neighbor_father);
1065
        return CARRY_ON;
1066
}
1067
 
1068
/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
1069
 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
1070
 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
1071
 * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
1072
 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
1073
 *              CARRY_ON - schedule didn't occur while the function worked;
1074
 */
1075
static int get_parents(struct tree_balance *p_s_tb, int n_h)
1076
{
1077
        struct treepath *p_s_path = p_s_tb->tb_path;
1078
        int n_position,
1079
            n_ret_value,
1080
            n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1081
        struct buffer_head *p_s_curf, *p_s_curcf;
1082
 
1083
        /* Current node is the root of the tree or will be root of the tree */
1084
        if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1085
                /* The root can not have parents.
1086
                   Release nodes which previously were obtained as parents of the current node neighbors. */
1087
                decrement_bcount(p_s_tb->FL[n_h]);
1088
                decrement_bcount(p_s_tb->CFL[n_h]);
1089
                decrement_bcount(p_s_tb->FR[n_h]);
1090
                decrement_bcount(p_s_tb->CFR[n_h]);
1091
                p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
1092
                    p_s_tb->CFR[n_h] = NULL;
1093
                return CARRY_ON;
1094
        }
1095
 
1096
        /* Get parent FL[n_path_offset] of L[n_path_offset]. */
1097
        if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
1098
                /* Current node is not the first child of its parent. */
1099
                /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
1100
                p_s_curf = p_s_curcf =
1101
                    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1102
                get_bh(p_s_curf);
1103
                get_bh(p_s_curf);
1104
                p_s_tb->lkey[n_h] = n_position - 1;
1105
        } else {
1106
                /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
1107
                   Calculate current common parent of L[n_path_offset] and the current node. Note that
1108
                   CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
1109
                   Calculate lkey[n_path_offset]. */
1110
                if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
1111
                                                  &p_s_curcf,
1112
                                                  LEFT_PARENTS)) != CARRY_ON)
1113
                        return n_ret_value;
1114
        }
1115
 
1116
        decrement_bcount(p_s_tb->FL[n_h]);
1117
        p_s_tb->FL[n_h] = p_s_curf;     /* New initialization of FL[n_h]. */
1118
        decrement_bcount(p_s_tb->CFL[n_h]);
1119
        p_s_tb->CFL[n_h] = p_s_curcf;   /* New initialization of CFL[n_h]. */
1120
 
1121
        RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
1122
               (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
1123
               "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);
1124
 
1125
/* Get parent FR[n_h] of R[n_h]. */
1126
 
1127
/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
1128
        if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) {
1129
/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
1130
   Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
1131
   not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
1132
                if ((n_ret_value =
1133
                     get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf,
1134
                                    RIGHT_PARENTS)) != CARRY_ON)
1135
                        return n_ret_value;
1136
        } else {
1137
/* Current node is not the last child of its parent F[n_h]. */
1138
                /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
1139
                p_s_curf = p_s_curcf =
1140
                    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1141
                get_bh(p_s_curf);
1142
                get_bh(p_s_curf);
1143
                p_s_tb->rkey[n_h] = n_position;
1144
        }
1145
 
1146
        decrement_bcount(p_s_tb->FR[n_h]);
1147
        p_s_tb->FR[n_h] = p_s_curf;     /* New initialization of FR[n_path_offset]. */
1148
 
1149
        decrement_bcount(p_s_tb->CFR[n_h]);
1150
        p_s_tb->CFR[n_h] = p_s_curcf;   /* New initialization of CFR[n_path_offset]. */
1151
 
1152
        RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
1153
               (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
1154
               "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);
1155
 
1156
        return CARRY_ON;
1157
}
1158
 
1159
/* it is possible to remove node as result of shiftings to
1160
   neighbors even when we insert or paste item. */
1161
static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1162
                                      struct tree_balance *tb, int h)
1163
{
1164
        struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
1165
        int levbytes = tb->insert_size[h];
1166
        struct item_head *ih;
1167
        struct reiserfs_key *r_key = NULL;
1168
 
1169
        ih = B_N_PITEM_HEAD(Sh, 0);
1170
        if (tb->CFR[h])
1171
                r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
1172
 
1173
        if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1174
            /* shifting may merge items which might save space */
1175
            -
1176
            ((!h
1177
              && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
1178
            -
1179
            ((!h && r_key
1180
              && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1181
            + ((h) ? KEY_SIZE : 0)) {
1182
                /* node can not be removed */
1183
                if (sfree >= levbytes) {        /* new item fits into node S[h] without any shifting */
1184
                        if (!h)
1185
                                tb->s0num =
1186
                                    B_NR_ITEMS(Sh) +
1187
                                    ((mode == M_INSERT) ? 1 : 0);
1188
                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1189
                        return NO_BALANCING_NEEDED;
1190
                }
1191
        }
1192
        PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
1193
        return !NO_BALANCING_NEEDED;
1194
}
1195
 
1196
/* Check whether current node S[h] is balanced when increasing its size by
1197
 * Inserting or Pasting.
1198
 * Calculate parameters for balancing for current level h.
1199
 * Parameters:
1200
 *      tb      tree_balance structure;
1201
 *      h       current level of the node;
1202
 *      inum    item number in S[h];
1203
 *      mode    i - insert, p - paste;
1204
 * Returns:     1 - schedule occurred;
1205
 *              0 - balancing for higher levels needed;
1206
 *             -1 - no balancing for higher levels needed;
1207
 *             -2 - no disk space.
1208
 */
1209
/* ip means Inserting or Pasting */
1210
static int ip_check_balance(struct tree_balance *tb, int h)
1211
{
1212
        struct virtual_node *vn = tb->tb_vn;
1213
        int levbytes,           /* Number of bytes that must be inserted into (value
1214
                                   is negative if bytes are deleted) buffer which
1215
                                   contains node being balanced.  The mnemonic is
1216
                                   that the attempted change in node space used level
1217
                                   is levbytes bytes. */
1218
         n_ret_value;
1219
 
1220
        int lfree, sfree, rfree /* free space in L, S and R */ ;
1221
 
1222
        /* nver is short for number of vertixes, and lnver is the number if
1223
           we shift to the left, rnver is the number if we shift to the
1224
           right, and lrnver is the number if we shift in both directions.
1225
           The goal is to minimize first the number of vertixes, and second,
1226
           the number of vertixes whose contents are changed by shifting,
1227
           and third the number of uncached vertixes whose contents are
1228
           changed by shifting and must be read from disk.  */
1229
        int nver, lnver, rnver, lrnver;
1230
 
1231
        /* used at leaf level only, S0 = S[0] is the node being balanced,
1232
           sInum [ I = 0,1,2 ] is the number of items that will
1233
           remain in node SI after balancing.  S1 and S2 are new
1234
           nodes that might be created. */
1235
 
1236
        /* we perform 8 calls to get_num_ver().  For each call we calculate five parameters.
1237
           where 4th parameter is s1bytes and 5th - s2bytes
1238
         */
1239
        short snum012[40] = { 0, };      /* s0num, s1num, s2num for 8 cases
1240
                                           0,1 - do not shift and do not shift but bottle
1241
                                           2 - shift only whole item to left
1242
                                           3 - shift to left and bottle as much as possible
1243
                                           4,5 - shift to right (whole items and as much as possible
1244
                                           6,7 - shift to both directions (whole items and as much as possible)
1245
                                         */
1246
 
1247
        /* Sh is the node whose balance is currently being checked */
1248
        struct buffer_head *Sh;
1249
 
1250
        Sh = PATH_H_PBUFFER(tb->tb_path, h);
1251
        levbytes = tb->insert_size[h];
1252
 
1253
        /* Calculate balance parameters for creating new root. */
1254
        if (!Sh) {
1255
                if (!h)
1256
                        reiserfs_panic(tb->tb_sb,
1257
                                       "vs-8210: ip_check_balance: S[0] can not be 0");
1258
                switch (n_ret_value = get_empty_nodes(tb, h)) {
1259
                case CARRY_ON:
1260
                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1261
                        return NO_BALANCING_NEEDED;     /* no balancing for higher levels needed */
1262
 
1263
                case NO_DISK_SPACE:
1264
                case REPEAT_SEARCH:
1265
                        return n_ret_value;
1266
                default:
1267
                        reiserfs_panic(tb->tb_sb,
1268
                                       "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
1269
                }
1270
        }
1271
 
1272
        if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)     /* get parents of S[h] neighbors. */
1273
                return n_ret_value;
1274
 
1275
        sfree = B_FREE_SPACE(Sh);
1276
 
1277
        /* get free space of neighbors */
1278
        rfree = get_rfree(tb, h);
1279
        lfree = get_lfree(tb, h);
1280
 
1281
        if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1282
            NO_BALANCING_NEEDED)
1283
                /* and new item fits into node S[h] without any shifting */
1284
                return NO_BALANCING_NEEDED;
1285
 
1286
        create_virtual_node(tb, h);
1287
 
1288
        /*
1289
           determine maximal number of items we can shift to the left neighbor (in tb structure)
1290
           and the maximal number of bytes that can flow to the left neighbor
1291
           from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
1292
         */
1293
        check_left(tb, h, lfree);
1294
 
1295
        /*
1296
           determine maximal number of items we can shift to the right neighbor (in tb structure)
1297
           and the maximal number of bytes that can flow to the right neighbor
1298
           from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1299
         */
1300
        check_right(tb, h, rfree);
1301
 
1302
        /* all contents of internal node S[h] can be moved into its
1303
           neighbors, S[h] will be removed after balancing */
1304
        if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1305
                int to_r;
1306
 
1307
                /* Since we are working on internal nodes, and our internal
1308
                   nodes have fixed size entries, then we can balance by the
1309
                   number of items rather than the space they consume.  In this
1310
                   routine we set the left node equal to the right node,
1311
                   allowing a difference of less than or equal to 1 child
1312
                   pointer. */
1313
                to_r =
1314
                    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1315
                     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1316
                                                tb->rnum[h]);
1317
                set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1318
                               -1, -1);
1319
                return CARRY_ON;
1320
        }
1321
 
1322
        /* this checks balance condition, that any two neighboring nodes can not fit in one node */
1323
        RFALSE(h &&
1324
               (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1325
                tb->rnum[h] >= vn->vn_nr_item + 1),
1326
               "vs-8220: tree is not balanced on internal level");
1327
        RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1328
                      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1329
               "vs-8225: tree is not balanced on leaf level");
1330
 
1331
        /* all contents of S[0] can be moved into its neighbors
1332
           S[0] will be removed after balancing. */
1333
        if (!h && is_leaf_removable(tb))
1334
                return CARRY_ON;
1335
 
1336
        /* why do we perform this check here rather than earlier??
1337
           Answer: we can win 1 node in some cases above. Moreover we
1338
           checked it above, when we checked, that S[0] is not removable
1339
           in principle */
1340
        if (sfree >= levbytes) {        /* new item fits into node S[h] without any shifting */
1341
                if (!h)
1342
                        tb->s0num = vn->vn_nr_item;
1343
                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1344
                return NO_BALANCING_NEEDED;
1345
        }
1346
 
1347
        {
1348
                int lpar, rpar, nset, lset, rset, lrset;
1349
                /*
1350
                 * regular overflowing of the node
1351
                 */
1352
 
1353
                /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1354
                   lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1355
                   nset, lset, rset, lrset - shows, whether flowing items give better packing
1356
                 */
1357
#define FLOW 1
1358
#define NO_FLOW 0               /* do not any splitting */
1359
 
1360
                /* we choose one the following */
1361
#define NOTHING_SHIFT_NO_FLOW   0
1362
#define NOTHING_SHIFT_FLOW      5
1363
#define LEFT_SHIFT_NO_FLOW      10
1364
#define LEFT_SHIFT_FLOW         15
1365
#define RIGHT_SHIFT_NO_FLOW     20
1366
#define RIGHT_SHIFT_FLOW        25
1367
#define LR_SHIFT_NO_FLOW        30
1368
#define LR_SHIFT_FLOW           35
1369
 
1370
                lpar = tb->lnum[h];
1371
                rpar = tb->rnum[h];
1372
 
1373
                /* calculate number of blocks S[h] must be split into when
1374
                   nothing is shifted to the neighbors,
1375
                   as well as number of items in each part of the split node (s012 numbers),
1376
                   and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
1377
                nset = NOTHING_SHIFT_NO_FLOW;
1378
                nver = get_num_ver(vn->vn_mode, tb, h,
1379
                                   0, -1, h ? vn->vn_nr_item : 0, -1,
1380
                                   snum012, NO_FLOW);
1381
 
1382
                if (!h) {
1383
                        int nver1;
1384
 
1385
                        /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
1386
                        nver1 = get_num_ver(vn->vn_mode, tb, h,
1387
                                            0, -1, 0, -1,
1388
                                            snum012 + NOTHING_SHIFT_FLOW, FLOW);
1389
                        if (nver > nver1)
1390
                                nset = NOTHING_SHIFT_FLOW, nver = nver1;
1391
                }
1392
 
1393
                /* calculate number of blocks S[h] must be split into when
1394
                   l_shift_num first items and l_shift_bytes of the right most
1395
                   liquid item to be shifted are shifted to the left neighbor,
1396
                   as well as number of items in each part of the splitted node (s012 numbers),
1397
                   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1398
                 */
1399
                lset = LEFT_SHIFT_NO_FLOW;
1400
                lnver = get_num_ver(vn->vn_mode, tb, h,
1401
                                    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1402
                                    -1, h ? vn->vn_nr_item : 0, -1,
1403
                                    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1404
                if (!h) {
1405
                        int lnver1;
1406
 
1407
                        lnver1 = get_num_ver(vn->vn_mode, tb, h,
1408
                                             lpar -
1409
                                             ((tb->lbytes != -1) ? 1 : 0),
1410
                                             tb->lbytes, 0, -1,
1411
                                             snum012 + LEFT_SHIFT_FLOW, FLOW);
1412
                        if (lnver > lnver1)
1413
                                lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1414
                }
1415
 
1416
                /* calculate number of blocks S[h] must be split into when
1417
                   r_shift_num first items and r_shift_bytes of the left most
1418
                   liquid item to be shifted are shifted to the right neighbor,
1419
                   as well as number of items in each part of the splitted node (s012 numbers),
1420
                   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1421
                 */
1422
                rset = RIGHT_SHIFT_NO_FLOW;
1423
                rnver = get_num_ver(vn->vn_mode, tb, h,
1424
                                    0, -1,
1425
                                    h ? (vn->vn_nr_item - rpar) : (rpar -
1426
                                                                   ((tb->
1427
                                                                     rbytes !=
1428
                                                                     -1) ? 1 :
1429
                                                                    0)), -1,
1430
                                    snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
1431
                if (!h) {
1432
                        int rnver1;
1433
 
1434
                        rnver1 = get_num_ver(vn->vn_mode, tb, h,
1435
                                             0, -1,
1436
                                             (rpar -
1437
                                              ((tb->rbytes != -1) ? 1 : 0)),
1438
                                             tb->rbytes,
1439
                                             snum012 + RIGHT_SHIFT_FLOW, FLOW);
1440
 
1441
                        if (rnver > rnver1)
1442
                                rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1443
                }
1444
 
1445
                /* calculate number of blocks S[h] must be split into when
1446
                   items are shifted in both directions,
1447
                   as well as number of items in each part of the splitted node (s012 numbers),
1448
                   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1449
                 */
1450
                lrset = LR_SHIFT_NO_FLOW;
1451
                lrnver = get_num_ver(vn->vn_mode, tb, h,
1452
                                     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1453
                                     -1,
1454
                                     h ? (vn->vn_nr_item - rpar) : (rpar -
1455
                                                                    ((tb->
1456
                                                                      rbytes !=
1457
                                                                      -1) ? 1 :
1458
                                                                     0)), -1,
1459
                                     snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
1460
                if (!h) {
1461
                        int lrnver1;
1462
 
1463
                        lrnver1 = get_num_ver(vn->vn_mode, tb, h,
1464
                                              lpar -
1465
                                              ((tb->lbytes != -1) ? 1 : 0),
1466
                                              tb->lbytes,
1467
                                              (rpar -
1468
                                               ((tb->rbytes != -1) ? 1 : 0)),
1469
                                              tb->rbytes,
1470
                                              snum012 + LR_SHIFT_FLOW, FLOW);
1471
                        if (lrnver > lrnver1)
1472
                                lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1473
                }
1474
 
1475
                /* Our general shifting strategy is:
1476
                   1) to minimized number of new nodes;
1477
                   2) to minimized number of neighbors involved in shifting;
1478
                   3) to minimized number of disk reads; */
1479
 
1480
                /* we can win TWO or ONE nodes by shifting in both directions */
1481
                if (lrnver < lnver && lrnver < rnver) {
1482
                        RFALSE(h &&
1483
                               (tb->lnum[h] != 1 ||
1484
                                tb->rnum[h] != 1 ||
1485
                                lrnver != 1 || rnver != 2 || lnver != 2
1486
                                || h != 1), "vs-8230: bad h");
1487
                        if (lrset == LR_SHIFT_FLOW)
1488
                                set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
1489
                                               lrnver, snum012 + lrset,
1490
                                               tb->lbytes, tb->rbytes);
1491
                        else
1492
                                set_parameters(tb, h,
1493
                                               tb->lnum[h] -
1494
                                               ((tb->lbytes == -1) ? 0 : 1),
1495
                                               tb->rnum[h] -
1496
                                               ((tb->rbytes == -1) ? 0 : 1),
1497
                                               lrnver, snum012 + lrset, -1, -1);
1498
 
1499
                        return CARRY_ON;
1500
                }
1501
 
1502
                /* if shifting doesn't lead to better packing then don't shift */
1503
                if (nver == lrnver) {
1504
                        set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1505
                                       -1);
1506
                        return CARRY_ON;
1507
                }
1508
 
1509
                /* now we know that for better packing shifting in only one
1510
                   direction either to the left or to the right is required */
1511
 
1512
                /*  if shifting to the left is better than shifting to the right */
1513
                if (lnver < rnver) {
1514
                        SET_PAR_SHIFT_LEFT;
1515
                        return CARRY_ON;
1516
                }
1517
 
1518
                /* if shifting to the right is better than shifting to the left */
1519
                if (lnver > rnver) {
1520
                        SET_PAR_SHIFT_RIGHT;
1521
                        return CARRY_ON;
1522
                }
1523
 
1524
                /* now shifting in either direction gives the same number
1525
                   of nodes and we can make use of the cached neighbors */
1526
                if (is_left_neighbor_in_cache(tb, h)) {
1527
                        SET_PAR_SHIFT_LEFT;
1528
                        return CARRY_ON;
1529
                }
1530
 
1531
                /* shift to the right independently on whether the right neighbor in cache or not */
1532
                SET_PAR_SHIFT_RIGHT;
1533
                return CARRY_ON;
1534
        }
1535
}
1536
 
1537
/* Check whether current node S[h] is balanced when Decreasing its size by
1538
 * Deleting or Cutting for INTERNAL node of S+tree.
1539
 * Calculate parameters for balancing for current level h.
1540
 * Parameters:
1541
 *      tb      tree_balance structure;
1542
 *      h       current level of the node;
1543
 *      inum    item number in S[h];
1544
 *      mode    i - insert, p - paste;
1545
 * Returns:     1 - schedule occurred;
1546
 *              0 - balancing for higher levels needed;
1547
 *             -1 - no balancing for higher levels needed;
1548
 *             -2 - no disk space.
1549
 *
1550
 * Note: Items of internal nodes have fixed size, so the balance condition for
1551
 * the internal part of S+tree is as for the B-trees.
1552
 */
1553
static int dc_check_balance_internal(struct tree_balance *tb, int h)
1554
{
1555
        struct virtual_node *vn = tb->tb_vn;
1556
 
1557
        /* Sh is the node whose balance is currently being checked,
1558
           and Fh is its father.  */
1559
        struct buffer_head *Sh, *Fh;
1560
        int maxsize, n_ret_value;
1561
        int lfree, rfree /* free space in L and R */ ;
1562
 
1563
        Sh = PATH_H_PBUFFER(tb->tb_path, h);
1564
        Fh = PATH_H_PPARENT(tb->tb_path, h);
1565
 
1566
        maxsize = MAX_CHILD_SIZE(Sh);
1567
 
1568
/*   using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1569
/*   new_nr_item = number of items node would have if operation is */
1570
/*      performed without balancing (new_nr_item); */
1571
        create_virtual_node(tb, h);
1572
 
1573
        if (!Fh) {              /* S[h] is the root. */
1574
                if (vn->vn_nr_item > 0) {
1575
                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1576
                        return NO_BALANCING_NEEDED;     /* no balancing for higher levels needed */
1577
                }
1578
                /* new_nr_item == 0.
1579
                 * Current root will be deleted resulting in
1580
                 * decrementing the tree height. */
1581
                set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1582
                return CARRY_ON;
1583
        }
1584
 
1585
        if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1586
                return n_ret_value;
1587
 
1588
        /* get free space of neighbors */
1589
        rfree = get_rfree(tb, h);
1590
        lfree = get_lfree(tb, h);
1591
 
1592
        /* determine maximal number of items we can fit into neighbors */
1593
        check_left(tb, h, lfree);
1594
        check_right(tb, h, rfree);
1595
 
1596
        if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid.
1597
                                                 * In this case we balance only if it leads to better packing. */
1598
                if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors,
1599
                                                         * which is impossible with greater values of new_nr_item. */
1600
                        if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1601
                                /* All contents of S[h] can be moved to L[h]. */
1602
                                int n;
1603
                                int order_L;
1604
 
1605
                                order_L =
1606
                                    ((n =
1607
                                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1608
                                                          h)) ==
1609
                                     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1610
                                n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1611
                                    (DC_SIZE + KEY_SIZE);
1612
                                set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1613
                                               -1);
1614
                                return CARRY_ON;
1615
                        }
1616
 
1617
                        if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1618
                                /* All contents of S[h] can be moved to R[h]. */
1619
                                int n;
1620
                                int order_R;
1621
 
1622
                                order_R =
1623
                                    ((n =
1624
                                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1625
                                                          h)) ==
1626
                                     B_NR_ITEMS(Fh)) ? 0 : n + 1;
1627
                                n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1628
                                    (DC_SIZE + KEY_SIZE);
1629
                                set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1630
                                               -1);
1631
                                return CARRY_ON;
1632
                        }
1633
                }
1634
 
1635
                if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1636
                        /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1637
                        int to_r;
1638
 
1639
                        to_r =
1640
                            ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1641
                             tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1642
                            (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1643
                        set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1644
                                       0, NULL, -1, -1);
1645
                        return CARRY_ON;
1646
                }
1647
 
1648
                /* Balancing does not lead to better packing. */
1649
                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1650
                return NO_BALANCING_NEEDED;
1651
        }
1652
 
1653
        /* Current node contain insufficient number of items. Balancing is required. */
1654
        /* Check whether we can merge S[h] with left neighbor. */
1655
        if (tb->lnum[h] >= vn->vn_nr_item + 1)
1656
                if (is_left_neighbor_in_cache(tb, h)
1657
                    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1658
                        int n;
1659
                        int order_L;
1660
 
1661
                        order_L =
1662
                            ((n =
1663
                              PATH_H_B_ITEM_ORDER(tb->tb_path,
1664
                                                  h)) ==
1665
                             0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1666
                        n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1667
                                                                      KEY_SIZE);
1668
                        set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1669
                        return CARRY_ON;
1670
                }
1671
 
1672
        /* Check whether we can merge S[h] with right neighbor. */
1673
        if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1674
                int n;
1675
                int order_R;
1676
 
1677
                order_R =
1678
                    ((n =
1679
                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1680
                                          h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1681
                n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1682
                                                              KEY_SIZE);
1683
                set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1684
                return CARRY_ON;
1685
        }
1686
 
1687
        /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1688
        if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1689
                int to_r;
1690
 
1691
                to_r =
1692
                    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1693
                     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1694
                                                tb->rnum[h]);
1695
                set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1696
                               -1, -1);
1697
                return CARRY_ON;
1698
        }
1699
 
1700
        /* For internal nodes try to borrow item from a neighbor */
1701
        RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1702
 
1703
        /* Borrow one or two items from caching neighbor */
1704
        if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1705
                int from_l;
1706
 
1707
                from_l =
1708
                    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1709
                     1) / 2 - (vn->vn_nr_item + 1);
1710
                set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1711
                return CARRY_ON;
1712
        }
1713
 
1714
        set_parameters(tb, h, 0,
1715
                       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1716
                          1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1717
        return CARRY_ON;
1718
}
1719
 
1720
/* Check whether current node S[h] is balanced when Decreasing its size by
1721
 * Deleting or Truncating for LEAF node of S+tree.
1722
 * Calculate parameters for balancing for current level h.
1723
 * Parameters:
1724
 *      tb      tree_balance structure;
1725
 *      h       current level of the node;
1726
 *      inum    item number in S[h];
1727
 *      mode    i - insert, p - paste;
1728
 * Returns:     1 - schedule occurred;
1729
 *              0 - balancing for higher levels needed;
1730
 *             -1 - no balancing for higher levels needed;
1731
 *             -2 - no disk space.
1732
 */
1733
static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1734
{
1735
        struct virtual_node *vn = tb->tb_vn;
1736
 
1737
        /* Number of bytes that must be deleted from
1738
           (value is negative if bytes are deleted) buffer which
1739
           contains node being balanced.  The mnemonic is that the
1740
           attempted change in node space used level is levbytes bytes. */
1741
        int levbytes;
1742
        /* the maximal item size */
1743
        int maxsize, n_ret_value;
1744
        /* S0 is the node whose balance is currently being checked,
1745
           and F0 is its father.  */
1746
        struct buffer_head *S0, *F0;
1747
        int lfree, rfree /* free space in L and R */ ;
1748
 
1749
        S0 = PATH_H_PBUFFER(tb->tb_path, 0);
1750
        F0 = PATH_H_PPARENT(tb->tb_path, 0);
1751
 
1752
        levbytes = tb->insert_size[h];
1753
 
1754
        maxsize = MAX_CHILD_SIZE(S0);   /* maximal possible size of an item */
1755
 
1756
        if (!F0) {              /* S[0] is the root now. */
1757
 
1758
                RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
1759
                       "vs-8240: attempt to create empty buffer tree");
1760
 
1761
                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1762
                return NO_BALANCING_NEEDED;
1763
        }
1764
 
1765
        if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1766
                return n_ret_value;
1767
 
1768
        /* get free space of neighbors */
1769
        rfree = get_rfree(tb, h);
1770
        lfree = get_lfree(tb, h);
1771
 
1772
        create_virtual_node(tb, h);
1773
 
1774
        /* if 3 leaves can be merge to one, set parameters and return */
1775
        if (are_leaves_removable(tb, lfree, rfree))
1776
                return CARRY_ON;
1777
 
1778
        /* determine maximal number of items we can shift to the left/right  neighbor
1779
           and the maximal number of bytes that can flow to the left/right neighbor
1780
           from the left/right most liquid item that cannot be shifted from S[0] entirely
1781
         */
1782
        check_left(tb, h, lfree);
1783
        check_right(tb, h, rfree);
1784
 
1785
        /* check whether we can merge S with left neighbor. */
1786
        if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1787
                if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) ||        /* S can not be merged with R */
1788
                    !tb->FR[h]) {
1789
 
1790
                        RFALSE(!tb->FL[h],
1791
                               "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1792
 
1793
                        /* set parameter to merge S[0] with its left neighbor */
1794
                        set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
1795
                        return CARRY_ON;
1796
                }
1797
 
1798
        /* check whether we can merge S[0] with right neighbor. */
1799
        if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1800
                set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
1801
                return CARRY_ON;
1802
        }
1803
 
1804
        /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1805
        if (is_leaf_removable(tb))
1806
                return CARRY_ON;
1807
 
1808
        /* Balancing is not required. */
1809
        tb->s0num = vn->vn_nr_item;
1810
        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1811
        return NO_BALANCING_NEEDED;
1812
}
1813
 
1814
/* Check whether current node S[h] is balanced when Decreasing its size by
1815
 * Deleting or Cutting.
1816
 * Calculate parameters for balancing for current level h.
1817
 * Parameters:
1818
 *      tb      tree_balance structure;
1819
 *      h       current level of the node;
1820
 *      inum    item number in S[h];
1821
 *      mode    d - delete, c - cut.
1822
 * Returns:     1 - schedule occurred;
1823
 *              0 - balancing for higher levels needed;
1824
 *             -1 - no balancing for higher levels needed;
1825
 *             -2 - no disk space.
1826
 */
1827
static int dc_check_balance(struct tree_balance *tb, int h)
1828
{
1829
        RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1830
               "vs-8250: S is not initialized");
1831
 
1832
        if (h)
1833
                return dc_check_balance_internal(tb, h);
1834
        else
1835
                return dc_check_balance_leaf(tb, h);
1836
}
1837
 
1838
/* Check whether current node S[h] is balanced.
1839
 * Calculate parameters for balancing for current level h.
1840
 * Parameters:
1841
 *
1842
 *      tb      tree_balance structure:
1843
 *
1844
 *              tb is a large structure that must be read about in the header file
1845
 *              at the same time as this procedure if the reader is to successfully
1846
 *              understand this procedure
1847
 *
1848
 *      h       current level of the node;
1849
 *      inum    item number in S[h];
1850
 *      mode    i - insert, p - paste, d - delete, c - cut.
1851
 * Returns:     1 - schedule occurred;
1852
 *              0 - balancing for higher levels needed;
1853
 *             -1 - no balancing for higher levels needed;
1854
 *             -2 - no disk space.
1855
 */
1856
static int check_balance(int mode,
1857
                         struct tree_balance *tb,
1858
                         int h,
1859
                         int inum,
1860
                         int pos_in_item,
1861
                         struct item_head *ins_ih, const void *data)
1862
{
1863
        struct virtual_node *vn;
1864
 
1865
        vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1866
        vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1867
        vn->vn_mode = mode;
1868
        vn->vn_affected_item_num = inum;
1869
        vn->vn_pos_in_item = pos_in_item;
1870
        vn->vn_ins_ih = ins_ih;
1871
        vn->vn_data = data;
1872
 
1873
        RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1874
               "vs-8255: ins_ih can not be 0 in insert mode");
1875
 
1876
        if (tb->insert_size[h] > 0)
1877
                /* Calculate balance parameters when size of node is increasing. */
1878
                return ip_check_balance(tb, h);
1879
 
1880
        /* Calculate balance parameters when  size of node is decreasing. */
1881
        return dc_check_balance(tb, h);
1882
}
1883
 
1884
/* Check whether parent at the path is the really parent of the current node.*/
1885
static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
1886
{
1887
        struct buffer_head *p_s_bh;
1888
        struct treepath *p_s_path = p_s_tb->tb_path;
1889
        int n_position,
1890
            n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1891
 
1892
        /* We are in the root or in the new root. */
1893
        if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1894
 
1895
                RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1896
                       "PAP-8260: invalid offset in the path");
1897
 
1898
                if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->
1899
                    b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
1900
                        /* Root is not changed. */
1901
                        PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1902
                        PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1903
                        return CARRY_ON;
1904
                }
1905
                return REPEAT_SEARCH;   /* Root is changed and we must recalculate the path. */
1906
        }
1907
 
1908
        if (!B_IS_IN_TREE
1909
            (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)))
1910
                return REPEAT_SEARCH;   /* Parent in the path is not in the tree. */
1911
 
1912
        if ((n_position =
1913
             PATH_OFFSET_POSITION(p_s_path,
1914
                                  n_path_offset - 1)) > B_NR_ITEMS(p_s_bh))
1915
                return REPEAT_SEARCH;
1916
 
1917
        if (B_N_CHILD_NUM(p_s_bh, n_position) !=
1918
            PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr)
1919
                /* Parent in the path is not parent of the current node in the tree. */
1920
                return REPEAT_SEARCH;
1921
 
1922
        if (buffer_locked(p_s_bh)) {
1923
                __wait_on_buffer(p_s_bh);
1924
                if (FILESYSTEM_CHANGED_TB(p_s_tb))
1925
                        return REPEAT_SEARCH;
1926
        }
1927
 
1928
        return CARRY_ON;        /* Parent in the path is unlocked and really parent of the current node.  */
1929
}
1930
 
1931
/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
1932
 * of S[n_h] we
1933
 * need in order to balance S[n_h], and get them if necessary.
1934
 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
1935
 *              CARRY_ON - schedule didn't occur while the function worked;
1936
 */
1937
static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
1938
{
1939
        int n_child_position,
1940
            n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
1941
        unsigned long n_son_number;
1942
        struct super_block *p_s_sb = p_s_tb->tb_sb;
1943
        struct buffer_head *p_s_bh;
1944
 
1945
        PROC_INFO_INC(p_s_sb, get_neighbors[n_h]);
1946
 
1947
        if (p_s_tb->lnum[n_h]) {
1948
                /* We need left neighbor to balance S[n_h]. */
1949
                PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]);
1950
                p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1951
 
1952
                RFALSE(p_s_bh == p_s_tb->FL[n_h] &&
1953
                       !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset),
1954
                       "PAP-8270: invalid position in the parent");
1955
 
1956
                n_child_position =
1957
                    (p_s_bh ==
1958
                     p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->
1959
                                                                       FL[n_h]);
1960
                n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
1961
                p_s_bh = sb_bread(p_s_sb, n_son_number);
1962
                if (!p_s_bh)
1963
                        return IO_ERROR;
1964
                if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1965
                        decrement_bcount(p_s_bh);
1966
                        PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
1967
                        return REPEAT_SEARCH;
1968
                }
1969
 
1970
                RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
1971
                       n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
1972
                       B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) !=
1973
                       p_s_bh->b_blocknr, "PAP-8275: invalid parent");
1974
                RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child");
1975
                RFALSE(!n_h &&
1976
                       B_FREE_SPACE(p_s_bh) !=
1977
                       MAX_CHILD_SIZE(p_s_bh) -
1978
                       dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)),
1979
                       "PAP-8290: invalid child size of left neighbor");
1980
 
1981
                decrement_bcount(p_s_tb->L[n_h]);
1982
                p_s_tb->L[n_h] = p_s_bh;
1983
        }
1984
 
1985
        if (p_s_tb->rnum[n_h]) {        /* We need right neighbor to balance S[n_path_offset]. */
1986
                PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]);
1987
                p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1988
 
1989
                RFALSE(p_s_bh == p_s_tb->FR[n_h] &&
1990
                       PATH_OFFSET_POSITION(p_s_tb->tb_path,
1991
                                            n_path_offset) >=
1992
                       B_NR_ITEMS(p_s_bh),
1993
                       "PAP-8295: invalid position in the parent");
1994
 
1995
                n_child_position =
1996
                    (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0;
1997
                n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
1998
                p_s_bh = sb_bread(p_s_sb, n_son_number);
1999
                if (!p_s_bh)
2000
                        return IO_ERROR;
2001
                if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2002
                        decrement_bcount(p_s_bh);
2003
                        PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
2004
                        return REPEAT_SEARCH;
2005
                }
2006
                decrement_bcount(p_s_tb->R[n_h]);
2007
                p_s_tb->R[n_h] = p_s_bh;
2008
 
2009
                RFALSE(!n_h
2010
                       && B_FREE_SPACE(p_s_bh) !=
2011
                       MAX_CHILD_SIZE(p_s_bh) -
2012
                       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)),
2013
                       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
2014
                       B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh),
2015
                       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)));
2016
 
2017
        }
2018
        return CARRY_ON;
2019
}
2020
 
2021
static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2022
{
2023
        int max_num_of_items;
2024
        int max_num_of_entries;
2025
        unsigned long blocksize = sb->s_blocksize;
2026
 
2027
#define MIN_NAME_LEN 1
2028
 
2029
        max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2030
        max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2031
            (DEH_SIZE + MIN_NAME_LEN);
2032
 
2033
        return sizeof(struct virtual_node) +
2034
            max(max_num_of_items * sizeof(struct virtual_item),
2035
                sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2036
                (max_num_of_entries - 1) * sizeof(__u16));
2037
}
2038
 
2039
/* maybe we should fail balancing we are going to perform when kmalloc
2040
   fails several times. But now it will loop until kmalloc gets
2041
   required memory */
2042
static int get_mem_for_virtual_node(struct tree_balance *tb)
2043
{
2044
        int check_fs = 0;
2045
        int size;
2046
        char *buf;
2047
 
2048
        size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2049
 
2050
        if (size > tb->vn_buf_size) {
2051
                /* we have to allocate more memory for virtual node */
2052
                if (tb->vn_buf) {
2053
                        /* free memory allocated before */
2054
                        kfree(tb->vn_buf);
2055
                        /* this is not needed if kfree is atomic */
2056
                        check_fs = 1;
2057
                }
2058
 
2059
                /* virtual node requires now more memory */
2060
                tb->vn_buf_size = size;
2061
 
2062
                /* get memory for virtual item */
2063
                buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2064
                if (!buf) {
2065
                        /* getting memory with GFP_KERNEL priority may involve
2066
                           balancing now (due to indirect_to_direct conversion on
2067
                           dcache shrinking). So, release path and collected
2068
                           resources here */
2069
                        free_buffers_in_tb(tb);
2070
                        buf = kmalloc(size, GFP_NOFS);
2071
                        if (!buf) {
2072
                                tb->vn_buf_size = 0;
2073
                        }
2074
                        tb->vn_buf = buf;
2075
                        schedule();
2076
                        return REPEAT_SEARCH;
2077
                }
2078
 
2079
                tb->vn_buf = buf;
2080
        }
2081
 
2082
        if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2083
                return REPEAT_SEARCH;
2084
 
2085
        return CARRY_ON;
2086
}
2087
 
2088
#ifdef CONFIG_REISERFS_CHECK
2089
static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2090
                                   struct buffer_head *p_s_bh,
2091
                                   const char *descr, int level)
2092
{
2093
        if (p_s_bh) {
2094
                if (atomic_read(&(p_s_bh->b_count)) <= 0) {
2095
 
2096
                        reiserfs_panic(p_s_sb,
2097
                                       "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n",
2098
                                       descr, level, p_s_bh);
2099
                }
2100
 
2101
                if (!buffer_uptodate(p_s_bh)) {
2102
                        reiserfs_panic(p_s_sb,
2103
                                       "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n",
2104
                                       descr, level, p_s_bh);
2105
                }
2106
 
2107
                if (!B_IS_IN_TREE(p_s_bh)) {
2108
                        reiserfs_panic(p_s_sb,
2109
                                       "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n",
2110
                                       descr, level, p_s_bh);
2111
                }
2112
 
2113
                if (p_s_bh->b_bdev != p_s_sb->s_bdev) {
2114
                        reiserfs_panic(p_s_sb,
2115
                                       "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n",
2116
                                       descr, level, p_s_bh);
2117
                }
2118
 
2119
                if (p_s_bh->b_size != p_s_sb->s_blocksize) {
2120
                        reiserfs_panic(p_s_sb,
2121
                                       "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n",
2122
                                       descr, level, p_s_bh);
2123
                }
2124
 
2125
                if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2126
                        reiserfs_panic(p_s_sb,
2127
                                       "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
2128
                                       descr, level, p_s_bh);
2129
                }
2130
        }
2131
}
2132
#else
2133
static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2134
                                   struct buffer_head *p_s_bh,
2135
                                   const char *descr, int level)
2136
{;
2137
}
2138
#endif
2139
 
2140
static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2141
{
2142
        return reiserfs_prepare_for_journal(s, bh, 0);
2143
}
2144
 
2145
static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2146
{
2147
        struct buffer_head *locked;
2148
#ifdef CONFIG_REISERFS_CHECK
2149
        int repeat_counter = 0;
2150
#endif
2151
        int i;
2152
 
2153
        do {
2154
 
2155
                locked = NULL;
2156
 
2157
                for (i = p_s_tb->tb_path->path_length;
2158
                     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2159
                        if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2160
                                /* if I understand correctly, we can only be sure the last buffer
2161
                                 ** in the path is in the tree --clm
2162
                                 */
2163
#ifdef CONFIG_REISERFS_CHECK
2164
                                if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2165
                                    PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2166
                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2167
                                                               PATH_OFFSET_PBUFFER
2168
                                                               (p_s_tb->tb_path,
2169
                                                                i), "S",
2170
                                                               p_s_tb->tb_path->
2171
                                                               path_length - i);
2172
                                }
2173
#endif
2174
                                if (!clear_all_dirty_bits(p_s_tb->tb_sb,
2175
                                                          PATH_OFFSET_PBUFFER
2176
                                                          (p_s_tb->tb_path,
2177
                                                           i))) {
2178
                                        locked =
2179
                                            PATH_OFFSET_PBUFFER(p_s_tb->tb_path,
2180
                                                                i);
2181
                                }
2182
                        }
2183
                }
2184
 
2185
                for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i];
2186
                     i++) {
2187
 
2188
                        if (p_s_tb->lnum[i]) {
2189
 
2190
                                if (p_s_tb->L[i]) {
2191
                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2192
                                                               p_s_tb->L[i],
2193
                                                               "L", i);
2194
                                        if (!clear_all_dirty_bits
2195
                                            (p_s_tb->tb_sb, p_s_tb->L[i]))
2196
                                                locked = p_s_tb->L[i];
2197
                                }
2198
 
2199
                                if (!locked && p_s_tb->FL[i]) {
2200
                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2201
                                                               p_s_tb->FL[i],
2202
                                                               "FL", i);
2203
                                        if (!clear_all_dirty_bits
2204
                                            (p_s_tb->tb_sb, p_s_tb->FL[i]))
2205
                                                locked = p_s_tb->FL[i];
2206
                                }
2207
 
2208
                                if (!locked && p_s_tb->CFL[i]) {
2209
                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2210
                                                               p_s_tb->CFL[i],
2211
                                                               "CFL", i);
2212
                                        if (!clear_all_dirty_bits
2213
                                            (p_s_tb->tb_sb, p_s_tb->CFL[i]))
2214
                                                locked = p_s_tb->CFL[i];
2215
                                }
2216
 
2217
                        }
2218
 
2219
                        if (!locked && (p_s_tb->rnum[i])) {
2220
 
2221
                                if (p_s_tb->R[i]) {
2222
                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2223
                                                               p_s_tb->R[i],
2224
                                                               "R", i);
2225
                                        if (!clear_all_dirty_bits
2226
                                            (p_s_tb->tb_sb, p_s_tb->R[i]))
2227
                                                locked = p_s_tb->R[i];
2228
                                }
2229
 
2230
                                if (!locked && p_s_tb->FR[i]) {
2231
                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2232
                                                               p_s_tb->FR[i],
2233
                                                               "FR", i);
2234
                                        if (!clear_all_dirty_bits
2235
                                            (p_s_tb->tb_sb, p_s_tb->FR[i]))
2236
                                                locked = p_s_tb->FR[i];
2237
                                }
2238
 
2239
                                if (!locked && p_s_tb->CFR[i]) {
2240
                                        tb_buffer_sanity_check(p_s_tb->tb_sb,
2241
                                                               p_s_tb->CFR[i],
2242
                                                               "CFR", i);
2243
                                        if (!clear_all_dirty_bits
2244
                                            (p_s_tb->tb_sb, p_s_tb->CFR[i]))
2245
                                                locked = p_s_tb->CFR[i];
2246
                                }
2247
                        }
2248
                }
2249
                /* as far as I can tell, this is not required.  The FEB list seems
2250
                 ** to be full of newly allocated nodes, which will never be locked,
2251
                 ** dirty, or anything else.
2252
                 ** To be safe, I'm putting in the checks and waits in.  For the moment,
2253
                 ** they are needed to keep the code in journal.c from complaining
2254
                 ** about the buffer.  That code is inside CONFIG_REISERFS_CHECK as well.
2255
                 ** --clm
2256
                 */
2257
                for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2258
                        if (p_s_tb->FEB[i]) {
2259
                                if (!clear_all_dirty_bits
2260
                                    (p_s_tb->tb_sb, p_s_tb->FEB[i]))
2261
                                        locked = p_s_tb->FEB[i];
2262
                        }
2263
                }
2264
 
2265
                if (locked) {
2266
#ifdef CONFIG_REISERFS_CHECK
2267
                        repeat_counter++;
2268
                        if ((repeat_counter % 10000) == 0) {
2269
                                reiserfs_warning(p_s_tb->tb_sb,
2270
                                                 "wait_tb_buffers_until_released(): too many "
2271
                                                 "iterations waiting for buffer to unlock "
2272
                                                 "(%b)", locked);
2273
 
2274
                                /* Don't loop forever.  Try to recover from possible error. */
2275
 
2276
                                return (FILESYSTEM_CHANGED_TB(p_s_tb)) ?
2277
                                    REPEAT_SEARCH : CARRY_ON;
2278
                        }
2279
#endif
2280
                        __wait_on_buffer(locked);
2281
                        if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2282
                                return REPEAT_SEARCH;
2283
                        }
2284
                }
2285
 
2286
        } while (locked);
2287
 
2288
        return CARRY_ON;
2289
}
2290
 
2291
/* Prepare for balancing, that is
2292
 *      get all necessary parents, and neighbors;
2293
 *      analyze what and where should be moved;
2294
 *      get sufficient number of new nodes;
2295
 * Balancing will start only after all resources will be collected at a time.
2296
 *
2297
 * When ported to SMP kernels, only at the last moment after all needed nodes
2298
 * are collected in cache, will the resources be locked using the usual
2299
 * textbook ordered lock acquisition algorithms.  Note that ensuring that
2300
 * this code neither write locks what it does not need to write lock nor locks out of order
2301
 * will be a pain in the butt that could have been avoided.  Grumble grumble. -Hans
2302
 *
2303
 * fix is meant in the sense of render unchanging
2304
 *
2305
 * Latency might be improved by first gathering a list of what buffers are needed
2306
 * and then getting as many of them in parallel as possible? -Hans
2307
 *
2308
 * Parameters:
2309
 *      op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
2310
 *      tb      tree_balance structure;
2311
 *      inum    item number in S[h];
2312
 *      pos_in_item - comment this if you can
2313
 *      ins_ih & ins_sd are used when inserting
2314
 * Returns:     1 - schedule occurred while the function worked;
2315
 *              0 - schedule didn't occur while the function worked;
2316
 *             -1 - if no_disk_space
2317
 */
2318
 
2319
int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted
2320
              const void *data  // inserted item or data to be pasted
2321
    )
2322
{
2323
        int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
2324
        int n_pos_in_item;
2325
 
2326
        /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2327
         ** during wait_tb_buffers_run
2328
         */
2329
        int wait_tb_buffers_run = 0;
2330
        struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);
2331
 
2332
        ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes;
2333
 
2334
        n_pos_in_item = p_s_tb->tb_path->pos_in_item;
2335
 
2336
        p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb);
2337
 
2338
        /* we prepare and log the super here so it will already be in the
2339
         ** transaction when do_balance needs to change it.
2340
         ** This way do_balance won't have to schedule when trying to prepare
2341
         ** the super for logging
2342
         */
2343
        reiserfs_prepare_for_journal(p_s_tb->tb_sb,
2344
                                     SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1);
2345
        journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
2346
                           SB_BUFFER_WITH_SB(p_s_tb->tb_sb));
2347
        if (FILESYSTEM_CHANGED_TB(p_s_tb))
2348
                return REPEAT_SEARCH;
2349
 
2350
        /* if it possible in indirect_to_direct conversion */
2351
        if (buffer_locked(p_s_tbS0)) {
2352
                __wait_on_buffer(p_s_tbS0);
2353
                if (FILESYSTEM_CHANGED_TB(p_s_tb))
2354
                        return REPEAT_SEARCH;
2355
        }
2356
#ifdef CONFIG_REISERFS_CHECK
2357
        if (cur_tb) {
2358
                print_cur_tb("fix_nodes");
2359
                reiserfs_panic(p_s_tb->tb_sb,
2360
                               "PAP-8305: fix_nodes:  there is pending do_balance");
2361
        }
2362
 
2363
        if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) {
2364
                reiserfs_panic(p_s_tb->tb_sb,
2365
                               "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
2366
                               "at the beginning of fix_nodes or not in tree (mode %c)",
2367
                               p_s_tbS0, p_s_tbS0, n_op_mode);
2368
        }
2369
 
2370
        /* Check parameters. */
2371
        switch (n_op_mode) {
2372
        case M_INSERT:
2373
                if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0))
2374
                        reiserfs_panic(p_s_tb->tb_sb,
2375
                                       "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
2376
                                       n_item_num, B_NR_ITEMS(p_s_tbS0));
2377
                break;
2378
        case M_PASTE:
2379
        case M_DELETE:
2380
        case M_CUT:
2381
                if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) {
2382
                        print_block(p_s_tbS0, 0, -1, -1);
2383
                        reiserfs_panic(p_s_tb->tb_sb,
2384
                                       "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n",
2385
                                       n_item_num, n_op_mode,
2386
                                       p_s_tb->insert_size[0]);
2387
                }
2388
                break;
2389
        default:
2390
                reiserfs_panic(p_s_tb->tb_sb,
2391
                               "PAP-8340: fix_nodes: Incorrect mode of operation");
2392
        }
2393
#endif
2394
 
2395
        if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH)
2396
                // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2397
                return REPEAT_SEARCH;
2398
 
2399
        /* Starting from the leaf level; for all levels n_h of the tree. */
2400
        for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) {
2401
                if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) {
2402
                        goto repeat;
2403
                }
2404
 
2405
                if ((n_ret_value =
2406
                     check_balance(n_op_mode, p_s_tb, n_h, n_item_num,
2407
                                   n_pos_in_item, p_s_ins_ih,
2408
                                   data)) != CARRY_ON) {
2409
                        if (n_ret_value == NO_BALANCING_NEEDED) {
2410
                                /* No balancing for higher levels needed. */
2411
                                if ((n_ret_value =
2412
                                     get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2413
                                        goto repeat;
2414
                                }
2415
                                if (n_h != MAX_HEIGHT - 1)
2416
                                        p_s_tb->insert_size[n_h + 1] = 0;
2417
                                /* ok, analysis and resource gathering are complete */
2418
                                break;
2419
                        }
2420
                        goto repeat;
2421
                }
2422
 
2423
                if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2424
                        goto repeat;
2425
                }
2426
 
2427
                if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) {
2428
                        goto repeat;    /* No disk space, or schedule occurred and
2429
                                           analysis may be invalid and needs to be redone. */
2430
                }
2431
 
2432
                if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) {
2433
                        /* We have a positive insert size but no nodes exist on this
2434
                           level, this means that we are creating a new root. */
2435
 
2436
                        RFALSE(p_s_tb->blknum[n_h] != 1,
2437
                               "PAP-8350: creating new empty root");
2438
 
2439
                        if (n_h < MAX_HEIGHT - 1)
2440
                                p_s_tb->insert_size[n_h + 1] = 0;
2441
                } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) {
2442
                        if (p_s_tb->blknum[n_h] > 1) {
2443
                                /* The tree needs to be grown, so this node S[n_h]
2444
                                   which is the root node is split into two nodes,
2445
                                   and a new node (S[n_h+1]) will be created to
2446
                                   become the root node.  */
2447
 
2448
                                RFALSE(n_h == MAX_HEIGHT - 1,
2449
                                       "PAP-8355: attempt to create too high of a tree");
2450
 
2451
                                p_s_tb->insert_size[n_h + 1] =
2452
                                    (DC_SIZE +
2453
                                     KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) +
2454
                                    DC_SIZE;
2455
                        } else if (n_h < MAX_HEIGHT - 1)
2456
                                p_s_tb->insert_size[n_h + 1] = 0;
2457
                } else
2458
                        p_s_tb->insert_size[n_h + 1] =
2459
                            (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
2460
        }
2461
 
2462
        if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) {
2463
                if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2464
                        wait_tb_buffers_run = 1;
2465
                        n_ret_value = REPEAT_SEARCH;
2466
                        goto repeat;
2467
                } else {
2468
                        return CARRY_ON;
2469
                }
2470
        } else {
2471
                wait_tb_buffers_run = 1;
2472
                goto repeat;
2473
        }
2474
 
2475
      repeat:
2476
        // fix_nodes was unable to perform its calculation due to
2477
        // filesystem got changed under us, lack of free disk space or i/o
2478
        // failure. If the first is the case - the search will be
2479
        // repeated. For now - free all resources acquired so far except
2480
        // for the new allocated nodes
2481
        {
2482
                int i;
2483
 
2484
                /* Release path buffers. */
2485
                if (wait_tb_buffers_run) {
2486
                        pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path);
2487
                } else {
2488
                        pathrelse(p_s_tb->tb_path);
2489
                }
2490
                /* brelse all resources collected for balancing */
2491
                for (i = 0; i < MAX_HEIGHT; i++) {
2492
                        if (wait_tb_buffers_run) {
2493
                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2494
                                                                 p_s_tb->L[i]);
2495
                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2496
                                                                 p_s_tb->R[i]);
2497
                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2498
                                                                 p_s_tb->FL[i]);
2499
                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2500
                                                                 p_s_tb->FR[i]);
2501
                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2502
                                                                 p_s_tb->
2503
                                                                 CFL[i]);
2504
                                reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2505
                                                                 p_s_tb->
2506
                                                                 CFR[i]);
2507
                        }
2508
 
2509
                        brelse(p_s_tb->L[i]);
2510
                        p_s_tb->L[i] = NULL;
2511
                        brelse(p_s_tb->R[i]);
2512
                        p_s_tb->R[i] = NULL;
2513
                        brelse(p_s_tb->FL[i]);
2514
                        p_s_tb->FL[i] = NULL;
2515
                        brelse(p_s_tb->FR[i]);
2516
                        p_s_tb->FR[i] = NULL;
2517
                        brelse(p_s_tb->CFL[i]);
2518
                        p_s_tb->CFL[i] = NULL;
2519
                        brelse(p_s_tb->CFR[i]);
2520
                        p_s_tb->CFR[i] = NULL;
2521
                }
2522
 
2523
                if (wait_tb_buffers_run) {
2524
                        for (i = 0; i < MAX_FEB_SIZE; i++) {
2525
                                if (p_s_tb->FEB[i]) {
2526
                                        reiserfs_restore_prepared_buffer
2527
                                            (p_s_tb->tb_sb, p_s_tb->FEB[i]);
2528
                                }
2529
                        }
2530
                }
2531
                return n_ret_value;
2532
        }
2533
 
2534
}
2535
 
2536
/* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2537
   wanted to make lines shorter */
2538
void unfix_nodes(struct tree_balance *tb)
2539
{
2540
        int i;
2541
 
2542
        /* Release path buffers. */
2543
        pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2544
 
2545
        /* brelse all resources collected for balancing */
2546
        for (i = 0; i < MAX_HEIGHT; i++) {
2547
                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2548
                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2549
                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2550
                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2551
                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2552
                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2553
 
2554
                brelse(tb->L[i]);
2555
                brelse(tb->R[i]);
2556
                brelse(tb->FL[i]);
2557
                brelse(tb->FR[i]);
2558
                brelse(tb->CFL[i]);
2559
                brelse(tb->CFR[i]);
2560
        }
2561
 
2562
        /* deal with list of allocated (used and unused) nodes */
2563
        for (i = 0; i < MAX_FEB_SIZE; i++) {
2564
                if (tb->FEB[i]) {
2565
                        b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2566
                        /* de-allocated block which was not used by balancing and
2567
                           bforget about buffer for it */
2568
                        brelse(tb->FEB[i]);
2569
                        reiserfs_free_block(tb->transaction_handle, NULL,
2570
                                            blocknr, 0);
2571
                }
2572
                if (tb->used[i]) {
2573
                        /* release used as new nodes including a new root */
2574
                        brelse(tb->used[i]);
2575
                }
2576
        }
2577
 
2578
        kfree(tb->vn_buf);
2579
 
2580
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.