OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [reiserfs/] [inode.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
3
 */
4
 
5
#include <linux/config.h>
6
#include <linux/sched.h>
7
#include <linux/reiserfs_fs.h>
8
#include <linux/locks.h>
9
#include <linux/smp_lock.h>
10
#include <asm/uaccess.h>
11
#include <asm/unaligned.h>
12
 
13
/* args for the create parameter of reiserfs_get_block */
14
#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
15
#define GET_BLOCK_CREATE 1    /* add anything you need to find block */
16
#define GET_BLOCK_NO_HOLE 2   /* return -ENOENT for file holes */
17
#define GET_BLOCK_READ_DIRECT 4  /* read the tail if indirect item not found */
18
#define GET_BLOCK_NO_ISEM     8 /* i_sem is not held, don't preallocate */
19
 
20
static int reiserfs_get_block (struct inode * inode, long block,
21
                               struct buffer_head * bh_result, int create);
22
 
23
/* This spinlock guards inode pkey in private part of inode
24
   against race between find_actor() vs reiserfs_read_inode2 */
25
static spinlock_t keycopy_lock = SPIN_LOCK_UNLOCKED;
26
 
27
/* VFS delete_inode hook: remove all of a doomed inode's items from the
** tree inside one journal transaction, drop its "save" link, and clear
** the in-core inode.  Runs under the BKL (2.4 reiserfs convention).
*/
void reiserfs_delete_inode (struct inode * inode)
{
    /* one balancing for the object deletion plus one for the save-link
       removal, hence JOURNAL_PER_BALANCE_CNT * 2 */
    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2;
    int windex ;
    struct reiserfs_transaction_handle th ;


    lock_kernel() ;

    /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
    if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
        /* serialize against anyone else operating on this inode's items */
        down (&inode->i_sem);

        journal_begin(&th, inode->i_sb, jbegin_count) ;
        reiserfs_update_inode_transaction(inode) ;
        windex = push_journal_writer("delete_inode") ;

        /* removes stat data, direct/indirect items and unformatted nodes */
        reiserfs_delete_object (&th, inode);
        pop_journal_writer(windex) ;

        journal_end(&th, inode->i_sb, jbegin_count) ;

        up (&inode->i_sem);

        /* all items of file are deleted, so we can remove "save" link */
        remove_save_link (inode, 0/* not truncate */);
    } else {
        /* no object items are in the tree */
        ;
    }
    clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
    inode->i_blocks = 0;
    unlock_kernel() ;
}
61
 
62
static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid,
63
               loff_t offset, int type, int length )
64
{
65
    key->version = version;
66
 
67
    key->on_disk_key.k_dir_id = dirid;
68
    key->on_disk_key.k_objectid = objectid;
69
    set_cpu_key_k_offset (key, offset);
70
    set_cpu_key_k_type (key, type);
71
    key->key_length = length;
72
}
73
 
74
 
75
/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
76
   offset and type of key */
77
/* Build a cpu key for a position inside 'inode': the (dirid, objectid)
** pair and key version come from the inode itself, the caller supplies
** only the offset, item type and key length.
*/
void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset,
              int type, int length )
{
    /* short key lives on disk in little endian inside the inode's pkey */
    __u32 dirid = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
    __u32 objectid = le32_to_cpu (INODE_PKEY (inode)->k_objectid);

    _make_cpu_key (key, get_inode_item_key_version (inode), dirid, objectid,
                   offset, type, length);
}
84
 
85
 
86
//
87
// when key is 0, do not set version and short key
88
//
89
inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key,
90
                               int version,
91
                               loff_t offset, int type, int length,
92
                               int entry_count/*or ih_free_space*/)
93
{
94
    if (key) {
95
        ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id);
96
        ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid);
97
    }
98
    put_ih_version( ih, version );
99
    set_le_ih_k_offset (ih, offset);
100
    set_le_ih_k_type (ih, type);
101
    put_ih_item_len( ih, length );
102
    /*    set_ih_free_space (ih, 0);*/
103
    // for directory items it is entry count, for directs and stat
104
    // datas - 0xffff, for indirects - 0
105
    put_ih_entry_count( ih, entry_count );
106
}
107
 
108
static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
109
    struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
110
 
111
    buffer_insert_list(bh, &j->j_dirty_buffers) ;
112
}
113
 
114
//
115
// FIXME: we might cache recently accessed indirect item
116
 
117
// Ugh.  Not too eager for that....
118
//  I cut the code until such time as I see a convincing argument (benchmark).
119
// I don't want a bloated inode struct..., and I don't like code complexity....
120
 
121
/* cutting the code is fine, since it really isn't in use yet and is easy
122
** to add back in.  But, Vladimir has a really good idea here.  Think
123
** about what happens for reading a file.  For each page,
124
** The VFS layer calls reiserfs_readpage, who searches the tree to find
125
** an indirect item.  This indirect item has X number of pointers, where
126
** X is a big number if we've done the block allocation right.  But,
127
** we only use one or two of these pointers during each call to readpage,
128
** needlessly researching again later on.
129
**
130
** The size of the cache could be dynamic based on the size of the file.
131
**
132
** I'd also like to see us cache the location the stat data item, since
133
** we are needlessly researching for that frequently.
134
**
135
** --chris
136
*/
137
 
138
/* If this page has a file tail in it, and
139
** it was read in by get_block_create_0, the page data is valid,
140
** but tail is still sitting in a direct item, and we can't write to
141
** it.  So, look through this page, and check all the mapped buffers
142
** to make sure they have valid block numbers.  Any that don't need
143
** to be unmapped, so that block_prepare_write will correctly call
144
** reiserfs_get_block to convert the tail into an unformatted node
145
*/
146
static inline void fix_tail_page_for_writing(struct page *page) {
147
    struct buffer_head *head, *next, *bh ;
148
 
149
    if (page && page->buffers) {
150
        head = page->buffers ;
151
        bh = head ;
152
        do {
153
            next = bh->b_this_page ;
154
            if (buffer_mapped(bh) && bh->b_blocknr == 0) {
155
                reiserfs_unmap_buffer(bh) ;
156
            }
157
            bh = next ;
158
        } while (bh != head) ;
159
    }
160
}
161
 
162
/* reiserfs_get_block does not need to allocate a block only if it has been
163
   done already or non-hole position has been found in the indirect item */
164
static inline int allocation_needed (int retval, b_blocknr_t allocated,
165
                                     struct item_head * ih,
166
                                     __u32 * item, int pos_in_item)
167
{
168
  if (allocated)
169
         return 0;
170
  if (retval == POSITION_FOUND && is_indirect_le_ih (ih) &&
171
      get_block_num(item, pos_in_item))
172
         return 0;
173
  return 1;
174
}
175
 
176
static inline int indirect_item_found (int retval, struct item_head * ih)
177
{
178
  return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
179
}
180
 
181
 
182
static inline void set_block_dev_mapped (struct buffer_head * bh,
183
                                         b_blocknr_t block, struct inode * inode)
184
{
185
  bh->b_dev = inode->i_dev;
186
  bh->b_blocknr = block;
187
  bh->b_state |= (1UL << BH_Mapped);
188
}
189
 
190
 
191
//
192
// files which were created in the earlier version can not be longer,
193
// than 2 gb
194
//
195
static int file_capable (struct inode * inode, long block)
196
{
197
    if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is new file.
198
        block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
199
        return 1;
200
 
201
    return 0;
202
}
203
 
204
/* End the current transaction and immediately start a fresh one with the
** same block reservation, giving the journal a chance to free space.
** The path must be released and the stat data flushed BEFORE journal_end,
** since the old transaction's buffers become invalid afterwards.
** NOTE(review): callers must hold no tree paths other than 'path' here.
*/
/*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
                                struct inode *inode, struct path *path) {
  struct super_block *s = th->t_super ;
  int len = th->t_blocks_allocated ;

  /* drop buffer references before closing the transaction */
  pathrelse(path) ;
  /* record pending inode changes inside the old transaction */
  reiserfs_update_sd(th, inode) ;
  journal_end(th, s, len) ;
  journal_begin(th, s, len) ;
  /* re-associate the inode with the new transaction id */
  reiserfs_update_inode_transaction(inode) ;
}
215
 
216
// it is called by get_block when create == 0. Returns block number
217
// for 'block'-th logical block of file. When it hits direct item it
218
// returns 0 (being called from bmap) or read direct item into piece
219
// of page (bh_result)
220
 
221
// Please improve the english/clarity in the comment above, as it is
222
// hard to understand.
223
 
224
/* Non-creating get_block: resolve the 'block'-th logical block of a file.
** For an indirect item the unformatted node number is mapped into
** bh_result.  When the data lives in direct items (a file tail), either
** return -ENOENT (bmap, no GET_BLOCK_READ_DIRECT) or copy the tail bytes
** into bh_result's page and map the buffer with block number 0.
** GET_BLOCK_NO_HOLE turns holes into -ENOENT unless the page is already
** uptodate (mmapped data not yet written back).  Returns 0 on success.
*/
static int _get_block_create_0 (struct inode * inode, long block,
                                 struct buffer_head * bh_result,
                                 int args)
{
    INITIALIZE_PATH (path);
    struct cpu_key key;
    struct buffer_head * bh;
    struct item_head * ih, tmp_ih;
    int fs_gen ;
    int blocknr;
    char * p = NULL;     /* non-NULL once the result page is kmapped */
    int chars;
    int ret ;
    int done = 0 ;
    unsigned long offset ;

    // prepare the key to look for the 'block'-th block of file
    make_cpu_key (&key, inode,
                  (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);

research:
    if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
        pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
        // We do not return -ENOENT if there is a hole but the page is uptodate,
        // because that means there is mmapped data associated with it that is
        // yet to be written to disk.
        if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
            return -ENOENT ;
        }
        return 0 ;
    }

    //
    bh = get_last_bh (&path);
    ih = get_ih (&path);
    if (is_indirect_le_ih (ih)) {
        __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);

        /* FIXME: here we could cache indirect item or part of it in
           the inode to avoid search_by_key in case of subsequent
           access to file */
        blocknr = get_block_num(ind_item, path.pos_in_item) ;
        ret = 0 ;
        if (blocknr) {
            /* found a real unformatted node pointer: map it */
            bh_result->b_dev = inode->i_dev;
            bh_result->b_blocknr = blocknr;
            bh_result->b_state |= (1UL << BH_Mapped);
        } else
            // We do not return -ENOENT if there is a hole but the page is uptodate,
            // because that means there is mmapped data associated with it that is
            // yet to be written to disk.
            if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
                ret = -ENOENT ;
            }

        pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
        return ret ;
    }

    // requested data are in direct item(s)
    if (!(args & GET_BLOCK_READ_DIRECT)) {
        // we are called by bmap. FIXME: we can not map block of file
        // when it is stored in direct item(s)
        pathrelse (&path);
        if (p)
            kunmap(bh_result->b_page) ;
        return -ENOENT;
    }

    /* if we've got a direct item, and the buffer was uptodate,
    ** we don't want to pull data off disk again.  skip to the
    ** end, where we map the buffer and return
    */
    if (buffer_uptodate(bh_result)) {
        goto finished ;
    } else
        /*
        ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
        ** pages without any buffers.  If the page is up to date, we don't want
        ** read old data off disk.  Set the up to date bit on the buffer instead
        ** and jump to the end
        */
            if (Page_Uptodate(bh_result->b_page)) {
                mark_buffer_uptodate(bh_result, 1);
                goto finished ;
    }

    // read file tail into part of page
    offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
    fs_gen = get_generation(inode->i_sb) ;
    copy_item_head (&tmp_ih, ih);

    /* we only want to kmap if we are reading the tail into the page.
    ** this is not the common case, so we don't kmap until we are
    ** sure we need to.  But, this means the item might move if
    ** kmap schedules
    */
    if (!p) {
        p = (char *)kmap(bh_result->b_page) ;
        /* kmap may have slept; redo the search if the tree changed */
        if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
            goto research;
        }
    }
    p += offset ;
    /* zero the block first: the tail may be shorter than a full block */
    memset (p, 0, inode->i_sb->s_blocksize);
    do {
        if (!is_direct_le_ih (ih)) {
            BUG ();
        }
        /* make sure we don't read more bytes than actually exist in
        ** the file.  This can happen in odd cases where i_size isn't
        ** correct, and when direct item padding results in a few
        ** extra bytes at the end of the direct item
        */
        if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
            break ;
        if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
            /* item reaches past i_size: copy only up to i_size */
            chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
            done = 1 ;
        } else {
            chars = ih_item_len(ih) - path.pos_in_item;
        }
        memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);

        if (done)
            break ;

        p += chars;

        if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
            // we done, if read direct item is not the last item of
            // node FIXME: we could try to check right delimiting key
            // to see whether direct item continues in the right
            // neighbor or rely on i_size
            break;

        // update key to look for the next piece
        set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
        if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
            // we read something from tail, even if now we got IO_ERROR
            break;
        bh = get_last_bh (&path);
        ih = get_ih (&path);
    } while (1);

    flush_dcache_page(bh_result->b_page) ;
    kunmap(bh_result->b_page) ;

finished:
    pathrelse (&path);
    /* tail data lives in the page, not a disk block: map to block 0 so
       writers know a tail conversion is needed */
    bh_result->b_blocknr = 0 ;
    bh_result->b_dev = inode->i_dev;
    mark_buffer_uptodate (bh_result, 1);
    bh_result->b_state |= (1UL << BH_Mapped);
    return 0;
}
382
 
383
 
384
// this is called to create file map. So, _get_block_create_0 will not
385
// read direct item
386
int reiserfs_bmap (struct inode * inode, long block,
387
                   struct buffer_head * bh_result, int create)
388
{
389
    if (!file_capable (inode, block))
390
        return -EFBIG;
391
 
392
    lock_kernel() ;
393
    /* do not read the direct item */
394
    _get_block_create_0 (inode, block, bh_result, 0) ;
395
    unlock_kernel() ;
396
    return 0;
397
}
398
 
399
/* special version of get_block that is only used by grab_tail_page right
400
** now.  It is sent to block_prepare_write, and when you try to get a
401
** block past the end of the file (or a block from a hole) it returns
402
** -ENOENT instead of a valid buffer.  block_prepare_write expects to
403
** be able to do i/o on the buffers returned, unless an error value
404
** is also returned.
405
**
406
** So, this allows block_prepare_write to be used for reading a single block
407
** in a page.  Where it does not produce a valid page for holes, or past the
408
** end of the file.  This turns out to be exactly what we need for reading
409
** tails for conversion.
410
**
411
** The point of the wrapper is forcing a certain value for create, even
412
** though the VFS layer is calling this function with create==1.  If you
413
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
414
** don't use this function.
415
*/
416
static int reiserfs_get_block_create_0 (struct inode * inode, long block,
417
                        struct buffer_head * bh_result, int create) {
418
    return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
419
}
420
 
421
/* get_block variant for direct IO.  Clears bh_result->b_page before the
** call so reiserfs_get_block knows there is no page cache page behind
** this buffer, then rejects tail blocks (mapped with block number 0),
** since direct IO cannot read/write data stored inside direct items.
*/
static int reiserfs_get_block_direct_io (struct inode * inode, long block,
                        struct buffer_head * bh_result, int create) {
    int ret ;

    /* must be NULLed before the call: signals "no page" to get_block */
    bh_result->b_page = NULL;
    ret = reiserfs_get_block(inode, block, bh_result, create) ;

    /* don't allow direct io onto tail pages */
    if (ret == 0 && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
        /* make sure future calls to the direct io funcs for this offset
        ** in the file fail by unmapping the buffer
        */
        reiserfs_unmap_buffer(bh_result);
        ret = -EINVAL ;
    }
    /* Possible unpacked tail. Flush the data before pages have
       disappeared */
    if (inode->u.reiserfs_i.i_flags & i_pack_on_close_mask) {
        lock_kernel();
        reiserfs_commit_for_inode(inode);
        /* one commit is enough: drop the pack-on-close flag */
        inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
        unlock_kernel();
    }
    return ret ;
}
446
 
447
 
448
/*
449
** helper function for when reiserfs_get_block is called for a hole
450
** but the file tail is still in a direct item
451
** bh_result is the buffer head for the hole
452
** tail_offset is the offset of the start of the tail in the file
453
**
454
** This calls prepare_write, which will start a new transaction
455
** you should not be in a transaction, or have any paths held when you
456
** call this.
457
*/
458
/* Helper for reiserfs_get_block when a hole is being filled while the
** file tail is still stored in a direct item: run prepare/commit_write
** over the page containing the tail, which forces the tail to be
** converted to an unformatted node.
**
** bh_result is the buffer head for the hole; tail_offset is the file
** offset of the start of the tail (1-based, block aligned).
**
** This calls block_prepare_write, which starts a new transaction — the
** caller must not be inside a transaction or hold any tree paths.
** Returns 0 on success or a negative errno.
*/
static int convert_tail_for_hole(struct inode *inode,
                                 struct buffer_head *bh_result,
                                 loff_t tail_offset) {
    unsigned long index ;
    unsigned long tail_end ;
    unsigned long tail_start ;
    struct page * tail_page ;
    struct page * hole_page = bh_result->b_page ;
    int retval = 0 ;

    /* tail_offset must sit exactly on a block boundary (offsets are 1-based) */
    if ((tail_offset & (bh_result->b_size - 1)) != 1)
        return -EIO ;

    /* always try to read until the end of the block */
    tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
    tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;

    index = tail_offset >> PAGE_CACHE_SHIFT ;
    if ( !hole_page || index != hole_page->index) {
        /* the tail lives on a different page than the hole: pin it */
        tail_page = grab_cache_page(inode->i_mapping, index) ;
        retval = -ENOMEM;
        if (!tail_page) {
            goto out ;
        }
    } else {
        tail_page = hole_page ;
    }

    /* we don't have to make sure the conversion did not happen while
    ** we were locking the page because anyone that could convert
    ** must first take i_sem.
    **
    ** We must fix the tail page for writing because it might have buffers
    ** that are mapped, but have a block number of 0.  This indicates tail
    ** data that has been read directly into the page, and block_prepare_write
    ** won't trigger a get_block in this case.
    */
    fix_tail_page_for_writing(tail_page) ;
    retval = block_prepare_write(tail_page, tail_start, tail_end,
                                 reiserfs_get_block) ;
    if (retval)
        goto unlock ;

    /* tail conversion might change the data in the page */
    flush_dcache_page(tail_page) ;

    retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ;

unlock:
    /* only release the page if we grabbed it ourselves above */
    if (tail_page != hole_page) {
        UnlockPage(tail_page) ;
        page_cache_release(tail_page) ;
    }
out:
    return retval ;
}
514
 
515
/* Allocate one unformatted-node block for the 'block'-th logical block
** of 'inode', storing the result in *allocated_block_nr.  When the
** kernel is built with REISERFS_PREALLOCATE and the caller holds i_sem
** (GET_BLOCK_NO_ISEM not set in flags), the preallocating allocator is
** used; otherwise the plain allocator.  Returns the allocator's status
** (e.g. NO_DISK_SPACE on failure).
*/
static inline int _allocate_block(struct reiserfs_transaction_handle *th,
                           long block,
                           struct inode *inode,
                           b_blocknr_t *allocated_block_nr,
                           struct path * path,
                           int flags) {

#ifdef REISERFS_PREALLOCATE
    /* preallocation needs i_sem held; skip it when the caller says it isn't */
    if (!(flags & GET_BLOCK_NO_ISEM)) {
        return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block);
    }
#endif
    return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block);
}
529
 
530
static int reiserfs_get_block (struct inode * inode, long block,
531
                               struct buffer_head * bh_result, int create)
532
{
533
    int repeat, retval;
534
    b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long
535
    INITIALIZE_PATH(path);
536
    int pos_in_item;
537
    struct cpu_key key;
538
    struct buffer_head * bh, * unbh = 0;
539
    struct item_head * ih, tmp_ih;
540
    __u32 * item;
541
    int done;
542
    int fs_gen;
543
    int windex ;
544
    struct reiserfs_transaction_handle th ;
545
    /* space reserved in transaction batch:
546
        . 3 balancings in direct->indirect conversion
547
        . 1 block involved into reiserfs_update_sd()
548
       XXX in practically impossible worst case direct2indirect()
549
       can incur (much) more that 3 balancings. */
550
    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
551
    int version;
552
    int transaction_started = 0 ;
553
    loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
554
 
555
                                /* bad.... */
556
    lock_kernel() ;
557
    th.t_trans_id = 0 ;
558
    version = get_inode_item_key_version (inode);
559
 
560
    if (block < 0) {
561
        unlock_kernel();
562
        return -EIO;
563
    }
564
 
565
    if (!file_capable (inode, block)) {
566
        unlock_kernel() ;
567
        return -EFBIG;
568
    }
569
 
570
    /* if !create, we aren't changing the FS, so we don't need to
571
    ** log anything, so we don't need to start a transaction
572
    */
573
    if (!(create & GET_BLOCK_CREATE)) {
574
        int ret ;
575
        /* find number of block-th logical block of the file */
576
        ret = _get_block_create_0 (inode, block, bh_result,
577
                                   create | GET_BLOCK_READ_DIRECT) ;
578
        unlock_kernel() ;
579
        return ret;
580
    }
581
 
582
    /* If file is of such a size, that it might have a tail and tails are enabled
583
    ** we should mark it as possibly needing tail packing on close
584
    */
585
    if ( (have_large_tails (inode->i_sb) && inode->i_size < block_size (inode)*4) ||
586
         (have_small_tails (inode->i_sb) && inode->i_size < block_size(inode)) )
587
        inode->u.reiserfs_i.i_flags |= i_pack_on_close_mask;
588
 
589
    windex = push_journal_writer("reiserfs_get_block") ;
590
 
591
    /* set the key of the first byte in the 'block'-th block of file */
592
    make_cpu_key (&key, inode, new_offset,
593
                  TYPE_ANY, 3/*key length*/);
594
    if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
595
        journal_begin(&th, inode->i_sb, jbegin_count) ;
596
        reiserfs_update_inode_transaction(inode) ;
597
        transaction_started = 1 ;
598
    }
599
 research:
600
 
601
    retval = search_for_position_by_key (inode->i_sb, &key, &path);
602
    if (retval == IO_ERROR) {
603
        retval = -EIO;
604
        goto failure;
605
    }
606
 
607
    bh = get_last_bh (&path);
608
    ih = get_ih (&path);
609
    item = get_item (&path);
610
    pos_in_item = path.pos_in_item;
611
 
612
    fs_gen = get_generation (inode->i_sb);
613
    copy_item_head (&tmp_ih, ih);
614
 
615
    if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
616
        /* we have to allocate block for the unformatted node */
617
        if (!transaction_started) {
618
            pathrelse(&path) ;
619
            journal_begin(&th, inode->i_sb, jbegin_count) ;
620
            reiserfs_update_inode_transaction(inode) ;
621
            transaction_started = 1 ;
622
            goto research ;
623
        }
624
 
625
        repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create);
626
 
627
        if (repeat == NO_DISK_SPACE) {
628
            /* restart the transaction to give the journal a chance to free
629
            ** some blocks.  releases the path, so we have to go back to
630
            ** research if we succeed on the second try
631
            */
632
            restart_transaction(&th, inode, &path) ;
633
            repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create);
634
 
635
            if (repeat != NO_DISK_SPACE) {
636
                goto research ;
637
            }
638
            retval = -ENOSPC;
639
            goto failure;
640
        }
641
 
642
        if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
643
            goto research;
644
        }
645
    }
646
 
647
    if (indirect_item_found (retval, ih)) {
648
        b_blocknr_t unfm_ptr;
649
        /* 'block'-th block is in the file already (there is
650
           corresponding cell in some indirect item). But it may be
651
           zero unformatted node pointer (hole) */
652
        unfm_ptr = get_block_num (item, pos_in_item);
653
        if (unfm_ptr == 0) {
654
            /* use allocated block to plug the hole */
655
            reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
656
            if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
657
                reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
658
                goto research;
659
            }
660
            bh_result->b_state |= (1UL << BH_New);
661
            put_block_num(item, pos_in_item, allocated_block_nr) ;
662
            unfm_ptr = allocated_block_nr;
663
            journal_mark_dirty (&th, inode->i_sb, bh);
664
            inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
665
            reiserfs_update_sd(&th, inode) ;
666
        }
667
        set_block_dev_mapped(bh_result, unfm_ptr, inode);
668
        pathrelse (&path);
669
        pop_journal_writer(windex) ;
670
        if (transaction_started)
671
            journal_end(&th, inode->i_sb, jbegin_count) ;
672
 
673
        unlock_kernel() ;
674
 
675
        /* the item was found, so new blocks were not added to the file
676
        ** there is no need to make sure the inode is updated with this
677
        ** transaction
678
        */
679
        return 0;
680
    }
681
 
682
    if (!transaction_started) {
683
        /* if we don't pathrelse, we could vs-3050 on the buffer if
684
        ** someone is waiting for it (they can't finish until the buffer
685
        ** is released, we can start a new transaction until they finish)
686
        */
687
        pathrelse(&path) ;
688
        journal_begin(&th, inode->i_sb, jbegin_count) ;
689
        reiserfs_update_inode_transaction(inode) ;
690
        transaction_started = 1 ;
691
        goto research;
692
    }
693
 
694
    /* desired position is not found or is in the direct item. We have
695
       to append file with holes up to 'block'-th block converting
696
       direct items to indirect one if necessary */
697
    done = 0;
698
    do {
699
        if (is_statdata_le_ih (ih)) {
700
            __u32 unp = 0;
701
            struct cpu_key tmp_key;
702
 
703
            /* indirect item has to be inserted */
704
            make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT,
705
                               UNFM_P_SIZE, 0/* free_space */);
706
 
707
            if (cpu_key_k_offset (&key) == 1) {
708
                /* we are going to add 'block'-th block to the file. Use
709
                   allocated block for that */
710
                unp = cpu_to_le32 (allocated_block_nr);
711
                set_block_dev_mapped (bh_result, allocated_block_nr, inode);
712
                bh_result->b_state |= (1UL << BH_New);
713
                done = 1;
714
            }
715
            tmp_key = key; // ;)
716
            set_cpu_key_k_offset (&tmp_key, 1);
717
            PATH_LAST_POSITION(&path) ++;
718
 
719
            retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
720
            if (retval) {
721
                reiserfs_free_block (&th, allocated_block_nr);
722
                goto failure; // retval == -ENOSPC or -EIO or -EEXIST
723
            }
724
            if (unp)
725
                inode->i_blocks += inode->i_sb->s_blocksize / 512;
726
            //mark_tail_converted (inode);
727
        } else if (is_direct_le_ih (ih)) {
728
            /* direct item has to be converted */
729
            loff_t tail_offset;
730
 
731
            tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
732
            if (tail_offset == cpu_key_k_offset (&key)) {
733
                /* direct item we just found fits into block we have
734
                   to map. Convert it into unformatted node: use
735
                   bh_result for the conversion */
736
                set_block_dev_mapped (bh_result, allocated_block_nr, inode);
737
                unbh = bh_result;
738
                done = 1;
739
            } else {
740
                /* we have to padd file tail stored in direct item(s)
741
                   up to block size and convert it to unformatted
742
                   node. FIXME: this should also get into page cache */
743
 
744
                pathrelse(&path) ;
745
                journal_end(&th, inode->i_sb, jbegin_count) ;
746
                transaction_started = 0 ;
747
 
748
                retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
749
                if (retval) {
750
                    if ( retval != -ENOSPC )
751
                        reiserfs_warning(inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
752
                    if (allocated_block_nr) {
753
                        /* the bitmap, the super, and the stat data == 3 */
754
                        journal_begin(&th, inode->i_sb, 3) ;
755
                        reiserfs_free_block (&th, allocated_block_nr);
756
                        transaction_started = 1 ;
757
                    }
758
                    goto failure ;
759
                }
760
                goto research ;
761
            }
762
            retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
763
            if (retval) {
764
                reiserfs_unmap_buffer(unbh);
765
                reiserfs_free_block (&th, allocated_block_nr);
766
                goto failure;
767
            }
768
            /* it is important the mark_buffer_uptodate is done after
769
            ** the direct2indirect.  The buffer might contain valid
770
            ** data newer than the data on disk (read by readpage, changed,
771
            ** and then sent here by writepage).  direct2indirect needs
772
            ** to know if unbh was already up to date, so it can decide
773
            ** if the data in unbh needs to be replaced with data from
774
            ** the disk
775
            */
776
            mark_buffer_uptodate (unbh, 1);
777
 
778
            /* unbh->b_page == NULL in case of DIRECT_IO request, this means
779
               buffer will disappear shortly, so it should not be added to
780
               any of our lists.
781
            */
782
            if ( unbh->b_page ) {
783
                /* we've converted the tail, so we must
784
                ** flush unbh before the transaction commits
785
                */
786
                add_to_flushlist(inode, unbh) ;
787
 
788
                /* mark it dirty now to prevent commit_write from adding
789
                 ** this buffer to the inode's dirty buffer list
790
                 */
791
                __mark_buffer_dirty(unbh) ;
792
            }
793
 
794
            //inode->i_blocks += inode->i_sb->s_blocksize / 512;
795
            //mark_tail_converted (inode);
796
        } else {
797
            /* append indirect item with holes if needed, when appending
798
               pointer to 'block'-th block use block, which is already
799
               allocated */
800
            struct cpu_key tmp_key;
801
            unp_t unf_single=0; // We use this in case we need to allocate only
802
                                // one block which is a fastpath
803
            unp_t *un;
804
            __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
805
            __u64 blocks_needed;
806
 
807
            RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
808
                    "vs-804: invalid position for append");
809
            /* indirect item has to be appended, set up key of that position */
810
            make_cpu_key (&tmp_key, inode,
811
                          le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
812
                          //pos_in_item * inode->i_sb->s_blocksize,
813
                          TYPE_INDIRECT, 3);// key type is unimportant
814
 
815
            blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
816
            RFALSE( blocks_needed < 0, "green-805: invalid offset");
817
 
818
            if ( blocks_needed == 1 ) {
819
                un = &unf_single;
820
            } else {
821
                un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
822
                            GFP_ATOMIC); // We need to avoid scheduling.
823
                if ( !un) {
824
                    un = &unf_single;
825
                    blocks_needed = 1;
826
                    max_to_insert = 0;
827
                } else
828
                    memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
829
            }
830
            if ( blocks_needed <= max_to_insert) {
831
                /* we are going to add target block to the file. Use allocated
832
                   block for that */
833
                un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
834
                set_block_dev_mapped (bh_result, allocated_block_nr, inode);
835
                bh_result->b_state |= (1UL << BH_New);
836
                done = 1;
837
            } else {
838
                /* paste hole to the indirect item */
839
                /* If kmalloc failed, max_to_insert becomes zero and it means we
840
                   only have space for one block */
841
                blocks_needed=max_to_insert?max_to_insert:1;
842
            }
843
            retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);
844
 
845
            if (blocks_needed != 1)
846
                 kfree(un);
847
 
848
            if (retval) {
849
                reiserfs_free_block (&th, allocated_block_nr);
850
                goto failure;
851
            }
852
            if (done) {
853
                inode->i_blocks += inode->i_sb->s_blocksize / 512;
854
            } else {
855
                /* We need to mark new file size in case this function will be
856
                   interrupted/aborted later on. And we may do this only for
857
                   holes. */
858
                inode->i_size += blocks_needed << inode->i_blkbits;
859
            }
860
            //mark_tail_converted (inode);
861
        }
862
 
863
        if (done == 1)
864
            break;
865
 
866
        /* this loop could log more blocks than we had originally asked
867
        ** for.  So, we have to allow the transaction to end if it is
868
        ** too big or too full.  Update the inode so things are
869
        ** consistent if we crash before the function returns
870
        **
871
        ** release the path so that anybody waiting on the path before
872
        ** ending their transaction will be able to continue.
873
        */
874
        if (journal_transaction_should_end(&th, th.t_blocks_allocated)) {
875
          restart_transaction(&th, inode, &path) ;
876
        }
877
        /* inserting indirect pointers for a hole can take a
878
        ** long time.  reschedule if needed
879
        */
880
        if (current->need_resched)
881
            schedule() ;
882
 
883
        retval = search_for_position_by_key (inode->i_sb, &key, &path);
884
        if (retval == IO_ERROR) {
885
            retval = -EIO;
886
            goto failure;
887
        }
888
        if (retval == POSITION_FOUND) {
889
            reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: "
890
                              "%K should not be found\n", &key);
891
            retval = -EEXIST;
892
            if (allocated_block_nr)
893
                reiserfs_free_block (&th, allocated_block_nr);
894
            pathrelse(&path) ;
895
            goto failure;
896
        }
897
        bh = get_last_bh (&path);
898
        ih = get_ih (&path);
899
        item = get_item (&path);
900
        pos_in_item = path.pos_in_item;
901
    } while (1);
902
 
903
 
904
    retval = 0;
905
    reiserfs_check_path(&path) ;
906
 
907
 failure:
908
    if (transaction_started) {
909
      reiserfs_update_sd(&th, inode) ;
910
      journal_end(&th, inode->i_sb, jbegin_count) ;
911
    }
912
    pop_journal_writer(windex) ;
913
    unlock_kernel() ;
914
    reiserfs_check_path(&path) ;
915
    return retval;
916
}
917
 
918
 
919
//
920
// BAD: new directories have stat data of new type and all other items
921
// of old type. Version stored in the inode says about body items, so
922
// in update_stat_data we can not rely on inode, but have to check
923
// item version directly
924
//
925
 
926
// called by read_inode
927
/* Fill the in-core inode from the on-disk stat data item that @path
** points at.  Handles both the old (v1) and new (v2) stat data layouts,
** releases @path, and installs the inode/file/address-space operations
** appropriate for the object's mode.
*/
static void init_inode (struct inode * inode, struct path * path)
{
    struct buffer_head * bh;
    struct item_head * ih;
    __u32 rdev;
    //int version = ITEM_VERSION_1;

    bh = PATH_PLAST_BUFFER (path);
    ih = PATH_PITEM_HEAD (path);

    /* copy the key under keycopy_lock so a concurrent
    ** reiserfs_find_actor() never sees a half-copied key */
    spin_lock(&keycopy_lock);
    copy_key (INODE_PKEY (inode), &(ih->ih_key));
    spin_unlock(&keycopy_lock);
    inode->i_blksize = PAGE_SIZE;

    INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;

    if (stat_data_v1 (ih)) {
        struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
        unsigned long blocks;

        set_inode_item_key_version (inode, KEY_FORMAT_3_5);
        set_inode_sd_version (inode, STAT_DATA_V1);
        inode->i_mode  = sd_v1_mode(sd);
        inode->i_nlink = sd_v1_nlink(sd);
        inode->i_uid   = sd_v1_uid(sd);
        inode->i_gid   = sd_v1_gid(sd);
        inode->i_size  = sd_v1_size(sd);
        inode->i_atime = sd_v1_atime(sd);
        inode->i_mtime = sd_v1_mtime(sd);
        inode->i_ctime = sd_v1_ctime(sd);

        inode->i_blocks = sd_v1_blocks(sd);
        /* v1 stat data has no generation field; use the packing
        ** locality (dir id) instead */
        inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
        blocks = (inode->i_size + 511) >> 9;
        blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9);
        if (inode->i_blocks > blocks) {
            // there was a bug in <=3.5.23 when i_blocks could take negative
            // values. Starting from 3.5.17 this value could even be stored in
            // stat data. For such files we set i_blocks based on file
            // size. Just 2 notes: this can be wrong for sparse files. On-disk value will be
            // only updated if file's inode will ever change
            inode->i_blocks = blocks;
        }

        rdev = sd_v1_rdev(sd);
        inode->u.reiserfs_i.i_first_direct_byte = sd_v1_first_direct_byte(sd);
        /* nopack is initially zero for v1 objects. For v2 objects,
           nopack is initialised from sd_attrs */
        inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
    } else {
        // new stat data found, but object may have old items
        // (directories and symlinks)
        struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);

        inode->i_mode   = sd_v2_mode(sd);
        inode->i_nlink  = sd_v2_nlink(sd);
        inode->i_uid    = sd_v2_uid(sd);
        inode->i_size   = sd_v2_size(sd);
        inode->i_gid    = sd_v2_gid(sd);
        inode->i_mtime  = sd_v2_mtime(sd);
        inode->i_atime  = sd_v2_atime(sd);
        inode->i_ctime  = sd_v2_ctime(sd);
        inode->i_blocks = sd_v2_blocks(sd);
        rdev            = sd_v2_rdev(sd);
        /* device nodes store rdev where other objects store the
        ** generation counter */
        if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
            inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
        else
            inode->i_generation = sd_v2_generation(sd);

        /* directories and symlinks keep old-format body items even
        ** with a new-format stat data (see the BAD note above) */
        if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode))
            set_inode_item_key_version (inode, KEY_FORMAT_3_5);
        else
            set_inode_item_key_version (inode, KEY_FORMAT_3_6);

        set_inode_sd_version (inode, STAT_DATA_V2);
        /* read persistent inode attributes from sd and initalise
           generic inode flags from them */
        inode -> u.reiserfs_i.i_attrs = sd_v2_attrs( sd );
        sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode );
    }


    pathrelse (path);
    if (S_ISREG (inode->i_mode)) {
        inode->i_op = &reiserfs_file_inode_operations;
        inode->i_fop = &reiserfs_file_operations;
        inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
    } else if (S_ISDIR (inode->i_mode)) {
        inode->i_op = &reiserfs_dir_inode_operations;
        inode->i_fop = &reiserfs_dir_operations;
    } else if (S_ISLNK (inode->i_mode)) {
        inode->i_op = &page_symlink_inode_operations;
        inode->i_mapping->a_ops = &reiserfs_address_space_operations;
    } else {
        inode->i_blocks = 0;
        init_special_inode(inode, inode->i_mode, rdev) ;
    }
}
1026
 
1027
 
1028
// update new stat data with inode fields
1029
static void inode2sd (void * sd, struct inode * inode)
1030
{
1031
    struct stat_data * sd_v2 = (struct stat_data *)sd;
1032
    __u16 flags;
1033
 
1034
    set_sd_v2_mode(sd_v2, inode->i_mode );
1035
    set_sd_v2_nlink(sd_v2, inode->i_nlink );
1036
    set_sd_v2_uid(sd_v2, inode->i_uid );
1037
    set_sd_v2_size(sd_v2, inode->i_size );
1038
    set_sd_v2_gid(sd_v2, inode->i_gid );
1039
    set_sd_v2_mtime(sd_v2, inode->i_mtime );
1040
    set_sd_v2_atime(sd_v2, inode->i_atime );
1041
    set_sd_v2_ctime(sd_v2, inode->i_ctime );
1042
    set_sd_v2_blocks(sd_v2, inode->i_blocks );
1043
    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1044
        set_sd_v2_rdev(sd_v2, inode->i_rdev );
1045
    else
1046
        set_sd_v2_generation(sd_v2, inode->i_generation);
1047
    flags = inode -> u.reiserfs_i.i_attrs;
1048
    i_attrs_to_sd_attrs( inode, &flags );
1049
    set_sd_v2_attrs( sd_v2, flags );
1050
}
1051
 
1052
 
1053
// used to copy inode's fields to old stat data
1054
static void inode2sd_v1 (void * sd, struct inode * inode)
1055
{
1056
    struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;
1057
 
1058
    set_sd_v1_mode(sd_v1, inode->i_mode );
1059
    set_sd_v1_uid(sd_v1, inode->i_uid );
1060
    set_sd_v1_gid(sd_v1, inode->i_gid );
1061
    set_sd_v1_nlink(sd_v1, inode->i_nlink );
1062
    set_sd_v1_size(sd_v1, inode->i_size );
1063
    set_sd_v1_atime(sd_v1, inode->i_atime );
1064
    set_sd_v1_ctime(sd_v1, inode->i_ctime );
1065
    set_sd_v1_mtime(sd_v1, inode->i_mtime );
1066
 
1067
    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1068
        set_sd_v1_rdev(sd_v1, inode->i_rdev );
1069
    else
1070
        set_sd_v1_blocks(sd_v1, inode->i_blocks );
1071
 
1072
    // Sigh. i_first_direct_byte is back
1073
    set_sd_v1_first_direct_byte(sd_v1, inode->u.reiserfs_i.i_first_direct_byte);
1074
}
1075
 
1076
 
1077
/* NOTE, you must prepare the buffer head before sending it here,
1078
** and then log it after the call
1079
*/
1080
static void update_stat_data (struct path * path, struct inode * inode)
1081
{
1082
    struct buffer_head * bh;
1083
    struct item_head * ih;
1084
 
1085
    bh = PATH_PLAST_BUFFER (path);
1086
    ih = PATH_PITEM_HEAD (path);
1087
 
1088
    if (!is_statdata_le_ih (ih))
1089
        reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h",
1090
                        INODE_PKEY (inode), ih);
1091
 
1092
    if (stat_data_v1 (ih)) {
1093
        // path points to old stat data
1094
        inode2sd_v1 (B_I_PITEM (bh, ih), inode);
1095
    } else {
1096
        inode2sd (B_I_PITEM (bh, ih), inode);
1097
    }
1098
 
1099
    return;
1100
}
1101
 
1102
 
1103
/* Find this inode's stat data item in the tree, copy the in-core
** fields into it and log the containing buffer in transaction @th.
** Retries the tree search if the item moved while the buffer was
** being prepared for the journal (prepare may schedule).
*/
void reiserfs_update_sd (struct reiserfs_transaction_handle *th,
                         struct inode * inode)
{
    struct cpu_key key;
    INITIALIZE_PATH(path);
    struct buffer_head *bh ;
    int fs_gen ;
    struct item_head *ih, tmp_ih ;
    int retval;

    make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant

    for(;;) {
        int pos;
        /* look for the object's stat data */
        retval = search_item (inode->i_sb, &key, &path);
        if (retval == IO_ERROR) {
            reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: "
                              "i/o failure occurred trying to update %K stat data\n",
                              &key);
            return;
        }
        if (retval == ITEM_NOT_FOUND) {
            pos = PATH_LAST_POSITION (&path);
            pathrelse(&path) ;
            /* missing stat data is expected for an object that is
            ** already being deleted (nlink == 0); anything else is
            ** worth a warning */
            if (inode->i_nlink == 0) {
                /*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/
                return;
            }
            reiserfs_warning (inode->i_sb, "vs-13060: reiserfs_update_sd: "
                              "stat data of object %k (nlink == %d) not found (pos %d)\n",
                              INODE_PKEY (inode), inode->i_nlink, pos);
            reiserfs_check_path(&path) ;
            return;
        }

        /* sigh, prepare_for_journal might schedule.  When it schedules the
        ** FS might change.  We have to detect that, and loop back to the
        ** search if the stat data item has moved
        */
        bh = get_last_bh(&path) ;
        ih = get_ih(&path) ;
        copy_item_head (&tmp_ih, ih);
        fs_gen = get_generation (inode->i_sb);
        reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
        if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
            reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
            continue ;  /* Stat_data item has been moved after scheduling. */
        }
        break;
    }
    update_stat_data (&path, inode);
    journal_mark_dirty(th, th->t_super, bh) ;
    pathrelse (&path);
    return;
}
1159
 
1160
/* We need to clear inode key in private part of inode to avoid races between
1161
   blocking iput, knfsd and file deletion with creating of safelinks.*/
1162
static void reiserfs_make_bad_inode(struct inode *inode) {
    /* zero the key first so reiserfs_find_actor() can never match a
    ** stale key on this inode (see comment above) */
    memset(INODE_PKEY(inode), 0, KEY_SIZE);
    make_bad_inode(inode);
}
1166
 
1167
/* Plain read_inode cannot initialize a reiserfs inode -- the parent
** directory id is also needed, so real initialization happens in
** reiserfs_read_inode2() via iget4().  Mark the inode bad here. */
void reiserfs_read_inode(struct inode *inode) {
    reiserfs_make_bad_inode(inode) ;
}
1170
 
1171
 
1172
/* looks for stat data in the tree, and fills up the fields of in-core
1173
   inode stat data fields */
1174
/* iget4() read_inode2 callback: look up the object's stat data in the
** tree and fill the in-core inode from it.  @p is a
** struct reiserfs_iget4_args carrying the parent directory objectid,
** which together with inode->i_ino forms the stat data key.  On any
** failure the inode is marked bad instead of returning an error.
*/
void reiserfs_read_inode2 (struct inode * inode, void *p)
{
    INITIALIZE_PATH (path_to_sd);
    struct cpu_key key;
    struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ;
    unsigned long dirino;
    int retval;

    if (!p) {
        reiserfs_make_bad_inode(inode) ;
        return;
    }

    dirino = args->objectid ;

    /* set version 1, version 2 could be used too, because stat data
       key is the same in both versions */
    key.version = KEY_FORMAT_3_5;
    key.on_disk_key.k_dir_id = dirino;
    key.on_disk_key.k_objectid = inode->i_ino;
    key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
    key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;

    /* look for the object's stat data */
    retval = search_item (inode->i_sb, &key, &path_to_sd);
    if (retval == IO_ERROR) {
        reiserfs_warning (inode->i_sb, "vs-13070: reiserfs_read_inode2: "
                    "i/o failure occurred trying to find stat data of %K\n",
                    &key);
        reiserfs_make_bad_inode(inode) ;
        return;
    }
    if (retval != ITEM_FOUND) {
        /* a stale NFS handle can trigger this without it being an error */
        pathrelse (&path_to_sd);
        reiserfs_make_bad_inode(inode) ;
        inode->i_nlink = 0;
        return;
    }

    init_inode (inode, &path_to_sd);

    /* It is possible that knfsd is trying to access inode of a file
       that is being removed from the disk by some other thread. As we
       update sd on unlink all that is required is to check for nlink
       here. This bug was first found by Sizif when debugging
       SquidNG/Butterfly, forgotten, and found again after Philippe
       Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

       More logical fix would require changes in fs/inode.c:iput() to
       remove inode from hash-table _after_ fs cleaned disk stuff up and
       in iget() to return NULL if I_FREEING inode is found in
       hash-table. */
    /* Currently there is one place where it's ok to meet inode with
       nlink==0: processing of open-unlinked and half-truncated files
       during mount (fs/reiserfs/super.c:finish_unfinished()). */
    if( ( inode -> i_nlink == 0 ) &&
        ! inode -> i_sb -> u.reiserfs_sb.s_is_unlinked_ok ) {
            reiserfs_warning( inode->i_sb, "vs-13075: reiserfs_read_inode2: "
                              "dead inode read from disk %K. "
                              "This is likely to be race with knfsd. Ignore\n",
                              &key );
            reiserfs_make_bad_inode( inode );
    }

    reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */

}
1242
 
1243
/**
1244
 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget4().
1245
 *
1246
 * @inode:    inode from hash table to check
1247
 * @inode_no: inode number we are looking for
1248
 * @opaque:   "cookie" passed to iget4(). This is &reiserfs_iget4_args.
1249
 *
1250
 * This function is called by iget4() to distinguish reiserfs inodes
1251
 * having the same inode numbers. Such inodes can only exist due to some
1252
 * error condition. One of them should be bad. Inodes with identical
1253
 * inode numbers (objectids) are distinguished by parent directory ids.
1254
 *
1255
 */
1256
static int reiserfs_find_actor( struct inode *inode,
                                unsigned long inode_no, void *opaque )
{
    struct reiserfs_iget4_args *args;
    int retval;

    args = opaque;
    /* We protect against possible parallel init_inode() on another CPU here. */
    spin_lock(&keycopy_lock);
    /* args is already in CPU order */
    if (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args -> objectid)
        retval = 1;
    else
        /* If The key does not match, lets see if we are racing
           with another iget4, that already progressed so far
           to reiserfs_read_inode2() and was preempted in
           call to search_by_key(). The signs of that are:
             Inode is locked
             dirid and object id are zero (not yet initialized)*/
        retval = (inode->i_state & I_LOCK) &&
                 !INODE_PKEY(inode)->k_dir_id &&
                 !INODE_PKEY(inode)->k_objectid;

    spin_unlock(&keycopy_lock);
    return retval;
}
1282
 
1283
struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key)
1284
{
1285
    struct inode * inode;
1286
    struct reiserfs_iget4_args args ;
1287
 
1288
    args.objectid = key->on_disk_key.k_dir_id ;
1289
    inode = iget4 (s, key->on_disk_key.k_objectid,
1290
                   reiserfs_find_actor, (void *)(&args));
1291
    if (!inode)
1292
        return ERR_PTR(-ENOMEM) ;
1293
 
1294
    if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) {
1295
        /* either due to i/o error or a stale NFS handle */
1296
        iput (inode);
1297
        inode = 0;
1298
    }
1299
    return inode;
1300
}
1301
 
1302
/* Decode an NFS file handle into a dentry.  @data holds @len u32s laid
** out according to @fhtype (see table below); @parent selects whether
** to resolve the object itself or its parent directory.  Returns a
** (possibly disconnected) dentry, or ERR_PTR on failure.
*/
struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, __u32 *data,
                                     int len, int fhtype, int parent) {
    struct cpu_key key ;
    struct inode *inode = NULL ;
    struct list_head *lp;
    struct dentry *result;

    /* fhtype happens to reflect the number of u32s encoded.
     * due to a bug in earlier code, fhtype might indicate there
     * are more u32s then actually fitted.
     * so if fhtype seems to be more than len, reduce fhtype.
     * Valid types are:
     *   2 - objectid + dir_id - legacy support
     *   3 - objectid + dir_id + generation
     *   4 - objectid + dir_id + objectid and dirid of parent - legacy
     *   5 - objectid + dir_id + generation + objectid and dirid of parent
     *   6 - as above plus generation of directory
     * 6 does not fit in NFSv2 handles
     */
    if (fhtype > len) {
            if (fhtype != 6 || len != 5)
                    reiserfs_warning(sb, "nfsd/reiserfs, fhtype=%d, len=%d - odd\n",
                           fhtype, len);
            fhtype = 5;
    }
    /* a parent lookup needs at least type 4 (parent ids present) */
    if (fhtype < 2 || (parent && fhtype < 4))
        goto out ;

    if (! parent) {
            /* this works for handles from old kernels because the default
            ** reiserfs generation number is the packing locality.
            */
            key.on_disk_key.k_objectid = data[0] ;
            key.on_disk_key.k_dir_id = data[1] ;
            inode = reiserfs_iget(sb, &key) ;
            /* reject the inode if the handle carries a generation and
            ** it does not match (object was deleted and id reused) */
            if (inode && !IS_ERR(inode) && (fhtype == 3 || fhtype >= 5) &&
                data[2] != inode->i_generation) {
                    iput(inode) ;
                    inode = NULL ;
            }
    } else {
            key.on_disk_key.k_objectid = data[fhtype>=5?3:2] ;
            key.on_disk_key.k_dir_id = data[fhtype>=5?4:3] ;
            inode = reiserfs_iget(sb, &key) ;
            if (inode && !IS_ERR(inode) && fhtype == 6 &&
                data[5] != inode->i_generation) {
                    iput(inode) ;
                    inode = NULL ;
            }
    }
out:
    if (IS_ERR(inode))
        return ERR_PTR(PTR_ERR(inode));
    if (!inode)
        return ERR_PTR(-ESTALE) ;

    /* now to find a dentry.
     * If possible, get a well-connected one
     */
    spin_lock(&dcache_lock);
    for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
            result = list_entry(lp,struct dentry, d_alias);
            if (! (result->d_flags & DCACHE_NFSD_DISCONNECTED)) {
                    dget_locked(result);
                    result->d_vfs_flags |= DCACHE_REFERENCED;
                    spin_unlock(&dcache_lock);
                    /* the dentry holds its own inode reference now */
                    iput(inode);
                    return result;
            }
    }
    spin_unlock(&dcache_lock);
    /* no connected alias found; fabricate a disconnected dentry */
    result = d_alloc_root(inode);
    if (result == NULL) {
            iput(inode);
            return ERR_PTR(-ENOMEM);
    }
    result->d_flags |= DCACHE_NFSD_DISCONNECTED;
    return result;

}
1382
 
1383
/* Encode @dentry into an NFS file handle: objectid, dir id and
** generation of the object, optionally followed by the same triple for
** the parent directory.  Stores the number of u32s written in *lenp
** and returns it; returns 255 when the buffer cannot hold even the
** minimal 3-word handle.
*/
int reiserfs_dentry_to_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) {
    struct inode *inode = dentry->d_inode ;
    struct inode *parent ;
    int maxlen = *lenp ;

    if (maxlen < 3)
        return 255 ;

    data[0] = inode->i_ino ;
    data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
    data[2] = inode->i_generation ;
    *lenp = 3 ;

    /* no room for directory info? return what we've stored so far */
    if (maxlen < 5 || !need_parent)
        return 3 ;

    parent = dentry->d_parent->d_inode ;
    data[3] = parent->i_ino ;
    data[4] = le32_to_cpu(INODE_PKEY (parent)->k_dir_id) ;
    *lenp = 5 ;

    if (maxlen < 6)
        return 5 ;

    data[5] = parent->i_generation ;
    *lenp = 6 ;
    return 6 ;
}
1408
 
1409
 
1410
/* looks for stat data, then copies fields to it, marks the buffer
1411
   containing stat data as dirty */
1412
/* reiserfs inodes are never really dirty, since the dirty inode call
1413
** always logs them.  This call allows the VFS inode marking routines
1414
** to properly mark inodes for datasync and such, but only actually
1415
** does something when called for a synchronous update.
1416
*/
1417
/* VFS write_inode: push the stat data to disk.  Only does real work
** for a synchronous request that is not driven by memory pressure --
** asynchronous updates are already covered by the journal.
*/
void reiserfs_write_inode (struct inode * inode, int do_sync) {
    struct reiserfs_transaction_handle th ;
    int jbegin_count = 1 ;

    if (inode->i_sb->s_flags & MS_RDONLY) {
        reiserfs_warning(inode->i_sb, "clm-6005: writing inode %lu on readonly FS\n",
                          inode->i_ino) ;
        return ;
    }
    /* memory pressure can sometimes initiate write_inode calls with sync == 1,
    ** these cases are just when the system needs ram, not when the
    ** inode needs to reach disk for safety, and they can safely be
    ** ignored because the altered inode has already been logged.
    */
    if (do_sync && !(current->flags & PF_MEMALLOC)) {
        lock_kernel() ;
        journal_begin(&th, inode->i_sb, jbegin_count) ;
        reiserfs_update_sd (&th, inode);
        /* sync commit so the stat data really reaches disk */
        journal_end_sync(&th, inode->i_sb, jbegin_count) ;
        unlock_kernel() ;
    }
}
1439
 
1440
/* FIXME: no need any more. right? */
1441
/* Update the on-disk stat data inside an existing transaction.
** Always returns 0. */
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode)
{
  reiserfs_update_sd (th, inode);
  return 0;
}
1448
 
1449
 
1450
/* stat data of new object is inserted already, this inserts the item
1451
   containing "." and ".." entries */
1452
/* The stat data of the new directory is already inserted; this builds
** and inserts the directory item containing the "." and ".." entries.
** @ih carries the new object's key; @dir is the parent (needed for the
** ".." entry).  Returns 0 or a negative error. */
static int reiserfs_new_directory (struct reiserfs_transaction_handle *th,
                                   struct item_head * ih, struct path * path,
                                   const struct inode * dir)
{
    struct super_block * sb = th->t_super;
    char empty_dir [EMPTY_DIR_SIZE];
    char * body = empty_dir;
    struct cpu_key key;
    int retval;

    _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id),
                   le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/);

    /* compose item head for new item. Directories consist of items of
       old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
       is done by reiserfs_new_inode */
    if (old_format_only (sb)) {
        make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);

        make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
                                INODE_PKEY (dir)->k_dir_id,
                                INODE_PKEY (dir)->k_objectid );
    } else {
        make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);

        make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
                                INODE_PKEY (dir)->k_dir_id,
                                INODE_PKEY (dir)->k_objectid );
    }

    /* look for place in the tree for new item */
    retval = search_item (sb, &key, path);
    if (retval == IO_ERROR) {
        reiserfs_warning (sb, "vs-13080: reiserfs_new_directory: "
                          "i/o failure occurred creating new directory\n");
        return -EIO;
    }
    if (retval == ITEM_FOUND) {
        pathrelse (path);
        reiserfs_warning (sb, "vs-13070: reiserfs_new_directory: "
                          "object with this key exists (%k)\n", &(ih->ih_key));
        return -EEXIST;
    }

    /* insert item, that is empty directory item */
    return reiserfs_insert_item (th, path, &key, ih, body);
}
1499
 
1500
 
1501
/* stat data of object has been inserted, this inserts the item
1502
   containing the body of symlink */
1503
static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
1504
                                 struct item_head * ih,
1505
                                 struct path * path, const char * symname, int item_len)
1506
{
1507
    struct super_block * sb = th->t_super;
1508
    struct cpu_key key;
1509
    int retval;
1510
 
1511
    _make_cpu_key (&key, KEY_FORMAT_3_5,
1512
                   le32_to_cpu (ih->ih_key.k_dir_id),
1513
                   le32_to_cpu (ih->ih_key.k_objectid),
1514
                   1, TYPE_DIRECT, 3/*key length*/);
1515
 
1516
    make_le_item_head (ih, 0, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/);
1517
 
1518
    /* look for place in the tree for new item */
1519
    retval = search_item (sb, &key, path);
1520
    if (retval == IO_ERROR) {
1521
        reiserfs_warning (sb, "vs-13080: reiserfs_new_symlinik: "
1522
                          "i/o failure occurred creating new symlink\n");
1523
        return -EIO;
1524
    }
1525
    if (retval == ITEM_FOUND) {
1526
        pathrelse (path);
1527
        reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: "
1528
                          "object with this key exists (%k)\n", &(ih->ih_key));
1529
        return -EEXIST;
1530
    }
1531
 
1532
    /* insert item, that is body of symlink */
1533
    return reiserfs_insert_item (th, path, &key, ih, symname);
1534
}
1535
 
1536
 
1537
/* inserts the stat data into the tree, and then calls
1538
   reiserfs_new_directory (to insert ".", ".." item if new object is
1539
   directory) or reiserfs_new_symlink (to insert symlink body if new
1540
   object is symlink) or nothing (if new object is regular file)
1541
 
1542
   NOTE! uid and gid must already be set in the inode.  If we return
1543
   non-zero due to an error, we have to drop the quota previously allocated
1544
   for the fresh inode.  This can only be done outside a transaction, so
1545
   if we return non-zero, we also end the transaction.
1546
 
1547
   */
1548
int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
                                struct inode * dir, int mode,
                                const char * symname,
                                /* 0 for regular, EMTRY_DIR_SIZE for dirs,
                                   strlen (symname) for symlinks) */
                                int i_size,
                                struct dentry *dentry,
                                struct inode *inode)
{
    struct super_block * sb;
    INITIALIZE_PATH (path_to_key);
    struct cpu_key key;
    struct item_head ih;
    struct stat_data sd;
    int retval;
    int err ;

    /* refuse to create anything under a deleted or missing directory */
    if (!dir || !dir->i_nlink) {
        err = -EPERM ;
        goto out_bad_inode ;
    }

    sb = dir->i_sb;
    /* inherit the directory's attr bits (masked) into the new inode */
    inode -> u.reiserfs_i.i_attrs =
            dir -> u.reiserfs_i.i_attrs & REISERFS_INHERIT_MASK;
    sd_attrs_to_i_attrs( inode -> u.reiserfs_i.i_attrs, inode );

    /* symlink cannot be immutable or append only, right? */
    if( S_ISLNK( inode -> i_mode ) )
            inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND );

    /* item head of new item: parent's objectid becomes our dir_id,
    ** and we grab a fresh objectid inside the running transaction */
    ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
    ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
    if (!ih.ih_key.k_objectid) {
        /* objectid map exhausted / allocation failed */
        err = -ENOMEM ;
        goto out_bad_inode ;
    }
    if (old_format_only (sb))
      /* not a perfect generation count, as object ids can be reused, but this
      ** is as good as reiserfs can do right now.
      ** note that the private part of inode isn't filled in yet, we have
      ** to use the directory.
      */
      inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid);
    else
#if defined( USE_INODE_GENERATION_COUNTER )
      inode->i_generation =
        le32_to_cpu( sb -> u.reiserfs_sb.s_rs -> s_inode_generation );
#else
      inode->i_generation = ++event;
#endif
    /* fill stat data: a new directory starts with 2 links ("." and the
    ** parent's entry), everything else with 1 */
    inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);

    /* uid and gid must already be set by the caller for quota init */

    inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
    inode->i_size = i_size;
    /* i_blocks counted in 512-byte sectors, rounded up */
    inode->i_blocks = (inode->i_size + 511) >> 9;
    inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 1 :
      U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/;

    INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;

    /* stat-data item layout depends on the on-disk format version */
    if (old_format_only (sb))
        make_le_item_head (&ih, 0, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
    else
        make_le_item_head (&ih, 0, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);

    /* key to search for correct place for new stat data */
    _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
                   le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);

    /* find proper place for inserting of stat data */
    retval = search_item (sb, &key, &path_to_key);
    if (retval == IO_ERROR) {
        err = -EIO;
        goto out_bad_inode;
    }
    if (retval == ITEM_FOUND) {
        /* fresh objectid collides with an existing item - should not happen */
        pathrelse (&path_to_key);
        err = -EEXIST;
        goto out_bad_inode;
    }

    if (old_format_only (sb)) {
        if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
            pathrelse (&path_to_key);
            /* i_uid or i_gid is too big to be stored in stat data v3.5 */
            err = -EINVAL;
            goto out_bad_inode;
        }
        inode2sd_v1 (&sd, inode);
    } else
        inode2sd (&sd, inode);

    // these do not go to on-disk stat data
    inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
    inode->i_blksize = PAGE_SIZE;
    inode->i_dev = sb->s_dev;

    // store in in-core inode the key of stat data and version all
    // object items will have (directory items will have old offset
    // format, other new objects will consist of new items)
    memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
    if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode))
        set_inode_item_key_version (inode, KEY_FORMAT_3_5);
    else
        set_inode_item_key_version (inode, KEY_FORMAT_3_6);
    if (old_format_only (sb))
        set_inode_sd_version (inode, STAT_DATA_V1);
    else
        set_inode_sd_version (inode, STAT_DATA_V2);

    /* insert the stat data into the tree */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
    if (dir->u.reiserfs_i.new_packing_locality)
        th->displace_new_blocks = 1;
#endif
    retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
    if (retval) {
        reiserfs_check_path(&path_to_key) ;
        err = retval;
        goto out_bad_inode;
    }

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
    if (!th->displace_new_blocks)
        dir->u.reiserfs_i.new_packing_locality = 0;
#endif
    if (S_ISDIR(mode)) {
        /* insert item with "." and ".." */
        retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);
    }

    if (S_ISLNK(mode)) {
        /* insert body of symlink */
        if (!old_format_only (sb))
            i_size = ROUND_UP(i_size);
        retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
    }
    if (retval) {
        /* stat data is already in the tree, so take the path that
        ** also drops the on-disk object via iput() below */
        err = retval;
        reiserfs_check_path(&path_to_key) ;
        journal_end(th, th->t_super, th->t_blocks_allocated) ;
        goto out_inserted_sd;
    }

    insert_inode_hash (inode);
    reiserfs_update_sd(th, inode) ;
    reiserfs_check_path(&path_to_key) ;

    return 0;
out_bad_inode:
    /* Invalidate the object, nothing was inserted yet */
    INODE_PKEY(inode)->k_objectid = 0;

    /* dquot_drop must be done outside a transaction */
    journal_end(th, th->t_super, th->t_blocks_allocated) ;
    make_bad_inode(inode);

out_inserted_sd:
    inode->i_nlink = 0;
    th->t_trans_id = 0 ; /* so the caller can't use this handle later */
    iput(inode) ;
    return err;
}
1716
 
1717
/*
1718
** finds the tail page in the page cache,
1719
** reads the last block in.
1720
**
1721
** On success, page_result is set to a locked, pinned page, and bh_result
1722
** is set to an up to date buffer for the last block in the file.  returns 0.
1723
**
1724
** tail conversion is not done, so bh_result might not be valid for writing
1725
** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1726
** trying to write the block.
1727
**
1728
** on failure, nonzero is returned, page_result and bh_result are untouched.
1729
*/
1730
/* Find and pin the page holding the last byte of the file, and locate the
** buffer head for the last block within it.  On success (return 0) the page
** is left locked and referenced and *page_result/*bh_result are set; the
** caller is responsible for UnlockPage + page_cache_release.  On failure the
** output pointers are untouched.
*/
static int grab_tail_page(struct inode *p_s_inode,
                          struct page **page_result,
                          struct buffer_head **bh_result) {

    /* we want the page with the last byte in the file,
    ** not the page that will hold the next byte for appending
    */
    unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ;
    unsigned long pos = 0 ;
    unsigned long start = 0 ;
    unsigned long blocksize = p_s_inode->i_sb->s_blocksize ;
    unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ;
    struct buffer_head *bh ;
    struct buffer_head *head ;
    struct page * page ;
    int error ;

    /* we know that we are only called with inode->i_size > 0.
    ** we also know that a file tail can never be as big as a block
    ** If i_size % blocksize == 0, our file is currently block aligned
    ** and it won't need converting or zeroing after a truncate.
    */
    if ((offset & (blocksize - 1)) == 0) {
        return -ENOENT ;
    }
    page = grab_cache_page(p_s_inode->i_mapping, index) ;
    error = -ENOMEM ;
    if (!page) {
        goto out ;
    }
    /* start within the page of the last block in the file */
    start = (offset / blocksize) * blocksize ;

    /* read the last block in (no tail conversion - create_0 only maps) */
    error = block_prepare_write(page, start, offset,
                                reiserfs_get_block_create_0) ;
    if (error)
        goto unlock ;

    kunmap(page) ; /* mapped by block_prepare_write */

    /* walk the page's buffer ring until we reach offset 'start' */
    head = page->buffers ;
    bh = head;
    do {
        if (pos >= start) {
            break ;
        }
        bh = bh->b_this_page ;
        pos += blocksize ;
    } while(bh != head) ;

    if (!buffer_uptodate(bh)) {
        /* note, this should never happen, prepare_write should
        ** be taking care of this for us.  If the buffer isn't up to date,
        ** I've screwed up the code to find the buffer, or the code to
        ** call prepare_write
        */
        reiserfs_warning(p_s_inode->i_sb, "clm-6000: error reading block %lu\n",
                          bh->b_blocknr) ;
        error = -EIO ;
        goto unlock ;
    }
    *bh_result = bh ;
    *page_result = page ;

out:
    /* success path: error is 0 here and the page stays locked/pinned */
    return error ;

unlock:
    UnlockPage(page) ;
    page_cache_release(page) ;
    return error ;
}
1802
 
1803
/*
1804
** vfs version of truncate file.  Must NOT be called with
1805
** a transaction already started.
1806
**
1807
** some code taken from block_truncate_page
1808
*/
1809
void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
    struct reiserfs_transaction_handle th ;
    int windex ;

    /* we want the offset for the first byte after the end of the file */
    unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ;
    unsigned blocksize = p_s_inode->i_sb->s_blocksize ;
    unsigned length ;
    struct page *page = NULL ;
    int error ;
    struct buffer_head *bh = NULL ;

    if (p_s_inode->i_size > 0) {
        if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
            // -ENOENT means we truncated past the end of the file, 
            // and get_block_create_0 could not find a block to read in,
            // which is ok.
            if (error != -ENOENT)
                reiserfs_warning(p_s_inode->i_sb, "clm-6001: grab_tail_page failed %d\n", error);
            page = NULL ;
            bh = NULL ;
        }
    }

    /* so, if page != NULL, we have a buffer head for the offset at
    ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
    ** then we have an unformatted node.  Otherwise, we have a direct item,
    ** and no zeroing is required on disk.  We zero after the truncate,
    ** because the truncate might pack the item anyway
    ** (it will unmap bh if it packs).
    */
    /* it is enough to reserve space in transaction for 2 balancings:
       one for "save" link adding and another for the first
       cut_from_item. 1 is for update_sd */
    journal_begin(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
    reiserfs_update_inode_transaction(p_s_inode) ;
    windex = push_journal_writer("reiserfs_vfs_truncate_file") ;
    if (update_timestamps)
            /* we are doing real truncate: if the system crashes before the last
               transaction of truncating gets committed - on reboot the file
               either appears truncated properly or not truncated at all */
        add_save_link (&th, p_s_inode, 1);
    reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
    pop_journal_writer(windex) ;
    journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;

    /* save link can only be removed after the transaction is committed */
    if (update_timestamps)
        remove_save_link (p_s_inode, 1/* truncate */);

    if (page) {
        /* zero the part of the last block beyond the new EOF */
        length = offset & (blocksize - 1) ;
        /* if we are not on a block boundary */
        if (length) {
            length = blocksize - length ;
            memset((char *)kmap(page) + offset, 0, length) ;
            flush_dcache_page(page) ;
            kunmap(page) ;
            /* only dirty the buffer if it still maps a real disk block;
            ** a packed tail was unmapped by the truncate above */
            if (buffer_mapped(bh) && bh->b_blocknr != 0) {
                if (!atomic_set_buffer_dirty(bh)) {
                        set_buffer_flushtime(bh);
                        refile_buffer(bh);
                        buffer_insert_inode_data_queue(bh, p_s_inode);
                        balance_dirty();
                }
            }
        }
        /* release the page grab_tail_page left locked and pinned */
        UnlockPage(page) ;
        page_cache_release(page) ;
    }

    return ;
}
1881
 
1882
/* Map one page-sized buffer for writepage.  If the block lives in an
** unformatted node, bh_result is mapped to that disk block.  If it lives in
** a direct item (tail), the page's data is copied into the item and logged
** instead, and bh_result is left mapped to block 0.  Holes fall through to
** reiserfs_get_block.  Returns 0 on success or a negative errno.
*/
static int map_block_for_writepage(struct inode *inode,
                               struct buffer_head *bh_result,
                               unsigned long block) {
    struct reiserfs_transaction_handle th ;
    int fs_gen ;
    struct item_head tmp_ih ;
    struct item_head *ih ;
    struct buffer_head *bh ;
    __u32 *item ;
    struct cpu_key key ;
    INITIALIZE_PATH(path) ;
    int pos_in_item ;
    int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
    /* reiserfs byte offsets are 1-based, hence the +1 */
    loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
    int retval ;
    int use_get_block = 0 ;
    int bytes_copied = 0 ;
    int copy_size ;

    /* keep the page mapped for the whole (possibly repeated) copy loop */
    kmap(bh_result->b_page) ;
start_over:
    lock_kernel() ;
    journal_begin(&th, inode->i_sb, jbegin_count) ;
    reiserfs_update_inode_transaction(inode) ;

    make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;

research:
    retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
    if (retval != POSITION_FOUND) {
        /* nothing at this offset: treat as a hole, fill in below */
        use_get_block = 1;
        goto out ;
    }

    bh = get_last_bh(&path) ;
    ih = get_ih(&path) ;
    item = get_item(&path) ;
    pos_in_item = path.pos_in_item ;

    /* we've found an unformatted node */
    if (indirect_item_found(retval, ih)) {
        if (bytes_copied > 0) {
            /* should not happen: a partial direct-item copy was followed
            ** by an indirect item at the continuation offset */
            reiserfs_warning(inode->i_sb, "clm-6002: bytes_copied %d\n", bytes_copied) ;
        }
        if (!get_block_num(item, pos_in_item)) {
            /* crap, we are writing to a hole */
            use_get_block = 1;
            goto out ;
        }
        set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
        mark_buffer_uptodate(bh_result, 1);
    } else if (is_direct_le_ih(ih)) {
        /* tail: copy page data directly into the item and log it */
        char *p ;
        p = page_address(bh_result->b_page) ;
        p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
        copy_size = ih_item_len(ih) - pos_in_item;

        fs_gen = get_generation(inode->i_sb) ;
        copy_item_head(&tmp_ih, ih) ;
        reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
        /* the tree may have rebalanced while we prepared the buffer;
        ** if our item moved, undo and redo the search */
        if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
            reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
            goto research;
        }

        memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;

        journal_mark_dirty(&th, inode->i_sb, bh) ;
        bytes_copied += copy_size ;
        set_block_dev_mapped(bh_result, 0, inode);
        mark_buffer_uptodate(bh_result, 1);

        /* are there still bytes left? */
        if (bytes_copied < bh_result->b_size &&
            (byte_offset + bytes_copied) < inode->i_size) {
            set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
            goto research ;
        }
    } else {
        reiserfs_warning(inode->i_sb, "clm-6003: bad item inode %lu\n", inode->i_ino) ;
        retval = -EIO ;
        goto out ;
    }
    retval = 0 ;

out:
    pathrelse(&path) ;
    journal_end(&th, inode->i_sb, jbegin_count) ;
    unlock_kernel() ;

    /* this is where we fill in holes in the file. */
    if (use_get_block) {
        retval = reiserfs_get_block(inode, block, bh_result,
                                    GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ;
        if (!retval) {
            if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
                /* get_block failed to find a mapped unformatted node. */
                use_get_block = 0 ;
                goto start_over ;
            }
        }
    }
    kunmap(bh_result->b_page) ;
    return retval ;
}
1987
 
1988
/* helper func to get a buffer head ready for writepage to send to
1989
** ll_rw_block
1990
*/
1991
static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) {
1992
    struct buffer_head *bh ;
1993
    int i;
1994
 
1995
    /* lock them all first so the end_io handler doesn't unlock the page
1996
    ** too early
1997
    */
1998
    for(i = 0 ; i < nr ; i++) {
1999
        bh = bhp[i] ;
2000
        lock_buffer(bh) ;
2001
        set_buffer_async_io(bh) ;
2002
    }
2003
    for(i = 0 ; i < nr ; i++) {
2004
        /* submit_bh doesn't care if the buffer is dirty, but nobody
2005
        ** later on in the call chain will be cleaning it.  So, we
2006
        ** clean the buffer here, it still gets written either way.
2007
        */
2008
        bh = bhp[i] ;
2009
        clear_bit(BH_Dirty, &bh->b_state) ;
2010
        set_bit(BH_Uptodate, &bh->b_state) ;
2011
        submit_bh(WRITE, bh) ;
2012
    }
2013
}
2014
 
2015
/* Write out one page of a file.  Buffers backed by unformatted nodes are
** collected and submitted as block I/O; buffers backed by direct items are
** copied/logged by map_block_for_writepage and need no I/O here.  Returns 0
** on success or a negative errno; the page is unlocked either directly or
** by the async end_io handler of the submitted buffers.
*/
static int reiserfs_write_full_page(struct page *page) {
    struct inode *inode = page->mapping->host ;
    unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
    unsigned last_offset = PAGE_CACHE_SIZE;
    int error = 0;
    unsigned long block ;
    unsigned cur_offset = 0 ;
    struct buffer_head *head, *bh ;
    int partial = 0 ;
    /* worst case: one bh per 512-byte sector in the page */
    struct buffer_head *arr[PAGE_CACHE_SIZE/512] ;
    int nr = 0 ;

    if (!page->buffers) {
        /* attach buffer heads; the zero-length range maps nothing */
        block_prepare_write(page, 0, 0, NULL) ;
        kunmap(page) ;
    }
    /* last page in the file, zero out any contents past the
    ** last byte in the file
    */
    if (page->index >= end_index) {
        last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
        /* no file contents in this page */
        if (page->index >= end_index + 1 || !last_offset) {
            error =  -EIO ;
            goto fail ;
        }
        memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
        flush_dcache_page(page) ;
        kunmap(page) ;
    }
    head = page->buffers ;
    bh = head ;
    block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ;
    do {
        /* if this offset in the page is outside the file */
        if (cur_offset >= last_offset) {
            if (!buffer_uptodate(bh))
                partial = 1 ;
        } else {
            /* fast path, buffer mapped to an unformatted node */
            if (buffer_mapped(bh) && bh->b_blocknr != 0) {
                arr[nr++] = bh ;
            } else {
                /* buffer not mapped yet, or points to a direct item.
                ** search and dirty or log
                */
                if ((error = map_block_for_writepage(inode, bh, block))) {
                    goto fail ;
                }
                /* map_block_for_writepage either found an unformatted node
                ** and mapped it for us, or it found a direct item
                ** and logged the changes.
                */
                if (buffer_mapped(bh) && bh->b_blocknr != 0) {
                    arr[nr++] = bh ;
                }
            }
        }
        bh = bh->b_this_page ;
        /* note: this adds the size of the *next* bh; harmless here since
        ** all buffers on a page share the same b_size */
        cur_offset += bh->b_size ;
        block++ ;
    } while(bh != head) ;

    /* if this page only had a direct item, it is very possible for
    ** nr == 0 without there being any kind of error.
    */
    if (nr) {
        submit_bh_for_writepage(arr, nr) ;
        wakeup_page_waiters(page);
    } else {
        UnlockPage(page) ;
    }
    if (!partial)
        SetPageUptodate(page) ;

    return 0 ;

fail:
    /* still submit whatever was already collected so those buffers get
    ** written and the page eventually unlocked */
    if (nr) {
        submit_bh_for_writepage(arr, nr) ;
    } else {
        UnlockPage(page) ;
    }
    ClearPageUptodate(page) ;
    return error ;
}
2101
 
2102
 
2103
static int reiserfs_readpage (struct file *f, struct page * page)
2104
{
2105
    return block_read_full_page (page, reiserfs_get_block);
2106
}
2107
 
2108
 
2109
static int reiserfs_writepage (struct page * page)
2110
{
2111
    struct inode *inode = page->mapping->host ;
2112
    reiserfs_wait_on_write_block(inode->i_sb) ;
2113
    return reiserfs_write_full_page(page) ;
2114
}
2115
 
2116
 
2117
int reiserfs_prepare_write(struct file *f, struct page *page,
2118
                           unsigned from, unsigned to) {
2119
    struct inode *inode = page->mapping->host ;
2120
    reiserfs_wait_on_write_block(inode->i_sb) ;
2121
    fix_tail_page_for_writing(page) ;
2122
    return block_prepare_write(page, from, to, reiserfs_get_block) ;
2123
}
2124
 
2125
 
2126
static int reiserfs_aop_bmap(struct address_space *as, long block) {
2127
  return generic_block_bmap(as, block, reiserfs_bmap) ;
2128
}
2129
 
2130
/* address_space commit_write hook.  Like generic_commit_write, but when the
** write grows the file it updates i_size and the on-disk stat data inside a
** small transaction first, so the size change is journaled.  Also honors
** O_SYNC by committing the inode's transaction.  Returns the result of
** generic_commit_write.
*/
static int reiserfs_commit_write(struct file *f, struct page *page,
                                 unsigned from, unsigned to) {
    struct inode *inode = page->mapping->host ;
    loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
    int ret ;

    reiserfs_wait_on_write_block(inode->i_sb) ;

    /* generic_commit_write does this for us, but does not update the
    ** transaction tracking stuff when the size changes.  So, we have
    ** to do the i_size updates here.
    */
    if (pos > inode->i_size) {
        struct reiserfs_transaction_handle th ;
        lock_kernel();
        /* If the file have grown beyond the border where it
           can have a tail, unmark it as needing a tail
           packing */
        if ( (have_large_tails (inode->i_sb) && inode->i_size > block_size (inode)*4) ||
             (have_small_tails (inode->i_sb) && inode->i_size > block_size(inode)) )
            inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;

        /* one-block transaction: just the stat data update */
        journal_begin(&th, inode->i_sb, 1) ;
        reiserfs_update_inode_transaction(inode) ;
        inode->i_size = pos ;
        reiserfs_update_sd(&th, inode) ;
        journal_end(&th, inode->i_sb, 1) ;
        unlock_kernel();
    }

    ret = generic_commit_write(f, page, from, to) ;

    /* we test for O_SYNC here so we can commit the transaction
    ** for any packed tails the file might have had
    */
    if (f && (f->f_flags & O_SYNC)) {
        lock_kernel() ;
        reiserfs_commit_for_inode(inode) ;
        unlock_kernel();
    }
    return ret ;
}
2172
 
2173
/* Translate reiserfs on-disk attribute bits (sd_attrs) into the generic
** in-core inode flags.  Does nothing unless the filesystem was mounted
** with attribute support (reiserfs_attrs).
*/
void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode )
{
        if( !reiserfs_attrs( inode -> i_sb ) )
                return;

        /* mirror each on-disk attr bit into the matching VFS i_flags bit */
        inode -> i_flags = ( sd_attrs & REISERFS_SYNC_FL ) ?
                ( inode -> i_flags | S_SYNC ) : ( inode -> i_flags & ~S_SYNC );
        inode -> i_flags = ( sd_attrs & REISERFS_IMMUTABLE_FL ) ?
                ( inode -> i_flags | S_IMMUTABLE ) : ( inode -> i_flags & ~S_IMMUTABLE );
        inode -> i_flags = ( sd_attrs & REISERFS_APPEND_FL ) ?
                ( inode -> i_flags | S_APPEND ) : ( inode -> i_flags & ~S_APPEND );
        inode -> i_flags = ( sd_attrs & REISERFS_NOATIME_FL ) ?
                ( inode -> i_flags | S_NOATIME ) : ( inode -> i_flags & ~S_NOATIME );

        /* NOTAIL lives in the reiserfs-private flag word, not in i_flags */
        if( sd_attrs & REISERFS_NOTAIL_FL )
                inode->u.reiserfs_i.i_flags |= i_nopack_mask;
        else
                inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
}
2198
 
2199
/* Inverse of sd_attrs_to_i_attrs: fold the in-core inode flags back into
** the on-disk attribute word *sd_attrs.  Does nothing unless the
** filesystem was mounted with attribute support.
*/
void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs )
{
        if( !reiserfs_attrs( inode -> i_sb ) )
                return;

        /* mirror each VFS i_flags bit into the matching on-disk attr bit */
        *sd_attrs = ( inode -> i_flags & S_IMMUTABLE ) ?
                ( *sd_attrs | REISERFS_IMMUTABLE_FL ) : ( *sd_attrs & ~REISERFS_IMMUTABLE_FL );
        *sd_attrs = ( inode -> i_flags & S_SYNC ) ?
                ( *sd_attrs | REISERFS_SYNC_FL ) : ( *sd_attrs & ~REISERFS_SYNC_FL );
        *sd_attrs = ( inode -> i_flags & S_NOATIME ) ?
                ( *sd_attrs | REISERFS_NOATIME_FL ) : ( *sd_attrs & ~REISERFS_NOATIME_FL );

        /* NOTAIL comes from the reiserfs-private flag word */
        if( inode->u.reiserfs_i.i_flags & i_nopack_mask )
                *sd_attrs |= REISERFS_NOTAIL_FL;
        else
                *sd_attrs &= ~REISERFS_NOTAIL_FL;
}
2220
 
2221
static int reiserfs_direct_io(int rw, struct inode *inode,
2222
                              struct kiobuf *iobuf, unsigned long blocknr,
2223
                              int blocksize)
2224
{
2225
    lock_kernel();
2226
    reiserfs_commit_for_tail(inode);
2227
    unlock_kernel();
2228
    return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize,
2229
                             reiserfs_get_block_direct_io) ;
2230
}
2231
 
2232
struct address_space_operations reiserfs_address_space_operations = {
2233
    writepage: reiserfs_writepage,
2234
    readpage: reiserfs_readpage,
2235
    sync_page: block_sync_page,
2236
    prepare_write: reiserfs_prepare_write,
2237
    commit_write: reiserfs_commit_write,
2238
    bmap: reiserfs_aop_bmap,
2239
    direct_IO: reiserfs_direct_io,
2240
} ;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.