URL
https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
Compare Revisions
- This comparison shows the changes necessary to convert path
/or1k/tags/LINUX_2_4_26_OR32/linux/linux-2.4/fs/intermezzo
- from Rev 1279 to Rev 1765
- ↔ Reverse comparison
Rev 1279 → Rev 1765
/super.c
0,0 → 1,402
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com> |
* Copyright (C) 2000 Stelias Computing, Inc. |
* Copyright (C) 2000 Red Hat, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* presto's super.c |
*/ |
|
/* RCS identification string; marked unused so the compiler does not
 * complain that nothing references it. */
static char rcsid[] __attribute__((unused)) =
        "$Id: super.c,v 1.1.1.1 2004-04-15 01:09:15 phoenix Exp $";

/* Revision tag pasted into the load banner and the module description. */
#define INTERMEZZO_VERSION "$Revision: 1.1.1.1 $"
|
#include <stdarg.h> |
|
#include <asm/bitops.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
#include <linux/errno.h> |
#include <linux/fs.h> |
#include <linux/ext2_fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/sched.h> |
#include <linux/stat.h> |
#include <linux/string.h> |
#include <linux/locks.h> |
#include <linux/blkdev.h> |
#include <linux/init.h> |
#include <linux/devfs_fs_kernel.h> |
#define __NO_VERSION__ |
#include <linux/module.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#ifdef PRESTO_DEBUG
/* Counters printed as "kmem"/"vmem" by the diagnostic messages in this
 * file.  Presumably maintained by PRESTO_ALLOC/PRESTO_FREE -- confirm
 * against the macro definitions. */
long presto_kmemory = 0;
long presto_vmemory = 0;
#endif
|
/* returns an allocated string, copied out from data if opt is found */ |
static char *opt_read(const char *opt, char *data) |
{ |
char *value; |
char *retval; |
|
CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); |
if ( strncmp(opt, data, strlen(opt)) ) |
return NULL; |
|
if ( (value = strchr(data, '=')) == NULL ) |
return NULL; |
|
value++; |
PRESTO_ALLOC(retval, strlen(value) + 1); |
if ( !retval ) { |
CERROR("InterMezzo: Out of memory!\n"); |
return NULL; |
} |
|
strcpy(retval, value); |
CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); |
return retval; |
} |
|
/*
 * opt_store - install a freshly allocated option value.
 * @dst: slot holding the current value (may hold NULL), or NULL to
 *       discard the option
 * @opt: new value allocated by opt_read(); ownership passes to *@dst
 *
 * Any previous value in *@dst is freed first.  When @dst is NULL the new
 * value is freed rather than stored: the original code logged the
 * problem and then dereferenced the NULL pointer anyway.
 */
static void opt_store(char **dst, char *opt)
{
        if (!dst) {
                CERROR("intermezzo: opt_store, error dst == NULL\n");
                /* nowhere to keep the value: drop it instead of leaking */
                if (opt)
                        PRESTO_FREE(opt, strlen(opt) + 1);
                return;
        }

        if (*dst)
                PRESTO_FREE(*dst, strlen(*dst) + 1);
        *dst = opt;
}
|
/*
 * opt_set_default - reset an option slot to a copy of a default string.
 * @dst:    slot to (re)initialise; a NULL slot is reported and ignored
 * @defval: default value to duplicate, or NULL to leave the slot empty
 *
 * The previous value, if any, is freed.  On return *@dst is either a
 * private copy of @defval or NULL (no default given, or out of memory);
 * it is never left pointing at the freed string.
 */
static void opt_set_default(char **dst, char *defval)
{
        char *def_alloced;

        if (!dst) {
                CERROR("intermezzo: opt_set_default, error dst == NULL\n");
                return;
        }

        if (*dst) {
                PRESTO_FREE(*dst, strlen(*dst) + 1);
                /* do not leave a dangling pointer if we bail out below */
                *dst = NULL;
        }

        if (!defval)
                return;

        PRESTO_ALLOC(def_alloced, strlen(defval) + 1);
        if (!def_alloced) {
                CERROR("InterMezzo: Out of memory!\n");
                return;
        }
        strcpy(def_alloced, defval);
        *dst = def_alloced;
}
|
|
/* Find the options for InterMezzo in "options", saving them into the |
* passed pointers. If the pointer is null, the option is discarded. |
* Copy out all non-InterMezzo options into cache_data (to be passed |
* to the read_super operation of the cache). The return value will |
* be a pointer to the end of the cache_data. |
*/ |
static char *presto_options(struct super_block *sb, |
char *options, char *cache_data, |
char **cache_type, char **fileset, |
char **channel) |
{ |
char *this_char; |
char *cache_data_end = cache_data; |
|
/* set the defaults */ |
if (strcmp(sb->s_type->name, "intermezzo") == 0) |
opt_set_default(cache_type, "ext3"); |
else |
opt_set_default(cache_type, "tmpfs"); |
|
if (!options || !cache_data) |
return cache_data_end; |
|
|
CDEBUG(D_SUPER, "parsing options\n"); |
for (this_char = strtok (options, ","); |
this_char != NULL; |
this_char = strtok (NULL, ",")) { |
char *opt; |
CDEBUG(D_SUPER, "this_char %s\n", this_char); |
|
if ( (opt = opt_read("fileset", this_char)) ) { |
opt_store(fileset, opt); |
continue; |
} |
if ( (opt = opt_read("cache_type", this_char)) ) { |
opt_store(cache_type, opt); |
continue; |
} |
if ( (opt = opt_read("channel", this_char)) ) { |
opt_store(channel, opt); |
continue; |
} |
|
cache_data_end += |
sprintf(cache_data_end, "%s%s", |
cache_data_end != cache_data ? ",":"", |
this_char); |
} |
|
return cache_data_end; |
} |
|
static int presto_set_channel(struct presto_cache *cache, char *channel) |
{ |
int minor; |
|
ENTRY; |
if (!channel) { |
minor = izo_psdev_get_free_channel(); |
} else { |
minor = simple_strtoul(channel, NULL, 0); |
} |
if (minor < 0 || minor >= MAX_CHANNEL) { |
CERROR("all channels in use or channel too large %d\n", |
minor); |
return -EINVAL; |
} |
|
cache->cache_psdev = &(izo_channels[minor]); |
list_add(&cache->cache_channel_list, |
&cache->cache_psdev->uc_cache_list); |
|
EXIT; |
return minor; |
} |
|
/* We always need to remove the presto options before passing |
mount options to cache FS */ |
struct super_block * presto_read_super(struct super_block * sb, |
void * data, int silent) |
{ |
struct file_system_type *fstype; |
struct presto_cache *cache = NULL; |
char *cache_data = NULL; |
char *cache_data_end; |
char *cache_type = NULL; |
char *fileset = NULL; |
char *channel = NULL; |
int err; |
unsigned int minor; |
|
ENTRY; |
|
/* reserve space for the cache's data */ |
PRESTO_ALLOC(cache_data, PAGE_SIZE); |
if ( !cache_data ) { |
CERROR("presto_read_super: Cannot allocate data page.\n"); |
EXIT; |
goto out_err; |
} |
|
/* read and validate options */ |
cache_data_end = presto_options(sb, data, cache_data, &cache_type, |
&fileset, &channel); |
|
/* was there anything for the cache filesystem in the data? */ |
if (cache_data_end == cache_data) { |
PRESTO_FREE(cache_data, PAGE_SIZE); |
cache_data = NULL; |
} else { |
CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data, |
cache_data); |
} |
|
/* set up the cache */ |
cache = presto_cache_init(); |
if ( !cache ) { |
CERROR("presto_read_super: failure allocating cache.\n"); |
EXIT; |
goto out_err; |
} |
cache->cache_type = cache_type; |
|
/* link cache to channel */ |
minor = presto_set_channel(cache, channel); |
if (minor < 0) { |
EXIT; |
goto out_err; |
} |
|
CDEBUG(D_SUPER, "Presto: type=%s, fset=%s, dev= %d, flags %x\n", |
cache_type, fileset?fileset:"NULL", minor, cache->cache_flags); |
|
MOD_INC_USE_COUNT; |
|
/* get the filter for the cache */ |
fstype = get_fs_type(cache_type); |
cache->cache_filter = filter_get_filter_fs((const char *)cache_type); |
if ( !fstype || !cache->cache_filter) { |
CERROR("Presto: unrecognized fs type or cache type\n"); |
MOD_DEC_USE_COUNT; |
EXIT; |
goto out_err; |
} |
|
/* can we in fact mount the cache */ |
if ((fstype->fs_flags & FS_REQUIRES_DEV) && !sb->s_bdev) { |
CERROR("filesystem \"%s\" requires a valid block device\n", |
cache_type); |
MOD_DEC_USE_COUNT; |
EXIT; |
goto out_err; |
} |
|
sb = fstype->read_super(sb, cache_data, silent); |
|
/* this might have been freed above */ |
if (cache_data) { |
PRESTO_FREE(cache_data, PAGE_SIZE); |
cache_data = NULL; |
} |
|
if ( !sb ) { |
CERROR("InterMezzo: cache mount failure.\n"); |
MOD_DEC_USE_COUNT; |
EXIT; |
goto out_err; |
} |
|
cache->cache_sb = sb; |
cache->cache_root = dget(sb->s_root); |
|
/* we now know the dev of the cache: hash the cache */ |
presto_cache_add(cache, sb->s_dev); |
err = izo_prepare_fileset(sb->s_root, fileset); |
|
filter_setup_journal_ops(cache->cache_filter, cache->cache_type); |
|
/* make sure we have our own super operations: sb |
still contains the cache operations */ |
filter_setup_super_ops(cache->cache_filter, sb->s_op, |
&presto_super_ops); |
sb->s_op = filter_c2usops(cache->cache_filter); |
|
/* get izo directory operations: sb->s_root->d_inode exists now */ |
filter_setup_dir_ops(cache->cache_filter, sb->s_root->d_inode, |
&presto_dir_iops, &presto_dir_fops); |
filter_setup_dentry_ops(cache->cache_filter, sb->s_root->d_op, |
&presto_dentry_ops); |
sb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter); |
sb->s_root->d_inode->i_fop = filter_c2udfops(cache->cache_filter); |
sb->s_root->d_op = filter_c2udops(cache->cache_filter); |
|
EXIT; |
return sb; |
|
out_err: |
CDEBUG(D_SUPER, "out_err called\n"); |
if (cache) |
PRESTO_FREE(cache, sizeof(struct presto_cache)); |
if (cache_data) |
PRESTO_FREE(cache_data, PAGE_SIZE); |
if (fileset) |
PRESTO_FREE(fileset, strlen(fileset) + 1); |
if (channel) |
PRESTO_FREE(channel, strlen(channel) + 1); |
if (cache_type) |
PRESTO_FREE(cache_type, strlen(cache_type) + 1); |
|
CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n", |
presto_kmemory, presto_vmemory); |
return NULL; |
} |
|
|
|
/*
 * Filesystem types served by presto_read_super().  "vintermezzo" is the
 * same in every build; the type that requires a block device is called
 * "izo" when PRESTO_DEVEL is defined and "intermezzo" otherwise.
 * NOTE(review): presto_options() picks the "ext3" default cache type only
 * for the name "intermezzo", so an "izo" mount defaults to "tmpfs" --
 * confirm that is intended.
 */
static DECLARE_FSTYPE(vpresto_fs_type, "vintermezzo", presto_read_super, FS_LITTER);
#ifdef PRESTO_DEVEL
static DECLARE_FSTYPE(presto_fs_type, "izo", presto_read_super, FS_REQUIRES_DEV);
#else
static DECLARE_FSTYPE(presto_fs_type, "intermezzo", presto_read_super, FS_REQUIRES_DEV);
#endif
|
|
|
/*
 * init_intermezzo_fs - bring up InterMezzo and register its filesystems.
 *
 * Initialises the psdev, sysctl entries, cache hash and dentry-data
 * cache, then registers both filesystem types.  A sysctl failure is
 * logged but does not abort initialisation.
 *
 * Returns 0 on success or a negative errno.  On failure everything set
 * up so far is torn down again; previously the partial state was left
 * behind and a failure to register the first filesystem type was masked
 * by the result of registering the second.
 *
 * NOTE(review): the unwind path reuses the cleanup helpers called by
 * exit_intermezzo_fs(); confirm none of them is marked __exit, which
 * would make them unavailable here in a built-in (non-module) kernel.
 */
int __init init_intermezzo_fs(void)
{
        int status;

        printk(KERN_INFO "InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION
               " info@clusterfs.com\n");

        status = presto_psdev_init();
        if ( status ) {
                CERROR("Problem (%d) in init_intermezzo_psdev\n", status);
                return status;
        }

        /* not fatal: carry on without the sysctl entries */
        status = init_intermezzo_sysctl();
        if (status) {
                CERROR("presto: failed in init_intermezzo_sysctl!\n");
        }

        presto_cache_init_hash();

        if (!presto_init_ddata_cache()) {
                CERROR("presto out of memory!\n");
                status = -ENOMEM;
                goto out_psdev;
        }

        status = register_filesystem(&presto_fs_type);
        if (status) {
                CERROR("presto: failed in register_filesystem!\n");
                goto out_ddata;
        }
        status = register_filesystem(&vpresto_fs_type);
        if (status) {
                CERROR("vpresto: failed in register_filesystem!\n");
                goto out_unregister;
        }
        return 0;

out_unregister:
        unregister_filesystem(&presto_fs_type);
out_ddata:
        presto_cleanup_ddata_cache();
out_psdev:
        /* same order as exit_intermezzo_fs() */
        presto_psdev_cleanup();
        cleanup_intermezzo_sysctl();
        return status;
}
|
/*
 * exit_intermezzo_fs - module unload: unregister both filesystem types
 * (logging, but otherwise ignoring, any failure), tear down the psdev,
 * sysctl entries and dentry-data cache, then report the memory counters.
 */
void __exit exit_intermezzo_fs(void)
{
        ENTRY;

        if (unregister_filesystem(&presto_fs_type) != 0)
                CERROR("presto: failed to unregister filesystem\n");

        if (unregister_filesystem(&vpresto_fs_type) != 0)
                CERROR("vpresto: failed to unregister filesystem\n");

        presto_psdev_cleanup();
        cleanup_intermezzo_sysctl();
        presto_cleanup_ddata_cache();
        CERROR("after cleanup: kmem %ld, vmem %ld\n",
               presto_kmemory, presto_vmemory);
}
|
|
MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>"); |
MODULE_DESCRIPTION("InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION); |
MODULE_LICENSE("GPL"); |
|
module_init(init_intermezzo_fs) |
module_exit(exit_intermezzo_fs) |
/vfs.c
0,0 → 1,2465
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2001, 2002 Cluster File Systems, Inc. |
* Copyright (C) 2000 Stelias Computing, Inc. |
* Copyright (C) 2000 Red Hat, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* vfs.c |
* |
* This file implements kernel downcalls from lento. |
* |
* Author: Rob Simmonds <simmonds@stelias.com> |
* Andreas Dilger <adilger@stelias.com> |
* Copyright (C) 2000 Stelias Computing Inc |
* Copyright (C) 2000 Red Hat Inc. |
* |
* Extended attribute support |
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. |
* |
* This code is based on code from namei.c in the linux file system; |
* see copyright notice below. |
*/ |
|
/** namei.c copyright **/ |
|
/* |
* linux/fs/namei.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
*/ |
|
/* |
* Some corrections by tytso. |
*/ |
|
/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname |
* lookup logic. |
*/ |
|
/** end of namei.c copyright **/ |
|
#include <linux/mm.h> |
#include <linux/proc_fs.h> |
#include <linux/smp_lock.h> |
#include <linux/quotaops.h> |
|
#include <asm/uaccess.h> |
#include <asm/unaligned.h> |
#include <asm/semaphore.h> |
#include <asm/pgtable.h> |
|
#include <linux/file.h> |
#include <linux/fs.h> |
#include <linux/blk.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#ifdef CONFIG_FS_EXT_ATTR |
# include <linux/ext_attr.h> |
|
# ifdef CONFIG_FS_POSIX_ACL |
# include <linux/posix_acl.h> |
# endif |
#endif |
|
extern struct inode_operations presto_sym_iops; |
|
/* Write the last_rcvd values to the last_rcvd file. We don't know what the |
* UUID or last_ctime values are, so we have to read from the file first |
* (sigh). |
* exported for branch_reinter in kml_reint.c*/ |
int presto_write_last_rcvd(struct rec_info *recinfo, |
struct presto_file_set *fset, |
struct lento_vfs_context *info) |
{ |
int rc; |
struct izo_rcvd_rec rcvd_rec; |
|
ENTRY; |
|
memset(&rcvd_rec, 0, sizeof(rcvd_rec)); |
memcpy(rcvd_rec.lr_uuid, info->uuid, sizeof(rcvd_rec.lr_uuid)); |
rcvd_rec.lr_remote_recno = HTON__u64(info->recno); |
rcvd_rec.lr_remote_offset = HTON__u64(info->kml_offset); |
rcvd_rec.lr_local_recno = HTON__u64(recinfo->recno); |
rcvd_rec.lr_local_offset = HTON__u64(recinfo->offset + recinfo->size); |
|
rc = izo_rcvd_write(fset, &rcvd_rec); |
if (rc < 0) { |
/* izo_rcvd_write returns negative errors and non-negative |
* offsets */ |
CERROR("InterMezzo: izo_rcvd_write failed: %d\n", rc); |
EXIT; |
return rc; |
} |
EXIT; |
return 0; |
} |
|
/* |
* It's inline, so penalty for filesystems that don't use sticky bit is |
* minimal. |
*/ |
static inline int check_sticky(struct inode *dir, struct inode *inode) |
{ |
if (!(dir->i_mode & S_ISVTX)) |
return 0; |
if (inode->i_uid == current->fsuid) |
return 0; |
if (dir->i_uid == current->fsuid) |
return 0; |
return !capable(CAP_FOWNER); |
} |
|
/* from linux/fs/namei.c */ |
static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) |
{ |
int error; |
if (!victim->d_inode || victim->d_parent->d_inode != dir) |
return -ENOENT; |
error = permission(dir,MAY_WRITE | MAY_EXEC); |
if (error) |
return error; |
if (IS_APPEND(dir)) |
return -EPERM; |
if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| |
IS_IMMUTABLE(victim->d_inode)) |
return -EPERM; |
if (isdir) { |
if (!S_ISDIR(victim->d_inode->i_mode)) |
return -ENOTDIR; |
if (IS_ROOT(victim)) |
return -EBUSY; |
} else if (S_ISDIR(victim->d_inode->i_mode)) |
return -EISDIR; |
return 0; |
} |
|
/* from linux/fs/namei.c */ |
static inline int may_create(struct inode *dir, struct dentry *child) { |
if (child->d_inode) |
return -EEXIST; |
if (IS_DEADDIR(dir)) |
return -ENOENT; |
return permission(dir,MAY_WRITE | MAY_EXEC); |
} |
|
#ifdef PRESTO_DEBUG
/* The loop_discard_io() function is available via a kernel patch to the
 * loop block device.  It "works" by accepting writes, but throwing them
 * away, rather than trying to write them to disk.  The old method worked
 * by setting the underlying device read-only, but that has the problem
 * that dirty buffers are kept in memory, and ext3 didn't like that at all.
 */
#ifdef CONFIG_LOOP_DISCARD
#define BLKDEV_FAIL(dev,fail) loop_discard_io(dev,fail)
#else
#define BLKDEV_FAIL(dev,fail) set_device_ro(dev, 1)
#endif

/* If a breakpoint has been set via /proc/sys/intermezzo/intermezzoX/errorval
 * that is the same as "value", the underlying device will "fail" now.
 * Afterwards the channel's errorval is overwritten with the negated
 * device number.  Nothing happens when no breakpoint is set, the value
 * differs, or the device is already read-only.
 */
inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
                                     unsigned long value)
{
        int minor = presto_f2m(fset);
        int errorval = izo_channels[minor].uc_errorval;
        kdev_t dev = fset->fset_dentry->d_inode->i_dev;

        if (!errorval || errorval != (long)value || is_read_only(dev))
                return;

        CDEBUG(D_SUPER, "setting device %s read only\n", kdevname(dev));
        BLKDEV_FAIL(dev, 1);
        izo_channels[minor].uc_errorval = -dev;
}
#else
#define presto_debug_fail_blkdev(dev,value) do {} while (0)
#endif
|
|
static inline int presto_do_kml(struct lento_vfs_context *info, |
struct dentry *dentry) |
{ |
if ( ! (info->flags & LENTO_FL_KML) ) |
return 0; |
if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) ) |
return 0; |
return 1; |
} |
|
static inline int presto_do_rcvd(struct lento_vfs_context *info, |
struct dentry *dentry) |
{ |
if ( ! (info->flags & LENTO_FL_EXPECT) ) |
return 0; |
if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) ) |
return 0; |
return 1; |
} |
|
|
/* XXX fixme: this should not fail, all these dentries are in memory |
when _we_ call this */ |
int presto_settime(struct presto_file_set *fset, |
struct dentry *newobj, |
struct dentry *parent, |
struct dentry *target, |
struct lento_vfs_context *ctx, |
int valid) |
{ |
int error = 0; |
struct dentry *dentry; |
struct inode *inode; |
struct inode_operations *iops; |
struct iattr iattr; |
|
ENTRY; |
if (ctx->flags & LENTO_FL_IGNORE_TIME ) { |
EXIT; |
return 0; |
} |
|
iattr.ia_ctime = ctx->updated_time; |
iattr.ia_mtime = ctx->updated_time; |
iattr.ia_valid = valid; |
|
while (1) { |
if (parent && ctx->flags & LENTO_FL_TOUCH_PARENT) { |
dentry = parent; |
parent = NULL; |
} else if (newobj && ctx->flags & LENTO_FL_TOUCH_NEWOBJ) { |
dentry = newobj; |
newobj = NULL; |
} else if (target) { |
dentry = target; |
target = NULL; |
} else |
break; |
|
inode = dentry->d_inode; |
|
error = -EROFS; |
if (IS_RDONLY(inode)) { |
EXIT; |
return -EROFS; |
} |
|
if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { |
EXIT; |
return -EPERM; |
} |
|
error = -EPERM; |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
if (!iops) { |
EXIT; |
return error; |
} |
|
if (iops->setattr != NULL) |
error = iops->setattr(dentry, &iattr); |
else { |
error = 0; |
inode_setattr(dentry->d_inode, &iattr); |
} |
} |
EXIT; |
return error; |
} |
|
void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb) |
{ |
rb->rb_mode = (__u32)inode->i_mode; |
rb->rb_rdev = (__u32)inode->i_rdev; |
rb->rb_uid = (__u64)inode->i_uid; |
rb->rb_gid = (__u64)inode->i_gid; |
} |
|
|
int presto_do_close(struct presto_file_set *fset, struct file *file) |
{ |
struct rec_info rec; |
int rc = -ENOSPC; |
void *handle; |
struct inode *inode = file->f_dentry->d_inode; |
struct presto_file_data *fdata = |
(struct presto_file_data *)file->private_data; |
|
ENTRY; |
presto_getversion(&fdata->fd_info.remote_version, inode); |
|
rc = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); |
if (rc) { |
EXIT; |
return rc; |
} |
|
handle = presto_trans_start(fset, file->f_dentry->d_inode, |
KML_OPCODE_RELEASE); |
if ( IS_ERR(handle) ) { |
CERROR("presto_release: no space for transaction\n"); |
return rc; |
} |
|
if (fdata->fd_info.flags & LENTO_FL_KML) |
rc = presto_journal_close(&rec, fset, file, file->f_dentry, |
&fdata->fd_version, |
&fdata->fd_info.remote_version); |
if (rc) { |
CERROR("presto_close: cannot journal close\n"); |
goto out; |
} |
|
if (fdata->fd_info.flags & LENTO_FL_EXPECT) |
rc = presto_write_last_rcvd(&rec, fset, &fdata->fd_info); |
|
if (rc) { |
CERROR("presto_close: cannot journal last_rcvd\n"); |
goto out; |
} |
presto_trans_commit(fset, handle); |
|
/* cancel the LML record */ |
handle = presto_trans_start(fset, inode, KML_OPCODE_WRITE); |
if ( IS_ERR(handle) ) { |
CERROR("presto_release: no space for clear\n"); |
return -ENOSPC; |
} |
|
rc = presto_clear_lml_close(fset, fdata->fd_lml_offset); |
if (rc < 0 ) { |
CERROR("presto_close: cannot journal close\n"); |
goto out; |
} |
presto_truncate_lml(fset); |
|
out: |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
presto_trans_commit(fset, handle); |
EXIT; |
return rc; |
} |
|
int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry, |
struct iattr *iattr, struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
struct inode *inode = dentry->d_inode; |
struct inode_operations *iops; |
int error; |
struct presto_version old_ver, new_ver; |
struct izo_rollback_data rb; |
void *handle; |
loff_t old_size=inode->i_size; |
|
ENTRY; |
error = -EROFS; |
if (IS_RDONLY(inode)) { |
EXIT; |
return -EROFS; |
} |
|
if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { |
EXIT; |
return -EPERM; |
} |
|
presto_getversion(&old_ver, dentry->d_inode); |
izo_get_rollback_data(dentry->d_inode, &rb); |
error = -EPERM; |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
|
error = presto_reserve_space(fset->fset_cache, 2*PRESTO_REQHIGH); |
if (error) { |
EXIT; |
return error; |
} |
|
if (iattr->ia_valid & ATTR_SIZE) { |
if (izo_mark_dentry(dentry, ~PRESTO_DATA, 0, NULL) != 0) |
CERROR("izo_mark_dentry(inode %ld, ~PRESTO_DATA) " |
"failed\n", dentry->d_inode->i_ino); |
handle = presto_trans_start(fset, dentry->d_inode, |
KML_OPCODE_TRUNC); |
} else { |
handle = presto_trans_start(fset, dentry->d_inode, |
KML_OPCODE_SETATTR); |
} |
|
if ( IS_ERR(handle) ) { |
CERROR("presto_do_setattr: no space for transaction\n"); |
presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); |
return -ENOSPC; |
} |
|
if (dentry->d_inode && iops && iops->setattr) { |
error = iops->setattr(dentry, iattr); |
} else { |
error = inode_change_ok(dentry->d_inode, iattr); |
if (!error) |
inode_setattr(inode, iattr); |
} |
|
if (!error && (iattr->ia_valid & ATTR_SIZE)) |
vmtruncate(inode, iattr->ia_size); |
|
if (error) { |
EXIT; |
goto exit; |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x10); |
|
if ( presto_do_kml(info, dentry) ) { |
if ((iattr->ia_valid & ATTR_SIZE) && (old_size != inode->i_size)) { |
struct file file; |
/* Journal a close whenever we see a potential truncate |
* At the receiving end, lento should explicitly remove |
* ATTR_SIZE from the list of valid attributes */ |
presto_getversion(&new_ver, inode); |
file.private_data = NULL; |
file.f_dentry = dentry; |
error = presto_journal_close(&rec, fset, &file, dentry, |
&old_ver, &new_ver); |
} |
|
if (!error) |
error = presto_journal_setattr(&rec, fset, dentry, |
&old_ver, &rb, iattr); |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x20); |
if ( presto_do_rcvd(info, dentry) ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x30); |
|
EXIT; |
exit: |
presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); |
presto_trans_commit(fset, handle); |
return error; |
} |
|
int lento_setattr(const char *name, struct iattr *iattr, |
struct lento_vfs_context *info) |
{ |
struct nameidata nd; |
struct dentry *dentry; |
struct presto_file_set *fset; |
int error; |
#ifdef CONFIG_FS_POSIX_ACL |
int (*set_posix_acl)(struct inode *, int type, posix_acl_t *)=NULL; |
#endif |
|
ENTRY; |
CDEBUG(D_PIOCTL,"name %s, valid %#x, mode %#o, uid %d, gid %d, size %Ld\n", |
name, iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, |
iattr->ia_gid, iattr->ia_size); |
CDEBUG(D_PIOCTL, "atime %#lx, mtime %#lx, ctime %#lx, attr_flags %#x\n", |
iattr->ia_atime, iattr->ia_mtime, iattr->ia_ctime, |
iattr->ia_attr_flags); |
CDEBUG(D_PIOCTL, "offset %d, recno %d, flags %#x\n", |
info->slot_offset, info->recno, info->flags); |
|
lock_kernel(); |
error = presto_walk(name, &nd); |
if (error) { |
EXIT; |
goto exit; |
} |
dentry = nd.dentry; |
|
fset = presto_fset(dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto exit_lock; |
} |
|
/* NOTE: this prevents us from changing the filetype on setattr, |
* as we normally only want to change permission bits. |
* If this is not correct, then we need to fix the perl code |
* to always send the file type OR'ed with the permission. |
*/ |
if (iattr->ia_valid & ATTR_MODE) { |
int set_mode = iattr->ia_mode; |
iattr->ia_mode = (iattr->ia_mode & S_IALLUGO) | |
(dentry->d_inode->i_mode & ~S_IALLUGO); |
CDEBUG(D_PIOCTL, "chmod: orig %#o, set %#o, result %#o\n", |
dentry->d_inode->i_mode, set_mode, iattr->ia_mode); |
#ifdef CONFIG_FS_POSIX_ACL |
/* ACl code interacts badly with setattr |
* since it tries to modify the ACL using |
* set_ext_attr which recurses back into presto. |
* This only happens if ATTR_MODE is set. |
* Here we are doing a "forced" mode set |
* (initiated by lento), so we disable the |
* set_posix_acl operation which |
* prevents such recursion. -SHP |
* |
* This will probably still be required when native |
* acl journalling is in place. |
*/ |
set_posix_acl=dentry->d_inode->i_op->set_posix_acl; |
dentry->d_inode->i_op->set_posix_acl=NULL; |
#endif |
} |
|
error = presto_do_setattr(fset, dentry, iattr, info); |
|
if (info->flags & LENTO_FL_SET_DDFILEID) { |
struct presto_dentry_data *dd = presto_d2d(dentry); |
if (dd) { |
dd->remote_ino = info->remote_ino; |
dd->remote_generation = info->remote_generation; |
} |
} |
|
#ifdef CONFIG_FS_POSIX_ACL |
/* restore the inode_operations if we changed them*/ |
if (iattr->ia_valid & ATTR_MODE) |
dentry->d_inode->i_op->set_posix_acl=set_posix_acl; |
#endif |
|
|
EXIT; |
exit_lock: |
path_release(&nd); |
exit: |
unlock_kernel(); |
return error; |
} |
|
int presto_do_create(struct presto_file_set *fset, struct dentry *dir, |
struct dentry *dentry, int mode, |
struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
int error; |
struct presto_version tgt_dir_ver, new_file_ver; |
struct inode_operations *iops; |
void *handle; |
|
ENTRY; |
mode &= S_IALLUGO; |
mode |= S_IFREG; |
|
down(&dir->d_inode->i_zombie); |
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); |
if (error) { |
EXIT; |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
error = may_create(dir->d_inode, dentry); |
if (error) { |
EXIT; |
goto exit_pre_lock; |
} |
|
error = -EPERM; |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
if (!iops->create) { |
EXIT; |
goto exit_pre_lock; |
} |
|
presto_getversion(&tgt_dir_ver, dir->d_inode); |
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_CREATE); |
if ( IS_ERR(handle) ) { |
EXIT; |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
CERROR("presto_do_create: no space for transaction\n"); |
error=-ENOSPC; |
goto exit_pre_lock; |
} |
DQUOT_INIT(dir->d_inode); |
lock_kernel(); |
error = iops->create(dir->d_inode, dentry, mode); |
if (error) { |
EXIT; |
goto exit_lock; |
} |
|
if (dentry->d_inode) { |
struct presto_cache *cache = fset->fset_cache; |
/* was this already done? */ |
presto_set_ops(dentry->d_inode, cache->cache_filter); |
|
filter_setup_dentry_ops(cache->cache_filter, |
dentry->d_op, |
&presto_dentry_ops); |
dentry->d_op = filter_c2udops(cache->cache_filter); |
|
/* if Lento creates this file, we won't have data */ |
if ( ISLENTO(presto_c2m(cache)) ) { |
presto_set(dentry, PRESTO_ATTR); |
} else { |
presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); |
} |
} |
|
info->flags |= LENTO_FL_TOUCH_PARENT; |
error = presto_settime(fset, NULL, dir, dentry, |
info, ATTR_CTIME | ATTR_MTIME); |
if (error) { |
EXIT; |
goto exit_lock; |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x10); |
|
if ( presto_do_kml(info, dentry) ) { |
presto_getversion(&new_file_ver, dentry->d_inode); |
error = presto_journal_create(&rec, fset, dentry, &tgt_dir_ver, |
&new_file_ver, |
dentry->d_inode->i_mode); |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x20); |
|
if ( presto_do_rcvd(info, dentry) ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x30); |
|
/* add inode dentry */ |
if (fset->fset_cache->cache_filter->o_trops->tr_add_ilookup ) { |
struct dentry *d; |
d = fset->fset_cache->cache_filter->o_trops->tr_add_ilookup |
(dir->d_inode->i_sb->s_root, dentry); |
} |
|
EXIT; |
|
exit_lock: |
unlock_kernel(); |
presto_trans_commit(fset, handle); |
exit_pre_lock: |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
/* from namei.c */ |
static struct dentry *lookup_create(struct nameidata *nd, int is_dir) |
{ |
struct dentry *dentry; |
|
down(&nd->dentry->d_inode->i_sem); |
dentry = ERR_PTR(-EEXIST); |
if (nd->last_type != LAST_NORM) |
goto fail; |
dentry = lookup_hash(&nd->last, nd->dentry); |
if (IS_ERR(dentry)) |
goto fail; |
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) |
goto enoent; |
return dentry; |
enoent: |
dput(dentry); |
dentry = ERR_PTR(-ENOENT); |
fail: |
return dentry; |
} |
|
int lento_create(const char *name, int mode, struct lento_vfs_context *info) |
{ |
int error; |
struct nameidata nd; |
char * pathname; |
struct dentry *dentry; |
struct presto_file_set *fset; |
|
ENTRY; |
pathname = getname(name); |
error = PTR_ERR(pathname); |
if (IS_ERR(pathname)) { |
EXIT; |
goto exit; |
} |
|
/* this looks up the parent */ |
// if (path_init(pathname, LOOKUP_FOLLOW | LOOKUP_POSITIVE, &nd)) |
if (path_init(pathname, LOOKUP_PARENT, &nd)) |
error = path_walk(pathname, &nd); |
if (error) { |
EXIT; |
goto exit; |
} |
dentry = lookup_create(&nd, 0); |
error = PTR_ERR(dentry); |
if (IS_ERR(dentry)) { |
EXIT; |
goto exit_lock; |
} |
|
fset = presto_fset(dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto exit_lock; |
} |
error = presto_do_create(fset, dentry->d_parent, dentry, (mode&S_IALLUGO)|S_IFREG, |
info); |
|
EXIT; |
|
exit_lock: |
path_release (&nd); |
dput(dentry); |
up(&dentry->d_parent->d_inode->i_sem); |
putname(pathname); |
exit: |
return error; |
} |
|
int presto_do_link(struct presto_file_set *fset, struct dentry *old_dentry, |
struct dentry *dir, struct dentry *new_dentry, |
struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
struct inode *inode; |
int error; |
struct inode_operations *iops; |
struct presto_version tgt_dir_ver; |
struct presto_version new_link_ver; |
void *handle; |
|
down(&dir->d_inode->i_zombie); |
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); |
if (error) { |
EXIT; |
up(&dir->d_inode->i_zombie); |
return error; |
} |
error = -ENOENT; |
inode = old_dentry->d_inode; |
if (!inode) |
goto exit_lock; |
|
error = may_create(dir->d_inode, new_dentry); |
if (error) |
goto exit_lock; |
|
error = -EXDEV; |
if (dir->d_inode->i_dev != inode->i_dev) |
goto exit_lock; |
|
/* |
* A link to an append-only or immutable file cannot be created. |
*/ |
error = -EPERM; |
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { |
EXIT; |
goto exit_lock; |
} |
|
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
if (!iops->link) { |
EXIT; |
goto exit_lock; |
} |
|
|
presto_getversion(&tgt_dir_ver, dir->d_inode); |
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_LINK); |
if ( IS_ERR(handle) ) { |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
CERROR("presto_do_link: no space for transaction\n"); |
return -ENOSPC; |
} |
|
DQUOT_INIT(dir->d_inode); |
lock_kernel(); |
error = iops->link(old_dentry, dir->d_inode, new_dentry); |
unlock_kernel(); |
if (error) { |
EXIT; |
goto exit_lock; |
} |
|
/* link dd data to that of existing dentry */ |
old_dentry->d_op->d_release(new_dentry); |
if (!presto_d2d(old_dentry)) |
BUG(); |
presto_d2d(old_dentry)->dd_count++; |
|
new_dentry->d_fsdata = presto_d2d(old_dentry); |
|
info->flags |= LENTO_FL_TOUCH_PARENT; |
error = presto_settime(fset, NULL, dir, new_dentry, |
info, ATTR_CTIME); |
if (error) { |
EXIT; |
goto exit_lock; |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x10); |
presto_getversion(&new_link_ver, new_dentry->d_inode); |
if ( presto_do_kml(info, old_dentry) ) |
error = presto_journal_link(&rec, fset, old_dentry, new_dentry, |
&tgt_dir_ver, &new_link_ver); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x20); |
if ( presto_do_rcvd(info, old_dentry) ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x30); |
EXIT; |
presto_trans_commit(fset, handle); |
exit_lock: |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
|
int lento_link(const char * oldname, const char * newname, |
struct lento_vfs_context *info) |
{ |
int error; |
char * from; |
char * to; |
struct presto_file_set *fset; |
|
from = getname(oldname); |
if(IS_ERR(from)) |
return PTR_ERR(from); |
to = getname(newname); |
error = PTR_ERR(to); |
if (!IS_ERR(to)) { |
struct dentry *new_dentry; |
struct nameidata nd, old_nd; |
|
error = 0; |
if (path_init(from, LOOKUP_POSITIVE, &old_nd)) |
error = path_walk(from, &old_nd); |
if (error) |
goto exit; |
if (path_init(to, LOOKUP_PARENT, &nd)) |
error = path_walk(to, &nd); |
if (error) |
goto out; |
error = -EXDEV; |
if (old_nd.mnt != nd.mnt) |
goto out; |
new_dentry = lookup_create(&nd, 0); |
error = PTR_ERR(new_dentry); |
|
if (!IS_ERR(new_dentry)) { |
fset = presto_fset(new_dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto out2; |
} |
error = presto_do_link(fset, old_nd.dentry, |
nd.dentry, |
new_dentry, info); |
dput(new_dentry); |
} |
out2: |
up(&nd.dentry->d_inode->i_sem); |
path_release(&nd); |
out: |
path_release(&old_nd); |
exit: |
putname(to); |
} |
putname(from); |
|
return error; |
} |
|
int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir, |
struct dentry *dentry, struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
struct inode_operations *iops; |
struct presto_version tgt_dir_ver, old_file_ver; |
struct izo_rollback_data rb; |
void *handle; |
int do_kml = 0, do_rcvd = 0, linkno = 0, error, old_targetlen = 0; |
char *old_target = NULL; |
|
ENTRY; |
down(&dir->d_inode->i_zombie); |
error = may_delete(dir->d_inode, dentry, 0); |
if (error) { |
EXIT; |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
error = -EPERM; |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
if (!iops->unlink) { |
EXIT; |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
error = presto_reserve_space(fset->fset_cache, PRESTO_REQLOW); |
if (error) { |
EXIT; |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
|
if (presto_d2d(dentry)) { |
struct presto_dentry_data *dd = presto_d2d(dentry); |
struct dentry *de = dd->dd_inodentry; |
if (de && dentry->d_inode->i_nlink == 1) { |
dd->dd_count--; |
dd->dd_inodentry = NULL; |
de->d_fsdata = NULL; |
atomic_dec(&de->d_inode->i_count); |
de->d_inode = NULL; |
dput(de); |
} |
} |
|
presto_getversion(&tgt_dir_ver, dir->d_inode); |
presto_getversion(&old_file_ver, dentry->d_inode); |
izo_get_rollback_data(dentry->d_inode, &rb); |
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_UNLINK); |
if ( IS_ERR(handle) ) { |
presto_release_space(fset->fset_cache, PRESTO_REQLOW); |
CERROR("ERROR: presto_do_unlink: no space for transaction. Tell Peter.\n"); |
up(&dir->d_inode->i_zombie); |
return -ENOSPC; |
} |
DQUOT_INIT(dir->d_inode); |
if (d_mountpoint(dentry)) |
error = -EBUSY; |
else { |
lock_kernel(); |
linkno = dentry->d_inode->i_nlink; |
if (linkno > 1) { |
dget(dentry); |
} |
|
if (S_ISLNK(dentry->d_inode->i_mode)) { |
mm_segment_t old_fs; |
struct inode_operations *riops; |
riops = filter_c2csiops(fset->fset_cache->cache_filter); |
|
PRESTO_ALLOC(old_target, PATH_MAX); |
if (old_target == NULL) { |
error = -ENOMEM; |
EXIT; |
goto exit; |
} |
|
old_fs = get_fs(); |
set_fs(get_ds()); |
|
if (riops->readlink == NULL) |
CERROR("InterMezzo %s: no readlink iops.\n", |
__FUNCTION__); |
else |
old_targetlen = |
riops->readlink(dentry, old_target, |
PATH_MAX); |
if (old_targetlen < 0) { |
CERROR("InterMezzo: readlink failed: %ld\n", |
PTR_ERR(old_target)); |
PRESTO_FREE(old_target, PATH_MAX); |
old_target = NULL; |
old_targetlen = 0; |
} |
set_fs(old_fs); |
} |
|
do_kml = presto_do_kml(info, dir); |
do_rcvd = presto_do_rcvd(info, dir); |
error = iops->unlink(dir->d_inode, dentry); |
unlock_kernel(); |
if (!error) |
d_delete(dentry); |
} |
|
if (linkno > 1) { |
/* FIXME: Combine this with the next call? */ |
error = presto_settime(fset, NULL, NULL, dentry, |
info, ATTR_CTIME); |
dput(dentry); |
if (error) { |
EXIT; |
goto exit; |
} |
} |
|
error = presto_settime(fset, NULL, NULL, dir, |
info, ATTR_CTIME | ATTR_MTIME); |
if (error) { |
EXIT; |
goto exit; |
} |
|
up(&dir->d_inode->i_zombie); |
if (error) { |
EXIT; |
goto exit; |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x10); |
if ( do_kml ) |
error = presto_journal_unlink(&rec, fset, dir, &tgt_dir_ver, |
&old_file_ver, &rb, dentry, |
old_target, old_targetlen); |
presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x20); |
if ( do_rcvd ) { |
error = presto_write_last_rcvd(&rec, fset, info); |
} |
presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x30); |
EXIT; |
exit: |
presto_release_space(fset->fset_cache, PRESTO_REQLOW); |
presto_trans_commit(fset, handle); |
if (old_target != NULL) |
PRESTO_FREE(old_target, PATH_MAX); |
return error; |
} |
|
|
int lento_unlink(const char *pathname, struct lento_vfs_context *info) |
{ |
int error = 0; |
char * name; |
struct dentry *dentry; |
struct nameidata nd; |
struct presto_file_set *fset; |
|
ENTRY; |
|
name = getname(pathname); |
if(IS_ERR(name)) |
return PTR_ERR(name); |
|
if (path_init(name, LOOKUP_PARENT, &nd)) |
error = path_walk(name, &nd); |
if (error) |
goto exit; |
error = -EISDIR; |
if (nd.last_type != LAST_NORM) |
goto exit1; |
down(&nd.dentry->d_inode->i_sem); |
dentry = lookup_hash(&nd.last, nd.dentry); |
error = PTR_ERR(dentry); |
if (!IS_ERR(dentry)) { |
fset = presto_fset(dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto exit2; |
} |
/* Why not before? Because we want correct error value */ |
if (nd.last.name[nd.last.len]) |
goto slashes; |
error = presto_do_unlink(fset, nd.dentry, dentry, info); |
exit2: |
EXIT; |
dput(dentry); |
} |
up(&nd.dentry->d_inode->i_sem); |
exit1: |
path_release(&nd); |
exit: |
putname(name); |
|
return error; |
|
slashes: |
error = !dentry->d_inode ? -ENOENT : |
S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; |
goto exit2; |
} |
|
int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir, |
struct dentry *dentry, const char *oldname, |
struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
int error; |
struct presto_version tgt_dir_ver, new_link_ver; |
struct inode_operations *iops; |
void *handle; |
|
ENTRY; |
down(&dir->d_inode->i_zombie); |
/* record + max path len + space to free */ |
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); |
if (error) { |
EXIT; |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
error = may_create(dir->d_inode, dentry); |
if (error) { |
EXIT; |
goto exit_lock; |
} |
|
error = -EPERM; |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
if (!iops->symlink) { |
EXIT; |
goto exit_lock; |
} |
|
presto_getversion(&tgt_dir_ver, dir->d_inode); |
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_SYMLINK); |
if ( IS_ERR(handle) ) { |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); |
CERROR("ERROR: presto_do_symlink: no space for transaction. Tell Peter.\n"); |
EXIT; |
up(&dir->d_inode->i_zombie); |
return -ENOSPC; |
} |
DQUOT_INIT(dir->d_inode); |
lock_kernel(); |
error = iops->symlink(dir->d_inode, dentry, oldname); |
if (error) { |
EXIT; |
goto exit; |
} |
|
if (dentry->d_inode) { |
struct presto_cache *cache = fset->fset_cache; |
|
presto_set_ops(dentry->d_inode, cache->cache_filter); |
|
filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, |
&presto_dentry_ops); |
dentry->d_op = filter_c2udops(cache->cache_filter); |
/* XXX ? Cache state ? if Lento creates a symlink */ |
if ( ISLENTO(presto_c2m(cache)) ) { |
presto_set(dentry, PRESTO_ATTR); |
} else { |
presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); |
} |
} |
|
info->flags |= LENTO_FL_TOUCH_PARENT; |
error = presto_settime(fset, NULL, dir, dentry, |
info, ATTR_CTIME | ATTR_MTIME); |
if (error) { |
EXIT; |
goto exit; |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x10); |
presto_getversion(&new_link_ver, dentry->d_inode); |
if ( presto_do_kml(info, dentry) ) |
error = presto_journal_symlink(&rec, fset, dentry, oldname, |
&tgt_dir_ver, &new_link_ver); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x20); |
if ( presto_do_rcvd(info, dentry) ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x30); |
EXIT; |
exit: |
unlock_kernel(); |
presto_trans_commit(fset, handle); |
exit_lock: |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
int lento_symlink(const char *oldname, const char *newname, |
struct lento_vfs_context *info) |
{ |
int error; |
char *from; |
char *to; |
struct dentry *dentry; |
struct presto_file_set *fset; |
struct nameidata nd; |
|
ENTRY; |
lock_kernel(); |
from = getname(oldname); |
error = PTR_ERR(from); |
if (IS_ERR(from)) { |
EXIT; |
goto exit; |
} |
|
to = getname(newname); |
error = PTR_ERR(to); |
if (IS_ERR(to)) { |
EXIT; |
goto exit_from; |
} |
|
if (path_init(to, LOOKUP_PARENT, &nd)) |
error = path_walk(to, &nd); |
if (error) { |
EXIT; |
goto exit_to; |
} |
|
dentry = lookup_create(&nd, 0); |
error = PTR_ERR(dentry); |
if (IS_ERR(dentry)) { |
path_release(&nd); |
EXIT; |
goto exit_to; |
} |
|
fset = presto_fset(dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
path_release(&nd); |
EXIT; |
goto exit_lock; |
} |
error = presto_do_symlink(fset, nd.dentry, |
dentry, from, info); |
path_release(&nd); |
EXIT; |
exit_lock: |
up(&nd.dentry->d_inode->i_sem); |
dput(dentry); |
exit_to: |
putname(to); |
exit_from: |
putname(from); |
exit: |
unlock_kernel(); |
return error; |
} |
|
int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir, |
struct dentry *dentry, int mode, |
struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
int error; |
struct presto_version tgt_dir_ver, new_dir_ver; |
void *handle; |
|
ENTRY; |
down(&dir->d_inode->i_zombie); |
|
/* one journal record + directory block + room for removals*/ |
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); |
if (error) { |
EXIT; |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
error = may_create(dir->d_inode, dentry); |
if (error) { |
EXIT; |
goto exit_lock; |
} |
|
error = -EPERM; |
if (!filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir) { |
EXIT; |
goto exit_lock; |
} |
|
error = -ENOSPC; |
presto_getversion(&tgt_dir_ver, dir->d_inode); |
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKDIR); |
if ( IS_ERR(handle) ) { |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); |
CERROR("presto_do_mkdir: no space for transaction\n"); |
goto exit_lock; |
} |
|
DQUOT_INIT(dir->d_inode); |
mode &= (S_IRWXUGO|S_ISVTX); |
lock_kernel(); |
error = filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir(dir->d_inode, dentry, mode); |
if (error) { |
EXIT; |
goto exit; |
} |
|
if ( dentry->d_inode && !error) { |
struct presto_cache *cache = fset->fset_cache; |
|
presto_set_ops(dentry->d_inode, cache->cache_filter); |
|
filter_setup_dentry_ops(cache->cache_filter, |
dentry->d_op, |
&presto_dentry_ops); |
dentry->d_op = filter_c2udops(cache->cache_filter); |
/* if Lento does this, we won't have data */ |
if ( ISLENTO(presto_c2m(cache)) ) { |
presto_set(dentry, PRESTO_ATTR); |
} else { |
presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); |
} |
} |
|
info->flags |= LENTO_FL_TOUCH_PARENT; |
error = presto_settime(fset, NULL, dir, dentry, |
info, ATTR_CTIME | ATTR_MTIME); |
if (error) { |
EXIT; |
goto exit; |
} |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x10); |
presto_getversion(&new_dir_ver, dentry->d_inode); |
if ( presto_do_kml(info, dir) ) |
error = presto_journal_mkdir(&rec, fset, dentry, &tgt_dir_ver, |
&new_dir_ver, |
dentry->d_inode->i_mode); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x20); |
if ( presto_do_rcvd(info, dentry) ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x30); |
EXIT; |
exit: |
unlock_kernel(); |
presto_trans_commit(fset, handle); |
exit_lock: |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); |
up(&dir->d_inode->i_zombie); |
return error; |
} |
|
/* |
* Look out: this function may change a normal dentry |
* into a directory dentry (different size).. |
*/ |
int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info) |
{ |
int error; |
char *pathname; |
struct dentry *dentry; |
struct presto_file_set *fset; |
struct nameidata nd; |
|
ENTRY; |
CDEBUG(D_PIOCTL, "name: %s, mode %o, offset %d, recno %d, flags %x\n", |
name, mode, info->slot_offset, info->recno, info->flags); |
pathname = getname(name); |
error = PTR_ERR(pathname); |
if (IS_ERR(pathname)) { |
EXIT; |
return error; |
} |
|
if (path_init(pathname, LOOKUP_PARENT, &nd)) |
error = path_walk(pathname, &nd); |
if (error) |
goto out_name; |
|
dentry = lookup_create(&nd, 1); |
error = PTR_ERR(dentry); |
if (!IS_ERR(dentry)) { |
fset = presto_fset(dentry); |
error = -EINVAL; |
if (!fset) { |
CERROR("No fileset!\n"); |
EXIT; |
goto out_dput; |
} |
|
error = presto_do_mkdir(fset, nd.dentry, dentry, |
mode & S_IALLUGO, info); |
out_dput: |
dput(dentry); |
} |
up(&nd.dentry->d_inode->i_sem); |
path_release(&nd); |
out_name: |
EXIT; |
putname(pathname); |
CDEBUG(D_PIOCTL, "error: %d\n", error); |
return error; |
} |
|
static void d_unhash(struct dentry *dentry) |
{ |
dget(dentry); |
switch (atomic_read(&dentry->d_count)) { |
default: |
shrink_dcache_parent(dentry); |
if (atomic_read(&dentry->d_count) != 2) |
break; |
case 2: |
d_drop(dentry); |
} |
} |
|
int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir, |
struct dentry *dentry, struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
int error; |
struct presto_version tgt_dir_ver, old_dir_ver; |
struct izo_rollback_data rb; |
struct inode_operations *iops; |
void *handle; |
int do_kml, do_rcvd; |
int size; |
|
ENTRY; |
error = may_delete(dir->d_inode, dentry, 1); |
if (error) |
return error; |
|
error = -EPERM; |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
if (!iops->rmdir) { |
EXIT; |
return error; |
} |
|
size = PRESTO_REQHIGH - dentry->d_inode->i_size; |
error = presto_reserve_space(fset->fset_cache, size); |
if (error) { |
EXIT; |
return error; |
} |
|
presto_getversion(&tgt_dir_ver, dir->d_inode); |
presto_getversion(&old_dir_ver, dentry->d_inode); |
izo_get_rollback_data(dentry->d_inode, &rb); |
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_RMDIR); |
if ( IS_ERR(handle) ) { |
presto_release_space(fset->fset_cache, size); |
CERROR("ERROR: presto_do_rmdir: no space for transaction. Tell Peter.\n"); |
return -ENOSPC; |
} |
|
DQUOT_INIT(dir->d_inode); |
|
do_kml = presto_do_kml(info, dir); |
do_rcvd = presto_do_rcvd(info, dir); |
|
double_down(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie); |
d_unhash(dentry); |
if (IS_DEADDIR(dir->d_inode)) |
error = -ENOENT; |
else if (d_mountpoint(dentry)) { |
CERROR("foo: d_mountpoint(dentry): ino %ld\n", |
dentry->d_inode->i_ino); |
error = -EBUSY; |
} else { |
lock_kernel(); |
error = iops->rmdir(dir->d_inode, dentry); |
unlock_kernel(); |
if (!error) { |
dentry->d_inode->i_flags |= S_DEAD; |
error = presto_settime(fset, NULL, NULL, dir, info, |
ATTR_CTIME | ATTR_MTIME); |
} |
} |
double_up(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie); |
if (!error) |
d_delete(dentry); |
dput(dentry); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x10); |
if ( !error && do_kml ) |
error = presto_journal_rmdir(&rec, fset, dir, &tgt_dir_ver, |
&old_dir_ver, &rb, |
dentry->d_name.len, |
dentry->d_name.name); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x20); |
if ( !error && do_rcvd ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x30); |
EXIT; |
|
presto_trans_commit(fset, handle); |
presto_release_space(fset->fset_cache, size); |
return error; |
} |
|
int lento_rmdir(const char *pathname, struct lento_vfs_context *info) |
{ |
int error = 0; |
char * name; |
struct dentry *dentry; |
struct presto_file_set *fset; |
struct nameidata nd; |
|
ENTRY; |
name = getname(pathname); |
if(IS_ERR(name)) { |
EXIT; |
return PTR_ERR(name); |
} |
|
if (path_init(name, LOOKUP_PARENT, &nd)) |
error = path_walk(name, &nd); |
if (error) { |
EXIT; |
goto exit; |
} |
switch(nd.last_type) { |
case LAST_DOTDOT: |
error = -ENOTEMPTY; |
EXIT; |
goto exit1; |
case LAST_ROOT: |
case LAST_DOT: |
error = -EBUSY; |
EXIT; |
goto exit1; |
} |
down(&nd.dentry->d_inode->i_sem); |
dentry = lookup_hash(&nd.last, nd.dentry); |
error = PTR_ERR(dentry); |
if (!IS_ERR(dentry)) { |
fset = presto_fset(dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto exit_put; |
} |
error = presto_do_rmdir(fset, nd.dentry, dentry, info); |
exit_put: |
dput(dentry); |
} |
up(&nd.dentry->d_inode->i_sem); |
exit1: |
path_release(&nd); |
exit: |
putname(name); |
EXIT; |
return error; |
} |
|
/*
 * Create the special file @dentry (type and permissions in @mode, device
 * number @dev) in directory @dir through the cache filesystem and record
 * it for replication.
 *
 * The parent's i_zombie is held for the whole call.  Space for
 * PRESTO_REQHIGH is reserved on entry and released exactly once on every
 * path that leaves through exit_lock.  Creating a character or block
 * device without CAP_MKNOD fails with -EPERM.
 *
 * Returns 0 on success or a negative errno (-ENOSPC if no transaction
 * could be started, -EPERM if the cache has no mknod operation).
 */
int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir,
                    struct dentry *dentry, int mode, dev_t dev,
                    struct lento_vfs_context *info)
{
        struct rec_info rec;
        int error;
        struct presto_version tgt_dir_ver, new_node_ver;
        struct inode_operations *iops;
        void *handle;

        ENTRY;

        down(&dir->d_inode->i_zombie);
        /* one KML entry */
        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (error) {
                EXIT;
                up(&dir->d_inode->i_zombie);
                return error;
        }

        /* error is 0 after a successful reservation; set it explicitly so
         * the capability refusal is not reported as success */
        error = -EPERM;
        if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) {
                EXIT;
                goto exit_lock;
        }

        error = may_create(dir->d_inode, dentry);
        if (error) {
                EXIT;
                goto exit_lock;
        }

        error = -EPERM;
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
        if (!iops->mknod) {
                EXIT;
                goto exit_lock;
        }

        DQUOT_INIT(dir->d_inode);
        lock_kernel();

        error = -ENOSPC;
        presto_getversion(&tgt_dir_ver, dir->d_inode);
        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKNOD);
        if ( IS_ERR(handle) ) {
                /* exit_lock releases the reservation; do not release it
                 * here as well */
                CERROR("presto_do_mknod: no space for transaction\n");
                goto exit_lock2;
        }

        error = iops->mknod(dir->d_inode, dentry, mode, dev);
        if (error) {
                EXIT;
                goto exit_commit;
        }
        if ( dentry->d_inode) {
                struct presto_cache *cache = fset->fset_cache;

                presto_set_ops(dentry->d_inode, cache->cache_filter);

                filter_setup_dentry_ops(cache->cache_filter, dentry->d_op,
                                        &presto_dentry_ops);
                dentry->d_op = filter_c2udops(cache->cache_filter);

                /* if Lento does this, we won't have data */
                if ( ISLENTO(presto_c2m(cache)) ) {
                        presto_set(dentry, PRESTO_ATTR);
                } else {
                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
                }
        }

        /* NOTE(review): failures of the two presto_settime() calls are
         * only traced; processing continues and the value may be
         * overwritten below.  Kept as in the original. */
        error = presto_settime(fset, NULL, NULL, dir,
                               info, ATTR_MTIME);
        if (error) {
                EXIT;
        }
        error = presto_settime(fset, NULL, NULL, dentry,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x10);
        presto_getversion(&new_node_ver, dentry->d_inode);
        if ( presto_do_kml(info, dentry) )
                error = presto_journal_mknod(&rec, fset, dentry, &tgt_dir_ver,
                                             &new_node_ver,
                                             dentry->d_inode->i_mode,
                                             MAJOR(dev), MINOR(dev) );

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x20);
        if ( presto_do_rcvd(info, dentry) )
                error = presto_write_last_rcvd(&rec, fset, info);

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x30);
        EXIT;
 exit_commit:
        presto_trans_commit(fset, handle);
 exit_lock2:
        unlock_kernel();
 exit_lock:
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        up(&dir->d_inode->i_zombie);
        return error;
}
|
/*
 * Create the special file @filename (type and permissions in @mode,
 * device number @dev) on behalf of Lento.
 *
 * Directories are refused with -EPERM and regular files (or a zero type)
 * with -EOPNOTSUPP; character/block devices, FIFOs and sockets are handed
 * to presto_do_mknod().  Any other type yields -EINVAL, as does a dentry
 * that no fileset covers.
 */
int lento_mknod(const char *filename, int mode, dev_t dev,
                struct lento_vfs_context *info)
{
        struct presto_file_set *fset;
        struct nameidata nd;
        struct dentry *node;
        char *kname;
        int rc = 0;

        ENTRY;

        if (S_ISDIR(mode))
                return -EPERM;
        kname = getname(filename);
        if (IS_ERR(kname))
                return PTR_ERR(kname);

        if (path_init(kname, LOOKUP_PARENT, &nd))
                rc = path_walk(kname, &nd);
        if (rc)
                goto out_name;

        node = lookup_create(&nd, 0);
        if (IS_ERR(node)) {
                rc = PTR_ERR(node);
                goto out_unlock;
        }

        fset = presto_fset(node);
        if (!fset) {
                CERROR("No fileset!\n");
                EXIT;
                rc = -EINVAL;
                goto out_dput;
        }

        switch (mode & S_IFMT) {
        case S_IFCHR:
        case S_IFBLK:
        case S_IFIFO:
        case S_IFSOCK:
                rc = presto_do_mknod(fset, nd.dentry, node,
                                     mode, dev, info);
                break;
        case 0:
        case S_IFREG:
                rc = -EOPNOTSUPP;
                break;
        case S_IFDIR:
                rc = -EPERM;
                break;
        default:
                rc = -EINVAL;
                break;
        }
out_dput:
        dput(node);
out_unlock:
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
out_name:
        putname(kname);

        return rc;
}
|
int do_rename(struct presto_file_set *fset, |
struct dentry *old_parent, struct dentry *old_dentry, |
struct dentry *new_parent, struct dentry *new_dentry, |
struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
int error; |
struct inode_operations *iops; |
struct presto_version src_dir_ver, tgt_dir_ver; |
void *handle; |
int new_inode_unlink = 0; |
struct inode *old_dir = old_parent->d_inode; |
struct inode *new_dir = new_parent->d_inode; |
|
ENTRY; |
presto_getversion(&src_dir_ver, old_dir); |
presto_getversion(&tgt_dir_ver, new_dir); |
|
error = -EPERM; |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
if (!iops || !iops->rename) { |
EXIT; |
return error; |
} |
|
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); |
if (error) { |
EXIT; |
return error; |
} |
handle = presto_trans_start(fset, old_dir, KML_OPCODE_RENAME); |
if ( IS_ERR(handle) ) { |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
CERROR("presto_do_rename: no space for transaction\n"); |
return -ENOSPC; |
} |
if (new_dentry->d_inode && new_dentry->d_inode->i_nlink > 1) { |
dget(new_dentry); |
new_inode_unlink = 1; |
} |
|
error = iops->rename(old_dir, old_dentry, new_dir, new_dentry); |
|
if (error) { |
EXIT; |
goto exit; |
} |
|
if (new_inode_unlink) { |
error = presto_settime(fset, NULL, NULL, old_dentry, |
info, ATTR_CTIME); |
dput(old_dentry); |
if (error) { |
EXIT; |
goto exit; |
} |
} |
info->flags |= LENTO_FL_TOUCH_PARENT; |
error = presto_settime(fset, NULL, new_parent, old_parent, |
info, ATTR_CTIME | ATTR_MTIME); |
if (error) { |
EXIT; |
goto exit; |
} |
|
/* XXX make a distinction between cross file set |
* and intra file set renames here |
*/ |
presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x10); |
if ( presto_do_kml(info, old_dentry) ) |
error = presto_journal_rename(&rec, fset, old_dentry, |
new_dentry, |
&src_dir_ver, &tgt_dir_ver); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x20); |
|
if ( presto_do_rcvd(info, old_dentry) ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x30); |
EXIT; |
exit: |
presto_trans_commit(fset, handle); |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
return error; |
} |
|
static |
int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent, |
struct dentry *old_dentry, struct dentry *new_parent, |
struct dentry *new_dentry, struct lento_vfs_context *info) |
{ |
int error; |
struct inode *target; |
struct inode *old_dir = old_parent->d_inode; |
struct inode *new_dir = new_parent->d_inode; |
|
if (old_dentry->d_inode == new_dentry->d_inode) |
return 0; |
|
error = may_delete(old_dir, old_dentry, 1); |
if (error) |
return error; |
|
if (new_dir->i_dev != old_dir->i_dev) |
return -EXDEV; |
|
if (!new_dentry->d_inode) |
error = may_create(new_dir, new_dentry); |
else |
error = may_delete(new_dir, new_dentry, 1); |
if (error) |
return error; |
|
if (!old_dir->i_op || !old_dir->i_op->rename) |
return -EPERM; |
|
/* |
* If we are going to change the parent - check write permissions, |
* we'll need to flip '..'. |
*/ |
if (new_dir != old_dir) { |
error = permission(old_dentry->d_inode, MAY_WRITE); |
} |
if (error) |
return error; |
|
DQUOT_INIT(old_dir); |
DQUOT_INIT(new_dir); |
down(&old_dir->i_sb->s_vfs_rename_sem); |
error = -EINVAL; |
if (is_subdir(new_dentry, old_dentry)) |
goto out_unlock; |
target = new_dentry->d_inode; |
if (target) { /* Hastur! Hastur! Hastur! */ |
triple_down(&old_dir->i_zombie, |
&new_dir->i_zombie, |
&target->i_zombie); |
d_unhash(new_dentry); |
} else |
double_down(&old_dir->i_zombie, |
&new_dir->i_zombie); |
if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir)) |
error = -ENOENT; |
else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) |
error = -EBUSY; |
else |
error = do_rename(fset, old_parent, old_dentry, |
new_parent, new_dentry, info); |
if (target) { |
if (!error) |
target->i_flags |= S_DEAD; |
triple_up(&old_dir->i_zombie, |
&new_dir->i_zombie, |
&target->i_zombie); |
if (d_unhashed(new_dentry)) |
d_rehash(new_dentry); |
dput(new_dentry); |
} else |
double_up(&old_dir->i_zombie, |
&new_dir->i_zombie); |
|
if (!error) |
d_move(old_dentry,new_dentry); |
out_unlock: |
up(&old_dir->i_sb->s_vfs_rename_sem); |
return error; |
} |
|
static |
int presto_rename_other(struct presto_file_set *fset, struct dentry *old_parent, |
struct dentry *old_dentry, struct dentry *new_parent, |
struct dentry *new_dentry, struct lento_vfs_context *info) |
{ |
struct inode *old_dir = old_parent->d_inode; |
struct inode *new_dir = new_parent->d_inode; |
int error; |
|
if (old_dentry->d_inode == new_dentry->d_inode) |
return 0; |
|
error = may_delete(old_dir, old_dentry, 0); |
if (error) |
return error; |
|
if (new_dir->i_dev != old_dir->i_dev) |
return -EXDEV; |
|
if (!new_dentry->d_inode) |
error = may_create(new_dir, new_dentry); |
else |
error = may_delete(new_dir, new_dentry, 0); |
if (error) |
return error; |
|
if (!old_dir->i_op || !old_dir->i_op->rename) |
return -EPERM; |
|
DQUOT_INIT(old_dir); |
DQUOT_INIT(new_dir); |
double_down(&old_dir->i_zombie, &new_dir->i_zombie); |
if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) |
error = -EBUSY; |
else |
error = do_rename(fset, old_parent, old_dentry, |
new_parent, new_dentry, info); |
double_up(&old_dir->i_zombie, &new_dir->i_zombie); |
if (error) |
return error; |
/* The following d_move() should become unconditional */ |
if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) { |
d_move(old_dentry, new_dentry); |
} |
return 0; |
} |
|
int presto_do_rename(struct presto_file_set *fset, |
struct dentry *old_parent, struct dentry *old_dentry, |
struct dentry *new_parent, struct dentry *new_dentry, |
struct lento_vfs_context *info) |
{ |
if (S_ISDIR(old_dentry->d_inode->i_mode)) |
return presto_rename_dir(fset, old_parent,old_dentry,new_parent, |
new_dentry, info); |
else |
return presto_rename_other(fset, old_parent, old_dentry, |
new_parent,new_dentry, info); |
} |
|
|
int lento_do_rename(const char *oldname, const char *newname, |
struct lento_vfs_context *info) |
{ |
int error = 0; |
struct dentry * old_dir, * new_dir; |
struct dentry * old_dentry, *new_dentry; |
struct nameidata oldnd, newnd; |
struct presto_file_set *fset; |
|
ENTRY; |
|
if (path_init(oldname, LOOKUP_PARENT, &oldnd)) |
error = path_walk(oldname, &oldnd); |
|
if (error) |
goto exit; |
|
if (path_init(newname, LOOKUP_PARENT, &newnd)) |
error = path_walk(newname, &newnd); |
if (error) |
goto exit1; |
|
error = -EXDEV; |
if (oldnd.mnt != newnd.mnt) |
goto exit2; |
|
old_dir = oldnd.dentry; |
error = -EBUSY; |
if (oldnd.last_type != LAST_NORM) |
goto exit2; |
|
new_dir = newnd.dentry; |
if (newnd.last_type != LAST_NORM) |
goto exit2; |
|
double_lock(new_dir, old_dir); |
|
old_dentry = lookup_hash(&oldnd.last, old_dir); |
error = PTR_ERR(old_dentry); |
if (IS_ERR(old_dentry)) |
goto exit3; |
/* source must exist */ |
error = -ENOENT; |
if (!old_dentry->d_inode) |
goto exit4; |
fset = presto_fset(old_dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto exit4; |
} |
/* unless the source is a directory trailing slashes give -ENOTDIR */ |
if (!S_ISDIR(old_dentry->d_inode->i_mode)) { |
error = -ENOTDIR; |
if (oldnd.last.name[oldnd.last.len]) |
goto exit4; |
if (newnd.last.name[newnd.last.len]) |
goto exit4; |
} |
new_dentry = lookup_hash(&newnd.last, new_dir); |
error = PTR_ERR(new_dentry); |
if (IS_ERR(new_dentry)) |
goto exit4; |
|
lock_kernel(); |
error = presto_do_rename(fset, old_dir, old_dentry, |
new_dir, new_dentry, info); |
unlock_kernel(); |
|
dput(new_dentry); |
exit4: |
dput(old_dentry); |
exit3: |
double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem); |
exit2: |
path_release(&newnd); |
exit1: |
path_release(&oldnd); |
exit: |
return error; |
} |
|
/*
 * Rename @oldname to @newname on behalf of Lento: copy both names in with
 * getname(), hand them to lento_do_rename() and release the copies.
 * Returns the getname() error if either copy fails, otherwise the result
 * of lento_do_rename().
 */
int  lento_rename(const char * oldname, const char * newname,
                  struct lento_vfs_context *info)
{
        char *src_name;
        char *dst_name;
        int rc;

        src_name = getname(oldname);
        if (IS_ERR(src_name))
                return PTR_ERR(src_name);

        dst_name = getname(newname);
        if (IS_ERR(dst_name)) {
                rc = PTR_ERR(dst_name);
        } else {
                rc = lento_do_rename(src_name, dst_name, info);
                putname(dst_name);
        }

        putname(src_name);
        return rc;
}
|
struct dentry *presto_iopen(struct dentry *dentry, |
ino_t ino, unsigned int generation) |
{ |
struct presto_file_set *fset; |
char name[48]; |
int error; |
|
ENTRY; |
/* see if we already have the dentry we want */ |
if (dentry->d_inode && dentry->d_inode->i_ino == ino && |
dentry->d_inode->i_generation == generation) { |
EXIT; |
return dentry; |
} |
|
/* Make sure we have a cache beneath us. We should always find at |
* least one dentry inside the cache (if it exists), otherwise not |
* even the cache root exists, or we passed in a bad name. |
*/ |
fset = presto_fset(dentry); |
error = -EINVAL; |
if (!fset) { |
CERROR("No fileset for %*s!\n", |
dentry->d_name.len, dentry->d_name.name); |
EXIT; |
dput(dentry); |
return ERR_PTR(error); |
} |
dput(dentry); |
|
sprintf(name, "%s%#lx%c%#x", |
PRESTO_ILOOKUP_MAGIC, ino, PRESTO_ILOOKUP_SEP, generation); |
CDEBUG(D_PIOCTL, "opening %ld by number (as %s)\n", ino, name); |
return lookup_one_len(name, fset->fset_dentry, strlen(name)); |
} |
|
static struct file *presto_filp_dopen(struct dentry *dentry, int flags) |
{ |
struct file *f; |
struct inode *inode; |
int flag, error; |
|
ENTRY; |
error = -ENFILE; |
f = get_empty_filp(); |
if (!f) { |
CDEBUG(D_PIOCTL, "error getting file pointer\n"); |
EXIT; |
goto out; |
} |
f->f_flags = flag = flags; |
f->f_mode = (flag+1) & O_ACCMODE; |
inode = dentry->d_inode; |
if (f->f_mode & FMODE_WRITE) { |
error = get_write_access(inode); |
if (error) { |
CDEBUG(D_PIOCTL, "error getting write access\n"); |
EXIT; goto cleanup_file; |
} |
} |
|
f->f_dentry = dentry; |
f->f_pos = 0; |
f->f_reada = 0; |
f->f_op = NULL; |
if (inode->i_op) |
/* XXX should we set to presto ops, or leave at cache ops? */ |
f->f_op = inode->i_fop; |
if (f->f_op && f->f_op->open) { |
error = f->f_op->open(inode, f); |
if (error) { |
CDEBUG(D_PIOCTL, "error calling cache 'open'\n"); |
EXIT; |
goto cleanup_all; |
} |
} |
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); |
|
return f; |
|
cleanup_all: |
if (f->f_mode & FMODE_WRITE) |
put_write_access(inode); |
cleanup_file: |
put_filp(f); |
out: |
return ERR_PTR(error); |
} |
|
|
/* Open an inode by number. We pass in the cache root name (or a subdirectory |
* from the cache that is guaranteed to exist) to be able to access the cache. |
*/ |
int lento_iopen(const char *name, ino_t ino, unsigned int generation, |
int flags) |
{ |
char * tmp; |
struct dentry *dentry; |
struct nameidata nd; |
int fd; |
int error; |
|
ENTRY; |
CDEBUG(D_PIOCTL, |
"open %s:inode %#lx (%ld), generation %x (%d), flags %d \n", |
name, ino, ino, generation, generation, flags); |
/* We don't allow creation of files by number only, as it would |
* lead to a dangling files not in any directory. We could also |
* just turn off the flag and ignore it. |
*/ |
if (flags & O_CREAT) { |
CERROR("%s: create file by inode number (%ld) not allowed\n", |
__FUNCTION__, ino); |
EXIT; |
return -EACCES; |
} |
|
tmp = getname(name); |
if (IS_ERR(tmp)) { |
EXIT; |
return PTR_ERR(tmp); |
} |
|
lock_kernel(); |
again: /* look the named file or a parent directory so we can get the cache */ |
error = presto_walk(tmp, &nd); |
if ( error && error != -ENOENT ) { |
EXIT; |
unlock_kernel(); |
return error; |
} |
if (error == -ENOENT) |
dentry = NULL; |
else |
dentry = nd.dentry; |
|
/* we didn't find the named file, so see if a parent exists */ |
if (!dentry) { |
char *slash; |
|
slash = strrchr(tmp, '/'); |
if (slash && slash != tmp) { |
*slash = '\0'; |
path_release(&nd); |
goto again; |
} |
/* we should never get here... */ |
CDEBUG(D_PIOCTL, "no more path components to try!\n"); |
fd = -ENOENT; |
goto exit; |
} |
CDEBUG(D_PIOCTL, "returned dentry %p\n", dentry); |
|
dentry = presto_iopen(dentry, ino, generation); |
fd = PTR_ERR(dentry); |
if (IS_ERR(dentry)) { |
EXIT; |
goto exit; |
} |
|
/* XXX start of code that might be replaced by something like: |
* if (flags & (O_WRONLY | O_RDWR)) { |
* error = get_write_access(dentry->d_inode); |
* if (error) { |
* EXIT; |
* goto cleanup_dput; |
* } |
* } |
* fd = open_dentry(dentry, flags); |
* |
* including the presto_filp_dopen() function (check dget counts!) |
*/ |
fd = get_unused_fd(); |
if (fd < 0) { |
EXIT; |
goto cleanup_dput; |
} |
|
{ |
int error; |
struct file * f = presto_filp_dopen(dentry, flags); |
error = PTR_ERR(f); |
if (IS_ERR(f)) { |
put_unused_fd(fd); |
fd = error; |
EXIT; |
goto cleanup_dput; |
} |
fd_install(fd, f); |
} |
/* end of code that might be replaced by open_dentry */ |
|
EXIT; |
exit: |
unlock_kernel(); |
path_release(&nd); |
putname(tmp); |
return fd; |
|
cleanup_dput: |
putname(&nd); |
goto exit; |
} |
|
#ifdef CONFIG_FS_EXT_ATTR |
|
#ifdef CONFIG_FS_POSIX_ACL |
/* Posix ACL code changes i_mode without using a notify_change (or |
* a mark_inode_dirty!). We need to duplicate this at the reintegrator |
* which is done by this function. This function also takes care of |
* resetting the cached posix acls in this inode. If we don't reset these |
* VFS continues using the old acl information, which by now may be out of |
* date. |
*/ |
int presto_setmode(struct presto_file_set *fset, struct dentry *dentry, |
mode_t mode) |
{ |
struct inode *inode = dentry->d_inode; |
|
ENTRY; |
/* The extended attributes for this inode were modified. |
* At this point we can not be sure if any of the ACL |
* information for this inode was updated. So we will |
* force VFS to reread the acls. Note that we do this |
* only when called from the SETEXTATTR ioctl, which is why we |
* do this while setting the mode of the file. Also note |
* that mark_inode_dirty is not be needed for i_*acl only |
* to force i_mode info to disk, and should be removed once |
* we use notify_change to update the mode. |
* XXX: is mode setting really needed? Just setting acl's should |
* be enough! VFS should change the i_mode as needed? SHP |
*/ |
if (inode->i_acl && |
inode->i_acl != POSIX_ACL_NOT_CACHED) |
posix_acl_release(inode->i_acl); |
if (inode->i_default_acl && |
inode->i_default_acl != POSIX_ACL_NOT_CACHED) |
posix_acl_release(inode->i_default_acl); |
inode->i_acl = POSIX_ACL_NOT_CACHED; |
inode->i_default_acl = POSIX_ACL_NOT_CACHED; |
inode->i_mode = mode; |
/* inode should already be dirty...but just in case */ |
mark_inode_dirty(inode); |
return 0; |
|
#if 0 |
/* XXX: The following code is the preferred way to set mode, |
* however, I need to carefully go through possible recursion |
* paths back into presto. See comments in presto_do_setattr. |
*/ |
{ |
int error=0; |
struct super_operations *sops; |
struct iattr iattr; |
|
iattr.ia_mode = mode; |
iattr.ia_valid = ATTR_MODE|ATTR_FORCE; |
|
error = -EPERM; |
sops = filter_c2csops(fset->fset_cache->cache_filter); |
if (!sops && |
!sops->notify_change) { |
EXIT; |
return error; |
} |
|
error = sops->notify_change(dentry, &iattr); |
|
EXIT; |
return error; |
} |
#endif |
} |
#endif |
|
/* setextattr Interface to cache filesystem */ |
int presto_do_set_ext_attr(struct presto_file_set *fset, |
struct dentry *dentry, |
const char *name, void *buffer, |
size_t buffer_len, int flags, mode_t *mode, |
struct lento_vfs_context *info) |
{ |
struct rec_info rec; |
struct inode *inode = dentry->d_inode; |
struct inode_operations *iops; |
int error; |
struct presto_version ver; |
void *handle; |
char temp[PRESTO_EXT_ATTR_NAME_MAX+1]; |
|
ENTRY; |
error = -EROFS; |
if (IS_RDONLY(inode)) { |
EXIT; |
return -EROFS; |
} |
|
if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { |
EXIT; |
return -EPERM; |
} |
|
presto_getversion(&ver, inode); |
error = -EPERM; |
/* We need to invoke different filters based on whether |
* this dentry is a regular file, directory or symlink. |
*/ |
switch (inode->i_mode & S_IFMT) { |
case S_IFLNK: /* symlink */ |
iops = filter_c2csiops(fset->fset_cache->cache_filter); |
break; |
case S_IFDIR: /* directory */ |
iops = filter_c2cdiops(fset->fset_cache->cache_filter); |
break; |
case S_IFREG: |
default: /* everything else including regular files */ |
iops = filter_c2cfiops(fset->fset_cache->cache_filter); |
} |
|
if (!iops && !iops->set_ext_attr) { |
EXIT; |
return error; |
} |
|
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); |
if (error) { |
EXIT; |
return error; |
} |
|
|
handle = presto_trans_start(fset,dentry->d_inode,KML_OPCODE_SETEXTATTR); |
if ( IS_ERR(handle) ) { |
CERROR("presto_do_set_ext_attr: no space for transaction\n"); |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
return -ENOSPC; |
} |
|
/* We first "truncate" name to the maximum allowable in presto */ |
/* This simulates the strncpy_from_use code in fs/ext_attr.c */ |
strncpy(temp,name,sizeof(temp)); |
|
/* Pass down to cache*/ |
error = iops->set_ext_attr(inode,temp,buffer,buffer_len,flags); |
if (error) { |
EXIT; |
goto exit; |
} |
|
#ifdef CONFIG_FS_POSIX_ACL |
/* Reset mode if specified*/ |
/* XXX: when we do native acl support, move this code out! */ |
if (mode != NULL) { |
error = presto_setmode(fset, dentry, *mode); |
if (error) { |
EXIT; |
goto exit; |
} |
} |
#endif |
|
/* Reset ctime. Only inode change time (ctime) is affected */ |
error = presto_settime(fset, NULL, NULL, dentry, info, ATTR_CTIME); |
if (error) { |
EXIT; |
goto exit; |
} |
|
if (flags & EXT_ATTR_FLAG_USER) { |
CERROR(" USER flag passed to presto_do_set_ext_attr!\n"); |
BUG(); |
} |
|
/* We are here, so set_ext_attr succeeded. We no longer need to keep |
* track of EXT_ATTR_FLAG_{EXISTS,CREATE}, instead, we will force |
* the attribute value during log replay. -SHP |
*/ |
flags &= ~(EXT_ATTR_FLAG_EXISTS | EXT_ATTR_FLAG_CREATE); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x10); |
if ( presto_do_kml(info, dentry) ) |
error = presto_journal_set_ext_attr |
(&rec, fset, dentry, &ver, name, buffer, |
buffer_len, flags); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x20); |
if ( presto_do_rcvd(info, dentry) ) |
error = presto_write_last_rcvd(&rec, fset, info); |
|
presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x30); |
EXIT; |
exit: |
presto_release_space(fset->fset_cache, PRESTO_REQHIGH); |
presto_trans_commit(fset, handle); |
|
return error; |
} |
#endif |
/inode.c
0,0 → 1,187
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and |
* Michael Callahan <callahan@maths.ox.ac.uk> |
* Copyright (C) 1999 Carnegie Mellon University |
* Rewritten for Linux 2.1. Peter Braam <braam@cs.cmu.edu> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Super block/filesystem wide operations |
*/ |
|
#define __NO_VERSION__ |
#include <linux/module.h> |
#include <linux/kernel.h> |
#include <linux/mm.h> |
#include <linux/string.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <linux/unistd.h> |
|
#include <asm/system.h> |
#include <asm/uaccess.h> |
|
#include <linux/fs.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <linux/string.h> |
#include <asm/uaccess.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <asm/segment.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
extern void presto_free_cache(struct presto_cache *); |
|
void presto_set_ops(struct inode *inode, struct filter_fs *filter) |
{ |
ENTRY; |
|
if (!inode || is_bad_inode(inode)) |
return; |
|
if (S_ISREG(inode->i_mode)) { |
if ( !filter_c2cfiops(filter) ) { |
filter_setup_file_ops(filter, |
inode, &presto_file_iops, |
&presto_file_fops); |
} |
inode->i_op = filter_c2ufiops(filter); |
inode->i_fop = filter_c2uffops(filter); |
CDEBUG(D_INODE, "set file methods for %ld to %p\n", |
inode->i_ino, inode->i_op); |
} else if (S_ISDIR(inode->i_mode)) { |
inode->i_op = filter_c2udiops(filter); |
inode->i_fop = filter_c2udfops(filter); |
CDEBUG(D_INODE, "set dir methods for %ld to %p ioctl %p\n", |
inode->i_ino, inode->i_op, inode->i_fop->ioctl); |
} else if (S_ISLNK(inode->i_mode)) { |
if ( !filter_c2csiops(filter)) { |
filter_setup_symlink_ops(filter, |
inode, |
&presto_sym_iops, |
&presto_sym_fops); |
} |
inode->i_op = filter_c2usiops(filter); |
inode->i_fop = filter_c2usfops(filter); |
CDEBUG(D_INODE, "set link methods for %ld to %p\n", |
inode->i_ino, inode->i_op); |
} |
EXIT; |
} |
|
void presto_read_inode(struct inode *inode) |
{ |
struct presto_cache *cache; |
|
cache = presto_get_cache(inode); |
if ( !cache ) { |
CERROR("PRESTO: BAD, BAD: cannot find cache\n"); |
make_bad_inode(inode); |
return ; |
} |
|
filter_c2csops(cache->cache_filter)->read_inode(inode); |
|
CDEBUG(D_INODE, "presto_read_inode: ino %ld, gid %d\n", |
inode->i_ino, inode->i_gid); |
|
presto_set_ops(inode, cache->cache_filter); |
/* XXX handle special inodes here or not - probably not? */ |
} |
|
static void presto_put_super(struct super_block *sb) |
{ |
struct presto_cache *cache; |
struct upc_channel *channel; |
struct super_operations *sops; |
struct list_head *lh; |
int err; |
|
ENTRY; |
cache = presto_cache_find(sb->s_dev); |
if (!cache) { |
EXIT; |
goto exit; |
} |
channel = &izo_channels[presto_c2m(cache)]; |
sops = filter_c2csops(cache->cache_filter); |
err = izo_clear_all_fsetroots(cache); |
if (err) { |
CERROR("%s: err %d\n", __FUNCTION__, err); |
} |
PRESTO_FREE(cache->cache_vfsmount, sizeof(struct vfsmount)); |
|
/* look at kill_super - fsync_super is not exported GRRR but |
probably not needed */ |
unlock_super(sb); |
shrink_dcache_parent(cache->cache_root); |
dput(cache->cache_root); |
//fsync_super(sb); |
lock_super(sb); |
|
if (sops->write_super) |
sops->write_super(sb); |
|
if (sops->put_super) |
sops->put_super(sb); |
|
/* free any remaining async upcalls when the filesystem is unmounted */ |
spin_lock(&channel->uc_lock); |
lh = channel->uc_pending.next; |
while ( lh != &channel->uc_pending) { |
struct upc_req *req; |
req = list_entry(lh, struct upc_req, rq_chain); |
|
/* assignment must be here: we are about to free &lh */ |
lh = lh->next; |
if ( ! (req->rq_flags & REQ_ASYNC) ) |
continue; |
list_del(&(req->rq_chain)); |
PRESTO_FREE(req->rq_data, req->rq_bufsize); |
PRESTO_FREE(req, sizeof(struct upc_req)); |
} |
list_del(&cache->cache_channel_list); |
spin_unlock(&channel->uc_lock); |
|
presto_free_cache(cache); |
|
exit: |
CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n", |
presto_kmemory, presto_vmemory); |
MOD_DEC_USE_COUNT; |
return ; |
} |
|
struct super_operations presto_super_ops = { |
.read_inode = presto_read_inode, |
.put_super = presto_put_super, |
}; |
|
|
/* symlinks can be chowned */ |
struct inode_operations presto_sym_iops = { |
.setattr = presto_setattr |
}; |
|
/* NULL for now */ |
struct file_operations presto_sym_fops; |
/methods.c
0,0 → 1,497
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2000 Stelias Computing, Inc. |
* Copyright (C) 2000 Red Hat, Inc. |
* Copyright (C) 2000 Mountain View Data, Inc. |
* |
* Extended Attribute Support |
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
*/ |
|
#include <stdarg.h> |
|
#include <asm/bitops.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
#include <linux/errno.h> |
#include <linux/fs.h> |
#include <linux/ext2_fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/sched.h> |
#include <linux/stat.h> |
#include <linux/string.h> |
#include <linux/locks.h> |
#include <linux/blkdev.h> |
#include <linux/init.h> |
#define __NO_VERSION__ |
#include <linux/module.h> |
|
#include <linux/fsfilter.h> |
#include <linux/intermezzo_fs.h> |
|
|
/* NOTE(review): presumably gates the FENTRY/FEXIT trace output; those
 * macros are defined outside this file -- confirm. */
int filter_print_entry = 0; |
/* NOTE(review): presumably a bit mask selecting which FDEBUG classes are
 * printed (the literal has seven hex digits, i.e. 28 bits set) -- confirm. */
int filter_debug = 0xfffffff; |
/* |
* The functions in this file are responsible for setting up the |
* correct methods for layered file systems like InterMezzo and snapfs. |
*/ |
|
|
/* One filter_fs slot per supported cache filesystem type; indexed with the
 * FILTER_FS_* constants by filter_get_filter_fs(). */
static struct filter_fs filter_oppar[FILTER_FS_TYPES]; |
|
/* get to the upper methods (intermezzo, snapfs) */ |
inline struct super_operations *filter_c2usops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_sops; |
} |
|
inline struct inode_operations *filter_c2udiops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_dir_iops; |
} |
|
|
inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_file_iops; |
} |
|
inline struct inode_operations *filter_c2usiops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_sym_iops; |
} |
|
|
inline struct file_operations *filter_c2udfops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_dir_fops; |
} |
|
inline struct file_operations *filter_c2uffops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_file_fops; |
} |
|
inline struct file_operations *filter_c2usfops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_sym_fops; |
} |
|
inline struct dentry_operations *filter_c2udops(struct filter_fs *cache) |
{ |
return &cache->o_fops.filter_dentry_ops; |
} |
|
/* get to the cache (lower) methods */ |
inline struct super_operations *filter_c2csops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_sops; |
} |
|
inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_dir_iops; |
} |
|
inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_file_iops; |
} |
|
inline struct inode_operations *filter_c2csiops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_sym_iops; |
} |
|
inline struct file_operations *filter_c2cdfops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_dir_fops; |
} |
|
inline struct file_operations *filter_c2cffops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_file_fops; |
} |
|
inline struct file_operations *filter_c2csfops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_sym_fops; |
} |
|
inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache) |
{ |
return cache->o_caops.cache_dentry_ops; |
} |
|
|
void filter_setup_journal_ops(struct filter_fs *ops, char *cache_type) |
{ |
if ( strlen(cache_type) == strlen("ext2") && |
memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) { |
#if CONFIG_EXT2_FS |
ops->o_trops = &presto_ext2_journal_ops; |
#else |
ops->o_trops = NULL; |
#endif |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("ext3") && |
memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) { |
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) |
ops->o_trops = &presto_ext3_journal_ops; |
#else |
ops->o_trops = NULL; |
#endif |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("tmpfs") && |
memcmp(cache_type, "tmpfs", strlen("tmpfs")) == 0 ) { |
#if defined(CONFIG_TMPFS) |
ops->o_trops = &presto_tmpfs_journal_ops; |
#else |
ops->o_trops = NULL; |
#endif |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("reiserfs") && |
memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) { |
#if 0 |
/* #if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) */ |
ops->o_trops = &presto_reiserfs_journal_ops; |
#else |
ops->o_trops = NULL; |
#endif |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("xfs") && |
memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) { |
#if 0 |
/*#if defined(CONFIG_XFS_FS) || defined (CONFIG_XFS_FS_MODULE) */ |
ops->o_trops = &presto_xfs_journal_ops; |
#else |
ops->o_trops = NULL; |
#endif |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("obdfs") && |
memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) { |
#if defined(CONFIG_OBDFS_FS) || defined (CONFIG_OBDFS_FS_MODULE) |
ops->o_trops = presto_obdfs_journal_ops; |
#else |
ops->o_trops = NULL; |
#endif |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
} |
|
|
/* find the cache for this FS */ |
struct filter_fs *filter_get_filter_fs(const char *cache_type) |
{ |
struct filter_fs *ops = NULL; |
FENTRY; |
|
if ( strlen(cache_type) == strlen("ext2") && |
memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) { |
ops = &filter_oppar[FILTER_FS_EXT2]; |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("xfs") && |
memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) { |
ops = &filter_oppar[FILTER_FS_XFS]; |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("ext3") && |
memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) { |
ops = &filter_oppar[FILTER_FS_EXT3]; |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("tmpfs") && |
memcmp(cache_type, "tmpfs", strlen("tmpfs")) == 0 ) { |
ops = &filter_oppar[FILTER_FS_TMPFS]; |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if ( strlen(cache_type) == strlen("reiserfs") && |
memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) { |
ops = &filter_oppar[FILTER_FS_REISERFS]; |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
if ( strlen(cache_type) == strlen("obdfs") && |
memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) { |
ops = &filter_oppar[FILTER_FS_OBDFS]; |
FDEBUG(D_SUPER, "ops at %p\n", ops); |
} |
|
if (ops == NULL) { |
CERROR("prepare to die: unrecognized cache type for Filter\n"); |
} |
return ops; |
FEXIT; |
} |
|
|
/* |
* Frobnicate the InterMezzo operations |
* this establishes the link between the InterMezzo file system |
* and the underlying file system used for the cache. |
*/ |
|
void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops) |
{ |
/* Get ptr to the shared struct snapfs_ops structure. */ |
struct filter_ops *props = &cache->o_fops; |
/* Get ptr to the shared struct cache_ops structure. */ |
struct cache_ops *caops = &cache->o_caops; |
|
FENTRY; |
|
if ( cache->o_flags & FILTER_DID_SUPER_OPS ) { |
FEXIT; |
return; |
} |
cache->o_flags |= FILTER_DID_SUPER_OPS; |
|
/* Set the cache superblock operations to point to the |
superblock operations of the underlying file system. */ |
caops->cache_sops = cache_sops; |
|
/* |
* Copy the cache (real fs) superblock ops to the "filter" |
* superblock ops as defaults. Some will be changed below |
*/ |
memcpy(&props->filter_sops, cache_sops, sizeof(*cache_sops)); |
|
/* 'put_super' unconditionally is that of filter */ |
if (filter_sops->put_super) { |
props->filter_sops.put_super = filter_sops->put_super; |
} |
|
if (cache_sops->read_inode) { |
props->filter_sops.read_inode = filter_sops->read_inode; |
FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n", |
cache, cache, props->filter_sops.read_inode); |
} |
|
if (cache_sops->remount_fs) |
props->filter_sops.remount_fs = filter_sops->remount_fs; |
FEXIT; |
} |
|
|
void filter_setup_dir_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) |
{ |
struct inode_operations *cache_filter_iops; |
struct inode_operations *cache_iops = inode->i_op; |
struct file_operations *cache_fops = inode->i_fop; |
FENTRY; |
|
if ( cache->o_flags & FILTER_DID_DIR_OPS ) { |
FEXIT; |
return; |
} |
cache->o_flags |= FILTER_DID_DIR_OPS; |
|
/* former ops become cache_ops */ |
cache->o_caops.cache_dir_iops = cache_iops; |
cache->o_caops.cache_dir_fops = cache_fops; |
FDEBUG(D_SUPER, "filter at %p, cache iops %p, iops %p\n", |
cache, cache_iops, filter_c2udiops(cache)); |
|
/* setup our dir iops: copy and modify */ |
memcpy(filter_c2udiops(cache), cache_iops, sizeof(*cache_iops)); |
|
/* abbreviate */ |
cache_filter_iops = filter_c2udiops(cache); |
|
/* methods that filter if cache filesystem has these ops */ |
if (cache_iops->lookup && filter_iops->lookup) |
cache_filter_iops->lookup = filter_iops->lookup; |
if (cache_iops->create && filter_iops->create) |
cache_filter_iops->create = filter_iops->create; |
if (cache_iops->link && filter_iops->link) |
cache_filter_iops->link = filter_iops->link; |
if (cache_iops->unlink && filter_iops->unlink) |
cache_filter_iops->unlink = filter_iops->unlink; |
if (cache_iops->mkdir && filter_iops->mkdir) |
cache_filter_iops->mkdir = filter_iops->mkdir; |
if (cache_iops->rmdir && filter_iops->rmdir) |
cache_filter_iops->rmdir = filter_iops->rmdir; |
if (cache_iops->symlink && filter_iops->symlink) |
cache_filter_iops->symlink = filter_iops->symlink; |
if (cache_iops->rename && filter_iops->rename) |
cache_filter_iops->rename = filter_iops->rename; |
if (cache_iops->mknod && filter_iops->mknod) |
cache_filter_iops->mknod = filter_iops->mknod; |
if (cache_iops->permission && filter_iops->permission) |
cache_filter_iops->permission = filter_iops->permission; |
if (cache_iops->getattr) |
cache_filter_iops->getattr = filter_iops->getattr; |
/* Some filesystems do not use a setattr method of their own |
instead relying on inode_setattr/write_inode. We still need to |
journal these so we make setattr an unconditional operation. |
XXX: we should probably check for write_inode. SHP |
*/ |
/*if (cache_iops->setattr)*/ |
cache_filter_iops->setattr = filter_iops->setattr; |
#ifdef CONFIG_FS_EXT_ATTR |
/* For now we assume that posix acls are handled through extended |
* attributes. If this is not the case, we must explicitly trap |
* posix_set_acl. SHP |
*/ |
if (cache_iops->set_ext_attr && filter_iops->set_ext_attr) |
cache_filter_iops->set_ext_attr = filter_iops->set_ext_attr; |
#endif |
|
|
/* copy dir fops */ |
memcpy(filter_c2udfops(cache), cache_fops, sizeof(*cache_fops)); |
|
/* unconditional filtering operations */ |
filter_c2udfops(cache)->ioctl = filter_fops->ioctl; |
|
FEXIT; |
} |
|
|
void filter_setup_file_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) |
{ |
struct inode_operations *pr_iops; |
struct inode_operations *cache_iops = inode->i_op; |
struct file_operations *cache_fops = inode->i_fop; |
FENTRY; |
|
if ( cache->o_flags & FILTER_DID_FILE_OPS ) { |
FEXIT; |
return; |
} |
cache->o_flags |= FILTER_DID_FILE_OPS; |
|
/* steal the old ops */ |
/* former ops become cache_ops */ |
cache->o_caops.cache_file_iops = cache_iops; |
cache->o_caops.cache_file_fops = cache_fops; |
|
/* abbreviate */ |
pr_iops = filter_c2ufiops(cache); |
|
/* setup our dir iops: copy and modify */ |
memcpy(pr_iops, cache_iops, sizeof(*cache_iops)); |
|
/* copy dir fops */ |
CERROR("*** cache file ops at %p\n", cache_fops); |
memcpy(filter_c2uffops(cache), cache_fops, sizeof(*cache_fops)); |
|
/* assign */ |
/* See comments above in filter_setup_dir_ops. SHP */ |
/*if (cache_iops->setattr)*/ |
pr_iops->setattr = filter_iops->setattr; |
if (cache_iops->getattr) |
pr_iops->getattr = filter_iops->getattr; |
/* XXX Should this be conditional rmr ? */ |
pr_iops->permission = filter_iops->permission; |
#ifdef CONFIG_FS_EXT_ATTR |
/* For now we assume that posix acls are handled through extended |
* attributes. If this is not the case, we must explicitly trap and |
* posix_set_acl |
*/ |
if (cache_iops->set_ext_attr && filter_iops->set_ext_attr) |
pr_iops->set_ext_attr = filter_iops->set_ext_attr; |
#endif |
|
|
/* unconditional filtering operations */ |
filter_c2uffops(cache)->open = filter_fops->open; |
filter_c2uffops(cache)->release = filter_fops->release; |
filter_c2uffops(cache)->write = filter_fops->write; |
filter_c2uffops(cache)->ioctl = filter_fops->ioctl; |
|
FEXIT; |
} |
|
/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */ |
void filter_setup_symlink_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) |
{ |
struct inode_operations *pr_iops; |
struct inode_operations *cache_iops = inode->i_op; |
struct file_operations *cache_fops = inode->i_fop; |
FENTRY; |
|
if ( cache->o_flags & FILTER_DID_SYMLINK_OPS ) { |
FEXIT; |
return; |
} |
cache->o_flags |= FILTER_DID_SYMLINK_OPS; |
|
/* steal the old ops */ |
cache->o_caops.cache_sym_iops = cache_iops; |
cache->o_caops.cache_sym_fops = cache_fops; |
|
/* abbreviate */ |
pr_iops = filter_c2usiops(cache); |
|
/* setup our dir iops: copy and modify */ |
memcpy(pr_iops, cache_iops, sizeof(*cache_iops)); |
|
/* See comments above in filter_setup_dir_ops. SHP */ |
/* if (cache_iops->setattr) */ |
pr_iops->setattr = filter_iops->setattr; |
if (cache_iops->getattr) |
pr_iops->getattr = filter_iops->getattr; |
|
/* assign */ |
/* copy fops - careful for symlinks they might be NULL */ |
if ( cache_fops ) { |
memcpy(filter_c2usfops(cache), cache_fops, sizeof(*cache_fops)); |
} |
|
FEXIT; |
} |
|
void filter_setup_dentry_ops(struct filter_fs *cache, |
struct dentry_operations *cache_dop, |
struct dentry_operations *filter_dop) |
{ |
if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) { |
FEXIT; |
return; |
} |
cache->o_flags |= FILTER_DID_DENTRY_OPS; |
|
cache->o_caops.cache_dentry_ops = cache_dop; |
memcpy(&cache->o_fops.filter_dentry_ops, |
filter_dop, sizeof(*filter_dop)); |
|
if (cache_dop && cache_dop != filter_dop && cache_dop->d_revalidate){ |
CERROR("WARNING: filter overriding revalidation!\n"); |
} |
return; |
} |
/cache.c
0,0 → 1,204
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2000 Stelias Computing, Inc. |
* Copyright (C) 2000 Red Hat, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#define __NO_VERSION__ |
#include <linux/module.h> |
#include <stdarg.h> |
#include <asm/bitops.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
#include <linux/errno.h> |
#include <linux/fs.h> |
#include <linux/ext2_fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/sched.h> |
#include <linux/stat.h> |
#include <linux/string.h> |
#include <linux/locks.h> |
#include <linux/blkdev.h> |
#include <linux/init.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
/* |
This file contains the routines associated with managing a |
cache of files for InterMezzo. These caches have two reqs: |
- need to be found fast so they are hashed by the device, |
with an attempt to have collision chains of length 1. |
The methods for the cache are set up in methods. |
*/ |
|
extern kmem_cache_t * presto_dentry_slab; |
|
/* the intent of this hash is to have collision chains of length 1 */
#define CACHES_BITS 8
#define CACHES_SIZE (1 << CACHES_BITS)
/* parenthesized so the mask survives use inside a larger expression */
#define CACHES_MASK (CACHES_SIZE - 1)
static struct list_head presto_caches[CACHES_SIZE]; |
|
static inline int presto_cache_hash(kdev_t dev) |
{ |
return (CACHES_MASK) & ((0x000F & (dev)) + ((0x0F00 & (dev)) >>8)); |
} |
|
/* Insert cache into the hash bucket for dev and record dev in it. */
inline void presto_cache_add(struct presto_cache *cache, kdev_t dev)
{
        struct list_head *bucket = &presto_caches[presto_cache_hash(dev)];

        list_add(&cache->cache_chain, bucket);
        cache->cache_dev = dev;
}
|
inline void presto_cache_init_hash(void) |
{ |
int i; |
for ( i = 0; i < CACHES_SIZE; i++ ) { |
INIT_LIST_HEAD(&presto_caches[i]); |
} |
} |
|
/* map a device to a cache */ |
struct presto_cache *presto_cache_find(kdev_t dev) |
{ |
struct presto_cache *cache; |
struct list_head *lh, *tmp; |
|
lh = tmp = &(presto_caches[presto_cache_hash(dev)]); |
while ( (tmp = lh->next) != lh ) { |
cache = list_entry(tmp, struct presto_cache, cache_chain); |
if ( cache->cache_dev == dev ) { |
return cache; |
} |
} |
return NULL; |
} |
|
|
/* map an inode to a cache */ |
struct presto_cache *presto_get_cache(struct inode *inode) |
{ |
struct presto_cache *cache; |
ENTRY; |
/* find the correct presto_cache here, based on the device */ |
cache = presto_cache_find(inode->i_dev); |
if ( !cache ) { |
CERROR("WARNING: no presto cache for dev %x, ino %ld\n", |
inode->i_dev, inode->i_ino); |
EXIT; |
return NULL; |
} |
EXIT; |
return cache; |
} |
|
/* another debugging routine: check fs is InterMezzo fs */ |
int presto_ispresto(struct inode *inode) |
{ |
struct presto_cache *cache; |
|
if ( !inode ) |
return 0; |
cache = presto_get_cache(inode); |
if ( !cache ) |
return 0; |
return (inode->i_dev == cache->cache_dev); |
} |
|
/* setup a cache structure when we need one */ |
struct presto_cache *presto_cache_init(void) |
{ |
struct presto_cache *cache; |
|
PRESTO_ALLOC(cache, sizeof(struct presto_cache)); |
if ( cache ) { |
memset(cache, 0, sizeof(struct presto_cache)); |
INIT_LIST_HEAD(&cache->cache_chain); |
INIT_LIST_HEAD(&cache->cache_fset_list); |
cache->cache_lock = SPIN_LOCK_UNLOCKED; |
cache->cache_reserved = 0; |
} |
return cache; |
} |
|
/* free a cache structure and all of the memory it is pointing to */ |
inline void presto_free_cache(struct presto_cache *cache) |
{ |
if (!cache) |
return; |
|
list_del(&cache->cache_chain); |
if (cache->cache_sb && cache->cache_sb->s_root && |
presto_d2d(cache->cache_sb->s_root)) { |
kmem_cache_free(presto_dentry_slab, |
presto_d2d(cache->cache_sb->s_root)); |
cache->cache_sb->s_root->d_fsdata = NULL; |
} |
|
PRESTO_FREE(cache, sizeof(struct presto_cache)); |
} |
|
/* Reserve req units of space in the cache.
 *
 * When the filter, its journal operations or their tr_avail method are
 * missing, nothing is accounted and 0 is returned.  Otherwise the request
 * is added to cache_reserved under cache_lock unless the total would
 * exceed what tr_avail reports, in which case -ENOSPC is returned.
 */
int presto_reserve_space(struct presto_cache *cache, loff_t req)
{
        struct filter_fs *filter = cache->cache_filter;
        struct super_block *sb = cache->cache_sb;
        loff_t avail;
        int rc = 0;

        if (!filter || !filter->o_trops || !filter->o_trops->tr_avail) {
                EXIT;
                return 0;
        }

        spin_lock(&cache->cache_lock);
        avail = filter->o_trops->tr_avail(cache, sb);
        CDEBUG(D_SUPER, "ESC::%ld +++> %ld \n", (long) cache->cache_reserved,
               (long) (cache->cache_reserved + req));
        CDEBUG(D_SUPER, "ESC::Avail::%ld \n", (long) avail);
        if (req + cache->cache_reserved > avail)
                rc = -ENOSPC;
        else
                cache->cache_reserved += req;
        spin_unlock(&cache->cache_lock);

        EXIT;
        return rc;
}
|
/*
 * Give back 'req' bytes of reserved space on the cache.
 *
 * The debug trace is emitted while cache_lock is held, as in
 * presto_reserve_space(), so the before/after values it prints are a
 * consistent snapshot of cache_reserved rather than an unlocked read.
 */
void presto_release_space(struct presto_cache *cache, loff_t req)
{
        spin_lock(&cache->cache_lock);
        CDEBUG(D_SUPER, "ESC::%ld ---> %ld \n", (long) cache->cache_reserved,
               (long) (cache->cache_reserved - req));
        cache->cache_reserved -= req;
        spin_unlock(&cache->cache_lock);
}
/replicator.c
0,0 → 1,291
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> |
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Manage RCVD records for clients in the kernel |
* |
*/ |
|
#define __NO_VERSION__ |
#include <linux/module.h> |
#include <stdarg.h> |
#include <asm/uaccess.h> |
|
#include <linux/errno.h> |
|
#include <linux/intermezzo_fs.h> |
|
/* |
* this file contains a hash table of replicators/clients for a |
* fileset. It allows fast lookup and update of reintegration status |
*/ |
|
/* One entry of the replicator hash: associates a client UUID with an offset. */
struct izo_offset_rec { |
struct list_head or_list; /* link within a hash bucket */ |
char or_uuid[16]; /* client UUID; compared with memcmp() on lookup */ |
loff_t or_offset; /* offset recorded by izo_rep_cache_add() */ |
}; |
|
/* The hash is an array of RCACHE_SIZE (1 << RCACHE_BITS) bucket list heads;
 * RCACHE_MASK reduces a value to a valid bucket index. */
#define RCACHE_BITS 8 |
#define RCACHE_SIZE (1 << RCACHE_BITS) |
#define RCACHE_MASK (RCACHE_SIZE - 1) |
|
static struct list_head * |
izo_rep_cache(void) |
{ |
int i; |
struct list_head *cache; |
PRESTO_ALLOC(cache, sizeof(struct list_head) * RCACHE_SIZE); |
if (cache == NULL) { |
CERROR("intermezzo-fatal: no memory for replicator cache\n"); |
return NULL; |
} |
memset(cache, 0, sizeof(struct list_head) * RCACHE_SIZE); |
for (i = 0; i < RCACHE_SIZE; i++) |
INIT_LIST_HEAD(&cache[i]); |
|
return cache; |
} |
|
static struct list_head * |
izo_rep_hash(struct list_head *cache, char *uuid) |
{ |
return &cache[(RCACHE_MASK & uuid[1])]; |
} |
|
static void |
izo_rep_cache_clean(struct presto_file_set *fset) |
{ |
int i; |
struct list_head *bucket; |
struct list_head *tmp; |
|
if (fset->fset_clients == NULL) |
return; |
for (i = 0; i < RCACHE_SIZE; i++) { |
tmp = bucket = &fset->fset_clients[i]; |
|
tmp = tmp->next; |
while (tmp != bucket) { |
struct izo_offset_rec *offrec; |
tmp = tmp->next; |
list_del(tmp); |
offrec = list_entry(tmp, struct izo_offset_rec, |
or_list); |
PRESTO_FREE(offrec, sizeof(struct izo_offset_rec)); |
} |
} |
} |
|
struct izo_offset_rec * |
izo_rep_cache_find(struct presto_file_set *fset, char *uuid) |
{ |
struct list_head *buck = izo_rep_hash(fset->fset_clients, uuid); |
struct list_head *tmp = buck; |
struct izo_offset_rec *rec = NULL; |
|
while ( (tmp = tmp->next) != buck ) { |
rec = list_entry(tmp, struct izo_offset_rec, or_list); |
if ( memcmp(rec->or_uuid, uuid, sizeof(rec->or_uuid)) == 0 ) |
return rec; |
} |
|
return NULL; |
} |
|
static int |
izo_rep_cache_add(struct presto_file_set *fset, struct izo_rcvd_rec *rec, |
loff_t offset) |
{ |
struct izo_offset_rec *offrec; |
|
if (izo_rep_cache_find(fset, rec->lr_uuid)) { |
CERROR("izo: duplicate client entry %s off %Ld\n", |
fset->fset_name, offset); |
return -EINVAL; |
} |
|
PRESTO_ALLOC(offrec, sizeof(*offrec)); |
if (offrec == NULL) { |
CERROR("izo: cannot allocate offrec\n"); |
return -ENOMEM; |
} |
|
memcpy(offrec->or_uuid, rec->lr_uuid, sizeof(rec->lr_uuid)); |
offrec->or_offset = offset; |
|
list_add(&offrec->or_list, |
izo_rep_hash(fset->fset_clients, rec->lr_uuid)); |
return 0; |
} |
|
/*
 * Build the in-memory replicator hash for a fileset from its fset_rcvd file.
 *
 * Allocates the bucket array, then reads fixed-size izo_rcvd_rec records
 * from the start of the file until a read does not return a whole record,
 * adding each one to the hash.
 *
 * Returns 0 on success, -ENOMEM if the bucket array cannot be allocated, or
 * the negative error from izo_rep_cache_add() (after the records added so
 * far have been cleaned out).
 */
int |
izo_rep_cache_init(struct presto_file_set *fset) |
{ |
struct izo_rcvd_rec rec; |
loff_t offset = 0, last_offset = 0; |
|
fset->fset_clients = izo_rep_cache(); |
if (fset->fset_clients == NULL) { |
CERROR("Error initializing client cache\n"); |
return -ENOMEM; |
} |
|
/* 'last_offset' trails 'offset' by one iteration: it holds the value
 * 'offset' had before the record just read, which is what gets cached. */
while ( presto_fread(fset->fset_rcvd.fd_file, (char *)&rec, |
sizeof(rec), &offset) == sizeof(rec) ) { |
int rc; |
|
if ((rc = izo_rep_cache_add(fset, &rec, last_offset)) < 0) { |
/* NOTE(review): the bucket array stays allocated in
 * fset->fset_clients here -- confirm who frees it. */
izo_rep_cache_clean(fset); |
return rc; |
} |
|
last_offset = offset; |
} |
|
return 0; |
} |
|
/* |
* Return local last_rcvd record for the client. Update or create |
* if necessary. |
* |
* lr_server is filled from izo_rcvd_get(); if that lookup fails with |
* -EINVAL, or the client reports a zero KML size and a zero remote offset, |
* lr_server is reset to an all-zero record carrying the client's UUID and |
* written back with izo_rcvd_write().  The result is then passed to |
* izo_upc_repstatus(), whose return value is returned. |
* |
* Any error from izo_rcvd_get() other than -EINVAL, and any error from |
* izo_rcvd_write(), is returned immediately. |
* |
* XXX: After this call, any -EINVAL from izo_rcvd_get is a real error. |
*/ |
int |
izo_repstatus(struct presto_file_set *fset, __u64 client_kmlsize, |
struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server) |
{ |
int rc; |
rc = izo_rcvd_get(lr_server, fset, lr_client->lr_uuid); |
if (rc < 0 && rc != -EINVAL) { |
return rc; |
} |
|
/* client is new or has been reset. */ |
if (rc < 0 || (client_kmlsize == 0 && lr_client->lr_remote_offset == 0)) { |
memset(lr_server, 0, sizeof(*lr_server)); |
memcpy(lr_server->lr_uuid, lr_client->lr_uuid, sizeof(lr_server->lr_uuid)); |
rc = izo_rcvd_write(fset, lr_server); |
if (rc < 0) |
return rc; |
} |
|
/* update intersync */ |
rc = izo_upc_repstatus(presto_f2m(fset), fset->fset_name, lr_server); |
return rc; |
} |
|
/*
 * Find the cached offset for 'uuid' and, when 'rec' is non-NULL, read the
 * record stored at that offset in the fset_rcvd file into 'rec'.
 *
 * The 64-bit counters read from the file are converted with le64_to_cpu();
 * the UUID is copied unchanged.
 *
 * Returns the record's offset on success, -EINVAL if the UUID is not in the
 * hash, or -EIO if a full record could not be read.
 */
loff_t |
izo_rcvd_get(struct izo_rcvd_rec *rec, struct presto_file_set *fset, char *uuid) |
{ |
struct izo_offset_rec *offrec; |
struct izo_rcvd_rec tmprec; |
loff_t offset; |
|
offrec = izo_rep_cache_find(fset, uuid); |
if (offrec == NULL) { |
CDEBUG(D_SPECIAL, "izo_get_rcvd: uuid not in hash.\n"); |
return -EINVAL; |
} |
offset = offrec->or_offset; |
|
/* Caller only wants to know where the record lives. */
if (rec == NULL) |
return offset; |
|
if (presto_fread(fset->fset_rcvd.fd_file, (char *)&tmprec, |
sizeof(tmprec), &offset) != sizeof(tmprec)) { |
CERROR("izo_get_rcvd: Unable to read from last_rcvd file offset " |
"%Lu\n", offset); |
return -EIO; |
} |
|
memcpy(rec->lr_uuid, tmprec.lr_uuid, sizeof(tmprec.lr_uuid)); |
rec->lr_remote_recno = le64_to_cpu(tmprec.lr_remote_recno); |
rec->lr_remote_offset = le64_to_cpu(tmprec.lr_remote_offset); |
rec->lr_local_recno = le64_to_cpu(tmprec.lr_local_recno); |
rec->lr_local_offset = le64_to_cpu(tmprec.lr_local_offset); |
rec->lr_last_ctime = le64_to_cpu(tmprec.lr_last_ctime); |
|
/* 'offset' was handed to presto_fread() by pointer, so the cached value
 * is returned instead of the local copy. */
return offrec->or_offset; |
} |
|
/* Try to lookup the UUID in the hash. Insert it if it isn't found. Write the |
* data to the file. |
* |
* For a UUID that is not yet cached, space for one record is claimed at |
* fset_rcvd.fd_offset (under fd_lock) and that offset is added to the hash. |
* |
* NOTE(review): 'rec' is written as-is, while izo_rcvd_get() converts the |
* counters with le64_to_cpu() on read -- confirm callers pass the expected |
* byte order. |
* |
* Returns the offset of the beginning of the record in the last_rcvd file, |
* or a negative error from izo_rep_cache_add() or presto_fwrite(). */ |
loff_t |
izo_rcvd_write(struct presto_file_set *fset, struct izo_rcvd_rec *rec) |
{ |
struct izo_offset_rec *offrec; |
loff_t offset, rc; |
|
ENTRY; |
|
offrec = izo_rep_cache_find(fset, rec->lr_uuid); |
if (offrec == NULL) { |
/* I don't think it should be possible for an entry to be not in |
* the hash table without also having an invalid offset, but we |
* handle it gracefully regardless. */ |
write_lock(&fset->fset_rcvd.fd_lock); |
offset = fset->fset_rcvd.fd_offset; |
fset->fset_rcvd.fd_offset += sizeof(*rec); |
write_unlock(&fset->fset_rcvd.fd_lock); |
|
rc = izo_rep_cache_add(fset, rec, offset); |
if (rc < 0) { |
EXIT; |
return rc; |
} |
} else |
offset = offrec->or_offset; |
|
|
rc = presto_fwrite(fset->fset_rcvd.fd_file, (char *)rec, sizeof(*rec), |
&offset); |
if (rc == sizeof(*rec)) |
/* presto_fwrite() advances 'offset' */ |
rc = offset - sizeof(*rec); |
|
EXIT; |
return rc; |
} |
|
loff_t |
izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid, __u64 remote_recno, |
__u64 remote_offset) |
{ |
struct izo_rcvd_rec rec; |
|
loff_t rc; |
|
ENTRY; |
rc = izo_rcvd_get(&rec, fset, uuid); |
if (rc < 0) |
return rc; |
rec.lr_remote_recno = remote_recno; |
rec.lr_remote_offset = remote_offset; |
|
rc = izo_rcvd_write(fset, &rec); |
EXIT; |
if (rc < 0) |
return rc; |
return 0; |
} |
/dcache.c
0,0 → 1,345
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Original version: Copyright (C) 1996 P. Braam and M. Callahan |
* Rewritten for Linux 2.1. Copyright (C) 1997 Carnegie Mellon University |
* d_fsdata and NFS compatibility fixes Copyright (C) 2001 Tacit Networks, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Directory operations for InterMezzo filesystem |
*/ |
|
/* inode dentry alias list walking code adapted from linux/fs/dcache.c |
* |
* fs/dcache.c |
* |
* (C) 1997 Thomas Schoebel-Theuer, |
* with heavy changes by Linus Torvalds |
*/ |
|
#define __NO_VERSION__ |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <linux/slab.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#include <linux/smp_lock.h> |
#include <linux/vmalloc.h> |
|
#include <linux/intermezzo_fs.h> |
|
/* Slab cache from which struct presto_dentry_data objects are allocated;
 * created by presto_init_ddata_cache(). */
kmem_cache_t * presto_dentry_slab; |
|
/* called when a cache lookup succeeds |
* |
* Returns 0 (not valid) for a negative dentry, a bad inode, or a dentry |
* for which presto_set_dd() could not attach dentry data; returns 1 |
* otherwise.  When the root fileset has FSET_FLAT_BRANCH set, the dentry's |
* fileset is forced to the root fileset. |
*/ |
static int presto_d_revalidate(struct dentry *de, int flag) |
{ |
struct inode *inode = de->d_inode; |
struct presto_file_set * root_fset; |
|
ENTRY; |
if (!inode) { |
EXIT; |
return 0; |
} |
|
if (is_bad_inode(inode)) { |
EXIT; |
return 0; |
} |
|
/* Attach dentry data lazily if this dentry has none yet. */
if (!presto_d2d(de)) { |
presto_set_dd(de); |
} |
|
/* presto_set_dd() may have failed and left d_fsdata unset. */
if (!presto_d2d(de)) { |
EXIT; |
return 0; |
} |
|
root_fset = presto_d2d(de->d_inode->i_sb->s_root)->dd_fset; |
if (root_fset->fset_flags & FSET_FLAT_BRANCH && |
(presto_d2d(de)->dd_fset != root_fset )) { |
presto_d2d(de)->dd_fset = root_fset; |
} |
|
EXIT; |
return 1; |
|
/* Everything below is compiled out and sits after the return above. */
#if 0 |
/* The following is needed for metadata on demand. */ |
if ( S_ISDIR(inode->i_mode) ) { |
EXIT; |
return (presto_chk(de, PRESTO_DATA) && |
(presto_chk(de, PRESTO_ATTR))); |
} else { |
EXIT; |
return presto_chk(de, PRESTO_ATTR); |
} |
#endif |
} |
|
static void presto_d_release(struct dentry *dentry) |
{ |
if (!presto_d2d(dentry)) { |
/* This can happen for dentries from NFSd */ |
return; |
} |
presto_d2d(dentry)->dd_count--; |
|
if (!presto_d2d(dentry)->dd_count) { |
kmem_cache_free(presto_dentry_slab, presto_d2d(dentry)); |
dentry->d_fsdata = NULL; |
} |
} |
|
/* Dentry operations table for InterMezzo: only revalidate and release are
 * provided; every other hook is left unset. */
struct dentry_operations presto_dentry_ops = |
{ |
.d_revalidate = presto_d_revalidate, |
.d_release = presto_d_release |
}; |
|
/* Combines two name tests: dentry_name_cmp(dentry, "ROOT") must be non-zero
 * and dentry_name_cmp(parent, ".intermezzo") must be zero.
 * NOTE(review): presumably dentry_name_cmp() is non-zero on a match, making
 * this "named ROOT and parent is not .intermezzo" -- confirm against the
 * macro's definition. */
static inline int presto_is_dentry_ROOT (struct dentry *dentry) |
{ |
return(dentry_name_cmp(dentry,"ROOT") && |
!dentry_name_cmp(dentry->d_parent,".intermezzo")); |
} |
|
/*
 * Walk from 'dentry' towards the filesystem root looking for the nearest
 * dentry (including 'dentry' itself) that carries presto_dentry_data, and
 * return that data's fileset.
 *
 * *is_under_d_intermezzo is set to non-zero when a ".intermezzo" name test
 * succeeds on the way up before any "ROOT" dentry was seen, or when the
 * dentry data found has PRESTO_DONT_JOURNAL set and no "ROOT" was seen.
 *
 * Returns NULL if the walk reaches a dentry that is its own parent without
 * finding any dentry data.
 */
static struct presto_file_set* presto_try_find_fset(struct dentry* dentry, |
int *is_under_d_intermezzo) |
{ |
struct dentry* temp_dentry; |
struct presto_dentry_data *d_data; |
int found_root=0; |
|
ENTRY; |
CDEBUG(D_FSDATA, "finding fileset for %p:%s\n", dentry, |
dentry->d_name.name); |
|
*is_under_d_intermezzo = 0; |
|
/* walk up through the branch to get the fileset */ |
/* The dentry we are passed presumably does not have the correct |
* fset information. However, we still want to start walking up |
* the branch from this dentry to get our found_root and |
* is_under_d_intermezzo decisions correct |
*/ |
for (temp_dentry = dentry ; ; temp_dentry = temp_dentry->d_parent) { |
CDEBUG(D_FSDATA, "--->dentry %p:%*s\n", temp_dentry, |
temp_dentry->d_name.len,temp_dentry->d_name.name); |
if (presto_is_dentry_ROOT(temp_dentry)) |
found_root = 1; |
if (!found_root && |
dentry_name_cmp(temp_dentry, ".intermezzo")) { |
*is_under_d_intermezzo = 1; |
} |
d_data = presto_d2d(temp_dentry); |
if (d_data) { |
/* If we found a "ROOT" dentry while walking up the |
* branch, we will journal regardless of whether |
* we are under .intermezzo or not. |
* If we are already under d_intermezzo don't reverse |
* the decision here...even if we found a "ROOT" |
* dentry above .intermezzo (if we were ever to |
* modify the directory structure). |
*/ |
if (!*is_under_d_intermezzo) |
*is_under_d_intermezzo = !found_root && |
(d_data->dd_flags & PRESTO_DONT_JOURNAL); |
EXIT; |
return d_data->dd_fset; |
} |
/* A dentry that is its own parent ends the walk. */
if (temp_dentry->d_parent == temp_dentry) { |
break; |
} |
} |
EXIT; |
return NULL; |
} |
|
/* Only call this function on positive dentries */ |
static struct presto_dentry_data* presto_try_find_alias_with_dd ( |
struct dentry* dentry) |
{ |
struct inode *inode=dentry->d_inode; |
struct list_head *head, *next, *tmp; |
struct dentry *tmp_dentry; |
|
/* Search through the alias list for dentries with d_fsdata */ |
spin_lock(&dcache_lock); |
head = &inode->i_dentry; |
next = inode->i_dentry.next; |
while (next != head) { |
tmp = next; |
next = tmp->next; |
tmp_dentry = list_entry(tmp, struct dentry, d_alias); |
if (!presto_d2d(tmp_dentry)) { |
spin_unlock(&dcache_lock); |
return presto_d2d(tmp_dentry); |
} |
} |
spin_unlock(&dcache_lock); |
return NULL; |
} |
|
/* Only call this function on positive dentries */ |
static void presto_set_alias_dd (struct dentry *dentry, |
struct presto_dentry_data* dd) |
{ |
struct inode *inode=dentry->d_inode; |
struct list_head *head, *next, *tmp; |
struct dentry *tmp_dentry; |
|
/* Set d_fsdata for this dentry */ |
dd->dd_count++; |
dentry->d_fsdata = dd; |
|
/* Now set d_fsdata for all dentries in the alias list. */ |
spin_lock(&dcache_lock); |
head = &inode->i_dentry; |
next = inode->i_dentry.next; |
while (next != head) { |
tmp = next; |
next = tmp->next; |
tmp_dentry = list_entry(tmp, struct dentry, d_alias); |
if (!presto_d2d(tmp_dentry)) { |
dd->dd_count++; |
tmp_dentry->d_fsdata = dd; |
} |
} |
spin_unlock(&dcache_lock); |
return; |
} |
|
inline struct presto_dentry_data *izo_alloc_ddata(void) |
{ |
struct presto_dentry_data *dd; |
|
dd = kmem_cache_alloc(presto_dentry_slab, SLAB_KERNEL); |
if (dd == NULL) { |
CERROR("IZO: out of memory trying to allocate presto_dentry_data\n"); |
return NULL; |
} |
memset(dd, 0, sizeof(*dd)); |
dd->dd_count = 1; |
|
return dd; |
} |
|
/* This uses the BKL! |
* |
* Attach presto_dentry_data to a dentry that has none. |
* |
* For a positive dentry, data already attached to an alias of the same |
* inode is reused when presto_try_find_alias_with_dd() finds some. |
* Otherwise the fileset is located by walking up the tree and a fresh |
* presto_dentry_data is allocated; PRESTO_DONT_JOURNAL is set on it when |
* the walk reported the dentry as being under .intermezzo. |
* |
* Returns 0 on success (including the case where d_fsdata was already set), |
* -EROFS when no fileset can be found, and -ENOMEM when allocation fails. |
* A NULL dentry triggers BUG(). |
*/ |
int presto_set_dd(struct dentry * dentry) |
{ |
struct presto_file_set *fset; |
struct presto_dentry_data *dd; |
int is_under_d_izo; |
int error=0; |
|
ENTRY; |
|
if (!dentry) |
BUG(); |
|
lock_kernel(); |
|
/* Did we lose a race? */ |
if (dentry->d_fsdata) { |
CERROR("dentry %p already has d_fsdata set\n", dentry); |
if (dentry->d_inode) |
CERROR(" inode: %ld\n", dentry->d_inode->i_ino); |
EXIT; |
goto out_unlock; |
} |
|
if (dentry->d_inode != NULL) { |
/* NFSd runs find_fh_dentry which instantiates disconnected |
* dentries which are then connected without a lookup(). |
* So it is possible to have connected dentries that do not |
* have d_fsdata set. So we walk the list trying to find |
* an alias which has its d_fsdata set and then use that |
* for all the other dentries as well. |
* - SHP,Vinny. |
*/ |
|
/* If there is an alias with d_fsdata use it. */ |
if ((dd = presto_try_find_alias_with_dd (dentry))) { |
presto_set_alias_dd (dentry, dd); |
EXIT; |
goto out_unlock; |
} |
} else { |
/* Negative dentry */ |
CDEBUG(D_FSDATA,"negative dentry %p: %*s\n", dentry, |
dentry->d_name.len, dentry->d_name.name); |
} |
|
/* No pre-existing d_fsdata, we need to construct one. |
* First, we must walk up the tree to find the fileset |
* If a fileset can't be found, we leave a null fsdata |
* and return EROFS to indicate that we can't journal |
* updates. |
*/ |
fset = presto_try_find_fset (dentry, &is_under_d_izo); |
if (!fset) { |
#ifdef PRESTO_NO_NFS |
CERROR("No fileset for dentry %p: %*s\n", dentry, |
dentry->d_name.len, dentry->d_name.name); |
#endif |
error = -EROFS; |
EXIT; |
goto out_unlock; |
} |
|
dentry->d_fsdata = izo_alloc_ddata(); |
if (!presto_d2d(dentry)) { |
CERROR ("InterMezzo: out of memory allocating d_fsdata\n"); |
error = -ENOMEM; |
goto out_unlock; |
} |
presto_d2d(dentry)->dd_fset = fset; |
if (is_under_d_izo) |
presto_d2d(dentry)->dd_flags |= PRESTO_DONT_JOURNAL; |
EXIT; |
|
/* Common exit: trace the final d_fsdata and drop the BKL. */
out_unlock: |
CDEBUG(D_FSDATA,"presto_set_dd dentry %p: %*s, d_fsdata %p\n", |
dentry, dentry->d_name.len, dentry->d_name.name, |
dentry->d_fsdata); |
unlock_kernel(); |
return error; |
} |
|
/*
 * Create the slab cache ("presto_cache") used for presto_dentry_data.
 *
 * Note the return convention: non-zero means the slab was created, 0 means
 * kmem_cache_create() failed.
 */
int presto_init_ddata_cache(void) |
{ |
ENTRY; |
presto_dentry_slab = |
kmem_cache_create("presto_cache", |
sizeof(struct presto_dentry_data), 0, |
SLAB_HWCACHE_ALIGN, NULL, |
NULL); |
EXIT; |
return (presto_dentry_slab != NULL); |
} |
|
/* Destroy the presto_dentry_data slab cache.  The pointer is passed to
 * kmem_cache_destroy() without a NULL check. */
void presto_cleanup_ddata_cache(void) |
{ |
kmem_cache_destroy(presto_dentry_slab); |
} |
/journal.c
0,0 → 1,2453
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam |
* Copyright (C) 2001 Cluster File Systems, Inc. |
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net> |
* |
* Support for journalling extended attributes |
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/time.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#include <linux/smp_lock.h> |
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
/* Describes one reserved log record while it sits on a presto_log_fd's
 * fd_reservations list; filled in by presto_reserve_record(). */
struct presto_reservation_data { |
unsigned int ri_recno; /* record number assigned to the reservation */ |
loff_t ri_offset; /* fd_offset at which the record was reserved */ |
loff_t ri_size; /* size of the reserved record */ |
struct list_head ri_list; /* link on fd->fd_reservations */ |
}; |
|
/* |
* Locking Semantics |
* |
* write lock in struct presto_log_fd: |
* - name: fd_lock |
* - required for: accessing any field in a presto_log_fd |
* - may not be held across I/O |
* - |
* |
*/ |
|
/* |
* reserve record space and/or atomically request state of the log |
* rec will hold the location reserved record upon return |
* this reservation will be placed in the queue |
* |
* Everything happens under fd->fd_lock.  On return rec->offset and |
* rec->recno describe the reserved slot, 'rd' has been appended to |
* fd->fd_reservations, and fd->fd_offset has moved past the record. |
*/ |
static void presto_reserve_record(struct presto_file_set *fset, |
struct presto_log_fd *fd, |
struct rec_info *rec, |
struct presto_reservation_data *rd) |
{ |
int chunked_record = 0; |
ENTRY; |
|
write_lock(&fd->fd_lock); |
if ( rec->is_kml ) { |
int chunk = 1 << fset->fset_chunkbits; |
int chunk_mask = ~(chunk -1); |
loff_t boundary; |
|
/* If the record would reach or cross the next chunk boundary, start it
 * on that boundary instead. */
boundary = (fd->fd_offset + chunk - 1) & chunk_mask; |
if ( fd->fd_offset + rec->size >= boundary ) { |
chunked_record = 1; |
fd->fd_offset = boundary; |
} |
} |
|
fd->fd_recno++; |
|
/* this moves the fd_offset back after truncation */ |
if ( list_empty(&fd->fd_reservations) && |
!chunked_record) { |
fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size; |
} |
|
rec->offset = fd->fd_offset; |
/* KML records report a logical offset; the reservation below keeps the
 * raw fd_offset. */
if (rec->is_kml) |
rec->offset += fset->fset_kml_logical_off; |
|
rec->recno = fd->fd_recno; |
|
/* add the reservation data to the end of the list */ |
rd->ri_offset = fd->fd_offset; |
rd->ri_size = rec->size; |
rd->ri_recno = rec->recno; |
list_add(&rd->ri_list, fd->fd_reservations.prev); |
|
fd->fd_offset += rec->size; |
|
write_unlock(&fd->fd_lock); |
|
EXIT; |
} |
|
/* Remove a reservation from its log's reservation list, under fd_lock. */
static inline void presto_release_record(struct presto_log_fd *fd, |
struct presto_reservation_data *rd) |
{ |
write_lock(&fd->fd_lock); |
list_del(&rd->ri_list); |
write_unlock(&fd->fd_lock); |
} |
|
/* XXX should we ask for do_truncate to be exported? */ |
int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry, |
loff_t length, loff_t size_check) |
{ |
struct inode *inode = dentry->d_inode; |
int error; |
struct iattr newattrs; |
|
ENTRY; |
|
if (length < 0) { |
EXIT; |
return -EINVAL; |
} |
|
down(&inode->i_sem); |
lock_kernel(); |
|
if (size_check != inode->i_size) { |
unlock_kernel(); |
up(&inode->i_sem); |
EXIT; |
return -EALREADY; |
} |
|
newattrs.ia_size = length; |
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; |
|
if (inode->i_op && inode->i_op->setattr) |
error = inode->i_op->setattr(dentry, &newattrs); |
else { |
inode_setattr(dentry->d_inode, &newattrs); |
error = 0; |
} |
|
unlock_kernel(); |
up(&inode->i_sem); |
EXIT; |
return error; |
} |
|
/*
 * Ask userspace (via izo_upc_kml_truncate) to truncate the fileset's KML.
 *
 * fd_truncating acts as a "truncation in progress" flag: it is tested and
 * set under fd_lock, so a second caller returns immediately.  If the upcall
 * fails the flag is cleared again; on success it is left set here.
 */
static void presto_kml_truncate(struct presto_file_set *fset) |
{ |
int rc; |
ENTRY; |
|
write_lock(&fset->fset_kml.fd_lock); |
if (fset->fset_kml.fd_truncating == 1 ) { |
write_unlock(&fset->fset_kml.fd_lock); |
EXIT; |
return; |
} |
|
fset->fset_kml.fd_truncating = 1; |
write_unlock(&fset->fset_kml.fd_lock); |
|
/* NOTE(review): this is logged through CERROR although it looks
 * informational rather than an error -- confirm intended level. */
CERROR("islento: %d, count: %d\n", |
ISLENTO(presto_i2m(fset->fset_dentry->d_inode)), |
fset->fset_permit_count); |
|
rc = izo_upc_kml_truncate(fset->fset_cache->cache_psdev->uc_minor, |
fset->fset_lento_off, fset->fset_lento_recno, |
fset->fset_name); |
|
/* Userspace is the only permitholder now, and will retain an exclusive |
* hold on the permit until KML truncation completes. */ |
/* FIXME: double check this code path now that the precise semantics of |
* fset->fset_permit_count have changed. */ |
|
if (rc != 0) { |
write_lock(&fset->fset_kml.fd_lock); |
fset->fset_kml.fd_truncating = 0; |
write_unlock(&fset->fset_kml.fd_lock); |
} |
|
EXIT; |
} |
|
void *presto_trans_start(struct presto_file_set *fset, struct inode *inode, |
int op) |
{ |
ENTRY; |
if ( !fset->fset_cache->cache_filter->o_trops ) { |
EXIT; |
return NULL; |
} |
EXIT; |
return fset->fset_cache->cache_filter->o_trops->tr_start |
(fset, inode, op); |
} |
|
/*
 * Commit a transaction through the cache filter's tr_commit method, then
 * trigger a KML truncation if the KML has grown past kml_truncate_size.
 * Does nothing when the filter has no transaction operations.
 */
void presto_trans_commit(struct presto_file_set *fset, void *handle) |
{ |
ENTRY; |
if (!fset->fset_cache->cache_filter->o_trops ) { |
EXIT; |
return; |
} |
|
fset->fset_cache->cache_filter->o_trops->tr_commit(fset, handle); |
|
/* Check to see if the KML needs to be truncated. */ |
/* NOTE(review): fd_truncating and fd_offset are read here without
 * fd_lock; presto_kml_truncate() re-checks the flag under the lock. */
if (fset->kml_truncate_size > 0 && |
!fset->fset_kml.fd_truncating && |
fset->fset_kml.fd_offset > fset->kml_truncate_size) { |
CDEBUG(D_JOURNAL, "kml size: %lu; truncating\n", |
(unsigned long)fset->fset_kml.fd_offset); |
presto_kml_truncate(fset); |
} |
EXIT; |
} |
|
inline int presto_no_journal(struct presto_file_set *fset) |
{ |
int minor = fset->fset_cache->cache_psdev->uc_minor; |
return izo_channels[minor].uc_no_journal; |
} |
|
/* Round x up to the next multiple of 4. */
#define size_round(x) (((x)+3) & ~0x3) |
|
/* Page-sized scratch buffers.
 * BUFF_ALLOC expands to several statements and, on allocation failure,
 * frees 'oldbuf' (when non-NULL) and RETURNS -ENOMEM from the enclosing
 * function.  It is therefore only usable where such a return is valid and
 * must not be used as the unbraced body of an if/else. */
#define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE) |
#define BUFF_ALLOC(newbuf, oldbuf) \ |
PRESTO_ALLOC(newbuf, PAGE_SIZE); \ |
if ( !newbuf ) { \ |
if (oldbuf) \ |
BUFF_FREE(oldbuf); \ |
return -ENOMEM; \ |
} |
|
/* |
* "buflen" should be PAGE_SIZE or more. |
* Give relative path wrt to a fsetroot |
* |
* The path is assembled backwards from the end of 'buffer': the string is |
* NUL-terminated at the last byte, " (deleted)" is appended for an unhashed |
* dentry that is not its own parent, and each component is prepended with |
* a leading '/' while walking d_parent until 'root' (or a dentry that is |
* its own parent) is reached, or until the next component no longer fits. |
* |
* Returns a pointer INTO 'buffer' at the first character of the path; when |
* dentry == root the result is just "/". |
*/ |
char * presto_path(struct dentry *dentry, struct dentry *root, |
char *buffer, int buflen) |
{ |
char * end = buffer+buflen; |
char * retval; |
|
*--end = '\0'; |
buflen--; |
if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) { |
buflen -= 10; |
end -= 10; |
memcpy(end, " (deleted)", 10); |
} |
|
/* Get '/' right */ |
retval = end-1; |
*retval = '/'; |
|
for (;;) { |
struct dentry * parent; |
int namelen; |
|
if (dentry == root) |
break; |
parent = dentry->d_parent; |
if (dentry == parent) |
break; |
namelen = dentry->d_name.len; |
/* +1 accounts for the '/' written in front of the name. */
buflen -= namelen + 1; |
if (buflen < 0) |
break; |
end -= namelen; |
memcpy(end, dentry->d_name.name, namelen); |
*--end = '/'; |
retval = end; |
dentry = parent; |
} |
return retval; |
} |
|
/*
 * Append 'size' raw bytes from 'value' at 'buf'.
 * Returns the position just past the copied bytes, ready for the next
 * append.
 */
static inline char *logit(char *buf, const void *value, int size)
{
        return (char *)memcpy(buf, value, size) + size;
}
|
|
/*
 * Write a KML record prefix at 'buf': a struct kml_prefix_hdr followed by
 * 'ngroups' 32-bit group ids.
 *
 * len, pid, auid, fsuid, fsgid, ngroups, opcode and each group id are
 * stored with cpu_to_le32(); 'version' is stored without conversion.
 * pid and auid come from 'current'; fsuid/fsgid/groups from the arguments.
 *
 * Returns the position in 'buf' just past the prefix.
 *
 * NOTE(review): 'ngroups' is not checked against NGROUPS_MAX, the size of
 * the local staging array -- callers must guarantee it fits.
 */
static inline char * |
journal_log_prefix_with_groups_and_ids(char *buf, int opcode, |
struct rec_info *rec, |
__u32 ngroups, gid_t *groups, |
__u32 fsuid, __u32 fsgid) |
{ |
struct kml_prefix_hdr p; |
u32 loggroups[NGROUPS_MAX]; |
|
int i; |
|
p.len = cpu_to_le32(rec->size); |
p.version = KML_MAJOR_VERSION | KML_MINOR_VERSION; |
p.pid = cpu_to_le32(current->pid); |
p.auid = cpu_to_le32(current->uid); |
p.fsuid = cpu_to_le32(fsuid); |
p.fsgid = cpu_to_le32(fsgid); |
p.ngroups = cpu_to_le32(ngroups); |
p.opcode = cpu_to_le32(opcode); |
for (i=0 ; i < ngroups ; i++) |
loggroups[i] = cpu_to_le32((__u32) groups[i]); |
|
buf = logit(buf, &p, sizeof(struct kml_prefix_hdr)); |
buf = logit(buf, &loggroups, sizeof(__u32) * ngroups); |
return buf; |
} |
|
/*
 * Write a KML record prefix using the calling task's own credentials:
 * current->groups (widened to 32 bits), current->fsuid and current->fsgid.
 * Returns the position in 'buf' just past the prefix.
 *
 * NOTE(review): the local __u32 array is passed where the callee declares
 * a gid_t pointer -- this is only right if gid_t is 32 bits wide; confirm.
 */
static inline char * |
journal_log_prefix(char *buf, int opcode, struct rec_info *rec) |
{ |
__u32 groups[NGROUPS_MAX]; |
int i; |
|
/* convert 16 bit gid's to 32 bit gid's */ |
for (i=0; i<current->ngroups; i++) |
groups[i] = (__u32) current->groups[i]; |
|
return journal_log_prefix_with_groups_and_ids(buf, opcode, rec, |
(__u32)current->ngroups, |
groups, |
(__u32)current->fsuid, |
(__u32)current->fsgid); |
} |
|
/*
 * Write a KML record prefix with a caller-supplied group list but the
 * calling task's fsuid and fsgid.
 * Returns the position in 'buf' just past the prefix.
 */
static inline char * |
journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec, |
__u32 ngroups, gid_t *groups) |
{ |
return journal_log_prefix_with_groups_and_ids(buf, opcode, rec, |
ngroups, groups, |
(__u32)current->fsuid, |
(__u32)current->fsgid); |
} |
|
/*
 * Append the presto_version of the dentry's inode to the log buffer.
 * The version is obtained with presto_getversion(); its mtime, ctime and
 * size fields are passed through HTON__u64() before the whole structure is
 * copied out.  Returns the position just past the copied structure.
 */
static inline char *log_dentry_version(char *buf, struct dentry *dentry) |
{ |
struct presto_version version; |
|
presto_getversion(&version, dentry->d_inode); |
|
version.pv_mtime = HTON__u64(version.pv_mtime); |
version.pv_ctime = HTON__u64(version.pv_ctime); |
version.pv_size = HTON__u64(version.pv_size); |
|
return logit(buf, &version, sizeof(version)); |
} |
|
static inline char *log_version(char *buf, struct presto_version *pv) |
{ |
struct presto_version version; |
|
memcpy(&version, pv, sizeof(version)); |
|
version.pv_mtime = HTON__u64(version.pv_mtime); |
version.pv_ctime = HTON__u64(version.pv_ctime); |
version.pv_size = HTON__u64(version.pv_size); |
|
return logit(buf, &version, sizeof(version)); |
} |
|
static inline char *log_rollback(char *buf, struct izo_rollback_data *rb) |
{ |
struct izo_rollback_data rollback; |
|
memcpy(&rollback, rb, sizeof(rollback)); |
|
rollback.rb_mode = HTON__u32(rollback.rb_mode); |
rollback.rb_rdev = HTON__u32(rollback.rb_rdev); |
rollback.rb_uid = HTON__u64(rollback.rb_uid); |
rollback.rb_gid = HTON__u64(rollback.rb_gid); |
|
return logit(buf, &rollback, sizeof(rollback)); |
} |
|
/*
 * Append a struct kml_suffix at 'buf'.  'log' points at the start of the
 * record, where the kml_prefix_hdr lives; its (already converted) len is
 * copied into the suffix.  prevrec is always 0 and the time is the current
 * time.  s.recno is NOT set here (see the comment in the body), so that
 * field of the copied structure is uninitialised at this point.
 *
 * 'fset', 'dentry' and 'rec' are only referenced by the disabled code.
 * Returns the position just past the suffix.
 */
static inline char *journal_log_suffix(char *buf, char *log, |
struct presto_file_set *fset, |
struct dentry *dentry, |
struct rec_info *rec) |
{ |
struct kml_suffix s; |
struct kml_prefix_hdr *p = (struct kml_prefix_hdr *)log; |
|
#if 0 |
/* XXX needs to be done after reservation, |
disable this until version 1.2 */ |
if ( dentry ) { |
s.prevrec = cpu_to_le32(rec->offset - |
presto_d2d(dentry)->dd_kml_offset); |
presto_d2d(dentry)->dd_kml_offset = rec->offset; |
} else { |
s.prevrec = -1; |
} |
#endif |
s.prevrec = 0; |
|
/* record number needs to be filled in after reservation |
s.recno = cpu_to_le32(rec->recno); */ |
s.time = cpu_to_le32(CURRENT_TIME); |
s.len = p->len; |
return logit(buf, &s, sizeof(s)); |
} |
|
int izo_log_close(struct presto_log_fd *logfd) |
{ |
int rc = 0; |
|
if (logfd->fd_file) { |
rc = filp_close(logfd->fd_file, 0); |
logfd->fd_file = NULL; |
} else |
CERROR("InterMezzo: %s: no filp\n", __FUNCTION__); |
if (rc != 0) |
CERROR("InterMezzo: close files: filp won't close: %d\n", rc); |
|
return rc; |
} |
|
/*
 * Write exactly 'len' bytes from the kernel buffer 'str' to 'file' at '*off'.
 *
 * The address limit is switched with set_fs(get_ds()) around the call so
 * the file's write method accepts a kernel pointer, and restored afterwards.
 *
 * Returns 'len' on success, -EINVAL if 'off' or 'file' is NULL or the file
 * has no write method, and -EIO if the write method returned anything other
 * than 'len'.
 */
int presto_fwrite(struct file *file, const char *str, int len, loff_t *off)
{
        mm_segment_t saved_fs;
        int written;

        ENTRY;

        if (!off || !file || !file->f_op || !file->f_op->write) {
                EXIT;
                return -EINVAL;
        }

        saved_fs = get_fs();
        set_fs(get_ds());
        written = file->f_op->write(file, str, len, off);
        if (written != len) {
                CERROR("presto_fwrite: wrote %d bytes instead of "
                       "%d at %ld\n", written, len, (long)*off);
                written = -EIO;
        }
        set_fs(saved_fs);

        EXIT;
        return written;
}
|
/*
 * Read exactly 'len' bytes from 'file' at '*off' into the kernel buffer 'str'.
 *
 * The address limit is switched with set_fs(get_ds()) around the call so
 * the file's read method accepts a kernel pointer, and restored afterwards.
 *
 * Returns 'len' on success, -EINVAL if 'off' or 'file' is NULL or the file
 * has no read method, and -EIO if the read method returned anything other
 * than 'len' (short reads and its own error codes included).
 */
int presto_fread(struct file *file, char *str, int len, loff_t *off)
{
        int rc;
        mm_segment_t old_fs;
        ENTRY;

        if (!off || !file || !file->f_op || !file->f_op->read) {
                EXIT;
                return -EINVAL;
        }

        /* Large reads are reported.  This must come after the checks above
         * because the message dereferences both 'off' and 'file'. */
        if (len > 512)
                CERROR("presto_fread: read at %Ld for %d bytes, ino %ld\n",
                       *off, len, file->f_dentry->d_inode->i_ino);

        old_fs = get_fs();
        set_fs(get_ds());
        rc = file->f_op->read(file, str, len, off);
        if (rc != len) {
                CDEBUG(D_FILE, "presto_fread: read %d bytes instead of "
                       "%d at %Ld\n", rc, len, *off);
                rc = -EIO;
        }
        set_fs(old_fs);
        EXIT;
        return rc;
}
|
loff_t presto_kml_offset(struct presto_file_set *fset) |
{ |
unsigned int kml_recno; |
struct presto_log_fd *fd = &fset->fset_kml; |
loff_t offset; |
ENTRY; |
|
write_lock(&fd->fd_lock); |
|
/* Determine the largest valid offset, i.e. up until the first |
* reservation held on the file. */ |
if ( !list_empty(&fd->fd_reservations) ) { |
struct presto_reservation_data *rd; |
rd = list_entry(fd->fd_reservations.next, |
struct presto_reservation_data, |
ri_list); |
offset = rd->ri_offset; |
kml_recno = rd->ri_recno; |
} else { |
offset = fd->fd_file->f_dentry->d_inode->i_size; |
kml_recno = fset->fset_kml.fd_recno; |
} |
write_unlock(&fd->fd_lock); |
return offset; |
} |
|
/*
 * Notify userspace (izo_upc_kml) about KML records it has not seen yet.
 *
 * The valid end of the KML is the first outstanding reservation, or the
 * file size / current record number when nothing is reserved.  No upcall is
 * made when that record number is at or behind fset_lento_recno.  After a
 * successful upcall fset_lento_off and fset_lento_recno are advanced.
 *
 * Returns 0 when nothing needed sending or the upcall succeeded, otherwise
 * the upcall's error.
 *
 * NOTE(review): fd_lock stays held across the izo_upc_kml() call, while the
 * locking notes in this file say fd_lock may not be held across I/O --
 * confirm the upcall cannot block.
 */
static int presto_kml_dispatch(struct presto_file_set *fset) |
{ |
int rc = 0; |
unsigned int kml_recno; |
struct presto_log_fd *fd = &fset->fset_kml; |
loff_t offset; |
ENTRY; |
|
write_lock(&fd->fd_lock); |
|
/* Determine the largest valid offset, i.e. up until the first |
* reservation held on the file. */ |
if ( !list_empty(&fd->fd_reservations) ) { |
struct presto_reservation_data *rd; |
rd = list_entry(fd->fd_reservations.next, |
struct presto_reservation_data, |
ri_list); |
offset = rd->ri_offset; |
kml_recno = rd->ri_recno; |
} else { |
offset = fd->fd_file->f_dentry->d_inode->i_size; |
kml_recno = fset->fset_kml.fd_recno; |
} |
|
if ( kml_recno < fset->fset_lento_recno ) { |
CERROR("presto_kml_dispatch: smoke is coming\n"); |
write_unlock(&fd->fd_lock); |
EXIT; |
return 0; |
} else if ( kml_recno == fset->fset_lento_recno ) { |
write_unlock(&fd->fd_lock); |
EXIT; |
return 0; |
/* XXX add a further "if" here to delay the KML upcall */ |
#if 0 |
} else if ( kml_recno < fset->fset_lento_recno + 100) { |
write_unlock(&fd->fd_lock); |
EXIT; |
return 0; |
#endif |
} |
CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name); |
|
/* The offset sent to userspace is the logical one; the raw offset is
 * what gets stored in fset_lento_off below. */
rc = izo_upc_kml(fset->fset_cache->cache_psdev->uc_minor, |
fset->fset_lento_off, fset->fset_lento_recno, |
offset + fset->fset_kml_logical_off, kml_recno, |
fset->fset_name); |
|
if ( rc ) { |
write_unlock(&fd->fd_lock); |
EXIT; |
return rc; |
} |
|
fset->fset_lento_off = offset; |
fset->fset_lento_recno = kml_recno; |
write_unlock(&fd->fd_lock); |
EXIT; |
return 0; |
} |
|
int izo_lookup_file(struct presto_file_set *fset, char *path, |
struct nameidata *nd) |
{ |
int error = 0; |
|
CDEBUG(D_CACHE, "looking up: %s\n", path); |
|
if (path_init(path, LOOKUP_PARENT, nd)) |
error = path_walk(path, nd); |
if (error) { |
EXIT; |
return error; |
} |
|
return 0; |
} |
|
/* FIXME: this function is a mess of locking and error handling. There's got to |
* be a better way. |
* |
* Rename 'oldname' to 'newname', both turned into paths with |
* izo_make_path().  The parent directories are looked up, locked with |
* double_lock(), the two final components resolved with lookup_hash(), and |
* presto_rename() performs the rename. |
* |
* Returns 0 on success; -ENOENT if either path cannot be built or the |
* source does not exist; otherwise the error from the lookups or from |
* presto_rename().  The exitN labels unwind in reverse order of |
* acquisition. |
*/ |
static int do_truncate_rename(struct presto_file_set *fset, char *oldname, |
char *newname) |
{ |
struct dentry *old_dentry, *new_dentry; |
struct nameidata oldnd, newnd; |
char *oldpath, *newpath; |
int error; |
|
ENTRY; |
|
oldpath = izo_make_path(fset, oldname); |
if (oldpath == NULL) { |
EXIT; |
return -ENOENT; |
} |
|
newpath = izo_make_path(fset, newname); |
if (newpath == NULL) { |
error = -ENOENT; |
EXIT; |
goto exit; |
} |
|
if ((error = izo_lookup_file(fset, oldpath, &oldnd)) != 0) { |
EXIT; |
goto exit1; |
} |
|
if ((error = izo_lookup_file(fset, newpath, &newnd)) != 0) { |
EXIT; |
goto exit2; |
} |
|
/* Lock both parent directories before resolving the last components. */
double_lock(newnd.dentry, oldnd.dentry); |
old_dentry = lookup_hash(&oldnd.last, oldnd.dentry); |
error = PTR_ERR(old_dentry); |
if (IS_ERR(old_dentry)) { |
EXIT; |
goto exit3; |
} |
/* The source must exist (positive dentry). */
error = -ENOENT; |
if (!old_dentry->d_inode) { |
EXIT; |
goto exit4; |
} |
new_dentry = lookup_hash(&newnd.last, newnd.dentry); |
error = PTR_ERR(new_dentry); |
if (IS_ERR(new_dentry)) { |
EXIT; |
goto exit4; |
} |
|
{ |
extern int presto_rename(struct inode *old_dir,struct dentry *old_dentry, |
struct inode *new_dir,struct dentry *new_dentry); |
error = presto_rename(old_dentry->d_parent->d_inode, old_dentry, |
new_dentry->d_parent->d_inode, new_dentry); |
} |
|
dput(new_dentry); |
EXIT; |
exit4: |
dput(old_dentry); |
exit3: |
double_up(&newnd.dentry->d_inode->i_sem, &oldnd.dentry->d_inode->i_sem); |
path_release(&newnd); |
exit2: |
path_release(&oldnd); |
exit1: |
PRESTO_FREE(newpath, strlen(newpath) + 1); |
exit: |
PRESTO_FREE(oldpath, strlen(oldpath) + 1); |
return error; |
} |
|
/* This function is called with the fset->fset_kml.fd_lock held */ |
int presto_finish_kml_truncate(struct presto_file_set *fset, |
unsigned long int offset) |
{ |
struct lento_vfs_context info; |
void *handle; |
struct file *f; |
struct dentry *dentry; |
int error = 0, len; |
struct nameidata nd; |
char *kmlpath = NULL, *smlpath = NULL; |
ENTRY; |
|
if (offset == 0) { |
/* Lento couldn't do what it needed to; abort the truncation. */ |
fset->fset_kml.fd_truncating = 0; |
EXIT; |
return 0; |
} |
|
/* someone is about to write to the end of the KML; try again later. */ |
if ( !list_empty(&fset->fset_kml.fd_reservations) ) { |
EXIT; |
return -EAGAIN; |
} |
|
f = presto_copy_kml_tail(fset, offset); |
if (IS_ERR(f)) { |
EXIT; |
return PTR_ERR(f); |
} |
|
/* In a single transaction: |
* |
* - unlink 'kml' |
* - rename 'kml_tmp' to 'kml' |
* - unlink 'sml' |
* - rename 'sml_tmp' to 'sml' |
* - rewrite the first record of last_rcvd with the new kml |
* offset. |
*/ |
handle = presto_trans_start(fset, fset->fset_dentry->d_inode, |
KML_OPCODE_KML_TRUNC); |
if (IS_ERR(handle)) { |
presto_release_space(fset->fset_cache, PRESTO_REQLOW); |
CERROR("ERROR: presto_finish_kml_truncate: no space for transaction\n"); |
EXIT; |
return -ENOMEM; |
} |
|
memset(&info, 0, sizeof(info)); |
info.flags = LENTO_FL_IGNORE_TIME; |
|
kmlpath = izo_make_path(fset, "kml"); |
if (kmlpath == NULL) { |
error = -ENOMEM; |
CERROR("make_path failed: ENOMEM\n"); |
EXIT; |
goto exit_commit; |
} |
|
if ((error = izo_lookup_file(fset, kmlpath, &nd)) != 0) { |
CERROR("izo_lookup_file(kml) failed: %d.\n", error); |
EXIT; |
goto exit_commit; |
} |
down(&nd.dentry->d_inode->i_sem); |
dentry = lookup_hash(&nd.last, nd.dentry); |
error = PTR_ERR(dentry); |
if (IS_ERR(dentry)) { |
up(&nd.dentry->d_inode->i_sem); |
path_release(&nd); |
CERROR("lookup_hash failed\n"); |
EXIT; |
goto exit_commit; |
} |
error = presto_do_unlink(fset, dentry->d_parent, dentry, &info); |
dput(dentry); |
up(&nd.dentry->d_inode->i_sem); |
path_release(&nd); |
|
if (error != 0) { |
CERROR("presto_do_unlink(kml) failed: %d.\n", error); |
EXIT; |
goto exit_commit; |
} |
|
smlpath = izo_make_path(fset, "sml"); |
if (smlpath == NULL) { |
error = -ENOMEM; |
CERROR("make_path() failed: ENOMEM\n"); |
EXIT; |
goto exit_commit; |
} |
|
if ((error = izo_lookup_file(fset, smlpath, &nd)) != 0) { |
CERROR("izo_lookup_file(sml) failed: %d.\n", error); |
EXIT; |
goto exit_commit; |
} |
down(&nd.dentry->d_inode->i_sem); |
dentry = lookup_hash(&nd.last, nd.dentry); |
error = PTR_ERR(dentry); |
if (IS_ERR(dentry)) { |
up(&nd.dentry->d_inode->i_sem); |
path_release(&nd); |
CERROR("lookup_hash failed\n"); |
EXIT; |
goto exit_commit; |
} |
error = presto_do_unlink(fset, dentry->d_parent, dentry, &info); |
dput(dentry); |
up(&nd.dentry->d_inode->i_sem); |
path_release(&nd); |
|
if (error != 0) { |
CERROR("presto_do_unlink(sml) failed: %d.\n", error); |
EXIT; |
goto exit_commit; |
} |
|
error = do_truncate_rename(fset, "kml_tmp", "kml"); |
if (error != 0) |
CERROR("do_truncate_rename(kml_tmp, kml) failed: %d\n", error); |
error = do_truncate_rename(fset, "sml_tmp", "sml"); |
if (error != 0) |
CERROR("do_truncate_rename(sml_tmp, sml) failed: %d\n", error); |
|
/* Write a new 'last_rcvd' record with the new KML offset */ |
fset->fset_kml_logical_off += offset; |
CDEBUG(D_CACHE, "new kml_logical_offset: %Lu\n", |
fset->fset_kml_logical_off); |
if (presto_write_kml_logical_offset(fset) != 0) { |
CERROR("presto_write_kml_logical_offset failed\n"); |
} |
|
presto_trans_commit(fset, handle); |
|
/* Everything was successful, so swap the KML file descriptors */ |
filp_close(fset->fset_kml.fd_file, NULL); |
fset->fset_kml.fd_file = f; |
fset->fset_kml.fd_offset -= offset; |
fset->fset_kml.fd_truncating = 0; |
|
EXIT; |
return 0; |
|
exit_commit: |
presto_trans_commit(fset, handle); |
len = strlen("/.intermezzo/") + strlen(fset->fset_name) +strlen("sml"); |
if (kmlpath != NULL) |
PRESTO_FREE(kmlpath, len); |
if (smlpath != NULL) |
PRESTO_FREE(smlpath, len); |
return error; |
} |
|
/* structure of an extended log record: |
|
buf-prefix buf-body [string1 [string2 [string3]]] buf-suffix |
|
note: moves offset forward |
*/ |
static inline int presto_write_record(struct file *f, loff_t *off, |
const char *buf, size_t size, |
const char *string1, int len1, |
const char *string2, int len2, |
const char *string3, int len3) |
{ |
size_t prefix_size; |
int rc; |
|
prefix_size = size - sizeof(struct kml_suffix); |
rc = presto_fwrite(f, buf, prefix_size, off); |
if ( rc != prefix_size ) { |
CERROR("Write error!\n"); |
EXIT; |
return -EIO; |
} |
|
if ( string1 && len1 ) { |
rc = presto_fwrite(f, string1, len1, off); |
if ( rc != len1 ) { |
CERROR("Write error!\n"); |
EXIT; |
return -EIO; |
} |
} |
|
if ( string2 && len2 ) { |
rc = presto_fwrite(f, string2, len2, off); |
if ( rc != len2 ) { |
CERROR("Write error!\n"); |
EXIT; |
return -EIO; |
} |
} |
|
if ( string3 && len3 ) { |
rc = presto_fwrite(f, string3, len3, off); |
if ( rc != len3 ) { |
CERROR("Write error!\n"); |
EXIT; |
return -EIO; |
} |
} |
|
rc = presto_fwrite(f, buf + prefix_size, |
sizeof(struct kml_suffix), off); |
if ( rc != sizeof(struct kml_suffix) ) { |
CERROR("Write error!\n"); |
EXIT; |
return -EIO; |
} |
return 0; |
} |
|
|
/* |
* rec->size must be valid prior to calling this function. |
* |
* had to export this for branch_reinter in kml_reint.c |
*/ |
int presto_log(struct presto_file_set *fset, struct rec_info *rec, |
const char *buf, size_t size, |
const char *string1, int len1, |
const char *string2, int len2, |
const char *string3, int len3) |
{ |
int rc; |
struct presto_reservation_data rd; |
loff_t offset; |
struct presto_log_fd *fd; |
struct kml_suffix *s; |
int prefix_size; |
|
ENTRY; |
|
/* buf is NULL when no_journal is in effect */ |
if (!buf) { |
EXIT; |
return -EINVAL; |
} |
|
if (rec->is_kml) { |
fd = &fset->fset_kml; |
} else { |
fd = &fset->fset_lml; |
} |
|
presto_reserve_record(fset, fd, rec, &rd); |
|
if (rec->is_kml) { |
if (rec->offset < fset->fset_kml_logical_off) { |
CERROR("record with pre-trunc offset. tell phil.\n"); |
BUG(); |
} |
offset = rec->offset - fset->fset_kml_logical_off; |
} else { |
offset = rec->offset; |
} |
|
/* now we know the record number */ |
prefix_size = size - sizeof(struct kml_suffix); |
s = (struct kml_suffix *) (buf + prefix_size); |
s->recno = cpu_to_le32(rec->recno); |
|
rc = presto_write_record(fd->fd_file, &offset, buf, size, |
string1, len1, string2, len2, string3, len3); |
if (rc) { |
CERROR("presto: error writing record to %s\n", |
rec->is_kml ? "KML" : "LML"); |
return rc; |
} |
presto_release_record(fd, &rd); |
|
rc = presto_kml_dispatch(fset); |
|
EXIT; |
return rc; |
} |
|
/* read from the record at tail */ |
static int presto_last_record(struct presto_log_fd *fd, loff_t *size, |
loff_t *tail_offset, __u32 *recno, loff_t tail) |
{ |
struct kml_suffix suffix; |
int rc; |
loff_t zeroes; |
|
*recno = 0; |
*tail_offset = 0; |
*size = 0; |
|
if (tail < sizeof(struct kml_prefix_hdr) + sizeof(suffix)) { |
EXIT; |
return 0; |
} |
|
zeroes = tail - sizeof(int); |
while ( zeroes >= 0 ) { |
int data; |
rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data), |
&zeroes); |
if ( rc != sizeof(data) ) { |
rc = -EIO; |
return rc; |
} |
if (data) |
break; |
zeroes -= 2 * sizeof(data); |
} |
|
/* zeroes at the begining of file. this is needed to prevent |
presto_fread errors -SHP |
*/ |
if (zeroes <= 0) return 0; |
|
zeroes -= sizeof(suffix) + sizeof(int); |
rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes); |
if ( rc != sizeof(suffix) ) { |
EXIT; |
return rc; |
} |
if ( suffix.len > 500 ) { |
CERROR("InterMezzo: Warning long record tail at %ld, rec tail_offset at %ld (size %d)\n", |
(long) zeroes, (long)*tail_offset, suffix.len); |
} |
|
*recno = suffix.recno; |
*size = suffix.len; |
*tail_offset = zeroes; |
return 0; |
} |
|
static int izo_kml_last_recno(struct presto_log_fd *logfd) |
{ |
int rc; |
loff_t size; |
loff_t tail_offset; |
int recno; |
loff_t tail = logfd->fd_file->f_dentry->d_inode->i_size; |
|
rc = presto_last_record(logfd, &size, &tail_offset, &recno, tail); |
if (rc != 0) { |
EXIT; |
return rc; |
} |
|
logfd->fd_offset = tail_offset; |
logfd->fd_recno = recno; |
CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset %Ld\n", |
recno, tail_offset); |
EXIT; |
return 0; |
} |
|
struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags) |
{ |
struct presto_cache *cache = fset->fset_cache; |
struct file *f; |
int error; |
ENTRY; |
|
f = izo_fset_open(fset, name, flags, 0644); |
error = PTR_ERR(f); |
if (IS_ERR(f)) { |
EXIT; |
return f; |
} |
|
error = -EINVAL; |
if ( cache != presto_get_cache(f->f_dentry->d_inode) ) { |
CERROR("InterMezzo: %s cache does not match fset cache!\n",name); |
fset->fset_kml.fd_file = NULL; |
filp_close(f, NULL); |
f = NULL; |
EXIT; |
return f; |
} |
|
if (cache->cache_filter && cache->cache_filter->o_trops && |
cache->cache_filter->o_trops->tr_journal_data) { |
cache->cache_filter->o_trops->tr_journal_data |
(f->f_dentry->d_inode); |
} else { |
CERROR("InterMezzo WARNING: no file data logging!\n"); |
} |
|
EXIT; |
|
return f; |
} |
|
int izo_init_kml_file(struct presto_file_set *fset, struct presto_log_fd *logfd) |
{ |
int error = 0; |
struct file *f; |
|
ENTRY; |
if (logfd->fd_file) { |
CDEBUG(D_INODE, "fset already has KML open\n"); |
EXIT; |
return 0; |
} |
|
logfd->fd_lock = RW_LOCK_UNLOCKED; |
INIT_LIST_HEAD(&logfd->fd_reservations); |
f = izo_log_open(fset, "kml", O_RDWR | O_CREAT); |
if (IS_ERR(f)) { |
error = PTR_ERR(f); |
return error; |
} |
|
logfd->fd_file = f; |
error = izo_kml_last_recno(logfd); |
|
if (error) { |
logfd->fd_file = NULL; |
filp_close(f, NULL); |
CERROR("InterMezzo: IO error in KML of fset %s\n", |
fset->fset_name); |
EXIT; |
return error; |
} |
fset->fset_lento_off = logfd->fd_offset; |
fset->fset_lento_recno = logfd->fd_recno; |
|
EXIT; |
return error; |
} |
|
int izo_init_last_rcvd_file(struct presto_file_set *fset, struct presto_log_fd *logfd) |
{ |
int error = 0; |
struct file *f; |
struct rec_info recinfo; |
|
ENTRY; |
if (logfd->fd_file != NULL) { |
CDEBUG(D_INODE, "fset already has last_rcvd open\n"); |
EXIT; |
return 0; |
} |
|
logfd->fd_lock = RW_LOCK_UNLOCKED; |
INIT_LIST_HEAD(&logfd->fd_reservations); |
f = izo_log_open(fset, "last_rcvd", O_RDWR | O_CREAT); |
if (IS_ERR(f)) { |
error = PTR_ERR(f); |
return error; |
} |
|
logfd->fd_file = f; |
logfd->fd_offset = f->f_dentry->d_inode->i_size; |
|
error = izo_rep_cache_init(fset); |
|
if (presto_read_kml_logical_offset(&recinfo, fset) == 0) { |
fset->fset_kml_logical_off = recinfo.offset; |
} else { |
/* The 'last_rcvd' file doesn't contain a kml offset record, |
* probably because we just created 'last_rcvd'. Write one. */ |
fset->fset_kml_logical_off = 0; |
presto_write_kml_logical_offset(fset); |
} |
|
EXIT; |
return error; |
} |
|
int izo_init_lml_file(struct presto_file_set *fset, struct presto_log_fd *logfd) |
{ |
int error = 0; |
struct file *f; |
|
ENTRY; |
if (logfd->fd_file) { |
CDEBUG(D_INODE, "fset already has lml open\n"); |
EXIT; |
return 0; |
} |
|
logfd->fd_lock = RW_LOCK_UNLOCKED; |
INIT_LIST_HEAD(&logfd->fd_reservations); |
f = izo_log_open(fset, "lml", O_RDWR | O_CREAT); |
if (IS_ERR(f)) { |
error = PTR_ERR(f); |
return error; |
} |
|
logfd->fd_file = f; |
logfd->fd_offset = f->f_dentry->d_inode->i_size; |
|
EXIT; |
return error; |
} |
|
/* Get the KML-offset record from the last_rcvd file */ |
int presto_read_kml_logical_offset(struct rec_info *recinfo, |
struct presto_file_set *fset) |
{ |
loff_t off; |
struct izo_rcvd_rec rec; |
char uuid[16] = {0}; |
|
off = izo_rcvd_get(&rec, fset, uuid); |
if (off < 0) |
return -1; |
|
recinfo->offset = rec.lr_local_offset; |
return 0; |
} |
|
int presto_write_kml_logical_offset(struct presto_file_set *fset) |
{ |
loff_t rc; |
struct izo_rcvd_rec rec; |
char uuid[16] = {0}; |
|
rc = izo_rcvd_get(&rec, fset, uuid); |
if (rc < 0) |
memset(&rec, 0, sizeof(rec)); |
|
rec.lr_local_offset = |
cpu_to_le64(fset->fset_kml_logical_off); |
|
return izo_rcvd_write(fset, &rec); |
} |
|
struct file * presto_copy_kml_tail(struct presto_file_set *fset, |
unsigned long int start) |
{ |
struct file *f; |
int len; |
loff_t read_off, write_off, bytes; |
|
ENTRY; |
|
/* Copy the tail of 'kml' to 'kml_tmp' */ |
f = izo_log_open(fset, "kml_tmp", O_RDWR); |
if (IS_ERR(f)) { |
EXIT; |
return f; |
} |
|
write_off = 0; |
read_off = start; |
bytes = fset->fset_kml.fd_offset - start; |
while (bytes > 0) { |
char buf[4096]; |
int toread; |
|
if (bytes > sizeof(buf)) |
toread = sizeof(buf); |
else |
toread = bytes; |
|
len = presto_fread(fset->fset_kml.fd_file, buf, toread, |
&read_off); |
if (len <= 0) |
break; |
|
if (presto_fwrite(f, buf, len, &write_off) != len) { |
filp_close(f, NULL); |
EXIT; |
return ERR_PTR(-EIO); |
} |
|
bytes -= len; |
} |
|
EXIT; |
return f; |
} |
|
|
/* LML records here */ |
/* this writes an LML record to the LML file (rec->is_kml =0) */ |
int presto_write_lml_close(struct rec_info *rec, |
struct presto_file_set *fset, |
struct file *file, |
__u64 remote_ino, |
__u64 remote_generation, |
struct presto_version *remote_version, |
struct presto_version *new_file_ver) |
{ |
int opcode = KML_OPCODE_CLOSE; |
char *buffer; |
struct dentry *dentry = file->f_dentry; |
__u64 ino; |
__u32 pathlen; |
char *path; |
__u32 generation; |
int size; |
char *logrecord; |
char record[292]; |
struct dentry *root; |
int error; |
|
ENTRY; |
|
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
root = fset->fset_dentry; |
|
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
CDEBUG(D_INODE, "Path: %s\n", path); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
ino = cpu_to_le64(dentry->d_inode->i_ino); |
generation = cpu_to_le32(dentry->d_inode->i_generation); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) + |
sizeof(ino) + sizeof(generation) + sizeof(pathlen) + |
sizeof(remote_ino) + sizeof(remote_generation) + |
sizeof(remote_version) + sizeof(rec->offset) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 0; |
rec->size = size + size_round(le32_to_cpu(pathlen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, new_file_ver); |
logrecord = logit(logrecord, &ino, sizeof(ino)); |
logrecord = logit(logrecord, &generation, sizeof(generation)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino)); |
logrecord = logit(logrecord, &remote_generation, |
sizeof(remote_generation)); |
logrecord = log_version(logrecord, remote_version); |
logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0, NULL, 0); |
|
BUFF_FREE(buffer); |
|
EXIT; |
return error; |
} |
|
/* |
* Check if the given record is at the end of the file. If it is, truncate |
* the lml to the record's offset, removing it. Repeat on prior record, |
* until we reach an active record or a reserved record (as defined by the |
* reservations list). |
*/ |
static int presto_truncate_lml_tail(struct presto_file_set *fset) |
{ |
loff_t lml_tail; |
loff_t lml_last_rec; |
loff_t lml_last_recsize; |
loff_t local_offset; |
int recno; |
struct kml_prefix_hdr prefix; |
struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode; |
void *handle; |
int rc; |
|
ENTRY; |
/* If someone else is already truncating the LML, return. */ |
write_lock(&fset->fset_lml.fd_lock); |
if (fset->fset_lml.fd_truncating == 1 ) { |
write_unlock(&fset->fset_lml.fd_lock); |
EXIT; |
return 0; |
} |
/* someone is about to write to the end of the LML */ |
if ( !list_empty(&fset->fset_lml.fd_reservations) ) { |
write_unlock(&fset->fset_lml.fd_lock); |
EXIT; |
return 0; |
} |
lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size; |
/* Nothing to truncate?*/ |
if (lml_tail == 0) { |
write_unlock(&fset->fset_lml.fd_lock); |
EXIT; |
return 0; |
} |
fset->fset_lml.fd_truncating = 1; |
write_unlock(&fset->fset_lml.fd_lock); |
|
presto_last_record(&fset->fset_lml, &lml_last_recsize, |
&lml_last_rec, &recno, lml_tail); |
/* Do we have a record to check? If not we have zeroes at the |
beginning of the file. -SHP |
*/ |
if (lml_last_recsize != 0) { |
local_offset = lml_last_rec - lml_last_recsize; |
rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix, |
sizeof(prefix), &local_offset); |
if (rc != sizeof(prefix)) { |
EXIT; |
goto tr_out; |
} |
|
if ( prefix.opcode != KML_OPCODE_NOOP ) { |
EXIT; |
rc = 0; |
/* We may have zeroes at the end of the file, should |
we clear them out? -SHP |
*/ |
goto tr_out; |
} |
} else |
lml_last_rec=0; |
|
handle = presto_trans_start(fset, inode, KML_OPCODE_TRUNC); |
if ( IS_ERR(handle) ) { |
EXIT; |
rc = -ENOMEM; |
goto tr_out; |
} |
|
rc = izo_do_truncate(fset, fset->fset_lml.fd_file->f_dentry, |
lml_last_rec - lml_last_recsize, lml_tail); |
presto_trans_commit(fset, handle); |
if ( rc == 0 ) { |
rc = 1; |
} |
EXIT; |
|
tr_out: |
CDEBUG(D_JOURNAL, "rc = %d\n", rc); |
write_lock(&fset->fset_lml.fd_lock); |
fset->fset_lml.fd_truncating = 0; |
write_unlock(&fset->fset_lml.fd_lock); |
return rc; |
} |
|
int presto_truncate_lml(struct presto_file_set *fset) |
{ |
int rc; |
ENTRY; |
|
while ( (rc = presto_truncate_lml_tail(fset)) > 0); |
if ( rc < 0 && rc != -EALREADY) { |
CERROR("truncate_lml error %d\n", rc); |
} |
EXIT; |
return rc; |
} |
|
/*
 * Neutralise the LML record at 'lml_offset' by rewriting its prefix header
 * with the opcode set to KML_OPCODE_NOOP.  The rest of the header is
 * written back as it was read.
 *
 * Returns 0 on success or when journaling is disabled for the fileset,
 * and -EIO if the header cannot be read or written completely.
 */
int presto_clear_lml_close(struct presto_file_set *fset, loff_t lml_offset)
{
        struct kml_prefix_hdr hdr;
        loff_t pos;
        int nbytes;

        ENTRY;

        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %d\n",
               (long)lml_offset, sizeof(hdr));
        pos = lml_offset;
        nbytes = presto_fread(fset->fset_lml.fd_file, (char *)&hdr,
                              sizeof(hdr), &pos);
        if ( nbytes != sizeof(hdr) ) {
                CERROR("presto: clear_lml io error %d\n", nbytes);
                EXIT;
                return -EIO;
        }

        /* overwrite the prefix */
        CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset);
        hdr.opcode = KML_OPCODE_NOOP;
        pos = lml_offset;       /* the read moved it; go back to the start */
        /* note: this does just a single transaction in the cache */
        nbytes = presto_fwrite(fset->fset_lml.fd_file, (char *)(&hdr),
                               sizeof(hdr), &pos);

        EXIT;
        return nbytes != sizeof(hdr) ? -EIO : 0;
}
|
|
|
/* now a journal function for every operation */ |
|
int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *dentry, struct presto_version *old_ver, |
struct izo_rollback_data *rb, struct iattr *iattr) |
{ |
int opcode = KML_OPCODE_SETATTR; |
char *buffer, *path, *logrecord, record[316]; |
struct dentry *root; |
__u32 uid, gid, mode, valid, flags, pathlen; |
__u64 fsize, mtime, ctime; |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) |
|| ((dentry->d_parent != dentry) && list_empty(&dentry->d_hash))) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + sizeof(*old_ver) + |
sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) + |
sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) + |
sizeof(pathlen) + sizeof(*rb) + sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
/* Only journal one kind of mtime, and not atime at all. Also don't |
* journal bogus data in iattr, to make the journal more compressible. |
*/ |
if (iattr->ia_valid & ATTR_MTIME_SET) |
iattr->ia_valid = iattr->ia_valid | ATTR_MTIME; |
valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET | |
ATTR_ATIME_SET)); |
mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0; |
uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0; |
gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0; |
fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0; |
mtime = iattr->ia_valid & ATTR_MTIME ? cpu_to_le64(iattr->ia_mtime): 0; |
ctime = iattr->ia_valid & ATTR_CTIME ? cpu_to_le64(iattr->ia_ctime): 0; |
flags = iattr->ia_valid & ATTR_ATTR_FLAG ? |
cpu_to_le32(iattr->ia_attr_flags): 0; |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, old_ver); |
logrecord = logit(logrecord, &valid, sizeof(valid)); |
logrecord = logit(logrecord, &mode, sizeof(mode)); |
logrecord = logit(logrecord, &uid, sizeof(uid)); |
logrecord = logit(logrecord, &gid, sizeof(gid)); |
logrecord = logit(logrecord, &fsize, sizeof(fsize)); |
logrecord = logit(logrecord, &mtime, sizeof(mtime)); |
logrecord = logit(logrecord, &ctime, sizeof(ctime)); |
logrecord = logit(logrecord, &flags, sizeof(flags)); |
logrecord = log_rollback(logrecord, rb); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0, NULL, 0); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
int presto_get_fileid(int minor, struct presto_file_set *fset, |
struct dentry *dentry) |
{ |
int opcode = KML_OPCODE_GET_FILEID; |
struct rec_info rec; |
char *buffer, *path, *logrecord, record[4096]; /*include path*/ |
struct dentry *root; |
__u32 uid, gid, pathlen; |
int error, size; |
struct kml_suffix *suffix; |
|
ENTRY; |
|
root = fset->fset_dentry; |
|
uid = cpu_to_le32(dentry->d_inode->i_uid); |
gid = cpu_to_le32(dentry->d_inode->i_gid); |
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + sizeof(pathlen) + |
size_round(le32_to_cpu(pathlen)) + |
sizeof(struct kml_suffix); |
|
CDEBUG(D_FILE, "kml size: %d\n", size); |
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
memset(&rec, 0, sizeof(rec)); |
rec.is_kml = 1; |
rec.size = size; |
|
logrecord = journal_log_prefix(record, opcode, &rec); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = logit(logrecord, path, size_round(le32_to_cpu(pathlen))); |
suffix = (struct kml_suffix *)logrecord; |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, &rec); |
/* journal_log_suffix expects journal_log to set this */ |
suffix->recno = 0; |
|
CDEBUG(D_FILE, "actual kml size: %d\n", logrecord - record); |
CDEBUG(D_FILE, "get fileid: uid %d, gid %d, path: %s\n", uid, gid,path); |
|
error = izo_upc_get_fileid(minor, size, record, |
size_round(le32_to_cpu(pathlen)), path, |
fset->fset_name); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *dentry, |
struct presto_version *tgt_dir_ver, |
struct presto_version *new_file_ver, int mode) |
{ |
int opcode = KML_OPCODE_CREATE; |
char *buffer, *path, *logrecord, record[292]; |
struct dentry *root; |
__u32 uid, gid, lmode, pathlen; |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
uid = cpu_to_le32(dentry->d_inode->i_uid); |
gid = cpu_to_le32(dentry->d_inode->i_gid); |
lmode = cpu_to_le32(mode); |
|
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + |
sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, dentry->d_parent); |
logrecord = log_version(logrecord, new_file_ver); |
logrecord = logit(logrecord, &lmode, sizeof(lmode)); |
logrecord = logit(logrecord, &uid, sizeof(uid)); |
logrecord = logit(logrecord, &gid, sizeof(gid)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0, NULL, 0); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *dentry, const char *target, |
struct presto_version *tgt_dir_ver, |
struct presto_version *new_link_ver) |
{ |
int opcode = KML_OPCODE_SYMLINK; |
char *buffer, *path, *logrecord, record[292]; |
struct dentry *root; |
__u32 uid, gid, pathlen; |
__u32 targetlen = cpu_to_le32(strlen(target)); |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
uid = cpu_to_le32(dentry->d_inode->i_uid); |
gid = cpu_to_le32(dentry->d_inode->i_gid); |
|
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + |
sizeof(uid) + sizeof(gid) + sizeof(pathlen) + |
sizeof(targetlen) + sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)) + |
size_round(le32_to_cpu(targetlen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, dentry->d_parent); |
logrecord = log_version(logrecord, new_link_ver); |
logrecord = logit(logrecord, &uid, sizeof(uid)); |
logrecord = logit(logrecord, &gid, sizeof(gid)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = logit(logrecord, &targetlen, sizeof(targetlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
target, size_round(le32_to_cpu(targetlen)), |
NULL, 0); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *dentry, |
struct presto_version *tgt_dir_ver, |
struct presto_version *new_dir_ver, int mode) |
{ |
int opcode = KML_OPCODE_MKDIR; |
char *buffer, *path, *logrecord, record[292]; |
struct dentry *root; |
__u32 uid, gid, lmode, pathlen; |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
uid = cpu_to_le32(dentry->d_inode->i_uid); |
gid = cpu_to_le32(dentry->d_inode->i_gid); |
lmode = cpu_to_le32(mode); |
|
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + |
sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)); |
logrecord = journal_log_prefix(record, opcode, rec); |
|
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, dentry->d_parent); |
logrecord = log_version(logrecord, new_dir_ver); |
logrecord = logit(logrecord, &lmode, sizeof(lmode)); |
logrecord = logit(logrecord, &uid, sizeof(uid)); |
logrecord = logit(logrecord, &gid, sizeof(gid)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0, NULL, 0); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
|
int |
presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *dir, struct presto_version *tgt_dir_ver, |
struct presto_version *old_dir_ver, |
struct izo_rollback_data *rb, int len, const char *name) |
{ |
int opcode = KML_OPCODE_RMDIR; |
char *buffer, *path, *logrecord, record[316]; |
__u32 pathlen, llen; |
struct dentry *root; |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
llen = cpu_to_le32(len); |
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dir, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + |
sizeof(pathlen) + sizeof(llen) + sizeof(*rb) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n", |
path, pathlen, name, len, size); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)) + |
size_round(len); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, dir); |
logrecord = log_version(logrecord, old_dir_ver); |
logrecord = logit(logrecord, rb, sizeof(*rb)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = logit(logrecord, &llen, sizeof(llen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dir, rec); |
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
name, size_round(len), |
NULL, 0); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
|
int |
presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *dentry, struct presto_version *tgt_dir_ver, |
struct presto_version *new_node_ver, int mode, |
int dmajor, int dminor ) |
{ |
int opcode = KML_OPCODE_MKNOD; |
char *buffer, *path, *logrecord, record[292]; |
struct dentry *root; |
__u32 uid, gid, lmode, lmajor, lminor, pathlen; |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
uid = cpu_to_le32(dentry->d_inode->i_uid); |
gid = cpu_to_le32(dentry->d_inode->i_gid); |
lmode = cpu_to_le32(mode); |
lmajor = cpu_to_le32(dmajor); |
lminor = cpu_to_le32(dminor); |
|
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + |
sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) + |
sizeof(lminor) + sizeof(pathlen) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, dentry->d_parent); |
logrecord = log_version(logrecord, new_node_ver); |
logrecord = logit(logrecord, &lmode, sizeof(lmode)); |
logrecord = logit(logrecord, &uid, sizeof(uid)); |
logrecord = logit(logrecord, &gid, sizeof(gid)); |
logrecord = logit(logrecord, &lmajor, sizeof(lmajor)); |
logrecord = logit(logrecord, &lminor, sizeof(lminor)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0, NULL, 0); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
int |
presto_journal_link(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *src, struct dentry *tgt, |
struct presto_version *tgt_dir_ver, |
struct presto_version *new_link_ver) |
{ |
int opcode = KML_OPCODE_LINK; |
char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292]; |
__u32 pathlen, srcpathlen; |
struct dentry *root; |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
BUFF_ALLOC(srcbuffer, NULL); |
srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE); |
srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath)); |
|
BUFF_ALLOC(buffer, srcbuffer); |
path = presto_path(tgt, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + |
sizeof(srcpathlen) + sizeof(pathlen) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)) + |
size_round(le32_to_cpu(srcpathlen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, tgt->d_parent); |
logrecord = log_version(logrecord, new_link_ver); |
logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec); |
|
error = presto_log(fset, rec, record, size, |
srcpath, size_round(le32_to_cpu(srcpathlen)), |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0); |
|
BUFF_FREE(srcbuffer); |
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
|
int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *src, struct dentry *tgt, |
struct presto_version *src_dir_ver, |
struct presto_version *tgt_dir_ver) |
{ |
int opcode = KML_OPCODE_RENAME; |
char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292]; |
__u32 pathlen, srcpathlen; |
struct dentry *root; |
int error, size; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
BUFF_ALLOC(srcbuffer, NULL); |
srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE); |
srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath)); |
|
BUFF_ALLOC(buffer, srcbuffer); |
path = presto_path(tgt, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 4 * sizeof(*src_dir_ver) + |
sizeof(srcpathlen) + sizeof(pathlen) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)) + |
size_round(le32_to_cpu(srcpathlen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, src_dir_ver); |
logrecord = log_dentry_version(logrecord, src->d_parent); |
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, tgt->d_parent); |
logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec); |
|
error = presto_log(fset, rec, record, size, |
srcpath, size_round(le32_to_cpu(srcpathlen)), |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0); |
|
BUFF_FREE(buffer); |
BUFF_FREE(srcbuffer); |
EXIT; |
return error; |
} |
|
int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset, |
struct dentry *dir, struct presto_version *tgt_dir_ver, |
struct presto_version *old_file_ver, |
struct izo_rollback_data *rb, struct dentry *dentry, |
char *old_target, int old_targetlen) |
{ |
int opcode = KML_OPCODE_UNLINK; |
char *buffer, *path, *logrecord, record[316]; |
const char *name; |
__u32 pathlen, llen; |
struct dentry *root; |
int error, size, len; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
name = dentry->d_name.name; |
len = dentry->d_name.len; |
|
llen = cpu_to_le32(len); |
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dir, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + |
sizeof(pathlen) + sizeof(llen) + sizeof(*rb) + |
sizeof(old_targetlen) + sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len) + |
size_round(old_targetlen); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, tgt_dir_ver); |
logrecord = log_dentry_version(logrecord, dir); |
logrecord = log_version(logrecord, old_file_ver); |
logrecord = log_rollback(logrecord, rb); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = logit(logrecord, &llen, sizeof(llen)); |
logrecord = logit(logrecord, &old_targetlen, sizeof(old_targetlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dir, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
name, size_round(len), |
old_target, size_round(old_targetlen)); |
|
BUFF_FREE(buffer); |
EXIT; |
return error; |
} |
|
int |
presto_journal_close(struct rec_info *rec, struct presto_file_set *fset, |
struct file *file, struct dentry *dentry, |
struct presto_version *old_file_ver, |
struct presto_version *new_file_ver) |
{ |
int opcode = KML_OPCODE_CLOSE; |
struct presto_file_data *fd; |
char *buffer, *path, *logrecord, record[316]; |
struct dentry *root; |
int error, size, i; |
__u32 pathlen, generation; |
__u64 ino; |
__u32 open_fsuid; |
__u32 open_fsgid; |
__u32 open_ngroups; |
__u32 open_groups[NGROUPS_MAX]; |
__u32 open_mode; |
__u32 open_uid; |
__u32 open_gid; |
|
ENTRY; |
|
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) |
|| ((dentry->d_parent != dentry) && list_empty(&dentry->d_hash))) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
fd = (struct presto_file_data *)file->private_data; |
if (fd) { |
open_ngroups = fd->fd_ngroups; |
for (i = 0; i < fd->fd_ngroups; i++) |
open_groups[i] = (__u32) fd->fd_groups[i]; |
open_mode = fd->fd_mode; |
open_uid = fd->fd_uid; |
open_gid = fd->fd_gid; |
open_fsuid = fd->fd_fsuid; |
open_fsgid = fd->fd_fsgid; |
} else { |
open_ngroups = current->ngroups; |
for (i=0; i<current->ngroups; i++) |
open_groups[i] = (__u32) current->groups[i]; |
open_mode = dentry->d_inode->i_mode; |
open_uid = dentry->d_inode->i_uid; |
open_gid = dentry->d_inode->i_gid; |
open_fsuid = current->fsuid; |
open_fsgid = current->fsgid; |
} |
BUFF_ALLOC(buffer, NULL); |
path = presto_path(dentry, root, buffer, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); |
ino = cpu_to_le64(dentry->d_inode->i_ino); |
generation = cpu_to_le32(dentry->d_inode->i_generation); |
size = sizeof(__u32) * open_ngroups + |
sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) + |
sizeof(struct kml_prefix_hdr) + sizeof(*old_file_ver) + |
sizeof(*new_file_ver) + sizeof(ino) + sizeof(generation) + |
sizeof(pathlen) + sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)); |
|
logrecord = journal_log_prefix_with_groups_and_ids( |
record, opcode, rec, open_ngroups, open_groups, |
open_fsuid, open_fsgid); |
logrecord = logit(logrecord, &open_mode, sizeof(open_mode)); |
logrecord = logit(logrecord, &open_uid, sizeof(open_uid)); |
logrecord = logit(logrecord, &open_gid, sizeof(open_gid)); |
logrecord = log_version(logrecord, old_file_ver); |
logrecord = log_version(logrecord, new_file_ver); |
logrecord = logit(logrecord, &ino, sizeof(ino)); |
logrecord = logit(logrecord, &generation, sizeof(generation)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0, NULL, 0); |
BUFF_FREE(buffer); |
|
EXIT; |
return error; |
} |
|
int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset, |
char *path, __u32 pathlen, |
int ngroups, __u32 *groups, |
__u64 ino, __u32 generation, |
struct presto_version *new_file_ver) |
{ |
int opcode = KML_OPCODE_CLOSE; |
char *logrecord, record[292]; |
struct dentry *root; |
int error, size; |
|
ENTRY; |
|
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
size = sizeof(__u32) * ngroups + |
sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) + |
sizeof(ino) + sizeof(generation) + |
sizeof(le32_to_cpu(pathlen)) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
rec->size = size + size_round(le32_to_cpu(pathlen)); |
|
logrecord = journal_log_prefix_with_groups(record, opcode, rec, |
ngroups, groups); |
logrecord = log_version(logrecord, new_file_ver); |
logrecord = logit(logrecord, &ino, sizeof(ino)); |
logrecord = logit(logrecord, &generation, sizeof(generation)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
NULL, 0, NULL, 0); |
|
EXIT; |
return error; |
} |
|
|
/* write closes for the local close records in the LML */ |
int presto_complete_lml(struct presto_file_set *fset) |
{ |
__u32 groups[NGROUPS_MAX]; |
loff_t lml_offset; |
loff_t read_offset; |
char *buffer; |
void *handle; |
struct rec_info rec; |
struct close_rec { |
struct presto_version new_file_ver; |
__u64 ino; |
__u32 generation; |
__u32 pathlen; |
__u64 remote_ino; |
__u32 remote_generation; |
__u32 remote_version; |
__u64 lml_offset; |
} close_rec; |
struct file *file = fset->fset_lml.fd_file; |
struct kml_prefix_hdr prefix; |
int rc = 0; |
ENTRY; |
|
lml_offset = 0; |
again: |
if (lml_offset >= file->f_dentry->d_inode->i_size) { |
EXIT; |
return rc; |
} |
|
read_offset = lml_offset; |
rc = presto_fread(file, (char *)&prefix, |
sizeof(prefix), &read_offset); |
if ( rc != sizeof(prefix) ) { |
EXIT; |
CERROR("presto_complete_lml: ioerror - 1, tell Peter\n"); |
return -EIO; |
} |
|
if ( prefix.opcode == KML_OPCODE_NOOP ) { |
lml_offset += prefix.len; |
goto again; |
} |
|
rc = presto_fread(file, (char *)groups, |
prefix.ngroups * sizeof(__u32), &read_offset); |
if ( rc != prefix.ngroups * sizeof(__u32) ) { |
EXIT; |
CERROR("presto_complete_lml: ioerror - 2, tell Peter\n"); |
return -EIO; |
} |
|
rc = presto_fread(file, (char *)&close_rec, |
sizeof(close_rec), &read_offset); |
if ( rc != sizeof(close_rec) ) { |
EXIT; |
CERROR("presto_complete_lml: ioerror - 3, tell Peter\n"); |
return -EIO; |
} |
|
/* is this a backfetch or a close record? */ |
if ( le64_to_cpu(close_rec.remote_ino) != 0 ) { |
lml_offset += prefix.len; |
goto again; |
} |
|
BUFF_ALLOC(buffer, NULL); |
rc = presto_fread(file, (char *)buffer, |
le32_to_cpu(close_rec.pathlen), &read_offset); |
if ( rc != le32_to_cpu(close_rec.pathlen) ) { |
EXIT; |
CERROR("presto_complete_lml: ioerror - 4, tell Peter\n"); |
return -EIO; |
} |
|
handle = presto_trans_start(fset, file->f_dentry->d_inode, |
KML_OPCODE_RELEASE); |
if ( IS_ERR(handle) ) { |
EXIT; |
return -ENOMEM; |
} |
|
rc = presto_clear_lml_close(fset, lml_offset); |
if ( rc ) { |
CERROR("error during clearing: %d\n", rc); |
presto_trans_commit(fset, handle); |
EXIT; |
return rc; |
} |
|
rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen, |
prefix.ngroups, groups, |
close_rec.ino, close_rec.generation, |
&close_rec.new_file_ver); |
if ( rc ) { |
CERROR("error during rewrite close: %d\n", rc); |
presto_trans_commit(fset, handle); |
EXIT; |
return rc; |
} |
|
presto_trans_commit(fset, handle); |
if ( rc ) { |
CERROR("error during truncation: %d\n", rc); |
EXIT; |
return rc; |
} |
|
lml_offset += prefix.len; |
CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset); |
goto again; |
|
EXIT; |
return -EINVAL; |
} |
|
|
#ifdef CONFIG_FS_EXT_ATTR |
/* Journal an ea operation. A NULL buffer implies the attribute is |
* getting deleted. In this case we simply change the opcode, but nothing |
* else is affected. |
*/ |
int presto_journal_set_ext_attr (struct rec_info *rec, |
struct presto_file_set *fset, |
struct dentry *dentry, |
struct presto_version *ver, const char *name, |
const char *buffer, int buffer_len, |
int flags) |
{ |
int opcode = (buffer == NULL) ? |
KML_OPCODE_DELEXTATTR : |
KML_OPCODE_SETEXTATTR ; |
char *temp, *path, *logrecord, record[292]; |
struct dentry *root; |
int error, size; |
__u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX)); |
__u32 buflen=(buffer != NULL)? cpu_to_le32(buffer_len): cpu_to_le32(0); |
__u32 mode, pathlen; |
|
ENTRY; |
if ( presto_no_journal(fset) ) { |
EXIT; |
return 0; |
} |
|
if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) |
|| ((dentry->d_parent != dentry) && list_empty(&dentry->d_hash))) { |
EXIT; |
return 0; |
} |
|
root = fset->fset_dentry; |
|
BUFF_ALLOC(temp, NULL); |
path = presto_path(dentry, root, temp, PAGE_SIZE); |
pathlen = cpu_to_le32(MYPATHLEN(temp, path)); |
|
flags=cpu_to_le32(flags); |
/* Ugly, but needed. posix ACLs change the mode without using |
* setattr, we need to record these changes. The EA code per se |
* is not really affected. |
*/ |
mode=cpu_to_le32(dentry->d_inode->i_mode); |
|
size = sizeof(__u32) * current->ngroups + |
sizeof(struct kml_prefix_hdr) + |
2 * sizeof(struct presto_version) + |
sizeof(flags) + sizeof(mode) + sizeof(namelen) + |
sizeof(buflen) + sizeof(pathlen) + |
sizeof(struct kml_suffix); |
|
if ( size > sizeof(record) ) |
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); |
|
rec->is_kml = 1; |
/* Make space for a path, a attr name and value*/ |
/* We use the buflen instead of buffer_len to make sure that we |
* journal the right length. This may be a little paranoid, but |
* with 64 bits round the corner, I would rather be safe than sorry! |
* Also this handles deletes with non-zero buffer_lengths correctly. |
* SHP |
*/ |
rec->size = size + size_round(le32_to_cpu(pathlen)) + |
size_round(le32_to_cpu(namelen)) + |
size_round(le32_to_cpu(buflen)); |
|
logrecord = journal_log_prefix(record, opcode, rec); |
logrecord = log_version(logrecord, ver); |
logrecord = log_dentry_version(logrecord, dentry); |
logrecord = logit(logrecord, &flags, sizeof(flags)); |
logrecord = logit(logrecord, &mode, sizeof(flags)); |
logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); |
logrecord = logit(logrecord, &namelen, sizeof(namelen)); |
logrecord = logit(logrecord, &buflen, sizeof(buflen)); |
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); |
|
error = presto_log(fset, rec, record, size, |
path, size_round(le32_to_cpu(pathlen)), |
name, size_round(le32_to_cpu(namelen)), |
buffer, size_round(le32_to_cpu(buflen))); |
|
BUFF_FREE(temp); |
EXIT; |
return error; |
} |
#endif |
/presto.c
0,0 → 1,740
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Author: Peter J. Braam <braam@clusterfs.com> |
* Copyright (C) 1998 Stelias Computing Inc |
* Copyright (C) 1999 Red Hat Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* This file implements basic routines supporting the semantics |
*/ |
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/vmalloc.h> |
#include <linux/slab.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#include <linux/smp_lock.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
int presto_walk(const char *name, struct nameidata *nd) |
{ |
int err; |
/* we do not follow symlinks to support symlink operations |
correctly. The vfs should always hand us resolved dentries |
so we should not be required to use LOOKUP_FOLLOW. At the |
reintegrating end, lento again should be working with the |
resolved pathname and not the symlink. SHP |
XXX: This code implies that direct symlinks do not work. SHP |
*/ |
unsigned int flags = LOOKUP_POSITIVE; |
|
ENTRY; |
err = 0; |
if (path_init(name, flags, nd)) |
err = path_walk(name, nd); |
return err; |
} |
|
|
/* find the presto minor device for this inode */ |
int presto_i2m(struct inode *inode) |
{ |
struct presto_cache *cache; |
ENTRY; |
cache = presto_get_cache(inode); |
CDEBUG(D_PSDEV, "\n"); |
if ( !cache ) { |
CERROR("PRESTO: BAD: cannot find cache for dev %d, ino %ld\n", |
inode->i_dev, inode->i_ino); |
EXIT; |
return -1; |
} |
EXIT; |
return cache->cache_psdev->uc_minor; |
} |
|
inline int presto_f2m(struct presto_file_set *fset) |
{ |
return fset->fset_cache->cache_psdev->uc_minor; |
|
} |
|
inline int presto_c2m(struct presto_cache *cache) |
{ |
return cache->cache_psdev->uc_minor; |
|
} |
|
/* XXX check this out */ |
struct presto_file_set *presto_path2fileset(const char *name) |
{ |
struct nameidata nd; |
struct presto_file_set *fileset; |
int error; |
ENTRY; |
|
error = presto_walk(name, &nd); |
if (!error) { |
#if 0 |
error = do_revalidate(nd.dentry); |
#endif |
if (!error) |
fileset = presto_fset(nd.dentry); |
path_release(&nd); |
EXIT; |
} else |
fileset = ERR_PTR(error); |
|
EXIT; |
return fileset; |
} |
|
/* check a flag on this dentry or fset root. Semantics: |
- most flags: test if it is set |
- PRESTO_ATTR, PRESTO_DATA return 1 if PRESTO_FSETINSYNC is set |
*/ |
int presto_chk(struct dentry *dentry, int flag) |
{ |
int minor; |
struct presto_file_set *fset = presto_fset(dentry); |
|
ENTRY; |
minor = presto_i2m(dentry->d_inode); |
if ( izo_channels[minor].uc_no_filter ) { |
EXIT; |
return ~0; |
} |
|
/* if the fileset is in sync DATA and ATTR are OK */ |
if ( fset && |
(flag == PRESTO_ATTR || flag == PRESTO_DATA) && |
(fset->fset_flags & FSET_INSYNC) ) { |
CDEBUG(D_INODE, "fset in sync (ino %ld)!\n", |
fset->fset_dentry->d_inode->i_ino); |
EXIT; |
return 1; |
} |
|
EXIT; |
return (presto_d2d(dentry)->dd_flags & flag); |
} |
|
/* set a bit in the dentry flags */ |
void presto_set(struct dentry *dentry, int flag) |
{ |
ENTRY; |
if ( dentry->d_inode ) { |
CDEBUG(D_INODE, "SET ino %ld, flag %x\n", |
dentry->d_inode->i_ino, flag); |
} |
if ( presto_d2d(dentry) == NULL) { |
CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry, |
dentry->d_name.len, dentry->d_name.name); |
BUG(); |
} |
presto_d2d(dentry)->dd_flags |= flag; |
EXIT; |
} |
|
/* given a path: complete the closes on the fset */ |
int lento_complete_closes(char *path) |
{ |
struct nameidata nd; |
struct dentry *dentry; |
int error; |
struct presto_file_set *fset; |
ENTRY; |
|
error = presto_walk(path, &nd); |
if (error) { |
EXIT; |
return error; |
} |
|
dentry = nd.dentry; |
|
error = -ENXIO; |
if ( !presto_ispresto(dentry->d_inode) ) { |
EXIT; |
goto out_complete; |
} |
|
fset = presto_fset(dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto out_complete; |
} |
|
/* transactions and locking are internal to this function */ |
error = presto_complete_lml(fset); |
|
EXIT; |
out_complete: |
path_release(&nd); |
return error; |
} |
|
#if 0 |
/* given a path: write a close record and cancel an LML record, finally |
call truncate LML. Lento is doing this so it goes in with uid/gid's |
root. |
|
NOTE: this function is enclosed in an "#if 0" region and is not |
compiled. It is stale: it passes five arguments to |
presto_journal_close(), while the definition of that function in this |
source tree takes six, and it uses PRESTO_OP_RELEASE where |
presto_complete_lml() uses KML_OPCODE_RELEASE. |
|
Steps, each selected by a bit in info->flags, all inside one |
transaction started with presto_trans_start(): |
- LENTO_FL_CANCEL_LML: presto_clear_lml_close(fset, lml_offset) |
- LENTO_FL_WRITE_KML: presto_journal_close() on a stack "struct file" |
whose private_data is NULL |
- LENTO_FL_WRITE_EXPECT: presto_write_last_rcvd() |
After the commit, LENTO_FL_CANCEL_LML also calls presto_truncate_lml(). |
|
remote_ino, remote_generation and remote_version are not used in the |
body. "error" is still -EINVAL on return when none of the three flag |
bits is set. |
*/ |
int lento_cancel_lml(char *path, |
__u64 lml_offset, |
__u64 remote_ino, |
__u32 remote_generation, |
__u32 remote_version, |
struct lento_vfs_context *info) |
{ |
struct nameidata nd; |
struct rec_info rec; |
struct dentry *dentry; |
int error; |
struct presto_file_set *fset; |
void *handle; |
struct presto_version new_ver; |
ENTRY; |
|
|
error = presto_walk(path, &nd); |
if (error) { |
EXIT; |
return error; |
} |
dentry = nd.dentry; |
|
error = -ENXIO; |
if ( !presto_ispresto(dentry->d_inode) ) { |
EXIT; |
goto out_cancel_lml; |
} |
|
fset = presto_fset(dentry); |
|
error=-EINVAL; |
if (fset==NULL) { |
CERROR("No fileset!\n"); |
EXIT; |
goto out_cancel_lml; |
} |
|
/* this only requires a transaction below which is automatic */ |
handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE); |
if ( IS_ERR(handle) ) { |
error = -ENOMEM; |
EXIT; |
goto out_cancel_lml; |
} |
|
if (info->flags & LENTO_FL_CANCEL_LML) { |
error = presto_clear_lml_close(fset, lml_offset); |
if ( error ) { |
presto_trans_commit(fset, handle); |
EXIT; |
goto out_cancel_lml; |
} |
} |
|
|
if (info->flags & LENTO_FL_WRITE_KML) { |
struct file file; |
file.private_data = NULL; |
file.f_dentry = dentry; |
presto_getversion(&new_ver, dentry->d_inode); |
error = presto_journal_close(&rec, fset, &file, dentry, |
&new_ver); |
if ( error ) { |
EXIT; |
presto_trans_commit(fset, handle); |
goto out_cancel_lml; |
} |
} |
|
if (info->flags & LENTO_FL_WRITE_EXPECT) { |
error = presto_write_last_rcvd(&rec, fset, info); |
if ( error < 0 ) { |
EXIT; |
presto_trans_commit(fset, handle); |
goto out_cancel_lml; |
} |
} |
|
presto_trans_commit(fset, handle); |
|
if (info->flags & LENTO_FL_CANCEL_LML) { |
presto_truncate_lml(fset); |
} |
|
|
out_cancel_lml: |
EXIT; |
path_release(&nd); |
return error; |
} |
#endif |
|
/* given a dentry, operate on the flags in its dentry. Used by downcalls */ |
int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag, |
int *res) |
{ |
int error = 0; |
|
if (presto_d2d(dentry) == NULL) { |
CERROR("InterMezzo: no ddata for inode %ld in %s\n", |
dentry->d_inode->i_ino, __FUNCTION__); |
return -EINVAL; |
} |
|
CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n", |
dentry->d_inode->i_ino, and_flag, or_flag, |
presto_d2d(dentry)->dd_flags); |
|
presto_d2d(dentry)->dd_flags &= and_flag; |
presto_d2d(dentry)->dd_flags |= or_flag; |
if (res) |
*res = presto_d2d(dentry)->dd_flags; |
|
return error; |
} |
|
/* given a path, operate on the flags in its cache. Used by mark_ioctl */ |
int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag, |
int *res) |
{ |
struct presto_cache *cache; |
|
if (presto_d2d(dentry) == NULL) { |
CERROR("InterMezzo: no ddata for inode %ld in %s\n", |
dentry->d_inode->i_ino, __FUNCTION__); |
return -EINVAL; |
} |
|
CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n", |
dentry->d_inode->i_ino, and_flag, or_flag, |
presto_d2d(dentry)->dd_flags); |
|
cache = presto_get_cache(dentry->d_inode); |
if ( !cache ) { |
CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n"); |
return -EBADF; |
} |
|
((int)cache->cache_flags) &= and_flag; |
((int)cache->cache_flags) |= or_flag; |
if (res) |
*res = (int)cache->cache_flags; |
|
return 0; |
} |
|
int presto_set_max_kml_size(const char *path, unsigned long max_size) |
{ |
struct presto_file_set *fset; |
|
ENTRY; |
|
fset = presto_path2fileset(path); |
if (IS_ERR(fset)) { |
EXIT; |
return PTR_ERR(fset); |
} |
|
fset->kml_truncate_size = max_size; |
CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n", |
max_size, path); |
|
EXIT; |
return 0; |
} |
|
int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag, |
int * res) |
{ |
struct presto_file_set *fset; |
|
fset = presto_fset(dentry); |
if ( !fset ) { |
CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n"); |
make_bad_inode(dentry->d_inode); |
return -EBADF; |
} |
((int)fset->fset_flags) &= and_flag; |
((int)fset->fset_flags) |= or_flag; |
if (res) |
*res = (int)fset->fset_flags; |
|
return 0; |
} |
|
/* talk to Lento about the permit */ |
static int presto_permit_upcall(struct dentry *dentry) |
{ |
int rc; |
char *path, *buffer; |
int pathlen; |
int minor; |
int fsetnamelen; |
struct presto_file_set *fset = NULL; |
|
ENTRY; |
|
if ( (minor = presto_i2m(dentry->d_inode)) < 0) { |
EXIT; |
return -EINVAL; |
} |
|
fset = presto_fset(dentry); |
if (!fset) { |
EXIT; |
return -ENOTCONN; |
} |
|
if ( !presto_lento_up(minor) ) { |
if ( fset->fset_flags & FSET_STEAL_PERMIT ) { |
EXIT; |
return 0; |
} else { |
EXIT; |
return -ENOTCONN; |
} |
} |
|
PRESTO_ALLOC(buffer, PAGE_SIZE); |
if ( !buffer ) { |
CERROR("PRESTO: out of memory!\n"); |
EXIT; |
return -ENOMEM; |
} |
path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE); |
pathlen = MYPATHLEN(buffer, path); |
fsetnamelen = strlen(fset->fset_name); |
rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name); |
PRESTO_FREE(buffer, PAGE_SIZE); |
EXIT; |
return rc; |
} |
|
/* get a write permit for the fileset of this inode |
* - if this returns a negative value there was an error |
* - if 0 is returned the permit was already in the kernel -- or -- |
* Lento gave us the permit without reintegration |
* - lento returns the number of records it reintegrated |
* |
* Note that if this fileset has branches, a permit will -never- to a normal |
* process for writing in the data area (ie, outside of .intermezzo) |
*/ |
int presto_get_permit(struct inode * inode) |
{ |
struct dentry *de; |
struct presto_file_set *fset; |
int minor = presto_i2m(inode); |
int rc = 0; |
|
ENTRY; |
if (minor < 0) { |
EXIT; |
return -1; |
} |
|
if ( ISLENTO(minor) ) { |
EXIT; |
return 0; |
} |
|
if (list_empty(&inode->i_dentry)) { |
CERROR("No alias for inode %d\n", (int) inode->i_ino); |
EXIT; |
return -EINVAL; |
} |
|
de = list_entry(inode->i_dentry.next, struct dentry, d_alias); |
|
if (presto_chk(de, PRESTO_DONT_JOURNAL)) { |
EXIT; |
return 0; |
} |
|
fset = presto_fset(de); |
if ( !fset ) { |
CERROR("Presto: no fileset in presto_get_permit!\n"); |
EXIT; |
return -EINVAL; |
} |
|
if (fset->fset_flags & FSET_HAS_BRANCHES) { |
EXIT; |
return -EROFS; |
} |
|
spin_lock(&fset->fset_permit_lock); |
if (fset->fset_flags & FSET_HASPERMIT) { |
fset->fset_permit_count++; |
CDEBUG(D_INODE, "permit count now %d, inode %lx\n", |
fset->fset_permit_count, inode->i_ino); |
spin_unlock(&fset->fset_permit_lock); |
EXIT; |
return 0; |
} |
|
/* Allow reintegration to proceed without locks -SHP */ |
fset->fset_permit_upcall_count++; |
if (fset->fset_permit_upcall_count == 1) { |
spin_unlock(&fset->fset_permit_lock); |
rc = presto_permit_upcall(fset->fset_dentry); |
spin_lock(&fset->fset_permit_lock); |
fset->fset_permit_upcall_count--; |
if (rc == 0) { |
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT, |
NULL); |
fset->fset_permit_count++; |
} else if (rc == ENOTCONN) { |
CERROR("InterMezzo: disconnected operation. stealing permit.\n"); |
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT, |
NULL); |
fset->fset_permit_count++; |
/* set a disconnected flag here to stop upcalls */ |
rc = 0; |
} else { |
CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc); |
rc = -EROFS; |
/* go to sleep here and try again? */ |
} |
wake_up_interruptible(&fset->fset_permit_queue); |
} else { |
/* Someone is already doing an upcall; go to sleep. */ |
DECLARE_WAITQUEUE(wait, current); |
|
spin_unlock(&fset->fset_permit_lock); |
add_wait_queue(&fset->fset_permit_queue, &wait); |
while (1) { |
set_current_state(TASK_INTERRUPTIBLE); |
|
spin_lock(&fset->fset_permit_lock); |
if (fset->fset_permit_upcall_count == 0) |
break; |
spin_unlock(&fset->fset_permit_lock); |
|
if (signal_pending(current)) { |
remove_wait_queue(&fset->fset_permit_queue, |
&wait); |
return -ERESTARTSYS; |
} |
schedule(); |
} |
remove_wait_queue(&fset->fset_permit_queue, &wait); |
/* We've been woken up: do we have the permit? */ |
if (fset->fset_flags & FSET_HASPERMIT) |
/* FIXME: Is this the right thing? */ |
rc = -EAGAIN; |
} |
|
CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), " |
"rc %d\n", fset->fset_permit_count, inode->i_ino, rc); |
spin_unlock(&fset->fset_permit_lock); |
EXIT; |
return rc; |
} |
|
int presto_put_permit(struct inode * inode) |
{ |
struct dentry *de; |
struct presto_file_set *fset; |
int minor = presto_i2m(inode); |
|
ENTRY; |
if (minor < 0) { |
EXIT; |
return -1; |
} |
|
if ( ISLENTO(minor) ) { |
EXIT; |
return 0; |
} |
|
if (list_empty(&inode->i_dentry)) { |
CERROR("No alias for inode %d\n", (int) inode->i_ino); |
EXIT; |
return -1; |
} |
|
de = list_entry(inode->i_dentry.next, struct dentry, d_alias); |
|
fset = presto_fset(de); |
if ( !fset ) { |
CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__); |
EXIT; |
return -1; |
} |
|
if (presto_chk(de, PRESTO_DONT_JOURNAL)) { |
EXIT; |
return 0; |
} |
|
spin_lock(&fset->fset_permit_lock); |
if (fset->fset_flags & FSET_HASPERMIT) { |
if (fset->fset_permit_count > 0) |
fset->fset_permit_count--; |
else |
CERROR("Put permit while permit count is 0, " |
"inode %ld!\n", inode->i_ino); |
} else { |
fset->fset_permit_count = 0; |
CERROR("InterMezzo: put permit while no permit, inode %ld, " |
"flags %x!\n", inode->i_ino, fset->fset_flags); |
} |
|
CDEBUG(D_INODE, "permit count now %d, inode %ld\n", |
fset->fset_permit_count, inode->i_ino); |
|
if (fset->fset_flags & FSET_PERMIT_WAITING && |
fset->fset_permit_count == 0) { |
CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n", |
inode->i_ino); |
wake_up_interruptible(&fset->fset_permit_queue); |
} |
spin_unlock(&fset->fset_permit_lock); |
|
EXIT; |
return 0; |
} |
|
void presto_getversion(struct presto_version * presto_version, |
struct inode * inode) |
{ |
presto_version->pv_mtime = (__u64)inode->i_mtime; |
presto_version->pv_ctime = (__u64)inode->i_ctime; |
presto_version->pv_size = (__u64)inode->i_size; |
} |
|
|
/* If uuid is non-null, it is the uuid of the peer that's making the revocation |
* request. If it is null, this request was made locally, without external |
* pressure to give up the permit. This most often occurs when a client |
* starts up. |
* |
* FIXME: this function needs to be refactored slightly once we start handling |
* multiple clients. |
*/ |
int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16]) |
{ |
struct presto_file_set *fset; |
DECLARE_WAITQUEUE(wait, current); |
int minor, rc; |
|
ENTRY; |
|
minor = presto_i2m(dentry->d_inode); |
if (minor < 0) { |
EXIT; |
return -ENODEV; |
} |
|
fset = presto_fset(dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
|
spin_lock(&fset->fset_permit_lock); |
if (fset->fset_flags & FSET_PERMIT_WAITING) { |
CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported! Aborting this particular permit request...\n"); |
EXIT; |
spin_unlock(&fset->fset_permit_lock); |
return -EINVAL; |
} |
|
if (fset->fset_permit_count == 0) |
goto got_permit; |
|
/* Something is still using this permit. Mark that we're waiting for it |
* and go to sleep. */ |
rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL); |
spin_unlock(&fset->fset_permit_lock); |
if (rc < 0) { |
EXIT; |
return rc; |
} |
|
add_wait_queue(&fset->fset_permit_queue, &wait); |
while (1) { |
set_current_state(TASK_INTERRUPTIBLE); |
|
spin_lock(&fset->fset_permit_lock); |
if (fset->fset_permit_count == 0) |
break; |
spin_unlock(&fset->fset_permit_lock); |
|
if (signal_pending(current)) { |
/* FIXME: there must be a better thing to return... */ |
remove_wait_queue(&fset->fset_permit_queue, &wait); |
EXIT; |
return -ERESTARTSYS; |
} |
|
/* FIXME: maybe there should be a timeout here. */ |
|
schedule(); |
} |
|
remove_wait_queue(&fset->fset_permit_queue, &wait); |
got_permit: |
/* By this point fset->fset_permit_count is zero and we're holding the |
* lock. */ |
CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n", |
dentry->d_inode->i_ino); |
|
if (uuid != NULL) { |
rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid); |
if (rc < 0) { |
spin_unlock(&fset->fset_permit_lock); |
EXIT; |
return rc; |
} |
} |
|
izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL); |
izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL); |
spin_unlock(&fset->fset_permit_lock); |
EXIT; |
return 0; |
} |
|
inline int presto_is_read_only(struct presto_file_set * fset) |
{ |
int minor, mask; |
struct presto_cache *cache = fset->fset_cache; |
|
minor= cache->cache_psdev->uc_minor; |
mask= (ISLENTO(minor)? FSET_LENTO_RO : FSET_CLIENT_RO); |
if ( fset->fset_flags & mask ) |
return 1; |
mask= (ISLENTO(minor)? CACHE_LENTO_RO : CACHE_CLIENT_RO); |
return ((cache->cache_flags & mask)? 1 : 0); |
} |
/journal_reiserfs.c
0,0 → 1,142
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com> |
* Copyright (C) 2000 Red Hat, Inc. |
* Copyright (C) 2000 Los Alamos National Laboratory |
* Copyright (C) 2000 TurboLinux, Inc. |
* Copyright (C) 2001 Mountain View Data, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <linux/types.h> |
#include <linux/param.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/smp_lock.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#if 0 |
#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) |
#include <linux/reiserfs_fs.h> |
#include <linux/reiserfs_fs_sb.h> |
#include <linux/reiserfs_fs_i.h> |
#endif |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) |
|
|
static loff_t presto_reiserfs_freespace(struct presto_cache *cache, |
struct super_block *sb) |
{ |
struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (sb); |
loff_t avail; |
|
avail = le32_to_cpu(rs->s_free_blocks) * |
le16_to_cpu(rs->s_blocksize); |
return avail; |
} |
|
/* start the filesystem journal operations */ |
static void *presto_reiserfs_trans_start(struct presto_file_set *fset, |
struct inode *inode, |
int op) |
{ |
int jblocks; |
__u32 avail_kmlblocks; |
struct reiserfs_transaction_handle *th ; |
|
PRESTO_ALLOC(th, sizeof(*th)); |
if (!th) { |
CERROR("presto: No memory for trans handle\n"); |
return NULL; |
} |
|
avail_kmlblocks = presto_reiserfs_freespace(fset->fset_cache, |
inode->i_sb); |
if ( presto_no_journal(fset) || |
strcmp(fset->fset_cache->cache_type, "reiserfs")) |
{ |
CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", |
fset->fset_cache->cache_type); |
return NULL; |
} |
|
if ( avail_kmlblocks < 3 ) { |
return ERR_PTR(-ENOSPC); |
} |
|
if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) |
&& avail_kmlblocks < 6 ) { |
return ERR_PTR(-ENOSPC); |
} |
|
jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4; |
CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); |
|
lock_kernel(); |
journal_begin(th, inode->i_sb, jblocks); |
unlock_kernel(); |
return th; |
} |
|
static void presto_reiserfs_trans_commit(struct presto_file_set *fset, |
void *handle) |
{ |
int jblocks; |
jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4; |
|
lock_kernel(); |
journal_end(handle, fset->fset_cache->cache_sb, jblocks); |
unlock_kernel(); |
PRESTO_FREE(handle, sizeof(struct reiserfs_transaction_handle)); |
} |
|
/*
 * Request journaled data writes for @inode by setting EXT3_JOURNAL_DATA_FL,
 * when that flag is defined at build time; otherwise only a build warning
 * is emitted and the function does nothing.
 *
 * NOTE(review): this writes the ext3 member of the inode union although the
 * file is the reiserfs backend -- confirm this is intended.
 */
static void presto_reiserfs_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        inode->u.ext3_i.i_flags = inode->u.ext3_i.i_flags |
                                  EXT3_JOURNAL_DATA_FL;
#endif
}
|
/*
 * tr_all_data hook for reiserfs.  Not implemented: calling it triggers
 * BUG().  The trailing return only satisfies the int return type.
 */
static int presto_reiserfs_has_all_data(struct inode *inode)
{
        BUG();

        return 0;
}
|
struct journal_ops presto_reiserfs_journal_ops = { |
.tr_all_data = presto_reiserfs_has_all_data, |
.tr_avail = presto_reiserfs_freespace, |
.tr_start = presto_reiserfs_trans_start, |
.tr_commit = presto_reiserfs_trans_commit, |
.tr_journal_data = presto_reiserfs_journal_file_data |
}; |
|
#endif |
#endif |
/journal_ext2.c
0,0 → 1,91
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#include <linux/ext2_fs.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#if defined(CONFIG_EXT2_FS) |
|
/* EXT2 has no journalling, so the transaction hooks below start and commit nothing */ |
static loff_t presto_e2_freespace(struct presto_cache *cache, |
struct super_block *sb) |
{ |
unsigned long freebl = le32_to_cpu(sb->u.ext2_sb.s_es->s_free_blocks_count); |
unsigned long avail = freebl - le32_to_cpu(sb->u.ext2_sb.s_es->s_r_blocks_count); |
return (avail << EXT2_BLOCK_SIZE_BITS(sb)); |
} |
|
/* start the filesystem journal operations */ |
static void *presto_e2_trans_start(struct presto_file_set *fset, struct inode *inode, int op) |
{ |
__u32 avail_kmlblocks; |
|
if ( presto_no_journal(fset) || |
strcmp(fset->fset_cache->cache_type, "ext2")) |
return NULL; |
|
avail_kmlblocks = inode->i_sb->u.ext2_sb.s_es->s_free_blocks_count; |
|
if ( avail_kmlblocks < 3 ) { |
return ERR_PTR(-ENOSPC); |
} |
|
if ( (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR) |
&& avail_kmlblocks < 6 ) { |
return ERR_PTR(-ENOSPC); |
} |
return (void *) 1; |
} |
|
/*
 * tr_commit hook for ext2: intentionally empty, both arguments are ignored.
 */
static void presto_e2_trans_commit(struct presto_file_set *fset, void *handle)
{
        /* nothing to commit */
        return;
}
|
/*
 * tr_all_data hook for ext2.  Not implemented: calling it triggers BUG().
 * The trailing return only satisfies the int return type.
 */
static int presto_e2_has_all_data(struct inode *inode)
{
        BUG();

        return 0;
}
|
struct journal_ops presto_ext2_journal_ops = { |
tr_all_data: presto_e2_has_all_data, |
tr_avail: presto_e2_freespace, |
tr_start: presto_e2_trans_start, |
tr_commit: presto_e2_trans_commit, |
tr_journal_data: NULL |
}; |
|
#endif /* CONFIG_EXT2_FS */ |
/journal_ext3.c
0,0 → 1,285
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com> |
* Copyright (C) 2000 Red Hat, Inc. |
* Copyright (C) 2000 Los Alamos National Laboratory |
* Copyright (C) 2000 TurboLinux, Inc. |
* Copyright (C) 2001 Mountain View Data, Inc. |
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <linux/types.h> |
#include <linux/param.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#include <linux/smp_lock.h> |
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) |
#include <linux/jbd.h> |
#include <linux/ext3_fs.h> |
#include <linux/ext3_jbd.h> |
#endif |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) |
|
/* Number of whole filesystem blocks spanned by PATH_MAX / NAME_MAX bytes on
 * the filesystem of `inode`.  The value is obtained by a right shift by the
 * block-size bits, so it rounds down and is 0 whenever the limit is smaller
 * than one block. */
#define MAX_PATH_BLOCKS(inode) (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) |
#define MAX_NAME_BLOCKS(inode) (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) |
|
/* space requirements: |
presto_do_truncate: |
used to truncate the KML forward to next fset->chunksize boundary |
- zero partial block |
- update inode |
presto_write_record: |
write header (< one block) |
write one path (< MAX_PATHLEN) |
possibly write another path (< MAX_PATHLEN) |
write suffix (< one block) |
presto_update_last_rcvd |
write one block |
*/ |
|
static loff_t presto_e3_freespace(struct presto_cache *cache, |
struct super_block *sb) |
{ |
loff_t freebl = le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count); |
loff_t avail = freebl - |
le32_to_cpu(sb->u.ext3_sb.s_es->s_r_blocks_count); |
return (avail << EXT3_BLOCK_SIZE_BITS(sb)); |
} |
|
/* start the filesystem journal operations */ |
static void *presto_e3_trans_start(struct presto_file_set *fset, |
struct inode *inode, |
int op) |
{ |
int jblocks; |
int trunc_blks, one_path_blks, extra_path_blks, |
extra_name_blks, lml_blks; |
__u32 avail_kmlblocks; |
handle_t *handle; |
|
if ( presto_no_journal(fset) || |
strcmp(fset->fset_cache->cache_type, "ext3")) |
{ |
CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", |
fset->fset_cache->cache_type); |
return NULL; |
} |
|
avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count; |
|
if ( avail_kmlblocks < 3 ) { |
return ERR_PTR(-ENOSPC); |
} |
|
if ( (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR) |
&& avail_kmlblocks < 6 ) { |
return ERR_PTR(-ENOSPC); |
} |
|
/* Need journal space for: |
at least three writes to KML (two one block writes, one a path) |
possibly a second name (unlink, rmdir) |
possibly a second path (symlink, rename) |
a one block write to the last rcvd file |
*/ |
|
trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; |
one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3; |
lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2; |
extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); |
extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); |
|
/* additional blocks appear for "two pathname" operations |
and operations involving the LML records |
*/ |
switch (op) { |
case KML_OPCODE_TRUNC: |
jblocks = one_path_blks + extra_name_blks + trunc_blks |
+ EXT3_DELETE_TRANS_BLOCKS; |
break; |
case KML_OPCODE_KML_TRUNC: |
/* Hopefully this is a little better, but I'm still mostly |
* guessing here. */ |
/* unlink 1 */ |
jblocks = extra_name_blks + trunc_blks + |
EXT3_DELETE_TRANS_BLOCKS + 2; |
|
/* unlink 2 */ |
jblocks += extra_name_blks + trunc_blks + |
EXT3_DELETE_TRANS_BLOCKS + 2; |
|
/* rename 1 */ |
jblocks += 2 * extra_path_blks + trunc_blks + |
2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3; |
|
/* rename 2 */ |
jblocks += 2 * extra_path_blks + trunc_blks + |
2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3; |
break; |
case KML_OPCODE_RELEASE: |
/* |
jblocks = one_path_blks + lml_blks + 2*trunc_blks; |
*/ |
jblocks = one_path_blks; |
break; |
case KML_OPCODE_SETATTR: |
jblocks = one_path_blks + trunc_blks + 1 ; |
break; |
case KML_OPCODE_CREATE: |
jblocks = one_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS + 3 + 2; |
break; |
case KML_OPCODE_LINK: |
jblocks = one_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS + 2; |
break; |
case KML_OPCODE_UNLINK: |
jblocks = one_path_blks + extra_name_blks + trunc_blks |
+ EXT3_DELETE_TRANS_BLOCKS + 2; |
break; |
case KML_OPCODE_SYMLINK: |
jblocks = one_path_blks + extra_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS + 5; |
break; |
case KML_OPCODE_MKDIR: |
jblocks = one_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS + 4 + 2; |
break; |
case KML_OPCODE_RMDIR: |
jblocks = one_path_blks + extra_name_blks + trunc_blks |
+ EXT3_DELETE_TRANS_BLOCKS + 1; |
break; |
case KML_OPCODE_MKNOD: |
jblocks = one_path_blks + trunc_blks + |
EXT3_DATA_TRANS_BLOCKS + 3 + 2; |
break; |
case KML_OPCODE_RENAME: |
jblocks = one_path_blks + extra_path_blks + trunc_blks + |
2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3; |
break; |
case KML_OPCODE_WRITE: |
jblocks = one_path_blks; |
/* add this when we can wrap our transaction with |
that of ext3_file_write (ordered writes) |
+ EXT3_DATA_TRANS_BLOCKS; |
*/ |
break; |
default: |
CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); |
return NULL; |
} |
|
CDEBUG(D_JOURNAL, "creating journal handle (%d blocks) for op %d\n", |
jblocks, op); |
/* journal_start/stop does not do its own locking while updating |
* the handle/transaction information. Hence we create our own |
* critical section to protect these calls. -SHP |
*/ |
lock_kernel(); |
handle = journal_start(EXT3_JOURNAL(inode), jblocks); |
unlock_kernel(); |
return handle; |
} |
|
/*
 * Stop the ext3 transaction behind @handle.  Does nothing when journaling
 * is disabled for @fset or when @handle is NULL.
 */
static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle)
{
        if (!presto_no_journal(fset) && handle) {
                /* journal_stop is wrapped in the big kernel lock, exactly
                 * like journal_start in presto_e3_trans_start. -SHP */
                lock_kernel();
                journal_stop(handle);
                unlock_kernel();
        }
}
|
/*
 * Request journaled data writes for @inode by setting EXT3_JOURNAL_DATA_FL
 * in its ext3 flags, when that flag is defined at build time; otherwise
 * only a build warning is emitted and the function does nothing.
 */
static void presto_e3_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        inode->u.ext3_i.i_flags = inode->u.ext3_i.i_flags |
                                  EXT3_JOURNAL_DATA_FL;
#endif
}
|
/* The logic here is a slightly modified version of ext3/inode.c:block_to_path |
*/ |
static int presto_e3_has_all_data(struct inode *inode) |
{ |
int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); |
int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); |
const long direct_blocks = EXT3_NDIR_BLOCKS, |
indirect_blocks = ptrs, |
double_blocks = (1 << (ptrs_bits * 2)); |
long block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
inode->i_sb->s_blocksize_bits; |
|
ENTRY; |
|
if (inode->i_size == 0) { |
EXIT; |
return 1; |
} |
|
if (block < direct_blocks) { |
/* No indirect blocks, no problem. */ |
} else if (block < indirect_blocks + direct_blocks) { |
block++; |
} else if (block < double_blocks + indirect_blocks + direct_blocks) { |
block += 2; |
} else if (((block - double_blocks - indirect_blocks - direct_blocks) |
>> (ptrs_bits * 2)) < ptrs) { |
block += 3; |
} |
|
block *= (inode->i_sb->s_blocksize / 512); |
|
CDEBUG(D_CACHE, "Need %ld blocks, have %ld.\n", block, inode->i_blocks); |
|
if (block > inode->i_blocks) { |
EXIT; |
return 0; |
} |
|
EXIT; |
return 1; |
} |
|
struct journal_ops presto_ext3_journal_ops = { |
.tr_all_data = presto_e3_has_all_data, |
.tr_avail = presto_e3_freespace, |
.tr_start = presto_e3_trans_start, |
.tr_commit = presto_e3_trans_commit, |
.tr_journal_data = presto_e3_journal_file_data, |
.tr_ilookup = presto_iget_ilookup |
}; |
|
#endif /* CONFIG_EXT3_FS */ |
/dir.c
0,0 → 1,1415
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2000 Stelias Computing, Inc. |
* Copyright (C) 2000 Red Hat, Inc. |
* Copyright (C) 2000 Tacitus Systems |
* Copyright (C) 2000 Peter J. Braam |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <stdarg.h> |
|
#include <asm/bitops.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
#include <linux/smp_lock.h> |
|
#include <linux/errno.h> |
#include <linux/fs.h> |
#include <linux/ext2_fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/sched.h> |
#include <linux/stat.h> |
#include <linux/string.h> |
#include <linux/locks.h> |
#include <linux/blkdev.h> |
#include <linux/init.h> |
#define __NO_VERSION__ |
#include <linux/module.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
static inline void presto_relock_sem(struct inode *dir) |
{ |
/* the lock from sys_mkdir / lookup_create */ |
down(&dir->i_sem); |
/* the rest is done by the do_{create,mkdir, ...} */ |
} |
|
static inline void presto_relock_other(struct inode *dir) |
{ |
/* vfs_mkdir locks */ |
down(&dir->i_zombie); |
lock_kernel(); |
} |
|
/*
 * Re-take every lock dropped by presto_unlock(), in order: dir->i_sem
 * (sys_mkdir / lookup_create), then dir->i_zombie and the big kernel lock
 * (vfs_mkdir).
 */
static inline void presto_fulllock(struct inode *dir)
{
        presto_relock_sem(dir);
        presto_relock_other(dir);
}
|
static inline void presto_unlock(struct inode *dir) |
{ |
/* vfs_mkdir locks */ |
unlock_kernel(); |
up(&dir->i_zombie); |
/* the lock from sys_mkdir / lookup_create */ |
up(&dir->i_sem); |
} |
|
|
/* |
* presto_permission() is only declared here; it is defined outside this |
* file (the original note says these are initialized in super.c). |
* izo_authorized_uid is file-local and starts out as 0; presto_can_ilookup() |
* compares it with the caller's euid. |
*/ |
extern int presto_permission(struct inode *inode, int mask); |
static int izo_authorized_uid = 0; |
|
/*
 * Decide whether @dentry names an inode-lookup entry of the form
 * "...ino:<inode number>:<generation>" (both numbers in hex).
 *
 * Returns 1 and stores the parsed values in *id and *generation when the
 * name matches.  Returns 0 when the name is shorter than the 7-character
 * prefix, longer than 64 characters, does not start with
 * PRESTO_ILOOKUP_MAGIC, or has no PRESTO_ILOOKUP_SEP after the inode
 * number.  In that last case *id has already been written.
 */
int izo_dentry_is_ilookup(struct dentry *dentry, ino_t *id,
                          unsigned int *generation)
{
        char tmpname[64];
        char *next;
        unsigned int tail_len;

        ENTRY;
        /* prefix is 7 characters: '...ino:' */
        if (dentry->d_name.len < 7 || dentry->d_name.len > 64 ||
            memcmp(dentry->d_name.name, PRESTO_ILOOKUP_MAGIC, 7) != 0) {
                EXIT;
                return 0;
        }

        /* Copy everything after the prefix into a NUL-terminated buffer. */
        tail_len = dentry->d_name.len - 7;
        memcpy(tmpname, dentry->d_name.name + 7, tail_len);
        tmpname[tail_len] = '\0';

        *id = simple_strtoul(tmpname, &next, 16);
        if (*next != PRESTO_ILOOKUP_SEP) {
                EXIT;
                return 0;
        }

        *generation = simple_strtoul(next + 1, NULL, 16);
        CDEBUG(D_INODE, "ino string: %s, Id = %lx (%lu), "
               "generation %x (%d)\n",
               tmpname, *id, *id, *generation, *generation);
        EXIT;
        return 1;
}
|
struct dentry *presto_tmpfs_ilookup(struct inode *dir, |
struct dentry *dentry, |
ino_t ino, |
unsigned int generation) |
{ |
return dentry; |
} |
|
|
inline int presto_can_ilookup(void) |
{ |
return (current->euid == izo_authorized_uid || |
capable(CAP_DAC_READ_SEARCH)); |
} |
|
struct dentry *presto_iget_ilookup(struct inode *dir, |
struct dentry *dentry, |
ino_t ino, |
unsigned int generation) |
{ |
struct inode *inode; |
int error; |
|
ENTRY; |
|
if ( !presto_can_ilookup() ) { |
CERROR("ilookup denied: euid %u, authorized_uid %u\n", |
current->euid, izo_authorized_uid); |
return ERR_PTR(-EPERM); |
} |
error = -ENOENT; |
inode = iget(dir->i_sb, ino); |
if (!inode) { |
CERROR("fatal: NULL inode ino %lu\n", ino); |
goto cleanup_iput; |
} |
if (is_bad_inode(inode) || inode->i_nlink == 0) { |
CERROR("fatal: bad inode ino %lu, links %d\n", ino, inode->i_nlink); |
goto cleanup_iput; |
} |
if (inode->i_generation != generation) { |
CERROR("fatal: bad generation %u (want %u)\n", |
inode->i_generation, generation); |
goto cleanup_iput; |
} |
|
d_instantiate(dentry, inode); |
dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; /* NFS hack */ |
|
EXIT; |
return NULL; |
|
cleanup_iput: |
if (inode) |
iput(inode); |
return ERR_PTR(error); |
} |
|
struct dentry *presto_add_ilookup_dentry(struct dentry *parent, |
struct dentry *real) |
{ |
struct inode *inode = real->d_inode; |
struct dentry *de; |
char buf[32]; |
char *ptr = buf; |
struct dentry *inodir; |
struct presto_dentry_data *dd; |
|
inodir = lookup_one_len("..iopen..", parent, strlen("..iopen..")); |
if (!inodir || IS_ERR(inodir) || !inodir->d_inode ) { |
CERROR("%s: bad ..iopen.. lookup\n", __FUNCTION__); |
return NULL; |
} |
inodir->d_inode->i_op = &presto_dir_iops; |
|
snprintf(ptr, 32, "...ino:%lx:%x", inode->i_ino, inode->i_generation); |
|
de = lookup_one_len(ptr, inodir, strlen(ptr)); |
if (!de || IS_ERR(de)) { |
CERROR("%s: bad ...ino lookup %ld\n", |
__FUNCTION__, PTR_ERR(de)); |
dput(inodir); |
return NULL; |
} |
|
dd = presto_d2d(real); |
if (!dd) |
BUG(); |
|
/* already exists */ |
if (de->d_inode) |
BUG(); |
#if 0 |
if (de->d_inode != inode ) { |
CERROR("XX de->d_inode %ld, inode %ld\n", |
de->d_inode->i_ino, inode->i_ino); |
BUG(); |
} |
if (dd->dd_inodentry) { |
CERROR("inodentry exists %ld \n", inode->i_ino); |
BUG(); |
} |
dput(inodir); |
return de; |
} |
#endif |
|
if (presto_d2d(de)) |
BUG(); |
|
atomic_inc(&inode->i_count); |
de->d_op = &presto_dentry_ops; |
d_add(de, inode); |
if (!de->d_op) |
CERROR("DD: no ops dentry %p, dd %p\n", de, dd); |
dd->dd_inodentry = de; |
dd->dd_count++; |
de->d_fsdata = dd; |
|
dput(inodir); |
return de; |
} |
|
struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry) |
{ |
int rc = 0; |
struct dentry *de; |
struct presto_cache *cache; |
int minor; |
ino_t ino; |
unsigned int generation; |
struct inode_operations *iops; |
int is_ilookup = 0; |
|
ENTRY; |
cache = presto_get_cache(dir); |
if (cache == NULL) { |
CERROR("InterMezzo BUG: no cache in presto_lookup " |
"(dir ino: %ld)!\n", dir->i_ino); |
EXIT; |
return NULL; |
} |
minor = presto_c2m(cache); |
|
iops = filter_c2cdiops(cache->cache_filter); |
if (!iops || !iops->lookup) { |
CERROR("InterMezzo BUG: filesystem has no lookup\n"); |
EXIT; |
return NULL; |
} |
|
|
CDEBUG(D_CACHE, "dentry %p, dir ino: %ld, name: %*s, islento: %d\n", |
dentry, dir->i_ino, dentry->d_name.len, dentry->d_name.name, |
ISLENTO(minor)); |
|
if (dentry->d_fsdata) |
CERROR("DD -- BAD dentry %p has data\n", dentry); |
|
dentry->d_fsdata = NULL; |
#if 0 |
if (ext2_check_for_iopen(dir, dentry)) |
de = NULL; |
else { |
#endif |
if ( izo_dentry_is_ilookup(dentry, &ino, &generation) ) { |
de = cache->cache_filter->o_trops->tr_ilookup |
(dir, dentry, ino, generation); |
is_ilookup = 1; |
} else |
de = iops->lookup(dir, dentry); |
#if 0 |
} |
#endif |
|
if ( IS_ERR(de) ) { |
CERROR("dentry lookup error %ld\n", PTR_ERR(de)); |
return de; |
} |
|
/* some file systems have no read_inode: set methods here */ |
if (dentry->d_inode) |
presto_set_ops(dentry->d_inode, cache->cache_filter); |
|
filter_setup_dentry_ops(cache->cache_filter, |
dentry->d_op, &presto_dentry_ops); |
dentry->d_op = filter_c2udops(cache->cache_filter); |
|
/* In lookup we will tolerate EROFS return codes from presto_set_dd |
* to placate NFS. EROFS indicates that a fileset was not found but |
* we should still be able to continue through a lookup. |
* Anything else is a hard error and must be returned to VFS. */ |
if (!is_ilookup) |
rc = presto_set_dd(dentry); |
if (rc && rc != -EROFS) { |
CERROR("presto_set_dd failed (dir %ld, name %*s): %d\n", |
dir->i_ino, dentry->d_name.len, dentry->d_name.name, rc); |
return ERR_PTR(rc); |
} |
|
EXIT; |
return NULL; |
} |
|
static inline int presto_check_set_fsdata (struct dentry *de) |
{ |
if (presto_d2d(de) == NULL) { |
#ifdef PRESTO_NO_NFS |
CERROR("dentry without fsdata: %p: %*s\n", de, |
de->d_name.len, de->d_name.name); |
BUG(); |
#endif |
return presto_set_dd (de); |
} |
|
return 0; |
} |
|
int presto_setattr(struct dentry *de, struct iattr *iattr) |
{ |
int error; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct lento_vfs_context info = { 0, 0, 0 }; |
|
ENTRY; |
|
error = presto_prep(de, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
if (!iattr->ia_valid) |
CDEBUG(D_INODE, "presto_setattr: iattr is not valid\n"); |
|
CDEBUG(D_INODE, "valid %#x, mode %#o, uid %u, gid %u, size %Lu, " |
"atime %lu mtime %lu ctime %lu flags %d\n", |
iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, iattr->ia_gid, |
iattr->ia_size, iattr->ia_atime, iattr->ia_mtime, |
iattr->ia_ctime, iattr->ia_attr_flags); |
|
if ( presto_get_permit(de->d_inode) < 0 ) { |
EXIT; |
return -EROFS; |
} |
|
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
error = presto_do_setattr(fset, de, iattr, &info); |
presto_put_permit(de->d_inode); |
return error; |
} |
|
/* |
* Now the meat: the fs operations that require journaling |
* |
* |
* XXX: some of these need modifications for hierarchical filesets |
*/ |
|
int presto_prep(struct dentry *dentry, struct presto_cache **cache, |
struct presto_file_set **fset) |
{ |
int rc; |
|
/* NFS might pass us dentries which have not gone through lookup. |
* Test and set d_fsdata for such dentries |
*/ |
rc = presto_check_set_fsdata (dentry); |
if (rc) return rc; |
|
*fset = presto_fset(dentry); |
if ( *fset == NULL ) { |
CERROR("No file set for dentry at %p: %*s\n", dentry, |
dentry->d_name.len, dentry->d_name.name); |
return -EROFS; |
} |
|
*cache = (*fset)->fset_cache; |
if ( *cache == NULL ) { |
CERROR("PRESTO: BAD, BAD: cannot find cache\n"); |
return -EBADF; |
} |
|
CDEBUG(D_PIOCTL, "---> cache flags %x, fset flags %x\n", |
(*cache)->cache_flags, (*fset)->fset_flags); |
if( presto_is_read_only(*fset) ) { |
CERROR("PRESTO: cannot modify read-only fileset, minor %d.\n", |
presto_c2m(*cache)); |
return -EROFS; |
} |
return 0; |
} |
|
static int presto_create(struct inode * dir, struct dentry * dentry, int mode) |
{ |
int error; |
struct presto_cache *cache; |
struct dentry *parent = dentry->d_parent; |
struct lento_vfs_context info; |
struct presto_file_set *fset; |
|
ENTRY; |
error = presto_check_set_fsdata(dentry); |
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_prep(dentry->d_parent, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
presto_unlock(dir); |
|
/* Does blocking and non-blocking behavious need to be |
checked for. Without blocking (return 1), the permit |
was acquired without reintegration |
*/ |
if ( presto_get_permit(dir) < 0 ) { |
EXIT; |
presto_fulllock(dir); |
return -EROFS; |
} |
|
presto_relock_sem(dir); |
parent = dentry->d_parent; |
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
error = presto_do_create(fset, parent, dentry, mode, &info); |
|
presto_relock_other(dir); |
presto_put_permit(dir); |
EXIT; |
return error; |
} |
|
static int presto_link(struct dentry *old_dentry, struct inode *dir, |
struct dentry *new_dentry) |
{ |
int error; |
struct presto_cache *cache, *new_cache; |
struct presto_file_set *fset, *new_fset; |
struct dentry *parent = new_dentry->d_parent; |
struct lento_vfs_context info; |
|
ENTRY; |
error = presto_prep(old_dentry, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_check_set_fsdata(new_dentry); |
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_prep(new_dentry->d_parent, &new_cache, &new_fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
if (fset != new_fset) { |
EXIT; |
return -EXDEV; |
} |
|
presto_unlock(dir); |
if ( presto_get_permit(old_dentry->d_inode) < 0 ) { |
EXIT; |
presto_fulllock(dir); |
return -EROFS; |
} |
|
if ( presto_get_permit(dir) < 0 ) { |
EXIT; |
presto_fulllock(dir); |
return -EROFS; |
} |
|
presto_relock_sem(dir); |
parent = new_dentry->d_parent; |
|
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
error = presto_do_link(fset, old_dentry, parent, |
new_dentry, &info); |
|
#if 0 |
/* XXX for links this is not right */ |
if (cache->cache_filter->o_trops->tr_add_ilookup ) { |
struct dentry *d; |
d = cache->cache_filter->o_trops->tr_add_ilookup |
(dir->i_sb->s_root, new_dentry, 1); |
} |
#endif |
|
presto_relock_other(dir); |
presto_put_permit(dir); |
presto_put_permit(old_dentry->d_inode); |
return error; |
} |
|
static int presto_mkdir(struct inode * dir, struct dentry * dentry, int mode) |
{ |
int error; |
struct presto_file_set *fset; |
struct presto_cache *cache; |
struct dentry *parent = dentry->d_parent; |
struct lento_vfs_context info; |
|
ENTRY; |
|
error = presto_check_set_fsdata(dentry); |
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_prep(dentry->d_parent, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
presto_unlock(dir); |
|
if ( presto_get_permit(dir) < 0 ) { |
EXIT; |
presto_fulllock(dir); |
return -EROFS; |
} |
|
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
|
presto_relock_sem(dir); |
parent = dentry->d_parent; |
error = presto_do_mkdir(fset, parent, dentry, mode, &info); |
presto_relock_other(dir); |
presto_put_permit(dir); |
return error; |
} |
|
|
|
static int presto_symlink(struct inode *dir, struct dentry *dentry, |
const char *name) |
{ |
int error; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct dentry *parent = dentry->d_parent; |
struct lento_vfs_context info; |
|
ENTRY; |
error = presto_check_set_fsdata(dentry); |
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_prep(dentry->d_parent, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
presto_unlock(dir); |
if ( presto_get_permit(dir) < 0 ) { |
EXIT; |
presto_fulllock(dir); |
return -EROFS; |
} |
|
presto_relock_sem(dir); |
parent = dentry->d_parent; |
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
error = presto_do_symlink(fset, parent, dentry, name, &info); |
presto_relock_other(dir); |
presto_put_permit(dir); |
return error; |
} |
|
int presto_unlink(struct inode *dir, struct dentry *dentry) |
{ |
int error; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct dentry *parent = dentry->d_parent; |
struct lento_vfs_context info; |
|
ENTRY; |
error = presto_check_set_fsdata(dentry); |
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_prep(dentry->d_parent, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
presto_unlock(dir); |
if ( presto_get_permit(dir) < 0 ) { |
EXIT; |
presto_fulllock(dir); |
return -EROFS; |
} |
|
presto_relock_sem(dir); |
parent = dentry->d_parent; |
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
|
error = presto_do_unlink(fset, parent, dentry, &info); |
|
presto_relock_other(dir); |
presto_put_permit(dir); |
return error; |
} |
|
static int presto_rmdir(struct inode *dir, struct dentry *dentry) |
{ |
int error; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct dentry *parent = dentry->d_parent; |
struct lento_vfs_context info; |
|
ENTRY; |
CDEBUG(D_FILE, "prepping presto\n"); |
error = presto_check_set_fsdata(dentry); |
|
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_prep(dentry->d_parent, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
CDEBUG(D_FILE, "unlocking\n"); |
/* We need to dget() before the dput in double_unlock, to ensure we |
* still have dentry references. double_lock doesn't do dget for us. |
*/ |
unlock_kernel(); |
if (d_unhashed(dentry)) |
d_rehash(dentry); |
double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); |
double_up(&dir->i_sem, &dentry->d_inode->i_sem); |
|
CDEBUG(D_FILE, "getting permit\n"); |
if ( presto_get_permit(parent->d_inode) < 0 ) { |
EXIT; |
double_down(&dir->i_sem, &dentry->d_inode->i_sem); |
double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); |
|
lock_kernel(); |
return -EROFS; |
} |
CDEBUG(D_FILE, "locking\n"); |
|
double_down(&dir->i_sem, &dentry->d_inode->i_sem); |
parent = dentry->d_parent; |
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
error = presto_do_rmdir(fset, parent, dentry, &info); |
presto_put_permit(parent->d_inode); |
lock_kernel(); |
EXIT; |
return error; |
} |
|
static int presto_mknod(struct inode * dir, struct dentry * dentry, int mode, int rdev) |
{ |
int error; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct dentry *parent = dentry->d_parent; |
struct lento_vfs_context info; |
|
ENTRY; |
error = presto_check_set_fsdata(dentry); |
if ( error ) { |
EXIT; |
return error; |
} |
|
error = presto_prep(dentry->d_parent, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
presto_unlock(dir); |
if ( presto_get_permit(dir) < 0 ) { |
EXIT; |
presto_fulllock(dir); |
return -EROFS; |
} |
|
presto_relock_sem(dir); |
parent = dentry->d_parent; |
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
error = presto_do_mknod(fset, parent, dentry, mode, rdev, &info); |
presto_relock_other(dir); |
presto_put_permit(dir); |
EXIT; |
return error; |
} |
|
inline void presto_triple_unlock(struct inode *old_dir, struct inode *new_dir, |
struct dentry *old_dentry, |
struct dentry *new_dentry, int triple) |
{ |
/* rename_dir case */ |
if (S_ISDIR(old_dentry->d_inode->i_mode)) { |
if (triple) { |
triple_up(&old_dir->i_zombie, |
&new_dir->i_zombie, |
&new_dentry->d_inode->i_zombie); |
} else { |
double_up(&old_dir->i_zombie, |
&new_dir->i_zombie); |
} |
up(&old_dir->i_sb->s_vfs_rename_sem); |
} else /* this case is rename_other */ |
double_up(&old_dir->i_zombie, &new_dir->i_zombie); |
/* done by do_rename */ |
unlock_kernel(); |
double_up(&old_dir->i_sem, &new_dir->i_sem); |
} |
|
inline void presto_triple_fulllock(struct inode *old_dir, |
struct inode *new_dir, |
struct dentry *old_dentry, |
struct dentry *new_dentry, int triple) |
{ |
/* done by do_rename */ |
double_down(&old_dir->i_sem, &new_dir->i_sem); |
lock_kernel(); |
/* rename_dir case */ |
if (S_ISDIR(old_dentry->d_inode->i_mode)) { |
down(&old_dir->i_sb->s_vfs_rename_sem); |
if (triple) { |
triple_down(&old_dir->i_zombie, |
&new_dir->i_zombie, |
&new_dentry->d_inode->i_zombie); |
} else { |
double_down(&old_dir->i_zombie, |
&new_dir->i_zombie); |
} |
} else /* this case is rename_other */ |
double_down(&old_dir->i_zombie, &new_dir->i_zombie); |
} |
|
inline void presto_triple_relock_sem(struct inode *old_dir, |
struct inode *new_dir, |
struct dentry *old_dentry, |
struct dentry *new_dentry, int triple) |
{ |
/* done by do_rename */ |
double_down(&old_dir->i_sem, &new_dir->i_sem); |
lock_kernel(); |
} |
|
inline void presto_triple_relock_other(struct inode *old_dir, |
struct inode *new_dir, |
struct dentry *old_dentry, |
struct dentry *new_dentry, int triple) |
{ |
/* rename_dir case */ |
if (S_ISDIR(old_dentry->d_inode->i_mode)) { |
down(&old_dir->i_sb->s_vfs_rename_sem); |
if (triple) { |
triple_down(&old_dir->i_zombie, |
&new_dir->i_zombie, |
&new_dentry->d_inode->i_zombie); |
} else { |
double_down(&old_dir->i_zombie, |
&new_dir->i_zombie); |
} |
} else /* this case is rename_other */ |
double_down(&old_dir->i_zombie, &new_dir->i_zombie); |
} |
|
|
// XXX this can be optimized: renamtes across filesets only require |
// multiple KML records, but can locally be executed normally. |
int presto_rename(struct inode *old_dir, struct dentry *old_dentry, |
struct inode *new_dir, struct dentry *new_dentry) |
{ |
int error; |
struct presto_cache *cache, *new_cache; |
struct presto_file_set *fset, *new_fset; |
struct lento_vfs_context info; |
struct dentry *old_parent = old_dentry->d_parent; |
struct dentry *new_parent = new_dentry->d_parent; |
int triple; |
|
ENTRY; |
error = presto_prep(old_dentry, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
error = presto_prep(new_parent, &new_cache, &new_fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
if ( fset != new_fset ) { |
EXIT; |
return -EXDEV; |
} |
|
/* We need to do dget before the dput in double_unlock, to ensure we |
* still have dentry references. double_lock doesn't do dget for us. |
*/ |
|
triple = (S_ISDIR(old_dentry->d_inode->i_mode) && new_dentry->d_inode)? |
1:0; |
|
presto_triple_unlock(old_dir, new_dir, old_dentry, new_dentry, triple); |
|
if ( presto_get_permit(old_dir) < 0 ) { |
EXIT; |
presto_triple_fulllock(old_dir, new_dir, old_dentry, new_dentry, triple); |
return -EROFS; |
} |
if ( presto_get_permit(new_dir) < 0 ) { |
EXIT; |
presto_triple_fulllock(old_dir, new_dir, old_dentry, new_dentry, triple); |
return -EROFS; |
} |
|
presto_triple_relock_sem(old_dir, new_dir, old_dentry, new_dentry, triple); |
memset(&info, 0, sizeof(info)); |
if (!ISLENTO(presto_c2m(cache))) |
info.flags = LENTO_FL_KML; |
info.flags |= LENTO_FL_IGNORE_TIME; |
error = do_rename(fset, old_parent, old_dentry, new_parent, |
new_dentry, &info); |
presto_triple_relock_other(old_dir, new_dir, old_dentry, new_dentry, triple); |
|
presto_put_permit(new_dir); |
presto_put_permit(old_dir); |
return error; |
} |
|
/* basically this allows the ilookup processes access to all files for |
* reading, while not making ilookup totally insecure. This could all |
* go away if we could set the CAP_DAC_READ_SEARCH capability for the client. |
*/ |
/* If posix acls are available, the underlying cache fs will export the |
* appropriate permission function. Thus we do not worry here about ACLs |
* or EAs. -SHP |
*/ |
int presto_permission(struct inode *inode, int mask) |
{ |
unsigned short mode = inode->i_mode; |
struct presto_cache *cache; |
int rc; |
|
ENTRY; |
if ( presto_can_ilookup() && !(mask & S_IWOTH)) { |
CDEBUG(D_CACHE, "ilookup on %ld OK\n", inode->i_ino); |
EXIT; |
return 0; |
} |
|
cache = presto_get_cache(inode); |
|
if ( cache ) { |
/* we only override the file/dir permission operations */ |
struct inode_operations *fiops = filter_c2cfiops(cache->cache_filter); |
struct inode_operations *diops = filter_c2cdiops(cache->cache_filter); |
|
if ( S_ISREG(mode) && fiops && fiops->permission ) { |
EXIT; |
return fiops->permission(inode, mask); |
} |
if ( S_ISDIR(mode) && diops && diops->permission ) { |
EXIT; |
return diops->permission(inode, mask); |
} |
} |
|
/* The cache filesystem doesn't have its own permission function, |
* but we don't want to duplicate the VFS code here. In order |
* to avoid looping from permission calling this function again, |
* we temporarily override the permission operation while we call |
* the VFS permission function. |
*/ |
inode->i_op->permission = NULL; |
rc = permission(inode, mask); |
inode->i_op->permission = &presto_permission; |
|
EXIT; |
return rc; |
} |
|
|
int presto_ioctl(struct inode *inode, struct file *file, |
unsigned int cmd, unsigned long arg) |
{ |
char buf[1024]; |
struct izo_ioctl_data *data = NULL; |
struct presto_dentry_data *dd; |
int rc; |
|
ENTRY; |
|
/* Try the filesystem's ioctl first, and return if it succeeded. */ |
dd = presto_d2d(file->f_dentry); |
if (dd && dd->dd_fset) { |
int (*cache_ioctl)(struct inode *, struct file *, unsigned int, unsigned long ) = filter_c2cdfops(dd->dd_fset->fset_cache->cache_filter)->ioctl; |
rc = -ENOTTY; |
if (cache_ioctl) |
rc = cache_ioctl(inode, file, cmd, arg); |
if (rc != -ENOTTY) { |
EXIT; |
return rc; |
} |
} |
|
if (current->euid != 0 && current->euid != izo_authorized_uid) { |
EXIT; |
return -EPERM; |
} |
|
memset(buf, 0, sizeof(buf)); |
|
if (izo_ioctl_getdata(buf, buf + 1024, (void *)arg)) { |
CERROR("intermezzo ioctl: data error\n"); |
return -EINVAL; |
} |
data = (struct izo_ioctl_data *)buf; |
|
switch(cmd) { |
case IZO_IOC_REINTKML: { |
int rc; |
int cperr; |
rc = kml_reint_rec(file, data); |
|
EXIT; |
cperr = copy_to_user((char *)arg, data, sizeof(*data)); |
if (cperr) { |
CERROR("WARNING: cperr %d\n", cperr); |
rc = -EFAULT; |
} |
return rc; |
} |
|
case IZO_IOC_GET_RCVD: { |
struct izo_rcvd_rec rec; |
struct presto_file_set *fset; |
int rc; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
rc = izo_rcvd_get(&rec, fset, data->ioc_uuid); |
if (rc < 0) { |
EXIT; |
return rc; |
} |
|
EXIT; |
return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0; |
} |
|
case IZO_IOC_REPSTATUS: { |
__u64 client_kmlsize; |
struct izo_rcvd_rec *lr_client; |
struct izo_rcvd_rec rec; |
struct presto_file_set *fset; |
int minor; |
int rc; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
client_kmlsize = data->ioc_kmlsize; |
lr_client = (struct izo_rcvd_rec *) data->ioc_pbuf1; |
|
rc = izo_repstatus(fset, client_kmlsize, |
lr_client, &rec); |
if (rc < 0) { |
EXIT; |
return rc; |
} |
|
EXIT; |
return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0; |
} |
|
case IZO_IOC_GET_CHANNEL: { |
struct presto_file_set *fset; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
|
data->ioc_dev = fset->fset_cache->cache_psdev->uc_minor; |
CDEBUG(D_PSDEV, "CHANNEL %d\n", data->ioc_dev); |
EXIT; |
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0; |
} |
|
case IZO_IOC_SET_IOCTL_UID: |
izo_authorized_uid = data->ioc_uid; |
EXIT; |
return 0; |
|
case IZO_IOC_SET_PID: |
rc = izo_psdev_setpid(data->ioc_dev); |
EXIT; |
return rc; |
|
case IZO_IOC_SET_CHANNEL: |
rc = izo_psdev_setchannel(file, data->ioc_dev); |
EXIT; |
return rc; |
|
case IZO_IOC_GET_KML_SIZE: { |
struct presto_file_set *fset; |
__u64 kmlsize; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
|
kmlsize = presto_kml_offset(fset) + fset->fset_kml_logical_off; |
|
EXIT; |
return copy_to_user((char *)arg, &kmlsize, sizeof(kmlsize))?-EFAULT : 0; |
} |
|
case IZO_IOC_PURGE_FILE_DATA: { |
struct presto_file_set *fset; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
|
rc = izo_purge_file(fset, data->ioc_inlbuf1); |
EXIT; |
return rc; |
} |
|
case IZO_IOC_GET_FILEID: { |
rc = izo_get_fileid(file, data); |
EXIT; |
if (rc) |
return rc; |
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0; |
} |
|
case IZO_IOC_SET_FILEID: { |
rc = izo_set_fileid(file, data); |
EXIT; |
if (rc) |
return rc; |
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0; |
} |
|
case IZO_IOC_ADJUST_LML: { |
struct lento_vfs_context *info; |
info = (struct lento_vfs_context *)data->ioc_inlbuf1; |
rc = presto_adjust_lml(file, info); |
EXIT; |
return rc; |
} |
|
case IZO_IOC_CONNECT: { |
struct presto_file_set *fset; |
int minor; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
rc = izo_upc_connect(minor, data->ioc_ino, |
data->ioc_generation, data->ioc_uuid, |
data->ioc_flags); |
EXIT; |
return rc; |
} |
|
case IZO_IOC_GO_FETCH_KML: { |
struct presto_file_set *fset; |
int minor; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
rc = izo_upc_go_fetch_kml(minor, fset->fset_name, |
data->ioc_uuid, data->ioc_kmlsize); |
EXIT; |
return rc; |
} |
|
case IZO_IOC_REVOKE_PERMIT: |
if (data->ioc_flags) |
rc = izo_revoke_permit(file->f_dentry, data->ioc_uuid); |
else |
rc = izo_revoke_permit(file->f_dentry, NULL); |
EXIT; |
return rc; |
|
case IZO_IOC_CLEAR_FSET: |
rc = izo_clear_fsetroot(file->f_dentry); |
EXIT; |
return rc; |
|
case IZO_IOC_CLEAR_ALL_FSETS: { |
struct presto_file_set *fset; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
|
rc = izo_clear_all_fsetroots(fset->fset_cache); |
EXIT; |
return rc; |
} |
|
case IZO_IOC_SET_FSET: |
/* |
* Mark this dentry as being a fileset root. |
*/ |
rc = presto_set_fsetroot_from_ioc(file->f_dentry, |
data->ioc_inlbuf1, |
data->ioc_flags); |
EXIT; |
return rc; |
|
|
case IZO_IOC_MARK: { |
int res = 0; /* resulting flags - returned to user */ |
int error; |
|
CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %d\n", |
file->f_dentry->d_inode->i_ino, data->ioc_and_flag, |
data->ioc_or_flag, data->ioc_mark_what); |
|
switch (data->ioc_mark_what) { |
case MARK_DENTRY: |
error = izo_mark_dentry(file->f_dentry, |
data->ioc_and_flag, |
data->ioc_or_flag, &res); |
break; |
case MARK_FSET: |
error = izo_mark_fset(file->f_dentry, |
data->ioc_and_flag, |
data->ioc_or_flag, &res); |
break; |
case MARK_CACHE: |
error = izo_mark_cache(file->f_dentry, |
data->ioc_and_flag, |
data->ioc_or_flag, &res); |
break; |
case MARK_GETFL: { |
int fflags, cflags; |
data->ioc_and_flag = 0xffffffff; |
data->ioc_or_flag = 0; |
error = izo_mark_dentry(file->f_dentry, |
data->ioc_and_flag, |
data->ioc_or_flag, &res); |
if (error) |
break; |
error = izo_mark_fset(file->f_dentry, |
data->ioc_and_flag, |
data->ioc_or_flag, &fflags); |
if (error) |
break; |
error = izo_mark_cache(file->f_dentry, |
data->ioc_and_flag, |
data->ioc_or_flag, |
&cflags); |
|
if (error) |
break; |
data->ioc_and_flag = fflags; |
data->ioc_or_flag = cflags; |
break; |
} |
default: |
error = -EINVAL; |
} |
|
if (error) { |
EXIT; |
return error; |
} |
data->ioc_mark_what = res; |
CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %x\n", |
file->f_dentry->d_inode->i_ino, data->ioc_and_flag, |
data->ioc_or_flag, data->ioc_mark_what); |
|
EXIT; |
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0; |
} |
#if 0 |
case IZO_IOC_CLIENT_MAKE_BRANCH: { |
struct presto_file_set *fset; |
int minor; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
rc = izo_upc_client_make_branch(minor, fset->fset_name, |
data->ioc_inlbuf1, |
data->ioc_inlbuf2); |
EXIT; |
return rc; |
} |
#endif |
case IZO_IOC_SERVER_MAKE_BRANCH: { |
struct presto_file_set *fset; |
int minor; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
izo_upc_server_make_branch(minor, data->ioc_inlbuf1); |
EXIT; |
return 0; |
} |
case IZO_IOC_SET_KMLSIZE: { |
struct presto_file_set *fset; |
int minor; |
struct izo_rcvd_rec rec; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
rc = izo_upc_set_kmlsize(minor, fset->fset_name, data->ioc_uuid, |
data->ioc_kmlsize); |
|
if (rc != 0) { |
EXIT; |
return rc; |
} |
|
rc = izo_rcvd_get(&rec, fset, data->ioc_uuid); |
if (rc == -EINVAL) { |
/* We don't know anything about this uuid yet; no |
* worries. */ |
memset(&rec, 0, sizeof(rec)); |
} else if (rc <= 0) { |
CERROR("InterMezzo: error reading last_rcvd: %d\n", rc); |
EXIT; |
return rc; |
} |
rec.lr_remote_offset = data->ioc_kmlsize; |
rc = izo_rcvd_write(fset, &rec); |
if (rc <= 0) { |
CERROR("InterMezzo: error writing last_rcvd: %d\n", rc); |
EXIT; |
return rc; |
} |
EXIT; |
return rc; |
} |
case IZO_IOC_BRANCH_UNDO: { |
struct presto_file_set *fset; |
int minor; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
rc = izo_upc_branch_undo(minor, fset->fset_name, |
data->ioc_inlbuf1); |
EXIT; |
return rc; |
} |
case IZO_IOC_BRANCH_REDO: { |
struct presto_file_set *fset; |
int minor; |
|
fset = presto_fset(file->f_dentry); |
if (fset == NULL) { |
EXIT; |
return -ENODEV; |
} |
minor = presto_f2m(fset); |
|
rc = izo_upc_branch_redo(minor, fset->fset_name, |
data->ioc_inlbuf1); |
EXIT; |
return rc; |
} |
|
case TCGETS: |
EXIT; |
return -EINVAL; |
|
default: |
EXIT; |
return -EINVAL; |
|
} |
EXIT; |
return 0; |
} |
|
struct file_operations presto_dir_fops = { |
.ioctl = presto_ioctl |
}; |
|
struct inode_operations presto_dir_iops = { |
.create = presto_create, |
.lookup = presto_lookup, |
.link = presto_link, |
.unlink = presto_unlink, |
.symlink = presto_symlink, |
.mkdir = presto_mkdir, |
.rmdir = presto_rmdir, |
.mknod = presto_mknod, |
.rename = presto_rename, |
.permission = presto_permission, |
.setattr = presto_setattr, |
#ifdef CONFIG_FS_EXT_ATTR |
.set_ext_attr = presto_set_ext_attr, |
#endif |
}; |
|
|
/file.c
0,0 → 1,539
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2000 Stelias Computing, Inc. |
* Copyright (C) 2000 Red Hat, Inc. |
* Copyright (C) 2000 TurboLinux, Inc. |
* Copyright (C) 2000 Los Alamos National Laboratory. |
* Copyright (C) 2000, 2001 Tacit Networks, Inc. |
* Copyright (C) 2000 Peter J. Braam |
* Copyright (C) 2001 Mountain View Data, Inc. |
* Copyright (C) 2001 Cluster File Systems, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* This file manages file I/O |
* |
*/ |
|
#include <stdarg.h> |
|
#include <asm/bitops.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
#include <linux/errno.h> |
#include <linux/fs.h> |
#include <linux/ext2_fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/sched.h> |
#include <linux/stat.h> |
#include <linux/string.h> |
#include <linux/locks.h> |
#include <linux/blkdev.h> |
#include <linux/init.h> |
#include <linux/smp_lock.h> |
#define __NO_VERSION__ |
#include <linux/module.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
#include <linux/fsfilter.h> |
/* |
* these are initialized in super.c |
*/ |
extern int presto_permission(struct inode *inode, int mask); |
|
|
static int presto_open_upcall(int minor, struct dentry *de) |
{ |
int rc = 0; |
char *path, *buffer; |
struct presto_file_set *fset; |
int pathlen; |
struct lento_vfs_context info; |
struct presto_dentry_data *dd = presto_d2d(de); |
|
PRESTO_ALLOC(buffer, PAGE_SIZE); |
if ( !buffer ) { |
CERROR("PRESTO: out of memory!\n"); |
return -ENOMEM; |
} |
fset = presto_fset(de); |
path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE); |
pathlen = MYPATHLEN(buffer, path); |
|
CDEBUG(D_FILE, "de %p, dd %p\n", de, dd); |
if (dd->remote_ino == 0) { |
rc = presto_get_fileid(minor, fset, de); |
} |
memset (&info, 0, sizeof(info)); |
if (dd->remote_ino > 0) { |
info.remote_ino = dd->remote_ino; |
info.remote_generation = dd->remote_generation; |
} else |
CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc, |
dd->remote_ino); |
|
rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info); |
PRESTO_FREE(buffer, PAGE_SIZE); |
return rc; |
} |
|
static inline int open_check_dod(struct file *file, |
struct presto_file_set *fset) |
{ |
int gen, is_iopen = 0, minor; |
struct presto_cache *cache = fset->fset_cache; |
ino_t inum; |
|
minor = presto_c2m(cache); |
|
if ( ISLENTO(minor) ) { |
CDEBUG(D_CACHE, "is lento, not doing DOD.\n"); |
return 0; |
} |
|
/* Files are only ever opened by inode during backfetches, when by |
* definition we have the authoritative copy of the data. No DOD. */ |
is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen); |
|
if (is_iopen) { |
CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n"); |
return 0; |
} |
|
if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) { |
CDEBUG(D_CACHE, "fileset not on demand.\n"); |
return 0; |
} |
|
if (file->f_flags & O_TRUNC) { |
CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n"); |
return 0; |
} |
|
if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) { |
CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n"); |
return 0; |
} |
|
if (presto_chk(file->f_dentry, PRESTO_DATA)) { |
CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n"); |
return 0; |
} |
|
if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) { |
CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n"); |
return 0; |
} |
|
return 1; |
} |
|
static int presto_file_open(struct inode *inode, struct file *file) |
{ |
int rc = 0; |
struct file_operations *fops; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct presto_file_data *fdata; |
int writable = (file->f_flags & (O_RDWR | O_WRONLY)); |
int minor, i; |
|
ENTRY; |
|
if (presto_prep(file->f_dentry, &cache, &fset) < 0) { |
EXIT; |
return -EBADF; |
} |
|
minor = presto_c2m(cache); |
|
CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n", |
presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino, |
ISLENTO(minor)); |
|
if ( !ISLENTO(minor) && (file->f_flags & O_RDWR || |
file->f_flags & O_WRONLY)) { |
CDEBUG(D_CACHE, "calling presto_get_permit\n"); |
if ( presto_get_permit(inode) < 0 ) { |
EXIT; |
return -EROFS; |
} |
presto_put_permit(inode); |
} |
|
if (open_check_dod(file, fset)) { |
CDEBUG(D_CACHE, "presto_open_upcall\n"); |
CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry); |
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA); |
rc = presto_open_upcall(minor, file->f_dentry); |
if (rc) { |
EXIT; |
CERROR("%s: returning error %d\n", __FUNCTION__, rc); |
return rc; |
} |
|
} |
|
/* file was truncated upon open: do not refetch */ |
if (file->f_flags & O_TRUNC) { |
CDEBUG(D_CACHE, "setting DATA, ATTR\n"); |
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA); |
} |
|
fops = filter_c2cffops(cache->cache_filter); |
if ( fops->open ) { |
CDEBUG(D_CACHE, "calling fs open\n"); |
rc = fops->open(inode, file); |
|
if (rc) { |
EXIT; |
return rc; |
} |
} |
|
if (writable) { |
PRESTO_ALLOC(fdata, sizeof(*fdata)); |
if (!fdata) { |
EXIT; |
return -ENOMEM; |
} |
/* LOCK: XXX check that the kernel lock protects this alloc */ |
fdata->fd_do_lml = 0; |
fdata->fd_bytes_written = 0; |
fdata->fd_fsuid = current->fsuid; |
fdata->fd_fsgid = current->fsgid; |
fdata->fd_mode = file->f_dentry->d_inode->i_mode; |
fdata->fd_uid = file->f_dentry->d_inode->i_uid; |
fdata->fd_gid = file->f_dentry->d_inode->i_gid; |
fdata->fd_ngroups = current->ngroups; |
for (i=0 ; i < current->ngroups ; i++) |
fdata->fd_groups[i] = current->groups[i]; |
if (!ISLENTO(minor)) |
fdata->fd_info.flags = LENTO_FL_KML; |
else { |
/* this is for the case of DOD, |
reint_close will adjust flags if needed */ |
fdata->fd_info.flags = 0; |
} |
|
presto_getversion(&fdata->fd_version, inode); |
file->private_data = fdata; |
} else { |
file->private_data = NULL; |
} |
|
EXIT; |
return 0; |
} |
|
int presto_adjust_lml(struct file *file, struct lento_vfs_context *info) |
{ |
struct presto_file_data *fdata = |
(struct presto_file_data *) file->private_data; |
|
if (!fdata) { |
EXIT; |
return -EINVAL; |
} |
|
memcpy(&fdata->fd_info, info, sizeof(*info)); |
EXIT; |
return 0; |
} |
|
|
static int presto_file_release(struct inode *inode, struct file *file) |
{ |
int rc; |
struct file_operations *fops; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct presto_file_data *fdata = |
(struct presto_file_data *)file->private_data; |
ENTRY; |
|
rc = presto_prep(file->f_dentry, &cache, &fset); |
if ( rc ) { |
EXIT; |
return rc; |
} |
|
fops = filter_c2cffops(cache->cache_filter); |
if (fops && fops->release) |
rc = fops->release(inode, file); |
|
CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n", |
ISLENTO(cache->cache_psdev->uc_minor), |
cache->cache_psdev->uc_minor, rc, fdata); |
|
/* this file was modified: ignore close errors, write KML */ |
if (fdata && fdata->fd_do_lml) { |
/* XXX: remove when lento gets file granularity cd */ |
if ( presto_get_permit(inode) < 0 ) { |
EXIT; |
return -EROFS; |
} |
|
fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime; |
rc = presto_do_close(fset, file); |
presto_put_permit(inode); |
} |
|
if (!rc && fdata) { |
PRESTO_FREE(fdata, sizeof(*fdata)); |
file->private_data = NULL; |
} |
|
EXIT; |
return rc; |
} |
|
/*
 * presto_apply_write_policy - journal an intermediate close once enough
 * data has been written to an open file.
 *
 * @file: file that was just written; its private_data must be non-NULL
 * @fset: fileset the file belongs to
 * @res:  number of bytes the write just completed
 *
 * Here we do a journal close after a fixed or a specified amount of
 * KBytes, currently a global parameter set with sysctl.  If files are open
 * for a long time, this gives added protection.  (XXX todo: per cache, add
 * ioctl, handle journaling in a thread, add more options etc.)
 */
static void presto_apply_write_policy(struct file *file,
                                      struct presto_file_set *fset, loff_t res)
{
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;
        struct presto_cache *cache = fset->fset_cache;
        struct presto_version current_ver;
        struct rec_info rec;
        struct inode *inode;
        int error;

        /* Policy is off for this fileset, or the writer is Lento. */
        if (!(fset->fset_flags & FSET_JCLOSE_ON_WRITE))
                return;
        if (ISLENTO(cache->cache_psdev->uc_minor))
                return;

        fdata->fd_bytes_written += res;
        if (fdata->fd_bytes_written < fset->fset_file_maxio)
                return;

        inode = file->f_dentry->d_inode;
        presto_getversion(&current_ver, inode);

        /* This is really heavy weight and should be fixed ASAP.  At most
         * we should be recording the number of bytes written and not
         * locking the kernel, wait for permits, etc, on the write path.
         * SHP
         */
        lock_kernel();
        if (presto_get_permit(inode) < 0) {
                EXIT;
                /* we must be disconnected, not to worry */
                unlock_kernel();
                return;
        }
        error = presto_journal_close(&rec, fset, file, file->f_dentry,
                                     &fdata->fd_version, &current_ver);
        presto_put_permit(inode);
        unlock_kernel();

        if (error) {
                CERROR("presto_close: cannot journal close\n");
                /* XXX these errors are really bad */
                /* panic(); */
                return;
        }

        /* Start counting towards the next intermediate close. */
        fdata->fd_bytes_written = 0;
}
|
static ssize_t presto_file_write(struct file *file, const char *buf, |
size_t size, loff_t *off) |
{ |
struct rec_info rec; |
int error; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct file_operations *fops; |
ssize_t res; |
int do_lml_here; |
void *handle = NULL; |
unsigned long blocks; |
struct presto_file_data *fdata; |
loff_t res_size; |
|
error = presto_prep(file->f_dentry, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1; |
/* XXX 3 is for ext2 indirect blocks ... */ |
res_size = 2 * PRESTO_REQHIGH + ((blocks+3) |
<< file->f_dentry->d_inode->i_sb->s_blocksize_bits); |
|
error = presto_reserve_space(fset->fset_cache, res_size); |
CDEBUG(D_INODE, "Reserved %Ld for %d\n", res_size, size); |
if ( error ) { |
EXIT; |
return -ENOSPC; |
} |
|
CDEBUG(D_INODE, "islento %d, minor: %d\n", |
ISLENTO(cache->cache_psdev->uc_minor), |
cache->cache_psdev->uc_minor); |
|
/* |
* XXX this lock should become a per inode lock when |
* Vinny's changes are in; we could just use i_sem. |
*/ |
read_lock(&fset->fset_lml.fd_lock); |
fdata = (struct presto_file_data *)file->private_data; |
do_lml_here = size && (fdata->fd_do_lml == 0) && |
!presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL); |
|
if (do_lml_here) |
fdata->fd_do_lml = 1; |
read_unlock(&fset->fset_lml.fd_lock); |
|
/* XXX |
There might be a bug here. We need to make |
absolutely sure that the ext3_file_write commits |
after our transaction that writes the LML record. |
Nesting the file write helps if new blocks are allocated. |
*/ |
res = 0; |
if (do_lml_here) { |
struct presto_version file_version; |
/* handle different space reqs from file system below! */ |
handle = presto_trans_start(fset, file->f_dentry->d_inode, |
KML_OPCODE_WRITE); |
if ( IS_ERR(handle) ) { |
presto_release_space(fset->fset_cache, res_size); |
CERROR("presto_write: no space for transaction\n"); |
return -ENOSPC; |
} |
|
presto_getversion(&file_version, file->f_dentry->d_inode); |
res = presto_write_lml_close(&rec, fset, file, |
fdata->fd_info.remote_ino, |
fdata->fd_info.remote_generation, |
&fdata->fd_info.remote_version, |
&file_version); |
fdata->fd_lml_offset = rec.offset; |
if ( res ) { |
CERROR("intermezzo: PANIC failed to write LML\n"); |
*(int *)0 = 1; |
EXIT; |
goto exit_write; |
} |
presto_trans_commit(fset, handle); |
} |
|
fops = filter_c2cffops(cache->cache_filter); |
res = fops->write(file, buf, size, off); |
if ( res != size ) { |
CDEBUG(D_FILE, "file write returns short write: size %d, res %d\n", size, res); |
} |
|
if ( (res > 0) && fdata ) |
presto_apply_write_policy(file, fset, res); |
|
exit_write: |
presto_release_space(fset->fset_cache, res_size); |
return res; |
} |
|
struct file_operations presto_file_fops = { |
.write = presto_file_write, |
.open = presto_file_open, |
.release = presto_file_release, |
.ioctl = presto_ioctl |
}; |
|
struct inode_operations presto_file_iops = { |
.permission = presto_permission, |
.setattr = presto_setattr, |
#ifdef CONFIG_FS_EXT_ATTR |
.set_ext_attr = presto_set_ext_attr, |
#endif |
}; |
|
/*
 * izo_purge_file - purge the cached data of a file (currently a stub).
 *
 * @fset: fileset the file belongs to
 * @file: name of the file to purge
 *
 * The implementation below is compiled out with "#if 0"; as built, the
 * function ignores both arguments and returns 0.  The disabled code is an
 * unfinished sketch (note its FIXMEs) that would truncate the file to zero
 * and back to its old size inside a KML_OPCODE_TRUNC transaction.
 */
/* FIXME: I bet we want to add a lock here and in presto_file_open. */ |
int izo_purge_file(struct presto_file_set *fset, char *file) |
{ |
#if 0 |
void *handle = NULL; |
char *path = NULL; |
struct nameidata nd; |
struct dentry *dentry; |
int rc = 0, len; |
loff_t oldsize; |
|
/* FIXME: not mtpt it's gone */ |
len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1; |
PRESTO_ALLOC(path, len + 1); |
if (path == NULL) |
return -1; |
|
sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file); |
rc = izo_lookup_file(fset, path, &nd); |
if (rc) |
goto error; |
dentry = nd.dentry; |
|
/* FIXME: take a lock here */ |
|
if (dentry->d_inode->i_atime > CURRENT_TIME - 5) { |
/* We lost the race; this file was accessed while we were doing |
* ioctls and lookups and whatnot. */ |
rc = -EBUSY; |
goto error_unlock; |
} |
|
/* FIXME: Check if this file is open. */ |
|
handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC); |
if (IS_ERR(handle)) { |
rc = -ENOMEM; |
goto error_unlock; |
} |
|
/* FIXME: Write LML record */ |
|
oldsize = dentry->d_inode->i_size; |
rc = izo_do_truncate(fset, dentry, 0, oldsize); |
if (rc != 0) |
goto error_clear; |
rc = izo_do_truncate(fset, dentry, oldsize, 0); |
if (rc != 0) |
goto error_clear; |
|
error_clear: |
/* FIXME: clear LML record */ |
|
error_unlock: |
/* FIXME: release the lock here */ |
|
error: |
if (handle != NULL && !IS_ERR(handle)) |
presto_trans_commit(fset, handle); |
if (path != NULL) |
PRESTO_FREE(path, len + 1); |
return rc; |
#else |
/* Purging is not implemented: report success without doing anything. */
return 0; |
#endif |
} |
/upcall.c
0,0 → 1,557
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2001, 2002 Cluster File Systems, Inc. <braam@clusterfs.com> |
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Mostly platform independent upcall operations to a cache manager: |
* -- upcalls |
* -- upcall routines |
* |
*/ |
|
#include <asm/system.h> |
#include <asm/segment.h> |
#include <asm/signal.h> |
#include <linux/signal.h> |
|
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/mm.h> |
#include <linux/vmalloc.h> |
#include <linux/slab.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <linux/string.h> |
#include <asm/uaccess.h> |
#include <linux/vmalloc.h> |
#include <asm/segment.h> |
|
#include <linux/intermezzo_lib.h> |
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#include <linux/intermezzo_idl.h> |
|
/* |
At present: |
-- Asynchronous calls: |
- kml: give a "more" kml indication to userland |
- kml_truncate: initiate KML truncation |
- release_permit: kernel is done with permit |
-- Synchronous |
- open: fetch file |
- permit: get a permit |
|
Errors returned by user level code are positive |
|
*/ |
|
/*
 * Allocate and fill an upcall message: a struct izo_upcall_hdr followed by
 * the optional @path, @fsetname and @rec payloads, in that order.
 *
 * @opcode:   stored in hdr->u_opc
 * @pathlen:  not used here; the path length is taken from strlen(@path)
 * @path:     NUL-terminated path, or NULL
 * @fsetname: NUL-terminated fileset name, or NULL
 * @reclen:   length of @rec in bytes
 * @rec:      opaque record, or NULL
 * @size:     out: total size of the message; written before the
 *            allocation is attempted
 *
 * Returns the new message, or NULL when the allocation fails.
 */
static struct izo_upcall_hdr *upc_pack(__u32 opcode, int pathlen, char *path,
                                       char *fsetname, int reclen, char *rec,
                                       int *size)
{
        struct izo_upcall_hdr *msg;
        char *cursor;
        ENTRY;

        /* Header plus the rounded size of every payload that is present. */
        *size = sizeof(struct izo_upcall_hdr);
        if (fsetname)
                *size += round_strlen(fsetname);
        if (path)
                *size += round_strlen(path);
        if (rec)
                *size += size_round(reclen);

        PRESTO_ALLOC(msg, *size);
        if (!msg) {
                CERROR("intermezzo upcall: out of memory (opc %d)\n", opcode);
                EXIT;
                return NULL;
        }
        memset(msg, 0, *size);

        /* Payloads start right behind the fixed header. */
        cursor = (char *)msg + sizeof(*msg);

        /* XXX do we need fsuid ? */
        msg->u_len = *size;
        msg->u_version = IZO_UPC_VERSION;
        msg->u_opc = opcode;
        msg->u_pid = current->pid;
        msg->u_uid = current->fsuid;

        if (path) {
                /*XXX Robert: please review what len to pass in for
                  NUL terminated strings */
                msg->u_pathlen = strlen(path);
                LOGL0(path, msg->u_pathlen, cursor);
        }
        if (fsetname) {
                msg->u_fsetlen = strlen(fsetname);
                LOGL0(fsetname, strlen(fsetname), cursor);
        }
        if (rec) {
                msg->u_reclen = reclen;
                LOGL(rec, reclen, cursor);
        }

        EXIT;
        return msg;
}
|
/* the upcalls */ |
/*
 * Asynchronous upcall announcing a range of KML records for @fsetname.
 *
 * Returns 0 without sending anything when presto_lento_up(@minor) is
 * false, -ENOMEM when the message cannot be allocated, and otherwise the
 * negated return value of izo_upc_upcall().
 */
int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length, __u32 last_recno, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;

        ENTRY;
        if (!presto_lento_up(minor)) {
                EXIT;
                return 0;
        }

        hdr = upc_pack(IZO_UPC_KML, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL;
                 * -PTR_ERR(NULL) would be 0 and read as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        hdr->u_offset = offset;
        hdr->u_first_recno = first_recno;
        hdr->u_length = length;
        hdr->u_last_recno = last_recno;

        CDEBUG(D_UPCALL, "KML: fileset %s, offset %Lu, length %Lu, "
               "first %u, last %d; minor %d\n",
               fsetname, hdr->u_offset, hdr->u_length, hdr->u_first_recno,
               hdr->u_last_recno, minor);

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);

        EXIT;
        return -error;
}
|
/*
 * Asynchronous upcall asking for KML truncation of @fsetname, passing
 * @length and @last_recno in the header.
 *
 * Returns 0 without sending anything when presto_lento_up(@minor) is
 * false, -ENOMEM when the message cannot be allocated, and otherwise the
 * return value of izo_upc_upcall().
 *
 * NOTE(review): unlike the sibling upcalls this one returns the upcall
 * result without negating it -- confirm which sign callers expect.
 */
int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;

        ENTRY;
        if (!presto_lento_up(minor)) {
                EXIT;
                return 0;
        }

        hdr = upc_pack(IZO_UPC_KML_TRUNC, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL;
                 * -PTR_ERR(NULL) would be 0 and read as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        hdr->u_length = length;
        hdr->u_last_recno = last_recno;

        CDEBUG(D_UPCALL, "KML TRUNCATE: fileset %s, length %Lu, "
               "last recno %d, minor %d\n",
               fsetname, hdr->u_length, hdr->u_last_recno, minor);

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);

        EXIT;
        return error;
}
|
/*
 * Synchronous "open" upcall for @path in @fsetname; @info is shipped as
 * the record payload.
 *
 * Returns -EIO when presto_lento_up(@minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated return value of
 * izo_upc_upcall().
 */
int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname, struct lento_vfs_context *info)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_OPEN, pathlen, path, fsetname,
                       sizeof(*info), (char*)info, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL;
                 * -PTR_ERR(NULL) would be 0 and read as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        CDEBUG(D_UPCALL, "path %s\n", path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
|
/*
 * Synchronous "get fileid" upcall for @path in @fsetname; @rec (@reclen
 * bytes) is shipped as the record payload.
 *
 * Returns -EIO when presto_lento_up(@minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated return value of
 * izo_upc_upcall().
 */
int izo_upc_get_fileid(int minor, __u32 reclen, char *rec,
                       __u32 pathlen, char *path, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_GET_FILEID, pathlen, path, fsetname, reclen, rec, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL;
                 * -PTR_ERR(NULL) would be 0 and read as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        CDEBUG(D_UPCALL, "path %s\n", path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
|
int izo_upc_backfetch(int minor, char *path, char *fsetname, struct lento_vfs_context *info) |
{ |
int size; |
int error; |
struct izo_upcall_hdr *hdr; |
ENTRY; |
|
if (!presto_lento_up(minor)) { |
EXIT; |
return -EIO; |
} |
|
hdr = upc_pack(IZO_UPC_BACKFETCH, strlen(path), path, fsetname, |
sizeof(*info), (char *)info, &size); |
if (!hdr || IS_ERR(hdr)) { |
EXIT; |
return -PTR_ERR(hdr); |
} |
|
/* This is currently synchronous, kml_reint_record blocks */ |
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); |
if (error) |
CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); |
|
EXIT; |
return -error; |
} |
|
/*
 * Synchronous "permit" upcall for @path in @fsetname.
 *
 * When the upcall returns -EROFS the cache behind @dentry is marked
 * CACHE_CLIENT_RO via izo_mark_cache().
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * return value of izo_upc_upcall() (not negated).  This function does
 * not check presto_lento_up() before sending.
 */
int izo_upc_permit(int minor, struct dentry *dentry, __u32 pathlen, char *path,
                   char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;

        ENTRY;

        hdr = upc_pack(IZO_UPC_PERMIT, pathlen, path, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL;
                 * -PTR_ERR(NULL) would be 0 and read as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        CDEBUG(D_UPCALL, "Permit minor %d path %s\n", minor, path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);

        if (error == -EROFS) {
                int err;
                CERROR("InterMezzo: ERROR - requested permit for read-only "
                       "fileset.\n   Setting \"%s\" read-only!\n", path);
                err = izo_mark_cache(dentry, 0xFFFFFFFF, CACHE_CLIENT_RO, NULL);
                if (err)
                        CERROR("InterMezzo ERROR: mark_cache %d\n", err);
        } else if (error) {
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
        }

        EXIT;
        return error;
}
|
/* This is a ping-pong upcall handled on the server when a client (uuid) |
* requests the permit for itself. */ |
int izo_upc_revoke_permit(int minor, char *fsetname, __u8 uuid[16]) |
{ |
int size; |
int error; |
struct izo_upcall_hdr *hdr; |
|
ENTRY; |
|
hdr = upc_pack(IZO_UPC_REVOKE_PERMIT, 0, NULL, fsetname, 0, NULL, &size); |
if (!hdr || IS_ERR(hdr)) { |
EXIT; |
return -PTR_ERR(hdr); |
} |
|
memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid)); |
|
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); |
|
if (error) |
CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); |
|
EXIT; |
return -error; |
} |
|
int izo_upc_go_fetch_kml(int minor, char *fsetname, __u8 uuid[16], |
__u64 kmlsize) |
{ |
int size; |
int error; |
struct izo_upcall_hdr *hdr; |
ENTRY; |
|
if (!presto_lento_up(minor)) { |
EXIT; |
return -EIO; |
} |
|
hdr = upc_pack(IZO_UPC_GO_FETCH_KML, 0, NULL, fsetname, 0, NULL, &size); |
if (!hdr || IS_ERR(hdr)) { |
EXIT; |
return -PTR_ERR(hdr); |
} |
|
hdr->u_offset = kmlsize; |
memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid)); |
|
error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS); |
if (error) |
CERROR("%s: error %d\n", __FUNCTION__, error); |
|
EXIT; |
return -error; |
} |
|
/*
 * Synchronous "connect" upcall.  The header fields are reused as
 * carriers: @ip_address in u_offset, @port in u_length, @client_flag in
 * u_first_recno and @uuid in u_uuid.
 *
 * Returns -EIO when presto_lento_up(@minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated return value of
 * izo_upc_upcall().
 */
int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16],
                    int client_flag)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_CONNECT, 0, NULL, NULL, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL;
                 * -PTR_ERR(NULL) would be 0 and read as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        hdr->u_offset = ip_address;
        hdr->u_length = port;
        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
        hdr->u_first_recno = client_flag;

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error) {
                CERROR("%s: error %d\n", __FUNCTION__, error);
        }

        EXIT;
        return -error;
}
|
/*
 * Synchronous "set KML size" upcall for @fsetname; @kmlsize travels in
 * hdr->u_length and @uuid in hdr->u_uuid.
 *
 * Returns -EIO when presto_lento_up(@minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated return value of
 * izo_upc_upcall().
 */
int izo_upc_set_kmlsize(int minor, char *fsetname, __u8 uuid[16], __u64 kmlsize)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_SET_KMLSIZE, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL;
                 * -PTR_ERR(NULL) would be 0 and read as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
        hdr->u_length = kmlsize;

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("%s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
|
int izo_upc_repstatus(int minor, char * fsetname, struct izo_rcvd_rec *lr_server) |
{ |
int size; |
int error; |
struct izo_upcall_hdr *hdr; |
ENTRY; |
|
if (!presto_lento_up(minor)) { |
EXIT; |
return -EIO; |
} |
|
hdr = upc_pack(IZO_UPC_REPSTATUS, 0, NULL, fsetname, |
sizeof(*lr_server), (char*)lr_server, |
&size); |
if (!hdr || IS_ERR(hdr)) { |
EXIT; |
return -PTR_ERR(hdr); |
} |
|
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); |
if (error) |
CERROR("%s: error %d\n", __FUNCTION__, error); |
|
EXIT; |
return -error; |
} |
|
|
#if 0
/*
 * Disabled: synchronous "client make branch" upcall.  @tagname is packed
 * in the path slot and @branchname (including its NUL) as the record.
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * return value of izo_upc_upcall().
 */
int izo_upc_client_make_branch(int minor, char *fsetname, char *tagname,
                               char *branchname)
{
        int size, error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        hdr = upc_pack(IZO_UPC_CLIENT_MAKE_BRANCH, strlen(tagname), tagname,
                       fsetname, strlen(branchname) + 1, branchname, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL. */
                error = -ENOMEM;
                goto error;
        }
        if (IS_ERR(hdr)) {
                error = -PTR_ERR(hdr);
                goto error;
        }

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: error %d\n", error);

 error:
        /* The old exit path freed a "path" buffer that was never
         * allocated or even initialised; nothing needs freeing here. */
        EXIT;
        return error;
}
#endif
|
int izo_upc_server_make_branch(int minor, char *fsetname) |
{ |
int size, error; |
struct izo_upcall_hdr *hdr; |
ENTRY; |
|
hdr = upc_pack(IZO_UPC_SERVER_MAKE_BRANCH, 0, NULL, fsetname, 0, NULL, &size); |
if (!hdr || IS_ERR(hdr)) { |
error = -PTR_ERR(hdr); |
goto error; |
} |
|
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); |
if (error) |
CERROR("InterMezzo: error %d\n", error); |
|
error: |
EXIT; |
return -error; |
} |
|
int izo_upc_branch_undo(int minor, char *fsetname, char *branchname) |
{ |
int size; |
int error; |
struct izo_upcall_hdr *hdr; |
ENTRY; |
|
if (!presto_lento_up(minor)) { |
EXIT; |
return -EIO; |
} |
|
hdr = upc_pack(IZO_UPC_BRANCH_UNDO, strlen(branchname), branchname, |
fsetname, 0, NULL, &size); |
if (!hdr || IS_ERR(hdr)) { |
EXIT; |
return -PTR_ERR(hdr); |
} |
|
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); |
if (error) |
CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); |
|
EXIT; |
return -error; |
} |
|
int izo_upc_branch_redo(int minor, char *fsetname, char *branchname) |
{ |
int size; |
int error; |
struct izo_upcall_hdr *hdr; |
ENTRY; |
|
if (!presto_lento_up(minor)) { |
EXIT; |
return -EIO; |
} |
|
hdr = upc_pack(IZO_UPC_BRANCH_REDO, strlen(branchname) + 1, branchname, |
fsetname, 0, NULL, &size); |
if (!hdr || IS_ERR(hdr)) { |
EXIT; |
return -PTR_ERR(hdr); |
} |
|
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); |
if (error) |
CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); |
|
EXIT; |
return -error; |
} |
/sysctl.c
0,0 → 1,369
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1999 Peter J. Braam <braam@clusterfs.com> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Sysctrl entries for Intermezzo! |
*/ |
|
#define __NO_VERSION__ |
#include <linux/config.h> /* for CONFIG_PROC_FS */ |
#include <linux/module.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/sysctl.h> |
#include <linux/swapctl.h> |
#include <linux/proc_fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/stat.h> |
#include <linux/ctype.h> |
#include <linux/init.h> |
#include <asm/bitops.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/utsname.h> |
#include <linux/blk.h> |
|
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
/* /proc entries */ |
|
#ifdef CONFIG_PROC_FS |
struct proc_dir_entry *proc_fs_intermezzo; |
/*
 * get_info callback for the "mounts" proc entry.  No text is written to
 * @buffer: the produced length is 0, so the result is simply -@offset,
 * and a negative result is reported as -EINVAL.  *@start is always set
 * to @buffer + @offset.  @length is not used.
 */
int intermezzo_mount_get_info(char *buffer, char **start, off_t offset,
                              int length)
{
        /* this works as long as we are below 1024 characters! */
        int remaining = 0 - offset;

        *start = buffer + offset;

        return (remaining < 0) ? -EINVAL : remaining;
}
|
#endif |
|
|
/* SYSCTL below */ |
|
static struct ctl_table_header *intermezzo_table_header = NULL; |
/* 0x100 to avoid any chance of collisions at any point in the tree with |
* non-directories |
*/ |
#define PSDEV_INTERMEZZO (0x100) |
|
#define PSDEV_DEBUG 1 /* control debugging */ |
#define PSDEV_TRACE 2 /* control enter/leave pattern */ |
#define PSDEV_TIMEOUT 3 /* timeout on upcalls to become intrble */ |
#define PSDEV_HARD 4 /* mount type "hard" or "soft" */ |
#define PSDEV_NO_FILTER 5 /* controls presto_chk */ |
#define PSDEV_NO_JOURNAL 6 /* controls presto_chk */ |
#define PSDEV_NO_UPCALL 7 /* controls lento_upcall */ |
#define PSDEV_ERRORVAL 8 /* controls presto_debug_fail_blkdev */ |
#define PSDEV_EXCL_GID 9 /* which GID is ignored by presto */ |
#define PSDEV_BYTES_TO_CLOSE 11 /* bytes to write before close */ |
|
/* These are global presto control options */ |
#define PRESTO_PRIMARY_CTLCNT 2 |
static struct ctl_table presto_table[ PRESTO_PRIMARY_CTLCNT + MAX_CHANNEL + 1] = |
{ |
{PSDEV_DEBUG, "debug", &presto_debug, sizeof(int), 0644, NULL, &proc_dointvec}, |
{PSDEV_TRACE, "trace", &presto_print_entry, sizeof(int), 0644, NULL, &proc_dointvec}, |
}; |
|
/* |
* Installing the sysctl entries: strategy |
* - have templates for each /proc/sys/intermezzo/ entry |
* such an entry exists for each /dev/presto |
* (proto_channel_entry) |
* - have a template for the contents of such directories |
* (proto_psdev_table) |
* - have the master table (presto_table) |
* |
* When installing, malloc, memcpy and fix up the pointers to point to |
* the appropriate constants in izo_channels[your_minor] |
*/ |
|
static ctl_table proto_psdev_table[] = { |
{PSDEV_HARD, "hard", 0, sizeof(int), 0644, NULL, &proc_dointvec}, |
{PSDEV_NO_FILTER, "no_filter", 0, sizeof(int), 0644, NULL, &proc_dointvec}, |
{PSDEV_NO_JOURNAL, "no_journal", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, |
{PSDEV_NO_UPCALL, "no_upcall", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, |
{PSDEV_TIMEOUT, "timeout", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, |
#ifdef PRESTO_DEBUG |
{PSDEV_ERRORVAL, "errorval", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, |
#endif |
{ 0 } |
}; |
|
static ctl_table proto_channel_entry = { |
PSDEV_INTERMEZZO, 0, NULL, 0, 0555, 0, |
}; |
|
static ctl_table intermezzo_table[2] = { |
{PSDEV_INTERMEZZO, "intermezzo", NULL, 0, 0555, presto_table}, |
{0} |
}; |
|
/* support for external setting and getting of opts. */ |
/* particularly via ioctl. The Right way to do this is via sysctl, |
* but that will have to wait until intermezzo gets its own nice set of |
* sysctl IDs |
*/ |
/* we made these separate as setting may in future be more restricted |
* than getting |
*/ |
#ifdef RON_MINNICH |
int dosetopt(int minor, struct psdev_opt *opt) |
{ |
int retval = 0; |
int newval = opt->optval; |
|
ENTRY; |
|
switch(opt->optname) { |
|
case PSDEV_TIMEOUT: |
izo_channels[minor].uc_timeout = newval; |
break; |
|
case PSDEV_HARD: |
izo_channels[minor].uc_hard = newval; |
break; |
|
case PSDEV_NO_FILTER: |
izo_channels[minor].uc_no_filter = newval; |
break; |
|
case PSDEV_NO_JOURNAL: |
izo_channels[minor].uc_no_journal = newval; |
break; |
|
case PSDEV_NO_UPCALL: |
izo_channels[minor].uc_no_upcall = newval; |
break; |
|
#ifdef PRESTO_DEBUG |
case PSDEV_ERRORVAL: { |
/* If we have a positive arg, set a breakpoint for that |
* value. If we have a negative arg, make that device |
* read-only. FIXME It would be much better to only |
* allow setting the underlying device read-only for the |
* current presto cache. |
*/ |
int errorval = izo_channels[minor].uc_errorval; |
if (errorval < 0) { |
if (newval == 0) |
set_device_ro(-errorval, 0); |
else |
CERROR("device %s already read only\n", |
kdevname(-errorval)); |
} else { |
if (newval < 0) |
set_device_ro(-newval, 1); |
izo_channels[minor].uc_errorval = newval; |
CDEBUG(D_PSDEV, "setting errorval to %d\n", newval); |
} |
|
break; |
} |
#endif |
|
case PSDEV_TRACE: |
case PSDEV_DEBUG: |
case PSDEV_BYTES_TO_CLOSE: |
default: |
CDEBUG(D_PSDEV, |
"ioctl: dosetopt: minor %d, bad optname 0x%x, \n", |
minor, opt->optname); |
|
retval = -EINVAL; |
} |
|
EXIT; |
return retval; |
} |
|
int dogetopt(int minor, struct psdev_opt *opt) |
{ |
int retval = 0; |
|
ENTRY; |
|
switch(opt->optname) { |
|
case PSDEV_TIMEOUT: |
opt->optval = izo_channels[minor].uc_timeout; |
break; |
|
case PSDEV_HARD: |
opt->optval = izo_channels[minor].uc_hard; |
break; |
|
case PSDEV_NO_FILTER: |
opt->optval = izo_channels[minor].uc_no_filter; |
break; |
|
case PSDEV_NO_JOURNAL: |
opt->optval = izo_channels[minor].uc_no_journal; |
break; |
|
case PSDEV_NO_UPCALL: |
opt->optval = izo_channels[minor].uc_no_upcall; |
break; |
|
#ifdef PSDEV_DEBUG |
case PSDEV_ERRORVAL: { |
int errorval = izo_channels[minor].uc_errorval; |
if (errorval < 0 && is_read_only(-errorval)) |
CERROR("device %s has been set read-only\n", |
kdevname(-errorval)); |
opt->optval = izo_channels[minor].uc_errorval; |
break; |
} |
#endif |
|
case PSDEV_TRACE: |
case PSDEV_DEBUG: |
case PSDEV_BYTES_TO_CLOSE: |
default: |
CDEBUG(D_PSDEV, |
"ioctl: dogetopt: minor %d, bad optval 0x%x, \n", |
minor, opt->optname); |
|
retval = -EINVAL; |
} |
|
EXIT; |
return retval; |
} |
#endif |
|
|
/* allocate the tables for the presto devices. We need |
* sizeof(proto_channel_table)/sizeof(proto_channel_table[0]) |
* entries for each dev |
*/ |
int /* __init */ init_intermezzo_sysctl(void) |
{ |
int i; |
int total_dev = MAX_CHANNEL; |
int entries_per_dev = sizeof(proto_psdev_table) / |
sizeof(proto_psdev_table[0]); |
int total_entries = entries_per_dev * total_dev; |
ctl_table *dev_ctl_table; |
|
PRESTO_ALLOC(dev_ctl_table, sizeof(ctl_table) * total_entries); |
|
if (! dev_ctl_table) { |
CERROR("WARNING: presto couldn't allocate dev_ctl_table\n"); |
EXIT; |
return -ENOMEM; |
} |
|
/* now fill in the entries ... we put the individual presto<x> |
* entries at the end of the table, and the per-presto stuff |
* starting at the front. We assume that the compiler makes |
* this code more efficient, but really, who cares ... it |
* happens once per reboot. |
*/ |
for(i = 0; i < total_dev; i++) { |
/* entry for this /proc/sys/intermezzo/intermezzo"i" */ |
ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT]; |
/* entries for the individual "files" in this "directory" */ |
ctl_table *psdev_entries = &dev_ctl_table[i * entries_per_dev]; |
/* init the psdev and psdev_entries with the prototypes */ |
*psdev = proto_channel_entry; |
memcpy(psdev_entries, proto_psdev_table, |
sizeof(proto_psdev_table)); |
/* now specialize them ... */ |
/* the psdev has to point to psdev_entries, and fix the number */ |
psdev->ctl_name = psdev->ctl_name + i + 1; /* sorry */ |
|
PRESTO_ALLOC((void*)psdev->procname, PROCNAME_SIZE); |
if (!psdev->procname) { |
PRESTO_FREE(dev_ctl_table, |
sizeof(ctl_table) * total_entries); |
return -ENOMEM; |
} |
sprintf((char *) psdev->procname, "intermezzo%d", i); |
/* hook presto into */ |
psdev->child = psdev_entries; |
|
/* now for each psdev entry ... */ |
psdev_entries[0].data = &(izo_channels[i].uc_hard); |
psdev_entries[1].data = &(izo_channels[i].uc_no_filter); |
psdev_entries[2].data = &(izo_channels[i].uc_no_journal); |
psdev_entries[3].data = &(izo_channels[i].uc_no_upcall); |
psdev_entries[4].data = &(izo_channels[i].uc_timeout); |
#ifdef PRESTO_DEBUG |
psdev_entries[5].data = &(izo_channels[i].uc_errorval); |
#endif |
} |
|
|
#ifdef CONFIG_SYSCTL |
if ( !intermezzo_table_header ) |
intermezzo_table_header = |
register_sysctl_table(intermezzo_table, 0); |
#endif |
#ifdef CONFIG_PROC_FS |
proc_fs_intermezzo = proc_mkdir("intermezzo", proc_root_fs); |
proc_fs_intermezzo->owner = THIS_MODULE; |
create_proc_info_entry("mounts", 0, proc_fs_intermezzo, |
intermezzo_mount_get_info); |
#endif |
return 0; |
} |
|
void cleanup_intermezzo_sysctl(void) |
{ |
int total_dev = MAX_CHANNEL; |
int entries_per_dev = sizeof(proto_psdev_table) / |
sizeof(proto_psdev_table[0]); |
int total_entries = entries_per_dev * total_dev; |
int i; |
|
#ifdef CONFIG_SYSCTL |
if ( intermezzo_table_header ) |
unregister_sysctl_table(intermezzo_table_header); |
intermezzo_table_header = NULL; |
#endif |
for(i = 0; i < total_dev; i++) { |
/* entry for this /proc/sys/intermezzo/intermezzo"i" */ |
ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT]; |
PRESTO_FREE(psdev->procname, PROCNAME_SIZE); |
} |
/* presto_table[PRESTO_PRIMARY_CTLCNT].child points to the |
* dev_ctl_table previously allocated in init_intermezzo_psdev() |
*/ |
PRESTO_FREE(presto_table[PRESTO_PRIMARY_CTLCNT].child, sizeof(ctl_table) * total_entries); |
|
#if CONFIG_PROC_FS |
remove_proc_entry("mounts", proc_fs_intermezzo); |
remove_proc_entry("intermezzo", proc_root_fs); |
#endif |
} |
|
/psdev.c
0,0 → 1,651
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* An implementation of a loadable kernel mode driver providing |
* multiple kernel/user space bidirectional communications links. |
* |
* Author: Alan Cox <alan@cymru.net> |
* |
* This program is free software; you can redistribute it and/or |
* modify it under the terms of the GNU General Public License |
* version 2 as published by the Free Software Foundation. |
* |
* Adapted to become the Linux 2.0 Coda pseudo device |
* Peter Braam <braam@maths.ox.ac.uk> |
* Michael Callahan <mjc@emmy.smith.edu> |
* |
* Changes for Linux 2.1 |
* Copyright (c) 1997 Carnegie-Mellon University |
* |
* Redone again for InterMezzo |
* Copyright (c) 1998 Peter J. Braam |
* Copyright (c) 2000 Mountain View Data, Inc. |
* Copyright (c) 2000 Tacitus Systems, Inc. |
* Copyright (c) 2001 Cluster File Systems, Inc. |
* |
*/ |
|
#include <linux/module.h> |
#include <linux/errno.h> |
#include <linux/kernel.h> |
#include <linux/major.h> |
#include <linux/sched.h> |
#include <linux/lp.h> |
#include <linux/slab.h> |
#include <linux/ioport.h> |
#include <linux/fcntl.h> |
#include <linux/delay.h> |
#include <linux/skbuff.h> |
#include <linux/proc_fs.h> |
#include <linux/vmalloc.h> |
#include <linux/fs.h> |
#include <linux/file.h> |
#include <linux/poll.h> |
#include <linux/init.h> |
#include <linux/list.h> |
#include <linux/devfs_fs_kernel.h> |
#include <asm/io.h> |
#include <asm/segment.h> |
#include <asm/system.h> |
#include <asm/poll.h> |
#include <asm/uaccess.h> |
#include <linux/miscdevice.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
|
#ifdef PRESTO_DEVEL |
int presto_print_entry = 1; |
int presto_debug = 4095; |
#else |
int presto_print_entry = 0; |
int presto_debug = 0; |
#endif |
|
/* Like inode.c (presto_sym_iops), the initializer is just to prevent |
izo_channels from appearing as a COMMON symbol (and therefore |
interfering with other modules that use the same variable name). */ |
struct upc_channel izo_channels[MAX_CHANNEL] = {{0}}; |
|
int izo_psdev_get_free_channel(void) |
{ |
int i, result = -1; |
|
for (i = 0 ; i < MAX_CHANNEL ; i++ ) { |
if (list_empty(&(izo_channels[i].uc_cache_list))) { |
result = i; |
break; |
} |
} |
return result; |
} |
|
|
int izo_psdev_setpid(int minor) |
{ |
struct upc_channel *channel; |
if (minor < 0 || minor >= MAX_CHANNEL) { |
return -EINVAL; |
} |
|
channel = &(izo_channels[minor]); |
/* |
* This ioctl is performed by each Lento that starts up |
* and wants to do further communication with presto. |
*/ |
CDEBUG(D_PSDEV, "Setting current pid to %d channel %d\n", |
current->pid, minor); |
channel->uc_pid = current->pid; |
spin_lock(&channel->uc_lock); |
if ( !list_empty(&channel->uc_processing) ) { |
struct list_head *lh; |
struct upc_req *req; |
CERROR("WARNING: setpid & processing not empty!\n"); |
lh = &channel->uc_processing; |
while ( (lh = lh->next) != &channel->uc_processing) { |
req = list_entry(lh, struct upc_req, rq_chain); |
/* freeing of req and data is done by the sleeper */ |
wake_up(&req->rq_sleep); |
} |
} |
if ( !list_empty(&channel->uc_processing) ) { |
CERROR("BAD: FAILDED TO CLEAN PROCESSING LIST!\n"); |
} |
spin_unlock(&channel->uc_lock); |
EXIT; |
return 0; |
} |
|
int izo_psdev_setchannel(struct file *file, int fd) |
{ |
|
struct file *psdev_file = fget(fd); |
struct presto_cache *cache = presto_get_cache(file->f_dentry->d_inode); |
|
if (!psdev_file) { |
CERROR("%s: no psdev_file!\n", __FUNCTION__); |
return -EINVAL; |
} |
|
if (!cache) { |
CERROR("%s: no cache!\n", __FUNCTION__); |
fput(psdev_file); |
return -EINVAL; |
} |
|
if (psdev_file->private_data) { |
CERROR("%s: channel already set!\n", __FUNCTION__); |
fput(psdev_file); |
return -EINVAL; |
} |
|
psdev_file->private_data = cache->cache_psdev; |
fput(psdev_file); |
EXIT; |
return 0; |
} |
|
inline int presto_lento_up(int minor) |
{ |
return izo_channels[minor].uc_pid; |
} |
|
/*
 * poll method of the psdev: the device is always writable, and readable
 * whenever the channel's pending list is not empty.
 *
 * A file that has no channel attached reports POLLERR.  The return value
 * is an event mask, so the previous "return -EBADF" was not seen as an
 * error by the caller but as a mask with nearly every event bit set.
 */
static unsigned int presto_psdev_poll(struct file *file, poll_table * wait)
{
        struct upc_channel *channel = (struct upc_channel *)file->private_data;
        unsigned int mask = POLLOUT | POLLWRNORM;

        /* ENTRY; this will flood you */
        if ( ! channel ) {
                CERROR("%s: bad psdev file\n", __FUNCTION__);
                return POLLERR;
        }

        poll_wait(file, &(channel->uc_waitq), wait);

        spin_lock(&channel->uc_lock);
        if (!list_empty(&channel->uc_pending)) {
                CDEBUG(D_PSDEV, "Non-empty pending list.\n");
                mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock(&channel->uc_lock);

        /* EXIT; will flood you */
        return mask;
}
|
/* |
* Receive a message written by Lento to the psdev |
*/ |
static ssize_t presto_psdev_write(struct file *file, const char *buf, |
size_t count, loff_t *off) |
{ |
struct upc_channel *channel = (struct upc_channel *)file->private_data; |
struct upc_req *req = NULL; |
struct upc_req *tmp; |
struct list_head *lh; |
struct izo_upcall_resp hdr; |
int error; |
|
if ( ! channel ) { |
CERROR("%s: bad psdev file\n", __FUNCTION__); |
return -EBADF; |
} |
|
/* Peek at the opcode, uniquefier */ |
if ( count < sizeof(hdr) ) { |
CERROR("presto_psdev_write: Lento didn't write full hdr.\n"); |
return -EINVAL; |
} |
|
error = copy_from_user(&hdr, buf, sizeof(hdr)); |
if ( error ) |
return -EFAULT; |
|
CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n", |
current->pid, hdr.opcode, hdr.unique); |
|
spin_lock(&channel->uc_lock); |
/* Look for the message on the processing queue. */ |
lh = &channel->uc_processing; |
while ( (lh = lh->next) != &channel->uc_processing ) { |
tmp = list_entry(lh, struct upc_req , rq_chain); |
if (tmp->rq_unique == hdr.unique) { |
req = tmp; |
/* unlink here: keeps search length minimal */ |
list_del_init(&req->rq_chain); |
CDEBUG(D_PSDEV,"Eureka opc %d uniq %d!\n", |
hdr.opcode, hdr.unique); |
break; |
} |
} |
spin_unlock(&channel->uc_lock); |
if (!req) { |
CERROR("psdev_write: msg (%d, %d) not found\n", |
hdr.opcode, hdr.unique); |
return(-ESRCH); |
} |
|
/* move data into response buffer. */ |
if (req->rq_bufsize < count) { |
CERROR("psdev_write: too much cnt: %d, cnt: %d, " |
"opc: %d, uniq: %d.\n", |
req->rq_bufsize, count, hdr.opcode, hdr.unique); |
count = req->rq_bufsize; /* don't have more space! */ |
} |
error = copy_from_user(req->rq_data, buf, count); |
if ( error ) |
return -EFAULT; |
|
/* adjust outsize: good upcalls can be aware of this */ |
req->rq_rep_size = count; |
req->rq_flags |= REQ_WRITE; |
|
wake_up(&req->rq_sleep); |
return(count); |
} |
|
/* |
* Read a message from the kernel to Lento |
*/ |
static ssize_t presto_psdev_read(struct file * file, char * buf, |
size_t count, loff_t *off) |
{ |
struct upc_channel *channel = (struct upc_channel *)file->private_data; |
struct upc_req *req; |
int result = count; |
|
if ( ! channel ) { |
CERROR("%s: bad psdev file\n", __FUNCTION__); |
return -EBADF; |
} |
|
spin_lock(&channel->uc_lock); |
if (list_empty(&(channel->uc_pending))) { |
CDEBUG(D_UPCALL, "Empty pending list in read, not good\n"); |
spin_unlock(&channel->uc_lock); |
return -EINVAL; |
} |
req = list_entry((channel->uc_pending.next), struct upc_req, rq_chain); |
list_del(&(req->rq_chain)); |
if (! (req->rq_flags & REQ_ASYNC) ) { |
list_add(&(req->rq_chain), channel->uc_processing.prev); |
} |
spin_unlock(&channel->uc_lock); |
|
req->rq_flags |= REQ_READ; |
|
/* Move the input args into userspace */ |
CDEBUG(D_PSDEV, "\n"); |
if (req->rq_bufsize <= count) { |
result = req->rq_bufsize; |
} |
|
if (count < req->rq_bufsize) { |
CERROR ("psdev_read: buffer too small, read %d of %d bytes\n", |
count, req->rq_bufsize); |
} |
|
if ( copy_to_user(buf, req->rq_data, result) ) { |
BUG(); |
return -EFAULT; |
} |
|
/* If request was asynchronous don't enqueue, but free */ |
if (req->rq_flags & REQ_ASYNC) { |
CDEBUG(D_PSDEV, "psdev_read: async msg (%d, %d), result %d\n", |
req->rq_opcode, req->rq_unique, result); |
PRESTO_FREE(req->rq_data, req->rq_bufsize); |
PRESTO_FREE(req, sizeof(*req)); |
return result; |
} |
|
return result; |
} |
|
|
static int presto_psdev_open(struct inode * inode, struct file * file) |
{ |
ENTRY; |
|
file->private_data = NULL; |
|
MOD_INC_USE_COUNT; |
|
CDEBUG(D_PSDEV, "Psdev_open: caller: %d, flags: %d\n", current->pid, file->f_flags); |
|
EXIT; |
return 0; |
} |
|
|
|
static int presto_psdev_release(struct inode * inode, struct file * file) |
{ |
struct upc_channel *channel = (struct upc_channel *)file->private_data; |
struct upc_req *req; |
struct list_head *lh; |
ENTRY; |
|
if ( ! channel ) { |
CERROR("%s: bad psdev file\n", __FUNCTION__); |
return -EBADF; |
} |
|
MOD_DEC_USE_COUNT; |
CDEBUG(D_PSDEV, "Lento: pid %d\n", current->pid); |
channel->uc_pid = 0; |
|
/* Wake up clients so they can return. */ |
CDEBUG(D_PSDEV, "Wake up clients sleeping for pending.\n"); |
spin_lock(&channel->uc_lock); |
lh = &channel->uc_pending; |
while ( (lh = lh->next) != &channel->uc_pending) { |
req = list_entry(lh, struct upc_req, rq_chain); |
|
/* Async requests stay around for a new lento */ |
if (req->rq_flags & REQ_ASYNC) { |
continue; |
} |
/* the sleeper will free the req and data */ |
req->rq_flags |= REQ_DEAD; |
wake_up(&req->rq_sleep); |
} |
|
CDEBUG(D_PSDEV, "Wake up clients sleeping for processing\n"); |
lh = &channel->uc_processing; |
while ( (lh = lh->next) != &channel->uc_processing) { |
req = list_entry(lh, struct upc_req, rq_chain); |
/* freeing of req and data is done by the sleeper */ |
req->rq_flags |= REQ_DEAD; |
wake_up(&req->rq_sleep); |
} |
spin_unlock(&channel->uc_lock); |
CDEBUG(D_PSDEV, "Done.\n"); |
|
EXIT; |
return 0; |
} |
|
static struct file_operations presto_psdev_fops = { |
.read = presto_psdev_read, |
.write = presto_psdev_write, |
.poll = presto_psdev_poll, |
.open = presto_psdev_open, |
.release = presto_psdev_release |
}; |
|
/* modules setup */ |
static struct miscdevice intermezzo_psdev = { |
INTERMEZZO_MINOR, |
"intermezzo", |
&presto_psdev_fops |
}; |
|
int presto_psdev_init(void) |
{ |
int i; |
int err; |
|
if ( (err = misc_register(&intermezzo_psdev)) ) { |
CERROR("%s: cannot register %d err %d\n", |
__FUNCTION__, INTERMEZZO_MINOR, err); |
return -EIO; |
} |
|
memset(&izo_channels, 0, sizeof(izo_channels)); |
for ( i = 0 ; i < MAX_CHANNEL ; i++ ) { |
struct upc_channel *channel = &(izo_channels[i]); |
INIT_LIST_HEAD(&channel->uc_pending); |
INIT_LIST_HEAD(&channel->uc_processing); |
INIT_LIST_HEAD(&channel->uc_cache_list); |
init_waitqueue_head(&channel->uc_waitq); |
channel->uc_lock = SPIN_LOCK_UNLOCKED; |
channel->uc_hard = 0; |
channel->uc_no_filter = 0; |
channel->uc_no_journal = 0; |
channel->uc_no_upcall = 0; |
channel->uc_timeout = 30; |
channel->uc_errorval = 0; |
channel->uc_minor = i; |
} |
return 0; |
} |
|
void presto_psdev_cleanup(void) |
{ |
int i; |
|
misc_deregister(&intermezzo_psdev); |
|
for ( i = 0 ; i < MAX_CHANNEL ; i++ ) { |
struct upc_channel *channel = &(izo_channels[i]); |
struct list_head *lh; |
|
spin_lock(&channel->uc_lock); |
if ( ! list_empty(&channel->uc_pending)) { |
CERROR("Weird, tell Peter: module cleanup and pending list not empty dev %d\n", i); |
} |
if ( ! list_empty(&channel->uc_processing)) { |
CERROR("Weird, tell Peter: module cleanup and processing list not empty dev %d\n", i); |
} |
if ( ! list_empty(&channel->uc_cache_list)) { |
CERROR("Weird, tell Peter: module cleanup and cache listnot empty dev %d\n", i); |
} |
lh = channel->uc_pending.next; |
while ( lh != &channel->uc_pending) { |
struct upc_req *req; |
|
req = list_entry(lh, struct upc_req, rq_chain); |
lh = lh->next; |
if ( req->rq_flags & REQ_ASYNC ) { |
list_del(&(req->rq_chain)); |
CDEBUG(D_UPCALL, "free pending upcall type %d\n", |
req->rq_opcode); |
PRESTO_FREE(req->rq_data, req->rq_bufsize); |
PRESTO_FREE(req, sizeof(struct upc_req)); |
} else { |
req->rq_flags |= REQ_DEAD; |
wake_up(&req->rq_sleep); |
} |
} |
lh = &channel->uc_processing; |
while ( (lh = lh->next) != &channel->uc_processing ) { |
struct upc_req *req; |
req = list_entry(lh, struct upc_req, rq_chain); |
list_del(&(req->rq_chain)); |
req->rq_flags |= REQ_DEAD; |
wake_up(&req->rq_sleep); |
} |
spin_unlock(&channel->uc_lock); |
} |
} |
|
/* |
* lento_upcall and lento_downcall routines |
*/ |
static inline unsigned long lento_waitfor_upcall |
(struct upc_channel *channel, struct upc_req *req, int minor) |
{ |
DECLARE_WAITQUEUE(wait, current); |
unsigned long posttime; |
|
req->rq_posttime = posttime = jiffies; |
|
add_wait_queue(&req->rq_sleep, &wait); |
for (;;) { |
if ( izo_channels[minor].uc_hard == 0 ) |
set_current_state(TASK_INTERRUPTIBLE); |
else |
set_current_state(TASK_UNINTERRUPTIBLE); |
|
/* got a reply */ |
if ( req->rq_flags & (REQ_WRITE | REQ_DEAD) ) |
break; |
|
/* these cases only apply when TASK_INTERRUPTIBLE */ |
if ( !izo_channels[minor].uc_hard && signal_pending(current) ) { |
/* if this process really wants to die, let it go */ |
if (sigismember(&(current->pending.signal), SIGKILL)|| |
sigismember(&(current->pending.signal), SIGINT) ) |
break; |
/* signal is present: after timeout always return |
really smart idea, probably useless ... */ |
if ( time_after(jiffies, req->rq_posttime + |
izo_channels[minor].uc_timeout * HZ) ) |
break; |
} |
schedule(); |
} |
|
spin_lock(&channel->uc_lock); |
list_del_init(&req->rq_chain); |
spin_unlock(&channel->uc_lock); |
remove_wait_queue(&req->rq_sleep, &wait); |
set_current_state(TASK_RUNNING); |
|
CDEBUG(D_SPECIAL, "posttime: %ld, returned: %ld\n", |
posttime, jiffies-posttime); |
return (jiffies - posttime); |
} |
|
/* |
* lento_upcall will return an error in the case of |
* failed communication with Lento _or_ will peek at Lento |
* reply and return Lento's error. |
* |
* As lento has 2 types of errors, normal errors (positive) and internal |
* errors (negative), normal errors are negated, while internal errors |
* are all mapped to -EINTR, while showing a nice warning message. (jh) |
* |
* lento_upcall will always free buffer, either directly, when an upcall |
* is read (in presto_psdev_read), when the filesystem is unmounted, or |
* when the module is unloaded. |
*/ |
int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *buffer, |
int async) |
{ |
unsigned long runtime; |
struct upc_channel *channel; |
struct izo_upcall_resp *out; |
struct upc_req *req; |
int error = 0; |
|
ENTRY; |
channel = &(izo_channels[minor]); |
|
if (channel->uc_no_upcall) { |
EXIT; |
goto exit_buf; |
} |
if (!channel->uc_pid && !async) { |
EXIT; |
error = -ENXIO; |
goto exit_buf; |
} |
|
/* Format the request message. */ |
PRESTO_ALLOC(req, sizeof(struct upc_req)); |
if ( !req ) { |
EXIT; |
error = -ENOMEM; |
goto exit_buf; |
} |
req->rq_data = (void *)buffer; |
req->rq_flags = 0; |
req->rq_bufsize = *size; |
req->rq_rep_size = 0; |
req->rq_opcode = buffer->u_opc; |
req->rq_unique = ++channel->uc_seq; |
init_waitqueue_head(&req->rq_sleep); |
|
/* Fill in the common input args. */ |
buffer->u_uniq = req->rq_unique; |
buffer->u_async = async; |
|
spin_lock(&channel->uc_lock); |
/* Append msg to pending queue and poke Lento. */ |
list_add(&req->rq_chain, channel->uc_pending.prev); |
spin_unlock(&channel->uc_lock); |
CDEBUG(D_UPCALL, |
"Proc %d waking Lento %d for(opc,uniq) =(%d,%d) msg at %p.\n", |
current->pid, channel->uc_pid, req->rq_opcode, |
req->rq_unique, req); |
wake_up_interruptible(&channel->uc_waitq); |
|
if ( async ) { |
/* req, rq_data are freed in presto_psdev_read for async */ |
req->rq_flags = REQ_ASYNC; |
EXIT; |
return 0; |
} |
|
/* We can be interrupted while we wait for Lento to process |
* our request. If the interrupt occurs before Lento has read |
* the request, we dequeue and return. If it occurs after the |
* read but before the reply, we dequeue, send a signal |
* message, and return. If it occurs after the reply we ignore |
* it. In no case do we want to restart the syscall. If it |
* was interrupted by a lento shutdown (psdev_close), return |
* ENODEV. */ |
|
/* Go to sleep. Wake up on signals only after the timeout. */ |
runtime = lento_waitfor_upcall(channel, req, minor); |
|
CDEBUG(D_TIMING, "opc: %d time: %ld uniq: %d size: %d\n", |
req->rq_opcode, jiffies - req->rq_posttime, |
req->rq_unique, req->rq_rep_size); |
CDEBUG(D_UPCALL, |
"..process %d woken up by Lento for req at 0x%x, data at %x\n", |
current->pid, (int)req, (int)req->rq_data); |
|
if (channel->uc_pid) { /* i.e. Lento is still alive */ |
/* Op went through, interrupt or not we go on */ |
if (req->rq_flags & REQ_WRITE) { |
out = (struct izo_upcall_resp *)req->rq_data; |
/* here we map positive Lento errors to kernel errors */ |
if ( out->result < 0 ) { |
CERROR("Tell Peter: Lento returns negative error %d, for oc %d!\n", |
out->result, out->opcode); |
out->result = EINVAL; |
} |
error = -out->result; |
CDEBUG(D_UPCALL, "upcall: (u,o,r) (%d, %d, %d) out at %p\n", |
out->unique, out->opcode, out->result, out); |
*size = req->rq_rep_size; |
EXIT; |
goto exit_req; |
} |
/* Interrupted before lento read it. */ |
if ( !(req->rq_flags & REQ_READ) && signal_pending(current)) { |
CDEBUG(D_UPCALL, |
"Interrupt before read: (op,un)=(%d,%d), flags %x\n", |
req->rq_opcode, req->rq_unique, req->rq_flags); |
/* perhaps the best way to convince the app to give up? */ |
error = -EINTR; |
EXIT; |
goto exit_req; |
} |
|
/* interrupted after Lento did its read, send signal */ |
if ( (req->rq_flags & REQ_READ) && signal_pending(current) ) { |
CDEBUG(D_UPCALL,"Interrupt after read: op = %d.%d, flags = %x\n", |
req->rq_opcode, req->rq_unique, req->rq_flags); |
|
error = -EINTR; |
} else { |
CERROR("Lento: Strange interruption - tell Peter.\n"); |
error = -EINTR; |
} |
} else { /* If lento died i.e. !UC_OPEN(channel) */ |
CERROR("lento_upcall: Lento dead on (op,un) (%d.%d) flags %d\n", |
req->rq_opcode, req->rq_unique, req->rq_flags); |
error = -ENODEV; |
} |
|
exit_req: |
PRESTO_FREE(req, sizeof(struct upc_req)); |
exit_buf: |
return error; |
} |
/journal_tmpfs.c
0,0 → 1,109
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com> |
* Copyright (C) 2000 Red Hat, Inc. |
* Copyright (C) 2000 Los Alamos National Laboratory |
* Copyright (C) 2000 TurboLinux, Inc. |
* Copyright (C) 2001 Mountain View Data, Inc. |
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <linux/types.h> |
#include <linux/param.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#include <linux/smp_lock.h> |
#if defined(CONFIG_TMPFS) |
#include <linux/jbd.h> |
#if defined(CONFIG_EXT3) |
#include <linux/ext3_fs.h> |
#include <linux/ext3_jbd.h> |
#endif |
#endif |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#if defined(CONFIG_TMPFS) |
|
/* space requirements: |
presto_do_truncate: |
used to truncate the KML forward to next fset->chunksize boundary |
- zero partial block |
- update inode |
presto_write_record: |
write header (< one block) |
write one path (< MAX_PATHLEN) |
possibly write another path (< MAX_PATHLEN) |
write suffix (< one block) |
presto_update_last_rcvd |
write one block |
*/ |
|
/*
 * Free-space query for the tmpfs journal operations.  Neither argument is
 * consulted; a constant 2^30 is always returned.
 */
static loff_t presto_tmpfs_freespace(struct presto_cache *cache,
                                     struct super_block *sb)
{
        const loff_t fixed_avail = 1 << 30;

        return fixed_avail;
}
|
/*
 * Start a journal transaction.  Nothing is actually started for tmpfs:
 * the arguments are ignored and a fixed non-NULL dummy handle is
 * returned.
 */
static void *presto_tmpfs_trans_start(struct presto_file_set *fset,
                                      struct inode *inode,
                                      int op)
{
        void *dummy_handle = (void *)1;

        return dummy_handle;
}
|
/* Commit a journal transaction: a no-op here, both arguments are ignored. */
static void presto_tmpfs_trans_commit(struct presto_file_set *fset, void *handle)
{
}
|
/* Journal the data of an inode: a no-op here, the inode is ignored. */
static void presto_tmpfs_journal_file_data(struct inode *inode)
{
}
|
/*
 * "Has all data" query for an inode.  This implementation performs no
 * block inspection at all: it ignores the inode and always answers 0.
 */
static int presto_tmpfs_has_all_data(struct inode *inode)
{
        const int has_all = 0;

        return has_all;
}
|
struct journal_ops presto_tmpfs_journal_ops = { |
tr_all_data: presto_tmpfs_has_all_data, |
tr_avail: presto_tmpfs_freespace, |
tr_start: presto_tmpfs_trans_start, |
tr_commit: presto_tmpfs_trans_commit, |
tr_journal_data: presto_tmpfs_journal_file_data, |
tr_ilookup: presto_tmpfs_ilookup, |
tr_add_ilookup: presto_add_ilookup_dentry |
}; |
|
#endif /* CONFIG_TMPFS */ |
/kml_unpack.c
0,0 → 1,708
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Unpacking of KML records |
* |
*/ |
|
#ifdef __KERNEL__ |
# include <linux/module.h> |
# include <linux/errno.h> |
# include <linux/kernel.h> |
# include <linux/major.h> |
# include <linux/sched.h> |
# include <linux/lp.h> |
# include <linux/slab.h> |
# include <linux/ioport.h> |
# include <linux/fcntl.h> |
# include <linux/delay.h> |
# include <linux/skbuff.h> |
# include <linux/proc_fs.h> |
# include <linux/vmalloc.h> |
# include <linux/fs.h> |
# include <linux/poll.h> |
# include <linux/init.h> |
# include <linux/list.h> |
# include <linux/stat.h> |
# include <asm/io.h> |
# include <asm/segment.h> |
# include <asm/system.h> |
# include <asm/poll.h> |
# include <asm/uaccess.h> |
#else |
# include <time.h> |
# include <stdio.h> |
# include <string.h> |
# include <stdlib.h> |
# include <errno.h> |
# include <sys/stat.h> |
# include <glib.h> |
#endif |
|
#include <linux/intermezzo_lib.h> |
#include <linux/intermezzo_idl.h> |
#include <linux/intermezzo_fs.h> |
|
int kml_unpack_version(struct presto_version **ver, char **buf, char *end) |
{ |
char *ptr = *buf; |
struct presto_version *pv; |
|
UNLOGP(*ver, struct presto_version, ptr, end); |
pv = *ver; |
pv->pv_mtime = NTOH__u64(pv->pv_mtime); |
pv->pv_ctime = NTOH__u64(pv->pv_ctime); |
pv->pv_size = NTOH__u64(pv->pv_size); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
/* NOOP records carry no body: nothing is read, *buf is left untouched. */
static int kml_unpack_noop(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        return status;
}
|
|
static int kml_unpack_get_fileid(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
LUNLOGV(rec->pathlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
|
*buf = ptr; |
return 0; |
} |
|
static int kml_unpack_create(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->mode, __u32, ptr, end); |
LUNLOGV(rec->uid, __u32, ptr, end); |
LUNLOGV(rec->gid, __u32, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_mkdir(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->mode, __u32, ptr, end); |
LUNLOGV(rec->uid, __u32, ptr, end); |
LUNLOGV(rec->gid, __u32, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_unlink(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
kml_unpack_version(&rec->old_objectv, &ptr, end); |
LUNLOGV(rec->old_mode, __u32, ptr, end); |
LUNLOGV(rec->old_rdev, __u32, ptr, end); |
LUNLOGV(rec->old_uid, __u64, ptr, end); |
LUNLOGV(rec->old_gid, __u64, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
LUNLOGV(rec->targetlen, __u32, ptr, end); |
LUNLOGV(rec->old_targetlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
UNLOGL(rec->target, char, rec->targetlen, ptr, end); |
UNLOGL(rec->old_target, char, rec->old_targetlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_rmdir(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
kml_unpack_version(&rec->old_objectv, &ptr, end); |
LUNLOGV(rec->old_mode, __u32, ptr, end); |
LUNLOGV(rec->old_rdev, __u32, ptr, end); |
LUNLOGV(rec->old_uid, __u64, ptr, end); |
LUNLOGV(rec->old_gid, __u64, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
LUNLOGV(rec->targetlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
UNLOGL(rec->target, char, rec->targetlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_close(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
LUNLOGV(rec->mode, __u32, ptr, end); // used for open_mode |
LUNLOGV(rec->uid, __u32, ptr, end); // used for open_uid |
LUNLOGV(rec->gid, __u32, ptr, end); // used for open_gid |
kml_unpack_version(&rec->old_objectv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->ino, __u64, ptr, end); |
LUNLOGV(rec->generation, __u32, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_symlink(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->uid, __u32, ptr, end); |
LUNLOGV(rec->gid, __u32, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
LUNLOGV(rec->targetlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
UNLOGL(rec->target, char, rec->targetlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_rename(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_objectv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
LUNLOGV(rec->targetlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
UNLOGL(rec->target, char, rec->targetlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_setattr(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_objectv, &ptr, end); |
LUNLOGV(rec->valid, __u32, ptr, end); |
LUNLOGV(rec->mode, __u32, ptr, end); |
LUNLOGV(rec->uid, __u32, ptr, end); |
LUNLOGV(rec->gid, __u32, ptr, end); |
LUNLOGV(rec->size, __u64, ptr, end); |
LUNLOGV(rec->mtime, __u64, ptr, end); |
LUNLOGV(rec->ctime, __u64, ptr, end); |
LUNLOGV(rec->flags, __u32, ptr, end); |
LUNLOGV(rec->old_mode, __u32, ptr, end); |
LUNLOGV(rec->old_rdev, __u32, ptr, end); |
LUNLOGV(rec->old_uid, __u64, ptr, end); |
LUNLOGV(rec->old_gid, __u64, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_link(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
LUNLOGV(rec->targetlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
UNLOGL(rec->target, char, rec->targetlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
static int kml_unpack_mknod(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_parentv, &ptr, end); |
kml_unpack_version(&rec->new_parentv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->mode, __u32, ptr, end); |
LUNLOGV(rec->uid, __u32, ptr, end); |
LUNLOGV(rec->gid, __u32, ptr, end); |
LUNLOGV(rec->major, __u32, ptr, end); |
LUNLOGV(rec->minor, __u32, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
/*
 * WRITE records are not decoded: a notice is printed, nothing is read
 * from *buf, and 0 is returned.
 */
static int kml_unpack_write(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        printf("NOT IMPLEMENTED");
        return status;
}
|
|
/*
 * RELEASE records are not decoded: a notice is printed, nothing is read
 * from *buf, and 0 is returned.
 */
static int kml_unpack_release(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        printf("NOT IMPLEMENTED");
        return status;
}
|
|
/*
 * TRUNC records are not decoded: a notice is printed, nothing is read
 * from *buf, and 0 is returned.
 */
static int kml_unpack_trunc(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        printf("NOT IMPLEMENTED");
        return status;
}
|
|
static int kml_unpack_setextattr(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_objectv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->flags, __u32, ptr, end); |
LUNLOGV(rec->mode, __u32, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
LUNLOGV(rec->namelen, __u32, ptr, end); |
LUNLOGV(rec->targetlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
UNLOGL(rec->name, char, rec->namelen, ptr, end); |
UNLOGL(rec->target, char, rec->targetlen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
|
static int kml_unpack_delextattr(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
kml_unpack_version(&rec->old_objectv, &ptr, end); |
kml_unpack_version(&rec->new_objectv, &ptr, end); |
LUNLOGV(rec->flags, __u32, ptr, end); |
LUNLOGV(rec->mode, __u32, ptr, end); |
LUNLOGV(rec->pathlen, __u32, ptr, end); |
LUNLOGV(rec->namelen, __u32, ptr, end); |
LUNLOGV(rec->targetlen, __u32, ptr, end); |
UNLOGL(rec->path, char, rec->pathlen, ptr, end); |
UNLOGL(rec->name, char, rec->namelen, ptr, end); |
|
*buf = ptr; |
|
return 0; |
} |
|
/*
 * OPEN records are not decoded: a notice is printed, nothing is read
 * from *buf, and 0 is returned.
 */
static int kml_unpack_open(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        printf("NOT IMPLEMENTED");
        return status;
}
|
/*
 * KML_TRUNC records are not decoded: a notice is printed, nothing is
 * read from *buf, and 0 is returned.
 */
static int kml_unpack_kml_trunc(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        printf("NOT IMPLEMENTED");
        return status;
}
|
|
typedef int (*unpacker)(struct kml_rec *rec, char **buf, char *end); |
|
static unpacker unpackers[KML_OPCODE_NUM] = |
{ |
[KML_OPCODE_NOOP] = kml_unpack_noop, |
[KML_OPCODE_CREATE] = kml_unpack_create, |
[KML_OPCODE_MKDIR] = kml_unpack_mkdir, |
[KML_OPCODE_UNLINK] = kml_unpack_unlink, |
[KML_OPCODE_RMDIR] = kml_unpack_rmdir, |
[KML_OPCODE_CLOSE] = kml_unpack_close, |
[KML_OPCODE_SYMLINK] = kml_unpack_symlink, |
[KML_OPCODE_RENAME] = kml_unpack_rename, |
[KML_OPCODE_SETATTR] = kml_unpack_setattr, |
[KML_OPCODE_LINK] = kml_unpack_link, |
[KML_OPCODE_OPEN] = kml_unpack_open, |
[KML_OPCODE_MKNOD] = kml_unpack_mknod, |
[KML_OPCODE_WRITE] = kml_unpack_write, |
[KML_OPCODE_RELEASE] = kml_unpack_release, |
[KML_OPCODE_TRUNC] = kml_unpack_trunc, |
[KML_OPCODE_SETEXTATTR] = kml_unpack_setextattr, |
[KML_OPCODE_DELEXTATTR] = kml_unpack_delextattr, |
[KML_OPCODE_KML_TRUNC] = kml_unpack_kml_trunc, |
[KML_OPCODE_GET_FILEID] = kml_unpack_get_fileid |
}; |
|
int kml_unpack_prefix(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
int n; |
|
UNLOGP(rec->prefix.hdr, struct kml_prefix_hdr, ptr, end); |
rec->prefix.hdr->len = NTOH__u32(rec->prefix.hdr->len); |
rec->prefix.hdr->version = NTOH__u32(rec->prefix.hdr->version); |
rec->prefix.hdr->pid = NTOH__u32(rec->prefix.hdr->pid); |
rec->prefix.hdr->auid = NTOH__u32(rec->prefix.hdr->auid); |
rec->prefix.hdr->fsuid = NTOH__u32(rec->prefix.hdr->fsuid); |
rec->prefix.hdr->fsgid = NTOH__u32(rec->prefix.hdr->fsgid); |
rec->prefix.hdr->opcode = NTOH__u32(rec->prefix.hdr->opcode); |
rec->prefix.hdr->ngroups = NTOH__u32(rec->prefix.hdr->ngroups); |
|
UNLOGL(rec->prefix.groups, __u32, rec->prefix.hdr->ngroups, ptr, end); |
for (n = 0; n < rec->prefix.hdr->ngroups; n++) { |
rec->prefix.groups[n] = NTOH__u32(rec->prefix.groups[n]); |
} |
|
*buf = ptr; |
|
return 0; |
} |
|
int kml_unpack_suffix(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
|
UNLOGP(rec->suffix, struct kml_suffix, ptr, end); |
rec->suffix->prevrec = NTOH__u32(rec->suffix->prevrec); |
rec->suffix->recno = NTOH__u32(rec->suffix->recno); |
rec->suffix->time = NTOH__u32(rec->suffix->time); |
rec->suffix->len = NTOH__u32(rec->suffix->len); |
|
*buf = ptr; |
|
return 0; |
} |
|
int kml_unpack(struct kml_rec *rec, char **buf, char *end) |
{ |
char *ptr = *buf; |
int err; |
|
if (((unsigned long)ptr % 4) != 0) { |
printf("InterMezzo: %s: record misaligned.\n", __FUNCTION__); |
return -EINVAL; |
} |
|
while (ptr < end) { |
__u32 *i = (__u32 *)ptr; |
if (*i) |
break; |
ptr += sizeof(*i); |
} |
*buf = ptr; |
|
memset(rec, 0, sizeof(*rec)); |
|
err = kml_unpack_prefix(rec, &ptr, end); |
if (err) { |
printf("InterMezzo: %s: unpack_prefix failed: %d\n", |
__FUNCTION__, err); |
return err; |
} |
|
if (rec->prefix.hdr->opcode < 0 || |
rec->prefix.hdr->opcode >= KML_OPCODE_NUM) { |
printf("InterMezzo: %s: invalid opcode (%d)\n", |
__FUNCTION__, rec->prefix.hdr->opcode); |
return -EINVAL; |
} |
err = unpackers[rec->prefix.hdr->opcode](rec, &ptr, end); |
if (err) { |
printf("InterMezzo: %s: unpacker failed: %d\n", |
__FUNCTION__, err); |
return err; |
} |
|
err = kml_unpack_suffix(rec, &ptr, end); |
if (err) { |
printf("InterMezzo: %s: unpack_suffix failed: %d\n", |
__FUNCTION__, err); |
return err; |
} |
|
|
if (rec->prefix.hdr->len != rec->suffix->len) { |
printf("InterMezzo: %s: lengths don't match\n", |
__FUNCTION__); |
return -EINVAL; |
} |
if ((rec->prefix.hdr->len % 4) != 0) { |
printf("InterMezzo: %s: record length not a " |
"multiple of 4.\n", __FUNCTION__); |
return -EINVAL; |
} |
if (ptr - *buf != rec->prefix.hdr->len) { |
printf("InterMezzo: %s: unpacking error\n", |
__FUNCTION__); |
return -EINVAL; |
} |
while (ptr < end) { |
__u32 *i = (__u32 *)ptr; |
if (*i) |
break; |
ptr += sizeof(*i); |
} |
*buf = ptr; |
return 0; |
} |
|
|
#ifndef __KERNEL__ |
/* Map a NULL string pointer to "" so the result is always printable.
 * The whole expansion is parenthesized so that it stays a single operand
 * wherever the macro is used. */
#define STR(ptr) ((ptr) ? (ptr) : "")
|
#define OPNAME(n) [KML_OPCODE_##n] = #n |
static char *opnames[KML_OPCODE_NUM] = { |
OPNAME(NOOP), |
OPNAME(CREATE), |
OPNAME(MKDIR), |
OPNAME(UNLINK), |
OPNAME(RMDIR), |
OPNAME(CLOSE), |
OPNAME(SYMLINK), |
OPNAME(RENAME), |
OPNAME(SETATTR), |
OPNAME(LINK), |
OPNAME(OPEN), |
OPNAME(MKNOD), |
OPNAME(WRITE), |
OPNAME(RELEASE), |
OPNAME(TRUNC), |
OPNAME(SETEXTATTR), |
OPNAME(DELEXTATTR), |
OPNAME(KML_TRUNC), |
OPNAME(GET_FILEID) |
}; |
#undef OPNAME |
|
static char *print_opname(int op) |
{ |
if (op < 0 || op >= sizeof (opnames) / sizeof (*opnames)) |
return NULL; |
return opnames[op]; |
} |
|
|
/*
 * Format the time stamp i (seconds since the epoch) as
 * "YYYY/MM/DD HH:MM:SS" in UTC.
 *
 * The value is copied into a real time_t before it is handed to gmtime():
 * reinterpreting the address of a 64-bit integer as a time_t pointer is
 * only correct when time_t happens to have the same size and layout.
 * If gmtime() cannot represent the value an empty string is produced.
 *
 * Returns a strdup()ed string the caller must free(), or NULL when
 * strdup() fails.
 */
static char *print_time(__u64 i)
{
        char buf[128];
        time_t when = (time_t)i;
        struct tm *utc;

        memset(buf, 0, sizeof(buf));

        utc = gmtime(&when);
        if (utc != NULL)
                strftime(buf, sizeof(buf), "%Y/%m/%d %H:%M:%S", utc);

        return strdup(buf);
}
|
static char *print_version(struct presto_version *ver) |
{ |
char ver_buf[128]; |
char *mtime; |
char *ctime; |
|
if (!ver || ver->pv_ctime == 0) { |
return strdup(""); |
} |
mtime = print_time(ver->pv_mtime); |
ctime = print_time(ver->pv_ctime); |
sprintf(ver_buf, "mtime %s, ctime %s, len %lld", |
mtime, ctime, ver->pv_size); |
free(mtime); |
free(ctime); |
return strdup(ver_buf); |
} |
|
|
char *kml_print_rec(struct kml_rec *rec, int brief) |
{ |
char *str; |
char *nov, *oov, *ntv, *otv, *npv, *opv; |
char *rectime, *mtime, *ctime; |
|
if (brief) { |
str = g_strdup_printf(" %08d %7s %*s %*s", |
rec->suffix->recno, |
print_opname (rec->prefix.hdr->opcode), |
rec->pathlen, STR(rec->path), |
rec->targetlen, STR(rec->target)); |
|
return str; |
} |
|
rectime = print_time(rec->suffix->time); |
mtime = print_time(rec->mtime); |
ctime = print_time(rec->ctime); |
|
nov = print_version(rec->new_objectv); |
oov = print_version(rec->old_objectv); |
ntv = print_version(rec->new_targetv); |
otv = print_version(rec->old_targetv); |
npv = print_version(rec->new_parentv); |
opv = print_version(rec->old_parentv); |
|
str = g_strdup_printf("\n -- Record:\n" |
" Recno %d\n" |
" KML off %lld\n" |
" Version %d\n" |
" Len %d\n" |
" Suf len %d\n" |
" Time %s\n" |
" Opcode %d\n" |
" Op %s\n" |
" Pid %d\n" |
" AUid %d\n" |
" Fsuid %d\n" |
" Fsgid %d\n" |
" Prevrec %d\n" |
" Ngroups %d\n" |
//" Groups @{$self->{groups}}\n" |
" -- Path:\n" |
" Inode %d\n" |
" Gen num %u\n" |
" Old mode %o\n" |
" Old rdev %x\n" |
" Old uid %llu\n" |
" Old gid %llu\n" |
" Path %*s\n" |
//" Open_mode %o\n", |
" Pathlen %d\n" |
" Tgt %*s\n" |
" Tgtlen %d\n" |
" Old Tgt %*s\n" |
" Old Tgtln %d\n" |
" -- Attr:\n" |
" Valid %x\n" |
" mode %o, uid %d, gid %d, size %lld, mtime %s, ctime %s rdev %x (%d:%d)\n" |
" -- Versions:\n" |
" New object %s\n" |
" Old object %s\n" |
" New target %s\n" |
" Old target %s\n" |
" New parent %s\n" |
" Old parent %s\n", |
|
rec->suffix->recno, |
rec->offset, |
rec->prefix.hdr->version, |
rec->prefix.hdr->len, |
rec->suffix->len, |
rectime, |
rec->prefix.hdr->opcode, |
print_opname (rec->prefix.hdr->opcode), |
rec->prefix.hdr->pid, |
rec->prefix.hdr->auid, |
rec->prefix.hdr->fsuid, |
rec->prefix.hdr->fsgid, |
rec->suffix->prevrec, |
rec->prefix.hdr->ngroups, |
rec->ino, |
rec->generation, |
rec->old_mode, |
rec->old_rdev, |
rec->old_uid, |
rec->old_gid, |
rec->pathlen, |
STR(rec->path), |
rec->pathlen, |
rec->targetlen, |
STR(rec->target), |
rec->targetlen, |
rec->old_targetlen, |
STR(rec->old_target), |
rec->old_targetlen, |
|
rec->valid, |
rec->mode, |
rec->uid, |
rec->gid, |
rec->size, |
mtime, |
ctime, |
rec->rdev, rec->major, rec->minor, |
nov, oov, ntv, otv, npv, opv); |
|
free(nov); |
free(oov); |
free(ntv); |
free(otv); |
free(npv); |
free(opv); |
|
free(rectime); |
free(ctime); |
free(mtime); |
|
return str; |
} |
#endif |
/kml_reint.c
0,0 → 1,630
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Reintegration of KML records |
* |
*/ |
|
#define __NO_VERSION__ |
#include <linux/module.h> |
#include <linux/errno.h> |
#include <linux/fs.h> |
#include <linux/kernel.h> |
#include <linux/major.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/mm.h> |
#include <asm/uaccess.h> |
#include <asm/pgtable.h> |
#include <asm/mmu_context.h> |
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
static void kmlreint_pre_secure(struct kml_rec *rec, struct file *dir, |
struct run_ctxt *saved) |
{ |
struct run_ctxt ctxt; |
struct presto_dentry_data *dd = presto_d2d(dir->f_dentry); |
int i; |
|
ctxt.fsuid = rec->prefix.hdr->fsuid; |
ctxt.fsgid = rec->prefix.hdr->fsgid; |
ctxt.fs = KERNEL_DS; |
ctxt.pwd = dd->dd_fset->fset_dentry; |
ctxt.pwdmnt = dd->dd_fset->fset_mnt; |
|
ctxt.root = ctxt.pwd; |
ctxt.rootmnt = ctxt.pwdmnt; |
if (rec->prefix.hdr->ngroups > 0) { |
ctxt.ngroups = rec->prefix.hdr->ngroups; |
for (i = 0; i< ctxt.ngroups; i++) |
ctxt.groups[i] = rec->prefix.groups[i]; |
} else |
ctxt.ngroups = 0; |
|
push_ctxt(saved, &ctxt); |
} |
|
|
/* Append two strings in a less-retarded fashion. */ |
static char * path_join(char *p1, int p1len, char *p2, int p2len) |
{ |
int size = p1len + p2len + 2; /* possibly one extra /, one NULL */ |
char *path; |
|
path = kmalloc(size, GFP_KERNEL); |
if (path == NULL) |
return NULL; |
|
memcpy(path, p1, p1len); |
if (path[p1len - 1] != '/') { |
path[p1len] = '/'; |
p1len++; |
} |
memcpy(path + p1len, p2, p2len); |
path[p1len + p2len] = '\0'; |
|
return path; |
} |
|
static inline int kml_recno_equal(struct kml_rec *rec, |
struct presto_file_set *fset) |
{ |
return (rec->suffix->recno == fset->fset_lento_recno + 1); |
} |
|
static inline int version_equal(struct presto_version *a, struct inode *inode) |
{ |
if (a == NULL) |
return 1; |
|
if (inode == NULL) { |
CERROR("InterMezzo: NULL inode in version_equal()\n"); |
return 0; |
} |
|
if (inode->i_mtime == a->pv_mtime && |
(S_ISDIR(inode->i_mode) || inode->i_size == a->pv_size)) |
return 1; |
|
return 0; |
} |
|
static int reint_close(struct kml_rec *rec, struct file *file, |
struct lento_vfs_context *given_info) |
{ |
struct run_ctxt saved_ctxt; |
int error; |
struct presto_file_set *fset; |
struct lento_vfs_context info; |
ENTRY; |
|
memcpy(&info, given_info, sizeof(*given_info)); |
|
|
CDEBUG (D_KML, "=====REINT_CLOSE::%s\n", rec->path); |
|
fset = presto_fset(file->f_dentry); |
if (fset->fset_flags & FSET_DATA_ON_DEMAND) { |
struct iattr iattr; |
|
iattr.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_SIZE; |
iattr.ia_mtime = (time_t)rec->new_objectv->pv_mtime; |
iattr.ia_ctime = (time_t)rec->new_objectv->pv_ctime; |
iattr.ia_size = (time_t)rec->new_objectv->pv_size; |
|
/* no kml record, but update last rcvd */ |
/* save fileid in dentry for later backfetch */ |
info.flags |= LENTO_FL_EXPECT | LENTO_FL_SET_DDFILEID; |
info.remote_ino = rec->ino; |
info.remote_generation = rec->generation; |
info.flags &= ~LENTO_FL_KML; |
kmlreint_pre_secure(rec, file, &saved_ctxt); |
error = lento_setattr(rec->path, &iattr, &info); |
pop_ctxt(&saved_ctxt); |
|
presto_d2d(file->f_dentry)->dd_flags &= ~PRESTO_DATA; |
} else { |
int minor = presto_f2m(fset); |
|
info.updated_time = rec->new_objectv->pv_mtime; |
memcpy(&info.remote_version, rec->old_objectv, |
sizeof(*rec->old_objectv)); |
info.remote_ino = rec->ino; |
info.remote_generation = rec->generation; |
error = izo_upc_backfetch(minor, rec->path, fset->fset_name, |
&info); |
if (error) { |
CERROR("backfetch error %d\n", error); |
/* if file doesn't exist anymore, then ignore the CLOSE |
* and just update the last_rcvd. |
*/ |
if (error == ENOENT) { |
CDEBUG(D_KML, "manually updating remote offset uuid %s" |
"recno %d offset %Lu\n", info.uuid, info.recno, info.kml_offset); |
error = izo_rcvd_upd_remote(fset, info.uuid, info.recno, info.kml_offset); |
if(error) |
CERROR("izo_rcvd_upd_remote error %d\n", error); |
|
} |
} |
|
/* propagate error to avoid further reint */ |
} |
|
EXIT; |
return error; |
} |
|
static int reint_create(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error; ENTRY; |
|
CDEBUG (D_KML, "=====REINT_CREATE::%s\n", rec->path); |
info->updated_time = rec->new_objectv->pv_ctime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_create(rec->path, rec->mode, info); |
pop_ctxt(&saved_ctxt); |
|
EXIT; |
return error; |
} |
|
static int reint_link(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error; |
|
ENTRY; |
|
CDEBUG (D_KML, "=====REINT_LINK::%s -> %s\n", rec->path, rec->target); |
info->updated_time = rec->new_objectv->pv_mtime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_link(rec->path, rec->target, info); |
pop_ctxt(&saved_ctxt); |
|
EXIT; |
return error; |
} |
|
static int reint_mkdir(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error; |
|
ENTRY; |
|
CDEBUG (D_KML, "=====REINT_MKDIR::%s\n", rec->path); |
info->updated_time = rec->new_objectv->pv_ctime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_mkdir(rec->path, rec->mode, info); |
pop_ctxt(&saved_ctxt); |
|
EXIT; |
return error; |
} |
|
static int reint_mknod(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error, dev; |
|
ENTRY; |
|
CDEBUG (D_KML, "=====REINT_MKNOD::%s\n", rec->path); |
info->updated_time = rec->new_objectv->pv_ctime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
|
dev = rec->rdev ?: MKDEV(rec->major, rec->minor); |
|
error = lento_mknod(rec->path, rec->mode, dev, info); |
pop_ctxt(&saved_ctxt); |
|
EXIT; |
return error; |
} |
|
|
/*
 * Reinter for KML NOOP records: nothing is replayed and the arguments are
 * not touched.  Always returns 0.
 */
static int reint_noop(struct kml_rec *rec, struct file *dir,
                      struct lento_vfs_context *info)
{
        const int status = 0;

        return status;
}
|
static int reint_rename(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error; |
|
ENTRY; |
|
CDEBUG (D_KML, "=====REINT_RENAME::%s -> %s\n", rec->path, rec->target); |
info->updated_time = rec->new_objectv->pv_mtime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_rename(rec->path, rec->target, info); |
pop_ctxt(&saved_ctxt); |
|
EXIT; |
return error; |
} |
|
static int reint_rmdir(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error; |
char *path; |
|
ENTRY; |
|
path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen); |
if (path == NULL) { |
EXIT; |
return -ENOMEM; |
} |
|
CDEBUG (D_KML, "=====REINT_RMDIR::%s\n", path); |
info->updated_time = rec->new_parentv->pv_mtime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_rmdir(path, info); |
pop_ctxt(&saved_ctxt); |
|
kfree(path); |
EXIT; |
return error; |
} |
|
static int reint_setattr(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
struct iattr iattr; |
int error; |
|
ENTRY; |
|
iattr.ia_valid = rec->valid; |
iattr.ia_mode = (umode_t)rec->mode; |
iattr.ia_uid = (uid_t)rec->uid; |
iattr.ia_gid = (gid_t)rec->gid; |
iattr.ia_size = (off_t)rec->size; |
iattr.ia_ctime = (time_t)rec->ctime; |
iattr.ia_mtime = (time_t)rec->mtime; |
iattr.ia_atime = iattr.ia_mtime; /* We don't track atimes. */ |
iattr.ia_attr_flags = rec->flags; |
|
CDEBUG (D_KML, "=====REINT_SETATTR::%s (%d)\n", rec->path, rec->valid); |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_setattr(rec->path, &iattr, info); |
pop_ctxt(&saved_ctxt); |
|
EXIT; |
return error; |
} |
|
static int reint_symlink(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error; |
|
ENTRY; |
|
CDEBUG (D_KML, "=====REINT_SYMLINK::%s -> %s\n", rec->path, rec->target); |
info->updated_time = rec->new_objectv->pv_ctime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_symlink(rec->target, rec->path, info); |
pop_ctxt(&saved_ctxt); |
|
EXIT; |
return error; |
} |
|
static int reint_unlink(struct kml_rec *rec, struct file *dir, |
struct lento_vfs_context *info) |
{ |
struct run_ctxt saved_ctxt; |
int error; |
char *path; |
|
ENTRY; |
|
path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen); |
if (path == NULL) { |
EXIT; |
return -ENOMEM; |
} |
|
CDEBUG (D_KML, "=====REINT_UNLINK::%s\n", path); |
info->updated_time = rec->new_parentv->pv_mtime; |
kmlreint_pre_secure(rec, dir, &saved_ctxt); |
error = lento_unlink(path, info); |
pop_ctxt(&saved_ctxt); |
|
kfree(path); |
EXIT; |
return error; |
} |
|
/*
 * Rename handling for branch filesets.
 *
 * The rename is first replayed normally.  If that fails with -ENOENT the
 * raw record (@kml_data, @kml_size) is appended with presto_log() instead
 * and, when logging succeeds, last_rcvd is updated through
 * presto_write_last_rcvd().  Any other result of reint_rename() is
 * returned unchanged.
 */
static int branch_reint_rename(struct presto_file_set *fset, struct kml_rec *rec,
                               struct file *dir, struct lento_vfs_context *info,
                               char * kml_data, __u64 kml_size)
{
        struct rec_info log_rec;
        int rc;

        ENTRY;

        rc = reint_rename(rec, dir, info);
        if (rc != -ENOENT) {
                EXIT;
                return rc;
        }

        /* normal reint failed because path was not found */
        CDEBUG(D_KML, "saving branch rename kml\n");
        log_rec.is_kml = 1;
        log_rec.size = kml_size;
        rc = presto_log(fset, &log_rec, kml_data, kml_size,
                        NULL, 0, NULL, 0, NULL, 0);
        if (rc == 0)
                rc = presto_write_last_rcvd(&log_rec, fset, info);

        EXIT;
        return rc;
}
|
/*
 * Reintegrate one KML record into a branch fileset.
 *
 * CLOSE records go through reint_close(), RENAME records through
 * branch_reint_rename().  Every other opcode is not replayed: the raw
 * record (@kml_data, @kml_size) is appended with presto_log() and, when
 * that succeeds, last_rcvd is updated.  Returns 0 or the error from the
 * step that failed.
 */
int branch_reinter(struct presto_file_set *fset, struct kml_rec *rec,
                   struct file *dir, struct lento_vfs_context *info,
                   char * kml_data, __u64 kml_size)
{
        struct rec_info log_rec;
        int op = rec->prefix.hdr->opcode;
        int rc = 0;

        switch (op) {
        case KML_OPCODE_CLOSE:
                /* regular close and backfetch */
                rc = reint_close(rec, dir, info);
                break;

        case KML_OPCODE_RENAME:
                /* rename only if name already exists */
                rc = branch_reint_rename(fset, rec, dir, info,
                                         kml_data, kml_size);
                break;

        default:
                /* just rewrite kml into branch/kml and update last_rcvd */
                CDEBUG(D_KML, "Saving branch kml\n");
                log_rec.is_kml = 1;
                log_rec.size = kml_size;
                rc = presto_log(fset, &log_rec, kml_data, kml_size,
                                NULL, 0, NULL, 0, NULL, 0);
                if (rc == 0)
                        rc = presto_write_last_rcvd(&log_rec, fset, info);
                break;
        }

        return rc;
}
|
typedef int (*reinter_t)(struct kml_rec *rec, struct file *basedir, |
struct lento_vfs_context *info); |
|
static reinter_t presto_reinters[KML_OPCODE_NUM] = |
{ |
[KML_OPCODE_CLOSE] = reint_close, |
[KML_OPCODE_CREATE] = reint_create, |
[KML_OPCODE_LINK] = reint_link, |
[KML_OPCODE_MKDIR] = reint_mkdir, |
[KML_OPCODE_MKNOD] = reint_mknod, |
[KML_OPCODE_NOOP] = reint_noop, |
[KML_OPCODE_RENAME] = reint_rename, |
[KML_OPCODE_RMDIR] = reint_rmdir, |
[KML_OPCODE_SETATTR] = reint_setattr, |
[KML_OPCODE_SYMLINK] = reint_symlink, |
[KML_OPCODE_UNLINK] = reint_unlink, |
}; |
|
static inline reinter_t get_reinter(int op) |
{ |
if (op < 0 || op >= sizeof(presto_reinters) / sizeof(reinter_t)) |
return NULL; |
else |
return presto_reinters[op]; |
} |
|
int kml_reint_rec(struct file *dir, struct izo_ioctl_data *data) |
{ |
char *ptr; |
char *end; |
struct kml_rec rec; |
int error = 0; |
struct lento_vfs_context info; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct presto_dentry_data *dd = presto_d2d(dir->f_dentry); |
int op; |
reinter_t reinter; |
|
struct izo_rcvd_rec lr_rec; |
int off; |
|
ENTRY; |
|
error = presto_prep(dir->f_dentry, &cache, &fset); |
if ( error ) { |
CERROR("intermezzo: Reintegration on invalid file\n"); |
return error; |
} |
|
if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) { |
CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n", |
dir->f_dentry->d_inode->i_ino); |
|
return -EINVAL; |
} |
|
if (data->ioc_plen1 > 64 * 1024) { |
EXIT; |
return -ENOSPC; |
} |
|
ptr = fset->fset_reint_buf; |
end = ptr + data->ioc_plen1; |
|
if (copy_from_user(ptr, data->ioc_pbuf1, data->ioc_plen1)) { |
EXIT; |
error = -EFAULT; |
goto out; |
} |
|
error = kml_unpack(&rec, &ptr, end); |
if (error) { |
EXIT; |
error = -EFAULT; |
goto out; |
} |
|
off = izo_rcvd_get(&lr_rec, fset, data->ioc_uuid); |
if (off < 0) { |
CERROR("No last_rcvd record, setting to 0\n"); |
memset(&lr_rec, 0, sizeof(lr_rec)); |
} |
|
data->ioc_kmlsize = ptr - fset->fset_reint_buf; |
|
if (rec.suffix->recno != lr_rec.lr_remote_recno + 1) { |
CERROR("KML record number %Lu expected, not %d\n", |
lr_rec.lr_remote_recno + 1, |
rec.suffix->recno); |
|
#if 0 |
if (!version_check(&rec, dd->dd_fset, &info)) { |
/* FIXME: do an upcall to resolve conflicts */ |
CERROR("intermezzo: would be a conflict!\n"); |
error = -EINVAL; |
EXIT; |
goto out; |
} |
#endif |
} |
|
op = rec.prefix.hdr->opcode; |
|
reinter = get_reinter(op); |
if (!reinter) { |
CERROR("%s: Unrecognized KML opcode %d\n", __FUNCTION__, op); |
error = -EINVAL; |
EXIT; |
goto out; |
} |
|
info.kml_offset = data->ioc_offset + data->ioc_kmlsize; |
info.recno = rec.suffix->recno; |
info.flags = LENTO_FL_EXPECT; |
if (data->ioc_flags) |
info.flags |= LENTO_FL_KML; |
|
memcpy(info.uuid, data->ioc_uuid, sizeof(info.uuid)); |
|
if (fset->fset_flags & FSET_IS_BRANCH && data->ioc_flags) |
error = branch_reinter(fset, &rec, dir, &info, fset->fset_reint_buf, |
data->ioc_kmlsize); |
else |
error = reinter(&rec, dir, &info); |
out: |
EXIT; |
return error; |
} |
|
int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data) |
{ |
char *buf = NULL; |
char *ptr; |
char *end; |
struct kml_rec rec; |
struct file *file; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct presto_dentry_data *dd = presto_d2d(dir->f_dentry); |
struct run_ctxt saved_ctxt; |
int error; |
|
ENTRY; |
|
error = presto_prep(dir->f_dentry, &cache, &fset); |
if ( error ) { |
CERROR("intermezzo: Reintegration on invalid file\n"); |
return error; |
} |
|
if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) { |
CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n", |
dir->f_dentry->d_inode->i_ino); |
|
return -EINVAL; |
} |
|
|
PRESTO_ALLOC(buf, data->ioc_plen1); |
if (!buf) { |
EXIT; |
return -ENOMEM; |
} |
ptr = buf; |
end = buf + data->ioc_plen1; |
|
if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { |
EXIT; |
PRESTO_FREE(buf, data->ioc_plen1); |
return -EFAULT; |
} |
|
error = kml_unpack(&rec, &ptr, end); |
if (error) { |
EXIT; |
PRESTO_FREE(buf, data->ioc_plen1); |
return -EFAULT; |
} |
|
kmlreint_pre_secure(&rec, dir, &saved_ctxt); |
|
file = filp_open(rec.path, O_RDONLY, 0); |
if (!file || IS_ERR(file)) { |
error = PTR_ERR(file); |
goto out; |
} |
data->ioc_ino = file->f_dentry->d_inode->i_ino; |
data->ioc_generation = file->f_dentry->d_inode->i_generation; |
filp_close(file, 0); |
|
CDEBUG(D_FILE, "%s ino %Lx, gen %Lx\n", rec.path, |
data->ioc_ino, data->ioc_generation); |
|
out: |
if (buf) |
PRESTO_FREE(buf, data->ioc_plen1); |
pop_ctxt(&saved_ctxt); |
EXIT; |
return error; |
} |
|
|
/journal_obdfs.c
0,0 → 1,194
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com> |
* Copyright (C) 2000 Red Hat, Inc. |
* Copyright (C) 2000 Los Alamos National Laboratory |
* Copyright (C) 2000 TurboLinux, Inc. |
* Copyright (C) 2001 Mountain View Data, Inc. |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <linux/types.h> |
#include <linux/param.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#ifdef CONFIG_OBDFS_FS |
#include /usr/src/obd/include/linux/obdfs.h |
#endif |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#ifdef CONFIG_OBDFS_FS |
|
|
/*
 * Free-space query for the obdfs journal ops.  Nothing is measured: a
 * fixed value of 0x0fffff is reported regardless of the arguments.
 */
static unsigned long presto_obdfs_freespace(struct presto_file_set *fset,
                                            struct super_block *sb)
{
        const unsigned long avail = 0x0fffff;

        return avail;
}
|
/*
 * Transaction start for the obdfs journal ops.  No transaction is opened;
 * the constant non-NULL handle (void *) 1 is returned for every call.
 */
static void *presto_obdfs_trans_start(struct presto_file_set *fset,
                                      struct inode *inode,
                                      int op)
{
        void *dummy_handle = (void *) 1;

        return dummy_handle;
}
|
#if 0 |
int jblocks; |
int trunc_blks, one_path_blks, extra_path_blks, |
extra_name_blks, lml_blks; |
__u32 avail_kmlblocks; |
|
if ( presto_no_journal(fset) || |
strcmp(fset->fset_cache->cache_type, "ext3")) |
{ |
CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", |
fset->fset_cache->cache_type); |
return NULL; |
} |
|
avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count; |
|
if ( avail_kmlblocks < 3 ) { |
return ERR_PTR(-ENOSPC); |
} |
|
if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) |
&& avail_kmlblocks < 6 ) { |
return ERR_PTR(-ENOSPC); |
} |
|
/* Need journal space for: |
at least three writes to KML (two one block writes, one a path) |
possibly a second name (unlink, rmdir) |
possibly a second path (symlink, rename) |
a one block write to the last rcvd file |
*/ |
|
trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; |
one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3; |
lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2; |
extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); |
extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); |
|
/* additional blocks appear for "two pathname" operations |
and operations involving the LML records |
*/ |
switch (op) { |
case PRESTO_OP_TRUNC: |
jblocks = one_path_blks + extra_name_blks + trunc_blks |
+ EXT3_DELETE_TRANS_BLOCKS; |
break; |
case PRESTO_OP_RELEASE: |
/* |
jblocks = one_path_blks + lml_blks + 2*trunc_blks; |
*/ |
jblocks = one_path_blks; |
break; |
case PRESTO_OP_SETATTR: |
jblocks = one_path_blks + trunc_blks + 1 ; |
break; |
case PRESTO_OP_CREATE: |
jblocks = one_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS + 3; |
break; |
case PRESTO_OP_LINK: |
jblocks = one_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS; |
break; |
case PRESTO_OP_UNLINK: |
jblocks = one_path_blks + extra_name_blks + trunc_blks |
+ EXT3_DELETE_TRANS_BLOCKS; |
break; |
case PRESTO_OP_SYMLINK: |
jblocks = one_path_blks + extra_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS + 5; |
break; |
case PRESTO_OP_MKDIR: |
jblocks = one_path_blks + trunc_blks |
+ EXT3_DATA_TRANS_BLOCKS + 4; |
break; |
case PRESTO_OP_RMDIR: |
jblocks = one_path_blks + extra_name_blks + trunc_blks |
+ EXT3_DELETE_TRANS_BLOCKS; |
break; |
case PRESTO_OP_MKNOD: |
jblocks = one_path_blks + trunc_blks + |
EXT3_DATA_TRANS_BLOCKS + 3; |
break; |
case PRESTO_OP_RENAME: |
jblocks = one_path_blks + extra_path_blks + trunc_blks + |
2 * EXT3_DATA_TRANS_BLOCKS + 2; |
break; |
case PRESTO_OP_WRITE: |
jblocks = one_path_blks; |
/* add this when we can wrap our transaction with |
that of ext3_file_write (ordered writes) |
+ EXT3_DATA_TRANS_BLOCKS; |
*/ |
break; |
default: |
CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); |
return NULL; |
} |
|
CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); |
return journal_start(EXT3_JOURNAL(inode), jblocks); |
} |
#endif |
|
/*
 * Transaction commit for the obdfs journal ops.  Currently a no-op: the
 * journal_stop() path below is compiled out.
 */
void presto_obdfs_trans_commit(struct presto_file_set *fset, void *handle)
{
#if 0
        if ( presto_no_journal(fset) || !handle)
                return;

        journal_stop(handle);
#endif
}
|
/*
 * Mark @inode for journaled data writes by setting EXT3_JOURNAL_DATA_FL
 * in its ext3 inode flags.  When that flag is not defined at build time
 * nothing is done and a compile-time warning is emitted instead.
 */
void presto_obdfs_journal_file_data(struct inode *inode)
{
#ifdef EXT3_JOURNAL_DATA_FL
        inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
#else
#warning You must have a facility to enable journaled writes for recovery!
#endif
}
|
struct journal_ops presto_obdfs_journal_ops = { |
.tr_avail = presto_obdfs_freespace, |
.tr_start = presto_obdfs_trans_start, |
.tr_commit = presto_obdfs_trans_commit, |
.tr_journal_data = presto_obdfs_journal_file_data |
}; |
|
#endif |
/journal_xfs.c
0,0 → 1,162
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
*/ |
|
#include <linux/types.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <asm/segment.h> |
#include <asm/uaccess.h> |
#include <linux/string.h> |
#if 0 |
/* XFS Support not there yet */ |
#ifdef CONFIG_FS_XFS |
#include <linux/xfs_fs.h> |
#endif |
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
#include <linux/intermezzo_journal.h> |
|
#if 0 |
|
/* XFS has journalling, but these functions do nothing yet... */ |
|
/*
 * Free-space query for the XFS journal ops.  The real statvfs-based
 * lookup is compiled out; a fixed value of 0x0fffffff is reported.
 */
static unsigned long presto_xfs_freespace(struct presto_file_set *fset,
                                          struct super_block *sb)
{
        const unsigned long avail = 0x0fffffff;

#if 0
        vfs_t *vfsp = LINVFS_GET_VFS(sb);
        struct statvfs_t stat;
        bhv_desc_t *bdp;
        unsigned long avail;
        int rc;

        VFS_STATVFS(vfsp, &stat, NULL, rc);
        avail = statp.f_bfree;

        return sbp->sb_fdblocks;;
#endif
        return avail;
}
|
|
/* start the filesystem journal operations */ |
static void * |
presto_xfs_trans_start(struct presto_file_set *fset, |
struct inode *inode, int op) |
{ |
int xfs_op; |
/* do a free blocks check as in journal_ext3? does anything protect |
* the space in that case or can it disappear out from under us |
* anyway? */ |
|
/* copied from xfs_trans.h, skipping header maze for now */ |
#define XFS_TRANS_SETATTR_NOT_SIZE 1 |
#define XFS_TRANS_SETATTR_SIZE 2 |
#define XFS_TRANS_INACTIVE 3 |
#define XFS_TRANS_CREATE 4 |
#define XFS_TRANS_CREATE_TRUNC 5 |
#define XFS_TRANS_TRUNCATE_FILE 6 |
#define XFS_TRANS_REMOVE 7 |
#define XFS_TRANS_LINK 8 |
#define XFS_TRANS_RENAME 9 |
#define XFS_TRANS_MKDIR 10 |
#define XFS_TRANS_RMDIR 11 |
#define XFS_TRANS_SYMLINK 12 |
|
/* map the op onto the values for XFS so it can do reservation. if |
* we don't have enough info to differentiate between e.g. setattr |
* with or without size, what do we do? will it adjust? */ |
switch (op) { |
case PRESTO_OP_SETATTR: |
/* or XFS_TRANS_SETATTR_NOT_SIZE? */ |
xfs_op = XFS_TRANS_SETATTR_SIZE; |
break; |
case PRESTO_OP_CREATE: |
/* or CREATE_TRUNC? */ |
xfs_op = XFS_TRANS_CREATE; |
break; |
case PRESTO_OP_LINK: |
xfs_op = XFS_TRANS_LINK; |
break; |
case PRESTO_OP_UNLINK: |
xfs_op = XFS_TRANS_REMOVE; |
break; |
case PRESTO_OP_SYMLINK: |
xfs_op = XFS_TRANS_SYMLINK; |
break; |
case PRESTO_OP_MKDIR: |
xfs_op = XFS_TRANS_MKDIR; |
break; |
case PRESTO_OP_RMDIR: |
xfs_op = XFS_TRANS_RMDIR; |
break; |
case PRESTO_OP_MKNOD: |
/* XXX can't find an analog for mknod? */ |
xfs_op = XFS_TRANS_CREATE; |
break; |
case PRESTO_OP_RENAME: |
xfs_op = XFS_TRANS_RENAME; |
break; |
default: |
CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); |
return NULL; |
} |
|
return xfs_trans_start(inode, xfs_op); |
} |
|
/* Commit for the XFS journal ops: passes @handle to xfs_trans_stop(). */
static void presto_xfs_trans_commit(struct presto_file_set *fset, void *handle)
{
        /* assert (handle == current->j_handle) */
        xfs_trans_stop(handle);
}
|
/* Journaled-data hook for the XFS journal ops; intentionally does nothing. */
static void presto_xfs_journal_file_data(struct inode *inode)
{
}
|
/*
 * "Has all data" query for the XFS journal ops.  Not supported: reaching
 * this function triggers BUG().  The trailing return of 0 only satisfies
 * the signature.
 */
static int presto_xfs_has_all_data(struct inode *inode)
{
        int unsupported = 0;

        BUG();
        return unsupported;
}
|
struct journal_ops presto_xfs_journal_ops = { |
.tr_all_data = presto_xfs_has_all_data, |
.tr_avail = presto_xfs_freespace, |
.tr_start = presto_xfs_trans_start, |
.tr_commit = presto_xfs_trans_commit, |
.tr_journal_data = presto_xfs_journal_file_data |
}; |
|
#endif |
|
|
#endif /* 0 -- XFS support not there yet */ |
|
/ext_attr.c
0,0 → 1,205
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2001 Tacit Networks, Inc. |
* Author: Shirish H. Phatak <shirish@tacitnetworks.com> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Extended attribute handling for presto. |
*/ |
|
#define __NO_VERSION__ |
#include <linux/module.h> |
#include <linux/kernel.h> |
#include <linux/mm.h> |
#include <linux/string.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <linux/unistd.h> |
|
#include <asm/system.h> |
#include <asm/uaccess.h> |
|
#include <linux/fs.h> |
#include <linux/stat.h> |
#include <linux/errno.h> |
#include <linux/locks.h> |
#include <linux/string.h> |
#include <asm/uaccess.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <asm/segment.h> |
#include <linux/smp_lock.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
#ifdef CONFIG_FS_EXT_ATTR |
#include <linux/ext_attr.h> |
|
extern inline void presto_debug_fail_blkdev(struct presto_file_set *fset, |
unsigned long value); |
|
|
/* VFS interface */ |
/* XXX! Fixme test for user defined attributes */ |
int presto_set_ext_attr(struct inode *inode, |
const char *name, void *buffer, |
size_t buffer_len, int flags) |
{ |
int error; |
struct presto_cache *cache; |
struct presto_file_set *fset; |
struct lento_vfs_context info; |
struct dentry *dentry; |
int minor = presto_i2m(inode); |
char *buf = NULL; |
|
ENTRY; |
if (minor < 0) { |
EXIT; |
return -1; |
} |
|
if ( ISLENTO(minor) ) { |
EXIT; |
return -EINVAL; |
} |
|
/* BAD...vfs should really pass down the dentry to use, especially |
* since every other operation in iops does. But for now |
* we do a reverse mapping from inode to the first dentry |
*/ |
if (list_empty(&inode->i_dentry)) { |
CERROR("No alias for inode %d\n", (int) inode->i_ino); |
EXIT; |
return -EINVAL; |
} |
|
dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); |
|
error = presto_prep(dentry, &cache, &fset); |
if ( error ) { |
EXIT; |
return error; |
} |
|
if ((buffer != NULL) && (buffer_len != 0)) { |
/* If buffer is a user space pointer copy it to kernel space |
* and reset the flag. We do this since the journal functions need |
* access to the contents of the buffer, and the file system |
* does not care. When we actually invoke the function, we remove |
* the EXT_ATTR_FLAG_USER flag. |
* |
* XXX:Check if the "fs does not care" assertion is always true -SHP |
* (works for ext3) |
*/ |
if (flags & EXT_ATTR_FLAG_USER) { |
PRESTO_ALLOC(buf, buffer_len); |
if (!buf) { |
CERROR("InterMezzo: out of memory!!!\n"); |
return -ENOMEM; |
} |
error = copy_from_user(buf, buffer, buffer_len); |
if (error) |
return -EFAULT; |
} else |
buf = buffer; |
} else |
buf = buffer; |
|
if ( presto_get_permit(inode) < 0 ) { |
EXIT; |
if (buffer_len && (flags & EXT_ATTR_FLAG_USER)) |
PRESTO_FREE(buf, buffer_len); |
return -EROFS; |
} |
|
/* Simulate presto_setup_info */ |
memset(&info, 0, sizeof(info)); |
/* For now redundant..but we keep it around just in case */ |
info.flags = LENTO_FL_IGNORE_TIME; |
if (!ISLENTO(cache->cache_psdev->uc_minor)) |
info.flags |= LENTO_FL_KML; |
|
/* We pass in the kernel space pointer and reset the |
* EXT_ATTR_FLAG_USER flag. |
* See comments above. |
*/ |
/* Note that mode is already set by VFS so we send in a NULL */ |
error = presto_do_set_ext_attr(fset, dentry, name, buf, |
buffer_len, flags & ~EXT_ATTR_FLAG_USER, |
NULL, &info); |
presto_put_permit(inode); |
|
if (buffer_len && (flags & EXT_ATTR_FLAG_USER)) |
PRESTO_FREE(buf, buffer_len); |
EXIT; |
return error; |
} |
|
/* Lento Interface */ |
/* XXX: ignore flags? We should be forcing these operations through? -SHP*/ |
int lento_set_ext_attr(const char *path, const char *name, |
void *buffer, size_t buffer_len, int flags, mode_t mode, |
struct lento_vfs_context *info) |
{ |
int error; |
char * pathname; |
struct nameidata nd; |
struct dentry *dentry; |
struct presto_file_set *fset; |
|
ENTRY; |
lock_kernel(); |
|
pathname=getname(path); |
error = PTR_ERR(pathname); |
if (IS_ERR(pathname)) { |
EXIT; |
goto exit; |
} |
|
/* Note that ext_attrs apply to both files and directories..*/ |
error=presto_walk(pathname,&nd); |
if (error) |
goto exit; |
dentry = nd.dentry; |
|
fset = presto_fset(dentry); |
error = -EINVAL; |
if ( !fset ) { |
CERROR("No fileset!\n"); |
EXIT; |
goto exit_dentry; |
} |
|
if (buffer==NULL) buffer_len=0; |
|
error = presto_do_set_ext_attr(fset, dentry, name, buffer, |
buffer_len, flags, &mode, info); |
exit_dentry: |
path_release(&nd); |
exit_path: |
putname(pathname); |
exit: |
unlock_kernel(); |
return error; |
} |
|
#endif /*CONFIG_FS_EXT_ATTR*/ |
/fileset.c
0,0 → 1,675
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- |
* vim:expandtab:shiftwidth=8:tabstop=8: |
* |
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> |
* |
* This file is part of InterMezzo, http://www.inter-mezzo.org. |
* |
* InterMezzo is free software; you can redistribute it and/or |
* modify it under the terms of version 2 of the GNU General Public |
* License as published by the Free Software Foundation. |
* |
* InterMezzo is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with InterMezzo; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* |
* Managing filesets |
* |
*/ |
|
#define __NO_VERSION__ |
#include <stdarg.h> |
|
#include <asm/bitops.h> |
#include <asm/uaccess.h> |
#include <asm/system.h> |
|
#include <linux/errno.h> |
#include <linux/fs.h> |
#include <linux/ext2_fs.h> |
#include <linux/slab.h> |
#include <linux/vmalloc.h> |
#include <linux/sched.h> |
#include <linux/stat.h> |
#include <linux/string.h> |
#include <linux/locks.h> |
#include <linux/blkdev.h> |
#include <linux/init.h> |
#include <linux/module.h> |
|
#include <linux/intermezzo_fs.h> |
#include <linux/intermezzo_psdev.h> |
|
static inline struct presto_file_set *presto_dentry2fset(struct dentry *dentry) |
{ |
if (presto_d2d(dentry) == NULL) { |
EXIT; |
return NULL; |
} |
return presto_d2d(dentry)->dd_fset; |
} |
|
/* find the fileset dentry for this dentry */ |
struct presto_file_set *presto_fset(struct dentry *de) |
{ |
struct dentry *fsde; |
ENTRY; |
if ( !de->d_inode ) { |
/* FIXME: is this ok to be NULL? */ |
CDEBUG(D_INODE,"presto_fset: warning %*s has NULL inode.\n", |
de->d_name.len, de->d_name.name); |
} |
for (fsde = de;; fsde = fsde->d_parent) { |
if ( presto_dentry2fset(fsde) ) { |
EXIT; |
return presto_dentry2fset(fsde); |
} |
if (fsde->d_parent == fsde) |
break; |
} |
EXIT; |
return NULL; |
} |
|
/*
 * Look up `path` and store the KML record number (fset_kml.fd_recno) of
 * the fileset attached to the resulting dentry in *recno.
 *
 * Returns 0 on success, the presto_walk() error if the lookup fails,
 * -ENXIO if the inode is not an InterMezzo inode, and -EINVAL if no
 * fileset is attached to the dentry itself.
 */
int presto_get_lastrecno(char *path, off_t *recno)
{
        struct nameidata nd;
        struct presto_file_set *fset;
        int error;

        ENTRY;

        error = presto_walk(path, &nd);
        if (error) {
                EXIT;
                return error;
        }

        if (!presto_ispresto(nd.dentry->d_inode)) {
                error = -ENXIO;
                EXIT;
                goto kml_out;
        }

        fset = presto_dentry2fset(nd.dentry);
        if (fset == NULL) {
                error = -EINVAL;
                EXIT;
                goto kml_out;
        }

        *recno = fset->fset_kml.fd_recno;
        error = 0;

kml_out:
        /* drop the references taken by presto_walk() */
        path_release(&nd);
        return error;
}
|
/*
 * Build the string "/.intermezzo/<fsetname>/<name>" in a buffer obtained
 * with PRESTO_ALLOC.  The buffer is sized to hold the string and its
 * terminating NUL exactly.
 *
 * Returns the new string, or NULL if the allocation failed.
 */
static char * _izo_make_path(char *fsetname, char *name)
{
        char *path = NULL;
        int len = strlen("/.intermezzo/")       /* fixed prefix */
                + strlen(fsetname)
                + 1                             /* '/' separator */
                + strlen(name)
                + 1;                            /* terminating NUL */

        PRESTO_ALLOC(path, len);
        if (path != NULL)
                sprintf(path, "/.intermezzo/%s/%s", fsetname, name);

        return path;
}
|
char * izo_make_path(struct presto_file_set *fset, char *name) |
{ |
return _izo_make_path(fset->fset_name, name); |
} |
|
static struct file *_izo_fset_open(char *fsetname, char *name, int flags, int mode) |
{ |
char *path; |
struct file *f; |
int error; |
ENTRY; |
|
path = _izo_make_path(fsetname, name); |
if (path == NULL) { |
EXIT; |
return ERR_PTR(-ENOMEM); |
} |
|
CDEBUG(D_INODE, "opening file %s\n", path); |
f = filp_open(path, flags, mode); |
error = PTR_ERR(f); |
if (IS_ERR(f)) { |
CDEBUG(D_INODE, "Error %d\n", error); |
} |
|
PRESTO_FREE(path, strlen(path)); |
|
EXIT; |
return f; |
|
} |
|
struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode) |
{ |
return _izo_fset_open(fset->fset_name, name, flags, mode); |
} |
|
|
|
/* |
* note: this routine "pins" a dentry for a fileset root |
*/ |
int presto_set_fsetroot(struct dentry *ioctl_dentry, char *fsetname, |
unsigned int flags) |
{ |
struct presto_file_set *fset = NULL; |
struct presto_cache *cache; |
int error; |
struct file *fset_root; |
struct dentry *dentry; |
|
ENTRY; |
|
fset_root = _izo_fset_open(fsetname, "ROOT", O_RDONLY, 000); |
if (IS_ERR(fset_root)) { |
CERROR("Can't open %s/ROOT\n", fsetname); |
EXIT; |
error = PTR_ERR(fset_root); |
goto out; |
} |
dentry = dget(fset_root->f_dentry); |
filp_close(fset_root, NULL); |
|
dentry->d_inode->i_op = ioctl_dentry->d_inode->i_op; |
dentry->d_inode->i_fop = ioctl_dentry->d_inode->i_fop; |
dentry->d_op = ioctl_dentry->d_op; |
fset = presto_dentry2fset(dentry); |
if (fset && (fset->fset_dentry == dentry) ) { |
CERROR("Fsetroot already set (inode %ld)\n", |
dentry->d_inode->i_ino); |
/* XXX: ignore because clear_fsetroot is broken */ |
#if 0 |
dput(dentry); |
EXIT; |
error = -EEXIST; |
goto out; |
#endif |
} |
|
cache = presto_get_cache(dentry->d_inode); |
if (!cache) { |
CERROR("No cache found for inode %ld\n", |
dentry->d_inode->i_ino); |
EXIT; |
error = -ENODEV; |
goto out_free; |
} |
|
PRESTO_ALLOC(fset, sizeof(*fset)); |
if ( !fset ) { |
CERROR("No memory allocating fset for %s\n", fsetname); |
EXIT; |
error = -ENOMEM; |
goto out_free; |
} |
CDEBUG(D_INODE, "fset at %p\n", fset); |
|
CDEBUG(D_INODE, "InterMezzo: fsetroot: inode %ld, fileset name %s\n", |
dentry->d_inode->i_ino, fsetname); |
|
fset->fset_mnt = mntget(current->fs->pwdmnt); |
fset->fset_cache = cache; |
fset->fset_dentry = dentry; |
fset->fset_name = strdup(fsetname); |
fset->fset_chunkbits = CHUNK_BITS; |
fset->fset_flags = flags; |
fset->fset_file_maxio = FSET_DEFAULT_MAX_FILEIO; |
fset->fset_permit_lock = SPIN_LOCK_UNLOCKED; |
PRESTO_ALLOC(fset->fset_reint_buf, 64 * 1024); |
if (fset->fset_reint_buf == NULL) { |
EXIT; |
error = -ENOMEM; |
goto out_free; |
} |
init_waitqueue_head(&fset->fset_permit_queue); |
|
if (presto_d2d(dentry) == NULL) { |
dentry->d_fsdata = izo_alloc_ddata(); |
} |
if (presto_d2d(dentry) == NULL) { |
CERROR("InterMezzo: %s: no memory\n", __FUNCTION__); |
EXIT; |
error = -ENOMEM; |
goto out_free; |
} |
presto_d2d(dentry)->dd_fset = fset; |
list_add(&fset->fset_list, &cache->cache_fset_list); |
|
error = izo_init_kml_file(fset, &fset->fset_kml); |
if ( error ) { |
EXIT; |
CDEBUG(D_JOURNAL, "Error init_kml %d\n", error); |
goto out_list_del; |
} |
|
error = izo_init_lml_file(fset, &fset->fset_lml); |
if ( error ) { |
int rc; |
EXIT; |
rc = izo_log_close(&fset->fset_kml); |
CDEBUG(D_JOURNAL, "Error init_lml %d, cleanup %d\n", error, rc); |
goto out_list_del; |
} |
|
/* init_last_rcvd_file could trigger a presto_file_write(), which |
* requires that the lml structure be initialized. -phil */ |
error = izo_init_last_rcvd_file(fset, &fset->fset_rcvd); |
if ( error ) { |
int rc; |
EXIT; |
rc = izo_log_close(&fset->fset_kml); |
rc = izo_log_close(&fset->fset_lml); |
CDEBUG(D_JOURNAL, "Error init_lastrcvd %d, cleanup %d\n", error, rc); |
goto out_list_del; |
} |
|
CDEBUG(D_PIOCTL, "-------> fset at %p, dentry at %p, mtpt %p," |
"fset %s, cache %p, presto_d2d(dentry)->dd_fset %p\n", |
fset, dentry, fset->fset_dentry, fset->fset_name, cache, |
presto_d2d(dentry)->dd_fset); |
|
EXIT; |
return 0; |
|
out_list_del: |
list_del(&fset->fset_list); |
presto_d2d(dentry)->dd_fset = NULL; |
out_free: |
if (fset) { |
mntput(fset->fset_mnt); |
if (fset->fset_reint_buf != NULL) |
PRESTO_FREE(fset->fset_reint_buf, 64 * 1024); |
PRESTO_FREE(fset, sizeof(*fset)); |
} |
dput(dentry); |
out: |
return error; |
} |
|
static int izo_cleanup_fset(struct presto_file_set *fset) |
{ |
int error; |
struct presto_cache *cache; |
|
ENTRY; |
|
CERROR("Cleaning up fset %s\n", fset->fset_name); |
|
error = izo_log_close(&fset->fset_kml); |
if (error) |
CERROR("InterMezzo: Closing kml for fset %s: %d\n", |
fset->fset_name, error); |
error = izo_log_close(&fset->fset_lml); |
if (error) |
CERROR("InterMezzo: Closing lml for fset %s: %d\n", |
fset->fset_name, error); |
error = izo_log_close(&fset->fset_rcvd); |
if (error) |
CERROR("InterMezzo: Closing last_rcvd for fset %s: %d\n", |
fset->fset_name, error); |
|
cache = fset->fset_cache; |
|
list_del(&fset->fset_list); |
|
presto_d2d(fset->fset_dentry)->dd_fset = NULL; |
dput(fset->fset_dentry); |
mntput(fset->fset_mnt); |
|
PRESTO_FREE(fset->fset_name, strlen(fset->fset_name) + 1); |
PRESTO_FREE(fset->fset_reint_buf, 64 * 1024); |
PRESTO_FREE(fset, sizeof(*fset)); |
EXIT; |
return error; |
} |
|
int izo_clear_fsetroot(struct dentry *dentry) |
{ |
struct presto_file_set *fset; |
|
ENTRY; |
|
fset = presto_dentry2fset(dentry); |
if (!fset) { |
EXIT; |
return -EINVAL; |
} |
|
izo_cleanup_fset(fset); |
EXIT; |
return 0; |
} |
|
int izo_clear_all_fsetroots(struct presto_cache *cache) |
{ |
struct presto_file_set *fset; |
struct list_head *tmp,*tmpnext; |
int error; |
|
error = 0; |
tmp = &cache->cache_fset_list; |
tmpnext = tmp->next; |
while ( tmpnext != &cache->cache_fset_list) { |
tmp = tmpnext; |
tmpnext = tmp->next; |
fset = list_entry(tmp, struct presto_file_set, fset_list); |
|
error = izo_cleanup_fset(fset); |
if (error) |
break; |
} |
return error; |
} |
|
static struct vfsmount *izo_alloc_vfsmnt(void) |
{ |
struct vfsmount *mnt; |
PRESTO_ALLOC(mnt, sizeof(*mnt)); |
if (mnt) { |
memset(mnt, 0, sizeof(struct vfsmount)); |
atomic_set(&mnt->mnt_count,1); |
INIT_LIST_HEAD(&mnt->mnt_hash); |
INIT_LIST_HEAD(&mnt->mnt_child); |
INIT_LIST_HEAD(&mnt->mnt_mounts); |
INIT_LIST_HEAD(&mnt->mnt_list); |
} |
return mnt; |
} |
|
|
static void izo_setup_ctxt(struct dentry *root, struct vfsmount *mnt, |
struct run_ctxt *save) |
{ |
struct run_ctxt new; |
|
mnt->mnt_root = root; |
mnt->mnt_sb = root->d_inode->i_sb; |
unlock_super(mnt->mnt_sb); |
|
new.rootmnt = mnt; |
new.root = root; |
new.pwdmnt = mnt; |
new.pwd = root; |
new.fsuid = 0; |
new.fsgid = 0; |
new.fs = get_fs(); |
/* XXX where can we get the groups from? */ |
new.ngroups = 0; |
|
push_ctxt(save, &new); |
} |
|
static void izo_cleanup_ctxt(struct vfsmount *mnt, struct run_ctxt *save) |
{ |
lock_super(mnt->mnt_sb); |
pop_ctxt(save); |
} |
|
static int izo_simple_mkdir(struct dentry *dir, char *name, int mode) |
{ |
struct dentry *dchild; |
int err; |
ENTRY; |
|
dchild = lookup_one_len(name, dir, strlen(name)); |
if (IS_ERR(dchild)) { |
EXIT; |
return PTR_ERR(dchild); |
} |
|
if (dchild->d_inode) { |
dput(dchild); |
EXIT; |
return -EEXIST; |
} |
|
err = vfs_mkdir(dir->d_inode, dchild, mode); |
dput(dchild); |
|
EXIT; |
return err; |
} |
|
static int izo_simple_symlink(struct dentry *dir, char *name, char *tgt) |
{ |
struct dentry *dchild; |
int err; |
ENTRY; |
|
dchild = lookup_one_len(name, dir, strlen(name)); |
if (IS_ERR(dchild)) { |
EXIT; |
return PTR_ERR(dchild); |
} |
|
if (dchild->d_inode) { |
dput(dchild); |
EXIT; |
return -EEXIST; |
} |
|
err = vfs_symlink(dir->d_inode, dchild, tgt); |
dput(dchild); |
|
EXIT; |
return err; |
} |
|
/* |
* run set_fsetroot in chroot environment |
*/ |
int presto_set_fsetroot_from_ioc(struct dentry *root, char *fsetname, |
unsigned int flags) |
{ |
int rc; |
struct presto_cache *cache; |
struct vfsmount *mnt; |
struct run_ctxt save; |
|
if (root != root->d_inode->i_sb->s_root) { |
CERROR ("IOC_SET_FSET must be called on mount point\n"); |
return -ENODEV; |
} |
|
cache = presto_get_cache(root->d_inode); |
mnt = cache->cache_vfsmount; |
if (!mnt) { |
EXIT; |
return -ENOMEM; |
} |
|
izo_setup_ctxt(root, mnt, &save); |
rc = presto_set_fsetroot(root, fsetname, flags); |
izo_cleanup_ctxt(mnt, &save); |
return rc; |
} |
|
/* XXX: this function should detect if fsetname is already in use for |
the cache under root |
*/ |
int izo_prepare_fileset(struct dentry *root, char *fsetname) |
{ |
int err; |
struct dentry *dotizo = NULL, *fsetdir = NULL, *dotiopen = NULL; |
struct presto_cache *cache; |
struct vfsmount *mnt; |
struct run_ctxt save; |
|
cache = presto_get_cache(root->d_inode); |
mnt = cache->cache_vfsmount = izo_alloc_vfsmnt(); |
if (!mnt) { |
EXIT; |
return -ENOMEM; |
} |
|
if (!fsetname) |
fsetname = "rootfset"; |
|
izo_setup_ctxt(root, mnt, &save); |
|
err = izo_simple_mkdir(root, ".intermezzo", 0755); |
CDEBUG(D_CACHE, "mkdir on .intermezzo err %d\n", err); |
|
err = izo_simple_mkdir(root, "..iopen..", 0755); |
CDEBUG(D_CACHE, "mkdir on ..iopen.. err %d\n", err); |
|
dotiopen = lookup_one_len("..iopen..", root, strlen("..iopen..")); |
if (IS_ERR(dotiopen)) { |
EXIT; |
goto out; |
} |
dotiopen->d_inode->i_op = &presto_dir_iops; |
dput(dotiopen); |
|
|
dotizo = lookup_one_len(".intermezzo", root, strlen(".intermezzo")); |
if (IS_ERR(dotizo)) { |
EXIT; |
goto out; |
} |
|
|
err = izo_simple_mkdir(dotizo, fsetname, 0755); |
CDEBUG(D_CACHE, "mkdir err %d\n", err); |
|
/* XXX find the dentry of the root of the fileset (root for now) */ |
fsetdir = lookup_one_len(fsetname, dotizo, strlen(fsetname)); |
if (IS_ERR(fsetdir)) { |
EXIT; |
goto out; |
} |
|
err = izo_simple_symlink(fsetdir, "ROOT", "../.."); |
|
/* XXX read flags from flags file */ |
err = presto_set_fsetroot(root, fsetname, 0); |
CDEBUG(D_CACHE, "set_fsetroot err %d\n", err); |
|
out: |
if (dotizo && !IS_ERR(dotizo)) |
dput(dotizo); |
if (fsetdir && !IS_ERR(fsetdir)) |
dput(fsetdir); |
izo_cleanup_ctxt(mnt, &save); |
return err; |
} |
|
int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data) |
{ |
int rc = 0; |
struct presto_cache *cache; |
struct vfsmount *mnt; |
struct run_ctxt save; |
struct nameidata nd; |
struct dentry *dentry; |
struct presto_dentry_data *dd; |
struct dentry *root; |
char *buf = NULL; |
|
ENTRY; |
|
|
root = dir->f_dentry; |
|
/* actually, needs to be called on ROOT of fset, not mount point |
if (root != root->d_inode->i_sb->s_root) { |
CERROR ("IOC_SET_FSET must be called on mount point\n"); |
return -ENODEV; |
} |
*/ |
|
cache = presto_get_cache(root->d_inode); |
mnt = cache->cache_vfsmount; |
if (!mnt) { |
EXIT; |
return -ENOMEM; |
} |
|
izo_setup_ctxt(root, mnt, &save); |
|
PRESTO_ALLOC(buf, data->ioc_plen1); |
if (!buf) { |
rc = -ENOMEM; |
EXIT; |
goto out; |
} |
if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { |
rc = -EFAULT; |
EXIT; |
goto out; |
} |
|
rc = presto_walk(buf, &nd); |
if (rc) { |
CERROR("Unable to open: %s\n", buf); |
EXIT; |
goto out; |
} |
dentry = nd.dentry; |
if (!dentry) { |
CERROR("no dentry!\n"); |
rc = -EINVAL; |
EXIT; |
goto out_close; |
} |
dd = presto_d2d(dentry); |
if (!dd) { |
CERROR("no dentry_data!\n"); |
rc = -EINVAL; |
EXIT; |
goto out_close; |
} |
|
CDEBUG(D_FILE,"de:%p dd:%p\n", dentry, dd); |
|
if (dd->remote_ino != 0) { |
CERROR("remote_ino already set? %Lx:%Lx\n", dd->remote_ino, |
dd->remote_generation); |
rc = 0; |
EXIT; |
goto out_close; |
} |
|
|
CDEBUG(D_FILE,"setting %p %p, %s to %Lx:%Lx\n", dentry, dd, |
buf, data->ioc_ino, |
data->ioc_generation); |
dd->remote_ino = data->ioc_ino; |
dd->remote_generation = data->ioc_generation; |
|
EXIT; |
out_close: |
path_release(&nd); |
out: |
if (buf) |
PRESTO_FREE(buf, data->ioc_plen1); |
izo_cleanup_ctxt(mnt, &save); |
return rc; |
} |
/Makefile
0,0 → 1,15
# |
# Makefile 1.00 Peter Braam <braam@clusterfs.com> |
# |
|
# Name of the combined object; it is reused below as the value of obj-m.
O_TARGET := intermezzo.o |

# Object files listed for this directory (by their names, one per
# InterMezzo source file).
obj-y := cache.o dcache.o dir.o ext_attr.o file.o fileset.o inode.o \ |
journal.o journal_ext2.o journal_ext3.o journal_obdfs.o \ |
journal_reiserfs.o journal_tmpfs.o journal_xfs.o kml_reint.o \ |
kml_unpack.o methods.o presto.o psdev.o replicator.o super.o \ |
sysctl.o upcall.o vfs.o |

# NOTE(review): obj-m is set to the combined object named above; how
# O_TARGET, obj-y and obj-m interact is defined by Rules.make, which is not
# visible here -- confirm against the tree's Rules.make.
obj-m := $(O_TARGET) |

# Shared build rules; TOPDIR is not set in this file and is presumably
# supplied by the invoking make.
include $(TOPDIR)/Rules.make |