OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k/tags/LINUX_2_4_26_OR32/linux/linux-2.4/fs/intermezzo
    from Rev 1279 to Rev 1765
    Reverse comparison

Rev 1279 → Rev 1765

/super.c
0,0 → 1,402
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* presto's super.c
*/
 
/* RCS identification string; tagged "unused" because nothing in this file
 * references it. */
static char rcsid[] __attribute ((unused)) = "$Id: super.c,v 1.1.1.1 2004-04-15 01:09:15 phoenix Exp $";
/* Version text spliced into the load banner printed by init_intermezzo_fs()
 * and into MODULE_DESCRIPTION. */
#define INTERMEZZO_VERSION "$Revision: 1.1.1.1 $"
 
#include <stdarg.h>
 
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
 
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#define __NO_VERSION__
#include <linux/module.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#ifdef PRESTO_DEBUG
/* Memory accounting counters printed by the "kmem %ld, vmem %ld" messages in
 * this file.  NOTE(review): presumably updated by PRESTO_ALLOC/PRESTO_FREE --
 * confirm in intermezzo_fs.h. */
long presto_vmemory = 0;
long presto_kmemory = 0;
#endif
 
/*
 * Try to read one "name=value" mount option.
 *
 * When `data` starts with the option name `opt` and contains an '=', a newly
 * allocated copy of everything after the first '=' is returned; the caller
 * owns it and frees it with PRESTO_FREE(ptr, strlen(ptr) + 1).  NULL is
 * returned when the name does not match, when there is no '=', or when the
 * allocation fails.
 */
static char *opt_read(const char *opt, char *data)
{
        char *eq;
        char *copy;

        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);

        /* only a prefix comparison: "data" must begin with the option name */
        if (strncmp(opt, data, strlen(opt)) != 0)
                return NULL;

        eq = strchr(data, '=');
        if (eq == NULL)
                return NULL;

        PRESTO_ALLOC(copy, strlen(eq + 1) + 1);
        if (copy == NULL) {
                CERROR("InterMezzo: Out of memory!\n");
                return NULL;
        }

        strcpy(copy, eq + 1);
        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, copy);
        return copy;
}
 
/*
 * Store a freshly parsed option value in *dst.
 *
 * Ownership of `opt` (allocated by opt_read()) passes to this function: it
 * either ends up in *dst, replacing and freeing any previous value, or, when
 * the caller passed no destination, it is freed here so the option is simply
 * discarded.
 */
static void opt_store(char **dst, char *opt)
{
        if (!dst) {
                CERROR("intermezzo: store_opt, error dst == NULL\n");
                /* nowhere to keep the value: drop it instead of
                 * dereferencing the NULL destination */
                if (opt)
                        PRESTO_FREE(opt, strlen(opt) + 1);
                return;
        }

        if (*dst)
                PRESTO_FREE(*dst, strlen(*dst) + 1);
        *dst = opt;
}
 
/*
 * Replace *dst with a newly allocated copy of `defval`.
 *
 * Any previous value in *dst is freed.  If `defval` is NULL or the copy
 * cannot be allocated, *dst is left NULL rather than pointing at the freed
 * string.  A NULL `dst` is reported and otherwise ignored.
 */
static void opt_set_default(char **dst, char *defval)
{
        char *def_alloced;

        if (!dst) {
                CERROR("intermezzo: opt_set_default, error dst == NULL\n");
                return;
        }

        if (*dst) {
                PRESTO_FREE(*dst, strlen(*dst) + 1);
                /* never leave a dangling pointer behind */
                *dst = NULL;
        }

        if (!defval)
                return;

        PRESTO_ALLOC(def_alloced, strlen(defval)+1);
        if (!def_alloced) {
                CERROR("InterMezzo: Out of memory!\n");
                return ;
        }
        strcpy(def_alloced, defval);
        *dst = def_alloced;
}
 
 
/*
 * Split the comma separated mount option string.
 *
 * "fileset=", "cache_type=" and "channel=" options are consumed and their
 * values stored through the matching pointer arguments.  Every other option
 * is appended, comma separated, to cache_data so that it can be handed to the
 * cache filesystem's read_super.  Before parsing, *cache_type receives its
 * default: "ext3" when the filesystem type name is "intermezzo", "tmpfs"
 * otherwise.
 *
 * `options` is modified in place (strtok).  Returns a pointer to the end of
 * the text written into cache_data; this equals cache_data when nothing was
 * copied or when either `options` or `cache_data` is NULL.
 */
static char *presto_options(struct super_block *sb,
                            char *options, char *cache_data,
                            char **cache_type, char **fileset,
                            char **channel)
{
        char *token;
        char *value;
        char *out = cache_data;

        /* set the defaults */
        if (strcmp(sb->s_type->name, "intermezzo") != 0)
                opt_set_default(cache_type, "tmpfs");
        else
                opt_set_default(cache_type, "ext3");

        if (options == NULL || cache_data == NULL)
                return out;

        CDEBUG(D_SUPER, "parsing options\n");
        token = strtok (options, ",");
        while (token != NULL) {
                CDEBUG(D_SUPER, "this_char %s\n", token);

                if ((value = opt_read("fileset", token)) != NULL) {
                        opt_store(fileset, value);
                } else if ((value = opt_read("cache_type", token)) != NULL) {
                        opt_store(cache_type, value);
                } else if ((value = opt_read("channel", token)) != NULL) {
                        opt_store(channel, value);
                } else {
                        /* not ours: pass it through to the cache filesystem */
                        const char *sep = (out != cache_data) ? "," : "";

                        out += sprintf(out, "%s%s", sep, token);
                }

                token = strtok (NULL, ",");
        }

        return out;
}
 
/*
 * Attach `cache` to a psdev channel.
 *
 * With a `channel` string the minor number is parsed from it; without one a
 * free channel is requested from izo_psdev_get_free_channel().  On success
 * the cache is linked onto the channel's uc_cache_list and the minor number
 * is returned.  A minor outside [0, MAX_CHANNEL) yields -EINVAL and leaves
 * the cache untouched.
 */
static int presto_set_channel(struct presto_cache *cache, char *channel)
{
        int minor;

        ENTRY;
        if (channel != NULL)
                minor = simple_strtoul(channel, NULL, 0);
        else
                minor = izo_psdev_get_free_channel();

        if (minor >= 0 && minor < MAX_CHANNEL) {
                cache->cache_psdev = &(izo_channels[minor]);
                list_add(&cache->cache_channel_list,
                         &cache->cache_psdev->uc_cache_list);

                EXIT;
                return minor;
        }

        CERROR("all channels in use or channel too large %d\n",
               minor);
        return -EINVAL;
}
 
/* We always need to remove the presto options before passing
mount options to cache FS */
struct super_block * presto_read_super(struct super_block * sb,
void * data, int silent)
{
struct file_system_type *fstype;
struct presto_cache *cache = NULL;
char *cache_data = NULL;
char *cache_data_end;
char *cache_type = NULL;
char *fileset = NULL;
char *channel = NULL;
int err;
unsigned int minor;
 
ENTRY;
 
/* reserve space for the cache's data */
PRESTO_ALLOC(cache_data, PAGE_SIZE);
if ( !cache_data ) {
CERROR("presto_read_super: Cannot allocate data page.\n");
EXIT;
goto out_err;
}
 
/* read and validate options */
cache_data_end = presto_options(sb, data, cache_data, &cache_type,
&fileset, &channel);
 
/* was there anything for the cache filesystem in the data? */
if (cache_data_end == cache_data) {
PRESTO_FREE(cache_data, PAGE_SIZE);
cache_data = NULL;
} else {
CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data,
cache_data);
}
 
/* set up the cache */
cache = presto_cache_init();
if ( !cache ) {
CERROR("presto_read_super: failure allocating cache.\n");
EXIT;
goto out_err;
}
cache->cache_type = cache_type;
 
/* link cache to channel */
minor = presto_set_channel(cache, channel);
if (minor < 0) {
EXIT;
goto out_err;
}
 
CDEBUG(D_SUPER, "Presto: type=%s, fset=%s, dev= %d, flags %x\n",
cache_type, fileset?fileset:"NULL", minor, cache->cache_flags);
 
MOD_INC_USE_COUNT;
 
/* get the filter for the cache */
fstype = get_fs_type(cache_type);
cache->cache_filter = filter_get_filter_fs((const char *)cache_type);
if ( !fstype || !cache->cache_filter) {
CERROR("Presto: unrecognized fs type or cache type\n");
MOD_DEC_USE_COUNT;
EXIT;
goto out_err;
}
 
/* can we in fact mount the cache */
if ((fstype->fs_flags & FS_REQUIRES_DEV) && !sb->s_bdev) {
CERROR("filesystem \"%s\" requires a valid block device\n",
cache_type);
MOD_DEC_USE_COUNT;
EXIT;
goto out_err;
}
 
sb = fstype->read_super(sb, cache_data, silent);
 
/* this might have been freed above */
if (cache_data) {
PRESTO_FREE(cache_data, PAGE_SIZE);
cache_data = NULL;
}
 
if ( !sb ) {
CERROR("InterMezzo: cache mount failure.\n");
MOD_DEC_USE_COUNT;
EXIT;
goto out_err;
}
 
cache->cache_sb = sb;
cache->cache_root = dget(sb->s_root);
 
/* we now know the dev of the cache: hash the cache */
presto_cache_add(cache, sb->s_dev);
err = izo_prepare_fileset(sb->s_root, fileset);
 
filter_setup_journal_ops(cache->cache_filter, cache->cache_type);
 
/* make sure we have our own super operations: sb
still contains the cache operations */
filter_setup_super_ops(cache->cache_filter, sb->s_op,
&presto_super_ops);
sb->s_op = filter_c2usops(cache->cache_filter);
 
/* get izo directory operations: sb->s_root->d_inode exists now */
filter_setup_dir_ops(cache->cache_filter, sb->s_root->d_inode,
&presto_dir_iops, &presto_dir_fops);
filter_setup_dentry_ops(cache->cache_filter, sb->s_root->d_op,
&presto_dentry_ops);
sb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter);
sb->s_root->d_inode->i_fop = filter_c2udfops(cache->cache_filter);
sb->s_root->d_op = filter_c2udops(cache->cache_filter);
 
EXIT;
return sb;
 
out_err:
CDEBUG(D_SUPER, "out_err called\n");
if (cache)
PRESTO_FREE(cache, sizeof(struct presto_cache));
if (cache_data)
PRESTO_FREE(cache_data, PAGE_SIZE);
if (fileset)
PRESTO_FREE(fileset, strlen(fileset) + 1);
if (channel)
PRESTO_FREE(channel, strlen(channel) + 1);
if (cache_type)
PRESTO_FREE(cache_type, strlen(cache_type) + 1);
 
CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n",
presto_kmemory, presto_vmemory);
return NULL;
}
 
 
 
/*
 * Filesystem type declarations.  Both types mount through
 * presto_read_super().  The FS_REQUIRES_DEV type is named "izo" when
 * PRESTO_DEVEL is defined and "intermezzo" otherwise; "vintermezzo" is
 * declared with FS_LITTER.  presto_options() selects "ext3" as the default
 * cache type only when the type name is exactly "intermezzo".
 */
#ifdef PRESTO_DEVEL
static DECLARE_FSTYPE(presto_fs_type, "izo", presto_read_super, FS_REQUIRES_DEV);
static DECLARE_FSTYPE(vpresto_fs_type, "vintermezzo", presto_read_super, FS_LITTER);
#else
static DECLARE_FSTYPE(vpresto_fs_type, "vintermezzo", presto_read_super, FS_LITTER);
static DECLARE_FSTYPE(presto_fs_type, "intermezzo", presto_read_super, FS_REQUIRES_DEV);
#endif
 
 
 
/*
 * Module initialisation: set up the psdev channels, the sysctl entries, the
 * cache hash and the dentry-data cache, then register both filesystem types.
 *
 * Returns 0 on success.  On failure the steps already completed are undone
 * in reverse order and the error code of the failing step is returned, so a
 * failed load leaves nothing registered.  A sysctl setup failure is only
 * logged, as before.
 *
 * NOTE(review): the unwind path calls the same cleanup routines as
 * exit_intermezzo_fs(); confirm they are not placed in an __exit section
 * that is discarded for built-in configurations.
 */
int __init init_intermezzo_fs(void)
{
        int status;

        printk(KERN_INFO "InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION
               " info@clusterfs.com\n");

        status = presto_psdev_init();
        if ( status ) {
                CERROR("Problem (%d) in init_intermezzo_psdev\n", status);
                return status;
        }

        /* not fatal: carry on without sysctl entries */
        status = init_intermezzo_sysctl();
        if (status) {
                CERROR("presto: failed in init_intermezzo_sysctl!\n");
        }

        presto_cache_init_hash();

        if (!presto_init_ddata_cache()) {
                CERROR("presto out of memory!\n");
                status = -ENOMEM;
                goto out_sysctl;
        }

        status = register_filesystem(&presto_fs_type);
        if (status) {
                CERROR("presto: failed in register_filesystem!\n");
                goto out_ddata;
        }

        status = register_filesystem(&vpresto_fs_type);
        if (status) {
                CERROR("vpresto: failed in register_filesystem!\n");
                goto out_unregister;
        }

        return 0;

out_unregister:
        /* do not leave the first type registered for a module that failed
         * to load */
        unregister_filesystem(&presto_fs_type);
out_ddata:
        presto_cleanup_ddata_cache();
out_sysctl:
        /* exit_intermezzo_fs() also calls this unconditionally */
        cleanup_intermezzo_sysctl();
        presto_psdev_cleanup();
        return status;
}
 
/*
 * Module teardown: unregister both filesystem types (failures are logged
 * only), then release the psdev, sysctl and dentry-data cache state and
 * print the memory counters.
 */
void __exit exit_intermezzo_fs(void)
{
        int rc;

        ENTRY;

        rc = unregister_filesystem(&presto_fs_type);
        if (rc != 0)
                CERROR("presto: failed to unregister filesystem\n");

        rc = unregister_filesystem(&vpresto_fs_type);
        if (rc != 0)
                CERROR("vpresto: failed to unregister filesystem\n");

        presto_psdev_cleanup();
        cleanup_intermezzo_sysctl();
        presto_cleanup_ddata_cache();

        CERROR("after cleanup: kmem %ld, vmem %ld\n",
               presto_kmemory, presto_vmemory);
}
 
 
/* Module metadata, followed by the load/unload entry points defined above. */
MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION);
MODULE_LICENSE("GPL");
 
module_init(init_intermezzo_fs)
module_exit(exit_intermezzo_fs)
/vfs.c
0,0 → 1,2465
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc.
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* vfs.c
*
* This file implements kernel downcalls from lento.
*
* Author: Rob Simmonds <simmonds@stelias.com>
* Andreas Dilger <adilger@stelias.com>
* Copyright (C) 2000 Stelias Computing Inc
* Copyright (C) 2000 Red Hat Inc.
*
* Extended attribute support
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
*
* This code is based on code from namei.c in the linux file system;
* see copyright notice below.
*/
 
/** namei.c copyright **/
 
/*
* linux/fs/namei.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
 
/*
* Some corrections by tytso.
*/
 
/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
* lookup logic.
*/
 
/** end of namei.c copyright **/
 
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/smp_lock.h>
#include <linux/quotaops.h>
 
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <asm/semaphore.h>
#include <asm/pgtable.h>
 
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/blk.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#ifdef CONFIG_FS_EXT_ATTR
# include <linux/ext_attr.h>
 
# ifdef CONFIG_FS_POSIX_ACL
# include <linux/posix_acl.h>
# endif
#endif
 
extern struct inode_operations presto_sym_iops;
 
/* Write the last_rcvd values to the last_rcvd file. We don't know what the
* UUID or last_ctime values are, so we have to read from the file first
* (sigh).
* exported for branch_reinter in kml_reint.c*/
int presto_write_last_rcvd(struct rec_info *recinfo,
struct presto_file_set *fset,
struct lento_vfs_context *info)
{
int rc;
struct izo_rcvd_rec rcvd_rec;
 
ENTRY;
 
memset(&rcvd_rec, 0, sizeof(rcvd_rec));
memcpy(rcvd_rec.lr_uuid, info->uuid, sizeof(rcvd_rec.lr_uuid));
rcvd_rec.lr_remote_recno = HTON__u64(info->recno);
rcvd_rec.lr_remote_offset = HTON__u64(info->kml_offset);
rcvd_rec.lr_local_recno = HTON__u64(recinfo->recno);
rcvd_rec.lr_local_offset = HTON__u64(recinfo->offset + recinfo->size);
 
rc = izo_rcvd_write(fset, &rcvd_rec);
if (rc < 0) {
/* izo_rcvd_write returns negative errors and non-negative
* offsets */
CERROR("InterMezzo: izo_rcvd_write failed: %d\n", rc);
EXIT;
return rc;
}
EXIT;
return 0;
}
 
/*
 * Sticky-bit check for removing `inode` from directory `dir`.
 *
 * Returns 0 when the directory does not have S_ISVTX set, or when the
 * current fsuid owns either the inode or the directory.  Otherwise returns
 * non-zero unless the caller has CAP_FOWNER.
 */
static inline int check_sticky(struct inode *dir, struct inode *inode)
{
        if ((dir->i_mode & S_ISVTX) == 0 ||
            inode->i_uid == current->fsuid ||
            dir->i_uid == current->fsuid)
                return 0;

        return !capable(CAP_FOWNER);
}
 
/* from linux/fs/namei.c */
static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
{
int error;
if (!victim->d_inode || victim->d_parent->d_inode != dir)
return -ENOENT;
error = permission(dir,MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
IS_IMMUTABLE(victim->d_inode))
return -EPERM;
if (isdir) {
if (!S_ISDIR(victim->d_inode->i_mode))
return -ENOTDIR;
if (IS_ROOT(victim))
return -EBUSY;
} else if (S_ISDIR(victim->d_inode->i_mode))
return -EISDIR;
return 0;
}
 
/* from linux/fs/namei.c */
static inline int may_create(struct inode *dir, struct dentry *child) {
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
return permission(dir,MAY_WRITE | MAY_EXEC);
}
 
#ifdef PRESTO_DEBUG
/* BLKDEV_FAIL makes the device underneath a fileset stop accepting writes.
 * With the loop-device patch (CONFIG_LOOP_DISCARD) loop_discard_io() accepts
 * writes and throws them away.  Without it the device is set read-only,
 * which keeps dirty buffers in memory -- something ext3 did not like at all.
 */
#ifdef CONFIG_LOOP_DISCARD
#define BLKDEV_FAIL(dev,fail) loop_discard_io(dev,fail)
#else
#define BLKDEV_FAIL(dev,fail) set_device_ro(dev, 1)
#endif

/* Debug breakpoint: when the channel's errorval (settable through
 * /proc/sys/intermezzo/intermezzoX/errorval) is non-zero and equals "value",
 * and the device is not already read-only, fail the device now and store the
 * negated device number in errorval.
 */
inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
                                     unsigned long value)
{
        int chan = presto_f2m(fset);
        int trigger = izo_channels[chan].uc_errorval;
        kdev_t dev = fset->fset_dentry->d_inode->i_dev;

        if (!trigger || trigger != (long)value || is_read_only(dev))
                return;

        CDEBUG(D_SUPER, "setting device %s read only\n", kdevname(dev));
        BLKDEV_FAIL(dev, 1);
        izo_channels[chan].uc_errorval = -dev;
}
#else
#define presto_debug_fail_blkdev(dev,value) do {} while (0)
#endif
 
 
/*
 * Returns 1 when the operation should be journalled to the KML: the context
 * has LENTO_FL_KML set and the dentry is not marked PRESTO_DONT_JOURNAL.
 * Returns 0 otherwise.
 */
static inline int presto_do_kml(struct lento_vfs_context *info,
                                struct dentry *dentry)
{
        return (info->flags & LENTO_FL_KML) &&
               !presto_chk(dentry, PRESTO_DONT_JOURNAL);
}
 
/*
 * Returns 1 when a last_rcvd record should be written: the context has
 * LENTO_FL_EXPECT set and the dentry is not marked PRESTO_DONT_JOURNAL.
 * Returns 0 otherwise.
 */
static inline int presto_do_rcvd(struct lento_vfs_context *info,
                                 struct dentry *dentry)
{
        return (info->flags & LENTO_FL_EXPECT) &&
               !presto_chk(dentry, PRESTO_DONT_JOURNAL);
}
 
 
/* XXX fixme: this should not fail, all these dentries are in memory
when _we_ call this */
int presto_settime(struct presto_file_set *fset,
struct dentry *newobj,
struct dentry *parent,
struct dentry *target,
struct lento_vfs_context *ctx,
int valid)
{
int error = 0;
struct dentry *dentry;
struct inode *inode;
struct inode_operations *iops;
struct iattr iattr;
 
ENTRY;
if (ctx->flags & LENTO_FL_IGNORE_TIME ) {
EXIT;
return 0;
}
 
iattr.ia_ctime = ctx->updated_time;
iattr.ia_mtime = ctx->updated_time;
iattr.ia_valid = valid;
 
while (1) {
if (parent && ctx->flags & LENTO_FL_TOUCH_PARENT) {
dentry = parent;
parent = NULL;
} else if (newobj && ctx->flags & LENTO_FL_TOUCH_NEWOBJ) {
dentry = newobj;
newobj = NULL;
} else if (target) {
dentry = target;
target = NULL;
} else
break;
 
inode = dentry->d_inode;
 
error = -EROFS;
if (IS_RDONLY(inode)) {
EXIT;
return -EROFS;
}
 
if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
EXIT;
return -EPERM;
}
 
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops) {
EXIT;
return error;
}
 
if (iops->setattr != NULL)
error = iops->setattr(dentry, &iattr);
else {
error = 0;
inode_setattr(dentry->d_inode, &iattr);
}
}
EXIT;
return error;
}
 
/*
 * Snapshot the inode attributes kept for rollback: mode and rdev are stored
 * as __u32, uid and gid as __u64.
 */
void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb)
{
        /* ownership */
        rb->rb_uid  = (__u64)inode->i_uid;
        rb->rb_gid  = (__u64)inode->i_gid;

        /* file type, permissions and device number */
        rb->rb_mode = (__u32)inode->i_mode;
        rb->rb_rdev = (__u32)inode->i_rdev;
}
 
 
/*
 * Journal the close of `file` and cancel its LML record.
 *
 * Steps:
 *   1. reserve PRESTO_REQHIGH space in the cache;
 *   2. in a KML_OPCODE_RELEASE transaction, journal the close when the file
 *      data has LENTO_FL_KML set and write last_rcvd when it has
 *      LENTO_FL_EXPECT set;
 *   3. in a second transaction (KML_OPCODE_WRITE), clear the LML close
 *      record at fd_lml_offset and truncate the LML.
 *
 * The reserved space is released on every exit path.  Returns 0 or a
 * positive value from presto_clear_lml_close() on success, a negative error
 * otherwise (-ENOSPC when a transaction cannot be started).
 */
int presto_do_close(struct presto_file_set *fset, struct file *file)
{
        struct rec_info rec;
        int rc;
        void *handle;
        struct inode *inode = file->f_dentry->d_inode;
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;

        ENTRY;
        /* rec is read by presto_write_last_rcvd() even when no KML record
         * was journalled, so it must not hold stack garbage */
        memset(&rec, 0, sizeof(rec));

        presto_getversion(&fdata->fd_info.remote_version, inode);

        rc = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (rc) {
                EXIT;
                return rc;
        }

        handle = presto_trans_start(fset, file->f_dentry->d_inode,
                                    KML_OPCODE_RELEASE);
        if ( IS_ERR(handle) ) {
                CERROR("presto_release: no space for transaction\n");
                /* rc is 0 here: report the failure and give the
                 * reservation back */
                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
                EXIT;
                return -ENOSPC;
        }

        if (fdata->fd_info.flags & LENTO_FL_KML)
                rc = presto_journal_close(&rec, fset, file, file->f_dentry,
                                          &fdata->fd_version,
                                          &fdata->fd_info.remote_version);
        if (rc) {
                CERROR("presto_close: cannot journal close\n");
                goto out;
        }

        if (fdata->fd_info.flags & LENTO_FL_EXPECT)
                rc = presto_write_last_rcvd(&rec, fset, &fdata->fd_info);

        if (rc) {
                CERROR("presto_close: cannot journal last_rcvd\n");
                goto out;
        }
        presto_trans_commit(fset, handle);

        /* cancel the LML record */
        handle = presto_trans_start(fset, inode, KML_OPCODE_WRITE);
        if ( IS_ERR(handle) ) {
                CERROR("presto_release: no space for clear\n");
                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
                EXIT;
                return -ENOSPC;
        }

        rc = presto_clear_lml_close(fset, fdata->fd_lml_offset);
        if (rc < 0 ) {
                CERROR("presto_close: cannot clear LML close record\n");
                goto out;
        }
        presto_truncate_lml(fset);

out:
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        presto_trans_commit(fset, handle);
        EXIT;
        return rc;
}
 
/*
 * Apply `iattr` to the inode behind `dentry` and journal the change.
 *
 * The attribute change goes through the cache filesystem's setattr method
 * when it has one, otherwise through inode_change_ok()/inode_setattr(); a
 * size change is followed by vmtruncate().  When journalling is enabled for
 * the dentry, a close record is written first if the size really changed,
 * then the setattr record; a last_rcvd record follows when expected.
 *
 * Returns 0 on success or a negative error: -EROFS / -EPERM for read-only,
 * immutable or append-only inodes, -ENOSPC when no transaction can be
 * started, otherwise the first error from the setattr or journal steps.
 * 2*PRESTO_REQHIGH of cache space is reserved for the duration and released
 * on every path.
 */
int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry,
                      struct iattr *iattr, struct lento_vfs_context *info)
{
        struct rec_info rec;
        struct inode *inode = dentry->d_inode;
        struct inode_operations *iops;
        int error;
        struct presto_version old_ver, new_ver;
        struct izo_rollback_data rb;
        void *handle;
        loff_t old_size=inode->i_size;

        ENTRY;
        /* rec is read by presto_write_last_rcvd() even when nothing was
         * journalled, so start from a defined state */
        memset(&rec, 0, sizeof(rec));

        if (IS_RDONLY(inode)) {
                EXIT;
                return -EROFS;
        }

        if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
                EXIT;
                return -EPERM;
        }

        presto_getversion(&old_ver, dentry->d_inode);
        izo_get_rollback_data(dentry->d_inode, &rb);
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);

        error = presto_reserve_space(fset->fset_cache, 2*PRESTO_REQHIGH);
        if (error) {
                EXIT;
                return error;
        }

        if (iattr->ia_valid & ATTR_SIZE) {
                if (izo_mark_dentry(dentry, ~PRESTO_DATA, 0, NULL) != 0)
                        CERROR("izo_mark_dentry(inode %ld, ~PRESTO_DATA) "
                               "failed\n", dentry->d_inode->i_ino);
                handle = presto_trans_start(fset, dentry->d_inode,
                                            KML_OPCODE_TRUNC);
        } else {
                handle = presto_trans_start(fset, dentry->d_inode,
                                            KML_OPCODE_SETATTR);
        }

        if ( IS_ERR(handle) ) {
                CERROR("presto_do_setattr: no space for transaction\n");
                presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH);
                EXIT;
                return -ENOSPC;
        }

        if (dentry->d_inode && iops && iops->setattr) {
                error = iops->setattr(dentry, iattr);
        } else {
                error = inode_change_ok(dentry->d_inode, iattr);
                if (!error)
                        inode_setattr(inode, iattr);
        }

        if (!error && (iattr->ia_valid & ATTR_SIZE))
                vmtruncate(inode, iattr->ia_size);

        if (error) {
                EXIT;
                goto exit;
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x10);

        if ( presto_do_kml(info, dentry) ) {
                if ((iattr->ia_valid & ATTR_SIZE) && (old_size != inode->i_size)) {
                        struct file file;
                        /* Journal a close whenever we see a potential truncate
                         * At the receiving end, lento should explicitly remove
                         * ATTR_SIZE from the list of valid attributes */
                        presto_getversion(&new_ver, inode);
                        /* only these two fields are set up; zero the rest so
                         * the journal code never sees stack garbage */
                        memset(&file, 0, sizeof(file));
                        file.private_data = NULL;
                        file.f_dentry = dentry;
                        error = presto_journal_close(&rec, fset, &file, dentry,
                                                     &old_ver, &new_ver);
                }

                if (!error)
                        error = presto_journal_setattr(&rec, fset, dentry,
                                                       &old_ver, &rb, iattr);
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x20);
        /* do not let a successful last_rcvd write hide a journal failure */
        if ( !error && presto_do_rcvd(info, dentry) )
                error = presto_write_last_rcvd(&rec, fset, info);

        presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x30);

        EXIT;
exit:
        presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH);
        presto_trans_commit(fset, handle);
        return error;
}
 
int lento_setattr(const char *name, struct iattr *iattr,
struct lento_vfs_context *info)
{
struct nameidata nd;
struct dentry *dentry;
struct presto_file_set *fset;
int error;
#ifdef CONFIG_FS_POSIX_ACL
int (*set_posix_acl)(struct inode *, int type, posix_acl_t *)=NULL;
#endif
 
ENTRY;
CDEBUG(D_PIOCTL,"name %s, valid %#x, mode %#o, uid %d, gid %d, size %Ld\n",
name, iattr->ia_valid, iattr->ia_mode, iattr->ia_uid,
iattr->ia_gid, iattr->ia_size);
CDEBUG(D_PIOCTL, "atime %#lx, mtime %#lx, ctime %#lx, attr_flags %#x\n",
iattr->ia_atime, iattr->ia_mtime, iattr->ia_ctime,
iattr->ia_attr_flags);
CDEBUG(D_PIOCTL, "offset %d, recno %d, flags %#x\n",
info->slot_offset, info->recno, info->flags);
 
lock_kernel();
error = presto_walk(name, &nd);
if (error) {
EXIT;
goto exit;
}
dentry = nd.dentry;
fset = presto_fset(dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
EXIT;
goto exit_lock;
}
 
/* NOTE: this prevents us from changing the filetype on setattr,
* as we normally only want to change permission bits.
* If this is not correct, then we need to fix the perl code
* to always send the file type OR'ed with the permission.
*/
if (iattr->ia_valid & ATTR_MODE) {
int set_mode = iattr->ia_mode;
iattr->ia_mode = (iattr->ia_mode & S_IALLUGO) |
(dentry->d_inode->i_mode & ~S_IALLUGO);
CDEBUG(D_PIOCTL, "chmod: orig %#o, set %#o, result %#o\n",
dentry->d_inode->i_mode, set_mode, iattr->ia_mode);
#ifdef CONFIG_FS_POSIX_ACL
/* ACl code interacts badly with setattr
* since it tries to modify the ACL using
* set_ext_attr which recurses back into presto.
* This only happens if ATTR_MODE is set.
* Here we are doing a "forced" mode set
* (initiated by lento), so we disable the
* set_posix_acl operation which
* prevents such recursion. -SHP
*
* This will probably still be required when native
* acl journalling is in place.
*/
set_posix_acl=dentry->d_inode->i_op->set_posix_acl;
dentry->d_inode->i_op->set_posix_acl=NULL;
#endif
}
 
error = presto_do_setattr(fset, dentry, iattr, info);
 
if (info->flags & LENTO_FL_SET_DDFILEID) {
struct presto_dentry_data *dd = presto_d2d(dentry);
if (dd) {
dd->remote_ino = info->remote_ino;
dd->remote_generation = info->remote_generation;
}
}
 
#ifdef CONFIG_FS_POSIX_ACL
/* restore the inode_operations if we changed them*/
if (iattr->ia_valid & ATTR_MODE)
dentry->d_inode->i_op->set_posix_acl=set_posix_acl;
#endif
 
 
EXIT;
exit_lock:
path_release(&nd);
exit:
unlock_kernel();
return error;
}
 
/*
 * Create a regular file `dentry` in directory `dir` through the cache
 * filesystem and journal the operation.
 *
 * Only the S_IALLUGO bits of `mode` are used; S_IFREG is forced.  The
 * directory's i_zombie semaphore is held and PRESTO_REQHIGH of cache space
 * is reserved for the duration; both are released exactly once on every
 * path.  After a successful create the InterMezzo inode and dentry
 * operations are installed, parent and new file times are updated, and the
 * KML and last_rcvd records are written as requested by `info`.
 *
 * Returns 0 on success or a negative error: the may_create() result, -EPERM
 * when the cache has no create method, -ENOSPC when no transaction can be
 * started, or the first error from the create, settime or journal steps.
 */
int presto_do_create(struct presto_file_set *fset, struct dentry *dir,
                     struct dentry *dentry, int mode,
                     struct lento_vfs_context *info)
{
        struct rec_info rec;
        int error;
        struct presto_version tgt_dir_ver, new_file_ver;
        struct inode_operations *iops;
        void *handle;

        ENTRY;
        /* rec is read by presto_write_last_rcvd() even when nothing was
         * journalled, so start from a defined state */
        memset(&rec, 0, sizeof(rec));

        mode &= S_IALLUGO;
        mode |= S_IFREG;

        down(&dir->d_inode->i_zombie);
        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (error) {
                EXIT;
                up(&dir->d_inode->i_zombie);
                return error;
        }

        error = may_create(dir->d_inode, dentry);
        if (error) {
                EXIT;
                goto exit_pre_lock;
        }

        error = -EPERM;
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
        if (!iops || !iops->create) {
                EXIT;
                goto exit_pre_lock;
        }

        presto_getversion(&tgt_dir_ver, dir->d_inode);
        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_CREATE);
        if ( IS_ERR(handle) ) {
                EXIT;
                /* the reservation is released once, at exit_pre_lock */
                CERROR("presto_do_create: no space for transaction\n");
                error=-ENOSPC;
                goto exit_pre_lock;
        }
        DQUOT_INIT(dir->d_inode);
        lock_kernel();
        error = iops->create(dir->d_inode, dentry, mode);
        if (error) {
                EXIT;
                goto exit_lock;
        }

        if (dentry->d_inode) {
                struct presto_cache *cache = fset->fset_cache;
                /* was this already done? */
                presto_set_ops(dentry->d_inode, cache->cache_filter);

                filter_setup_dentry_ops(cache->cache_filter,
                                        dentry->d_op,
                                        &presto_dentry_ops);
                dentry->d_op = filter_c2udops(cache->cache_filter);

                /* if Lento creates this file, we won't have data */
                if ( ISLENTO(presto_c2m(cache)) ) {
                        presto_set(dentry, PRESTO_ATTR);
                } else {
                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
                }
        }

        info->flags |= LENTO_FL_TOUCH_PARENT;
        error = presto_settime(fset, NULL, dir, dentry,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit_lock;
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x10);

        if ( presto_do_kml(info, dentry) ) {
                presto_getversion(&new_file_ver, dentry->d_inode);
                error = presto_journal_create(&rec, fset, dentry, &tgt_dir_ver,
                                              &new_file_ver,
                                              dentry->d_inode->i_mode);
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x20);

        /* do not let a successful last_rcvd write hide a journal failure */
        if ( !error && presto_do_rcvd(info, dentry) )
                error = presto_write_last_rcvd(&rec, fset, info);

        presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x30);

        /* add inode dentry; the returned dentry is not needed here */
        if (fset->fset_cache->cache_filter->o_trops->tr_add_ilookup ) {
                fset->fset_cache->cache_filter->o_trops->tr_add_ilookup
                        (dir->d_inode->i_sb->s_root, dentry);
        }

        EXIT;

exit_lock:
        unlock_kernel();
        presto_trans_commit(fset, handle);
exit_pre_lock:
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        up(&dir->d_inode->i_zombie);
        return error;
}
 
/*
 * Look up the last path component for a create-type operation
 * (from namei.c).
 *
 * Takes i_sem of the parent directory inode (nd->dentry->d_inode) and does
 * NOT release it, not even on failure; the caller must up() it.
 *
 * Returns the dentry for nd->last, or an ERR_PTR:
 *   -EEXIST  the last component is not a normal name
 *   -ENOENT  a non-directory create whose name is followed by more
 *            characters and whose dentry is negative
 * or whatever error lookup_hash() produced.
 */
static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
{
        struct dentry *found;

        down(&nd->dentry->d_inode->i_sem);

        if (nd->last_type != LAST_NORM)
                return ERR_PTR(-EEXIST);

        found = lookup_hash(&nd->last, nd->dentry);
        if (IS_ERR(found))
                return found;

        if (!is_dir && nd->last.name[nd->last.len] && !found->d_inode) {
                dput(found);
                return ERR_PTR(-ENOENT);
        }

        return found;
}
 
/*
 * Downcall from lento: create the regular file `name`.
 *
 * The parent directory is resolved with LOOKUP_PARENT, the final component
 * is looked up with lookup_create() (which takes the parent's i_sem), and
 * the file is created with presto_do_create() using the S_IALLUGO bits of
 * `mode`.
 *
 * Returns 0 or a negative error: the getname(), path_walk() or
 * lookup_create() error, -EINVAL when the dentry has no fileset, or the
 * presto_do_create() result.  The pathname buffer, the nameidata reference,
 * the dentry reference and the parent's i_sem are each released on exactly
 * the paths that acquired them.
 */
int lento_create(const char *name, int mode, struct lento_vfs_context *info)
{
        int error;
        struct nameidata nd;
        char * pathname;
        struct dentry *dentry;
        struct presto_file_set *fset;

        ENTRY;
        pathname = getname(name);
        if (IS_ERR(pathname)) {
                EXIT;
                return PTR_ERR(pathname);
        }

        /* this looks up the parent */
        /* error must start at 0: path_walk() is only called when
         * path_init() returns non-zero */
        error = 0;
        if (path_init(pathname, LOOKUP_PARENT, &nd))
                error = path_walk(pathname, &nd);
        if (error) {
                EXIT;
                goto exit_name;
        }

        /* lookup_create() takes the parent's i_sem even when it fails */
        dentry = lookup_create(&nd, 0);
        if (IS_ERR(dentry)) {
                error = PTR_ERR(dentry);
                EXIT;
                goto exit_unlock;
        }

        fset = presto_fset(dentry);
        if ( !fset ) {
                error = -EINVAL;
                CERROR("No fileset!\n");
                EXIT;
                goto exit_dput;
        }
        error = presto_do_create(fset, dentry->d_parent, dentry, (mode&S_IALLUGO)|S_IFREG,
                                 info);

        EXIT;

exit_dput:
        dput(dentry);
exit_unlock:
        /* unlock through nd while its reference is still held; never go
         * through a dentry that may be an ERR_PTR or already put */
        up(&nd.dentry->d_inode->i_sem);
        path_release (&nd);
exit_name:
        putname(pathname);
        return error;
}
 
int presto_do_link(struct presto_file_set *fset, struct dentry *old_dentry,
struct dentry *dir, struct dentry *new_dentry,
struct lento_vfs_context *info)
{
struct rec_info rec;
struct inode *inode;
int error;
struct inode_operations *iops;
struct presto_version tgt_dir_ver;
struct presto_version new_link_ver;
void *handle;
 
down(&dir->d_inode->i_zombie);
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
if (error) {
EXIT;
up(&dir->d_inode->i_zombie);
return error;
}
error = -ENOENT;
inode = old_dentry->d_inode;
if (!inode)
goto exit_lock;
 
error = may_create(dir->d_inode, new_dentry);
if (error)
goto exit_lock;
 
error = -EXDEV;
if (dir->d_inode->i_dev != inode->i_dev)
goto exit_lock;
 
/*
* A link to an append-only or immutable file cannot be created.
*/
error = -EPERM;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
EXIT;
goto exit_lock;
}
 
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops->link) {
EXIT;
goto exit_lock;
}
 
 
presto_getversion(&tgt_dir_ver, dir->d_inode);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_LINK);
if ( IS_ERR(handle) ) {
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
CERROR("presto_do_link: no space for transaction\n");
return -ENOSPC;
}
 
DQUOT_INIT(dir->d_inode);
lock_kernel();
error = iops->link(old_dentry, dir->d_inode, new_dentry);
unlock_kernel();
if (error) {
EXIT;
goto exit_lock;
}
 
/* link dd data to that of existing dentry */
old_dentry->d_op->d_release(new_dentry);
if (!presto_d2d(old_dentry))
BUG();
presto_d2d(old_dentry)->dd_count++;
 
new_dentry->d_fsdata = presto_d2d(old_dentry);
 
info->flags |= LENTO_FL_TOUCH_PARENT;
error = presto_settime(fset, NULL, dir, new_dentry,
info, ATTR_CTIME);
if (error) {
EXIT;
goto exit_lock;
}
 
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x10);
presto_getversion(&new_link_ver, new_dentry->d_inode);
if ( presto_do_kml(info, old_dentry) )
error = presto_journal_link(&rec, fset, old_dentry, new_dentry,
&tgt_dir_ver, &new_link_ver);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x20);
if ( presto_do_rcvd(info, old_dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x30);
EXIT;
presto_trans_commit(fset, handle);
exit_lock:
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
up(&dir->d_inode->i_zombie);
return error;
}
 
 
/*
 * Downcall from lento: create hard link `newname` to `oldname`.
 *
 * Both names are copied in with getname(); the old path is resolved with
 * LOOKUP_POSITIVE and the new path's parent with LOOKUP_PARENT.  Linking
 * across mounts is refused.  The final component is looked up with
 * lookup_create(), which takes the parent's i_sem even when it fails, so
 * the semaphore is released on both outcomes.
 *
 * Returns 0 or a negative error: the getname() / path_walk() /
 * lookup_create() error, -EXDEV for different mounts, -EINVAL when the new
 * dentry has no fileset, or the presto_do_link() result.
 */
int lento_link(const char * oldname, const char * newname,
               struct lento_vfs_context *info)
{
        int error;
        char * from;
        char * to;
        struct presto_file_set *fset;

        from = getname(oldname);
        if(IS_ERR(from))
                return PTR_ERR(from);
        to = getname(newname);
        error = PTR_ERR(to);
        if (!IS_ERR(to)) {
                struct dentry *new_dentry;
                struct nameidata nd, old_nd;

                error = 0;
                if (path_init(from, LOOKUP_POSITIVE, &old_nd))
                        error = path_walk(from, &old_nd);
                if (error)
                        goto exit;
                if (path_init(to, LOOKUP_PARENT, &nd))
                        error = path_walk(to, &nd);
                if (error)
                        goto out;
                error = -EXDEV;
                if (old_nd.mnt != nd.mnt)
                        goto out;
                new_dentry = lookup_create(&nd, 0);
                error = PTR_ERR(new_dentry);

                if (!IS_ERR(new_dentry)) {
                        fset = presto_fset(new_dentry);
                        if ( !fset ) {
                                error = -EINVAL;
                                CERROR("No fileset!\n");
                                EXIT;
                        } else {
                                error = presto_do_link(fset, old_nd.dentry,
                                                       nd.dentry,
                                                       new_dentry, info);
                        }
                        /* drop the lookup reference on both outcomes */
                        dput(new_dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
                path_release(&nd);
out:
                path_release(&old_nd);
exit:
                putname(to);
        }
        putname(from);

        return error;
}
 
/*
 * Remove the directory entry 'dentry' from 'dir' through the cache
 * filesystem's unlink method and journal the operation.
 *
 * The parent's i_zombie semaphore is held from entry until the namespace
 * change and the time updates are done; the KML / last_rcvd records are
 * written after it has been dropped.  For a symlink the old target is read
 * first so that it can be passed to presto_journal_unlink().
 *
 * Returns 0 on success or a negative errno (from may_delete(), the space
 * reservation, the cache's unlink method, presto_settime() or the journal
 * writers; -EPERM if the cache has no unlink method, -EBUSY for a mount
 * point, -ENOSPC if no transaction could be started, -ENOMEM if the
 * symlink target buffer could not be allocated).
 */
int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir,
                     struct dentry *dentry, struct lento_vfs_context *info)
{
        struct rec_info rec;
        struct inode_operations *iops;
        struct presto_version tgt_dir_ver, old_file_ver;
        struct izo_rollback_data rb;
        void *handle;
        int do_kml = 0, do_rcvd = 0, linkno = 0, error, old_targetlen = 0;
        int dentry_held = 0;    /* set while we own the extra dget() below */
        char *old_target = NULL;

        ENTRY;
        down(&dir->d_inode->i_zombie);
        error = may_delete(dir->d_inode, dentry, 0);
        if (error) {
                EXIT;
                up(&dir->d_inode->i_zombie);
                return error;
        }

        error = -EPERM;
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
        if (!iops->unlink) {
                EXIT;
                up(&dir->d_inode->i_zombie);
                return error;
        }

        error = presto_reserve_space(fset->fset_cache, PRESTO_REQLOW);
        if (error) {
                EXIT;
                up(&dir->d_inode->i_zombie);
                return error;
        }


        /* Detach the per-inode dentry when the last link is going away. */
        if (presto_d2d(dentry)) {
                struct presto_dentry_data *dd = presto_d2d(dentry);
                struct dentry *de = dd->dd_inodentry;
                if (de && dentry->d_inode->i_nlink == 1) {
                        dd->dd_count--;
                        dd->dd_inodentry = NULL;
                        de->d_fsdata = NULL;
                        atomic_dec(&de->d_inode->i_count);
                        de->d_inode = NULL;
                        dput(de);
                }
        }

        presto_getversion(&tgt_dir_ver, dir->d_inode);
        presto_getversion(&old_file_ver, dentry->d_inode);
        izo_get_rollback_data(dentry->d_inode, &rb);
        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_UNLINK);
        if ( IS_ERR(handle) ) {
                presto_release_space(fset->fset_cache, PRESTO_REQLOW);
                CERROR("ERROR: presto_do_unlink: no space for transaction. Tell Peter.\n");
                up(&dir->d_inode->i_zombie);
                return -ENOSPC;
        }
        DQUOT_INIT(dir->d_inode);
        if (d_mountpoint(dentry))
                error = -EBUSY;
        else {
                lock_kernel();
                linkno = dentry->d_inode->i_nlink;
                if (linkno > 1) {
                        /* keep the dentry usable for the ctime update */
                        dget(dentry);
                        dentry_held = 1;
                }

                if (S_ISLNK(dentry->d_inode->i_mode)) {
                        mm_segment_t old_fs;
                        struct inode_operations *riops;
                        riops = filter_c2csiops(fset->fset_cache->cache_filter);

                        PRESTO_ALLOC(old_target, PATH_MAX);
                        if (old_target == NULL) {
                                error = -ENOMEM;
                                /* do not leave with the BKL held */
                                unlock_kernel();
                                EXIT;
                                goto exit_fail;
                        }

                        old_fs = get_fs();
                        set_fs(get_ds());

                        if (riops->readlink == NULL)
                                CERROR("InterMezzo %s: no readlink iops.\n",
                                       __FUNCTION__);
                        else
                                old_targetlen =
                                        riops->readlink(dentry, old_target,
                                                        PATH_MAX);
                        if (old_targetlen < 0) {
                                /* report the readlink return code itself */
                                CERROR("InterMezzo: readlink failed: %ld\n",
                                       (long)old_targetlen);
                                PRESTO_FREE(old_target, PATH_MAX);
                                old_target = NULL;
                                old_targetlen = 0;
                        }
                        set_fs(old_fs);
                }

                do_kml = presto_do_kml(info, dir);
                do_rcvd = presto_do_rcvd(info, dir);
                error = iops->unlink(dir->d_inode, dentry);
                unlock_kernel();
                if (!error)
                        d_delete(dentry);
        }

        /*
         * Nothing was removed (mount point, or the cache refused): do not
         * touch times or write a journal record for it, and do not let a
         * later call overwrite the error.
         */
        if (error) {
                EXIT;
                goto exit_fail;
        }

        if (linkno > 1) {
                /* FIXME: Combine this with the next call? */
                error = presto_settime(fset, NULL, NULL, dentry,
                                       info, ATTR_CTIME);
                dput(dentry);
                dentry_held = 0;
                if (error) {
                        EXIT;
                        goto exit_fail;
                }
        }

        error = presto_settime(fset, NULL, NULL, dir,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit_fail;
        }

        up(&dir->d_inode->i_zombie);

        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x10);
        if ( do_kml )
                error = presto_journal_unlink(&rec, fset, dir, &tgt_dir_ver,
                                              &old_file_ver, &rb, dentry,
                                              old_target, old_targetlen);
        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x20);
        if ( do_rcvd ) {
                error = presto_write_last_rcvd(&rec, fset, info);
        }
        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x30);
        EXIT;
exit:
        presto_release_space(fset->fset_cache, PRESTO_REQLOW);
        presto_trans_commit(fset, handle);
        if (old_target != NULL)
                PRESTO_FREE(old_target, PATH_MAX);
        return error;

exit_fail:
        /* error exits taken while i_zombie is still held */
        if (dentry_held)
                dput(dentry);
        up(&dir->d_inode->i_zombie);
        goto exit;
}
 
 
/*
 * Unlink the object named by 'pathname'.
 *
 * The parent directory is resolved with LOOKUP_PARENT, the last component
 * is looked up under the parent's i_sem, and the removal itself is done by
 * presto_do_unlink().
 *
 * Returns 0 or a negative errno:
 *   -EISDIR   the last component is not a normal name
 *   -EINVAL   the dentry does not belong to a presto fileset
 *   -ENOENT / -EISDIR / -ENOTDIR   the name has trailing slashes
 */
int lento_unlink(const char *pathname, struct lento_vfs_context *info)
{
        struct presto_file_set *fset;
        struct nameidata nd;
        struct dentry *victim;
        char *kname;
        int rc = 0;

        ENTRY;

        kname = getname(pathname);
        if (IS_ERR(kname))
                return PTR_ERR(kname);

        if (path_init(kname, LOOKUP_PARENT, &nd))
                rc = path_walk(kname, &nd);
        if (rc)
                goto out_name;

        if (nd.last_type != LAST_NORM) {
                rc = -EISDIR;
                goto out_path;
        }

        down(&nd.dentry->d_inode->i_sem);
        victim = lookup_hash(&nd.last, nd.dentry);
        if (IS_ERR(victim)) {
                rc = PTR_ERR(victim);
                goto out_sem;
        }

        fset = presto_fset(victim);
        if (!fset) {
                rc = -EINVAL;
                CERROR("No fileset!\n");
                EXIT;
        } else if (nd.last.name[nd.last.len]) {
                /*
                 * Trailing slashes.  Checked only now, after the lookup,
                 * so that the most accurate error can be reported.
                 */
                if (!victim->d_inode)
                        rc = -ENOENT;
                else if (S_ISDIR(victim->d_inode->i_mode))
                        rc = -EISDIR;
                else
                        rc = -ENOTDIR;
        } else {
                rc = presto_do_unlink(fset, nd.dentry, victim, info);
        }
        EXIT;
        dput(victim);
out_sem:
        up(&nd.dentry->d_inode->i_sem);
out_path:
        path_release(&nd);
out_name:
        putname(kname);

        return rc;
}
 
int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, const char *oldname,
struct lento_vfs_context *info)
{
struct rec_info rec;
int error;
struct presto_version tgt_dir_ver, new_link_ver;
struct inode_operations *iops;
void *handle;
 
ENTRY;
down(&dir->d_inode->i_zombie);
/* record + max path len + space to free */
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
if (error) {
EXIT;
up(&dir->d_inode->i_zombie);
return error;
}
 
error = may_create(dir->d_inode, dentry);
if (error) {
EXIT;
goto exit_lock;
}
 
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops->symlink) {
EXIT;
goto exit_lock;
}
 
presto_getversion(&tgt_dir_ver, dir->d_inode);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_SYMLINK);
if ( IS_ERR(handle) ) {
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
CERROR("ERROR: presto_do_symlink: no space for transaction. Tell Peter.\n");
EXIT;
up(&dir->d_inode->i_zombie);
return -ENOSPC;
}
DQUOT_INIT(dir->d_inode);
lock_kernel();
error = iops->symlink(dir->d_inode, dentry, oldname);
if (error) {
EXIT;
goto exit;
}
 
if (dentry->d_inode) {
struct presto_cache *cache = fset->fset_cache;
presto_set_ops(dentry->d_inode, cache->cache_filter);
 
filter_setup_dentry_ops(cache->cache_filter, dentry->d_op,
&presto_dentry_ops);
dentry->d_op = filter_c2udops(cache->cache_filter);
/* XXX ? Cache state ? if Lento creates a symlink */
if ( ISLENTO(presto_c2m(cache)) ) {
presto_set(dentry, PRESTO_ATTR);
} else {
presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
}
}
 
info->flags |= LENTO_FL_TOUCH_PARENT;
error = presto_settime(fset, NULL, dir, dentry,
info, ATTR_CTIME | ATTR_MTIME);
if (error) {
EXIT;
goto exit;
}
 
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x10);
presto_getversion(&new_link_ver, dentry->d_inode);
if ( presto_do_kml(info, dentry) )
error = presto_journal_symlink(&rec, fset, dentry, oldname,
&tgt_dir_ver, &new_link_ver);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x20);
if ( presto_do_rcvd(info, dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x30);
EXIT;
exit:
unlock_kernel();
presto_trans_commit(fset, handle);
exit_lock:
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
up(&dir->d_inode->i_zombie);
return error;
}
 
/*
 * Create a symlink 'newname' whose target is 'oldname'.
 *
 * Both names are copied in with getname(); the parent of 'newname' is
 * resolved with LOOKUP_PARENT and the link is created and journaled by
 * presto_do_symlink().  The big kernel lock is held for the whole call.
 *
 * Returns 0 or a negative errno (-EINVAL if the new dentry does not
 * belong to a presto fileset).
 */
int lento_symlink(const char *oldname, const char *newname,
                  struct lento_vfs_context *info)
{
        int error;
        char *from;
        char *to;
        struct dentry *dentry;
        struct presto_file_set *fset;
        struct nameidata nd;

        ENTRY;
        lock_kernel();
        from = getname(oldname);
        error = PTR_ERR(from);
        if (IS_ERR(from)) {
                EXIT;
                goto exit;
        }

        to = getname(newname);
        error = PTR_ERR(to);
        if (IS_ERR(to)) {
                EXIT;
                goto exit_from;
        }

        /*
         * 'error' still holds PTR_ERR(to) here; clear it so that a
         * path_init() returning 0 is not mistaken for a failed walk
         * (lento_link() does the same).
         */
        error = 0;
        if (path_init(to, LOOKUP_PARENT, &nd))
                error = path_walk(to, &nd);
        if (error) {
                EXIT;
                goto exit_to;
        }

        dentry = lookup_create(&nd, 0);
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry)) {
                /*
                 * The parent's i_sem must be released after lookup_create()
                 * even when it fails, as lento_link(), lento_mkdir() and
                 * lento_mknod() do.
                 */
                EXIT;
                goto exit_unlock;
        }

        fset = presto_fset(dentry);
        error = -EINVAL;
        if ( !fset ) {
                CERROR("No fileset!\n");
                EXIT;
                goto exit_dput;
        }
        error = presto_do_symlink(fset, nd.dentry,
                                  dentry, from, info);
        EXIT;
 exit_dput:
        dput(dentry);
 exit_unlock:
        /* nd.dentry is only safe to use until path_release() drops it */
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
 exit_to:
        putname(to);
 exit_from:
        putname(from);
 exit:
        unlock_kernel();
        return error;
}
 
/*
 * Create the directory 'dentry' in 'dir' using the cache filesystem's
 * mkdir method, install the presto operations on the result, update times
 * and journal the operation.
 *
 * 'mode' is masked down to the permission and sticky bits before it is
 * passed to the cache.  The parent's i_zombie semaphore is held for the
 * whole call; the big kernel lock is held from the cache call until the
 * transaction commits.
 *
 * Returns 0 or a negative errno (-EPERM if the cache has no mkdir method,
 * -ENOSPC if no transaction could be started).
 */
int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir,
                    struct dentry *dentry, int mode,
                    struct lento_vfs_context *info)
{
        struct rec_info rec;
        int error;
        struct presto_version tgt_dir_ver, new_dir_ver;
        void *handle;

        ENTRY;
        down(&dir->d_inode->i_zombie);

        /* one journal record + directory block + room for removals*/
        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
        if (error) {
                EXIT;
                up(&dir->d_inode->i_zombie);
                return error;
        }

        error = may_create(dir->d_inode, dentry);
        if (error) {
                EXIT;
                goto exit_lock;
        }

        error = -EPERM;
        if (!filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir) {
                EXIT;
                goto exit_lock;
        }

        error = -ENOSPC;
        presto_getversion(&tgt_dir_ver, dir->d_inode);
        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKDIR);
        if ( IS_ERR(handle) ) {
                /*
                 * exit_lock releases the reservation; releasing it here
                 * as well would give it back twice.
                 */
                CERROR("presto_do_mkdir: no space for transaction\n");
                EXIT;
                goto exit_lock;
        }

        DQUOT_INIT(dir->d_inode);
        mode &= (S_IRWXUGO|S_ISVTX);
        lock_kernel();
        error = filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir(dir->d_inode, dentry, mode);
        if (error) {
                EXIT;
                goto exit;
        }

        if (dentry->d_inode) {
                struct presto_cache *cache = fset->fset_cache;

                presto_set_ops(dentry->d_inode, cache->cache_filter);

                filter_setup_dentry_ops(cache->cache_filter,
                                        dentry->d_op,
                                        &presto_dentry_ops);
                dentry->d_op = filter_c2udops(cache->cache_filter);
                /* if Lento does this, we won't have data */
                if ( ISLENTO(presto_c2m(cache)) ) {
                        presto_set(dentry, PRESTO_ATTR);
                } else {
                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
                }
        }

        info->flags |= LENTO_FL_TOUCH_PARENT;
        error = presto_settime(fset, NULL, dir, dentry,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit;
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x10);
        presto_getversion(&new_dir_ver, dentry->d_inode);
        if ( presto_do_kml(info, dir) )
                error = presto_journal_mkdir(&rec, fset, dentry, &tgt_dir_ver,
                                             &new_dir_ver,
                                             dentry->d_inode->i_mode);

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x20);
        if ( presto_do_rcvd(info, dentry) )
                error = presto_write_last_rcvd(&rec, fset, info);

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x30);
        EXIT;
 exit:
        unlock_kernel();
        presto_trans_commit(fset, handle);
 exit_lock:
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
        up(&dir->d_inode->i_zombie);
        return error;
}
 
/*
 * Create the directory 'name' with permissions 'mode'.
 *
 * The parent is resolved with LOOKUP_PARENT and the directory is created
 * and journaled by presto_do_mkdir().
 *
 * Look out: this function may change a normal dentry
 * into a directory dentry (different size)..
 *
 * Returns 0 or a negative errno (-EINVAL if the new dentry does not
 * belong to a presto fileset).
 */
int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info)
{
        int error;
        char *pathname;
        struct dentry *dentry;
        struct presto_file_set *fset;
        struct nameidata nd;

        ENTRY;
        CDEBUG(D_PIOCTL, "name: %s, mode %o, offset %d, recno %d, flags %x\n",
               name, mode, info->slot_offset, info->recno, info->flags);
        pathname = getname(name);
        error = PTR_ERR(pathname);
        if (IS_ERR(pathname)) {
                EXIT;
                return error;
        }

        /*
         * 'error' still holds PTR_ERR(pathname) here; clear it so that a
         * path_init() returning 0 is not mistaken for a failed walk
         * (lento_link() does the same).
         */
        error = 0;
        if (path_init(pathname, LOOKUP_PARENT, &nd))
                error = path_walk(pathname, &nd);
        if (error)
                goto out_name;

        dentry = lookup_create(&nd, 1);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                fset = presto_fset(dentry);
                error = -EINVAL;
                if (!fset) {
                        CERROR("No fileset!\n");
                        EXIT;
                        goto out_dput;
                }

                error = presto_do_mkdir(fset, nd.dentry, dentry,
                                        mode & S_IALLUGO, info);
        out_dput:
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
 out_name:
        EXIT;
        putname(pathname);
        CDEBUG(D_PIOCTL, "error: %d\n", error);
        return error;
}
 
/*
 * Take a reference on 'dentry' and drop it from the dcache hash if, after
 * pruning its children when necessary, exactly two references remain.
 * The reference taken here is left for the caller to dput().
 */
static void d_unhash(struct dentry *dentry)
{
        dget(dentry);
        if (atomic_read(&dentry->d_count) != 2) {
                /* other users: try to get rid of cached children first */
                shrink_dcache_parent(dentry);
                if (atomic_read(&dentry->d_count) != 2)
                        return;
        }
        d_drop(dentry);
}
 
int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, struct lento_vfs_context *info)
{
struct rec_info rec;
int error;
struct presto_version tgt_dir_ver, old_dir_ver;
struct izo_rollback_data rb;
struct inode_operations *iops;
void *handle;
int do_kml, do_rcvd;
int size;
 
ENTRY;
error = may_delete(dir->d_inode, dentry, 1);
if (error)
return error;
 
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops->rmdir) {
EXIT;
return error;
}
 
size = PRESTO_REQHIGH - dentry->d_inode->i_size;
error = presto_reserve_space(fset->fset_cache, size);
if (error) {
EXIT;
return error;
}
 
presto_getversion(&tgt_dir_ver, dir->d_inode);
presto_getversion(&old_dir_ver, dentry->d_inode);
izo_get_rollback_data(dentry->d_inode, &rb);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_RMDIR);
if ( IS_ERR(handle) ) {
presto_release_space(fset->fset_cache, size);
CERROR("ERROR: presto_do_rmdir: no space for transaction. Tell Peter.\n");
return -ENOSPC;
}
 
DQUOT_INIT(dir->d_inode);
 
do_kml = presto_do_kml(info, dir);
do_rcvd = presto_do_rcvd(info, dir);
 
double_down(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
d_unhash(dentry);
if (IS_DEADDIR(dir->d_inode))
error = -ENOENT;
else if (d_mountpoint(dentry)) {
CERROR("foo: d_mountpoint(dentry): ino %ld\n",
dentry->d_inode->i_ino);
error = -EBUSY;
} else {
lock_kernel();
error = iops->rmdir(dir->d_inode, dentry);
unlock_kernel();
if (!error) {
dentry->d_inode->i_flags |= S_DEAD;
error = presto_settime(fset, NULL, NULL, dir, info,
ATTR_CTIME | ATTR_MTIME);
}
}
double_up(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
if (!error)
d_delete(dentry);
dput(dentry);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x10);
if ( !error && do_kml )
error = presto_journal_rmdir(&rec, fset, dir, &tgt_dir_ver,
&old_dir_ver, &rb,
dentry->d_name.len,
dentry->d_name.name);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x20);
if ( !error && do_rcvd )
error = presto_write_last_rcvd(&rec, fset, info);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x30);
EXIT;
 
presto_trans_commit(fset, handle);
presto_release_space(fset->fset_cache, size);
return error;
}
 
/*
 * Remove the directory named by 'pathname'.
 *
 * The parent is resolved with LOOKUP_PARENT, the last component is looked
 * up under the parent's i_sem and the removal is done by
 * presto_do_rmdir().
 *
 * Returns 0 or a negative errno:
 *   -ENOTEMPTY  the last component is ".."
 *   -EBUSY      the last component is "." or the root
 *   -EINVAL     the dentry does not belong to a presto fileset
 */
int lento_rmdir(const char *pathname, struct lento_vfs_context *info)
{
        struct presto_file_set *fset;
        struct nameidata nd;
        struct dentry *victim;
        char *kname;
        int rc = 0;

        ENTRY;
        kname = getname(pathname);
        if (IS_ERR(kname)) {
                EXIT;
                return PTR_ERR(kname);
        }

        if (path_init(kname, LOOKUP_PARENT, &nd))
                rc = path_walk(kname, &nd);
        if (rc) {
                EXIT;
                goto out_name;
        }

        if (nd.last_type == LAST_DOTDOT) {
                rc = -ENOTEMPTY;
                EXIT;
                goto out_path;
        }
        if (nd.last_type == LAST_ROOT || nd.last_type == LAST_DOT) {
                rc = -EBUSY;
                EXIT;
                goto out_path;
        }

        down(&nd.dentry->d_inode->i_sem);
        victim = lookup_hash(&nd.last, nd.dentry);
        rc = PTR_ERR(victim);
        if (!IS_ERR(victim)) {
                fset = presto_fset(victim);
                if (fset) {
                        rc = presto_do_rmdir(fset, nd.dentry, victim, info);
                } else {
                        rc = -EINVAL;
                        CERROR("No fileset!\n");
                        EXIT;
                }
                dput(victim);
        }
        up(&nd.dentry->d_inode->i_sem);
out_path:
        path_release(&nd);
out_name:
        putname(kname);
        EXIT;
        return rc;
}
 
/*
 * Create the special file 'dentry' (type and permissions in 'mode',
 * device number 'dev') in 'dir' using the cache filesystem's mknod
 * method, install the presto operations on the result, update times and
 * journal the operation.
 *
 * The parent's i_zombie semaphore is held for the whole call.
 *
 * Returns 0 or a negative errno:
 *   -EPERM   character/block device requested without CAP_MKNOD, or the
 *            cache has no mknod method
 *   -ENOSPC  no transaction could be started
 */
int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir,
                    struct dentry *dentry, int mode, dev_t dev,
                    struct lento_vfs_context *info)
{
        struct rec_info rec;
        int error = -EPERM;
        struct presto_version tgt_dir_ver, new_node_ver;
        struct inode_operations *iops;
        void *handle;

        ENTRY;

        down(&dir->d_inode->i_zombie);
        /* one KML entry */
        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (error) {
                EXIT;
                up(&dir->d_inode->i_zombie);
                return error;
        }

        /*
         * 'error' is 0 after the successful reservation; set it again so
         * that the capability check below really fails with -EPERM.
         */
        error = -EPERM;
        if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) {
                EXIT;
                goto exit_lock;
        }

        error = may_create(dir->d_inode, dentry);
        if (error) {
                EXIT;
                goto exit_lock;
        }

        error = -EPERM;
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
        if (!iops->mknod) {
                EXIT;
                goto exit_lock;
        }

        DQUOT_INIT(dir->d_inode);
        lock_kernel();
        error = -ENOSPC;
        presto_getversion(&tgt_dir_ver, dir->d_inode);
        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKNOD);
        if ( IS_ERR(handle) ) {
                /*
                 * exit_lock releases the reservation; releasing it here
                 * as well would give it back twice.
                 */
                CERROR("presto_do_mknod: no space for transaction\n");
                EXIT;
                goto exit_lock2;
        }

        error = iops->mknod(dir->d_inode, dentry, mode, dev);
        if (error) {
                EXIT;
                goto exit_commit;
        }
        if ( dentry->d_inode) {
                struct presto_cache *cache = fset->fset_cache;

                presto_set_ops(dentry->d_inode, cache->cache_filter);

                filter_setup_dentry_ops(cache->cache_filter, dentry->d_op,
                                        &presto_dentry_ops);
                dentry->d_op = filter_c2udops(cache->cache_filter);

                /* if Lento does this, we won't have data */
                if ( ISLENTO(presto_c2m(cache)) ) {
                        presto_set(dentry, PRESTO_ATTR);
                } else {
                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
                }
        }

        /* A failed time update must not be overwritten by later calls. */
        error = presto_settime(fset, NULL, NULL, dir,
                               info, ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit_commit;
        }
        error = presto_settime(fset, NULL, NULL, dentry,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit_commit;
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x10);
        presto_getversion(&new_node_ver, dentry->d_inode);
        if ( presto_do_kml(info, dentry) )
                error = presto_journal_mknod(&rec, fset, dentry, &tgt_dir_ver,
                                             &new_node_ver,
                                             dentry->d_inode->i_mode,
                                             MAJOR(dev), MINOR(dev) );

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x20);
        if ( presto_do_rcvd(info, dentry) )
                error = presto_write_last_rcvd(&rec, fset, info);

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x30);
        EXIT;
 exit_commit:
        presto_trans_commit(fset, handle);
 exit_lock2:
        unlock_kernel();
 exit_lock:
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        up(&dir->d_inode->i_zombie);
        return error;
}
 
/*
 * Create a special file 'filename' of the type given in 'mode' with
 * device number 'dev'.
 *
 * Only character/block devices, FIFOs and sockets are created (through
 * presto_do_mknod()).
 *
 * Returns 0 or a negative errno:
 *   -EPERM       a directory was requested
 *   -EOPNOTSUPP  a regular file (or no type at all) was requested
 *   -EINVAL      unknown file type, or the dentry does not belong to a
 *                presto fileset
 */
int lento_mknod(const char *filename, int mode, dev_t dev,
                struct lento_vfs_context *info)
{
        struct presto_file_set *fset;
        struct nameidata nd;
        struct dentry *node;
        char *kname;
        int rc = 0;

        ENTRY;

        if (S_ISDIR(mode))
                return -EPERM;

        kname = getname(filename);
        if (IS_ERR(kname))
                return PTR_ERR(kname);

        if (path_init(kname, LOOKUP_PARENT, &nd))
                rc = path_walk(kname, &nd);
        if (rc)
                goto out_name;

        node = lookup_create(&nd, 0);
        rc = PTR_ERR(node);
        if (IS_ERR(node))
                goto out_sem;

        fset = presto_fset(node);
        if (!fset) {
                rc = -EINVAL;
                CERROR("No fileset!\n");
                EXIT;
                goto out_dput;
        }

        switch (mode & S_IFMT) {
        case S_IFCHR:
        case S_IFBLK:
        case S_IFIFO:
        case S_IFSOCK:
                rc = presto_do_mknod(fset, nd.dentry, node,
                                     mode, dev, info);
                break;
        case 0:
        case S_IFREG:
                rc = -EOPNOTSUPP;
                break;
        case S_IFDIR:
                rc = -EPERM;
                break;
        default:
                rc = -EINVAL;
                break;
        }
out_dput:
        dput(node);
out_sem:
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
out_name:
        putname(kname);

        return rc;
}
 
int do_rename(struct presto_file_set *fset,
struct dentry *old_parent, struct dentry *old_dentry,
struct dentry *new_parent, struct dentry *new_dentry,
struct lento_vfs_context *info)
{
struct rec_info rec;
int error;
struct inode_operations *iops;
struct presto_version src_dir_ver, tgt_dir_ver;
void *handle;
int new_inode_unlink = 0;
struct inode *old_dir = old_parent->d_inode;
struct inode *new_dir = new_parent->d_inode;
 
ENTRY;
presto_getversion(&src_dir_ver, old_dir);
presto_getversion(&tgt_dir_ver, new_dir);
 
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops || !iops->rename) {
EXIT;
return error;
}
 
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
if (error) {
EXIT;
return error;
}
handle = presto_trans_start(fset, old_dir, KML_OPCODE_RENAME);
if ( IS_ERR(handle) ) {
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
CERROR("presto_do_rename: no space for transaction\n");
return -ENOSPC;
}
if (new_dentry->d_inode && new_dentry->d_inode->i_nlink > 1) {
dget(new_dentry);
new_inode_unlink = 1;
}
 
error = iops->rename(old_dir, old_dentry, new_dir, new_dentry);
 
if (error) {
EXIT;
goto exit;
}
 
if (new_inode_unlink) {
error = presto_settime(fset, NULL, NULL, old_dentry,
info, ATTR_CTIME);
dput(old_dentry);
if (error) {
EXIT;
goto exit;
}
}
info->flags |= LENTO_FL_TOUCH_PARENT;
error = presto_settime(fset, NULL, new_parent, old_parent,
info, ATTR_CTIME | ATTR_MTIME);
if (error) {
EXIT;
goto exit;
}
 
/* XXX make a distinction between cross file set
* and intra file set renames here
*/
presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x10);
if ( presto_do_kml(info, old_dentry) )
error = presto_journal_rename(&rec, fset, old_dentry,
new_dentry,
&src_dir_ver, &tgt_dir_ver);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x20);
 
if ( presto_do_rcvd(info, old_dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
 
presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x30);
EXIT;
exit:
presto_trans_commit(fset, handle);
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
return error;
}
 
static
int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent,
struct dentry *old_dentry, struct dentry *new_parent,
struct dentry *new_dentry, struct lento_vfs_context *info)
{
int error;
struct inode *target;
struct inode *old_dir = old_parent->d_inode;
struct inode *new_dir = new_parent->d_inode;
 
if (old_dentry->d_inode == new_dentry->d_inode)
return 0;
 
error = may_delete(old_dir, old_dentry, 1);
if (error)
return error;
 
if (new_dir->i_dev != old_dir->i_dev)
return -EXDEV;
 
if (!new_dentry->d_inode)
error = may_create(new_dir, new_dentry);
else
error = may_delete(new_dir, new_dentry, 1);
if (error)
return error;
 
if (!old_dir->i_op || !old_dir->i_op->rename)
return -EPERM;
 
/*
* If we are going to change the parent - check write permissions,
* we'll need to flip '..'.
*/
if (new_dir != old_dir) {
error = permission(old_dentry->d_inode, MAY_WRITE);
}
if (error)
return error;
 
DQUOT_INIT(old_dir);
DQUOT_INIT(new_dir);
down(&old_dir->i_sb->s_vfs_rename_sem);
error = -EINVAL;
if (is_subdir(new_dentry, old_dentry))
goto out_unlock;
target = new_dentry->d_inode;
if (target) { /* Hastur! Hastur! Hastur! */
triple_down(&old_dir->i_zombie,
&new_dir->i_zombie,
&target->i_zombie);
d_unhash(new_dentry);
} else
double_down(&old_dir->i_zombie,
&new_dir->i_zombie);
if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
error = -ENOENT;
else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
error = -EBUSY;
else
error = do_rename(fset, old_parent, old_dentry,
new_parent, new_dentry, info);
if (target) {
if (!error)
target->i_flags |= S_DEAD;
triple_up(&old_dir->i_zombie,
&new_dir->i_zombie,
&target->i_zombie);
if (d_unhashed(new_dentry))
d_rehash(new_dentry);
dput(new_dentry);
} else
double_up(&old_dir->i_zombie,
&new_dir->i_zombie);
if (!error)
d_move(old_dentry,new_dentry);
out_unlock:
up(&old_dir->i_sb->s_vfs_rename_sem);
return error;
}
 
/*
 * Rename anything that is not a directory.  Performs the permission and
 * sanity checks, takes both parents' i_zombie semaphores and hands over to
 * do_rename().  On success the dcache entry is moved with d_move() unless
 * the filesystem is flagged FS_ODD_RENAME.
 *
 * Returns 0 (also when source and target are the same inode) or a
 * negative errno:
 *   -EXDEV  the parents are on different devices
 *   -EPERM  the source directory has no rename operation
 *   -EBUSY  source or target is a mount point
 */
static
int presto_rename_other(struct presto_file_set *fset, struct dentry *old_parent,
                        struct dentry *old_dentry, struct dentry *new_parent,
                        struct dentry *new_dentry, struct lento_vfs_context *info)
{
        struct inode *src_dir = old_parent->d_inode;
        struct inode *dst_dir = new_parent->d_inode;
        int rc;

        if (old_dentry->d_inode == new_dentry->d_inode)
                return 0;

        rc = may_delete(src_dir, old_dentry, 0);
        if (rc)
                return rc;

        if (dst_dir->i_dev != src_dir->i_dev)
                return -EXDEV;

        if (new_dentry->d_inode)
                rc = may_delete(dst_dir, new_dentry, 0);
        else
                rc = may_create(dst_dir, new_dentry);
        if (rc)
                return rc;

        if (!src_dir->i_op || !src_dir->i_op->rename)
                return -EPERM;

        DQUOT_INIT(src_dir);
        DQUOT_INIT(dst_dir);
        double_down(&src_dir->i_zombie, &dst_dir->i_zombie);
        if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) {
                rc = -EBUSY;
        } else {
                rc = do_rename(fset, old_parent, old_dentry,
                               new_parent, new_dentry, info);
        }
        double_up(&src_dir->i_zombie, &dst_dir->i_zombie);
        if (rc)
                return rc;

        /* The following d_move() should become unconditional */
        if ((src_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME) == 0)
                d_move(old_dentry, new_dentry);
        return 0;
}
 
/*
 * Rename old_dentry (in old_parent) to new_dentry (in new_parent),
 * dispatching on whether the source is a directory.
 * Returns whatever the selected helper returns.
 */
int presto_do_rename(struct presto_file_set *fset,
                     struct dentry *old_parent, struct dentry *old_dentry,
                     struct dentry *new_parent, struct dentry *new_dentry,
                     struct lento_vfs_context *info)
{
        if (!S_ISDIR(old_dentry->d_inode->i_mode))
                return presto_rename_other(fset, old_parent, old_dentry,
                                           new_parent, new_dentry, info);

        return presto_rename_dir(fset, old_parent, old_dentry,
                                 new_parent, new_dentry, info);
}
 
 
int lento_do_rename(const char *oldname, const char *newname,
struct lento_vfs_context *info)
{
int error = 0;
struct dentry * old_dir, * new_dir;
struct dentry * old_dentry, *new_dentry;
struct nameidata oldnd, newnd;
struct presto_file_set *fset;
 
ENTRY;
 
if (path_init(oldname, LOOKUP_PARENT, &oldnd))
error = path_walk(oldname, &oldnd);
 
if (error)
goto exit;
 
if (path_init(newname, LOOKUP_PARENT, &newnd))
error = path_walk(newname, &newnd);
if (error)
goto exit1;
 
error = -EXDEV;
if (oldnd.mnt != newnd.mnt)
goto exit2;
 
old_dir = oldnd.dentry;
error = -EBUSY;
if (oldnd.last_type != LAST_NORM)
goto exit2;
 
new_dir = newnd.dentry;
if (newnd.last_type != LAST_NORM)
goto exit2;
 
double_lock(new_dir, old_dir);
 
old_dentry = lookup_hash(&oldnd.last, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
/* source must exist */
error = -ENOENT;
if (!old_dentry->d_inode)
goto exit4;
fset = presto_fset(old_dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
EXIT;
goto exit4;
}
/* unless the source is a directory trailing slashes give -ENOTDIR */
if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
error = -ENOTDIR;
if (oldnd.last.name[oldnd.last.len])
goto exit4;
if (newnd.last.name[newnd.last.len])
goto exit4;
}
new_dentry = lookup_hash(&newnd.last, new_dir);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
 
lock_kernel();
error = presto_do_rename(fset, old_dir, old_dentry,
new_dir, new_dentry, info);
unlock_kernel();
 
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
exit2:
path_release(&newnd);
exit1:
path_release(&oldnd);
exit:
return error;
}
 
/*
 * Rename 'oldname' to 'newname'.  Copies both names in with getname() and
 * lets lento_do_rename() do the work.
 * Returns 0 or a negative errno.
 */
int lento_rename(const char * oldname, const char * newname,
                 struct lento_vfs_context *info)
{
        char *kold;
        char *knew;
        int rc;

        kold = getname(oldname);
        if (IS_ERR(kold))
                return PTR_ERR(kold);

        knew = getname(newname);
        if (IS_ERR(knew)) {
                rc = PTR_ERR(knew);
        } else {
                rc = lento_do_rename(kold, knew, info);
                putname(knew);
        }

        putname(kold);
        return rc;
}
 
/*
 * Find the dentry for inode number 'ino' with generation 'generation' in
 * the fileset that 'dentry' belongs to.
 *
 * If 'dentry' already refers to that inode it is returned unchanged.
 * Otherwise 'dentry' is dput() and a special lookup name built from
 * PRESTO_ILOOKUP_MAGIC, the inode number and the generation is looked up
 * below the fileset root with lookup_one_len(); its result is returned.
 *
 * Returns ERR_PTR(-EINVAL) (after dput() of 'dentry') if 'dentry' does not
 * belong to a presto fileset.
 */
struct dentry *presto_iopen(struct dentry *dentry,
                            ino_t ino, unsigned int generation)
{
        struct inode *cur = dentry->d_inode;
        struct presto_file_set *fset;
        char lookup_name[48];

        ENTRY;
        /* see if we already have the dentry we want */
        if (cur && cur->i_ino == ino && cur->i_generation == generation) {
                EXIT;
                return dentry;
        }

        /* Make sure we have a cache beneath us.  We should always find at
         * least one dentry inside the cache (if it exists), otherwise not
         * even the cache root exists, or we passed in a bad name.
         */
        fset = presto_fset(dentry);
        if (fset == NULL) {
                CERROR("No fileset for %*s!\n",
                       dentry->d_name.len, dentry->d_name.name);
                EXIT;
                dput(dentry);
                return ERR_PTR(-EINVAL);
        }
        dput(dentry);

        sprintf(lookup_name, "%s%#lx%c%#x",
                PRESTO_ILOOKUP_MAGIC, ino, PRESTO_ILOOKUP_SEP, generation);
        CDEBUG(D_PIOCTL, "opening %ld by number (as %s)\n", ino, lookup_name);
        return lookup_one_len(lookup_name, fset->fset_dentry,
                              strlen(lookup_name));
}
 
/*
 * Build an open struct file for 'dentry' with open flags 'flags'.
 *
 * Write access is taken on the inode when the resulting mode includes
 * FMODE_WRITE, and the inode's file operations' open method is called if
 * there is one.
 *
 * Returns the new file, or ERR_PTR(-ENFILE) if no file structure is
 * available, or ERR_PTR() of the error from get_write_access() or the
 * open method.
 */
static struct file *presto_filp_dopen(struct dentry *dentry, int flags)
{
        struct inode *inode = dentry->d_inode;
        struct file *filp;
        int rc;

        ENTRY;
        filp = get_empty_filp();
        if (filp == NULL) {
                CDEBUG(D_PIOCTL, "error getting file pointer\n");
                EXIT;
                return ERR_PTR(-ENFILE);
        }

        filp->f_flags = flags;
        filp->f_mode = (flags + 1) & O_ACCMODE;
        if (filp->f_mode & FMODE_WRITE) {
                rc = get_write_access(inode);
                if (rc) {
                        CDEBUG(D_PIOCTL, "error getting write access\n");
                        EXIT;
                        goto drop_filp;
                }
        }

        filp->f_dentry = dentry;
        filp->f_pos = 0;
        filp->f_reada = 0;
        /* XXX should we set to presto ops, or leave at cache ops? */
        filp->f_op = inode->i_op ? inode->i_fop : NULL;
        if (filp->f_op && filp->f_op->open) {
                rc = filp->f_op->open(inode, filp);
                if (rc) {
                        CDEBUG(D_PIOCTL, "error calling cache 'open'\n");
                        EXIT;
                        goto drop_write;
                }
        }
        /* these flags only matter while opening */
        filp->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

        return filp;

drop_write:
        if (filp->f_mode & FMODE_WRITE)
                put_write_access(inode);
drop_filp:
        put_filp(filp);
        return ERR_PTR(rc);
}
 
 
/* Open an inode by number.  We pass in the cache root name (or a subdirectory
 * from the cache that is guaranteed to exist) to be able to access the cache.
 *
 * 'name' is walked with presto_walk(); if it does not exist, trailing
 * components are stripped one at a time until something is found.  The
 * dentry for (ino, generation) is then obtained with presto_iopen() and
 * opened with presto_filp_dopen().
 *
 * Returns the new file descriptor, or a negative errno (-EACCES if
 * O_CREAT is set in 'flags', -ENOENT if no component of 'name' could be
 * found).
 */
int lento_iopen(const char *name, ino_t ino, unsigned int generation,
                int flags)
{
        char * tmp;
        struct dentry *dentry;
        struct nameidata nd;
        struct file *f;
        int fd;
        int error;

        ENTRY;
        CDEBUG(D_PIOCTL,
               "open %s:inode %#lx (%ld), generation %x (%d), flags %d \n",
               name, ino, ino, generation, generation, flags);
        /* We don't allow creation of files by number only, as it would
         * lead to a dangling files not in any directory.  We could also
         * just turn off the flag and ignore it.
         */
        if (flags & O_CREAT) {
                CERROR("%s: create file by inode number (%ld) not allowed\n",
                       __FUNCTION__, ino);
                EXIT;
                return -EACCES;
        }

        tmp = getname(name);
        if (IS_ERR(tmp)) {
                EXIT;
                return PTR_ERR(tmp);
        }

        lock_kernel();
 again: /* look the named file or a parent directory so we can get the cache */
        error = presto_walk(tmp, &nd);
        if ( error && error != -ENOENT ) {
                EXIT;
                unlock_kernel();
                /* the name buffer from getname() must not be leaked */
                putname(tmp);
                return error;
        }
        if (error == -ENOENT)
                dentry = NULL;
        else
                dentry = nd.dentry;

        /* we didn't find the named file, so see if a parent exists */
        if (!dentry) {
                char *slash;

                slash = strrchr(tmp, '/');
                if (slash && slash != tmp) {
                        *slash = '\0';
                        /* NOTE(review): nd comes from a walk that failed
                         * with -ENOENT; confirm presto_walk() leaves it in
                         * a state that may still be released here and at
                         * 'exit' below.
                         */
                        path_release(&nd);
                        goto again;
                }
                /* we should never get here... */
                CDEBUG(D_PIOCTL, "no more path components to try!\n");
                fd = -ENOENT;
                goto exit;
        }
        CDEBUG(D_PIOCTL, "returned dentry %p\n", dentry);

        dentry = presto_iopen(dentry, ino, generation);
        fd = PTR_ERR(dentry);
        if (IS_ERR(dentry)) {
                EXIT;
                goto exit;
        }

        /* XXX start of code that might be replaced by something like:
         * if (flags & (O_WRONLY | O_RDWR)) {
         *      error = get_write_access(dentry->d_inode);
         *      if (error) {
         *              EXIT;
         *              goto cleanup_dput;
         *      }
         * }
         * fd = open_dentry(dentry, flags);
         *
         * including the presto_filp_dopen() function (check dget counts!)
         */
        fd = get_unused_fd();
        if (fd < 0) {
                EXIT;
                goto cleanup_dput;
        }

        f = presto_filp_dopen(dentry, flags);
        if (IS_ERR(f)) {
                put_unused_fd(fd);
                fd = PTR_ERR(f);
                EXIT;
                goto cleanup_dput;
        }
        fd_install(fd, f);
        /* end of code that might be replaced by open_dentry */

        EXIT;
exit:
        unlock_kernel();
        /* NOTE(review): presto_iopen() may already have dput() nd.dentry;
         * check the reference accounting of this path_release().
         */
        path_release(&nd);
        putname(tmp);
        return fd;

cleanup_dput:
        /* Drop the dentry returned by presto_iopen(); nd is not a name
         * buffer and must never be handed to putname().
         */
        dput(dentry);
        goto exit;
}
 
#ifdef CONFIG_FS_EXT_ATTR
 
#ifdef CONFIG_FS_POSIX_ACL
/* The POSIX ACL code changes i_mode without going through notify_change
 * (or even mark_inode_dirty).  The reintegrator has to repeat that change,
 * which is what this function does: it stores 'mode' in the inode, marks
 * the inode dirty, and throws away the ACLs cached in the inode so that
 * the VFS does not keep using information that may be stale by now.
 *
 * Always returns 0.
 */
int presto_setmode(struct presto_file_set *fset, struct dentry *dentry,
                   mode_t mode)
{
        struct inode *ino = dentry->d_inode;

        ENTRY;
        /* The extended attributes of this inode were modified, and we
         * cannot tell whether any ACL information changed with them, so
         * the VFS is forced to reread the ACLs.  This happens only when
         * called from the SETEXTATTR ioctl, which is why it is done while
         * setting the file mode.  mark_inode_dirty is not needed for the
         * i_*acl fields, only to get i_mode to disk, and should go away
         * once notify_change is used to update the mode.
         * XXX: is mode setting really needed? Just setting acl's should
         * be enough! VFS should change the i_mode as needed? SHP
         */
        if (ino->i_acl != NULL && ino->i_acl != POSIX_ACL_NOT_CACHED)
                posix_acl_release(ino->i_acl);
        if (ino->i_default_acl != NULL &&
            ino->i_default_acl != POSIX_ACL_NOT_CACHED)
                posix_acl_release(ino->i_default_acl);

        ino->i_acl = POSIX_ACL_NOT_CACHED;
        ino->i_default_acl = POSIX_ACL_NOT_CACHED;
        ino->i_mode = mode;
        /* inode should already be dirty...but just in case */
        mark_inode_dirty(ino);
        return 0;

#if 0
        /* XXX: The following code is the preferred way to set mode,
         * however, I need to carefully go through possible recursion
         * paths back into presto. See comments in presto_do_setattr.
         */
        {
        int error=0;
        struct super_operations *sops;
        struct iattr iattr;

        iattr.ia_mode = mode;
        iattr.ia_valid = ATTR_MODE|ATTR_FORCE;

        error = -EPERM;
        sops = filter_c2csops(fset->fset_cache->cache_filter);
        if (!sops &&
            !sops->notify_change) {
                EXIT;
                return error;
        }

        error = sops->notify_change(dentry, &iattr);

        EXIT;
        return error;
        }
#endif
}
#endif
 
/*
 * setextattr interface to the cache filesystem.
 *
 * Sets extended attribute @name (value @buffer, @buffer_len bytes) on the
 * inode behind @dentry by calling the set_ext_attr method of the cache
 * filesystem, optionally forces a new mode (*@mode, when @mode is
 * non-NULL and CONFIG_FS_POSIX_ACL is set), updates ctime, and writes
 * the KML / last_rcvd records when presto_do_kml()/presto_do_rcvd()
 * request it.
 *
 * Returns 0 on success or a negative errno:
 *   -EROFS   inode is read-only
 *   -EPERM   inode is immutable/append-only, or the cache filesystem
 *            has no set_ext_attr method
 *   -ENOSPC  the transaction could not be started
 *   other    whatever the space reservation, cache filesystem or
 *            journaling calls returned
 */
int presto_do_set_ext_attr(struct presto_file_set *fset,
                           struct dentry *dentry,
                           const char *name, void *buffer,
                           size_t buffer_len, int flags, mode_t *mode,
                           struct lento_vfs_context *info)
{
        struct rec_info rec;
        struct inode *inode = dentry->d_inode;
        struct inode_operations *iops;
        int error;
        struct presto_version ver;
        void *handle;
        char temp[PRESTO_EXT_ATTR_NAME_MAX+1];

        ENTRY;
        error = -EROFS;
        if (IS_RDONLY(inode)) {
                EXIT;
                return -EROFS;
        }

        if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
                EXIT;
                return -EPERM;
        }

        /* Capture the pre-change version for the journal record. */
        presto_getversion(&ver, inode);
        error = -EPERM;
        /* We need to invoke different filters based on whether
         * this dentry is a regular file, directory or symlink.
         */
        switch (inode->i_mode & S_IFMT) {
        case S_IFLNK: /* symlink */
                iops = filter_c2csiops(fset->fset_cache->cache_filter);
                break;
        case S_IFDIR: /* directory */
                iops = filter_c2cdiops(fset->fset_cache->cache_filter);
                break;
        case S_IFREG:
        default: /* everything else including regular files */
                iops = filter_c2cfiops(fset->fset_cache->cache_filter);
        }

        /* Either a missing ops table or a missing method is fatal.  The
         * test must be ||: with && a NULL iops was dereferenced and a
         * NULL method was called further down.
         */
        if (!iops || !iops->set_ext_attr) {
                EXIT;
                return error;
        }

        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (error) {
                EXIT;
                return error;
        }

        handle = presto_trans_start(fset,dentry->d_inode,KML_OPCODE_SETEXTATTR);
        if ( IS_ERR(handle) ) {
                CERROR("presto_do_set_ext_attr: no space for transaction\n");
                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
                EXIT;
                return -ENOSPC;
        }

        /* We first "truncate" name to the maximum allowable in presto */
        /* This simulates the strncpy_from_use code in fs/ext_attr.c */
        strncpy(temp,name,sizeof(temp));
        /* strncpy does not terminate when name fills the whole buffer */
        temp[sizeof(temp) - 1] = '\0';

        /* Pass down to cache*/
        error = iops->set_ext_attr(inode,temp,buffer,buffer_len,flags);
        if (error) {
                EXIT;
                goto exit;
        }

#ifdef CONFIG_FS_POSIX_ACL
        /* Reset mode if specified*/
        /* XXX: when we do native acl support, move this code out! */
        if (mode != NULL) {
                error = presto_setmode(fset, dentry, *mode);
                if (error) {
                        EXIT;
                        goto exit;
                }
        }
#endif

        /* Reset ctime. Only inode change time (ctime) is affected */
        error = presto_settime(fset, NULL, NULL, dentry, info, ATTR_CTIME);
        if (error) {
                EXIT;
                goto exit;
        }

        if (flags & EXT_ATTR_FLAG_USER) {
                CERROR(" USER flag passed to presto_do_set_ext_attr!\n");
                BUG();
        }

        /* We are here, so set_ext_attr succeeded. We no longer need to keep
         * track of EXT_ATTR_FLAG_{EXISTS,CREATE}, instead, we will force
         * the attribute value during log replay. -SHP
         */
        flags &= ~(EXT_ATTR_FLAG_EXISTS | EXT_ATTR_FLAG_CREATE);

        /* Note: the journal record carries the caller's (untruncated) name. */
        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x10);
        if ( presto_do_kml(info, dentry) )
                error = presto_journal_set_ext_attr
                        (&rec, fset, dentry, &ver, name, buffer,
                         buffer_len, flags);

        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x20);
        if ( presto_do_rcvd(info, dentry) )
                error = presto_write_last_rcvd(&rec, fset, info);

        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x30);
        EXIT;
exit:
        /* Common unwind: give back the reservation and close the transaction. */
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        presto_trans_commit(fset, handle);

        return error;
}
#endif
/inode.c
0,0 → 1,187
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and
* Michael Callahan <callahan@maths.ox.ac.uk>
* Copyright (C) 1999 Carnegie Mellon University
* Rewritten for Linux 2.1. Peter Braam <braam@cs.cmu.edu>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Super block/filesystem wide operations
*/
 
#define __NO_VERSION__
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <linux/unistd.h>
 
#include <asm/system.h>
#include <asm/uaccess.h>
 
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <linux/string.h>
#include <asm/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/segment.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
extern void presto_free_cache(struct presto_cache *);
 
void presto_set_ops(struct inode *inode, struct filter_fs *filter)
{
ENTRY;
 
if (!inode || is_bad_inode(inode))
return;
 
if (S_ISREG(inode->i_mode)) {
if ( !filter_c2cfiops(filter) ) {
filter_setup_file_ops(filter,
inode, &presto_file_iops,
&presto_file_fops);
}
inode->i_op = filter_c2ufiops(filter);
inode->i_fop = filter_c2uffops(filter);
CDEBUG(D_INODE, "set file methods for %ld to %p\n",
inode->i_ino, inode->i_op);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = filter_c2udiops(filter);
inode->i_fop = filter_c2udfops(filter);
CDEBUG(D_INODE, "set dir methods for %ld to %p ioctl %p\n",
inode->i_ino, inode->i_op, inode->i_fop->ioctl);
} else if (S_ISLNK(inode->i_mode)) {
if ( !filter_c2csiops(filter)) {
filter_setup_symlink_ops(filter,
inode,
&presto_sym_iops,
&presto_sym_fops);
}
inode->i_op = filter_c2usiops(filter);
inode->i_fop = filter_c2usfops(filter);
CDEBUG(D_INODE, "set link methods for %ld to %p\n",
inode->i_ino, inode->i_op);
}
EXIT;
}
 
void presto_read_inode(struct inode *inode)
{
struct presto_cache *cache;
 
cache = presto_get_cache(inode);
if ( !cache ) {
CERROR("PRESTO: BAD, BAD: cannot find cache\n");
make_bad_inode(inode);
return ;
}
 
filter_c2csops(cache->cache_filter)->read_inode(inode);
 
CDEBUG(D_INODE, "presto_read_inode: ino %ld, gid %d\n",
inode->i_ino, inode->i_gid);
 
presto_set_ops(inode, cache->cache_filter);
/* XXX handle special inodes here or not - probably not? */
}
 
/*
 * put_super super operation.
 *
 * Looks up the presto cache for sb->s_dev, clears its fileset roots,
 * frees the cache's vfsmount, drops the cache root dentry, forwards
 * write_super/put_super to the cache filesystem's super operations (when
 * they exist), frees pending async upcall requests on the cache's channel
 * and finally frees the cache itself.  If no cache is found, only the
 * final debug message and MOD_DEC_USE_COUNT are executed.
 */
static void presto_put_super(struct super_block *sb)
{
struct presto_cache *cache;
struct upc_channel *channel;
struct super_operations *sops;
struct list_head *lh;
int err;

ENTRY;
cache = presto_cache_find(sb->s_dev);
if (!cache) {
EXIT;
goto exit;
}
channel = &izo_channels[presto_c2m(cache)];
sops = filter_c2csops(cache->cache_filter);
/* A failure here is only logged; teardown continues regardless. */
err = izo_clear_all_fsetroots(cache);
if (err) {
CERROR("%s: err %d\n", __FUNCTION__, err);
}
PRESTO_FREE(cache->cache_vfsmount, sizeof(struct vfsmount));

/* look at kill_super - fsync_super is not exported GRRR but
probably not needed */
/* The superblock lock is dropped around the dcache shrink and the
 * final dput of the cache root, then taken again. */
unlock_super(sb);
shrink_dcache_parent(cache->cache_root);
dput(cache->cache_root);
//fsync_super(sb);
lock_super(sb);

/* Forward to the cache filesystem's own methods, if it has them. */
if (sops->write_super)
sops->write_super(sb);

if (sops->put_super)
sops->put_super(sb);

/* free any remaining async upcalls when the filesystem is unmounted */
spin_lock(&channel->uc_lock);
lh = channel->uc_pending.next;
while ( lh != &channel->uc_pending) {
struct upc_req *req;
req = list_entry(lh, struct upc_req, rq_chain);

/* advance first: req (which contains the node lh points at) may be
 * freed below */
lh = lh->next;
/* requests without REQ_ASYNC are left on the pending list */
if ( ! (req->rq_flags & REQ_ASYNC) )
continue;
list_del(&(req->rq_chain));
PRESTO_FREE(req->rq_data, req->rq_bufsize);
PRESTO_FREE(req, sizeof(struct upc_req));
}
/* unlink this cache from the channel while still holding uc_lock */
list_del(&cache->cache_channel_list);
spin_unlock(&channel->uc_lock);

presto_free_cache(cache);

exit:
CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n",
presto_kmemory, presto_vmemory);
MOD_DEC_USE_COUNT;
return ;
}
 
/* Super operations supplied by InterMezzo itself; only read_inode and
 * put_super are set here, every other method is left NULL. */
struct super_operations presto_super_ops = {
.read_inode = presto_read_inode,
.put_super = presto_put_super,
};
 
 
/* Inode operations for symlinks.  Symlinks can be chowned, so setattr is
 * the only method provided here. */
struct inode_operations presto_sym_iops = {
.setattr = presto_setattr
};
 
/* File operations for symlinks: no initializer, so every method pointer
 * is NULL for now. */
struct file_operations presto_sym_fops;
/methods.c
0,0 → 1,497
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Mountain View Data, Inc.
*
* Extended Attribute Support
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
 
#include <stdarg.h>
 
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
 
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#define __NO_VERSION__
#include <linux/module.h>
 
#include <linux/fsfilter.h>
#include <linux/intermezzo_fs.h>
 
 
/* Debug knobs for the filter layer.
 * NOTE(review): presumably consumed by the FENTRY/FEXIT/FDEBUG macros
 * defined in a header - confirm.  filter_debug is 0xfffffff (seven hex
 * digits, i.e. the low 28 bits set). */
int filter_print_entry = 0;
int filter_debug = 0xfffffff;
/*
* The functions in this file are responsible for setting up the
* correct methods for layered file systems like InterMezzo and snapfs.
*/
 
 
/* One filter_fs per supported cache filesystem type; indexed with the
 * FILTER_FS_* constants by filter_get_filter_fs(). */
static struct filter_fs filter_oppar[FILTER_FS_TYPES];
 
/* get to the upper methods (intermezzo, snapfs) */
inline struct super_operations *filter_c2usops(struct filter_fs *cache)
{
return &cache->o_fops.filter_sops;
}
 
inline struct inode_operations *filter_c2udiops(struct filter_fs *cache)
{
return &cache->o_fops.filter_dir_iops;
}
 
 
inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache)
{
return &cache->o_fops.filter_file_iops;
}
 
inline struct inode_operations *filter_c2usiops(struct filter_fs *cache)
{
return &cache->o_fops.filter_sym_iops;
}
 
 
inline struct file_operations *filter_c2udfops(struct filter_fs *cache)
{
return &cache->o_fops.filter_dir_fops;
}
 
inline struct file_operations *filter_c2uffops(struct filter_fs *cache)
{
return &cache->o_fops.filter_file_fops;
}
 
inline struct file_operations *filter_c2usfops(struct filter_fs *cache)
{
return &cache->o_fops.filter_sym_fops;
}
 
inline struct dentry_operations *filter_c2udops(struct filter_fs *cache)
{
return &cache->o_fops.filter_dentry_ops;
}
 
/* get to the cache (lower) methods */
inline struct super_operations *filter_c2csops(struct filter_fs *cache)
{
return cache->o_caops.cache_sops;
}
 
inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache)
{
return cache->o_caops.cache_dir_iops;
}
 
inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache)
{
return cache->o_caops.cache_file_iops;
}
 
inline struct inode_operations *filter_c2csiops(struct filter_fs *cache)
{
return cache->o_caops.cache_sym_iops;
}
 
inline struct file_operations *filter_c2cdfops(struct filter_fs *cache)
{
return cache->o_caops.cache_dir_fops;
}
 
inline struct file_operations *filter_c2cffops(struct filter_fs *cache)
{
return cache->o_caops.cache_file_fops;
}
 
inline struct file_operations *filter_c2csfops(struct filter_fs *cache)
{
return cache->o_caops.cache_sym_fops;
}
 
inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache)
{
return cache->o_caops.cache_dentry_ops;
}
 
 
/*
 * Select the journal (transaction) operations for the cache filesystem
 * named by @cache_type and store them in ops->o_trops.  For a known type
 * whose support is not compiled in, o_trops is set to NULL.  An
 * unrecognized @cache_type leaves o_trops untouched.
 */
void filter_setup_journal_ops(struct filter_fs *ops, char *cache_type)
{
        if (strcmp(cache_type, "ext2") == 0) {
#if CONFIG_EXT2_FS
                ops->o_trops = &presto_ext2_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "ext3") == 0) {
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
                ops->o_trops = &presto_ext3_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "tmpfs") == 0) {
#if defined(CONFIG_TMPFS)
                ops->o_trops = &presto_tmpfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "reiserfs") == 0) {
                /* reiserfs journal ops are currently disabled */
#if 0
                /* #if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) */
                ops->o_trops = &presto_reiserfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "xfs") == 0) {
                /* xfs journal ops are currently disabled */
#if 0
                /*#if defined(CONFIG_XFS_FS) || defined (CONFIG_XFS_FS_MODULE) */
                ops->o_trops = &presto_xfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "obdfs") == 0) {
#if defined(CONFIG_OBDFS_FS) || defined (CONFIG_OBDFS_FS_MODULE)
                /* NOTE(review): unlike the other branches there is no '&'
                 * here - confirm how presto_obdfs_journal_ops is declared. */
                ops->o_trops = presto_obdfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        }
}
 
 
/* find the cache for this FS */
/*
 * Map a cache filesystem type name to its filter_fs slot.
 *
 * @cache_type: one of "ext2", "xfs", "ext3", "tmpfs", "reiserfs", "obdfs".
 *
 * Returns a pointer into filter_oppar[], or NULL (after logging an
 * error) when the type is not recognized.
 */
struct filter_fs *filter_get_filter_fs(const char *cache_type)
{
        struct filter_fs *ops = NULL;
        FENTRY;

        if (strcmp(cache_type, "ext2") == 0)
                ops = &filter_oppar[FILTER_FS_EXT2];
        else if (strcmp(cache_type, "xfs") == 0)
                ops = &filter_oppar[FILTER_FS_XFS];
        else if (strcmp(cache_type, "ext3") == 0)
                ops = &filter_oppar[FILTER_FS_EXT3];
        else if (strcmp(cache_type, "tmpfs") == 0)
                ops = &filter_oppar[FILTER_FS_TMPFS];
        else if (strcmp(cache_type, "reiserfs") == 0)
                ops = &filter_oppar[FILTER_FS_REISERFS];
        else if (strcmp(cache_type, "obdfs") == 0)
                ops = &filter_oppar[FILTER_FS_OBDFS];

        if (ops == NULL) {
                CERROR("prepare to die: unrecognized cache type for Filter\n");
        } else {
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        }

        /* FEXIT used to sit after the return and was never executed */
        FEXIT;
        return ops;
}
 
 
/*
* Frobnicate the InterMezzo operations
* this establishes the link between the InterMezzo file system
* and the underlying file system used for the cache.
*/
 
/*
 * One-time setup of the filter super operations for @cache.
 *
 * Records @cache_sops as the cache filesystem's super operations, copies
 * them into the filter's own table as defaults, and then overrides
 * put_super (when the filter supplies one) and read_inode/remount_fs
 * (when the cache filesystem has them) with the filter's versions.
 * Does nothing if FILTER_DID_SUPER_OPS is already set.
 */
void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops)
{
        struct super_operations *upper = &cache->o_fops.filter_sops;

        FENTRY;

        if (cache->o_flags & FILTER_DID_SUPER_OPS) {
                FEXIT;
                return;
        }
        cache->o_flags |= FILTER_DID_SUPER_OPS;

        /* remember the real filesystem's table ... */
        cache->o_caops.cache_sops = cache_sops;

        /* ... and start the filter table out as a copy of it */
        memcpy(upper, cache_sops, sizeof(*cache_sops));

        /* put_super is the filter's whenever the filter provides one */
        if (filter_sops->put_super)
                upper->put_super = filter_sops->put_super;

        if (cache_sops->read_inode) {
                upper->read_inode = filter_sops->read_inode;
                FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n",
                       cache, cache, upper->read_inode);
        }

        if (cache_sops->remount_fs)
                upper->remount_fs = filter_sops->remount_fs;

        FEXIT;
}
 
 
void filter_setup_dir_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
{
struct inode_operations *cache_filter_iops;
struct inode_operations *cache_iops = inode->i_op;
struct file_operations *cache_fops = inode->i_fop;
FENTRY;
 
if ( cache->o_flags & FILTER_DID_DIR_OPS ) {
FEXIT;
return;
}
cache->o_flags |= FILTER_DID_DIR_OPS;
 
/* former ops become cache_ops */
cache->o_caops.cache_dir_iops = cache_iops;
cache->o_caops.cache_dir_fops = cache_fops;
FDEBUG(D_SUPER, "filter at %p, cache iops %p, iops %p\n",
cache, cache_iops, filter_c2udiops(cache));
 
/* setup our dir iops: copy and modify */
memcpy(filter_c2udiops(cache), cache_iops, sizeof(*cache_iops));
 
/* abbreviate */
cache_filter_iops = filter_c2udiops(cache);
 
/* methods that filter if cache filesystem has these ops */
if (cache_iops->lookup && filter_iops->lookup)
cache_filter_iops->lookup = filter_iops->lookup;
if (cache_iops->create && filter_iops->create)
cache_filter_iops->create = filter_iops->create;
if (cache_iops->link && filter_iops->link)
cache_filter_iops->link = filter_iops->link;
if (cache_iops->unlink && filter_iops->unlink)
cache_filter_iops->unlink = filter_iops->unlink;
if (cache_iops->mkdir && filter_iops->mkdir)
cache_filter_iops->mkdir = filter_iops->mkdir;
if (cache_iops->rmdir && filter_iops->rmdir)
cache_filter_iops->rmdir = filter_iops->rmdir;
if (cache_iops->symlink && filter_iops->symlink)
cache_filter_iops->symlink = filter_iops->symlink;
if (cache_iops->rename && filter_iops->rename)
cache_filter_iops->rename = filter_iops->rename;
if (cache_iops->mknod && filter_iops->mknod)
cache_filter_iops->mknod = filter_iops->mknod;
if (cache_iops->permission && filter_iops->permission)
cache_filter_iops->permission = filter_iops->permission;
if (cache_iops->getattr)
cache_filter_iops->getattr = filter_iops->getattr;
/* Some filesystems do not use a setattr method of their own
instead relying on inode_setattr/write_inode. We still need to
journal these so we make setattr an unconditional operation.
XXX: we should probably check for write_inode. SHP
*/
/*if (cache_iops->setattr)*/
cache_filter_iops->setattr = filter_iops->setattr;
#ifdef CONFIG_FS_EXT_ATTR
/* For now we assume that posix acls are handled through extended
* attributes. If this is not the case, we must explicitly trap
* posix_set_acl. SHP
*/
if (cache_iops->set_ext_attr && filter_iops->set_ext_attr)
cache_filter_iops->set_ext_attr = filter_iops->set_ext_attr;
#endif
 
 
/* copy dir fops */
memcpy(filter_c2udfops(cache), cache_fops, sizeof(*cache_fops));
 
/* unconditional filtering operations */
filter_c2udfops(cache)->ioctl = filter_fops->ioctl;
 
FEXIT;
}
 
 
/*
 * One-time setup of the filter regular-file operations for @cache.
 *
 * The inode's current i_op/i_fop are recorded as the cache filesystem's
 * file ops and copied into the filter tables; setattr, permission and
 * the open/release/write/ioctl file methods are then taken from the
 * filter unconditionally, getattr only if the cache filesystem has one.
 * Does nothing if FILTER_DID_FILE_OPS is already set.
 */
void filter_setup_file_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
{
        struct inode_operations *lower_iops = inode->i_op;
        struct file_operations *lower_fops = inode->i_fop;
        struct inode_operations *upper_iops;
        struct file_operations *upper_fops;

        FENTRY;

        if (cache->o_flags & FILTER_DID_FILE_OPS) {
                FEXIT;
                return;
        }
        cache->o_flags |= FILTER_DID_FILE_OPS;

        /* former ops become cache_ops */
        cache->o_caops.cache_file_iops = lower_iops;
        cache->o_caops.cache_file_fops = lower_fops;

        upper_iops = filter_c2ufiops(cache);
        upper_fops = filter_c2uffops(cache);

        /* file iops: copy and modify */
        memcpy(upper_iops, lower_iops, sizeof(*lower_iops));

        /* file fops: copy and modify */
        CERROR("*** cache file ops at %p\n", lower_fops);
        memcpy(upper_fops, lower_fops, sizeof(*lower_fops));

        /* setattr is unconditional, as in filter_setup_dir_ops. SHP */
        upper_iops->setattr = filter_iops->setattr;
        if (lower_iops->getattr)
                upper_iops->getattr = filter_iops->getattr;
        /* XXX Should this be conditional rmr ? */
        upper_iops->permission = filter_iops->permission;

#ifdef CONFIG_FS_EXT_ATTR
        /* For now we assume that posix acls are handled through extended
         * attributes. If this is not the case, we must explicitly trap
         * posix_set_acl.
         */
        if (lower_iops->set_ext_attr && filter_iops->set_ext_attr)
                upper_iops->set_ext_attr = filter_iops->set_ext_attr;
#endif

        /* unconditional filtering operations */
        upper_fops->open = filter_fops->open;
        upper_fops->release = filter_fops->release;
        upper_fops->write = filter_fops->write;
        upper_fops->ioctl = filter_fops->ioctl;

        FEXIT;
}
 
/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */
/*
 * One-time setup of the filter symlink operations for @cache.
 *
 * The inode's current i_op/i_fop are recorded as the cache filesystem's
 * symlink ops.  The iops are copied into the filter table with setattr
 * replaced unconditionally and getattr replaced when the cache
 * filesystem has one.  The fops are copied only when the inode has any.
 * Does nothing if FILTER_DID_SYMLINK_OPS is already set.
 */
void filter_setup_symlink_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
{
        struct inode_operations *lower_iops = inode->i_op;
        struct file_operations *lower_fops = inode->i_fop;
        struct inode_operations *upper_iops;

        FENTRY;

        if (cache->o_flags & FILTER_DID_SYMLINK_OPS) {
                FEXIT;
                return;
        }
        cache->o_flags |= FILTER_DID_SYMLINK_OPS;

        /* former ops become cache_ops */
        cache->o_caops.cache_sym_iops = lower_iops;
        cache->o_caops.cache_sym_fops = lower_fops;

        /* symlink iops: copy and modify */
        upper_iops = filter_c2usiops(cache);
        memcpy(upper_iops, lower_iops, sizeof(*lower_iops));

        /* setattr is unconditional, as in filter_setup_dir_ops. SHP */
        upper_iops->setattr = filter_iops->setattr;
        if (lower_iops->getattr)
                upper_iops->getattr = filter_iops->getattr;

        /* symlinks may have no file operations at all */
        if (lower_fops != NULL)
                memcpy(filter_c2usfops(cache), lower_fops,
                       sizeof(*lower_fops));

        FEXIT;
}
 
void filter_setup_dentry_ops(struct filter_fs *cache,
struct dentry_operations *cache_dop,
struct dentry_operations *filter_dop)
{
if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) {
FEXIT;
return;
}
cache->o_flags |= FILTER_DID_DENTRY_OPS;
 
cache->o_caops.cache_dentry_ops = cache_dop;
memcpy(&cache->o_fops.filter_dentry_ops,
filter_dop, sizeof(*filter_dop));
if (cache_dop && cache_dop != filter_dop && cache_dop->d_revalidate){
CERROR("WARNING: filter overriding revalidation!\n");
}
return;
}
/cache.c
0,0 → 1,204
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#define __NO_VERSION__
#include <linux/module.h>
#include <stdarg.h>
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
 
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/blkdev.h>
#include <linux/init.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
/*
This file contains the routines associated with managing a
cache of files for InterMezzo. These caches have two reqs:
- need to be found fast so they are hashed by the device,
with an attempt to have collision chains of length 1.
The methods for the cache are set up in methods.
*/
 
extern kmem_cache_t * presto_dentry_slab;
 
/* the intent of this hash is to have collision chains of length 1 */
#define CACHES_BITS 8
#define CACHES_SIZE (1 << CACHES_BITS)
/* parenthesized so the mask stays one operand in any expression */
#define CACHES_MASK (CACHES_SIZE - 1)
/* hash table of cache chains; the bucket index comes from presto_cache_hash() */
static struct list_head presto_caches[CACHES_SIZE];
 
/* Bucket index for @dev: bits 0-3 plus bits 8-11 of the device number,
 * masked to the table size. */
static inline int presto_cache_hash(kdev_t dev)
{
        int low_nibble = 0x000F & (dev);
        int third_nibble = (0x0F00 & (dev)) >> 8;

        return (CACHES_MASK) & (low_nibble + third_nibble);
}
 
/* Record @dev in @cache and link the cache into the hash bucket for @dev. */
inline void presto_cache_add(struct presto_cache *cache, kdev_t dev)
{
        struct list_head *bucket = &presto_caches[presto_cache_hash(dev)];

        cache->cache_dev = dev;
        list_add(&cache->cache_chain, bucket);
}
 
inline void presto_cache_init_hash(void)
{
int i;
for ( i = 0; i < CACHES_SIZE; i++ ) {
INIT_LIST_HEAD(&presto_caches[i]);
}
}
 
/*
 * Map a device to a cache.
 *
 * Walks the hash chain for @dev and returns the cache whose cache_dev
 * matches, or NULL if there is none.
 *
 * The walk advances from the current node.  The previous loop always
 * re-read the bucket head's ->next, so it never terminated when the
 * first chain entry belonged to a different device.
 */
struct presto_cache *presto_cache_find(kdev_t dev)
{
        struct list_head *head = &presto_caches[presto_cache_hash(dev)];
        struct list_head *pos;

        for (pos = head->next; pos != head; pos = pos->next) {
                struct presto_cache *cache;

                cache = list_entry(pos, struct presto_cache, cache_chain);
                if ( cache->cache_dev == dev )
                        return cache;
        }
        return NULL;
}
 
 
/* map an inode to a cache */
/* Look up the presto cache for the device @inode lives on; logs a
 * warning and returns NULL when there is none. */
struct presto_cache *presto_get_cache(struct inode *inode)
{
        struct presto_cache *found;

        ENTRY;
        /* find the correct presto_cache here, based on the device */
        found = presto_cache_find(inode->i_dev);
        if (found == NULL)
                CERROR("WARNING: no presto cache for dev %x, ino %ld\n",
                       inode->i_dev, inode->i_ino);
        EXIT;
        return found;
}
 
/* another debugging routine: check fs is InterMezzo fs */
int presto_ispresto(struct inode *inode)
{
struct presto_cache *cache;
 
if ( !inode )
return 0;
cache = presto_get_cache(inode);
if ( !cache )
return 0;
return (inode->i_dev == cache->cache_dev);
}
 
/* setup a cache structure when we need one */
/* Allocate a zeroed presto_cache with its chain and fileset lists and its
 * lock initialized.  Returns NULL when the allocation fails. */
struct presto_cache *presto_cache_init(void)
{
        struct presto_cache *cache;

        PRESTO_ALLOC(cache, sizeof(struct presto_cache));
        if (cache == NULL)
                return cache;

        memset(cache, 0, sizeof(struct presto_cache));
        cache->cache_reserved = 0;
        cache->cache_lock = SPIN_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&cache->cache_chain);
        INIT_LIST_HEAD(&cache->cache_fset_list);
        return cache;
}
 
/* free a cache structure and all of the memory it is pointing to */
inline void presto_free_cache(struct presto_cache *cache)
{
if (!cache)
return;
 
list_del(&cache->cache_chain);
if (cache->cache_sb && cache->cache_sb->s_root &&
presto_d2d(cache->cache_sb->s_root)) {
kmem_cache_free(presto_dentry_slab,
presto_d2d(cache->cache_sb->s_root));
cache->cache_sb->s_root->d_fsdata = NULL;
}
 
PRESTO_FREE(cache, sizeof(struct presto_cache));
}
 
/*
 * Reserve @req units of space in @cache.
 *
 * Returns 0 when the reservation was recorded, or when the cache has no
 * filter, no transaction ops or no tr_avail method (nothing to check
 * against).  Returns -ENOSPC when the request plus what is already
 * reserved exceeds what tr_avail reports.
 */
int presto_reserve_space(struct presto_cache *cache, loff_t req)
{
        struct super_block *sb = cache->cache_sb;
        struct filter_fs *filter = cache->cache_filter;
        loff_t avail;
        int rc = 0;

        if (!filter || !filter->o_trops || !filter->o_trops->tr_avail) {
                EXIT;
                return 0;
        }

        spin_lock(&cache->cache_lock);
        avail = filter->o_trops->tr_avail(cache, sb);
        CDEBUG(D_SUPER, "ESC::%ld +++> %ld \n", (long) cache->cache_reserved,
               (long) (cache->cache_reserved + req));
        CDEBUG(D_SUPER, "ESC::Avail::%ld \n", (long) avail);
        if (req + cache->cache_reserved > avail)
                rc = -ENOSPC;
        else
                cache->cache_reserved += req;
        spin_unlock(&cache->cache_lock);

        EXIT;
        return rc;
}
 
/* Give back @req units previously reserved in @cache. */
void presto_release_space(struct presto_cache *cache, loff_t req)
{
        /* the debug line reads cache_reserved before the lock is taken */
        CDEBUG(D_SUPER, "ESC::%ld ---> %ld \n", (long) cache->cache_reserved,
               (long) (cache->cache_reserved - req));

        spin_lock(&cache->cache_lock);
        cache->cache_reserved = cache->cache_reserved - req;
        spin_unlock(&cache->cache_lock);
}
/replicator.c
0,0 → 1,291
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Manage RCVD records for clients in the kernel
*
*/
 
#define __NO_VERSION__
#include <linux/module.h>
#include <stdarg.h>
#include <asm/uaccess.h>
 
#include <linux/errno.h>
 
#include <linux/intermezzo_fs.h>
 
/*
* this file contains a hash table of replicators/clients for a
* fileset. It allows fast lookup and update of reintegration status
*/
 
/* Hash-table entry mapping a client UUID to the offset of that client's
 * record in the fileset's last_rcvd file. */
struct izo_offset_rec {
struct list_head or_list; /* link in a replicator hash bucket */
char or_uuid[16]; /* client UUID, used as lookup key */
loff_t or_offset; /* file offset of the client's record */
};
 
/* replicator hash table geometry: 2^RCACHE_BITS buckets */
#define RCACHE_BITS 8
#define RCACHE_SIZE (1 << RCACHE_BITS)
#define RCACHE_MASK (RCACHE_SIZE - 1)
 
/* Allocate a replicator hash table of RCACHE_SIZE empty buckets.
 * Returns NULL (after logging) when the allocation fails. */
static struct list_head *
izo_rep_cache(void)
{
        struct list_head *buckets;
        int slot;

        PRESTO_ALLOC(buckets, sizeof(struct list_head) * RCACHE_SIZE);
        if (!buckets) {
                CERROR("intermezzo-fatal: no memory for replicator cache\n");
                return NULL;
        }

        memset(buckets, 0, sizeof(struct list_head) * RCACHE_SIZE);
        slot = RCACHE_SIZE;
        while (slot-- > 0)
                INIT_LIST_HEAD(&buckets[slot]);

        return buckets;
}
 
static struct list_head *
izo_rep_hash(struct list_head *cache, char *uuid)
{
return &cache[(RCACHE_MASK & uuid[1])];
}
 
static void
izo_rep_cache_clean(struct presto_file_set *fset)
{
int i;
struct list_head *bucket;
struct list_head *tmp;
 
if (fset->fset_clients == NULL)
return;
for (i = 0; i < RCACHE_SIZE; i++) {
tmp = bucket = &fset->fset_clients[i];
 
tmp = tmp->next;
while (tmp != bucket) {
struct izo_offset_rec *offrec;
tmp = tmp->next;
list_del(tmp);
offrec = list_entry(tmp, struct izo_offset_rec,
or_list);
PRESTO_FREE(offrec, sizeof(struct izo_offset_rec));
}
}
}
 
/* Find the offset record for client @uuid in the client hash of @fset;
 * returns NULL when the client is not in the table. */
struct izo_offset_rec *
izo_rep_cache_find(struct presto_file_set *fset, char *uuid)
{
        struct list_head *head = izo_rep_hash(fset->fset_clients, uuid);
        struct list_head *pos;

        for (pos = head->next; pos != head; pos = pos->next) {
                struct izo_offset_rec *cand;

                cand = list_entry(pos, struct izo_offset_rec, or_list);
                if (memcmp(cand->or_uuid, uuid, sizeof(cand->or_uuid)) == 0)
                        return cand;
        }

        return NULL;
}
 
/*
 * Insert a hash entry mapping the UUID of @rec to @offset.
 * Returns 0 on success, -EINVAL if the UUID is already present and
 * -ENOMEM if the entry cannot be allocated.
 */
static int
izo_rep_cache_add(struct presto_file_set *fset, struct izo_rcvd_rec *rec,
                  loff_t offset)
{
        struct izo_offset_rec *entry;

        if (izo_rep_cache_find(fset, rec->lr_uuid) != NULL) {
                CERROR("izo: duplicate client entry %s off %Ld\n",
                       fset->fset_name, offset);
                return -EINVAL;
        }

        PRESTO_ALLOC(entry, sizeof(*entry));
        if (!entry) {
                CERROR("izo: cannot allocate offrec\n");
                return -ENOMEM;
        }

        entry->or_offset = offset;
        memcpy(entry->or_uuid, rec->lr_uuid, sizeof(rec->lr_uuid));

        list_add(&entry->or_list,
                 izo_rep_hash(fset->fset_clients, rec->lr_uuid));
        return 0;
}
 
/*
 * Build the client hash for @fset by reading the last_rcvd file record
 * by record and remembering the start offset of each one.
 * Returns 0 on success, -ENOMEM if the table cannot be allocated, or the
 * error from izo_rep_cache_add (after the table has been cleaned).
 */
int
izo_rep_cache_init(struct presto_file_set *fset)
{
        struct izo_rcvd_rec rec;
        loff_t next_pos = 0;
        loff_t rec_start = 0;

        fset->fset_clients = izo_rep_cache();
        if (!fset->fset_clients) {
                CERROR("Error initializing client cache\n");
                return -ENOMEM;
        }

        for (;;) {
                int rc;

                /* stop at the first short read */
                if (presto_fread(fset->fset_rcvd.fd_file, (char *)&rec,
                                 sizeof(rec), &next_pos) != sizeof(rec))
                        break;

                rc = izo_rep_cache_add(fset, &rec, rec_start);
                if (rc < 0) {
                        izo_rep_cache_clean(fset);
                        return rc;
                }

                /* the next record starts where this read ended */
                rec_start = next_pos;
        }

        return 0;
}
 
/*
* Return local last_rcvd record for the client. Update or create
* if necessary.
*
* XXX: After this call, any -EINVAL from izo_rcvd_get is a real error.
*/
int
izo_repstatus(struct presto_file_set *fset, __u64 client_kmlsize,
              struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server)
{
        int rc;
        int need_reset;

        rc = izo_rcvd_get(lr_server, fset, lr_client->lr_uuid);
        /* -EINVAL only means "unknown client"; anything else is fatal */
        if (rc < 0 && rc != -EINVAL)
                return rc;

        /* client is new or has been reset. */
        need_reset = (rc < 0) ||
                (client_kmlsize == 0 && lr_client->lr_remote_offset == 0);
        if (need_reset) {
                memset(lr_server, 0, sizeof(*lr_server));
                memcpy(lr_server->lr_uuid, lr_client->lr_uuid,
                       sizeof(lr_server->lr_uuid));
                rc = izo_rcvd_write(fset, lr_server);
                if (rc < 0)
                        return rc;
        }

        /* update intersync */
        rc = izo_upc_repstatus(presto_f2m(fset), fset->fset_name, lr_server);
        return rc;
}
 
loff_t
izo_rcvd_get(struct izo_rcvd_rec *rec, struct presto_file_set *fset, char *uuid)
{
struct izo_offset_rec *offrec;
struct izo_rcvd_rec tmprec;
loff_t offset;
 
offrec = izo_rep_cache_find(fset, uuid);
if (offrec == NULL) {
CDEBUG(D_SPECIAL, "izo_get_rcvd: uuid not in hash.\n");
return -EINVAL;
}
offset = offrec->or_offset;
 
if (rec == NULL)
return offset;
 
if (presto_fread(fset->fset_rcvd.fd_file, (char *)&tmprec,
sizeof(tmprec), &offset) != sizeof(tmprec)) {
CERROR("izo_get_rcvd: Unable to read from last_rcvd file offset "
"%Lu\n", offset);
return -EIO;
}
 
memcpy(rec->lr_uuid, tmprec.lr_uuid, sizeof(tmprec.lr_uuid));
rec->lr_remote_recno = le64_to_cpu(tmprec.lr_remote_recno);
rec->lr_remote_offset = le64_to_cpu(tmprec.lr_remote_offset);
rec->lr_local_recno = le64_to_cpu(tmprec.lr_local_recno);
rec->lr_local_offset = le64_to_cpu(tmprec.lr_local_offset);
rec->lr_last_ctime = le64_to_cpu(tmprec.lr_last_ctime);
 
return offrec->or_offset;
}
 
/* Write a last_rcvd record for rec->lr_uuid.
 *
 * If the uuid is already cached the record is rewritten in place; otherwise
 * a new slot is taken from the end of the file (fd_offset is bumped under
 * fd_lock) and the uuid is added to the cache before writing.
 *
 * Returns the offset of the beginning of the record in the last_rcvd file,
 * or a negative error from izo_rep_cache_add() or presto_fwrite(). */
loff_t
izo_rcvd_write(struct presto_file_set *fset, struct izo_rcvd_rec *rec)
{
        struct izo_offset_rec *cached;
        loff_t pos, ret;

        ENTRY;

        cached = izo_rep_cache_find(fset, rec->lr_uuid);
        if (cached != NULL) {
                pos = cached->or_offset;
        } else {
                /* I don't think it should be possible for an entry to be not in
                 * the hash table without also having an invalid offset, but we
                 * handle it gracefully regardless. */
                write_lock(&fset->fset_rcvd.fd_lock);
                pos = fset->fset_rcvd.fd_offset;
                fset->fset_rcvd.fd_offset += sizeof(*rec);
                write_unlock(&fset->fset_rcvd.fd_lock);

                ret = izo_rep_cache_add(fset, rec, pos);
                if (ret < 0) {
                        EXIT;
                        return ret;
                }
        }

        ret = presto_fwrite(fset->fset_rcvd.fd_file, (char *)rec, sizeof(*rec),
                            &pos);
        if (ret == sizeof(*rec)) {
                /* presto_fwrite() advances 'pos'; step back to the start */
                ret = pos - sizeof(*rec);
        }

        EXIT;
        return ret;
}
 
/*
 * Update the remote record number and remote offset stored in the last_rcvd
 * record of the client identified by uuid.
 *
 * Returns 0 on success or the negative error from izo_rcvd_get() or
 * izo_rcvd_write().
 */
loff_t
izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid, __u64 remote_recno,
                    __u64 remote_offset)
{
        struct izo_rcvd_rec current_rec;
        loff_t status;

        ENTRY;

        status = izo_rcvd_get(&current_rec, fset, uuid);
        if (status < 0)
                return status;

        current_rec.lr_remote_offset = remote_offset;
        current_rec.lr_remote_recno = remote_recno;

        status = izo_rcvd_write(fset, &current_rec);
        EXIT;

        /* a non-negative status is the record offset; callers only get 0 */
        return (status < 0) ? status : 0;
}
/dcache.c
0,0 → 1,345
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Original version: Copyright (C) 1996 P. Braam and M. Callahan
* Rewritten for Linux 2.1. Copyright (C) 1997 Carnegie Mellon University
* d_fsdata and NFS compatibility fixes Copyright (C) 2001 Tacit Networks, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Directory operations for InterMezzo filesystem
*/
 
/* inode dentry alias list walking code adapted from linux/fs/dcache.c
*
* fs/dcache.c
*
* (C) 1997 Thomas Schoebel-Theuer,
* with heavy changes by Linus Torvalds
*/
 
#define __NO_VERSION__
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <linux/slab.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
 
#include <linux/intermezzo_fs.h>
 
/* Slab cache from which struct presto_dentry_data objects are allocated;
 * created by presto_init_ddata_cache() and destroyed by
 * presto_cleanup_ddata_cache(). */
kmem_cache_t * presto_dentry_slab;
 
/* called when a cache lookup succeeds */
static int presto_d_revalidate(struct dentry *de, int flag)
{
struct inode *inode = de->d_inode;
struct presto_file_set * root_fset;
 
ENTRY;
if (!inode) {
EXIT;
return 0;
}
 
if (is_bad_inode(inode)) {
EXIT;
return 0;
}
 
if (!presto_d2d(de)) {
presto_set_dd(de);
}
 
if (!presto_d2d(de)) {
EXIT;
return 0;
}
 
root_fset = presto_d2d(de->d_inode->i_sb->s_root)->dd_fset;
if (root_fset->fset_flags & FSET_FLAT_BRANCH &&
(presto_d2d(de)->dd_fset != root_fset )) {
presto_d2d(de)->dd_fset = root_fset;
}
 
EXIT;
return 1;
 
#if 0
/* The following is needed for metadata on demand. */
if ( S_ISDIR(inode->i_mode) ) {
EXIT;
return (presto_chk(de, PRESTO_DATA) &&
(presto_chk(de, PRESTO_ATTR)));
} else {
EXIT;
return presto_chk(de, PRESTO_ATTR);
}
#endif
}
 
static void presto_d_release(struct dentry *dentry)
{
if (!presto_d2d(dentry)) {
/* This can happen for dentries from NFSd */
return;
}
presto_d2d(dentry)->dd_count--;
 
if (!presto_d2d(dentry)->dd_count) {
kmem_cache_free(presto_dentry_slab, presto_d2d(dentry));
dentry->d_fsdata = NULL;
}
}
 
/* Dentry operation table wiring up the two hooks defined in this file:
 * presto_d_revalidate() and presto_d_release(). */
struct dentry_operations presto_dentry_ops =
{
.d_revalidate = presto_d_revalidate,
.d_release = presto_d_release
};
 
/*
 * Return 1 when the dentry is named "ROOT" and its parent is not named
 * ".intermezzo"; return 0 otherwise.  dentry_name_cmp() is used as a
 * "names match" predicate (nonzero on match).
 */
static inline int presto_is_dentry_ROOT (struct dentry *dentry)
{
        if (!dentry_name_cmp(dentry, "ROOT"))
                return 0;
        if (dentry_name_cmp(dentry->d_parent, ".intermezzo"))
                return 0;
        return 1;
}
 
/*
 * Walk from dentry towards the root of the tree and return the fileset of
 * the first dentry (the starting one included) that has dentry data.
 *
 * *is_under_d_intermezzo is set to nonzero when a ".intermezzo" component is
 * met before any "ROOT" dentry, or when no "ROOT" was met and the dentry
 * data that ends the walk has PRESTO_DONT_JOURNAL set.
 *
 * Returns NULL when the top of the tree is reached without finding data.
 */
static struct presto_file_set* presto_try_find_fset(struct dentry* dentry,
                int *is_under_d_intermezzo)
{
        struct dentry *cur = dentry;
        struct presto_dentry_data *dd;
        int seen_root = 0;

        ENTRY;
        CDEBUG(D_FSDATA, "finding fileset for %p:%s\n", dentry,
               dentry->d_name.name);

        *is_under_d_intermezzo = 0;

        /* The dentry we are passed presumably does not have the correct
         * fset information.  We still start the walk from it so that the
         * seen_root and is_under_d_intermezzo decisions come out right.
         */
        for (;;) {
                CDEBUG(D_FSDATA, "--->dentry %p:%*s\n", cur,
                       cur->d_name.len,cur->d_name.name);

                if (presto_is_dentry_ROOT(cur))
                        seen_root = 1;
                if (!seen_root && dentry_name_cmp(cur, ".intermezzo"))
                        *is_under_d_intermezzo = 1;

                dd = presto_d2d(cur);
                if (dd != NULL) {
                        /* If we found a "ROOT" dentry while walking up the
                         * branch, we will journal regardless of whether
                         * we are under .intermezzo or not.
                         * If we are already under d_intermezzo don't reverse
                         * the decision here...even if we found a "ROOT"
                         * dentry above .intermezzo (if we were ever to
                         * modify the directory structure).
                         */
                        if (*is_under_d_intermezzo == 0)
                                *is_under_d_intermezzo = !seen_root &&
                                        (dd->dd_flags & PRESTO_DONT_JOURNAL);
                        EXIT;
                        return dd->dd_fset;
                }

                /* a dentry that is its own parent is the top of the tree */
                if (cur->d_parent == cur)
                        break;
                cur = cur->d_parent;
        }

        EXIT;
        return NULL;
}
 
/* Only call this function on positive dentries.
 *
 * Search the alias list of the dentry's inode for a dentry that already has
 * d_fsdata and return that presto_dentry_data, or NULL when no alias has
 * any.  The list is walked under dcache_lock.
 *
 * The match test used to be inverted (it stopped at the first alias
 * *without* data and returned its NULL pointer), so the function could never
 * return anything but NULL and presto_set_dd() never shared data between
 * aliases.
 */
static struct presto_dentry_data* presto_try_find_alias_with_dd (
                  struct dentry* dentry)
{
        struct inode *inode = dentry->d_inode;
        struct list_head *head, *pos;
        struct presto_dentry_data *dd = NULL;

        spin_lock(&dcache_lock);
        head = &inode->i_dentry;
        for (pos = head->next; pos != head; pos = pos->next) {
                struct dentry *alias = list_entry(pos, struct dentry, d_alias);

                /* read d_fsdata once, while the lock is still held */
                dd = presto_d2d(alias);
                if (dd != NULL)
                        break;
        }
        spin_unlock(&dcache_lock);

        /* dd is NULL here unless the loop stopped on an alias with data */
        return dd;
}
 
/* Only call this function on positive dentries.
 *
 * Attach dd to the dentry and to every alias of its inode that has no
 * d_fsdata yet, taking one dd_count reference per dentry it is attached to.
 * The alias list is walked under dcache_lock.
 */
static void presto_set_alias_dd (struct dentry *dentry,
                                 struct presto_dentry_data* dd)
{
        struct inode *inode = dentry->d_inode;
        struct list_head *pos;

        /* Set d_fsdata for this dentry */
        dentry->d_fsdata = dd;
        dd->dd_count++;

        /* Now set d_fsdata for all dentries in the alias list. */
        spin_lock(&dcache_lock);
        for (pos = inode->i_dentry.next; pos != &inode->i_dentry;
             pos = pos->next) {
                struct dentry *alias = list_entry(pos, struct dentry, d_alias);

                if (presto_d2d(alias))
                        continue;
                alias->d_fsdata = dd;
                dd->dd_count++;
        }
        spin_unlock(&dcache_lock);
}
 
/*
 * Allocate a presto_dentry_data from presto_dentry_slab, zero it and give it
 * an initial dd_count of 1.  Returns NULL (after logging) when the
 * allocation fails.
 */
inline struct presto_dentry_data *izo_alloc_ddata(void)
{
        struct presto_dentry_data *fresh;

        fresh = kmem_cache_alloc(presto_dentry_slab, SLAB_KERNEL);
        if (fresh != NULL) {
                memset(fresh, 0, sizeof(*fresh));
                fresh->dd_count = 1;
                return fresh;
        }

        CERROR("IZO: out of memory trying to allocate presto_dentry_data\n");
        return NULL;
}
 
/* This uses the BKL!
 *
 * Attach InterMezzo dentry data (d_fsdata) to a dentry.
 *
 * For a positive dentry, data already attached to another alias of the same
 * inode is shared when one is found.  Otherwise the tree is walked upwards
 * with presto_try_find_fset() and fresh data is allocated, pointing at the
 * fileset found; PRESTO_DONT_JOURNAL is set in dd_flags when that walk
 * reported the dentry as being under .intermezzo.
 *
 * Returns 0 on success, and also when d_fsdata was already set on entry;
 * -EROFS when no fileset is found (d_fsdata stays NULL); -ENOMEM when the
 * allocation fails.
 */
int presto_set_dd(struct dentry * dentry)
{
struct presto_file_set *fset;
struct presto_dentry_data *dd;
int is_under_d_izo;
int error=0;

ENTRY;

if (!dentry)
BUG();

lock_kernel();

/* Did we lose a race? */
if (dentry->d_fsdata) {
CERROR("dentry %p already has d_fsdata set\n", dentry);
if (dentry->d_inode)
CERROR(" inode: %ld\n", dentry->d_inode->i_ino);
EXIT;
goto out_unlock;
}

if (dentry->d_inode != NULL) {
/* NFSd runs find_fh_dentry which instantiates disconnected
* dentries which are then connected without a lookup().
* So it is possible to have connected dentries that do not
* have d_fsdata set. So we walk the list trying to find
* an alias which has its d_fsdata set and then use that
* for all the other dentries as well.
* - SHP,Vinny.
*/

/* If there is an alias with d_fsdata use it. */
if ((dd = presto_try_find_alias_with_dd (dentry))) {
presto_set_alias_dd (dentry, dd);
EXIT;
goto out_unlock;
}
} else {
/* Negative dentry */
CDEBUG(D_FSDATA,"negative dentry %p: %*s\n", dentry,
dentry->d_name.len, dentry->d_name.name);
}

/* No pre-existing d_fsdata, we need to construct one.
* First, we must walk up the tree to find the fileset
* If a fileset can't be found, we leave a null fsdata
* and return EROFS to indicate that we can't journal
* updates.
*/
fset = presto_try_find_fset (dentry, &is_under_d_izo);
if (!fset) {
#ifdef PRESTO_NO_NFS
CERROR("No fileset for dentry %p: %*s\n", dentry,
dentry->d_name.len, dentry->d_name.name);
#endif
error = -EROFS;
EXIT;
goto out_unlock;
}

/* izo_alloc_ddata() hands back data with dd_count already at 1 */
dentry->d_fsdata = izo_alloc_ddata();
if (!presto_d2d(dentry)) {
CERROR ("InterMezzo: out of memory allocating d_fsdata\n");
error = -ENOMEM;
goto out_unlock;
}
presto_d2d(dentry)->dd_fset = fset;
if (is_under_d_izo)
presto_d2d(dentry)->dd_flags |= PRESTO_DONT_JOURNAL;
EXIT;

/* Common exit: every path above reaches here with the BKL still held. */
out_unlock:
CDEBUG(D_FSDATA,"presto_set_dd dentry %p: %*s, d_fsdata %p\n",
dentry, dentry->d_name.len, dentry->d_name.name,
dentry->d_fsdata);
unlock_kernel();
return error;
}
 
/*
 * Create the "presto_cache" slab used for struct presto_dentry_data and
 * store it in presto_dentry_slab.
 *
 * Returns 1 when the cache was created and 0 when it was not.  Note that
 * this is a boolean, not the 0 / -errno convention.
 */
int presto_init_ddata_cache(void)
{
        ENTRY;
        presto_dentry_slab = kmem_cache_create("presto_cache",
                                               sizeof(struct presto_dentry_data),
                                               0, SLAB_HWCACHE_ALIGN,
                                               NULL, NULL);
        EXIT;

        if (presto_dentry_slab == NULL)
                return 0;
        return 1;
}
 
/* Destroy the slab cache held in presto_dentry_slab. */
void presto_cleanup_ddata_cache(void)
{
kmem_cache_destroy(presto_dentry_slab);
}
/journal.c
0,0 → 1,2453
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam
* Copyright (C) 2001 Cluster File Systems, Inc.
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* Support for journalling extended attributes
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
/* One outstanding reservation of space in a log file.  Filled in and linked
 * by presto_reserve_record(), unlinked by presto_release_record(). */
struct presto_reservation_data {
unsigned int ri_recno; /* record number given to the reserved record */
loff_t ri_offset; /* fd_offset at which the reserved record starts */
loff_t ri_size; /* size of the reserved record (rec->size) */
struct list_head ri_list; /* link in presto_log_fd.fd_reservations */
};
 
/*
* Locking Semantics
*
* write lock in struct presto_log_fd:
* - name: fd_lock
* - required for: accessing any field in a presto_log_fd
* - may not be held across I/O
* -
*
*/
 
/*
 * Reserve space for one record in a log and queue the reservation.
 *
 * Under fd_lock this assigns the next record number, decides the offset at
 * which the record will start, fills in rec->offset / rec->recno, records
 * the same information in rd, appends rd to fd->fd_reservations and moves
 * fd_offset past the record.
 *
 * For KML records, when the record would reach or cross the next chunk
 * boundary (chunk = 1 << fset_chunkbits) it is started at that boundary
 * instead.  rec->offset for KML records also includes
 * fset->fset_kml_logical_off.
 */
static void presto_reserve_record(struct presto_file_set *fset,
                                  struct presto_log_fd *fd,
                                  struct rec_info *rec,
                                  struct presto_reservation_data *rd)
{
        int moved_to_boundary = 0;

        ENTRY;
        write_lock(&fd->fd_lock);

        if (rec->is_kml) {
                int chunk = 1 << fset->fset_chunkbits;
                int chunk_mask = ~(chunk - 1);
                loff_t boundary = (fd->fd_offset + chunk - 1) & chunk_mask;

                if (fd->fd_offset + rec->size >= boundary) {
                        moved_to_boundary = 1;
                        fd->fd_offset = boundary;
                }
        }

        fd->fd_recno++;

        /* this moves the fd_offset back after truncation */
        if (!moved_to_boundary && list_empty(&fd->fd_reservations))
                fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size;

        rec->recno = fd->fd_recno;
        rec->offset = fd->fd_offset;
        if (rec->is_kml)
                rec->offset += fset->fset_kml_logical_off;

        /* add the reservation data to the end of the list */
        rd->ri_recno = rec->recno;
        rd->ri_offset = fd->fd_offset;
        rd->ri_size = rec->size;
        list_add(&rd->ri_list, fd->fd_reservations.prev);

        fd->fd_offset += rec->size;

        write_unlock(&fd->fd_lock);
        EXIT;
}
 
/* Remove the reservation rd from the log's reservation list; the list is
 * modified with fd_lock held for writing. */
static inline void presto_release_record(struct presto_log_fd *fd,
struct presto_reservation_data *rd)
{
write_lock(&fd->fd_lock);
list_del(&rd->ri_list);
write_unlock(&fd->fd_lock);
}
 
/* XXX should we ask for do_truncate to be exported? */
/*
 * Set the size of the dentry's inode to length, but only if the inode is
 * currently exactly size_check bytes long.  The check and the size change
 * are done with i_sem and the BKL held.
 *
 * Returns -EINVAL for a negative length, -EALREADY when the current size
 * differs from size_check, otherwise the result of the inode's setattr
 * operation (0 when inode_setattr() is used as the fallback).
 */
int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry,
                    loff_t length, loff_t size_check)
{
        struct inode *inode = dentry->d_inode;
        struct iattr attrs;
        int error;

        ENTRY;

        if (length < 0) {
                EXIT;
                return -EINVAL;
        }

        down(&inode->i_sem);
        lock_kernel();

        if (size_check != inode->i_size) {
                error = -EALREADY;
                goto out_unlock;
        }

        attrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
        attrs.ia_size = length;

        if (inode->i_op && inode->i_op->setattr) {
                error = inode->i_op->setattr(dentry, &attrs);
        } else {
                inode_setattr(dentry->d_inode, &attrs);
                error = 0;
        }

out_unlock:
        unlock_kernel();
        up(&inode->i_sem);
        EXIT;
        return error;
}
 
static void presto_kml_truncate(struct presto_file_set *fset)
{
int rc;
ENTRY;
 
write_lock(&fset->fset_kml.fd_lock);
if (fset->fset_kml.fd_truncating == 1 ) {
write_unlock(&fset->fset_kml.fd_lock);
EXIT;
return;
}
 
fset->fset_kml.fd_truncating = 1;
write_unlock(&fset->fset_kml.fd_lock);
 
CERROR("islento: %d, count: %d\n",
ISLENTO(presto_i2m(fset->fset_dentry->d_inode)),
fset->fset_permit_count);
 
rc = izo_upc_kml_truncate(fset->fset_cache->cache_psdev->uc_minor,
fset->fset_lento_off, fset->fset_lento_recno,
fset->fset_name);
 
/* Userspace is the only permitholder now, and will retain an exclusive
* hold on the permit until KML truncation completes. */
/* FIXME: double check this code path now that the precise semantics of
* fset->fset_permit_count have changed. */
 
if (rc != 0) {
write_lock(&fset->fset_kml.fd_lock);
fset->fset_kml.fd_truncating = 0;
write_unlock(&fset->fset_kml.fd_lock);
}
 
EXIT;
}
 
/*
 * Start a transaction through the cache filter's transaction operations.
 * Returns whatever tr_start() returns, or NULL when the filter has no
 * o_trops.
 */
void *presto_trans_start(struct presto_file_set *fset, struct inode *inode,
                         int op)
{
        ENTRY;
        if (fset->fset_cache->cache_filter->o_trops) {
                EXIT;
                return fset->fset_cache->cache_filter->o_trops->tr_start
                        (fset, inode, op);
        }

        EXIT;
        return NULL;
}
 
void presto_trans_commit(struct presto_file_set *fset, void *handle)
{
ENTRY;
if (!fset->fset_cache->cache_filter->o_trops ) {
EXIT;
return;
}
 
fset->fset_cache->cache_filter->o_trops->tr_commit(fset, handle);
 
/* Check to see if the KML needs truncated. */
if (fset->kml_truncate_size > 0 &&
!fset->fset_kml.fd_truncating &&
fset->fset_kml.fd_offset > fset->kml_truncate_size) {
CDEBUG(D_JOURNAL, "kml size: %lu; truncating\n",
(unsigned long)fset->fset_kml.fd_offset);
presto_kml_truncate(fset);
}
EXIT;
}
 
/* Return the uc_no_journal setting of the izo_channels[] entry selected by
 * the minor number of the fileset's psdev. */
inline int presto_no_journal(struct presto_file_set *fset)
{
        return izo_channels[fset->fset_cache->cache_psdev->uc_minor]
                .uc_no_journal;
}
 
/* Round x up to the next multiple of 4. */
#define size_round(x) (((x)+3) & ~0x3)

/* Free a PAGE_SIZE buffer. */
#define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE)
/* Allocate a PAGE_SIZE buffer into newbuf.  On failure, free oldbuf (when it
 * is non-NULL) and return -ENOMEM from the *calling* function.  The expansion
 * is several statements rather than one, so it must not be used as the
 * unbraced body of an if/else. */
#define BUFF_ALLOC(newbuf, oldbuf) \
PRESTO_ALLOC(newbuf, PAGE_SIZE); \
if ( !newbuf ) { \
if (oldbuf) \
BUFF_FREE(oldbuf); \
return -ENOMEM; \
}
 
/*
 * Build the path of dentry relative to root (a fileset root) in buffer.
 *
 * "buflen" should be PAGE_SIZE or more.  The path is assembled backwards
 * from the end of the buffer, so the returned pointer points somewhere
 * inside buffer rather than at its start.  A dentry that is unhashed and is
 * not its own parent gets " (deleted)" appended.  The walk stops at root, at
 * a dentry that is its own parent, or when the next component would no
 * longer fit.
 */
char * presto_path(struct dentry *dentry, struct dentry *root,
                   char *buffer, int buflen)
{
        char *cursor = buffer + buflen;
        char *path;

        *--cursor = '\0';
        buflen--;
        if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) {
                buflen -= 10;
                cursor -= 10;
                memcpy(cursor, " (deleted)", 10);
        }

        /* Get '/' right: this is the result when dentry == root */
        path = cursor - 1;
        *path = '/';

        while (dentry != root) {
                struct dentry *parent = dentry->d_parent;
                int namelen;

                if (parent == dentry)
                        break;

                namelen = dentry->d_name.len;
                buflen -= namelen + 1;
                if (buflen < 0)
                        break;

                cursor -= namelen;
                memcpy(cursor, dentry->d_name.name, namelen);
                *--cursor = '/';
                path = cursor;

                dentry = parent;
        }

        return path;
}
 
/* Copy size bytes from value to buf and return the position in buf just
 * past the copied bytes, so that calls can be chained. */
static inline char *logit(char *buf, const void *value, int size)
{
        memcpy(buf, value, size);
        return buf + size;
}
 
 
/*
 * Write a KML record prefix to buf: a struct kml_prefix_hdr followed by
 * ngroups 32-bit group ids.  The header fields set through cpu_to_le32()
 * and the group ids are stored little-endian.  pid and auid come from the
 * current task; fsuid/fsgid and the group list come from the arguments.
 *
 * Returns the position in buf just past the prefix.
 *
 * NOTE(review): the local group array holds NGROUPS_MAX entries and ngroups
 * is not checked against that here -- presumably callers guarantee it.
 */
static inline char *
journal_log_prefix_with_groups_and_ids(char *buf, int opcode,
                                       struct rec_info *rec,
                                       __u32 ngroups, gid_t *groups,
                                       __u32 fsuid, __u32 fsgid)
{
        struct kml_prefix_hdr hdr;
        u32 le_groups[NGROUPS_MAX];
        __u32 n;

        for (n = 0; n < ngroups; n++)
                le_groups[n] = cpu_to_le32((__u32) groups[n]);

        hdr.len = cpu_to_le32(rec->size);
        hdr.version = KML_MAJOR_VERSION | KML_MINOR_VERSION;
        hdr.pid = cpu_to_le32(current->pid);
        hdr.auid = cpu_to_le32(current->uid);
        hdr.fsuid = cpu_to_le32(fsuid);
        hdr.fsgid = cpu_to_le32(fsgid);
        hdr.ngroups = cpu_to_le32(ngroups);
        hdr.opcode = cpu_to_le32(opcode);

        buf = logit(buf, &hdr, sizeof(hdr));
        return logit(buf, le_groups, sizeof(__u32) * ngroups);
}
 
/*
 * Write a KML record prefix that uses the current task's group list, fsuid
 * and fsgid.  Returns the position in buf just past the prefix.
 */
static inline char *
journal_log_prefix(char *buf, int opcode, struct rec_info *rec)
{
        __u32 wide_gids[NGROUPS_MAX];
        int n = 0;

        /* convert 16 bit gid's to 32 bit gid's */
        while (n < current->ngroups) {
                wide_gids[n] = (__u32) current->groups[n];
                n++;
        }

        return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
                                                      (__u32)current->ngroups,
                                                      wide_gids,
                                                      (__u32)current->fsuid,
                                                      (__u32)current->fsgid);
}
 
/* Write a KML record prefix with a caller-supplied group list; fsuid and
 * fsgid are taken from the current task.  Returns whatever
 * journal_log_prefix_with_groups_and_ids() returns. */
static inline char *
journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec,
__u32 ngroups, gid_t *groups)
{
return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
ngroups, groups,
(__u32)current->fsuid,
(__u32)current->fsgid);
}
 
static inline char *log_dentry_version(char *buf, struct dentry *dentry)
{
struct presto_version version;
 
presto_getversion(&version, dentry->d_inode);
version.pv_mtime = HTON__u64(version.pv_mtime);
version.pv_ctime = HTON__u64(version.pv_ctime);
version.pv_size = HTON__u64(version.pv_size);
 
return logit(buf, &version, sizeof(version));
}
 
static inline char *log_version(char *buf, struct presto_version *pv)
{
struct presto_version version;
 
memcpy(&version, pv, sizeof(version));
version.pv_mtime = HTON__u64(version.pv_mtime);
version.pv_ctime = HTON__u64(version.pv_ctime);
version.pv_size = HTON__u64(version.pv_size);
 
return logit(buf, &version, sizeof(version));
}
 
static inline char *log_rollback(char *buf, struct izo_rollback_data *rb)
{
struct izo_rollback_data rollback;
 
memcpy(&rollback, rb, sizeof(rollback));
rollback.rb_mode = HTON__u32(rollback.rb_mode);
rollback.rb_rdev = HTON__u32(rollback.rb_rdev);
rollback.rb_uid = HTON__u64(rollback.rb_uid);
rollback.rb_gid = HTON__u64(rollback.rb_gid);
 
return logit(buf, &rollback, sizeof(rollback));
}
 
/*
 * Append a struct kml_suffix to buf and return the position just past it.
 *
 * log points at the start of the record being built; the suffix length is
 * copied from the prefix header found there.  prevrec is always written as
 * 0, and the record number is NOT filled in here (see the comment in the
 * body), so s.recno is whatever happened to be on the stack until it is
 * set after reservation.  fset, dentry and rec are only referenced by the
 * disabled #if 0 code.
 */
static inline char *journal_log_suffix(char *buf, char *log,
struct presto_file_set *fset,
struct dentry *dentry,
struct rec_info *rec)
{
struct kml_suffix s;
struct kml_prefix_hdr *p = (struct kml_prefix_hdr *)log;

#if 0
/* XXX needs to be done after reservation,
disable this until version 1.2 */
if ( dentry ) {
s.prevrec = cpu_to_le32(rec->offset -
presto_d2d(dentry)->dd_kml_offset);
presto_d2d(dentry)->dd_kml_offset = rec->offset;
} else {
s.prevrec = -1;
}
#endif
s.prevrec = 0;

/* record number needs to be filled in after reservation
s.recno = cpu_to_le32(rec->recno); */
s.time = cpu_to_le32(CURRENT_TIME);
/* p->len was stored with cpu_to_le32() when the prefix was written */
s.len = p->len;
return logit(buf, &s, sizeof(s));
}
 
/*
 * Close the file of a log descriptor and clear fd_file.
 *
 * Returns the result of filp_close(), or 0 (after logging) when the
 * descriptor has no file.
 */
int izo_log_close(struct presto_log_fd *logfd)
{
        int rc;

        if (!logfd->fd_file) {
                CERROR("InterMezzo: %s: no filp\n", __FUNCTION__);
                return 0;
        }

        rc = filp_close(logfd->fd_file, 0);
        logfd->fd_file = NULL;
        if (rc)
                CERROR("InterMezzo: close files: filp won't close: %d\n", rc);

        return rc;
}
 
/*
 * Write len bytes from the kernel buffer str to file at *off through the
 * file's f_op->write, with the address limit temporarily set to get_ds().
 * *off is handed to the write method, which moves it forward.
 *
 * Returns len on a complete write, -EIO when the write method returns
 * anything else, and -EINVAL when off, file, f_op or f_op->write is
 * missing.
 */
int presto_fwrite(struct file *file, const char *str, int len, loff_t *off)
{
        mm_segment_t saved_fs;
        int written;

        ENTRY;

        if (!off || !file || !file->f_op || !file->f_op->write) {
                EXIT;
                return -EINVAL;
        }

        saved_fs = get_fs();
        set_fs(get_ds());
        written = file->f_op->write(file, str, len, off);
        if (written != len) {
                CERROR("presto_fwrite: wrote %d bytes instead of "
                       "%d at %ld\n", written, len, (long)*off);
                written = -EIO;
        }
        set_fs(saved_fs);

        EXIT;
        return written;
}
 
/*
 * Read len bytes from file at *off into the kernel buffer str through the
 * file's f_op->read, with the address limit temporarily set to get_ds().
 * *off is handed to the read method, which moves it forward.
 *
 * Returns len on a complete read, -EIO when the read method returns
 * anything else, and -EINVAL when off, file, f_op or f_op->read is missing.
 *
 * The "large read" diagnostic dereferences both off and file, so it is
 * emitted only after the arguments have been validated; it used to run
 * before the NULL checks and would oops on the very arguments those checks
 * are meant to reject.
 */
int presto_fread(struct file *file, char *str, int len, loff_t *off)
{
        int rc;
        mm_segment_t old_fs;
        ENTRY;

        if (!off || !file || !file->f_op || !file->f_op->read) {
                EXIT;
                return -EINVAL;
        }

        if (len > 512)
                CERROR("presto_fread: read at %Ld for %d bytes, ino %ld\n",
                       *off, len, file->f_dentry->d_inode->i_ino);

        old_fs = get_fs();
        set_fs(get_ds());
        rc = file->f_op->read(file, str, len, off);
        if (rc != len) {
                CDEBUG(D_FILE, "presto_fread: read %d bytes instead of "
                       "%d at %Ld\n", rc, len, *off);
                rc = -EIO;
        }
        set_fs(old_fs);
        EXIT;
        return rc;
}
 
loff_t presto_kml_offset(struct presto_file_set *fset)
{
unsigned int kml_recno;
struct presto_log_fd *fd = &fset->fset_kml;
loff_t offset;
ENTRY;
 
write_lock(&fd->fd_lock);
 
/* Determine the largest valid offset, i.e. up until the first
* reservation held on the file. */
if ( !list_empty(&fd->fd_reservations) ) {
struct presto_reservation_data *rd;
rd = list_entry(fd->fd_reservations.next,
struct presto_reservation_data,
ri_list);
offset = rd->ri_offset;
kml_recno = rd->ri_recno;
} else {
offset = fd->fd_file->f_dentry->d_inode->i_size;
kml_recno = fset->fset_kml.fd_recno;
}
write_unlock(&fd->fd_lock);
return offset;
}
 
/*
 * Tell userspace (izo_upc_kml) about KML records it has not been told about
 * yet.
 *
 * The range reported runs from the last dispatched position
 * (fset_lento_off / fset_lento_recno) up to the first outstanding
 * reservation, or to the end of the KML file when nothing is reserved.
 * Nothing is sent when the record number has not moved past
 * fset_lento_recno.  On a successful upcall the dispatched position is
 * updated.
 *
 * Returns 0 when there was nothing to do or the upcall succeeded, otherwise
 * the upcall's error.
 *
 * NOTE(review): fd_lock stays held across the izo_upc_kml() call, although
 * the locking notes near the top of this file say fd_lock may not be held
 * across I/O -- confirm whether the upcall can block.
 */
static int presto_kml_dispatch(struct presto_file_set *fset)
{
int rc = 0;
unsigned int kml_recno;
struct presto_log_fd *fd = &fset->fset_kml;
loff_t offset;
ENTRY;

write_lock(&fd->fd_lock);

/* Determine the largest valid offset, i.e. up until the first
* reservation held on the file. */
if ( !list_empty(&fd->fd_reservations) ) {
struct presto_reservation_data *rd;
rd = list_entry(fd->fd_reservations.next,
struct presto_reservation_data,
ri_list);
offset = rd->ri_offset;
kml_recno = rd->ri_recno;
} else {
offset = fd->fd_file->f_dentry->d_inode->i_size;
kml_recno = fset->fset_kml.fd_recno;
}

if ( kml_recno < fset->fset_lento_recno ) {
/* the log appears to be behind what was already dispatched */
CERROR("presto_kml_dispatch: smoke is coming\n");
write_unlock(&fd->fd_lock);
EXIT;
return 0;
} else if ( kml_recno == fset->fset_lento_recno ) {
/* nothing new since the last dispatch */
write_unlock(&fd->fd_lock);
EXIT;
return 0;
/* XXX add a further "if" here to delay the KML upcall */
#if 0
} else if ( kml_recno < fset->fset_lento_recno + 100) {
write_unlock(&fd->fd_lock);
EXIT;
return 0;
#endif
}
CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name);

/* the end offset passed up is logical: file offset plus
* fset_kml_logical_off */
rc = izo_upc_kml(fset->fset_cache->cache_psdev->uc_minor,
fset->fset_lento_off, fset->fset_lento_recno,
offset + fset->fset_kml_logical_off, kml_recno,
fset->fset_name);

if ( rc ) {
write_unlock(&fd->fd_lock);
EXIT;
return rc;
}

fset->fset_lento_off = offset;
fset->fset_lento_recno = kml_recno;
write_unlock(&fd->fd_lock);
EXIT;
return 0;
}
 
/*
 * Resolve the parent of path into nd (path_init() with LOOKUP_PARENT,
 * followed by path_walk() when path_init() returns nonzero).
 *
 * Returns 0 when path_init() returns 0 or the walk succeeds, otherwise the
 * error from path_walk().  fset is not used.
 */
int izo_lookup_file(struct presto_file_set *fset, char *path,
                    struct nameidata *nd)
{
        int rc;

        CDEBUG(D_CACHE, "looking up: %s\n", path);

        if (!path_init(path, LOOKUP_PARENT, nd))
                return 0;

        rc = path_walk(path, nd);
        if (rc) {
                EXIT;
                return rc;
        }

        return 0;
}
 
/* FIXME: this function is a mess of locking and error handling. There's got to
* be a better way. */
/*
 * Rename the fileset-internal file oldname to newname with presto_rename().
 *
 * Both names are turned into paths with izo_make_path(), their parent
 * directories are looked up and locked with double_lock(), and the two
 * final components are resolved with lookup_hash() before the rename.
 *
 * Returns 0 on success; -ENOENT when a path cannot be built or the old name
 * has no inode; otherwise the error from the lookups or from
 * presto_rename().
 *
 * The exit labels unwind in reverse order of acquisition:
 *   exit4: drop old_dentry
 *   exit3: unlock both parents, release newnd
 *   exit2: release oldnd
 *   exit1: free newpath
 *   exit:  free oldpath
 */
static int do_truncate_rename(struct presto_file_set *fset, char *oldname,
char *newname)
{
struct dentry *old_dentry, *new_dentry;
struct nameidata oldnd, newnd;
char *oldpath, *newpath;
int error;

ENTRY;

oldpath = izo_make_path(fset, oldname);
if (oldpath == NULL) {
EXIT;
return -ENOENT;
}

newpath = izo_make_path(fset, newname);
if (newpath == NULL) {
error = -ENOENT;
EXIT;
goto exit;
}

if ((error = izo_lookup_file(fset, oldpath, &oldnd)) != 0) {
EXIT;
goto exit1;
}

if ((error = izo_lookup_file(fset, newpath, &newnd)) != 0) {
EXIT;
goto exit2;
}

/* both parent directories stay locked until exit3 */
double_lock(newnd.dentry, oldnd.dentry);
old_dentry = lookup_hash(&oldnd.last, oldnd.dentry);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry)) {
EXIT;
goto exit3;
}
error = -ENOENT;
/* the source must exist */
if (!old_dentry->d_inode) {
EXIT;
goto exit4;
}
new_dentry = lookup_hash(&newnd.last, newnd.dentry);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry)) {
EXIT;
goto exit4;
}

{
extern int presto_rename(struct inode *old_dir,struct dentry *old_dentry,
struct inode *new_dir,struct dentry *new_dentry);
error = presto_rename(old_dentry->d_parent->d_inode, old_dentry,
new_dentry->d_parent->d_inode, new_dentry);
}

dput(new_dentry);
EXIT;
exit4:
dput(old_dentry);
exit3:
double_up(&newnd.dentry->d_inode->i_sem, &oldnd.dentry->d_inode->i_sem);
path_release(&newnd);
exit2:
path_release(&oldnd);
exit1:
PRESTO_FREE(newpath, strlen(newpath) + 1);
exit:
PRESTO_FREE(oldpath, strlen(oldpath) + 1);
return error;
}
 
/* This function is called with the fset->fset_kml.fd_lock held */
/*
 * Second half of KML truncation, run once userspace has answered the
 * truncate upcall.  offset is the number of bytes to drop from the front of
 * the KML; 0 means userspace could not do its part and the truncation is
 * abandoned (fd_truncating is cleared).
 *
 * The tail of the KML is copied aside with presto_copy_kml_tail(); then,
 * inside one transaction, 'kml' and 'sml' are unlinked, 'kml_tmp' and
 * 'sml_tmp' are renamed over them, and the new logical offset is written
 * out.  Finally the open KML file is swapped for the copy.
 *
 * Returns 0 on success or abort, -EAGAIN when reservations are outstanding,
 * -ENOMEM when no transaction can be started or a path cannot be built,
 * otherwise the error from presto_copy_kml_tail(), the lookups or the
 * unlinks.
 *
 * NOTE(review): things visible in this function that look unintended and
 * should be confirmed before relying on it:
 *  - the file returned by presto_copy_kml_tail() is not closed on any of
 *    the error returns that follow it;
 *  - failures of the two do_truncate_rename() calls and of
 *    presto_write_kml_logical_offset() are only logged; the function still
 *    swaps the KML file and returns 0;
 *  - kmlpath and smlpath are not freed on the success return;
 *  - at exit_commit both paths are freed with one computed length that has
 *    no +1 for the terminator, whereas do_truncate_rename() frees such
 *    paths with strlen(path) + 1;
 *  - fd_truncating is left set on the error returns other than offset == 0.
 */
int presto_finish_kml_truncate(struct presto_file_set *fset,
unsigned long int offset)
{
struct lento_vfs_context info;
void *handle;
struct file *f;
struct dentry *dentry;
int error = 0, len;
struct nameidata nd;
char *kmlpath = NULL, *smlpath = NULL;
ENTRY;

if (offset == 0) {
/* Lento couldn't do what it needed to; abort the truncation. */
fset->fset_kml.fd_truncating = 0;
EXIT;
return 0;
}

/* someone is about to write to the end of the KML; try again later. */
if ( !list_empty(&fset->fset_kml.fd_reservations) ) {
EXIT;
return -EAGAIN;
}

f = presto_copy_kml_tail(fset, offset);
if (IS_ERR(f)) {
EXIT;
return PTR_ERR(f);
}

/* In a single transaction:
*
* - unlink 'kml'
* - rename 'kml_tmp' to 'kml'
* - unlink 'sml'
* - rename 'sml_tmp' to 'sml'
* - rewrite the first record of last_rcvd with the new kml
* offset.
*/
handle = presto_trans_start(fset, fset->fset_dentry->d_inode,
KML_OPCODE_KML_TRUNC);
if (IS_ERR(handle)) {
presto_release_space(fset->fset_cache, PRESTO_REQLOW);
CERROR("ERROR: presto_finish_kml_truncate: no space for transaction\n");
EXIT;
return -ENOMEM;
}

memset(&info, 0, sizeof(info));
info.flags = LENTO_FL_IGNORE_TIME;

/* --- unlink 'kml' --- */
kmlpath = izo_make_path(fset, "kml");
if (kmlpath == NULL) {
error = -ENOMEM;
CERROR("make_path failed: ENOMEM\n");
EXIT;
goto exit_commit;
}

if ((error = izo_lookup_file(fset, kmlpath, &nd)) != 0) {
CERROR("izo_lookup_file(kml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}
/* parent directory is locked for the lookup and the unlink */
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
CERROR("lookup_hash failed\n");
EXIT;
goto exit_commit;
}
error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
dput(dentry);
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);

if (error != 0) {
CERROR("presto_do_unlink(kml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}

/* --- unlink 'sml' (same sequence as for 'kml') --- */
smlpath = izo_make_path(fset, "sml");
if (smlpath == NULL) {
error = -ENOMEM;
CERROR("make_path() failed: ENOMEM\n");
EXIT;
goto exit_commit;
}

if ((error = izo_lookup_file(fset, smlpath, &nd)) != 0) {
CERROR("izo_lookup_file(sml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
CERROR("lookup_hash failed\n");
EXIT;
goto exit_commit;
}
error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
dput(dentry);
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);

if (error != 0) {
CERROR("presto_do_unlink(sml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}

/* --- move the temporary files into place; errors are only logged --- */
error = do_truncate_rename(fset, "kml_tmp", "kml");
if (error != 0)
CERROR("do_truncate_rename(kml_tmp, kml) failed: %d\n", error);
error = do_truncate_rename(fset, "sml_tmp", "sml");
if (error != 0)
CERROR("do_truncate_rename(sml_tmp, sml) failed: %d\n", error);

/* Write a new 'last_rcvd' record with the new KML offset */
fset->fset_kml_logical_off += offset;
CDEBUG(D_CACHE, "new kml_logical_offset: %Lu\n",
fset->fset_kml_logical_off);
if (presto_write_kml_logical_offset(fset) != 0) {
CERROR("presto_write_kml_logical_offset failed\n");
}

presto_trans_commit(fset, handle);

/* Everything was successful, so swap the KML file descriptors */
filp_close(fset->fset_kml.fd_file, NULL);
fset->fset_kml.fd_file = f;
fset->fset_kml.fd_offset -= offset;
fset->fset_kml.fd_truncating = 0;

EXIT;
return 0;

/* Error exit: close the transaction and free whichever paths were built. */
exit_commit:
presto_trans_commit(fset, handle);
len = strlen("/.intermezzo/") + strlen(fset->fset_name) +strlen("sml");
if (kmlpath != NULL)
PRESTO_FREE(kmlpath, len);
if (smlpath != NULL)
PRESTO_FREE(smlpath, len);
return error;
}
 
/* structure of an extended log record:

   buf-prefix  buf-body [string1 [string2 [string3]]] buf-suffix

   buf holds prefix, body and suffix back to back in 'size' bytes; the last
   sizeof(struct kml_suffix) bytes of buf are the suffix.  Everything before
   the suffix is written first, then each string that is non-NULL and has a
   non-zero length, then the suffix.

   note: moves offset forward

   Returns 0 on success and -EIO as soon as one of the writes is incomplete.
*/
static inline int presto_write_record(struct file *f, loff_t *off,
                        const char *buf, size_t size,
                        const char *string1, int len1,
                        const char *string2, int len2,
                        const char *string3, int len3)
{
        const char *strings[3];
        int lengths[3];
        size_t prefix_size = size - sizeof(struct kml_suffix);
        int rc;
        int i;

        strings[0] = string1;
        lengths[0] = len1;
        strings[1] = string2;
        lengths[1] = len2;
        strings[2] = string3;
        lengths[2] = len3;

        rc = presto_fwrite(f, buf, prefix_size, off);
        if (rc != prefix_size) {
                CERROR("Write error!\n");
                EXIT;
                return -EIO;
        }

        for (i = 0; i < 3; i++) {
                if (!strings[i] || !lengths[i])
                        continue;

                rc = presto_fwrite(f, strings[i], lengths[i], off);
                if (rc != lengths[i]) {
                        CERROR("Write error!\n");
                        EXIT;
                        return -EIO;
                }
        }

        rc = presto_fwrite(f, buf + prefix_size,
                           sizeof(struct kml_suffix), off);
        if (rc != sizeof(struct kml_suffix)) {
                CERROR("Write error!\n");
                EXIT;
                return -EIO;
        }

        return 0;
}
 
 
/*
 * Append one record to the KML (rec->is_kml set) or to the LML.
 *
 * rec->size must be valid prior to calling this function.
 *
 * Space is reserved with presto_reserve_record(), which also assigns the
 * record number; that number is then patched into the kml_suffix at the end
 * of buf (buf is written through despite being declared const).  The record
 * is written with presto_write_record() and, on success,
 * presto_kml_dispatch() is called.
 *
 * The reservation is released whether or not the write succeeded: rd lives
 * in this function's stack frame, and returning while it is still linked
 * into fd->fd_reservations would leave a dangling node on that list.
 *
 * Returns -EINVAL when buf is NULL, the error from presto_write_record(), or
 * the result of presto_kml_dispatch().
 *
 * had to export this for branch_reinter in kml_reint.c
 */
int presto_log(struct presto_file_set *fset, struct rec_info *rec,
               const char *buf, size_t size,
               const char *string1, int len1,
               const char *string2, int len2,
               const char *string3, int len3)
{
        int rc;
        struct presto_reservation_data rd;
        loff_t offset;
        struct presto_log_fd *fd;
        struct kml_suffix *s;
        int prefix_size;

        ENTRY;

        /* buf is NULL when no_journal is in effect */
        if (!buf) {
                EXIT;
                return -EINVAL;
        }

        if (rec->is_kml) {
                fd = &fset->fset_kml;
        } else {
                fd = &fset->fset_lml;
        }

        presto_reserve_record(fset, fd, rec, &rd);

        if (rec->is_kml) {
                if (rec->offset < fset->fset_kml_logical_off) {
                        CERROR("record with pre-trunc offset.  tell phil.\n");
                        BUG();
                }
                /* rec->offset is logical; convert back to a file offset */
                offset = rec->offset - fset->fset_kml_logical_off;
        } else {
                offset = rec->offset;
        }

        /* now we know the record number */
        prefix_size = size - sizeof(struct kml_suffix);
        s = (struct kml_suffix *) (buf + prefix_size);
        s->recno = cpu_to_le32(rec->recno);

        rc = presto_write_record(fd->fd_file, &offset, buf, size,
                                 string1, len1, string2, len2, string3, len3);

        /* always unlink rd before it goes out of scope */
        presto_release_record(fd, &rd);

        if (rc) {
                CERROR("presto: error writing record to %s\n",
                       rec->is_kml ? "KML" : "LML");
                EXIT;
                return rc;
        }

        rc = presto_kml_dispatch(fset);

        EXIT;
        return rc;
}
 
/* read from the record at tail */
/*
 * Locate the suffix of the last record in a log whose end is at 'tail'.
 *
 * The file is scanned backwards from tail, one int at a time, skipping ints
 * that read as zero, until a non-zero int is found; a kml_suffix is then
 * read from just before that position.
 *
 * On return *recno and *size hold the suffix's recno and len fields exactly
 * as read from the file (no byte-order conversion is done here), and
 * *tail_offset holds the file position after the suffix read.  All three
 * are 0 when the log is shorter than one prefix header plus suffix, or when
 * only zeroes were found.
 *
 * Returns 0 on success (including the "nothing found" cases) and a negative
 * error when a read fails.
 */
static int presto_last_record(struct presto_log_fd *fd, loff_t *size,
loff_t *tail_offset, __u32 *recno, loff_t tail)
{
struct kml_suffix suffix;
int rc;
loff_t zeroes;

*recno = 0;
*tail_offset = 0;
*size = 0;
if (tail < sizeof(struct kml_prefix_hdr) + sizeof(suffix)) {
EXIT;
return 0;
}

zeroes = tail - sizeof(int);
while ( zeroes >= 0 ) {
int data;
rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data),
&zeroes);
if ( rc != sizeof(data) ) {
rc = -EIO;
return rc;
}
if (data)
break;
/* the read moved zeroes forward by one int; step back two to land
* on the int before the one just examined */
zeroes -= 2 * sizeof(data);
}

/* zeroes at the beginning of file. this is needed to prevent
presto_fread errors -SHP
*/
if (zeroes <= 0) return 0;
zeroes -= sizeof(suffix) + sizeof(int);
rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes);
if ( rc != sizeof(suffix) ) {
EXIT;
return rc;
}
if ( suffix.len > 500 ) {
/* *tail_offset is still 0 at this point, so the second value
* printed is always 0 */
CERROR("InterMezzo: Warning long record tail at %ld, rec tail_offset at %ld (size %d)\n",
(long) zeroes, (long)*tail_offset, suffix.len);
}

*recno = suffix.recno;
*size = suffix.len;
*tail_offset = zeroes;
return 0;
}
 
/*
 * Look up the last record of logfd's file with presto_last_record() and,
 * if that succeeds, store the reported offset and record number in
 * logfd->fd_offset and logfd->fd_recno.
 *
 * Returns 0 on success, otherwise the non-zero result of
 * presto_last_record().
 */
static int izo_kml_last_recno(struct presto_log_fd *logfd)
{
        loff_t eof = logfd->fd_file->f_dentry->d_inode->i_size;
        loff_t last_size;
        loff_t last_off;
        int last_recno;
        int err;

        err = presto_last_record(logfd, &last_size, &last_off, &last_recno,
                                 eof);
        if (err == 0) {
                logfd->fd_offset = last_off;
                logfd->fd_recno = last_recno;
                CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset %Ld\n",
                       last_recno, last_off);
        }

        EXIT;
        return err;
}
 
/*
 * Open the log file `name' of a fileset via izo_fset_open() (mode 0644).
 *
 * The opened inode must belong to the same presto cache as the fileset;
 * otherwise the file is closed again and the open fails.  When the
 * cache's filter provides a tr_journal_data operation it is invoked on
 * the inode; if not, a warning is logged and the file is still returned.
 *
 * Returns the open file, or an ERR_PTR() value on failure
 * (the error from izo_fset_open(), or -EINVAL on a cache mismatch).
 * It never returns NULL: the callers in this file test the result with
 * IS_ERR() only and dereference it afterwards.
 */
struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags)
{
        struct presto_cache *cache = fset->fset_cache;
        struct file *f;
        ENTRY;

        f = izo_fset_open(fset, name, flags, 0644);
        if (IS_ERR(f)) {
                EXIT;
                return f;
        }

        if ( cache != presto_get_cache(f->f_dentry->d_inode) ) {
                CERROR("InterMezzo: %s cache does not match fset cache!\n",name);
                /*
                 * NOTE(review): this clears the KML descriptor's file
                 * pointer whichever log is being opened; kept as-is,
                 * confirm that this is intended.
                 */
                fset->fset_kml.fd_file = NULL;
                filp_close(f, NULL);
                EXIT;
                return ERR_PTR(-EINVAL);
        }

        if (cache->cache_filter &&  cache->cache_filter->o_trops &&
            cache->cache_filter->o_trops->tr_journal_data) {
                cache->cache_filter->o_trops->tr_journal_data
                        (f->f_dentry->d_inode);
        } else {
                CERROR("InterMezzo WARNING: no file data logging!\n");
        }

        EXIT;

        return f;
}
 
/*
 * Open the fileset's "kml" log (creating it if needed) into logfd and
 * position logfd at its last record.
 *
 * Does nothing and returns 0 when logfd already has a file.  On success
 * the fileset's fset_lento_off / fset_lento_recno are set from logfd's
 * offset and record number.  If locating the last record fails, the
 * file is closed, logfd->fd_file is reset to NULL and the error is
 * returned.
 */
int izo_init_kml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
{
        struct file *kml;
        int rc;

        ENTRY;
        if (logfd->fd_file != NULL) {
                CDEBUG(D_INODE, "fset already has KML open\n");
                EXIT;
                return 0;
        }

        logfd->fd_lock = RW_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&logfd->fd_reservations);

        kml = izo_log_open(fset, "kml", O_RDWR | O_CREAT);
        if (IS_ERR(kml))
                return PTR_ERR(kml);

        logfd->fd_file = kml;
        rc = izo_kml_last_recno(logfd);
        if (rc != 0) {
                logfd->fd_file = NULL;
                filp_close(kml, NULL);
                CERROR("InterMezzo: IO error in KML of fset %s\n",
                       fset->fset_name);
                EXIT;
                return rc;
        }

        fset->fset_lento_off = logfd->fd_offset;
        fset->fset_lento_recno = logfd->fd_recno;

        EXIT;
        return 0;
}
 
int izo_init_last_rcvd_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
{
int error = 0;
struct file *f;
struct rec_info recinfo;
 
ENTRY;
if (logfd->fd_file != NULL) {
CDEBUG(D_INODE, "fset already has last_rcvd open\n");
EXIT;
return 0;
}
 
logfd->fd_lock = RW_LOCK_UNLOCKED;
INIT_LIST_HEAD(&logfd->fd_reservations);
f = izo_log_open(fset, "last_rcvd", O_RDWR | O_CREAT);
if (IS_ERR(f)) {
error = PTR_ERR(f);
return error;
}
 
logfd->fd_file = f;
logfd->fd_offset = f->f_dentry->d_inode->i_size;
 
error = izo_rep_cache_init(fset);
 
if (presto_read_kml_logical_offset(&recinfo, fset) == 0) {
fset->fset_kml_logical_off = recinfo.offset;
} else {
/* The 'last_rcvd' file doesn't contain a kml offset record,
* probably because we just created 'last_rcvd'. Write one. */
fset->fset_kml_logical_off = 0;
presto_write_kml_logical_offset(fset);
}
 
EXIT;
return error;
}
 
/*
 * Open the fileset's "lml" log (creating it if needed) into logfd, with
 * fd_offset set to the current file size.
 *
 * Does nothing and returns 0 when logfd already has a file.  Returns 0
 * on success or the error from izo_log_open().
 */
int izo_init_lml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
{
        struct file *lml;

        ENTRY;
        if (logfd->fd_file != NULL) {
                CDEBUG(D_INODE, "fset already has lml open\n");
                EXIT;
                return 0;
        }

        logfd->fd_lock = RW_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&logfd->fd_reservations);

        lml = izo_log_open(fset, "lml", O_RDWR | O_CREAT);
        if (IS_ERR(lml))
                return PTR_ERR(lml);

        logfd->fd_file = lml;
        logfd->fd_offset = lml->f_dentry->d_inode->i_size;

        EXIT;
        return 0;
}
 
/* Get the KML-offset record from the last_rcvd file */
int presto_read_kml_logical_offset(struct rec_info *recinfo,
struct presto_file_set *fset)
{
loff_t off;
struct izo_rcvd_rec rec;
char uuid[16] = {0};
 
off = izo_rcvd_get(&rec, fset, uuid);
if (off < 0)
return -1;
 
recinfo->offset = rec.lr_local_offset;
return 0;
}
 
int presto_write_kml_logical_offset(struct presto_file_set *fset)
{
loff_t rc;
struct izo_rcvd_rec rec;
char uuid[16] = {0};
 
rc = izo_rcvd_get(&rec, fset, uuid);
if (rc < 0)
memset(&rec, 0, sizeof(rec));
 
rec.lr_local_offset =
cpu_to_le64(fset->fset_kml_logical_off);
 
return izo_rcvd_write(fset, &rec);
}
 
/*
 * Copy the tail of the fileset's KML into the "kml_tmp" log.
 *
 * Bytes from file offset `start' up to fset->fset_kml.fd_offset are
 * read from the KML file and written to "kml_tmp" starting at offset 0,
 * at most 4096 bytes at a time.  A read that returns 0 ends the copy
 * early.
 *
 * Returns the open "kml_tmp" file on success.  On failure returns an
 * ERR_PTR() value: the error from izo_log_open(), the negative result
 * of a failed read, or -EIO for a short write.  In the read/write
 * failure cases "kml_tmp" is closed before returning.
 */
struct file * presto_copy_kml_tail(struct presto_file_set *fset,
                                   unsigned long int start)
{
        struct file *f;
        int len;
        loff_t read_off, write_off, bytes;

        ENTRY;

        /* Copy the tail of 'kml' to 'kml_tmp' */
        f = izo_log_open(fset, "kml_tmp", O_RDWR);
        if (IS_ERR(f)) {
                EXIT;
                return f;
        }

        write_off = 0;
        read_off = start;
        bytes = fset->fset_kml.fd_offset - start;
        while (bytes > 0) {
                /* NOTE(review): 4096 bytes of stack -- confirm this is
                 * acceptable for the contexts this runs in. */
                char buf[4096];
                int toread;

                if (bytes > sizeof(buf))
                        toread = sizeof(buf);
                else
                        toread = bytes;

                len = presto_fread(fset->fset_kml.fd_file, buf, toread,
                                   &read_off);
                if (len < 0) {
                        /* a read error must not be reported as a
                         * successful (but truncated) copy */
                        filp_close(f, NULL);
                        EXIT;
                        return ERR_PTR(len);
                }
                if (len == 0)
                        break;

                if (presto_fwrite(f, buf, len, &write_off) != len) {
                        filp_close(f, NULL);
                        EXIT;
                        return ERR_PTR(-EIO);
                }

                bytes -= len;
        }

        EXIT;
        return f;
}
 
 
/* LML records here */
/*
 * Write a CLOSE record to the LML file (rec->is_kml = 0).
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, new_file_ver, inode number, generation, pathlen,
 * remote_ino, remote_generation, remote_version, rec->offset, suffix.
 * The path of file's dentry relative to the fileset root is passed to
 * presto_log() as the trailing string.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int presto_write_lml_close(struct rec_info *rec,
                           struct presto_file_set *fset,
                           struct file *file,
                           __u64 remote_ino,
                           __u64 remote_generation,
                           struct presto_version *remote_version,
                           struct presto_version *new_file_ver)
{
        int opcode = KML_OPCODE_CLOSE;
        char *buffer;
        struct dentry *dentry = file->f_dentry;
        __u64 ino;
        __u32 pathlen;
        char *path;
        __u32 generation;
        int size;
        char *logrecord;
        char record[292];
        struct dentry *root;
        int error;

        ENTRY;

        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }
        root = fset->fset_dentry;

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        CDEBUG(D_INODE, "Path: %s\n", path);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        ino = cpu_to_le64(dentry->d_inode->i_ino);
        generation = cpu_to_le32(dentry->d_inode->i_generation);
        /*
         * NOTE(review): sizeof(remote_version) is the size of a pointer,
         * while new_file_ver is counted as sizeof(*new_file_ver).  Left
         * unchanged because it affects the record layout -- confirm
         * against log_version() and the LML reader.
         */
        size =  sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
                sizeof(ino) + sizeof(generation) + sizeof(pathlen) +
                sizeof(remote_ino) + sizeof(remote_generation) +
                sizeof(remote_version) + sizeof(rec->offset) +
                sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 0;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &ino, sizeof(ino));
        logrecord = logit(logrecord, &generation, sizeof(generation));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino));
        logrecord = logit(logrecord, &remote_generation,
                          sizeof(remote_generation));
        logrecord = log_version(logrecord, remote_version);
        logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        BUFF_FREE(buffer);

        EXIT;
        return error;
}
 
/*
* Check if the given record is at the end of the file. If it is, truncate
* the lml to the record's offset, removing it. Repeat on prior record,
* until we reach an active record or a reserved record (as defined by the
* reservations list).
*/
static int presto_truncate_lml_tail(struct presto_file_set *fset)
{
loff_t lml_tail;
loff_t lml_last_rec;
loff_t lml_last_recsize;
loff_t local_offset;
int recno;
struct kml_prefix_hdr prefix;
struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode;
void *handle;
int rc;
 
ENTRY;
/* If someone else is already truncating the LML, return. */
write_lock(&fset->fset_lml.fd_lock);
if (fset->fset_lml.fd_truncating == 1 ) {
write_unlock(&fset->fset_lml.fd_lock);
EXIT;
return 0;
}
/* someone is about to write to the end of the LML */
if ( !list_empty(&fset->fset_lml.fd_reservations) ) {
write_unlock(&fset->fset_lml.fd_lock);
EXIT;
return 0;
}
lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size;
/* Nothing to truncate?*/
if (lml_tail == 0) {
write_unlock(&fset->fset_lml.fd_lock);
EXIT;
return 0;
}
fset->fset_lml.fd_truncating = 1;
write_unlock(&fset->fset_lml.fd_lock);
 
presto_last_record(&fset->fset_lml, &lml_last_recsize,
&lml_last_rec, &recno, lml_tail);
/* Do we have a record to check? If not we have zeroes at the
beginning of the file. -SHP
*/
if (lml_last_recsize != 0) {
local_offset = lml_last_rec - lml_last_recsize;
rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix,
sizeof(prefix), &local_offset);
if (rc != sizeof(prefix)) {
EXIT;
goto tr_out;
}
if ( prefix.opcode != KML_OPCODE_NOOP ) {
EXIT;
rc = 0;
/* We may have zeroes at the end of the file, should
we clear them out? -SHP
*/
goto tr_out;
}
} else
lml_last_rec=0;
 
handle = presto_trans_start(fset, inode, KML_OPCODE_TRUNC);
if ( IS_ERR(handle) ) {
EXIT;
rc = -ENOMEM;
goto tr_out;
}
 
rc = izo_do_truncate(fset, fset->fset_lml.fd_file->f_dentry,
lml_last_rec - lml_last_recsize, lml_tail);
presto_trans_commit(fset, handle);
if ( rc == 0 ) {
rc = 1;
}
EXIT;
 
tr_out:
CDEBUG(D_JOURNAL, "rc = %d\n", rc);
write_lock(&fset->fset_lml.fd_lock);
fset->fset_lml.fd_truncating = 0;
write_unlock(&fset->fset_lml.fd_lock);
return rc;
}
 
int presto_truncate_lml(struct presto_file_set *fset)
{
int rc;
ENTRY;
while ( (rc = presto_truncate_lml_tail(fset)) > 0);
if ( rc < 0 && rc != -EALREADY) {
CERROR("truncate_lml error %d\n", rc);
}
EXIT;
return rc;
}
 
/*
 * Turn the LML record at lml_offset into a no-op.
 *
 * The record's prefix header is read, its opcode is replaced by
 * KML_OPCODE_NOOP, and the header is written back at the same offset.
 *
 * Returns 0 on success or when presto_no_journal(fset) is true, and
 * -EIO if either the read or the write transfers anything other than a
 * full header.
 */
int presto_clear_lml_close(struct presto_file_set *fset, loff_t lml_offset)
{
        struct kml_prefix_hdr hdr;
        loff_t pos = lml_offset;
        int nbytes;

        ENTRY;

        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %d\n",
               (long)lml_offset, sizeof(hdr));
        nbytes = presto_fread(fset->fset_lml.fd_file, (char *)&hdr,
                              sizeof(hdr), &pos);
        if ( nbytes != sizeof(hdr) ) {
                CERROR("presto: clear_lml io error %d\n", nbytes);
                EXIT;
                return -EIO;
        }

        /* overwrite the prefix */
        CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset);
        hdr.opcode = KML_OPCODE_NOOP;
        pos = lml_offset;
        /* note: this does just a single transaction in the cache */
        nbytes = presto_fwrite(fset->fset_lml.fd_file, (char *)(&hdr),
                               sizeof(hdr), &pos);

        EXIT;
        return ( nbytes != sizeof(hdr) ) ? -EIO : 0;
}
 
 
 
/* now a journal function for every operation */
 
/*
 * Journal a SETATTR operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, old_ver, valid, mode, uid, gid, size, mtime, ctime,
 * flags, rollback data, pathlen, suffix.  Attribute fields whose bit is
 * not set in iattr->ia_valid are logged as 0.  The path of dentry
 * relative to the fileset root is passed to presto_log() as the
 * trailing string.
 *
 * Side effect: if ATTR_MTIME_SET is set in iattr->ia_valid, ATTR_MTIME
 * is set there as well.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true or the
 * dentry is skipped (see below), -EOVERFLOW if the computed fixed size
 * exceeds the stack buffer, otherwise the result of presto_log().
 */
int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset,
                           struct dentry *dentry, struct presto_version *old_ver,
                           struct izo_rollback_data *rb, struct iattr *iattr)
{
        int opcode = KML_OPCODE_SETATTR;
        char *buffer, *path, *logrecord, record[316];
        struct dentry *root;
        __u32 uid, gid, mode, valid, flags, pathlen;
        __u64 fsize, mtime, ctime;
        int error, size;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        /* skip dentries without an inode, with a zero link count, or
         * that are not their own parent and have an empty d_hash list */
        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
            || ((dentry->d_parent != dentry) && list_empty(&dentry->d_hash))) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size =  sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + sizeof(*old_ver) +
                sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) +
                sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) +
                sizeof(pathlen) + sizeof(*rb) + sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        /* Only journal one kind of mtime, and not atime at all.  Also don't
         * journal bogus data in iattr, to make the journal more compressible.
         */
        if (iattr->ia_valid & ATTR_MTIME_SET)
                iattr->ia_valid = iattr->ia_valid | ATTR_MTIME;
        valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET |
                                                ATTR_ATIME_SET));
        mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0;
        uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0;
        gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0;
        fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0;
        mtime = iattr->ia_valid & ATTR_MTIME ? cpu_to_le64(iattr->ia_mtime): 0;
        ctime = iattr->ia_valid & ATTR_CTIME ? cpu_to_le64(iattr->ia_ctime): 0;
        flags = iattr->ia_valid & ATTR_ATTR_FLAG ?
                cpu_to_le32(iattr->ia_attr_flags): 0;

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, old_ver);
        logrecord = logit(logrecord, &valid, sizeof(valid));
        logrecord = logit(logrecord, &mode, sizeof(mode));
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &fsize, sizeof(fsize));
        logrecord = logit(logrecord, &mtime, sizeof(mtime));
        logrecord = logit(logrecord, &ctime, sizeof(ctime));
        logrecord = logit(logrecord, &flags, sizeof(flags));
        logrecord = log_rollback(logrecord, rb);
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
/*
 * Build a GET_FILEID record for dentry and send it with
 * izo_upc_get_fileid() on the given minor.
 *
 * Unlike the journal_* functions, the record built here contains the
 * path itself: prefix, pathlen, path (padded by size_round()), suffix.
 * The suffix's recno is set to 0 because presto_log() is not involved.
 *
 * Returns -EOVERFLOW if the record (including the path) would not fit
 * in the 4096-byte stack buffer, otherwise the result of
 * izo_upc_get_fileid().
 */
int presto_get_fileid(int minor, struct presto_file_set *fset,
                      struct dentry *dentry)
{
        int opcode = KML_OPCODE_GET_FILEID;
        struct rec_info rec;
        char *buffer, *path, *logrecord, record[4096]; /*include path*/
        struct dentry *root;
        __u32 uid, gid, pathlen;
        int error, size;
        struct kml_suffix *suffix;

        ENTRY;

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);
        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size =  sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + sizeof(pathlen) +
                size_round(le32_to_cpu(pathlen)) +
                sizeof(struct kml_suffix);

        CDEBUG(D_FILE, "kml size: %d\n", size);
        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* the path is copied into record[] below, so a long
                 * path must be rejected rather than written past it */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        memset(&rec, 0, sizeof(rec));
        rec.is_kml = 1;
        rec.size = size;

        logrecord = journal_log_prefix(record, opcode, &rec);
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, path, size_round(le32_to_cpu(pathlen)));
        suffix = (struct kml_suffix *)logrecord;
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, &rec);
        /* journal_log_suffix expects journal_log to set this */
        suffix->recno = 0;

        CDEBUG(D_FILE, "actual kml size: %d\n", logrecord - record);
        CDEBUG(D_FILE, "get fileid: uid %d, gid %d, path: %s\n", uid, gid,path);

        error = izo_upc_get_fileid(minor, size, record,
                                   size_round(le32_to_cpu(pathlen)), path,
                                   fset->fset_name);

        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
/*
 * Journal a CREATE operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, tgt_dir_ver, log_dentry_version() of dentry->d_parent,
 * new_file_ver, mode, uid, gid, pathlen, suffix.  uid and gid come from
 * dentry's inode.  The path of dentry relative to the fileset root is
 * passed to presto_log() as the trailing string.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset,
                          struct dentry *dentry,
                          struct presto_version *tgt_dir_ver,
                          struct presto_version *new_file_ver, int mode)
{
        int opcode = KML_OPCODE_CREATE;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, lmode, pathlen;
        int error, size;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);
        lmode = cpu_to_le32(mode);
        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size =  sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &lmode, sizeof(lmode));
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
/*
 * Journal a SYMLINK operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, tgt_dir_ver, log_dentry_version() of dentry->d_parent,
 * new_link_ver, uid, gid, pathlen, targetlen, suffix.  Two strings are
 * passed to presto_log(): the path of dentry relative to the fileset
 * root, then `target' (length taken with strlen()).
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset,
                           struct dentry *dentry, const char *target,
                           struct presto_version *tgt_dir_ver,
                           struct presto_version *new_link_ver)
{
        int opcode = KML_OPCODE_SYMLINK;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, pathlen;
        __u32 targetlen = cpu_to_le32(strlen(target));
        int error, size;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size =  sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
                sizeof(targetlen) + sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(le32_to_cpu(targetlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_link_ver);
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &targetlen, sizeof(targetlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           target, size_round(le32_to_cpu(targetlen)),
                           NULL, 0);

        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
/*
 * Journal a MKDIR operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, tgt_dir_ver, log_dentry_version() of dentry->d_parent,
 * new_dir_ver, mode, uid, gid, pathlen, suffix.  uid and gid come from
 * dentry's inode.  The path of dentry relative to the fileset root is
 * passed to presto_log() as the trailing string.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset,
                         struct dentry *dentry,
                         struct presto_version *tgt_dir_ver,
                         struct presto_version *new_dir_ver, int mode)
{
        int opcode = KML_OPCODE_MKDIR;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, lmode, pathlen;
        int error, size;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);
        lmode = cpu_to_le32(mode);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));
        logrecord = journal_log_prefix(record, opcode, rec);

        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_dir_ver);
        logrecord = logit(logrecord, &lmode, sizeof(lmode));
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
 
int
presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dir, struct presto_version *tgt_dir_ver,
struct presto_version *old_dir_ver,
struct izo_rollback_data *rb, int len, const char *name)
{
int opcode = KML_OPCODE_RMDIR;
char *buffer, *path, *logrecord, record[316];
__u32 pathlen, llen;
struct dentry *root;
int error, size;
 
ENTRY;
if ( presto_no_journal(fset) ) {
EXIT;
return 0;
}
 
root = fset->fset_dentry;
 
llen = cpu_to_le32(len);
BUFF_ALLOC(buffer, NULL);
path = presto_path(dir, root, buffer, PAGE_SIZE);
pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
size = sizeof(__u32) * current->ngroups +
sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
sizeof(struct kml_suffix);
 
if ( size > sizeof(record) )
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
 
CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n",
path, pathlen, name, len, size);
 
rec->is_kml = 1;
rec->size = size + size_round(le32_to_cpu(pathlen)) +
size_round(len);
 
logrecord = journal_log_prefix(record, opcode, rec);
logrecord = log_version(logrecord, tgt_dir_ver);
logrecord = log_dentry_version(logrecord, dir);
logrecord = log_version(logrecord, old_dir_ver);
logrecord = logit(logrecord, rb, sizeof(*rb));
logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
logrecord = logit(logrecord, &llen, sizeof(llen));
logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);
error = presto_log(fset, rec, record, size,
path, size_round(le32_to_cpu(pathlen)),
name, size_round(len),
NULL, 0);
 
BUFF_FREE(buffer);
EXIT;
return error;
}
 
 
/*
 * Journal a MKNOD operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, tgt_dir_ver, log_dentry_version() of dentry->d_parent,
 * new_node_ver, mode, uid, gid, dmajor, dminor, pathlen, suffix.  uid
 * and gid come from dentry's inode.  The path of dentry relative to the
 * fileset root is passed to presto_log() as the trailing string.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int
presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset,
                     struct dentry *dentry, struct presto_version *tgt_dir_ver,
                     struct presto_version *new_node_ver, int mode,
                     int dmajor, int dminor )
{
        int opcode = KML_OPCODE_MKNOD;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, lmode, lmajor, lminor, pathlen;
        int error, size;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);
        lmode = cpu_to_le32(mode);
        lmajor = cpu_to_le32(dmajor);
        lminor = cpu_to_le32(dminor);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) +
                sizeof(lminor) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_node_ver);
        logrecord = logit(logrecord, &lmode, sizeof(lmode));
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &lmajor, sizeof(lmajor));
        logrecord = logit(logrecord, &lminor, sizeof(lminor));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
/*
 * Journal a LINK operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, tgt_dir_ver, log_dentry_version() of tgt->d_parent,
 * new_link_ver, srcpathlen, pathlen, suffix.  Two strings are passed to
 * presto_log(): the path of `src', then the path of `tgt', both
 * relative to the fileset root.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int
presto_journal_link(struct rec_info *rec, struct presto_file_set *fset,
                    struct dentry *src, struct dentry *tgt,
                    struct presto_version *tgt_dir_ver,
                    struct presto_version *new_link_ver)
{
        int opcode = KML_OPCODE_LINK;
        char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
        __u32 pathlen, srcpathlen;
        struct dentry *root;
        int error, size;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(srcbuffer, NULL);
        srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
        srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));

        BUFF_ALLOC(buffer, srcbuffer);
        path = presto_path(tgt, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size =  sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(srcpathlen) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[];
                 * both path buffers are ours to free */
                BUFF_FREE(srcbuffer);
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(le32_to_cpu(srcpathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, tgt->d_parent);
        logrecord = log_version(logrecord, new_link_ver);
        logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);

        error = presto_log(fset, rec, record, size,
                           srcpath, size_round(le32_to_cpu(srcpathlen)),
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0);

        BUFF_FREE(srcbuffer);
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
 
/*
 * Journal a RENAME operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, src_dir_ver, log_dentry_version() of src->d_parent,
 * tgt_dir_ver, log_dentry_version() of tgt->d_parent, srcpathlen,
 * pathlen, suffix.  Two strings are passed to presto_log(): the path of
 * `src', then the path of `tgt', both relative to the fileset root.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset,
                          struct dentry *src, struct dentry *tgt,
                          struct presto_version *src_dir_ver,
                          struct presto_version *tgt_dir_ver)
{
        int opcode = KML_OPCODE_RENAME;
        char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
        __u32 pathlen, srcpathlen;
        struct dentry *root;
        int error, size;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(srcbuffer, NULL);
        srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
        srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));

        BUFF_ALLOC(buffer, srcbuffer);
        path = presto_path(tgt, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size =  sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + 4 * sizeof(*src_dir_ver) +
                sizeof(srcpathlen) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[];
                 * both path buffers are ours to free */
                BUFF_FREE(buffer);
                BUFF_FREE(srcbuffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(le32_to_cpu(srcpathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, src_dir_ver);
        logrecord = log_dentry_version(logrecord, src->d_parent);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, tgt->d_parent);
        logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);

        error = presto_log(fset, rec, record, size,
                           srcpath, size_round(le32_to_cpu(srcpathlen)),
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0);

        BUFF_FREE(buffer);
        BUFF_FREE(srcbuffer);
        EXIT;
        return error;
}
 
/*
 * Journal an UNLINK operation to the KML.
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix, tgt_dir_ver, log_dentry_version() of dir,
 * old_file_ver, rollback data, pathlen, name length, old_targetlen,
 * suffix.  Three strings are passed to presto_log(): the path of `dir'
 * relative to the fileset root, dentry's name, and old_target.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true,
 * -EOVERFLOW if the computed fixed size exceeds the stack buffer,
 * otherwise the result of presto_log().
 */
int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset,
                          struct dentry *dir, struct presto_version *tgt_dir_ver,
                          struct presto_version *old_file_ver,
                          struct izo_rollback_data *rb, struct dentry *dentry,
                          char *old_target, int old_targetlen)
{
        int opcode = KML_OPCODE_UNLINK;
        char *buffer, *path, *logrecord, record[316];
        const char *name;
        __u32 pathlen, llen;
        struct dentry *root;
        int error, size, len;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        name = dentry->d_name.name;
        len = dentry->d_name.len;

        llen = cpu_to_le32(len);
        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dir, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
                sizeof(old_targetlen) + sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len) +
                size_round(old_targetlen);

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dir);
        logrecord = log_version(logrecord, old_file_ver);
        logrecord = log_rollback(logrecord, rb);
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &llen, sizeof(llen));
        /* NOTE(review): old_targetlen is logged as passed in, without
         * the cpu_to_le32() applied to pathlen and llen -- confirm. */
        logrecord = logit(logrecord, &old_targetlen, sizeof(old_targetlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           name, size_round(len),
                           old_target, size_round(old_targetlen));

        BUFF_FREE(buffer);
        EXIT;
        return error;
}
 
/*
 * Journal a CLOSE operation to the KML.
 *
 * The groups, ids and mode recorded are those saved in
 * file->private_data (a struct presto_file_data) when it is set;
 * otherwise they are taken from `current' (groups, fsuid, fsgid) and
 * from dentry's inode (mode, uid, gid).
 *
 * The fixed part of the record is built in a stack buffer in this
 * order: prefix (with groups and ids), mode, uid, gid, old_file_ver,
 * new_file_ver, inode number, generation, pathlen, suffix.  The path of
 * dentry relative to the fileset root is passed to presto_log() as the
 * trailing string.
 *
 * Returns 0 without logging when presto_no_journal(fset) is true or the
 * dentry is skipped (see below), -EOVERFLOW if the computed fixed size
 * exceeds the stack buffer, otherwise the result of presto_log().
 */
int
presto_journal_close(struct rec_info *rec, struct presto_file_set *fset,
                     struct file *file, struct dentry *dentry,
                     struct presto_version *old_file_ver,
                     struct presto_version *new_file_ver)
{
        int opcode = KML_OPCODE_CLOSE;
        struct presto_file_data *fd;
        char *buffer, *path, *logrecord, record[316];
        struct dentry *root;
        int error, size, i;
        __u32 pathlen, generation;
        __u64 ino;
        __u32 open_fsuid;
        __u32 open_fsgid;
        __u32 open_ngroups;
        __u32 open_groups[NGROUPS_MAX];
        __u32 open_mode;
        __u32 open_uid;
        __u32 open_gid;

        ENTRY;

        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        /* skip dentries without an inode, with a zero link count, or
         * that are not their own parent and have an empty d_hash list */
        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
            || ((dentry->d_parent != dentry) && list_empty(&dentry->d_hash))) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        fd = (struct presto_file_data *)file->private_data;
        if (fd) {
                /* NOTE(review): assumes fd->fd_ngroups never exceeds
                 * NGROUPS_MAX -- confirm where fd is filled in. */
                open_ngroups = fd->fd_ngroups;
                for (i = 0; i < fd->fd_ngroups; i++)
                        open_groups[i] = (__u32) fd->fd_groups[i];
                open_mode = fd->fd_mode;
                open_uid = fd->fd_uid;
                open_gid = fd->fd_gid;
                open_fsuid = fd->fd_fsuid;
                open_fsgid = fd->fd_fsgid;
        } else {
                open_ngroups = current->ngroups;
                for (i=0; i<current->ngroups; i++)
                        open_groups[i] =  (__u32) current->groups[i];
                open_mode = dentry->d_inode->i_mode;
                open_uid = dentry->d_inode->i_uid;
                open_gid = dentry->d_inode->i_gid;
                open_fsuid = current->fsuid;
                open_fsgid = current->fsgid;
        }
        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        ino = cpu_to_le64(dentry->d_inode->i_ino);
        generation = cpu_to_le32(dentry->d_inode->i_generation);
        size =  sizeof(__u32) * open_ngroups +
                sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) +
                sizeof(struct kml_prefix_hdr) + sizeof(*old_file_ver) +
                sizeof(*new_file_ver) + sizeof(ino) + sizeof(generation) +
                sizeof(pathlen) + sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                /* refuse to build a record that does not fit record[] */
                BUFF_FREE(buffer);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix_with_groups_and_ids(
                record, opcode, rec, open_ngroups, open_groups,
                open_fsuid, open_fsgid);
        logrecord = logit(logrecord, &open_mode, sizeof(open_mode));
        logrecord = logit(logrecord, &open_uid, sizeof(open_uid));
        logrecord = logit(logrecord, &open_gid, sizeof(open_gid));
        logrecord = log_version(logrecord, old_file_ver);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &ino, sizeof(ino));
        logrecord = logit(logrecord, &generation, sizeof(generation));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);
        BUFF_FREE(buffer);

        EXIT;
        return error;
}
 
/*
 * Write a KML close record on behalf of a close that so far only exists
 * in the LML.
 *
 *   rec            - record bookkeeping; is_kml and size are filled in here
 *   fset           - fileset whose journal receives the record
 *   path           - path of the closed file, handed to presto_log()
 *   pathlen        - length of path; it is run through le32_to_cpu() here
 *                    and logged verbatim, i.e. it is expected in
 *                    little-endian form
 *   ngroups/groups - group list logged with the record prefix
 *   ino/generation - logged verbatim, no byte swapping is done here
 *   new_file_ver   - version logged for the file
 *
 * Returns 0 when journaling is disabled for the fileset, -EINVAL when the
 * fixed-size part of the record does not fit the on-stack buffer, and the
 * result of presto_log() otherwise.
 */
int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset,
                         char *path, __u32 pathlen,
                         int ngroups, __u32 *groups,
                         __u64 ino, __u32 generation,
                         struct presto_version *new_file_ver)
{
        int opcode = KML_OPCODE_CLOSE;
        char *logrecord, record[292];
        int error, size;

        ENTRY;

        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        size =  sizeof(__u32) * ngroups +
                sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
                sizeof(ino) + sizeof(generation) +
                sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* The record is assembled in record[]; building one that does not
         * fit would scribble over the stack, so refuse instead of merely
         * complaining. */
        if ( ngroups < 0 || size > sizeof(record) ) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                EXIT;
                return -EINVAL;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix_with_groups(record, opcode, rec,
                                                   ngroups, groups);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &ino, sizeof(ino));
        logrecord = logit(logrecord, &generation, sizeof(generation));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        EXIT;
        return error;
}
 
 
/* write closes for the local close records in the LML */
int presto_complete_lml(struct presto_file_set *fset)
{
__u32 groups[NGROUPS_MAX];
loff_t lml_offset;
loff_t read_offset;
char *buffer;
void *handle;
struct rec_info rec;
struct close_rec {
struct presto_version new_file_ver;
__u64 ino;
__u32 generation;
__u32 pathlen;
__u64 remote_ino;
__u32 remote_generation;
__u32 remote_version;
__u64 lml_offset;
} close_rec;
struct file *file = fset->fset_lml.fd_file;
struct kml_prefix_hdr prefix;
int rc = 0;
ENTRY;
 
lml_offset = 0;
again:
if (lml_offset >= file->f_dentry->d_inode->i_size) {
EXIT;
return rc;
}
 
read_offset = lml_offset;
rc = presto_fread(file, (char *)&prefix,
sizeof(prefix), &read_offset);
if ( rc != sizeof(prefix) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 1, tell Peter\n");
return -EIO;
}
 
if ( prefix.opcode == KML_OPCODE_NOOP ) {
lml_offset += prefix.len;
goto again;
}
 
rc = presto_fread(file, (char *)groups,
prefix.ngroups * sizeof(__u32), &read_offset);
if ( rc != prefix.ngroups * sizeof(__u32) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 2, tell Peter\n");
return -EIO;
}
 
rc = presto_fread(file, (char *)&close_rec,
sizeof(close_rec), &read_offset);
if ( rc != sizeof(close_rec) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 3, tell Peter\n");
return -EIO;
}
 
/* is this a backfetch or a close record? */
if ( le64_to_cpu(close_rec.remote_ino) != 0 ) {
lml_offset += prefix.len;
goto again;
}
 
BUFF_ALLOC(buffer, NULL);
rc = presto_fread(file, (char *)buffer,
le32_to_cpu(close_rec.pathlen), &read_offset);
if ( rc != le32_to_cpu(close_rec.pathlen) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 4, tell Peter\n");
return -EIO;
}
handle = presto_trans_start(fset, file->f_dentry->d_inode,
KML_OPCODE_RELEASE);
if ( IS_ERR(handle) ) {
EXIT;
return -ENOMEM;
}
 
rc = presto_clear_lml_close(fset, lml_offset);
if ( rc ) {
CERROR("error during clearing: %d\n", rc);
presto_trans_commit(fset, handle);
EXIT;
return rc;
}
 
rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen,
prefix.ngroups, groups,
close_rec.ino, close_rec.generation,
&close_rec.new_file_ver);
if ( rc ) {
CERROR("error during rewrite close: %d\n", rc);
presto_trans_commit(fset, handle);
EXIT;
return rc;
}
 
presto_trans_commit(fset, handle);
if ( rc ) {
CERROR("error during truncation: %d\n", rc);
EXIT;
return rc;
}
lml_offset += prefix.len;
CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset);
goto again;
 
EXIT;
return -EINVAL;
}
 
 
#ifdef CONFIG_FS_EXT_ATTR
/* Journal an ea operation. A NULL buffer implies the attribute is
* getting deleted. In this case we simply change the opcode, but nothing
* else is affected.
*/
int presto_journal_set_ext_attr (struct rec_info *rec,
struct presto_file_set *fset,
struct dentry *dentry,
struct presto_version *ver, const char *name,
const char *buffer, int buffer_len,
int flags)
{
int opcode = (buffer == NULL) ?
KML_OPCODE_DELEXTATTR :
KML_OPCODE_SETEXTATTR ;
char *temp, *path, *logrecord, record[292];
struct dentry *root;
int error, size;
__u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX));
__u32 buflen=(buffer != NULL)? cpu_to_le32(buffer_len): cpu_to_le32(0);
__u32 mode, pathlen;
 
ENTRY;
if ( presto_no_journal(fset) ) {
EXIT;
return 0;
}
 
if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
|| ((dentry->d_parent != dentry) && list_empty(&dentry->d_hash))) {
EXIT;
return 0;
}
 
root = fset->fset_dentry;
 
BUFF_ALLOC(temp, NULL);
path = presto_path(dentry, root, temp, PAGE_SIZE);
pathlen = cpu_to_le32(MYPATHLEN(temp, path));
 
flags=cpu_to_le32(flags);
/* Ugly, but needed. posix ACLs change the mode without using
* setattr, we need to record these changes. The EA code per se
* is not really affected.
*/
mode=cpu_to_le32(dentry->d_inode->i_mode);
 
size = sizeof(__u32) * current->ngroups +
sizeof(struct kml_prefix_hdr) +
2 * sizeof(struct presto_version) +
sizeof(flags) + sizeof(mode) + sizeof(namelen) +
sizeof(buflen) + sizeof(pathlen) +
sizeof(struct kml_suffix);
 
if ( size > sizeof(record) )
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
 
rec->is_kml = 1;
/* Make space for a path, a attr name and value*/
/* We use the buflen instead of buffer_len to make sure that we
* journal the right length. This may be a little paranoid, but
* with 64 bits round the corner, I would rather be safe than sorry!
* Also this handles deletes with non-zero buffer_lengths correctly.
* SHP
*/
rec->size = size + size_round(le32_to_cpu(pathlen)) +
size_round(le32_to_cpu(namelen)) +
size_round(le32_to_cpu(buflen));
 
logrecord = journal_log_prefix(record, opcode, rec);
logrecord = log_version(logrecord, ver);
logrecord = log_dentry_version(logrecord, dentry);
logrecord = logit(logrecord, &flags, sizeof(flags));
logrecord = logit(logrecord, &mode, sizeof(flags));
logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
logrecord = logit(logrecord, &namelen, sizeof(namelen));
logrecord = logit(logrecord, &buflen, sizeof(buflen));
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
 
error = presto_log(fset, rec, record, size,
path, size_round(le32_to_cpu(pathlen)),
name, size_round(le32_to_cpu(namelen)),
buffer, size_round(le32_to_cpu(buflen)));
 
BUFF_FREE(temp);
EXIT;
return error;
}
#endif
/presto.c
0,0 → 1,740
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Author: Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 1998 Stelias Computing Inc
* Copyright (C) 1999 Red Hat Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* This file implements basic routines supporting the semantics
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
/*
 * Resolve name into nd using path_init()/path_walk().
 *
 * Returns 0 when path_init() reports that no walk is needed, otherwise
 * whatever path_walk() returns.
 */
int presto_walk(const char *name, struct nameidata *nd)
{
        /* Symlinks are deliberately not followed so that operations on
           the symlinks themselves work.  The vfs is expected to hand us
           resolved dentries, so LOOKUP_FOLLOW should not be needed, and
           at the reintegrating end lento should likewise work with the
           resolved pathname rather than the symlink. SHP
           XXX: This code implies that direct symlinks do not work. SHP
        */
        unsigned int lookup_flags = LOOKUP_POSITIVE;

        ENTRY;
        if (!path_init(name, lookup_flags, nd))
                return 0;

        return path_walk(name, nd);
}
 
 
/* find the presto minor device for this inode */
int presto_i2m(struct inode *inode)
{
struct presto_cache *cache;
ENTRY;
cache = presto_get_cache(inode);
CDEBUG(D_PSDEV, "\n");
if ( !cache ) {
CERROR("PRESTO: BAD: cannot find cache for dev %d, ino %ld\n",
inode->i_dev, inode->i_ino);
EXIT;
return -1;
}
EXIT;
return cache->cache_psdev->uc_minor;
}
 
/* Minor number of the presto device backing this fileset's cache. */
inline int presto_f2m(struct presto_file_set *fset)
{
        int minor = fset->fset_cache->cache_psdev->uc_minor;

        return minor;
}
 
/* Minor number of the presto device backing this cache. */
inline int presto_c2m(struct presto_cache *cache)
{
        int minor = cache->cache_psdev->uc_minor;

        return minor;
}
 
/* XXX check this out */
struct presto_file_set *presto_path2fileset(const char *name)
{
struct nameidata nd;
struct presto_file_set *fileset;
int error;
ENTRY;
 
error = presto_walk(name, &nd);
if (!error) {
#if 0
error = do_revalidate(nd.dentry);
#endif
if (!error)
fileset = presto_fset(nd.dentry);
path_release(&nd);
EXIT;
} else
fileset = ERR_PTR(error);
 
EXIT;
return fileset;
}
 
/* check a flag on this dentry or fset root.  Semantics:
   - when the channel of the dentry's device has uc_no_filter set,
     every flag is reported as set (~0)
   - PRESTO_ATTR, PRESTO_DATA return 1 if the fileset has FSET_INSYNC
   - otherwise: test the flag in the dentry's dd_flags
*/
int presto_chk(struct dentry *dentry, int flag)
{
        int minor;
        struct presto_file_set *fset = presto_fset(dentry);

        ENTRY;
        minor = presto_i2m(dentry->d_inode);
        /* presto_i2m() returns -1 when it finds no cache; never use that
         * as an index into izo_channels[] */
        if ( minor >= 0 && izo_channels[minor].uc_no_filter ) {
                EXIT;
                return ~0;
        }

        /* if the fileset is in sync DATA and ATTR are OK */
        if ( fset &&
             (flag == PRESTO_ATTR || flag == PRESTO_DATA) &&
             (fset->fset_flags & FSET_INSYNC) ) {
                CDEBUG(D_INODE, "fset in sync (ino %ld)!\n",
                       fset->fset_dentry->d_inode->i_ino);
                EXIT;
                return 1;
        }

        EXIT;
        return (presto_d2d(dentry)->dd_flags & flag);
}
 
/*
 * OR `flag` into the presto flags of a dentry.
 *
 * A dentry that carries no presto data is reported and then hits BUG().
 */
void presto_set(struct dentry *dentry, int flag)
{
        ENTRY;

        if ( dentry->d_inode != NULL ) {
                CDEBUG(D_INODE, "SET ino %ld, flag %x\n",
                       dentry->d_inode->i_ino, flag);
        }

        if ( !presto_d2d(dentry) ) {
                CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry,
                       dentry->d_name.len, dentry->d_name.name);
                BUG();
        }

        presto_d2d(dentry)->dd_flags = presto_d2d(dentry)->dd_flags | flag;
        EXIT;
}
 
/*
 * Complete the pending closes of the fileset that `path` lives in.
 *
 * Returns the error of the path walk, -ENXIO when the resolved inode is
 * not a presto inode, -EINVAL when the dentry has no fileset, and the
 * result of presto_complete_lml() otherwise.
 */
int lento_complete_closes(char *path)
{
        struct nameidata nd;
        struct presto_file_set *fset;
        int error;
        ENTRY;

        error = presto_walk(path, &nd);
        if (error) {
                EXIT;
                return error;
        }

        if ( !presto_ispresto(nd.dentry->d_inode) ) {
                error = -ENXIO;
        } else {
                fset = presto_fset(nd.dentry);
                if ( fset ) {
                        /* transactions and locking are internal to
                         * presto_complete_lml() */
                        error = presto_complete_lml(fset);
                } else {
                        CERROR("No fileset!\n");
                        error = -EINVAL;
                }
        }

        EXIT;
        path_release(&nd);
        return error;
}
 
#if 0
/* NOTE: this whole function is compiled out by the surrounding #if 0.
 *
 * given a path: write a close record and cancel an LML record, finally
 * call truncate LML. Lento is doing this so it goes in with uid/gid's
 * root.
 *
 * Which of the steps run is selected by info->flags:
 *   LENTO_FL_CANCEL_LML   - clear the LML record at lml_offset, and
 *                           truncate the LML after the commit
 *   LENTO_FL_WRITE_KML    - journal a close for the dentry
 *   LENTO_FL_WRITE_EXPECT - write the last_rcvd record
 *
 * remote_ino, remote_generation and remote_version are not used in the
 * body below.
 *
 * Returns the path walk error, -ENXIO for a non-presto inode, -EINVAL
 * when there is no fileset, -ENOMEM when no transaction could be
 * started, or the result of the last step that ran.
 */
int lento_cancel_lml(char *path,
__u64 lml_offset,
__u64 remote_ino,
__u32 remote_generation,
__u32 remote_version,
struct lento_vfs_context *info)
{
struct nameidata nd;
struct rec_info rec;
struct dentry *dentry;
int error;
struct presto_file_set *fset;
void *handle;
struct presto_version new_ver;
ENTRY;


error = presto_walk(path, &nd);
if (error) {
EXIT;
return error;
}
dentry = nd.dentry;

error = -ENXIO;
if ( !presto_ispresto(dentry->d_inode) ) {
EXIT;
goto out_cancel_lml;
}
fset = presto_fset(dentry);

error=-EINVAL;
if (fset==NULL) {
CERROR("No fileset!\n");
EXIT;
goto out_cancel_lml;
}
/* this only requires a transaction below which is automatic */
/* NOTE(review): the live code in this file passes KML_OPCODE_RELEASE to
 * presto_trans_start(); confirm PRESTO_OP_RELEASE still exists before
 * re-enabling this function. */
handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE);
if ( IS_ERR(handle) ) {
error = -ENOMEM;
EXIT;
goto out_cancel_lml;
}
if (info->flags & LENTO_FL_CANCEL_LML) {
error = presto_clear_lml_close(fset, lml_offset);
if ( error ) {
presto_trans_commit(fset, handle);
EXIT;
goto out_cancel_lml;
}
}


if (info->flags & LENTO_FL_WRITE_KML) {
/* on-stack struct file with only the two fields set below */
struct file file;
file.private_data = NULL;
file.f_dentry = dentry;
presto_getversion(&new_ver, dentry->d_inode);
error = presto_journal_close(&rec, fset, &file, dentry,
&new_ver);
if ( error ) {
EXIT;
presto_trans_commit(fset, handle);
goto out_cancel_lml;
}
}

if (info->flags & LENTO_FL_WRITE_EXPECT) {
error = presto_write_last_rcvd(&rec, fset, info);
/* only negative results are treated as failure here */
if ( error < 0 ) {
EXIT;
presto_trans_commit(fset, handle);
goto out_cancel_lml;
}
}

presto_trans_commit(fset, handle);

if (info->flags & LENTO_FL_CANCEL_LML) {
presto_truncate_lml(fset);
}

/* common exit: every path that walked `path` releases nd here */
out_cancel_lml:
EXIT;
path_release(&nd);
return error;
}
#endif
 
/*
 * Update the presto flags stored with a dentry.  Used by downcalls.
 *
 * The flags are first ANDed with and_flag and then ORed with or_flag.
 * When res is non-NULL the resulting flags are stored through it.
 *
 * Returns 0, or -EINVAL when the dentry carries no presto data.
 */
int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag,
                    int *res)
{
        if (!presto_d2d(dentry)) {
                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
                       dentry->d_inode->i_ino, __FUNCTION__);
                return -EINVAL;
        }

        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
               dentry->d_inode->i_ino, and_flag, or_flag,
               presto_d2d(dentry)->dd_flags);

        presto_d2d(dentry)->dd_flags =
                (presto_d2d(dentry)->dd_flags & and_flag) | or_flag;

        if (res != NULL)
                *res = presto_d2d(dentry)->dd_flags;

        return 0;
}
 
/*
 * Given a dentry, operate on the flags of the cache its inode belongs
 * to.  Used by mark_ioctl.
 *
 * The cache flags are first ANDed with and_flag and then ORed with
 * or_flag.  When res is non-NULL the resulting flags are stored through
 * it.
 *
 * Returns 0, -EINVAL when the dentry carries no presto data, or -EBADF
 * when no cache is found for the inode.
 */
int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag,
                   int *res)
{
        struct presto_cache *cache;

        if (presto_d2d(dentry) == NULL) {
                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
                       dentry->d_inode->i_ino, __FUNCTION__);
                return -EINVAL;
        }

        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
               dentry->d_inode->i_ino, and_flag, or_flag,
               presto_d2d(dentry)->dd_flags);

        cache = presto_get_cache(dentry->d_inode);
        if ( !cache ) {
                CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
                return -EBADF;
        }

        /* Plain assignment instead of the former "((int)x) &= y" form:
         * a cast is not an lvalue in standard C. */
        cache->cache_flags = ((int)cache->cache_flags & and_flag) | or_flag;
        if (res)
                *res = (int)cache->cache_flags;

        return 0;
}
 
int presto_set_max_kml_size(const char *path, unsigned long max_size)
{
struct presto_file_set *fset;
 
ENTRY;
 
fset = presto_path2fileset(path);
if (IS_ERR(fset)) {
EXIT;
return PTR_ERR(fset);
}
 
fset->kml_truncate_size = max_size;
CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n",
max_size, path);
 
EXIT;
return 0;
}
 
int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag,
int * res)
{
struct presto_file_set *fset;
fset = presto_fset(dentry);
if ( !fset ) {
CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
make_bad_inode(dentry->d_inode);
return -EBADF;
}
((int)fset->fset_flags) &= and_flag;
((int)fset->fset_flags) |= or_flag;
if (res)
*res = (int)fset->fset_flags;
 
return 0;
}
 
/*
 * talk to Lento about the permit
 *
 * Returns -EINVAL when no presto device is found for the dentry's inode
 * and -ENOTCONN when the dentry has no fileset.  When Lento is not up
 * for that device the result is 0 if the fileset has FSET_STEAL_PERMIT
 * set and -ENOTCONN otherwise.  With Lento up, the dentry's path relative
 * to the fileset root is built and the result of izo_upc_permit() is
 * returned (-ENOMEM if the path buffer cannot be allocated).
 */
static int presto_permit_upcall(struct dentry *dentry)
{
        int rc;
        char *path, *buffer;
        int pathlen;
        int minor;
        struct presto_file_set *fset = NULL;

        ENTRY;

        if ( (minor = presto_i2m(dentry->d_inode)) < 0) {
                EXIT;
                return -EINVAL;
        }

        fset = presto_fset(dentry);
        if (!fset) {
                EXIT;
                return -ENOTCONN;
        }

        if ( !presto_lento_up(minor) ) {
                if ( fset->fset_flags & FSET_STEAL_PERMIT ) {
                        EXIT;
                        return 0;
                } else {
                        EXIT;
                        return -ENOTCONN;
                }
        }

        PRESTO_ALLOC(buffer, PAGE_SIZE);
        if ( !buffer ) {
                CERROR("PRESTO: out of memory!\n");
                EXIT;
                return -ENOMEM;
        }
        path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE);
        pathlen = MYPATHLEN(buffer, path);
        rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name);
        PRESTO_FREE(buffer, PAGE_SIZE);
        EXIT;
        return rc;
}
 
/* get a write permit for the fileset of this inode
* - if this returns a negative value there was an error
* - if 0 is returned the permit was already in the kernel -- or --
* Lento gave us the permit without reintegration
* - lento returns the number of records it reintegrated
*
* Note that if this fileset has branches, a permit will -never- to a normal
* process for writing in the data area (ie, outside of .intermezzo)
*/
int presto_get_permit(struct inode * inode)
{
struct dentry *de;
struct presto_file_set *fset;
int minor = presto_i2m(inode);
int rc = 0;
 
ENTRY;
if (minor < 0) {
EXIT;
return -1;
}
 
if ( ISLENTO(minor) ) {
EXIT;
return 0;
}
 
if (list_empty(&inode->i_dentry)) {
CERROR("No alias for inode %d\n", (int) inode->i_ino);
EXIT;
return -EINVAL;
}
 
de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
 
if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
EXIT;
return 0;
}
 
fset = presto_fset(de);
if ( !fset ) {
CERROR("Presto: no fileset in presto_get_permit!\n");
EXIT;
return -EINVAL;
}
 
if (fset->fset_flags & FSET_HAS_BRANCHES) {
EXIT;
return -EROFS;
}
 
spin_lock(&fset->fset_permit_lock);
if (fset->fset_flags & FSET_HASPERMIT) {
fset->fset_permit_count++;
CDEBUG(D_INODE, "permit count now %d, inode %lx\n",
fset->fset_permit_count, inode->i_ino);
spin_unlock(&fset->fset_permit_lock);
EXIT;
return 0;
}
 
/* Allow reintegration to proceed without locks -SHP */
fset->fset_permit_upcall_count++;
if (fset->fset_permit_upcall_count == 1) {
spin_unlock(&fset->fset_permit_lock);
rc = presto_permit_upcall(fset->fset_dentry);
spin_lock(&fset->fset_permit_lock);
fset->fset_permit_upcall_count--;
if (rc == 0) {
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
NULL);
fset->fset_permit_count++;
} else if (rc == ENOTCONN) {
CERROR("InterMezzo: disconnected operation. stealing permit.\n");
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
NULL);
fset->fset_permit_count++;
/* set a disconnected flag here to stop upcalls */
rc = 0;
} else {
CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc);
rc = -EROFS;
/* go to sleep here and try again? */
}
wake_up_interruptible(&fset->fset_permit_queue);
} else {
/* Someone is already doing an upcall; go to sleep. */
DECLARE_WAITQUEUE(wait, current);
 
spin_unlock(&fset->fset_permit_lock);
add_wait_queue(&fset->fset_permit_queue, &wait);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
 
spin_lock(&fset->fset_permit_lock);
if (fset->fset_permit_upcall_count == 0)
break;
spin_unlock(&fset->fset_permit_lock);
 
if (signal_pending(current)) {
remove_wait_queue(&fset->fset_permit_queue,
&wait);
return -ERESTARTSYS;
}
schedule();
}
remove_wait_queue(&fset->fset_permit_queue, &wait);
/* We've been woken up: do we have the permit? */
if (fset->fset_flags & FSET_HASPERMIT)
/* FIXME: Is this the right thing? */
rc = -EAGAIN;
}
 
CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), "
"rc %d\n", fset->fset_permit_count, inode->i_ino, rc);
spin_unlock(&fset->fset_permit_lock);
EXIT;
return rc;
}
 
int presto_put_permit(struct inode * inode)
{
struct dentry *de;
struct presto_file_set *fset;
int minor = presto_i2m(inode);
 
ENTRY;
if (minor < 0) {
EXIT;
return -1;
}
 
if ( ISLENTO(minor) ) {
EXIT;
return 0;
}
 
if (list_empty(&inode->i_dentry)) {
CERROR("No alias for inode %d\n", (int) inode->i_ino);
EXIT;
return -1;
}
 
de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
 
fset = presto_fset(de);
if ( !fset ) {
CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__);
EXIT;
return -1;
}
 
if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
EXIT;
return 0;
}
 
spin_lock(&fset->fset_permit_lock);
if (fset->fset_flags & FSET_HASPERMIT) {
if (fset->fset_permit_count > 0)
fset->fset_permit_count--;
else
CERROR("Put permit while permit count is 0, "
"inode %ld!\n", inode->i_ino);
} else {
fset->fset_permit_count = 0;
CERROR("InterMezzo: put permit while no permit, inode %ld, "
"flags %x!\n", inode->i_ino, fset->fset_flags);
}
 
CDEBUG(D_INODE, "permit count now %d, inode %ld\n",
fset->fset_permit_count, inode->i_ino);
 
if (fset->fset_flags & FSET_PERMIT_WAITING &&
fset->fset_permit_count == 0) {
CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n",
inode->i_ino);
wake_up_interruptible(&fset->fset_permit_queue);
}
spin_unlock(&fset->fset_permit_lock);
 
EXIT;
return 0;
}
 
void presto_getversion(struct presto_version * presto_version,
struct inode * inode)
{
presto_version->pv_mtime = (__u64)inode->i_mtime;
presto_version->pv_ctime = (__u64)inode->i_ctime;
presto_version->pv_size = (__u64)inode->i_size;
}
 
 
/* If uuid is non-null, it is the uuid of the peer that's making the revocation
 * request.  If it is null, this request was made locally, without external
 * pressure to give up the permit.  This most often occurs when a client
 * starts up.
 *
 * Waits (interruptibly) until nobody holds a reference on the permit, then
 * notifies Lento when a uuid was given and clears FSET_HASPERMIT.
 *
 * Returns 0 on success, -ENODEV when no presto device or fileset is found,
 * -EINVAL when another revocation is already waiting, -ERESTARTSYS when
 * interrupted by a signal, or the error of the revoke upcall.
 *
 * FIXME: this function needs to be refactored slightly once we start handling
 * multiple clients.
 */
int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16])
{
        struct presto_file_set *fset;
        DECLARE_WAITQUEUE(wait, current);
        int minor, rc;

        ENTRY;

        minor = presto_i2m(dentry->d_inode);
        if (minor < 0) {
                EXIT;
                return -ENODEV;
        }

        fset = presto_fset(dentry);
        if (fset == NULL) {
                EXIT;
                return -ENODEV;
        }

        spin_lock(&fset->fset_permit_lock);
        if (fset->fset_flags & FSET_PERMIT_WAITING) {
                CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported!  Aborting this particular permit request...\n");
                EXIT;
                spin_unlock(&fset->fset_permit_lock);
                return -EINVAL;
        }

        if (fset->fset_permit_count == 0)
                goto got_permit;

        /* Something is still using this permit.  Mark that we're waiting for it
         * and go to sleep. */
        rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL);
        spin_unlock(&fset->fset_permit_lock);
        if (rc < 0) {
                EXIT;
                return rc;
        }

        add_wait_queue(&fset->fset_permit_queue, &wait);
        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);

                spin_lock(&fset->fset_permit_lock);
                if (fset->fset_permit_count == 0)
                        break;
                spin_unlock(&fset->fset_permit_lock);

                if (signal_pending(current)) {
                        /* FIXME: there must be a better thing to return... */
                        set_current_state(TASK_RUNNING);
                        remove_wait_queue(&fset->fset_permit_queue, &wait);
                        /* We stop waiting: take the marker back, or every
                         * later revocation is refused with -EINVAL. */
                        spin_lock(&fset->fset_permit_lock);
                        izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING,
                                      0, NULL);
                        spin_unlock(&fset->fset_permit_lock);
                        EXIT;
                        return -ERESTARTSYS;
                }

                /* FIXME: maybe there should be a timeout here. */

                schedule();
        }

        /* the loop is left with the task still marked TASK_INTERRUPTIBLE */
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&fset->fset_permit_queue, &wait);
 got_permit:
        /* By this point fset->fset_permit_count is zero and we're holding the
         * lock. */
        CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n",
               dentry->d_inode->i_ino);

        if (uuid != NULL) {
                /* NOTE(review): the upcall is made with fset_permit_lock
                 * held; confirm it cannot sleep. */
                rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid);
                if (rc < 0) {
                        /* nobody is waiting any more, see above */
                        izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING,
                                      0, NULL);
                        spin_unlock(&fset->fset_permit_lock);
                        EXIT;
                        return rc;
                }
        }

        izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL);
        izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL);
        spin_unlock(&fset->fset_permit_lock);
        EXIT;
        return 0;
}
 
/*
 * Is the fileset read-only for the calling process?
 *
 * Lento is checked against the *_LENTO_RO flags and every other caller
 * against the *_CLIENT_RO flags.  Returns 1 when the matching flag is set
 * on the fileset or on its cache, 0 otherwise.
 */
inline int presto_is_read_only(struct presto_file_set * fset)
{
        struct presto_cache *cache = fset->fset_cache;
        int minor = cache->cache_psdev->uc_minor;
        int mask;

        if (ISLENTO(minor))
                mask = FSET_LENTO_RO;
        else
                mask = FSET_CLIENT_RO;
        if ( fset->fset_flags & mask )
                return 1;

        if (ISLENTO(minor))
                mask = CACHE_LENTO_RO;
        else
                mask = CACHE_CLIENT_RO;
        if ( cache->cache_flags & mask )
                return 1;

        return 0;
}
/journal_reiserfs.c
0,0 → 1,142
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <linux/types.h>
#include <linux/param.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#if 0
#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE)
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_fs_sb.h>
#include <linux/reiserfs_fs_i.h>
#endif
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE)
 
 
static loff_t presto_reiserfs_freespace(struct presto_cache *cache,
struct super_block *sb)
{
struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (sb);
loff_t avail;
 
avail = le32_to_cpu(rs->s_free_blocks) *
le16_to_cpu(rs->s_blocksize);
return avail;
}
 
/*
 * start the filesystem journal operations
 *
 * Returns NULL when journaling is off for the fileset, the cache is not
 * a reiserfs cache, or the handle cannot be allocated; ERR_PTR(-ENOSPC)
 * when too little space is left; otherwise a transaction handle that is
 * released by presto_reiserfs_trans_commit().
 */
static void *presto_reiserfs_trans_start(struct presto_file_set *fset,
                                         struct inode *inode,
                                         int op)
{
        int jblocks;
        __u32 avail_kmlblocks;
        struct reiserfs_transaction_handle *th ;

        avail_kmlblocks = presto_reiserfs_freespace(fset->fset_cache,
                                                    inode->i_sb);
        if ( presto_no_journal(fset) ||
             strcmp(fset->fset_cache->cache_type, "reiserfs"))
        {
                CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
                       fset->fset_cache->cache_type);
                return NULL;
        }

        if ( avail_kmlblocks < 3 ) {
                return ERR_PTR(-ENOSPC);
        }
        if (  (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR)
              && avail_kmlblocks < 6 ) {
                return ERR_PTR(-ENOSPC);
        }

        /* Allocate the handle only once every early exit is behind us;
         * allocating it first leaked it on each of the returns above. */
        PRESTO_ALLOC(th, sizeof(*th));
        if (!th) {
                CERROR("presto: No memory for trans handle\n");
                return NULL;
        }

        jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4;
        CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks);

        lock_kernel();
        journal_begin(th, inode->i_sb, jblocks);
        unlock_kernel();
        return th;
}
 
/*
 * End the reiserfs transaction behind `handle` and free the handle.
 * The block count matches the one used when the transaction was begun.
 */
static void presto_reiserfs_trans_commit(struct presto_file_set *fset,
                                         void *handle)
{
        int jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4;

        lock_kernel();
        journal_end(handle, fset->fset_cache->cache_sb, jblocks);
        unlock_kernel();

        PRESTO_FREE(handle, sizeof(struct reiserfs_transaction_handle));
}
 
/*
 * Ask for journaled data writes on this inode.
 *
 * NOTE(review): the only implementation sets the ext3 flag through the
 * ext3 inode info even though this is the reiserfs variant -- confirm.
 */
static void presto_reiserfs_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        inode->u.ext3_i.i_flags = inode->u.ext3_i.i_flags |
                                  EXT3_JOURNAL_DATA_FL;
#endif
}
 
/* Not implemented for reiserfs: calling it hits BUG(). */
static int presto_reiserfs_has_all_data(struct inode *inode)
{
        int has_all_data = 0;

        BUG();
        return has_all_data;
}
 
/* Journal operations used for caches that live on reiserfs. */
struct journal_ops presto_reiserfs_journal_ops = {
        .tr_start        = presto_reiserfs_trans_start,
        .tr_commit       = presto_reiserfs_trans_commit,
        .tr_avail        = presto_reiserfs_freespace,
        .tr_all_data     = presto_reiserfs_has_all_data,
        .tr_journal_data = presto_reiserfs_journal_file_data,
};
 
#endif
#endif
/journal_ext2.c
0,0 → 1,91
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/ext2_fs.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#if defined(CONFIG_EXT2_FS)
 
/* EXT2 has no journalling, so these functions do nothing */

/*
 * Space available on an ext2 cache in bytes: free blocks minus reserved
 * blocks, times the block size.  Returns 0 when no more than the
 * reserved blocks are free.
 */
static loff_t presto_e2_freespace(struct presto_cache *cache,
                                  struct super_block *sb)
{
        /* 64 bit arithmetic, as in the ext3 variant: the byte count does
         * not fit an unsigned long on 32 bit machines */
        loff_t freebl = le32_to_cpu(sb->u.ext2_sb.s_es->s_free_blocks_count);
        loff_t reserved = le32_to_cpu(sb->u.ext2_sb.s_es->s_r_blocks_count);

        /* unsigned subtraction used to wrap around to a huge value here */
        if (freebl <= reserved)
                return 0;

        return ((freebl - reserved) << EXT2_BLOCK_SIZE_BITS(sb));
}
 
/*
 * start the filesystem journal operations
 *
 * ext2 has no journal, so no real handle exists.  Returns NULL when
 * journaling is off for the fileset or the cache is not an ext2 cache,
 * ERR_PTR(-ENOSPC) when fewer than 3 blocks are free (fewer than 6 for
 * anything but unlink and rmdir), and the dummy handle (void *)1
 * otherwise.
 */
static void *presto_e2_trans_start(struct presto_file_set *fset, struct inode *inode, int op)
{
        __u32 avail_kmlblocks;

        if ( presto_no_journal(fset) ||
             strcmp(fset->fset_cache->cache_type, "ext2"))
                return NULL;

        /* the super block field is little-endian on disk; convert it the
         * same way presto_e2_freespace() does */
        avail_kmlblocks =
                le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_free_blocks_count);
        if ( avail_kmlblocks < 3 ) {
                return ERR_PTR(-ENOSPC);
        }
        if (  (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
              && avail_kmlblocks < 6 ) {
                return ERR_PTR(-ENOSPC);
        }
        return (void *) 1;
}
 
/* Nothing to commit: presto_e2_trans_start() opens no real transaction. */
static void presto_e2_trans_commit(struct presto_file_set *fset, void *handle)
{
        return;
}
 
/* Not implemented for ext2: calling it hits BUG(). */
static int presto_e2_has_all_data(struct inode *inode)
{
        int has_all_data = 0;

        BUG();
        return has_all_data;
}
 
/* Journal operations used for caches that live on ext2. */
struct journal_ops presto_ext2_journal_ops = {
        .tr_all_data     = presto_e2_has_all_data,
        .tr_avail        = presto_e2_freespace,
        .tr_start        = presto_e2_trans_start,
        .tr_commit       = presto_e2_trans_commit,
        .tr_journal_data = NULL
};
 
#endif /* CONFIG_EXT2_FS */
/journal_ext3.c
0,0 → 1,285
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <linux/types.h>
#include <linux/param.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#endif
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
 
#define MAX_PATH_BLOCKS(inode) (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
#define MAX_NAME_BLOCKS(inode) (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
 
/* space requirements:
presto_do_truncate:
used to truncate the KML forward to next fset->chunksize boundary
- zero partial block
- update inode
presto_write_record:
write header (< one block)
write one path (< MAX_PATHLEN)
possibly write another path (< MAX_PATHLEN)
write suffix (< one block)
presto_update_last_rcvd
write one block
*/
 
/*
 * Space available on an ext3 cache in bytes: free blocks minus reserved
 * blocks (both from the super block), times the block size.
 */
static loff_t presto_e3_freespace(struct presto_cache *cache,
                                  struct super_block *sb)
{
        loff_t avail;

        avail = le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
        avail -= le32_to_cpu(sb->u.ext3_sb.s_es->s_r_blocks_count);

        return avail << EXT3_BLOCK_SIZE_BITS(sb);
}
 
/* Start the filesystem journal operations for KML opcode 'op'.
 *
 * Returns:
 *   NULL              if journaling is disabled for the fileset, the cache
 *                     type is not "ext3", or 'op' is not a known opcode;
 *   ERR_PTR(-ENOSPC)  if the superblock reports too few free blocks;
 *   otherwise         the handle returned by journal_start().
 *
 * The number of journal blocks requested is an estimate that depends on
 * the opcode (see the switch below).
 */
static void *presto_e3_trans_start(struct presto_file_set *fset,
                                   struct inode *inode,
                                   int op)
{
        int jblocks;
        int trunc_blks, one_path_blks, extra_path_blks,
                extra_name_blks, lml_blks;
        __u32 avail_kmlblocks;
        handle_t *handle;

        if ( presto_no_journal(fset) ||
             strcmp(fset->fset_cache->cache_type, "ext3"))
        {
                CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
                       fset->fset_cache->cache_type);
                return NULL;
        }

        /* The superblock counter must be byte-swapped before use, exactly
         * as presto_e3_freespace() does; comparing the raw value breaks
         * the free-space checks below on big-endian hosts. */
        avail_kmlblocks =
                le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count);
        if ( avail_kmlblocks < 3 ) {
                return ERR_PTR(-ENOSPC);
        }
        /* Operations other than unlink/rmdir need a larger margin. */
        if ( (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
             && avail_kmlblocks < 6 ) {
                return ERR_PTR(-ENOSPC);
        }

        /* Need journal space for:
             at least three writes to KML (two one block writes, one a path)
             possibly a second name (unlink, rmdir)
             possibly a second path (symlink, rename)
             a one block write to the last rcvd file
        */

        trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1;
        one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
        /* lml_blks is only referenced by the disabled RELEASE estimate. */
        lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
        extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode);
        extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode);

        /* additional blocks appear for "two pathname" operations
           and operations involving the LML records
        */
        switch (op) {
        case KML_OPCODE_TRUNC:
                jblocks = one_path_blks + extra_name_blks + trunc_blks
                        + EXT3_DELETE_TRANS_BLOCKS;
                break;
        case KML_OPCODE_KML_TRUNC:
                /* Hopefully this is a little better, but I'm still mostly
                 * guessing here. */
                /* unlink 1 */
                jblocks = extra_name_blks + trunc_blks +
                        EXT3_DELETE_TRANS_BLOCKS + 2;

                /* unlink 2 */
                jblocks += extra_name_blks + trunc_blks +
                        EXT3_DELETE_TRANS_BLOCKS + 2;

                /* rename 1 */
                jblocks += 2 * extra_path_blks + trunc_blks +
                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;

                /* rename 2 */
                jblocks += 2 * extra_path_blks + trunc_blks +
                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
                break;
        case KML_OPCODE_RELEASE:
                /*
                jblocks = one_path_blks + lml_blks + 2*trunc_blks;
                */
                jblocks = one_path_blks;
                break;
        case KML_OPCODE_SETATTR:
                jblocks = one_path_blks + trunc_blks + 1 ;
                break;
        case KML_OPCODE_CREATE:
                jblocks = one_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 3 + 2;
                break;
        case KML_OPCODE_LINK:
                jblocks = one_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 2;
                break;
        case KML_OPCODE_UNLINK:
                jblocks = one_path_blks + extra_name_blks + trunc_blks
                        + EXT3_DELETE_TRANS_BLOCKS + 2;
                break;
        case KML_OPCODE_SYMLINK:
                jblocks = one_path_blks + extra_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 5;
                break;
        case KML_OPCODE_MKDIR:
                jblocks = one_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 4 + 2;
                break;
        case KML_OPCODE_RMDIR:
                jblocks = one_path_blks + extra_name_blks + trunc_blks
                        + EXT3_DELETE_TRANS_BLOCKS + 1;
                break;
        case KML_OPCODE_MKNOD:
                jblocks = one_path_blks + trunc_blks +
                        EXT3_DATA_TRANS_BLOCKS + 3 + 2;
                break;
        case KML_OPCODE_RENAME:
                jblocks = one_path_blks + extra_path_blks + trunc_blks +
                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
                break;
        case KML_OPCODE_WRITE:
                jblocks = one_path_blks;
                /*  add this when we can wrap our transaction with
                    that of ext3_file_write (ordered writes)
                    +  EXT3_DATA_TRANS_BLOCKS;
                */
                break;
        default:
                CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
                return NULL;
        }

        CDEBUG(D_JOURNAL, "creating journal handle (%d blocks) for op %d\n",
               jblocks, op);
        /* journal_start/stop does not do its own locking while updating
         * the handle/transaction information. Hence we create our own
         * critical section to protect these calls. -SHP
         */
        lock_kernel();
        handle = journal_start(EXT3_JOURNAL(inode), jblocks);
        unlock_kernel();
        return handle;
}
 
/* Finish a transaction begun by presto_e3_trans_start().
 *
 * Does nothing when journaling is disabled for the fileset or when no
 * handle was obtained.
 */
static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle)
{
        if ( !presto_no_journal(fset) && handle ) {
                /* journal_stop is wrapped in the big kernel lock for the
                 * same reason journal_start is in presto_e3_trans_start. */
                lock_kernel();
                journal_stop(handle);
                unlock_kernel();
        }
}
 
/* Set EXT3_JOURNAL_DATA_FL in the inode's ext3 flags.  When the kernel
 * headers do not define that flag the function is empty and the build
 * emits a warning. */
static void presto_e3_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
#endif
}
 
/* The logic here is a slightly modified version of ext3/inode.c:block_to_path
*/
static int presto_e3_has_all_data(struct inode *inode)
{
int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
const long direct_blocks = EXT3_NDIR_BLOCKS,
indirect_blocks = ptrs,
double_blocks = (1 << (ptrs_bits * 2));
long block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
inode->i_sb->s_blocksize_bits;
 
ENTRY;
 
if (inode->i_size == 0) {
EXIT;
return 1;
}
 
if (block < direct_blocks) {
/* No indirect blocks, no problem. */
} else if (block < indirect_blocks + direct_blocks) {
block++;
} else if (block < double_blocks + indirect_blocks + direct_blocks) {
block += 2;
} else if (((block - double_blocks - indirect_blocks - direct_blocks)
>> (ptrs_bits * 2)) < ptrs) {
block += 3;
}
 
block *= (inode->i_sb->s_blocksize / 512);
 
CDEBUG(D_CACHE, "Need %ld blocks, have %ld.\n", block, inode->i_blocks);
 
if (block > inode->i_blocks) {
EXIT;
return 0;
}
 
EXIT;
return 1;
}
 
/* Journal operation table for ext3-backed caches. */
struct journal_ops presto_ext3_journal_ops = {
        /* transaction bracketing */
        .tr_start        = presto_e3_trans_start,
        .tr_commit       = presto_e3_trans_commit,
        /* space and data queries */
        .tr_avail        = presto_e3_freespace,
        .tr_all_data     = presto_e3_has_all_data,
        .tr_journal_data = presto_e3_journal_file_data,
        /* lookup by inode number */
        .tr_ilookup      = presto_iget_ilookup
};
 
#endif /* CONFIG_EXT3_FS */
/dir.c
0,0 → 1,1415
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Tacitus Systems
* Copyright (C) 2000 Peter J. Braam
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <stdarg.h>
 
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/smp_lock.h>
 
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#define __NO_VERSION__
#include <linux/module.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
/* Re-acquire only dir->i_sem.  Counterpart of the i_sem release done by
 * presto_unlock(); the i_zombie/BKL half is handled separately by
 * presto_relock_other(). */
static inline void presto_relock_sem(struct inode *dir)
{
/* the lock from sys_mkdir / lookup_create */
down(&dir->i_sem);
/* the rest is done by the do_{create,mkdir, ...} */
}
 
/* Re-acquire dir->i_zombie and then the big kernel lock, i.e. everything
 * presto_unlock() released except dir->i_sem. */
static inline void presto_relock_other(struct inode *dir)
{
/* vfs_mkdir locks */
down(&dir->i_zombie);
lock_kernel();
}
 
/* Re-acquire, in order, dir->i_sem, dir->i_zombie and the big kernel lock:
 * the exact reverse of presto_unlock(). */
static inline void presto_fulllock(struct inode *dir)
{
/* the lock from sys_mkdir / lookup_create */
down(&dir->i_sem);
/* vfs_mkdir locks */
down(&dir->i_zombie);
lock_kernel();
}
 
/* Release, in order, the big kernel lock, dir->i_zombie and dir->i_sem.
 * The directory methods in this file call this before presto_get_permit()
 * and re-take the locks afterwards. */
static inline void presto_unlock(struct inode *dir)
{
/* vfs_mkdir locks */
unlock_kernel();
up(&dir->i_zombie);
/* the lock from sys_mkdir / lookup_create */
up(&dir->i_sem);
}
 
 
/*
* presto_permission is defined later in this file.
* izo_authorized_uid is the uid (besides CAP_DAC_READ_SEARCH holders /
* root, see presto_can_ilookup and presto_ioctl) allowed to use ilookup
* and the InterMezzo ioctls; it starts at 0 and is changed by the
* IZO_IOC_SET_IOCTL_UID ioctl.
*/
extern int presto_permission(struct inode *inode, int mask);
static int izo_authorized_uid = 0;
 
/* Test whether a dentry name has the ilookup form
 *     <PRESTO_ILOOKUP_MAGIC><hex inode number><PRESTO_ILOOKUP_SEP><hex generation>
 * and, if so, decode it.
 *
 * Returns 1 and fills *id and *generation on a match, 0 otherwise.  Note
 * that *id is written as soon as the magic prefix matches, even when the
 * separator is missing and 0 is returned.
 */
int izo_dentry_is_ilookup(struct dentry *dentry, ino_t *id,
                          unsigned int *generation)
{
        char digits[64];
        char *sep;

        ENTRY;
        /* prefix is 7 characters: '...ino:' */
        if ( dentry->d_name.len < 7 || dentry->d_name.len > 64 ||
             memcmp(dentry->d_name.name, PRESTO_ILOOKUP_MAGIC, 7) != 0 ) {
                EXIT;
                return 0;
        }

        /* Copy everything after the prefix into a NUL-terminated buffer. */
        memcpy(digits, dentry->d_name.name + 7, dentry->d_name.len - 7);
        digits[dentry->d_name.len - 7] = '\0';

        /* name is of the form ...ino:<inode number>:<generation> */
        *id = simple_strtoul(digits, &sep, 16);
        if ( *sep != PRESTO_ILOOKUP_SEP ) {
                EXIT;
                return 0;
        }

        *generation = simple_strtoul(sep + 1, 0, 16);
        CDEBUG(D_INODE, "ino string: %s, Id = %lx (%lu), "
               "generation %x (%d)\n",
               digits, *id, *id, *generation, *generation);
        EXIT;
        return 1;
}
 
/* ilookup handler that performs no lookup at all: dir, ino and generation
 * are ignored and the dentry passed in is returned unchanged. */
struct dentry *presto_tmpfs_ilookup(struct inode *dir,
struct dentry *dentry,
ino_t ino,
unsigned int generation)
{
return dentry;
}
 
 
/* Return 1 if the current task may use ilookup: either its euid equals
 * izo_authorized_uid or it holds CAP_DAC_READ_SEARCH.  Otherwise 0. */
inline int presto_can_ilookup(void)
{
        if (current->euid == izo_authorized_uid)
                return 1;

        return capable(CAP_DAC_READ_SEARCH) != 0;
}
 
/* ilookup handler based on iget(): attach the inode numbered 'ino' on
 * dir's superblock to 'dentry', provided its generation matches.
 *
 * Returns NULL on success (the dentry has been instantiated and flagged
 * DCACHE_NFSD_DISCONNECTED), ERR_PTR(-EPERM) if the caller may not use
 * ilookup, and ERR_PTR(-ENOENT) if the inode is missing, bad, unlinked or
 * has a different generation.
 */
struct dentry *presto_iget_ilookup(struct inode *dir,
                                   struct dentry *dentry,
                                   ino_t ino,
                                   unsigned int generation)
{
        struct inode *inode;

        ENTRY;

        if ( !presto_can_ilookup() ) {
                CERROR("ilookup denied: euid %u, authorized_uid %u\n",
                       current->euid, izo_authorized_uid);
                return ERR_PTR(-EPERM);
        }

        inode = iget(dir->i_sb, ino);
        if (!inode) {
                CERROR("fatal: NULL inode ino %lu\n", ino);
                return ERR_PTR(-ENOENT);
        }

        if (is_bad_inode(inode) || inode->i_nlink == 0) {
                CERROR("fatal: bad inode ino %lu, links %d\n", ino, inode->i_nlink);
                goto reject;
        }

        if (inode->i_generation != generation) {
                CERROR("fatal: bad generation %u (want %u)\n",
                       inode->i_generation, generation);
                goto reject;
        }

        d_instantiate(dentry, inode);
        dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; /* NFS hack */

        EXIT;
        return NULL;

reject:
        /* Drop the reference taken by iget() above. */
        iput(inode);
        return ERR_PTR(-ENOENT);
}
 
/* Create the "...ino:<ino>:<generation>" alias dentry for 'real' inside the
 * "..iopen.." directory under 'parent'.
 *
 * On success the new dentry is hashed with real's inode (an extra i_count
 * reference is taken), shares real's presto_dentry_data (dd_count is
 * bumped, dd_inodentry points at the alias) and is returned.  Returns NULL
 * if either lookup fails.  BUG()s if 'real' has no dentry data, or if the
 * alias already has an inode or dentry data.
 */
struct dentry *presto_add_ilookup_dentry(struct dentry *parent,
                                         struct dentry *real)
{
        struct inode *inode = real->d_inode;
        struct presto_dentry_data *dd;
        struct dentry *inodir;
        struct dentry *de;
        char buf[32];

        inodir = lookup_one_len("..iopen..", parent, strlen("..iopen.."));
        if (!inodir || IS_ERR(inodir) || !inodir->d_inode ) {
                CERROR("%s: bad ..iopen.. lookup\n", __FUNCTION__);
                /* A negative dentry still carries a reference from
                 * lookup_one_len(); release it instead of leaking it. */
                if (inodir && !IS_ERR(inodir))
                        dput(inodir);
                return NULL;
        }
        inodir->d_inode->i_op = &presto_dir_iops;

        snprintf(buf, sizeof(buf), "...ino:%lx:%x",
                 inode->i_ino, inode->i_generation);

        de = lookup_one_len(buf, inodir, strlen(buf));
        if (!de || IS_ERR(de)) {
                CERROR("%s: bad ...ino lookup %ld\n",
                       __FUNCTION__, PTR_ERR(de));
                dput(inodir);
                return NULL;
        }

        dd = presto_d2d(real);
        if (!dd)
                BUG();

        /* already exists */
        if (de->d_inode)
                BUG();

        if (presto_d2d(de))
                BUG();

        /* d_add() consumes an inode reference, so take one for the alias. */
        atomic_inc(&inode->i_count);
        de->d_op = &presto_dentry_ops;
        d_add(de, inode);
        if (!de->d_op)
                CERROR("DD: no ops dentry %p, dd %p\n", de, dd);
        dd->dd_inodentry = de;
        dd->dd_count++;
        de->d_fsdata = dd;

        dput(inodir);
        return de;
}
 
struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry)
{
int rc = 0;
struct dentry *de;
struct presto_cache *cache;
int minor;
ino_t ino;
unsigned int generation;
struct inode_operations *iops;
int is_ilookup = 0;
 
ENTRY;
cache = presto_get_cache(dir);
if (cache == NULL) {
CERROR("InterMezzo BUG: no cache in presto_lookup "
"(dir ino: %ld)!\n", dir->i_ino);
EXIT;
return NULL;
}
minor = presto_c2m(cache);
 
iops = filter_c2cdiops(cache->cache_filter);
if (!iops || !iops->lookup) {
CERROR("InterMezzo BUG: filesystem has no lookup\n");
EXIT;
return NULL;
}
 
 
CDEBUG(D_CACHE, "dentry %p, dir ino: %ld, name: %*s, islento: %d\n",
dentry, dir->i_ino, dentry->d_name.len, dentry->d_name.name,
ISLENTO(minor));
 
if (dentry->d_fsdata)
CERROR("DD -- BAD dentry %p has data\n", dentry);
dentry->d_fsdata = NULL;
#if 0
if (ext2_check_for_iopen(dir, dentry))
de = NULL;
else {
#endif
if ( izo_dentry_is_ilookup(dentry, &ino, &generation) ) {
de = cache->cache_filter->o_trops->tr_ilookup
(dir, dentry, ino, generation);
is_ilookup = 1;
} else
de = iops->lookup(dir, dentry);
#if 0
}
#endif
 
if ( IS_ERR(de) ) {
CERROR("dentry lookup error %ld\n", PTR_ERR(de));
return de;
}
 
/* some file systems have no read_inode: set methods here */
if (dentry->d_inode)
presto_set_ops(dentry->d_inode, cache->cache_filter);
 
filter_setup_dentry_ops(cache->cache_filter,
dentry->d_op, &presto_dentry_ops);
dentry->d_op = filter_c2udops(cache->cache_filter);
 
/* In lookup we will tolerate EROFS return codes from presto_set_dd
* to placate NFS. EROFS indicates that a fileset was not found but
* we should still be able to continue through a lookup.
* Anything else is a hard error and must be returned to VFS. */
if (!is_ilookup)
rc = presto_set_dd(dentry);
if (rc && rc != -EROFS) {
CERROR("presto_set_dd failed (dir %ld, name %*s): %d\n",
dir->i_ino, dentry->d_name.len, dentry->d_name.name, rc);
return ERR_PTR(rc);
}
 
EXIT;
return NULL;
}
 
/* Make sure a dentry has InterMezzo dentry data.
 *
 * Returns 0 if the data is already present, otherwise the result of
 * presto_set_dd().  When PRESTO_NO_NFS is defined a missing entry is
 * treated as a bug instead.
 */
static inline int presto_check_set_fsdata (struct dentry *de)
{
        if (presto_d2d(de) != NULL)
                return 0;

#ifdef PRESTO_NO_NFS
        CERROR("dentry without fsdata: %p: %*s\n", de,
               de->d_name.len, de->d_name.name);
        BUG();
#endif
        return presto_set_dd (de);
}
 
/* setattr method: obtain a permit on the inode, apply the attribute change
 * through presto_do_setattr() and release the permit.
 *
 * Returns the presto_prep() error if preparation fails, -EROFS if no permit
 * can be obtained, otherwise the result of presto_do_setattr().
 */
int presto_setattr(struct dentry *de, struct iattr *iattr)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        int rc;

        ENTRY;

        rc = presto_prep(de, &cache, &fset);
        if ( rc ) {
                EXIT;
                return rc;
        }

        if (!iattr->ia_valid)
                CDEBUG(D_INODE, "presto_setattr: iattr is not valid\n");

        CDEBUG(D_INODE, "valid %#x, mode %#o, uid %u, gid %u, size %Lu, "
               "atime %lu mtime %lu ctime %lu flags %d\n",
               iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, iattr->ia_gid,
               iattr->ia_size, iattr->ia_atime, iattr->ia_mtime,
               iattr->ia_ctime, iattr->ia_attr_flags);

        if ( presto_get_permit(de->d_inode) < 0 ) {
                EXIT;
                return -EROFS;
        }

        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(presto_c2m(cache)))
                info.flags |= LENTO_FL_KML;

        rc = presto_do_setattr(fset, de, iattr, &info);
        presto_put_permit(de->d_inode);
        return rc;
}
 
/*
* Now the meat: the fs operations that require journaling
*
*
* XXX: some of these need modifications for hierarchical filesets
*/
 
/* Common preamble for modifying operations: resolve the fileset and cache
 * of 'dentry' and verify that the fileset may be modified.
 *
 * On success returns 0 with *fset and *cache filled in.  Otherwise returns
 * the presto_check_set_fsdata() error, -EROFS if there is no fileset or it
 * is read-only, or -EBADF if the fileset has no cache.
 */
int presto_prep(struct dentry *dentry, struct presto_cache **cache,
                struct presto_file_set **fset)
{
        int err;

        /* NFS might pass us dentries which have not gone through lookup.
         * Test and set d_fsdata for such dentries
         */
        err = presto_check_set_fsdata (dentry);
        if (err)
                return err;

        *fset = presto_fset(dentry);
        if ( *fset == NULL ) {
                CERROR("No file set for dentry at %p: %*s\n", dentry,
                       dentry->d_name.len, dentry->d_name.name);
                return -EROFS;
        }

        *cache = (*fset)->fset_cache;
        if ( *cache == NULL ) {
                CERROR("PRESTO: BAD, BAD: cannot find cache\n");
                return -EBADF;
        }

        CDEBUG(D_PIOCTL, "---> cache flags %x, fset flags %x\n",
               (*cache)->cache_flags, (*fset)->fset_flags);

        if ( !presto_is_read_only(*fset) )
                return 0;

        CERROR("PRESTO: cannot modify read-only fileset, minor %d.\n",
               presto_c2m(*cache));
        return -EROFS;
}
 
/* create method.
 *
 * The VFS locks on 'dir' are dropped while the permit is obtained, i_sem is
 * re-taken for presto_do_create(), and i_zombie plus the BKL are re-taken
 * before returning.  Returns the preparation error, -EROFS when no permit
 * is granted, or the result of presto_do_create().
 */
static int presto_create(struct inode * dir, struct dentry * dentry, int mode)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc ) {
                EXIT;
                return rc;
        }

        rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc ) {
                EXIT;
                return rc;
        }
        presto_unlock(dir);

        /* Does blocking and non-blocking behaviour need to be
           checked for.  Without blocking (return 1), the permit
           was acquired without reintegration
        */
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        presto_relock_sem(dir);
        parent = dentry->d_parent;

        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(presto_c2m(cache)))
                info.flags |= LENTO_FL_KML;

        rc = presto_do_create(fset, parent, dentry, mode, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        EXIT;
        return rc;
}
 
/* link method: create 'new_dentry' in 'dir' as a hard link to 'old_dentry'.
 *
 * Both dentries must belong to the same fileset (-EXDEV otherwise).  Two
 * permits are needed, one on the source inode and one on the target
 * directory; the VFS locks on 'dir' are dropped while they are obtained and
 * fully re-taken before returning.
 *
 * Returns the preparation error, -EXDEV, -EROFS when a permit is refused,
 * or the result of presto_do_link().
 */
static int presto_link(struct dentry *old_dentry, struct inode *dir,
                       struct dentry *new_dentry)
{
        int error;
        struct presto_cache *cache, *new_cache;
        struct presto_file_set *fset, *new_fset;
        struct dentry *parent;
        struct lento_vfs_context info;

        ENTRY;
        error = presto_prep(old_dentry, &cache, &fset);
        if ( error ) {
                EXIT;
                return error;
        }

        error = presto_check_set_fsdata(new_dentry);
        if ( error ) {
                EXIT;
                return error;
        }

        error = presto_prep(new_dentry->d_parent, &new_cache, &new_fset);
        if ( error ) {
                EXIT;
                return error;
        }

        if (fset != new_fset) {
                EXIT;
                return -EXDEV;
        }

        presto_unlock(dir);
        if ( presto_get_permit(old_dentry->d_inode) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                /* The permit on the source inode was already granted;
                 * give it back so it is not leaked on this error path. */
                presto_put_permit(old_dentry->d_inode);
                presto_fulllock(dir);
                return -EROFS;
        }

        presto_relock_sem(dir);
        parent = new_dentry->d_parent;

        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;
        error = presto_do_link(fset, old_dentry, parent,
                               new_dentry, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        presto_put_permit(old_dentry->d_inode);
        return error;
}
 
/* mkdir method.
 *
 * Drops the VFS locks on 'dir' to obtain a permit, re-takes i_sem for
 * presto_do_mkdir(), then re-takes i_zombie and the BKL.  Returns the
 * preparation error, -EROFS when no permit is granted, or the result of
 * presto_do_mkdir().
 */
static int presto_mkdir(struct inode * dir, struct dentry * dentry, int mode)
{
        struct lento_vfs_context info;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct dentry *parent;
        int rc;

        ENTRY;

        rc = presto_check_set_fsdata(dentry);
        if ( rc ) {
                EXIT;
                return rc;
        }

        rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);

        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(presto_c2m(cache)))
                info.flags |= LENTO_FL_KML;

        presto_relock_sem(dir);
        parent = dentry->d_parent;
        rc = presto_do_mkdir(fset, parent, dentry, mode, &info);
        presto_relock_other(dir);
        presto_put_permit(dir);
        return rc;
}
 
 
 
/* symlink method.
 *
 * Drops the VFS locks on 'dir' to obtain a permit, re-takes i_sem for
 * presto_do_symlink(), then re-takes i_zombie and the BKL.  Returns the
 * preparation error, -EROFS when no permit is granted, or the result of
 * presto_do_symlink().
 */
static int presto_symlink(struct inode *dir, struct dentry *dentry,
                          const char *name)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc ) {
                EXIT;
                return rc;
        }

        rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        presto_relock_sem(dir);
        parent = dentry->d_parent;

        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(presto_c2m(cache)))
                info.flags |= LENTO_FL_KML;

        rc = presto_do_symlink(fset, parent, dentry, name, &info);
        presto_relock_other(dir);
        presto_put_permit(dir);
        return rc;
}
 
/* unlink method.
 *
 * Drops the VFS locks on 'dir' to obtain a permit, re-takes i_sem for
 * presto_do_unlink(), then re-takes i_zombie and the BKL.  Returns the
 * preparation error, -EROFS when no permit is granted, or the result of
 * presto_do_unlink().
 */
int presto_unlink(struct inode *dir, struct dentry *dentry)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc ) {
                EXIT;
                return rc;
        }

        rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        presto_relock_sem(dir);
        parent = dentry->d_parent;

        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(presto_c2m(cache)))
                info.flags |= LENTO_FL_KML;

        rc = presto_do_unlink(fset, parent, dentry, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        return rc;
}
 
/* rmdir method.
 *
 * Releases the BKL and the i_zombie / i_sem pairs of the directory and the
 * victim while the permit on the parent is obtained, then re-takes locks
 * and calls presto_do_rmdir().
 *
 * Returns the preparation error, -EROFS when no permit is granted, or the
 * result of presto_do_rmdir().
 *
 * NOTE(review): the failure path re-takes i_sem, i_zombie and the BKL, but
 * the success path re-takes only i_sem and the BKL here.  Presumably
 * presto_do_rmdir() deals with the i_zombie pair itself -- confirm against
 * its implementation before changing the lock sequence.
 */
static int presto_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
struct presto_cache *cache;
struct presto_file_set *fset;
struct dentry *parent = dentry->d_parent;
struct lento_vfs_context info;
 
ENTRY;
CDEBUG(D_FILE, "prepping presto\n");
error = presto_check_set_fsdata(dentry);
 
if ( error ) {
EXIT;
return error;
}
 
error = presto_prep(dentry->d_parent, &cache, &fset);
if ( error ) {
EXIT;
return error;
}
 
CDEBUG(D_FILE, "unlocking\n");
/* We need to dget() before the dput in double_unlock, to ensure we
* still have dentry references. double_lock doesn't do dget for us.
*/
/* NOTE(review): no dget()/dput() call is visible in this function; the
* comment above may describe an earlier version of the code. */
unlock_kernel();
/* Put the victim back into the dcache hash if it was unhashed. */
if (d_unhashed(dentry))
d_rehash(dentry);
double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
double_up(&dir->i_sem, &dentry->d_inode->i_sem);
 
CDEBUG(D_FILE, "getting permit\n");
if ( presto_get_permit(parent->d_inode) < 0 ) {
EXIT;
/* Restore every lock released above before reporting failure. */
double_down(&dir->i_sem, &dentry->d_inode->i_sem);
double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
lock_kernel();
return -EROFS;
}
CDEBUG(D_FILE, "locking\n");
 
double_down(&dir->i_sem, &dentry->d_inode->i_sem);
parent = dentry->d_parent;
memset(&info, 0, sizeof(info));
/* LENTO_FL_KML is set only when the caller is not Lento itself. */
if (!ISLENTO(presto_c2m(cache)))
info.flags = LENTO_FL_KML;
info.flags |= LENTO_FL_IGNORE_TIME;
error = presto_do_rmdir(fset, parent, dentry, &info);
presto_put_permit(parent->d_inode);
lock_kernel();
EXIT;
return error;
}
 
/* mknod method.
 *
 * Drops the VFS locks on 'dir' to obtain a permit, re-takes i_sem for
 * presto_do_mknod(), then re-takes i_zombie and the BKL.  Returns the
 * preparation error, -EROFS when no permit is granted, or the result of
 * presto_do_mknod().
 */
static int presto_mknod(struct inode * dir, struct dentry * dentry, int mode, int rdev)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc ) {
                EXIT;
                return rc;
        }

        rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        presto_relock_sem(dir);
        parent = dentry->d_parent;

        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(presto_c2m(cache)))
                info.flags |= LENTO_FL_KML;

        rc = presto_do_mknod(fset, parent, dentry, mode, rdev, &info);
        presto_relock_other(dir);
        presto_put_permit(dir);
        EXIT;
        return rc;
}
 
/* Release the locks held on entry to the rename method.
 *
 * Order: the i_zombie semaphores (three of them when 'triple' is set for a
 * directory rename, plus s_vfs_rename_sem for any directory rename), then
 * the BKL, then both directories' i_sem.
 */
inline void presto_triple_unlock(struct inode *old_dir, struct inode *new_dir,
                                 struct dentry *old_dentry,
                                 struct dentry *new_dentry, int triple)
{
        if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
                /* this case is rename_other */
                double_up(&old_dir->i_zombie, &new_dir->i_zombie);
        } else {
                /* rename_dir case */
                if (triple)
                        triple_up(&old_dir->i_zombie,
                                  &new_dir->i_zombie,
                                  &new_dentry->d_inode->i_zombie);
                else
                        double_up(&old_dir->i_zombie,
                                  &new_dir->i_zombie);
                up(&old_dir->i_sb->s_vfs_rename_sem);
        }

        /* done by do_rename */
        unlock_kernel();
        double_up(&old_dir->i_sem, &new_dir->i_sem);
}
 
/* Re-acquire everything presto_triple_unlock() released: both i_sem, the
 * BKL, then (for a directory rename) s_vfs_rename_sem and the two or three
 * i_zombie semaphores, or just the two i_zombie semaphores otherwise. */
inline void presto_triple_fulllock(struct inode *old_dir,
                                   struct inode *new_dir,
                                   struct dentry *old_dentry,
                                   struct dentry *new_dentry, int triple)
{
        /* done by do_rename */
        double_down(&old_dir->i_sem, &new_dir->i_sem);
        lock_kernel();

        if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
                /* this case is rename_other */
                double_down(&old_dir->i_zombie, &new_dir->i_zombie);
                return;
        }

        /* rename_dir case */
        down(&old_dir->i_sb->s_vfs_rename_sem);
        if (triple)
                triple_down(&old_dir->i_zombie,
                            &new_dir->i_zombie,
                            &new_dentry->d_inode->i_zombie);
        else
                double_down(&old_dir->i_zombie,
                            &new_dir->i_zombie);
}
 
/* Re-acquire only the two directories' i_sem and the BKL.  old_dentry,
 * new_dentry and triple are accepted for symmetry with the other
 * presto_triple_* helpers but are not used. */
inline void presto_triple_relock_sem(struct inode *old_dir,
struct inode *new_dir,
struct dentry *old_dentry,
struct dentry *new_dentry, int triple)
{
/* done by do_rename */
double_down(&old_dir->i_sem, &new_dir->i_sem);
lock_kernel();
}
 
/* Re-acquire the rename locks other than i_sem and the BKL: for a
 * directory rename s_vfs_rename_sem followed by two or three i_zombie
 * semaphores, otherwise just the two directories' i_zombie. */
inline void presto_triple_relock_other(struct inode *old_dir,
                                       struct inode *new_dir,
                                       struct dentry *old_dentry,
                                       struct dentry *new_dentry, int triple)
{
        if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
                /* this case is rename_other */
                double_down(&old_dir->i_zombie, &new_dir->i_zombie);
                return;
        }

        /* rename_dir case */
        down(&old_dir->i_sb->s_vfs_rename_sem);
        if (triple)
                triple_down(&old_dir->i_zombie,
                            &new_dir->i_zombie,
                            &new_dentry->d_inode->i_zombie);
        else
                double_down(&old_dir->i_zombie,
                            &new_dir->i_zombie);
}
 
 
// XXX this can be optimized: renames across filesets only require
//     multiple KML records, but can locally be executed normally.
/* rename method.
 *
 * Source and target must belong to the same fileset (-EXDEV otherwise).
 * All rename locks are dropped while permits on both directories are
 * obtained; i_sem and the BKL are re-taken for do_rename() and the
 * remaining locks afterwards.
 *
 * Returns the preparation error, -EXDEV, -EROFS when a permit is refused,
 * or the result of do_rename().
 */
int presto_rename(struct inode *old_dir, struct dentry *old_dentry,
                  struct inode *new_dir, struct dentry *new_dentry)
{
        int error;
        struct presto_cache *cache, *new_cache;
        struct presto_file_set *fset, *new_fset;
        struct lento_vfs_context info;
        struct dentry *old_parent = old_dentry->d_parent;
        struct dentry *new_parent = new_dentry->d_parent;
        int triple;

        ENTRY;
        error = presto_prep(old_dentry, &cache, &fset);
        if ( error ) {
                EXIT;
                return error;
        }
        error = presto_prep(new_parent, &new_cache, &new_fset);
        if ( error ) {
                EXIT;
                return error;
        }

        if ( fset != new_fset ) {
                EXIT;
                return -EXDEV;
        }

        /* A directory rename onto an existing target holds a third
         * i_zombie (the target's); remember that so the unlock/relock
         * helpers stay symmetric. */
        triple = (S_ISDIR(old_dentry->d_inode->i_mode) && new_dentry->d_inode)?
                1:0;

        presto_triple_unlock(old_dir, new_dir, old_dentry, new_dentry, triple);

        if ( presto_get_permit(old_dir) < 0 ) {
                EXIT;
                presto_triple_fulllock(old_dir, new_dir, old_dentry, new_dentry, triple);
                return -EROFS;
        }
        if ( presto_get_permit(new_dir) < 0 ) {
                EXIT;
                /* The permit on old_dir was already granted; give it back
                 * so it is not leaked on this error path. */
                presto_put_permit(old_dir);
                presto_triple_fulllock(old_dir, new_dir, old_dentry, new_dentry, triple);
                return -EROFS;
        }

        presto_triple_relock_sem(old_dir, new_dir, old_dentry, new_dentry, triple);
        /* LENTO_FL_KML is set only when the caller is not Lento itself. */
        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;
        error = do_rename(fset, old_parent, old_dentry, new_parent,
                          new_dentry, &info);
        presto_triple_relock_other(old_dir, new_dir, old_dentry, new_dentry, triple);

        presto_put_permit(new_dir);
        presto_put_permit(old_dir);
        return error;
}
 
/* basically this allows the ilookup processes access to all files for
 * reading, while not making ilookup totally insecure.  This could all
 * go away if we could set the CAP_DAC_READ_SEARCH capability for the client.
 */
/* If posix acls are available, the underlying cache fs will export the
 * appropriate permission function.  Thus we do not worry here about ACLs
 * or EAs. -SHP
 */
/* permission method.
 *
 * Returns 0 immediately for callers allowed to use ilookup as long as the
 * request does not include the write bit.  Otherwise defers to the cache
 * filesystem's own permission method for regular files and directories
 * when it has one, and to the generic VFS check in every other case.
 */
int presto_permission(struct inode *inode, int mask)
{
        unsigned short mode = inode->i_mode;
        struct presto_cache *cache;
        int rc;

        ENTRY;
        if ( presto_can_ilookup() && !(mask & S_IWOTH)) {
                CDEBUG(D_CACHE, "ilookup on %ld OK\n", inode->i_ino);
                EXIT;
                return 0;
        }

        cache = presto_get_cache(inode);

        if ( cache ) {
                /* we only override the file/dir permission operations */
                struct inode_operations *fiops = filter_c2cfiops(cache->cache_filter);
                struct inode_operations *diops = filter_c2cdiops(cache->cache_filter);

                if ( S_ISREG(mode) && fiops && fiops->permission ) {
                        EXIT;
                        return fiops->permission(inode, mask);
                }
                if ( S_ISDIR(mode) && diops && diops->permission ) {
                        EXIT;
                        return diops->permission(inode, mask);
                }
        }

        /* The cache filesystem doesn't have its own permission function.
         * Call the generic VFS check directly.  The previous code reached
         * it by temporarily storing NULL in inode->i_op->permission around
         * a call to permission(); that writes to an operations table
         * reachable from other inodes and leaves a window in which a
         * concurrent permission() call would skip this function.
         * NOTE(review): relies on permission() falling back to
         * vfs_permission() when no permission op is set -- confirm against
         * the target kernel's fs/namei.c.
         */
        rc = vfs_permission(inode, mask);

        EXIT;
        return rc;
}
 
 
int presto_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
char buf[1024];
struct izo_ioctl_data *data = NULL;
struct presto_dentry_data *dd;
int rc;
 
ENTRY;
 
/* Try the filesystem's ioctl first, and return if it succeeded. */
dd = presto_d2d(file->f_dentry);
if (dd && dd->dd_fset) {
int (*cache_ioctl)(struct inode *, struct file *, unsigned int, unsigned long ) = filter_c2cdfops(dd->dd_fset->fset_cache->cache_filter)->ioctl;
rc = -ENOTTY;
if (cache_ioctl)
rc = cache_ioctl(inode, file, cmd, arg);
if (rc != -ENOTTY) {
EXIT;
return rc;
}
}
 
if (current->euid != 0 && current->euid != izo_authorized_uid) {
EXIT;
return -EPERM;
}
 
memset(buf, 0, sizeof(buf));
if (izo_ioctl_getdata(buf, buf + 1024, (void *)arg)) {
CERROR("intermezzo ioctl: data error\n");
return -EINVAL;
}
data = (struct izo_ioctl_data *)buf;
switch(cmd) {
case IZO_IOC_REINTKML: {
int rc;
int cperr;
rc = kml_reint_rec(file, data);
 
EXIT;
cperr = copy_to_user((char *)arg, data, sizeof(*data));
if (cperr) {
CERROR("WARNING: cperr %d\n", cperr);
rc = -EFAULT;
}
return rc;
}
 
case IZO_IOC_GET_RCVD: {
struct izo_rcvd_rec rec;
struct presto_file_set *fset;
int rc;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
rc = izo_rcvd_get(&rec, fset, data->ioc_uuid);
if (rc < 0) {
EXIT;
return rc;
}
 
EXIT;
return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0;
}
 
case IZO_IOC_REPSTATUS: {
__u64 client_kmlsize;
struct izo_rcvd_rec *lr_client;
struct izo_rcvd_rec rec;
struct presto_file_set *fset;
int minor;
int rc;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
client_kmlsize = data->ioc_kmlsize;
lr_client = (struct izo_rcvd_rec *) data->ioc_pbuf1;
 
rc = izo_repstatus(fset, client_kmlsize,
lr_client, &rec);
if (rc < 0) {
EXIT;
return rc;
}
 
EXIT;
return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0;
}
 
case IZO_IOC_GET_CHANNEL: {
struct presto_file_set *fset;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
data->ioc_dev = fset->fset_cache->cache_psdev->uc_minor;
CDEBUG(D_PSDEV, "CHANNEL %d\n", data->ioc_dev);
EXIT;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
 
case IZO_IOC_SET_IOCTL_UID:
izo_authorized_uid = data->ioc_uid;
EXIT;
return 0;
 
case IZO_IOC_SET_PID:
rc = izo_psdev_setpid(data->ioc_dev);
EXIT;
return rc;
 
case IZO_IOC_SET_CHANNEL:
rc = izo_psdev_setchannel(file, data->ioc_dev);
EXIT;
return rc;
 
case IZO_IOC_GET_KML_SIZE: {
struct presto_file_set *fset;
__u64 kmlsize;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
 
kmlsize = presto_kml_offset(fset) + fset->fset_kml_logical_off;
 
EXIT;
return copy_to_user((char *)arg, &kmlsize, sizeof(kmlsize))?-EFAULT : 0;
}
 
case IZO_IOC_PURGE_FILE_DATA: {
struct presto_file_set *fset;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
 
rc = izo_purge_file(fset, data->ioc_inlbuf1);
EXIT;
return rc;
}
 
case IZO_IOC_GET_FILEID: {
rc = izo_get_fileid(file, data);
EXIT;
if (rc)
return rc;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
 
case IZO_IOC_SET_FILEID: {
rc = izo_set_fileid(file, data);
EXIT;
if (rc)
return rc;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
 
case IZO_IOC_ADJUST_LML: {
struct lento_vfs_context *info;
info = (struct lento_vfs_context *)data->ioc_inlbuf1;
rc = presto_adjust_lml(file, info);
EXIT;
return rc;
}
 
case IZO_IOC_CONNECT: {
struct presto_file_set *fset;
int minor;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
rc = izo_upc_connect(minor, data->ioc_ino,
data->ioc_generation, data->ioc_uuid,
data->ioc_flags);
EXIT;
return rc;
}
 
case IZO_IOC_GO_FETCH_KML: {
struct presto_file_set *fset;
int minor;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
rc = izo_upc_go_fetch_kml(minor, fset->fset_name,
data->ioc_uuid, data->ioc_kmlsize);
EXIT;
return rc;
}
 
case IZO_IOC_REVOKE_PERMIT:
if (data->ioc_flags)
rc = izo_revoke_permit(file->f_dentry, data->ioc_uuid);
else
rc = izo_revoke_permit(file->f_dentry, NULL);
EXIT;
return rc;
 
case IZO_IOC_CLEAR_FSET:
rc = izo_clear_fsetroot(file->f_dentry);
EXIT;
return rc;
 
case IZO_IOC_CLEAR_ALL_FSETS: {
struct presto_file_set *fset;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
 
rc = izo_clear_all_fsetroots(fset->fset_cache);
EXIT;
return rc;
}
 
case IZO_IOC_SET_FSET:
/*
* Mark this dentry as being a fileset root.
*/
rc = presto_set_fsetroot_from_ioc(file->f_dentry,
data->ioc_inlbuf1,
data->ioc_flags);
EXIT;
return rc;
 
 
case IZO_IOC_MARK: {
int res = 0; /* resulting flags - returned to user */
int error;
 
CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %d\n",
file->f_dentry->d_inode->i_ino, data->ioc_and_flag,
data->ioc_or_flag, data->ioc_mark_what);
 
switch (data->ioc_mark_what) {
case MARK_DENTRY:
error = izo_mark_dentry(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
break;
case MARK_FSET:
error = izo_mark_fset(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
break;
case MARK_CACHE:
error = izo_mark_cache(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
break;
case MARK_GETFL: {
int fflags, cflags;
data->ioc_and_flag = 0xffffffff;
data->ioc_or_flag = 0;
error = izo_mark_dentry(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
if (error)
break;
error = izo_mark_fset(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &fflags);
if (error)
break;
error = izo_mark_cache(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag,
&cflags);
 
if (error)
break;
data->ioc_and_flag = fflags;
data->ioc_or_flag = cflags;
break;
}
default:
error = -EINVAL;
}
 
if (error) {
EXIT;
return error;
}
data->ioc_mark_what = res;
CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %x\n",
file->f_dentry->d_inode->i_ino, data->ioc_and_flag,
data->ioc_or_flag, data->ioc_mark_what);
 
EXIT;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
#if 0
case IZO_IOC_CLIENT_MAKE_BRANCH: {
struct presto_file_set *fset;
int minor;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
rc = izo_upc_client_make_branch(minor, fset->fset_name,
data->ioc_inlbuf1,
data->ioc_inlbuf2);
EXIT;
return rc;
}
#endif
case IZO_IOC_SERVER_MAKE_BRANCH: {
struct presto_file_set *fset;
int minor;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
izo_upc_server_make_branch(minor, data->ioc_inlbuf1);
EXIT;
return 0;
}
case IZO_IOC_SET_KMLSIZE: {
struct presto_file_set *fset;
int minor;
struct izo_rcvd_rec rec;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
rc = izo_upc_set_kmlsize(minor, fset->fset_name, data->ioc_uuid,
data->ioc_kmlsize);
 
if (rc != 0) {
EXIT;
return rc;
}
 
rc = izo_rcvd_get(&rec, fset, data->ioc_uuid);
if (rc == -EINVAL) {
/* We don't know anything about this uuid yet; no
* worries. */
memset(&rec, 0, sizeof(rec));
} else if (rc <= 0) {
CERROR("InterMezzo: error reading last_rcvd: %d\n", rc);
EXIT;
return rc;
}
rec.lr_remote_offset = data->ioc_kmlsize;
rc = izo_rcvd_write(fset, &rec);
if (rc <= 0) {
CERROR("InterMezzo: error writing last_rcvd: %d\n", rc);
EXIT;
return rc;
}
EXIT;
return rc;
}
case IZO_IOC_BRANCH_UNDO: {
struct presto_file_set *fset;
int minor;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
rc = izo_upc_branch_undo(minor, fset->fset_name,
data->ioc_inlbuf1);
EXIT;
return rc;
}
case IZO_IOC_BRANCH_REDO: {
struct presto_file_set *fset;
int minor;
 
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
 
rc = izo_upc_branch_redo(minor, fset->fset_name,
data->ioc_inlbuf1);
EXIT;
return rc;
}
 
case TCGETS:
EXIT;
return -EINVAL;
 
default:
EXIT;
return -EINVAL;
}
EXIT;
return 0;
}
 
/*
 * File operations table for InterMezzo directories.  Only the ioctl hook
 * is filled in here (presto_ioctl); every other member is left zero.
 */
struct file_operations presto_dir_fops = {
.ioctl = presto_ioctl
};
 
/*
 * Inode operations table for InterMezzo directories.  Each namespace
 * operation is routed to the presto_* function of the same name.
 * set_ext_attr is only wired up when CONFIG_FS_EXT_ATTR is defined.
 */
struct inode_operations presto_dir_iops = {
.create = presto_create,
.lookup = presto_lookup,
.link = presto_link,
.unlink = presto_unlink,
.symlink = presto_symlink,
.mkdir = presto_mkdir,
.rmdir = presto_rmdir,
.mknod = presto_mknod,
.rename = presto_rename,
.permission = presto_permission,
.setattr = presto_setattr,
#ifdef CONFIG_FS_EXT_ATTR
.set_ext_attr = presto_set_ext_attr,
#endif
};
 
 
/file.c
0,0 → 1,539
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory.
* Copyright (C) 2000, 2001 Tacit Networks, Inc.
* Copyright (C) 2000 Peter J. Braam
* Copyright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2001 Cluster File Systems, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* This file manages file I/O
*
*/
 
#include <stdarg.h>
 
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
 
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#define __NO_VERSION__
#include <linux/module.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
#include <linux/fsfilter.h>
/*
* these are initialized in super.c
*/
extern int presto_permission(struct inode *inode, int mask);
 
 
/*
 * Send an "open" upcall for dentry `de` on channel `minor`.
 *
 * A PAGE_SIZE scratch buffer is allocated for presto_path(), which is
 * handed the dentry and the fileset's fset_dentry.  If the dentry data has
 * remote_ino == 0, presto_get_fileid() is tried first.  When remote_ino is
 * still not positive afterwards, the upcall is sent with a zeroed
 * lento_vfs_context and an error is logged ("fetching by name").
 *
 * Returns -ENODEV if the dentry has no fileset, -ENOMEM if the scratch
 * buffer cannot be allocated, otherwise the result of izo_upc_open().
 */
static int presto_open_upcall(int minor, struct dentry *de)
{
        int rc = 0;
        char *path, *buffer;
        struct presto_file_set *fset;
        int pathlen;
        struct lento_vfs_context info;
        struct presto_dentry_data *dd = presto_d2d(de);

        /* presto_fset() can return NULL (the ioctl paths check for it);
         * bail out before fset->fset_dentry is dereferenced below. */
        fset = presto_fset(de);
        if (fset == NULL)
                return -ENODEV;

        PRESTO_ALLOC(buffer, PAGE_SIZE);
        if ( !buffer ) {
                CERROR("PRESTO: out of memory!\n");
                return -ENOMEM;
        }

        path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
        pathlen = MYPATHLEN(buffer, path);
        CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);

        /* No remote file id cached yet: try to obtain one first. */
        if (dd->remote_ino == 0) {
                rc = presto_get_fileid(minor, fset, de);
        }

        memset (&info, 0, sizeof(info));
        if (dd->remote_ino > 0) {
                info.remote_ino = dd->remote_ino;
                info.remote_generation = dd->remote_generation;
        } else
                CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc,
                       dd->remote_ino);

        /* rc from presto_get_fileid() is only used for the log above. */
        rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);
        PRESTO_FREE(buffer, PAGE_SIZE);
        return rc;
}
 
/*
 * Decide whether opening `file` should trigger a data-on-demand (DOD)
 * fetch.  Returns 1 when a fetch is needed and 0 otherwise.  The checks
 * are evaluated in order and the first one that applies wins.
 */
static inline int open_check_dod(struct file *file,
                                 struct presto_file_set *fset)
{
        struct presto_cache *cache = fset->fset_cache;
        int minor = presto_c2m(cache);
        int generation;
        ino_t ino;

        if (ISLENTO(minor)) {
                CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
                return 0;
        }

        /* Files are only ever opened by inode during backfetches, when by
         * definition we have the authoritative copy of the data. No DOD. */
        if (izo_dentry_is_ilookup(file->f_dentry, &ino, &generation)) {
                CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
                return 0;
        }

        if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) {
                CDEBUG(D_CACHE, "fileset not on demand.\n");
        } else if (file->f_flags & O_TRUNC) {
                CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
        } else if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
                CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
        } else if (presto_chk(file->f_dentry, PRESTO_DATA)) {
                CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
        } else if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
                CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
        } else {
                /* None of the exemptions applied. */
                return 1;
        }

        return 0;
}
 
static int presto_file_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct file_operations *fops;
struct presto_cache *cache;
struct presto_file_set *fset;
struct presto_file_data *fdata;
int writable = (file->f_flags & (O_RDWR | O_WRONLY));
int minor, i;
 
ENTRY;
 
if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
EXIT;
return -EBADF;
}
 
minor = presto_c2m(cache);
 
CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
ISLENTO(minor));
 
if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
file->f_flags & O_WRONLY)) {
CDEBUG(D_CACHE, "calling presto_get_permit\n");
if ( presto_get_permit(inode) < 0 ) {
EXIT;
return -EROFS;
}
presto_put_permit(inode);
}
 
if (open_check_dod(file, fset)) {
CDEBUG(D_CACHE, "presto_open_upcall\n");
CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
rc = presto_open_upcall(minor, file->f_dentry);
if (rc) {
EXIT;
CERROR("%s: returning error %d\n", __FUNCTION__, rc);
return rc;
}
 
}
 
/* file was truncated upon open: do not refetch */
if (file->f_flags & O_TRUNC) {
CDEBUG(D_CACHE, "setting DATA, ATTR\n");
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
}
 
fops = filter_c2cffops(cache->cache_filter);
if ( fops->open ) {
CDEBUG(D_CACHE, "calling fs open\n");
rc = fops->open(inode, file);
 
if (rc) {
EXIT;
return rc;
}
}
 
if (writable) {
PRESTO_ALLOC(fdata, sizeof(*fdata));
if (!fdata) {
EXIT;
return -ENOMEM;
}
/* LOCK: XXX check that the kernel lock protects this alloc */
fdata->fd_do_lml = 0;
fdata->fd_bytes_written = 0;
fdata->fd_fsuid = current->fsuid;
fdata->fd_fsgid = current->fsgid;
fdata->fd_mode = file->f_dentry->d_inode->i_mode;
fdata->fd_uid = file->f_dentry->d_inode->i_uid;
fdata->fd_gid = file->f_dentry->d_inode->i_gid;
fdata->fd_ngroups = current->ngroups;
for (i=0 ; i < current->ngroups ; i++)
fdata->fd_groups[i] = current->groups[i];
if (!ISLENTO(minor))
fdata->fd_info.flags = LENTO_FL_KML;
else {
/* this is for the case of DOD,
reint_close will adjust flags if needed */
fdata->fd_info.flags = 0;
}
 
presto_getversion(&fdata->fd_version, inode);
file->private_data = fdata;
} else {
file->private_data = NULL;
}
 
EXIT;
return 0;
}
 
/*
 * Overwrite the lento_vfs_context stored in an open file's private data
 * with *info.
 *
 * Returns 0 on success, or -EINVAL when the file has no private data
 * attached.
 */
int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
{
        struct presto_file_data *fd = file->private_data;
        int rc = -EINVAL;

        if (fd) {
                memcpy(&fd->fd_info, info, sizeof(*info));
                rc = 0;
        }

        EXIT;
        return rc;
}
 
 
/*
 * release() for InterMezzo regular files.
 *
 * Calls the underlying filesystem's release (if any) and, when the file
 * was written through this open (fd_do_lml set), takes a permit, records
 * the inode's mtime in the per-open context and runs presto_do_close().
 *
 * The per-open presto_file_data is always freed before returning.  The
 * original code freed it only when rc was 0, so it was leaked if
 * presto_prep() failed, if the permit could not be obtained, or if the
 * close itself reported an error.
 *
 * Returns the presto_prep() error, -EROFS if no permit could be obtained,
 * or otherwise the result of the last release/close step that ran.
 */
static int presto_file_release(struct inode *inode, struct file *file)
{
        int rc;
        struct file_operations *fops;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;
        ENTRY;

        rc = presto_prep(file->f_dentry, &cache, &fset);
        if ( rc )
                goto out_free;

        fops = filter_c2cffops(cache->cache_filter);
        if (fops && fops->release)
                rc = fops->release(inode, file);

        CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
               ISLENTO(cache->cache_psdev->uc_minor),
               cache->cache_psdev->uc_minor, rc, fdata);

        /* this file was modified: ignore close errors, write KML */
        if (fdata && fdata->fd_do_lml) {
                /* XXX: remove when lento gets file granularity cd */
                if ( presto_get_permit(inode) < 0 ) {
                        rc = -EROFS;
                        goto out_free;
                }
                fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
                rc = presto_do_close(fset, file);
                presto_put_permit(inode);
        }

out_free:
        /* Free on every path so the allocation made at open time cannot
         * leak when one of the steps above fails. */
        if (fdata) {
                PRESTO_FREE(fdata, sizeof(*fdata));
                file->private_data = NULL;
        }
        EXIT;
        return rc;
}
 
/*
 * Account `res` freshly written bytes against the open file and, once the
 * fileset's fset_file_maxio threshold is reached, journal an intermediate
 * close.
 *
 * This only happens for filesets flagged FSET_JCLOSE_ON_WRITE on a
 * non-lento channel.  The journal close is done after a fixed or a
 * specified amount of KBytes, currently a global parameter set with
 * sysctl.  If files are open for a long time, this gives added
 * protection.  (XXX todo: per cache, add ioctl, handle journaling in a
 * thread, add more options etc.)
 *
 * The caller must make sure file->private_data is non-NULL.
 */
static void presto_apply_write_policy(struct file *file,
                                      struct presto_file_set *fset, loff_t res)
{
        struct presto_file_data *fd = file->private_data;
        struct presto_cache *cache = fset->fset_cache;
        struct presto_version cur_version;
        struct rec_info rec;
        int rc;

        if (!(fset->fset_flags & FSET_JCLOSE_ON_WRITE))
                return;
        if (ISLENTO(cache->cache_psdev->uc_minor))
                return;

        fd->fd_bytes_written += res;
        if (fd->fd_bytes_written < fset->fset_file_maxio)
                return;

        presto_getversion(&cur_version, file->f_dentry->d_inode);

        /* This is really heavy weight and should be fixed ASAP. At most
         * we should be recording the number of bytes written and not
         * locking the kernel, wait for permits, etc, on the write path.
         * SHP */
        lock_kernel();
        if (presto_get_permit(file->f_dentry->d_inode) < 0) {
                EXIT;
                /* we must be disconnected, not to worry */
                unlock_kernel();
                return;
        }
        rc = presto_journal_close(&rec, fset, file, file->f_dentry,
                                  &fd->fd_version, &cur_version);
        presto_put_permit(file->f_dentry->d_inode);
        unlock_kernel();

        if (rc) {
                CERROR("presto_close: cannot journal close\n");
                /* XXX these errors are really bad */
                /* panic(); */
                return;
        }

        /* Journaled successfully: start counting from zero again. */
        fd->fd_bytes_written = 0;
}
 
/*
 * write() for InterMezzo regular files.
 *
 * Reserves cache space for the write, writes an LML record inside its own
 * transaction on the first non-empty write through this open file (unless
 * the dentry is marked PRESTO_DONT_JOURNAL), then hands the data to the
 * underlying filesystem's write and applies the write policy.
 *
 * Returns the underlying write's result, the presto_prep() error, or
 * -ENOSPC when space or a transaction cannot be obtained.
 *
 * Change from the original: file->private_data is checked for NULL
 * before fd_do_lml is read.  The function already guarded its later use
 * with "&& fdata", so a NULL private_data was treated as possible there
 * yet dereferenced unconditionally here.  With no per-open data, no LML
 * record is written and the write goes straight to the lower filesystem.
 */
static ssize_t presto_file_write(struct file *file, const char *buf,
                                 size_t size, loff_t *off)
{
        struct rec_info rec;
        int error;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct file_operations *fops;
        ssize_t res;
        int do_lml_here;
        void *handle = NULL;
        unsigned long blocks;
        struct presto_file_data *fdata;
        loff_t res_size;

        error = presto_prep(file->f_dentry, &cache, &fset);
        if ( error ) {
                EXIT;
                return error;
        }

        blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
        /* XXX 3 is for ext2 indirect blocks ... */
        res_size = 2 * PRESTO_REQHIGH + ((blocks+3)
                << file->f_dentry->d_inode->i_sb->s_blocksize_bits);

        error = presto_reserve_space(fset->fset_cache, res_size);
        CDEBUG(D_INODE, "Reserved %Ld for %d\n", res_size, size);
        if ( error ) {
                EXIT;
                return -ENOSPC;
        }

        CDEBUG(D_INODE, "islento %d, minor: %d\n",
               ISLENTO(cache->cache_psdev->uc_minor),
               cache->cache_psdev->uc_minor);

        /*
         *  XXX this lock should become a per inode lock when
         *  Vinny's changes are in; we could just use i_sem.
         */
        read_lock(&fset->fset_lml.fd_lock);
        fdata = (struct presto_file_data *)file->private_data;
        /* Only the first non-empty write on this open file writes an LML
         * record; without per-open data there is nothing to record. */
        do_lml_here = fdata && size && (fdata->fd_do_lml == 0) &&
                !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);

        if (do_lml_here)
                fdata->fd_do_lml = 1;
        read_unlock(&fset->fset_lml.fd_lock);

        /* XXX
           There might be a bug here.  We need to make
           absolutely sure that the ext3_file_write commits
           after our transaction that writes the LML record.
           Nesting the file write helps if new blocks are allocated.
        */
        res = 0;
        if (do_lml_here) {
                struct presto_version file_version;
                /* handle different space reqs from file system below! */
                handle = presto_trans_start(fset, file->f_dentry->d_inode,
                                            KML_OPCODE_WRITE);
                if ( IS_ERR(handle) ) {
                        presto_release_space(fset->fset_cache, res_size);
                        CERROR("presto_write: no space for transaction\n");
                        return -ENOSPC;
                }

                presto_getversion(&file_version, file->f_dentry->d_inode);
                res = presto_write_lml_close(&rec, fset, file,
                                             fdata->fd_info.remote_ino,
                                             fdata->fd_info.remote_generation,
                                             &fdata->fd_info.remote_version,
                                             &file_version);
                fdata->fd_lml_offset = rec.offset;
                if ( res ) {
                        CERROR("intermezzo: PANIC failed to write LML\n");
                        /* Deliberate NULL write kept from the original. */
                        *(int *)0 = 1;
                        EXIT;
                        goto exit_write;
                }
                presto_trans_commit(fset, handle);
        }

        fops = filter_c2cffops(cache->cache_filter);
        res = fops->write(file, buf, size, off);
        if ( res != size ) {
                CDEBUG(D_FILE, "file write returns short write: size %d, res %d\n", size, res);
        }

        if ( (res > 0) && fdata )
                presto_apply_write_policy(file, fset, res);

 exit_write:
        presto_release_space(fset->fset_cache, res_size);
        return res;
}
 
/*
 * File operations table for InterMezzo regular files: write, open and
 * release go through the presto_file_* wrappers defined in this file, and
 * ioctl is routed to presto_ioctl.  All other members are left zero.
 */
struct file_operations presto_file_fops = {
.write = presto_file_write,
.open = presto_file_open,
.release = presto_file_release,
.ioctl = presto_ioctl
};
 
/*
 * Inode operations table for InterMezzo regular files: permission and
 * setattr are routed to the presto_* functions; set_ext_attr is only
 * wired up when CONFIG_FS_EXT_ATTR is defined.
 */
struct inode_operations presto_file_iops = {
.permission = presto_permission,
.setattr = presto_setattr,
#ifdef CONFIG_FS_EXT_ATTR
.set_ext_attr = presto_set_ext_attr,
#endif
};
 
/* FIXME: I bet we want to add a lock here and in presto_file_open. */
/*
 * Purge the cached data of `file` within fileset `fset`.
 *
 * As compiled, this is a stub: the whole implementation sits inside
 * "#if 0" and the function unconditionally returns 0.  The disabled code
 * sketches the intended steps (look the file up, refuse if it was accessed
 * in the last 5 seconds, truncate to 0 and back to the old size inside a
 * KML_OPCODE_TRUNC transaction) and still carries several FIXMEs.
 */
int izo_purge_file(struct presto_file_set *fset, char *file)
{
#if 0
void *handle = NULL;
char *path = NULL;
struct nameidata nd;
struct dentry *dentry;
int rc = 0, len;
loff_t oldsize;

/* FIXME: not mtpt it's gone */
len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1;
PRESTO_ALLOC(path, len + 1);
if (path == NULL)
return -1;

sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file);
rc = izo_lookup_file(fset, path, &nd);
if (rc)
goto error;
dentry = nd.dentry;

/* FIXME: take a lock here */

if (dentry->d_inode->i_atime > CURRENT_TIME - 5) {
/* We lost the race; this file was accessed while we were doing
* ioctls and lookups and whatnot. */
rc = -EBUSY;
goto error_unlock;
}

/* FIXME: Check if this file is open. */

handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC);
if (IS_ERR(handle)) {
rc = -ENOMEM;
goto error_unlock;
}

/* FIXME: Write LML record */

oldsize = dentry->d_inode->i_size;
rc = izo_do_truncate(fset, dentry, 0, oldsize);
if (rc != 0)
goto error_clear;
rc = izo_do_truncate(fset, dentry, oldsize, 0);
if (rc != 0)
goto error_clear;

error_clear:
/* FIXME: clear LML record */

error_unlock:
/* FIXME: release the lock here */

error:
if (handle != NULL && !IS_ERR(handle))
presto_trans_commit(fset, handle);
if (path != NULL)
PRESTO_FREE(path, len + 1);
return rc;
#else
/* Purging is not implemented; report success without doing anything. */
return 0;
#endif
}
/upcall.c
0,0 → 1,557
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc. <braam@clusterfs.com>
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Mostly platform independent upcall operations to a cache manager:
* -- upcalls
* -- upcall routines
*
*/
 
#include <asm/system.h>
#include <asm/segment.h>
#include <asm/signal.h>
#include <linux/signal.h>
 
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <linux/string.h>
#include <asm/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/segment.h>
 
#include <linux/intermezzo_lib.h>
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#include <linux/intermezzo_idl.h>
 
/*
At present:
-- Asynchronous calls:
- kml: give a "more" kml indication to userland
- kml_truncate: initiate KML truncation
- release_permit: kernel is done with permit
-- Synchronous
- open: fetch file
- permit: get a permit
 
Errors returned by user level code are positive
 
*/
 
/*
 * Allocate and fill an upcall message: an izo_upcall_hdr followed by the
 * optional path, fileset name and record, each padded to its rounded
 * size.
 *
 * opcode    - stored in hdr->u_opc
 * pathlen   - not used; the path length is taken from strlen(path)
 * path      - optional string packed first after the header
 * fsetname  - optional string packed after the path
 * reclen    - length in bytes of `rec`
 * rec       - optional opaque record packed last
 * size      - out: total message size in bytes (set even on failure)
 *
 * Returns the zero-initialised, filled-in header, or NULL when the
 * allocation fails.
 */
static struct izo_upcall_hdr *upc_pack(__u32 opcode, int pathlen, char *path,
                                       char *fsetname, int reclen, char *rec,
                                       int *size)
{
        struct izo_upcall_hdr *hdr;
        char *cursor;
        int total = sizeof(struct izo_upcall_hdr);
        ENTRY;

        /* Work out the full message size before allocating. */
        if (fsetname)
                total += round_strlen(fsetname);
        if (path)
                total += round_strlen(path);
        if (rec)
                total += size_round(reclen);
        *size = total;

        PRESTO_ALLOC(hdr, total);
        if (hdr == NULL) {
                CERROR("intermezzo upcall: out of memory (opc %d)\n", opcode);
                EXIT;
                return NULL;
        }
        memset(hdr, 0, total);

        /* XXX do we need fsuid ? */
        hdr->u_len = total;
        hdr->u_version = IZO_UPC_VERSION;
        hdr->u_opc = opcode;
        hdr->u_pid = current->pid;
        hdr->u_uid = current->fsuid;

        /* The payload starts right behind the fixed header; it is packed
         * in the order path, fileset name, record. */
        cursor = (char *)hdr + sizeof(*hdr);

        if (path) {
                /*XXX Robert: please review what len to pass in for
                  NUL terminated strings */
                hdr->u_pathlen = strlen(path);
                LOGL0(path, hdr->u_pathlen, cursor);
        }
        if (fsetname) {
                hdr->u_fsetlen = strlen(fsetname);
                LOGL0(fsetname, strlen(fsetname), cursor);
        }
        if (rec) {
                hdr->u_reclen = reclen;
                LOGL(rec, reclen, cursor);
        }

        EXIT;
        return hdr;
}
 
/* the upcalls */
/*
 * Asynchronous IZO_UPC_KML upcall: give a "more KML" indication to
 * userland for fileset `fsetname`, describing the span by offset/length
 * and first/last record number.
 *
 * Returns 0 without sending anything when lento is not up on `minor`,
 * -ENOMEM when the message cannot be allocated, otherwise the negated
 * result of izo_upc_upcall().
 */
int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length, __u32 last_recno, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;

        ENTRY;
        if (!presto_lento_up(minor)) {
                EXIT;
                return 0;
        }

        hdr = upc_pack(IZO_UPC_KML, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        hdr->u_offset = offset;
        hdr->u_first_recno = first_recno;
        hdr->u_length = length;
        hdr->u_last_recno = last_recno;

        CDEBUG(D_UPCALL, "KML: fileset %s, offset %Lu, length %Lu, "
               "first %u, last %d; minor %d\n",
               fsetname, hdr->u_offset, hdr->u_length, hdr->u_first_recno,
               hdr->u_last_recno, minor);

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);

        EXIT;
        return -error;
}
 
/*
 * Asynchronous IZO_UPC_KML_TRUNC upcall: initiate KML truncation for
 * fileset `fsetname`, passing the length and last record number.
 *
 * Returns 0 without sending anything when lento is not up on `minor`,
 * -ENOMEM when the message cannot be allocated, otherwise the result of
 * izo_upc_upcall() as-is.
 *
 * NOTE(review): unlike izo_upc_kml(), the upcall result is returned
 * without negation here; confirm which sign callers expect.
 */
int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;

        ENTRY;
        if (!presto_lento_up(minor)) {
                EXIT;
                return 0;
        }

        hdr = upc_pack(IZO_UPC_KML_TRUNC, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        hdr->u_length = length;
        hdr->u_last_recno = last_recno;

        CDEBUG(D_UPCALL, "KML TRUNCATE: fileset %s, length %Lu, "
               "last recno %d, minor %d\n",
               fsetname, hdr->u_length, hdr->u_last_recno, minor);

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);

        EXIT;
        return error;
}
 
/*
 * Synchronous IZO_UPC_OPEN upcall: ask the cache manager to fetch the
 * file at `path` in fileset `fsetname`.  *info is packed into the message
 * as the record.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname, struct lento_vfs_context *info)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_OPEN, pathlen, path, fsetname,
                       sizeof(*info), (char*)info, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        CDEBUG(D_UPCALL, "path %s\n", path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_GET_FILEID upcall for `path` in fileset `fsetname`.
 * The caller-supplied record (`rec`, `reclen` bytes) is packed into the
 * message.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_get_fileid(int minor, __u32 reclen, char *rec,
                       __u32 pathlen, char *path, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_GET_FILEID, pathlen, path, fsetname, reclen, rec, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        CDEBUG(D_UPCALL, "path %s\n", path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_BACKFETCH upcall for `path` in fileset `fsetname`;
 * *info is packed into the message as the record.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_backfetch(int minor, char *path, char *fsetname, struct lento_vfs_context *info)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_BACKFETCH, strlen(path), path, fsetname,
                       sizeof(*info), (char *)info, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        /* This is currently synchronous, kml_reint_record blocks */
        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_PERMIT upcall: request a permit for `path` in
 * fileset `fsetname`.
 *
 * When the upcall result is -EROFS the cache of `dentry` is marked
 * CACHE_CLIENT_RO through izo_mark_cache() and an error is logged.
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * result of izo_upc_upcall() as-is (not negated).
 */
int izo_upc_permit(int minor, struct dentry *dentry, __u32 pathlen, char *path,
                   char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;

        ENTRY;

        hdr = upc_pack(IZO_UPC_PERMIT, pathlen, path, fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        CDEBUG(D_UPCALL, "Permit minor %d path %s\n", minor, path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);

        if (error == -EROFS) {
                int err;
                CERROR("InterMezzo: ERROR - requested permit for read-only "
                       "fileset.\n   Setting \"%s\" read-only!\n", path);
                err = izo_mark_cache(dentry, 0xFFFFFFFF, CACHE_CLIENT_RO, NULL);
                if (err)
                        CERROR("InterMezzo ERROR: mark_cache %d\n", err);
        } else if (error) {
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
        }

        EXIT;
        return error;
}
 
/* This is a ping-pong upcall handled on the server when a client (uuid)
* requests the permit for itself. */
/*
 * Synchronous IZO_UPC_REVOKE_PERMIT upcall for fileset `fsetname`; the
 * 16-byte `uuid` is copied into the message header.
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * negated result of izo_upc_upcall().
 */
int izo_upc_revoke_permit(int minor, char *fsetname, __u8 uuid[16])
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;

        ENTRY;

        hdr = upc_pack(IZO_UPC_REVOKE_PERMIT, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);

        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
/*
 * Asynchronous IZO_UPC_GO_FETCH_KML upcall for fileset `fsetname`.
 * `kmlsize` travels in the header's u_offset field and `uuid` in u_uuid.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_go_fetch_kml(int minor, char *fsetname, __u8 uuid[16],
                         __u64 kmlsize)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_GO_FETCH_KML, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        hdr->u_offset = kmlsize;
        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);
        if (error)
                CERROR("%s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_CONNECT upcall.  No path or fileset name is packed;
 * the arguments are carried in header fields: ip_address in u_offset,
 * port in u_length, uuid in u_uuid and client_flag in u_first_recno.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16],
                    int client_flag)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_CONNECT, 0, NULL, NULL, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        hdr->u_offset = ip_address;
        hdr->u_length = port;
        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
        hdr->u_first_recno = client_flag;

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error) {
                CERROR("%s: error %d\n", __FUNCTION__, error);
        }

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_SET_KMLSIZE upcall for fileset `fsetname`; `uuid`
 * is copied into u_uuid and `kmlsize` is carried in u_length.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_set_kmlsize(int minor, char *fsetname, __u8 uuid[16], __u64 kmlsize)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_SET_KMLSIZE, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
        hdr->u_length = kmlsize;

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("%s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_REPSTATUS upcall for fileset `fsetname`;
 * *lr_server is packed into the message as the record.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_repstatus(int minor,  char * fsetname, struct izo_rcvd_rec *lr_server)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_REPSTATUS, 0, NULL, fsetname,
                       sizeof(*lr_server), (char*)lr_server,
                       &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("%s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
 
/*
 * Disabled (compiled out with "#if 0"): client-side "make branch" upcall.
 * The tag name is packed as the path and the branch name (including its
 * NUL) as the record.
 *
 * NOTE(review): `path` and `pathlen` are declared but never assigned
 * before being handed to PRESTO_FREE() at the error label; this must be
 * fixed before the code is re-enabled.
 */
#if 0
int izo_upc_client_make_branch(int minor, char *fsetname, char *tagname,
char *branchname)
{
int size, error;
struct izo_upcall_hdr *hdr;
int pathlen;
char *path;
ENTRY;

hdr = upc_pack(IZO_UPC_CLIENT_MAKE_BRANCH, strlen(tagname), tagname,
fsetname, strlen(branchname) + 1, branchname, &size);
if (!hdr || IS_ERR(hdr)) {
error = -PTR_ERR(hdr);
goto error;
}

error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
if (error)
CERROR("InterMezzo: error %d\n", error);

error:
PRESTO_FREE(path, pathlen);
EXIT;
return error;
}
#endif
 
/*
 * Synchronous IZO_UPC_SERVER_MAKE_BRANCH upcall for fileset `fsetname`.
 * Unlike most other upcalls here, presto_lento_up() is not consulted.
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * negated result of izo_upc_upcall().
 */
int izo_upc_server_make_branch(int minor, char *fsetname)
{
        int size, error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        hdr = upc_pack(IZO_UPC_SERVER_MAKE_BRANCH, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure; the old
                 * "-PTR_ERR(hdr)" evaluated to 0 and reported success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: error %d\n", error);

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_BRANCH_UNDO upcall for fileset `fsetname`; the
 * branch name is packed in the message's path slot.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_branch_undo(int minor, char *fsetname, char *branchname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_BRANCH_UNDO, strlen(branchname), branchname,
                       fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
 
/*
 * Synchronous IZO_UPC_BRANCH_REDO upcall for fileset `fsetname`; the
 * branch name is packed in the message's path slot.
 *
 * Returns -EIO when lento is not up on `minor`, -ENOMEM when the message
 * cannot be allocated, otherwise the negated result of izo_upc_upcall().
 */
int izo_upc_branch_redo(int minor, char *fsetname, char *branchname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        /* The "+ 1" differs from izo_upc_branch_undo(), but upc_pack()
         * takes the path length from strlen(path) and ignores this
         * argument, so the packed message is the same either way. */
        hdr = upc_pack(IZO_UPC_BRANCH_REDO, strlen(branchname) + 1, branchname,
                       fsetname, 0, NULL, &size);
        if (!hdr || IS_ERR(hdr)) {
                EXIT;
                /* upc_pack() returns NULL on allocation failure;
                 * -PTR_ERR(NULL) is 0 and would report success. */
                return hdr ? PTR_ERR(hdr) : -ENOMEM;
        }

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
/sysctl.c
0,0 → 1,369
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1999 Peter J. Braam <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Sysctl entries for InterMezzo!
*/
 
#define __NO_VERSION__
#include <linux/config.h> /* for CONFIG_PROC_FS */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/swapctl.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <asm/bitops.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/utsname.h>
#include <linux/blk.h>
 
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
/* /proc entries */
 
#ifdef CONFIG_PROC_FS
/* InterMezzo's /proc directory entry pointer; it is only defined here and
 * is not assigned anywhere in this part of the file. */
struct proc_dir_entry *proc_fs_intermezzo;
/*
 * /proc get_info-style callback that currently produces no output.
 *
 * The generated text has length 0, so *start is set to buffer + offset
 * and the number of bytes remaining past `offset` is returned: 0 for
 * offset 0, and -EINVAL once that count goes negative.  `length` is not
 * consulted.
 */
int intermezzo_mount_get_info( char * buffer, char ** start, off_t offset,
                               int length)
{
        /* Nothing is written into the buffer, so there are 0 bytes total. */
        int remaining = 0;

        /* this works as long as we are below 1024 characters! */
        *start = buffer + offset;
        remaining -= offset;

        return (remaining < 0) ? -EINVAL : remaining;
}
 
#endif
 
 
/* SYSCTL below */
 
/* Sysctl table header pointer for InterMezzo; starts out NULL. */
static struct ctl_table_header *intermezzo_table_header = NULL;
/* 0x100 to avoid any chance of collisions at any point in the tree with
* non-directories
*/
#define PSDEV_INTERMEZZO (0x100)

/* Sysctl ids used by the tables below.  Note that the value 10 is not
 * assigned in this list. */
#define PSDEV_DEBUG 1 /* control debugging */
#define PSDEV_TRACE 2 /* control enter/leave pattern */
#define PSDEV_TIMEOUT 3 /* timeout on upcalls to become intrble */
#define PSDEV_HARD 4 /* mount type "hard" or "soft" */
#define PSDEV_NO_FILTER 5 /* controls presto_chk */
#define PSDEV_NO_JOURNAL 6 /* controls presto_chk */
#define PSDEV_NO_UPCALL 7 /* controls lento_upcall */
#define PSDEV_ERRORVAL 8 /* controls presto_debug_fail_blkdev */
#define PSDEV_EXCL_GID 9 /* which GID is ignored by presto */
#define PSDEV_BYTES_TO_CLOSE 11 /* bytes to write before close */
 
/* These are global presto control options */
/* Number of entries explicitly initialised at the head of presto_table. */
#define PRESTO_PRIMARY_CTLCNT 2
/*
 * Master sysctl table.  It is sized for the PRESTO_PRIMARY_CTLCNT global
 * entries plus MAX_CHANNEL further slots plus one more; only the "debug"
 * and "trace" entries (backed by presto_debug and presto_print_entry,
 * mode 0644, handled by proc_dointvec) are initialised here, the rest
 * start out zeroed.
 */
static struct ctl_table presto_table[ PRESTO_PRIMARY_CTLCNT + MAX_CHANNEL + 1] =
{
{PSDEV_DEBUG, "debug", &presto_debug, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_TRACE, "trace", &presto_print_entry, sizeof(int), 0644, NULL, &proc_dointvec},
};
 
/*
* Installing the sysctl entries: strategy
* - have templates for each /proc/sys/intermezzo/ entry
* such an entry exists for each /dev/presto
* (proto_channel_entry)
* - have a template for the contents of such directories
* (proto_psdev_table)
* - have the master table (presto_table)
*
* When installing, malloc, memcpy and fix up the pointers to point to
* the appropriate constants in izo_channels[your_minor]
*/
 
/* Template for the files inside one per-channel sysctl directory.
 * init_intermezzo_sysctl() copies the whole array once per channel and then
 * sets each copy's data pointer to the matching izo_channels[i] field, which
 * is why the data slot is null here.  The init code addresses the entries by
 * position (0 = hard ... 4 = timeout, 5 = errorval), so the order of the
 * rows below must not change without updating it.
 */
static ctl_table proto_psdev_table[] = {
{PSDEV_HARD, "hard", 0, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_NO_FILTER, "no_filter", 0, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_NO_JOURNAL, "no_journal", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_NO_UPCALL, "no_upcall", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_TIMEOUT, "timeout", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
#ifdef PRESTO_DEBUG
{PSDEV_ERRORVAL, "errorval", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
#endif
/* zeroed terminator entry; it is counted in sizeof(proto_psdev_table) */
{ 0 }
};
 
/* Template for one per-channel directory entry: base id PSDEV_INTERMEZZO,
 * no name, no data, mode 0555, no children.  init_intermezzo_sysctl() copies
 * it into presto_table and then fills in ctl_name, procname and child.
 */
static ctl_table proto_channel_entry = {
PSDEV_INTERMEZZO, 0, NULL, 0, 0555, 0,
};
 
/* Root table passed to register_sysctl_table(): a single mode-0555
 * directory named "intermezzo" whose children are presto_table, followed by
 * a zeroed terminator entry.
 */
static ctl_table intermezzo_table[2] = {
{PSDEV_INTERMEZZO, "intermezzo", NULL, 0, 0555, presto_table},
{0}
};
 
/* support for external setting and getting of opts. */
/* particularly via ioctl. The Right way to do this is via sysctl,
* but that will have to wait until intermezzo gets its own nice set of
* sysctl IDs
*/
/* we made these separate as setting may in future be more restricted
* than getting
*/
#ifdef RON_MINNICH
/*
 * Store opt->optval into the per-channel option selected by opt->optname.
 *
 * @minor indexes izo_channels[] and is not range-checked here.
 * Returns 0 on success, -EINVAL for option names that cannot be set through
 * this interface.
 */
int dosetopt(int minor, struct psdev_opt *opt)
{
        int value = opt->optval;
        int rc = 0;

        ENTRY;

        switch (opt->optname) {
        case PSDEV_TIMEOUT:
                izo_channels[minor].uc_timeout = value;
                break;

        case PSDEV_HARD:
                izo_channels[minor].uc_hard = value;
                break;

        case PSDEV_NO_FILTER:
                izo_channels[minor].uc_no_filter = value;
                break;

        case PSDEV_NO_JOURNAL:
                izo_channels[minor].uc_no_journal = value;
                break;

        case PSDEV_NO_UPCALL:
                izo_channels[minor].uc_no_upcall = value;
                break;

#ifdef PRESTO_DEBUG
        case PSDEV_ERRORVAL: {
                /* A positive value sets a breakpoint for that value; a
                 * negative value makes device (-value) read-only.  FIXME
                 * It would be much better to only allow setting the
                 * underlying device read-only for the current presto
                 * cache.
                 */
                int previous = izo_channels[minor].uc_errorval;

                if (previous >= 0) {
                        if (value < 0)
                                set_device_ro(-value, 1);
                        izo_channels[minor].uc_errorval = value;
                        CDEBUG(D_PSDEV, "setting errorval to %d\n", value);
                } else if (value == 0) {
                        /* a device is currently read-only: writing 0
                         * makes it writable again */
                        set_device_ro(-previous, 0);
                } else {
                        CERROR("device %s already read only\n",
                               kdevname(-previous));
                }
                break;
        }
#endif

        /* these names exist but are not settable here */
        case PSDEV_TRACE:
        case PSDEV_DEBUG:
        case PSDEV_BYTES_TO_CLOSE:
        default:
                CDEBUG(D_PSDEV,
                       "ioctl: dosetopt: minor %d, bad optname 0x%x, \n",
                       minor, opt->optname);
                rc = -EINVAL;
        }

        EXIT;
        return rc;
}
 
/*
 * Read the per-channel option selected by opt->optname into opt->optval.
 *
 * @minor indexes izo_channels[] and is not range-checked here.
 * Returns 0 on success, -EINVAL for option names that cannot be read through
 * this interface.
 */
int dogetopt(int minor, struct psdev_opt *opt)
{
        int retval = 0;

        ENTRY;

        switch(opt->optname) {

        case PSDEV_TIMEOUT:
                opt->optval = izo_channels[minor].uc_timeout;
                break;

        case PSDEV_HARD:
                opt->optval = izo_channels[minor].uc_hard;
                break;

        case PSDEV_NO_FILTER:
                opt->optval = izo_channels[minor].uc_no_filter;
                break;

        case PSDEV_NO_JOURNAL:
                opt->optval = izo_channels[minor].uc_no_journal;
                break;

        case PSDEV_NO_UPCALL:
                opt->optval = izo_channels[minor].uc_no_upcall;
                break;

        /* Guard on PRESTO_DEBUG, matching dosetopt() and the sysctl table.
         * The previous guard tested PSDEV_DEBUG, which is an option id that
         * is always defined, so it never actually gated anything.
         */
#ifdef PRESTO_DEBUG
        case PSDEV_ERRORVAL: {
                int errorval = izo_channels[minor].uc_errorval;
                /* a negative errorval encodes a device made read-only */
                if (errorval < 0 && is_read_only(-errorval))
                        CERROR("device %s has been set read-only\n",
                               kdevname(-errorval));
                opt->optval = izo_channels[minor].uc_errorval;
                break;
        }
#endif

        /* these names exist but are not readable here */
        case PSDEV_TRACE:
        case PSDEV_DEBUG:
        case PSDEV_BYTES_TO_CLOSE:
        default:
                /* the value printed is the option name, not its value */
                CDEBUG(D_PSDEV,
                       "ioctl: dogetopt: minor %d, bad optname 0x%x, \n",
                       minor, opt->optname);

                retval = -EINVAL;
        }

        EXIT;
        return retval;
}
#endif
 
 
/* Allocate and register the sysctl tables for the presto devices.  We need
 * sizeof(proto_psdev_table)/sizeof(proto_psdev_table[0]) entries for each
 * dev (this count includes the template's terminator entry).
 *
 * Returns 0 on success or -ENOMEM on failure.  On failure everything built
 * so far is released and the per-channel slots of presto_table are blanked,
 * so nothing is left pointing at freed memory.
 */
int /* __init */ init_intermezzo_sysctl(void)
{
        int i;
        int total_dev = MAX_CHANNEL;
        int entries_per_dev = sizeof(proto_psdev_table) /
                sizeof(proto_psdev_table[0]);
        int total_entries = entries_per_dev * total_dev;
        ctl_table *dev_ctl_table;

        PRESTO_ALLOC(dev_ctl_table, sizeof(ctl_table) * total_entries);

        if (! dev_ctl_table) {
                CERROR("WARNING: presto couldn't allocate dev_ctl_table\n");
                EXIT;
                return -ENOMEM;
        }

        /* now fill in the entries ... we put the individual presto<x>
         * entries at the end of the table, and the per-presto stuff
         * starting at the front.  This happens once per reboot.
         */
        for (i = 0; i < total_dev; i++) {
                /* entry for this /proc/sys/intermezzo/intermezzo"i" */
                ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];
                /* entries for the individual "files" in this "directory" */
                ctl_table *psdev_entries = &dev_ctl_table[i * entries_per_dev];

                /* init the psdev and psdev_entries with the prototypes */
                *psdev = proto_channel_entry;
                memcpy(psdev_entries, proto_psdev_table,
                       sizeof(proto_psdev_table));

                /* now specialize them ... */
                /* the psdev has to point to psdev_entries, and fix the number */
                psdev->ctl_name = psdev->ctl_name + i + 1; /* sorry */

                PRESTO_ALLOC((void*)psdev->procname, PROCNAME_SIZE);
                if (!psdev->procname) {
                        /* blank the half-built slot; slots [0, i) are
                         * released at out_free */
                        psdev->ctl_name = 0;
                        psdev->child = NULL;
                        goto out_free;
                }
                sprintf((char *) psdev->procname, "intermezzo%d", i);
                /* hook presto into */
                psdev->child = psdev_entries;

                /* now for each psdev entry ... (positions follow the row
                 * order of proto_psdev_table) */
                psdev_entries[0].data = &(izo_channels[i].uc_hard);
                psdev_entries[1].data = &(izo_channels[i].uc_no_filter);
                psdev_entries[2].data = &(izo_channels[i].uc_no_journal);
                psdev_entries[3].data = &(izo_channels[i].uc_no_upcall);
                psdev_entries[4].data = &(izo_channels[i].uc_timeout);
#ifdef PRESTO_DEBUG
                psdev_entries[5].data = &(izo_channels[i].uc_errorval);
#endif
        }

#ifdef CONFIG_SYSCTL
        if ( !intermezzo_table_header )
                intermezzo_table_header =
                        register_sysctl_table(intermezzo_table, 0);
#endif
#ifdef CONFIG_PROC_FS
        proc_fs_intermezzo = proc_mkdir("intermezzo", proc_root_fs);
        if (!proc_fs_intermezzo) {
                /* proc_mkdir() can fail; do not dereference its result */
                CERROR("WARNING: presto couldn't create proc directory\n");
#ifdef CONFIG_SYSCTL
                /* the tables are about to be freed: take them out of
                 * sysctl first */
                if (intermezzo_table_header) {
                        unregister_sysctl_table(intermezzo_table_header);
                        intermezzo_table_header = NULL;
                }
#endif
                i = total_dev; /* every slot was fully built */
                goto out_free;
        }
        proc_fs_intermezzo->owner = THIS_MODULE;
        create_proc_info_entry("mounts", 0, proc_fs_intermezzo,
                               intermezzo_mount_get_info);
#endif
        return 0;

out_free:
        /* release the names of the i slots that were completely set up */
        while (--i >= 0) {
                ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];

                PRESTO_FREE(psdev->procname, PROCNAME_SIZE);
                psdev->procname = NULL;
                psdev->child = NULL;
                psdev->ctl_name = 0;
        }
        PRESTO_FREE(dev_ctl_table, sizeof(ctl_table) * total_entries);
        return -ENOMEM;
}
 
void cleanup_intermezzo_sysctl(void)
{
int total_dev = MAX_CHANNEL;
int entries_per_dev = sizeof(proto_psdev_table) /
sizeof(proto_psdev_table[0]);
int total_entries = entries_per_dev * total_dev;
int i;
 
#ifdef CONFIG_SYSCTL
if ( intermezzo_table_header )
unregister_sysctl_table(intermezzo_table_header);
intermezzo_table_header = NULL;
#endif
for(i = 0; i < total_dev; i++) {
/* entry for this /proc/sys/intermezzo/intermezzo"i" */
ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];
PRESTO_FREE(psdev->procname, PROCNAME_SIZE);
}
/* presto_table[PRESTO_PRIMARY_CTLCNT].child points to the
* dev_ctl_table previously allocated in init_intermezzo_psdev()
*/
PRESTO_FREE(presto_table[PRESTO_PRIMARY_CTLCNT].child, sizeof(ctl_table) * total_entries);
 
#if CONFIG_PROC_FS
remove_proc_entry("mounts", proc_fs_intermezzo);
remove_proc_entry("intermezzo", proc_root_fs);
#endif
}
 
/psdev.c
0,0 → 1,651
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* An implementation of a loadable kernel mode driver providing
* multiple kernel/user space bidirectional communications links.
*
* Author: Alan Cox <alan@cymru.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
* Adapted to become the Linux 2.0 Coda pseudo device
* Peter Braam <braam@maths.ox.ac.uk>
* Michael Callahan <mjc@emmy.smith.edu>
*
* Changes for Linux 2.1
* Copyright (c) 1997 Carnegie-Mellon University
*
* Redone again for InterMezzo
* Copyright (c) 1998 Peter J. Braam
* Copyright (c) 2000 Mountain View Data, Inc.
* Copyright (c) 2000 Tacitus Systems, Inc.
* Copyright (c) 2001 Cluster File Systems, Inc.
*
*/
 
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/sched.h>
#include <linux/lp.h>
#include <linux/slab.h>
#include <linux/ioport.h>
#include <linux/fcntl.h>
#include <linux/delay.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/devfs_fs_kernel.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/system.h>
#include <asm/poll.h>
#include <asm/uaccess.h>
#include <linux/miscdevice.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
 
/* Global debug switches.  Development builds (PRESTO_DEVEL) start with
 * entry/exit tracing enabled and debug value 4095; otherwise both start at 0.
 */
#ifdef PRESTO_DEVEL
int presto_print_entry = 1;
int presto_debug = 4095;
#else
int presto_print_entry = 0;
int presto_debug = 0;
#endif
 
/* Like inode.c (presto_sym_iops), the initializer is just to prevent
izo_channels from appearing as a COMMON symbol (and therefore
interfering with other modules that use the same variable name).
The real per-channel setup (list heads, wait queue, lock, defaults) is done
by presto_psdev_init(), which zeroes the array first. */
struct upc_channel izo_channels[MAX_CHANNEL] = {{0}};
 
/*
 * Find the lowest-numbered channel whose cache list is empty.
 *
 * Returns the channel index, or -1 when every channel has at least one cache
 * attached.
 */
int izo_psdev_get_free_channel(void)
{
        int minor;

        for (minor = 0; minor < MAX_CHANNEL; minor++) {
                if (list_empty(&(izo_channels[minor].uc_cache_list)))
                        return minor;
        }

        return -1;
}
 
 
/*
 * Record the calling process as the Lento for channel @minor.
 *
 * This ioctl is performed by each Lento that starts up and wants to do
 * further communication with presto.  Any request still on the processing
 * list gets its sleeper woken; the requests themselves are not unlinked
 * here.
 *
 * Returns 0, or -EINVAL when @minor is out of range.
 */
int izo_psdev_setpid(int minor)
{
        struct upc_channel *channel;
        struct list_head *pos;

        if (minor < 0 || minor >= MAX_CHANNEL)
                return -EINVAL;

        channel = &(izo_channels[minor]);

        CDEBUG(D_PSDEV, "Setting current pid to %d channel %d\n",
               current->pid, minor);
        channel->uc_pid = current->pid;

        spin_lock(&channel->uc_lock);
        if (!list_empty(&channel->uc_processing)) {
                CERROR("WARNING: setpid & processing not empty!\n");
                for (pos = channel->uc_processing.next;
                     pos != &channel->uc_processing;
                     pos = pos->next) {
                        struct upc_req *req =
                                list_entry(pos, struct upc_req, rq_chain);

                        /* freeing of req and data is done by the sleeper */
                        wake_up(&req->rq_sleep);
                }
        }
        /* nothing above removes entries, so this repeats the earlier test */
        if (!list_empty(&channel->uc_processing)) {
                CERROR("BAD: FAILDED TO CLEAN PROCESSING LIST!\n");
        }
        spin_unlock(&channel->uc_lock);

        EXIT;
        return 0;
}
 
/*
 * Bind the file behind descriptor @fd to the channel of the cache that
 * @file's inode belongs to, by storing cache->cache_psdev in its
 * private_data.
 *
 * Returns 0 on success; -EINVAL when @fd is not open, when no cache is found
 * for @file, or when the target file already has private_data set.
 *
 * NOTE(review): nothing here checks that @fd really refers to the psdev
 * device before its private_data is written -- confirm callers guarantee it.
 */
int izo_psdev_setchannel(struct file *file, int fd)
{
        struct file *psdev_file = fget(fd);
        struct presto_cache *cache = presto_get_cache(file->f_dentry->d_inode);

        if (!psdev_file) {
                CERROR("%s: no psdev_file!\n", __FUNCTION__);
                return -EINVAL;
        }

        if (!cache) {
                CERROR("%s: no cache!\n", __FUNCTION__);
                goto fail_put;
        }

        if (psdev_file->private_data) {
                CERROR("%s: channel already set!\n", __FUNCTION__);
                goto fail_put;
        }

        psdev_file->private_data = cache->cache_psdev;
        fput(psdev_file);
        EXIT;
        return 0;

fail_put:
        /* drop the reference taken by fget() */
        fput(psdev_file);
        return -EINVAL;
}
 
/*
 * Return the pid recorded for channel @minor's Lento; it is 0 (false) after
 * presto_psdev_release() has cleared it.  @minor is not range-checked.
 */
inline int presto_lento_up(int minor)
{
        const int lento_pid = izo_channels[minor].uc_pid;

        return lento_pid;
}
 
/*
 * poll method of the psdev device.
 *
 * The device is always reported writable; it is reported readable when the
 * channel's pending list is non-empty.  A file that has no channel bound yet
 * reports POLLERR.
 */
static unsigned int presto_psdev_poll(struct file *file, poll_table * wait)
{
        struct upc_channel *channel = (struct upc_channel *)file->private_data;
        unsigned int mask = POLLOUT | POLLWRNORM;

        /* ENTRY; this will flood you */
        if ( ! channel ) {
                CERROR("%s: bad psdev file\n", __FUNCTION__);
                /* the return value is an event mask, not an errno: a
                 * negative errno here would read as "every event ready" */
                return POLLERR;
        }

        poll_wait(file, &(channel->uc_waitq), wait);

        spin_lock(&channel->uc_lock);
        if (!list_empty(&channel->uc_pending)) {
                CDEBUG(D_PSDEV, "Non-empty pending list.\n");
                mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock(&channel->uc_lock);

        /* EXIT; will flood you */
        return mask;
}
 
/*
* Receive a message written by Lento to the psdev
*/
/*
 * write method of the psdev device: accept Lento's reply to an upcall.
 *
 * The reply starts with a struct izo_upcall_resp header whose "unique" field
 * selects the request on the channel's processing list.  The reply is copied
 * into that request's buffer (truncated to the buffer size), the request is
 * marked REQ_WRITE and its sleeper is woken.
 *
 * Returns the number of bytes accepted, -EBADF when no channel is bound to
 * the file, -EINVAL when fewer bytes than a header were written, -EFAULT
 * when user memory cannot be read, or -ESRCH when no matching request is
 * found.
 */
static ssize_t presto_psdev_write(struct file *file, const char *buf,
                                  size_t count, loff_t *off)
{
        struct upc_channel *channel = (struct upc_channel *)file->private_data;
        struct upc_req *req = NULL;
        struct upc_req *tmp;
        struct list_head *lh;
        struct izo_upcall_resp hdr;
        int error;

        if ( ! channel ) {
                CERROR("%s: bad psdev file\n", __FUNCTION__);
                return -EBADF;
        }

        /* Peek at the opcode, uniquefier */
        if ( count < sizeof(hdr) ) {
                CERROR("presto_psdev_write: Lento didn't write full hdr.\n");
                return -EINVAL;
        }

        error = copy_from_user(&hdr, buf, sizeof(hdr));
        if ( error )
                return -EFAULT;

        CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n",
               current->pid, hdr.opcode, hdr.unique);

        spin_lock(&channel->uc_lock);
        /* Look for the message on the processing queue. */
        lh = &channel->uc_processing;
        while ( (lh = lh->next) != &channel->uc_processing ) {
                tmp = list_entry(lh, struct upc_req , rq_chain);
                if (tmp->rq_unique == hdr.unique) {
                        req = tmp;
                        /* unlink here: keeps search length minimal */
                        list_del_init(&req->rq_chain);
                        CDEBUG(D_PSDEV,"Eureka opc %d uniq %d!\n",
                               hdr.opcode, hdr.unique);
                        break;
                }
        }
        spin_unlock(&channel->uc_lock);
        if (!req) {
                CERROR("psdev_write: msg (%d, %d) not found\n",
                       hdr.opcode, hdr.unique);
                return(-ESRCH);
        }

        /* move data into response buffer. */
        if (req->rq_bufsize < count) {
                /* count is a size_t: print it as unsigned long */
                CERROR("psdev_write: too much cnt: %d, cnt: %lu, "
                       "opc: %d, uniq: %d.\n",
                       req->rq_bufsize, (unsigned long)count,
                       hdr.opcode, hdr.unique);
                count = req->rq_bufsize; /* don't have more space! */
        }
        error = copy_from_user(req->rq_data, buf, count);
        if ( error ) {
                /* The request was unlinked above but has not been answered.
                 * Put it back on the processing list so that a retried
                 * write, or the wake-ups done on release/cleanup, can still
                 * reach its sleeper; otherwise nothing would ever find it
                 * again.
                 * NOTE(review): req is used after uc_lock was dropped, as in
                 * the rest of this function -- confirm the sleeper cannot
                 * free it in that window.
                 */
                spin_lock(&channel->uc_lock);
                list_add(&req->rq_chain, channel->uc_processing.prev);
                spin_unlock(&channel->uc_lock);
                return -EFAULT;
        }

        /* adjust outsize: good upcalls can be aware of this */
        req->rq_rep_size = count;
        req->rq_flags |= REQ_WRITE;

        wake_up(&req->rq_sleep);
        return(count);
}
 
/*
* Read a message from the kernel to Lento
*/
/*
 * read method of the psdev device: hand the oldest pending upcall to Lento.
 *
 * The request is taken off the pending list.  A synchronous request moves to
 * the tail of the processing list to await its reply; an asynchronous one
 * (REQ_ASYNC) is freed here once it has been copied out.  At most @count
 * bytes are copied; a short buffer truncates the message.
 *
 * Returns the number of bytes copied, -EBADF when no channel is bound to the
 * file, -EINVAL when nothing is pending, or -EFAULT when @buf cannot be
 * written.
 */
static ssize_t presto_psdev_read(struct file * file, char * buf,
                                 size_t count, loff_t *off)
{
        struct upc_channel *channel = (struct upc_channel *)file->private_data;
        struct upc_req *req;
        int result = count;

        if ( ! channel ) {
                CERROR("%s: bad psdev file\n", __FUNCTION__);
                return -EBADF;
        }

        spin_lock(&channel->uc_lock);
        if (list_empty(&(channel->uc_pending))) {
                CDEBUG(D_UPCALL, "Empty pending list in read, not good\n");
                spin_unlock(&channel->uc_lock);
                return -EINVAL;
        }
        req = list_entry((channel->uc_pending.next), struct upc_req, rq_chain);
        list_del(&(req->rq_chain));
        if (! (req->rq_flags & REQ_ASYNC) ) {
                list_add(&(req->rq_chain), channel->uc_processing.prev);
        }
        spin_unlock(&channel->uc_lock);

        req->rq_flags |= REQ_READ;

        /* Move the input args into userspace */
        CDEBUG(D_PSDEV, "\n");
        if (req->rq_bufsize <= count) {
                result = req->rq_bufsize;
        }

        if (count < req->rq_bufsize) {
                /* count is a size_t: print it as unsigned long */
                CERROR ("psdev_read: buffer too small, read %lu of %d bytes\n",
                        (unsigned long)count, req->rq_bufsize);
        }

        if ( copy_to_user(buf, req->rq_data, result) ) {
                /* A bad user buffer is the caller's error, not a kernel
                 * bug, so report it instead of calling BUG().  An async
                 * request is on no list at this point; put it back at the
                 * head of the pending list so it is neither leaked nor
                 * lost.
                 */
                if (req->rq_flags & REQ_ASYNC) {
                        req->rq_flags &= ~REQ_READ;
                        spin_lock(&channel->uc_lock);
                        list_add(&(req->rq_chain), &channel->uc_pending);
                        spin_unlock(&channel->uc_lock);
                }
                return -EFAULT;
        }

        /* If request was asynchronous don't enqueue, but free */
        if (req->rq_flags & REQ_ASYNC) {
                CDEBUG(D_PSDEV, "psdev_read: async msg (%d, %d), result %d\n",
                       req->rq_opcode, req->rq_unique, result);
                PRESTO_FREE(req->rq_data, req->rq_bufsize);
                PRESTO_FREE(req, sizeof(*req));
        }

        return result;
}
 
 
/*
 * open method of the psdev device.  The new file starts with no channel
 * bound (private_data is NULL) and holds one module reference.
 * Always returns 0.
 */
static int presto_psdev_open(struct inode * inode, struct file * file)
{
        ENTRY;

        /* no channel yet; izo_psdev_setchannel() binds one later */
        file->private_data = NULL;

        MOD_INC_USE_COUNT;

        CDEBUG(D_PSDEV, "Psdev_open: caller: %d, flags: %d\n",
               current->pid, file->f_flags);

        EXIT;
        return 0;
}
 
 
 
/*
 * release method of the psdev device.
 *
 * Drops the module reference taken in presto_psdev_open().  If a channel was
 * bound to the file, its Lento pid is cleared and every synchronous request
 * on the pending and processing lists is marked REQ_DEAD and woken;
 * asynchronous pending requests are left queued for a new Lento.
 *
 * Returns 0, or -EBADF when no channel was ever bound to the file.
 */
static int presto_psdev_release(struct inode * inode, struct file * file)
{
        struct upc_channel *channel = (struct upc_channel *)file->private_data;
        struct upc_req *req;
        struct list_head *lh;
        ENTRY;

        /* open() took this reference unconditionally, so release it before
         * any early return; otherwise a file that was opened but never
         * bound to a channel would pin the module forever. */
        MOD_DEC_USE_COUNT;

        if ( ! channel ) {
                CERROR("%s: bad psdev file\n", __FUNCTION__);
                return -EBADF;
        }

        CDEBUG(D_PSDEV, "Lento: pid %d\n", current->pid);
        channel->uc_pid = 0;

        /* Wake up clients so they can return. */
        CDEBUG(D_PSDEV, "Wake up clients sleeping for pending.\n");
        spin_lock(&channel->uc_lock);
        lh = &channel->uc_pending;
        while ( (lh = lh->next) != &channel->uc_pending) {
                req = list_entry(lh, struct upc_req, rq_chain);

                /* Async requests stay around for a new lento */
                if (req->rq_flags & REQ_ASYNC) {
                        continue;
                }
                /* the sleeper will free the req and data */
                req->rq_flags |= REQ_DEAD;
                wake_up(&req->rq_sleep);
        }

        CDEBUG(D_PSDEV, "Wake up clients sleeping for processing\n");
        lh = &channel->uc_processing;
        while ( (lh = lh->next) != &channel->uc_processing) {
                req = list_entry(lh, struct upc_req, rq_chain);
                /* freeing of req and data is done by the sleeper */
                req->rq_flags |= REQ_DEAD;
                wake_up(&req->rq_sleep);
        }
        spin_unlock(&channel->uc_lock);
        CDEBUG(D_PSDEV, "Done.\n");

        EXIT;
        return 0;
}
 
/* File operations of the psdev device: Lento reads upcalls, writes replies,
 * and polls for pending upcalls; open/release manage the module reference. */
static struct file_operations presto_psdev_fops = {
.read = presto_psdev_read,
.write = presto_psdev_write,
.poll = presto_psdev_poll,
.open = presto_psdev_open,
.release = presto_psdev_release
};
 
/* modules setup */
/* Misc-device descriptor (minor, name, file operations) registered by
 * presto_psdev_init() and removed by presto_psdev_cleanup(). */
static struct miscdevice intermezzo_psdev = {
INTERMEZZO_MINOR,
"intermezzo",
&presto_psdev_fops
};
 
/*
 * Initialise every upcall channel and register the psdev misc device.
 *
 * Returns 0 on success, or -EIO when the misc device cannot be registered.
 */
int presto_psdev_init(void)
{
        int i;
        int err;

        /* Set the channels up before the device is registered, so that
         * nothing reachable through the device ever sees zeroed list heads
         * or an uninitialised lock. */
        memset(&izo_channels, 0, sizeof(izo_channels));
        for ( i = 0 ; i < MAX_CHANNEL ; i++ ) {
                struct upc_channel *channel = &(izo_channels[i]);
                INIT_LIST_HEAD(&channel->uc_pending);
                INIT_LIST_HEAD(&channel->uc_processing);
                INIT_LIST_HEAD(&channel->uc_cache_list);
                init_waitqueue_head(&channel->uc_waitq);
                channel->uc_lock = SPIN_LOCK_UNLOCKED;
                channel->uc_hard = 0;
                channel->uc_no_filter = 0;
                channel->uc_no_journal = 0;
                channel->uc_no_upcall = 0;
                /* multiplied by HZ in lento_waitfor_upcall() */
                channel->uc_timeout = 30;
                channel->uc_errorval = 0;
                channel->uc_minor = i;
        }

        if ( (err = misc_register(&intermezzo_psdev)) ) {
                CERROR("%s: cannot register %d err %d\n",
                       __FUNCTION__, INTERMEZZO_MINOR, err);
                return -EIO;
        }

        return 0;
}
 
/*
 * Deregister the psdev misc device and flush every channel.
 *
 * Pending asynchronous requests are freed here.  Synchronous requests,
 * pending or in processing, are marked REQ_DEAD and their sleepers woken;
 * the sleeper frees the request.
 */
void presto_psdev_cleanup(void)
{
        int i;

        misc_deregister(&intermezzo_psdev);

        for ( i = 0 ; i < MAX_CHANNEL ; i++ ) {
                struct upc_channel *channel = &(izo_channels[i]);
                struct list_head *lh;

                spin_lock(&channel->uc_lock);
                if ( ! list_empty(&channel->uc_pending)) {
                        CERROR("Weird, tell Peter: module cleanup and pending list not empty dev %d\n", i);
                }
                if ( ! list_empty(&channel->uc_processing)) {
                        CERROR("Weird, tell Peter: module cleanup and processing list not empty dev %d\n", i);
                }
                if ( ! list_empty(&channel->uc_cache_list)) {
                        CERROR("Weird, tell Peter: module cleanup and cache listnot empty dev %d\n", i);
                }
                lh = channel->uc_pending.next;
                while ( lh != &channel->uc_pending) {
                        struct upc_req *req;

                        req = list_entry(lh, struct upc_req, rq_chain);
                        /* advance first: req may be freed below */
                        lh = lh->next;
                        if ( req->rq_flags & REQ_ASYNC ) {
                                list_del(&(req->rq_chain));
                                CDEBUG(D_UPCALL, "free pending upcall type %d\n",
                                       req->rq_opcode);
                                PRESTO_FREE(req->rq_data, req->rq_bufsize);
                                PRESTO_FREE(req, sizeof(struct upc_req));
                        } else {
                                req->rq_flags |= REQ_DEAD;
                                wake_up(&req->rq_sleep);
                        }
                }
                lh = channel->uc_processing.next;
                while ( lh != &channel->uc_processing ) {
                        struct upc_req *req;

                        req = list_entry(lh, struct upc_req, rq_chain);
                        /* Advance before unlinking: the node's own links
                         * must not be followed once it is off the list.
                         * list_del_init() leaves the node self-linked, so
                         * the list_del_init() the woken sleeper performs in
                         * lento_waitfor_upcall() stays safe. */
                        lh = lh->next;
                        list_del_init(&(req->rq_chain));
                        req->rq_flags |= REQ_DEAD;
                        wake_up(&req->rq_sleep);
                }
                spin_unlock(&channel->uc_lock);
        }
}
 
/*
* lento_upcall and lento_downcall routines
*/
/*
 * Sleep on req->rq_sleep until the request has been answered (REQ_WRITE) or
 * abandoned (REQ_DEAD), or -- on a channel whose uc_hard is 0 -- until a
 * signal allows giving up.  On exit the request is unlinked from whatever
 * list it is on.
 *
 * Records the post time in req->rq_posttime and returns the number of
 * jiffies elapsed since then.
 */
static inline unsigned long lento_waitfor_upcall
(struct upc_channel *channel, struct upc_req *req, int minor)
{
DECLARE_WAITQUEUE(wait, current);
unsigned long posttime;

req->rq_posttime = posttime = jiffies;

add_wait_queue(&req->rq_sleep, &wait);
for (;;) {
/* the task state is set before the flags are tested, so a wake_up()
 * arriving between the test and schedule() is not lost */
if ( izo_channels[minor].uc_hard == 0 )
set_current_state(TASK_INTERRUPTIBLE);
else
set_current_state(TASK_UNINTERRUPTIBLE);

/* got a reply */
if ( req->rq_flags & (REQ_WRITE | REQ_DEAD) )
break;

/* these cases only apply when TASK_INTERRUPTIBLE */
if ( !izo_channels[minor].uc_hard && signal_pending(current) ) {
/* if this process really wants to die, let it go */
if (sigismember(&(current->pending.signal), SIGKILL)||
sigismember(&(current->pending.signal), SIGINT) )
break;
/* signal is present: after timeout always return
really smart idea, probably useless ... */
/* uc_timeout is multiplied by HZ here, i.e. it is in seconds */
if ( time_after(jiffies, req->rq_posttime +
izo_channels[minor].uc_timeout * HZ) )
break;
}
schedule();
}

/* unlink under the channel lock; list_del_init keeps this safe when the
 * request was already unlinked with list_del_init elsewhere */
spin_lock(&channel->uc_lock);
list_del_init(&req->rq_chain);
spin_unlock(&channel->uc_lock);
remove_wait_queue(&req->rq_sleep, &wait);
set_current_state(TASK_RUNNING);

CDEBUG(D_SPECIAL, "posttime: %ld, returned: %ld\n",
posttime, jiffies-posttime);
return (jiffies - posttime);
}
 
/*
* lento_upcall will return an error in the case of
* failed communication with Lento _or_ will peek at Lento
* reply and return Lento's error.
*
* As lento has 2 types of errors, normal errors (positive) and internal
* errors (negative), normal errors are negated, while internal errors
* are all mapped to -EINTR, while showing a nice warning message. (jh)
*
* lento_upcall will always free buffer, either directly, when an upcall
* is read (in presto_psdev_read), when the filesystem is unmounted, or
* when the module is unloaded.
*/
/*
 * Queue the upcall in @buffer (@*size bytes) on channel @minor.
 *
 * Asynchronous upcalls (@async non-zero) return 0 as soon as the request is
 * queued; the request and @buffer are then freed by presto_psdev_read().
 * Synchronous upcalls sleep until Lento replies, the wait is interrupted, or
 * Lento goes away.  On a reply, *size is set to the reply size and the
 * (positive) Lento result is returned negated.
 *
 * Returns 0 when upcalls are disabled on the channel or on success,
 * -ENXIO when no Lento is attached for a synchronous upcall, -ENOMEM,
 * -EINTR when interrupted, -ENODEV when Lento died.
 */
int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *buffer,
                   int async)
{
        unsigned long runtime;
        struct upc_channel *channel;
        struct izo_upcall_resp *out;
        struct upc_req *req;
        int error = 0;

        ENTRY;
        channel = &(izo_channels[minor]);

        if (channel->uc_no_upcall) {
                EXIT;
                goto exit_buf;
        }
        if (!channel->uc_pid && !async) {
                EXIT;
                error = -ENXIO;
                goto exit_buf;
        }

        /* Format the request message. */
        PRESTO_ALLOC(req, sizeof(struct upc_req));
        if ( !req ) {
                EXIT;
                error = -ENOMEM;
                goto exit_buf;
        }
        req->rq_data = (void *)buffer;
        /* REQ_ASYNC must be in place before the request becomes visible on
         * the pending list: presto_psdev_read() decides from this flag
         * whether it owns (and frees) the request. */
        req->rq_flags = async ? REQ_ASYNC : 0;
        req->rq_bufsize = *size;
        req->rq_rep_size = 0;
        req->rq_opcode = buffer->u_opc;
        req->rq_unique = ++channel->uc_seq;
        init_waitqueue_head(&req->rq_sleep);

        /* Fill in the common input args. */
        buffer->u_uniq = req->rq_unique;
        buffer->u_async = async;

        /* Log before queueing: once an async request is on the list the
         * reader may free it at any time. */
        CDEBUG(D_UPCALL,
               "Proc %d waking Lento %d for(opc,uniq) =(%d,%d) msg at %p.\n",
               current->pid, channel->uc_pid, req->rq_opcode,
               req->rq_unique, req);

        spin_lock(&channel->uc_lock);
        /* Append msg to pending queue and poke Lento. */
        list_add(&req->rq_chain, channel->uc_pending.prev);
        spin_unlock(&channel->uc_lock);
        wake_up_interruptible(&channel->uc_waitq);

        if ( async ) {
                /* req, rq_data are freed in presto_psdev_read for async;
                 * req must not be touched from here on */
                EXIT;
                return 0;
        }

        /* We can be interrupted while we wait for Lento to process
         * our request.  If the interrupt occurs before Lento has read
         * the request, we dequeue and return.  If it occurs after the
         * read but before the reply, we dequeue, send a signal
         * message, and return.  If it occurs after the reply we ignore
         * it.  In no case do we want to restart the syscall.  If it
         * was interrupted by a lento shutdown (psdev_close), return
         * ENODEV. */

        /* Go to sleep.  Wake up on signals only after the timeout. */
        runtime = lento_waitfor_upcall(channel, req, minor);

        CDEBUG(D_TIMING, "opc: %d time: %ld uniq: %d size: %d\n",
               req->rq_opcode, jiffies - req->rq_posttime,
               req->rq_unique, req->rq_rep_size);
        /* pointers are printed with %p rather than through an int cast */
        CDEBUG(D_UPCALL,
               "..process %d woken up by Lento for req at %p, data at %p\n",
               current->pid, req, req->rq_data);

        if (channel->uc_pid) {      /* i.e. Lento is still alive */
                /* Op went through, interrupt or not we go on */
                if (req->rq_flags & REQ_WRITE) {
                        out = (struct izo_upcall_resp *)req->rq_data;
                        /* here we map positive Lento errors to kernel errors */
                        if ( out->result < 0 ) {
                                CERROR("Tell Peter: Lento returns negative error %d, for oc %d!\n",
                                       out->result, out->opcode);
                                out->result = EINVAL;
                        }
                        error = -out->result;
                        CDEBUG(D_UPCALL, "upcall: (u,o,r) (%d, %d, %d) out at %p\n",
                               out->unique, out->opcode, out->result, out);
                        *size = req->rq_rep_size;
                        EXIT;
                        goto exit_req;
                }
                /* Interrupted before lento read it. */
                if ( !(req->rq_flags & REQ_READ) && signal_pending(current)) {
                        CDEBUG(D_UPCALL,
                               "Interrupt before read: (op,un)=(%d,%d), flags %x\n",
                               req->rq_opcode, req->rq_unique, req->rq_flags);
                        /* perhaps the best way to convince the app to give up? */
                        error = -EINTR;
                        EXIT;
                        goto exit_req;
                }

                /* interrupted after Lento did its read, send signal */
                if ( (req->rq_flags & REQ_READ) && signal_pending(current) ) {
                        CDEBUG(D_UPCALL,"Interrupt after read: op = %d.%d, flags = %x\n",
                               req->rq_opcode, req->rq_unique, req->rq_flags);

                        error = -EINTR;
                } else {
                        CERROR("Lento: Strange interruption - tell Peter.\n");
                        error = -EINTR;
                }
        } else {        /* If lento died i.e. !UC_OPEN(channel) */
                CERROR("lento_upcall: Lento dead on (op,un) (%d.%d) flags %d\n",
                       req->rq_opcode, req->rq_unique, req->rq_flags);
                error = -ENODEV;
        }

exit_req:
        /* only the request descriptor is released here, not @buffer */
        PRESTO_FREE(req, sizeof(struct upc_req));
exit_buf:
        return error;
}
/journal_tmpfs.c
0,0 → 1,109
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <linux/types.h>
#include <linux/param.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#if defined(CONFIG_TMPFS)
#include <linux/jbd.h>
#if defined(CONFIG_EXT3)
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#endif
#endif
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#if defined(CONFIG_TMPFS)
 
/* space requirements:
presto_do_truncate:
used to truncate the KML forward to next fset->chunksize boundary
- zero partial block
- update inode
presto_write_record:
write header (< one block)
write one path (< MAX_PATHLEN)
possibly write another path (< MAX_PATHLEN)
write suffix (< one block)
presto_update_last_rcvd
write one block
*/
 
/*
 * tr_avail hook for tmpfs: reports a fixed 2^30 regardless of @cache and
 * @sb, which are not examined.
 */
static loff_t presto_tmpfs_freespace(struct presto_cache *cache,
                                     struct super_block *sb)
{
        loff_t avail = (1<<30);

        return avail;
}
 
/* start the filesystem journal operations */
/*
 * tr_start hook for tmpfs: no transaction is actually opened.  The fixed
 * non-NULL value 1 is returned as the handle; none of the arguments is used.
 */
static void *presto_tmpfs_trans_start(struct presto_file_set *fset,
                                      struct inode *inode,
                                      int op)
{
        void *dummy_handle = (void *)1;

        return dummy_handle;
}
 
/* tr_commit hook for tmpfs: nothing to commit, so this does nothing. */
static void presto_tmpfs_trans_commit(struct presto_file_set *fset, void *handle)
{
        /* intentionally empty */
}
 
/* tr_journal_data hook for tmpfs: does nothing. */
static void presto_tmpfs_journal_file_data(struct inode *inode)
{
        /* intentionally empty */
}
 
/*
 * tr_all_data hook for tmpfs: always returns 0; @inode is not examined.
 */
static int presto_tmpfs_has_all_data(struct inode *inode)
{
        const int all_present = 0;

        return all_present;
}
 
/* Journal operations table for tmpfs, wiring the stub hooks defined above.
 * NOTE(review): presto_tmpfs_ilookup and presto_add_ilookup_dentry are not
 * defined in the visible part of this file -- confirm they are provided
 * elsewhere.
 */
struct journal_ops presto_tmpfs_journal_ops = {
tr_all_data: presto_tmpfs_has_all_data,
tr_avail: presto_tmpfs_freespace,
tr_start: presto_tmpfs_trans_start,
tr_commit: presto_tmpfs_trans_commit,
tr_journal_data: presto_tmpfs_journal_file_data,
tr_ilookup: presto_tmpfs_ilookup,
tr_add_ilookup: presto_add_ilookup_dentry
};
 
#endif /* CONFIG_TMPFS */
/kml_unpack.c
0,0 → 1,708
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Unpacking of KML records
*
*/
 
#ifdef __KERNEL__
# include <linux/module.h>
# include <linux/errno.h>
# include <linux/kernel.h>
# include <linux/major.h>
# include <linux/sched.h>
# include <linux/lp.h>
# include <linux/slab.h>
# include <linux/ioport.h>
# include <linux/fcntl.h>
# include <linux/delay.h>
# include <linux/skbuff.h>
# include <linux/proc_fs.h>
# include <linux/vmalloc.h>
# include <linux/fs.h>
# include <linux/poll.h>
# include <linux/init.h>
# include <linux/list.h>
# include <linux/stat.h>
# include <asm/io.h>
# include <asm/segment.h>
# include <asm/system.h>
# include <asm/poll.h>
# include <asm/uaccess.h>
#else
# include <time.h>
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
# include <errno.h>
# include <sys/stat.h>
# include <glib.h>
#endif
 
#include <linux/intermezzo_lib.h>
#include <linux/intermezzo_idl.h>
#include <linux/intermezzo_fs.h>
 
/*
 * Unpack one struct presto_version from the record at *buf.
 *
 * *ver is set by UNLOGP and its pv_mtime, pv_ctime and pv_size fields are
 * converted in place with NTOH__u64.  On success *buf is advanced past the
 * version and 0 is returned.
 * NOTE(review): UNLOGP is defined outside this file; presumably it points
 * *ver into the buffer and bails out of this function when the data would
 * run past @end -- confirm.
 */
int kml_unpack_version(struct presto_version **ver, char **buf, char *end)
{
        char *ptr = *buf;

        UNLOGP(*ver, struct presto_version, ptr, end);

        (*ver)->pv_mtime = NTOH__u64((*ver)->pv_mtime);
        (*ver)->pv_ctime = NTOH__u64((*ver)->pv_ctime);
        (*ver)->pv_size = NTOH__u64((*ver)->pv_size);

        *buf = ptr;
        return 0;
}
 
 
/*
 * Unpacker for records without a body: consumes nothing, leaves *buf
 * untouched and returns 0.
 */
static int kml_unpack_noop(struct kml_rec *rec, char **buf, char *end)
{
        (void)rec;
        (void)buf;
        (void)end;

        return 0;
}
 
/*
 * Unpack a get_fileid record body: a 32-bit path length followed by the
 * path itself.  On success *buf is advanced past the consumed data and 0 is
 * returned.
 * NOTE(review): LUNLOGV and UNLOGL are defined outside this file; presumably
 * they bounds-check against @end and return from this function on overrun --
 * confirm.
 */
static int kml_unpack_get_fileid(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;

/* the length must be read first: it sizes the path that follows */
LUNLOGV(rec->pathlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);

*buf = ptr;
return 0;
}
 
/*
 * Unpack a create record body: old/new parent versions, new object version,
 * mode, uid, gid, then the path length and path.
 *
 * Returns 0 on success with *buf advanced past the record; a non-zero
 * result of kml_unpack_version() is passed straight back and *buf is left
 * untouched.
 * NOTE(review): LUNLOGV/UNLOGL are defined outside this file; presumably
 * they return from this function themselves on overrun -- confirm.
 */
static int kml_unpack_create(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int rc;

        /* A failed version unpack does not advance ptr, so carrying on
         * would decode the remaining fields from the wrong offset. */
        rc = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (rc)
                return rc;

        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;

        return 0;
}
 
/*
 * Unpack a mkdir record body: old/new parent versions, new object version,
 * mode, uid, gid, then the path length and path (same layout as a create
 * record).
 *
 * Returns 0 on success with *buf advanced past the record; a non-zero
 * result of kml_unpack_version() is passed straight back and *buf is left
 * untouched.
 */
static int kml_unpack_mkdir(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int rc;

        /* A failed version unpack does not advance ptr, so carrying on
         * would decode the remaining fields from the wrong offset. */
        rc = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (rc)
                return rc;

        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;

        return 0;
}
 
 
/*
 * Unpack an unlink record body: old/new parent versions, old object
 * version, the old mode/rdev/uid/gid, three lengths (path, target, old
 * target) and then the three strings in the same order.
 *
 * Returns 0 on success with *buf advanced past the record; a non-zero
 * result of kml_unpack_version() is passed straight back and *buf is left
 * untouched.
 */
static int kml_unpack_unlink(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int rc;

        /* A failed version unpack does not advance ptr, so carrying on
         * would decode the remaining fields from the wrong offset. */
        rc = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (rc)
                return rc;

        LUNLOGV(rec->old_mode, __u32, ptr, end);
        LUNLOGV(rec->old_rdev, __u32, ptr, end);
        LUNLOGV(rec->old_uid, __u64, ptr, end);
        LUNLOGV(rec->old_gid, __u64, ptr, end);
        /* all lengths come first, then the strings they describe */
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        LUNLOGV(rec->old_targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);
        UNLOGL(rec->old_target, char, rec->old_targetlen, ptr, end);

        *buf = ptr;

        return 0;
}
 
 
/*
 * Unpack an rmdir record body: old/new parent versions, old object version,
 * the old mode/rdev/uid/gid, the path and target lengths and then the two
 * strings.
 *
 * Returns 0 on success with *buf advanced past the record; a non-zero
 * result of kml_unpack_version() is passed straight back and *buf is left
 * untouched.
 */
static int kml_unpack_rmdir(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int rc;

        /* A failed version unpack does not advance ptr, so carrying on
         * would decode the remaining fields from the wrong offset. */
        rc = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (rc)
                return rc;

        LUNLOGV(rec->old_mode, __u32, ptr, end);
        LUNLOGV(rec->old_rdev, __u32, ptr, end);
        LUNLOGV(rec->old_uid, __u64, ptr, end);
        LUNLOGV(rec->old_gid, __u64, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);

        *buf = ptr;

        return 0;
}
 
 
/*
 * Unpack a close record body: open mode, uid and gid (stored in rec->mode,
 * rec->uid, rec->gid), old and new object versions, inode number,
 * generation, then the path length and path.
 *
 * Returns 0 on success with *buf advanced past the record; a non-zero
 * result of kml_unpack_version() is passed straight back and *buf is left
 * untouched.
 */
static int kml_unpack_close(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int rc;

        LUNLOGV(rec->mode, __u32, ptr, end);  // used for open_mode
        LUNLOGV(rec->uid, __u32, ptr, end);   // used for open_uid
        LUNLOGV(rec->gid, __u32, ptr, end);   // used for open_gid

        /* A failed version unpack does not advance ptr, so carrying on
         * would decode the remaining fields from the wrong offset. */
        rc = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (rc)
                return rc;

        LUNLOGV(rec->ino, __u64, ptr, end);
        LUNLOGV(rec->generation, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;

        return 0;
}
 
 
/*
 * Unpack a symlink record body: old/new parent versions, new object
 * version, uid, gid, the path and target lengths and then the two strings.
 *
 * Returns 0 on success with *buf advanced past the record; a non-zero
 * result of kml_unpack_version() is passed straight back and *buf is left
 * untouched.
 */
static int kml_unpack_symlink(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int rc;

        /* A failed version unpack does not advance ptr, so carrying on
         * would decode the remaining fields from the wrong offset. */
        rc = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (!rc)
                rc = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (rc)
                return rc;

        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);

        *buf = ptr;

        return 0;
}
 
 
/*
 * Unpack the body of a RENAME record from *buf into rec.
 *
 * Note the version order differs from the other unpackers: old object,
 * new object, old parent, new parent.  These are followed by pathlen,
 * targetlen and the path and target bytes.  Returns 0 and advances *buf
 * when the end of the function is reached.
 *
 * NOTE(review): LUNLOGV/UNLOGL are macros defined outside this view;
 * presumably they bounds-check against end and may return early -- confirm.
 * NOTE(review): the result of kml_unpack_version() is not examined here.
 */
static int kml_unpack_rename(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;

kml_unpack_version(&rec->old_objectv, &ptr, end);
kml_unpack_version(&rec->new_objectv, &ptr, end);
kml_unpack_version(&rec->old_parentv, &ptr, end);
kml_unpack_version(&rec->new_parentv, &ptr, end);
LUNLOGV(rec->pathlen, __u32, ptr, end);
LUNLOGV(rec->targetlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);
UNLOGL(rec->target, char, rec->targetlen, ptr, end);

/* publish the new read position only after everything was consumed */
*buf = ptr;

return 0;
}
 
 
/*
 * Unpack the body of a SETATTR record from *buf into rec.
 *
 * Consumes the old object version, the new attribute set (valid mask,
 * mode, uid, gid, size, mtime, ctime, flags), the previous
 * mode/rdev/uid/gid, and finally pathlen and the path bytes.
 * Returns 0 and advances *buf when the end of the function is reached.
 *
 * NOTE(review): LUNLOGV/UNLOGL are macros defined outside this view;
 * presumably they bounds-check against end and may return early -- confirm.
 * NOTE(review): the result of kml_unpack_version() is not examined here.
 */
static int kml_unpack_setattr(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;

kml_unpack_version(&rec->old_objectv, &ptr, end);
LUNLOGV(rec->valid, __u32, ptr, end);
LUNLOGV(rec->mode, __u32, ptr, end);
LUNLOGV(rec->uid, __u32, ptr, end);
LUNLOGV(rec->gid, __u32, ptr, end);
LUNLOGV(rec->size, __u64, ptr, end);
LUNLOGV(rec->mtime, __u64, ptr, end);
LUNLOGV(rec->ctime, __u64, ptr, end);
LUNLOGV(rec->flags, __u32, ptr, end);
LUNLOGV(rec->old_mode, __u32, ptr, end);
LUNLOGV(rec->old_rdev, __u32, ptr, end);
LUNLOGV(rec->old_uid, __u64, ptr, end);
LUNLOGV(rec->old_gid, __u64, ptr, end);
LUNLOGV(rec->pathlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);
/* publish the new read position only after everything was consumed */
*buf = ptr;

return 0;
}
 
 
/*
 * Unpack the body of a LINK record from *buf into rec.
 *
 * Consumes the old/new parent versions and the new object version, then
 * pathlen, targetlen and the path and target bytes.  Returns 0 and
 * advances *buf when the end of the function is reached.
 *
 * NOTE(review): LUNLOGV/UNLOGL are macros defined outside this view;
 * presumably they bounds-check against end and may return early -- confirm.
 * NOTE(review): the result of kml_unpack_version() is not examined here.
 */
static int kml_unpack_link(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;

kml_unpack_version(&rec->old_parentv, &ptr, end);
kml_unpack_version(&rec->new_parentv, &ptr, end);
kml_unpack_version(&rec->new_objectv, &ptr, end);
LUNLOGV(rec->pathlen, __u32, ptr, end);
LUNLOGV(rec->targetlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);
UNLOGL(rec->target, char, rec->targetlen, ptr, end);

/* publish the new read position only after everything was consumed */
*buf = ptr;

return 0;
}
 
/*
 * Unpack the body of a MKNOD record from *buf into rec.
 *
 * Consumes the old/new parent versions and the new object version, then
 * mode, uid, gid, the device major and minor numbers, pathlen and the
 * path bytes.  Returns 0 and advances *buf when the end of the function
 * is reached.
 *
 * NOTE(review): LUNLOGV/UNLOGL are macros defined outside this view;
 * presumably they bounds-check against end and may return early -- confirm.
 * NOTE(review): the result of kml_unpack_version() is not examined here.
 */
static int kml_unpack_mknod(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;

kml_unpack_version(&rec->old_parentv, &ptr, end);
kml_unpack_version(&rec->new_parentv, &ptr, end);
kml_unpack_version(&rec->new_objectv, &ptr, end);
LUNLOGV(rec->mode, __u32, ptr, end);
LUNLOGV(rec->uid, __u32, ptr, end);
LUNLOGV(rec->gid, __u32, ptr, end);
LUNLOGV(rec->major, __u32, ptr, end);
LUNLOGV(rec->minor, __u32, ptr, end);
LUNLOGV(rec->pathlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);

/* publish the new read position only after everything was consumed */
*buf = ptr;

return 0;
}
 
 
/*
 * Placeholder unpacker for WRITE records.
 *
 * Consumes nothing: rec, *buf and end are left untouched.  It only prints
 * a notice and reports success (0).
 */
static int kml_unpack_write(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        (void)rec;
        (void)buf;
        (void)end;

        printf("NOT IMPLEMENTED");
        return status;
}
 
 
/*
 * Placeholder unpacker for RELEASE records.
 *
 * Consumes nothing: rec, *buf and end are left untouched.  It only prints
 * a notice and reports success (0).
 */
static int kml_unpack_release(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        (void)rec;
        (void)buf;
        (void)end;

        printf("NOT IMPLEMENTED");
        return status;
}
 
 
/*
 * Placeholder unpacker for TRUNC records.
 *
 * Consumes nothing: rec, *buf and end are left untouched.  It only prints
 * a notice and reports success (0).
 */
static int kml_unpack_trunc(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        (void)rec;
        (void)buf;
        (void)end;

        printf("NOT IMPLEMENTED");
        return status;
}
 
 
/*
 * Unpack the body of a SETEXTATTR record from *buf into rec.
 *
 * Consumes the old and new object versions, flags, mode, the three length
 * words (pathlen, namelen, targetlen) and then the path, name and target
 * byte strings.  Returns 0 and advances *buf when the end of the function
 * is reached.
 *
 * NOTE(review): LUNLOGV/UNLOGL are macros defined outside this view;
 * presumably they bounds-check against end and may return early -- confirm.
 * NOTE(review): the result of kml_unpack_version() is not examined here.
 */
static int kml_unpack_setextattr(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;

kml_unpack_version(&rec->old_objectv, &ptr, end);
kml_unpack_version(&rec->new_objectv, &ptr, end);
LUNLOGV(rec->flags, __u32, ptr, end);
LUNLOGV(rec->mode, __u32, ptr, end);
LUNLOGV(rec->pathlen, __u32, ptr, end);
LUNLOGV(rec->namelen, __u32, ptr, end);
LUNLOGV(rec->targetlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);
UNLOGL(rec->name, char, rec->namelen, ptr, end);
UNLOGL(rec->target, char, rec->targetlen, ptr, end);

/* publish the new read position only after everything was consumed */
*buf = ptr;

return 0;
}
 
 
/*
 * Unpack the body of a DELEXTATTR record from *buf into rec.
 *
 * Same fixed-size fields as SETEXTATTR (versions, flags, mode, pathlen,
 * namelen, targetlen) followed by the path and name byte strings.
 * The targetlen word is read but no target bytes are consumed here.
 * Returns 0 and advances *buf when the end of the function is reached.
 *
 * NOTE(review): LUNLOGV/UNLOGL are macros defined outside this view;
 * presumably they bounds-check against end and may return early -- confirm.
 * NOTE(review): the result of kml_unpack_version() is not examined here.
 */
static int kml_unpack_delextattr(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;

kml_unpack_version(&rec->old_objectv, &ptr, end);
kml_unpack_version(&rec->new_objectv, &ptr, end);
LUNLOGV(rec->flags, __u32, ptr, end);
LUNLOGV(rec->mode, __u32, ptr, end);
LUNLOGV(rec->pathlen, __u32, ptr, end);
LUNLOGV(rec->namelen, __u32, ptr, end);
LUNLOGV(rec->targetlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);
UNLOGL(rec->name, char, rec->namelen, ptr, end);

/* publish the new read position only after everything was consumed */
*buf = ptr;

return 0;
}
 
/*
 * Placeholder unpacker for OPEN records.
 *
 * Consumes nothing: rec, *buf and end are left untouched.  It only prints
 * a notice and reports success (0).
 */
static int kml_unpack_open(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        (void)rec;
        (void)buf;
        (void)end;

        printf("NOT IMPLEMENTED");
        return status;
}
 
/*
 * Placeholder unpacker for KML_TRUNC records.
 *
 * Consumes nothing: rec, *buf and end are left untouched.  It only prints
 * a notice and reports success (0).
 */
static int kml_unpack_kml_trunc(struct kml_rec *rec, char **buf, char *end)
{
        const int status = 0;

        (void)rec;
        (void)buf;
        (void)end;

        printf("NOT IMPLEMENTED");
        return status;
}
 
 
/*
 * Signature shared by every per-opcode body unpacker: read fields for rec
 * starting at *buf, never past end, and return 0 or an error code.
 */
typedef int (*unpacker)(struct kml_rec *rec, char **buf, char *end);

/*
 * Dispatch table indexed by KML opcode.  kml_unpack() range-checks the
 * opcode against KML_OPCODE_NUM and then calls unpackers[opcode].
 */
static unpacker unpackers[KML_OPCODE_NUM] =
{
[KML_OPCODE_NOOP] = kml_unpack_noop,
[KML_OPCODE_CREATE] = kml_unpack_create,
[KML_OPCODE_MKDIR] = kml_unpack_mkdir,
[KML_OPCODE_UNLINK] = kml_unpack_unlink,
[KML_OPCODE_RMDIR] = kml_unpack_rmdir,
[KML_OPCODE_CLOSE] = kml_unpack_close,
[KML_OPCODE_SYMLINK] = kml_unpack_symlink,
[KML_OPCODE_RENAME] = kml_unpack_rename,
[KML_OPCODE_SETATTR] = kml_unpack_setattr,
[KML_OPCODE_LINK] = kml_unpack_link,
[KML_OPCODE_OPEN] = kml_unpack_open,
[KML_OPCODE_MKNOD] = kml_unpack_mknod,
[KML_OPCODE_WRITE] = kml_unpack_write,
[KML_OPCODE_RELEASE] = kml_unpack_release,
[KML_OPCODE_TRUNC] = kml_unpack_trunc,
[KML_OPCODE_SETEXTATTR] = kml_unpack_setextattr,
[KML_OPCODE_DELEXTATTR] = kml_unpack_delextattr,
[KML_OPCODE_KML_TRUNC] = kml_unpack_kml_trunc,
[KML_OPCODE_GET_FILEID] = kml_unpack_get_fileid
};
 
int kml_unpack_prefix(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;
int n;
 
UNLOGP(rec->prefix.hdr, struct kml_prefix_hdr, ptr, end);
rec->prefix.hdr->len = NTOH__u32(rec->prefix.hdr->len);
rec->prefix.hdr->version = NTOH__u32(rec->prefix.hdr->version);
rec->prefix.hdr->pid = NTOH__u32(rec->prefix.hdr->pid);
rec->prefix.hdr->auid = NTOH__u32(rec->prefix.hdr->auid);
rec->prefix.hdr->fsuid = NTOH__u32(rec->prefix.hdr->fsuid);
rec->prefix.hdr->fsgid = NTOH__u32(rec->prefix.hdr->fsgid);
rec->prefix.hdr->opcode = NTOH__u32(rec->prefix.hdr->opcode);
rec->prefix.hdr->ngroups = NTOH__u32(rec->prefix.hdr->ngroups);
 
UNLOGL(rec->prefix.groups, __u32, rec->prefix.hdr->ngroups, ptr, end);
for (n = 0; n < rec->prefix.hdr->ngroups; n++) {
rec->prefix.groups[n] = NTOH__u32(rec->prefix.groups[n]);
}
 
*buf = ptr;
 
return 0;
}
 
int kml_unpack_suffix(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;
 
UNLOGP(rec->suffix, struct kml_suffix, ptr, end);
rec->suffix->prevrec = NTOH__u32(rec->suffix->prevrec);
rec->suffix->recno = NTOH__u32(rec->suffix->recno);
rec->suffix->time = NTOH__u32(rec->suffix->time);
rec->suffix->len = NTOH__u32(rec->suffix->len);
 
*buf = ptr;
 
return 0;
}
 
/*
 * Skip zero-filled 32-bit padding words starting at ptr.
 *
 * A word is only inspected when at least sizeof(__u32) bytes remain before
 * end, so a short tail of 1-3 bytes is never read past.  Returns the
 * position of the first non-zero word, or the position where fewer than
 * four bytes remain.
 */
static char *kml_skip_zero_pad(char *ptr, char *end)
{
        while (ptr < end && (size_t)(end - ptr) >= sizeof(__u32)) {
                if (*(__u32 *)ptr)
                        break;
                ptr += sizeof(__u32);
        }
        return ptr;
}

/*
 * Unpack one complete KML record (prefix, opcode-specific body, suffix)
 * from the buffer [*buf, end) into rec.
 *
 * *buf must be 4-byte aligned.  Leading zero padding is skipped and *buf
 * is moved to the start of the record before unpacking, so on a failure
 * after that point *buf is left at the record start.  On success *buf is
 * moved past the record and any zero padding that follows it.
 *
 * Returns 0 on success, -EINVAL for a misaligned buffer, an out-of-range
 * opcode, mismatching prefix/suffix lengths, a length that is not a
 * multiple of 4 or a consumed size that differs from the recorded length,
 * or whatever error the prefix/body/suffix unpacker reported.
 */
int kml_unpack(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        if (((unsigned long)ptr % 4) != 0) {
                printf("InterMezzo: %s: record misaligned.\n", __FUNCTION__);
                return -EINVAL;
        }

        /* record start; the length check below is measured from here */
        ptr = kml_skip_zero_pad(ptr, end);
        *buf = ptr;

        memset(rec, 0, sizeof(*rec));

        err = kml_unpack_prefix(rec, &ptr, end);
        if (err) {
                printf("InterMezzo: %s: unpack_prefix failed: %d\n",
                       __FUNCTION__, err);
                return err;
        }

        if (rec->prefix.hdr->opcode < 0 ||
            rec->prefix.hdr->opcode >= KML_OPCODE_NUM) {
                printf("InterMezzo: %s: invalid opcode (%d)\n",
                       __FUNCTION__, rec->prefix.hdr->opcode);
                return -EINVAL;
        }
        err = unpackers[rec->prefix.hdr->opcode](rec, &ptr, end);
        if (err) {
                printf("InterMezzo: %s: unpacker failed: %d\n",
                       __FUNCTION__, err);
                return err;
        }

        err = kml_unpack_suffix(rec, &ptr, end);
        if (err) {
                printf("InterMezzo: %s: unpack_suffix failed: %d\n",
                       __FUNCTION__, err);
                return err;
        }

        /* cross-check the two recorded lengths and the bytes consumed */
        if (rec->prefix.hdr->len != rec->suffix->len) {
                printf("InterMezzo: %s: lengths don't match\n",
                       __FUNCTION__);
                return -EINVAL;
        }
        if ((rec->prefix.hdr->len % 4) != 0) {
                printf("InterMezzo: %s: record length not a "
                       "multiple of 4.\n", __FUNCTION__);
                return -EINVAL;
        }
        if (ptr - *buf != rec->prefix.hdr->len) {
                printf("InterMezzo: %s: unpacking error\n",
                       __FUNCTION__);
                return -EINVAL;
        }

        /* leave *buf at the next record (or at the end of the data) */
        *buf = kml_skip_zero_pad(ptr, end);
        return 0;
}
 
 
#ifndef __KERNEL__
/* Map a NULL string pointer to "" so it is safe to hand to a %s format.
 * The whole expansion is parenthesized so the macro can be used inside
 * larger expressions; note that ptr is evaluated twice. */
#define STR(ptr) ((ptr) ? (ptr) : "")
 
/* OPNAME(X) expands to the designated initializer [KML_OPCODE_X] = "X". */
#define OPNAME(n) [KML_OPCODE_##n] = #n
/* Printable name for each KML opcode, indexed by opcode value. */
static char *opnames[KML_OPCODE_NUM] = {
OPNAME(NOOP),
OPNAME(CREATE),
OPNAME(MKDIR),
OPNAME(UNLINK),
OPNAME(RMDIR),
OPNAME(CLOSE),
OPNAME(SYMLINK),
OPNAME(RENAME),
OPNAME(SETATTR),
OPNAME(LINK),
OPNAME(OPEN),
OPNAME(MKNOD),
OPNAME(WRITE),
OPNAME(RELEASE),
OPNAME(TRUNC),
OPNAME(SETEXTATTR),
OPNAME(DELEXTATTR),
OPNAME(KML_TRUNC),
OPNAME(GET_FILEID)
};
#undef OPNAME
 
/*
 * Look up the printable name of a KML opcode.
 * Returns the opnames[] entry for op, or NULL when op lies outside the
 * table.  The returned string is not allocated and must not be freed.
 */
static char *print_opname(int op)
{
        if (op >= 0 && op < sizeof (opnames) / sizeof (*opnames))
                return opnames[op];

        return NULL;
}
 
 
/*
 * Format a seconds-since-epoch value as "YYYY/MM/DD HH:MM:SS" (UTC).
 *
 * The value is converted to a real time_t instead of reinterpreting the
 * __u64 storage through a time_t pointer, which reads the wrong bytes when
 * time_t is narrower than 64 bits on a big-endian machine.  If the value
 * cannot be broken down by gmtime() the raw number is printed instead.
 *
 * Only built for userspace: the enclosing region is already guarded by
 * #ifndef __KERNEL__, so the former kernel-only fallback was unreachable.
 *
 * Returns a strdup()ed string the caller must free(), or NULL if strdup
 * fails.
 */
static char *print_time(__u64 i)
{
        char buf[128];
        time_t when = (time_t)i;
        struct tm *tm;

        memset(buf, 0, sizeof(buf));

        tm = gmtime(&when);
        if (tm == NULL ||
            strftime(buf, sizeof(buf), "%Y/%m/%d %H:%M:%S", tm) == 0)
                snprintf(buf, sizeof(buf), "%llu", (unsigned long long)i);

        return strdup(buf);
}
 
/*
 * Render a presto_version as "mtime ..., ctime ..., len N".
 *
 * A NULL version, or one whose pv_ctime is 0, yields an empty string.
 * The text is built with a bounded snprintf, the size is cast to match
 * its %lld conversion, and a failed print_time() allocation is shown as
 * an empty field rather than passed to %s as NULL.
 *
 * Returns a strdup()ed string the caller must free().
 */
static char *print_version(struct presto_version *ver)
{
        char ver_buf[128];
        char *mtime;
        char *ctime;

        if (!ver || ver->pv_ctime == 0) {
                return strdup("");
        }

        mtime = print_time(ver->pv_mtime);
        ctime = print_time(ver->pv_ctime);
        snprintf(ver_buf, sizeof(ver_buf), "mtime %s, ctime %s, len %lld",
                 mtime ? mtime : "", ctime ? ctime : "",
                 (long long)ver->pv_size);
        free(mtime);
        free(ctime);

        return strdup(ver_buf);
}
 
 
/*
 * Produce a printable description of an unpacked KML record.
 *
 * brief != 0: a single line with record number, opcode name, path and
 *             target.
 * brief == 0: a multi-line dump of the prefix, suffix, path data,
 *             attributes and all six version blocks.
 *
 * Every 64-bit argument is cast to the exact type its conversion expects;
 * in particular the inode number is printed with %llu (it is unpacked as a
 * 64-bit value, so %d would misread it and every argument after it on
 * ABIs where int is narrower).
 *
 * Returns a string allocated by g_strdup_printf(); the caller owns it.
 */
char *kml_print_rec(struct kml_rec *rec, int brief)
{
        char *str;
        char *nov, *oov, *ntv, *otv, *npv, *opv;
        char *rectime, *mtime, *ctime;

        if (brief) {
                str = g_strdup_printf(" %08d %7s %*s %*s",
                                      rec->suffix->recno,
                                      print_opname (rec->prefix.hdr->opcode),
                                      rec->pathlen, STR(rec->path),
                                      rec->targetlen, STR(rec->target));
                return str;
        }

        rectime = print_time(rec->suffix->time);
        mtime = print_time(rec->mtime);
        ctime = print_time(rec->ctime);

        nov = print_version(rec->new_objectv);
        oov = print_version(rec->old_objectv);
        ntv = print_version(rec->new_targetv);
        otv = print_version(rec->old_targetv);
        npv = print_version(rec->new_parentv);
        opv = print_version(rec->old_parentv);

        str = g_strdup_printf("\n -- Record:\n"
                " Recno %d\n"
                " KML off %lld\n"
                " Version %d\n"
                " Len %d\n"
                " Suf len %d\n"
                " Time %s\n"
                " Opcode %d\n"
                " Op %s\n"
                " Pid %d\n"
                " AUid %d\n"
                " Fsuid %d\n"
                " Fsgid %d\n"
                " Prevrec %d\n"
                " Ngroups %d\n"
                //" Groups @{$self->{groups}}\n"
                " -- Path:\n"
                " Inode %llu\n"
                " Gen num %u\n"
                " Old mode %o\n"
                " Old rdev %x\n"
                " Old uid %llu\n"
                " Old gid %llu\n"
                " Path %*s\n"
                //" Open_mode %o\n",
                " Pathlen %d\n"
                " Tgt %*s\n"
                " Tgtlen %d\n"
                " Old Tgt %*s\n"
                " Old Tgtln %d\n"
                " -- Attr:\n"
                " Valid %x\n"
                " mode %o, uid %d, gid %d, size %lld, mtime %s, ctime %s rdev %x (%d:%d)\n"
                " -- Versions:\n"
                " New object %s\n"
                " Old object %s\n"
                " New target %s\n"
                " Old target %s\n"
                " New parent %s\n"
                " Old parent %s\n",
                rec->suffix->recno,
                (long long)rec->offset,
                rec->prefix.hdr->version,
                rec->prefix.hdr->len,
                rec->suffix->len,
                rectime,
                rec->prefix.hdr->opcode,
                print_opname (rec->prefix.hdr->opcode),
                rec->prefix.hdr->pid,
                rec->prefix.hdr->auid,
                rec->prefix.hdr->fsuid,
                rec->prefix.hdr->fsgid,
                rec->suffix->prevrec,
                rec->prefix.hdr->ngroups,
                (unsigned long long)rec->ino,
                rec->generation,
                rec->old_mode,
                rec->old_rdev,
                (unsigned long long)rec->old_uid,
                (unsigned long long)rec->old_gid,
                /* each %*s takes a width argument followed by the string */
                rec->pathlen,
                STR(rec->path),
                rec->pathlen,
                rec->targetlen,
                STR(rec->target),
                rec->targetlen,
                rec->old_targetlen,
                STR(rec->old_target),
                rec->old_targetlen,
                rec->valid,
                rec->mode,
                rec->uid,
                rec->gid,
                (long long)rec->size,
                mtime,
                ctime,
                rec->rdev, rec->major, rec->minor,
                nov, oov, ntv, otv, npv, opv);

        free(nov);
        free(oov);
        free(ntv);
        free(otv);
        free(npv);
        free(opv);

        free(rectime);
        free(ctime);
        free(mtime);

        return str;
}
#endif
/kml_reint.c
0,0 → 1,630
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Reintegration of KML records
*
*/
 
#define __NO_VERSION__
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
/*
 * Build the credential/namespace context described by a KML record and
 * switch to it with push_ctxt(); the previous context is stored in *saved
 * for a later pop_ctxt().
 *
 * fsuid/fsgid come from the record prefix, the data segment is KERNEL_DS,
 * and both pwd and root are set to the fileset's dentry/mount.
 *
 * The group count comes from the record, so it is clamped to the capacity
 * of ctxt.groups[] before copying; an oversized count previously wrote
 * past the end of the on-stack context.
 */
static void kmlreint_pre_secure(struct kml_rec *rec, struct file *dir,
                                struct run_ctxt *saved)
{
        struct run_ctxt ctxt;
        struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
        const unsigned long max_groups =
                sizeof(ctxt.groups) / sizeof(ctxt.groups[0]);
        unsigned long ngroups = 0;
        unsigned long i;

        ctxt.fsuid = rec->prefix.hdr->fsuid;
        ctxt.fsgid = rec->prefix.hdr->fsgid;
        ctxt.fs = KERNEL_DS;
        ctxt.pwd = dd->dd_fset->fset_dentry;
        ctxt.pwdmnt = dd->dd_fset->fset_mnt;

        ctxt.root = ctxt.pwd;
        ctxt.rootmnt = ctxt.pwdmnt;

        /* same "> 0" test as before, then bound by the array capacity */
        if (rec->prefix.hdr->ngroups > 0)
                ngroups = rec->prefix.hdr->ngroups;
        if (ngroups > max_groups)
                ngroups = max_groups;

        ctxt.ngroups = ngroups;
        for (i = 0; i < ngroups; i++)
                ctxt.groups[i] = rec->prefix.groups[i];

        push_ctxt(saved, &ctxt);
}
 
 
/*
 * Join two length-counted path components into a newly allocated,
 * NUL-terminated string, inserting a single '/' between them when p1 does
 * not already end in one.
 *
 * p1/p1len and p2/p2len need not be NUL-terminated.  An empty p1 yields
 * just p2 (no separator is added and nothing before the buffer is read).
 *
 * Returns the kmalloc()ed result, which the caller must kfree(), or NULL
 * when the allocation fails or a length is negative or too large to add.
 */
static char * path_join(char *p1, int p1len, char *p2, int p2len)
{
        int size;
        char *path;

        /* lengths are derived from record fields; refuse nonsense values */
        if (p1len < 0 || p2len < 0)
                return NULL;
        if (p1len > INT_MAX - 2 - p2len)
                return NULL;

        size = p1len + p2len + 2; /* possibly one extra /, one NUL */
        path = kmalloc(size, GFP_KERNEL);
        if (path == NULL)
                return NULL;

        if (p1len > 0) {
                memcpy(path, p1, p1len);
                if (path[p1len - 1] != '/') {
                        path[p1len] = '/';
                        p1len++;
                }
        }
        if (p2len > 0)
                memcpy(path + p1len, p2, p2len);
        path[p1len + p2len] = '\0';

        return path;
}
 
/*
 * Non-zero when rec carries the record number that directly follows the
 * fileset's fset_lento_recno.
 */
static inline int kml_recno_equal(struct kml_rec *rec,
                                  struct presto_file_set *fset)
{
        const int is_next = (rec->suffix->recno == fset->fset_lento_recno + 1);

        return is_next;
}
 
/*
 * Compare a recorded version against an inode.
 *
 * Returns 1 when no version was supplied, or when the mtimes match and
 * either the inode is a directory or the sizes match as well.  Returns 0
 * otherwise, including (with an error message) for a NULL inode.
 */
static inline int version_equal(struct presto_version *a, struct inode *inode)
{
        if (!a)
                return 1;

        if (!inode) {
                CERROR("InterMezzo: NULL inode in version_equal()\n");
                return 0;
        }

        if (inode->i_mtime != a->pv_mtime)
                return 0;

        /* directories are compared by mtime only */
        if (S_ISDIR(inode->i_mode))
                return 1;

        return inode->i_size == a->pv_size ? 1 : 0;
}
 
static int reint_close(struct kml_rec *rec, struct file *file,
struct lento_vfs_context *given_info)
{
struct run_ctxt saved_ctxt;
int error;
struct presto_file_set *fset;
struct lento_vfs_context info;
ENTRY;
 
memcpy(&info, given_info, sizeof(*given_info));
 
 
CDEBUG (D_KML, "=====REINT_CLOSE::%s\n", rec->path);
 
fset = presto_fset(file->f_dentry);
if (fset->fset_flags & FSET_DATA_ON_DEMAND) {
struct iattr iattr;
 
iattr.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_SIZE;
iattr.ia_mtime = (time_t)rec->new_objectv->pv_mtime;
iattr.ia_ctime = (time_t)rec->new_objectv->pv_ctime;
iattr.ia_size = (time_t)rec->new_objectv->pv_size;
 
/* no kml record, but update last rcvd */
/* save fileid in dentry for later backfetch */
info.flags |= LENTO_FL_EXPECT | LENTO_FL_SET_DDFILEID;
info.remote_ino = rec->ino;
info.remote_generation = rec->generation;
info.flags &= ~LENTO_FL_KML;
kmlreint_pre_secure(rec, file, &saved_ctxt);
error = lento_setattr(rec->path, &iattr, &info);
pop_ctxt(&saved_ctxt);
 
presto_d2d(file->f_dentry)->dd_flags &= ~PRESTO_DATA;
} else {
int minor = presto_f2m(fset);
 
info.updated_time = rec->new_objectv->pv_mtime;
memcpy(&info.remote_version, rec->old_objectv,
sizeof(*rec->old_objectv));
info.remote_ino = rec->ino;
info.remote_generation = rec->generation;
error = izo_upc_backfetch(minor, rec->path, fset->fset_name,
&info);
if (error) {
CERROR("backfetch error %d\n", error);
/* if file doesn't exist anymore, then ignore the CLOSE
* and just update the last_rcvd.
*/
if (error == ENOENT) {
CDEBUG(D_KML, "manually updating remote offset uuid %s"
"recno %d offset %Lu\n", info.uuid, info.recno, info.kml_offset);
error = izo_rcvd_upd_remote(fset, info.uuid, info.recno, info.kml_offset);
if(error)
CERROR("izo_rcvd_upd_remote error %d\n", error);
 
}
}
/* propagate error to avoid further reint */
}
 
EXIT;
return error;
}
 
static int reint_create(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
int error; ENTRY;
 
CDEBUG (D_KML, "=====REINT_CREATE::%s\n", rec->path);
info->updated_time = rec->new_objectv->pv_ctime;
kmlreint_pre_secure(rec, dir, &saved_ctxt);
error = lento_create(rec->path, rec->mode, info);
pop_ctxt(&saved_ctxt);
 
EXIT;
return error;
}
 
static int reint_link(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
int error;
 
ENTRY;
 
CDEBUG (D_KML, "=====REINT_LINK::%s -> %s\n", rec->path, rec->target);
info->updated_time = rec->new_objectv->pv_mtime;
kmlreint_pre_secure(rec, dir, &saved_ctxt);
error = lento_link(rec->path, rec->target, info);
pop_ctxt(&saved_ctxt);
 
EXIT;
return error;
}
 
static int reint_mkdir(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
int error;
 
ENTRY;
 
CDEBUG (D_KML, "=====REINT_MKDIR::%s\n", rec->path);
info->updated_time = rec->new_objectv->pv_ctime;
kmlreint_pre_secure(rec, dir, &saved_ctxt);
error = lento_mkdir(rec->path, rec->mode, info);
pop_ctxt(&saved_ctxt);
 
EXIT;
return error;
}
 
static int reint_mknod(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
int error, dev;
 
ENTRY;
 
CDEBUG (D_KML, "=====REINT_MKNOD::%s\n", rec->path);
info->updated_time = rec->new_objectv->pv_ctime;
kmlreint_pre_secure(rec, dir, &saved_ctxt);
 
dev = rec->rdev ?: MKDEV(rec->major, rec->minor);
 
error = lento_mknod(rec->path, rec->mode, dev, info);
pop_ctxt(&saved_ctxt);
 
EXIT;
return error;
}
 
 
/*
 * Reintegrate a NOOP record: nothing to do, always succeeds.
 * All three arguments are ignored.
 */
static int reint_noop(struct kml_rec *rec, struct file *dir,
                      struct lento_vfs_context *info)
{
        (void)rec;
        (void)dir;
        (void)info;

        return 0;
}
 
static int reint_rename(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
int error;
 
ENTRY;
 
CDEBUG (D_KML, "=====REINT_RENAME::%s -> %s\n", rec->path, rec->target);
info->updated_time = rec->new_objectv->pv_mtime;
kmlreint_pre_secure(rec, dir, &saved_ctxt);
error = lento_rename(rec->path, rec->target, info);
pop_ctxt(&saved_ctxt);
 
EXIT;
return error;
}
 
/*
 * Reintegrate an RMDIR record.
 *
 * The directory to remove is named by joining rec->path (without its last
 * byte, hence pathlen - 1) and rec->target.  The join is done on a
 * temporary allocation that is freed before returning.
 *
 * Returns -EINVAL when the record carries no path (pathlen - 1 would
 * otherwise wrap around), -ENOMEM when the joined path cannot be
 * allocated, or the result of lento_rmdir().
 */
static int reint_rmdir(struct kml_rec *rec, struct file *dir,
                       struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        int error;
        char *path;

        ENTRY;

        /* pathlen comes from the record; guard the "- 1" below */
        if (rec->path == NULL || rec->pathlen == 0) {
                EXIT;
                return -EINVAL;
        }

        path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen);
        if (path == NULL) {
                EXIT;
                return -ENOMEM;
        }

        CDEBUG (D_KML, "=====REINT_RMDIR::%s\n", path);
        info->updated_time = rec->new_parentv->pv_mtime;
        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        error = lento_rmdir(path, info);
        pop_ctxt(&saved_ctxt);

        kfree(path);
        EXIT;
        return error;
}
 
static int reint_setattr(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
struct iattr iattr;
int error;
 
ENTRY;
 
iattr.ia_valid = rec->valid;
iattr.ia_mode = (umode_t)rec->mode;
iattr.ia_uid = (uid_t)rec->uid;
iattr.ia_gid = (gid_t)rec->gid;
iattr.ia_size = (off_t)rec->size;
iattr.ia_ctime = (time_t)rec->ctime;
iattr.ia_mtime = (time_t)rec->mtime;
iattr.ia_atime = iattr.ia_mtime; /* We don't track atimes. */
iattr.ia_attr_flags = rec->flags;
 
CDEBUG (D_KML, "=====REINT_SETATTR::%s (%d)\n", rec->path, rec->valid);
kmlreint_pre_secure(rec, dir, &saved_ctxt);
error = lento_setattr(rec->path, &iattr, info);
pop_ctxt(&saved_ctxt);
 
EXIT;
return error;
}
 
static int reint_symlink(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
int error;
 
ENTRY;
 
CDEBUG (D_KML, "=====REINT_SYMLINK::%s -> %s\n", rec->path, rec->target);
info->updated_time = rec->new_objectv->pv_ctime;
kmlreint_pre_secure(rec, dir, &saved_ctxt);
error = lento_symlink(rec->target, rec->path, info);
pop_ctxt(&saved_ctxt);
 
EXIT;
return error;
}
 
/*
 * Reintegrate an UNLINK record.
 *
 * The file to remove is named by joining rec->path (without its last
 * byte, hence pathlen - 1) and rec->target.  The join is done on a
 * temporary allocation that is freed before returning.
 *
 * Returns -EINVAL when the record carries no path (pathlen - 1 would
 * otherwise wrap around), -ENOMEM when the joined path cannot be
 * allocated, or the result of lento_unlink().
 */
static int reint_unlink(struct kml_rec *rec, struct file *dir,
                        struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        int error;
        char *path;

        ENTRY;

        /* pathlen comes from the record; guard the "- 1" below */
        if (rec->path == NULL || rec->pathlen == 0) {
                EXIT;
                return -EINVAL;
        }

        path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen);
        if (path == NULL) {
                EXIT;
                return -ENOMEM;
        }

        CDEBUG (D_KML, "=====REINT_UNLINK::%s\n", path);
        info->updated_time = rec->new_parentv->pv_mtime;
        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        error = lento_unlink(path, info);
        pop_ctxt(&saved_ctxt);

        kfree(path);
        EXIT;
        return error;
}
 
/*
 * Rename handling for branch filesets.
 *
 * First tries the ordinary rename.  Only when that fails with -ENOENT is
 * the raw KML record appended to the fileset's log via presto_log() and,
 * if that succeeds, last_rcvd updated.  Any other result of the rename is
 * returned unchanged.
 */
static int branch_reint_rename(struct presto_file_set *fset, struct kml_rec *rec,
                               struct file *dir, struct lento_vfs_context *info,
                               char * kml_data, __u64 kml_size)
{
        struct rec_info log_rec;
        int rc;

        ENTRY;

        rc = reint_rename(rec, dir, info);
        if (rc != -ENOENT) {
                EXIT;
                return rc;
        }

        /* normal reint failed because path was not found */
        CDEBUG(D_KML, "saving branch rename kml\n");
        log_rec.is_kml = 1;
        log_rec.size = kml_size;
        rc = presto_log(fset, &log_rec, kml_data, kml_size,
                        NULL, 0, NULL, 0, NULL, 0);
        if (rc == 0)
                rc = presto_write_last_rcvd(&log_rec, fset, info);

        EXIT;
        return rc;
}
 
/*
 * Reintegrate one record into a branch fileset.
 *
 * CLOSE  -> regular close handling (reint_close).
 * RENAME -> branch_reint_rename().
 * other  -> the raw record is appended to the fileset's log via
 *           presto_log() and, if that succeeds, last_rcvd is updated.
 *
 * Returns 0 or the error of the step that failed.
 */
int branch_reinter(struct presto_file_set *fset, struct kml_rec *rec,
                   struct file *dir, struct lento_vfs_context *info,
                   char * kml_data, __u64 kml_size)
{
        struct rec_info log_rec;
        int rc = 0;

        switch ((int)rec->prefix.hdr->opcode) {
        case KML_OPCODE_CLOSE:
                /* regular close and backfetch */
                rc = reint_close(rec, dir, info);
                break;

        case KML_OPCODE_RENAME:
                /* rename only if name already exists */
                rc = branch_reint_rename(fset, rec, dir, info,
                                         kml_data, kml_size);
                break;

        default:
                /* just rewrite kml into branch/kml and update last_rcvd */
                CDEBUG(D_KML, "Saving branch kml\n");
                log_rec.is_kml = 1;
                log_rec.size = kml_size;
                rc = presto_log(fset, &log_rec, kml_data, kml_size,
                                NULL, 0, NULL, 0, NULL, 0);
                if (rc == 0)
                        rc = presto_write_last_rcvd(&log_rec, fset, info);
                break;
        }

        return rc;
}
 
/* Signature shared by all per-opcode reintegration handlers. */
typedef int (*reinter_t)(struct kml_rec *rec, struct file *basedir,
struct lento_vfs_context *info);

/*
 * Reintegration handler for each KML opcode, indexed by opcode value.
 * Opcodes without an entry here are left NULL by the initializer, which
 * kml_reint_rec() reports as an unrecognized opcode.
 */
static reinter_t presto_reinters[KML_OPCODE_NUM] =
{
[KML_OPCODE_CLOSE] = reint_close,
[KML_OPCODE_CREATE] = reint_create,
[KML_OPCODE_LINK] = reint_link,
[KML_OPCODE_MKDIR] = reint_mkdir,
[KML_OPCODE_MKNOD] = reint_mknod,
[KML_OPCODE_NOOP] = reint_noop,
[KML_OPCODE_RENAME] = reint_rename,
[KML_OPCODE_RMDIR] = reint_rmdir,
[KML_OPCODE_SETATTR] = reint_setattr,
[KML_OPCODE_SYMLINK] = reint_symlink,
[KML_OPCODE_UNLINK] = reint_unlink,
};
 
/*
 * Fetch the reintegration handler for an opcode.
 * Returns the presto_reinters[] entry for op, or NULL when op lies outside
 * the table.
 */
static inline reinter_t get_reinter(int op)
{
        if (op >= 0 && op < sizeof(presto_reinters) / sizeof(reinter_t))
                return presto_reinters[op];

        return NULL;
}
 
int kml_reint_rec(struct file *dir, struct izo_ioctl_data *data)
{
char *ptr;
char *end;
struct kml_rec rec;
int error = 0;
struct lento_vfs_context info;
struct presto_cache *cache;
struct presto_file_set *fset;
struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
int op;
reinter_t reinter;
 
struct izo_rcvd_rec lr_rec;
int off;
 
ENTRY;
 
error = presto_prep(dir->f_dentry, &cache, &fset);
if ( error ) {
CERROR("intermezzo: Reintegration on invalid file\n");
return error;
}
 
if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) {
CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n",
dir->f_dentry->d_inode->i_ino);
return -EINVAL;
}
 
if (data->ioc_plen1 > 64 * 1024) {
EXIT;
return -ENOSPC;
}
 
ptr = fset->fset_reint_buf;
end = ptr + data->ioc_plen1;
 
if (copy_from_user(ptr, data->ioc_pbuf1, data->ioc_plen1)) {
EXIT;
error = -EFAULT;
goto out;
}
 
error = kml_unpack(&rec, &ptr, end);
if (error) {
EXIT;
error = -EFAULT;
goto out;
}
 
off = izo_rcvd_get(&lr_rec, fset, data->ioc_uuid);
if (off < 0) {
CERROR("No last_rcvd record, setting to 0\n");
memset(&lr_rec, 0, sizeof(lr_rec));
}
data->ioc_kmlsize = ptr - fset->fset_reint_buf;
 
if (rec.suffix->recno != lr_rec.lr_remote_recno + 1) {
CERROR("KML record number %Lu expected, not %d\n",
lr_rec.lr_remote_recno + 1,
rec.suffix->recno);
 
#if 0
if (!version_check(&rec, dd->dd_fset, &info)) {
/* FIXME: do an upcall to resolve conflicts */
CERROR("intermezzo: would be a conflict!\n");
error = -EINVAL;
EXIT;
goto out;
}
#endif
}
 
op = rec.prefix.hdr->opcode;
 
reinter = get_reinter(op);
if (!reinter) {
CERROR("%s: Unrecognized KML opcode %d\n", __FUNCTION__, op);
error = -EINVAL;
EXIT;
goto out;
}
 
info.kml_offset = data->ioc_offset + data->ioc_kmlsize;
info.recno = rec.suffix->recno;
info.flags = LENTO_FL_EXPECT;
if (data->ioc_flags)
info.flags |= LENTO_FL_KML;
 
memcpy(info.uuid, data->ioc_uuid, sizeof(info.uuid));
 
if (fset->fset_flags & FSET_IS_BRANCH && data->ioc_flags)
error = branch_reinter(fset, &rec, dir, &info, fset->fset_reint_buf,
data->ioc_kmlsize);
else
error = reinter(&rec, dir, &info);
out:
EXIT;
return error;
}
 
int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data)
{
char *buf = NULL;
char *ptr;
char *end;
struct kml_rec rec;
struct file *file;
struct presto_cache *cache;
struct presto_file_set *fset;
struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
struct run_ctxt saved_ctxt;
int error;
 
ENTRY;
 
error = presto_prep(dir->f_dentry, &cache, &fset);
if ( error ) {
CERROR("intermezzo: Reintegration on invalid file\n");
return error;
}
 
if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) {
CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n",
dir->f_dentry->d_inode->i_ino);
return -EINVAL;
}
 
 
PRESTO_ALLOC(buf, data->ioc_plen1);
if (!buf) {
EXIT;
return -ENOMEM;
}
ptr = buf;
end = buf + data->ioc_plen1;
 
if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
EXIT;
PRESTO_FREE(buf, data->ioc_plen1);
return -EFAULT;
}
 
error = kml_unpack(&rec, &ptr, end);
if (error) {
EXIT;
PRESTO_FREE(buf, data->ioc_plen1);
return -EFAULT;
}
 
kmlreint_pre_secure(&rec, dir, &saved_ctxt);
 
file = filp_open(rec.path, O_RDONLY, 0);
if (!file || IS_ERR(file)) {
error = PTR_ERR(file);
goto out;
}
data->ioc_ino = file->f_dentry->d_inode->i_ino;
data->ioc_generation = file->f_dentry->d_inode->i_generation;
filp_close(file, 0);
 
CDEBUG(D_FILE, "%s ino %Lx, gen %Lx\n", rec.path,
data->ioc_ino, data->ioc_generation);
 
out:
if (buf)
PRESTO_FREE(buf, data->ioc_plen1);
pop_ctxt(&saved_ctxt);
EXIT;
return error;
}
 
 
/journal_obdfs.c
0,0 → 1,194
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <linux/types.h>
#include <linux/param.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#ifdef CONFIG_OBDFS_FS
#include /usr/src/obd/include/linux/obdfs.h
#endif
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#ifdef CONFIG_OBDFS_FS
 
 
/*
 * Free-space query for the obdfs journal ops.
 * Both arguments are ignored; the fixed value 0x0fffff is always reported.
 */
static unsigned long presto_obdfs_freespace(struct presto_file_set *fset,
                                            struct super_block *sb)
{
        const unsigned long fixed_free = 0x0fffff;

        (void)fset;
        (void)sb;

        return fixed_free;
}
 
/*
 * Start a filesystem journal operation for obdfs.
 * No transaction is actually opened: all arguments are ignored and the
 * constant non-NULL handle (void *) 1 is returned.
 */
static void *presto_obdfs_trans_start(struct presto_file_set *fset,
                                      struct inode *inode,
                                      int op)
{
        void *dummy_handle = (void *) 1;

        (void)fset;
        (void)inode;
        (void)op;

        return dummy_handle;
}
 
#if 0
int jblocks;
int trunc_blks, one_path_blks, extra_path_blks,
extra_name_blks, lml_blks;
__u32 avail_kmlblocks;
 
if ( presto_no_journal(fset) ||
strcmp(fset->fset_cache->cache_type, "ext3"))
{
CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
fset->fset_cache->cache_type);
return NULL;
}
 
avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count;
if ( avail_kmlblocks < 3 ) {
return ERR_PTR(-ENOSPC);
}
if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR)
&& avail_kmlblocks < 6 ) {
return ERR_PTR(-ENOSPC);
}
 
/* Need journal space for:
at least three writes to KML (two one block writes, one a path)
possibly a second name (unlink, rmdir)
possibly a second path (symlink, rename)
a one block write to the last rcvd file
*/
 
trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1;
one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode);
extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode);
 
/* additional blocks appear for "two pathname" operations
and operations involving the LML records
*/
switch (op) {
case PRESTO_OP_TRUNC:
jblocks = one_path_blks + extra_name_blks + trunc_blks
+ EXT3_DELETE_TRANS_BLOCKS;
break;
case PRESTO_OP_RELEASE:
/*
jblocks = one_path_blks + lml_blks + 2*trunc_blks;
*/
jblocks = one_path_blks;
break;
case PRESTO_OP_SETATTR:
jblocks = one_path_blks + trunc_blks + 1 ;
break;
case PRESTO_OP_CREATE:
jblocks = one_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS + 3;
break;
case PRESTO_OP_LINK:
jblocks = one_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS;
break;
case PRESTO_OP_UNLINK:
jblocks = one_path_blks + extra_name_blks + trunc_blks
+ EXT3_DELETE_TRANS_BLOCKS;
break;
case PRESTO_OP_SYMLINK:
jblocks = one_path_blks + extra_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS + 5;
break;
case PRESTO_OP_MKDIR:
jblocks = one_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS + 4;
break;
case PRESTO_OP_RMDIR:
jblocks = one_path_blks + extra_name_blks + trunc_blks
+ EXT3_DELETE_TRANS_BLOCKS;
break;
case PRESTO_OP_MKNOD:
jblocks = one_path_blks + trunc_blks +
EXT3_DATA_TRANS_BLOCKS + 3;
break;
case PRESTO_OP_RENAME:
jblocks = one_path_blks + extra_path_blks + trunc_blks +
2 * EXT3_DATA_TRANS_BLOCKS + 2;
break;
case PRESTO_OP_WRITE:
jblocks = one_path_blks;
/* add this when we can wrap our transaction with
that of ext3_file_write (ordered writes)
+ EXT3_DATA_TRANS_BLOCKS;
*/
break;
default:
CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
return NULL;
}
 
CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks);
return journal_start(EXT3_JOURNAL(inode), jblocks);
}
#endif
 
/*
 * Commit hook for the obdfs journal ops.
 * The whole body is compiled out with #if 0, so as built this function
 * does nothing with fset or handle.
 */
void presto_obdfs_trans_commit(struct presto_file_set *fset, void *handle)
{
#if 0
if ( presto_no_journal(fset) || !handle)
return;

journal_stop(handle);
#endif
}
 
/*
 * Request journaled data writes for an inode.
 * When EXT3_JOURNAL_DATA_FL is defined, that flag is OR-ed into the
 * inode's ext3 i_flags; otherwise the body is empty and the build emits
 * the #warning below.
 */
void presto_obdfs_journal_file_data(struct inode *inode)
{
#ifdef EXT3_JOURNAL_DATA_FL
inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
#else
#warning You must have a facility to enable journaled writes for recovery!
#endif
}
 
/* Journal operation table that wires the obdfs hooks defined in this file. */
struct journal_ops presto_obdfs_journal_ops = {
.tr_avail = presto_obdfs_freespace,
.tr_start = presto_obdfs_trans_start,
.tr_commit = presto_obdfs_trans_commit,
.tr_journal_data = presto_obdfs_journal_file_data
};
 
#endif
/journal_xfs.c
0,0 → 1,162
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
 
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#if 0
/* XFS Support not there yet */
#ifdef CONFIG_FS_XFS
#include <linux/xfs_fs.h>
#endif
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
#include <linux/intermezzo_journal.h>
 
#if 0
 
/* XFS has journalling, but these functions do nothing yet... */
 
/*
 * Report the free space available for journal transactions on XFS.
 *
 * As built this always returns the constant 0x0fffffff; neither fset nor
 * sb is consulted.  The "#if 0" section is an unfinished sketch: it
 * references `statp` and `sbp`, neither of which is declared in this
 * function, so it would not compile if simply enabled.
 */
static unsigned long presto_xfs_freespace(struct presto_file_set *fset,
struct super_block *sb)
{

#if 0
vfs_t *vfsp = LINVFS_GET_VFS(sb);
struct statvfs_t stat;
bhv_desc_t *bdp;
unsigned long avail;
int rc;

VFS_STATVFS(vfsp, &stat, NULL, rc);
avail = statp.f_bfree;

return sbp->sb_fdblocks;;
#endif
return 0x0fffffff;
}
 
 
/*
 * Start a filesystem journal transaction for a presto operation.
 *
 * The PRESTO_OP_* code in op is translated to an XFS_TRANS_* type and
 * handed, together with inode, to xfs_trans_start(); that call's result is
 * returned unchanged.  An op with no mapping logs a D_JOURNAL debug
 * message and yields NULL.  fset is not used.
 */
static void *
presto_xfs_trans_start(struct presto_file_set *fset,
                       struct inode *inode, int op)
{
        /* do a free blocks check as in journal_ext3? does anything protect
         * the space in that case or can it disappear out from under us
         * anyway? */

        /* copied from xfs_trans.h, skipping header maze for now */
#define XFS_TRANS_SETATTR_NOT_SIZE 1
#define XFS_TRANS_SETATTR_SIZE 2
#define XFS_TRANS_INACTIVE 3
#define XFS_TRANS_CREATE 4
#define XFS_TRANS_CREATE_TRUNC 5
#define XFS_TRANS_TRUNCATE_FILE 6
#define XFS_TRANS_REMOVE 7
#define XFS_TRANS_LINK 8
#define XFS_TRANS_RENAME 9
#define XFS_TRANS_MKDIR 10
#define XFS_TRANS_RMDIR 11
#define XFS_TRANS_SYMLINK 12

        /* map the op onto the values for XFS so it can do reservation. if
         * we don't have enough info to differentiate between e.g. setattr
         * with or without size, what do we do? will it adjust? */
        switch (op) {
        case PRESTO_OP_SETATTR:
                /* or XFS_TRANS_SETATTR_NOT_SIZE? */
                return xfs_trans_start(inode, XFS_TRANS_SETATTR_SIZE);

        case PRESTO_OP_CREATE:  /* or CREATE_TRUNC? */
        case PRESTO_OP_MKNOD:   /* XXX can't find an analog for mknod? */
                return xfs_trans_start(inode, XFS_TRANS_CREATE);

        case PRESTO_OP_LINK:
                return xfs_trans_start(inode, XFS_TRANS_LINK);

        case PRESTO_OP_UNLINK:
                return xfs_trans_start(inode, XFS_TRANS_REMOVE);

        case PRESTO_OP_SYMLINK:
                return xfs_trans_start(inode, XFS_TRANS_SYMLINK);

        case PRESTO_OP_MKDIR:
                return xfs_trans_start(inode, XFS_TRANS_MKDIR);

        case PRESTO_OP_RMDIR:
                return xfs_trans_start(inode, XFS_TRANS_RMDIR);

        case PRESTO_OP_RENAME:
                return xfs_trans_start(inode, XFS_TRANS_RENAME);

        default:
                break;
        }

        CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
        return NULL;
}
 
/*
 * Commit hook for XFS: passes handle straight to xfs_trans_stop().
 * fset is not used, and handle is not checked for NULL here.
 */
static void presto_xfs_trans_commit(struct presto_file_set *fset, void *handle)
{
/* assert (handle == current->j_handle) */
xfs_trans_stop(handle);
}
 
/* Journaled-data hook for XFS; intentionally a no-op, inode is untouched. */
static void presto_xfs_journal_file_data(struct inode *inode)
{
}
 
/*
 * Placeholder for the .tr_all_data hook: it calls BUG() unconditionally.
 * The "return 0" only matters if BUG() returns.
 */
static int presto_xfs_has_all_data(struct inode *inode)
{
BUG();
return 0;
}
 
/* Journal operations table that wires up the presto_xfs_* hooks. */
struct journal_ops presto_xfs_journal_ops = {
        .tr_start        = presto_xfs_trans_start,
        .tr_commit       = presto_xfs_trans_commit,
        .tr_avail        = presto_xfs_freespace,
        .tr_all_data     = presto_xfs_has_all_data,
        .tr_journal_data = presto_xfs_journal_file_data,
};
 
#endif
 
 
#endif /* 0 -- XFS support not there yet */
 
/ext_attr.c
0,0 → 1,205
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Tacit Networks, Inc.
* Author: Shirish H. Phatak <shirish@tacitnetworks.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Extended attribute handling for presto.
*/
 
#define __NO_VERSION__
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <linux/unistd.h>
 
#include <asm/system.h>
#include <asm/uaccess.h>
 
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <linux/string.h>
#include <asm/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/segment.h>
#include <linux/smp_lock.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
#ifdef CONFIG_FS_EXT_ATTR
#include <linux/ext_attr.h>
 
extern inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
unsigned long value);
 
 
/* VFS interface */
/* XXX! Fixme test for user defined attributes */
/*
 * Set an extended attribute on an InterMezzo inode.
 *
 * inode      - inode the attribute is set on
 * name       - attribute name, passed through to presto_do_set_ext_attr()
 * buffer     - attribute value; treated as a user-space pointer when
 *              EXT_ATTR_FLAG_USER is set in flags
 * buffer_len - length of buffer in bytes
 * flags      - EXT_ATTR_* flags; EXT_ATTR_FLAG_USER is stripped before the
 *              call into presto_do_set_ext_attr()
 *
 * Returns -1 when presto_i2m() reports a negative minor, -EINVAL for a
 * Lento minor or an inode without a dentry alias, the error from
 * presto_prep(), -ENOMEM / -EFAULT when the user buffer cannot be copied,
 * -EROFS when no permit is obtained, and otherwise the result of
 * presto_do_set_ext_attr().
 */
int presto_set_ext_attr(struct inode *inode,
                        const char *name, void *buffer,
                        size_t buffer_len, int flags)
{
        int error;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct lento_vfs_context info;
        struct dentry *dentry;
        int minor = presto_i2m(inode);
        char *buf = NULL;
        int buf_allocated = 0;  /* set once buf is our own kernel copy */

        ENTRY;
        if (minor < 0) {
                EXIT;
                return -1;
        }

        if ( ISLENTO(minor) ) {
                EXIT;
                return -EINVAL;
        }

        /* BAD...vfs should really pass down the dentry to use, especially
         * since every other operation in iops does. But for now
         * we do a reverse mapping from inode to the first dentry
         */
        if (list_empty(&inode->i_dentry)) {
                CERROR("No alias for inode %d\n", (int) inode->i_ino);
                EXIT;
                return -EINVAL;
        }

        dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);

        error = presto_prep(dentry, &cache, &fset);
        if ( error ) {
                EXIT;
                return error;
        }

        /* If buffer is a user space pointer copy it to kernel space
         * and reset the flag. We do this since the journal functions need
         * access to the contents of the buffer, and the file system
         * does not care. When we actually invoke the function, we remove
         * the EXT_ATTR_FLAG_USER flag.
         *
         * XXX:Check if the "fs does not care" assertion is always true -SHP
         * (works for ext3)
         */
        if ((buffer != NULL) && (buffer_len != 0) &&
            (flags & EXT_ATTR_FLAG_USER)) {
                PRESTO_ALLOC(buf, buffer_len);
                if (!buf) {
                        CERROR("InterMezzo: out of memory!!!\n");
                        EXIT;
                        return -ENOMEM;
                }
                buf_allocated = 1;
                if (copy_from_user(buf, buffer, buffer_len)) {
                        /* Do not leak the kernel copy on a faulting copy. */
                        PRESTO_FREE(buf, buffer_len);
                        EXIT;
                        return -EFAULT;
                }
        } else {
                buf = buffer;
        }

        if ( presto_get_permit(inode) < 0 ) {
                error = -EROFS;
                goto out_free;
        }

        /* Simulate presto_setup_info */
        memset(&info, 0, sizeof(info));
        /* For now redundant..but we keep it around just in case */
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(cache->cache_psdev->uc_minor))
                info.flags |= LENTO_FL_KML;

        /* We pass in the kernel space pointer and reset the
         * EXT_ATTR_FLAG_USER flag.
         * See comments above.
         */
        /* Note that mode is already set by VFS so we send in a NULL */
        error = presto_do_set_ext_attr(fset, dentry, name, buf,
                                       buffer_len, flags & ~EXT_ATTR_FLAG_USER,
                                       NULL, &info);
        presto_put_permit(inode);

out_free:
        /* Free only what was allocated here; when no copy was made, buf
         * merely aliases the caller's buffer (which may be NULL). */
        if (buf_allocated) {
                PRESTO_FREE(buf, buffer_len);
        }
        EXIT;
        return error;
}
 
/* Lento Interface */
/* XXX: ignore flags? We should be forcing these operations through? -SHP*/
/*
 * Set an extended attribute on the object named by a user-supplied path.
 *
 * path is copied in with getname(), resolved with presto_walk(), and the
 * attribute is applied through presto_do_set_ext_attr() on the resulting
 * dentry's fileset.  A NULL buffer forces buffer_len to 0.  The whole
 * operation runs under the big kernel lock.
 *
 * Returns the getname() or presto_walk() error, -EINVAL when the dentry
 * belongs to no fileset, and otherwise the result of
 * presto_do_set_ext_attr().
 */
int lento_set_ext_attr(const char *path, const char *name,
                       void *buffer, size_t buffer_len, int flags, mode_t mode,
                       struct lento_vfs_context *info)
{
        int error;
        char *pathname;
        struct nameidata nd;
        struct dentry *dentry;
        struct presto_file_set *fset;

        ENTRY;
        lock_kernel();

        pathname = getname(path);
        if (IS_ERR(pathname)) {
                error = PTR_ERR(pathname);
                goto exit;
        }

        /* Note that ext_attrs apply to both files and directories..*/
        error = presto_walk(pathname, &nd);
        if (error)
                goto exit_path; /* pathname must still be released */
        dentry = nd.dentry;

        fset = presto_fset(dentry);
        if ( !fset ) {
                CERROR("No fileset!\n");
                error = -EINVAL;
                goto exit_dentry;
        }

        if (buffer == NULL)
                buffer_len = 0;

        error = presto_do_set_ext_attr(fset, dentry, name, buffer,
                                       buffer_len, flags, &mode, info);
exit_dentry:
        path_release(&nd);
exit_path:
        putname(pathname);
exit:
        unlock_kernel();
        EXIT;
        return error;
}
 
#endif /*CONFIG_FS_EXT_ATTR*/
/fileset.c
0,0 → 1,675
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Managing filesets
*
*/
 
#define __NO_VERSION__
#include <stdarg.h>
 
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
 
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/module.h>
 
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
 
static inline struct presto_file_set *presto_dentry2fset(struct dentry *dentry)
{
if (presto_d2d(dentry) == NULL) {
EXIT;
return NULL;
}
return presto_d2d(dentry)->dd_fset;
}
 
/*
 * Find the fileset that a dentry belongs to.
 *
 * Walks from de up through d_parent until a dentry with an attached
 * fileset is found, or until a dentry that is its own parent is reached.
 * Returns the fileset, or NULL when no ancestor carries one.
 */
struct presto_file_set *presto_fset(struct dentry *de)
{
        struct dentry *fsde;
        struct presto_file_set *fset;

        ENTRY;
        if ( !de->d_inode ) {
                /* FIXME: is this ok to be NULL? */
                /* "%.*s" bounds the name by its length; the length is cast
                 * because the precision argument must be an int. */
                CDEBUG(D_INODE,"presto_fset: warning %.*s has NULL inode.\n",
                       (int)de->d_name.len, de->d_name.name);
        }
        for (fsde = de;; fsde = fsde->d_parent) {
                fset = presto_dentry2fset(fsde);
                if ( fset ) {
                        EXIT;
                        return fset;
                }
                /* a dentry that is its own parent ends the walk */
                if (fsde->d_parent == fsde)
                        break;
        }
        EXIT;
        return NULL;
}
 
/*
 * Fetch the last KML record number of the fileset rooted at path.
 *
 * path  - pathname resolved with presto_walk()
 * recno - out: receives fset->fset_kml.fd_recno on success
 *
 * Returns 0 on success, the presto_walk() error, -ENXIO when the inode is
 * not a presto inode, or -EINVAL when the dentry itself carries no
 * fileset.  *recno is left untouched on failure.
 */
int presto_get_lastrecno(char *path, off_t *recno)
{
        struct nameidata nd;
        struct presto_file_set *fset;
        int error;

        ENTRY;

        error = presto_walk(path, &nd);
        if (error) {
                EXIT;
                return error;
        }

        if ( !presto_ispresto(nd.dentry->d_inode) ) {
                error = -ENXIO;
                goto kml_out;
        }

        /* Single lookup: the dentry itself must be a fileset root. */
        fset = presto_dentry2fset(nd.dentry);
        if (!fset) {
                error = -EINVAL;
                goto kml_out;
        }

        *recno = fset->fset_kml.fd_recno;
        error = 0;

kml_out:
        path_release(&nd);
        EXIT;
        return error;
}
 
/*
 * Build the string "/.intermezzo/<fsetname>/<name>" in a buffer obtained
 * from PRESTO_ALLOC.
 *
 * The buffer is sized to the string length plus the terminating NUL.
 * Returns the new string, or NULL when the allocation fails.
 */
static char * _izo_make_path(char *fsetname, char *name)
{
        char *path = NULL;
        int len = strlen("/.intermezzo/")       /* directory prefix */
                + strlen(fsetname)
                + 1                             /* '/' separator */
                + strlen(name)
                + 1;                            /* terminating NUL */

        PRESTO_ALLOC(path, len);
        if (path != NULL)
                sprintf(path, "/.intermezzo/%s/%s", fsetname, name);

        return path;
}
 
/*
 * Build the path of `name` under the given fileset by forwarding
 * fset->fset_name and name to _izo_make_path(); returns its result.
 */
char * izo_make_path(struct presto_file_set *fset, char *name)
{
return _izo_make_path(fset->fset_name, name);
}
 
static struct file *_izo_fset_open(char *fsetname, char *name, int flags, int mode)
{
char *path;
struct file *f;
int error;
ENTRY;
 
path = _izo_make_path(fsetname, name);
if (path == NULL) {
EXIT;
return ERR_PTR(-ENOMEM);
}
 
CDEBUG(D_INODE, "opening file %s\n", path);
f = filp_open(path, flags, mode);
error = PTR_ERR(f);
if (IS_ERR(f)) {
CDEBUG(D_INODE, "Error %d\n", error);
}
 
PRESTO_FREE(path, strlen(path));
 
EXIT;
return f;
 
}
 
/*
 * Open `name` inside the given fileset by forwarding fset->fset_name,
 * name, flags and mode to _izo_fset_open(); returns its result.
 */
struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode)
{
return _izo_fset_open(fset->fset_name, name, flags, mode);
}
 
 
 
/*
* note: this routine "pins" a dentry for a fileset root
*/
int presto_set_fsetroot(struct dentry *ioctl_dentry, char *fsetname,
unsigned int flags)
{
struct presto_file_set *fset = NULL;
struct presto_cache *cache;
int error;
struct file *fset_root;
struct dentry *dentry;
 
ENTRY;
 
fset_root = _izo_fset_open(fsetname, "ROOT", O_RDONLY, 000);
if (IS_ERR(fset_root)) {
CERROR("Can't open %s/ROOT\n", fsetname);
EXIT;
error = PTR_ERR(fset_root);
goto out;
}
dentry = dget(fset_root->f_dentry);
filp_close(fset_root, NULL);
 
dentry->d_inode->i_op = ioctl_dentry->d_inode->i_op;
dentry->d_inode->i_fop = ioctl_dentry->d_inode->i_fop;
dentry->d_op = ioctl_dentry->d_op;
fset = presto_dentry2fset(dentry);
if (fset && (fset->fset_dentry == dentry) ) {
CERROR("Fsetroot already set (inode %ld)\n",
dentry->d_inode->i_ino);
/* XXX: ignore because clear_fsetroot is broken */
#if 0
dput(dentry);
EXIT;
error = -EEXIST;
goto out;
#endif
}
 
cache = presto_get_cache(dentry->d_inode);
if (!cache) {
CERROR("No cache found for inode %ld\n",
dentry->d_inode->i_ino);
EXIT;
error = -ENODEV;
goto out_free;
}
 
PRESTO_ALLOC(fset, sizeof(*fset));
if ( !fset ) {
CERROR("No memory allocating fset for %s\n", fsetname);
EXIT;
error = -ENOMEM;
goto out_free;
}
CDEBUG(D_INODE, "fset at %p\n", fset);
 
CDEBUG(D_INODE, "InterMezzo: fsetroot: inode %ld, fileset name %s\n",
dentry->d_inode->i_ino, fsetname);
 
fset->fset_mnt = mntget(current->fs->pwdmnt);
fset->fset_cache = cache;
fset->fset_dentry = dentry;
fset->fset_name = strdup(fsetname);
fset->fset_chunkbits = CHUNK_BITS;
fset->fset_flags = flags;
fset->fset_file_maxio = FSET_DEFAULT_MAX_FILEIO;
fset->fset_permit_lock = SPIN_LOCK_UNLOCKED;
PRESTO_ALLOC(fset->fset_reint_buf, 64 * 1024);
if (fset->fset_reint_buf == NULL) {
EXIT;
error = -ENOMEM;
goto out_free;
}
init_waitqueue_head(&fset->fset_permit_queue);
 
if (presto_d2d(dentry) == NULL) {
dentry->d_fsdata = izo_alloc_ddata();
}
if (presto_d2d(dentry) == NULL) {
CERROR("InterMezzo: %s: no memory\n", __FUNCTION__);
EXIT;
error = -ENOMEM;
goto out_free;
}
presto_d2d(dentry)->dd_fset = fset;
list_add(&fset->fset_list, &cache->cache_fset_list);
 
error = izo_init_kml_file(fset, &fset->fset_kml);
if ( error ) {
EXIT;
CDEBUG(D_JOURNAL, "Error init_kml %d\n", error);
goto out_list_del;
}
 
error = izo_init_lml_file(fset, &fset->fset_lml);
if ( error ) {
int rc;
EXIT;
rc = izo_log_close(&fset->fset_kml);
CDEBUG(D_JOURNAL, "Error init_lml %d, cleanup %d\n", error, rc);
goto out_list_del;
}
 
/* init_last_rcvd_file could trigger a presto_file_write(), which
* requires that the lml structure be initialized. -phil */
error = izo_init_last_rcvd_file(fset, &fset->fset_rcvd);
if ( error ) {
int rc;
EXIT;
rc = izo_log_close(&fset->fset_kml);
rc = izo_log_close(&fset->fset_lml);
CDEBUG(D_JOURNAL, "Error init_lastrcvd %d, cleanup %d\n", error, rc);
goto out_list_del;
}
 
CDEBUG(D_PIOCTL, "-------> fset at %p, dentry at %p, mtpt %p,"
"fset %s, cache %p, presto_d2d(dentry)->dd_fset %p\n",
fset, dentry, fset->fset_dentry, fset->fset_name, cache,
presto_d2d(dentry)->dd_fset);
 
EXIT;
return 0;
 
out_list_del:
list_del(&fset->fset_list);
presto_d2d(dentry)->dd_fset = NULL;
out_free:
if (fset) {
mntput(fset->fset_mnt);
if (fset->fset_reint_buf != NULL)
PRESTO_FREE(fset->fset_reint_buf, 64 * 1024);
PRESTO_FREE(fset, sizeof(*fset));
}
dput(dentry);
out:
return error;
}
 
/*
 * Tear down a fileset and release everything it holds.
 *
 * Closes the KML, LML and last_rcvd logs (logging each failure), unlinks
 * the fileset from its list, detaches it from its root dentry, drops the
 * dentry and mount references and frees the name, the reintegration
 * buffer and the fileset itself.
 *
 * Returns the result of closing the last_rcvd log; failures closing the
 * KML or LML are only logged.
 */
static int izo_cleanup_fset(struct presto_file_set *fset)
{
        int rc;

        ENTRY;

        CERROR("Cleaning up fset %s\n", fset->fset_name);

        rc = izo_log_close(&fset->fset_kml);
        if (rc != 0) {
                CERROR("InterMezzo: Closing kml for fset %s: %d\n",
                       fset->fset_name, rc);
        }

        rc = izo_log_close(&fset->fset_lml);
        if (rc != 0) {
                CERROR("InterMezzo: Closing lml for fset %s: %d\n",
                       fset->fset_name, rc);
        }

        rc = izo_log_close(&fset->fset_rcvd);
        if (rc != 0) {
                CERROR("InterMezzo: Closing last_rcvd for fset %s: %d\n",
                       fset->fset_name, rc);
        }

        /* unlink from the fileset list and unpin the root dentry */
        list_del(&fset->fset_list);
        presto_d2d(fset->fset_dentry)->dd_fset = NULL;
        dput(fset->fset_dentry);
        mntput(fset->fset_mnt);

        PRESTO_FREE(fset->fset_name, strlen(fset->fset_name) + 1);
        PRESTO_FREE(fset->fset_reint_buf, 64 * 1024);
        PRESTO_FREE(fset, sizeof(*fset));

        EXIT;
        return rc;
}
 
int izo_clear_fsetroot(struct dentry *dentry)
{
struct presto_file_set *fset;
 
ENTRY;
 
fset = presto_dentry2fset(dentry);
if (!fset) {
EXIT;
return -EINVAL;
}
 
izo_cleanup_fset(fset);
EXIT;
return 0;
}
 
/*
 * Clean up every fileset on cache->cache_fset_list.
 *
 * The successor of each entry is saved before the entry is cleaned up,
 * because izo_cleanup_fset() unlinks and frees it.  The walk stops at the
 * first cleanup that reports an error, leaving the remaining filesets on
 * the list.  Returns 0 or that error.
 */
int izo_clear_all_fsetroots(struct presto_cache *cache)
{
        struct list_head *head = &cache->cache_fset_list;
        struct list_head *pos, *next;
        int error = 0;

        for (pos = head->next; pos != head; pos = next) {
                struct presto_file_set *fset;

                next = pos->next;
                fset = list_entry(pos, struct presto_file_set, fset_list);

                error = izo_cleanup_fset(fset);
                if (error)
                        break;
        }

        return error;
}
 
static struct vfsmount *izo_alloc_vfsmnt(void)
{
struct vfsmount *mnt;
PRESTO_ALLOC(mnt, sizeof(*mnt));
if (mnt) {
memset(mnt, 0, sizeof(struct vfsmount));
atomic_set(&mnt->mnt_count,1);
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
INIT_LIST_HEAD(&mnt->mnt_list);
}
return mnt;
}
 
 
static void izo_setup_ctxt(struct dentry *root, struct vfsmount *mnt,
struct run_ctxt *save)
{
struct run_ctxt new;
 
mnt->mnt_root = root;
mnt->mnt_sb = root->d_inode->i_sb;
unlock_super(mnt->mnt_sb);
 
new.rootmnt = mnt;
new.root = root;
new.pwdmnt = mnt;
new.pwd = root;
new.fsuid = 0;
new.fsgid = 0;
new.fs = get_fs();
/* XXX where can we get the groups from? */
new.ngroups = 0;
 
push_ctxt(save, &new);
}
 
/*
 * Counterpart of izo_setup_ctxt(): re-takes the superblock lock that
 * setup released, then restores the saved run context with pop_ctxt().
 */
static void izo_cleanup_ctxt(struct vfsmount *mnt, struct run_ctxt *save)
{
lock_super(mnt->mnt_sb);
pop_ctxt(save);
}
 
static int izo_simple_mkdir(struct dentry *dir, char *name, int mode)
{
struct dentry *dchild;
int err;
ENTRY;
dchild = lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild)) {
EXIT;
return PTR_ERR(dchild);
}
 
if (dchild->d_inode) {
dput(dchild);
EXIT;
return -EEXIST;
}
 
err = vfs_mkdir(dir->d_inode, dchild, mode);
dput(dchild);
EXIT;
return err;
}
 
static int izo_simple_symlink(struct dentry *dir, char *name, char *tgt)
{
struct dentry *dchild;
int err;
ENTRY;
dchild = lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild)) {
EXIT;
return PTR_ERR(dchild);
}
 
if (dchild->d_inode) {
dput(dchild);
EXIT;
return -EEXIST;
}
 
err = vfs_symlink(dir->d_inode, dchild, tgt);
dput(dchild);
EXIT;
return err;
}
 
/*
* run set_fsetroot in chroot environment
*/
int presto_set_fsetroot_from_ioc(struct dentry *root, char *fsetname,
unsigned int flags)
{
int rc;
struct presto_cache *cache;
struct vfsmount *mnt;
struct run_ctxt save;
 
if (root != root->d_inode->i_sb->s_root) {
CERROR ("IOC_SET_FSET must be called on mount point\n");
return -ENODEV;
}
 
cache = presto_get_cache(root->d_inode);
mnt = cache->cache_vfsmount;
if (!mnt) {
EXIT;
return -ENOMEM;
}
izo_setup_ctxt(root, mnt, &save);
rc = presto_set_fsetroot(root, fsetname, flags);
izo_cleanup_ctxt(mnt, &save);
return rc;
}
 
/* XXX: this function should detect if fsetname is already in use for
the cache under root
*/
int izo_prepare_fileset(struct dentry *root, char *fsetname)
{
int err;
struct dentry *dotizo = NULL, *fsetdir = NULL, *dotiopen = NULL;
struct presto_cache *cache;
struct vfsmount *mnt;
struct run_ctxt save;
 
cache = presto_get_cache(root->d_inode);
mnt = cache->cache_vfsmount = izo_alloc_vfsmnt();
if (!mnt) {
EXIT;
return -ENOMEM;
}
if (!fsetname)
fsetname = "rootfset";
 
izo_setup_ctxt(root, mnt, &save);
 
err = izo_simple_mkdir(root, ".intermezzo", 0755);
CDEBUG(D_CACHE, "mkdir on .intermezzo err %d\n", err);
 
err = izo_simple_mkdir(root, "..iopen..", 0755);
CDEBUG(D_CACHE, "mkdir on ..iopen.. err %d\n", err);
 
dotiopen = lookup_one_len("..iopen..", root, strlen("..iopen.."));
if (IS_ERR(dotiopen)) {
EXIT;
goto out;
}
dotiopen->d_inode->i_op = &presto_dir_iops;
dput(dotiopen);
 
 
dotizo = lookup_one_len(".intermezzo", root, strlen(".intermezzo"));
if (IS_ERR(dotizo)) {
EXIT;
goto out;
}
 
 
err = izo_simple_mkdir(dotizo, fsetname, 0755);
CDEBUG(D_CACHE, "mkdir err %d\n", err);
 
/* XXX find the dentry of the root of the fileset (root for now) */
fsetdir = lookup_one_len(fsetname, dotizo, strlen(fsetname));
if (IS_ERR(fsetdir)) {
EXIT;
goto out;
}
 
err = izo_simple_symlink(fsetdir, "ROOT", "../..");
 
/* XXX read flags from flags file */
err = presto_set_fsetroot(root, fsetname, 0);
CDEBUG(D_CACHE, "set_fsetroot err %d\n", err);
 
out:
if (dotizo && !IS_ERR(dotizo))
dput(dotizo);
if (fsetdir && !IS_ERR(fsetdir))
dput(fsetdir);
izo_cleanup_ctxt(mnt, &save);
return err;
}
 
int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data)
{
int rc = 0;
struct presto_cache *cache;
struct vfsmount *mnt;
struct run_ctxt save;
struct nameidata nd;
struct dentry *dentry;
struct presto_dentry_data *dd;
struct dentry *root;
char *buf = NULL;
 
ENTRY;
 
 
root = dir->f_dentry;
 
/* actually, needs to be called on ROOT of fset, not mount point
if (root != root->d_inode->i_sb->s_root) {
CERROR ("IOC_SET_FSET must be called on mount point\n");
return -ENODEV;
}
*/
 
cache = presto_get_cache(root->d_inode);
mnt = cache->cache_vfsmount;
if (!mnt) {
EXIT;
return -ENOMEM;
}
izo_setup_ctxt(root, mnt, &save);
PRESTO_ALLOC(buf, data->ioc_plen1);
if (!buf) {
rc = -ENOMEM;
EXIT;
goto out;
}
if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
rc = -EFAULT;
EXIT;
goto out;
}
 
rc = presto_walk(buf, &nd);
if (rc) {
CERROR("Unable to open: %s\n", buf);
EXIT;
goto out;
}
dentry = nd.dentry;
if (!dentry) {
CERROR("no dentry!\n");
rc = -EINVAL;
EXIT;
goto out_close;
}
dd = presto_d2d(dentry);
if (!dd) {
CERROR("no dentry_data!\n");
rc = -EINVAL;
EXIT;
goto out_close;
}
 
CDEBUG(D_FILE,"de:%p dd:%p\n", dentry, dd);
 
if (dd->remote_ino != 0) {
CERROR("remote_ino already set? %Lx:%Lx\n", dd->remote_ino,
dd->remote_generation);
rc = 0;
EXIT;
goto out_close;
}
 
 
CDEBUG(D_FILE,"setting %p %p, %s to %Lx:%Lx\n", dentry, dd,
buf, data->ioc_ino,
data->ioc_generation);
dd->remote_ino = data->ioc_ino;
dd->remote_generation = data->ioc_generation;
 
EXIT;
out_close:
path_release(&nd);
out:
if (buf)
PRESTO_FREE(buf, data->ioc_plen1);
izo_cleanup_ctxt(mnt, &save);
return rc;
}
/Makefile
0,0 → 1,15
#
# Makefile 1.00 Peter Braam <braam@clusterfs.com>
#
# Lists the InterMezzo object files and then pulls in the shared build
# rules from $(TOPDIR)/Rules.make.
#

# Target object; presumably the combined object linked from obj-y by
# Rules.make -- confirm against the kernel build documentation.
O_TARGET := intermezzo.o

# Object files that make up InterMezzo.
obj-y := cache.o dcache.o dir.o ext_attr.o file.o fileset.o inode.o \
journal.o journal_ext2.o journal_ext3.o journal_obdfs.o \
journal_reiserfs.o journal_tmpfs.o journal_xfs.o kml_reint.o \
kml_unpack.o methods.o presto.o psdev.o replicator.o super.o \
sysctl.o upcall.o vfs.o

# The target object is also listed as a module object.
obj-m := $(O_TARGET)

include $(TOPDIR)/Rules.make

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.