URL: https://opencores.org/ocsvn/or1k/or1k/trunk
Path: or1k/trunk/linux/linux-2.4/fs/nfs/direct.c - Rev 1765
/*
 * linux/fs/nfs/direct.c
 *
 * High-performance direct I/O for the NFS client
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache.  The client does not
 * correct unaligned requests from applications.  All requested bytes are
 * held on permanent storage before a direct write system call returns to
 * an application.  Applications that manage their own data caching, such
 * as databases, make very good use of direct I/O on local file systems.
 *
 * Solaris implements an uncached I/O facility called directio() that
 * is used for backups and sequential I/O to very large files.  Solaris
 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
 * an undocumented mount option.
 *
 * Note that I/O to read in executables (e.g. kernel_read) cannot use
 * direct (kiobuf) reads because there is no vma backing the passed-in
 * data buffer.
 *
 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust.
 *
 * Initial implementation: 12/2001 by Chuck Lever <cel@netapp.com>
 *
 * TODO:
 *
 * 1.  Use concurrent asynchronous network requests rather than
 *     serialized synchronous network requests for normal (non-sync)
 *     direct I/O.
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/errno.h>
#include <linux/nfs_fs.h>
#include <linux/smp_lock.h>
#include <linux/sunrpc/clnt.h>
#include <linux/iobuf.h>

#include <asm/system.h>
#include <asm/uaccess.h>

#define NFSDBG_FACILITY		(NFSDBG_PAGECACHE | NFSDBG_VFS)
#define VERF_SIZE		(2 * sizeof(__u32))

static inline int
nfs_direct_read_rpc(struct file *file, struct nfs_readargs *arg)
{
	int result;
	struct inode *inode = file->f_dentry->d_inode;
	struct nfs_fattr fattr;
	struct rpc_message msg;
	struct nfs_readres res = { &fattr, arg->count, 0 };

#ifdef CONFIG_NFS_V3
	msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ?
					NFS3PROC_READ : NFSPROC_READ;
#else
	msg.rpc_proc = NFSPROC_READ;
#endif
	msg.rpc_argp = arg;
	msg.rpc_resp = &res;

	lock_kernel();
	msg.rpc_cred = nfs_file_cred(file);
	fattr.valid = 0;
	result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
	nfs_refresh_inode(inode, &fattr);
	unlock_kernel();

	return result;
}

static inline int
nfs_direct_write_rpc(struct file *file, struct nfs_writeargs *arg,
		     struct nfs_writeverf *verf)
{
	int result;
	struct inode *inode = file->f_dentry->d_inode;
	struct nfs_fattr fattr;
	struct rpc_message msg;
	struct nfs_writeres res = { &fattr, verf, 0 };

#ifdef CONFIG_NFS_V3
	msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ?
					NFS3PROC_WRITE : NFSPROC_WRITE;
#else
	msg.rpc_proc = NFSPROC_WRITE;
#endif
	msg.rpc_argp = arg;
	msg.rpc_resp = &res;

	lock_kernel();
	msg.rpc_cred = get_rpccred(nfs_file_cred(file));
	fattr.valid = 0;
	result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
	nfs_write_attributes(inode, &fattr);
	put_rpccred(msg.rpc_cred);
	unlock_kernel();

#ifdef CONFIG_NFS_V3
	if (NFS_PROTO(inode)->version == 3) {
		if (result > 0) {
			if ((arg->stable == NFS_FILE_SYNC) &&
			    (verf->committed != NFS_FILE_SYNC)) {
				printk(KERN_ERR
				       "%s: server didn't sync stable write request\n",
				       __FUNCTION__);
				return -EIO;
			}

			if (result != arg->count) {
				printk(KERN_INFO
				       "%s: short write, count=%u, result=%d\n",
				       __FUNCTION__, arg->count, result);
			}
		}
		return result;
	} else {
#endif
		verf->committed = NFS_FILE_SYNC;	/* NFSv2 always syncs data */
		if (result == 0)
			return arg->count;
		return result;
#ifdef CONFIG_NFS_V3
	}
#endif
}

#ifdef CONFIG_NFS_V3
static inline int
nfs_direct_commit_rpc(struct inode *inode, loff_t offset, size_t count,
		      struct nfs_writeverf *verf)
{
	int result;
	struct nfs_fattr fattr;
	struct nfs_writeargs arg = { NFS_FH(inode), offset, count, 0, 0,
				     NULL };
	struct nfs_writeres res = { &fattr, verf, 0 };
	struct rpc_message msg = { NFS3PROC_COMMIT, &arg, &res, NULL };

	fattr.valid = 0;

	lock_kernel();
	result = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
	nfs_write_attributes(inode, &fattr);
	unlock_kernel();

	return result;
}
#else
static inline int
nfs_direct_commit_rpc(struct inode *inode, loff_t offset, size_t count,
		      struct nfs_writeverf *verf)
{
	return 0;
}
#endif

/*
 * Walk through the iobuf and create an iovec for each "rsize" bytes.
 */
static int
nfs_direct_read(struct file *file, struct kiobuf *iobuf, loff_t offset,
		size_t count)
{
	int curpage, total;
	int result = 0;
	struct inode *inode = file->f_dentry->d_inode;
	int rsize = NFS_SERVER(inode)->rsize;
	struct page *pages[NFS_READ_MAXIOV];
	struct nfs_readargs args = { NFS_FH(inode), offset, 0,
				     iobuf->offset, pages };

	total = 0;
	curpage = 0;
	while (count) {
		int len, request;
		struct page **dest = pages;

		request = count;
		if (count > rsize)
			request = rsize;
		args.count = request;
		args.offset = offset;
		args.pgbase = (iobuf->offset + total) & ~PAGE_MASK;
		len = PAGE_SIZE - args.pgbase;

		do {
			struct page *page = iobuf->maplist[curpage];

			if (curpage >= iobuf->nr_pages || !page) {
				result = -EFAULT;
				goto out_err;
			}

			*dest++ = page;
			/* zero after the first iov */
			if (request < len)
				break;
			request -= len;
			len = PAGE_SIZE;
			curpage++;
		} while (request != 0);

		result = nfs_direct_read_rpc(file, &args);
		if (result < 0)
			break;

		total += result;
		if (result < args.count)	/* NFSv2ism */
			break;
		count -= result;
		offset += result;
	};

out_err:
	if (!total)
		return result;
	return total;
}

/*
 * Walk through the iobuf and create an iovec for each "wsize" bytes.
 * If only one network write is necessary, or if the O_SYNC flag or
 * 'sync' mount option are present, or if this is a V2 inode, use
 * FILE_SYNC.  Otherwise, use UNSTABLE and finish with a COMMIT.
 *
 * The mechanics of this function are much the same as nfs_direct_read,
 * with the added complexity of committing unstable writes.
 */
static int
nfs_direct_write(struct file *file, struct kiobuf *iobuf, loff_t offset,
		 size_t count)
{
	int curpage, total;
	int need_commit = 0;
	int result = 0;
	loff_t save_offset = offset;
	struct inode *inode = file->f_dentry->d_inode;
	int wsize = NFS_SERVER(inode)->wsize;
	struct nfs_writeverf first_verf, ret_verf;
	struct page *pages[NFS_WRITE_MAXIOV];
	struct nfs_writeargs args = { NFS_FH(inode), 0, 0, NFS_FILE_SYNC, 0,
				      pages };

#ifdef CONFIG_NFS_V3
	if ((NFS_PROTO(inode)->version == 3) && (count > wsize) &&
	    (!IS_SYNC(inode)))
		args.stable = NFS_UNSTABLE;
#endif

retry:
	total = 0;
	curpage = 0;
	while (count) {
		int len, request;
		struct page **dest = pages;

		request = count;
		if (count > wsize)
			request = wsize;
		args.count = request;
		args.offset = offset;
		args.pgbase = (iobuf->offset + total) & ~PAGE_MASK;
		len = PAGE_SIZE - args.pgbase;

		do {
			struct page *page = iobuf->maplist[curpage];

			if (curpage >= iobuf->nr_pages || !page) {
				result = -EFAULT;
				goto out_err;
			}

			*dest++ = page;
			/* zero after the first iov */
			if (request < len)
				break;
			request -= len;
			len = PAGE_SIZE;
			curpage++;
		} while (request != 0);

		result = nfs_direct_write_rpc(file, &args, &ret_verf);
		if (result < 0)
			break;

		if (!total)
			memcpy(&first_verf.verifier, &ret_verf.verifier,
			       VERF_SIZE);
		if (ret_verf.committed != NFS_FILE_SYNC) {
			need_commit = 1;
			if (memcmp(&first_verf.verifier, &ret_verf.verifier,
				   VERF_SIZE))
				goto print_retry;
		}

		total += result;
		count -= result;
		offset += result;
	};

out_err:
	/*
	 * Commit data written so far, even in the event of an error
	 */
	if (need_commit) {
		if (nfs_direct_commit_rpc(inode, save_offset,
					  iobuf->length - count, &ret_verf))
			goto print_retry;
		if (memcmp(&first_verf.verifier, &ret_verf.verifier,
			   VERF_SIZE))
			goto print_retry;
	}

	if (!total)
		return result;
	return total;

print_retry:
	printk(KERN_INFO
	       "%s: detected server restart; retrying with FILE_SYNC\n",
	       __FUNCTION__);
	args.stable = NFS_FILE_SYNC;
	offset = save_offset;
	count = iobuf->length;
	goto retry;
}

/*
 * Read or write data, moving the data directly to/from the
 * application's buffer without caching in the page cache.
 *
 * Rules for direct I/O
 *
 * 1.  block size = 512 bytes or more
 * 2.  file byte offset is block aligned
 * 3.  byte count is a multiple of block size
 * 4.  user buffer is not aligned
 * 5.  user buffer is faulted in and pinned
 *
 * These are verified before we get here.
 */
int
nfs_direct_IO(int rw, struct file *file, struct kiobuf *iobuf,
	      unsigned long blocknr, int blocksize)
{
	int result = -EINVAL;
	size_t count = iobuf->length;
	struct dentry *dentry = file->f_dentry;
	struct inode *inode = dentry->d_inode;
	loff_t offset = (loff_t) blocknr << inode->i_blkbits;

	switch (rw) {
	case READ:
		dfprintk(VFS,
			 "NFS: direct_IO(READ) (%s/%s) off/cnt(%Lu/%d)\n",
			 dentry->d_parent->d_name.name, dentry->d_name.name,
			 offset, count);

		result = nfs_direct_read(file, iobuf, offset, count);
		break;

	case WRITE:
		dfprintk(VFS,
			 "NFS: direct_IO(WRITE) (%s/%s) off/cnt(%Lu/%d)\n",
			 dentry->d_parent->d_name.name, dentry->d_name.name,
			 offset, count);

		result = nfs_direct_write(file, iobuf, offset, count);
		break;

	default:
		break;
	}

	dfprintk(VFS, "NFS: direct_IO result = %d\n", result);

	return result;
}
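
The header comment and the rules above nfs_direct_IO() describe the requests this path is built to serve: block-aligned, block-sized, uncached transfers issued by the application itself. As a rough userspace sketch (not part of this file), the program below issues that kind of request; the file path /mnt/nfs/data is hypothetical, and the O_DIRECT flag plus posix_memalign() are the conventional way to satisfy the alignment rules, the kernel itself faulting in and pinning the buffer when it maps the kiobuf.

/*
 * Minimal, illustrative direct-I/O consumer for an NFS-mounted file.
 * Assumptions: /mnt/nfs/data exists on an NFS mount, glibc provides
 * O_DIRECT (with _GNU_SOURCE) and posix_memalign().
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const size_t blocksize = 512;		/* rule 1: 512 bytes or more */
	const size_t count = 8 * blocksize;	/* rule 3: multiple of block size */
	void *buf;
	int fd;
	ssize_t n;

	/* an aligned buffer keeps the request within the rules above */
	if (posix_memalign(&buf, blocksize, count))
		return 1;

	/* /mnt/nfs/data is a hypothetical NFS-mounted file */
	fd = open("/mnt/nfs/data", O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* rule 2: the starting file offset (0 here) is block aligned */
	n = read(fd, buf, count);
	if (n < 0)
		perror("read");
	else
		printf("read %zd bytes directly from the server\n", n);

	close(fd);
	free(buf);
	return 0;
}

Each read() of this form bypasses the page cache and is satisfied by one or more READ RPCs built in nfs_direct_read() above; the equivalent write case goes through nfs_direct_write() and, for NFSv3 multi-RPC writes, an UNSTABLE/COMMIT sequence.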