/*
 *  linux/fs/read_write.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
|
|
|
#include <linux/types.h>
|
#include <linux/types.h>
|
#include <linux/errno.h>
|
#include <linux/errno.h>
|
#include <linux/stat.h>
|
#include <linux/stat.h>
|
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
#include <linux/sched.h>
|
#include <linux/sched.h>
|
#include <linux/fcntl.h>
|
#include <linux/fcntl.h>
|
#include <linux/file.h>
|
#include <linux/file.h>
|
#include <linux/mm.h>
|
#include <linux/mm.h>
|
#include <linux/uio.h>
|
#include <linux/uio.h>
|
|
|
#include <asm/segment.h>
|
#include <asm/segment.h>
|
|
|
/*
 * lseek system call: reposition the file offset of descriptor fd.
 *
 * origin selects the base the offset is applied to: 0 = absolute,
 * 1 = relative to the current f_pos, 2 = relative to the inode's i_size.
 * Returns the resulting position, -EBADF when fd does not name a file
 * with an inode, or -EINVAL for a bad origin or a negative result.
 */
asmlinkage long sys_lseek(unsigned int fd, off_t offset, unsigned int origin)
{
	struct file *filp;
	struct inode *ino;
	long newpos;

	if (fd >= NR_OPEN)
		return -EBADF;
	filp = current->files->fd[fd];
	if (!filp)
		return -EBADF;
	ino = filp->f_inode;
	if (!ino)
		return -EBADF;
	if (origin > 2)
		return -EINVAL;

	/* A handler supplied through f_op takes over completely. */
	if (filp->f_op && filp->f_op->lseek)
		return filp->f_op->lseek(ino, filp, offset, origin);

	/*
	 * Default handling when no lseek handler is present.  origin is
	 * known to be 0, 1 or 2 here, and the inode was validated above.
	 */
	if (origin == 0)
		newpos = offset;
	else if (origin == 1)
		newpos = filp->f_pos + offset;
	else
		newpos = ino->i_size + offset;

	if (newpos < 0)
		return -EINVAL;

	/* Only touch the file state when the position really moves. */
	if (newpos != filp->f_pos) {
		filp->f_pos = newpos;
		filp->f_reada = 0;
		filp->f_version = ++event;
	}
	return filp->f_pos;
}
|
|
|
asmlinkage int sys_llseek(unsigned int fd, unsigned long offset_high,
|
asmlinkage int sys_llseek(unsigned int fd, unsigned long offset_high,
|
unsigned long offset_low, loff_t * result,
|
unsigned long offset_low, loff_t * result,
|
unsigned int origin)
|
unsigned int origin)
|
{
|
{
|
struct file * file;
|
struct file * file;
|
loff_t tmp = -1;
|
loff_t tmp = -1;
|
loff_t offset;
|
loff_t offset;
|
int err;
|
int err;
|
|
|
if (fd >= NR_OPEN || !(file=current->files->fd[fd]) || !(file->f_inode))
|
if (fd >= NR_OPEN || !(file=current->files->fd[fd]) || !(file->f_inode))
|
return -EBADF;
|
return -EBADF;
|
if (origin > 2)
|
if (origin > 2)
|
return -EINVAL;
|
return -EINVAL;
|
if ((err = verify_area(VERIFY_WRITE, result, sizeof(loff_t))))
|
if ((err = verify_area(VERIFY_WRITE, result, sizeof(loff_t))))
|
return err;
|
return err;
|
offset = (loff_t) (((unsigned long long) offset_high << 32) | offset_low);
|
offset = (loff_t) (((unsigned long long) offset_high << 32) | offset_low);
|
|
|
/* if there is a fs-specific handler, we can't just ignore it.. */
|
/* if there is a fs-specific handler, we can't just ignore it.. */
|
/* accept llseek() only for the signed long subset of long long */
|
/* accept llseek() only for the signed long subset of long long */
|
if (file->f_op && file->f_op->lseek) {
|
if (file->f_op && file->f_op->lseek) {
|
if (offset != (long) offset)
|
if (offset != (long) offset)
|
return -EINVAL;
|
return -EINVAL;
|
return file->f_op->lseek(file->f_inode,file,offset,origin);
|
return file->f_op->lseek(file->f_inode,file,offset,origin);
|
}
|
}
|
|
|
switch (origin) {
|
switch (origin) {
|
case 0:
|
case 0:
|
tmp = offset;
|
tmp = offset;
|
break;
|
break;
|
case 1:
|
case 1:
|
tmp = file->f_pos + offset;
|
tmp = file->f_pos + offset;
|
break;
|
break;
|
case 2:
|
case 2:
|
if (!file->f_inode)
|
if (!file->f_inode)
|
return -EINVAL;
|
return -EINVAL;
|
tmp = file->f_inode->i_size + offset;
|
tmp = file->f_inode->i_size + offset;
|
break;
|
break;
|
}
|
}
|
if (tmp < 0)
|
if (tmp < 0)
|
return -EINVAL;
|
return -EINVAL;
|
if (tmp != file->f_pos) {
|
if (tmp != file->f_pos) {
|
file->f_pos = tmp;
|
file->f_pos = tmp;
|
file->f_reada = 0;
|
file->f_reada = 0;
|
file->f_version = ++event;
|
file->f_version = ++event;
|
}
|
}
|
memcpy_tofs(result, &file->f_pos, sizeof(loff_t));
|
memcpy_tofs(result, &file->f_pos, sizeof(loff_t));
|
return 0;
|
return 0;
|
}
|
}
|
|
|
asmlinkage int sys_read(unsigned int fd,char * buf,int count)
|
asmlinkage int sys_read(unsigned int fd,char * buf,int count)
|
{
|
{
|
int error;
|
int error;
|
struct file * file;
|
struct file * file;
|
struct inode * inode;
|
struct inode * inode;
|
|
|
error = -EBADF;
|
error = -EBADF;
|
file = fget(fd);
|
file = fget(fd);
|
if (!file)
|
if (!file)
|
goto bad_file;
|
goto bad_file;
|
inode = file->f_inode;
|
inode = file->f_inode;
|
if (!inode)
|
if (!inode)
|
goto out;
|
goto out;
|
error = -EBADF;
|
error = -EBADF;
|
if (!(file->f_mode & 1))
|
if (!(file->f_mode & 1))
|
goto out;
|
goto out;
|
error = -EINVAL;
|
error = -EINVAL;
|
if (!file->f_op || !file->f_op->read)
|
if (!file->f_op || !file->f_op->read)
|
goto out;
|
goto out;
|
error = 0;
|
error = 0;
|
if (count <= 0)
|
if (count <= 0)
|
goto out;
|
goto out;
|
error = locks_verify_area(FLOCK_VERIFY_READ,inode,file,file->f_pos,count);
|
error = locks_verify_area(FLOCK_VERIFY_READ,inode,file,file->f_pos,count);
|
if (error)
|
if (error)
|
goto out;
|
goto out;
|
error = verify_area(VERIFY_WRITE,buf,count);
|
error = verify_area(VERIFY_WRITE,buf,count);
|
if (error)
|
if (error)
|
goto out;
|
goto out;
|
error = file->f_op->read(inode,file,buf,count);
|
error = file->f_op->read(inode,file,buf,count);
|
out:
|
out:
|
fput(file, inode);
|
fput(file, inode);
|
bad_file:
|
bad_file:
|
return error;
|
return error;
|
}
|
}
|
|
|
asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count)
|
asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count)
|
{
|
{
|
int error;
|
int error;
|
struct file * file;
|
struct file * file;
|
struct inode * inode;
|
struct inode * inode;
|
|
|
error = -EBADF;
|
error = -EBADF;
|
file = fget(fd);
|
file = fget(fd);
|
if (!file)
|
if (!file)
|
goto bad_file;
|
goto bad_file;
|
inode = file->f_inode;
|
inode = file->f_inode;
|
if (!inode)
|
if (!inode)
|
goto out;
|
goto out;
|
if (!(file->f_mode & 2))
|
if (!(file->f_mode & 2))
|
goto out;
|
goto out;
|
error = -EINVAL;
|
error = -EINVAL;
|
if (!file->f_op || !file->f_op->write)
|
if (!file->f_op || !file->f_op->write)
|
goto out;
|
goto out;
|
error = 0;
|
error = 0;
|
/*
|
/*
|
* If this was a development kernel we'd just drop the test
|
* If this was a development kernel we'd just drop the test
|
* its not so we do this for stricter compatibility both to
|
* its not so we do this for stricter compatibility both to
|
* applications and drivers.
|
* applications and drivers.
|
*/
|
*/
|
if (!count && !IS_ZERO_WR(inode))
|
if (!count && !IS_ZERO_WR(inode))
|
goto out;
|
goto out;
|
error = locks_verify_area(FLOCK_VERIFY_WRITE,inode,file,file->f_pos,count);
|
error = locks_verify_area(FLOCK_VERIFY_WRITE,inode,file,file->f_pos,count);
|
if (error)
|
if (error)
|
goto out;
|
goto out;
|
error = verify_area(VERIFY_READ,buf,count);
|
error = verify_area(VERIFY_READ,buf,count);
|
if (error)
|
if (error)
|
goto out;
|
goto out;
|
/*
|
/*
|
* If data has been written to the file, remove the setuid and
|
* If data has been written to the file, remove the setuid and
|
* the setgid bits. We do it anyway otherwise there is an
|
* the setgid bits. We do it anyway otherwise there is an
|
* extremely exploitable race - does your OS get it right |->
|
* extremely exploitable race - does your OS get it right |->
|
*
|
*
|
* Set ATTR_FORCE so it will always be changed.
|
* Set ATTR_FORCE so it will always be changed.
|
*/
|
*/
|
if (!suser() && (inode->i_mode & (S_ISUID | S_ISGID))) {
|
if (!suser() && (inode->i_mode & (S_ISUID | S_ISGID))) {
|
struct iattr newattrs;
|
struct iattr newattrs;
|
/*
|
/*
|
* Don't turn off setgid if no group execute. This special
|
* Don't turn off setgid if no group execute. This special
|
* case marks candidates for mandatory locking.
|
* case marks candidates for mandatory locking.
|
*/
|
*/
|
newattrs.ia_mode = inode->i_mode &
|
newattrs.ia_mode = inode->i_mode &
|
~(S_ISUID | ((inode->i_mode & S_IXGRP) ? S_ISGID : 0));
|
~(S_ISUID | ((inode->i_mode & S_IXGRP) ? S_ISGID : 0));
|
newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_FORCE;
|
newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_FORCE;
|
notify_change(inode, &newattrs);
|
notify_change(inode, &newattrs);
|
}
|
}
|
|
|
down(&inode->i_sem);
|
down(&inode->i_sem);
|
error = file->f_op->write(inode,file,buf,count);
|
error = file->f_op->write(inode,file,buf,count);
|
up(&inode->i_sem);
|
up(&inode->i_sem);
|
out:
|
out:
|
fput(file, inode);
|
fput(file, inode);
|
bad_file:
|
bad_file:
|
return error;
|
return error;
|
}
|
}
|
|
|
static int sock_readv_writev(int type, struct inode * inode, struct file * file,
|
static int sock_readv_writev(int type, struct inode * inode, struct file * file,
|
const struct iovec * iov, long count, long size)
|
const struct iovec * iov, long count, long size)
|
{
|
{
|
struct msghdr msg;
|
struct msghdr msg;
|
struct socket *sock;
|
struct socket *sock;
|
|
|
sock = &inode->u.socket_i;
|
sock = &inode->u.socket_i;
|
if (!sock->ops)
|
if (!sock->ops)
|
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
msg.msg_name = NULL;
|
msg.msg_name = NULL;
|
msg.msg_namelen = 0;
|
msg.msg_namelen = 0;
|
msg.msg_control = NULL;
|
msg.msg_control = NULL;
|
msg.msg_iov = (struct iovec *) iov;
|
msg.msg_iov = (struct iovec *) iov;
|
msg.msg_iovlen = count;
|
msg.msg_iovlen = count;
|
|
|
/* read() does a VERIFY_WRITE */
|
/* read() does a VERIFY_WRITE */
|
if (type == VERIFY_WRITE) {
|
if (type == VERIFY_WRITE) {
|
if (!sock->ops->recvmsg)
|
if (!sock->ops->recvmsg)
|
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
return sock->ops->recvmsg(sock, &msg, size,
|
return sock->ops->recvmsg(sock, &msg, size,
|
(file->f_flags & O_NONBLOCK), 0, NULL);
|
(file->f_flags & O_NONBLOCK), 0, NULL);
|
}
|
}
|
if (!sock->ops->sendmsg)
|
if (!sock->ops->sendmsg)
|
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
return sock->ops->sendmsg(sock, &msg, size,
|
return sock->ops->sendmsg(sock, &msg, size,
|
(file->f_flags & O_NONBLOCK), 0);
|
(file->f_flags & O_NONBLOCK), 0);
|
}
|
}
|
|
|
typedef int (*IO_fn_t)(struct inode *, struct file *, char *, int);
|
typedef int (*IO_fn_t)(struct inode *, struct file *, char *, int);
|
|
|
static int do_readv_writev(int type, struct inode * inode, struct file * file,
|
static int do_readv_writev(int type, struct inode * inode, struct file * file,
|
const struct iovec * vector, unsigned long count)
|
const struct iovec * vector, unsigned long count)
|
{
|
{
|
size_t tot_len;
|
size_t tot_len;
|
struct iovec iov[UIO_MAXIOV];
|
struct iovec iov[UIO_MAXIOV];
|
int retval, i;
|
int retval, i;
|
IO_fn_t fn;
|
IO_fn_t fn;
|
|
|
/*
|
/*
|
* First get the "struct iovec" from user memory and
|
* First get the "struct iovec" from user memory and
|
* verify all the pointers
|
* verify all the pointers
|
*/
|
*/
|
if (!count)
|
if (!count)
|
return 0;
|
return 0;
|
if (count > UIO_MAXIOV)
|
if (count > UIO_MAXIOV)
|
return -EINVAL;
|
return -EINVAL;
|
retval = verify_area(VERIFY_READ, vector, count*sizeof(*vector));
|
retval = verify_area(VERIFY_READ, vector, count*sizeof(*vector));
|
if (retval)
|
if (retval)
|
return retval;
|
return retval;
|
memcpy_fromfs(iov, vector, count*sizeof(*vector));
|
memcpy_fromfs(iov, vector, count*sizeof(*vector));
|
tot_len = 0;
|
tot_len = 0;
|
for (i = 0 ; i < count ; i++) {
|
for (i = 0 ; i < count ; i++) {
|
tot_len += iov[i].iov_len;
|
tot_len += iov[i].iov_len;
|
retval = verify_area(type, iov[i].iov_base, iov[i].iov_len);
|
retval = verify_area(type, iov[i].iov_base, iov[i].iov_len);
|
if (retval)
|
if (retval)
|
return retval;
|
return retval;
|
}
|
}
|
|
|
retval = locks_verify_area(type == VERIFY_WRITE ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
|
retval = locks_verify_area(type == VERIFY_WRITE ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
|
inode, file, file->f_pos, tot_len);
|
inode, file, file->f_pos, tot_len);
|
if (retval)
|
if (retval)
|
return retval;
|
return retval;
|
|
|
/*
|
/*
|
* Then do the actual IO. Note that sockets need to be handled
|
* Then do the actual IO. Note that sockets need to be handled
|
* specially as they have atomicity guarantees and can handle
|
* specially as they have atomicity guarantees and can handle
|
* iovec's natively
|
* iovec's natively
|
*/
|
*/
|
if (inode->i_sock)
|
if (inode->i_sock)
|
return sock_readv_writev(type, inode, file, iov, count, tot_len);
|
return sock_readv_writev(type, inode, file, iov, count, tot_len);
|
|
|
if (!file->f_op)
|
if (!file->f_op)
|
return -EINVAL;
|
return -EINVAL;
|
/* VERIFY_WRITE actually means a read, as we write to user space */
|
/* VERIFY_WRITE actually means a read, as we write to user space */
|
fn = file->f_op->read;
|
fn = file->f_op->read;
|
if (type == VERIFY_READ)
|
if (type == VERIFY_READ)
|
fn = (IO_fn_t) file->f_op->write;
|
fn = (IO_fn_t) file->f_op->write;
|
|
|
if(fn==NULL)
|
if(fn==NULL)
|
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
|
|
vector = iov;
|
vector = iov;
|
while (count > 0) {
|
while (count > 0) {
|
void * base;
|
void * base;
|
int len, nr;
|
int len, nr;
|
|
|
base = vector->iov_base;
|
base = vector->iov_base;
|
len = vector->iov_len;
|
len = vector->iov_len;
|
vector++;
|
vector++;
|
count--;
|
count--;
|
nr = fn(inode, file, base, len);
|
nr = fn(inode, file, base, len);
|
if (nr < 0) {
|
if (nr < 0) {
|
if (retval)
|
if (retval)
|
break;
|
break;
|
retval = nr;
|
retval = nr;
|
break;
|
break;
|
}
|
}
|
retval += nr;
|
retval += nr;
|
if (nr != len)
|
if (nr != len)
|
break;
|
break;
|
}
|
}
|
return retval;
|
return retval;
|
}
|
}
|
|
|
asmlinkage int sys_readv(unsigned long fd, const struct iovec * vector, long count)
|
asmlinkage int sys_readv(unsigned long fd, const struct iovec * vector, long count)
|
{
|
{
|
struct file * file;
|
struct file * file;
|
struct inode * inode;
|
struct inode * inode;
|
|
|
if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode))
|
if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode))
|
return -EBADF;
|
return -EBADF;
|
if (!(file->f_mode & 1))
|
if (!(file->f_mode & 1))
|
return -EBADF;
|
return -EBADF;
|
return do_readv_writev(VERIFY_WRITE, inode, file, vector, count);
|
return do_readv_writev(VERIFY_WRITE, inode, file, vector, count);
|
}
|
}
|
|
|
asmlinkage int sys_writev(unsigned long fd, const struct iovec * vector, long count)
|
asmlinkage int sys_writev(unsigned long fd, const struct iovec * vector, long count)
|
{
|
{
|
int error;
|
int error;
|
struct file * file;
|
struct file * file;
|
struct inode * inode;
|
struct inode * inode;
|
|
|
if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode))
|
if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode))
|
return -EBADF;
|
return -EBADF;
|
if (!(file->f_mode & 2))
|
if (!(file->f_mode & 2))
|
return -EBADF;
|
return -EBADF;
|
down(&inode->i_sem);
|
down(&inode->i_sem);
|
error = do_readv_writev(VERIFY_READ, inode, file, vector, count);
|
error = do_readv_writev(VERIFY_READ, inode, file, vector, count);
|
up(&inode->i_sem);
|
up(&inode->i_sem);
|
return error;
|
return error;
|
}
|
}
|
|
|