/*
 * This file contains the procedures for the handling of select
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 */

#include <linux/types.h>
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/file.h>

#include <asm/segment.h>
#include <asm/system.h>

/*
 * Divide x by y, rounding the quotient up instead of down.
 * Both arguments are evaluated more than once, so pass side-effect-free
 * expressions only.
 */
#define ROUND_UP(x,y) (((x)+(y)-1)/(y))

/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, select_wait() and free_wait() make all the work.
 * select_wait() is an inline function defined in <linux/sched.h>, as all select
 * functions have to call it to add an entry to the select table.
 */

/*
 * I rewrote this again to make the select_table size variable, take some
 * more shortcuts, improve responsiveness, and remove another race that
 * Linus noticed.  -- jrs
 */

/*
 * Undo every registration recorded in a select table.
 *
 * Walks the table from its last used entry back to the first, removing
 * each entry's wait element from the wait queue it was added to.
 * p->nr is decremented as entries are released and is 0 on return.
 */
void select_free_wait(select_table * p)
{
	/* Start one past the last used entry and step backwards. */
	struct select_table_entry * entry = p->entry + p->nr;

	while (p->nr > 0) {
		p->nr--;
		entry--;
		remove_wait_queue(entry->wait_address,&entry->wait);
	}
}

/*
|
/*
|
* File handle locking
|
* File handle locking
|
*/
|
*/
|
|
|
static void lock_fd_bits(int n, int x)
|
static void lock_fd_bits(int n, int x)
|
{
|
{
|
int i;
|
int i;
|
for(i=0;i<__NFDBITS;i++)
|
for(i=0;i<__NFDBITS;i++)
|
{
|
{
|
if(x&(1<<i))
|
if(x&(1<<i))
|
fget(n+i);
|
fget(n+i);
|
}
|
}
|
}
|
}
|
|
|
static void unlock_fd_bits(int n, int x)
|
static void unlock_fd_bits(int n, int x)
|
{
|
{
|
int i;
|
int i;
|
for(i=0;i<__NFDBITS;i++)
|
for(i=0;i<__NFDBITS;i++)
|
{
|
{
|
if(x&(1<<i))
|
if(x&(1<<i))
|
{
|
{
|
/* ick */
|
/* ick */
|
struct file *f=current->files->fd[n+i];
|
struct file *f=current->files->fd[n+i];
|
fput(f, f->f_inode);
|
fput(f, f->f_inode);
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
/*
|
/*
|
* The check function checks the ready status of a file using the vfs layer.
|
* The check function checks the ready status of a file using the vfs layer.
|
*
|
*
|
* If the file was not ready we were added to its wait queue. But in
|
* If the file was not ready we were added to its wait queue. But in
|
* case it became ready just after the check and just before it called
|
* case it became ready just after the check and just before it called
|
* select_wait, we call it again, knowing we are already on its
|
* select_wait, we call it again, knowing we are already on its
|
* wait queue this time. The second call is not necessary if the
|
* wait queue this time. The second call is not necessary if the
|
* select_table is NULL indicating an earlier file check was ready
|
* select_table is NULL indicating an earlier file check was ready
|
* and we aren't going to sleep on the select_table. -- jrs
|
* and we aren't going to sleep on the select_table. -- jrs
|
*/
|
*/
|
|
|
int select_check(int flag, select_table * wait, struct file * file)
|
int select_check(int flag, select_table * wait, struct file * file)
|
{
|
{
|
struct inode * inode;
|
struct inode * inode;
|
struct file_operations *fops;
|
struct file_operations *fops;
|
int (*select) (struct inode *, struct file *, int, select_table *);
|
int (*select) (struct inode *, struct file *, int, select_table *);
|
|
|
inode = file->f_inode;
|
inode = file->f_inode;
|
if ((fops = file->f_op) && (select = fops->select))
|
if ((fops = file->f_op) && (select = fops->select))
|
return select(inode, file, flag, wait)
|
return select(inode, file, flag, wait)
|
|| (wait && select(inode, file, flag, NULL));
|
|| (wait && select(inode, file, flag, NULL));
|
if (flag != SEL_EX)
|
if (flag != SEL_EX)
|
return 1;
|
return 1;
|
return 0;
|
return 0;
|
}
|
}
|
|
|
/*
 * Core of select(): test the descriptors named in the in/out/ex bitmaps
 * and sleep until at least one is ready, current->timeout becomes zero,
 * or an unblocked signal is pending.
 *
 * n        - number of descriptors to consider (bits 0..n-1)
 * in/out/ex - kernel copies of the requested bitmaps
 * res_*    - result bitmaps; a bit is set for every ready descriptor
 * locked   - not used by this function
 *
 * Returns the number of bits set in the result bitmaps, -EBADF if a
 * requested descriptor has no file or no inode, or -ENOMEM if the page
 * for the wait table cannot be allocated.  The caller is expected to have
 * set current->timeout beforehand.
 */
static int do_select(int n, fd_set *in, fd_set *out, fd_set *ex,
	fd_set *res_in, fd_set *res_out, fd_set *res_ex, fd_set *locked)
{
	int count;
	select_table wait_table, *wait;
	struct select_table_entry *entry;
	unsigned long set;
	int i,j;
	int max = -1;
	int threaded = 0;

	/*
	 * Pass 1: validate every requested descriptor below n and remember
	 * the highest one, so later passes can stop early.
	 */
	j = 0;
	for (;;) {
		i = j * __NFDBITS;
		if (i >= n)
			break;
		set = in->fds_bits[j] | out->fds_bits[j] | ex->fds_bits[j];
		j++;
		for ( ; set ; i++,set >>= 1) {
			if (i >= n)
				goto end_check;
			if (!(set & 1))
				continue;
			if (!current->files->fd[i])
				return -EBADF;
			if (!current->files->fd[i]->f_inode)
				return -EBADF;
			max = i;
		}
	}
end_check:
	n = max + 1;

	/* Now we _must_ lock the handles before we get the page otherwise
	   they may get closed on us during the kmalloc causing explosions.. */

	if(current->files->count>1)
	{
		/*
		 * Only for the threaded cases must we do work.
		 * Whole bitmap words are handed over, once per set, so a
		 * descriptor present in several sets is locked several times.
		 */
		j = 0;
		for (;;) {
			i = j * __NFDBITS;
			if (i >= n)
				break;
			lock_fd_bits(i,in->fds_bits[j]);
			lock_fd_bits(i,out->fds_bits[j]);
			lock_fd_bits(i,ex->fds_bits[j]);
			j++;
		}
		threaded=1;
	}

	if(!(entry = (struct select_table_entry*) __get_free_page(GFP_KERNEL)))
	{
		count = -ENOMEM;
		goto bale;
	}
	count = 0;
	wait_table.nr = 0;
	wait_table.entry = entry;
	wait = &wait_table;
repeat:
	/* Mark ourselves sleeping before polling so a wakeup is not lost. */
	current->state = TASK_INTERRUPTIBLE;
	for (i = 0 ; i < n ; i++) {
		struct file * file = current->files->fd[i];
		if (!file)
			continue;
		/*
		 * Once anything is ready we will not sleep, so stop
		 * registering on further wait queues (wait = NULL).
		 */
		if (FD_ISSET(i,in) && select_check(SEL_IN,wait,file)) {
			FD_SET(i, res_in);
			count++;
			wait = NULL;
		}
		if (FD_ISSET(i,out) && select_check(SEL_OUT,wait,file)) {
			FD_SET(i, res_out);
			count++;
			wait = NULL;
		}
		if (FD_ISSET(i,ex) && select_check(SEL_EX,wait,file)) {
			FD_SET(i, res_ex);
			count++;
			wait = NULL;
		}
	}
	/* After the first full pass we are on every queue we need. */
	wait = NULL;
	if (!count && current->timeout && !(current->signal & ~current->blocked)) {
		schedule();
		goto repeat;
	}
	select_free_wait(&wait_table);
	free_page((unsigned long) entry);
	current->state = TASK_RUNNING;
bale:

	if(threaded)
	{
		/* Unlock handles now */
		j = 0;
		for (;;) {
			i = j * __NFDBITS;
			if (i >= n)
				break;
			unlock_fd_bits(i,in->fds_bits[j]);
			unlock_fd_bits(i,out->fds_bits[j]);
			unlock_fd_bits(i,ex->fds_bits[j]);
			j++;
		}
	}
	return count;
}

/*
|
/*
|
* We do a VERIFY_WRITE here even though we are only reading this time:
|
* We do a VERIFY_WRITE here even though we are only reading this time:
|
* we'll write to it eventually..
|
* we'll write to it eventually..
|
*
|
*
|
* Use "int" accesses to let user-mode fd_set's be int-aligned.
|
* Use "int" accesses to let user-mode fd_set's be int-aligned.
|
*/
|
*/
|
static int __get_fd_set(unsigned long nr, int * fs_pointer, int * fdset)
|
static int __get_fd_set(unsigned long nr, int * fs_pointer, int * fdset)
|
{
|
{
|
/* round up nr to nearest "int" */
|
/* round up nr to nearest "int" */
|
nr = (nr + 8*sizeof(int)-1) / (8*sizeof(int));
|
nr = (nr + 8*sizeof(int)-1) / (8*sizeof(int));
|
if (fs_pointer) {
|
if (fs_pointer) {
|
int error = verify_area(VERIFY_WRITE,fs_pointer,nr*sizeof(int));
|
int error = verify_area(VERIFY_WRITE,fs_pointer,nr*sizeof(int));
|
if (!error) {
|
if (!error) {
|
while (nr) {
|
while (nr) {
|
*fdset = get_user(fs_pointer);
|
*fdset = get_user(fs_pointer);
|
nr--;
|
nr--;
|
fs_pointer++;
|
fs_pointer++;
|
fdset++;
|
fdset++;
|
}
|
}
|
}
|
}
|
return error;
|
return error;
|
}
|
}
|
while (nr) {
|
while (nr) {
|
*fdset = 0;
|
*fdset = 0;
|
nr--;
|
nr--;
|
fdset++;
|
fdset++;
|
}
|
}
|
return 0;
|
return 0;
|
}
|
}
|
|
|
/*
 * Copy a kernel result bitmap back to a user-space fd_set, one int at a
 * time.
 *
 * nr is the number of descriptors MINUS ONE, so "nr >= 0" means there is
 * still at least one bit left to copy; a value of -1 copies nothing.
 * Nothing is done when fs_pointer is NULL.
 */
static void __set_fd_set(long nr, int * fs_pointer, int * fdset)
{
	/* Signed step, so the countdown never mixes signed and unsigned. */
	const long bits_per_int = (long) (8 * sizeof(int));

	if (!fs_pointer)
		return;
	while (nr >= 0) {
		put_user(*fdset, fs_pointer);
		nr -= bits_per_int;
		fdset++;
		fs_pointer++;
	}
}

/*
 * Clear the leading words of a kernel fd_set.
 *
 * We can do long accesses here, kernel fdsets are always long-aligned.
 *
 * nr is the number of descriptors MINUS ONE: every unsigned long that
 * holds a bit in the range 0..nr is set to zero, and a value of -1 clears
 * nothing.
 */
static inline void __zero_fd_set(long nr, unsigned long * fdset)
{
	/* Signed step, so the countdown never mixes signed and unsigned. */
	const long bits_per_long = (long) (8 * sizeof(unsigned long));

	while (nr >= 0) {
		*fdset = 0;
		nr -= bits_per_long;
		fdset++;
	}
}

/*
|
/*
|
* Due to kernel stack usage, we use a _limited_ fd_set type here, and once
|
* Due to kernel stack usage, we use a _limited_ fd_set type here, and once
|
* we really start supporting >256 file descriptors we'll probably have to
|
* we really start supporting >256 file descriptors we'll probably have to
|
* allocate the kernel fd_set copies dynamically.. (The kernel select routines
|
* allocate the kernel fd_set copies dynamically.. (The kernel select routines
|
* are careful to touch only the defined low bits of any fd_set pointer, this
|
* are careful to touch only the defined low bits of any fd_set pointer, this
|
* is important for performance too).
|
* is important for performance too).
|
*
|
*
|
* Note a few subtleties: we use "long" for the dummy, not int, and we do a
|
* Note a few subtleties: we use "long" for the dummy, not int, and we do a
|
* subtract by 1 on the nr of file descriptors. The former is better for
|
* subtract by 1 on the nr of file descriptors. The former is better for
|
* machines with long > int, and the latter allows us to test the bit count
|
* machines with long > int, and the latter allows us to test the bit count
|
* against "zero or positive", which can mostly be just a sign bit test..
|
* against "zero or positive", which can mostly be just a sign bit test..
|
*/
|
*/
|
typedef struct {
|
typedef struct {
|
unsigned long dummy[NR_OPEN/(8*(sizeof(unsigned long)))];
|
unsigned long dummy[NR_OPEN/(8*(sizeof(unsigned long)))];
|
} limited_fd_set;
|
} limited_fd_set;
|
|
|
/*
 * Type-casting wrappers around the fd_set helpers.
 *
 * get_fd_set() passes the descriptor count through unchanged, while
 * set_fd_set() and zero_fd_set() pass (nr)-1 so that the helpers can loop
 * on "nr >= 0".
 */
#define get_fd_set(nr,fsp,fdp) \
__get_fd_set(nr, (int *) (fsp), (int *) (fdp))

#define set_fd_set(nr,fsp,fdp) \
__set_fd_set((nr)-1, (int *) (fsp), (int *) (fdp))

#define zero_fd_set(nr,fdp) \
__zero_fd_set((nr)-1, (unsigned long *) (fdp))

/*
|
/*
|
* We can actually return ERESTARTSYS instead of EINTR, but I'd
|
* We can actually return ERESTARTSYS instead of EINTR, but I'd
|
* like to be certain this leads to no problems. So I return
|
* like to be certain this leads to no problems. So I return
|
* EINTR just for safety.
|
* EINTR just for safety.
|
*
|
*
|
* Update: ERESTARTSYS breaks at least the xview clock binary, so
|
* Update: ERESTARTSYS breaks at least the xview clock binary, so
|
* I'm trying ERESTARTNOHAND which restart only when you want to.
|
* I'm trying ERESTARTNOHAND which restart only when you want to.
|
*/
|
*/
|
asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
|
asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
|
{
|
{
|
int error;
|
int error;
|
limited_fd_set res_in, in;
|
limited_fd_set res_in, in;
|
limited_fd_set res_out, out;
|
limited_fd_set res_out, out;
|
limited_fd_set res_ex, ex;
|
limited_fd_set res_ex, ex;
|
limited_fd_set locked;
|
limited_fd_set locked;
|
unsigned long timeout;
|
unsigned long timeout;
|
|
|
error = -EINVAL;
|
error = -EINVAL;
|
if (n < 0)
|
if (n < 0)
|
goto out;
|
goto out;
|
if (n > NR_OPEN)
|
if (n > NR_OPEN)
|
n = NR_OPEN;
|
n = NR_OPEN;
|
if ((error = get_fd_set(n, inp, &in)) ||
|
if ((error = get_fd_set(n, inp, &in)) ||
|
(error = get_fd_set(n, outp, &out)) ||
|
(error = get_fd_set(n, outp, &out)) ||
|
(error = get_fd_set(n, exp, &ex))) goto out;
|
(error = get_fd_set(n, exp, &ex))) goto out;
|
timeout = ~0UL;
|
timeout = ~0UL;
|
if (tvp) {
|
if (tvp) {
|
error = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
|
error = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
|
if (error)
|
if (error)
|
goto out;
|
goto out;
|
timeout = ROUND_UP(get_user(&tvp->tv_usec),(1000000/HZ));
|
timeout = ROUND_UP(get_user(&tvp->tv_usec),(1000000/HZ));
|
timeout += get_user(&tvp->tv_sec) * (unsigned long) HZ;
|
timeout += get_user(&tvp->tv_sec) * (unsigned long) HZ;
|
if (timeout)
|
if (timeout)
|
timeout += jiffies + 1;
|
timeout += jiffies + 1;
|
}
|
}
|
zero_fd_set(n, &res_in);
|
zero_fd_set(n, &res_in);
|
zero_fd_set(n, &res_out);
|
zero_fd_set(n, &res_out);
|
zero_fd_set(n, &res_ex);
|
zero_fd_set(n, &res_ex);
|
current->timeout = timeout;
|
current->timeout = timeout;
|
error = do_select(n,
|
error = do_select(n,
|
(fd_set *) &in,
|
(fd_set *) &in,
|
(fd_set *) &out,
|
(fd_set *) &out,
|
(fd_set *) &ex,
|
(fd_set *) &ex,
|
(fd_set *) &res_in,
|
(fd_set *) &res_in,
|
(fd_set *) &res_out,
|
(fd_set *) &res_out,
|
(fd_set *) &res_ex,
|
(fd_set *) &res_ex,
|
(fd_set *) &locked);
|
(fd_set *) &locked);
|
timeout = current->timeout?current->timeout - jiffies - 1:0;
|
timeout = current->timeout?current->timeout - jiffies - 1:0;
|
current->timeout = 0;
|
current->timeout = 0;
|
if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
|
if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
|
put_user(timeout/HZ, &tvp->tv_sec);
|
put_user(timeout/HZ, &tvp->tv_sec);
|
timeout %= HZ;
|
timeout %= HZ;
|
timeout *= (1000000/HZ);
|
timeout *= (1000000/HZ);
|
put_user(timeout, &tvp->tv_usec);
|
put_user(timeout, &tvp->tv_usec);
|
}
|
}
|
if (error < 0)
|
if (error < 0)
|
goto out;
|
goto out;
|
if (!error) {
|
if (!error) {
|
error = -ERESTARTNOHAND;
|
error = -ERESTARTNOHAND;
|
if (current->signal & ~current->blocked)
|
if (current->signal & ~current->blocked)
|
goto out;
|
goto out;
|
error = 0;
|
error = 0;
|
}
|
}
|
set_fd_set(n, inp, &res_in);
|
set_fd_set(n, inp, &res_in);
|
set_fd_set(n, outp, &res_out);
|
set_fd_set(n, outp, &res_out);
|
set_fd_set(n, exp, &res_ex);
|
set_fd_set(n, exp, &res_ex);
|
out:
|
out:
|
return error;
|
return error;
|
}
|
}
|
|
|