URL
https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
Compare Revisions
- This comparison shows the changes necessary to convert path
/or1k/trunk/linux/linux-2.4/net/sunrpc
- from Rev 1275 to Rev 1765
- ↔ Reverse comparison
Rev 1275 → Rev 1765
/stats.c
0,0 → 1,206
/* |
* linux/net/sunrpc/stats.c |
* |
* procfs-based user access to generic RPC statistics. The stats files |
* reside in /proc/net/rpc. |
* |
* The read routines assume that the buffer passed in is just big enough. |
* If you implement an RPC service that has its own stats routine which |
* appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE |
* limit. |
* |
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/module.h> |
|
#include <linux/init.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/proc_fs.h> |
#include <linux/sunrpc/clnt.h> |
#include <linux/sunrpc/svcsock.h> |
#include <linux/init.h> |
|
#define RPCDBG_FACILITY RPCDBG_MISC |
|
static struct proc_dir_entry *proc_net_rpc = NULL; |
|
/* |
* Get RPC client stats |
*/ |
/*
 * Get RPC client stats.
 *
 * /proc read callback (create_proc_read_entry signature): formats the
 * client-side statistics attached to @data (a struct rpc_stat) into
 * @buffer and implements the usual *start / *eof / offset protocol.
 * NOTE(review): no bounds checking against the buffer -- relies on the
 * "buffer is just big enough" assumption stated in the file header.
 */
int
rpc_proc_read(char *buffer, char **start, off_t offset, int count,
				int *eof, void *data)
{
	struct rpc_stat	*statp = (struct rpc_stat *) data;
	struct rpc_program *prog = statp->program;
	struct rpc_version *vers;
	int		len, i, j;

	/* Transport-level counters. */
	len = sprintf(buffer,
		"net %d %d %d %d\n",
			statp->netcnt,
			statp->netudpcnt,
			statp->nettcpcnt,
			statp->nettcpconn);
	/* Generic RPC counters. */
	len += sprintf(buffer + len,
		"rpc %d %d %d\n",
			statp->rpccnt,
			statp->rpcretrans,
			statp->rpcauthrefresh);

	/* One "procN" line per registered program version: the version
	 * number, the procedure count, then one call counter per proc. */
	for (i = 0; i < prog->nrvers; i++) {
		if (!(vers = prog->version[i]))
			continue;
		len += sprintf(buffer + len, "proc%d %d",
					vers->number, vers->nrprocs);
		for (j = 0; j < vers->nrprocs; j++)
			len += sprintf(buffer + len, " %d",
					vers->procs[j].p_count);
		buffer[len++] = '\n';
	}

	/* Standard read_proc bookkeeping: signal EOF, or hand back the
	 * requested slice of the formatted data. */
	if (offset >= len) {
		*start = buffer;
		*eof = 1;
		return 0;
	}
	*start = buffer + offset;
	if ((len -= offset) > count)
		return count;
	*eof = 1;
	return len;
}
|
/* |
* Get RPC server stats |
*/ |
/*
 * Get RPC server stats.
 *
 * /proc read callback: server-side counterpart of rpc_proc_read().
 * @data is a struct svc_stat.  The "rpc" line reports the total call
 * count, the sum of the three badness counters, then each counter
 * individually.
 */
int
svc_proc_read(char *buffer, char **start, off_t offset, int count,
				int *eof, void *data)
{
	struct svc_stat *statp = (struct svc_stat *) data;
	struct svc_program *prog = statp->program;
	struct svc_procedure *proc;
	struct svc_version *vers;
	int		len, i, j;

	len = sprintf(buffer,
		"net %d %d %d %d\n",
			statp->netcnt,
			statp->netudpcnt,
			statp->nettcpcnt,
			statp->nettcpconn);
	len += sprintf(buffer + len,
		"rpc %d %d %d %d %d\n",
			statp->rpccnt,
			statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
			statp->rpcbadfmt,
			statp->rpcbadauth,
			statp->rpcbadclnt);

	/* Per-version procedure call counts; note the "procN" label uses
	 * the version slot index i, not a version number. */
	for (i = 0; i < prog->pg_nvers; i++) {
		if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
			continue;
		len += sprintf(buffer + len, "proc%d %d", i, vers->vs_nproc);
		for (j = 0; j < vers->vs_nproc; j++, proc++)
			len += sprintf(buffer + len, " %d", proc->pc_count);
		buffer[len++] = '\n';
	}

	/* Standard read_proc offset/count/EOF bookkeeping. */
	if (offset >= len) {
		*start = buffer;
		*eof = 1;
		return 0;
	}
	*start = buffer + offset;
	if ((len -= offset) > count)
		return count;
	*eof = 1;
	return len;
}
|
/* |
* Register/unregister RPC proc files |
*/ |
/*
 * Create a read-only stats file under /proc/net/rpc.
 * @issvc selects the server-side formatter; @data is handed through to
 * the read callback.  Ensures the parent directory exists first.
 */
static inline struct proc_dir_entry *
do_register(const char *name, void *data, int issvc)
{
	rpc_proc_init();
	dprintk("RPC: registering /proc/net/rpc/%s\n", name);
	return create_proc_read_entry(name, 0, proc_net_rpc,
				      issvc? svc_proc_read : rpc_proc_read,
				      data);
}
|
struct proc_dir_entry * |
rpc_proc_register(struct rpc_stat *statp) |
{ |
return do_register(statp->program->name, statp, 0); |
} |
|
/*
 * Remove a client stats file previously created by rpc_proc_register().
 * @name must match the program name used at registration time.
 */
void
rpc_proc_unregister(const char *name)
{
	remove_proc_entry(name, proc_net_rpc);
}
|
struct proc_dir_entry * |
svc_proc_register(struct svc_stat *statp) |
{ |
return do_register(statp->program->pg_name, statp, 1); |
} |
|
/*
 * Remove a server stats file previously created by svc_proc_register().
 */
void
svc_proc_unregister(const char *name)
{
	remove_proc_entry(name, proc_net_rpc);
}
|
/*
 * Create the /proc/net/rpc directory if it does not exist yet.
 * Idempotent: safe to call from every do_register().
 */
void
rpc_proc_init(void)
{
	dprintk("RPC: registering /proc/net/rpc\n");
	if (!proc_net_rpc) {
		struct proc_dir_entry *ent;
		ent = proc_mkdir("net/rpc", 0);
		if (ent) {
			/* Pin the module while the directory exists. */
			ent->owner = THIS_MODULE;
			proc_net_rpc = ent;
		}
	}
}
|
/*
 * Remove the /proc/net/rpc directory on module unload.
 * Clears the cached pointer before tearing the entry down.
 */
void
rpc_proc_exit(void)
{
	dprintk("RPC: unregistering /proc/net/rpc\n");
	if (proc_net_rpc) {
		proc_net_rpc = NULL;
		remove_proc_entry("net/rpc", 0);
	}
}
|
|
/*
 * Module init: register the RPC sysctl table (debug builds only) and
 * create the /proc/net/rpc directory.
 */
static int __init
init_sunrpc(void)
{
#ifdef RPC_DEBUG
	rpc_register_sysctl();
#endif
	rpc_proc_init();
	return 0;
}
|
/*
 * Module exit: undo init_sunrpc() in reverse order.
 */
static void __exit
cleanup_sunrpc(void)
{
#ifdef RPC_DEBUG
	rpc_unregister_sysctl();
#endif
	rpc_proc_exit();
}
MODULE_LICENSE("GPL"); |
module_init(init_sunrpc); |
module_exit(cleanup_sunrpc); |
/clnt.c
0,0 → 1,962
/* |
* linux/net/sunrpc/rpcclnt.c |
* |
* This file contains the high-level RPC interface. |
* It is modeled as a finite state machine to support both synchronous |
* and asynchronous requests. |
* |
* - RPC header generation and argument serialization. |
* - Credential refresh. |
* - TCP reconnect handling (when finished). |
* - Retry of operation when it is suspected the operation failed because |
* of uid squashing on the server, or when the credentials were stale |
* and need to be refreshed, or when a packet was damaged in transit. |
* This may be have to be moved to the VFS layer. |
* |
* NB: BSD uses a more intelligent approach to guessing when a request |
* or reply has been lost by keeping the RTO estimate for each procedure. |
* We currently make do with a constant timeout value. |
* |
* Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> |
* Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <asm/system.h> |
|
#include <linux/types.h> |
#include <linux/mm.h> |
#include <linux/slab.h> |
#include <linux/in.h> |
#include <linux/utsname.h> |
|
#include <linux/sunrpc/clnt.h> |
|
#include <linux/nfs.h> |
|
|
#define RPC_SLACK_SPACE 512 /* total overkill */ |
|
#ifdef RPC_DEBUG |
# define RPCDBG_FACILITY RPCDBG_CALL |
#endif |
|
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); |
|
|
static void call_start(struct rpc_task *task); |
static void call_reserve(struct rpc_task *task); |
static void call_reserveresult(struct rpc_task *task); |
static void call_allocate(struct rpc_task *task); |
static void call_encode(struct rpc_task *task); |
static void call_decode(struct rpc_task *task); |
static void call_bind(struct rpc_task *task); |
static void call_transmit(struct rpc_task *task); |
static void call_status(struct rpc_task *task); |
static void call_refresh(struct rpc_task *task); |
static void call_refreshresult(struct rpc_task *task); |
static void call_timeout(struct rpc_task *task); |
static void call_connect(struct rpc_task *task); |
static void call_connect_status(struct rpc_task *); |
static u32 * call_header(struct rpc_task *task); |
static u32 * call_verify(struct rpc_task *task); |
|
|
/* |
* Create an RPC client |
* FIXME: This should also take a flags argument (as in task->tk_flags). |
* It's called (among others) from pmap_create_client, which may in |
* turn be called by an async task. In this case, rpciod should not be |
* made to sleep too long. |
*/ |
/*
 * Create an RPC client
 * FIXME: This should also take a flags argument (as in task->tk_flags).
 * It's called (among others) from pmap_create_client, which may in
 * turn be called by an async task. In this case, rpciod should not be
 * made to sleep too long.
 *
 * Returns NULL on failure.  NOTE(review): on failure the caller still
 * owns @xprt -- this function never destroys it (only the auth-failure
 * path frees the half-built clnt).
 */
struct rpc_clnt *
rpc_create_client(struct rpc_xprt *xprt, char *servname,
		  struct rpc_program *program, u32 vers, int flavor)
{
	struct rpc_version *version;
	struct rpc_clnt	*clnt = NULL;

	dprintk("RPC: creating %s client for %s (xprt %p)\n",
		program->name, servname, xprt);

	/* Need a transport and a known program version. */
	if (!xprt)
		goto out;
	if (vers >= program->nrvers || !(version = program->version[vers]))
		goto out;

	clnt = (struct rpc_clnt *) rpc_allocate(0, sizeof(*clnt));
	if (!clnt)
		goto out_no_clnt;
	memset(clnt, 0, sizeof(*clnt));
	atomic_set(&clnt->cl_users, 0);

	clnt->cl_xprt = xprt;
	clnt->cl_procinfo = version->procs;
	clnt->cl_maxproc = version->nrprocs;
	clnt->cl_server = servname;
	clnt->cl_protname = program->name;
	clnt->cl_port = xprt->addr.sin_port;
	clnt->cl_prog = program->number;
	clnt->cl_vers = version->number;
	clnt->cl_prot = xprt->prot;
	clnt->cl_stats = program->stats;
	INIT_RPC_WAITQ(&clnt->cl_bindwait, "bindwait");

	/* No port known yet: have the portmapper resolve it on demand. */
	if (!clnt->cl_port)
		clnt->cl_autobind = 1;

	rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval);

	if (!rpcauth_create(flavor, clnt))
		goto out_no_auth;

	/* save the nodename (truncated to UNX_MAXNODENAME) */
	clnt->cl_nodelen = strlen(system_utsname.nodename);
	if (clnt->cl_nodelen > UNX_MAXNODENAME)
		clnt->cl_nodelen = UNX_MAXNODENAME;
	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
out:
	return clnt;

out_no_clnt:
	printk(KERN_INFO "RPC: out of memory in rpc_create_client\n");
	goto out;
out_no_auth:
	printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %d)\n",
		flavor);
	rpc_free(clnt);
	clnt = NULL;
	goto out;
}
|
/* |
* Properly shut down an RPC client, terminating all outstanding |
* requests. Note that we must be certain that cl_oneshot and |
* cl_dead are cleared, or else the client would be destroyed |
* when the last task releases it. |
*/ |
/*
 * Properly shut down an RPC client, terminating all outstanding
 * requests. Note that we must be certain that cl_oneshot and
 * cl_dead are cleared, or else the client would be destroyed
 * when the last task releases it.
 *
 * Sleeps (on destroy_wait, 1s at a time) until every user has gone
 * away, killing outstanding tasks on each pass, then destroys the
 * client.  Must be called from process context.
 */
int
rpc_shutdown_client(struct rpc_clnt *clnt)
{
	dprintk("RPC: shutting down %s client for %s\n",
		clnt->cl_protname, clnt->cl_server);
	while (atomic_read(&clnt->cl_users)) {
#ifdef RPC_DEBUG
		dprintk("RPC: rpc_shutdown_client: client %s, tasks=%d\n",
			clnt->cl_protname, atomic_read(&clnt->cl_users));
#endif
		/* Don't let rpc_release_client destroy us */
		clnt->cl_oneshot = 0;
		clnt->cl_dead = 0;
		rpc_killall_tasks(clnt);
		sleep_on_timeout(&destroy_wait, 1*HZ);
	}
	return rpc_destroy_client(clnt);
}
|
/* |
* Delete an RPC client |
*/ |
/*
 * Delete an RPC client: tear down its auth handle and transport, then
 * free the client structure itself.  Always returns 0.
 */
int
rpc_destroy_client(struct rpc_clnt *clnt)
{
	dprintk("RPC: destroying %s client for %s\n",
			clnt->cl_protname, clnt->cl_server);

	if (clnt->cl_auth) {
		rpcauth_destroy(clnt->cl_auth);
		clnt->cl_auth = NULL;
	}
	if (clnt->cl_xprt) {
		xprt_destroy(clnt->cl_xprt);
		clnt->cl_xprt = NULL;
	}
	rpc_free(clnt);
	return 0;
}
|
/* |
* Release an RPC client |
*/ |
/*
 * Release one reference on an RPC client.  When the last reference
 * drops: wake rpc_shutdown_client() waiters, and destroy the client
 * if it was marked one-shot or dead.
 */
void
rpc_release_client(struct rpc_clnt *clnt)
{
	dprintk("RPC: rpc_release_client(%p, %d)\n",
				clnt, atomic_read(&clnt->cl_users));

	if (!atomic_dec_and_test(&clnt->cl_users))
		return;
	wake_up(&destroy_wait);
	if (clnt->cl_oneshot || clnt->cl_dead)
		rpc_destroy_client(clnt);
}
|
/* |
* Default callback for async RPC calls |
*/ |
/*
 * Default callback for async RPC calls: do nothing.  Installed by
 * rpc_call_async() when the caller passes a NULL callback.
 */
static void
rpc_default_callback(struct rpc_task *task)
{
}
|
/*
 * Export the signal mask handling for asynchronous code that
 * sleeps on RPC calls
 */
|
/*
 * Block every signal except SIGKILL (and, for interruptible clients,
 * SIGINT/SIGQUIT when the process has not installed handlers for them)
 * for the duration of a synchronous RPC call.  The previous mask is
 * saved in *oldset for rpc_clnt_sigunmask().
 */
void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
{
	unsigned long	sigallow = sigmask(SIGKILL);
	unsigned long	irqflags;

	/* Turn off various signals */
	if (clnt->cl_intr) {
		struct k_sigaction *action = current->sig->action;
		/* Only allow INT/QUIT through if they would kill the
		 * process anyway (default disposition). */
		if (action[SIGINT-1].sa.sa_handler == SIG_DFL)
			sigallow |= sigmask(SIGINT);
		if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL)
			sigallow |= sigmask(SIGQUIT);
	}
	spin_lock_irqsave(&current->sigmask_lock, irqflags);
	*oldset = current->blocked;
	/* Block everything not in sigallow; keep already-blocked
	 * allowed signals blocked. */
	siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]);
	recalc_sigpending(current);
	spin_unlock_irqrestore(&current->sigmask_lock, irqflags);
}
|
/*
 * Restore the signal mask saved by rpc_clnt_sigmask() after a
 * synchronous RPC call completes.
 */
void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
{
	unsigned long flags;

	spin_lock_irqsave(&current->sigmask_lock, flags);
	current->blocked = *oldset;
	recalc_sigpending(current);
	spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
|
/* |
* New rpc_call implementation |
*/ |
/*
 * Perform a synchronous RPC call described by @msg on @clnt.
 * Uses an on-stack task, blocks dangerous signals for the duration,
 * and returns the task status (0 on success, negative errno on error).
 * RPC_TASK_ASYNC in @flags is rejected (stripped with a warning).
 */
int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
	struct rpc_task	my_task, *task = &my_task;
	sigset_t	oldset;
	int		status;

	/* If this client is slain all further I/O fails */
	if (clnt->cl_dead)
		return -EIO;

	if (flags & RPC_TASK_ASYNC) {
		printk("rpc_call_sync: Illegal flag combination for synchronous task\n");
		flags &= ~RPC_TASK_ASYNC;
	}

	rpc_clnt_sigmask(clnt, &oldset);

	/* Create/initialize a new RPC task */
	rpc_init_task(task, clnt, NULL, flags);
	rpc_call_setup(task, msg, 0);

	/* Set up the call info struct and execute the task */
	if (task->tk_status == 0)
		status = rpc_execute(task);
	else {
		/* Setup failed (e.g. no credentials): release the task
		 * without running it. */
		status = task->tk_status;
		rpc_release_task(task);
	}

	rpc_clnt_sigunmask(clnt, &oldset);

	return status;
}
|
/* |
* New rpc_call implementation |
*/ |
/*
 * Start an asynchronous RPC call described by @msg on @clnt.
 * @callback (NULL means the no-op default) runs on completion with
 * @data available in task->tk_calldata.  Returns 0 if the call was
 * started, or a negative errno (-EIO dead client, -ENOMEM no task).
 */
int
rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
	       rpc_action callback, void *data)
{
	struct rpc_task	*task;
	sigset_t	oldset;
	int		status;

	/* If this client is slain all further I/O fails */
	if (clnt->cl_dead)
		return -EIO;

	flags |= RPC_TASK_ASYNC;

	rpc_clnt_sigmask(clnt, &oldset);

	/* Create/initialize a new RPC task */
	if (!callback)
		callback = rpc_default_callback;
	status = -ENOMEM;
	if (!(task = rpc_new_task(clnt, callback, flags)))
		goto out;
	task->tk_calldata = data;

	rpc_call_setup(task, msg, 0);

	/* Set up the call info struct and execute the task */
	if (task->tk_status == 0)
		status = rpc_execute(task);
	else {
		/* Setup failed: reap the task instead of running it. */
		status = task->tk_status;
		rpc_release_task(task);
	}

out:
	rpc_clnt_sigunmask(clnt, &oldset);

	return status;
}
|
|
/*
 * Attach the RPC message and flags to a task and bind credentials.
 * On success the task's first FSM state is call_start; if credential
 * binding failed (tk_status != 0) the task is left with no action.
 */
void
rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
{
	task->tk_msg = *msg;
	task->tk_flags |= flags;
	/* Bind the user cred */
	if (task->tk_msg.rpc_cred != NULL) {
		rpcauth_holdcred(task);
	} else
		rpcauth_bindcred(task);

	if (task->tk_status == 0)
		task->tk_action = call_start;
	else
		task->tk_action = NULL;
}
|
void |
rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) |
{ |
struct rpc_xprt *xprt = clnt->cl_xprt; |
|
xprt->sndsize = 0; |
if (sndsize) |
xprt->sndsize = sndsize + RPC_SLACK_SPACE; |
xprt->rcvsize = 0; |
if (rcvsize) |
xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; |
xprt_sock_setbufsize(xprt); |
} |
|
/* |
* Restart an (async) RPC call. Usually called from within the |
* exit handler. |
*/ |
void |
rpc_restart_call(struct rpc_task *task) |
{ |
if (RPC_ASSASSINATED(task)) |
return; |
|
task->tk_action = call_start; |
} |
|
/* |
* 0. Initial state |
* |
* Other FSM states can be visited zero or more times, but |
* this state is visited exactly once for each RPC. |
*/ |
static void |
call_start(struct rpc_task *task) |
{ |
struct rpc_clnt *clnt = task->tk_client; |
|
if (task->tk_msg.rpc_proc > clnt->cl_maxproc) { |
printk(KERN_ERR "%s (vers %d): bad procedure number %d\n", |
clnt->cl_protname, clnt->cl_vers, |
task->tk_msg.rpc_proc); |
rpc_exit(task, -EIO); |
return; |
} |
|
dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid, |
clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc, |
(RPC_IS_ASYNC(task) ? "async" : "sync")); |
|
/* Increment call count */ |
rpcproc_count(clnt, task->tk_msg.rpc_proc)++; |
clnt->cl_stats->rpccnt++; |
task->tk_action = call_reserve; |
} |
|
/* |
* 1. Reserve an RPC call slot |
*/ |
/*
 * 1. Reserve an RPC call slot
 *
 * Detours to call_refresh first if the bound credentials are stale;
 * otherwise asks the transport for a request slot and lets
 * call_reserveresult inspect the outcome.
 */
static void
call_reserve(struct rpc_task *task)
{
	dprintk("RPC: %4d call_reserve\n", task->tk_pid);

	if (!rpcauth_uptodatecred(task)) {
		task->tk_action = call_refresh;
		return;
	}

	task->tk_status = 0;
	task->tk_action = call_reserveresult;
	xprt_reserve(task);
}
|
/* |
* 1b. Grok the result of xprt_reserve() |
*/ |
/*
 * 1b. Grok the result of xprt_reserve()
 *
 * Success with a slot -> call_allocate; -EAGAIN -> retry the reserve;
 * -EIO or anything unexpected -> exit the task.  Inconsistent
 * status/slot combinations are reported and cleaned up defensively.
 */
static void
call_reserveresult(struct rpc_task *task)
{
	int status = task->tk_status;

	dprintk("RPC: %4d call_reserveresult (status %d)\n",
				task->tk_pid, task->tk_status);

	/*
	 * After a call to xprt_reserve(), we must have either
	 * a request slot or else an error status.
	 */
	task->tk_status = 0;
	if (status >= 0) {
		if (task->tk_rqstp) {
			task->tk_action = call_allocate;
			return;
		}

		/* Success reported but no slot: internal inconsistency. */
		printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
				__FUNCTION__, status);
		rpc_exit(task, -EIO);
		return;
	}

	/*
	 * Even though there was an error, we may have acquired
	 * a request slot somehow.  Make sure not to leak it.
	 */
	if (task->tk_rqstp) {
		printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
				__FUNCTION__, status);
		xprt_release(task);
	}

	switch (status) {
	case -EAGAIN:	/* woken up; retry */
		task->tk_action = call_reserve;
		return;
	case -EIO:	/* probably a shutdown */
		break;
	default:
		printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
				__FUNCTION__, status);
		break;
	}
	rpc_exit(task, status);
}
|
/* |
* 2. Allocate the buffer. For details, see sched.c:rpc_malloc. |
* (Note: buffer memory is freed in rpc_task_release). |
*/ |
/*
 * 2. Allocate the buffer. For details, see sched.c:rpc_malloc.
 * (Note: buffer memory is freed in rpc_task_release).
 *
 * A single buffer of twice the procedure's bufsiz (plus slack) holds
 * both the send and receive halves -- see call_encode's split.  On
 * allocation failure the task backs off and re-reserves, unless a
 * signal is pending on an interruptible synchronous call.
 */
static void
call_allocate(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	unsigned int	bufsiz;

	dprintk("RPC: %4d call_allocate (status %d)\n",
				task->tk_pid, task->tk_status);
	task->tk_action = call_encode;
	/* Buffer may survive from a previous pass (e.g. retransmit). */
	if (task->tk_buffer)
		return;

	/* FIXME: compute buffer requirements more exactly using
	 * auth->au_wslack */
	bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc) + RPC_SLACK_SPACE;

	/* << 1: one half for the request, one for the reply. */
	if ((task->tk_buffer = rpc_malloc(task, bufsiz << 1)) != NULL)
		return;
	printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task);

	if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) {
		/* Give the slot back, wait a bit, start over. */
		xprt_release(task);
		task->tk_action = call_reserve;
		rpc_delay(task, HZ>>4);
		return;
	}

	rpc_exit(task, -ERESTARTSYS);
}
|
/* |
* 3. Encode arguments of an RPC call |
*/ |
/*
 * 3. Encode arguments of an RPC call
 *
 * Splits the task buffer into send/receive halves, zeroes it (so
 * opaque/string XDR padding is automatic), writes the RPC call header
 * and then the procedure's arguments.  Next state: call_bind, unless
 * header or argument encoding fails (task exits).
 */
static void
call_encode(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_rqst	*req = task->tk_rqstp;
	struct xdr_buf *sndbuf = &req->rq_snd_buf;
	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
	unsigned int	bufsiz;
	kxdrproc_t	encode;
	int		status;
	u32		*p;

	dprintk("RPC: %4d call_encode (status %d)\n",
				task->tk_pid, task->tk_status);

	task->tk_action = call_bind;

	/* Default buffer setup: first half of tk_buffer sends, second
	 * half receives (matches the << 1 in call_allocate). */
	bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc)+RPC_SLACK_SPACE;
	sndbuf->head[0].iov_base = (void *)task->tk_buffer;
	sndbuf->head[0].iov_len  = bufsiz;
	sndbuf->tail[0].iov_len  = 0;
	sndbuf->page_len	 = 0;
	sndbuf->len		 = 0;
	rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz);
	rcvbuf->head[0].iov_len  = bufsiz;
	rcvbuf->tail[0].iov_len  = 0;
	rcvbuf->page_len	 = 0;
	rcvbuf->len		 = bufsiz;

	/* Zero buffer so we have automatic zero-padding of opaque & string */
	memset(task->tk_buffer, 0, bufsiz);

	/* Encode header and provided arguments */
	encode = rpcproc_encode(clnt, task->tk_msg.rpc_proc);
	if (!(p = call_header(task))) {
		printk(KERN_INFO "RPC: call_header failed, exit EIO\n");
		rpc_exit(task, -EIO);
	} else
	if (encode && (status = encode(req, p, task->tk_msg.rpc_argp)) < 0) {
		printk(KERN_WARNING "%s: can't encode arguments: %d\n",
				clnt->cl_protname, -status);
		rpc_exit(task, status);
	}
}
|
/* |
* 4. Get the server port number if not yet set |
*/ |
/*
 * 4. Get the server port number if not yet set
 *
 * With a known port: go straight to transmit (connected) or connect.
 * With no port: query the portmapper asynchronously via rpc_getport()
 * and resume at call_connect when it answers.
 */
static void
call_bind(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_xprt *xprt = clnt->cl_xprt;

	dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid,
			xprt, (xprt_connected(xprt) ? "is" : "is not"));

	task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect;

	if (!clnt->cl_port) {
		task->tk_action = call_connect;
		/* Cap how long we will wait for the portmapper. */
		task->tk_timeout = clnt->cl_timeout.to_maxval;
		rpc_getport(task, clnt);
	}
}
|
/* |
* 4a. Establish socket |
* Connect to the RPC server (TCP case) |
*/ |
/*
 * 4a. Establish socket
 *     Connect to the RPC server (TCP case)
 *
 * Skips straight to transmit when the transport is already connected.
 * A negative tk_status (e.g. a portmapper failure from call_bind) is
 * left for call_connect_status to sort out.
 */
static void
call_connect(struct rpc_task *task)
{
	struct rpc_clnt *clnt = task->tk_client;

	dprintk("RPC: %4d call_connect status %d\n",
				task->tk_pid, task->tk_status);

	if (xprt_connected(clnt->cl_xprt)) {
		task->tk_action = call_transmit;
		return;
	}
	task->tk_action = call_connect_status;
	if (task->tk_status < 0)
		return;
	xprt_connect(task);
}
|
/* |
* 4b. Sort out reconnection result |
*/ |
/*
 * 4b. Sort out reconnection result
 *
 * Success counts a reconnect and moves on to transmit.  Transient
 * connection errors force a rebind (if autobinding) or reconnect;
 * anything else terminates the task.
 */
static void call_connect_status(struct rpc_task *task)
{
	struct rpc_clnt *clnt = task->tk_client;
	int status = task->tk_status;

	task->tk_status = 0;
	if (status >= 0) {
		clnt->cl_stats->netreconn++;
		task->tk_action = call_transmit;
		return;
	}

	/* Something failed: we may have to rebind */
	if (clnt->cl_autobind)
		clnt->cl_port = 0;
	switch (status) {
	case -ECONNREFUSED:
	case -ECONNRESET:
	case -ENOTCONN:
	case -ETIMEDOUT:
	case -EAGAIN:
		/* Port was cleared above for autobind clients, so this
		 * picks rebind vs. plain reconnect accordingly. */
		task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect;
		break;
	default:
		rpc_exit(task, status);
	}
}
|
/* |
* 5. Transmit the RPC request, and wait for reply |
*/ |
/*
 * 5. Transmit the RPC request, and wait for reply
 *
 * For procedures with no decode routine (one-way calls) a successful
 * transmit completes the task immediately; otherwise call_status
 * examines the outcome once a reply (or error) arrives.
 */
static void
call_transmit(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;

	dprintk("RPC: %4d call_transmit (status %d)\n",
				task->tk_pid, task->tk_status);

	task->tk_action = call_status;
	if (task->tk_status < 0)
		return;
	xprt_transmit(task);
	if (!rpcproc_decode(clnt, task->tk_msg.rpc_proc) && task->tk_status >= 0) {
		task->tk_action = NULL;
		rpc_wake_up_task(task);
	}
}
|
/* |
* 6. Sort out the RPC call status |
*/ |
/*
 * 6. Sort out the RPC call status
 *
 * Success -> decode the reply.  Timeouts go through the major/minor
 * timeout logic; connection errors trigger a rebind/reconnect; ENOMEM
 * and EAGAIN retransmit; anything else terminates the task.
 */
static void
call_status(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_rqst	*req = task->tk_rqstp;
	int		status;

	/* Pick up a reply the softirq may have stored after the request
	 * was fully sent. */
	smp_rmb();
	if (req->rq_received > 0 && !req->rq_bytes_sent)
		task->tk_status = req->rq_received;

	dprintk("RPC: %4d call_status (status %d)\n",
				task->tk_pid, task->tk_status);

	status = task->tk_status;
	if (status >= 0) {
		task->tk_action = call_decode;
		return;
	}

	task->tk_status = 0;
	switch(status) {
	case -ETIMEDOUT:
		task->tk_action = call_timeout;
		break;
	case -ECONNREFUSED:
	case -ENOTCONN:
		req->rq_bytes_sent = 0;
		if (clnt->cl_autobind || !clnt->cl_port) {
			clnt->cl_port = 0;
			task->tk_action = call_bind;
			break;
		}
		task->tk_action = call_connect;
		break;
	/* NOTE(review): the original had an unreachable "sleep and
	 * dream of an open connection" fragment here (tk_timeout = 5*HZ
	 * plus rpc_sleep_on(&xprt->sending, ...)) sitting between the
	 * break above and the next case label; it could never execute
	 * and has been removed, along with the now-unused local xprt.
	 * Behavior is unchanged. */
	case -ENOMEM:
	case -EAGAIN:
		task->tk_action = call_transmit;
		break;
	default:
		if (clnt->cl_chatty)
			printk("%s: RPC call returned error %d\n",
			       clnt->cl_protname, -status);
		rpc_exit(task, status);
	}
}
|
/* |
* 6a. Handle RPC timeout |
* We do not release the request slot, so we keep using the |
* same XID for all retransmits. |
*/ |
/*
 * 6a. Handle RPC timeout
 * We do not release the request slot, so we keep using the
 * same XID for all retransmits.
 *
 * Minor timeouts simply retransmit.  On a major timeout: soft clients
 * give up with -EIO; hard clients log once (RPC_CALL_MAJORSEEN), drop
 * the port if autobinding, and retry from call_bind.
 */
static void
call_timeout(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_timeout *to = &task->tk_rqstp->rq_timeout;

	if (xprt_adjust_timeout(to)) {
		dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid);
		goto retry;
	}
	/* Major timeout: reset the retry budget for the next round. */
	to->to_retries = clnt->cl_timeout.to_retries;

	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
	if (clnt->cl_softrtry) {
		if (clnt->cl_chatty)
			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
				clnt->cl_protname, clnt->cl_server);
		rpc_exit(task, -EIO);
		return;
	}

	if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
		task->tk_flags |= RPC_CALL_MAJORSEEN;
		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
			clnt->cl_protname, clnt->cl_server);
	}
	if (clnt->cl_autobind)
		clnt->cl_port = 0;

retry:
	clnt->cl_stats->rpcretrans++;
	task->tk_action = call_bind;
	task->tk_status = 0;
}
|
/* |
* 7. Decode the RPC reply |
*/ |
/*
 * 7. Decode the RPC reply
 *
 * Rejects replies shorter than the 12-byte minimum RPC reply header
 * (xid + msg_type + reply_stat), verifies the header via call_verify,
 * applies the NFS setuid-retry hack, then runs the procedure's decode
 * routine on the result.
 */
static void
call_decode(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_rqst	*req = task->tk_rqstp;
	kxdrproc_t	decode = rpcproc_decode(clnt, task->tk_msg.rpc_proc);
	u32		*p;

	dprintk("RPC: %4d call_decode (status %d)\n",
				task->tk_pid, task->tk_status);

	/* Server recovered after a "not responding" complaint: say so. */
	if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) {
		printk(KERN_NOTICE "%s: server %s OK\n",
			clnt->cl_protname, clnt->cl_server);
		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
	}

	/* tk_status holds the reply length here; 12 bytes is the
	 * smallest possible RPC reply header. */
	if (task->tk_status < 12) {
		if (!clnt->cl_softrtry) {
			task->tk_action = call_transmit;
			clnt->cl_stats->rpcretrans++;
			goto out_retry;
		}
		printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
			clnt->cl_protname, task->tk_status);
		rpc_exit(task, -EIO);
		return;
	}

	/* Check that the softirq receive buffer is valid */
	if (unlikely(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
				sizeof(req->rq_rcv_buf)) != 0))
		printk(KERN_WARNING "%s: receive buffer is inconsistent. Please contact maintainer.\n",
				__FUNCTION__);

	/* Verify the RPC header */
	if (!(p = call_verify(task))) {
		/*
		 * When call_verify sets tk_action to NULL (via task_exit)
		 * a non-retry-able error has occurred (like the server
		 * not supporting a particular procedure call).
		 */
		if (task->tk_action == NULL)
			return;
		goto out_retry;
	}
	/*
	 * The following is an NFS-specific hack to cater for setuid
	 * processes whose uid is mapped to nobody on the server.
	 */
	if (task->tk_client->cl_droppriv &&
	    (ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) {
		if (RPC_IS_SETUID(task) && task->tk_suid_retry) {
			dprintk("RPC: %4d retry squashed uid\n", task->tk_pid);
			/* Toggle between real and effective uid creds. */
			task->tk_flags ^= RPC_CALL_REALUID;
			task->tk_action = call_encode;
			task->tk_suid_retry--;
			goto out_retry;
		}
	}

	task->tk_action = NULL;

	if (decode)
		task->tk_status = decode(req, p, task->tk_msg.rpc_resp);
	dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
					task->tk_status);
	return;
out_retry:
	/* Forget the reply so the retransmit waits for a fresh one. */
	req->rq_received = 0;
	task->tk_status = 0;
}
|
/* |
* 8. Refresh the credentials if rejected by the server |
*/ |
/*
 * 8. Refresh the credentials if rejected by the server
 *
 * Releases the transport slot first so the retried call gets a new
 * XID, then kicks off an asynchronous credential refresh.
 */
static void
call_refresh(struct rpc_task *task)
{
	dprintk("RPC: %4d call_refresh\n", task->tk_pid);

	xprt_release(task);	/* Must do to obtain new XID */
	task->tk_action = call_refreshresult;
	task->tk_status = 0;
	task->tk_client->cl_stats->rpcauthrefresh++;
	rpcauth_refreshcred(task);
}
|
/* |
* 8a. Process the results of a credential refresh |
*/ |
static void |
call_refreshresult(struct rpc_task *task) |
{ |
dprintk("RPC: %4d call_refreshresult (status %d)\n", |
task->tk_pid, task->tk_status); |
|
if (task->tk_status < 0) |
rpc_exit(task, -EACCES); |
else |
task->tk_action = call_reserve; |
} |
|
/* |
* Call header serialization |
*/ |
/*
 * Call header serialization
 *
 * Writes the RPC call header into the request's first send iovec and
 * returns a pointer past the (auth-flavor-marshalled) credentials,
 * where argument encoding continues.  Stream transports reserve a
 * leading word for the record marker, filled in at transmit time.
 */
static u32 *
call_header(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;
	struct rpc_xprt *xprt = clnt->cl_xprt;
	struct rpc_rqst	*req = task->tk_rqstp;
	u32		*p = req->rq_svec[0].iov_base;

	/* FIXME: check buffer size? */
	if (xprt->stream)
		*p++ = 0;		/* fill in later */
	*p++ = req->rq_xid;		/* XID */
	*p++ = htonl(RPC_CALL);		/* CALL */
	*p++ = htonl(RPC_VERSION);	/* RPC version */
	*p++ = htonl(clnt->cl_prog);	/* program number */
	*p++ = htonl(clnt->cl_vers);	/* program version */
	*p++ = htonl(task->tk_msg.rpc_proc);	/* procedure */
	return rpcauth_marshcred(task, p);
}
|
/* |
* Reply header verification |
*/ |
/*
 * Reply header verification
 *
 * Parses the reply header following the XID.  Returns a pointer just
 * past the verified header (where result data starts), or NULL after
 * scheduling a retry (credential refresh / re-encode) or exiting the
 * task with an error.
 */
static u32 *
call_verify(struct rpc_task *task)
{
	u32	*p = task->tk_rqstp->rq_rvec[0].iov_base, n;

	p += 1;	/* skip XID */

	/* msg_type must be REPLY */
	if ((n = ntohl(*p++)) != RPC_REPLY) {
		printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
		goto garbage;
	}
	/* MSG_DENIED: decide between retry and -EACCES/-EIO */
	if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
		int	error = -EACCES;

		if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) {
			printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n);
		} else
		switch ((n = ntohl(*p++))) {
		case RPC_AUTH_REJECTEDCRED:
		case RPC_AUTH_REJECTEDVERF:
			/* Stale credentials: refresh and retry, bounded
			 * by tk_cred_retry. */
			if (!task->tk_cred_retry)
				break;
			task->tk_cred_retry--;
			dprintk("RPC: %4d call_verify: retry stale creds\n",
							task->tk_pid);
			rpcauth_invalcred(task);
			task->tk_action = call_refresh;
			return NULL;
		case RPC_AUTH_BADCRED:
		case RPC_AUTH_BADVERF:
			/* possibly garbled cred/verf? */
			if (!task->tk_garb_retry)
				break;
			task->tk_garb_retry--;
			dprintk("RPC: %4d call_verify: retry garbled creds\n",
							task->tk_pid);
			task->tk_action = call_encode;
			return NULL;
		case RPC_AUTH_TOOWEAK:
			printk(KERN_NOTICE "call_verify: server requires stronger "
			       "authentication.\n");
			break;
		default:
			printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
			error = -EIO;
		}
		dprintk("RPC: %4d call_verify: call rejected %d\n",
						task->tk_pid, n);
		rpc_exit(task, error);
		return NULL;
	}
	/* Let the auth flavor validate the server's verifier. */
	if (!(p = rpcauth_checkverf(task, p))) {
		printk(KERN_WARNING "call_verify: auth check failed\n");
		goto garbage;		/* bad verifier, retry */
	}
	/* accept_stat */
	switch ((n = ntohl(*p++))) {
	case RPC_SUCCESS:
		return p;
	case RPC_PROG_UNAVAIL:
		printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n",
				(unsigned int)task->tk_client->cl_prog,
				task->tk_client->cl_server);
		goto out_eio;
	case RPC_PROG_MISMATCH:
		printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n",
				(unsigned int)task->tk_client->cl_prog,
				(unsigned int)task->tk_client->cl_vers,
				task->tk_client->cl_server);
		goto out_eio;
	case RPC_PROC_UNAVAIL:
		printk(KERN_WARNING "RPC: call_verify: proc %u unsupported by program %u, version %u on server %s\n",
				(unsigned int)task->tk_msg.rpc_proc,
				(unsigned int)task->tk_client->cl_prog,
				(unsigned int)task->tk_client->cl_vers,
				task->tk_client->cl_server);
		goto out_eio;
	case RPC_GARBAGE_ARGS:
		break;			/* retry */
	default:
		printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
		/* Also retry */
	}

garbage:
	/* Server saw garbage: re-encode and retransmit, bounded by
	 * tk_garb_retry; otherwise give up with -EIO. */
	dprintk("RPC: %4d call_verify: server saw garbage\n", task->tk_pid);
	task->tk_client->cl_stats->rpcgarbage++;
	if (task->tk_garb_retry) {
		task->tk_garb_retry--;
		dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid);
		task->tk_action = call_encode;
		return NULL;
	}
	printk(KERN_WARNING "RPC: garbage, exit EIO\n");
out_eio:
	rpc_exit(task, -EIO);
	return NULL;
}
/auth.c
0,0 → 1,376
/* |
* linux/fs/nfs/rpcauth.c |
* |
* Generic RPC authentication API. |
* |
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/types.h> |
#include <linux/sched.h> |
#include <linux/slab.h> |
#include <linux/errno.h> |
#include <linux/socket.h> |
#include <linux/sunrpc/clnt.h> |
#include <linux/spinlock.h> |
|
#ifdef RPC_DEBUG |
# define RPCDBG_FACILITY RPCDBG_AUTH |
#endif |
|
#define RPC_MAXFLAVOR 8 |
|
static struct rpc_authops * auth_flavors[RPC_MAXFLAVOR] = { |
&authnull_ops, /* AUTH_NULL */ |
&authunix_ops, /* AUTH_UNIX */ |
NULL, /* others can be loadable modules */ |
}; |
|
int |
rpcauth_register(struct rpc_authops *ops) |
{ |
unsigned int flavor; |
|
if ((flavor = ops->au_flavor) >= RPC_MAXFLAVOR) |
return -EINVAL; |
if (auth_flavors[flavor] != NULL) |
return -EPERM; /* what else? */ |
auth_flavors[flavor] = ops; |
return 0; |
} |
|
int |
rpcauth_unregister(struct rpc_authops *ops) |
{ |
unsigned int flavor; |
|
if ((flavor = ops->au_flavor) >= RPC_MAXFLAVOR) |
return -EINVAL; |
if (auth_flavors[flavor] != ops) |
return -EPERM; /* what else? */ |
auth_flavors[flavor] = NULL; |
return 0; |
} |
|
/*
 * Create an authentication handle of the given flavor for @clnt and
 * store it in clnt->cl_auth.  Returns the handle, or NULL when the
 * flavor is unknown/unregistered or the flavor's create op fails.
 */
struct rpc_auth *
rpcauth_create(unsigned int flavor, struct rpc_clnt *clnt)
{
	struct rpc_authops	*ops;

	if (flavor >= RPC_MAXFLAVOR || !(ops = auth_flavors[flavor]))
		return NULL;
	clnt->cl_auth = ops->create(clnt);
	return clnt->cl_auth;
}
|
/*
 * Tear down an authentication handle via its flavor's destroy op.
 */
void
rpcauth_destroy(struct rpc_auth *auth)
{
	auth->au_ops->destroy(auth);
}
|
static spinlock_t rpc_credcache_lock = SPIN_LOCK_UNLOCKED; |
|
/* |
* Initialize RPC credential cache |
*/ |
/*
 * Initialize RPC credential cache: empty all hash buckets and schedule
 * the first garbage-collection pass at half the expiry interval.
 */
void
rpcauth_init_credcache(struct rpc_auth *auth)
{
	memset(auth->au_credcache, 0, sizeof(auth->au_credcache));
	auth->au_nextgc = jiffies + (auth->au_expire >> 1);
}
|
/* |
* Destroy an unreferenced credential |
*/ |
/*
 * Destroy an unreferenced credential.
 * Debug builds sanity-check the magic value and insist the cred is
 * neither referenced nor still linked to an auth before freeing.
 */
static inline void
rpcauth_crdestroy(struct rpc_cred *cred)
{
#ifdef RPC_DEBUG
	if (cred->cr_magic != RPCAUTH_CRED_MAGIC)
		BUG();
	cred->cr_magic = 0;
	if (atomic_read(&cred->cr_count) || cred->cr_auth)
		BUG();
#endif
	cred->cr_ops->crdestroy(cred);
}
|
/* |
* Destroy a list of credentials |
*/ |
static inline |
void rpcauth_destroy_credlist(struct rpc_cred *head) |
{ |
struct rpc_cred *cred; |
|
while ((cred = head) != NULL) { |
head = cred->cr_next; |
rpcauth_crdestroy(cred); |
} |
} |
|
/* |
* Clear the RPC credential cache, and delete those credentials |
* that are not referenced. |
*/ |
/*
 * Clear the RPC credential cache, and delete those credentials
 * that are not referenced.
 *
 * Unlinks every cached cred under the lock; unreferenced ones are
 * collected on a private list and destroyed after the lock is dropped
 * (rpcauth_crdestroy may sleep); referenced ones are merely detached
 * and die when their last reference goes away.
 */
void
rpcauth_free_credcache(struct rpc_auth *auth)
{
	struct rpc_cred	**q, *cred, *free = NULL;
	int		i;

	spin_lock(&rpc_credcache_lock);
	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
		q = &auth->au_credcache[i];
		while ((cred = *q) != NULL) {
			*q = cred->cr_next;
			cred->cr_auth = NULL;
			if (atomic_read(&cred->cr_count) == 0) {
				cred->cr_next = free;
				free = cred;
			} else
				cred->cr_next = NULL;
		}
	}
	spin_unlock(&rpc_credcache_lock);
	rpcauth_destroy_credlist(free);
}
|
/*
 * Remove stale credentials. Avoid sleeping inside the loop: expired,
 * unreferenced creds are unhooked under the spinlock, collected on a
 * private list, and destroyed after the lock is dropped.  Also
 * re-arms the next GC deadline (au_nextgc).
 */
static void
rpcauth_gc_credcache(struct rpc_auth *auth)
{
	struct rpc_cred **q, *cred, *free = NULL;
	int i;

	dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
	spin_lock(&rpc_credcache_lock);
	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
		q = &auth->au_credcache[i];
		while ((cred = *q) != NULL) {
			if (!atomic_read(&cred->cr_count) &&
			    time_before(cred->cr_expire, jiffies)) {
				/* Expired and unreferenced: unhook it. */
				*q = cred->cr_next;
				cred->cr_auth = NULL;
				cred->cr_next = free;
				free = cred;
				continue;
			}
			q = &cred->cr_next;
		}
	}
	spin_unlock(&rpc_credcache_lock);
	rpcauth_destroy_credlist(free);
	auth->au_nextgc = jiffies + auth->au_expire;
}
|
/* |
* Insert credential into cache |
*/ |
void |
rpcauth_insert_credcache(struct rpc_auth *auth, struct rpc_cred *cred) |
{ |
int nr; |
|
nr = (cred->cr_uid & RPC_CREDCACHE_MASK); |
spin_lock(&rpc_credcache_lock); |
cred->cr_next = auth->au_credcache[nr]; |
auth->au_credcache[nr] = cred; |
cred->cr_auth = auth; |
get_rpccred(cred); |
spin_unlock(&rpc_credcache_lock); |
} |
|
/*
 * Look up a process' credentials in the authentication cache.
 * Root creds (RPC_TASK_ROOTCREDS) always hash to bucket 0; otherwise
 * the bucket is derived from the current uid.  A matching cred is
 * unhooked from its chain and re-inserted (at the chain head) by
 * rpcauth_insert_credcache(), which also takes the cache reference.
 * Returns NULL only when a new cred must be created and creation fails.
 */
static struct rpc_cred *
rpcauth_lookup_credcache(struct rpc_auth *auth, int taskflags)
{
	struct rpc_cred **q, *cred = NULL;
	int nr = 0;

	if (!(taskflags & RPC_TASK_ROOTCREDS))
		nr = current->uid & RPC_CREDCACHE_MASK;

	/* Opportunistic garbage collection of expired creds. */
	if (time_before(auth->au_nextgc, jiffies))
		rpcauth_gc_credcache(auth);

	spin_lock(&rpc_credcache_lock);
	q = &auth->au_credcache[nr];
	while ((cred = *q) != NULL) {
		if (!(cred->cr_flags & RPCAUTH_CRED_DEAD) &&
		    cred->cr_ops->crmatch(cred, taskflags)) {
			/* Unhook; re-inserted at the head below. */
			*q = cred->cr_next;
			break;
		}
		q = &cred->cr_next;
	}
	spin_unlock(&rpc_credcache_lock);

	if (!cred) {
		cred = auth->au_ops->crcreate(taskflags);
#ifdef RPC_DEBUG
		if (cred)
			cred->cr_magic = RPCAUTH_CRED_MAGIC;
#endif
	}

	if (cred)
		rpcauth_insert_credcache(auth, cred);

	return (struct rpc_cred *) cred;
}
|
/*
 * Remove cred handle from cache.  Does no locking itself: the caller
 * must hold rpc_credcache_lock (see put_rpccred(), which enters here
 * via atomic_dec_and_lock).
 */
static void
rpcauth_remove_credcache(struct rpc_cred *cred)
{
	struct rpc_auth *auth = cred->cr_auth;
	struct rpc_cred **q, *cr;
	int nr;

	nr = (cred->cr_uid & RPC_CREDCACHE_MASK);
	q = &auth->au_credcache[nr];
	while ((cr = *q) != NULL) {
		if (cred == cr) {
			/* Unhook and sever the back-pointer so the
			 * cred no longer counts as cached. */
			*q = cred->cr_next;
			cred->cr_next = NULL;
			cred->cr_auth = NULL;
			break;
		}
		q = &cred->cr_next;
	}
}
|
/*
 * Public entry point for credential lookup: logs, then defers to
 * rpcauth_lookup_credcache().
 */
struct rpc_cred *
rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
{
	dprintk("RPC: looking up %s cred\n",
		auth->au_ops->au_name);
	return rpcauth_lookup_credcache(auth, taskflags);
}
|
struct rpc_cred * |
rpcauth_bindcred(struct rpc_task *task) |
{ |
struct rpc_auth *auth = task->tk_auth; |
|
dprintk("RPC: %4d looking up %s cred\n", |
task->tk_pid, task->tk_auth->au_ops->au_name); |
task->tk_msg.rpc_cred = rpcauth_lookup_credcache(auth, task->tk_flags); |
if (task->tk_msg.rpc_cred == 0) |
task->tk_status = -ENOMEM; |
return task->tk_msg.rpc_cred; |
} |
|
/*
 * Ask the flavor whether the cred matches the current process for the
 * given task flags (@auth is used for logging only).
 */
int
rpcauth_matchcred(struct rpc_auth *auth, struct rpc_cred *cred, int taskflags)
{
	dprintk("RPC: matching %s cred %d\n",
		auth->au_ops->au_name, taskflags);
	return cred->cr_ops->crmatch(cred, taskflags);
}
|
/*
 * Take an extra reference on the task's credential, if it has one.
 */
void
rpcauth_holdcred(struct rpc_task *task)
{
	dprintk("RPC: %4d holding %s cred %p\n",
		task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
	if (task->tk_msg.rpc_cred)
		get_rpccred(task->tk_msg.rpc_cred);
}
|
/*
 * Release a credential reference.  When the final reference drops,
 * atomic_dec_and_lock leaves us holding rpc_credcache_lock; then:
 *  - a DEAD cred still in a cache is unhooked,
 *  - an uncached cred is destroyed immediately,
 *  - a cached cred just gets a fresh expiry time, leaving it for
 *    rpcauth_gc_credcache() to reap later.
 */
void
put_rpccred(struct rpc_cred *cred)
{
	if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
		return;

	if (cred->cr_auth && cred->cr_flags & RPCAUTH_CRED_DEAD)
		rpcauth_remove_credcache(cred);

	if (!cred->cr_auth) {
		spin_unlock(&rpc_credcache_lock);
		rpcauth_crdestroy(cred);
		return;
	}
	cred->cr_expire = jiffies + cred->cr_auth->au_expire;
	spin_unlock(&rpc_credcache_lock);
}
|
void |
rpcauth_unbindcred(struct rpc_task *task) |
{ |
struct rpc_auth *auth = task->tk_auth; |
struct rpc_cred *cred = task->tk_msg.rpc_cred; |
|
dprintk("RPC: %4d releasing %s cred %p\n", |
task->tk_pid, auth->au_ops->au_name, cred); |
|
put_rpccred(cred); |
task->tk_msg.rpc_cred = NULL; |
} |
|
/*
 * Marshal the task's credential and verifier into the request buffer
 * via the flavor's crmarshal method.  The RPC_CALL_REALUID task flag
 * is passed through as the method's `ruid' argument.  Returns a
 * pointer past the marshalled data.
 */
u32 *
rpcauth_marshcred(struct rpc_task *task, u32 *p)
{
	struct rpc_auth *auth = task->tk_auth;
	struct rpc_cred *cred = task->tk_msg.rpc_cred;

	dprintk("RPC: %4d marshaling %s cred %p\n",
		task->tk_pid, auth->au_ops->au_name, cred);
	return cred->cr_ops->crmarshal(task, p,
				       task->tk_flags & RPC_CALL_REALUID);
}
|
/*
 * Validate the verifier in the server's reply via the flavor's
 * crvalidate method.  Returns a pointer past the verifier, or NULL
 * if it is rejected (see unx_validate for the AUTH_UNIX version).
 */
u32 *
rpcauth_checkverf(struct rpc_task *task, u32 *p)
{
	struct rpc_auth *auth = task->tk_auth;
	struct rpc_cred *cred = task->tk_msg.rpc_cred;

	dprintk("RPC: %4d validating %s cred %p\n",
		task->tk_pid, auth->au_ops->au_name, cred);
	return cred->cr_ops->crvalidate(task, p);
}
|
/*
 * Refresh the task's credential via the flavor's crrefresh method.
 * The result is stored in tk_status and also returned.
 */
int
rpcauth_refreshcred(struct rpc_task *task)
{
	struct rpc_auth *auth = task->tk_auth;
	struct rpc_cred *cred = task->tk_msg.rpc_cred;

	dprintk("RPC: %4d refreshing %s cred %p\n",
		task->tk_pid, auth->au_ops->au_name, cred);
	task->tk_status = cred->cr_ops->crrefresh(task);
	return task->tk_status;
}
|
/*
 * Clear the UPTODATE flag on the task's credential (if any), so that
 * rpcauth_uptodatecred() reports it stale.
 */
void
rpcauth_invalcred(struct rpc_task *task)
{
	dprintk("RPC: %4d invalidating %s cred %p\n",
		task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
	spin_lock(&rpc_credcache_lock);
	if (task->tk_msg.rpc_cred)
		task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
	spin_unlock(&rpc_credcache_lock);
}
|
int |
rpcauth_uptodatecred(struct rpc_task *task) |
{ |
int retval; |
spin_lock(&rpc_credcache_lock); |
retval = !(task->tk_msg.rpc_cred) || |
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); |
spin_unlock(&rpc_credcache_lock); |
return retval; |
} |
/auth_unix.c
0,0 → 1,252
/* |
* linux/net/sunrpc/rpcauth_unix.c |
* |
* UNIX-style authentication; no AUTH_SHORT support |
* |
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/types.h> |
#include <linux/slab.h> |
#include <linux/socket.h> |
#include <linux/in.h> |
#include <linux/sunrpc/clnt.h> |
#include <linux/sunrpc/auth.h> |
|
#define NFS_NGROUPS 16 |
/*
 * AUTH_UNIX credential: the generic rpc_cred plus a snapshot of the
 * owner's gid, fsuid/fsgid and (up to NFS_NGROUPS) supplementary
 * groups.  The uid lives in the embedded rpc_cred; see the uc_uid
 * alias below.
 */
struct unx_cred {
	struct rpc_cred uc_base;
	uid_t uc_fsuid;
	gid_t uc_gid, uc_fsgid;
	gid_t uc_gids[NFS_NGROUPS];
};
#define uc_uid uc_base.cr_uid |
#define uc_count uc_base.cr_count |
#define uc_flags uc_base.cr_flags |
#define uc_expire uc_base.cr_expire |
|
#define UNX_CRED_EXPIRE (60 * HZ) |
|
#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) |
|
#ifdef RPC_DEBUG |
# define RPCDBG_FACILITY RPCDBG_AUTH |
#endif |
|
static struct rpc_credops unix_credops; |
|
static struct rpc_auth * |
unx_create(struct rpc_clnt *clnt) |
{ |
struct rpc_auth *auth; |
|
dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); |
if (!(auth = (struct rpc_auth *) rpc_allocate(0, sizeof(*auth)))) |
return NULL; |
auth->au_cslack = UNX_WRITESLACK; |
auth->au_rslack = 2; /* assume AUTH_NULL verf */ |
auth->au_expire = UNX_CRED_EXPIRE; |
auth->au_ops = &authunix_ops; |
|
rpcauth_init_credcache(auth); |
|
return auth; |
} |
|
/*
 * Tear down an AUTH_UNIX authenticator: flush its credential cache
 * and free the auth structure.
 */
static void
unx_destroy(struct rpc_auth *auth)
{
	dprintk("RPC: destroying UNIX authenticator %p\n", auth);
	rpcauth_free_credcache(auth);
	rpc_free(auth);
}
|
static struct rpc_cred * |
unx_create_cred(int flags) |
{ |
struct unx_cred *cred; |
int i; |
|
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", |
current->uid, current->gid); |
|
if (!(cred = (struct unx_cred *) rpc_allocate(flags, sizeof(*cred)))) |
return NULL; |
|
atomic_set(&cred->uc_count, 0); |
cred->uc_flags = RPCAUTH_CRED_UPTODATE; |
if (flags & RPC_TASK_ROOTCREDS) { |
cred->uc_uid = cred->uc_fsuid = 0; |
cred->uc_gid = cred->uc_fsgid = 0; |
cred->uc_gids[0] = NOGROUP; |
} else { |
int groups = current->ngroups; |
if (groups > NFS_NGROUPS) |
groups = NFS_NGROUPS; |
|
cred->uc_uid = current->uid; |
cred->uc_gid = current->gid; |
cred->uc_fsuid = current->fsuid; |
cred->uc_fsgid = current->fsgid; |
for (i = 0; i < groups; i++) |
cred->uc_gids[i] = (gid_t) current->groups[i]; |
if (i < NFS_NGROUPS) |
cred->uc_gids[i] = NOGROUP; |
} |
cred->uc_base.cr_ops = &unix_credops; |
|
return (struct rpc_cred *) cred; |
} |
|
/*
 * Make up an AUTH_UNIX credential with the given uid/gid and attach
 * it to the task, bypassing the credential cache.  The refcount
 * starts at 1 and the flags are RPCAUTH_CRED_DEAD|RPCAUTH_CRED_UPTODATE
 * (DEAD creds are skipped by rpcauth_lookup_credcache).
 * NOTE(review): unlike unx_create_cred(), this never initializes
 * uc_base.cr_ops -- confirm that no caller ever invokes cr_ops on a
 * fake cred.
 */
struct rpc_cred *
authunix_fake_cred(struct rpc_task *task, uid_t uid, gid_t gid)
{
	struct unx_cred *cred;

	dprintk("RPC: allocating fake UNIX cred for uid %d gid %d\n",
		uid, gid);

	if (!(cred = (struct unx_cred *) rpc_malloc(task, sizeof(*cred))))
		return NULL;

	atomic_set(&cred->uc_count, 1);
	cred->uc_flags = RPCAUTH_CRED_DEAD|RPCAUTH_CRED_UPTODATE;
	cred->uc_uid = uid;
	cred->uc_gid = gid;
	cred->uc_fsuid = uid;
	cred->uc_fsgid = gid;
	cred->uc_gids[0] = (gid_t) NOGROUP;	/* empty group list */

	return task->tk_msg.rpc_cred = (struct rpc_cred *) cred;
}
|
/*
 * Free an AUTH_UNIX credential (allocated via rpc_allocate or
 * rpc_malloc).
 */
static void
unx_destroy_cred(struct rpc_cred *cred)
{
	rpc_free(cred);
}
|
/* |
* Match credentials against current process creds. |
* The root_override argument takes care of cases where the caller may |
* request root creds (e.g. for NFS swapping). |
*/ |
static int |
unx_match(struct rpc_cred *rcred, int taskflags) |
{ |
struct unx_cred *cred = (struct unx_cred *) rcred; |
int i; |
|
if (!(taskflags & RPC_TASK_ROOTCREDS)) { |
int groups; |
|
if (cred->uc_uid != current->uid |
|| cred->uc_gid != current->gid |
|| cred->uc_fsuid != current->fsuid |
|| cred->uc_fsgid != current->fsgid) |
return 0; |
|
groups = current->ngroups; |
if (groups > NFS_NGROUPS) |
groups = NFS_NGROUPS; |
for (i = 0; i < groups ; i++) |
if (cred->uc_gids[i] != (gid_t) current->groups[i]) |
return 0; |
return 1; |
} |
return (cred->uc_uid == 0 && cred->uc_fsuid == 0 |
&& cred->uc_gid == 0 && cred->uc_fsgid == 0 |
&& cred->uc_gids[0] == (gid_t) NOGROUP); |
} |
|
/* |
* Marshal credentials. |
* Maybe we should keep a cached credential for performance reasons. |
*/ |
static u32 * |
unx_marshal(struct rpc_task *task, u32 *p, int ruid) |
{ |
struct rpc_clnt *clnt = task->tk_client; |
struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; |
u32 *base, *hold; |
int i, n; |
|
*p++ = htonl(RPC_AUTH_UNIX); |
base = p++; |
*p++ = htonl(jiffies/HZ); |
|
/* |
* Copy the UTS nodename captured when the client was created. |
*/ |
n = clnt->cl_nodelen; |
*p++ = htonl(n); |
memcpy(p, clnt->cl_nodename, n); |
p += (n + 3) >> 2; |
|
/* Note: we don't use real uid if it involves raising priviledge */ |
if (ruid && cred->uc_uid != 0 && cred->uc_gid != 0) { |
*p++ = htonl((u32) cred->uc_uid); |
*p++ = htonl((u32) cred->uc_gid); |
} else { |
*p++ = htonl((u32) cred->uc_fsuid); |
*p++ = htonl((u32) cred->uc_fsgid); |
} |
hold = p++; |
for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) |
*p++ = htonl((u32) cred->uc_gids[i]); |
*hold = htonl(p - hold - 1); /* gid array length */ |
*base = htonl((p - base - 1) << 2); /* cred length */ |
|
*p++ = htonl(RPC_AUTH_NULL); |
*p++ = htonl(0); |
|
return p; |
} |
|
/*
 * Refresh credentials. This is a no-op for AUTH_UNIX: the cred is
 * simply re-marked up to date.  Note that the return value (and the
 * task status) is deliberately set to -EACCES.
 */
static int
unx_refresh(struct rpc_task *task)
{
	task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
	return task->tk_status = -EACCES;
}
|
/*
 * Check the verifier the server sent back.  Accepts the NULL, UNIX
 * and SHORT flavors, rejects verifier bodies larger than 400 bytes,
 * records the reply slack implied by the verifier size, and returns
 * a pointer past the verifier body (NULL on rejection).
 */
static u32 *
unx_validate(struct rpc_task *task, u32 *p)
{
	u32 n = ntohl(*p++);

	if (n != RPC_AUTH_NULL && n != RPC_AUTH_UNIX && n != RPC_AUTH_SHORT) {
		printk("RPC: bad verf flavor: %ld\n", (unsigned long) n);
		return NULL;
	}
	if ((n = ntohl(*p++)) > 400) {	/* n is now the verifier length */
		printk("RPC: giant verf size: %ld\n", (unsigned long) n);
		return NULL;
	}
	/* Reply slack: verifier body in words, plus flavor and length. */
	task->tk_auth->au_rslack = (n >> 2) + 2;
	p += (n >> 2);

	return p;
}
|
/*
 * Flavor operations for AUTH_UNIX, registered in the auth_flavors[]
 * table.  Positional initializers; the order must match struct
 * rpc_authops -- presumably flavor, (debug name,) create, destroy,
 * crcreate, judging by the function roles (TODO confirm against
 * linux/sunrpc/auth.h).
 */
struct rpc_authops authunix_ops = {
	RPC_AUTH_UNIX,
#ifdef RPC_DEBUG
	"UNIX",
#endif
	unx_create,
	unx_destroy,
	unx_create_cred
};
|
/*
 * Per-credential operations for AUTH_UNIX.  Positional initializers;
 * the order must match struct rpc_credops -- presumably crdestroy,
 * crmatch, crmarshal, crrefresh, crvalidate, judging by the function
 * roles (TODO confirm against linux/sunrpc/auth.h).
 */
static
struct rpc_credops unix_credops = {
	unx_destroy_cred,
	unx_match,
	unx_marshal,
	unx_refresh,
	unx_validate
};
/svcauth_des.c
0,0 → 1,215
/* |
* linux/net/sunrpc/svcauth_des.c |
* |
* Server-side AUTH_DES handling. |
* |
* Copyright (C) 1996, 1997 Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/types.h> |
#include <linux/sched.h> |
#include <linux/sunrpc/types.h> |
#include <linux/sunrpc/xdr.h> |
#include <linux/sunrpc/svcauth.h> |
#include <linux/sunrpc/svcsock.h> |
|
#define RPCDBG_FACILITY RPCDBG_AUTH |
|
/*
 * DES credential cache.
 * The cache is indexed by fullname/key to allow for multiple sessions
 * by the same user from different hosts.
 * It would be tempting to use the client's IP address rather than the
 * conversation key as an index, but that could become problematic for
 * multi-homed hosts that distribute traffic across their interfaces.
 */
struct des_cred {
	struct des_cred * dc_next;
	char * dc_fullname;
	u32 dc_nickname;
	des_cblock dc_key; /* conversation key */
	des_cblock dc_xkey; /* encrypted conv. key */
	des_key_schedule dc_keysched;
};
|
#define ADN_FULLNAME 0 |
#define ADN_NICKNAME 1 |
|
/* |
* The default slack allowed when checking for replayed credentials |
* (in milliseconds). |
*/ |
#define DES_REPLAY_SLACK 2000 |
|
/*
 * Make sure we don't place more than one call to the key server at
 * a time.
 * NOTE(review): this file-scope flag is never used -- get_cred_byname()
 * below declares its own local `static int in_keycall', which shadows
 * this one.  One of the two should go.
 */
static int in_keycall = 0;
|
/*
 * Reject the request with the given RPC auth error, releasing the
 * cached credential reference (if any) before returning from the
 * calling function.  Wrapped in do { } while (0) so the macro expands
 * to a single statement and is safe in unbraced if/else bodies (the
 * bare { ... } form plus the call site's semicolon would otherwise
 * terminate an `if' and detach a following `else').
 */
#define FAIL(err) \
	do { \
		if (data) put_cred(data); \
		*authp = rpc_autherr_##err; \
		return; \
	} while (0)
|
/*
 * Verify an AUTH_DES credential and verifier.
 *
 * NOTE(review): this function cannot compile as written -- it uses
 * several identifiers that are never declared anywhere in the file
 * (`namekind', `fullname', `status', `ivec', `window', `delta',
 * `now'), and the `garbage' label at the bottom has no matching goto.
 * It looks like unfinished code; confirm this translation unit is
 * excluded from the build before relying on any of it.
 */
void
svcauth_des(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
	struct svc_buf *argp = &rqstp->rq_argbuf;
	struct svc_buf *resp = &rqstp->rq_resbuf;
	struct svc_cred *cred = &rqstp->rq_cred;
	struct des_cred *data = NULL;
	u32 cryptkey[2];
	u32 cryptbuf[4];
	u32 *p = argp->buf;
	int len = argp->len, slen, i;

	*authp = rpc_auth_ok;

	/* Need at least three words: length, name kind, first data word. */
	if ((argp->len -= 3) < 0) {
		*statp = rpc_garbage_args;
		return;
	}

	p++; /* skip length field */
	namekind = ntohl(*p++); /* fullname/nickname */

	/* Get the credentials */
	if (namekind == ADN_NICKNAME) {
		/* If we can't find the cached session key, initiate a
		 * new session. */
		if (!(data = get_cred_bynick(*p++)))
			FAIL(rejectedcred);
	} else if (namekind == ADN_FULLNAME) {
		p = xdr_decode_string(p, &fullname, &len, RPC_MAXNETNAMELEN);
		if (p == NULL)
			FAIL(badcred);
		cryptkey[0] = *p++; /* get the encrypted key */
		cryptkey[1] = *p++;
		cryptbuf[2] = *p++; /* get the encrypted window */
	} else {
		FAIL(badcred);
	}

	/* If we're just updating the key, silently discard the request. */
	if (data && data->dc_locked) {
		*authp = rpc_autherr_dropit;
		_put_cred(data); /* release but don't unlock */
		return;
	}

	/* Get the verifier flavor and length */
	/* NOTE(review): `&&' short-circuits, so when the flavor IS
	 * RPC_AUTH_DES the length word is neither checked nor skipped,
	 * leaving p mis-positioned.  `||' was probably intended. */
	if (ntohl(*p++) != RPC_AUTH_DES && ntohl(*p++) != 12)
		FAIL(badverf);

	cryptbuf[0] = *p++; /* encrypted time stamp */
	cryptbuf[1] = *p++;
	cryptbuf[3] = *p++; /* 0 or window - 1 */

	if (namekind == ADN_NICKNAME) {
		status = des_ecb_encrypt((des_block *) cryptbuf,
					 (des_block *) cryptbuf,
					 data->dc_keysched, DES_DECRYPT);
	} else {
		/* We first have to decrypt the new session key and
		 * fill in the UNIX creds. */
		if (!(data = get_cred_byname(rqstp, authp, fullname, cryptkey)))
			return;
		status = des_cbc_encrypt((des_cblock *) cryptbuf,
					 (des_cblock *) cryptbuf, 16,
					 data->dc_keysched,
					 (des_cblock *) &ivec,
					 DES_DECRYPT);
	}
	if (status) {
		printk("svcauth_des: DES decryption failed (status %d)\n",
			status);
		FAIL(badverf);
	}

	/* Now check the whole lot */
	if (namekind == ADN_FULLNAME) {
		unsigned long winverf;

		data->dc_window = ntohl(cryptbuf[2]);
		winverf = ntohl(cryptbuf[2]);
		/* NOTE(review): dc_window and winverf both read
		 * cryptbuf[2], and `window' is undeclared; the verifier
		 * word (window - 1) is presumably meant to come from
		 * cryptbuf[3] -- confirm against the AUTH_DES spec. */
		if (window != winverf - 1) {
			printk("svcauth_des: bad window verifier!\n");
			FAIL(badverf);
		}
	}

	/* XDR the decrypted timestamp */
	cryptbuf[0] = ntohl(cryptbuf[0]);
	cryptbuf[1] = ntohl(cryptbuf[1]);
	/* The microsecond part must be in range. */
	if (cryptbuf[1] > 1000000) {
		dprintk("svcauth_des: bad usec value %u\n", cryptbuf[1]);
		if (namekind == ADN_NICKNAME)
			FAIL(rejectedverf);
		FAIL(badverf);
	}

	/*
	 * Check for replayed credentials. We must allow for reordering
	 * of requests by the network, and the OS scheduler, hence we
	 * cannot expect timestamps to be increasing monotonically.
	 * This opens a small security hole, therefore the replay_slack
	 * value shouldn't be too large.
	 */
	if ((delta = cryptbuf[0] - data->dc_timestamp[0]) <= 0) {
		switch (delta) {
		case -1:
			delta = -1000000;
			/* fallthrough: add the usec difference */
		case 0:
			delta += cryptbuf[1] - data->dc_timestamp[1];
			break;
		default:
			delta = -1000000;
		}
		/* NOTE(review): delta is in microseconds here, while
		 * DES_REPLAY_SLACK is documented as milliseconds --
		 * confirm the units before trusting this check. */
		if (delta < DES_REPLAY_SLACK)
			FAIL(rejectedverf);
#ifdef STRICT_REPLAY_CHECKS
		/* TODO: compare time stamp to last five timestamps cached
		 * and reject (drop?) request if a match is found. */
#endif
	}

	/* The timestamp must fall inside the current window. */
	/* NOTE(review): struct timeval fields are tv_sec/tv_usec, not
	 * tv_secs, and `now' is undeclared -- more evidence this code
	 * never compiled. */
	now = xtime;
	now.tv_secs -= data->dc_window;
	if (now.tv_secs < cryptbuf[0] ||
	    (now.tv_secs == cryptbuf[0] && now.tv_usec < cryptbuf[1]))
		FAIL(rejectedverf);

	/* Okay, we're done. Update the lot */
	if (namekind == ADN_FULLNAME)
		data->dc_valid = 1;
	data->dc_timestamp[0] = cryptbuf[0];
	data->dc_timestamp[1] = cryptbuf[1];

	put_cred(data);
	return;
garbage:	/* NOTE(review): no goto ever targets this label */
	*statp = rpc_garbage_args;
	return;
}
|
/*
 * Call the keyserver to obtain the decrypted conversation key and
 * UNIX creds. We use a Linux-specific keycall extension that does
 * both things in one go.
 *
 * NOTE(review): this is an unfinished stub.  The local static
 * in_keycall shadows the file-scope flag of the same name, the flag
 * is set and immediately cleared with no keyserver upcall in between,
 * and `cred' is returned uninitialized (undefined behavior if this is
 * ever called).  Do not enable this path as-is.
 */
static struct des_cred *
get_cred_byname(struct svc_rqst *rqstp, u32 *authp, char *fullname, u32 *cryptkey)
{
	static int in_keycall = 0;
	struct des_cred *cred;

	if (in_keycall) {
		*authp = rpc_autherr_dropit;
		return NULL;
	}
	in_keycall = 1;
	/* NOTE(review): the actual keyserver call is missing here. */
	in_keycall = 0;
	return cred;
}
/svcsock.c
0,0 → 1,1343
/* |
* linux/net/sunrpc/svcsock.c |
* |
* These are the RPC server socket internals. |
* |
* The server scheduling algorithm does not always distribute the load |
* evenly when servicing a single client. May need to modify the |
* svc_sock_enqueue procedure... |
* |
* TCP support is largely untested and may be a little slow. The problem |
* is that we currently do two separate recvfrom's, one for the 4-byte |
* record length, and the second for the actual record. This could possibly |
* be improved by always reading a minimum size of around 100 bytes and |
* tucking any superfluous bytes away in a temporary store. Still, that |
* leaves write requests out in the rain. An alternative may be to peek at |
* the first skb in the queue, and if it matches the next TCP sequence |
* number, to extract the record marker. Yuck. |
* |
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/sched.h> |
#include <linux/errno.h> |
#include <linux/fcntl.h> |
#include <linux/net.h> |
#include <linux/in.h> |
#include <linux/inet.h> |
#include <linux/udp.h> |
#include <linux/version.h> |
#include <linux/unistd.h> |
#include <linux/slab.h> |
#include <linux/netdevice.h> |
#include <linux/skbuff.h> |
#include <net/sock.h> |
#include <net/checksum.h> |
#include <net/ip.h> |
#include <asm/uaccess.h> |
#include <asm/ioctls.h> |
|
#include <linux/sunrpc/types.h> |
#include <linux/sunrpc/xdr.h> |
#include <linux/sunrpc/svcsock.h> |
#include <linux/sunrpc/stats.h> |
|
/* SMP locking strategy: |
* |
* svc_serv->sv_lock protects most stuff for that service. |
* |
* Some flags can be set to certain values at any time |
* providing that certain rules are followed: |
* |
* SK_BUSY can be set to 0 at any time. |
* svc_sock_enqueue must be called afterwards |
* SK_CONN, SK_DATA, can be set or cleared at any time. |
* after a set, svc_sock_enqueue must be called. |
* after a clear, the socket must be read/accepted |
* if this succeeds, it must be set again. |
* SK_CLOSE can set at any time. It is never cleared. |
* |
*/ |
|
#define RPCDBG_FACILITY RPCDBG_SVCSOCK |
|
|
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, |
int *errp, int pmap_reg); |
static void svc_udp_data_ready(struct sock *, int); |
static int svc_udp_recvfrom(struct svc_rqst *); |
static int svc_udp_sendto(struct svc_rqst *); |
|
|
/*
 * Queue up an idle server thread. Must have serv->sv_lock held.
 * Note: this is really a stack rather than a queue, so that we only
 * use as many different threads as we need, and the rest don't pollute
 * the cache.
 */
static inline void
svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
{
	/* list_add() pushes at the head: LIFO, per the note above. */
	list_add(&rqstp->rq_list, &serv->sv_threads);
}
|
/*
 * Dequeue an nfsd thread. Must have serv->sv_lock held.
 * (@serv is unused but kept for symmetry with svc_serv_enqueue.)
 */
static inline void
svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
{
	list_del(&rqstp->rq_list);
}
|
/*
 * Release an skbuff after use: detach it from the request and return
 * it to the network layer.  Safe to call when no skb is attached.
 */
static inline void
svc_release_skb(struct svc_rqst *rqstp)
{
	struct sk_buff *skb = rqstp->rq_skbuff;

	if (!skb)
		return;
	rqstp->rq_skbuff = NULL;

	dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
	skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
}
|
/*
 * Queue up a socket with data pending. If there are idle nfsd
 * processes, wake 'em up.
 *
 * The socket is handed directly to an idle thread when one exists;
 * otherwise it is appended to serv->sv_sockets for the next free
 * thread to pick up.  The SK_BUSY bit ensures at most one thread
 * services the socket at a time.
 */
static void
svc_sock_enqueue(struct svc_sock *svsk)
{
	struct svc_serv *serv = svsk->sk_server;
	struct svc_rqst *rqstp;

	/* Nothing to do unless the socket has work: data, a pending
	 * connection, or a close request. */
	if (!(svsk->sk_flags &
	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)) ))
		return;
	if (test_bit(SK_DEAD, &svsk->sk_flags))
		return;

	spin_lock_bh(&serv->sv_lock);

	/* Sanity: idle threads and ready sockets must not coexist. */
	if (!list_empty(&serv->sv_threads) &&
	    !list_empty(&serv->sv_sockets))
		printk(KERN_ERR
			"svc_sock_enqueue: threads and sockets both waiting??\n");

	if (test_bit(SK_BUSY, &svsk->sk_flags)) {
		/* Don't enqueue socket while daemon is receiving */
		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}

	if (((svsk->sk_reserved + serv->sv_bufsz)*2
	     > sock_wspace(svsk->sk_sk))
	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
		/* Don't enqueue while not enough space for reply */
		dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
			svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
			sock_wspace(svsk->sk_sk));
		goto out_unlock;
	}

	/* Mark socket as busy. It will remain in this state until the
	 * server has processed all pending data and put the socket back
	 * on the idle list.
	 */
	set_bit(SK_BUSY, &svsk->sk_flags);

	if (!list_empty(&serv->sv_threads)) {
		rqstp = list_entry(serv->sv_threads.next,
				   struct svc_rqst,
				   rq_list);
		dprintk("svc: socket %p served by daemon %p\n",
			svsk->sk_sk, rqstp);
		svc_serv_dequeue(serv, rqstp);
		if (rqstp->rq_sock)
			printk(KERN_ERR
				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
				rqstp, rqstp->rq_sock);
		/* Hand the socket to the thread and reserve reply
		 * space before waking it up. */
		rqstp->rq_sock = svsk;
		svsk->sk_inuse++;
		rqstp->rq_reserved = serv->sv_bufsz;
		svsk->sk_reserved += rqstp->rq_reserved;
		wake_up(&rqstp->rq_wait);
	} else {
		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
		list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
		set_bit(SK_QUED, &svsk->sk_flags);
	}

out_unlock:
	spin_unlock_bh(&serv->sv_lock);
}
|
/* |
* Dequeue the first socket. Must be called with the serv->sv_lock held. |
*/ |
static inline struct svc_sock * |
svc_sock_dequeue(struct svc_serv *serv) |
{ |
struct svc_sock *svsk; |
|
if (list_empty(&serv->sv_sockets)) |
return NULL; |
|
svsk = list_entry(serv->sv_sockets.next, |
struct svc_sock, sk_ready); |
list_del(&svsk->sk_ready); |
|
dprintk("svc: socket %p dequeued, inuse=%d\n", |
svsk->sk_sk, svsk->sk_inuse); |
clear_bit(SK_QUED, &svsk->sk_flags); |
|
return svsk; |
} |
|
/*
 * Having read something from a socket, check whether it
 * needs to be re-enqueued.
 * Note: SK_DATA only gets cleared when a read-attempt finds
 * no (or insufficient) data.
 */
static inline void
svc_sock_received(struct svc_sock *svsk)
{
	/* Clear BUSY first so svc_sock_enqueue() can requeue us. */
	clear_bit(SK_BUSY, &svsk->sk_flags);
	svc_sock_enqueue(svsk);
}
|
|
/**
 * svc_reserve - change the space reserved for the reply to a request.
 * @rqstp: The request in question
 * @space: new max space to reserve
 *
 * Each request reserves some space on the output queue of the socket
 * to make sure the reply fits. This function reduces that reserved
 * space to be the amount of space used already, plus @space.
 *
 */
void svc_reserve(struct svc_rqst *rqstp, int space)
{
	space += rqstp->rq_resbuf.len<<2;	/* words already used, in bytes */

	if (space < rqstp->rq_reserved) {
		struct svc_sock *svsk = rqstp->rq_sock;
		spin_lock_bh(&svsk->sk_server->sv_lock);
		svsk->sk_reserved -= (rqstp->rq_reserved - space);
		rqstp->rq_reserved = space;
		spin_unlock_bh(&svsk->sk_server->sv_lock);

		/* Freed space may allow the socket to be queued again. */
		svc_sock_enqueue(svsk);
	}
}
|
/*
 * Release a socket after use: drop one sk_inuse reference.  Whoever
 * drops the last reference of a socket marked SK_DEAD frees it.
 */
static inline void
svc_sock_put(struct svc_sock *svsk)
{
	struct svc_serv *serv = svsk->sk_server;

	spin_lock_bh(&serv->sv_lock);
	if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
		spin_unlock_bh(&serv->sv_lock);
		dprintk("svc: releasing dead socket\n");
		sock_release(svsk->sk_sock);
		kfree(svsk);
	}
	else
		spin_unlock_bh(&serv->sv_lock);
}
|
/*
 * Release a request's socket resources after the reply has been
 * sent: free the received skb, reset the response buffer, give back
 * the outgoing-space reservation and drop the socket reference.
 */
static void
svc_sock_release(struct svc_rqst *rqstp)
{
	struct svc_sock *svsk = rqstp->rq_sock;

	svc_release_skb(rqstp);

	/* Reset response buffer and release
	 * the reservation.
	 * But first, check that enough space was reserved
	 * for the reply, otherwise we have a bug!
	 */
	if ((rqstp->rq_resbuf.len<<2) > rqstp->rq_reserved)
		printk(KERN_ERR "RPC request reserved %d but used %d\n",
		       rqstp->rq_reserved,
		       rqstp->rq_resbuf.len<<2);

	rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base;
	rqstp->rq_resbuf.len = 0;
	svc_reserve(rqstp, 0);
	rqstp->rq_sock = NULL;

	svc_sock_put(svsk);
}
|
/*
 * External function to wake up a server waiting for data.
 * The thread is only woken, not dequeued and not handed a socket
 * (note the commented-out lines below); the woken thread re-examines
 * the queues itself -- TODO(review) confirm that is the intent.
 */
void
svc_wake_up(struct svc_serv *serv)
{
	struct svc_rqst *rqstp;

	spin_lock_bh(&serv->sv_lock);
	if (!list_empty(&serv->sv_threads)) {
		rqstp = list_entry(serv->sv_threads.next,
				   struct svc_rqst,
				   rq_list);
		dprintk("svc: daemon %p woken up.\n", rqstp);
		/*
		svc_serv_dequeue(serv, rqstp);
		rqstp->rq_sock = NULL;
		 */
		wake_up(&rqstp->rq_wait);
	}
	spin_unlock_bh(&serv->sv_lock);
}
|
/*
 * Generic sendto routine.  For UDP replies an IP_PKTINFO control
 * message pins the source address to the one the request was sent to
 * (rq_daddr), which matters on multi-homed hosts.
 * Returns the sock_sendmsg() result: bytes sent or a negative errno.
 */
static int
svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr)
{
	mm_segment_t oldfs;
	struct svc_sock *svsk = rqstp->rq_sock;
	struct socket *sock = svsk->sk_sock;
	struct msghdr msg;
	char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
	struct cmsghdr *cmh = (struct cmsghdr *)buffer;
	struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh);
	int i, buflen, len;

	/* Total byte count across the iovec. */
	for (i = buflen = 0; i < nr; i++)
		buflen += iov[i].iov_len;

	msg.msg_name = &rqstp->rq_addr;
	msg.msg_namelen = sizeof(rqstp->rq_addr);
	msg.msg_iov = iov;
	msg.msg_iovlen = nr;
	if (rqstp->rq_prot == IPPROTO_UDP) {
		msg.msg_control = cmh;
		msg.msg_controllen = sizeof(buffer);
		cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		cmh->cmsg_level = SOL_IP;
		cmh->cmsg_type = IP_PKTINFO;
		pki->ipi_ifindex = 0;
		pki->ipi_spec_dst.s_addr = rqstp->rq_daddr;
	} else {
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
	}

	/* This was MSG_DONTWAIT, but I now want it to wait.
	 * The only thing that it would wait for is memory and
	 * if we are fairly low on memory, then we aren't likely
	 * to make much progress anyway.
	 * sk->sndtimeo is set to 30seconds just in case.
	 */
	msg.msg_flags = 0;

	/* Lift the address-space limit: the msghdr points at kernel memory. */
	oldfs = get_fs(); set_fs(KERNEL_DS);
	len = sock_sendmsg(sock, &msg, buflen);
	set_fs(oldfs);

	dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n",
		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len);

	return len;
}
|
/* |
* Check input queue length |
*/ |
static int |
svc_recv_available(struct svc_sock *svsk) |
{ |
mm_segment_t oldfs; |
struct socket *sock = svsk->sk_sock; |
int avail, err; |
|
oldfs = get_fs(); set_fs(KERNEL_DS); |
err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); |
set_fs(oldfs); |
|
return (err >= 0)? avail : err; |
} |
|
/*
 * Generic recvfrom routine.  Receives into the supplied iovec without
 * blocking and fills in rq_addr with the peer's address.
 * Returns the number of bytes received, or a negative errno.
 */
static int
svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen)
{
	mm_segment_t oldfs;
	struct msghdr msg;
	struct socket *sock;
	int len, alen;

	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
	sock = rqstp->rq_sock->sk_sock;

	msg.msg_name = &rqstp->rq_addr;
	msg.msg_namelen = sizeof(rqstp->rq_addr);
	msg.msg_iov = iov;
	msg.msg_iovlen = nr;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;

	msg.msg_flags = MSG_DONTWAIT;

	/* Lift the address-space limit: the msghdr points at kernel memory. */
	oldfs = get_fs(); set_fs(KERNEL_DS);
	len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT);
	set_fs(oldfs);

	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
	 * possibly we should cache this in the svc_sock structure
	 * at accept time. FIXME
	 */
	alen = sizeof(rqstp->rq_addr);
	sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);

	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);

	return len;
}
|
/*
 * Set socket snd and rcv buffer lengths, bypassing the sysctl-imposed
 * limits that sock_setsockopt would enforce (see the comment below).
 */
static inline void
svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
{
#if 0
	mm_segment_t oldfs;
	oldfs = get_fs(); set_fs(KERNEL_DS);
	sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
			(char*)&snd, sizeof(snd));
	sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
			(char*)&rcv, sizeof(rcv));
#else
	/* sock_setsockopt limits use to sysctl_?mem_max,
	 * which isn't acceptable. Until that is made conditional
	 * on not having CAP_SYS_RESOURCE or similar, we go direct...
	 * DaveM said I could!
	 */
	lock_sock(sock->sk);
	sock->sk->sndbuf = snd * 2;
	sock->sk->rcvbuf = rcv * 2;
	sock->sk->userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
	release_sock(sock->sk);
#endif
}
/*
 * INET callback when data has been received on the socket.
 * Marks the svc_sock as having data and queues it for service.
 */
static void
svc_udp_data_ready(struct sock *sk, int count)
{
	struct svc_sock *svsk = (struct svc_sock *)(sk->user_data);

	if (!svsk)
		goto out;
	dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
		svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags));
	set_bit(SK_DATA, &svsk->sk_flags);
	svc_sock_enqueue(svsk);
out:
	/* Also wake anyone sleeping directly on the socket. */
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
}
|
/*
 * INET callback when space is newly available on the socket.
 * Requeues the svc_sock, since svc_sock_enqueue may previously have
 * refused it for lack of reply space.
 */
static void
svc_write_space(struct sock *sk)
{
	struct svc_sock *svsk = (struct svc_sock *)(sk->user_data);

	if (svsk) {
		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
			svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags));
		svc_sock_enqueue(svsk);
	}

	/* Also wake anyone sleeping directly on the socket. */
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
}
|
/*
 * Receive a datagram from a UDP socket.  Points the request's argument
 * buffer straight into the skb payload (released later through
 * svc_release_skb) and records the sender's address.  Returns the
 * payload length, 0 for a dropped packet, or -EAGAIN when no datagram
 * is pending.
 */
static int
svc_udp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock *svsk = rqstp->rq_sock;
	struct svc_serv *serv = svsk->sk_server;
	struct sk_buff *skb;
	u32 *data;
	int err, len;

	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
		/* udp sockets need large rcvbuf as all pending
		 * requests are still in that buffer. sndbuf must
		 * also be large enough that there is enough space
		 * for one reply per thread.
		 */
		svc_sock_setbufsize(svsk->sk_sock,
				    (serv->sv_nrthreads+3)* serv->sv_bufsz,
				    (serv->sv_nrthreads+3)* serv->sv_bufsz);

	clear_bit(SK_DATA, &svsk->sk_flags);
	while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
		svc_sock_received(svsk);
		if (err == -EAGAIN)
			return err;
		/* possibly an icmp error */
		dprintk("svc: recvfrom returned error %d\n", -err);
	}
	set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */

	/* Sorry. */
	/* A paged skb can't be consumed in place: pull it into one
	 * linear buffer, or drop the packet if that fails. */
	if (skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_KERNEL) != 0) {
			kfree_skb(skb);
			svc_sock_received(svsk);
			return 0;
		}
	}

	/* Verify the UDP checksum before trusting the payload. */
	if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
			skb_free_datagram(svsk->sk_sk, skb);
			svc_sock_received(svsk);
			return 0;
		}
	}


	len = skb->len - sizeof(struct udphdr);
	data = (u32 *) (skb->data + sizeof(struct udphdr));

	/* Argument buffer aliases the skb payload -- no copy. */
	rqstp->rq_skbuff = skb;
	rqstp->rq_argbuf.base = data;
	rqstp->rq_argbuf.buf = data;
	rqstp->rq_argbuf.len = (len >> 2);
	/* rqstp->rq_resbuf = rqstp->rq_defbuf; */
	rqstp->rq_prot = IPPROTO_UDP;

	/* Get sender address */
	rqstp->rq_addr.sin_family = AF_INET;
	rqstp->rq_addr.sin_port = skb->h.uh->source;
	rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
	rqstp->rq_daddr = skb->nh.iph->daddr;	/* for the reply's source address */

	if (serv->sv_stats)
		serv->sv_stats->netudpcnt++;

	/* One down, maybe more to go... */
	svsk->sk_sk->stamp = skb->stamp;
	svc_sock_received(svsk);

	return len;
}
|
/*
 * Send the reply in rq_resbuf out over the UDP socket.
 *
 * An -ECONNREFUSED from the first attempt means a pending ICMP error
 * (from an earlier request) was reported on the socket; a single retry
 * performs the actual send.  Returns svc_sendto()'s result.
 */
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
	struct svc_buf	*resp = &rqstp->rq_resbuf;
	int		status;

	/* Set up the first element of the reply iovec.
	 * Any other iovecs that may be in use have been taken
	 * care of by the server implementation itself.
	 */
	/* bufp->base = bufp->area; */
	resp->iov[0].iov_base = resp->base;
	/* len is in 32-bit words; iov_len wants bytes. */
	resp->iov[0].iov_len  = resp->len << 2;

	status = svc_sendto(rqstp, resp->iov, resp->nriov);
	if (status != -ECONNREFUSED)
		return status;
	/* ICMP error on earlier request — retry once. */
	return svc_sendto(rqstp, resp->iov, resp->nriov);
}
|
/*
 * Prepare a UDP svc_sock: install the receive/send-space callbacks and
 * the UDP transport methods, and give the socket initial buffer space
 * for one request/reply.  Always returns 0.
 */
static int
svc_udp_init(struct svc_sock *svsk)
{
	svsk->sk_sk->data_ready = svc_udp_data_ready;
	svsk->sk_sk->write_space = svc_write_space;
	svsk->sk_recvfrom = svc_udp_recvfrom;
	svsk->sk_sendto = svc_udp_sendto;

	/* initialise setting must have enough space to
	 * receive and respond to one request.
	 * svc_udp_recvfrom will re-adjust if necessary
	 */
	svc_sock_setbufsize(svsk->sk_sock,
			    3 * svsk->sk_server->sv_bufsz,
			    3 * svsk->sk_server->sv_bufsz);

	/* Ask svc_udp_recvfrom to rescale the buffers to the thread count. */
	set_bit(SK_CHNGBUF, &svsk->sk_flags);

	return 0;
}
|
/*
 * A data_ready event on a listening socket means there's a connection
 * pending. Do not use state_change as a substitute for it.
 *
 * Marks the svc_sock with SK_CONN so a server thread will call accept().
 */
static void
svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
{
	struct svc_sock	*svsk;

	dprintk("svc: socket %p TCP (listen) state change %d\n",
			sk, sk->state);

	if (sk->state != TCP_LISTEN) {
		/*
		 * This callback may be called twice when a new connection
		 * is established as a child socket inherits everything
		 * from a parent LISTEN socket.
		 * 1) data_ready method of the parent socket will be called
		 *    when one of child sockets become ESTABLISHED.
		 * 2) data_ready method of the child socket may be called
		 *    when it receives data before the socket is accepted.
		 * In case of 2, we should ignore it silently.
		 */
		goto out;
	}
	if (!(svsk = (struct svc_sock *) sk->user_data)) {
		printk("svc: socket %p: no user data\n", sk);
		goto out;
	}
	set_bit(SK_CONN, &svsk->sk_flags);
	svc_sock_enqueue(svsk);
 out:
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible_all(sk->sleep);
}
|
/*
 * A state change on a connected socket means it's dying or dead.
 *
 * Flags the svc_sock for close (SK_CLOSE) and queues it so a server
 * thread will run svc_delete_socket() on it.
 */
static void
svc_tcp_state_change(struct sock *sk)
{
	struct svc_sock	*svsk;

	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
		sk, sk->state, sk->user_data);

	if (!(svsk = (struct svc_sock *) sk->user_data)) {
		printk("svc: socket %p: no user data\n", sk);
		goto out;
	}
	set_bit(SK_CLOSE, &svsk->sk_flags);
	svc_sock_enqueue(svsk);
 out:
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible_all(sk->sleep);
}
|
static void |
svc_tcp_data_ready(struct sock *sk, int count) |
{ |
struct svc_sock * svsk; |
|
dprintk("svc: socket %p TCP data ready (svsk %p)\n", |
sk, sk->user_data); |
if (!(svsk = (struct svc_sock *)(sk->user_data))) |
goto out; |
set_bit(SK_DATA, &svsk->sk_flags); |
svc_sock_enqueue(svsk); |
out: |
if (sk->sleep && waitqueue_active(sk->sleep)) |
wake_up_interruptible(sk->sleep); |
} |
|
/*
 * Accept a TCP connection on a listening svc_sock.
 *
 * Allocates a new socket, accepts the pending connection into it,
 * wraps it in a new svc_sock (svc_setup_socket with pmap_register == 0,
 * i.e. a temporary socket), and primes it with SK_DATA in case bytes
 * arrived before our callbacks were installed.  If the server has too
 * many temporary connections, one is picked at random (newest or
 * oldest) and flagged for close.
 */
static void
svc_tcp_accept(struct svc_sock *svsk)
{
	struct sockaddr_in sin;
	struct svc_serv	*serv = svsk->sk_server;
	struct socket	*sock = svsk->sk_sock;
	struct socket	*newsock;
	struct proto_ops *ops;
	struct svc_sock	*newsvsk;
	int		err, slen;

	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
	if (!sock)
		return;

	if (!(newsock = sock_alloc())) {
		printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name);
		return;
	}
	dprintk("svc: tcp_accept %p allocated\n", newsock);

	newsock->type = sock->type;
	newsock->ops = ops = sock->ops;

	/* Clear SK_CONN before accepting; re-set below so that any further
	 * pending connections get picked up on the next pass. */
	clear_bit(SK_CONN, &svsk->sk_flags);
	if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
		if (err != -EAGAIN && net_ratelimit())
			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
				   serv->sv_name, -err);
		goto failed;		/* aborted connection or whatever */
	}
	set_bit(SK_CONN, &svsk->sk_flags);
	svc_sock_enqueue(svsk);

	slen = sizeof(sin);
	err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
	if (err < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
				   serv->sv_name, -err);
		goto failed;		/* aborted connection or whatever */
	}

	/* Ideally, we would want to reject connections from unauthorized
	 * hosts here, but when we get encryption, the IP of the host won't
	 * tell us anything. For now just warn about unpriv connections.
	 */
	if (ntohs(sin.sin_port) >= 1024) {
		/* NOTE(review): dprintk with a KERN_WARNING prefix looks
		 * odd — this only fires in debug builds anyway. */
		dprintk(KERN_WARNING
			"%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
			serv->sv_name, 
			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
	}

	dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
			NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));

	if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))
		goto failed;

	/* make sure that a write doesn't block forever when
	 * low on memory
	 */
	newsock->sk->sndtimeo = HZ*30;

	/* Precharge. Data may have arrived on the socket before we
	 * installed the data_ready callback. 
	 */
	set_bit(SK_DATA, &newsvsk->sk_flags);
	svc_sock_enqueue(newsvsk);

	/* make sure that we don't have too many active connections.
	 * If we have, something must be dropped.
	 * We randomly choose between newest and oldest (in terms
	 * of recent activity) and drop it.
	 */
	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*10) {
		struct svc_sock *svsk = NULL;
		spin_lock_bh(&serv->sv_lock);
		if (!list_empty(&serv->sv_tempsocks)) {
			if (net_random()&1)
				svsk = list_entry(serv->sv_tempsocks.prev,
						  struct svc_sock,
						  sk_list);
			else
				svsk = list_entry(serv->sv_tempsocks.next,
						  struct svc_sock,
						  sk_list);
			set_bit(SK_CLOSE, &svsk->sk_flags);
			/* Hold a reference across the enqueue below. */
			svsk->sk_inuse ++;
		}
		spin_unlock_bh(&serv->sv_lock);

		if (svsk) {
			svc_sock_enqueue(svsk);
			svc_sock_put(svsk);
		}

	}

	if (serv->sv_stats)
		serv->sv_stats->nettcpconn++;

	return;

failed:
	sock_release(newsock);
	return;
}
|
/*
 * Receive data from a TCP socket.
 *
 * Handles three cases on entry: a socket flagged for close, a listener
 * with a pending connection (accept and return), or an established
 * socket with data.  For data, this implements the RPC record-marking
 * protocol: first read the 4-byte record marker (length + terminal
 * bit), then wait until the whole record is available before reading
 * it into rq_argbuf.  Partial reads return -EAGAIN and the state
 * (sk_tcplen / sk_reclen) persists across calls.
 *
 * Returns the record length in bytes, 0 (close/accept handled),
 * or -EAGAIN on incomplete data.
 */
static int
svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk = rqstp->rq_sock;
	struct svc_serv	*serv = svsk->sk_server;
	struct svc_buf	*bufp = &rqstp->rq_argbuf;
	int		len;

	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
		svsk, test_bit(SK_DATA, &svsk->sk_flags),
		test_bit(SK_CONN, &svsk->sk_flags),
		test_bit(SK_CLOSE, &svsk->sk_flags));

	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
		svc_delete_socket(svsk);
		return 0;
	}

	if (test_bit(SK_CONN, &svsk->sk_flags)) {
		svc_tcp_accept(svsk);
		svc_sock_received(svsk);
		return 0;
	}

	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
		/* sndbuf needs to have room for one request
		 * per thread, otherwise we can stall even when the
		 * network isn't a bottleneck.
		 * rcvbuf just needs to be able to hold a few requests.
		 * Normally they will be removed from the queue 
		 * as soon as a complete request arrives.
		 */
		svc_sock_setbufsize(svsk->sk_sock,
				    (serv->sv_nrthreads+3) *
				    serv->sv_bufsz,
				    3 * serv->sv_bufsz);

	clear_bit(SK_DATA, &svsk->sk_flags);

	/* Receive data. If we haven't got the record length yet, get
	 * the next four bytes. Otherwise try to gobble up as much as
	 * possible up to the complete record length.
	 */
	if (svsk->sk_tcplen < 4) {
		unsigned long	want = 4 - svsk->sk_tcplen;
		struct iovec	iov;

		/* Read the remaining marker bytes into sk_reclen. */
		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
		iov.iov_len  = want;
		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
			goto error;
		svsk->sk_tcplen += len;
		if (len < want) {
			dprintk("svc: short recvfrom while reading record length (%d of %ld)\n",
			        len, want);
			svc_sock_received(svsk);
			return -EAGAIN; /* record header not complete */
		}

		svsk->sk_reclen = ntohl(svsk->sk_reclen);
		if (!(svsk->sk_reclen & 0x80000000)) {
			/* FIXME: technically, a record can be fragmented,
			 *  and non-terminal fragments will not have the top
			 *  bit set in the fragment length header.
			 *  But apparently no known nfs clients send fragmented
			 *  records. */
			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",
			       (unsigned long) svsk->sk_reclen);
			goto err_delete;
		}
		svsk->sk_reclen &= 0x7fffffff;
		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
		/* Reject records that won't fit in the argument buffer
		 * (buflen is in 32-bit words). */
		if (svsk->sk_reclen > (bufp->buflen<<2)) {
			printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
			       (unsigned long) svsk->sk_reclen);
			goto err_delete;
		}
	}

	/* Check whether enough data is available */
	len = svc_recv_available(svsk);
	if (len < 0)
		goto error;

	if (len < svsk->sk_reclen) {
		dprintk("svc: incomplete TCP record (%d of %d)\n",
			len, svsk->sk_reclen);
		svc_sock_received(svsk);
		return -EAGAIN;	/* record not complete */
	}
	set_bit(SK_DATA, &svsk->sk_flags);

	/* Frob argbuf: skip the 4 bytes already consumed by the record
	 * marker read above. */
	bufp->iov[0].iov_base += 4;
	bufp->iov[0].iov_len  -= 4;

	/* Now receive data */
	len = svc_recvfrom(rqstp, bufp->iov, bufp->nriov, svsk->sk_reclen);
	if (len < 0)
		goto error;

	dprintk("svc: TCP complete record (%d bytes)\n", len);

	/* Position reply write pointer immediately after
	 * record length */
	rqstp->rq_resbuf.buf += 1;
	rqstp->rq_resbuf.len  = 1;

	rqstp->rq_skbuff      = 0;
	rqstp->rq_argbuf.buf += 1;
	rqstp->rq_argbuf.len  = (len >> 2);
	rqstp->rq_prot	      = IPPROTO_TCP;

	/* Reset TCP read info */
	svsk->sk_reclen = 0;
	svsk->sk_tcplen = 0;

	svc_sock_received(svsk);
	if (serv->sv_stats)
		serv->sv_stats->nettcpcnt++;

	return len;

 err_delete:
	svc_delete_socket(svsk);
	return -EAGAIN;

 error:
	if (len == -EAGAIN) {
		dprintk("RPC: TCP recvfrom got EAGAIN\n");
		svc_sock_received(svsk);
	} else {
		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
					svsk->sk_server->sv_name, -len);
		svc_sock_received(svsk);
	}

	return len;
}
|
/*
 * Send out data on TCP socket.
 *
 * Prepends the 4-byte RPC record marker (terminal bit | payload length)
 * into the word that svc_tcp_recvfrom reserved at the front of the
 * reply buffer, then sends the whole reply.  A short send is fatal for
 * the connection: the stream would be desynchronized, so the socket is
 * deleted.  Returns bytes sent or a negative errno.
 */
static int
svc_tcp_sendto(struct svc_rqst *rqstp)
{
	struct svc_buf	*bufp = &rqstp->rq_resbuf;
	int sent;

	/* Set up the first element of the reply iovec.
	 * Any other iovecs that may be in use have been taken
	 * care of by the server implementation itself.
	 */
	bufp->iov[0].iov_base = bufp->base;
	/* len is in 32-bit words; the marker excludes its own 4 bytes. */
	bufp->iov[0].iov_len  = bufp->len << 2;
	bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4));

	if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags))
		return -ENOTCONN;

	sent = svc_sendto(rqstp, bufp->iov, bufp->nriov);
	if (sent != bufp->len<<2) {
		printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - shutting down socket\n",
		       rqstp->rq_sock->sk_server->sv_name,
		       sent, bufp->len << 2);
		svc_delete_socket(rqstp->rq_sock);
		sent = -EAGAIN;
	}
	return sent;
}
|
/*
 * Prepare a TCP svc_sock.  A LISTEN socket only needs the accept
 * trigger; an established (data) socket gets the full set of
 * callbacks, record-marker state, Nagle disabled, and initial buffer
 * sizes.  Always returns 0.
 */
static int
svc_tcp_init(struct svc_sock *svsk)
{
	struct sock	*sk = svsk->sk_sk;
	struct tcp_opt  *tp = &(sk->tp_pinfo.af_tcp);

	svsk->sk_recvfrom = svc_tcp_recvfrom;
	svsk->sk_sendto = svc_tcp_sendto;

	if (sk->state == TCP_LISTEN) {
		dprintk("setting up TCP socket for listening\n");
		sk->data_ready = svc_tcp_listen_data_ready;
	} else {
		dprintk("setting up TCP socket for reading\n");
		sk->state_change = svc_tcp_state_change;
		sk->data_ready = svc_tcp_data_ready;
		sk->write_space = svc_write_space;

		/* Fresh record-marker state for svc_tcp_recvfrom. */
		svsk->sk_reclen = 0;
		svsk->sk_tcplen = 0;

		tp->nonagle = 1;        /* disable Nagle's algorithm */

		/* initialise setting must have enough space to
		 * receive and respond to one request.  
		 * svc_tcp_recvfrom will re-adjust if necessary
		 */
		svc_sock_setbufsize(svsk->sk_sock,
				    3 * svsk->sk_server->sv_bufsz,
				    3 * svsk->sk_server->sv_bufsz);

		set_bit(SK_CHNGBUF, &svsk->sk_flags);
		/* Connection may already have died before we got here. */
		if (sk->state != TCP_ESTABLISHED) 
			set_bit(SK_CLOSE, &svsk->sk_flags);
	}

	return 0;
}
|
void
svc_sock_update_bufs(struct svc_serv *serv)
{
	/*
	 * The number of server threads has changed.
	 * flag all socket to the snd/rcv buffer sizes
	 * updated.
	 * We don't just do it, as the locking is rather
	 * awkward at this point
	 */
	struct list_head *le;

	/* sv_lock guards both socket lists; SK_CHNGBUF makes each
	 * socket's next recvfrom call svc_sock_setbufsize. */
	spin_lock_bh(&serv->sv_lock);
	list_for_each(le, &serv->sv_permsocks) {
		struct svc_sock *svsk = 
			list_entry(le, struct svc_sock, sk_list);
		set_bit(SK_CHNGBUF, &svsk->sk_flags);
	}
	list_for_each(le, &serv->sv_tempsocks) {
		struct svc_sock *svsk =
			list_entry(le, struct svc_sock, sk_list);
		set_bit(SK_CHNGBUF, &svsk->sk_flags);
	}
	spin_unlock_bh(&serv->sv_lock);
}
|
/*
 * Receive the next request on any socket.
 *
 * Called by a server thread.  First, if the oldest temporary (per-
 * connection) socket has been idle for six minutes or more, it is
 * claimed and flagged SK_CLOSE so this call tears it down.  Otherwise
 * a queued socket is dequeued, or the thread sleeps (interruptibly,
 * up to 'timeout' jiffies) until one is assigned to it.
 *
 * Returns the request length from sk_recvfrom, or -EINTR / -EAGAIN.
 */
int
svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
{
	struct svc_sock		*svsk =NULL;
	int			len;
	DECLARE_WAITQUEUE(wait, current);

	dprintk("svc: server %p waiting for data (to = %ld)\n",
		rqstp, timeout);

	if (rqstp->rq_sock)
		printk(KERN_ERR 
			"svc_recv: service %p, socket not NULL!\n",
			 rqstp);
	if (waitqueue_active(&rqstp->rq_wait))
		printk(KERN_ERR 
			"svc_recv: service %p, wait queue active!\n",
			 rqstp);

	/* Initialize the buffers */
	rqstp->rq_argbuf = rqstp->rq_defbuf;
	rqstp->rq_resbuf = rqstp->rq_defbuf;

	if (signalled())
		return -EINTR;

	spin_lock_bh(&serv->sv_lock);
	if (!list_empty(&serv->sv_tempsocks)) {
		svsk = list_entry(serv->sv_tempsocks.next,
				  struct svc_sock, sk_list);
		/* apparently the "standard" is that clients close
		 * idle connections after 5 minutes, servers after
		 * 6 minutes
		 *   http://www.connectathon.org/talks96/nfstcp.pdf 
		 */
		/* Keep the socket if it was active recently or is in use. */
		if (CURRENT_TIME - svsk->sk_lastrecv < 6*60
		    || test_bit(SK_BUSY, &svsk->sk_flags))
			svsk = NULL;
	}
	if (svsk) {
		/* Idle socket: claim it and mark it for close; the
		 * sk_recvfrom call below will delete it. */
		set_bit(SK_BUSY, &svsk->sk_flags);
		set_bit(SK_CLOSE, &svsk->sk_flags);
		rqstp->rq_sock = svsk;
		svsk->sk_inuse++;
	} else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
		rqstp->rq_sock = svsk;
		svsk->sk_inuse++;
		rqstp->rq_reserved = serv->sv_bufsz;	
		svsk->sk_reserved += rqstp->rq_reserved;
	} else {
		/* No data pending. Go to sleep */
		svc_serv_enqueue(serv, rqstp);

		/*
		 * We have to be able to interrupt this wait
		 * to bring down the daemons ...
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&rqstp->rq_wait, &wait);
		spin_unlock_bh(&serv->sv_lock);

		schedule_timeout(timeout);

		spin_lock_bh(&serv->sv_lock);
		remove_wait_queue(&rqstp->rq_wait, &wait);

		/* Timed out or signalled without being handed a socket. */
		if (!(svsk = rqstp->rq_sock)) {
			svc_serv_dequeue(serv, rqstp);
			spin_unlock_bh(&serv->sv_lock);
			dprintk("svc: server %p, no data yet\n", rqstp);
			return signalled()? -EINTR : -EAGAIN;
		}
	}
	spin_unlock_bh(&serv->sv_lock);

	dprintk("svc: server %p, socket %p, inuse=%d\n",
		 rqstp, svsk, svsk->sk_inuse);
	len = svsk->sk_recvfrom(rqstp);
	dprintk("svc: got len=%d\n", len);

	/* No data, incomplete (TCP) read, or accept() */
	if (len == 0 || len == -EAGAIN) {
		svc_sock_release(rqstp);
		return -EAGAIN;
	}
	svsk->sk_lastrecv = CURRENT_TIME;
	if (test_bit(SK_TEMP, &svsk->sk_flags)) {
		/* push active sockets to end of list */
		spin_lock_bh(&serv->sv_lock);
		list_del(&svsk->sk_list);
		list_add_tail(&svsk->sk_list, &serv->sv_tempsocks);
		spin_unlock_bh(&serv->sv_lock);
	}

	rqstp->rq_secure  = ntohs(rqstp->rq_addr.sin_port) < 1024;
	rqstp->rq_userset = 0;
	rqstp->rq_verfed  = 0;

	/* Copy the transaction id from the request into the reply. */
	svc_getlong(&rqstp->rq_argbuf, rqstp->rq_xid);
	svc_putlong(&rqstp->rq_resbuf, rqstp->rq_xid);

	/* Assume that the reply consists of a single buffer. */
	rqstp->rq_resbuf.nriov = 1;

	if (serv->sv_stats)
		serv->sv_stats->netcnt++;
	return len;
}
|
/*
 * Drop request without sending a reply: just release the socket
 * (and the received skb, if any) back to the server.
 */
void
svc_drop(struct svc_rqst *rqstp)
{
	dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
	svc_sock_release(rqstp);
}
|
/*
 * Return reply to client.
 *
 * Frees the received skb first (on UDP the reply may need the space),
 * sends via the transport's sk_sendto, then releases the socket.
 * Connection-level errors (-ECONNREFUSED/-ENOTCONN/-EAGAIN) are mapped
 * to 0: there is nothing the caller can usefully do about them.
 */
int
svc_send(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk;
	int		len;

	if ((svsk = rqstp->rq_sock) == NULL) {
		printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
				__FILE__, __LINE__);
		return -EFAULT;
	}

	/* release the receive skb before sending the reply */
	svc_release_skb(rqstp);

	len = svsk->sk_sendto(rqstp);
	svc_sock_release(rqstp);

	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
		return 0;
	return len;
}
|
/*
 * Initialize socket for RPC use and create svc_sock struct
 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
 *
 * Wraps a kernel socket in a svc_sock: saves the original sk callbacks
 * (restored by svc_delete_socket), runs the protocol-specific init, and
 * optionally registers the service with the local portmapper.  With
 * pmap_register the socket goes on the permanent list; otherwise it is
 * a temporary (per-connection) socket.  On failure *errp holds a
 * negative errno and NULL is returned.
 */
static struct svc_sock *
svc_setup_socket(struct svc_serv *serv, struct socket *sock,
					int *errp, int pmap_register)
{
	struct svc_sock	*svsk;
	struct sock	*inet;

	dprintk("svc: svc_setup_socket %p\n", sock);
	if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) {
		*errp = -ENOMEM;
		return NULL;
	}
	memset(svsk, 0, sizeof(*svsk));

	inet = sock->sk;
	inet->user_data = svsk;
	svsk->sk_sock = sock;
	svsk->sk_sk = inet;
	/* Save the original callbacks so they can be restored on delete. */
	svsk->sk_ostate = inet->state_change;
	svsk->sk_odata = inet->data_ready;
	svsk->sk_owspace = inet->write_space;
	svsk->sk_server = serv;
	svsk->sk_lastrecv = CURRENT_TIME;

	/* Initialize the socket */
	if (sock->type == SOCK_DGRAM)
		*errp = svc_udp_init(svsk);
	else
		*errp = svc_tcp_init(svsk);
if (svsk->sk_sk == NULL)
	printk(KERN_WARNING "svsk->sk_sk == NULL after svc_prot_init!\n");

	/* Register socket with portmapper */
	if (*errp >= 0 && pmap_register)
		*errp = svc_register(serv, inet->protocol,
				     ntohs(inet->sport));

	if (*errp < 0) {
		inet->user_data = NULL;
		kfree(svsk);
		return NULL;
	}


	spin_lock_bh(&serv->sv_lock);
	if (!pmap_register) {
		set_bit(SK_TEMP, &svsk->sk_flags);
		list_add(&svsk->sk_list, &serv->sv_tempsocks);
		serv->sv_tmpcnt++;
	} else {
		clear_bit(SK_TEMP, &svsk->sk_flags);
		list_add(&svsk->sk_list, &serv->sv_permsocks);
	}
	spin_unlock_bh(&serv->sv_lock);

	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
				svsk, svsk->sk_sk);
	return svsk;
}
|
/*
 * Create socket for RPC service.
 *
 * Creates a UDP or TCP PF_INET socket, binds it to *sin (if given,
 * with SO_REUSEADDR for TCP), puts TCP sockets into listen mode, and
 * hands the socket to svc_setup_socket for registration with the
 * portmapper.  Returns 0 or a negative errno; the socket is released
 * on any failure.
 */
static int
svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
{
	struct svc_sock	*svsk;
	struct socket	*sock;
	int		error;
	int		type;

	dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
				serv->sv_program->pg_name, protocol,
				NIPQUAD(sin->sin_addr.s_addr),
				ntohs(sin->sin_port));

	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
		printk(KERN_WARNING "svc: only UDP and TCP "
				"sockets supported\n");
		return -EINVAL;
	}
	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;

	if ((error = sock_create(PF_INET, type, protocol, &sock)) < 0)
		return error;

	if (sin != NULL) {
		if (type == SOCK_STREAM)
			sock->sk->reuse = 1; /* allow address reuse */
		error = sock->ops->bind(sock, (struct sockaddr *) sin,
						sizeof(*sin));
		if (error < 0)
			goto bummer;
	}

	if (protocol == IPPROTO_TCP) {
		if ((error = sock->ops->listen(sock, 64)) < 0)
			goto bummer;
	}

	if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
		return 0;

bummer:
	dprintk("svc: svc_create_socket error = %d\n", -error);
	sock_release(sock);
	return error;
}
|
/*
 * Remove a dead socket
 *
 * Idempotent via the SK_DEAD bit.  Restores the original sk callbacks,
 * unlinks the svc_sock from the server's lists, and frees it — unless
 * it is still in use (sk_inuse != 0), in which case the final
 * svc_sock_put is presumably responsible for the actual destruction.
 */
void
svc_delete_socket(struct svc_sock *svsk)
{
	struct svc_serv	*serv;
	struct sock	*sk;

	dprintk("svc: svc_delete_socket(%p)\n", svsk);

	if (test_and_set_bit(SK_DEAD, &svsk->sk_flags))
		return ;

	serv = svsk->sk_server;
	sk = svsk->sk_sk;

	sk->state_change = svsk->sk_ostate;
	sk->data_ready = svsk->sk_odata;
	sk->write_space = svsk->sk_owspace;

	spin_lock_bh(&serv->sv_lock);

	list_del(&svsk->sk_list);
	if (test_bit(SK_TEMP, &svsk->sk_flags))
		serv->sv_tmpcnt--;
	if (test_bit(SK_QUED, &svsk->sk_flags))
		list_del(&svsk->sk_ready);


	if (!svsk->sk_inuse) {
		spin_unlock_bh(&serv->sv_lock);
		sock_release(svsk->sk_sock);
		kfree(svsk);
	} else {
		spin_unlock_bh(&serv->sv_lock);
		dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
		/* svsk->sk_server = NULL; */
	}
}
|
/*
 * Make a socket for nfsd and lockd.
 *
 * Creates a wildcard (INADDR_ANY) UDP or TCP service socket on the
 * given port.  Returns 0 or a negative errno from svc_create_socket.
 */
int
svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
{
	struct sockaddr_in	sin;

	dprintk("svc: creating socket proto = %d\n", protocol);
	/* Zero the whole sockaddr first: the sin_zero padding was
	 * previously passed to bind() uninitialized. */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family      = AF_INET;
	sin.sin_addr.s_addr = INADDR_ANY;
	sin.sin_port        = htons(port);
	return svc_create_socket(serv, protocol, &sin);
}
|
/pmap_clnt.c
0,0 → 1,278
/* |
* linux/net/sunrpc/pmap.c |
* |
* Portmapper client. |
* |
* FIXME: In a secure environment, we may want to use an authentication |
* flavor other than AUTH_NULL. |
* |
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/config.h> |
#include <linux/types.h> |
#include <linux/socket.h> |
#include <linux/kernel.h> |
#include <linux/errno.h> |
#include <linux/uio.h> |
#include <linux/in.h> |
#include <linux/sunrpc/clnt.h> |
#include <linux/sunrpc/xprt.h> |
#include <linux/sunrpc/sched.h> |
|
#ifdef RPC_DEBUG |
# define RPCDBG_FACILITY RPCDBG_PMAP |
#endif |
|
#define PMAP_SET 1 |
#define PMAP_UNSET 2 |
#define PMAP_GETPORT 3 |
|
static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); |
static void pmap_getport_done(struct rpc_task *); |
extern struct rpc_program pmap_program; |
static spinlock_t pmap_lock = SPIN_LOCK_UNLOCKED; |
|
/*
 * Obtain the port for a given RPC service on a given host. This one can
 * be called for an ongoing RPC request.
 *
 * Asynchronous: spawns a child RPC task that performs PMAP_GETPORT and
 * completes in pmap_getport_done().  cl_binding serializes concurrent
 * binds on the same client — later callers sleep on cl_bindwait.
 * On setup failure the task is failed with -EIO.
 */
void
rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
{
	struct rpc_portmap *map = &clnt->cl_pmap;
	struct sockaddr_in *sap = &clnt->cl_xprt->addr;
	struct rpc_message msg = { PMAP_GETPORT, map, &clnt->cl_port, NULL };
	struct rpc_clnt	*pmap_clnt;
	struct rpc_task	*child;

	dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n",
			task->tk_pid, clnt->cl_server,
			map->pm_prog, map->pm_vers, map->pm_prot);

	spin_lock(&pmap_lock);
	if (clnt->cl_binding) {
		rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0);
		spin_unlock(&pmap_lock);
		return;
	}
	clnt->cl_binding = 1;
	spin_unlock(&pmap_lock);

	task->tk_status = -EACCES; /* why set this? returns -EIO below */
	if (!(pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot)))
		goto bailout;
	task->tk_status = 0;

	/*
	 * Note: rpc_new_child will release client after a failure.
	 */
	if (!(child = rpc_new_child(pmap_clnt, task)))
		goto bailout;

	/* Setup the call info struct */
	rpc_call_setup(child, &msg, 0);

	/* ... and run the child task */
	rpc_run_child(task, child, pmap_getport_done);
	return;

bailout:
	/* Drop the binding flag and wake anyone queued behind us. */
	spin_lock(&pmap_lock);
	clnt->cl_binding = 0;
	rpc_wake_up(&clnt->cl_bindwait);
	spin_unlock(&pmap_lock);
	task->tk_status = -EIO;
	task->tk_action = NULL;
}
|
#ifdef CONFIG_ROOT_NFS |
int |
rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) |
{ |
struct rpc_portmap map = { prog, vers, prot, 0 }; |
struct rpc_clnt *pmap_clnt; |
char hostname[32]; |
int status; |
|
dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n", |
NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); |
|
sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); |
if (!(pmap_clnt = pmap_create(hostname, sin, prot))) |
return -EACCES; |
|
/* Setup the call info struct */ |
status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0); |
|
if (status >= 0) { |
if (map.pm_port != 0) |
return map.pm_port; |
status = -EACCES; |
} |
return status; |
} |
#endif |
|
/*
 * Completion callback for the PMAP_GETPORT child task started by
 * rpc_getport().  On success, stores the (byte-swapped) port into both
 * the client and its transport address; port 0 means the program isn't
 * registered (-EACCES).  Always clears cl_binding and wakes waiters.
 */
static void
pmap_getport_done(struct rpc_task *task)
{
	struct rpc_clnt	*clnt = task->tk_client;

	dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
			task->tk_pid, task->tk_status, clnt->cl_port);
	if (task->tk_status < 0) {
		/* Make the calling task exit with an error */
		task->tk_action = NULL;
	} else if (clnt->cl_port == 0) {
		/* Program not registered */
		task->tk_status = -EACCES;
		task->tk_action = NULL;
	} else {
		/* byte-swap port number first */
		clnt->cl_port = htons(clnt->cl_port);
		clnt->cl_xprt->addr.sin_port = clnt->cl_port;
	}
	spin_lock(&pmap_lock);
	clnt->cl_binding = 0;
	rpc_wake_up(&clnt->cl_bindwait);
	spin_unlock(&pmap_lock);
}
|
/* |
* Set or unset a port registration with the local portmapper. |
* port == 0 means unregister, port != 0 means register. |
*/ |
int |
rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) |
{ |
struct sockaddr_in sin; |
struct rpc_portmap map; |
struct rpc_clnt *pmap_clnt; |
unsigned int error = 0; |
|
dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", |
prog, vers, prot, port); |
|
sin.sin_family = AF_INET; |
sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); |
if (!(pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP))) { |
dprintk("RPC: couldn't create pmap client\n"); |
return -EACCES; |
} |
|
map.pm_prog = prog; |
map.pm_vers = vers; |
map.pm_prot = prot; |
map.pm_port = port; |
|
error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET, |
&map, okay, 0); |
|
if (error < 0) { |
printk(KERN_WARNING |
"RPC: failed to contact portmap (errno %d).\n", |
error); |
} |
dprintk("RPC: registration status %d/%d\n", error, *okay); |
|
/* Client deleted automatically because cl_oneshot == 1 */ |
return error; |
} |
|
/*
 * Create a one-shot RPC client bound to the portmapper (port 111) on
 * the given host.  cl_oneshot makes the client self-destruct after a
 * single call; softrtry keeps a dead portmapper from hanging callers.
 * Returns NULL on failure.
 */
static struct rpc_clnt *
pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto)
{
	struct rpc_xprt	*xprt;
	struct rpc_clnt	*clnt;

	/* printk("pmap: create xprt\n"); */
	if (!(xprt = xprt_create_proto(proto, srvaddr, NULL)))
		return NULL;
	xprt->addr.sin_port = htons(RPC_PMAP_PORT);

	/* printk("pmap: create clnt\n"); */
	clnt = rpc_create_client(xprt, hostname,
				&pmap_program, RPC_PMAP_VERSION,
				RPC_AUTH_NULL);
	if (!clnt) {
		xprt_destroy(xprt);
	} else {
		clnt->cl_softrtry = 1;
		clnt->cl_chatty   = 1;
		clnt->cl_oneshot  = 1;
	}
	return clnt;
}
|
/*
 * XDR encode/decode functions for PMAP
 */
/* Placeholder for procedures with no arguments/results (pmap_null):
 * any attempt to encode or decode is an error. */
static int
xdr_error(struct rpc_rqst *req, u32 *p, void *dummy)
{
	return -EIO;
}
|
/* Encode a struct rpc_portmap as four XDR unsigned ints
 * (prog, vers, prot, port) and adjust the send iovec. */
static int
xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
{
	dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
		map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
	*p++ = htonl(map->pm_prog);
	*p++ = htonl(map->pm_vers);
	*p++ = htonl(map->pm_prot);
	*p++ = htonl(map->pm_port);

	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
	return 0;
}
|
static int |
xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) |
{ |
*portp = (unsigned short) ntohl(*p++); |
return 0; |
} |
|
static int |
xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) |
{ |
*boolp = (unsigned int) ntohl(*p++); |
return 0; |
} |
|
/* Procedure table, indexed by PMAP procedure number (0..3).
 * Fields appear to be: name, encode fn, decode fn, and two sizes —
 * presumably argument/reply buffer sizes in words; confirm against
 * struct rpc_procinfo in linux/sunrpc/clnt.h. */
static struct rpc_procinfo	pmap_procedures[4] = {
	{ "pmap_null",
		(kxdrproc_t) xdr_error,	
		(kxdrproc_t) xdr_error, 0, 0 },
	{ "pmap_set",
		(kxdrproc_t) xdr_encode_mapping,	
		(kxdrproc_t) xdr_decode_bool, 4, 1 },
	{ "pmap_unset",
		(kxdrproc_t) xdr_encode_mapping,	
		(kxdrproc_t) xdr_decode_bool, 4, 1 },
	{ "pmap_get",
		(kxdrproc_t) xdr_encode_mapping,	
		(kxdrproc_t) xdr_decode_port, 4, 1 },
};

/* Portmapper protocol version 2 (RFC 1833). */
static struct rpc_version	pmap_version2 = {
	2, 4, pmap_procedures
};

/* Indexed by version number; versions 0 and 1 are unsupported. */
static struct rpc_version *	pmap_version[] = {
	NULL,
	NULL,
	&pmap_version2,
};

static struct rpc_stat		pmap_stats;

/* RPC program descriptor for the portmapper (program 100000). */
struct rpc_program	pmap_program = {
	"portmap",
	RPC_PMAP_PROGRAM,
	sizeof(pmap_version)/sizeof(pmap_version[0]),
	pmap_version,
	&pmap_stats,
};
/svcauth.c
0,0 → 1,166
/* |
* linux/net/sunrpc/svcauth.c |
* |
* The generic interface for RPC authentication on the server side. |
* |
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
* |
* CHANGES |
* 19-Apr-2000 Chris Evans - Security fix |
*/ |
|
#include <linux/types.h> |
#include <linux/sched.h> |
#include <linux/sunrpc/types.h> |
#include <linux/sunrpc/xdr.h> |
#include <linux/sunrpc/svcauth.h> |
#include <linux/sunrpc/svcsock.h> |
|
#define RPCDBG_FACILITY RPCDBG_AUTH |
|
/* |
* Type of authenticator function |
*/ |
typedef void (*auth_fn_t)(struct svc_rqst *rqstp, u32 *statp, u32 *authp); |
|
/* |
* Builtin auth flavors |
*/ |
static void svcauth_null(struct svc_rqst *rqstp, u32 *statp, u32 *authp); |
static void svcauth_unix(struct svc_rqst *rqstp, u32 *statp, u32 *authp); |
|
/*
 * Max number of authentication flavors we support
 */
#define RPC_SVCAUTH_MAX	8

/*
 * Table of authenticators, indexed by the RPC auth flavor number taken
 * from the wire (slot 0 = AUTH_NULL handler, slot 1 = AUTH_UNIX
 * handler — matching the standard flavor numbering).  Additional
 * flavors are installed at runtime via svc_auth_register().
 */
static auth_fn_t	authtab[RPC_SVCAUTH_MAX] = {
	svcauth_null,
	svcauth_unix,
	NULL,
};
|
/*
 * Authenticate an incoming request: read the auth flavor from the
 * argument buffer and dispatch to the registered handler.
 *
 * On return *statp is the RPC accept status and *authp the auth
 * status; an unknown flavor yields rpc_autherr_badcred with the
 * overall status still rpc_success.
 */
void
svc_authenticate(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
	u32		flavor;
	auth_fn_t	func;

	*statp = rpc_success;
	*authp = rpc_auth_ok;

	svc_getlong(&rqstp->rq_argbuf, flavor);
	flavor = ntohl(flavor);

	dprintk("svc: svc_authenticate (%d)\n", flavor);
	/* Bounds-check before indexing: flavor comes off the wire. */
	if (flavor >= RPC_SVCAUTH_MAX || !(func = authtab[flavor])) {
		*authp = rpc_autherr_badcred;
		return;
	}

	rqstp->rq_cred.cr_flavor = flavor;
	func(rqstp, statp, authp);
}
|
int |
svc_auth_register(u32 flavor, auth_fn_t func) |
{ |
if (flavor >= RPC_SVCAUTH_MAX || authtab[flavor]) |
return -EINVAL; |
authtab[flavor] = func; |
return 0; |
} |
|
void |
svc_auth_unregister(u32 flavor) |
{ |
if (flavor < RPC_SVCAUTH_MAX) |
authtab[flavor] = NULL; |
} |
|
/*
 * AUTH_NULL handler: the credential and verifier must both be empty
 * (flavor AUTH_NULL, length 0).  Maps the caller to the anonymous
 * uid/gid and emits a NULL verifier in the reply.
 */
static void
svcauth_null(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
	struct svc_buf	*argp = &rqstp->rq_argbuf;
	struct svc_buf	*resp = &rqstp->rq_resbuf;

	/* Three more words must be present: cred length, verf flavor,
	 * verf length. */
	if ((argp->len -= 3) < 0) {
		*statp = rpc_garbage_args;
		return;
	}
	if (*(argp->buf)++ != 0) {	/* we already skipped the flavor */
		dprintk("svc: bad null cred\n");
		*authp = rpc_autherr_badcred;
		return;
	}
	if (*(argp->buf)++ != RPC_AUTH_NULL || *(argp->buf)++ != 0) {
		dprintk("svc: bad null verf\n");
		*authp = rpc_autherr_badverf;
		return;
	}

	/* Signal that mapping to nobody uid/gid is required */
	rqstp->rq_cred.cr_uid = (uid_t) -1;
	rqstp->rq_cred.cr_gid = (gid_t) -1;
	rqstp->rq_cred.cr_groups[0] = NOGROUP;

	/* Put NULL verifier */
	rqstp->rq_verfed = 1;
	svc_putlong(resp, RPC_AUTH_NULL);
	svc_putlong(resp, 0);
}
|
/*
 * AUTH_UNIX handler: parse machine name, uid, gid and supplementary
 * gids from the credential, and require a NULL verifier.
 *
 * Length accounting is security-critical (cf. the 19-Apr-2000 fix in
 * the file header): every variable-length field is checked against
 * the remaining word count before the pointer is advanced.
 */
static void
svcauth_unix(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
	struct svc_buf	*argp = &rqstp->rq_argbuf;
	struct svc_buf	*resp = &rqstp->rq_resbuf;
	struct svc_cred	*cred = &rqstp->rq_cred;
	u32		*bufp = argp->buf, slen, i;
	int		len   = argp->len;

	if ((len -= 3) < 0) {
		*statp = rpc_garbage_args;
		return;
	}

	bufp++;					/* length */
	bufp++;					/* time stamp */
	/* Round the byte length up to whole 32-bit words. */
	slen = (ntohl(*bufp++) + 3) >> 2;	/* machname length */
	if (slen > 64 || (len -= slen + 3) < 0)
		goto badcred;
	bufp += slen;				/* skip machname */

	cred->cr_uid = ntohl(*bufp++);		/* uid */
	cred->cr_gid = ntohl(*bufp++);		/* gid */

	slen = ntohl(*bufp++);			/* gids length */
	if (slen > 16 || (len -= slen + 2) < 0)
		goto badcred;
	/* Copy at most NGROUPS gids; NOGROUP-terminate a short list. */
	for (i = 0; i < NGROUPS && i < slen; i++)
		cred->cr_groups[i] = ntohl(*bufp++);
	if (i < NGROUPS)
		cred->cr_groups[i] = NOGROUP;
	bufp += (slen - i);

	if (*bufp++ != RPC_AUTH_NULL || *bufp++ != 0) {
		*authp = rpc_autherr_badverf;
		return;
	}

	argp->buf = bufp;
	argp->len = len;

	/* Put NULL verifier */
	rqstp->rq_verfed = 1;
	svc_putlong(resp, RPC_AUTH_NULL);
	svc_putlong(resp, 0);

	return;

badcred:
	*authp = rpc_autherr_badcred;
}
/timer.c
0,0 → 1,74
#include <linux/version.h> |
#include <linux/types.h> |
#include <linux/unistd.h> |
|
#include <linux/sunrpc/clnt.h> |
#include <linux/sunrpc/xprt.h> |
#include <linux/sunrpc/timer.h> |
|
#define RPC_RTO_MAX (60*HZ) |
#define RPC_RTO_INIT (HZ/5) |
#define RPC_RTO_MIN (HZ/10) |
|
void |
rpc_init_rtt(struct rpc_rtt *rt, long timeo) |
{ |
long t = (timeo - RPC_RTO_INIT) << 3; |
int i; |
rt->timeo = timeo; |
if (t < 0) |
t = 0; |
for (i = 0; i < 5; i++) { |
rt->srtt[i] = t; |
rt->sdrtt[i] = RPC_RTO_INIT; |
} |
memset(rt->ntimeouts, 0, sizeof(rt->ntimeouts)); |
} |
|
/*
 * Fold a new RTT measurement 'm' (jiffies) into the estimator for
 * timer class 'timer'.  Classic Jacobson smoothing: srtt is fixed-point
 * <<3 (gain 1/8), sdrtt is fixed-point <<2 (gain 1/4).
 * Timer class 0 ("other") keeps no estimator and is ignored.
 */
void
rpc_update_rtt(struct rpc_rtt *rt, int timer, long m)
{
	long *srtt, *sdrtt;

	/* Class 0 has no estimator; classes 1..5 map to slots 0..4 */
	if (timer-- == 0)
		return;

	/* A zero measurement would collapse the estimator */
	if (m == 0)
		m = 1;
	srtt = &rt->srtt[timer];
	m -= *srtt >> 3;
	*srtt += m;		/* srtt += (m - srtt/8), i.e. gain 1/8 */
	if (m < 0)
		m = -m;
	sdrtt = &rt->sdrtt[timer];
	m -= *sdrtt >> 2;
	*sdrtt += m;		/* sdrtt += (|err| - sdrtt/4), gain 1/4 */
	/* Set lower bound on the variance */
	if (*sdrtt < RPC_RTO_MIN)
		*sdrtt = RPC_RTO_MIN;
}
|
/* |
* Estimate rto for an nfs rpc sent via. an unreliable datagram. |
* Use the mean and mean deviation of rtt for the appropriate type of rpc |
* for the frequent rpcs and a default for the others. |
* The justification for doing "other" this way is that these rpcs |
* happen so infrequently that timer est. would probably be stale. |
* Also, since many of these rpcs are |
* non-idempotent, a conservative timeout is desired. |
* getattr, lookup, |
* read, write, commit - A+4D |
* other - timeo |
*/ |
|
long |
rpc_calc_rto(struct rpc_rtt *rt, int timer) |
{ |
long res; |
if (timer-- == 0) |
return rt->timeo; |
res = (rt->srtt[timer] >> 3) + rt->sdrtt[timer]; |
if (res > RPC_RTO_MAX) |
res = RPC_RTO_MAX; |
return res; |
} |
/sysctl.c
0,0 → 1,137
/* |
* linux/net/sunrpc/sysctl.c |
* |
* Sysctl interface to sunrpc module. This is for debugging only now. |
* |
* I would prefer to register the sunrpc table below sys/net, but that's |
* impossible at the moment. |
*/ |
|
#include <linux/config.h> |
#include <linux/version.h> |
#include <linux/types.h> |
#include <linux/linkage.h> |
#include <linux/ctype.h> |
#include <linux/fs.h> |
#include <linux/sysctl.h> |
#include <linux/module.h> |
|
#include <asm/uaccess.h> |
#include <linux/sunrpc/types.h> |
#include <linux/sunrpc/sched.h> |
#include <linux/sunrpc/stats.h> |
|
/*
 * Declare the debug flags here.
 * Each flag is writable through its sysctl entry in debug_table
 * below (handled by proc_dodebug).
 */
unsigned int rpc_debug;
unsigned int nfs_debug;
unsigned int nfsd_debug;
unsigned int nlm_debug;
|
#ifdef RPC_DEBUG |
|
static struct ctl_table_header *sunrpc_table_header; |
static ctl_table sunrpc_table[]; |
|
void |
rpc_register_sysctl(void) |
{ |
if (!sunrpc_table_header) { |
sunrpc_table_header = register_sysctl_table(sunrpc_table, 1); |
#ifdef CONFIG_PROC_FS |
if (sunrpc_table[0].de) |
sunrpc_table[0].de->owner = THIS_MODULE; |
#endif |
} |
|
} |
|
void |
rpc_unregister_sysctl(void) |
{ |
if (sunrpc_table_header) { |
unregister_sysctl_table(sunrpc_table_header); |
sunrpc_table_header = NULL; |
} |
} |
|
static int |
proc_dodebug(ctl_table *table, int write, struct file *file, |
void *buffer, size_t *lenp) |
{ |
char tmpbuf[20], *p, c; |
unsigned int value; |
size_t left, len; |
|
if ((file->f_pos && !write) || !*lenp) { |
*lenp = 0; |
return 0; |
} |
|
left = *lenp; |
|
if (write) { |
if (!access_ok(VERIFY_READ, buffer, left)) |
return -EFAULT; |
p = (char *) buffer; |
while (left && __get_user(c, p) >= 0 && isspace(c)) |
left--, p++; |
if (!left) |
goto done; |
|
if (left > sizeof(tmpbuf) - 1) |
return -EINVAL; |
copy_from_user(tmpbuf, p, left); |
tmpbuf[left] = '\0'; |
|
for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) |
value = 10 * value + (*p - '0'); |
if (*p && !isspace(*p)) |
return -EINVAL; |
while (left && isspace(*p)) |
left--, p++; |
*(unsigned int *) table->data = value; |
/* Display the RPC tasks on writing to rpc_debug */ |
if (table->ctl_name == CTL_RPCDEBUG) { |
rpc_show_tasks(); |
} |
} else { |
if (!access_ok(VERIFY_WRITE, buffer, left)) |
return -EFAULT; |
len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data); |
if (len > left) |
len = left; |
copy_to_user(buffer, tmpbuf, len); |
if ((left -= len) > 0) { |
put_user('\n', (char *)buffer + len); |
left--; |
} |
} |
|
done: |
*lenp -= left; |
file->f_pos += *lenp; |
return 0; |
} |
|
/* Build a read-only (0555) directory ctl_table entry pointing at a child table */
#define DIRENTRY(nam1, nam2, child)	\
	{CTL_##nam1, #nam2, NULL, 0, 0555, child }
/* Build a 0644 integer entry "<name>_debug" backed by the matching flag
 * variable and handled by proc_dodebug() */
#define DBGENTRY(nam1, nam2)	\
	{CTL_##nam1##DEBUG, #nam2 "_debug", &nam2##_debug, sizeof(int),\
	 0644, NULL, &proc_dodebug}
|
/* /proc/sys/sunrpc/{rpc,nfs,nfsd,nlm}_debug entries */
static ctl_table debug_table[] = {
	DBGENTRY(RPC,  rpc),
	DBGENTRY(NFS,  nfs),
	DBGENTRY(NFSD, nfsd),
	DBGENTRY(NLM,  nlm),
	{0}
};

/* Top-level /proc/sys/sunrpc directory */
static ctl_table sunrpc_table[] = {
	DIRENTRY(SUNRPC, sunrpc, debug_table),
	{0}
};
|
#endif |
/sunrpc_syms.c
0,0 → 1,109
/* |
* linux/net/sunrpc/sunrpc_syms.c |
* |
* Symbols exported by the sunrpc module. |
* |
* Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/config.h> |
#include <linux/module.h> |
|
#include <linux/types.h> |
#include <linux/socket.h> |
#include <linux/sched.h> |
#include <linux/uio.h> |
#include <linux/unistd.h> |
|
#include <linux/sunrpc/sched.h> |
#include <linux/sunrpc/clnt.h> |
#include <linux/sunrpc/svc.h> |
#include <linux/sunrpc/svcsock.h> |
#include <linux/sunrpc/auth.h> |
|
/* RPC scheduler */ |
EXPORT_SYMBOL(rpc_allocate); |
EXPORT_SYMBOL(rpc_free); |
EXPORT_SYMBOL(rpc_execute); |
EXPORT_SYMBOL(rpc_init_task); |
EXPORT_SYMBOL(rpc_sleep_on); |
EXPORT_SYMBOL(rpc_wake_up_next); |
EXPORT_SYMBOL(rpc_wake_up_task); |
EXPORT_SYMBOL(rpc_new_child); |
EXPORT_SYMBOL(rpc_run_child); |
EXPORT_SYMBOL(rpciod_down); |
EXPORT_SYMBOL(rpciod_up); |
EXPORT_SYMBOL(rpc_new_task); |
EXPORT_SYMBOL(rpc_wake_up_status); |
EXPORT_SYMBOL(rpc_release_task); |
|
/* RPC client functions */ |
EXPORT_SYMBOL(rpc_create_client); |
EXPORT_SYMBOL(rpc_destroy_client); |
EXPORT_SYMBOL(rpc_shutdown_client); |
EXPORT_SYMBOL(rpc_killall_tasks); |
EXPORT_SYMBOL(rpc_call_sync); |
EXPORT_SYMBOL(rpc_call_async); |
EXPORT_SYMBOL(rpc_call_setup); |
EXPORT_SYMBOL(rpc_clnt_sigmask); |
EXPORT_SYMBOL(rpc_clnt_sigunmask); |
EXPORT_SYMBOL(rpc_delay); |
EXPORT_SYMBOL(rpc_restart_call); |
EXPORT_SYMBOL(rpc_setbufsize); |
|
/* Client transport */ |
EXPORT_SYMBOL(xprt_create_proto); |
EXPORT_SYMBOL(xprt_destroy); |
EXPORT_SYMBOL(xprt_set_timeout); |
|
/* Client credential cache */ |
EXPORT_SYMBOL(rpcauth_register); |
EXPORT_SYMBOL(rpcauth_unregister); |
EXPORT_SYMBOL(rpcauth_init_credcache); |
EXPORT_SYMBOL(rpcauth_free_credcache); |
EXPORT_SYMBOL(rpcauth_insert_credcache); |
EXPORT_SYMBOL(rpcauth_lookupcred); |
EXPORT_SYMBOL(rpcauth_bindcred); |
EXPORT_SYMBOL(rpcauth_matchcred); |
EXPORT_SYMBOL(put_rpccred); |
|
/* RPC server stuff */ |
EXPORT_SYMBOL(svc_create); |
EXPORT_SYMBOL(svc_create_thread); |
EXPORT_SYMBOL(svc_exit_thread); |
EXPORT_SYMBOL(svc_destroy); |
EXPORT_SYMBOL(svc_drop); |
EXPORT_SYMBOL(svc_process); |
EXPORT_SYMBOL(svc_recv); |
EXPORT_SYMBOL(svc_wake_up); |
EXPORT_SYMBOL(svc_makesock); |
EXPORT_SYMBOL(svc_reserve); |
|
/* RPC statistics */ |
#ifdef CONFIG_PROC_FS |
EXPORT_SYMBOL(rpc_proc_register); |
EXPORT_SYMBOL(rpc_proc_unregister); |
EXPORT_SYMBOL(rpc_proc_read); |
EXPORT_SYMBOL(svc_proc_register); |
EXPORT_SYMBOL(svc_proc_unregister); |
EXPORT_SYMBOL(svc_proc_read); |
#endif |
|
/* Generic XDR */ |
EXPORT_SYMBOL(xdr_encode_array); |
EXPORT_SYMBOL(xdr_encode_string); |
EXPORT_SYMBOL(xdr_decode_string); |
EXPORT_SYMBOL(xdr_decode_string_inplace); |
EXPORT_SYMBOL(xdr_decode_netobj); |
EXPORT_SYMBOL(xdr_encode_netobj); |
EXPORT_SYMBOL(xdr_encode_pages); |
EXPORT_SYMBOL(xdr_inline_pages); |
EXPORT_SYMBOL(xdr_shift_buf); |
|
/* Debugging symbols */ |
#ifdef RPC_DEBUG |
EXPORT_SYMBOL(rpc_debug); |
EXPORT_SYMBOL(nfs_debug); |
EXPORT_SYMBOL(nfsd_debug); |
EXPORT_SYMBOL(nlm_debug); |
#endif |
/sched.c
0,0 → 1,1158
/* |
* linux/net/sunrpc/sched.c |
* |
* Scheduling for synchronous and asynchronous RPC requests. |
* |
* Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de> |
* |
* TCP NFS related read + write fixes |
* (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> |
*/ |
|
#include <linux/module.h> |
|
#define __KERNEL_SYSCALLS__ |
#include <linux/sched.h> |
#include <linux/interrupt.h> |
#include <linux/slab.h> |
#include <linux/unistd.h> |
#include <linux/smp.h> |
#include <linux/smp_lock.h> |
#include <linux/spinlock.h> |
|
#include <linux/sunrpc/clnt.h> |
#include <linux/sunrpc/xprt.h> |
|
#ifdef RPC_DEBUG |
#define RPCDBG_FACILITY RPCDBG_SCHED |
static int rpc_task_id; |
#endif |
|
/* |
* We give RPC the same get_free_pages priority as NFS |
*/ |
#define GFP_RPC GFP_NOFS |
|
static void __rpc_default_timer(struct rpc_task *task); |
static void rpciod_killall(void); |
|
/* |
* When an asynchronous RPC task is activated within a bottom half |
* handler, or while executing another RPC task, it is put on |
* schedq, and rpciod is woken up. |
*/ |
static RPC_WAITQ(schedq, "schedq"); |
|
/* |
* RPC tasks that create another task (e.g. for contacting the portmapper) |
* will wait on this queue for their child's completion |
*/ |
static RPC_WAITQ(childq, "childq"); |
|
/* |
* RPC tasks sit here while waiting for conditions to improve. |
*/ |
static RPC_WAITQ(delay_queue, "delayq"); |
|
/* |
* All RPC tasks are linked into this list |
*/ |
static LIST_HEAD(all_tasks); |
|
/* |
* rpciod-related stuff |
*/ |
static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle); |
static DECLARE_WAIT_QUEUE_HEAD(rpciod_killer); |
static DECLARE_MUTEX(rpciod_sema); |
static unsigned int rpciod_users; |
static pid_t rpciod_pid; |
static int rpc_inhibit; |
|
/* |
* Spinlock for wait queues. Access to the latter also has to be |
* interrupt-safe in order to allow timers to wake up sleeping tasks. |
*/ |
static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED; |
/* |
* Spinlock for other critical sections of code. |
*/ |
static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED; |
|
/* |
* This is the last-ditch buffer for NFS swap requests |
*/ |
static u32 swap_buffer[PAGE_SIZE >> 2]; |
static long swap_buffer_used; |
|
/*
 * Make allocation of the swap_buffer SMP-safe.
 * Bit 1 of swap_buffer_used serves as the lock flag;
 * rpc_lock_swapbuf() returns nonzero iff it acquired the buffer.
 */
static __inline__ int rpc_lock_swapbuf(void)
{
	return !test_and_set_bit(1, &swap_buffer_used);
}
static __inline__ void rpc_unlock_swapbuf(void)
{
	clear_bit(1, &swap_buffer_used);
}
|
/*
 * Disable the timer for a given RPC task. Should be called with
 * rpc_queue_lock and bh_disabled in order to avoid races within
 * rpc_run_timer().
 * Clearing tk_timeout_fn is enough: rpc_run_timer() re-reads it under
 * the lock, so a pending expiry becomes a no-op.
 */
static inline void
__rpc_disable_timer(struct rpc_task *task)
{
	dprintk("RPC: %4d disabling timer\n", task->tk_pid);
	task->tk_timeout_fn = NULL;
	task->tk_timeout = 0;
}
|
/* |
* Run a timeout function. |
* We use the callback in order to allow __rpc_wake_up_task() |
* and friends to disable the timer synchronously on SMP systems |
* without calling del_timer_sync(). The latter could cause a |
* deadlock if called while we're holding spinlocks... |
*/ |
static void |
rpc_run_timer(struct rpc_task *task) |
{ |
void (*callback)(struct rpc_task *); |
|
spin_lock_bh(&rpc_queue_lock); |
callback = task->tk_timeout_fn; |
task->tk_timeout_fn = NULL; |
spin_unlock_bh(&rpc_queue_lock); |
if (callback) { |
dprintk("RPC: %4d running timer\n", task->tk_pid); |
callback(task); |
} |
} |
|
/* |
* Set up a timer for the current task. |
*/ |
static inline void |
__rpc_add_timer(struct rpc_task *task, rpc_action timer) |
{ |
if (!task->tk_timeout) |
return; |
|
dprintk("RPC: %4d setting alarm for %lu ms\n", |
task->tk_pid, task->tk_timeout * 1000 / HZ); |
|
if (timer) |
task->tk_timeout_fn = timer; |
else |
task->tk_timeout_fn = __rpc_default_timer; |
mod_timer(&task->tk_timer, jiffies + task->tk_timeout); |
} |
|
/* |
* Set up a timer for an already sleeping task. |
*/ |
void rpc_add_timer(struct rpc_task *task, rpc_action timer) |
{ |
spin_lock_bh(&rpc_queue_lock); |
if (!RPC_IS_RUNNING(task)) |
__rpc_add_timer(task, timer); |
spin_unlock_bh(&rpc_queue_lock); |
} |
|
/*
 * Delete any timer for the current task. Because we use del_timer_sync(),
 * this function should never be called while holding rpc_queue_lock
 * (rpc_run_timer() takes that lock, so waiting for it to finish while
 * holding the lock would deadlock).
 */
static inline void
rpc_delete_timer(struct rpc_task *task)
{
	dprintk("RPC: %4d deleting timer\n", task->tk_pid);
	del_timer_sync(&task->tk_timer);
}
|
/* |
* Add new request to wait queue. |
* |
* Swapper tasks always get inserted at the head of the queue. |
* This should avoid many nasty memory deadlocks and hopefully |
* improve overall performance. |
* Everyone else gets appended to the queue to ensure proper FIFO behavior. |
*/ |
static inline int |
__rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) |
{ |
if (task->tk_rpcwait == queue) |
return 0; |
|
if (task->tk_rpcwait) { |
printk(KERN_WARNING "RPC: doubly enqueued task!\n"); |
return -EWOULDBLOCK; |
} |
if (RPC_IS_SWAPPER(task)) |
list_add(&task->tk_list, &queue->tasks); |
else |
list_add_tail(&task->tk_list, &queue->tasks); |
task->tk_rpcwait = queue; |
|
dprintk("RPC: %4d added to queue %p \"%s\"\n", |
task->tk_pid, queue, rpc_qname(queue)); |
|
return 0; |
} |
|
int |
rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task) |
{ |
int result; |
|
spin_lock_bh(&rpc_queue_lock); |
result = __rpc_add_wait_queue(q, task); |
spin_unlock_bh(&rpc_queue_lock); |
return result; |
} |
|
/* |
* Remove request from queue. |
* Note: must be called with spin lock held. |
*/ |
static inline void |
__rpc_remove_wait_queue(struct rpc_task *task) |
{ |
struct rpc_wait_queue *queue = task->tk_rpcwait; |
|
if (!queue) |
return; |
|
list_del(&task->tk_list); |
task->tk_rpcwait = NULL; |
|
dprintk("RPC: %4d removed from queue %p \"%s\"\n", |
task->tk_pid, queue, rpc_qname(queue)); |
} |
|
void |
rpc_remove_wait_queue(struct rpc_task *task) |
{ |
if (!task->tk_rpcwait) |
return; |
spin_lock_bh(&rpc_queue_lock); |
__rpc_remove_wait_queue(task); |
spin_unlock_bh(&rpc_queue_lock); |
} |
|
/*
 * Make an RPC task runnable.
 *
 * Note: If the task is ASYNC, this must be called with
 * the spinlock held to protect the wait queue operation.
 *
 * Async tasks are handed to rpciod via the schedq and a wakeup of
 * rpciod_idle; sync tasks are woken directly on their private
 * tk_wait queue.  The sleeping flag is cleared only AFTER the task
 * is safely queued.
 */
static inline void
rpc_make_runnable(struct rpc_task *task)
{
	/* A live timer here could fire after we queue the task — refuse */
	if (task->tk_timeout_fn) {
		printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
		return;
	}
	rpc_set_running(task);
	if (RPC_IS_ASYNC(task)) {
		if (RPC_IS_SLEEPING(task)) {
			int status;
			status = __rpc_add_wait_queue(&schedq, task);
			if (status < 0) {
				printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
				task->tk_status = status;
				return;
			}
			rpc_clear_sleeping(task);
			if (waitqueue_active(&rpciod_idle))
				wake_up(&rpciod_idle);
		}
	} else {
		rpc_clear_sleeping(task);
		if (waitqueue_active(&task->tk_wait))
			wake_up(&task->tk_wait);
	}
}
|
/*
 * Place a newly initialized task on the schedq.
 * Caller holds rpc_queue_lock (see rpc_run_child).
 */
static inline void
rpc_schedule_run(struct rpc_task *task)
{
	/* Don't run a child twice! */
	if (RPC_IS_ACTIVATED(task))
		return;
	task->tk_active = 1;
	rpc_set_sleeping(task);
	rpc_make_runnable(task);
}
|
/*
 * For other people who may need to wake the I/O daemon
 * but should (for now) know nothing about its innards.
 * Complains (but still attempts the wakeup) if rpciod isn't running.
 */
void rpciod_wake_up(void)
{
	if(rpciod_pid==0)
		printk(KERN_ERR "rpciod: wot no daemon?\n");
	if (waitqueue_active(&rpciod_idle))
		wake_up(&rpciod_idle);
}
|
/*
 * Prepare for sleeping on a wait queue.
 * By always appending tasks to the list we ensure FIFO behavior.
 * NB: An RPC task will only receive interrupt-driven events as long
 * as it's on a wait queue.
 * Caller must hold rpc_queue_lock (see rpc_sleep_on below).
 * 'action' becomes the task's pending callback, 'timer' the timeout
 * handler armed via __rpc_add_timer().
 */
static void
__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
			rpc_action action, rpc_action timer)
{
	int status;

	dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
				rpc_qname(q), jiffies);

	if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
		printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
		return;
	}

	/* Mark the task as being activated if so needed */
	if (!RPC_IS_ACTIVATED(task)) {
		task->tk_active = 1;
		rpc_set_sleeping(task);
	}

	status = __rpc_add_wait_queue(q, task);
	if (status) {
		printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
		task->tk_status = status;
	} else {
		rpc_clear_running(task);
		/* A still-pending callback would be silently lost — catch it */
		if (task->tk_callback) {
			dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
			BUG();
		}
		task->tk_callback = action;
		__rpc_add_timer(task, timer);
	}
}

/*
 * Public entry point: takes rpc_queue_lock and puts the task to sleep.
 */
void
rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
				rpc_action action, rpc_action timer)
{
	/*
	 * Protect the queue operations.
	 */
	spin_lock_bh(&rpc_queue_lock);
	__rpc_sleep_on(q, task, action, timer);
	spin_unlock_bh(&rpc_queue_lock);
}
|
/**
 * __rpc_wake_up_task - wake up a single rpc_task
 * @task: task to be woken up
 *
 * Caller must hold rpc_queue_lock.
 * Disables the task's timer, removes it from its wait queue (unless it
 * is already on schedq) and makes it runnable.  Waking a task that is
 * already running is a no-op.
 */
static void
__rpc_wake_up_task(struct rpc_task *task)
{
	dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
					task->tk_pid, jiffies, rpc_inhibit);

#ifdef RPC_DEBUG
	/* tk_magic is set in rpc_init_task and cleared on release */
	if (task->tk_magic != 0xf00baa) {
		printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
		rpc_debug = ~0;
		rpc_show_tasks();
		return;
	}
#endif
	/* Has the task been executed yet? If not, we cannot wake it up! */
	if (!RPC_IS_ACTIVATED(task)) {
		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
		return;
	}
	if (RPC_IS_RUNNING(task))
		return;

	__rpc_disable_timer(task);
	if (task->tk_rpcwait != &schedq)
		__rpc_remove_wait_queue(task);

	rpc_make_runnable(task);

	dprintk("RPC: __rpc_wake_up_task done\n");
}
|
/*
 * Default timeout handler if none specified by user.
 * Marks the task as timed out and wakes it.
 */
static void
__rpc_default_timer(struct rpc_task *task)
{
	dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
	task->tk_status = -ETIMEDOUT;
	rpc_wake_up_task(task);
}
|
/*
 * Wake up the specified task.
 * Unlocked fast-path check first; __rpc_wake_up_task() re-checks
 * RPC_IS_RUNNING under rpc_queue_lock.
 */
void
rpc_wake_up_task(struct rpc_task *task)
{
	if (RPC_IS_RUNNING(task))
		return;
	spin_lock_bh(&rpc_queue_lock);
	__rpc_wake_up_task(task);
	spin_unlock_bh(&rpc_queue_lock);
}
|
/*
 * Wake up the next task on the wait queue.
 * Returns the task woken, or NULL if the queue was empty.
 * (task_for_first is a macro that binds the first queue entry, if any,
 * and runs the following block for it — see its use with an else
 * branch in __rpc_schedule.)
 */
struct rpc_task *
rpc_wake_up_next(struct rpc_wait_queue *queue)
{
	struct rpc_task	*task = NULL;

	dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
	spin_lock_bh(&rpc_queue_lock);
	task_for_first(task, &queue->tasks)
		__rpc_wake_up_task(task);
	spin_unlock_bh(&rpc_queue_lock);

	return task;
}

/**
 * rpc_wake_up - wake up all rpc_tasks
 * @queue: rpc_wait_queue on which the tasks are sleeping
 *
 * Grabs rpc_queue_lock.
 * Repeatedly wakes the head of the queue until it is empty; waking
 * removes a task from the queue, so the loop terminates.
 */
void
rpc_wake_up(struct rpc_wait_queue *queue)
{
	struct rpc_task	*task;

	spin_lock_bh(&rpc_queue_lock);
	while (!list_empty(&queue->tasks))
		task_for_first(task, &queue->tasks)
			__rpc_wake_up_task(task);
	spin_unlock_bh(&rpc_queue_lock);
}
|
/**
 * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
 * @queue: rpc_wait_queue on which the tasks are sleeping
 * @status: status value to set
 *
 * Grabs rpc_queue_lock.
 * Same drain loop as rpc_wake_up(), but each task's tk_status is set
 * before it is woken (typically used to broadcast an error).
 */
void
rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
	struct rpc_task	*task;

	spin_lock_bh(&rpc_queue_lock);
	while (!list_empty(&queue->tasks)) {
		task_for_first(task, &queue->tasks) {
			task->tk_status = status;
			__rpc_wake_up_task(task);
		}
	}
	spin_unlock_bh(&rpc_queue_lock);
}
|
/*
 * Run a task at a later time: sleep on the global delay queue with a
 * timer that simply clears the status and wakes the task again.
 */
static void	__rpc_atrun(struct rpc_task *);
void
rpc_delay(struct rpc_task *task, unsigned long delay)
{
	task->tk_timeout = delay;	/* jiffies */
	rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}

/* Timer action for rpc_delay(): resume the task with a clean status */
static void
__rpc_atrun(struct rpc_task *task)
{
	task->tk_status = 0;
	rpc_wake_up_task(task);
}
|
/*
 * This is the RPC `scheduler' (or rather, the finite state machine).
 *
 * Repeatedly runs the task's pending callback and next tk_action step.
 * An async task that goes to sleep causes an immediate return (rpciod
 * will pick it up again via schedq); a sync task blocks here on its
 * tk_wait queue.  When tk_action becomes NULL the task is finished:
 * tk_exit is run (and may restart the task by setting a new
 * tk_action), then the task is released.  Returns the final
 * tk_status, or 0 for an async task that is merely sleeping.
 */
static int
__rpc_execute(struct rpc_task *task)
{
	int		status = 0;

	dprintk("RPC: %4d rpc_execute flgs %x\n",
				task->tk_pid, task->tk_flags);

	if (!RPC_IS_RUNNING(task)) {
		printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
		return 0;
	}

 restarted:
	while (1) {
		/*
		 * Execute any pending callback.
		 */
		if (RPC_DO_CALLBACK(task)) {
			/* Define a callback save pointer */
			void (*save_callback)(struct rpc_task *);

			/*
			 * If a callback exists, save it, reset it,
			 * call it.
			 * The save is needed to stop from resetting
			 * another callback set within the callback handler
			 * - Dave
			 */
			save_callback=task->tk_callback;
			task->tk_callback=NULL;
			save_callback(task);
		}

		/*
		 * Perform the next FSM step.
		 * tk_action may be NULL when the task has been killed
		 * by someone else.
		 */
		if (RPC_IS_RUNNING(task)) {
			/*
			 * Garbage collection of pending timers...
			 */
			rpc_delete_timer(task);
			if (!task->tk_action)
				break;
			task->tk_action(task);
		}

		/*
		 * Check whether task is sleeping.
		 * Async tasks return here; rpciod re-runs them when woken.
		 */
		spin_lock_bh(&rpc_queue_lock);
		if (!RPC_IS_RUNNING(task)) {
			rpc_set_sleeping(task);
			if (RPC_IS_ASYNC(task)) {
				spin_unlock_bh(&rpc_queue_lock);
				return 0;
			}
		}
		spin_unlock_bh(&rpc_queue_lock);

		while (RPC_IS_SLEEPING(task)) {
			/* sync task: sleep here */
			dprintk("RPC: %4d sync task going to sleep\n",
							task->tk_pid);
			if (current->pid == rpciod_pid)
				printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");

			__wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
			dprintk("RPC: %4d sync task resuming\n", task->tk_pid);

			/*
			 * When a sync task receives a signal, it exits with
			 * -ERESTARTSYS. In order to catch any callbacks that
			 * clean up after sleeping on some queue, we don't
			 * break the loop here, but go around once more.
			 */
			if (task->tk_client->cl_intr && signalled()) {
				dprintk("RPC: %4d got signal\n", task->tk_pid);
				task->tk_flags |= RPC_TASK_KILLED;
				rpc_exit(task, -ERESTARTSYS);
				rpc_wake_up_task(task);
			}
		}
	}

	if (task->tk_exit) {
		task->tk_exit(task);
		/* If tk_action is non-null, the user wants us to restart */
		if (task->tk_action) {
			if (!RPC_ASSASSINATED(task)) {
				/* Release RPC slot and buffer memory */
				if (task->tk_rqstp)
					xprt_release(task);
				if (task->tk_buffer) {
					rpc_free(task->tk_buffer);
					task->tk_buffer = NULL;
				}
				goto restarted;
			}
			printk(KERN_ERR "RPC: dead task tries to walk away.\n");
		}
	}

	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
	status = task->tk_status;

	/* Release all resources associated with the task */
	rpc_release_task(task);

	return status;
}
|
/* |
* User-visible entry point to the scheduler. |
* |
* This may be called recursively if e.g. an async NFS task updates |
* the attributes and finds that dirty pages must be flushed. |
* NOTE: Upon exit of this function the task is guaranteed to be |
* released. In particular note that tk_release() will have |
* been called, so your task memory may have been freed. |
*/ |
int |
rpc_execute(struct rpc_task *task) |
{ |
int status = -EIO; |
if (rpc_inhibit) { |
printk(KERN_INFO "RPC: execution inhibited!\n"); |
goto out_release; |
} |
|
status = -EWOULDBLOCK; |
if (task->tk_active) { |
printk(KERN_ERR "RPC: active task was run twice!\n"); |
goto out_err; |
} |
|
task->tk_active = 1; |
rpc_set_running(task); |
return __rpc_execute(task); |
out_release: |
rpc_release_task(task); |
out_err: |
return status; |
} |
|
/*
 * This is our own little scheduler for async RPC tasks.
 * Pops tasks off schedq one at a time (dropping the lock before
 * running each) until the queue is empty; yields the CPU every 200
 * tasks or when a reschedule is pending.
 */
static void
__rpc_schedule(void)
{
	struct rpc_task	*task;
	int		count = 0;

	dprintk("RPC: rpc_schedule enter\n");
	while (1) {
		spin_lock_bh(&rpc_queue_lock);

		/* task_for_first binds the head of schedq; its else branch
		 * runs when the queue is empty */
		task_for_first(task, &schedq.tasks) {
			__rpc_remove_wait_queue(task);
			spin_unlock_bh(&rpc_queue_lock);

			__rpc_execute(task);
		} else {
			spin_unlock_bh(&rpc_queue_lock);
			break;
		}

		if (++count >= 200 || current->need_resched) {
			count = 0;
			schedule();
		}
	}
	dprintk("RPC: rpc_schedule leave\n");
}
|
/* |
* Allocate memory for RPC purpose. |
* |
* This is yet another tricky issue: For sync requests issued by |
* a user process, we want to make kmalloc sleep if there isn't |
* enough memory. Async requests should not sleep too excessively |
* because that will block rpciod (but that's not dramatic when |
* it's starved of memory anyway). Finally, swapout requests should |
* never sleep at all, and should not trigger another swap_out |
* request through kmalloc which would just increase memory contention. |
* |
* I hope the following gets it right, which gives async requests |
* a slight advantage over sync requests (good for writeback, debatable |
* for readahead): |
* |
* sync user requests: GFP_KERNEL |
* async requests: GFP_RPC (== GFP_NOFS) |
* swap requests: GFP_ATOMIC (or new GFP_SWAPPER) |
*/ |
void * |
rpc_allocate(unsigned int flags, unsigned int size) |
{ |
u32 *buffer; |
int gfp; |
|
if (flags & RPC_TASK_SWAPPER) |
gfp = GFP_ATOMIC; |
else if (flags & RPC_TASK_ASYNC) |
gfp = GFP_RPC; |
else |
gfp = GFP_KERNEL; |
|
do { |
if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) { |
dprintk("RPC: allocated buffer %p\n", buffer); |
return buffer; |
} |
if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer) |
&& rpc_lock_swapbuf()) { |
dprintk("RPC: used last-ditch swap buffer\n"); |
return swap_buffer; |
} |
if (flags & RPC_TASK_ASYNC) |
return NULL; |
yield(); |
} while (!signalled()); |
|
return NULL; |
} |
|
void |
rpc_free(void *buffer) |
{ |
if (buffer != swap_buffer) { |
kfree(buffer); |
return; |
} |
rpc_unlock_swapbuf(); |
} |
|
/*
 * Creation and deletion of RPC task structures.
 * rpc_init_task() zeroes the task and fills in the timer, client
 * reference, exit callback, retry counters and debugging fields.
 * The caller owns the memory; rpc_new_task() pairs this with
 * rpc_default_free_task() as tk_release.
 */
inline void
rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
				rpc_action callback, int flags)
{
	memset(task, 0, sizeof(*task));
	init_timer(&task->tk_timer);
	task->tk_timer.data     = (unsigned long) task;
	task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
	task->tk_client = clnt;
	task->tk_flags  = flags;
	task->tk_exit   = callback;
	init_waitqueue_head(&task->tk_wait);
	/* Flag setuid-style mismatches so the FSM can re-check credentials */
	if (current->uid != current->fsuid || current->gid != current->fsgid)
		task->tk_flags |= RPC_TASK_SETUID;

	/* Initialize retry counters */
	task->tk_garb_retry = 2;
	task->tk_cred_retry = 2;
	task->tk_suid_retry = 1;

	/* Add to global list of all tasks */
	spin_lock(&rpc_sched_lock);
	list_add(&task->tk_task, &all_tasks);
	spin_unlock(&rpc_sched_lock);

	/* The task holds a reference on its client */
	if (clnt)
		atomic_inc(&clnt->cl_users);

#ifdef RPC_DEBUG
	task->tk_magic = 0xf00baa;	/* validity cookie, checked on wake/release */
	task->tk_pid = rpc_task_id++;
#endif
	dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
				current->pid);
}

/* Default tk_release for dynamically allocated tasks (see rpc_new_task) */
static void
rpc_default_free_task(struct rpc_task *task)
{
	dprintk("RPC: %4d freeing task\n", task->tk_pid);
	rpc_free(task);
}
|
/* |
* Create a new task for the specified client. We have to |
* clean up after an allocation failure, as the client may |
* have specified "oneshot". |
*/ |
struct rpc_task * |
rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) |
{ |
struct rpc_task *task; |
|
task = (struct rpc_task *) rpc_allocate(flags, sizeof(*task)); |
if (!task) |
goto cleanup; |
|
rpc_init_task(task, clnt, callback, flags); |
|
/* Replace tk_release */ |
task->tk_release = rpc_default_free_task; |
|
dprintk("RPC: %4d allocated task\n", task->tk_pid); |
task->tk_flags |= RPC_TASK_DYNAMIC; |
out: |
return task; |
|
cleanup: |
/* Check whether to release the client */ |
if (clnt) { |
printk("rpc_new_task: failed, users=%d, oneshot=%d\n", |
atomic_read(&clnt->cl_users), clnt->cl_oneshot); |
atomic_inc(&clnt->cl_users); /* pretend we were used ... */ |
rpc_release_client(clnt); |
} |
goto out; |
} |
|
/*
 * Release all resources associated with a task: unlink it from the
 * global list, kill its timer, dequeue it, drop its transport slot,
 * credentials, buffer and client reference, and finally invoke
 * tk_release (which may free the task memory itself).
 */
void
rpc_release_task(struct rpc_task *task)
{
	dprintk("RPC: %4d release task\n", task->tk_pid);

#ifdef RPC_DEBUG
	/* Catch double releases / stale pointers via the magic cookie */
	if (task->tk_magic != 0xf00baa) {
		printk(KERN_ERR "RPC: attempt to release a non-existing task!\n");
		rpc_debug = ~0;
		rpc_show_tasks();
		return;
	}
#endif

	/* Remove from global task list */
	spin_lock(&rpc_sched_lock);
	list_del(&task->tk_task);
	spin_unlock(&rpc_sched_lock);

	/* Protect the execution below. */
	spin_lock_bh(&rpc_queue_lock);

	/* Disable timer to prevent zombie wakeup */
	__rpc_disable_timer(task);

	/* Remove from any wait queue we're still on */
	__rpc_remove_wait_queue(task);

	task->tk_active = 0;

	spin_unlock_bh(&rpc_queue_lock);

	/* Synchronously delete any running timer */
	rpc_delete_timer(task);

	/* Release resources */
	if (task->tk_rqstp)
		xprt_release(task);
	if (task->tk_msg.rpc_cred)
		rpcauth_unbindcred(task);
	if (task->tk_buffer) {
		rpc_free(task->tk_buffer);
		task->tk_buffer = NULL;
	}
	if (task->tk_client) {
		rpc_release_client(task->tk_client);
		task->tk_client = NULL;
	}

#ifdef RPC_DEBUG
	task->tk_magic = 0;	/* invalidate the cookie */
#endif
	if (task->tk_release)
		task->tk_release(task);
}
|
/** |
* rpc_find_parent - find the parent of a child task. |
* @child: child task |
* |
* Checks that the parent task is still sleeping on the |
* queue 'childq'. If so returns a pointer to the parent. |
* Upon failure returns NULL. |
* |
* Caller must hold rpc_queue_lock |
*/ |
static inline struct rpc_task * |
rpc_find_parent(struct rpc_task *child) |
{ |
struct rpc_task *task, *parent; |
struct list_head *le; |
|
parent = (struct rpc_task *) child->tk_calldata; |
task_for_each(task, le, &childq.tasks) |
if (task == parent) |
return parent; |
|
return NULL; |
} |
|
static void |
rpc_child_exit(struct rpc_task *child) |
{ |
struct rpc_task *parent; |
|
spin_lock_bh(&rpc_queue_lock); |
if ((parent = rpc_find_parent(child)) != NULL) { |
parent->tk_status = child->tk_status; |
__rpc_wake_up_task(parent); |
} |
spin_unlock_bh(&rpc_queue_lock); |
} |
|
/*
 * Create an async child task whose completion wakes 'parent'.
 * Note: rpc_new_task releases the client after a failure; on failure
 * the parent's status is set to -ENOMEM and NULL is returned.
 */
struct rpc_task *
rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
{
	struct rpc_task	*task;

	task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
	if (!task)
		goto fail;
	task->tk_exit = rpc_child_exit;
	task->tk_calldata = parent;	/* rpc_find_parent reads this back */
	return task;

fail:
	parent->tk_status = -ENOMEM;
	return NULL;
}

/*
 * Put the parent to sleep on childq (resuming in 'func') and start
 * the child, atomically under rpc_queue_lock.
 */
void
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
	spin_lock_bh(&rpc_queue_lock);
	/* N.B. Is it possible for the child to have already finished? */
	__rpc_sleep_on(&childq, task, func, NULL);
	rpc_schedule_run(child);
	spin_unlock_bh(&rpc_queue_lock);
}
|
/*
 * Kill all tasks for the given client.
 * XXX: kill their descendants as well?
 * A NULL client kills every task on the global list.  Each victim is
 * flagged RPC_TASK_KILLED, forced to exit with -EIO and woken so the
 * FSM can run its cleanup.
 */
void
rpc_killall_tasks(struct rpc_clnt *clnt)
{
	struct rpc_task	*rovr;
	struct list_head *le;

	dprintk("RPC: killing all tasks for client %p\n", clnt);

	/*
	 * Spin lock all_tasks to prevent changes...
	 */
	spin_lock(&rpc_sched_lock);
	alltask_for_each(rovr, le, &all_tasks)
		if (!clnt || rovr->tk_client == clnt) {
			rovr->tk_flags |= RPC_TASK_KILLED;
			rpc_exit(rovr, -EIO);
			rpc_wake_up_task(rovr);
		}
	spin_unlock(&rpc_sched_lock);
}
|
/* Released by rpciod once it is up; rpciod_up() presumably waits on it
 * — TODO confirm (rpciod_up body is outside this view) */
static DECLARE_MUTEX_LOCKED(rpciod_running);

/* True if rpciod has async work queued on schedq */
static inline int
rpciod_task_pending(void)
{
	return !list_empty(&schedq.tasks);
}
|
|
/* |
* This is the rpciod kernel thread |
*/ |
static int
rpciod(void *ptr)
{
	/* 'ptr' is the wait queue that rpciod_down() sleeps on; we wake
	 * it ("the assassin") just before exiting. */
	wait_queue_head_t *assassin = (wait_queue_head_t*) ptr;
	int		rounds = 0;

	MOD_INC_USE_COUNT;
	lock_kernel();
	/*
	 * Let our maker know we're running ...
	 */
	rpciod_pid = current->pid;
	up(&rpciod_running);

	daemonize();

	/* Block every signal except SIGKILL, which rpciod_down() sends
	 * to request shutdown. */
	spin_lock_irq(&current->sigmask_lock);
	siginitsetinv(&current->blocked, sigmask(SIGKILL));
	recalc_sigpending(current);
	spin_unlock_irq(&current->sigmask_lock);

	strcpy(current->comm, "rpciod");

	dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
	while (rpciod_users) {
		if (signalled()) {
			/* SIGKILL received: kill outstanding RPC tasks. */
			rpciod_killall();
			flush_signals(current);
		}
		__rpc_schedule();

		/* Yield periodically so a busy RPC workload cannot
		 * monopolize the CPU. */
		if (++rounds >= 64) {	/* safeguard */
			schedule();
			rounds = 0;
		}

		if (!rpciod_task_pending()) {
			dprintk("RPC: rpciod back to sleep\n");
			wait_event_interruptible(rpciod_idle, rpciod_task_pending());
			dprintk("RPC: switch to rpciod\n");
			rounds = 0;
		}
	}

	dprintk("RPC: rpciod shutdown commences\n");
	if (!list_empty(&all_tasks)) {
		printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
		rpciod_killall();
	}

	/* Signal rpciod_down() that we are gone. */
	rpciod_pid = 0;
	wake_up(assassin);

	dprintk("RPC: rpciod exiting\n");
	MOD_DEC_USE_COUNT;
	return 0;
}
|
/*
 * Kill and drain every RPC task.  Runs in rpciod context (or from
 * rpciod_up/down paths); loops until the all_tasks list is empty.
 */
static void
rpciod_killall(void)
{
	unsigned long flags;

	while (!list_empty(&all_tasks)) {
		/* Clear sigpending so __rpc_schedule() is not aborted by
		 * the pending SIGKILL; restored below. */
		current->sigpending = 0;
		rpc_killall_tasks(NULL);
		__rpc_schedule();
		if (!list_empty(&all_tasks)) {
			dprintk("rpciod_killall: waiting for tasks to exit\n");
			yield();
		}
	}

	/* Recompute the pending-signal state we clobbered above. */
	spin_lock_irqsave(&current->sigmask_lock, flags);
	recalc_sigpending(current);
	spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
|
/* |
* Start up the rpciod process if it's not already running. |
*/ |
int
rpciod_up(void)
{
	int error = 0;

	MOD_INC_USE_COUNT;
	down(&rpciod_sema);
	dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
	rpciod_users++;
	/* Thread already running: just account the new user. */
	if (rpciod_pid)
		goto out;
	/*
	 * If there's no pid, we should be the first user.
	 */
	if (rpciod_users > 1)
		printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
	/*
	 * Create the rpciod thread and wait for it to start.
	 */
	error = kernel_thread(rpciod, &rpciod_killer, 0);
	if (error < 0) {
		printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
		rpciod_users--;
		goto out;
	}
	/* Block until rpciod() has recorded its pid and upped this. */
	down(&rpciod_running);
	error = 0;
out:
	up(&rpciod_sema);
	MOD_DEC_USE_COUNT;
	return error;
}
|
/*
 * Drop a reference on rpciod; when the last user goes away, send it
 * SIGKILL and wait for it to exit.
 */
void
rpciod_down(void)
{
	unsigned long flags;

	MOD_INC_USE_COUNT;
	down(&rpciod_sema);
	dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
	if (rpciod_users) {
		/* Other users remain: nothing further to do. */
		if (--rpciod_users)
			goto out;
	} else
		printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);

	if (!rpciod_pid) {
		dprintk("rpciod_down: Nothing to do!\n");
		goto out;
	}

	/* SIGKILL is the only signal rpciod leaves unblocked. */
	kill_proc(rpciod_pid, SIGKILL, 1);
	/*
	 * Usually rpciod will exit very quickly, so we
	 * wait briefly before checking the process id.
	 */
	current->sigpending = 0;
	yield();
	/*
	 * Display a message if we're going to wait longer.
	 */
	while (rpciod_pid) {
		dprintk("rpciod_down: waiting for pid %d to exit\n", rpciod_pid);
		if (signalled()) {
			dprintk("rpciod_down: caught signal\n");
			break;
		}
		/* rpciod() wakes this queue just before it exits. */
		interruptible_sleep_on(&rpciod_killer);
	}
	/* Recompute signal state clobbered by the wait above. */
	spin_lock_irqsave(&current->sigmask_lock, flags);
	recalc_sigpending(current);
	spin_unlock_irqrestore(&current->sigmask_lock, flags);
out:
	up(&rpciod_sema);
	MOD_DEC_USE_COUNT;
}
|
#ifdef RPC_DEBUG |
void rpc_show_tasks(void) |
{ |
struct list_head *le; |
struct rpc_task *t; |
|
spin_lock(&rpc_sched_lock); |
if (list_empty(&all_tasks)) { |
spin_unlock(&rpc_sched_lock); |
return; |
} |
printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " |
"-rpcwait -action- --exit--\n"); |
alltask_for_each(t, le, &all_tasks) |
printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", |
t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_status, |
t->tk_client, t->tk_client->cl_prog, |
t->tk_rqstp, t->tk_timeout, |
t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " <NULL> ", |
t->tk_action, t->tk_exit); |
spin_unlock(&rpc_sched_lock); |
} |
#endif |
/svc.c
0,0 → 1,427
/* |
* linux/net/sunrpc/svc.c |
* |
* High-level RPC service routines |
* |
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#define __KERNEL_SYSCALLS__ |
#include <linux/linkage.h> |
#include <linux/sched.h> |
#include <linux/errno.h> |
#include <linux/net.h> |
#include <linux/in.h> |
#include <linux/unistd.h> |
|
#include <linux/sunrpc/types.h> |
#include <linux/sunrpc/xdr.h> |
#include <linux/sunrpc/stats.h> |
#include <linux/sunrpc/svcsock.h> |
#include <linux/sunrpc/clnt.h> |
|
#define RPCDBG_FACILITY RPCDBG_SVCDSP |
#define RPC_PARANOIA 1 |
|
/* |
* Create an RPC service |
*/ |
struct svc_serv * |
svc_create(struct svc_program *prog, unsigned int bufsize, unsigned int xdrsize) |
{ |
struct svc_serv *serv; |
|
if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) |
return NULL; |
|
memset(serv, 0, sizeof(*serv)); |
serv->sv_program = prog; |
serv->sv_nrthreads = 1; |
serv->sv_stats = prog->pg_stats; |
serv->sv_bufsz = bufsize? bufsize : 4096; |
serv->sv_xdrsize = xdrsize; |
INIT_LIST_HEAD(&serv->sv_threads); |
INIT_LIST_HEAD(&serv->sv_sockets); |
INIT_LIST_HEAD(&serv->sv_tempsocks); |
INIT_LIST_HEAD(&serv->sv_permsocks); |
spin_lock_init(&serv->sv_lock); |
|
serv->sv_name = prog->pg_name; |
|
/* Remove any stale portmap registrations */ |
svc_register(serv, 0, 0); |
|
return serv; |
} |
|
/* |
* Destroy an RPC service |
*/ |
void |
svc_destroy(struct svc_serv *serv) |
{ |
struct svc_sock *svsk; |
|
dprintk("RPC: svc_destroy(%s, %d)\n", |
serv->sv_program->pg_name, |
serv->sv_nrthreads); |
|
if (serv->sv_nrthreads) { |
if (--(serv->sv_nrthreads) != 0) { |
svc_sock_update_bufs(serv); |
return; |
} |
} else |
printk("svc_destroy: no threads for serv=%p!\n", serv); |
|
while (!list_empty(&serv->sv_tempsocks)) { |
svsk = list_entry(serv->sv_tempsocks.next, |
struct svc_sock, |
sk_list); |
svc_delete_socket(svsk); |
} |
while (!list_empty(&serv->sv_permsocks)) { |
svsk = list_entry(serv->sv_permsocks.next, |
struct svc_sock, |
sk_list); |
svc_delete_socket(svsk); |
} |
|
/* Unregister service with the portmapper */ |
svc_register(serv, 0, 0); |
kfree(serv); |
} |
|
/* |
* Allocate an RPC server buffer |
* Later versions may do nifty things by allocating multiple pages |
* of memory directly and putting them into the bufp->iov. |
*/ |
int |
svc_init_buffer(struct svc_buf *bufp, unsigned int size) |
{ |
if (!(bufp->area = (u32 *) kmalloc(size, GFP_KERNEL))) |
return 0; |
bufp->base = bufp->area; |
bufp->buf = bufp->area; |
bufp->len = 0; |
bufp->buflen = size >> 2; |
|
bufp->iov[0].iov_base = bufp->area; |
bufp->iov[0].iov_len = size; |
bufp->nriov = 1; |
|
return 1; |
} |
|
/* |
* Release an RPC server buffer |
*/ |
void |
svc_release_buffer(struct svc_buf *bufp) |
{ |
kfree(bufp->area); |
bufp->area = 0; |
} |
|
/* |
* Create a server thread |
*/ |
int
svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
{
	struct svc_rqst	*rqstp;
	int		error = -ENOMEM;

	rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL);
	if (!rqstp)
		goto out;

	memset(rqstp, 0, sizeof(*rqstp));
	init_waitqueue_head(&rqstp->rq_wait);

	/* Argument and result decode areas, plus the default buffer. */
	if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
	 || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
	 || !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz))
		goto out_thread;

	serv->sv_nrthreads++;
	rqstp->rq_server = serv;
	error = kernel_thread((int (*)(void *)) func, rqstp, 0);
	if (error < 0)
		goto out_thread;
	svc_sock_update_bufs(serv);
	error = 0;
out:
	return error;

out_thread:
	/* svc_exit_thread frees whatever was allocated; it only drops a
	 * serv reference once rq_server has been set above. */
	svc_exit_thread(rqstp);
	goto out;
}
|
/* |
* Destroy an RPC server thread |
*/ |
void |
svc_exit_thread(struct svc_rqst *rqstp) |
{ |
struct svc_serv *serv = rqstp->rq_server; |
|
svc_release_buffer(&rqstp->rq_defbuf); |
if (rqstp->rq_resp) |
kfree(rqstp->rq_resp); |
if (rqstp->rq_argp) |
kfree(rqstp->rq_argp); |
kfree(rqstp); |
|
/* Release the server */ |
if (serv) |
svc_destroy(serv); |
} |
|
/* |
* Register an RPC service with the local portmapper. |
* To unregister a service, call this routine with |
* proto and port == 0. |
*/ |
int
svc_register(struct svc_serv *serv, int proto, unsigned short port)
{
	struct svc_program	*progp;
	unsigned long		flags;
	int			i, error = 0, dummy;

	progp = serv->sv_program;

	dprintk("RPC: svc_register(%s, %s, %d)\n",
		progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port);

	/* When unregistering, temporarily hide pending signals so the
	 * portmap RPC calls are not aborted; state is recomputed below. */
	if (!port)
		current->sigpending = 0;

	for (i = 0; i < progp->pg_nvers; i++) {
		if (progp->pg_vers[i] == NULL)
			continue;
		error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
		if (error < 0)
			break;
		/* Portmapper answered but refused the registration. */
		if (port && !dummy) {
			error = -EACCES;
			break;
		}
	}

	if (!port) {
		spin_lock_irqsave(&current->sigmask_lock, flags);
		recalc_sigpending(current);
		spin_unlock_irqrestore(&current->sigmask_lock, flags);
	}

	return error;
}
|
/* |
* Process the RPC request. |
*/ |
int
svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
{
	struct svc_program	*progp;
	struct svc_version	*versp = NULL;	/* compiler food */
	struct svc_procedure	*procp = NULL;
	struct svc_buf *	argp = &rqstp->rq_argbuf;
	struct svc_buf *	resp = &rqstp->rq_resbuf;
	kxdrproc_t		xdr;
	u32			*bufp, *statp;
	u32			dir, prog, vers, proc,
				auth_stat, rpc_stat;

	rpc_stat = rpc_success;
	bufp = argp->buf;

	/* Need at least 5 words: direction, RPC version, program,
	 * version and procedure. */
	if (argp->len < 5)
		goto err_short_len;

	dir = ntohl(*bufp++);
	vers = ntohl(*bufp++);

	/* First words of reply: */
	svc_putlong(resp, xdr_one);		/* REPLY */
	svc_putlong(resp, xdr_zero);		/* ACCEPT */

	if (dir != 0)		/* direction != CALL */
		goto err_bad_dir;
	if (vers != 2)		/* RPC version number */
		goto err_bad_rpc;

	rqstp->rq_prog = prog = ntohl(*bufp++);	/* program number */
	rqstp->rq_vers = vers = ntohl(*bufp++);	/* version number */
	rqstp->rq_proc = proc = ntohl(*bufp++);	/* procedure number */

	argp->buf += 5;
	argp->len -= 5;

	/* Used by nfsd to only allow the NULL procedure for amd. */
	if (rqstp->rq_auth && !rqstp->rq_client && proc) {
		auth_stat = rpc_autherr_badcred;
		goto err_bad_auth;
	}

	/*
	 * Decode auth data, and add verifier to reply buffer.
	 * We do this before anything else in order to get a decent
	 * auth verifier.
	 */
	svc_authenticate(rqstp, &rpc_stat, &auth_stat);

	if (rpc_stat != rpc_success)
		goto err_garbage;

	if (auth_stat != rpc_auth_ok)
		goto err_bad_auth;

	progp = serv->sv_program;
	if (prog != progp->pg_prog)
		goto err_bad_prog;

	if (vers >= progp->pg_nvers ||
	  !(versp = progp->pg_vers[vers]))
		goto err_bad_vers;

	procp = versp->vs_proc + proc;
	if (proc >= versp->vs_nproc || !procp->pc_func)
		goto err_bad_proc;
	rqstp->rq_server = serv;
	rqstp->rq_procinfo = procp;

	/* Syntactic check complete */
	serv->sv_stats->rpccnt++;

	/* Build the reply header. */
	statp = resp->buf;
	svc_putlong(resp, rpc_success);		/* RPC_SUCCESS */

	/* Bump per-procedure stats counter */
	procp->pc_count++;

	/* Initialize storage for argp and resp */
	memset(rqstp->rq_argp, 0, procp->pc_argsize);
	memset(rqstp->rq_resp, 0, procp->pc_ressize);

	/* un-reserve some of the out-queue now that we have a
	 * better idea of reply size
	 */
	if (procp->pc_xdrressize)
		svc_reserve(rqstp, procp->pc_xdrressize<<2);

	/* Call the function that processes the request. */
	if (!versp->vs_dispatch) {
		/* Decode arguments */
		xdr = procp->pc_decode;
		if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp))
			goto err_garbage;

		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);

		/* Encode reply */
		if (*statp == rpc_success && (xdr = procp->pc_encode)
		 && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
			dprintk("svc: failed to encode reply\n");
			/* serv->sv_stats->rpcsystemerr++; */
			*statp = rpc_system_err;
		}
	} else {
		dprintk("svc: calling dispatcher\n");
		if (!versp->vs_dispatch(rqstp, statp)) {
			/* Release reply info */
			if (procp->pc_release)
				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
			goto dropit;
		}
	}

	/* Check RPC status result */
	/* On failure, truncate the reply just past the status word. */
	if (*statp != rpc_success)
		resp->len = statp + 1 - resp->base;

	/* Release reply info */
	if (procp->pc_release)
		procp->pc_release(rqstp, NULL, rqstp->rq_resp);

	if (procp->pc_encode == NULL)
		goto dropit;
 sendit:
	return svc_send(rqstp);

 dropit:
	dprintk("svc: svc_process dropit\n");
	svc_drop(rqstp);
	return 0;

err_short_len:
#ifdef RPC_PARANOIA
	printk("svc: short len %d, dropping request\n", argp->len);
#endif
	goto dropit;			/* drop request */

err_bad_dir:
#ifdef RPC_PARANOIA
	printk("svc: bad direction %d, dropping request\n", dir);
#endif
	serv->sv_stats->rpcbadfmt++;
	goto dropit;			/* drop request */

err_bad_rpc:
	serv->sv_stats->rpcbadfmt++;
	/* buf[-1] overwrites the ACCEPT word written earlier. */
	resp->buf[-1] = xdr_one;	/* REJECT */
	svc_putlong(resp, xdr_zero);	/* RPC_MISMATCH */
	svc_putlong(resp, xdr_two);	/* Only RPCv2 supported */
	svc_putlong(resp, xdr_two);
	goto sendit;

err_bad_auth:
	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
	serv->sv_stats->rpcbadauth++;
	resp->buf[-1] = xdr_one;	/* REJECT */
	svc_putlong(resp, xdr_one);	/* AUTH_ERROR */
	svc_putlong(resp, auth_stat);	/* status */
	goto sendit;

err_bad_prog:
#ifdef RPC_PARANOIA
	if (prog != 100227 || progp->pg_prog != 100003)
		printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog);
	/* else it is just a Solaris client seeing if ACLs are supported */
#endif
	serv->sv_stats->rpcbadfmt++;
	svc_putlong(resp, rpc_prog_unavail);
	goto sendit;

err_bad_vers:
#ifdef RPC_PARANOIA
	if (vers)
		printk("svc: unknown version (%d)\n", vers);
#endif
	serv->sv_stats->rpcbadfmt++;
	svc_putlong(resp, rpc_prog_mismatch);
	svc_putlong(resp, htonl(progp->pg_lovers));
	svc_putlong(resp, htonl(progp->pg_hivers));
	goto sendit;

err_bad_proc:
#ifdef RPC_PARANOIA
	printk("svc: unknown procedure (%d)\n", proc);
#endif
	serv->sv_stats->rpcbadfmt++;
	svc_putlong(resp, rpc_proc_unavail);
	goto sendit;

err_garbage:
#ifdef RPC_PARANOIA
	printk("svc: failed to decode args\n");
#endif
	serv->sv_stats->rpcbadfmt++;
	svc_putlong(resp, rpc_garbage_args);
	goto sendit;
}
/auth_null.c
0,0 → 1,140
/* |
* linux/net/sunrpc/rpcauth_null.c |
* |
* AUTH_NULL authentication. Really :-) |
* |
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/types.h> |
#include <linux/slab.h> |
#include <linux/socket.h> |
#include <linux/in.h> |
#include <linux/utsname.h> |
#include <linux/sunrpc/clnt.h> |
|
#ifdef RPC_DEBUG |
# define RPCDBG_FACILITY RPCDBG_AUTH |
#endif |
|
static struct rpc_credops null_credops; |
|
static struct rpc_auth * |
nul_create(struct rpc_clnt *clnt) |
{ |
struct rpc_auth *auth; |
|
dprintk("RPC: creating NULL authenticator for client %p\n", clnt); |
if (!(auth = (struct rpc_auth *) rpc_allocate(0, sizeof(*auth)))) |
return NULL; |
auth->au_cslack = 4; |
auth->au_rslack = 2; |
auth->au_ops = &authnull_ops; |
auth->au_expire = 1800 * HZ; |
rpcauth_init_credcache(auth); |
|
return (struct rpc_auth *) auth; |
} |
|
/* Tear down a NULL authenticator: drop cached creds, then free it. */
static void
nul_destroy(struct rpc_auth *auth)
{
	dprintk("RPC: destroying NULL authenticator %p\n", auth);
	rpcauth_free_credcache(auth);
	rpc_free(auth);
}
|
/* |
* Create NULL creds for current process |
*/ |
static struct rpc_cred * |
nul_create_cred(int flags) |
{ |
struct rpc_cred *cred; |
|
if (!(cred = (struct rpc_cred *) rpc_allocate(flags, sizeof(*cred)))) |
return NULL; |
atomic_set(&cred->cr_count, 0); |
cred->cr_flags = RPCAUTH_CRED_UPTODATE; |
cred->cr_uid = current->uid; |
cred->cr_ops = &null_credops; |
|
return cred; |
} |
|
/* |
* Destroy cred handle. |
*/ |
static void
nul_destroy_cred(struct rpc_cred *cred)
{
	/* NULL creds carry no extra state; just free the handle. */
	rpc_free(cred);
}
|
/* |
* Match cred handle against current process |
*/ |
static int
nul_match(struct rpc_cred *cred, int taskflags)
{
	/* AUTH_NULL creds carry no identity, so every cred matches. */
	return 1;
}
|
/* |
* Marshal credential. |
*/ |
/*
 * Marshal an AUTH_NULL credential + verifier: each is the flavor word
 * followed by a zero body length (four 32-bit words total).
 */
static u32 *
nul_marshal(struct rpc_task *task, u32 *p, int ruid)
{
	p[0] = htonl(RPC_AUTH_NULL);	/* credential flavor */
	p[1] = 0;			/* credential length */
	p[2] = htonl(RPC_AUTH_NULL);	/* verifier flavor */
	p[3] = 0;			/* verifier length */

	return p + 4;
}
|
/* |
* Refresh credential. This is a no-op for AUTH_NULL |
*/ |
static int
nul_refresh(struct rpc_task *task)
{
	/* A NULL cred can never be refreshed; mark the task -EACCES so
	 * the caller gives up instead of retrying. */
	return task->tk_status = -EACCES;
}
|
/*
 * Validate the reply verifier: it must have flavor RPC_AUTH_NULL and
 * a zero-length body.  Returns the position just past the verifier,
 * or NULL if the verifier is malformed.
 */
static u32 *
nul_validate(struct rpc_task *task, u32 *p)
{
	u32		n = ntohl(*p++);

	if (n != RPC_AUTH_NULL) {
		/* %lu, not %ld: the argument is an unsigned long. */
		printk(KERN_WARNING "RPC: bad verf flavor: %lu\n",
						(unsigned long) n);
		return NULL;
	}
	if ((n = ntohl(*p++)) != 0) {
		printk(KERN_WARNING "RPC: bad verf size: %lu\n",
						(unsigned long) n);
		return NULL;
	}

	return p;
}
|
/*
 * Positional initializer for struct rpc_authops: the AUTH_NULL flavor
 * number, the debug name, and the authenticator operations above.
 */
struct rpc_authops	authnull_ops = {
	RPC_AUTH_NULL,
#ifdef RPC_DEBUG
	"NULL",
#endif
	nul_create,
	nul_destroy,
	nul_create_cred
};
|
/*
 * Positional initializer for struct rpc_credops: the per-credential
 * operations (destroy, match, marshal, refresh, validate) above.
 */
static
struct rpc_credops	null_credops = {
	nul_destroy_cred,
	nul_match,
	nul_marshal,
	nul_refresh,
	nul_validate
};
/Makefile
0,0 → 1,24
# |
# Makefile for Linux kernel SUN RPC |
# |
# Note! Dependencies are done automagically by 'make dep', which also |
# removes any old dependencies. DON'T put your own dependencies here |
# unless it's something special (ie not a .c file). |
# |
# Note 2! The CFLAGS definition is now in the main makefile... |
|
# Final link target for this directory
O_TARGET := sunrpc.o

# Objects that export symbols to modules
export-objs := sunrpc_syms.o

obj-y := clnt.o xprt.o sched.o \
	 auth.o auth_null.o auth_unix.o \
	 svc.o svcsock.o svcauth.o \
	 pmap_clnt.o timer.o xdr.o sunrpc_syms.o

# stats and sysctl support are only built when the corresponding
# kernel features are configured in
obj-$(CONFIG_PROC_FS) += stats.o
obj-$(CONFIG_SYSCTL) += sysctl.o

obj-m := $(O_TARGET)

include $(TOPDIR)/Rules.make
/xprt.c
0,0 → 1,1608
/* |
* linux/net/sunrpc/xprt.c |
* |
* This is a generic RPC call interface supporting congestion avoidance, |
* and asynchronous calls. |
* |
* The interface works like this: |
* |
* - When a process places a call, it allocates a request slot if |
* one is available. Otherwise, it sleeps on the backlog queue |
* (xprt_reserve). |
* - Next, the caller puts together the RPC message, stuffs it into |
* the request struct, and calls xprt_call(). |
* - xprt_call transmits the message and installs the caller on the |
* socket's wait list. At the same time, it installs a timer that |
* is run after the packet's timeout has expired. |
* - When a packet arrives, the data_ready handler walks the list of |
* pending requests for that socket. If a matching XID is found, the |
* caller is woken up, and the timer removed. |
* - When no reply arrives within the timeout interval, the timer is |
* fired by the kernel and runs xprt_timer(). It either adjusts the |
* timeout values (minor timeout) or wakes up the caller with a status |
* of -ETIMEDOUT. |
* - When the caller receives a notification from RPC that a reply arrived, |
* it should release the RPC slot, and process the reply. |
* If the call timed out, it may choose to retry the operation by |
* adjusting the initial timeout value, and simply calling rpc_call |
* again. |
* |
* Support for async RPC is done through a set of RPC-specific scheduling |
* primitives that `transparently' work for processes as well as async |
* tasks that rely on callbacks. |
* |
* Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> |
* |
* TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> |
* TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> |
* TCP NFS related read + write fixes |
* (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> |
* |
* Rewrite of larges part of the code in order to stabilize TCP stuff. |
* Fix behaviour when socket buffer is full. |
* (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> |
*/ |
|
#define __KERNEL_SYSCALLS__ |
|
#include <linux/version.h> |
#include <linux/types.h> |
#include <linux/slab.h> |
#include <linux/capability.h> |
#include <linux/sched.h> |
#include <linux/errno.h> |
#include <linux/socket.h> |
#include <linux/in.h> |
#include <linux/net.h> |
#include <linux/mm.h> |
#include <linux/udp.h> |
#include <linux/unistd.h> |
#include <linux/sunrpc/clnt.h> |
#include <linux/file.h> |
|
#include <net/sock.h> |
#include <net/checksum.h> |
#include <net/udp.h> |
#include <net/tcp.h> |
|
#include <asm/uaccess.h> |
|
/* |
* Local variables |
*/ |
|
#ifdef RPC_DEBUG |
# undef RPC_DEBUG_DATA |
# define RPCDBG_FACILITY RPCDBG_XPRT |
#endif |
|
#define XPRT_MAX_BACKOFF (8) |
|
/* |
* Local functions |
*/ |
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); |
static void do_xprt_transmit(struct rpc_task *); |
static inline void do_xprt_reserve(struct rpc_task *); |
static void xprt_disconnect(struct rpc_xprt *); |
static void xprt_connect_status(struct rpc_task *task); |
static struct socket *xprt_create_socket(int, struct rpc_timeout *, int); |
static int xprt_bind_socket(struct rpc_xprt *, struct socket *); |
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); |
|
#ifdef RPC_DEBUG_DATA
/*
 * Print the buffer contents (first 128 bytes only--just enough for
 * diropres return).
 */
static void
xprt_pktdump(char *msg, u32 *packet, unsigned int count)
{
	u8	*buf = (u8 *) packet;
	int	j;

	dprintk("RPC: %s\n", msg);
	for (j = 0; j < count && j < 128; j += 4) {
		/* Start a new output line every 32 bytes, prefixed with
		 * the byte offset. */
		if (!(j & 31)) {
			if (j)
				dprintk("\n");
			dprintk("0x%04x ", j);
		}
		dprintk("%02x%02x%02x%02x ",
			buf[j], buf[j+1], buf[j+2], buf[j+3]);
	}
	dprintk("\n");
}
#else
/* No-op stub when packet-data debugging is compiled out. */
static inline void
xprt_pktdump(char *msg, u32 *packet, unsigned int count)
{
	/* NOP */
}
#endif
|
/* |
* Look up RPC transport given an INET socket |
*/ |
static inline struct rpc_xprt *
xprt_from_sock(struct sock *sk)
{
	/* The owning transport is kept in sk->user_data (cleared again
	 * in xprt_close()); NULL once the socket is detached. */
	return (struct rpc_xprt *) sk->user_data;
}
|
/* |
* Serialize write access to sockets, in order to prevent different |
* requests from interfering with each other. |
* Also prevents TCP socket connections from colliding with writes. |
*/ |
/*
 * Try to make 'task' the exclusive sender on 'xprt'.  Returns nonzero
 * on success.  On failure the task is queued for a later wakeup:
 * retransmissions on 'resend' (preferred), first sends on 'sending'.
 * Caller must hold xprt->sock_lock.
 */
static int
__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;
	if (!xprt->snd_task) {
		/* Lock is free: take it, congestion window permitting. */
		if (xprt->nocong || __xprt_get_cong(xprt, task)) {
			xprt->snd_task = task;
			if (req) {
				req->rq_bytes_sent = 0;
				req->rq_ntrans++;
			}
		}
	}
	if (xprt->snd_task != task) {
		dprintk("RPC: %4d TCP write queue full\n", task->tk_pid);
		task->tk_timeout = 0;
		task->tk_status = -EAGAIN;
		if (req && req->rq_ntrans)
			rpc_sleep_on(&xprt->resend, task, NULL, NULL);
		else
			rpc_sleep_on(&xprt->sending, task, NULL, NULL);
	}
	return xprt->snd_task == task;
}
|
static inline int |
xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) |
{ |
int retval; |
spin_lock_bh(&xprt->sock_lock); |
retval = __xprt_lock_write(xprt, task); |
spin_unlock_bh(&xprt->sock_lock); |
return retval; |
} |
|
/*
 * Hand the write lock to the next queued task, if any.
 * Caller must hold xprt->sock_lock.
 */
static void
__xprt_lock_write_next(struct rpc_xprt *xprt)
{
	struct rpc_task *task;

	if (xprt->snd_task)
		return;
	/* Retransmissions are preferred over first transmissions. */
	task = rpc_wake_up_next(&xprt->resend);
	if (!task) {
		/* Don't start a fresh transmission while congested. */
		if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
			return;
		task = rpc_wake_up_next(&xprt->sending);
		if (!task)
			return;
	}
	if (xprt->nocong || __xprt_get_cong(xprt, task)) {
		struct rpc_rqst *req = task->tk_rqstp;
		xprt->snd_task = task;
		if (req) {
			req->rq_bytes_sent = 0;
			req->rq_ntrans++;
		}
	}
}
|
/* |
* Releases the socket for use by other requests. |
*/ |
static void
__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
	/* Drop the write lock if 'task' holds it, then let the next
	 * queued sender have a go.  Caller holds xprt->sock_lock. */
	if (xprt->snd_task == task)
		xprt->snd_task = NULL;
	__xprt_lock_write_next(xprt);
}
|
/* Locked wrapper around __xprt_release_write(). */
static inline void
xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
	spin_lock_bh(&xprt->sock_lock);
	__xprt_release_write(xprt, task);
	spin_unlock_bh(&xprt->sock_lock);
}
|
/* |
* Write data to socket. |
*/ |
/*
 * Send (part of) an RPC request on the transport socket, resuming at
 * req->rq_bytes_sent.  Returns bytes written or a negative errno;
 * -EAGAIN and -ECONNREFUSED are expected transient results.
 */
static inline int
xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	struct socket	*sock = xprt->sock;
	struct msghdr	msg;
	struct xdr_buf	*xdr = &req->rq_snd_buf;
	struct iovec	niv[MAX_IOVEC];
	unsigned int	niov, slen, skip;
	mm_segment_t	oldfs;
	int		result;

	if (!sock)
		return -ENOTCONN;

	xprt_pktdump("packet data:",
				req->rq_svec->iov_base,
				req->rq_svec->iov_len);

	/* Dont repeat bytes */
	skip = req->rq_bytes_sent;
	slen = xdr->len - skip;
	/* sock_sendmsg verifies "user" buffers; widen the address limit
	 * so our kernel iovecs pass.  Restored below. */
	oldfs = get_fs(); set_fs(get_ds());
	do {
		unsigned int slen_part, n;

		/* Map the (possibly highmem) xdr pages into iovecs. */
		niov = xdr_kmap(niv, xdr, skip);
		if (!niov) {
			result = -EAGAIN;
			break;
		}

		msg.msg_flags   = MSG_DONTWAIT|MSG_NOSIGNAL;
		msg.msg_iov	= niv;
		msg.msg_iovlen	= niov;
		msg.msg_name	= (struct sockaddr *) &xprt->addr;
		msg.msg_namelen = sizeof(xprt->addr);
		msg.msg_control = NULL;
		msg.msg_controllen = 0;

		slen_part = 0;
		for (n = 0; n < niov; n++)
			slen_part += niv[n].iov_len;

		clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
		result = sock_sendmsg(sock, &msg, slen_part);

		xdr_kunmap(xdr, skip, niov);

		skip += slen_part;
		slen -= slen_part;
	} while (result >= 0 && slen);
	set_fs(oldfs);

	dprintk("RPC: xprt_sendmsg(%d) = %d\n", slen, result);

	if (result >= 0)
		return result;

	switch (result) {
	case -ECONNREFUSED:
		/* When the server has died, an ICMP port unreachable message
		 * prompts ECONNREFUSED.
		 */
	case -EAGAIN:
		break;
	case -ECONNRESET:
	case -ENOTCONN:
	case -EPIPE:
		/* connection broken */
		if (xprt->stream)
			result = -ENOTCONN;
		break;
	default:
		printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
	}
	return result;
}
|
/* |
* Van Jacobson congestion avoidance. Check if the congestion window |
* overflowed. Put the task to sleep if this is the case. |
*/ |
static int
__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpc_rqst *req = task->tk_rqstp;

	/* Request already owns a congestion slot. */
	if (req->rq_cong)
		return 1;
	dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
			task->tk_pid, xprt->cong, xprt->cwnd);
	/* Window full: the caller will have to wait. */
	if (RPCXPRT_CONGESTED(xprt))
		return 0;
	req->rq_cong = 1;
	xprt->cong += RPC_CWNDSCALE;
	return 1;
}
|
/* |
* Adjust the congestion window, and wake up the next task |
* that has been sleeping due to congestion |
*/ |
static void
__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	if (!req->rq_cong)
		return;
	req->rq_cong = 0;
	xprt->cong -= RPC_CWNDSCALE;
	/* A congestion slot opened up: maybe a queued sender can run. */
	__xprt_lock_write_next(xprt);
}
|
/* |
* Adjust RPC congestion window |
* We use a time-smoothed congestion estimator to avoid heavy oscillation. |
*/ |
static void
xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
{
	unsigned long	cwnd;

	cwnd = xprt->cwnd;
	/* Success while the window is saturated: additive increase. */
	if (result >= 0 && cwnd <= xprt->cong) {
		/* The (cwnd >> 1) term makes sure
		 * the result gets rounded properly. */
		cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
		if (cwnd > RPC_MAXCWND)
			cwnd = RPC_MAXCWND;
		__xprt_lock_write_next(xprt);
	} else if (result == -ETIMEDOUT) {
		/* Timeout: multiplicative decrease (halve the window). */
		cwnd >>= 1;
		if (cwnd < RPC_CWNDSCALE)
			cwnd = RPC_CWNDSCALE;
	}
	dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
			xprt->cong, xprt->cwnd, cwnd);
	xprt->cwnd = cwnd;
}
|
/* |
* Adjust timeout values etc for next retransmit |
*/ |
int
xprt_adjust_timeout(struct rpc_timeout *to)
{
	if (to->to_retries > 0) {
		/* Retries remain: grow the per-call timeout. */
		if (to->to_exponential)
			to->to_current <<= 1;
		else
			to->to_current += to->to_increment;
		if (to->to_maxval && to->to_current >= to->to_maxval)
			to->to_current = to->to_maxval;
	} else {
		/* Out of retries: grow the initial timeout for the next
		 * major cycle and restart from it. */
		if (to->to_exponential)
			to->to_initval <<= 1;
		else
			to->to_initval += to->to_increment;
		if (to->to_maxval && to->to_initval >= to->to_maxval)
			to->to_initval = to->to_maxval;
		to->to_current = to->to_initval;
	}

	if (!to->to_current) {
		printk(KERN_WARNING "xprt_adjust_timeout: to_current = 0!\n");
		to->to_current = 5 * HZ;
	}
	pprintk("RPC: %lu %s\n", jiffies,
			to->to_retries? "retrans" : "timeout");
	/* Nonzero while retries remain; to_retries is consumed here. */
	return to->to_retries-- > 0;
}
|
/* |
* Close down a transport socket |
*/ |
static void
xprt_close(struct rpc_xprt *xprt)
{
	struct socket	*sock = xprt->sock;
	struct sock	*sk = xprt->inet;

	if (!sk)
		return;

	/* Detach the transport and restore the socket's original
	 * callbacks under the callback lock, so softirq handlers never
	 * see a half-detached socket. */
	write_lock_bh(&sk->callback_lock);
	xprt->inet = NULL;
	xprt->sock = NULL;

	sk->user_data = NULL;
	sk->data_ready = xprt->old_data_ready;
	sk->state_change = xprt->old_state_change;
	sk->write_space = xprt->old_write_space;
	write_unlock_bh(&sk->callback_lock);

	/* Fail all requests still waiting for a reply. */
	xprt_disconnect(xprt);
	sk->no_check = 0;

	sock_release(sock);
}
|
/* |
* Mark a transport as disconnected |
*/ |
static void
xprt_disconnect(struct rpc_xprt *xprt)
{
	dprintk("RPC: disconnected transport %p\n", xprt);
	spin_lock_bh(&xprt->sock_lock);
	xprt_clear_connected(xprt);
	/* Wake everything waiting for a reply with -ENOTCONN. */
	rpc_wake_up_status(&xprt->pending, -ENOTCONN);
	spin_unlock_bh(&xprt->sock_lock);
}
|
/* |
* Reconnect a broken TCP connection. |
* |
*/ |
void
xprt_connect(struct rpc_task *task)
{
	struct rpc_xprt	*xprt = task->tk_xprt;
	struct socket	*sock = xprt->sock;
	struct sock	*inet;
	int		status;

	dprintk("RPC: %4d xprt_connect %p connected %d\n",
			task->tk_pid, xprt, xprt_connected(xprt));
	if (xprt->shutdown)
		return;

	if (!xprt->addr.sin_port) {
		task->tk_status = -EIO;
		return;
	}

	/* Serialize: only one task may (re)connect at a time. */
	if (!xprt_lock_write(xprt, task))
		return;
	if (xprt_connected(xprt))
		goto out_write;

	if (task->tk_rqstp)
		task->tk_rqstp->rq_bytes_sent = 0;

	xprt_close(xprt);
	/* Create an unconnected socket */
	sock = xprt_create_socket(xprt->prot, &xprt->timeout, xprt->resvport);
	if (!sock) {
		/* couldn't create socket or bind to reserved port;
		 * this is likely a permanent error, so cause an abort */
		task->tk_status = -EIO;
		goto out_write;
	}
	xprt_bind_socket(xprt, sock);

	/* Datagram transports need no connect handshake; done. */
	if (!xprt->stream)
		goto out_write;

	inet = sock->sk;

	/* Now connect it asynchronously. */
	dprintk("RPC: %4d connecting new socket\n", task->tk_pid);
	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
				sizeof(xprt->addr), O_NONBLOCK);
	dprintk("RPC: %4d connect status %d connected %d\n",
		task->tk_pid, status, xprt_connected(xprt));

	if (status >= 0)
		return;

	switch (status) {
	case -EALREADY:
	case -EINPROGRESS:
		/* Protect against TCP socket state changes */
		lock_sock(inet);
		if (inet->state != TCP_ESTABLISHED) {
			dprintk("RPC: %4d waiting for connection\n",
					task->tk_pid);
			task->tk_timeout = RPC_CONNECT_TIMEOUT;
			/* if the socket is already closing, delay briefly */
			if ((1<<inet->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV))
				task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
			rpc_sleep_on(&xprt->pending, task, xprt_connect_status,
					NULL);
		}
		release_sock(inet);
		break;
	case -ECONNREFUSED:
	case -ECONNRESET:
	case -ENOTCONN:
		if (!task->tk_client->cl_softrtry) {
			/* Hard mount: back off, then retry the connect. */
			rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
			task->tk_status = -ENOTCONN;
			break;
		}
		/* soft mount: fall through to the default case */
	default:
		/* Report myriad other possible returns. If this file
		 * system is soft mounted, just error out, like Solaris. */
		if (task->tk_client->cl_softrtry) {
			printk(KERN_WARNING
			"RPC: error %d connecting to server %s, exiting\n",
					-status, task->tk_client->cl_server);
			task->tk_status = -EIO;
			goto out_write;
		}
		printk(KERN_WARNING "RPC: error %d connecting to server %s\n",
				-status, task->tk_client->cl_server);
		/* This will prevent anybody else from connecting */
		rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
		task->tk_status = status;
		break;
	}
	return;
 out_write:
	xprt_release_write(xprt, task);
}
|
/* |
* We arrive here when awoken from waiting on connection establishment. |
*/ |
static void
xprt_connect_status(struct rpc_task *task)
{
	struct rpc_xprt	*xprt = task->tk_xprt;

	if (task->tk_status >= 0) {
		dprintk("RPC: %4d xprt_connect_status: connection established\n",
				task->tk_pid);
		return;
	}

	/* if soft mounted, cause this RPC to fail */
	if (task->tk_client->cl_softrtry)
		task->tk_status = -EIO;

	switch (task->tk_status) {
	case -ENOTCONN:
		/* Back off before the next attempt; note the write lock
		 * is kept across the delay (not released below). */
		rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
		return;
	case -ETIMEDOUT:
		dprintk("RPC: %4d xprt_connect_status: timed out\n",
				task->tk_pid);
		break;
	default:
		printk(KERN_ERR "RPC: error %d connecting to server %s\n",
				-task->tk_status, task->tk_client->cl_server);
	}
	xprt_release_write(xprt, task);
}
|
/* |
* Look up the RPC request corresponding to a reply, and then lock it. |
*/ |
static inline struct rpc_rqst * |
xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) |
{ |
struct list_head *pos; |
struct rpc_rqst *req = NULL; |
|
list_for_each(pos, &xprt->recv) { |
struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); |
if (entry->rq_xid == xid) { |
req = entry; |
break; |
} |
} |
return req; |
} |
|
/* |
* Complete reply received. |
* The TCP code relies on us to remove the request from xprt->pending. |
*/ |
/*
 * Complete reply received: record the byte count, unhook the request
 * from xprt->recv and wake its owner task.
 * The TCP code relies on us to remove the request from xprt->pending.
 * Caller holds xprt->sock_lock (see udp_data_ready / tcp_read_request).
 */
static void
xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
{
	struct rpc_task	*task = req->rq_task;
	struct rpc_clnt *clnt = task->tk_client;

	/* Adjust congestion window */
	if (!xprt->nocong) {
		int timer = rpcproc_timer(clnt, task->tk_msg.rpc_proc);
		xprt_adjust_cwnd(xprt, copied);
		__xprt_put_cong(xprt, req);
		/* Only sample the RTT when the request was not retransmitted
		 * (rq_ntrans == 1); a retransmit makes the sample ambiguous. */
		if (req->rq_ntrans == 1) {
			if (timer)
				rpc_update_rtt(&clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime);
		}
		rpc_set_timeo(&clnt->cl_rtt, timer, req->rq_ntrans - 1);
	}

#ifdef RPC_PROFILE
	/* Profile only reads for now */
	if (copied > 1024) {
		static unsigned long	nextstat = 0;
		static unsigned long	pkt_rtt = 0, pkt_len = 0, pkt_cnt = 0;

		pkt_cnt++;
		pkt_len += req->rq_slen + copied;
		pkt_rtt += jiffies - req->rq_xtime;
		if (time_before(nextstat, jiffies)) {
			printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd);
			printk("RPC: %ld %ld %ld %ld stat\n",
					jiffies, pkt_cnt, pkt_len, pkt_rtt);
			pkt_rtt = pkt_len = pkt_cnt = 0;
			nextstat = jiffies + 5 * HZ;
		}
	}
#endif

	dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied);
	/* Non-zero rq_received marks the reply as complete for pollers. */
	req->rq_received = copied;
	list_del_init(&req->rq_list);

	/* ... and wake up the process. */
	rpc_wake_up_task(task);
	return;
}
|
static size_t |
skb_read_bits(skb_reader_t *desc, void *to, size_t len) |
{ |
if (len > desc->count) |
len = desc->count; |
skb_copy_bits(desc->skb, desc->offset, to, len); |
desc->count -= len; |
desc->offset += len; |
return len; |
} |
|
static size_t |
skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) |
{ |
unsigned int csum2, pos; |
|
if (len > desc->count) |
len = desc->count; |
pos = desc->offset; |
csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); |
desc->csum = csum_block_add(desc->csum, csum2, pos); |
desc->count -= len; |
desc->offset += len; |
return len; |
} |
|
/* |
* We have set things up such that we perform the checksum of the UDP |
* packet in parallel with the copies into the RPC client iovec. -DaveM |
*/ |
/*
 * We have set things up such that we perform the checksum of the UDP
 * packet in parallel with the copies into the RPC client iovec. -DaveM
 *
 * Returns 0 on success, -1 if the checksum is bad.  When the hardware
 * already verified the checksum (CHECKSUM_UNNECESSARY) the copy is done
 * without any checksumming.
 */
static int
csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
	skb_reader_t desc;

	/* Start reading just past the UDP header. */
	desc.skb = skb;
	desc.offset = sizeof(struct udphdr);
	desc.count = skb->len - desc.offset;

	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		goto no_checksum;

	/* Seed the checksum with the UDP header plus whatever partial
	 * sum the device already provided in skb->csum. */
	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
	xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits);
	/* Any bytes the xdr_buf couldn't hold must still be checksummed. */
	if (desc.offset != skb->len) {
		unsigned int csum2;
		csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
		desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
	}
	/* A correct datagram folds to zero. */
	if ((unsigned short)csum_fold(desc.csum))
		return -1;
	return 0;
no_checksum:
	xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits);
	return 0;
}
|
/* |
* Input handler for RPC replies. Called from a bottom half and hence |
* atomic. |
*/ |
/*
 * Input handler for RPC replies. Called from a bottom half and hence
 * atomic.  Installed as sk->data_ready in xprt_bind_socket() for UDP
 * transports.
 */
static void
udp_data_ready(struct sock *sk, int len)
{
	struct rpc_task	*task;
	struct rpc_xprt	*xprt;
	struct rpc_rqst *rovr;
	struct sk_buff	*skb;
	int		err, repsize, copied;

	read_lock(&sk->callback_lock);
	dprintk("RPC:      udp_data_ready...\n");
	if (sk->dead || !(xprt = xprt_from_sock(sk))) {
		printk("RPC:      udp_data_ready request not found!\n");
		goto out;
	}

	dprintk("RPC:      udp_data_ready client %p\n", xprt);

	if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
		goto out;

	if (xprt->shutdown)
		goto dropit;

	/* A reply must carry at least a 4-byte XID. */
	repsize = skb->len - sizeof(struct udphdr);
	if (repsize < 4) {
		printk("RPC: impossible RPC reply size %d!\n", repsize);
		goto dropit;
	}

	/* Look up and lock the request corresponding to the given XID */
	spin_lock(&xprt->sock_lock);
	rovr = xprt_lookup_rqst(xprt, *(u32 *) (skb->h.raw + sizeof(struct udphdr)));
	if (!rovr)
		goto out_unlock;
	task = rovr->rq_task;

	dprintk("RPC: %4d received reply\n", task->tk_pid);
	xprt_pktdump("packet data:",
		     (u32 *) (skb->h.raw+sizeof(struct udphdr)), repsize);

	/* Copy at most what the receive buffer can hold. */
	if ((copied = rovr->rq_private_buf.len) > repsize)
		copied = repsize;

	/* Suck it into the iovec, verify checksum if not done by hw. */
	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
		goto out_unlock;

	/* Something worked... */
	dst_confirm(skb->dst);

	xprt_complete_rqst(xprt, rovr, copied);

out_unlock:
	spin_unlock(&xprt->sock_lock);
 dropit:
	skb_free_datagram(sk, skb);
 out:
	/* Wake any synchronous waiters on the socket itself. */
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
	read_unlock(&sk->callback_lock);
}
|
/* |
* Copy from an skb into memory and shrink the skb. |
*/ |
static inline size_t |
tcp_copy_data(skb_reader_t *desc, void *p, size_t len) |
{ |
if (len > desc->count) |
len = desc->count; |
skb_copy_bits(desc->skb, desc->offset, p, len); |
desc->offset += len; |
desc->count -= len; |
return len; |
} |
|
/* |
* TCP read fragment marker |
*/ |
static inline void |
tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) |
{ |
size_t len, used; |
char *p; |
|
p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; |
len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; |
used = tcp_copy_data(desc, p, len); |
xprt->tcp_offset += used; |
if (used != len) |
return; |
xprt->tcp_reclen = ntohl(xprt->tcp_recm); |
if (xprt->tcp_reclen & 0x80000000) |
xprt->tcp_flags |= XPRT_LAST_FRAG; |
else |
xprt->tcp_flags &= ~XPRT_LAST_FRAG; |
xprt->tcp_reclen &= 0x7fffffff; |
xprt->tcp_flags &= ~XPRT_COPY_RECM; |
xprt->tcp_offset = 0; |
/* Sanity check of the record length */ |
if (xprt->tcp_reclen < 4) { |
printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); |
xprt_disconnect(xprt); |
} |
dprintk("RPC: reading TCP record fragment of length %d\n", |
xprt->tcp_reclen); |
} |
|
/*
 * Advance the TCP receive state machine when the current record
 * fragment has been fully consumed: re-arm the record-marker read,
 * and if this was the record's last fragment, expect a fresh XID next.
 */
static void
tcp_check_recm(struct rpc_xprt *xprt)
{
	if (xprt->tcp_offset == xprt->tcp_reclen) {
		xprt->tcp_flags |= XPRT_COPY_RECM;
		xprt->tcp_offset = 0;
		if (xprt->tcp_flags & XPRT_LAST_FRAG) {
			xprt->tcp_flags &= ~XPRT_COPY_DATA;
			xprt->tcp_flags |= XPRT_COPY_XID;
			xprt->tcp_copied = 0;
		}
	}
}
|
/* |
* TCP read xid |
*/ |
static inline void |
tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) |
{ |
size_t len, used; |
char *p; |
|
len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; |
dprintk("RPC: reading XID (%Zu bytes)\n", len); |
p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; |
used = tcp_copy_data(desc, p, len); |
xprt->tcp_offset += used; |
if (used != len) |
return; |
xprt->tcp_flags &= ~XPRT_COPY_XID; |
xprt->tcp_flags |= XPRT_COPY_DATA; |
xprt->tcp_copied = 4; |
dprintk("RPC: reading reply for XID %08x\n", xprt->tcp_xid); |
tcp_check_recm(xprt); |
} |
|
/* |
* TCP read and complete request |
*/ |
static inline void |
tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) |
{ |
struct rpc_rqst *req; |
struct xdr_buf *rcvbuf; |
size_t len; |
|
/* Find and lock the request corresponding to this xid */ |
spin_lock(&xprt->sock_lock); |
req = xprt_lookup_rqst(xprt, xprt->tcp_xid); |
if (!req) { |
xprt->tcp_flags &= ~XPRT_COPY_DATA; |
dprintk("RPC: XID %08x request not found!\n", |
xprt->tcp_xid); |
spin_unlock(&xprt->sock_lock); |
return; |
} |
|
rcvbuf = &req->rq_private_buf; |
len = desc->count; |
if (len > xprt->tcp_reclen - xprt->tcp_offset) { |
skb_reader_t my_desc; |
|
len = xprt->tcp_reclen - xprt->tcp_offset; |
memcpy(&my_desc, desc, sizeof(my_desc)); |
my_desc.count = len; |
xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, |
&my_desc, tcp_copy_data); |
desc->count -= len; |
desc->offset += len; |
} else |
xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, |
desc, tcp_copy_data); |
xprt->tcp_copied += len; |
xprt->tcp_offset += len; |
|
if (xprt->tcp_copied == req->rq_private_buf.len) |
xprt->tcp_flags &= ~XPRT_COPY_DATA; |
else if (xprt->tcp_offset == xprt->tcp_reclen) { |
if (xprt->tcp_flags & XPRT_LAST_FRAG) |
xprt->tcp_flags &= ~XPRT_COPY_DATA; |
} |
|
if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { |
dprintk("RPC: %4d received reply complete\n", |
req->rq_task->tk_pid); |
xprt_complete_rqst(xprt, req, xprt->tcp_copied); |
} |
spin_unlock(&xprt->sock_lock); |
tcp_check_recm(xprt); |
} |
|
/* |
* TCP discard extra bytes from a short read |
*/ |
static inline void |
tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) |
{ |
size_t len; |
|
len = xprt->tcp_reclen - xprt->tcp_offset; |
if (len > desc->count) |
len = desc->count; |
desc->count -= len; |
desc->offset += len; |
xprt->tcp_offset += len; |
tcp_check_recm(xprt); |
} |
|
/* |
* TCP record receive routine |
* We first have to grab the record marker, then the XID, then the data. |
*/ |
/*
 * TCP record receive routine
 * We first have to grab the record marker, then the XID, then the data.
 * Driven by tcp_read_sock(); xprt->tcp_flags selects which of the
 * three states applies, and any leftover bytes are discarded.
 * Returns the number of bytes consumed from this skb.
 */
static int
tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
		unsigned int offset, size_t len)
{
	struct rpc_xprt *xprt = (struct rpc_xprt *)rd_desc->buf;
	skb_reader_t desc = { skb, offset, len };

	dprintk("RPC:      tcp_data_recv\n");
	do {
		/* Read in a new fragment marker if necessary */
		/* Can we ever really expect to get completely empty fragments? */
		if (xprt->tcp_flags & XPRT_COPY_RECM) {
			tcp_read_fraghdr(xprt, &desc);
			continue;
		}
		/* Read in the xid if necessary */
		if (xprt->tcp_flags & XPRT_COPY_XID) {
			tcp_read_xid(xprt, &desc);
			continue;
		}
		/* Read in the request data */
		if (xprt->tcp_flags & XPRT_COPY_DATA) {
			tcp_read_request(xprt, &desc);
			continue;
		}
		/* Skip over any trailing bytes on short reads */
		tcp_read_discard(xprt, &desc);
	} while (desc.count);
	dprintk("RPC:      tcp_data_recv done\n");
	return len - desc.count;
}
|
static void tcp_data_ready(struct sock *sk, int bytes) |
{ |
struct rpc_xprt *xprt; |
read_descriptor_t rd_desc; |
|
read_lock(&sk->callback_lock); |
dprintk("RPC: tcp_data_ready...\n"); |
if (!(xprt = xprt_from_sock(sk))) { |
printk("RPC: tcp_data_ready socket info not found!\n"); |
goto out; |
} |
if (xprt->shutdown) |
goto out; |
|
/* We use rd_desc to pass struct xprt to tcp_data_recv */ |
rd_desc.buf = (char *)xprt; |
rd_desc.count = 65536; |
tcp_read_sock(sk, &rd_desc, tcp_data_recv); |
out: |
read_unlock(&sk->callback_lock); |
} |
|
/*
 * state_change callback for TCP transports: track connection
 * establishment and teardown, resetting the record-parsing state on
 * each new connection.
 */
static void
tcp_state_change(struct sock *sk)
{
	struct rpc_xprt	*xprt;

	read_lock(&sk->callback_lock);
	if (!(xprt = xprt_from_sock(sk)))
		goto out;
	dprintk("RPC:      tcp_state_change client %p...\n", xprt);
	dprintk("RPC:      state %x conn %d dead %d zapped %d\n",
				sk->state, xprt_connected(xprt),
				sk->dead, sk->zapped);

	switch (sk->state) {
	case TCP_ESTABLISHED:
		/* Already marked connected: nothing more to do. */
		if (xprt_test_and_set_connected(xprt))
			break;

		/* Reset TCP record info */
		xprt->tcp_offset = 0;
		xprt->tcp_reclen = 0;
		xprt->tcp_copied = 0;
		xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;

		/* Wake the task waiting in the connect path, if any. */
		spin_lock_bh(&xprt->sock_lock);
		if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
			rpc_wake_up_task(xprt->snd_task);
		spin_unlock_bh(&xprt->sock_lock);
		break;
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Still handshaking: neither connected nor dead yet. */
		break;
	default:
		xprt_disconnect(xprt);
		break;
	}
 out:
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible_all(sk->sleep);
	read_unlock(&sk->callback_lock);
}
|
/* |
* Called when more output buffer space is available for this socket. |
* We try not to wake our writers until they can make "significant" |
* progress, otherwise we'll waste resources thrashing sock_sendmsg |
* with a bunch of small requests. |
*/ |
/*
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing sock_sendmsg
 * with a bunch of small requests.
 */
static void
xprt_write_space(struct sock *sk)
{
	struct rpc_xprt	*xprt;
	struct socket	*sock;

	read_lock(&sk->callback_lock);
	if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->socket))
		goto out;
	if (xprt->shutdown)
		goto out;

	/* Wait until we have enough socket memory */
	if (xprt->stream) {
		/* from net/ipv4/tcp.c:tcp_write_space */
		if (tcp_wspace(sk) < tcp_min_write_space(sk))
			goto out;
	} else {
		/* from net/core/sock.c:sock_def_write_space */
		if (!sock_writeable(sk))
			goto out;
	}

	/* Only wake if somebody actually blocked on lack of space. */
	if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))
		goto out;

	spin_lock_bh(&xprt->sock_lock);
	if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
		rpc_wake_up_task(xprt->snd_task);
	spin_unlock_bh(&xprt->sock_lock);
	if (sk->sleep && waitqueue_active(sk->sleep))
		wake_up_interruptible(sk->sleep);
 out:
	read_unlock(&sk->callback_lock);
}
|
/* |
* RPC receive timeout handler. |
*/ |
static void |
xprt_timer(struct rpc_task *task) |
{ |
struct rpc_rqst *req = task->tk_rqstp; |
struct rpc_xprt *xprt = req->rq_xprt; |
|
spin_lock(&xprt->sock_lock); |
if (req->rq_received) |
goto out; |
|
xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); |
__xprt_put_cong(xprt, req); |
|
dprintk("RPC: %4d xprt_timer (%s request)\n", |
task->tk_pid, req ? "pending" : "backlogged"); |
|
task->tk_status = -ETIMEDOUT; |
out: |
task->tk_timeout = 0; |
rpc_wake_up_task(task); |
spin_unlock(&xprt->sock_lock); |
} |
|
/* |
* Place the actual RPC call. |
* We have to copy the iovec because sendmsg fiddles with its contents. |
*/ |
/*
 * Place the actual RPC call.
 * We have to copy the iovec because sendmsg fiddles with its contents.
 * Acquires the transport write lock, registers the request on the
 * receive list, then hands off to do_xprt_transmit().
 */
void
xprt_transmit(struct rpc_task *task)
{
	struct rpc_rqst	*req = task->tk_rqstp;
	struct rpc_xprt	*xprt = req->rq_xprt;

	dprintk("RPC: %4d xprt_transmit(%x)\n", task->tk_pid,
				*(u32 *)(req->rq_svec[0].iov_base));

	if (xprt->shutdown)
		task->tk_status = -EIO;

	if (task->tk_status < 0)
		return;

	if (task->tk_rpcwait)
		rpc_remove_wait_queue(task);

	/* set up everything as needed. */
	/* Write the record marker */
	if (xprt->stream) {
		u32	*marker = req->rq_svec[0].iov_base;

		/* High bit = last fragment; low 31 bits = record length. */
		*marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker)));
	}

	spin_lock_bh(&xprt->sock_lock);
	/* A reply already arrived and nothing was partially sent:
	 * there is no point transmitting again. */
	if (req->rq_received != 0 && !req->rq_bytes_sent)
		goto out_notrans;

	if (!__xprt_lock_write(xprt, task))
		goto out_notrans;

	if (!xprt_connected(xprt)) {
		task->tk_status = -ENOTCONN;
		goto out_notrans;
	}

	if (list_empty(&req->rq_list)) {
		/* Update the softirq receive buffer */
		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
				sizeof(req->rq_private_buf));
		list_add_tail(&req->rq_list, &xprt->recv);
	}
	spin_unlock_bh(&xprt->sock_lock);

	do_xprt_transmit(task);
	return;
out_notrans:
	spin_unlock_bh(&xprt->sock_lock);
}
|
/*
 * Push the request's bytes down the socket, retrying partial sends,
 * then either arm the receive timeout or queue the task according to
 * the send status.  Caller must hold the transport write lock (taken
 * in xprt_transmit()).
 */
static void
do_xprt_transmit(struct rpc_task *task)
{
	struct rpc_clnt *clnt = task->tk_client;
	struct rpc_rqst	*req = task->tk_rqstp;
	struct rpc_xprt	*xprt = req->rq_xprt;
	int status, retry = 0;


	/* Continue transmitting the packet/record. We must be careful
	 * to cope with writespace callbacks arriving _after_ we have
	 * called xprt_sendmsg().
	 */
	while (1) {
		req->rq_xtime = jiffies;
		status = xprt_sendmsg(xprt, req);

		if (status < 0)
			break;

		if (xprt->stream) {
			req->rq_bytes_sent += status;

			/* If we've sent the entire packet, immediately
			 * reset the count of bytes sent. */
			if (req->rq_bytes_sent >= req->rq_slen) {
				req->rq_bytes_sent = 0;
				goto out_receive;
			}
		} else {
			/* Datagrams go out whole or not at all. */
			if (status >= req->rq_slen)
				goto out_receive;
			status = -EAGAIN;
			break;
		}

		dprintk("RPC: %4d xmit incomplete (%d left of %d)\n",
				task->tk_pid, req->rq_slen - req->rq_bytes_sent,
				req->rq_slen);

		status = -EAGAIN;
		if (retry++ > 50)
			break;
	}

	/* If we're doing a resend and have received a reply already,
	 * then exit early.
	 * Note, though, that we can't do this if we've already started
	 * resending down a TCP stream.
	 */
	task->tk_status = status;

	switch (status) {
	case -EAGAIN:
		if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
			/* Protect against races with xprt_write_space */
			spin_lock_bh(&xprt->sock_lock);
			/* Don't race with disconnect */
			if (!xprt_connected(xprt))
				task->tk_status = -ENOTCONN;
			else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
				task->tk_timeout = req->rq_timeout.to_current;
				rpc_sleep_on(&xprt->pending, task, NULL, NULL);
			}
			spin_unlock_bh(&xprt->sock_lock);
			return;
		}
		/* Keep holding the socket if it is blocked */
		rpc_delay(task, HZ>>4);
		return;
	case -ECONNREFUSED:
		task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
		/* fall through: keep the write lock, as for -ENOTCONN */
	case -ENOTCONN:
		return;
	default:
		/* A hard stream error: tear the connection down. */
		if (xprt->stream)
			xprt_disconnect(xprt);
	}
	xprt_release_write(xprt, task);
	return;
 out_receive:
	dprintk("RPC: %4d xmit complete\n", task->tk_pid);
	spin_lock_bh(&xprt->sock_lock);
	/* Set the task's receive timeout value */
	if (!xprt->nocong) {
		int timer = rpcproc_timer(clnt, task->tk_msg.rpc_proc);
		/* RTT-estimated timeout, doubled per retransmission,
		 * capped at the configured maximum. */
		task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt, timer);
		task->tk_timeout <<= rpc_ntimeo(&clnt->cl_rtt, timer);
		task->tk_timeout <<= clnt->cl_timeout.to_retries
			- req->rq_timeout.to_retries;
		if (task->tk_timeout > req->rq_timeout.to_maxval)
			task->tk_timeout = req->rq_timeout.to_maxval;
	} else
		task->tk_timeout = req->rq_timeout.to_current;
	/* Don't race with disconnect */
	if (!xprt_connected(xprt))
		task->tk_status = -ENOTCONN;
	else if (!req->rq_received)
		rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
	__xprt_release_write(xprt, task);
	spin_unlock_bh(&xprt->sock_lock);
}
|
/* |
* Reserve an RPC call slot. |
*/ |
void |
xprt_reserve(struct rpc_task *task) |
{ |
struct rpc_xprt *xprt = task->tk_xprt; |
|
task->tk_status = -EIO; |
if (!xprt->shutdown) { |
spin_lock(&xprt->xprt_lock); |
do_xprt_reserve(task); |
spin_unlock(&xprt->xprt_lock); |
} |
} |
|
static inline void |
do_xprt_reserve(struct rpc_task *task) |
{ |
struct rpc_xprt *xprt = task->tk_xprt; |
|
task->tk_status = 0; |
if (task->tk_rqstp) |
return; |
if (xprt->free) { |
struct rpc_rqst *req = xprt->free; |
xprt->free = req->rq_next; |
req->rq_next = NULL; |
task->tk_rqstp = req; |
xprt_request_init(task, xprt); |
return; |
} |
dprintk("RPC: waiting for request slot\n"); |
task->tk_status = -EAGAIN; |
task->tk_timeout = 0; |
rpc_sleep_on(&xprt->backlog, task, NULL, NULL); |
} |
|
/* |
* Allocate a 'unique' XID |
*/ |
static u32 |
xprt_alloc_xid(void) |
{ |
static spinlock_t xid_lock = SPIN_LOCK_UNLOCKED; |
static int need_init = 1; |
static u32 xid; |
u32 ret; |
|
spin_lock(&xid_lock); |
if (unlikely(need_init)) { |
xid = CURRENT_TIME << 12; |
need_init = 0; |
} |
ret = xid++; |
spin_unlock(&xid_lock); |
return ret; |
} |
|
/* |
* Initialize RPC request |
*/ |
static void |
xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) |
{ |
struct rpc_rqst *req = task->tk_rqstp; |
|
req->rq_timeout = xprt->timeout; |
req->rq_task = task; |
req->rq_xprt = xprt; |
req->rq_xid = xprt_alloc_xid(); |
INIT_LIST_HEAD(&req->rq_list); |
dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, |
req, req->rq_xid); |
} |
|
/* |
* Release an RPC call slot |
*/ |
/*
 * Release an RPC call slot: drop the write lock and congestion slot,
 * unhook from the receive list, return the slot to the free list and
 * wake the next backlogged task.
 */
void
xprt_release(struct rpc_task *task)
{
	struct rpc_xprt	*xprt = task->tk_xprt;
	struct rpc_rqst	*req;

	if (!(req = task->tk_rqstp))
		return;
	spin_lock_bh(&xprt->sock_lock);
	__xprt_release_write(xprt, task);
	__xprt_put_cong(xprt, req);
	if (!list_empty(&req->rq_list))
		list_del(&req->rq_list);
	spin_unlock_bh(&xprt->sock_lock);
	task->tk_rqstp = NULL;
	/* Wipe the slot before recycling it. */
	memset(req, 0, sizeof(*req));	/* mark unused */

	dprintk("RPC: %4d release request %p\n", task->tk_pid, req);

	/* Return the slot under xprt_lock, which also guards the backlog. */
	spin_lock(&xprt->xprt_lock);
	req->rq_next = xprt->free;
	xprt->free   = req;

	xprt_clear_backlog(xprt);
	spin_unlock(&xprt->xprt_lock);
}
|
/* |
* Set default timeout parameters |
*/ |
void |
xprt_default_timeout(struct rpc_timeout *to, int proto) |
{ |
if (proto == IPPROTO_UDP) |
xprt_set_timeout(to, 5, 5 * HZ); |
else |
xprt_set_timeout(to, 5, 60 * HZ); |
} |
|
/* |
* Set constant timeout |
*/ |
void |
xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) |
{ |
to->to_current = |
to->to_initval = |
to->to_increment = incr; |
to->to_maxval = incr * retr; |
to->to_retries = retr; |
to->to_exponential = 0; |
} |
|
/* |
* Initialize an RPC client |
*/ |
/*
 * Initialize an RPC client transport structure: congestion defaults,
 * locks, wait queues, timeouts and the free request-slot list.  Does
 * NOT create the socket.  Returns NULL on allocation failure.
 */
static struct rpc_xprt *
xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
{
	struct rpc_xprt	*xprt;
	struct rpc_rqst	*req;
	int		i;

	dprintk("RPC:      setting up %s transport...\n",
				proto == IPPROTO_UDP? "UDP" : "TCP");

	if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
		return NULL;
	memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */

	xprt->addr = *ap;
	xprt->prot = proto;
	xprt->stream = (proto == IPPROTO_TCP)? 1 : 0;
	if (xprt->stream) {
		/* TCP provides its own congestion control; disable ours. */
		xprt->cwnd = RPC_MAXCWND;
		xprt->nocong = 1;
	} else
		xprt->cwnd = RPC_INITCWND;
	spin_lock_init(&xprt->sock_lock);
	spin_lock_init(&xprt->xprt_lock);
	init_waitqueue_head(&xprt->cong_wait);

	INIT_LIST_HEAD(&xprt->recv);

	/* Set timeout parameters */
	if (to) {
		xprt->timeout = *to;
		xprt->timeout.to_current = to->to_initval;
	} else
		xprt_default_timeout(&xprt->timeout, xprt->prot);

	INIT_RPC_WAITQ(&xprt->pending, "xprt_pending");
	INIT_RPC_WAITQ(&xprt->sending, "xprt_sending");
	INIT_RPC_WAITQ(&xprt->resend, "xprt_resend");
	INIT_RPC_WAITQ(&xprt->backlog, "xprt_backlog");

	/* initialize free list: chain the fixed slot array together,
	 * leaving 'req' pointing at the final element. */
	for (i = 0, req = xprt->slot; i < RPC_MAXREQS-1; i++, req++)
		req->rq_next = req + 1;
	req->rq_next = NULL;
	xprt->free = xprt->slot;

	/* Check whether we want to use a reserved port */
	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;

	dprintk("RPC:      created transport %p\n", xprt);

	return xprt;
}
|
/* |
* Bind to a reserved port |
*/ |
/*
 * Bind to a reserved port (< 1024), scanning downward from 800 until
 * a free one is found.  Returns 0 on success, negative errno on
 * failure.
 */
static inline int
xprt_bindresvport(struct socket *sock)
{
	struct sockaddr_in myaddr;
	int		err, port;
	kernel_cap_t saved_cap = current->cap_effective;

	/* Override capabilities.
	 * They were checked in xprt_create_proto i.e. at mount time
	 */
	cap_raise (current->cap_effective, CAP_NET_BIND_SERVICE);

	memset(&myaddr, 0, sizeof(myaddr));
	myaddr.sin_family = AF_INET;
	/* Start below 1024 to stay in privileged-port territory. */
	port = 800;
	do {
		myaddr.sin_port = htons(port);
		err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
						sizeof(myaddr));
	} while (err == -EADDRINUSE && --port > 0);
	/* Restore the caller's effective capability set. */
	current->cap_effective = saved_cap;

	if (err < 0)
		printk("RPC: Can't bind to reserved port (%d).\n", -err);

	return err;
}
|
/*
 * Attach a socket to the transport: save the old sk callbacks and
 * install the RPC data_ready/state_change/write_space handlers.
 * Returns -EBUSY if the transport already has a socket.
 */
static int
xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
{
	struct sock	*sk = sock->sk;

	if (xprt->inet)
		return -EBUSY;

	/* callback_lock serializes against the data_ready/state_change
	 * handlers, which take it for reading. */
	write_lock_bh(&sk->callback_lock);
	sk->user_data = xprt;
	xprt->old_data_ready = sk->data_ready;
	xprt->old_state_change = sk->state_change;
	xprt->old_write_space = sk->write_space;
	if (xprt->prot == IPPROTO_UDP) {
		sk->data_ready = udp_data_ready;
		/* RPC does its own checksumming in the receive path. */
		sk->no_check = UDP_CSUM_NORCV;
		/* UDP is connectionless: treat as always connected. */
		xprt_set_connected(xprt);
	} else {
		struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
		tp->nonagle = 1;	/* disable Nagle's algorithm */
		sk->data_ready = tcp_data_ready;
		sk->state_change = tcp_state_change;
		xprt_clear_connected(xprt);
	}
	sk->write_space = xprt_write_space;

	/* Reset to new socket */
	xprt->sock = sock;
	xprt->inet = sk;
	write_unlock_bh(&sk->callback_lock);

	return 0;
}
|
/* |
* Set socket buffer length |
*/ |
/*
 * Set socket buffer length.
 * Only applies to datagram transports; requested sizes are scaled by
 * the maximum congestion window so a full window of requests fits.
 */
void
xprt_sock_setbufsize(struct rpc_xprt *xprt)
{
	struct sock *sk = xprt->inet;

	if (xprt->stream)
		return;
	if (xprt->rcvsize) {
		/* Lock the value so the stack won't auto-tune it away. */
		sk->userlocks |= SOCK_RCVBUF_LOCK;
		sk->rcvbuf = xprt->rcvsize * RPC_MAXCONG * 2;
	}
	if (xprt->sndsize) {
		sk->userlocks |= SOCK_SNDBUF_LOCK;
		sk->sndbuf = xprt->sndsize * RPC_MAXCONG * 2;
		/* Poke writers in case more space just became usable. */
		sk->write_space(sk);
	}
}
|
/* |
* Create a client socket given the protocol and peer address. |
*/ |
static struct socket * |
xprt_create_socket(int proto, struct rpc_timeout *to, int resvport) |
{ |
struct socket *sock; |
int type, err; |
|
dprintk("RPC: xprt_create_socket(%s %d)\n", |
(proto == IPPROTO_UDP)? "udp" : "tcp", proto); |
|
type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; |
|
if ((err = sock_create(PF_INET, type, proto, &sock)) < 0) { |
printk("RPC: can't create socket (%d).\n", -err); |
goto failed; |
} |
|
/* bind to a reserved port */ |
if (resvport && xprt_bindresvport(sock) < 0) |
goto failed; |
|
return sock; |
|
failed: |
sock_release(sock); |
return NULL; |
} |
|
/* |
* Create an RPC client transport given the protocol and peer address. |
*/ |
struct rpc_xprt * |
xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) |
{ |
struct rpc_xprt *xprt; |
|
xprt = xprt_setup(proto, sap, to); |
if (!xprt) |
goto out_bad; |
|
dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); |
return xprt; |
out_bad: |
dprintk("RPC: xprt_create_proto failed\n"); |
if (xprt) |
kfree(xprt); |
return NULL; |
} |
|
/* |
* Prepare for transport shutdown. |
*/ |
/*
 * Prepare for transport shutdown: flag the transport dead and wake
 * every queue so no task stays asleep on it.
 */
void
xprt_shutdown(struct rpc_xprt *xprt)
{
	xprt->shutdown = 1;
	rpc_wake_up(&xprt->sending);
	rpc_wake_up(&xprt->resend);
	rpc_wake_up(&xprt->pending);
	rpc_wake_up(&xprt->backlog);
	if (waitqueue_active(&xprt->cong_wait))
		wake_up(&xprt->cong_wait);
}
|
/* |
* Clear the xprt backlog queue |
*/ |
int |
xprt_clear_backlog(struct rpc_xprt *xprt) { |
rpc_wake_up_next(&xprt->backlog); |
if (waitqueue_active(&xprt->cong_wait)) |
wake_up(&xprt->cong_wait); |
return 1; |
} |
|
/* |
* Destroy an RPC transport, killing off all requests. |
*/ |
/*
 * Destroy an RPC transport, killing off all requests.
 * Shuts down, closes the socket, then frees the structure.
 * Always returns 0.
 */
int
xprt_destroy(struct rpc_xprt *xprt)
{
	dprintk("RPC:      destroying transport %p\n", xprt);
	xprt_shutdown(xprt);
	xprt_close(xprt);
	kfree(xprt);

	return 0;
}
/xdr.c
0,0 → 1,554
/* |
* linux/net/sunrpc/xdr.c |
* |
* Generic XDR support. |
* |
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
*/ |
|
#include <linux/types.h> |
#include <linux/socket.h> |
#include <linux/string.h> |
#include <linux/kernel.h> |
#include <linux/pagemap.h> |
#include <linux/errno.h> |
#include <linux/in.h> |
#include <linux/sunrpc/xdr.h> |
#include <linux/sunrpc/msg_prot.h> |
|
/* |
* XDR functions for basic NFS types |
*/ |
/*
 * Encode a netobj: 4-byte length word followed by the data, padded
 * out to a quad (4-byte) boundary with zeroes.  Returns a pointer
 * just past the encoded object.
 */
u32 *
xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
{
	unsigned int nquads = XDR_QUADLEN(obj->len);

	/* Zero the final quad first so the padding bytes are clean. */
	p[nquads] = 0;
	*p++ = htonl(obj->len);
	memcpy(p, obj->data, obj->len);
	return p + nquads;
}
|
/*
 * Decode a netobj whose length is known in advance.  Returns NULL if
 * the on-wire length differs from 'len', otherwise a pointer past the
 * object (rounded up to a quad boundary).
 */
u32 *
xdr_decode_netobj_fixed(u32 *p, void *obj, unsigned int len)
{
	if (ntohl(*p) != len)
		return NULL;
	p++;
	memcpy(obj, p, len);
	return p + XDR_QUADLEN(len);
}
|
/*
 * Decode a variable-length netobj.  The object's data pointer aliases
 * the XDR buffer (no copy).  Returns NULL when the length exceeds
 * XDR_MAX_NETOBJ.
 */
u32 *
xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
{
	unsigned int len = ntohl(*p++);

	if (len > XDR_MAX_NETOBJ)
		return NULL;
	obj->len = len;
	obj->data = (u8 *) p;
	return p + XDR_QUADLEN(len);
}
|
/*
 * Encode an opaque byte array: length word, then the bytes, padded to
 * a quad boundary with zeroes.
 */
u32 *
xdr_encode_array(u32 *p, const char *array, unsigned int len)
{
	int nquads = XDR_QUADLEN(len);

	/* Pre-zero the last quad so the XDR padding is clean. */
	p[nquads] = 0;
	*p++ = htonl(len);
	memcpy(p, array, len);
	return p + nquads;
}
|
/*
 * Encode a NUL-terminated C string as an XDR opaque array of its
 * strlen bytes (the terminator is not encoded).
 */
u32 *
xdr_encode_string(u32 *p, const char *string)
{
	size_t len = strlen(string);

	return xdr_encode_array(p, string, len);
}
|
/*
 * Decode an XDR string, NUL-terminating it in place inside the buffer.
 * Returns NULL when the length exceeds maxlen; otherwise *sp points at
 * the terminated string and *lenp (if non-NULL) receives its length.
 */
u32 *
xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
{
	unsigned int	len;
	char		*string;

	if ((len = ntohl(*p++)) > maxlen)
		return NULL;
	if (lenp)
		*lenp = len;
	/* We need room for one terminating NUL byte.  If len is not a
	 * multiple of 4 there is padding we can overwrite; otherwise
	 * shift the string down over the (already consumed) length
	 * word so the byte after it is free. */
	if ((len % 4) != 0) {
		string = (char *) p;
	} else {
		string = (char *) (p - 1);
		memmove(string, p, len);
	}
	string[len] = '\0';
	*sp = string;
	return p + XDR_QUADLEN(len);
}
|
/*
 * Decode an XDR string without copying or terminating it: *sp points
 * straight into the buffer (NOT NUL-terminated) and *lenp receives
 * the length.  Returns NULL when the length exceeds maxlen.
 */
u32 *
xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
{
	unsigned int len = ntohl(*p++);

	if (len > maxlen)
		return NULL;
	*lenp = len;
	*sp = (char *) p;
	return p + XDR_QUADLEN(len);
}
|
|
/*
 * Attach a page vector to an xdr_buf as its payload, adding XDR pad
 * bytes in the tail iovec when the length is not quad-aligned, and
 * growing xdr->len accordingly.
 */
void
xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
		 unsigned int len)
{
	xdr->pages = pages;
	xdr->page_base = base;
	xdr->page_len = len;

	if (len & 3) {
		struct iovec *iov = xdr->tail;
		unsigned int pad = 4 - (len & 3);

		/* Static zero bytes serve as the XDR padding. */
		iov->iov_base = (void *) "\0\0\0";
		iov->iov_len = pad;
		len += pad;
	}
	xdr->len += len;
}
|
/*
 * Insert a page vector into an xdr_buf at byte position 'offset' of
 * the head iovec: the head is truncated at 'offset' and its remainder
 * becomes the tail, with the pages logically in between.
 */
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
		 struct page **pages, unsigned int base, unsigned int len)
{
	struct iovec *head = xdr->head;
	struct iovec *tail = xdr->tail;
	char *buf = (char *)head->iov_base;
	unsigned int buflen = head->iov_len;

	head->iov_len  = offset;

	xdr->pages = pages;
	xdr->page_base = base;
	xdr->page_len = len;

	/* The bytes after 'offset' in the original head become the tail. */
	tail->iov_base = buf + offset;
	tail->iov_len = buflen - offset;

	xdr->len += len;
}
|
/* |
* Realign the iovec if the server missed out some reply elements |
* (such as post-op attributes,...) |
* Note: This is a simple implementation that assumes that |
* len <= iov->iov_len !!! |
* The RPC header (assumed to be the 1st element in the iov array) |
* is not shifted. |
*/ |
/*
 * Realign the iovec if the server missed out some reply elements
 * (such as post-op attributes,...)
 * Note: This is a simple implementation that assumes that
 *            len <= iov->iov_len !!!
 * The RPC header (assumed to be the 1st element in the iov array) is
 * not shifted.
 * Works from the last iovec backwards, making 'len' bytes of room at
 * the front of each element and pulling them in from the previous one.
 */
void xdr_shift_iovec(struct iovec *iov, int nr, size_t len)
{
	struct iovec *pvec;

	for (pvec = iov + nr - 1; nr > 1; nr--, pvec--) {
		struct iovec *svec = pvec - 1;

		if (len > pvec->iov_len) {
			printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
			return;
		}
		/* Open up 'len' bytes at the front of this element... */
		memmove((char *)pvec->iov_base + len, pvec->iov_base,
			pvec->iov_len - len);

		if (len > svec->iov_len) {
			printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
			return;
		}
		/* ...and fill them with the tail of the previous element. */
		memcpy(pvec->iov_base,
		       (char *)svec->iov_base + svec->iov_len - len, len);
	}
}
|
/* |
* Map a struct xdr_buf into an iovec array. |
*/ |
int xdr_kmap(struct iovec *iov_base, struct xdr_buf *xdr, unsigned int base) |
{ |
struct iovec *iov = iov_base; |
struct page **ppage = xdr->pages; |
struct page **first_kmap = NULL; |
unsigned int len, pglen = xdr->page_len; |
|
len = xdr->head[0].iov_len; |
if (base < len) { |
iov->iov_len = len - base; |
iov->iov_base = (char *)xdr->head[0].iov_base + base; |
iov++; |
base = 0; |
} else |
base -= len; |
|
if (pglen == 0) |
goto map_tail; |
if (base >= pglen) { |
base -= pglen; |
goto map_tail; |
} |
if (base || xdr->page_base) { |
pglen -= base; |
base += xdr->page_base; |
ppage += base >> PAGE_CACHE_SHIFT; |
base &= ~PAGE_CACHE_MASK; |
} |
do { |
len = PAGE_CACHE_SIZE; |
if (!first_kmap) { |
first_kmap = ppage; |
iov->iov_base = kmap(*ppage); |
} else { |
iov->iov_base = kmap_nonblock(*ppage); |
if (!iov->iov_base) |
goto out_err; |
} |
if (base) { |
iov->iov_base += base; |
len -= base; |
base = 0; |
} |
if (pglen < len) |
len = pglen; |
iov->iov_len = len; |
iov++; |
ppage++; |
} while ((pglen -= len) != 0); |
map_tail: |
if (xdr->tail[0].iov_len) { |
iov->iov_len = xdr->tail[0].iov_len - base; |
iov->iov_base = (char *)xdr->tail[0].iov_base + base; |
iov++; |
} |
return (iov - iov_base); |
out_err: |
for (; first_kmap != ppage; first_kmap++) |
kunmap(*first_kmap); |
return 0; |
} |
|
/*
 * Undo an xdr_kmap(): unmap (and flush) the pages that were mapped
 * for the given starting offset.  'niov' is the iovec count returned
 * by xdr_kmap, used to cope with a partial mapping.
 */
void xdr_kunmap(struct xdr_buf *xdr, unsigned int base, int niov)
{
	struct page	**ppage = xdr->pages;
	unsigned int	pglen = xdr->page_len;

	if (!pglen)
		return;
	if (base >= xdr->head[0].iov_len)
		base -= xdr->head[0].iov_len;
	else {
		/* One iovec was used for the head; it needs no unmap. */
		niov--;
		base = 0;
	}

	if (base >= pglen)
		return;
	if (base || xdr->page_base) {
		pglen -= base;
		base += xdr->page_base;
		ppage += base >> PAGE_CACHE_SHIFT;
		/* Note: The offset means that the length of the first page
		 * is really (PAGE_CACHE_SIZE - (base & ~PAGE_CACHE_MASK)).
		 * In order to avoid an extra test inside the loop, we bump
		 * pglen here, and just subtract PAGE_CACHE_SIZE... */
		pglen += base & ~PAGE_CACHE_MASK;
	}
	/*
	 * In case we could only do a partial xdr_kmap, all remaining iovecs
	 * refer to pages. Otherwise we detect the end through pglen.
	 */
	for (; niov; niov--) {
		flush_dcache_page(*ppage);
		kunmap(*ppage);
		if (pglen <= PAGE_CACHE_SIZE)
			break;
		pglen -= PAGE_CACHE_SIZE;
		ppage++;
	}
}
|
/*
 * Copy data out of an skb (via 'copy_actor') into an xdr_buf, starting
 * at byte offset 'base' of the logical buffer (head, then pages, then
 * tail).  Stops early when the actor copies short or the reader runs
 * out of data.  Pages are mapped with atomic kmaps, so the actor must
 * not sleep.
 */
void
xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
			  skb_reader_t *desc,
			  skb_read_actor_t copy_actor)
{
	struct page	**ppage = xdr->pages;
	unsigned int	len, pglen = xdr->page_len;
	int		ret;

	len = xdr->head[0].iov_len;
	if (base < len) {
		len -= base;
		ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
		/* Short copy or exhausted reader: nothing more to do. */
		if (ret != len || !desc->count)
			return;
		base = 0;
	} else
		base -= len;

	if (pglen == 0)
		goto copy_tail;
	if (base >= pglen) {
		base -= pglen;
		goto copy_tail;
	}
	if (base || xdr->page_base) {
		/* Fold both offsets into a page index + in-page offset. */
		pglen -= base;
		base += xdr->page_base;
		ppage += base >> PAGE_CACHE_SHIFT;
		base &= ~PAGE_CACHE_MASK;
	}
	do {
		char *kaddr;

		len = PAGE_CACHE_SIZE;
		kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
		if (base) {
			/* First page starts mid-page. */
			len -= base;
			if (pglen < len)
				len = pglen;
			ret = copy_actor(desc, kaddr + base, len);
			base = 0;
		} else {
			if (pglen < len)
				len = pglen;
			ret = copy_actor(desc, kaddr, len);
		}
		kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
		if (ret != len || !desc->count)
			return;
		ppage++;
	} while ((pglen -= len) != 0);
copy_tail:
	len = xdr->tail[0].iov_len;
	if (len)
		copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len);
}
|
/* |
* Helper routines for doing 'memmove' like operations on a struct xdr_buf |
* |
* _shift_data_right_pages |
* @pages: vector of pages containing both the source and dest memory area. |
* @pgto_base: page vector address of destination |
* @pgfrom_base: page vector address of source |
* @len: number of bytes to copy |
* |
* Note: the addresses pgto_base and pgfrom_base are both calculated in |
* the same way: |
* if a memory area starts at byte 'base' in page 'pages[i]', |
* then its address is given as (i << PAGE_CACHE_SHIFT) + base |
* Also note: pgfrom_base must be < pgto_base, but the memory areas |
* they point to may overlap. |
*/ |
static void
_shift_data_right_pages(struct page **pages, size_t pgto_base,
		size_t pgfrom_base, size_t len)
{
	struct page **pgfrom, **pgto;
	char *vfrom, *vto;
	size_t copy;

	/* Shifting right: the destination must lie above the source. */
	BUG_ON(pgto_base <= pgfrom_base);

	/* Copy backwards, from the last byte toward the first, so an
	 * overlapping source is never clobbered before it is read. */
	pgto_base += len;
	pgfrom_base += len;

	/* Split each end address into a page pointer ... */
	pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT);
	pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT);

	/* ... and an offset within that page. */
	pgto_base &= ~PAGE_CACHE_MASK;
	pgfrom_base &= ~PAGE_CACHE_MASK;

	do {
		/* Are any pointers crossing a page boundary? */
		if (pgto_base == 0) {
			pgto_base = PAGE_CACHE_SIZE;
			pgto--;
		}
		if (pgfrom_base == 0) {
			pgfrom_base = PAGE_CACHE_SIZE;
			pgfrom--;
		}

		/* Copy at most up to the start of whichever page —
		 * source or destination — is reached first. */
		copy = len;
		if (copy > pgto_base)
			copy = pgto_base;
		if (copy > pgfrom_base)
			copy = pgfrom_base;
		pgto_base -= copy;
		pgfrom_base -= copy;

		vto = kmap_atomic(*pgto, KM_USER0);
		vfrom = kmap_atomic(*pgfrom, KM_USER1);
		/* memmove, not memcpy: within a single page the source
		 * and destination ranges may overlap. */
		memmove(vto + pgto_base, vfrom + pgfrom_base, copy);
		kunmap_atomic(vfrom, KM_USER1);
		kunmap_atomic(vto, KM_USER0);

	} while ((len -= copy) != 0);
}
|
/* |
* _copy_to_pages |
* @pages: array of pages |
* @pgbase: page vector address of destination |
* @p: pointer to source data |
* @len: length |
* |
* Copies data from an arbitrary memory location into an array of pages |
* The copy is assumed to be non-overlapping. |
*/ |
static void |
_copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) |
{ |
struct page **pgto; |
char *vto; |
size_t copy; |
|
pgto = pages + (pgbase >> PAGE_CACHE_SHIFT); |
pgbase &= ~PAGE_CACHE_MASK; |
|
do { |
copy = PAGE_CACHE_SIZE - pgbase; |
if (copy > len) |
copy = len; |
|
vto = kmap_atomic(*pgto, KM_USER0); |
memcpy(vto + pgbase, p, copy); |
kunmap_atomic(vto, KM_USER0); |
|
pgbase += copy; |
if (pgbase == PAGE_CACHE_SIZE) { |
pgbase = 0; |
pgto++; |
} |
p += copy; |
|
} while ((len -= copy) != 0); |
} |
|
/* |
* _copy_from_pages |
* @p: pointer to destination |
* @pages: array of pages |
* @pgbase: offset of source data |
* @len: length |
* |
* Copies data into an arbitrary memory location from an array of pages |
* The copy is assumed to be non-overlapping. |
*/ |
static void |
_copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) |
{ |
struct page **pgfrom; |
char *vfrom; |
size_t copy; |
|
pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT); |
pgbase &= ~PAGE_CACHE_MASK; |
|
do { |
copy = PAGE_CACHE_SIZE - pgbase; |
if (copy > len) |
copy = len; |
|
vfrom = kmap_atomic(*pgfrom, KM_USER0); |
memcpy(p, vfrom + pgbase, copy); |
kunmap_atomic(vfrom, KM_USER0); |
|
pgbase += copy; |
if (pgbase == PAGE_CACHE_SIZE) { |
pgbase = 0; |
pgfrom++; |
} |
p += copy; |
|
} while ((len -= copy) != 0); |
} |
|
/* |
* xdr_shrink_bufhead |
* @buf: xdr_buf |
* @len: bytes to remove from buf->head[0] |
* |
* Shrinks XDR buffer's header iovec buf->head[0] by |
* 'len' bytes. The extra data is not lost, but is instead |
* moved into the inlined pages and/or the tail. |
*/ |
void
xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
{
	struct iovec *head, *tail;
	size_t copy, offs;
	unsigned int pglen = buf->page_len;

	tail = buf->tail;
	head = buf->head;
	/* Can only shrink by as much as the head actually holds. */
	BUG_ON (len > head->iov_len);

	/* Shift the tail first */
	if (tail->iov_len != 0) {
		/* Make room at the front of the tail for 'len' incoming
		 * bytes (memmove: ranges overlap within the tail). */
		if (tail->iov_len > len) {
			copy = tail->iov_len - len;
			memmove((char *)tail->iov_base + len,
					tail->iov_base, copy);
		}
		/* Copy from the inlined pages into the tail */
		copy = len;
		if (copy > pglen)
			copy = pglen;
		/* offs: where in the tail the page data lands (head data,
		 * if any, goes in front of it). */
		offs = len - copy;
		if (offs >= tail->iov_len)
			copy = 0;
		else if (copy > tail->iov_len - offs)
			copy = tail->iov_len - offs;
		if (copy != 0)
			_copy_from_pages((char *)tail->iov_base + offs,
					buf->pages,
					buf->page_base + pglen + offs - len,
					copy);
		/* Do we also need to copy data from the head into the tail ? */
		if (len > pglen) {
			offs = copy = len - pglen;
			if (copy > tail->iov_len)
				copy = tail->iov_len;
			memcpy(tail->iov_base,
					(char *)head->iov_base +
					head->iov_len - offs,
					copy);
		}
	}
	/* Now handle pages */
	if (pglen != 0) {
		/* Push existing page data right by 'len' to make room
		 * for the bytes displaced from the head. */
		if (pglen > len)
			_shift_data_right_pages(buf->pages,
					buf->page_base + len,
					buf->page_base,
					pglen - len);
		/* Move the last 'copy' head bytes into the page front. */
		copy = len;
		if (len > pglen)
			copy = pglen;
		_copy_to_pages(buf->pages, buf->page_base,
				(char *)head->iov_base + head->iov_len - len,
				copy);
	}
	/* Finally account for the removed head bytes.  NOTE(review):
	 * buf->len shrinks too — bytes that fell off the end of the tail
	 * above are dropped, not preserved. */
	head->iov_len -= len;
	buf->len -= len;
}
|
/*
 * xdr_shift_buf
 * @buf: xdr_buf
 * @len: number of bytes to shift out of the head iovec
 *
 * Convenience wrapper around xdr_shrink_bufhead(): removes 'len'
 * bytes from the front of the buffer's head, pushing the displaced
 * data into the pages and/or tail.
 */
void
xdr_shift_buf(struct xdr_buf *buf, size_t len)
{
	xdr_shrink_bufhead(buf, len);
}