OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k/trunk/linux/linux-2.4/net/sunrpc
    from Rev 1275 to Rev 1765

Rev 1275 → Rev 1765

/stats.c
0,0 → 1,206
/*
* linux/net/sunrpc/stats.c
*
* procfs-based user access to generic RPC statistics. The stats files
* reside in /proc/net/rpc.
*
* The read routines assume that the buffer passed in is just big enough.
* If you implement an RPC service that has its own stats routine which
* appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE
* limit.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
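/*
 * For orientation, the client read routine below produces one "net" line,
 * one "rpc" line and one "proc<vers>" line per program version.  A
 * hypothetical /proc/net/rpc/nfs could therefore look like this (the
 * counter values are invented):
 *
 *	net 1287 1287 0 0
 *	rpc 1286 2 1
 *	proc2 18 0 312 0 961 4 0 0 0 0 0 0 0 0 0 0 0 0 11
 *
 * The server-side routine uses the same layout, but its rpc line carries
 * five values (call count plus the badfmt/badauth/badclnt breakdown).
 */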
 
#include <linux/module.h>
 
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
 
#define RPCDBG_FACILITY RPCDBG_MISC
 
static struct proc_dir_entry *proc_net_rpc = NULL;
 
/*
* Get RPC client stats
*/
int
rpc_proc_read(char *buffer, char **start, off_t offset, int count,
int *eof, void *data)
{
struct rpc_stat *statp = (struct rpc_stat *) data;
struct rpc_program *prog = statp->program;
struct rpc_version *vers;
int len, i, j;
 
len = sprintf(buffer,
"net %d %d %d %d\n",
statp->netcnt,
statp->netudpcnt,
statp->nettcpcnt,
statp->nettcpconn);
len += sprintf(buffer + len,
"rpc %d %d %d\n",
statp->rpccnt,
statp->rpcretrans,
statp->rpcauthrefresh);
 
for (i = 0; i < prog->nrvers; i++) {
if (!(vers = prog->version[i]))
continue;
len += sprintf(buffer + len, "proc%d %d",
vers->number, vers->nrprocs);
for (j = 0; j < vers->nrprocs; j++)
len += sprintf(buffer + len, " %d",
vers->procs[j].p_count);
buffer[len++] = '\n';
}
 
if (offset >= len) {
*start = buffer;
*eof = 1;
return 0;
}
*start = buffer + offset;
if ((len -= offset) > count)
return count;
*eof = 1;
return len;
}
 
/*
* Get RPC server stats
*/
int
svc_proc_read(char *buffer, char **start, off_t offset, int count,
int *eof, void *data)
{
struct svc_stat *statp = (struct svc_stat *) data;
struct svc_program *prog = statp->program;
struct svc_procedure *proc;
struct svc_version *vers;
int len, i, j;
 
len = sprintf(buffer,
"net %d %d %d %d\n",
statp->netcnt,
statp->netudpcnt,
statp->nettcpcnt,
statp->nettcpconn);
len += sprintf(buffer + len,
"rpc %d %d %d %d %d\n",
statp->rpccnt,
statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
statp->rpcbadfmt,
statp->rpcbadauth,
statp->rpcbadclnt);
 
for (i = 0; i < prog->pg_nvers; i++) {
if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
continue;
len += sprintf(buffer + len, "proc%d %d", i, vers->vs_nproc);
for (j = 0; j < vers->vs_nproc; j++, proc++)
len += sprintf(buffer + len, " %d", proc->pc_count);
buffer[len++] = '\n';
}
 
if (offset >= len) {
*start = buffer;
*eof = 1;
return 0;
}
*start = buffer + offset;
if ((len -= offset) > count)
return count;
*eof = 1;
return len;
}
 
/*
* Register/unregister RPC proc files
*/
static inline struct proc_dir_entry *
do_register(const char *name, void *data, int issvc)
{
rpc_proc_init();
dprintk("RPC: registering /proc/net/rpc/%s\n", name);
return create_proc_read_entry(name, 0, proc_net_rpc,
issvc? svc_proc_read : rpc_proc_read,
data);
}
 
struct proc_dir_entry *
rpc_proc_register(struct rpc_stat *statp)
{
return do_register(statp->program->name, statp, 0);
}
 
void
rpc_proc_unregister(const char *name)
{
remove_proc_entry(name, proc_net_rpc);
}
 
struct proc_dir_entry *
svc_proc_register(struct svc_stat *statp)
{
return do_register(statp->program->pg_name, statp, 1);
}
 
void
svc_proc_unregister(const char *name)
{
remove_proc_entry(name, proc_net_rpc);
}
 
void
rpc_proc_init(void)
{
dprintk("RPC: registering /proc/net/rpc\n");
if (!proc_net_rpc) {
struct proc_dir_entry *ent;
ent = proc_mkdir("net/rpc", 0);
if (ent) {
ent->owner = THIS_MODULE;
proc_net_rpc = ent;
}
}
}
 
void
rpc_proc_exit(void)
{
dprintk("RPC: unregistering /proc/net/rpc\n");
if (proc_net_rpc) {
proc_net_rpc = NULL;
remove_proc_entry("net/rpc", 0);
}
}
 
 
static int __init
init_sunrpc(void)
{
#ifdef RPC_DEBUG
rpc_register_sysctl();
#endif
rpc_proc_init();
return 0;
}
 
static void __exit
cleanup_sunrpc(void)
{
#ifdef RPC_DEBUG
rpc_unregister_sysctl();
#endif
rpc_proc_exit();
}
MODULE_LICENSE("GPL");
module_init(init_sunrpc);
module_exit(cleanup_sunrpc);
/clnt.c
0,0 → 1,962
/*
* linux/net/sunrpc/rpcclnt.c
*
* This file contains the high-level RPC interface.
* It is modeled as a finite state machine to support both synchronous
* and asynchronous requests.
*
* - RPC header generation and argument serialization.
* - Credential refresh.
* - TCP reconnect handling (when finished).
* - Retry of operation when it is suspected the operation failed because
* of uid squashing on the server, or when the credentials were stale
* and need to be refreshed, or when a packet was damaged in transit.
* This may have to be moved to the VFS layer.
*
* NB: BSD uses a more intelligent approach to guessing when a request
* or reply has been lost by keeping the RTO estimate for each procedure.
* We currently make do with a constant timeout value.
*
* Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
* Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
*/
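/*
 * Rough sketch of the state machine implemented below; each state is one
 * of the call_* handlers, and error or retry paths may revisit earlier
 * states:
 *
 *	call_start -> call_reserve -> call_reserveresult -> call_allocate
 *	  -> call_encode -> call_bind [-> call_connect -> call_connect_status]
 *	  -> call_transmit -> call_status -> call_decode
 *
 * call_timeout re-enters the sequence at call_bind after a major timeout,
 * and call_refresh/call_refreshresult run when the credentials need to be
 * refreshed before a new slot is reserved.
 */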
 
#include <asm/system.h>
 
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/utsname.h>
 
#include <linux/sunrpc/clnt.h>
 
#include <linux/nfs.h>
 
 
#define RPC_SLACK_SPACE 512 /* total overkill */
 
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
 
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
 
 
static void call_start(struct rpc_task *task);
static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
static void call_allocate(struct rpc_task *task);
static void call_encode(struct rpc_task *task);
static void call_decode(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_transmit(struct rpc_task *task);
static void call_status(struct rpc_task *task);
static void call_refresh(struct rpc_task *task);
static void call_refreshresult(struct rpc_task *task);
static void call_timeout(struct rpc_task *task);
static void call_connect(struct rpc_task *task);
static void call_connect_status(struct rpc_task *);
static u32 * call_header(struct rpc_task *task);
static u32 * call_verify(struct rpc_task *task);
 
 
/*
* Create an RPC client
* FIXME: This should also take a flags argument (as in task->tk_flags).
* It's called (among others) from pmap_create_client, which may in
* turn be called by an async task. In this case, rpciod should not be
* made to sleep too long.
*/
struct rpc_clnt *
rpc_create_client(struct rpc_xprt *xprt, char *servname,
struct rpc_program *program, u32 vers, int flavor)
{
struct rpc_version *version;
struct rpc_clnt *clnt = NULL;
 
dprintk("RPC: creating %s client for %s (xprt %p)\n",
program->name, servname, xprt);
 
if (!xprt)
goto out;
if (vers >= program->nrvers || !(version = program->version[vers]))
goto out;
 
clnt = (struct rpc_clnt *) rpc_allocate(0, sizeof(*clnt));
if (!clnt)
goto out_no_clnt;
memset(clnt, 0, sizeof(*clnt));
atomic_set(&clnt->cl_users, 0);
 
clnt->cl_xprt = xprt;
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
clnt->cl_server = servname;
clnt->cl_protname = program->name;
clnt->cl_port = xprt->addr.sin_port;
clnt->cl_prog = program->number;
clnt->cl_vers = version->number;
clnt->cl_prot = xprt->prot;
clnt->cl_stats = program->stats;
INIT_RPC_WAITQ(&clnt->cl_bindwait, "bindwait");
 
if (!clnt->cl_port)
clnt->cl_autobind = 1;
 
rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval);
 
if (!rpcauth_create(flavor, clnt))
goto out_no_auth;
 
/* save the nodename */
clnt->cl_nodelen = strlen(system_utsname.nodename);
if (clnt->cl_nodelen > UNX_MAXNODENAME)
clnt->cl_nodelen = UNX_MAXNODENAME;
memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
out:
return clnt;
 
out_no_clnt:
printk(KERN_INFO "RPC: out of memory in rpc_create_client\n");
goto out;
out_no_auth:
printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %d)\n",
flavor);
rpc_free(clnt);
clnt = NULL;
goto out;
}
 
/*
* Properly shut down an RPC client, terminating all outstanding
* requests. Note that we must be certain that cl_oneshot and
* cl_dead are cleared, or else the client would be destroyed
* when the last task releases it.
*/
int
rpc_shutdown_client(struct rpc_clnt *clnt)
{
dprintk("RPC: shutting down %s client for %s\n",
clnt->cl_protname, clnt->cl_server);
while (atomic_read(&clnt->cl_users)) {
#ifdef RPC_DEBUG
dprintk("RPC: rpc_shutdown_client: client %s, tasks=%d\n",
clnt->cl_protname, atomic_read(&clnt->cl_users));
#endif
/* Don't let rpc_release_client destroy us */
clnt->cl_oneshot = 0;
clnt->cl_dead = 0;
rpc_killall_tasks(clnt);
sleep_on_timeout(&destroy_wait, 1*HZ);
}
return rpc_destroy_client(clnt);
}
 
/*
* Delete an RPC client
*/
int
rpc_destroy_client(struct rpc_clnt *clnt)
{
dprintk("RPC: destroying %s client for %s\n",
clnt->cl_protname, clnt->cl_server);
 
if (clnt->cl_auth) {
rpcauth_destroy(clnt->cl_auth);
clnt->cl_auth = NULL;
}
if (clnt->cl_xprt) {
xprt_destroy(clnt->cl_xprt);
clnt->cl_xprt = NULL;
}
rpc_free(clnt);
return 0;
}
 
/*
* Release an RPC client
*/
void
rpc_release_client(struct rpc_clnt *clnt)
{
dprintk("RPC: rpc_release_client(%p, %d)\n",
clnt, atomic_read(&clnt->cl_users));
 
if (!atomic_dec_and_test(&clnt->cl_users))
return;
wake_up(&destroy_wait);
if (clnt->cl_oneshot || clnt->cl_dead)
rpc_destroy_client(clnt);
}
 
/*
* Default callback for async RPC calls
*/
static void
rpc_default_callback(struct rpc_task *task)
{
}
 
/*
* Export the signal mask handling for asynchronous code that
* sleeps on RPC calls
*/
void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
{
unsigned long sigallow = sigmask(SIGKILL);
unsigned long irqflags;
/* Turn off various signals */
if (clnt->cl_intr) {
struct k_sigaction *action = current->sig->action;
if (action[SIGINT-1].sa.sa_handler == SIG_DFL)
sigallow |= sigmask(SIGINT);
if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL)
sigallow |= sigmask(SIGQUIT);
}
spin_lock_irqsave(&current->sigmask_lock, irqflags);
*oldset = current->blocked;
siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]);
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, irqflags);
}
 
void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
{
unsigned long irqflags;
spin_lock_irqsave(&current->sigmask_lock, irqflags);
current->blocked = *oldset;
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, irqflags);
}
 
/*
* New rpc_call implementation
*/
int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
struct rpc_task my_task, *task = &my_task;
sigset_t oldset;
int status;
 
/* If this client is slain all further I/O fails */
if (clnt->cl_dead)
return -EIO;
 
if (flags & RPC_TASK_ASYNC) {
printk("rpc_call_sync: Illegal flag combination for synchronous task\n");
flags &= ~RPC_TASK_ASYNC;
}
 
rpc_clnt_sigmask(clnt, &oldset);
 
/* Create/initialize a new RPC task */
rpc_init_task(task, clnt, NULL, flags);
rpc_call_setup(task, msg, 0);
 
/* Set up the call info struct and execute the task */
if (task->tk_status == 0)
status = rpc_execute(task);
else {
status = task->tk_status;
rpc_release_task(task);
}
 
rpc_clnt_sigunmask(clnt, &oldset);
 
return status;
}
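/*
 * Minimal usage sketch (hypothetical caller, not part of this file): fill
 * in a struct rpc_message and issue the call.  MYPROC_READ, args and res
 * stand in for the caller's procedure number and XDR argument/result
 * structures.
 *
 *	struct rpc_message msg;
 *	int status;
 *
 *	msg.rpc_proc = MYPROC_READ;
 *	msg.rpc_argp = &args;
 *	msg.rpc_resp = &res;
 *	msg.rpc_cred = NULL;	(NULL makes rpc_call_setup() bind a cred)
 *	status = rpc_call_sync(clnt, &msg, 0);
 *
 * A negative status is an errno-style error.  rpc_call_async() below takes
 * the same message plus a callback and forces RPC_TASK_ASYNC itself.
 */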
 
/*
* New rpc_call implementation
*/
int
rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
rpc_action callback, void *data)
{
struct rpc_task *task;
sigset_t oldset;
int status;
 
/* If this client is slain all further I/O fails */
if (clnt->cl_dead)
return -EIO;
 
flags |= RPC_TASK_ASYNC;
 
rpc_clnt_sigmask(clnt, &oldset);
 
/* Create/initialize a new RPC task */
if (!callback)
callback = rpc_default_callback;
status = -ENOMEM;
if (!(task = rpc_new_task(clnt, callback, flags)))
goto out;
task->tk_calldata = data;
 
rpc_call_setup(task, msg, 0);
 
/* Set up the call info struct and execute the task */
if (task->tk_status == 0)
status = rpc_execute(task);
else {
status = task->tk_status;
rpc_release_task(task);
}
 
out:
rpc_clnt_sigunmask(clnt, &oldset);
 
return status;
}
 
 
void
rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
{
task->tk_msg = *msg;
task->tk_flags |= flags;
/* Bind the user cred */
if (task->tk_msg.rpc_cred != NULL) {
rpcauth_holdcred(task);
} else
rpcauth_bindcred(task);
 
if (task->tk_status == 0)
task->tk_action = call_start;
else
task->tk_action = NULL;
}
 
void
rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
{
struct rpc_xprt *xprt = clnt->cl_xprt;
 
xprt->sndsize = 0;
if (sndsize)
xprt->sndsize = sndsize + RPC_SLACK_SPACE;
xprt->rcvsize = 0;
if (rcvsize)
xprt->rcvsize = rcvsize + RPC_SLACK_SPACE;
xprt_sock_setbufsize(xprt);
}
 
/*
* Restart an (async) RPC call. Usually called from within the
* exit handler.
*/
void
rpc_restart_call(struct rpc_task *task)
{
if (RPC_ASSASSINATED(task))
return;
 
task->tk_action = call_start;
}
 
/*
* 0. Initial state
*
* Other FSM states can be visited zero or more times, but
* this state is visited exactly once for each RPC.
*/
static void
call_start(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
 
if (task->tk_msg.rpc_proc > clnt->cl_maxproc) {
printk(KERN_ERR "%s (vers %d): bad procedure number %d\n",
clnt->cl_protname, clnt->cl_vers,
task->tk_msg.rpc_proc);
rpc_exit(task, -EIO);
return;
}
 
dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid,
clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc,
(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
/* Increment call count */
rpcproc_count(clnt, task->tk_msg.rpc_proc)++;
clnt->cl_stats->rpccnt++;
task->tk_action = call_reserve;
}
 
/*
* 1. Reserve an RPC call slot
*/
static void
call_reserve(struct rpc_task *task)
{
dprintk("RPC: %4d call_reserve\n", task->tk_pid);
 
if (!rpcauth_uptodatecred(task)) {
task->tk_action = call_refresh;
return;
}
 
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_reserve(task);
}
 
/*
* 1b. Grok the result of xprt_reserve()
*/
static void
call_reserveresult(struct rpc_task *task)
{
int status = task->tk_status;
 
dprintk("RPC: %4d call_reserveresult (status %d)\n",
task->tk_pid, task->tk_status);
 
/*
* After a call to xprt_reserve(), we must have either
* a request slot or else an error status.
*/
task->tk_status = 0;
if (status >= 0) {
if (task->tk_rqstp) {
task->tk_action = call_allocate;
return;
}
 
printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
__FUNCTION__, status);
rpc_exit(task, -EIO);
return;
}
 
/*
* Even though there was an error, we may have acquired
* a request slot somehow. Make sure not to leak it.
*/
if (task->tk_rqstp) {
printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
__FUNCTION__, status);
xprt_release(task);
}
 
switch (status) {
case -EAGAIN: /* woken up; retry */
task->tk_action = call_reserve;
return;
case -EIO: /* probably a shutdown */
break;
default:
printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
__FUNCTION__, status);
break;
}
rpc_exit(task, status);
}
 
/*
* 2. Allocate the buffer. For details, see sched.c:rpc_malloc.
* (Note: buffer memory is freed in rpc_task_release).
*/
static void
call_allocate(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
unsigned int bufsiz;
 
dprintk("RPC: %4d call_allocate (status %d)\n",
task->tk_pid, task->tk_status);
task->tk_action = call_encode;
if (task->tk_buffer)
return;
 
/* FIXME: compute buffer requirements more exactly using
* auth->au_wslack */
bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc) + RPC_SLACK_SPACE;
 
if ((task->tk_buffer = rpc_malloc(task, bufsiz << 1)) != NULL)
return;
printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task);
 
if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) {
xprt_release(task);
task->tk_action = call_reserve;
rpc_delay(task, HZ>>4);
return;
}
 
rpc_exit(task, -ERESTARTSYS);
}
 
/*
* 3. Encode arguments of an RPC call
*/
static void
call_encode(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
struct xdr_buf *sndbuf = &req->rq_snd_buf;
struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
unsigned int bufsiz;
kxdrproc_t encode;
int status;
u32 *p;
 
dprintk("RPC: %4d call_encode (status %d)\n",
task->tk_pid, task->tk_status);
 
task->tk_action = call_bind;
 
/* Default buffer setup */
bufsiz = rpcproc_bufsiz(clnt, task->tk_msg.rpc_proc)+RPC_SLACK_SPACE;
sndbuf->head[0].iov_base = (void *)task->tk_buffer;
sndbuf->head[0].iov_len = bufsiz;
sndbuf->tail[0].iov_len = 0;
sndbuf->page_len = 0;
sndbuf->len = 0;
rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz);
rcvbuf->head[0].iov_len = bufsiz;
rcvbuf->tail[0].iov_len = 0;
rcvbuf->page_len = 0;
rcvbuf->len = bufsiz;
 
/* Zero buffer so we have automatic zero-padding of opaque & string */
memset(task->tk_buffer, 0, bufsiz);
 
/* Encode header and provided arguments */
encode = rpcproc_encode(clnt, task->tk_msg.rpc_proc);
if (!(p = call_header(task))) {
printk(KERN_INFO "RPC: call_header failed, exit EIO\n");
rpc_exit(task, -EIO);
} else
if (encode && (status = encode(req, p, task->tk_msg.rpc_argp)) < 0) {
printk(KERN_WARNING "%s: can't encode arguments: %d\n",
clnt->cl_protname, -status);
rpc_exit(task, status);
}
}
 
/*
* 4. Get the server port number if not yet set
*/
static void
call_bind(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_xprt *xprt = clnt->cl_xprt;
 
dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid,
xprt, (xprt_connected(xprt) ? "is" : "is not"));
 
task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect;
 
if (!clnt->cl_port) {
task->tk_action = call_connect;
task->tk_timeout = clnt->cl_timeout.to_maxval;
rpc_getport(task, clnt);
}
}
 
/*
* 4a. Establish socket
* Connect to the RPC server (TCP case)
*/
static void
call_connect(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
 
dprintk("RPC: %4d call_connect status %d\n",
task->tk_pid, task->tk_status);
 
if (xprt_connected(clnt->cl_xprt)) {
task->tk_action = call_transmit;
return;
}
task->tk_action = call_connect_status;
if (task->tk_status < 0)
return;
xprt_connect(task);
}
 
/*
* 4b. Sort out reconnection result
*/
static void call_connect_status(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
 
task->tk_status = 0;
if (status >= 0) {
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
return;
}
 
/* Something failed: we may have to rebind */
if (clnt->cl_autobind)
clnt->cl_port = 0;
switch (status) {
case -ECONNREFUSED:
case -ECONNRESET:
case -ENOTCONN:
case -ETIMEDOUT:
case -EAGAIN:
task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect;
break;
default:
rpc_exit(task, status);
}
}
 
/*
* 5. Transmit the RPC request, and wait for reply
*/
static void
call_transmit(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
 
dprintk("RPC: %4d call_transmit (status %d)\n",
task->tk_pid, task->tk_status);
 
task->tk_action = call_status;
if (task->tk_status < 0)
return;
xprt_transmit(task);
if (!rpcproc_decode(clnt, task->tk_msg.rpc_proc) && task->tk_status >= 0) {
task->tk_action = NULL;
rpc_wake_up_task(task);
}
}
 
/*
* 6. Sort out the RPC call status
*/
static void
call_status(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_xprt *xprt = clnt->cl_xprt;
struct rpc_rqst *req = task->tk_rqstp;
int status;
 
smp_rmb();
if (req->rq_received > 0 && !req->rq_bytes_sent)
task->tk_status = req->rq_received;
 
dprintk("RPC: %4d call_status (status %d)\n",
task->tk_pid, task->tk_status);
 
status = task->tk_status;
if (status >= 0) {
task->tk_action = call_decode;
return;
}
 
task->tk_status = 0;
switch(status) {
case -ETIMEDOUT:
task->tk_action = call_timeout;
break;
case -ECONNREFUSED:
case -ENOTCONN:
req->rq_bytes_sent = 0;
if (clnt->cl_autobind || !clnt->cl_port) {
clnt->cl_port = 0;
task->tk_action = call_bind;
break;
}
task->tk_action = call_connect;
break;
/*
* Sleep and dream of an open connection
*/
task->tk_timeout = 5 * HZ;
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
case -ENOMEM:
case -EAGAIN:
task->tk_action = call_transmit;
break;
default:
if (clnt->cl_chatty)
printk("%s: RPC call returned error %d\n",
clnt->cl_protname, -status);
rpc_exit(task, status);
}
}
 
/*
* 6a. Handle RPC timeout
* We do not release the request slot, so we keep using the
* same XID for all retransmits.
*/
static void
call_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_timeout *to = &task->tk_rqstp->rq_timeout;
 
if (xprt_adjust_timeout(to)) {
dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid);
goto retry;
}
to->to_retries = clnt->cl_timeout.to_retries;
 
dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
if (clnt->cl_softrtry) {
if (clnt->cl_chatty)
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
clnt->cl_protname, clnt->cl_server);
rpc_exit(task, -EIO);
return;
}
 
if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
task->tk_flags |= RPC_CALL_MAJORSEEN;
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
clnt->cl_protname, clnt->cl_server);
}
if (clnt->cl_autobind)
clnt->cl_port = 0;
 
retry:
clnt->cl_stats->rpcretrans++;
task->tk_action = call_bind;
task->tk_status = 0;
}
 
/*
* 7. Decode the RPC reply
*/
static void
call_decode(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
kxdrproc_t decode = rpcproc_decode(clnt, task->tk_msg.rpc_proc);
u32 *p;
 
dprintk("RPC: %4d call_decode (status %d)\n",
task->tk_pid, task->tk_status);
 
if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) {
printk(KERN_NOTICE "%s: server %s OK\n",
clnt->cl_protname, clnt->cl_server);
task->tk_flags &= ~RPC_CALL_MAJORSEEN;
}
 
if (task->tk_status < 12) {
if (!clnt->cl_softrtry) {
task->tk_action = call_transmit;
clnt->cl_stats->rpcretrans++;
goto out_retry;
}
printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
clnt->cl_protname, task->tk_status);
rpc_exit(task, -EIO);
return;
}
 
/* Check that the softirq receive buffer is valid */
if (unlikely(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
sizeof(req->rq_rcv_buf)) != 0))
printk(KERN_WARNING "%s: receive buffer is inconsistent. Please contact maintainer.\n",
__FUNCTION__);
 
/* Verify the RPC header */
if (!(p = call_verify(task))) {
/*
* When call_verify sets tk_action to NULL (via rpc_exit)
* a non-retry-able error has occurred (like the server
* not supporting a particular procedure call).
*/
if (task->tk_action == NULL)
return;
goto out_retry;
}
/*
* The following is an NFS-specific hack to cater for setuid
* processes whose uid is mapped to nobody on the server.
*/
if (task->tk_client->cl_droppriv &&
(ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) {
if (RPC_IS_SETUID(task) && task->tk_suid_retry) {
dprintk("RPC: %4d retry squashed uid\n", task->tk_pid);
task->tk_flags ^= RPC_CALL_REALUID;
task->tk_action = call_encode;
task->tk_suid_retry--;
goto out_retry;
}
}
 
task->tk_action = NULL;
 
if (decode)
task->tk_status = decode(req, p, task->tk_msg.rpc_resp);
dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
task->tk_status);
return;
out_retry:
req->rq_received = 0;
task->tk_status = 0;
}
 
/*
* 8. Refresh the credentials if rejected by the server
*/
static void
call_refresh(struct rpc_task *task)
{
dprintk("RPC: %4d call_refresh\n", task->tk_pid);
 
xprt_release(task); /* Must do to obtain new XID */
task->tk_action = call_refreshresult;
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
rpcauth_refreshcred(task);
}
 
/*
* 8a. Process the results of a credential refresh
*/
static void
call_refreshresult(struct rpc_task *task)
{
dprintk("RPC: %4d call_refreshresult (status %d)\n",
task->tk_pid, task->tk_status);
 
if (task->tk_status < 0)
rpc_exit(task, -EACCES);
else
task->tk_action = call_reserve;
}
 
/*
* Call header serialization
*/
static u32 *
call_header(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_xprt *xprt = clnt->cl_xprt;
struct rpc_rqst *req = task->tk_rqstp;
u32 *p = req->rq_svec[0].iov_base;
 
/* FIXME: check buffer size? */
if (xprt->stream)
*p++ = 0; /* fill in later */
*p++ = req->rq_xid; /* XID */
*p++ = htonl(RPC_CALL); /* CALL */
*p++ = htonl(RPC_VERSION); /* RPC version */
*p++ = htonl(clnt->cl_prog); /* program number */
*p++ = htonl(clnt->cl_vers); /* program version */
*p++ = htonl(task->tk_msg.rpc_proc); /* procedure */
return rpcauth_marshcred(task, p);
}
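/*
 * The words emitted above form the standard RPC call header, all in
 * network byte order: the XID, the message type CALL (0), RPC protocol
 * version 2, then the program, version and procedure numbers, followed by
 * whatever credential and verifier rpcauth_marshcred() appends.  For
 * stream transports one extra leading word is reserved here and filled in
 * later by the transport with the record marker.
 */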
 
/*
* Reply header verification
*/
static u32 *
call_verify(struct rpc_task *task)
{
u32 *p = task->tk_rqstp->rq_rvec[0].iov_base, n;
 
p += 1; /* skip XID */
 
if ((n = ntohl(*p++)) != RPC_REPLY) {
printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
goto garbage;
}
if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
int error = -EACCES;
 
if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) {
printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n);
} else
switch ((n = ntohl(*p++))) {
case RPC_AUTH_REJECTEDCRED:
case RPC_AUTH_REJECTEDVERF:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
dprintk("RPC: %4d call_verify: retry stale creds\n",
task->tk_pid);
rpcauth_invalcred(task);
task->tk_action = call_refresh;
return NULL;
case RPC_AUTH_BADCRED:
case RPC_AUTH_BADVERF:
/* possibly garbled cred/verf? */
if (!task->tk_garb_retry)
break;
task->tk_garb_retry--;
dprintk("RPC: %4d call_verify: retry garbled creds\n",
task->tk_pid);
task->tk_action = call_encode;
return NULL;
case RPC_AUTH_TOOWEAK:
printk(KERN_NOTICE "call_verify: server requires stronger "
"authentication.\n");
break;
default:
printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
error = -EIO;
}
dprintk("RPC: %4d call_verify: call rejected %d\n",
task->tk_pid, n);
rpc_exit(task, error);
return NULL;
}
if (!(p = rpcauth_checkverf(task, p))) {
printk(KERN_WARNING "call_verify: auth check failed\n");
goto garbage; /* bad verifier, retry */
}
switch ((n = ntohl(*p++))) {
case RPC_SUCCESS:
return p;
case RPC_PROG_UNAVAIL:
printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n",
(unsigned int)task->tk_client->cl_prog,
task->tk_client->cl_server);
goto out_eio;
case RPC_PROG_MISMATCH:
printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n",
(unsigned int)task->tk_client->cl_prog,
(unsigned int)task->tk_client->cl_vers,
task->tk_client->cl_server);
goto out_eio;
case RPC_PROC_UNAVAIL:
printk(KERN_WARNING "RPC: call_verify: proc %u unsupported by program %u, version %u on server %s\n",
(unsigned int)task->tk_msg.rpc_proc,
(unsigned int)task->tk_client->cl_prog,
(unsigned int)task->tk_client->cl_vers,
task->tk_client->cl_server);
goto out_eio;
case RPC_GARBAGE_ARGS:
break; /* retry */
default:
printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
/* Also retry */
}
 
garbage:
dprintk("RPC: %4d call_verify: server saw garbage\n", task->tk_pid);
task->tk_client->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid);
task->tk_action = call_encode;
return NULL;
}
printk(KERN_WARNING "RPC: garbage, exit EIO\n");
out_eio:
rpc_exit(task, -EIO);
return NULL;
}
/auth.c
0,0 → 1,376
/*
* linux/fs/nfs/rpcauth.c
*
* Generic RPC authentication API.
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/sunrpc/clnt.h>
#include <linux/spinlock.h>
 
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
 
#define RPC_MAXFLAVOR 8
 
static struct rpc_authops * auth_flavors[RPC_MAXFLAVOR] = {
&authnull_ops, /* AUTH_NULL */
&authunix_ops, /* AUTH_UNIX */
NULL, /* others can be loadable modules */
};
 
int
rpcauth_register(struct rpc_authops *ops)
{
unsigned int flavor;
 
if ((flavor = ops->au_flavor) >= RPC_MAXFLAVOR)
return -EINVAL;
if (auth_flavors[flavor] != NULL)
return -EPERM; /* what else? */
auth_flavors[flavor] = ops;
return 0;
}
 
int
rpcauth_unregister(struct rpc_authops *ops)
{
unsigned int flavor;
 
if ((flavor = ops->au_flavor) >= RPC_MAXFLAVOR)
return -EINVAL;
if (auth_flavors[flavor] != ops)
return -EPERM; /* what else? */
auth_flavors[flavor] = NULL;
return 0;
}
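/*
 * Sketch of how a loadable flavor would plug in (hypothetical module,
 * following the shape of the static authnull_ops/authunix_ops tables):
 *
 *	static struct rpc_authops mumble_ops = {
 *		RPC_AUTH_DES,		(au_flavor)
 *	#ifdef RPC_DEBUG
 *		"DES",			(au_name)
 *	#endif
 *		mumble_create,
 *		mumble_destroy,
 *		mumble_crcreate,
 *	};
 *
 * Its init routine would return rpcauth_register(&mumble_ops), and its
 * cleanup routine would call rpcauth_unregister(&mumble_ops).
 * rpcauth_register() returns -EINVAL for an out-of-range flavor and
 * -EPERM if the slot is already occupied.
 */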
 
struct rpc_auth *
rpcauth_create(unsigned int flavor, struct rpc_clnt *clnt)
{
struct rpc_authops *ops;
 
if (flavor >= RPC_MAXFLAVOR || !(ops = auth_flavors[flavor]))
return NULL;
clnt->cl_auth = ops->create(clnt);
return clnt->cl_auth;
}
 
void
rpcauth_destroy(struct rpc_auth *auth)
{
auth->au_ops->destroy(auth);
}
 
static spinlock_t rpc_credcache_lock = SPIN_LOCK_UNLOCKED;
 
/*
* Initialize RPC credential cache
*/
void
rpcauth_init_credcache(struct rpc_auth *auth)
{
memset(auth->au_credcache, 0, sizeof(auth->au_credcache));
auth->au_nextgc = jiffies + (auth->au_expire >> 1);
}
 
/*
* Destroy an unreferenced credential
*/
static inline void
rpcauth_crdestroy(struct rpc_cred *cred)
{
#ifdef RPC_DEBUG
if (cred->cr_magic != RPCAUTH_CRED_MAGIC)
BUG();
cred->cr_magic = 0;
if (atomic_read(&cred->cr_count) || cred->cr_auth)
BUG();
#endif
cred->cr_ops->crdestroy(cred);
}
 
/*
* Destroy a list of credentials
*/
static inline
void rpcauth_destroy_credlist(struct rpc_cred *head)
{
struct rpc_cred *cred;
 
while ((cred = head) != NULL) {
head = cred->cr_next;
rpcauth_crdestroy(cred);
}
}
 
/*
* Clear the RPC credential cache, and delete those credentials
* that are not referenced.
*/
void
rpcauth_free_credcache(struct rpc_auth *auth)
{
struct rpc_cred **q, *cred, *free = NULL;
int i;
 
spin_lock(&rpc_credcache_lock);
for (i = 0; i < RPC_CREDCACHE_NR; i++) {
q = &auth->au_credcache[i];
while ((cred = *q) != NULL) {
*q = cred->cr_next;
cred->cr_auth = NULL;
if (atomic_read(&cred->cr_count) == 0) {
cred->cr_next = free;
free = cred;
} else
cred->cr_next = NULL;
}
}
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(free);
}
 
/*
* Remove stale credentials. Avoid sleeping inside the loop.
*/
static void
rpcauth_gc_credcache(struct rpc_auth *auth)
{
struct rpc_cred **q, *cred, *free = NULL;
int i;
 
dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
spin_lock(&rpc_credcache_lock);
for (i = 0; i < RPC_CREDCACHE_NR; i++) {
q = &auth->au_credcache[i];
while ((cred = *q) != NULL) {
if (!atomic_read(&cred->cr_count) &&
time_before(cred->cr_expire, jiffies)) {
*q = cred->cr_next;
cred->cr_auth = NULL;
cred->cr_next = free;
free = cred;
continue;
}
q = &cred->cr_next;
}
}
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(free);
auth->au_nextgc = jiffies + auth->au_expire;
}
 
/*
* Insert credential into cache
*/
void
rpcauth_insert_credcache(struct rpc_auth *auth, struct rpc_cred *cred)
{
int nr;
 
nr = (cred->cr_uid & RPC_CREDCACHE_MASK);
spin_lock(&rpc_credcache_lock);
cred->cr_next = auth->au_credcache[nr];
auth->au_credcache[nr] = cred;
cred->cr_auth = auth;
get_rpccred(cred);
spin_unlock(&rpc_credcache_lock);
}
 
/*
* Look up a process' credentials in the authentication cache
*/
static struct rpc_cred *
rpcauth_lookup_credcache(struct rpc_auth *auth, int taskflags)
{
struct rpc_cred **q, *cred = NULL;
int nr = 0;
 
if (!(taskflags & RPC_TASK_ROOTCREDS))
nr = current->uid & RPC_CREDCACHE_MASK;
 
if (time_before(auth->au_nextgc, jiffies))
rpcauth_gc_credcache(auth);
 
spin_lock(&rpc_credcache_lock);
q = &auth->au_credcache[nr];
while ((cred = *q) != NULL) {
if (!(cred->cr_flags & RPCAUTH_CRED_DEAD) &&
cred->cr_ops->crmatch(cred, taskflags)) {
*q = cred->cr_next;
break;
}
q = &cred->cr_next;
}
spin_unlock(&rpc_credcache_lock);
 
if (!cred) {
cred = auth->au_ops->crcreate(taskflags);
#ifdef RPC_DEBUG
if (cred)
cred->cr_magic = RPCAUTH_CRED_MAGIC;
#endif
}
 
if (cred)
rpcauth_insert_credcache(auth, cred);
 
return (struct rpc_cred *) cred;
}
 
/*
* Remove cred handle from cache
*/
static void
rpcauth_remove_credcache(struct rpc_cred *cred)
{
struct rpc_auth *auth = cred->cr_auth;
struct rpc_cred **q, *cr;
int nr;
 
nr = (cred->cr_uid & RPC_CREDCACHE_MASK);
q = &auth->au_credcache[nr];
while ((cr = *q) != NULL) {
if (cred == cr) {
*q = cred->cr_next;
cred->cr_next = NULL;
cred->cr_auth = NULL;
break;
}
q = &cred->cr_next;
}
}
 
struct rpc_cred *
rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
{
dprintk("RPC: looking up %s cred\n",
auth->au_ops->au_name);
return rpcauth_lookup_credcache(auth, taskflags);
}
 
struct rpc_cred *
rpcauth_bindcred(struct rpc_task *task)
{
struct rpc_auth *auth = task->tk_auth;
 
dprintk("RPC: %4d looking up %s cred\n",
task->tk_pid, task->tk_auth->au_ops->au_name);
task->tk_msg.rpc_cred = rpcauth_lookup_credcache(auth, task->tk_flags);
if (task->tk_msg.rpc_cred == 0)
task->tk_status = -ENOMEM;
return task->tk_msg.rpc_cred;
}
 
int
rpcauth_matchcred(struct rpc_auth *auth, struct rpc_cred *cred, int taskflags)
{
dprintk("RPC: matching %s cred %d\n",
auth->au_ops->au_name, taskflags);
return cred->cr_ops->crmatch(cred, taskflags);
}
 
void
rpcauth_holdcred(struct rpc_task *task)
{
dprintk("RPC: %4d holding %s cred %p\n",
task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
if (task->tk_msg.rpc_cred)
get_rpccred(task->tk_msg.rpc_cred);
}
 
void
put_rpccred(struct rpc_cred *cred)
{
if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
return;
 
if (cred->cr_auth && cred->cr_flags & RPCAUTH_CRED_DEAD)
rpcauth_remove_credcache(cred);
 
if (!cred->cr_auth) {
spin_unlock(&rpc_credcache_lock);
rpcauth_crdestroy(cred);
return;
}
cred->cr_expire = jiffies + cred->cr_auth->au_expire;
spin_unlock(&rpc_credcache_lock);
}
 
void
rpcauth_unbindcred(struct rpc_task *task)
{
struct rpc_auth *auth = task->tk_auth;
struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
dprintk("RPC: %4d releasing %s cred %p\n",
task->tk_pid, auth->au_ops->au_name, cred);
 
put_rpccred(cred);
task->tk_msg.rpc_cred = NULL;
}
 
u32 *
rpcauth_marshcred(struct rpc_task *task, u32 *p)
{
struct rpc_auth *auth = task->tk_auth;
struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
dprintk("RPC: %4d marshaling %s cred %p\n",
task->tk_pid, auth->au_ops->au_name, cred);
return cred->cr_ops->crmarshal(task, p,
task->tk_flags & RPC_CALL_REALUID);
}
 
u32 *
rpcauth_checkverf(struct rpc_task *task, u32 *p)
{
struct rpc_auth *auth = task->tk_auth;
struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
dprintk("RPC: %4d validating %s cred %p\n",
task->tk_pid, auth->au_ops->au_name, cred);
return cred->cr_ops->crvalidate(task, p);
}
 
int
rpcauth_refreshcred(struct rpc_task *task)
{
struct rpc_auth *auth = task->tk_auth;
struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
dprintk("RPC: %4d refreshing %s cred %p\n",
task->tk_pid, auth->au_ops->au_name, cred);
task->tk_status = cred->cr_ops->crrefresh(task);
return task->tk_status;
}
 
void
rpcauth_invalcred(struct rpc_task *task)
{
dprintk("RPC: %4d invalidating %s cred %p\n",
task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
spin_lock(&rpc_credcache_lock);
if (task->tk_msg.rpc_cred)
task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
spin_unlock(&rpc_credcache_lock);
}
 
int
rpcauth_uptodatecred(struct rpc_task *task)
{
int retval;
spin_lock(&rpc_credcache_lock);
retval = !(task->tk_msg.rpc_cred) ||
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
spin_unlock(&rpc_credcache_lock);
return retval;
}
/auth_unix.c
0,0 → 1,252
/*
* linux/net/sunrpc/rpcauth_unix.c
*
* UNIX-style authentication; no AUTH_SHORT support
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/auth.h>
 
#define NFS_NGROUPS 16
struct unx_cred {
struct rpc_cred uc_base;
uid_t uc_fsuid;
gid_t uc_gid, uc_fsgid;
gid_t uc_gids[NFS_NGROUPS];
};
#define uc_uid uc_base.cr_uid
#define uc_count uc_base.cr_count
#define uc_flags uc_base.cr_flags
#define uc_expire uc_base.cr_expire
 
#define UNX_CRED_EXPIRE (60 * HZ)
 
#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
 
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
 
static struct rpc_credops unix_credops;
 
static struct rpc_auth *
unx_create(struct rpc_clnt *clnt)
{
struct rpc_auth *auth;
 
dprintk("RPC: creating UNIX authenticator for client %p\n", clnt);
if (!(auth = (struct rpc_auth *) rpc_allocate(0, sizeof(*auth))))
return NULL;
auth->au_cslack = UNX_WRITESLACK;
auth->au_rslack = 2; /* assume AUTH_NULL verf */
auth->au_expire = UNX_CRED_EXPIRE;
auth->au_ops = &authunix_ops;
 
rpcauth_init_credcache(auth);
 
return auth;
}
 
static void
unx_destroy(struct rpc_auth *auth)
{
dprintk("RPC: destroying UNIX authenticator %p\n", auth);
rpcauth_free_credcache(auth);
rpc_free(auth);
}
 
static struct rpc_cred *
unx_create_cred(int flags)
{
struct unx_cred *cred;
int i;
 
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
current->uid, current->gid);
 
if (!(cred = (struct unx_cred *) rpc_allocate(flags, sizeof(*cred))))
return NULL;
 
atomic_set(&cred->uc_count, 0);
cred->uc_flags = RPCAUTH_CRED_UPTODATE;
if (flags & RPC_TASK_ROOTCREDS) {
cred->uc_uid = cred->uc_fsuid = 0;
cred->uc_gid = cred->uc_fsgid = 0;
cred->uc_gids[0] = NOGROUP;
} else {
int groups = current->ngroups;
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
 
cred->uc_uid = current->uid;
cred->uc_gid = current->gid;
cred->uc_fsuid = current->fsuid;
cred->uc_fsgid = current->fsgid;
for (i = 0; i < groups; i++)
cred->uc_gids[i] = (gid_t) current->groups[i];
if (i < NFS_NGROUPS)
cred->uc_gids[i] = NOGROUP;
}
cred->uc_base.cr_ops = &unix_credops;
 
return (struct rpc_cred *) cred;
}
 
struct rpc_cred *
authunix_fake_cred(struct rpc_task *task, uid_t uid, gid_t gid)
{
struct unx_cred *cred;
 
dprintk("RPC: allocating fake UNIX cred for uid %d gid %d\n",
uid, gid);
 
if (!(cred = (struct unx_cred *) rpc_malloc(task, sizeof(*cred))))
return NULL;
 
atomic_set(&cred->uc_count, 1);
cred->uc_flags = RPCAUTH_CRED_DEAD|RPCAUTH_CRED_UPTODATE;
cred->uc_uid = uid;
cred->uc_gid = gid;
cred->uc_fsuid = uid;
cred->uc_fsgid = gid;
cred->uc_gids[0] = (gid_t) NOGROUP;
 
return task->tk_msg.rpc_cred = (struct rpc_cred *) cred;
}
 
static void
unx_destroy_cred(struct rpc_cred *cred)
{
rpc_free(cred);
}
 
/*
* Match credentials against current process creds.
* The root_override argument takes care of cases where the caller may
* request root creds (e.g. for NFS swapping).
*/
static int
unx_match(struct rpc_cred *rcred, int taskflags)
{
struct unx_cred *cred = (struct unx_cred *) rcred;
int i;
 
if (!(taskflags & RPC_TASK_ROOTCREDS)) {
int groups;
 
if (cred->uc_uid != current->uid
|| cred->uc_gid != current->gid
|| cred->uc_fsuid != current->fsuid
|| cred->uc_fsgid != current->fsgid)
return 0;
 
groups = current->ngroups;
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
for (i = 0; i < groups ; i++)
if (cred->uc_gids[i] != (gid_t) current->groups[i])
return 0;
return 1;
}
return (cred->uc_uid == 0 && cred->uc_fsuid == 0
&& cred->uc_gid == 0 && cred->uc_fsgid == 0
&& cred->uc_gids[0] == (gid_t) NOGROUP);
}
 
/*
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
static u32 *
unx_marshal(struct rpc_task *task, u32 *p, int ruid)
{
struct rpc_clnt *clnt = task->tk_client;
struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
u32 *base, *hold;
int i, n;
 
*p++ = htonl(RPC_AUTH_UNIX);
base = p++;
*p++ = htonl(jiffies/HZ);
 
/*
* Copy the UTS nodename captured when the client was created.
*/
n = clnt->cl_nodelen;
*p++ = htonl(n);
memcpy(p, clnt->cl_nodename, n);
p += (n + 3) >> 2;
 
/* Note: we don't use real uid if it involves raising privilege */
if (ruid && cred->uc_uid != 0 && cred->uc_gid != 0) {
*p++ = htonl((u32) cred->uc_uid);
*p++ = htonl((u32) cred->uc_gid);
} else {
*p++ = htonl((u32) cred->uc_fsuid);
*p++ = htonl((u32) cred->uc_fsgid);
}
hold = p++;
for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
*p++ = htonl((u32) cred->uc_gids[i]);
*hold = htonl(p - hold - 1); /* gid array length */
*base = htonl((p - base - 1) << 2); /* cred length */
 
*p++ = htonl(RPC_AUTH_NULL);
*p++ = htonl(0);
 
return p;
}
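/*
 * The credential built above follows the usual AUTH_UNIX (AUTH_SYS) layout:
 *
 *	AUTH_UNIX, body length, stamp, nodename length + padded nodename,
 *	uid, gid, number of gids, gid list
 *
 * followed by an empty AUTH_NULL verifier (flavor 0, length 0).  The body
 * length and the gid-array length are back-patched through 'base' and
 * 'hold' once the final sizes are known.
 */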
 
/*
* Refresh credentials. This is a no-op for AUTH_UNIX
*/
static int
unx_refresh(struct rpc_task *task)
{
task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
return task->tk_status = -EACCES;
}
 
static u32 *
unx_validate(struct rpc_task *task, u32 *p)
{
u32 n = ntohl(*p++);
 
if (n != RPC_AUTH_NULL && n != RPC_AUTH_UNIX && n != RPC_AUTH_SHORT) {
printk("RPC: bad verf flavor: %ld\n", (unsigned long) n);
return NULL;
}
if ((n = ntohl(*p++)) > 400) {
printk("RPC: giant verf size: %ld\n", (unsigned long) n);
return NULL;
}
task->tk_auth->au_rslack = (n >> 2) + 2;
p += (n >> 2);
 
return p;
}
 
struct rpc_authops authunix_ops = {
RPC_AUTH_UNIX,
#ifdef RPC_DEBUG
"UNIX",
#endif
unx_create,
unx_destroy,
unx_create_cred
};
 
static
struct rpc_credops unix_credops = {
unx_destroy_cred,
unx_match,
unx_marshal,
unx_refresh,
unx_validate
};
/svcauth_des.c
0,0 → 1,215
/*
* linux/net/sunrpc/svcauth_des.c
*
* Server-side AUTH_DES handling.
*
* Copyright (C) 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/svcsock.h>
 
#define RPCDBG_FACILITY RPCDBG_AUTH
 
/*
* DES credential cache.
* The cache is indexed by fullname/key to allow for multiple sessions
* by the same user from different hosts.
* It would be tempting to use the client's IP address rather than the
* conversation key as an index, but that could become problematic for
* multi-homed hosts that distribute traffic across their interfaces.
*/
struct des_cred {
struct des_cred * dc_next;
char * dc_fullname;
u32 dc_nickname;
des_cblock dc_key; /* conversation key */
des_cblock dc_xkey; /* encrypted conv. key */
des_key_schedule dc_keysched;
u32 dc_window; /* window negotiated by the client */
u32 dc_timestamp[2]; /* last accepted timestamp (sec, usec) */
int dc_valid; /* fullname creds have been validated */
int dc_locked; /* key update in progress */
};
 
#define ADN_FULLNAME 0
#define ADN_NICKNAME 1
 
/*
* The default slack allowed when checking for replayed credentials
* (in milliseconds).
*/
#define DES_REPLAY_SLACK 2000
 
/*
* Make sure we don't place more than one call to the key server at
* a time.
*/
static int in_keycall = 0;
 
#define FAIL(err) \
{ if (data) put_cred(data); \
*authp = rpc_autherr_##err; \
return; \
}
 
void
svcauth_des(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
struct svc_buf *argp = &rqstp->rq_argbuf;
struct svc_buf *resp = &rqstp->rq_resbuf;
struct svc_cred *cred = &rqstp->rq_cred;
struct des_cred *data = NULL;
u32 cryptkey[2];
u32 cryptbuf[4];
u32 *p = argp->buf;
u32 namekind;
char *fullname = NULL;
des_cblock ivec; /* IV for the CBC decrypt of fullname creds */
struct timeval now;
int len = argp->len, status, delta;
 
*authp = rpc_auth_ok;
 
if ((argp->len -= 3) < 0) {
*statp = rpc_garbage_args;
return;
}
 
p++; /* skip length field */
namekind = ntohl(*p++); /* fullname/nickname */
 
/* Get the credentials */
if (namekind == ADN_NICKNAME) {
/* If we can't find the cached session key, initiate a
* new session. */
if (!(data = get_cred_bynick(*p++)))
FAIL(rejectedcred);
} else if (namekind == ADN_FULLNAME) {
p = xdr_decode_string(p, &fullname, &len, RPC_MAXNETNAMELEN);
if (p == NULL)
FAIL(badcred);
cryptkey[0] = *p++; /* get the encrypted key */
cryptkey[1] = *p++;
cryptbuf[2] = *p++; /* get the encrypted window */
} else {
FAIL(badcred);
}
 
/* If we're just updating the key, silently discard the request. */
if (data && data->dc_locked) {
*authp = rpc_autherr_dropit;
_put_cred(data); /* release but don't unlock */
return;
}
 
/* Get the verifier flavor and length */
if (ntohl(*p++) != RPC_AUTH_DES || ntohl(*p++) != 12)
FAIL(badverf);
 
cryptbuf[0] = *p++; /* encrypted time stamp */
cryptbuf[1] = *p++;
cryptbuf[3] = *p++; /* 0 or window - 1 */
 
if (namekind == ADN_NICKNAME) {
status = des_ecb_encrypt((des_block *) cryptbuf,
(des_block *) cryptbuf,
data->dc_keysched, DES_DECRYPT);
} else {
/* We first have to decrypt the new session key and
* fill in the UNIX creds. */
if (!(data = get_cred_byname(rqstp, authp, fullname, cryptkey)))
return;
status = des_cbc_encrypt((des_cblock *) cryptbuf,
(des_cblock *) cryptbuf, 16,
data->dc_keysched,
(des_cblock *) &ivec,
DES_DECRYPT);
}
if (status) {
printk("svcauth_des: DES decryption failed (status %d)\n",
status);
FAIL(badverf);
}
 
/* Now check the whole lot */
if (namekind == ADN_FULLNAME) {
unsigned long winverf;
 
data->dc_window = ntohl(cryptbuf[2]);
winverf = ntohl(cryptbuf[3]); /* the verifier carries window - 1 */
if (winverf != data->dc_window - 1) {
printk("svcauth_des: bad window verifier!\n");
FAIL(badverf);
}
}
 
/* XDR the decrypted timestamp */
cryptbuf[0] = ntohl(cryptbuf[0]);
cryptbuf[1] = ntohl(cryptbuf[1]);
if (cryptbuf[1] > 1000000) {
dprintk("svcauth_des: bad usec value %u\n", cryptbuf[1]);
if (namekind == ADN_NICKNAME)
FAIL(rejectedverf);
FAIL(badverf);
}
/*
* Check for replayed credentials. We must allow for reordering
* of requests by the network, and the OS scheduler, hence we
* cannot expect timestamps to be increasing monotonically.
* This opens a small security hole, therefore the replay_slack
* value shouldn't be too large.
*/
if ((delta = cryptbuf[0] - data->dc_timestamp[0]) <= 0) {
switch (delta) {
case -1:
delta = -1000000;
case 0:
delta += cryptbuf[1] - data->dc_timestamp[1];
break;
default:
delta = -1000000;
}
if (delta < DES_REPLAY_SLACK)
FAIL(rejectedverf);
#ifdef STRICT_REPLAY_CHECKS
/* TODO: compare time stamp to last five timestamps cached
* and reject (drop?) request if a match is found. */
#endif
}
 
now = xtime;
now.tv_sec -= data->dc_window;
if (now.tv_sec < cryptbuf[0] ||
(now.tv_sec == cryptbuf[0] && now.tv_usec < cryptbuf[1]))
FAIL(rejectedverf);
 
/* Okay, we're done. Update the lot */
if (namekind == ADN_FULLNAME)
data->dc_valid = 1;
data->dc_timestamp[0] = cryptbuf[0];
data->dc_timestamp[1] = cryptbuf[1];
 
put_cred(data);
return;
garbage:
*statp = rpc_garbage_args;
return;
}
 
/*
* Call the keyserver to obtain the decrypted conversation key and
* UNIX creds. We use a Linux-specific keycall extension that does
* both things in one go.
*/
static struct des_cred *
get_cred_byname(struct svc_rqst *rqstp, u32 *authp, char *fullname, u32 *cryptkey)
{
struct des_cred *cred = NULL;
 
if (in_keycall) {
*authp = rpc_autherr_dropit;
return NULL;
}
in_keycall = 1;
in_keycall = 0;
return cred;
}
/svcsock.c
0,0 → 1,1343
/*
* linux/net/sunrpc/svcsock.c
*
* These are the RPC server socket internals.
*
* The server scheduling algorithm does not always distribute the load
* evenly when servicing a single client. May need to modify the
* svc_sock_enqueue procedure...
*
* TCP support is largely untested and may be a little slow. The problem
* is that we currently do two separate recvfrom's, one for the 4-byte
* record length, and the second for the actual record. This could possibly
* be improved by always reading a minimum size of around 100 bytes and
* tucking any superfluous bytes away in a temporary store. Still, that
* leaves write requests out in the rain. An alternative may be to peek at
* the first skb in the queue, and if it matches the next TCP sequence
* number, to extract the record marker. Yuck.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
 
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
 
/* SMP locking strategy:
*
* svc_serv->sv_lock protects most stuff for that service.
*
* Some flags can be set to certain values at any time
* providing that certain rules are followed:
*
* SK_BUSY can be set to 0 at any time.
* svc_sock_enqueue must be called afterwards
* SK_CONN, SK_DATA, can be set or cleared at any time.
* after a set, svc_sock_enqueue must be called.
* after a clear, the socket must be read/accepted
* if this succeeds, it must be set again.
* SK_CLOSE can be set at any time. It is never cleared.
*
*/
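/*
 * In practice the rules above boil down to two patterns, both visible in
 * the UDP code below: a data-ready callback does
 *
 *	set_bit(SK_DATA, &svsk->sk_flags);
 *	svc_sock_enqueue(svsk);
 *
 * while the receive side clears SK_DATA before reading, sets it again when
 * more data may be pending, and calls svc_sock_received() to drop SK_BUSY
 * and re-enqueue the socket.
 */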
 
#define RPCDBG_FACILITY RPCDBG_SVCSOCK
 
 
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int *errp, int pmap_reg);
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
 
 
/*
* Queue up an idle server thread. Must have serv->sv_lock held.
* Note: this is really a stack rather than a queue, so that we only
* use as many different threads as we need, and the rest don't pollute
* the cache.
*/
static inline void
svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
{
list_add(&rqstp->rq_list, &serv->sv_threads);
}
 
/*
* Dequeue an nfsd thread. Must have serv->sv_lock held.
*/
static inline void
svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
{
list_del(&rqstp->rq_list);
}
 
/*
* Release an skbuff after use
*/
static inline void
svc_release_skb(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_skbuff;
 
if (!skb)
return;
rqstp->rq_skbuff = NULL;
 
dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
}
 
/*
* Queue up a socket with data pending. If there are idle nfsd
* processes, wake 'em up.
*
*/
static void
svc_sock_enqueue(struct svc_sock *svsk)
{
struct svc_serv *serv = svsk->sk_server;
struct svc_rqst *rqstp;
 
if (!(svsk->sk_flags &
( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)) ))
return;
if (test_bit(SK_DEAD, &svsk->sk_flags))
return;
 
spin_lock_bh(&serv->sv_lock);
 
if (!list_empty(&serv->sv_threads) &&
!list_empty(&serv->sv_sockets))
printk(KERN_ERR
"svc_sock_enqueue: threads and sockets both waiting??\n");
 
if (test_bit(SK_BUSY, &svsk->sk_flags)) {
/* Don't enqueue socket while daemon is receiving */
dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
goto out_unlock;
}
 
if (((svsk->sk_reserved + serv->sv_bufsz)*2
> sock_wspace(svsk->sk_sk))
&& !test_bit(SK_CLOSE, &svsk->sk_flags)
&& !test_bit(SK_CONN, &svsk->sk_flags)) {
/* Don't enqueue while not enough space for reply */
dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
sock_wspace(svsk->sk_sk));
goto out_unlock;
}
 
/* Mark socket as busy. It will remain in this state until the
* server has processed all pending data and put the socket back
* on the idle list.
*/
set_bit(SK_BUSY, &svsk->sk_flags);
 
if (!list_empty(&serv->sv_threads)) {
rqstp = list_entry(serv->sv_threads.next,
struct svc_rqst,
rq_list);
dprintk("svc: socket %p served by daemon %p\n",
svsk->sk_sk, rqstp);
svc_serv_dequeue(serv, rqstp);
if (rqstp->rq_sock)
printk(KERN_ERR
"svc_sock_enqueue: server %p, rq_sock=%p!\n",
rqstp, rqstp->rq_sock);
rqstp->rq_sock = svsk;
svsk->sk_inuse++;
rqstp->rq_reserved = serv->sv_bufsz;
svsk->sk_reserved += rqstp->rq_reserved;
wake_up(&rqstp->rq_wait);
} else {
dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
set_bit(SK_QUED, &svsk->sk_flags);
}
 
out_unlock:
spin_unlock_bh(&serv->sv_lock);
}
 
/*
* Dequeue the first socket. Must be called with the serv->sv_lock held.
*/
static inline struct svc_sock *
svc_sock_dequeue(struct svc_serv *serv)
{
struct svc_sock *svsk;
 
if (list_empty(&serv->sv_sockets))
return NULL;
 
svsk = list_entry(serv->sv_sockets.next,
struct svc_sock, sk_ready);
list_del(&svsk->sk_ready);
 
dprintk("svc: socket %p dequeued, inuse=%d\n",
svsk->sk_sk, svsk->sk_inuse);
clear_bit(SK_QUED, &svsk->sk_flags);
 
return svsk;
}
 
/*
* Having read something from a socket, check whether it
* needs to be re-enqueued.
* Note: SK_DATA only gets cleared when a read-attempt finds
* no (or insufficient) data.
*/
static inline void
svc_sock_received(struct svc_sock *svsk)
{
clear_bit(SK_BUSY, &svsk->sk_flags);
svc_sock_enqueue(svsk);
}
 
 
/**
* svc_reserve - change the space reserved for the reply to a request.
* @rqstp: The request in question
* @space: new max space to reserve
*
* Each request reserves some space on the output queue of the socket
* to make sure the reply fits. This function reduces that reserved
* space to be the amount of space used already, plus @space.
*
*/
void svc_reserve(struct svc_rqst *rqstp, int space)
{
space += rqstp->rq_resbuf.len<<2;
 
if (space < rqstp->rq_reserved) {
struct svc_sock *svsk = rqstp->rq_sock;
spin_lock_bh(&svsk->sk_server->sv_lock);
svsk->sk_reserved -= (rqstp->rq_reserved - space);
rqstp->rq_reserved = space;
spin_unlock_bh(&svsk->sk_server->sv_lock);
 
svc_sock_enqueue(svsk);
}
}
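/*
 * A caller that knows early on that its reply will be small can shrink its
 * reservation, e.g. (hypothetical)
 *
 *	svc_reserve(rqstp, 512);
 *
 * trims rq_reserved down to the space already used in rq_resbuf plus 512
 * bytes, gives the difference back to sk_reserved, and re-runs
 * svc_sock_enqueue() so requests waiting on socket space can proceed.
 */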
 
/*
* Release a socket after use.
*/
static inline void
svc_sock_put(struct svc_sock *svsk)
{
struct svc_serv *serv = svsk->sk_server;
 
spin_lock_bh(&serv->sv_lock);
if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
spin_unlock_bh(&serv->sv_lock);
dprintk("svc: releasing dead socket\n");
sock_release(svsk->sk_sock);
kfree(svsk);
}
else
spin_unlock_bh(&serv->sv_lock);
}
 
static void
svc_sock_release(struct svc_rqst *rqstp)
{
struct svc_sock *svsk = rqstp->rq_sock;
 
svc_release_skb(rqstp);
 
/* Reset response buffer and release
* the reservation.
* But first, check that enough space was reserved
* for the reply, otherwise we have a bug!
*/
if ((rqstp->rq_resbuf.len<<2) > rqstp->rq_reserved)
printk(KERN_ERR "RPC request reserved %d but used %d\n",
rqstp->rq_reserved,
rqstp->rq_resbuf.len<<2);
 
rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base;
rqstp->rq_resbuf.len = 0;
svc_reserve(rqstp, 0);
rqstp->rq_sock = NULL;
 
svc_sock_put(svsk);
}
 
/*
* External function to wake up a server waiting for data
*/
void
svc_wake_up(struct svc_serv *serv)
{
struct svc_rqst *rqstp;
 
spin_lock_bh(&serv->sv_lock);
if (!list_empty(&serv->sv_threads)) {
rqstp = list_entry(serv->sv_threads.next,
struct svc_rqst,
rq_list);
dprintk("svc: daemon %p woken up.\n", rqstp);
/*
svc_serv_dequeue(serv, rqstp);
rqstp->rq_sock = NULL;
*/
wake_up(&rqstp->rq_wait);
}
spin_unlock_bh(&serv->sv_lock);
}
 
/*
* Generic sendto routine
*/
static int
svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr)
{
mm_segment_t oldfs;
struct svc_sock *svsk = rqstp->rq_sock;
struct socket *sock = svsk->sk_sock;
struct msghdr msg;
char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
struct cmsghdr *cmh = (struct cmsghdr *)buffer;
struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh);
int i, buflen, len;
 
for (i = buflen = 0; i < nr; i++)
buflen += iov[i].iov_len;
 
msg.msg_name = &rqstp->rq_addr;
msg.msg_namelen = sizeof(rqstp->rq_addr);
msg.msg_iov = iov;
msg.msg_iovlen = nr;
if (rqstp->rq_prot == IPPROTO_UDP) {
msg.msg_control = cmh;
msg.msg_controllen = sizeof(buffer);
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
cmh->cmsg_level = SOL_IP;
cmh->cmsg_type = IP_PKTINFO;
pki->ipi_ifindex = 0;
pki->ipi_spec_dst.s_addr = rqstp->rq_daddr;
} else {
msg.msg_control = NULL;
msg.msg_controllen = 0;
}
 
/* This was MSG_DONTWAIT, but I now want it to wait.
* The only thing that it would wait for is memory and
* if we are fairly low on memory, then we aren't likely
* to make much progress anyway.
* sk->sndtimeo is set to 30 seconds just in case.
*/
msg.msg_flags = 0;
 
oldfs = get_fs(); set_fs(KERNEL_DS);
len = sock_sendmsg(sock, &msg, buflen);
set_fs(oldfs);
 
dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n",
rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len);
 
return len;
}
 
/*
* Check input queue length
*/
static int
svc_recv_available(struct svc_sock *svsk)
{
mm_segment_t oldfs;
struct socket *sock = svsk->sk_sock;
int avail, err;
 
oldfs = get_fs(); set_fs(KERNEL_DS);
err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
set_fs(oldfs);
 
return (err >= 0)? avail : err;
}
 
/*
* Generic recvfrom routine.
*/
static int
svc_recvfrom(struct svc_rqst *rqstp, struct iovec *iov, int nr, int buflen)
{
mm_segment_t oldfs;
struct msghdr msg;
struct socket *sock;
int len, alen;
 
rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
sock = rqstp->rq_sock->sk_sock;
 
msg.msg_name = &rqstp->rq_addr;
msg.msg_namelen = sizeof(rqstp->rq_addr);
msg.msg_iov = iov;
msg.msg_iovlen = nr;
msg.msg_control = NULL;
msg.msg_controllen = 0;
 
msg.msg_flags = MSG_DONTWAIT;
 
oldfs = get_fs(); set_fs(KERNEL_DS);
len = sock_recvmsg(sock, &msg, buflen, MSG_DONTWAIT);
set_fs(oldfs);
 
/* sock_recvmsg doesn't fill in the name/namelen, so we must..
* possibly we should cache this in the svc_sock structure
* at accept time. FIXME
*/
alen = sizeof(rqstp->rq_addr);
sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
 
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
 
return len;
}
 
/*
* Set socket snd and rcv buffer lengths
*/
static inline void
svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
{
#if 0
mm_segment_t oldfs;
oldfs = get_fs(); set_fs(KERNEL_DS);
sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
(char*)&snd, sizeof(snd));
sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
(char*)&rcv, sizeof(rcv));
#else
/* sock_setsockopt limits use to sysctl_?mem_max,
* which isn't acceptable. Until that is made conditional
* on not having CAP_SYS_RESOURCE or similar, we go direct...
* DaveM said I could!
*/
lock_sock(sock->sk);
sock->sk->sndbuf = snd * 2;
sock->sk->rcvbuf = rcv * 2;
sock->sk->userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
release_sock(sock->sk);
#endif
}
/*
* INET callback when data has been received on the socket.
*/
static void
svc_udp_data_ready(struct sock *sk, int count)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->user_data);
 
if (!svsk)
goto out;
dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags));
set_bit(SK_DATA, &svsk->sk_flags);
svc_sock_enqueue(svsk);
out:
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
}
 
/*
* INET callback when space is newly available on the socket.
*/
static void
svc_write_space(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->user_data);
 
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags));
svc_sock_enqueue(svsk);
}
 
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
}
 
/*
* Receive a datagram from a UDP socket.
*/
static int
svc_udp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
u32 *data;
int err, len;
 
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
/* udp sockets need large rcvbuf as all pending
* requests are still in that buffer. sndbuf must
* also be large enough that there is enough space
* for one reply per thread.
*/
svc_sock_setbufsize(svsk->sk_sock,
(serv->sv_nrthreads+3)* serv->sv_bufsz,
(serv->sv_nrthreads+3)* serv->sv_bufsz);
 
clear_bit(SK_DATA, &svsk->sk_flags);
while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
svc_sock_received(svsk);
if (err == -EAGAIN)
return err;
/* possibly an icmp error */
dprintk("svc: recvfrom returned error %d\n", -err);
}
set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
 
/* Sorry: a fragmented (nonlinear) skb must be linearized before
* we can access its data directly. */
if (skb_is_nonlinear(skb)) {
if (skb_linearize(skb, GFP_KERNEL) != 0) {
kfree_skb(skb);
svc_sock_received(svsk);
return 0;
}
}
 
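/* If the interface did not verify the UDP checksum for us
* (ip_summed != CHECKSUM_UNNECESSARY), check it in software and
* silently drop the datagram on a mismatch.
*/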
if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
skb_free_datagram(svsk->sk_sk, skb);
svc_sock_received(svsk);
return 0;
}
}
 
 
len = skb->len - sizeof(struct udphdr);
data = (u32 *) (skb->data + sizeof(struct udphdr));
 
rqstp->rq_skbuff = skb;
rqstp->rq_argbuf.base = data;
rqstp->rq_argbuf.buf = data;
rqstp->rq_argbuf.len = (len >> 2);
/* rqstp->rq_resbuf = rqstp->rq_defbuf; */
rqstp->rq_prot = IPPROTO_UDP;
 
/* Get sender address */
rqstp->rq_addr.sin_family = AF_INET;
rqstp->rq_addr.sin_port = skb->h.uh->source;
rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
rqstp->rq_daddr = skb->nh.iph->daddr;
 
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
 
/* One down, maybe more to go... */
svsk->sk_sk->stamp = skb->stamp;
svc_sock_received(svsk);
 
return len;
}
 
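/*
* Send a UDP reply. An -ECONNREFUSED result indicates a queued ICMP
* error from an earlier request, so the send is retried once.
*/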
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
struct svc_buf *bufp = &rqstp->rq_resbuf;
int error;
 
/* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken
* care of by the server implementation itself.
*/
/* bufp->base = bufp->area; */
bufp->iov[0].iov_base = bufp->base;
bufp->iov[0].iov_len = bufp->len << 2;
 
error = svc_sendto(rqstp, bufp->iov, bufp->nriov);
if (error == -ECONNREFUSED)
/* ICMP error on earlier request. */
error = svc_sendto(rqstp, bufp->iov, bufp->nriov);
 
return error;
}
 
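/*
* Initialise a UDP service socket: install the data_ready/write_space
* callbacks, hook up the UDP receive/send handlers, and give the socket
* an initial buffer size (readjusted once the thread count is known).
*/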
static int
svc_udp_init(struct svc_sock *svsk)
{
svsk->sk_sk->data_ready = svc_udp_data_ready;
svsk->sk_sk->write_space = svc_write_space;
svsk->sk_recvfrom = svc_udp_recvfrom;
svsk->sk_sendto = svc_udp_sendto;
 
/* Initial setting: must have enough space to
* receive and respond to one request.
* svc_udp_recvfrom will readjust it if necessary.
*/
svc_sock_setbufsize(svsk->sk_sock,
3 * svsk->sk_server->sv_bufsz,
3 * svsk->sk_server->sv_bufsz);
 
set_bit(SK_CHNGBUF, &svsk->sk_flags);
 
return 0;
}
 
/*
* A data_ready event on a listening socket means there's a connection
* pending. Do not use state_change as a substitute for it.
*/
static void
svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
{
struct svc_sock *svsk;
 
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->state);
 
if (sk->state != TCP_LISTEN) {
/*
* This callback may be called twice when a new connection
* is established as a child socket inherits everything
* from a parent LISTEN socket.
* 1) data_ready method of the parent socket will be called
* when one of the child sockets becomes ESTABLISHED.
* 2) data_ready method of the child socket may be called
* when it receives data before the socket is accepted.
* In case of 2, we should ignore it silently.
*/
goto out;
}
if (!(svsk = (struct svc_sock *) sk->user_data)) {
printk("svc: socket %p: no user data\n", sk);
goto out;
}
set_bit(SK_CONN, &svsk->sk_flags);
svc_sock_enqueue(svsk);
out:
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible_all(sk->sleep);
}
 
/*
* A state change on a connected socket means it's dying or dead.
*/
static void
svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk;
 
dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
sk, sk->state, sk->user_data);
 
if (!(svsk = (struct svc_sock *) sk->user_data)) {
printk("svc: socket %p: no user data\n", sk);
goto out;
}
set_bit(SK_CLOSE, &svsk->sk_flags);
svc_sock_enqueue(svsk);
out:
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible_all(sk->sleep);
}
 
static void
svc_tcp_data_ready(struct sock *sk, int count)
{
struct svc_sock * svsk;
 
dprintk("svc: socket %p TCP data ready (svsk %p)\n",
sk, sk->user_data);
if (!(svsk = (struct svc_sock *)(sk->user_data)))
goto out;
set_bit(SK_DATA, &svsk->sk_flags);
svc_sock_enqueue(svsk);
out:
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
}
 
/*
* Accept a TCP connection
*/
static void
svc_tcp_accept(struct svc_sock *svsk)
{
struct sockaddr_in sin;
struct svc_serv *serv = svsk->sk_server;
struct socket *sock = svsk->sk_sock;
struct socket *newsock;
struct proto_ops *ops;
struct svc_sock *newsvsk;
int err, slen;
 
dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
return;
 
if (!(newsock = sock_alloc())) {
printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name);
return;
}
dprintk("svc: tcp_accept %p allocated\n", newsock);
 
newsock->type = sock->type;
newsock->ops = ops = sock->ops;
 
clear_bit(SK_CONN, &svsk->sk_flags);
if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
if (err != -EAGAIN && net_ratelimit())
printk(KERN_WARNING "%s: accept failed (err %d)!\n",
serv->sv_name, -err);
goto failed; /* aborted connection or whatever */
}
set_bit(SK_CONN, &svsk->sk_flags);
svc_sock_enqueue(svsk);
 
slen = sizeof(sin);
err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
if (err < 0) {
if (net_ratelimit())
printk(KERN_WARNING "%s: peername failed (err %d)!\n",
serv->sv_name, -err);
goto failed; /* aborted connection or whatever */
}
 
/* Ideally, we would want to reject connections from unauthorized
* hosts here, but when we get encryption, the IP of the host won't
* tell us anything. For now just warn about unprivileged connections.
*/
if (ntohs(sin.sin_port) >= 1024) {
dprintk(KERN_WARNING
"%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
serv->sv_name,
NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
}
 
dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
 
if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))
goto failed;
 
/* make sure that a write doesn't block forever when
* low on memory
*/
newsock->sk->sndtimeo = HZ*30;
 
/* Precharge. Data may have arrived on the socket before we
* installed the data_ready callback.
*/
set_bit(SK_DATA, &newsvsk->sk_flags);
svc_sock_enqueue(newsvsk);
 
/* make sure that we don't have too many active connections.
* If we do, one of them must be dropped.
* We randomly choose between the newest and the oldest (in terms
* of recent activity) and drop it.
*/
if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*10) {
struct svc_sock *svsk = NULL;
spin_lock_bh(&serv->sv_lock);
if (!list_empty(&serv->sv_tempsocks)) {
if (net_random()&1)
svsk = list_entry(serv->sv_tempsocks.prev,
struct svc_sock,
sk_list);
else
svsk = list_entry(serv->sv_tempsocks.next,
struct svc_sock,
sk_list);
set_bit(SK_CLOSE, &svsk->sk_flags);
svsk->sk_inuse ++;
}
spin_unlock_bh(&serv->sv_lock);
 
if (svsk) {
svc_sock_enqueue(svsk);
svc_sock_put(svsk);
}
 
}
 
if (serv->sv_stats)
serv->sv_stats->nettcpconn++;
 
return;
 
failed:
sock_release(newsock);
return;
}
 
/*
* Receive data from a TCP socket.
*/
static int
svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct svc_buf *bufp = &rqstp->rq_argbuf;
int len;
 
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(SK_DATA, &svsk->sk_flags),
test_bit(SK_CONN, &svsk->sk_flags),
test_bit(SK_CLOSE, &svsk->sk_flags));
 
if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
svc_delete_socket(svsk);
return 0;
}
 
if (test_bit(SK_CONN, &svsk->sk_flags)) {
svc_tcp_accept(svsk);
svc_sock_received(svsk);
return 0;
}
 
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
/* sndbuf needs to have room for one request
* per thread, otherwise we can stall even when the
* network isn't a bottleneck.
* rcvbuf just needs to be able to hold a few requests.
* Normally they will be removed from the queue
* as soon as a complete request arrives.
*/
svc_sock_setbufsize(svsk->sk_sock,
(serv->sv_nrthreads+3) *
serv->sv_bufsz,
3 * serv->sv_bufsz);
 
clear_bit(SK_DATA, &svsk->sk_flags);
 
/* Receive data. If we haven't got the record length yet, get
* the next four bytes. Otherwise try to gobble up as much as
* possible up to the complete record length.
*/
if (svsk->sk_tcplen < 4) {
unsigned long want = 4 - svsk->sk_tcplen;
struct iovec iov;
 
iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
iov.iov_len = want;
if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
goto error;
svsk->sk_tcplen += len;
if (len < want) {
dprintk("svc: short recvfrom while reading record length (%d of %ld)\n",
len, want);
svc_sock_received(svsk);
return -EAGAIN; /* record header not complete */
}
 
svsk->sk_reclen = ntohl(svsk->sk_reclen);
if (!(svsk->sk_reclen & 0x80000000)) {
/* FIXME: technically, a record can be fragmented,
* and non-terminal fragments will not have the top
* bit set in the fragment length header.
* But apparently no known nfs clients send fragmented
* records. */
printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",
(unsigned long) svsk->sk_reclen);
goto err_delete;
}
svsk->sk_reclen &= 0x7fffffff;
dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
if (svsk->sk_reclen > (bufp->buflen<<2)) {
printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
(unsigned long) svsk->sk_reclen);
goto err_delete;
}
}
 
/* Check whether enough data is available */
len = svc_recv_available(svsk);
if (len < 0)
goto error;
 
if (len < svsk->sk_reclen) {
dprintk("svc: incomplete TCP record (%d of %d)\n",
len, svsk->sk_reclen);
svc_sock_received(svsk);
return -EAGAIN; /* record not complete */
}
set_bit(SK_DATA, &svsk->sk_flags);
 
/* Frob argbuf */
bufp->iov[0].iov_base += 4;
bufp->iov[0].iov_len -= 4;
 
/* Now receive data */
len = svc_recvfrom(rqstp, bufp->iov, bufp->nriov, svsk->sk_reclen);
if (len < 0)
goto error;
 
dprintk("svc: TCP complete record (%d bytes)\n", len);
 
/* Position reply write pointer immediately after
* record length */
rqstp->rq_resbuf.buf += 1;
rqstp->rq_resbuf.len = 1;
 
rqstp->rq_skbuff = 0;
rqstp->rq_argbuf.buf += 1;
rqstp->rq_argbuf.len = (len >> 2);
rqstp->rq_prot = IPPROTO_TCP;
 
/* Reset TCP read info */
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
 
svc_sock_received(svsk);
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
 
return len;
 
err_delete:
svc_delete_socket(svsk);
return -EAGAIN;
 
error:
if (len == -EAGAIN) {
dprintk("RPC: TCP recvfrom got EAGAIN\n");
svc_sock_received(svsk);
} else {
printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
svsk->sk_server->sv_name, -len);
svc_sock_received(svsk);
}
 
return len;
}
 
/*
* Send out data on TCP socket.
*/
static int
svc_tcp_sendto(struct svc_rqst *rqstp)
{
struct svc_buf *bufp = &rqstp->rq_resbuf;
int sent;
 
/* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken
* care of by the server implementation itself.
*/
bufp->iov[0].iov_base = bufp->base;
bufp->iov[0].iov_len = bufp->len << 2;
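/* Prepend the RPC record marker: the top bit flags the last
* fragment, the low 31 bits hold the fragment length, i.e. the
* reply size minus the 4-byte marker itself.
*/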
bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4));
 
if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags))
return -ENOTCONN;
 
sent = svc_sendto(rqstp, bufp->iov, bufp->nriov);
if (sent != bufp->len<<2) {
printk(KERN_NOTICE "rpc-srv/tcp: %s: sent only %d bytes of %d - shutting down socket\n",
rqstp->rq_sock->sk_server->sv_name,
sent, bufp->len << 2);
svc_delete_socket(rqstp->rq_sock);
sent = -EAGAIN;
}
return sent;
}
 
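/*
* Initialise a TCP service socket. A listening socket only gets the
* connection-pending callback; a connected socket gets the data/state/
* write-space callbacks, has Nagle disabled, and starts with a minimal
* buffer size until the thread count is known.
*/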
static int
svc_tcp_init(struct svc_sock *svsk)
{
struct sock *sk = svsk->sk_sk;
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
svsk->sk_recvfrom = svc_tcp_recvfrom;
svsk->sk_sendto = svc_tcp_sendto;
 
if (sk->state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
sk->data_ready = svc_tcp_listen_data_ready;
} else {
dprintk("setting up TCP socket for reading\n");
sk->state_change = svc_tcp_state_change;
sk->data_ready = svc_tcp_data_ready;
sk->write_space = svc_write_space;
 
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
 
tp->nonagle = 1; /* disable Nagle's algorithm */
 
/* Initial setting: must have enough space to
* receive and respond to one request.
* svc_tcp_recvfrom will readjust it if necessary.
*/
svc_sock_setbufsize(svsk->sk_sock,
3 * svsk->sk_server->sv_bufsz,
3 * svsk->sk_server->sv_bufsz);
 
set_bit(SK_CHNGBUF, &svsk->sk_flags);
if (sk->state != TCP_ESTABLISHED)
set_bit(SK_CLOSE, &svsk->sk_flags);
}
 
return 0;
}
 
void
svc_sock_update_bufs(struct svc_serv *serv)
{
/*
* The number of server threads has changed.
* Flag all sockets so that their snd/rcv buffer sizes
* get updated.
* We don't adjust them here directly, as the locking is rather
* awkward at this point.
*/
struct list_head *le;
 
spin_lock_bh(&serv->sv_lock);
list_for_each(le, &serv->sv_permsocks) {
struct svc_sock *svsk =
list_entry(le, struct svc_sock, sk_list);
set_bit(SK_CHNGBUF, &svsk->sk_flags);
}
list_for_each(le, &serv->sv_tempsocks) {
struct svc_sock *svsk =
list_entry(le, struct svc_sock, sk_list);
set_bit(SK_CHNGBUF, &svsk->sk_flags);
}
spin_unlock_bh(&serv->sv_lock);
}
 
/*
* Receive the next request on any socket.
*/
int
svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
{
struct svc_sock *svsk = NULL;
int len;
DECLARE_WAITQUEUE(wait, current);
 
dprintk("svc: server %p waiting for data (to = %ld)\n",
rqstp, timeout);
 
if (rqstp->rq_sock)
printk(KERN_ERR
"svc_recv: service %p, socket not NULL!\n",
rqstp);
if (waitqueue_active(&rqstp->rq_wait))
printk(KERN_ERR
"svc_recv: service %p, wait queue active!\n",
rqstp);
 
/* Initialize the buffers */
rqstp->rq_argbuf = rqstp->rq_defbuf;
rqstp->rq_resbuf = rqstp->rq_defbuf;
 
if (signalled())
return -EINTR;
 
spin_lock_bh(&serv->sv_lock);
if (!list_empty(&serv->sv_tempsocks)) {
svsk = list_entry(serv->sv_tempsocks.next,
struct svc_sock, sk_list);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
* 6 minutes
* http://www.connectathon.org/talks96/nfstcp.pdf
*/
if (CURRENT_TIME - svsk->sk_lastrecv < 6*60
|| test_bit(SK_BUSY, &svsk->sk_flags))
svsk = NULL;
}
if (svsk) {
set_bit(SK_BUSY, &svsk->sk_flags);
set_bit(SK_CLOSE, &svsk->sk_flags);
rqstp->rq_sock = svsk;
svsk->sk_inuse++;
} else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
rqstp->rq_sock = svsk;
svsk->sk_inuse++;
rqstp->rq_reserved = serv->sv_bufsz;
svsk->sk_reserved += rqstp->rq_reserved;
} else {
/* No data pending. Go to sleep */
svc_serv_enqueue(serv, rqstp);
 
/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&rqstp->rq_wait, &wait);
spin_unlock_bh(&serv->sv_lock);
 
schedule_timeout(timeout);
 
spin_lock_bh(&serv->sv_lock);
remove_wait_queue(&rqstp->rq_wait, &wait);
 
if (!(svsk = rqstp->rq_sock)) {
svc_serv_dequeue(serv, rqstp);
spin_unlock_bh(&serv->sv_lock);
dprintk("svc: server %p, no data yet\n", rqstp);
return signalled()? -EINTR : -EAGAIN;
}
}
spin_unlock_bh(&serv->sv_lock);
 
dprintk("svc: server %p, socket %p, inuse=%d\n",
rqstp, svsk, svsk->sk_inuse);
len = svsk->sk_recvfrom(rqstp);
dprintk("svc: got len=%d\n", len);
 
/* No data, incomplete (TCP) read, or accept() */
if (len == 0 || len == -EAGAIN) {
svc_sock_release(rqstp);
return -EAGAIN;
}
svsk->sk_lastrecv = CURRENT_TIME;
if (test_bit(SK_TEMP, &svsk->sk_flags)) {
/* push active sockets to end of list */
spin_lock_bh(&serv->sv_lock);
list_del(&svsk->sk_list);
list_add_tail(&svsk->sk_list, &serv->sv_tempsocks);
spin_unlock_bh(&serv->sv_lock);
}
 
rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024;
rqstp->rq_userset = 0;
rqstp->rq_verfed = 0;
 
svc_getlong(&rqstp->rq_argbuf, rqstp->rq_xid);
svc_putlong(&rqstp->rq_resbuf, rqstp->rq_xid);
 
/* Assume that the reply consists of a single buffer. */
rqstp->rq_resbuf.nriov = 1;
 
if (serv->sv_stats)
serv->sv_stats->netcnt++;
return len;
}
 
/*
* Drop request
*/
void
svc_drop(struct svc_rqst *rqstp)
{
dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
svc_sock_release(rqstp);
}
 
/*
* Return reply to client.
*/
int
svc_send(struct svc_rqst *rqstp)
{
struct svc_sock *svsk;
int len;
 
if ((svsk = rqstp->rq_sock) == NULL) {
printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
__FILE__, __LINE__);
return -EFAULT;
}
 
/* release the receive skb before sending the reply */
svc_release_skb(rqstp);
 
len = svsk->sk_sendto(rqstp);
svc_sock_release(rqstp);
 
if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
return 0;
return len;
}
 
/*
* Initialize socket for RPC use and create svc_sock struct
* XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
*/
static struct svc_sock *
svc_setup_socket(struct svc_serv *serv, struct socket *sock,
int *errp, int pmap_register)
{
struct svc_sock *svsk;
struct sock *inet;
 
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) {
*errp = -ENOMEM;
return NULL;
}
memset(svsk, 0, sizeof(*svsk));
 
inet = sock->sk;
inet->user_data = svsk;
svsk->sk_sock = sock;
svsk->sk_sk = inet;
svsk->sk_ostate = inet->state_change;
svsk->sk_odata = inet->data_ready;
svsk->sk_owspace = inet->write_space;
svsk->sk_server = serv;
svsk->sk_lastrecv = CURRENT_TIME;
 
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
*errp = svc_udp_init(svsk);
else
*errp = svc_tcp_init(svsk);
if (svsk->sk_sk == NULL)
printk(KERN_WARNING "svsk->sk_sk == NULL after svc_prot_init!\n");
 
/* Register socket with portmapper */
if (*errp >= 0 && pmap_register)
*errp = svc_register(serv, inet->protocol, ntohs(inet->sport));
 
if (*errp < 0) {
inet->user_data = NULL;
kfree(svsk);
return NULL;
}
 
 
spin_lock_bh(&serv->sv_lock);
if (!pmap_register) {
set_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_tempsocks);
serv->sv_tmpcnt++;
} else {
clear_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_permsocks);
}
spin_unlock_bh(&serv->sv_lock);
 
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
return svsk;
}
 
/*
* Create socket for RPC service.
*/
static int
svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
{
struct svc_sock *svsk;
struct socket *sock;
int error;
int type;
 
dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
serv->sv_program->pg_name, protocol,
NIPQUAD(sin->sin_addr.s_addr),
ntohs(sin->sin_port));
 
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
"sockets supported\n");
return -EINVAL;
}
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
 
if ((error = sock_create(PF_INET, type, protocol, &sock)) < 0)
return error;
 
if (sin != NULL) {
if (type == SOCK_STREAM)
sock->sk->reuse = 1; /* allow address reuse */
error = sock->ops->bind(sock, (struct sockaddr *) sin,
sizeof(*sin));
if (error < 0)
goto bummer;
}
 
if (protocol == IPPROTO_TCP) {
if ((error = sock->ops->listen(sock, 64)) < 0)
goto bummer;
}
 
if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
return 0;
 
bummer:
dprintk("svc: svc_create_socket error = %d\n", -error);
sock_release(sock);
return error;
}
 
/*
* Remove a dead socket
*/
void
svc_delete_socket(struct svc_sock *svsk)
{
struct svc_serv *serv;
struct sock *sk;
 
dprintk("svc: svc_delete_socket(%p)\n", svsk);
 
if (test_and_set_bit(SK_DEAD, &svsk->sk_flags))
return ;
 
serv = svsk->sk_server;
sk = svsk->sk_sk;
 
sk->state_change = svsk->sk_ostate;
sk->data_ready = svsk->sk_odata;
sk->write_space = svsk->sk_owspace;
 
spin_lock_bh(&serv->sv_lock);
 
list_del(&svsk->sk_list);
if (test_bit(SK_TEMP, &svsk->sk_flags))
serv->sv_tmpcnt--;
if (test_bit(SK_QUED, &svsk->sk_flags))
list_del(&svsk->sk_ready);
 
 
if (!svsk->sk_inuse) {
spin_unlock_bh(&serv->sv_lock);
sock_release(svsk->sk_sock);
kfree(svsk);
} else {
spin_unlock_bh(&serv->sv_lock);
dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
/* svsk->sk_server = NULL; */
}
}
 
/*
* Make a socket for nfsd and lockd
*/
int
svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
{
struct sockaddr_in sin;
 
dprintk("svc: creating socket proto = %d\n", protocol);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = INADDR_ANY;
sin.sin_port = htons(port);
return svc_create_socket(serv, protocol, &sin);
}
 
/pmap_clnt.c
0,0 → 1,278
/*
* linux/net/sunrpc/pmap.c
*
* Portmapper client.
*
* FIXME: In a secure environment, we may want to use an authentication
* flavor other than AUTH_NULL.
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/config.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/uio.h>
#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
 
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_PMAP
#endif
 
#define PMAP_SET 1
#define PMAP_UNSET 2
#define PMAP_GETPORT 3
 
static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int);
static void pmap_getport_done(struct rpc_task *);
extern struct rpc_program pmap_program;
static spinlock_t pmap_lock = SPIN_LOCK_UNLOCKED;
 
/*
* Obtain the port for a given RPC service on a given host. This one can
* be called for an ongoing RPC request.
*/
void
rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
{
struct rpc_portmap *map = &clnt->cl_pmap;
struct sockaddr_in *sap = &clnt->cl_xprt->addr;
struct rpc_message msg = { PMAP_GETPORT, map, &clnt->cl_port, NULL };
struct rpc_clnt *pmap_clnt;
struct rpc_task *child;
 
dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n",
task->tk_pid, clnt->cl_server,
map->pm_prog, map->pm_vers, map->pm_prot);
 
spin_lock(&pmap_lock);
if (clnt->cl_binding) {
rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0);
spin_unlock(&pmap_lock);
return;
}
clnt->cl_binding = 1;
spin_unlock(&pmap_lock);
 
task->tk_status = -EACCES; /* why set this? returns -EIO below */
if (!(pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot)))
goto bailout;
task->tk_status = 0;
 
/*
* Note: rpc_new_child will release client after a failure.
*/
if (!(child = rpc_new_child(pmap_clnt, task)))
goto bailout;
 
/* Setup the call info struct */
rpc_call_setup(child, &msg, 0);
 
/* ... and run the child task */
rpc_run_child(task, child, pmap_getport_done);
return;
 
bailout:
spin_lock(&pmap_lock);
clnt->cl_binding = 0;
rpc_wake_up(&clnt->cl_bindwait);
spin_unlock(&pmap_lock);
task->tk_status = -EIO;
task->tk_action = NULL;
}
 
#ifdef CONFIG_ROOT_NFS
int
rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
{
struct rpc_portmap map = { prog, vers, prot, 0 };
struct rpc_clnt *pmap_clnt;
char hostname[32];
int status;
 
dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n",
NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
if (!(pmap_clnt = pmap_create(hostname, sin, prot)))
return -EACCES;
 
/* Setup the call info struct */
status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0);
 
if (status >= 0) {
if (map.pm_port != 0)
return map.pm_port;
status = -EACCES;
}
return status;
}
#endif
 
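/*
* Child-task exit handler: on success, byte-swap the returned port and
* plug it into the parent client's transport address; a port of zero
* means the program is not registered. Finally clear cl_binding and
* wake up anyone waiting to bind.
*/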
static void
pmap_getport_done(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
 
dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
task->tk_pid, task->tk_status, clnt->cl_port);
if (task->tk_status < 0) {
/* Make the calling task exit with an error */
task->tk_action = NULL;
} else if (clnt->cl_port == 0) {
/* Program not registered */
task->tk_status = -EACCES;
task->tk_action = NULL;
} else {
/* byte-swap port number first */
clnt->cl_port = htons(clnt->cl_port);
clnt->cl_xprt->addr.sin_port = clnt->cl_port;
}
spin_lock(&pmap_lock);
clnt->cl_binding = 0;
rpc_wake_up(&clnt->cl_bindwait);
spin_unlock(&pmap_lock);
}
 
/*
* Set or unset a port registration with the local portmapper.
* port == 0 means unregister, port != 0 means register.
*/
int
rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
{
struct sockaddr_in sin;
struct rpc_portmap map;
struct rpc_clnt *pmap_clnt;
unsigned int error = 0;
 
dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
prog, vers, prot, port);
 
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
if (!(pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP))) {
dprintk("RPC: couldn't create pmap client\n");
return -EACCES;
}
 
map.pm_prog = prog;
map.pm_vers = vers;
map.pm_prot = prot;
map.pm_port = port;
 
error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
&map, okay, 0);
 
if (error < 0) {
printk(KERN_WARNING
"RPC: failed to contact portmap (errno %d).\n",
error);
}
dprintk("RPC: registration status %d/%d\n", error, *okay);
 
/* Client deleted automatically because cl_oneshot == 1 */
return error;
}
 
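/*
* Create a throw-away RPC client bound to the remote portmapper
* (RPC_PMAP_PORT, i.e. port 111). cl_oneshot makes the client go away
* after a single call.
*/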
static struct rpc_clnt *
pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto)
{
struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
 
/* printk("pmap: create xprt\n"); */
if (!(xprt = xprt_create_proto(proto, srvaddr, NULL)))
return NULL;
xprt->addr.sin_port = htons(RPC_PMAP_PORT);
 
/* printk("pmap: create clnt\n"); */
clnt = rpc_create_client(xprt, hostname,
&pmap_program, RPC_PMAP_VERSION,
RPC_AUTH_NULL);
if (!clnt) {
xprt_destroy(xprt);
} else {
clnt->cl_softrtry = 1;
clnt->cl_chatty = 1;
clnt->cl_oneshot = 1;
}
return clnt;
}
 
/*
* XDR encode/decode functions for PMAP
*/
static int
xdr_error(struct rpc_rqst *req, u32 *p, void *dummy)
{
return -EIO;
}
 
static int
xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
{
dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
*p++ = htonl(map->pm_prog);
*p++ = htonl(map->pm_vers);
*p++ = htonl(map->pm_prot);
*p++ = htonl(map->pm_port);
 
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
return 0;
}
 
static int
xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
{
*portp = (unsigned short) ntohl(*p++);
return 0;
}
 
static int
xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
{
*boolp = (unsigned int) ntohl(*p++);
return 0;
}
 
static struct rpc_procinfo pmap_procedures[4] = {
{ "pmap_null",
(kxdrproc_t) xdr_error,
(kxdrproc_t) xdr_error, 0, 0 },
{ "pmap_set",
(kxdrproc_t) xdr_encode_mapping,
(kxdrproc_t) xdr_decode_bool, 4, 1 },
{ "pmap_unset",
(kxdrproc_t) xdr_encode_mapping,
(kxdrproc_t) xdr_decode_bool, 4, 1 },
{ "pmap_get",
(kxdrproc_t) xdr_encode_mapping,
(kxdrproc_t) xdr_decode_port, 4, 1 },
};
 
static struct rpc_version pmap_version2 = {
2, 4, pmap_procedures
};
 
static struct rpc_version * pmap_version[] = {
NULL,
NULL,
&pmap_version2,
};
 
static struct rpc_stat pmap_stats;
 
struct rpc_program pmap_program = {
"portmap",
RPC_PMAP_PROGRAM,
sizeof(pmap_version)/sizeof(pmap_version[0]),
pmap_version,
&pmap_stats,
};
/svcauth.c
0,0 → 1,166
/*
* linux/net/sunrpc/svcauth.c
*
* The generic interface for RPC authentication on the server side.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*
* CHANGES
* 19-Apr-2000 Chris Evans - Security fix
*/
 
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/svcsock.h>
 
#define RPCDBG_FACILITY RPCDBG_AUTH
 
/*
* Type of authenticator function
*/
typedef void (*auth_fn_t)(struct svc_rqst *rqstp, u32 *statp, u32 *authp);
 
/*
* Builtin auth flavors
*/
static void svcauth_null(struct svc_rqst *rqstp, u32 *statp, u32 *authp);
static void svcauth_unix(struct svc_rqst *rqstp, u32 *statp, u32 *authp);
 
/*
* Max number of authentication flavors we support
*/
#define RPC_SVCAUTH_MAX 8
 
/*
* Table of authenticators
*/
static auth_fn_t authtab[RPC_SVCAUTH_MAX] = {
svcauth_null,
svcauth_unix,
NULL,
};
 
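/*
* Dispatch on the credential flavor of the incoming request: look the
* flavor up in authtab[] and let the flavor-specific routine validate
* the credential and verifier and fill in rqstp->rq_cred.
*/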
void
svc_authenticate(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
u32 flavor;
auth_fn_t func;
 
*statp = rpc_success;
*authp = rpc_auth_ok;
 
svc_getlong(&rqstp->rq_argbuf, flavor);
flavor = ntohl(flavor);
 
dprintk("svc: svc_authenticate (%d)\n", flavor);
if (flavor >= RPC_SVCAUTH_MAX || !(func = authtab[flavor])) {
*authp = rpc_autherr_badcred;
return;
}
 
rqstp->rq_cred.cr_flavor = flavor;
func(rqstp, statp, authp);
}
 
int
svc_auth_register(u32 flavor, auth_fn_t func)
{
if (flavor >= RPC_SVCAUTH_MAX || authtab[flavor])
return -EINVAL;
authtab[flavor] = func;
return 0;
}
 
void
svc_auth_unregister(u32 flavor)
{
if (flavor < RPC_SVCAUTH_MAX)
authtab[flavor] = NULL;
}
 
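/*
* AUTH_NULL: both the credential and the verifier must be empty.
* The request is mapped to the anonymous uid/gid.
*/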
static void
svcauth_null(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
struct svc_buf *argp = &rqstp->rq_argbuf;
struct svc_buf *resp = &rqstp->rq_resbuf;
 
if ((argp->len -= 3) < 0) {
*statp = rpc_garbage_args;
return;
}
if (*(argp->buf)++ != 0) { /* we already skipped the flavor */
dprintk("svc: bad null cred\n");
*authp = rpc_autherr_badcred;
return;
}
if (*(argp->buf)++ != RPC_AUTH_NULL || *(argp->buf)++ != 0) {
dprintk("svc: bad null verf\n");
*authp = rpc_autherr_badverf;
return;
}
 
/* Signal that mapping to nobody uid/gid is required */
rqstp->rq_cred.cr_uid = (uid_t) -1;
rqstp->rq_cred.cr_gid = (gid_t) -1;
rqstp->rq_cred.cr_groups[0] = NOGROUP;
 
/* Put NULL verifier */
rqstp->rq_verfed = 1;
svc_putlong(resp, RPC_AUTH_NULL);
svc_putlong(resp, 0);
}
 
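/*
* AUTH_UNIX: the credential carries a stamp, the client's machine name,
* uid, gid and a list of supplementary gids; the verifier must be
* AUTH_NULL. Parse it into rq_cred and emit a NULL verifier in reply.
*/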
static void
svcauth_unix(struct svc_rqst *rqstp, u32 *statp, u32 *authp)
{
struct svc_buf *argp = &rqstp->rq_argbuf;
struct svc_buf *resp = &rqstp->rq_resbuf;
struct svc_cred *cred = &rqstp->rq_cred;
u32 *bufp = argp->buf, slen, i;
int len = argp->len;
 
if ((len -= 3) < 0) {
*statp = rpc_garbage_args;
return;
}
 
bufp++; /* length */
bufp++; /* time stamp */
slen = (ntohl(*bufp++) + 3) >> 2; /* machname length */
if (slen > 64 || (len -= slen + 3) < 0)
goto badcred;
bufp += slen; /* skip machname */
 
cred->cr_uid = ntohl(*bufp++); /* uid */
cred->cr_gid = ntohl(*bufp++); /* gid */
 
slen = ntohl(*bufp++); /* gids length */
if (slen > 16 || (len -= slen + 2) < 0)
goto badcred;
for (i = 0; i < NGROUPS && i < slen; i++)
cred->cr_groups[i] = ntohl(*bufp++);
if (i < NGROUPS)
cred->cr_groups[i] = NOGROUP;
bufp += (slen - i);
 
if (*bufp++ != RPC_AUTH_NULL || *bufp++ != 0) {
*authp = rpc_autherr_badverf;
return;
}
 
argp->buf = bufp;
argp->len = len;
 
/* Put NULL verifier */
rqstp->rq_verfed = 1;
svc_putlong(resp, RPC_AUTH_NULL);
svc_putlong(resp, 0);
 
return;
 
badcred:
*authp = rpc_autherr_badcred;
}
/timer.c
0,0 → 1,74
#include <linux/version.h>
#include <linux/types.h>
#include <linux/unistd.h>
 
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/timer.h>
 
#define RPC_RTO_MAX (60*HZ)
#define RPC_RTO_INIT (HZ/5)
#define RPC_RTO_MIN (HZ/10)
 
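/*
* Initialise an RTT estimator. srtt is stored scaled by 8 (hence the
* << 3 below), sdrtt starts at RPC_RTO_INIT, and timeo is the fallback
* timeout that rpc_calc_rto() returns for infrequent RPC classes.
*/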
void
rpc_init_rtt(struct rpc_rtt *rt, long timeo)
{
long t = (timeo - RPC_RTO_INIT) << 3;
int i;
rt->timeo = timeo;
if (t < 0)
t = 0;
for (i = 0; i < 5; i++) {
rt->srtt[i] = t;
rt->sdrtt[i] = RPC_RTO_INIT;
}
memset(rt->ntimeouts, 0, sizeof(rt->ntimeouts));
}
 
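/*
* Update the RTT estimate for the given timer class with a new sample m,
* Van Jacobson style: srtt is kept scaled by 8 and sdrtt (the mean
* deviation) by 4, so the shifts below give filter gains of 1/8 and 1/4.
* timer == 0 means "no timer class" and the sample is ignored.
*/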
void
rpc_update_rtt(struct rpc_rtt *rt, int timer, long m)
{
long *srtt, *sdrtt;
 
if (timer-- == 0)
return;
 
if (m == 0)
m = 1;
srtt = &rt->srtt[timer];
m -= *srtt >> 3;
*srtt += m;
if (m < 0)
m = -m;
sdrtt = &rt->sdrtt[timer];
m -= *sdrtt >> 2;
*sdrtt += m;
/* Set lower bound on the variance */
if (*sdrtt < RPC_RTO_MIN)
*sdrtt = RPC_RTO_MIN;
}
 
/*
* Estimate the RTO for an NFS RPC sent via an unreliable datagram.
* Use the mean and mean deviation of the RTT for the appropriate class
* of RPC for the frequent RPCs, and a default for the others.
* The justification for doing "other" this way is that these RPCs
* happen so infrequently that the timer estimate would probably be stale.
* Also, since many of these RPCs are
* non-idempotent, a conservative timeout is desired.
* getattr, lookup,
* read, write, commit - A + 4D (srtt + 4 * mean deviation)
* other - timeo
*/
 
long
rpc_calc_rto(struct rpc_rtt *rt, int timer)
{
long res;
if (timer-- == 0)
return rt->timeo;
res = (rt->srtt[timer] >> 3) + rt->sdrtt[timer];
if (res > RPC_RTO_MAX)
res = RPC_RTO_MAX;
return res;
}
/sysctl.c
0,0 → 1,137
/*
* linux/net/sunrpc/sysctl.c
*
* Sysctl interface to the sunrpc module. At present it is used for debugging only.
*
* I would prefer to register the sunrpc table below sys/net, but that's
* impossible at the moment.
*/
 
#include <linux/config.h>
#include <linux/version.h>
#include <linux/types.h>
#include <linux/linkage.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/module.h>
 
#include <asm/uaccess.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
 
/*
* Declare the debug flags here
*/
unsigned int rpc_debug;
unsigned int nfs_debug;
unsigned int nfsd_debug;
unsigned int nlm_debug;
 
#ifdef RPC_DEBUG
 
static struct ctl_table_header *sunrpc_table_header;
static ctl_table sunrpc_table[];
 
void
rpc_register_sysctl(void)
{
if (!sunrpc_table_header) {
sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
#ifdef CONFIG_PROC_FS
if (sunrpc_table[0].de)
sunrpc_table[0].de->owner = THIS_MODULE;
#endif
}
}
 
void
rpc_unregister_sysctl(void)
{
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
sunrpc_table_header = NULL;
}
}
 
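/*
* Read/write handler for the rpc/nfs/nfsd/nlm debug sysctls: a write
* parses a decimal value from user space into the corresponding debug
* flag word, a read formats the current value back. Writing to
* rpc_debug additionally dumps the list of pending RPC tasks.
*/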
static int
proc_dodebug(ctl_table *table, int write, struct file *file,
void *buffer, size_t *lenp)
{
char tmpbuf[20], *p, c;
unsigned int value;
size_t left, len;
 
if ((file->f_pos && !write) || !*lenp) {
*lenp = 0;
return 0;
}
 
left = *lenp;
 
if (write) {
if (!access_ok(VERIFY_READ, buffer, left))
return -EFAULT;
p = (char *) buffer;
while (left && __get_user(c, p) >= 0 && isspace(c))
left--, p++;
if (!left)
goto done;
 
if (left > sizeof(tmpbuf) - 1)
return -EINVAL;
copy_from_user(tmpbuf, p, left);
tmpbuf[left] = '\0';
 
for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
value = 10 * value + (*p - '0');
if (*p && !isspace(*p))
return -EINVAL;
while (left && isspace(*p))
left--, p++;
*(unsigned int *) table->data = value;
/* Display the RPC tasks on writing to rpc_debug */
if (table->ctl_name == CTL_RPCDEBUG) {
rpc_show_tasks();
}
} else {
if (!access_ok(VERIFY_WRITE, buffer, left))
return -EFAULT;
len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
if (len > left)
len = left;
copy_to_user(buffer, tmpbuf, len);
if ((left -= len) > 0) {
put_user('\n', (char *)buffer + len);
left--;
}
}
 
done:
*lenp -= left;
file->f_pos += *lenp;
return 0;
}
 
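/*
* Helpers for the ctl_table entries below: DIRENTRY builds a directory
* node, DBGENTRY a writable integer entry wired to one of the *_debug
* flags through proc_dodebug().
*/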
#define DIRENTRY(nam1, nam2, child) \
{CTL_##nam1, #nam2, NULL, 0, 0555, child }
#define DBGENTRY(nam1, nam2) \
{CTL_##nam1##DEBUG, #nam2 "_debug", &nam2##_debug, sizeof(int),\
0644, NULL, &proc_dodebug}
 
static ctl_table debug_table[] = {
DBGENTRY(RPC, rpc),
DBGENTRY(NFS, nfs),
DBGENTRY(NFSD, nfsd),
DBGENTRY(NLM, nlm),
{0}
};
 
static ctl_table sunrpc_table[] = {
DIRENTRY(SUNRPC, sunrpc, debug_table),
{0}
};
 
#endif
/sunrpc_syms.c
0,0 → 1,109
/*
* linux/net/sunrpc/sunrpc_syms.c
*
* Symbols exported by the sunrpc module.
*
* Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/config.h>
#include <linux/module.h>
 
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/uio.h>
#include <linux/unistd.h>
 
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/auth.h>
 
/* RPC scheduler */
EXPORT_SYMBOL(rpc_allocate);
EXPORT_SYMBOL(rpc_free);
EXPORT_SYMBOL(rpc_execute);
EXPORT_SYMBOL(rpc_init_task);
EXPORT_SYMBOL(rpc_sleep_on);
EXPORT_SYMBOL(rpc_wake_up_next);
EXPORT_SYMBOL(rpc_wake_up_task);
EXPORT_SYMBOL(rpc_new_child);
EXPORT_SYMBOL(rpc_run_child);
EXPORT_SYMBOL(rpciod_down);
EXPORT_SYMBOL(rpciod_up);
EXPORT_SYMBOL(rpc_new_task);
EXPORT_SYMBOL(rpc_wake_up_status);
EXPORT_SYMBOL(rpc_release_task);
 
/* RPC client functions */
EXPORT_SYMBOL(rpc_create_client);
EXPORT_SYMBOL(rpc_destroy_client);
EXPORT_SYMBOL(rpc_shutdown_client);
EXPORT_SYMBOL(rpc_killall_tasks);
EXPORT_SYMBOL(rpc_call_sync);
EXPORT_SYMBOL(rpc_call_async);
EXPORT_SYMBOL(rpc_call_setup);
EXPORT_SYMBOL(rpc_clnt_sigmask);
EXPORT_SYMBOL(rpc_clnt_sigunmask);
EXPORT_SYMBOL(rpc_delay);
EXPORT_SYMBOL(rpc_restart_call);
EXPORT_SYMBOL(rpc_setbufsize);
 
/* Client transport */
EXPORT_SYMBOL(xprt_create_proto);
EXPORT_SYMBOL(xprt_destroy);
EXPORT_SYMBOL(xprt_set_timeout);
 
/* Client credential cache */
EXPORT_SYMBOL(rpcauth_register);
EXPORT_SYMBOL(rpcauth_unregister);
EXPORT_SYMBOL(rpcauth_init_credcache);
EXPORT_SYMBOL(rpcauth_free_credcache);
EXPORT_SYMBOL(rpcauth_insert_credcache);
EXPORT_SYMBOL(rpcauth_lookupcred);
EXPORT_SYMBOL(rpcauth_bindcred);
EXPORT_SYMBOL(rpcauth_matchcred);
EXPORT_SYMBOL(put_rpccred);
 
/* RPC server stuff */
EXPORT_SYMBOL(svc_create);
EXPORT_SYMBOL(svc_create_thread);
EXPORT_SYMBOL(svc_exit_thread);
EXPORT_SYMBOL(svc_destroy);
EXPORT_SYMBOL(svc_drop);
EXPORT_SYMBOL(svc_process);
EXPORT_SYMBOL(svc_recv);
EXPORT_SYMBOL(svc_wake_up);
EXPORT_SYMBOL(svc_makesock);
EXPORT_SYMBOL(svc_reserve);
 
/* RPC statistics */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(rpc_proc_register);
EXPORT_SYMBOL(rpc_proc_unregister);
EXPORT_SYMBOL(rpc_proc_read);
EXPORT_SYMBOL(svc_proc_register);
EXPORT_SYMBOL(svc_proc_unregister);
EXPORT_SYMBOL(svc_proc_read);
#endif
 
/* Generic XDR */
EXPORT_SYMBOL(xdr_encode_array);
EXPORT_SYMBOL(xdr_encode_string);
EXPORT_SYMBOL(xdr_decode_string);
EXPORT_SYMBOL(xdr_decode_string_inplace);
EXPORT_SYMBOL(xdr_decode_netobj);
EXPORT_SYMBOL(xdr_encode_netobj);
EXPORT_SYMBOL(xdr_encode_pages);
EXPORT_SYMBOL(xdr_inline_pages);
EXPORT_SYMBOL(xdr_shift_buf);
 
/* Debugging symbols */
#ifdef RPC_DEBUG
EXPORT_SYMBOL(rpc_debug);
EXPORT_SYMBOL(nfs_debug);
EXPORT_SYMBOL(nfsd_debug);
EXPORT_SYMBOL(nlm_debug);
#endif
/sched.c
0,0 → 1,1158
/*
* linux/net/sunrpc/sched.c
*
* Scheduling for synchronous and asynchronous RPC requests.
*
* Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
*
* TCP NFS related read + write fixes
* (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
*/
 
#include <linux/module.h>
 
#define __KERNEL_SYSCALLS__
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/unistd.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
 
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
 
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED
static int rpc_task_id;
#endif
 
/*
* We give RPC the same get_free_pages priority as NFS
*/
#define GFP_RPC GFP_NOFS
 
static void __rpc_default_timer(struct rpc_task *task);
static void rpciod_killall(void);
 
/*
* When an asynchronous RPC task is activated within a bottom half
* handler, or while executing another RPC task, it is put on
* schedq, and rpciod is woken up.
*/
static RPC_WAITQ(schedq, "schedq");
 
/*
* RPC tasks that create another task (e.g. for contacting the portmapper)
* will wait on this queue for their child's completion
*/
static RPC_WAITQ(childq, "childq");
 
/*
* RPC tasks sit here while waiting for conditions to improve.
*/
static RPC_WAITQ(delay_queue, "delayq");
 
/*
* All RPC tasks are linked into this list
*/
static LIST_HEAD(all_tasks);
 
/*
* rpciod-related stuff
*/
static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
static DECLARE_WAIT_QUEUE_HEAD(rpciod_killer);
static DECLARE_MUTEX(rpciod_sema);
static unsigned int rpciod_users;
static pid_t rpciod_pid;
static int rpc_inhibit;
 
/*
* Spinlock for wait queues. Access to the latter also has to be
* interrupt-safe in order to allow timers to wake up sleeping tasks.
*/
static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
/*
* Spinlock for other critical sections of code.
*/
static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
 
/*
* This is the last-ditch buffer for NFS swap requests
*/
static u32 swap_buffer[PAGE_SIZE >> 2];
static long swap_buffer_used;
 
/*
* Make allocation of the swap_buffer SMP-safe
*/
static __inline__ int rpc_lock_swapbuf(void)
{
return !test_and_set_bit(1, &swap_buffer_used);
}
static __inline__ void rpc_unlock_swapbuf(void)
{
clear_bit(1, &swap_buffer_used);
}
 
/*
* Disable the timer for a given RPC task. Should be called with
* rpc_queue_lock held and bottom halves disabled, in order to avoid
* races within rpc_run_timer().
*/
static inline void
__rpc_disable_timer(struct rpc_task *task)
{
dprintk("RPC: %4d disabling timer\n", task->tk_pid);
task->tk_timeout_fn = NULL;
task->tk_timeout = 0;
}
 
/*
* Run a timeout function.
* We use the callback in order to allow __rpc_wake_up_task()
* and friends to disable the timer synchronously on SMP systems
* without calling del_timer_sync(). The latter could cause a
* deadlock if called while we're holding spinlocks...
*/
static void
rpc_run_timer(struct rpc_task *task)
{
void (*callback)(struct rpc_task *);
 
spin_lock_bh(&rpc_queue_lock);
callback = task->tk_timeout_fn;
task->tk_timeout_fn = NULL;
spin_unlock_bh(&rpc_queue_lock);
if (callback) {
dprintk("RPC: %4d running timer\n", task->tk_pid);
callback(task);
}
}
 
/*
* Set up a timer for the current task.
*/
static inline void
__rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
if (!task->tk_timeout)
return;
 
dprintk("RPC: %4d setting alarm for %lu ms\n",
task->tk_pid, task->tk_timeout * 1000 / HZ);
 
if (timer)
task->tk_timeout_fn = timer;
else
task->tk_timeout_fn = __rpc_default_timer;
mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
}
 
/*
* Set up a timer for an already sleeping task.
*/
void rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
spin_lock_bh(&rpc_queue_lock);
if (!RPC_IS_RUNNING(task))
__rpc_add_timer(task, timer);
spin_unlock_bh(&rpc_queue_lock);
}
 
/*
* Delete any timer for the current task. Because we use del_timer_sync(),
* this function should never be called while holding rpc_queue_lock.
*/
static inline void
rpc_delete_timer(struct rpc_task *task)
{
dprintk("RPC: %4d deleting timer\n", task->tk_pid);
del_timer_sync(&task->tk_timer);
}
 
/*
* Add new request to wait queue.
*
* Swapper tasks always get inserted at the head of the queue.
* This should avoid many nasty memory deadlocks and hopefully
* improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static inline int
__rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (task->tk_rpcwait == queue)
return 0;
 
if (task->tk_rpcwait) {
printk(KERN_WARNING "RPC: doubly enqueued task!\n");
return -EWOULDBLOCK;
}
if (RPC_IS_SWAPPER(task))
list_add(&task->tk_list, &queue->tasks);
else
list_add_tail(&task->tk_list, &queue->tasks);
task->tk_rpcwait = queue;
 
dprintk("RPC: %4d added to queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
 
return 0;
}
 
int
rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
{
int result;
 
spin_lock_bh(&rpc_queue_lock);
result = __rpc_add_wait_queue(q, task);
spin_unlock_bh(&rpc_queue_lock);
return result;
}
 
/*
* Remove request from queue.
* Note: must be called with spin lock held.
*/
static inline void
__rpc_remove_wait_queue(struct rpc_task *task)
{
struct rpc_wait_queue *queue = task->tk_rpcwait;
 
if (!queue)
return;
 
list_del(&task->tk_list);
task->tk_rpcwait = NULL;
 
dprintk("RPC: %4d removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
}
 
void
rpc_remove_wait_queue(struct rpc_task *task)
{
if (!task->tk_rpcwait)
return;
spin_lock_bh(&rpc_queue_lock);
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
}
 
/*
* Make an RPC task runnable.
*
* Note: If the task is ASYNC, this must be called with
* the spinlock held to protect the wait queue operation.
*/
static inline void
rpc_make_runnable(struct rpc_task *task)
{
if (task->tk_timeout_fn) {
printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
return;
}
rpc_set_running(task);
if (RPC_IS_ASYNC(task)) {
if (RPC_IS_SLEEPING(task)) {
int status;
status = __rpc_add_wait_queue(&schedq, task);
if (status < 0) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
return;
}
rpc_clear_sleeping(task);
if (waitqueue_active(&rpciod_idle))
wake_up(&rpciod_idle);
}
} else {
rpc_clear_sleeping(task);
if (waitqueue_active(&task->tk_wait))
wake_up(&task->tk_wait);
}
}
 
/*
* Place a newly initialized task on the schedq.
*/
static inline void
rpc_schedule_run(struct rpc_task *task)
{
/* Don't run a child twice! */
if (RPC_IS_ACTIVATED(task))
return;
task->tk_active = 1;
rpc_set_sleeping(task);
rpc_make_runnable(task);
}
 
/*
* For other people who may need to wake the I/O daemon
* but should (for now) know nothing about its innards
*/
void rpciod_wake_up(void)
{
if (rpciod_pid == 0)
printk(KERN_ERR "rpciod: wot no daemon?\n");
if (waitqueue_active(&rpciod_idle))
wake_up(&rpciod_idle);
}
 
/*
* Prepare for sleeping on a wait queue.
* By always appending tasks to the list we ensure FIFO behavior.
* NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue.
*/
static void
__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer)
{
int status;
 
dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
rpc_qname(q), jiffies);
 
if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
return;
}
 
/* Mark the task as being activated if so needed */
if (!RPC_IS_ACTIVATED(task)) {
task->tk_active = 1;
rpc_set_sleeping(task);
}
 
status = __rpc_add_wait_queue(q, task);
if (status) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
} else {
rpc_clear_running(task);
if (task->tk_callback) {
dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
BUG();
}
task->tk_callback = action;
__rpc_add_timer(task, timer);
}
}
 
void
rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer)
{
/*
* Protect the queue operations.
*/
spin_lock_bh(&rpc_queue_lock);
__rpc_sleep_on(q, task, action, timer);
spin_unlock_bh(&rpc_queue_lock);
}
 
/**
* __rpc_wake_up_task - wake up a single rpc_task
* @task: task to be woken up
*
* Caller must hold rpc_queue_lock
*/
static void
__rpc_wake_up_task(struct rpc_task *task)
{
dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
task->tk_pid, jiffies, rpc_inhibit);
 
#ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) {
printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
#endif
/* Has the task been executed yet? If not, we cannot wake it up! */
if (!RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
return;
}
if (RPC_IS_RUNNING(task))
return;
 
__rpc_disable_timer(task);
if (task->tk_rpcwait != &schedq)
__rpc_remove_wait_queue(task);
 
rpc_make_runnable(task);
 
dprintk("RPC: __rpc_wake_up_task done\n");
}
 
/*
* Default timeout handler if none specified by user
*/
static void
__rpc_default_timer(struct rpc_task *task)
{
dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
task->tk_status = -ETIMEDOUT;
rpc_wake_up_task(task);
}
 
/*
* Wake up the specified task
*/
void
rpc_wake_up_task(struct rpc_task *task)
{
if (RPC_IS_RUNNING(task))
return;
spin_lock_bh(&rpc_queue_lock);
__rpc_wake_up_task(task);
spin_unlock_bh(&rpc_queue_lock);
}
 
/*
* Wake up the next task on the wait queue.
*/
struct rpc_task *
rpc_wake_up_next(struct rpc_wait_queue *queue)
{
struct rpc_task *task = NULL;
 
dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
spin_lock_bh(&rpc_queue_lock);
task_for_first(task, &queue->tasks)
__rpc_wake_up_task(task);
spin_unlock_bh(&rpc_queue_lock);
 
return task;
}
 
/**
* rpc_wake_up - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping
*
* Grabs rpc_queue_lock
*/
void
rpc_wake_up(struct rpc_wait_queue *queue)
{
struct rpc_task *task;
 
spin_lock_bh(&rpc_queue_lock);
while (!list_empty(&queue->tasks))
task_for_first(task, &queue->tasks)
__rpc_wake_up_task(task);
spin_unlock_bh(&rpc_queue_lock);
}
 
/**
* rpc_wake_up_status - wake up all rpc_tasks and set their status value.
* @queue: rpc_wait_queue on which the tasks are sleeping
* @status: status value to set
*
* Grabs rpc_queue_lock
*/
void
rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
struct rpc_task *task;
 
spin_lock_bh(&rpc_queue_lock);
while (!list_empty(&queue->tasks)) {
task_for_first(task, &queue->tasks) {
task->tk_status = status;
__rpc_wake_up_task(task);
}
}
spin_unlock_bh(&rpc_queue_lock);
}
 
/*
* Run a task at a later time
*/
static void __rpc_atrun(struct rpc_task *);
void
rpc_delay(struct rpc_task *task, unsigned long delay)
{
task->tk_timeout = delay;
rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}
 
static void
__rpc_atrun(struct rpc_task *task)
{
task->tk_status = 0;
rpc_wake_up_task(task);
}
 
/*
* This is the RPC `scheduler' (or rather, the finite state machine).
*/
static int
__rpc_execute(struct rpc_task *task)
{
int status = 0;
 
dprintk("RPC: %4d rpc_execute flgs %x\n",
task->tk_pid, task->tk_flags);
 
if (!RPC_IS_RUNNING(task)) {
printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
return 0;
}
 
restarted:
while (1) {
/*
* Execute any pending callback.
*/
if (RPC_DO_CALLBACK(task)) {
/* Define a callback save pointer */
void (*save_callback)(struct rpc_task *);
/*
* If a callback exists, save it, reset it,
* call it.
* The save is needed so that we don't clobber a new
* callback set from within the callback handler
* - Dave
*/
save_callback = task->tk_callback;
task->tk_callback = NULL;
save_callback(task);
}
 
/*
* Perform the next FSM step.
* tk_action may be NULL when the task has been killed
* by someone else.
*/
if (RPC_IS_RUNNING(task)) {
/*
* Garbage collection of pending timers...
*/
rpc_delete_timer(task);
if (!task->tk_action)
break;
task->tk_action(task);
}
 
/*
* Check whether task is sleeping.
*/
spin_lock_bh(&rpc_queue_lock);
if (!RPC_IS_RUNNING(task)) {
rpc_set_sleeping(task);
if (RPC_IS_ASYNC(task)) {
spin_unlock_bh(&rpc_queue_lock);
return 0;
}
}
spin_unlock_bh(&rpc_queue_lock);
 
while (RPC_IS_SLEEPING(task)) {
/* sync task: sleep here */
dprintk("RPC: %4d sync task going to sleep\n",
task->tk_pid);
if (current->pid == rpciod_pid)
printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
 
__wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
 
/*
* When a sync task receives a signal, it exits with
* -ERESTARTSYS. In order to catch any callbacks that
* clean up after sleeping on some queue, we don't
* break the loop here, but go around once more.
*/
if (task->tk_client->cl_intr && signalled()) {
dprintk("RPC: %4d got signal\n", task->tk_pid);
task->tk_flags |= RPC_TASK_KILLED;
rpc_exit(task, -ERESTARTSYS);
rpc_wake_up_task(task);
}
}
}
 
if (task->tk_exit) {
task->tk_exit(task);
/* If tk_action is non-null, the user wants us to restart */
if (task->tk_action) {
if (!RPC_ASSASSINATED(task)) {
/* Release RPC slot and buffer memory */
if (task->tk_rqstp)
xprt_release(task);
if (task->tk_buffer) {
rpc_free(task->tk_buffer);
task->tk_buffer = NULL;
}
goto restarted;
}
printk(KERN_ERR "RPC: dead task tries to walk away.\n");
}
}
 
dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
status = task->tk_status;
 
/* Release all resources associated with the task */
rpc_release_task(task);
 
return status;
}
 
/*
* User-visible entry point to the scheduler.
*
* This may be called recursively if e.g. an async NFS task updates
* the attributes and finds that dirty pages must be flushed.
* NOTE: Upon exit of this function the task is guaranteed to be
* released. In particular note that tk_release() will have
* been called, so your task memory may have been freed.
*/
int
rpc_execute(struct rpc_task *task)
{
int status = -EIO;
if (rpc_inhibit) {
printk(KERN_INFO "RPC: execution inhibited!\n");
goto out_release;
}
 
status = -EWOULDBLOCK;
if (task->tk_active) {
printk(KERN_ERR "RPC: active task was run twice!\n");
goto out_err;
}
 
task->tk_active = 1;
rpc_set_running(task);
return __rpc_execute(task);
out_release:
rpc_release_task(task);
out_err:
return status;
}
 
/*
* This is our own little scheduler for async RPC tasks.
*/
static void
__rpc_schedule(void)
{
struct rpc_task *task;
int count = 0;
 
dprintk("RPC: rpc_schedule enter\n");
while (1) {
spin_lock_bh(&rpc_queue_lock);
 
task_for_first(task, &schedq.tasks) {
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
 
__rpc_execute(task);
} else {
spin_unlock_bh(&rpc_queue_lock);
break;
}
 
if (++count >= 200 || current->need_resched) {
count = 0;
schedule();
}
}
dprintk("RPC: rpc_schedule leave\n");
}
 
/*
* Allocate memory for RPC purpose.
*
* This is yet another tricky issue: For sync requests issued by
* a user process, we want to make kmalloc sleep if there isn't
* enough memory. Async requests should not sleep too excessively
* because that will block rpciod (but that's not dramatic when
* it's starved of memory anyway). Finally, swapout requests should
* never sleep at all, and should not trigger another swap_out
* request through kmalloc which would just increase memory contention.
*
* I hope the following gets it right, which gives async requests
* a slight advantage over sync requests (good for writeback, debatable
* for readahead):
*
* sync user requests: GFP_KERNEL
* async requests: GFP_RPC (== GFP_NOFS)
* swap requests: GFP_ATOMIC (or new GFP_SWAPPER)
*/
void *
rpc_allocate(unsigned int flags, unsigned int size)
{
u32 *buffer;
int gfp;
 
if (flags & RPC_TASK_SWAPPER)
gfp = GFP_ATOMIC;
else if (flags & RPC_TASK_ASYNC)
gfp = GFP_RPC;
else
gfp = GFP_KERNEL;
 
do {
if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
dprintk("RPC: allocated buffer %p\n", buffer);
return buffer;
}
if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
&& rpc_lock_swapbuf()) {
dprintk("RPC: used last-ditch swap buffer\n");
return swap_buffer;
}
if (flags & RPC_TASK_ASYNC)
return NULL;
yield();
} while (!signalled());
 
return NULL;
}
 
void
rpc_free(void *buffer)
{
if (buffer != swap_buffer) {
kfree(buffer);
return;
}
rpc_unlock_swapbuf();
}
 
/*
* Creation and deletion of RPC task structures
*/
inline void
rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
rpc_action callback, int flags)
{
memset(task, 0, sizeof(*task));
init_timer(&task->tk_timer);
task->tk_timer.data = (unsigned long) task;
task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
task->tk_client = clnt;
task->tk_flags = flags;
task->tk_exit = callback;
init_waitqueue_head(&task->tk_wait);
if (current->uid != current->fsuid || current->gid != current->fsgid)
task->tk_flags |= RPC_TASK_SETUID;
 
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
task->tk_suid_retry = 1;
 
/* Add to global list of all tasks */
spin_lock(&rpc_sched_lock);
list_add(&task->tk_task, &all_tasks);
spin_unlock(&rpc_sched_lock);
 
if (clnt)
atomic_inc(&clnt->cl_users);
 
#ifdef RPC_DEBUG
task->tk_magic = 0xf00baa;
task->tk_pid = rpc_task_id++;
#endif
dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
current->pid);
}
 
static void
rpc_default_free_task(struct rpc_task *task)
{
dprintk("RPC: %4d freeing task\n", task->tk_pid);
rpc_free(task);
}
 
/*
* Create a new task for the specified client. We have to
* clean up after an allocation failure, as the client may
* have specified "oneshot".
*/
struct rpc_task *
rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
{
struct rpc_task *task;
 
task = (struct rpc_task *) rpc_allocate(flags, sizeof(*task));
if (!task)
goto cleanup;
 
rpc_init_task(task, clnt, callback, flags);
 
/* Replace tk_release */
task->tk_release = rpc_default_free_task;
 
dprintk("RPC: %4d allocated task\n", task->tk_pid);
task->tk_flags |= RPC_TASK_DYNAMIC;
out:
return task;
 
cleanup:
/* Check whether to release the client */
if (clnt) {
printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
atomic_read(&clnt->cl_users), clnt->cl_oneshot);
atomic_inc(&clnt->cl_users); /* pretend we were used ... */
rpc_release_client(clnt);
}
goto out;
}
 
void
rpc_release_task(struct rpc_task *task)
{
dprintk("RPC: %4d release task\n", task->tk_pid);
 
#ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) {
printk(KERN_ERR "RPC: attempt to release a non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
#endif
 
/* Remove from global task list */
spin_lock(&rpc_sched_lock);
list_del(&task->tk_task);
spin_unlock(&rpc_sched_lock);
 
/* Protect the execution below. */
spin_lock_bh(&rpc_queue_lock);
 
/* Disable timer to prevent zombie wakeup */
__rpc_disable_timer(task);
 
/* Remove from any wait queue we're still on */
__rpc_remove_wait_queue(task);
 
task->tk_active = 0;
 
spin_unlock_bh(&rpc_queue_lock);
 
/* Synchronously delete any running timer */
rpc_delete_timer(task);
 
/* Release resources */
if (task->tk_rqstp)
xprt_release(task);
if (task->tk_msg.rpc_cred)
rpcauth_unbindcred(task);
if (task->tk_buffer) {
rpc_free(task->tk_buffer);
task->tk_buffer = NULL;
}
if (task->tk_client) {
rpc_release_client(task->tk_client);
task->tk_client = NULL;
}
 
#ifdef RPC_DEBUG
task->tk_magic = 0;
#endif
if (task->tk_release)
task->tk_release(task);
}
 
/**
* rpc_find_parent - find the parent of a child task.
* @child: child task
*
* Checks that the parent task is still sleeping on the
* queue 'childq'. If so returns a pointer to the parent.
* Upon failure returns NULL.
*
* Caller must hold rpc_queue_lock
*/
static inline struct rpc_task *
rpc_find_parent(struct rpc_task *child)
{
struct rpc_task *task, *parent;
struct list_head *le;
 
parent = (struct rpc_task *) child->tk_calldata;
task_for_each(task, le, &childq.tasks)
if (task == parent)
return parent;
 
return NULL;
}
 
static void
rpc_child_exit(struct rpc_task *child)
{
struct rpc_task *parent;
 
spin_lock_bh(&rpc_queue_lock);
if ((parent = rpc_find_parent(child)) != NULL) {
parent->tk_status = child->tk_status;
__rpc_wake_up_task(parent);
}
spin_unlock_bh(&rpc_queue_lock);
}
 
/*
* Note: rpc_new_task releases the client after a failure.
*/
struct rpc_task *
rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
{
struct rpc_task *task;
 
task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
if (!task)
goto fail;
task->tk_exit = rpc_child_exit;
task->tk_calldata = parent;
return task;
 
fail:
parent->tk_status = -ENOMEM;
return NULL;
}
 
void
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
spin_lock_bh(&rpc_queue_lock);
/* N.B. Is it possible for the child to have already finished? */
__rpc_sleep_on(&childq, task, func, NULL);
rpc_schedule_run(child);
spin_unlock_bh(&rpc_queue_lock);
}
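
/*
 * Illustrative sketch, not part of the original code: the usual
 * parent/child pattern, roughly as the portmap client uses it.  Here
 * pmap_clnt and pmap_getport_done stand in for the caller's own
 * client and wake-up action.
 *
 *	struct rpc_task *child;
 *
 *	child = rpc_new_child(pmap_clnt, task);
 *	if (child == NULL)
 *		return;
 *	... set up child->tk_msg for the query ...
 *	rpc_run_child(task, child, pmap_getport_done);
 *
 * On failure, rpc_new_child() has already set parent->tk_status to
 * -ENOMEM.  Otherwise the parent now sleeps on childq; when the child
 * exits, rpc_child_exit() copies its status to the parent and wakes
 * it, and pmap_getport_done runs as the parent's next action.
 */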
 
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
*/
void
rpc_killall_tasks(struct rpc_clnt *clnt)
{
struct rpc_task *rovr;
struct list_head *le;
 
dprintk("RPC: killing all tasks for client %p\n", clnt);
 
/*
* Spin lock all_tasks to prevent changes...
*/
spin_lock(&rpc_sched_lock);
alltask_for_each(rovr, le, &all_tasks)
if (!clnt || rovr->tk_client == clnt) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
rpc_wake_up_task(rovr);
}
spin_unlock(&rpc_sched_lock);
}
 
static DECLARE_MUTEX_LOCKED(rpciod_running);
 
static inline int
rpciod_task_pending(void)
{
return !list_empty(&schedq.tasks);
}
 
 
/*
* This is the rpciod kernel thread
*/
static int
rpciod(void *ptr)
{
wait_queue_head_t *assassin = (wait_queue_head_t*) ptr;
int rounds = 0;
 
MOD_INC_USE_COUNT;
lock_kernel();
/*
* Let our maker know we're running ...
*/
rpciod_pid = current->pid;
up(&rpciod_running);
 
daemonize();
 
spin_lock_irq(&current->sigmask_lock);
siginitsetinv(&current->blocked, sigmask(SIGKILL));
recalc_sigpending(current);
spin_unlock_irq(&current->sigmask_lock);
 
strcpy(current->comm, "rpciod");
 
dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
while (rpciod_users) {
if (signalled()) {
rpciod_killall();
flush_signals(current);
}
__rpc_schedule();
 
if (++rounds >= 64) { /* safeguard */
schedule();
rounds = 0;
}
 
if (!rpciod_task_pending()) {
dprintk("RPC: rpciod back to sleep\n");
wait_event_interruptible(rpciod_idle, rpciod_task_pending());
dprintk("RPC: switch to rpciod\n");
rounds = 0;
}
}
 
dprintk("RPC: rpciod shutdown commences\n");
if (!list_empty(&all_tasks)) {
printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
rpciod_killall();
}
 
rpciod_pid = 0;
wake_up(assassin);
 
dprintk("RPC: rpciod exiting\n");
MOD_DEC_USE_COUNT;
return 0;
}
 
static void
rpciod_killall(void)
{
unsigned long flags;
 
while (!list_empty(&all_tasks)) {
current->sigpending = 0;
rpc_killall_tasks(NULL);
__rpc_schedule();
if (!list_empty(&all_tasks)) {
dprintk("rpciod_killall: waiting for tasks to exit\n");
yield();
}
}
 
spin_lock_irqsave(&current->sigmask_lock, flags);
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
 
/*
* Start up the rpciod process if it's not already running.
*/
int
rpciod_up(void)
{
int error = 0;
 
MOD_INC_USE_COUNT;
down(&rpciod_sema);
dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
rpciod_users++;
if (rpciod_pid)
goto out;
/*
* If there's no pid, we should be the first user.
*/
if (rpciod_users > 1)
printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
/*
* Create the rpciod thread and wait for it to start.
*/
error = kernel_thread(rpciod, &rpciod_killer, 0);
if (error < 0) {
printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
rpciod_users--;
goto out;
}
down(&rpciod_running);
error = 0;
out:
up(&rpciod_sema);
MOD_DEC_USE_COUNT;
return error;
}
 
void
rpciod_down(void)
{
unsigned long flags;
 
MOD_INC_USE_COUNT;
down(&rpciod_sema);
dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
if (rpciod_users) {
if (--rpciod_users)
goto out;
} else
printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);
 
if (!rpciod_pid) {
dprintk("rpciod_down: Nothing to do!\n");
goto out;
}
 
kill_proc(rpciod_pid, SIGKILL, 1);
/*
* Usually rpciod will exit very quickly, so we
* wait briefly before checking the process id.
*/
current->sigpending = 0;
yield();
/*
* Display a message if we're going to wait longer.
*/
while (rpciod_pid) {
dprintk("rpciod_down: waiting for pid %d to exit\n", rpciod_pid);
if (signalled()) {
dprintk("rpciod_down: caught signal\n");
break;
}
interruptible_sleep_on(&rpciod_killer);
}
spin_lock_irqsave(&current->sigmask_lock, flags);
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, flags);
out:
up(&rpciod_sema);
MOD_DEC_USE_COUNT;
}
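
/*
 * Illustrative sketch, not part of the original code: users of the
 * async machinery bracket their lifetime with rpciod_up() and
 * rpciod_down(), roughly the way an NFS mount/umount pair does.
 *
 *	error = rpciod_up();
 *	if (error < 0)
 *		return error;		rpciod could not be started
 *	... create an rpc_clnt and issue async calls ...
 *	rpciod_down();			last user stops the thread
 */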
 
#ifdef RPC_DEBUG
void rpc_show_tasks(void)
{
struct list_head *le;
struct rpc_task *t;
 
spin_lock(&rpc_sched_lock);
if (list_empty(&all_tasks)) {
spin_unlock(&rpc_sched_lock);
return;
}
printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
"-rpcwait -action- --exit--\n");
alltask_for_each(t, le, &all_tasks)
printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_status,
t->tk_client, t->tk_client->cl_prog,
t->tk_rqstp, t->tk_timeout,
t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " <NULL> ",
t->tk_action, t->tk_exit);
spin_unlock(&rpc_sched_lock);
}
#endif
/svc.c
0,0 → 1,427
/*
* linux/net/sunrpc/svc.c
*
* High-level RPC service routines
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
 
#define __KERNEL_SYSCALLS__
#include <linux/linkage.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/unistd.h>
 
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/clnt.h>
 
#define RPCDBG_FACILITY RPCDBG_SVCDSP
#define RPC_PARANOIA 1
 
/*
* Create an RPC service
*/
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize, unsigned int xdrsize)
{
struct svc_serv *serv;
 
if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
 
memset(serv, 0, sizeof(*serv));
serv->sv_program = prog;
serv->sv_nrthreads = 1;
serv->sv_stats = prog->pg_stats;
serv->sv_bufsz = bufsize? bufsize : 4096;
serv->sv_xdrsize = xdrsize;
INIT_LIST_HEAD(&serv->sv_threads);
INIT_LIST_HEAD(&serv->sv_sockets);
INIT_LIST_HEAD(&serv->sv_tempsocks);
INIT_LIST_HEAD(&serv->sv_permsocks);
spin_lock_init(&serv->sv_lock);
 
serv->sv_name = prog->pg_name;
 
/* Remove any stale portmap registrations */
svc_register(serv, 0, 0);
 
return serv;
}
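
/*
 * Illustrative sketch, not part of the original code: roughly how a
 * service such as nfsd brings itself up.  my_program, MY_BUFSIZE,
 * MY_XDRSIZE, my_svc_thread and port are placeholders for the
 * caller's own definitions; svc_makesock() lives in svcsock.c.
 *
 *	struct svc_serv *serv;
 *	int error;
 *
 *	serv = svc_create(&my_program, MY_BUFSIZE, MY_XDRSIZE);
 *	if (serv == NULL)
 *		return -ENOMEM;
 *	error = svc_makesock(serv, IPPROTO_UDP, port);
 *	if (error == 0)
 *		error = svc_create_thread(my_svc_thread, serv);
 *	svc_destroy(serv);	drop the creator's reference; the
 *				new thread keeps its own
 */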
 
/*
* Destroy an RPC service
*/
void
svc_destroy(struct svc_serv *serv)
{
struct svc_sock *svsk;
 
dprintk("RPC: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name,
serv->sv_nrthreads);
 
if (serv->sv_nrthreads) {
if (--(serv->sv_nrthreads) != 0) {
svc_sock_update_bufs(serv);
return;
}
} else
printk("svc_destroy: no threads for serv=%p!\n", serv);
 
while (!list_empty(&serv->sv_tempsocks)) {
svsk = list_entry(serv->sv_tempsocks.next,
struct svc_sock,
sk_list);
svc_delete_socket(svsk);
}
while (!list_empty(&serv->sv_permsocks)) {
svsk = list_entry(serv->sv_permsocks.next,
struct svc_sock,
sk_list);
svc_delete_socket(svsk);
}
 
/* Unregister service with the portmapper */
svc_register(serv, 0, 0);
kfree(serv);
}
 
/*
* Allocate an RPC server buffer
* Later versions may do nifty things by allocating multiple pages
* of memory directly and putting them into the bufp->iov.
*/
int
svc_init_buffer(struct svc_buf *bufp, unsigned int size)
{
if (!(bufp->area = (u32 *) kmalloc(size, GFP_KERNEL)))
return 0;
bufp->base = bufp->area;
bufp->buf = bufp->area;
bufp->len = 0;
bufp->buflen = size >> 2;
 
bufp->iov[0].iov_base = bufp->area;
bufp->iov[0].iov_len = size;
bufp->nriov = 1;
 
return 1;
}
 
/*
* Release an RPC server buffer
*/
void
svc_release_buffer(struct svc_buf *bufp)
{
kfree(bufp->area);
bufp->area = 0;
}
 
/*
* Create a server thread
*/
int
svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
{
struct svc_rqst *rqstp;
int error = -ENOMEM;
 
rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
goto out;
 
memset(rqstp, 0, sizeof(*rqstp));
init_waitqueue_head(&rqstp->rq_wait);
 
if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz))
goto out_thread;
 
serv->sv_nrthreads++;
rqstp->rq_server = serv;
error = kernel_thread((int (*)(void *)) func, rqstp, 0);
if (error < 0)
goto out_thread;
svc_sock_update_bufs(serv);
error = 0;
out:
return error;
 
out_thread:
svc_exit_thread(rqstp);
goto out;
}
 
/*
* Destroy an RPC server thread
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
 
svc_release_buffer(&rqstp->rq_defbuf);
if (rqstp->rq_resp)
kfree(rqstp->rq_resp);
if (rqstp->rq_argp)
kfree(rqstp->rq_argp);
kfree(rqstp);
 
/* Release the server */
if (serv)
svc_destroy(serv);
}
 
/*
* Register an RPC service with the local portmapper.
* To unregister a service, call this routine with
* proto and port == 0.
*/
int
svc_register(struct svc_serv *serv, int proto, unsigned short port)
{
struct svc_program *progp;
unsigned long flags;
int i, error = 0, dummy;
 
progp = serv->sv_program;
 
dprintk("RPC: svc_register(%s, %s, %d)\n",
progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port);
 
if (!port)
current->sigpending = 0;
 
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
if (error < 0)
break;
if (port && !dummy) {
error = -EACCES;
break;
}
}
 
if (!port) {
spin_lock_irqsave(&current->sigmask_lock, flags);
recalc_sigpending(current);
spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
 
return error;
}
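
/*
 * Illustrative sketch, not part of the original code: advertising a
 * bound UDP socket with the portmapper, and wiping the registrations
 * again, as svc_create() and svc_destroy() above do at startup and
 * shutdown.
 *
 *	error = svc_register(serv, IPPROTO_UDP, port);
 *	...
 *	svc_register(serv, 0, 0);	proto == 0, port == 0: unregister
 */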
 
/*
* Process the RPC request.
*/
int
svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
{
struct svc_program *progp;
struct svc_version *versp = NULL; /* compiler food */
struct svc_procedure *procp = NULL;
struct svc_buf * argp = &rqstp->rq_argbuf;
struct svc_buf * resp = &rqstp->rq_resbuf;
kxdrproc_t xdr;
u32 *bufp, *statp;
u32 dir, prog, vers, proc,
auth_stat, rpc_stat;
 
rpc_stat = rpc_success;
bufp = argp->buf;
 
if (argp->len < 5)
goto err_short_len;
 
dir = ntohl(*bufp++);
vers = ntohl(*bufp++);
 
/* First words of reply: */
svc_putlong(resp, xdr_one); /* REPLY */
svc_putlong(resp, xdr_zero); /* ACCEPT */
 
if (dir != 0) /* direction != CALL */
goto err_bad_dir;
if (vers != 2) /* RPC version number */
goto err_bad_rpc;
 
rqstp->rq_prog = prog = ntohl(*bufp++); /* program number */
rqstp->rq_vers = vers = ntohl(*bufp++); /* version number */
rqstp->rq_proc = proc = ntohl(*bufp++); /* procedure number */
 
argp->buf += 5;
argp->len -= 5;
 
/* Used by nfsd to only allow the NULL procedure for amd. */
if (rqstp->rq_auth && !rqstp->rq_client && proc) {
auth_stat = rpc_autherr_badcred;
goto err_bad_auth;
}
 
/*
* Decode auth data, and add verifier to reply buffer.
* We do this before anything else in order to get a decent
* auth verifier.
*/
svc_authenticate(rqstp, &rpc_stat, &auth_stat);
 
if (rpc_stat != rpc_success)
goto err_garbage;
 
if (auth_stat != rpc_auth_ok)
goto err_bad_auth;
 
progp = serv->sv_program;
if (prog != progp->pg_prog)
goto err_bad_prog;
 
if (vers >= progp->pg_nvers ||
!(versp = progp->pg_vers[vers]))
goto err_bad_vers;
 
procp = versp->vs_proc + proc;
if (proc >= versp->vs_nproc || !procp->pc_func)
goto err_bad_proc;
rqstp->rq_server = serv;
rqstp->rq_procinfo = procp;
 
/* Syntactic check complete */
serv->sv_stats->rpccnt++;
 
/* Build the reply header. */
statp = resp->buf;
svc_putlong(resp, rpc_success); /* RPC_SUCCESS */
 
/* Bump per-procedure stats counter */
procp->pc_count++;
 
/* Initialize storage for argp and resp */
memset(rqstp->rq_argp, 0, procp->pc_argsize);
memset(rqstp->rq_resp, 0, procp->pc_ressize);
 
/* un-reserve some of the out-queue now that we have a
* better idea of reply size
*/
if (procp->pc_xdrressize)
svc_reserve(rqstp, procp->pc_xdrressize<<2);
 
/* Call the function that processes the request. */
if (!versp->vs_dispatch) {
/* Decode arguments */
xdr = procp->pc_decode;
if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp))
goto err_garbage;
 
*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
 
/* Encode reply */
if (*statp == rpc_success && (xdr = procp->pc_encode)
&& !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
dprintk("svc: failed to encode reply\n");
/* serv->sv_stats->rpcsystemerr++; */
*statp = rpc_system_err;
}
} else {
dprintk("svc: calling dispatcher\n");
if (!versp->vs_dispatch(rqstp, statp)) {
/* Release reply info */
if (procp->pc_release)
procp->pc_release(rqstp, NULL, rqstp->rq_resp);
goto dropit;
}
}
 
/* Check RPC status result */
if (*statp != rpc_success)
resp->len = statp + 1 - resp->base;
 
/* Release reply info */
if (procp->pc_release)
procp->pc_release(rqstp, NULL, rqstp->rq_resp);
 
if (procp->pc_encode == NULL)
goto dropit;
sendit:
return svc_send(rqstp);
 
dropit:
dprintk("svc: svc_process dropit\n");
svc_drop(rqstp);
return 0;
 
err_short_len:
#ifdef RPC_PARANOIA
printk("svc: short len %d, dropping request\n", argp->len);
#endif
goto dropit; /* drop request */
 
err_bad_dir:
#ifdef RPC_PARANOIA
printk("svc: bad direction %d, dropping request\n", dir);
#endif
serv->sv_stats->rpcbadfmt++;
goto dropit; /* drop request */
 
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
resp->buf[-1] = xdr_one; /* REJECT */
svc_putlong(resp, xdr_zero); /* RPC_MISMATCH */
svc_putlong(resp, xdr_two); /* Only RPCv2 supported */
svc_putlong(resp, xdr_two);
goto sendit;
 
err_bad_auth:
dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
serv->sv_stats->rpcbadauth++;
resp->buf[-1] = xdr_one; /* REJECT */
svc_putlong(resp, xdr_one); /* AUTH_ERROR */
svc_putlong(resp, auth_stat); /* status */
goto sendit;
 
err_bad_prog:
#ifdef RPC_PARANOIA
if (prog != 100227 || progp->pg_prog != 100003)
printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog);
/* else it is just a Solaris client seeing if ACLs are supported */
#endif
serv->sv_stats->rpcbadfmt++;
svc_putlong(resp, rpc_prog_unavail);
goto sendit;
 
err_bad_vers:
#ifdef RPC_PARANOIA
if (vers)
printk("svc: unknown version (%d)\n", vers);
#endif
serv->sv_stats->rpcbadfmt++;
svc_putlong(resp, rpc_prog_mismatch);
svc_putlong(resp, htonl(progp->pg_lovers));
svc_putlong(resp, htonl(progp->pg_hivers));
goto sendit;
 
err_bad_proc:
#ifdef RPC_PARANOIA
printk("svc: unknown procedure (%d)\n", proc);
#endif
serv->sv_stats->rpcbadfmt++;
svc_putlong(resp, rpc_proc_unavail);
goto sendit;
 
err_garbage:
#ifdef RPC_PARANOIA
printk("svc: failed to decode args\n");
#endif
serv->sv_stats->rpcbadfmt++;
svc_putlong(resp, rpc_garbage_args);
goto sendit;
}
/auth_null.c
0,0 → 1,140
/*
* linux/net/sunrpc/rpcauth_null.c
*
* AUTH_NULL authentication. Really :-)
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/utsname.h>
#include <linux/sunrpc/clnt.h>
 
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
 
static struct rpc_credops null_credops;
 
static struct rpc_auth *
nul_create(struct rpc_clnt *clnt)
{
struct rpc_auth *auth;
 
dprintk("RPC: creating NULL authenticator for client %p\n", clnt);
if (!(auth = (struct rpc_auth *) rpc_allocate(0, sizeof(*auth))))
return NULL;
auth->au_cslack = 4;
auth->au_rslack = 2;
auth->au_ops = &authnull_ops;
auth->au_expire = 1800 * HZ;
rpcauth_init_credcache(auth);
 
return (struct rpc_auth *) auth;
}
 
static void
nul_destroy(struct rpc_auth *auth)
{
dprintk("RPC: destroying NULL authenticator %p\n", auth);
rpcauth_free_credcache(auth);
rpc_free(auth);
}
 
/*
* Create NULL creds for current process
*/
static struct rpc_cred *
nul_create_cred(int flags)
{
struct rpc_cred *cred;
 
if (!(cred = (struct rpc_cred *) rpc_allocate(flags, sizeof(*cred))))
return NULL;
atomic_set(&cred->cr_count, 0);
cred->cr_flags = RPCAUTH_CRED_UPTODATE;
cred->cr_uid = current->uid;
cred->cr_ops = &null_credops;
 
return cred;
}
 
/*
* Destroy cred handle.
*/
static void
nul_destroy_cred(struct rpc_cred *cred)
{
rpc_free(cred);
}
 
/*
* Match cred handle against current process
*/
static int
nul_match(struct rpc_cred *cred, int taskflags)
{
return 1;
}
 
/*
* Marshal credential.
*/
static u32 *
nul_marshal(struct rpc_task *task, u32 *p, int ruid)
{
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
 
return p;
}
 
/*
* Refresh credential. This is a no-op for AUTH_NULL
*/
static int
nul_refresh(struct rpc_task *task)
{
return task->tk_status = -EACCES;
}
 
static u32 *
nul_validate(struct rpc_task *task, u32 *p)
{
u32 n = ntohl(*p++);
 
if (n != RPC_AUTH_NULL) {
printk("RPC: bad verf flavor: %ld\n", (unsigned long) n);
return NULL;
}
if ((n = ntohl(*p++)) != 0) {
printk("RPC: bad verf size: %ld\n", (unsigned long) n);
return NULL;
}
 
return p;
}
 
struct rpc_authops authnull_ops = {
RPC_AUTH_NULL,
#ifdef RPC_DEBUG
"NULL",
#endif
nul_create,
nul_destroy,
nul_create_cred
};
 
static
struct rpc_credops null_credops = {
nul_destroy_cred,
nul_match,
nul_marshal,
nul_refresh,
nul_validate
};
/Makefile
0,0 → 1,24
#
# Makefile for Linux kernel SUN RPC
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (ie not a .c file).
#
# Note 2! The CFLAGS definition is now in the main makefile...
 
O_TARGET := sunrpc.o
 
export-objs := sunrpc_syms.o
 
obj-y := clnt.o xprt.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o \
pmap_clnt.o timer.o xdr.o sunrpc_syms.o
 
obj-$(CONFIG_PROC_FS) += stats.o
obj-$(CONFIG_SYSCTL) += sysctl.o
 
obj-m := $(O_TARGET)
 
include $(TOPDIR)/Rules.make
/xprt.c
0,0 → 1,1608
/*
* linux/net/sunrpc/xprt.c
*
* This is a generic RPC call interface supporting congestion avoidance,
* and asynchronous calls.
*
* The interface works like this:
*
* - When a process places a call, it allocates a request slot if
* one is available. Otherwise, it sleeps on the backlog queue
* (xprt_reserve).
* - Next, the caller puts together the RPC message, stuffs it into
* the request struct, and calls xprt_transmit().
* - xprt_transmit transmits the message and installs the caller on the
* socket's wait list. At the same time, it installs a timer that
* is run after the packet's timeout has expired.
* - When a packet arrives, the data_ready handler walks the list of
* pending requests for that socket. If a matching XID is found, the
* caller is woken up, and the timer removed.
* - When no reply arrives within the timeout interval, the timer is
* fired by the kernel and runs xprt_timer(). It either adjusts the
* timeout values (minor timeout) or wakes up the caller with a status
* of -ETIMEDOUT.
* - When the caller receives a notification from RPC that a reply arrived,
* it should release the RPC slot, and process the reply.
* If the call timed out, it may choose to retry the operation by
* adjusting the initial timeout value, and simply calling rpc_call
* again.
*
* Support for async RPC is done through a set of RPC-specific scheduling
* primitives that `transparently' work for processes as well as async
* tasks that rely on callbacks.
*
* Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de>
*
* TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
* TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
* TCP NFS related read + write fixes
* (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
*
* Rewrite of large parts of the code in order to stabilize TCP stuff.
* Fix behaviour when socket buffer is full.
* (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
*/
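
/*
 * Illustrative sketch, not part of the original code: the per-request
 * sequence described above, as the client state machine in clnt.c
 * drives it for a single rpc_task.  Only the xprt entry points from
 * this file are shown; the marshalling in between is elided.
 *
 *	xprt_reserve(task);	grab a request slot, or sleep on the
 *				backlog queue until one is free
 *	... encode the call into task->tk_rqstp ...
 *	xprt_transmit(task);	queue the request on xprt->recv and
 *				send it; the task then sleeps on
 *				xprt->pending with xprt_timer armed
 *	... data_ready matches the XID and wakes the task, or the
 *	    timer fires with -ETIMEDOUT ...
 *	xprt_release(task);	return the slot, wake a backlogged task
 */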
 
#define __KERNEL_SYSCALLS__
 
#include <linux/version.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/udp.h>
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/file.h>
 
#include <net/sock.h>
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
 
#include <asm/uaccess.h>
 
/*
* Local variables
*/
 
#ifdef RPC_DEBUG
# undef RPC_DEBUG_DATA
# define RPCDBG_FACILITY RPCDBG_XPRT
#endif
 
#define XPRT_MAX_BACKOFF (8)
 
/*
* Local functions
*/
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static void do_xprt_transmit(struct rpc_task *);
static inline void do_xprt_reserve(struct rpc_task *);
static void xprt_disconnect(struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
static struct socket *xprt_create_socket(int, struct rpc_timeout *, int);
static int xprt_bind_socket(struct rpc_xprt *, struct socket *);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
 
#ifdef RPC_DEBUG_DATA
/*
* Print the buffer contents (first 128 bytes only--just enough for
* diropres return).
*/
static void
xprt_pktdump(char *msg, u32 *packet, unsigned int count)
{
u8 *buf = (u8 *) packet;
int j;
 
dprintk("RPC: %s\n", msg);
for (j = 0; j < count && j < 128; j += 4) {
if (!(j & 31)) {
if (j)
dprintk("\n");
dprintk("0x%04x ", j);
}
dprintk("%02x%02x%02x%02x ",
buf[j], buf[j+1], buf[j+2], buf[j+3]);
}
dprintk("\n");
}
#else
static inline void
xprt_pktdump(char *msg, u32 *packet, unsigned int count)
{
/* NOP */
}
#endif
 
/*
* Look up RPC transport given an INET socket
*/
static inline struct rpc_xprt *
xprt_from_sock(struct sock *sk)
{
return (struct rpc_xprt *) sk->user_data;
}
 
/*
* Serialize write access to sockets, in order to prevent different
* requests from interfering with each other.
* Also prevents TCP socket connections from colliding with writes.
*/
static int
__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
if (!xprt->snd_task) {
if (xprt->nocong || __xprt_get_cong(xprt, task)) {
xprt->snd_task = task;
if (req) {
req->rq_bytes_sent = 0;
req->rq_ntrans++;
}
}
}
if (xprt->snd_task != task) {
dprintk("RPC: %4d TCP write queue full\n", task->tk_pid);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (req && req->rq_ntrans)
rpc_sleep_on(&xprt->resend, task, NULL, NULL);
else
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
}
return xprt->snd_task == task;
}
 
static inline int
xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
int retval;
spin_lock_bh(&xprt->sock_lock);
retval = __xprt_lock_write(xprt, task);
spin_unlock_bh(&xprt->sock_lock);
return retval;
}
 
static void
__xprt_lock_write_next(struct rpc_xprt *xprt)
{
struct rpc_task *task;
 
if (xprt->snd_task)
return;
task = rpc_wake_up_next(&xprt->resend);
if (!task) {
if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
return;
task = rpc_wake_up_next(&xprt->sending);
if (!task)
return;
}
if (xprt->nocong || __xprt_get_cong(xprt, task)) {
struct rpc_rqst *req = task->tk_rqstp;
xprt->snd_task = task;
if (req) {
req->rq_bytes_sent = 0;
req->rq_ntrans++;
}
}
}
 
/*
* Releases the socket for use by other requests.
*/
static void
__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task)
xprt->snd_task = NULL;
__xprt_lock_write_next(xprt);
}
 
static inline void
xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
spin_lock_bh(&xprt->sock_lock);
__xprt_release_write(xprt, task);
spin_unlock_bh(&xprt->sock_lock);
}
 
/*
* Write data to socket.
*/
static inline int
xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
struct socket *sock = xprt->sock;
struct msghdr msg;
struct xdr_buf *xdr = &req->rq_snd_buf;
struct iovec niv[MAX_IOVEC];
unsigned int niov, slen, skip;
mm_segment_t oldfs;
int result;
 
if (!sock)
return -ENOTCONN;
 
xprt_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
 
/* Don't repeat bytes */
skip = req->rq_bytes_sent;
slen = xdr->len - skip;
oldfs = get_fs(); set_fs(get_ds());
do {
unsigned int slen_part, n;
 
niov = xdr_kmap(niv, xdr, skip);
if (!niov) {
result = -EAGAIN;
break;
}
 
msg.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL;
msg.msg_iov = niv;
msg.msg_iovlen = niov;
msg.msg_name = (struct sockaddr *) &xprt->addr;
msg.msg_namelen = sizeof(xprt->addr);
msg.msg_control = NULL;
msg.msg_controllen = 0;
 
slen_part = 0;
for (n = 0; n < niov; n++)
slen_part += niv[n].iov_len;
 
clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
result = sock_sendmsg(sock, &msg, slen_part);
 
xdr_kunmap(xdr, skip, niov);
 
skip += slen_part;
slen -= slen_part;
} while (result >= 0 && slen);
set_fs(oldfs);
 
dprintk("RPC: xprt_sendmsg(%d) = %d\n", slen, result);
 
if (result >= 0)
return result;
 
switch (result) {
case -ECONNREFUSED:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED.
*/
case -EAGAIN:
break;
case -ECONNRESET:
case -ENOTCONN:
case -EPIPE:
/* connection broken */
if (xprt->stream)
result = -ENOTCONN;
break;
default:
printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
}
return result;
}
 
/*
* Van Jacobson congestion avoidance. Check if the congestion window
* overflowed. Put the task to sleep if this is the case.
*/
static int
__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
 
if (req->rq_cong)
return 1;
dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
task->tk_pid, xprt->cong, xprt->cwnd);
if (RPCXPRT_CONGESTED(xprt))
return 0;
req->rq_cong = 1;
xprt->cong += RPC_CWNDSCALE;
return 1;
}
 
/*
* Adjust the congestion window, and wake up the next task
* that has been sleeping due to congestion
*/
static void
__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
if (!req->rq_cong)
return;
req->rq_cong = 0;
xprt->cong -= RPC_CWNDSCALE;
__xprt_lock_write_next(xprt);
}
 
/*
* Adjust RPC congestion window
* We use a time-smoothed congestion estimator to avoid heavy oscillation.
*/
static void
xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
{
unsigned long cwnd;
 
cwnd = xprt->cwnd;
if (result >= 0 && cwnd <= xprt->cong) {
/* The (cwnd >> 1) term makes sure
* the result gets rounded properly. */
cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
if (cwnd > RPC_MAXCWND)
cwnd = RPC_MAXCWND;
__xprt_lock_write_next(xprt);
} else if (result == -ETIMEDOUT) {
cwnd >>= 1;
if (cwnd < RPC_CWNDSCALE)
cwnd = RPC_CWNDSCALE;
}
dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
xprt->cong, xprt->cwnd, cwnd);
xprt->cwnd = cwnd;
}
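
/*
 * Worked example, not part of the original code, assuming
 * RPC_CWNDSCALE is 256 (1 << RPC_CWNDSHIFT) as in contemporary
 * headers: with cwnd == 1024, i.e. four slots' worth, each timely
 * reply adds (256*256 + 512) / 1024 == 64, so it takes about four
 * replies to grow the window by one full RPC_CWNDSCALE -- additive
 * increase of roughly one slot per window of replies, capped at
 * RPC_MAXCWND.  A timeout halves cwnd, but never below RPC_CWNDSCALE.
 */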
 
/*
* Adjust timeout values etc for next retransmit
*/
int
xprt_adjust_timeout(struct rpc_timeout *to)
{
if (to->to_retries > 0) {
if (to->to_exponential)
to->to_current <<= 1;
else
to->to_current += to->to_increment;
if (to->to_maxval && to->to_current >= to->to_maxval)
to->to_current = to->to_maxval;
} else {
if (to->to_exponential)
to->to_initval <<= 1;
else
to->to_initval += to->to_increment;
if (to->to_maxval && to->to_initval >= to->to_maxval)
to->to_initval = to->to_maxval;
to->to_current = to->to_initval;
}
 
if (!to->to_current) {
printk(KERN_WARNING "xprt_adjust_timeout: to_current = 0!\n");
to->to_current = 5 * HZ;
}
pprintk("RPC: %lu %s\n", jiffies,
to->to_retries? "retrans" : "timeout");
return to->to_retries-- > 0;
}
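
/*
 * Worked example, not part of the original code: with the default UDP
 * timeout, xprt_set_timeout(to, 5, 5 * HZ) -- to_initval = to_increment
 * = 5s, to_maxval = 25s, 5 retries, linear backoff -- the retransmit
 * interval starts at 5s and successive calls step to_current through
 * 10s, 15s, 20s, 25s and then hold it at 25s.  The first five calls
 * return non-zero (minor timeouts); the sixth returns zero, i.e. a
 * major timeout.
 */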
 
/*
* Close down a transport socket
*/
static void
xprt_close(struct rpc_xprt *xprt)
{
struct socket *sock = xprt->sock;
struct sock *sk = xprt->inet;
 
if (!sk)
return;
 
write_lock_bh(&sk->callback_lock);
xprt->inet = NULL;
xprt->sock = NULL;
 
sk->user_data = NULL;
sk->data_ready = xprt->old_data_ready;
sk->state_change = xprt->old_state_change;
sk->write_space = xprt->old_write_space;
write_unlock_bh(&sk->callback_lock);
 
xprt_disconnect(xprt);
sk->no_check = 0;
 
sock_release(sock);
}
 
/*
* Mark a transport as disconnected
*/
static void
xprt_disconnect(struct rpc_xprt *xprt)
{
dprintk("RPC: disconnected transport %p\n", xprt);
spin_lock_bh(&xprt->sock_lock);
xprt_clear_connected(xprt);
rpc_wake_up_status(&xprt->pending, -ENOTCONN);
spin_unlock_bh(&xprt->sock_lock);
}
 
/*
* Reconnect a broken TCP connection.
*
*/
void
xprt_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
struct socket *sock = xprt->sock;
struct sock *inet;
int status;
 
dprintk("RPC: %4d xprt_connect %p connected %d\n",
task->tk_pid, xprt, xprt_connected(xprt));
if (xprt->shutdown)
return;
 
if (!xprt->addr.sin_port) {
task->tk_status = -EIO;
return;
}
 
if (!xprt_lock_write(xprt, task))
return;
if (xprt_connected(xprt))
goto out_write;
 
if (task->tk_rqstp)
task->tk_rqstp->rq_bytes_sent = 0;
 
xprt_close(xprt);
/* Create an unconnected socket */
sock = xprt_create_socket(xprt->prot, &xprt->timeout, xprt->resvport);
if (!sock) {
/* couldn't create socket or bind to reserved port;
* this is likely a permanent error, so cause an abort */
task->tk_status = -EIO;
goto out_write;
}
xprt_bind_socket(xprt, sock);
 
if (!xprt->stream)
goto out_write;
 
inet = sock->sk;
 
/* Now connect it asynchronously. */
dprintk("RPC: %4d connecting new socket\n", task->tk_pid);
status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
sizeof(xprt->addr), O_NONBLOCK);
dprintk("RPC: %4d connect status %d connected %d\n",
task->tk_pid, status, xprt_connected(xprt));
 
if (status >= 0)
return;
 
switch (status) {
case -EALREADY:
case -EINPROGRESS:
/* Protect against TCP socket state changes */
lock_sock(inet);
if (inet->state != TCP_ESTABLISHED) {
dprintk("RPC: %4d waiting for connection\n",
task->tk_pid);
task->tk_timeout = RPC_CONNECT_TIMEOUT;
/* if the socket is already closing, delay briefly */
if ((1<<inet->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV))
task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status,
NULL);
}
release_sock(inet);
break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ENOTCONN:
if (!task->tk_client->cl_softrtry) {
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
task->tk_status = -ENOTCONN;
break;
}
default:
/* Report myriad other possible returns. If this file
* system is soft mounted, just error out, like Solaris. */
if (task->tk_client->cl_softrtry) {
printk(KERN_WARNING
"RPC: error %d connecting to server %s, exiting\n",
-status, task->tk_client->cl_server);
task->tk_status = -EIO;
goto out_write;
}
printk(KERN_WARNING "RPC: error %d connecting to server %s\n",
-status, task->tk_client->cl_server);
/* This will prevent anybody else from connecting */
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
task->tk_status = status;
break;
}
return;
out_write:
xprt_release_write(xprt, task);
}
 
/*
* We arrive here when awoken from waiting on connection establishment.
*/
static void
xprt_connect_status(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
 
if (task->tk_status >= 0) {
dprintk("RPC: %4d xprt_connect_status: connection established\n",
task->tk_pid);
return;
}
 
/* if soft mounted, cause this RPC to fail */
if (task->tk_client->cl_softrtry)
task->tk_status = -EIO;
 
switch (task->tk_status) {
case -ENOTCONN:
rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
return;
case -ETIMEDOUT:
dprintk("RPC: %4d xprt_connect_status: timed out\n",
task->tk_pid);
break;
default:
printk(KERN_ERR "RPC: error %d connecting to server %s\n",
-task->tk_status, task->tk_client->cl_server);
}
xprt_release_write(xprt, task);
}
 
/*
* Look up the RPC request corresponding to a reply, and then lock it.
*/
static inline struct rpc_rqst *
xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
{
struct list_head *pos;
struct rpc_rqst *req = NULL;
 
list_for_each(pos, &xprt->recv) {
struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
if (entry->rq_xid == xid) {
req = entry;
break;
}
}
return req;
}
 
/*
* Complete reply received.
* The TCP code relies on us to remove the request from xprt->pending.
*/
static void
xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
{
struct rpc_task *task = req->rq_task;
struct rpc_clnt *clnt = task->tk_client;
 
/* Adjust congestion window */
if (!xprt->nocong) {
int timer = rpcproc_timer(clnt, task->tk_msg.rpc_proc);
xprt_adjust_cwnd(xprt, copied);
__xprt_put_cong(xprt, req);
if (req->rq_ntrans == 1) {
if (timer)
rpc_update_rtt(&clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime);
}
rpc_set_timeo(&clnt->cl_rtt, timer, req->rq_ntrans - 1);
}
 
#ifdef RPC_PROFILE
/* Profile only reads for now */
if (copied > 1024) {
static unsigned long nextstat = 0;
static unsigned long pkt_rtt = 0, pkt_len = 0, pkt_cnt = 0;
 
pkt_cnt++;
pkt_len += req->rq_slen + copied;
pkt_rtt += jiffies - req->rq_xtime;
if (time_before(nextstat, jiffies)) {
printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd);
printk("RPC: %ld %ld %ld %ld stat\n",
jiffies, pkt_cnt, pkt_len, pkt_rtt);
pkt_rtt = pkt_len = pkt_cnt = 0;
nextstat = jiffies + 5 * HZ;
}
}
#endif
 
dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied);
req->rq_received = copied;
list_del_init(&req->rq_list);
 
/* ... and wake up the process. */
rpc_wake_up_task(task);
return;
}
 
static size_t
skb_read_bits(skb_reader_t *desc, void *to, size_t len)
{
if (len > desc->count)
len = desc->count;
skb_copy_bits(desc->skb, desc->offset, to, len);
desc->count -= len;
desc->offset += len;
return len;
}
 
static size_t
skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
{
unsigned int csum2, pos;
 
if (len > desc->count)
len = desc->count;
pos = desc->offset;
csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
desc->csum = csum_block_add(desc->csum, csum2, pos);
desc->count -= len;
desc->offset += len;
return len;
}
 
/*
* We have set things up such that we perform the checksum of the UDP
* packet in parallel with the copies into the RPC client iovec. -DaveM
*/
static int
csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
skb_reader_t desc;
 
desc.skb = skb;
desc.offset = sizeof(struct udphdr);
desc.count = skb->len - desc.offset;
 
if (skb->ip_summed == CHECKSUM_UNNECESSARY)
goto no_checksum;
 
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits);
if (desc.offset != skb->len) {
unsigned int csum2;
csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
}
if ((unsigned short)csum_fold(desc.csum))
return -1;
return 0;
no_checksum:
xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits);
return 0;
}
 
/*
* Input handler for RPC replies. Called from a bottom half and hence
* atomic.
*/
static void
udp_data_ready(struct sock *sk, int len)
{
struct rpc_task *task;
struct rpc_xprt *xprt;
struct rpc_rqst *rovr;
struct sk_buff *skb;
int err, repsize, copied;
 
read_lock(&sk->callback_lock);
dprintk("RPC: udp_data_ready...\n");
if (sk->dead || !(xprt = xprt_from_sock(sk))) {
printk("RPC: udp_data_ready request not found!\n");
goto out;
}
 
dprintk("RPC: udp_data_ready client %p\n", xprt);
 
if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
goto out;
 
if (xprt->shutdown)
goto dropit;
 
repsize = skb->len - sizeof(struct udphdr);
if (repsize < 4) {
printk("RPC: impossible RPC reply size %d!\n", repsize);
goto dropit;
}
 
/* Look up and lock the request corresponding to the given XID */
spin_lock(&xprt->sock_lock);
rovr = xprt_lookup_rqst(xprt, *(u32 *) (skb->h.raw + sizeof(struct udphdr)));
if (!rovr)
goto out_unlock;
task = rovr->rq_task;
 
dprintk("RPC: %4d received reply\n", task->tk_pid);
xprt_pktdump("packet data:",
(u32 *) (skb->h.raw+sizeof(struct udphdr)), repsize);
 
if ((copied = rovr->rq_private_buf.len) > repsize)
copied = repsize;
 
/* Suck it into the iovec, verify checksum if not done by hw. */
if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
goto out_unlock;
 
/* Something worked... */
dst_confirm(skb->dst);
 
xprt_complete_rqst(xprt, rovr, copied);
 
out_unlock:
spin_unlock(&xprt->sock_lock);
dropit:
skb_free_datagram(sk, skb);
out:
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
read_unlock(&sk->callback_lock);
}
 
/*
* Copy from an skb into memory and shrink the skb.
*/
static inline size_t
tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
{
if (len > desc->count)
len = desc->count;
skb_copy_bits(desc->skb, desc->offset, p, len);
desc->offset += len;
desc->count -= len;
return len;
}
 
/*
* TCP read fragment marker
*/
static inline void
tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
{
size_t len, used;
char *p;
 
p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
used = tcp_copy_data(desc, p, len);
xprt->tcp_offset += used;
if (used != len)
return;
xprt->tcp_reclen = ntohl(xprt->tcp_recm);
if (xprt->tcp_reclen & 0x80000000)
xprt->tcp_flags |= XPRT_LAST_FRAG;
else
xprt->tcp_flags &= ~XPRT_LAST_FRAG;
xprt->tcp_reclen &= 0x7fffffff;
xprt->tcp_flags &= ~XPRT_COPY_RECM;
xprt->tcp_offset = 0;
/* Sanity check of the record length */
if (xprt->tcp_reclen < 4) {
printk(KERN_ERR "RPC: Invalid TCP record fragment length\n");
xprt_disconnect(xprt);
}
dprintk("RPC: reading TCP record fragment of length %d\n",
xprt->tcp_reclen);
}
 
static void
tcp_check_recm(struct rpc_xprt *xprt)
{
if (xprt->tcp_offset == xprt->tcp_reclen) {
xprt->tcp_flags |= XPRT_COPY_RECM;
xprt->tcp_offset = 0;
if (xprt->tcp_flags & XPRT_LAST_FRAG) {
xprt->tcp_flags &= ~XPRT_COPY_DATA;
xprt->tcp_flags |= XPRT_COPY_XID;
xprt->tcp_copied = 0;
}
}
}
 
/*
* TCP read xid
*/
static inline void
tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
{
size_t len, used;
char *p;
 
len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
dprintk("RPC: reading XID (%Zu bytes)\n", len);
p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
used = tcp_copy_data(desc, p, len);
xprt->tcp_offset += used;
if (used != len)
return;
xprt->tcp_flags &= ~XPRT_COPY_XID;
xprt->tcp_flags |= XPRT_COPY_DATA;
xprt->tcp_copied = 4;
dprintk("RPC: reading reply for XID %08x\n", xprt->tcp_xid);
tcp_check_recm(xprt);
}
 
/*
* TCP read and complete request
*/
static inline void
tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
{
struct rpc_rqst *req;
struct xdr_buf *rcvbuf;
size_t len;
 
/* Find and lock the request corresponding to this xid */
spin_lock(&xprt->sock_lock);
req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
if (!req) {
xprt->tcp_flags &= ~XPRT_COPY_DATA;
dprintk("RPC: XID %08x request not found!\n",
xprt->tcp_xid);
spin_unlock(&xprt->sock_lock);
return;
}
 
rcvbuf = &req->rq_private_buf;
len = desc->count;
if (len > xprt->tcp_reclen - xprt->tcp_offset) {
skb_reader_t my_desc;
 
len = xprt->tcp_reclen - xprt->tcp_offset;
memcpy(&my_desc, desc, sizeof(my_desc));
my_desc.count = len;
xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
&my_desc, tcp_copy_data);
desc->count -= len;
desc->offset += len;
} else
xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
desc, tcp_copy_data);
xprt->tcp_copied += len;
xprt->tcp_offset += len;
 
if (xprt->tcp_copied == req->rq_private_buf.len)
xprt->tcp_flags &= ~XPRT_COPY_DATA;
else if (xprt->tcp_offset == xprt->tcp_reclen) {
if (xprt->tcp_flags & XPRT_LAST_FRAG)
xprt->tcp_flags &= ~XPRT_COPY_DATA;
}
 
if (!(xprt->tcp_flags & XPRT_COPY_DATA)) {
dprintk("RPC: %4d received reply complete\n",
req->rq_task->tk_pid);
xprt_complete_rqst(xprt, req, xprt->tcp_copied);
}
spin_unlock(&xprt->sock_lock);
tcp_check_recm(xprt);
}
 
/*
* TCP discard extra bytes from a short read
*/
static inline void
tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
{
size_t len;
 
len = xprt->tcp_reclen - xprt->tcp_offset;
if (len > desc->count)
len = desc->count;
desc->count -= len;
desc->offset += len;
xprt->tcp_offset += len;
tcp_check_recm(xprt);
}
 
/*
* TCP record receive routine
* We first have to grab the record marker, then the XID, then the data.
*/
static int
tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
unsigned int offset, size_t len)
{
struct rpc_xprt *xprt = (struct rpc_xprt *)rd_desc->buf;
skb_reader_t desc = { skb, offset, len };
 
dprintk("RPC: tcp_data_recv\n");
do {
/* Read in a new fragment marker if necessary */
/* Can we ever really expect to get completely empty fragments? */
if (xprt->tcp_flags & XPRT_COPY_RECM) {
tcp_read_fraghdr(xprt, &desc);
continue;
}
/* Read in the xid if necessary */
if (xprt->tcp_flags & XPRT_COPY_XID) {
tcp_read_xid(xprt, &desc);
continue;
}
/* Read in the request data */
if (xprt->tcp_flags & XPRT_COPY_DATA) {
tcp_read_request(xprt, &desc);
continue;
}
/* Skip over any trailing bytes on short reads */
tcp_read_discard(xprt, &desc);
} while (desc.count);
dprintk("RPC: tcp_data_recv done\n");
return len - desc.count;
}
 
static void tcp_data_ready(struct sock *sk, int bytes)
{
struct rpc_xprt *xprt;
read_descriptor_t rd_desc;
 
read_lock(&sk->callback_lock);
dprintk("RPC: tcp_data_ready...\n");
if (!(xprt = xprt_from_sock(sk))) {
printk("RPC: tcp_data_ready socket info not found!\n");
goto out;
}
if (xprt->shutdown)
goto out;
 
/* We use rd_desc to pass struct xprt to tcp_data_recv */
rd_desc.buf = (char *)xprt;
rd_desc.count = 65536;
tcp_read_sock(sk, &rd_desc, tcp_data_recv);
out:
read_unlock(&sk->callback_lock);
}
 
static void
tcp_state_change(struct sock *sk)
{
struct rpc_xprt *xprt;
 
read_lock(&sk->callback_lock);
if (!(xprt = xprt_from_sock(sk)))
goto out;
dprintk("RPC: tcp_state_change client %p...\n", xprt);
dprintk("RPC: state %x conn %d dead %d zapped %d\n",
sk->state, xprt_connected(xprt),
sk->dead, sk->zapped);
 
switch (sk->state) {
case TCP_ESTABLISHED:
if (xprt_test_and_set_connected(xprt))
break;
 
/* Reset TCP record info */
xprt->tcp_offset = 0;
xprt->tcp_reclen = 0;
xprt->tcp_copied = 0;
xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
 
spin_lock_bh(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
rpc_wake_up_task(xprt->snd_task);
spin_unlock_bh(&xprt->sock_lock);
break;
case TCP_SYN_SENT:
case TCP_SYN_RECV:
break;
default:
xprt_disconnect(xprt);
break;
}
out:
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible_all(sk->sleep);
read_unlock(&sk->callback_lock);
}
 
/*
* Called when more output buffer space is available for this socket.
* We try not to wake our writers until they can make "significant"
* progress, otherwise we'll waste resources thrashing sock_sendmsg
* with a bunch of small requests.
*/
static void
xprt_write_space(struct sock *sk)
{
struct rpc_xprt *xprt;
struct socket *sock;
 
read_lock(&sk->callback_lock);
if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->socket))
goto out;
if (xprt->shutdown)
goto out;
 
/* Wait until we have enough socket memory */
if (xprt->stream) {
/* from net/ipv4/tcp.c:tcp_write_space */
if (tcp_wspace(sk) < tcp_min_write_space(sk))
goto out;
} else {
/* from net/core/sock.c:sock_def_write_space */
if (!sock_writeable(sk))
goto out;
}
 
if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))
goto out;
 
spin_lock_bh(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
rpc_wake_up_task(xprt->snd_task);
spin_unlock_bh(&xprt->sock_lock);
if (sk->sleep && waitqueue_active(sk->sleep))
wake_up_interruptible(sk->sleep);
out:
read_unlock(&sk->callback_lock);
}
 
/*
* RPC receive timeout handler.
*/
static void
xprt_timer(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
 
spin_lock(&xprt->sock_lock);
if (req->rq_received)
goto out;
 
xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
__xprt_put_cong(xprt, req);
 
dprintk("RPC: %4d xprt_timer (%s request)\n",
task->tk_pid, req ? "pending" : "backlogged");
 
task->tk_status = -ETIMEDOUT;
out:
task->tk_timeout = 0;
rpc_wake_up_task(task);
spin_unlock(&xprt->sock_lock);
}
 
/*
* Place the actual RPC call.
* We have to copy the iovec because sendmsg fiddles with its contents.
*/
void
xprt_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
 
dprintk("RPC: %4d xprt_transmit(%x)\n", task->tk_pid,
*(u32 *)(req->rq_svec[0].iov_base));
 
if (xprt->shutdown)
task->tk_status = -EIO;
 
if (task->tk_status < 0)
return;
 
if (task->tk_rpcwait)
rpc_remove_wait_queue(task);
 
/* set up everything as needed. */
/* Write the record marker */
if (xprt->stream) {
u32 *marker = req->rq_svec[0].iov_base;
 
*marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker)));
}
 
spin_lock_bh(&xprt->sock_lock);
if (req->rq_received != 0 && !req->rq_bytes_sent)
goto out_notrans;
 
if (!__xprt_lock_write(xprt, task))
goto out_notrans;
 
if (!xprt_connected(xprt)) {
task->tk_status = -ENOTCONN;
goto out_notrans;
}
 
if (list_empty(&req->rq_list)) {
/* Update the softirq receive buffer */
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
list_add_tail(&req->rq_list, &xprt->recv);
}
spin_unlock_bh(&xprt->sock_lock);
 
do_xprt_transmit(task);
return;
out_notrans:
spin_unlock_bh(&xprt->sock_lock);
}
 
static void
do_xprt_transmit(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
int status, retry = 0;
 
 
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called xprt_sendmsg().
*/
while (1) {
req->rq_xtime = jiffies;
status = xprt_sendmsg(xprt, req);
 
if (status < 0)
break;
 
if (xprt->stream) {
req->rq_bytes_sent += status;
 
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
if (req->rq_bytes_sent >= req->rq_slen) {
req->rq_bytes_sent = 0;
goto out_receive;
}
} else {
if (status >= req->rq_slen)
goto out_receive;
status = -EAGAIN;
break;
}
 
dprintk("RPC: %4d xmit incomplete (%d left of %d)\n",
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
req->rq_slen);
 
status = -EAGAIN;
if (retry++ > 50)
break;
}
 
/* If we're doing a resend and have received a reply already,
* then exit early.
* Note, though, that we can't do this if we've already started
* resending down a TCP stream.
*/
task->tk_status = status;
 
switch (status) {
case -EAGAIN:
if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
/* Protect against races with xprt_write_space */
spin_lock_bh(&xprt->sock_lock);
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
task->tk_timeout = req->rq_timeout.to_current;
rpc_sleep_on(&xprt->pending, task, NULL, NULL);
}
spin_unlock_bh(&xprt->sock_lock);
return;
}
/* Keep holding the socket if it is blocked */
rpc_delay(task, HZ>>4);
return;
case -ECONNREFUSED:
task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
case -ENOTCONN:
return;
default:
if (xprt->stream)
xprt_disconnect(xprt);
}
xprt_release_write(xprt, task);
return;
out_receive:
dprintk("RPC: %4d xmit complete\n", task->tk_pid);
spin_lock_bh(&xprt->sock_lock);
/* Set the task's receive timeout value */
if (!xprt->nocong) {
int timer = rpcproc_timer(clnt, task->tk_msg.rpc_proc);
task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt, timer);
task->tk_timeout <<= rpc_ntimeo(&clnt->cl_rtt, timer);
task->tk_timeout <<= clnt->cl_timeout.to_retries
- req->rq_timeout.to_retries;
if (task->tk_timeout > req->rq_timeout.to_maxval)
task->tk_timeout = req->rq_timeout.to_maxval;
} else
task->tk_timeout = req->rq_timeout.to_current;
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
__xprt_release_write(xprt, task);
spin_unlock_bh(&xprt->sock_lock);
}
 
/*
* Reserve an RPC call slot.
*/
void
xprt_reserve(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
 
task->tk_status = -EIO;
if (!xprt->shutdown) {
spin_lock(&xprt->xprt_lock);
do_xprt_reserve(task);
spin_unlock(&xprt->xprt_lock);
}
}
 
static inline void
do_xprt_reserve(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
 
task->tk_status = 0;
if (task->tk_rqstp)
return;
if (xprt->free) {
struct rpc_rqst *req = xprt->free;
xprt->free = req->rq_next;
req->rq_next = NULL;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
return;
}
dprintk("RPC: waiting for request slot\n");
task->tk_status = -EAGAIN;
task->tk_timeout = 0;
rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
}
 
/*
* Allocate a 'unique' XID
*/
static u32
xprt_alloc_xid(void)
{
static spinlock_t xid_lock = SPIN_LOCK_UNLOCKED;
static int need_init = 1;
static u32 xid;
u32 ret;
 
spin_lock(&xid_lock);
if (unlikely(need_init)) {
xid = CURRENT_TIME << 12;
need_init = 0;
}
ret = xid++;
spin_unlock(&xid_lock);
return ret;
}
 
/*
* Initialize RPC request
*/
static void
xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
{
struct rpc_rqst *req = task->tk_rqstp;
 
req->rq_timeout = xprt->timeout;
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_xid = xprt_alloc_xid();
INIT_LIST_HEAD(&req->rq_list);
dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
req, req->rq_xid);
}
 
/*
* Release an RPC call slot
*/
void
xprt_release(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req;
 
if (!(req = task->tk_rqstp))
return;
spin_lock_bh(&xprt->sock_lock);
__xprt_release_write(xprt, task);
__xprt_put_cong(xprt, req);
if (!list_empty(&req->rq_list))
list_del(&req->rq_list);
spin_unlock_bh(&xprt->sock_lock);
task->tk_rqstp = NULL;
memset(req, 0, sizeof(*req)); /* mark unused */
 
dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
 
spin_lock(&xprt->xprt_lock);
req->rq_next = xprt->free;
xprt->free = req;
 
xprt_clear_backlog(xprt);
spin_unlock(&xprt->xprt_lock);
}
 
/*
* Set default timeout parameters
*/
void
xprt_default_timeout(struct rpc_timeout *to, int proto)
{
if (proto == IPPROTO_UDP)
xprt_set_timeout(to, 5, 5 * HZ);
else
xprt_set_timeout(to, 5, 60 * HZ);
}
 
/*
* Set constant timeout
*/
void
xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
{
to->to_current =
to->to_initval =
to->to_increment = incr;
to->to_maxval = incr * retr;
to->to_retries = retr;
to->to_exponential = 0;
}
 
/*
* Initialize an RPC client
*/
static struct rpc_xprt *
xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
{
struct rpc_xprt *xprt;
struct rpc_rqst *req;
int i;
 
dprintk("RPC: setting up %s transport...\n",
proto == IPPROTO_UDP? "UDP" : "TCP");
 
if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
return NULL;
memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
 
xprt->addr = *ap;
xprt->prot = proto;
xprt->stream = (proto == IPPROTO_TCP)? 1 : 0;
if (xprt->stream) {
xprt->cwnd = RPC_MAXCWND;
xprt->nocong = 1;
} else
xprt->cwnd = RPC_INITCWND;
spin_lock_init(&xprt->sock_lock);
spin_lock_init(&xprt->xprt_lock);
init_waitqueue_head(&xprt->cong_wait);
 
INIT_LIST_HEAD(&xprt->recv);
 
/* Set timeout parameters */
if (to) {
xprt->timeout = *to;
xprt->timeout.to_current = to->to_initval;
} else
xprt_default_timeout(&xprt->timeout, xprt->prot);
 
INIT_RPC_WAITQ(&xprt->pending, "xprt_pending");
INIT_RPC_WAITQ(&xprt->sending, "xprt_sending");
INIT_RPC_WAITQ(&xprt->resend, "xprt_resend");
INIT_RPC_WAITQ(&xprt->backlog, "xprt_backlog");
 
/* initialize free list */
for (i = 0, req = xprt->slot; i < RPC_MAXREQS-1; i++, req++)
req->rq_next = req + 1;
req->rq_next = NULL;
xprt->free = xprt->slot;
 
/* Check whether we want to use a reserved port */
xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 
dprintk("RPC: created transport %p\n", xprt);
return xprt;
}
 
/*
* Bind to a reserved port
*/
static inline int
xprt_bindresvport(struct socket *sock)
{
struct sockaddr_in myaddr;
int err, port;
kernel_cap_t saved_cap = current->cap_effective;
 
/* Override capabilities.
* They were checked in xprt_create_proto i.e. at mount time
*/
cap_raise (current->cap_effective, CAP_NET_BIND_SERVICE);
 
memset(&myaddr, 0, sizeof(myaddr));
myaddr.sin_family = AF_INET;
port = 800;
do {
myaddr.sin_port = htons(port);
err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
sizeof(myaddr));
} while (err == -EADDRINUSE && --port > 0);
current->cap_effective = saved_cap;
 
if (err < 0)
printk("RPC: Can't bind to reserved port (%d).\n", -err);
 
return err;
}
 
static int
xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock *sk = sock->sk;
 
if (xprt->inet)
return -EBUSY;
 
write_lock_bh(&sk->callback_lock);
sk->user_data = xprt;
xprt->old_data_ready = sk->data_ready;
xprt->old_state_change = sk->state_change;
xprt->old_write_space = sk->write_space;
if (xprt->prot == IPPROTO_UDP) {
sk->data_ready = udp_data_ready;
sk->no_check = UDP_CSUM_NORCV;
xprt_set_connected(xprt);
} else {
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
tp->nonagle = 1; /* disable Nagle's algorithm */
sk->data_ready = tcp_data_ready;
sk->state_change = tcp_state_change;
xprt_clear_connected(xprt);
}
sk->write_space = xprt_write_space;
 
/* Reset to new socket */
xprt->sock = sock;
xprt->inet = sk;
write_unlock_bh(&sk->callback_lock);
 
return 0;
}
 
/*
* Set socket buffer length
*/
void
xprt_sock_setbufsize(struct rpc_xprt *xprt)
{
struct sock *sk = xprt->inet;
 
if (xprt->stream)
return;
if (xprt->rcvsize) {
sk->userlocks |= SOCK_RCVBUF_LOCK;
sk->rcvbuf = xprt->rcvsize * RPC_MAXCONG * 2;
}
if (xprt->sndsize) {
sk->userlocks |= SOCK_SNDBUF_LOCK;
sk->sndbuf = xprt->sndsize * RPC_MAXCONG * 2;
sk->write_space(sk);
}
}
 
/*
* Create a client socket given the protocol and peer address.
*/
static struct socket *
xprt_create_socket(int proto, struct rpc_timeout *to, int resvport)
{
struct socket *sock;
int type, err;
 
dprintk("RPC: xprt_create_socket(%s %d)\n",
(proto == IPPROTO_UDP)? "udp" : "tcp", proto);
 
type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
 
if ((err = sock_create(PF_INET, type, proto, &sock)) < 0) {
printk("RPC: can't create socket (%d).\n", -err);
goto failed;
}
 
/* bind to a reserved port */
if (resvport && xprt_bindresvport(sock) < 0)
goto failed;
 
return sock;
 
failed:
sock_release(sock);
return NULL;
}
 
/*
* Create an RPC client transport given the protocol and peer address.
*/
struct rpc_xprt *
xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
{
struct rpc_xprt *xprt;
 
xprt = xprt_setup(proto, sap, to);
if (!xprt)
goto out_bad;
 
dprintk("RPC: xprt_create_proto created xprt %p\n", xprt);
return xprt;
out_bad:
dprintk("RPC: xprt_create_proto failed\n");
if (xprt)
kfree(xprt);
return NULL;
}
 
/*
* Prepare for transport shutdown.
*/
void
xprt_shutdown(struct rpc_xprt *xprt)
{
xprt->shutdown = 1;
rpc_wake_up(&xprt->sending);
rpc_wake_up(&xprt->resend);
rpc_wake_up(&xprt->pending);
rpc_wake_up(&xprt->backlog);
if (waitqueue_active(&xprt->cong_wait))
wake_up(&xprt->cong_wait);
}
 
/*
* Clear the xprt backlog queue
*/
int
xprt_clear_backlog(struct rpc_xprt *xprt) {
rpc_wake_up_next(&xprt->backlog);
if (waitqueue_active(&xprt->cong_wait))
wake_up(&xprt->cong_wait);
return 1;
}
 
/*
* Destroy an RPC transport, killing off all requests.
*/
int
xprt_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: destroying transport %p\n", xprt);
xprt_shutdown(xprt);
xprt_close(xprt);
kfree(xprt);
 
return 0;
}
/xdr.c
0,0 → 1,554
/*
* linux/net/sunrpc/xdr.c
*
* Generic XDR support.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
 
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
 
/*
* XDR functions for basic NFS types
*/
u32 *
xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
{
unsigned int quadlen = XDR_QUADLEN(obj->len);
 
p[quadlen] = 0; /* zero trailing bytes */
*p++ = htonl(obj->len);
memcpy(p, obj->data, obj->len);
return p + XDR_QUADLEN(obj->len);
}
 
u32 *
xdr_decode_netobj_fixed(u32 *p, void *obj, unsigned int len)
{
if (ntohl(*p++) != len)
return NULL;
memcpy(obj, p, len);
return p + XDR_QUADLEN(len);
}
 
u32 *
xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
{
unsigned int len;
 
if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ)
return NULL;
obj->len = len;
obj->data = (u8 *) p;
return p + XDR_QUADLEN(len);
}
 
u32 *
xdr_encode_array(u32 *p, const char *array, unsigned int len)
{
int quadlen = XDR_QUADLEN(len);
 
p[quadlen] = 0;
*p++ = htonl(len);
memcpy(p, array, len);
return p + quadlen;
}
 
u32 *
xdr_encode_string(u32 *p, const char *string)
{
return xdr_encode_array(p, string, strlen(string));
}
 
u32 *
xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
{
unsigned int len;
char *string;
 
if ((len = ntohl(*p++)) > maxlen)
return NULL;
if (lenp)
*lenp = len;
if ((len % 4) != 0) {
string = (char *) p;
} else {
string = (char *) (p - 1);
memmove(string, p, len);
}
string[len] = '\0';
*sp = string;
return p + XDR_QUADLEN(len);
}
 
u32 *
xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
{
unsigned int len;
 
if ((len = ntohl(*p++)) > maxlen)
return NULL;
*lenp = len;
*sp = (char *) p;
return p + XDR_QUADLEN(len);
}
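 
/*
 * Illustrative sketch (not part of the original file): on the wire an XDR
 * string or opaque is a 32-bit big-endian length followed by the bytes,
 * zero-padded to a 4-byte boundary, which is what XDR_QUADLEN() and the
 * encode/decode helpers above implement.  A standalone user-space round
 * trip; QUADLEN is a local restatement of that macro.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

#define QUADLEN(len)	(((len) + 3) >> 2)	/* length in 32-bit words */

static uint32_t *encode_string(uint32_t *p, const char *s)
{
	unsigned int len = strlen(s);

	p[QUADLEN(len)] = 0;		/* zero the trailing padding word */
	*p++ = htonl(len);
	memcpy(p, s, len);
	return p + QUADLEN(len);
}

static uint32_t *decode_string(uint32_t *p, char *out, unsigned int maxlen)
{
	unsigned int len = ntohl(*p++);

	if (len > maxlen)
		return NULL;
	memcpy(out, p, len);
	out[len] = '\0';
	return p + QUADLEN(len);
}

int main(void)
{
	uint32_t buf[16];
	char out[32];
	uint32_t *end = encode_string(buf, "nfs");

	/* "nfs" takes 4 bytes of length + 4 bytes of padded data = 8 bytes. */
	printf("encoded %u bytes on the wire\n",
	       (unsigned int)((end - buf) * 4));
	if (decode_string(buf, out, sizeof(out) - 1))
		printf("decoded \"%s\"\n", out);
	return 0;
}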
 
 
void
xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
unsigned int len)
{
xdr->pages = pages;
xdr->page_base = base;
xdr->page_len = len;
 
if (len & 3) {
struct iovec *iov = xdr->tail;
unsigned int pad = 4 - (len & 3);
 
iov->iov_base = (void *) "\0\0\0";
iov->iov_len = pad;
len += pad;
}
xdr->len += len;
}
 
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)
{
struct iovec *head = xdr->head;
struct iovec *tail = xdr->tail;
char *buf = (char *)head->iov_base;
unsigned int buflen = head->iov_len;
 
head->iov_len = offset;
 
xdr->pages = pages;
xdr->page_base = base;
xdr->page_len = len;
 
tail->iov_base = buf + offset;
tail->iov_len = buflen - offset;
 
xdr->len += len;
}
 
/*
* Realign the iovec if the server missed out some reply elements
* (such as post-op attributes,...)
* Note: This is a simple implementation that assumes that
* len <= iov->iov_len !!!
* The RPC header (assumed to be the 1st element in the iov array)
* is not shifted.
*/
void xdr_shift_iovec(struct iovec *iov, int nr, size_t len)
{
struct iovec *pvec;
 
for (pvec = iov + nr - 1; nr > 1; nr--, pvec--) {
struct iovec *svec = pvec - 1;
 
if (len > pvec->iov_len) {
printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
return;
}
memmove((char *)pvec->iov_base + len, pvec->iov_base,
pvec->iov_len - len);
 
if (len > svec->iov_len) {
printk(KERN_DEBUG "RPC: Urk! Large shift of short iovec.\n");
return;
}
memcpy(pvec->iov_base,
(char *)svec->iov_base + svec->iov_len - len, len);
}
}
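 
/*
 * Illustrative sketch (not part of the original file): xdr_shift_iovec()
 * opens a 'len'-byte hole just after the RPC header by shifting every later
 * iovec element right and pulling the displaced bytes from the element
 * before it.  A small user-space demo with three fixed 4-byte buffers:
 */
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

static void shift_iovec(struct iovec *iov, int nr, size_t len)
{
	struct iovec *pvec;

	for (pvec = iov + nr - 1; nr > 1; nr--, pvec--) {
		struct iovec *svec = pvec - 1;

		/* bail out on a shift larger than an element, as above */
		if (len > pvec->iov_len || len > svec->iov_len)
			return;
		memmove((char *)pvec->iov_base + len, pvec->iov_base,
			pvec->iov_len - len);
		memcpy(pvec->iov_base,
		       (char *)svec->iov_base + svec->iov_len - len, len);
	}
}

int main(void)
{
	char head[] = "HHHH", mid[] = "MMMM", tail[] = "TTTT";
	struct iovec iov[3] = {
		{ head, 4 }, { mid, 4 }, { tail, 4 },
	};

	shift_iovec(iov, 3, 2);
	/* head is untouched; its last 2 bytes now lead mid, and so on. */
	printf("%.4s %.4s %.4s\n", head, mid, tail);	/* HHHH HHMM MMTT */
	return 0;
}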
 
/*
* Map a struct xdr_buf into an iovec array.
*/
int xdr_kmap(struct iovec *iov_base, struct xdr_buf *xdr, unsigned int base)
{
struct iovec *iov = iov_base;
struct page **ppage = xdr->pages;
struct page **first_kmap = NULL;
unsigned int len, pglen = xdr->page_len;
 
len = xdr->head[0].iov_len;
if (base < len) {
iov->iov_len = len - base;
iov->iov_base = (char *)xdr->head[0].iov_base + base;
iov++;
base = 0;
} else
base -= len;
 
if (pglen == 0)
goto map_tail;
if (base >= pglen) {
base -= pglen;
goto map_tail;
}
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
base &= ~PAGE_CACHE_MASK;
}
do {
len = PAGE_CACHE_SIZE;
if (!first_kmap) {
first_kmap = ppage;
iov->iov_base = kmap(*ppage);
} else {
iov->iov_base = kmap_nonblock(*ppage);
if (!iov->iov_base)
goto out_err;
}
if (base) {
iov->iov_base += base;
len -= base;
base = 0;
}
if (pglen < len)
len = pglen;
iov->iov_len = len;
iov++;
ppage++;
} while ((pglen -= len) != 0);
map_tail:
if (xdr->tail[0].iov_len) {
iov->iov_len = xdr->tail[0].iov_len - base;
iov->iov_base = (char *)xdr->tail[0].iov_base + base;
iov++;
}
return (iov - iov_base);
out_err:
for (; first_kmap != ppage; first_kmap++)
kunmap(*first_kmap);
return 0;
}
 
void xdr_kunmap(struct xdr_buf *xdr, unsigned int base, int niov)
{
struct page **ppage = xdr->pages;
unsigned int pglen = xdr->page_len;
 
if (!pglen)
return;
if (base >= xdr->head[0].iov_len)
base -= xdr->head[0].iov_len;
else {
niov--;
base = 0;
}
 
if (base >= pglen)
return;
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
/* Note: The offset means that the length of the first
* page is really (PAGE_CACHE_SIZE - (base & ~PAGE_CACHE_MASK)).
* In order to avoid an extra test inside the loop,
* we bump pglen here, and just subtract PAGE_CACHE_SIZE... */
pglen += base & ~PAGE_CACHE_MASK;
}
/*
* In case we could only do a partial xdr_kmap, all remaining iovecs
* refer to pages. Otherwise we detect the end through pglen.
*/
for (; niov; niov--) {
flush_dcache_page(*ppage);
kunmap(*ppage);
if (pglen <= PAGE_CACHE_SIZE)
break;
pglen -= PAGE_CACHE_SIZE;
ppage++;
}
}
 
void
xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
skb_reader_t *desc,
skb_read_actor_t copy_actor)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
int ret;
 
len = xdr->head[0].iov_len;
if (base < len) {
len -= base;
ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
if (ret != len || !desc->count)
return;
base = 0;
} else
base -= len;
 
if (pglen == 0)
goto copy_tail;
if (base >= pglen) {
base -= pglen;
goto copy_tail;
}
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
base &= ~PAGE_CACHE_MASK;
}
do {
char *kaddr;
 
len = PAGE_CACHE_SIZE;
kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
if (base) {
len -= base;
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr + base, len);
base = 0;
} else {
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr, len);
}
kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
if (ret != len || !desc->count)
return;
ppage++;
} while ((pglen -= len) != 0);
copy_tail:
len = xdr->tail[0].iov_len;
if (len)
copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len);
}
 
/*
* Helper routines for doing 'memmove' like operations on a struct xdr_buf
*
* _shift_data_right_pages
* @pages: vector of pages containing both the source and dest memory area.
* @pgto_base: page vector address of destination
* @pgfrom_base: page vector address of source
* @len: number of bytes to copy
*
* Note: the addresses pgto_base and pgfrom_base are both calculated in
* the same way:
* if a memory area starts at byte 'base' in page 'pages[i]',
* then its address is given as (i << PAGE_CACHE_SHIFT) + base
* Also note: pgfrom_base must be < pgto_base, but the memory areas
* they point to may overlap.
*/
static void
_shift_data_right_pages(struct page **pages, size_t pgto_base,
size_t pgfrom_base, size_t len)
{
struct page **pgfrom, **pgto;
char *vfrom, *vto;
size_t copy;
 
BUG_ON(pgto_base <= pgfrom_base);
 
pgto_base += len;
pgfrom_base += len;
 
pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT);
pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT);
 
pgto_base &= ~PAGE_CACHE_MASK;
pgfrom_base &= ~PAGE_CACHE_MASK;
 
do {
/* Are any pointers crossing a page boundary? */
if (pgto_base == 0) {
pgto_base = PAGE_CACHE_SIZE;
pgto--;
}
if (pgfrom_base == 0) {
pgfrom_base = PAGE_CACHE_SIZE;
pgfrom--;
}
 
copy = len;
if (copy > pgto_base)
copy = pgto_base;
if (copy > pgfrom_base)
copy = pgfrom_base;
pgto_base -= copy;
pgfrom_base -= copy;
 
vto = kmap_atomic(*pgto, KM_USER0);
vfrom = kmap_atomic(*pgfrom, KM_USER1);
memmove(vto + pgto_base, vfrom + pgfrom_base, copy);
kunmap_atomic(vfrom, KM_USER1);
kunmap_atomic(vto, KM_USER0);
 
} while ((len -= copy) != 0);
}
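 
/*
 * Illustrative sketch (not part of the original file): the page helpers
 * address a byte in the page vector as (page_index << PAGE_CACHE_SHIFT) +
 * offset, so a linear position splits into pages[pos >> PAGE_CACHE_SHIFT]
 * and pos & ~PAGE_CACHE_MASK.  A user-space illustration with an assumed
 * 4096-byte page size:
 */
#include <stdio.h>

#define DEMO_PAGE_SHIFT	12
#define DEMO_PAGE_SIZE	(1UL << DEMO_PAGE_SHIFT)
#define DEMO_PAGE_MASK	(~(DEMO_PAGE_SIZE - 1))

int main(void)
{
	unsigned long pos = 9000;	/* byte position within the page vector */
	unsigned long idx = pos >> DEMO_PAGE_SHIFT;
	unsigned long off = pos & ~DEMO_PAGE_MASK;

	/* 9000 = 2*4096 + 808, so the byte lives in pages[2] at offset 808. */
	printf("byte %lu -> pages[%lu], offset %lu\n", pos, idx, off);
	return 0;
}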
 
/*
* _copy_to_pages
* @pages: array of pages
* @pgbase: page vector address of destination
* @p: pointer to source data
* @len: length
*
* Copies data from an arbitrary memory location into an array of pages
* The copy is assumed to be non-overlapping.
*/
static void
_copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
{
struct page **pgto;
char *vto;
size_t copy;
 
pgto = pages + (pgbase >> PAGE_CACHE_SHIFT);
pgbase &= ~PAGE_CACHE_MASK;
 
do {
copy = PAGE_CACHE_SIZE - pgbase;
if (copy > len)
copy = len;
 
vto = kmap_atomic(*pgto, KM_USER0);
memcpy(vto + pgbase, p, copy);
kunmap_atomic(vto, KM_USER0);
 
pgbase += copy;
if (pgbase == PAGE_CACHE_SIZE) {
pgbase = 0;
pgto++;
}
p += copy;
 
} while ((len -= copy) != 0);
}
 
/*
* _copy_from_pages
* @p: pointer to destination
* @pages: array of pages
* @pgbase: offset of source data
* @len: length
*
* Copies data into an arbitrary memory location from an array of pages
* The copy is assumed to be non-overlapping.
*/
static void
_copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
{
struct page **pgfrom;
char *vfrom;
size_t copy;
 
pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT);
pgbase &= ~PAGE_CACHE_MASK;
 
do {
copy = PAGE_CACHE_SIZE - pgbase;
if (copy > len)
copy = len;
 
vfrom = kmap_atomic(*pgfrom, KM_USER0);
memcpy(p, vfrom + pgbase, copy);
kunmap_atomic(vfrom, KM_USER0);
 
pgbase += copy;
if (pgbase == PAGE_CACHE_SIZE) {
pgbase = 0;
pgfrom++;
}
p += copy;
 
} while ((len -= copy) != 0);
}
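 
/*
 * Illustrative sketch (not part of the original file): _copy_to_pages() and
 * _copy_from_pages() walk the page vector in page-sized chunks, handling a
 * copy that starts mid-page and crosses page boundaries.  The same loop in
 * user space, with tiny 8-byte "pages" so the boundary handling is visible:
 */
#include <stdio.h>
#include <string.h>

#define DEMO_PAGE_SIZE 8

static void copy_to_pages(char **pages, size_t pgbase, const char *p, size_t len)
{
	char **pgto = pages + pgbase / DEMO_PAGE_SIZE;
	size_t copy;

	pgbase %= DEMO_PAGE_SIZE;
	do {
		copy = DEMO_PAGE_SIZE - pgbase;
		if (copy > len)
			copy = len;
		memcpy(*pgto + pgbase, p, copy);

		pgbase += copy;
		if (pgbase == DEMO_PAGE_SIZE) {	/* crossed into the next page */
			pgbase = 0;
			pgto++;
		}
		p += copy;
	} while ((len -= copy) != 0);
}

int main(void)
{
	char page0[DEMO_PAGE_SIZE + 1] = "........";
	char page1[DEMO_PAGE_SIZE + 1] = "........";
	char *pages[2] = { page0, page1 };

	/* Start 5 bytes into page0; the copy spills into page1. */
	copy_to_pages(pages, 5, "abcdefg", 7);
	printf("page0=%s page1=%s\n", page0, page1);	/* .....abc defg.... */
	return 0;
}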
 
/*
* xdr_shrink_bufhead
* @buf: xdr_buf
* @len: bytes to remove from buf->head[0]
*
* Shrinks XDR buffer's header iovec buf->head[0] by
* 'len' bytes. The extra data is not lost, but is instead
* moved into the inlined pages and/or the tail.
*/
void
xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
{
struct iovec *head, *tail;
size_t copy, offs;
unsigned int pglen = buf->page_len;
 
tail = buf->tail;
head = buf->head;
BUG_ON (len > head->iov_len);
 
/* Shift the tail first */
if (tail->iov_len != 0) {
if (tail->iov_len > len) {
copy = tail->iov_len - len;
memmove((char *)tail->iov_base + len,
tail->iov_base, copy);
}
/* Copy from the inlined pages into the tail */
copy = len;
if (copy > pglen)
copy = pglen;
offs = len - copy;
if (offs >= tail->iov_len)
copy = 0;
else if (copy > tail->iov_len - offs)
copy = tail->iov_len - offs;
if (copy != 0)
_copy_from_pages((char *)tail->iov_base + offs,
buf->pages,
buf->page_base + pglen + offs - len,
copy);
/* Do we also need to copy data from the head into the tail ? */
if (len > pglen) {
offs = copy = len - pglen;
if (copy > tail->iov_len)
copy = tail->iov_len;
memcpy(tail->iov_base,
(char *)head->iov_base +
head->iov_len - offs,
copy);
}
}
/* Now handle pages */
if (pglen != 0) {
if (pglen > len)
_shift_data_right_pages(buf->pages,
buf->page_base + len,
buf->page_base,
pglen - len);
copy = len;
if (len > pglen)
copy = pglen;
_copy_to_pages(buf->pages, buf->page_base,
(char *)head->iov_base + head->iov_len - len,
copy);
}
head->iov_len -= len;
buf->len -= len;
}
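 
/*
 * Illustrative sketch (not part of the original file): a worked example of
 * what xdr_shrink_bufhead() moves where.  The sizes below are assumed; the
 * program just prints the derived byte counts for one call with len = 16.
 */
#include <stdio.h>

int main(void)
{
	unsigned int head_len = 128, pglen = 4096, tail_len = 64, len = 16;
	unsigned int to_tail = len < pglen ? len : pglen;	/* page bytes moved into the tail */
	unsigned int to_pages = to_tail;			/* head bytes moved into the pages */

	printf("tail keeps its first %u bytes, shifted right by %u\n",
	       tail_len - len, len);
	printf("last %u bytes of page data move to the start of the tail\n",
	       to_tail);
	printf("page data shifts right by %u; last %u bytes of the head "
	       "fill the front of the pages\n", len, to_pages);
	printf("head shrinks from %u to %u bytes\n", head_len, head_len - len);
	return 0;
}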
 
void
xdr_shift_buf(struct xdr_buf *buf, size_t len)
{
xdr_shrink_bufhead(buf, len);
}
