URL
https://opencores.org/ocsvn/or1k_old/or1k_old/trunk
Subversion Repositories or1k_old
Compare Revisions
- This comparison shows the changes necessary to convert path
/or1k_old/trunk/rc203soc/sw/uClinux/ipc
- from Rev 1765 to Rev 1782
- ↔ Reverse comparison
Rev 1765 → Rev 1782
/sem.c
0,0 → 1,708
/* |
* linux/ipc/sem.c |
* Copyright (C) 1992 Krishna Balasubramanian |
* Copyright (C) 1995 Eric Schenk, Bruno Haible |
* |
* IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995): |
* This code underwent a massive rewrite in order to solve some problems |
* with the original code. In particular the original code failed to |
* wake up processes that were waiting for semval to go to 0 if the |
* value went to 0 and was then incremented rapidly enough. In solving |
* this problem I have also modified the implementation so that it |
* processes pending operations in a FIFO manner, thus give a guarantee |
* that processes waiting for a lock on the semaphore won't starve |
* unless another locking process fails to unlock. |
* In addition the following two changes in behavior have been introduced: |
* - The original implementation of semop returned the value |
* last semaphore element examined on success. This does not |
* match the manual page specifications, and effectively |
* allows the user to read the semaphore even if they do not |
* have read permissions. The implementation now returns 0 |
* on success as stated in the manual page. |
* - There is some confusion over whether the set of undo adjustments |
* to be performed at exit should be done in an atomic manner. |
* That is, if we are attempting to decrement the semval should we queue |
* up and wait until we can do so legally? |
* The original implementation attempted to do this. |
* The current implementation does not do so. This is because I don't |
* think it is the right thing (TM) to do, and because I couldn't |
* see a clean way to get the old behavior with the new design. |
* The POSIX standard and SVID should be consulted to determine |
* what behavior is mandated. |
*/ |
|
#include <linux/errno.h> |
#include <asm/segment.h> |
#include <linux/string.h> |
#include <linux/sched.h> |
#include <linux/sem.h> |
#include <linux/ipc.h> |
#include <linux/stat.h> |
#include <linux/malloc.h> |
|
extern int ipcperms (struct ipc_perm *ipcp, short semflg); |
static int newary (key_t, int, int); |
static int findkey (key_t key); |
static void freeary (int id); |
|
static struct semid_ds *semary[SEMMNI]; |
static int used_sems = 0, used_semids = 0; |
static struct wait_queue *sem_lock = NULL; |
static int max_semid = 0; |
|
static unsigned short sem_seq = 0; |
|
void sem_init (void) |
{ |
int i; |
|
sem_lock = NULL; |
used_sems = used_semids = max_semid = sem_seq = 0; |
for (i = 0; i < SEMMNI; i++) |
semary[i] = (struct semid_ds *) IPC_UNUSED; |
return; |
} |
|
/* Look up the semaphore set whose key matches `key`.
 * A slot marked IPC_NOID is mid-creation by another process, so sleep
 * until it settles (re-reading the slot after every wakeup).
 * Returns the slot index on success, -1 if the key is not in use.
 */
static int findkey (key_t key)
{
	int slot;

	for (slot = 0; slot <= max_semid; slot++) {
		struct semid_ds *sma;

		while ((sma = semary[slot]) == IPC_NOID)
			interruptible_sleep_on (&sem_lock);
		if (sma == IPC_UNUSED)
			continue;
		if (sma->sem_perm.key == key)
			return slot;
	}
	return -1;
}
|
/* Create a new semaphore set of `nsems` semaphores for `key`.
 * Returns the user-visible semid (seq * SEMMNI + slot) or a negative
 * error code.
 *
 * Protocol: the chosen slot is first marked IPC_NOID so that findkey()
 * callers sleep on sem_lock while we are inside the (possibly sleeping)
 * kmalloc; every exit path after that point must publish a final value
 * in the slot and wake_up(&sem_lock).
 */
static int newary (key_t key, int nsems, int semflg)
{
	int id;
	struct semid_ds *sma;
	struct ipc_perm *ipcp;
	int size;

	if (!nsems)
		return -EINVAL;
	if (used_sems + nsems > SEMMNS)	/* system-wide semaphore limit */
		return -ENOSPC;
	for (id = 0; id < SEMMNI; id++)
		if (semary[id] == IPC_UNUSED) {
			semary[id] = (struct semid_ds *) IPC_NOID;	/* reserve slot */
			goto found;
		}
	return -ENOSPC;
found:
	/* header and per-semaphore array are one allocation */
	size = sizeof (*sma) + nsems * sizeof (struct sem);
	used_sems += nsems;
	sma = (struct semid_ds *) kmalloc (size, GFP_KERNEL);	/* may sleep */
	if (!sma) {
		/* release the reservation and wake findkey() sleepers */
		semary[id] = (struct semid_ds *) IPC_UNUSED;
		used_sems -= nsems;
		wake_up (&sem_lock);
		return -ENOMEM;
	}
	memset (sma, 0, size);
	sma->sem_base = (struct sem *) &sma[1];	/* sems live right after the header */
	ipcp = &sma->sem_perm;
	ipcp->mode = (semflg & S_IRWXUGO);
	ipcp->key = key;
	ipcp->cuid = ipcp->uid = current->euid;
	ipcp->gid = ipcp->cgid = current->egid;
	sma->sem_perm.seq = sem_seq;	/* generation number guards stale ids */
	/* sma->sem_pending = NULL; */	/* already zeroed by memset */
	sma->sem_pending_last = &sma->sem_pending;
	/* sma->undo = NULL; */
	sma->sem_nsems = nsems;
	sma->sem_ctime = CURRENT_TIME;
	if (id > max_semid)
		max_semid = id;
	used_semids++;
	semary[id] = sma;	/* publish the finished set */
	wake_up (&sem_lock);
	return (unsigned int) sma->sem_perm.seq * SEMMNI + id;
}
|
/* semget(2): find or create the semaphore set identified by `key`.
 * Returns the user-visible semid or a negative error code.
 */
asmlinkage int sys_semget (key_t key, int nsems, int semflg)
{
	struct semid_ds *sma;
	int id;

	if (nsems < 0 || nsems > SEMMSL)
		return -EINVAL;
	/* IPC_PRIVATE always creates a fresh, unnamed set */
	if (key == IPC_PRIVATE)
		return newary(key, nsems, semflg);
	id = findkey (key);
	if (id == -1) {
		/* key not in use: create only if asked to */
		if (!(semflg & IPC_CREAT))
			return -ENOENT;
		return newary(key, nsems, semflg);
	}
	/* key exists: exclusive creation must fail */
	if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL))
		return -EEXIST;
	sma = semary[id];
	if (nsems > sma->sem_nsems)
		return -EINVAL;
	if (ipcperms(&sma->sem_perm, semflg))
		return -EACCES;
	return (unsigned int) sma->sem_perm.seq * SEMMNI + id;
}
|
/* Manage the doubly linked list sma->sem_pending as a FIFO: |
* insert new queue elements at the tail sma->sem_pending_last. |
*/ |
static inline void insert_into_queue (struct semid_ds * sma, struct sem_queue * q) |
{ |
*(q->prev = sma->sem_pending_last) = q; |
*(sma->sem_pending_last = &q->next) = NULL; |
} |
static inline void remove_from_queue (struct semid_ds * sma, struct sem_queue * q) |
{ |
*(q->prev) = q->next; |
if (q->next) |
q->next->prev = q->prev; |
else /* sma->sem_pending_last == &q->next */ |
sma->sem_pending_last = q->prev; |
q->prev = NULL; /* mark as removed */ |
} |
|
/* Determine whether a sequence of semaphore operations would succeed |
* all at once. Return 0 if yes, 1 if need to sleep, else return error code. |
*/ |
/* Determine whether a sequence of semaphore operations would succeed
 * all at once.  The operations are applied tentatively in order and
 * then fully rolled back before returning, so semvals are unchanged.
 * Return 0 if yes, 1 if need to sleep, else return error code.
 */
static int try_semop (struct semid_ds * sma, struct sembuf * sops, int nsops)
{
	int result = 0;
	int i = 0;

	while (i < nsops) {
		struct sembuf * sop = &sops[i];
		struct sem * curr = &sma->sem_base[sop->sem_num];
		/* would the increment push semval past its maximum? */
		if (sop->sem_op + curr->semval > SEMVMX) {
			result = -ERANGE;
			break;
		}
		/* wait-for-zero op blocks while semval is nonzero; i is not
		 * incremented, so this op is excluded from the rollback below */
		if (!sop->sem_op && curr->semval) {
			if (sop->sem_flg & IPC_NOWAIT)
				result = -EAGAIN;
			else
				result = 1;
			break;
		}
		/* i++ BEFORE applying: if this op drives semval negative the
		 * rollback loop must undo it as well */
		i++;
		curr->semval += sop->sem_op;
		if (curr->semval < 0) {
			if (sop->sem_flg & IPC_NOWAIT)
				result = -EAGAIN;
			else
				result = 1;
			break;
		}
	}
	/* roll back every tentatively applied operation (ops 0..i-1) */
	while (--i >= 0) {
		struct sembuf * sop = &sops[i];
		struct sem * curr = &sma->sem_base[sop->sem_num];
		curr->semval -= sop->sem_op;
	}
	return result;
}
|
/* Actually perform a sequence of semaphore operations. Atomically. */ |
/* This assumes that try_semop() already returned 0. */ |
/* Actually perform a sequence of semaphore operations. Atomically. */
/* This assumes that try_semop() already returned 0, so the "race"
 * printks below should be unreachable; they flag a logic error in the
 * caller rather than a recoverable condition.
 */
static int do_semop (struct semid_ds * sma, struct sembuf * sops, int nsops,
		     struct sem_undo * un, int pid)
{
	int i;

	for (i = 0; i < nsops; i++) {
		struct sembuf * sop = &sops[i];
		struct sem * curr = &sma->sem_base[sop->sem_num];
		if (sop->sem_op + curr->semval > SEMVMX) {
			printk("do_semop: race\n");
			break;
		}
		if (!sop->sem_op) {
			/* wait-for-zero op: nothing to apply, but semval must
			 * indeed be zero by now */
			if (curr->semval) {
				printk("do_semop: race\n");
				break;
			}
		} else {
			curr->semval += sop->sem_op;
			if (curr->semval < 0) {
				printk("do_semop: race\n");
				break;
			}
			/* record the inverse adjustment so sem_exit() can
			 * undo this op if the process dies */
			if (sop->sem_flg & SEM_UNDO)
				un->semadj[sop->sem_num] -= sop->sem_op;
		}
		curr->sempid = pid;
	}
	sma->sem_otime = CURRENT_TIME;

	/* Previous implementation returned the last semaphore's semval.
	 * This is wrong because we may not have checked read permission,
	 * only write permission.
	 */
	return 0;
}
|
/* Go through the pending queue for the indicated semaphore |
* looking for tasks that can be completed. Keep cycling through |
* the queue until a pass is made in which no process is woken up. |
*/ |
/* Go through the pending queue for the indicated semaphore
 * looking for tasks that can be completed. Keep cycling through
 * the queue until a pass is made in which no process is woken up.
 * (A completed operation may unblock an earlier-queued one, hence
 * the outer fixpoint loop.)
 */
static void update_queue (struct semid_ds * sma)
{
	int wokeup, error;
	struct sem_queue * q;

	do {
		wokeup = 0;
		for (q = sma->sem_pending; q; q = q->next) {
			error = try_semop(sma, q->sops, q->nsops);
			/* Does q->sleeper still need to sleep? */
			if (error > 0)
				continue;
			/* Perform the operations the sleeper was waiting for */
			if (!error)
				error = do_semop(sma, q->sops, q->nsops, q->undo, q->pid);
			/* status (0 or error) is what sys_semop() will return */
			q->status = error;
			/* Remove it from the queue */
			remove_from_queue(sma,q);
			/* Wake it up */
			wake_up_interruptible(&q->sleeper); /* doesn't sleep! */
			wokeup++;
		}
	} while (wokeup);
}
|
/* The following counts are associated to each semaphore: |
* semncnt number of tasks waiting on semval being nonzero |
* semzcnt number of tasks waiting on semval being zero |
* This model assumes that a task waits on exactly one semaphore. |
* Since semaphore operations are to be performed atomically, tasks actually |
* wait on a whole sequence of semaphores simultaneously. |
* The counts we return here are a rough approximation, but still |
* warrant that semncnt+semzcnt>0 if the task is on the pending queue. |
*/ |
/* GETNCNT helper: count pending (blocking) operations that want
 * semaphore `semnum` of set `sma` to increase (sem_op < 0).
 */
static int count_semncnt (struct semid_ds * sma, ushort semnum)
{
	struct sem_queue * q;
	int count = 0;

	for (q = sma->sem_pending; q; q = q->next) {
		int i;

		for (i = 0; i < q->nsops; i++) {
			struct sembuf * sop = &q->sops[i];

			if (sop->sem_num == semnum
			    && sop->sem_op < 0
			    && !(sop->sem_flg & IPC_NOWAIT))
				count++;
		}
	}
	return count;
}
/* GETZCNT helper: count pending (blocking) operations waiting for
 * semaphore `semnum` of set `sma` to become zero (sem_op == 0).
 */
static int count_semzcnt (struct semid_ds * sma, ushort semnum)
{
	struct sem_queue * q;
	int count = 0;

	for (q = sma->sem_pending; q; q = q->next) {
		int i;

		for (i = 0; i < q->nsops; i++) {
			struct sembuf * sop = &q->sops[i];

			if (sop->sem_num == semnum
			    && sop->sem_op == 0
			    && !(sop->sem_flg & IPC_NOWAIT))
				count++;
		}
	}
	return count;
}
|
/* Free a semaphore set. */ |
/* Free a semaphore set.  Invalidates the id first (seq bump) so that
 * any process holding the old semid gets -EIDRM, then detaches undo
 * records, fails all sleepers, and finally releases the memory.
 */
static void freeary (int id)
{
	struct semid_ds *sma = semary[id];
	struct sem_undo *un;
	struct sem_queue *q;

	/* Invalidate this semaphore set */
	sma->sem_perm.seq++;
	sem_seq = (sem_seq+1) % ((unsigned)(1<<31)/SEMMNI); /* increment, but avoid overflow */
	used_sems -= sma->sem_nsems;
	if (id == max_semid)
		while (max_semid && (semary[--max_semid] == IPC_UNUSED));
	semary[id] = (struct semid_ds *) IPC_UNUSED;
	used_semids--;

	/* Invalidate the existing undo structures for this semaphore set.
	 * (They will be freed without any further action in sem_exit().)
	 */
	for (un = sma->undo; un; un = un->id_next)
		un->semid = -1;

	/* Wake up all pending processes and let them fail with EIDRM.
	 * q->prev = NULL tells the sleeper it is no longer queued.
	 */
	for (q = sma->sem_pending; q; q = q->next) {
		q->status = -EIDRM;
		q->prev = NULL;
		wake_up_interruptible(&q->sleeper); /* doesn't sleep! */
	}

	kfree(sma);
}
|
/* semctl(2): semaphore control operations.
 * `cmd` selects the action; `arg` is a union whose active member
 * depends on cmd.  Structure:
 *   1. IPC_INFO/SEM_INFO/SEM_STAT are handled before semid is decoded.
 *   2. semid is split into slot index (% SEMMNI) and generation
 *      (/ SEMMNI); a mismatch means the id is stale (-EIDRM).
 *   3. A prep switch does permission checks and user-memory
 *      verification/copies; verify_area and memcpy_fromfs may sleep,
 *      so the set is re-validated afterwards before anything is
 *      modified.
 *   4. The final switch performs the operation.
 */
asmlinkage int sys_semctl (int semid, int semnum, int cmd, union semun arg)
{
	struct semid_ds *buf = NULL;
	struct semid_ds tbuf;
	int i, id, val = 0;
	struct semid_ds *sma;
	struct ipc_perm *ipcp;
	struct sem *curr = NULL;
	struct sem_undo *un;
	unsigned int nsems;
	ushort *array = NULL;
	ushort sem_io[SEMMSL];		/* kernel-side staging for GETALL/SETALL */

	if (semid < 0 || semnum < 0 || cmd < 0)
		return -EINVAL;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		/* report compile-time limits; SEM_INFO substitutes current
		 * usage counts in semusz/semaem */
		struct seminfo seminfo, *tmp = arg.__buf;
		seminfo.semmni = SEMMNI;
		seminfo.semmns = SEMMNS;
		seminfo.semmsl = SEMMSL;
		seminfo.semopm = SEMOPM;
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
		seminfo.semusz = SEMUSZ;
		seminfo.semaem = SEMAEM;
		if (cmd == SEM_INFO) {
			seminfo.semusz = used_semids;
			seminfo.semaem = used_sems;
		}
		i = verify_area(VERIFY_WRITE, tmp, sizeof(struct seminfo));
		if (i)
			return i;
		memcpy_tofs (tmp, &seminfo, sizeof(struct seminfo));
		return max_semid;
	}

	case SEM_STAT:
		/* here `semid` is a raw slot index, not a full id */
		buf = arg.buf;
		i = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
		if (i)
			return i;
		if (semid > max_semid)
			return -EINVAL;
		sma = semary[semid];
		if (sma == IPC_UNUSED || sma == IPC_NOID)
			return -EINVAL;
		if (ipcperms (&sma->sem_perm, S_IRUGO))
			return -EACCES;
		id = (unsigned int) sma->sem_perm.seq * SEMMNI + semid;
		tbuf.sem_perm   = sma->sem_perm;
		tbuf.sem_otime  = sma->sem_otime;
		tbuf.sem_ctime  = sma->sem_ctime;
		tbuf.sem_nsems  = sma->sem_nsems;
		memcpy_tofs (buf, &tbuf, sizeof(*buf));
		return id;	/* the full user-visible semid */
	}

	/* decode slot + generation and validate the set */
	id = (unsigned int) semid % SEMMNI;
	sma = semary [id];
	if (sma == IPC_UNUSED || sma == IPC_NOID)
		return -EINVAL;
	ipcp = &sma->sem_perm;
	nsems = sma->sem_nsems;
	if (sma->sem_perm.seq != (unsigned int) semid / SEMMNI)
		return -EIDRM;

	/* per-semaphore commands need a valid semnum and the sem pointer */
	switch (cmd) {
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETVAL:
		if (semnum >= nsems)
			return -EINVAL;
		curr = &sma->sem_base[semnum];
		break;
	}

	/* prep phase: permission checks, argument validation, user-memory
	 * verification/copies (verify_area / memcpy_fromfs may sleep) */
	switch (cmd) {
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
		if (ipcperms (ipcp, S_IRUGO))
			return -EACCES;
		switch (cmd) {
		case GETVAL : return curr->semval;
		case GETPID : return curr->sempid;
		case GETNCNT: return count_semncnt(sma,semnum);
		case GETZCNT: return count_semzcnt(sma,semnum);
		case GETALL:
			array = arg.array;
			i = verify_area (VERIFY_WRITE, array, nsems*sizeof(ushort));
			if (i)
				return i;
		}
		break;
	case SETVAL:
		val = arg.val;
		if (val > SEMVMX || val < 0)
			return -ERANGE;
		break;
	case IPC_RMID:
		/* root, creator, or owner may remove the set */
		if (suser() || current->euid == ipcp->cuid || current->euid == ipcp->uid) {
			freeary (id);
			return 0;
		}
		return -EPERM;
	case SETALL: /* arg is a pointer to an array of ushort */
		array = arg.array;
		if ((i = verify_area (VERIFY_READ, array, nsems*sizeof(ushort))))
			return i;
		memcpy_fromfs (sem_io, array, nsems*sizeof(ushort));
		for (i = 0; i < nsems; i++)
			if (sem_io[i] > SEMVMX)
				return -ERANGE;
		break;
	case IPC_STAT:
		buf = arg.buf;
		if ((i = verify_area (VERIFY_WRITE, buf, sizeof(*buf))))
			return i;
		break;
	case IPC_SET:
		buf = arg.buf;
		if ((i = verify_area (VERIFY_READ, buf, sizeof (*buf))))
			return i;
		memcpy_fromfs (&tbuf, buf, sizeof (*buf));
		break;
	}

	/* the prep phase may have slept: re-check that the set still
	 * exists and the id generation still matches */
	if (semary[id] == IPC_UNUSED || semary[id] == IPC_NOID)
		return -EIDRM;
	if (sma->sem_perm.seq != (unsigned int) semid / SEMMNI)
		return -EIDRM;

	/* action phase */
	switch (cmd) {
	case GETALL:
		if (ipcperms (ipcp, S_IRUGO))
			return -EACCES;
		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
		memcpy_tofs (array, sem_io, nsems*sizeof(ushort));
		break;
	case SETVAL:
		if (ipcperms (ipcp, S_IWUGO))
			return -EACCES;
		/* SETVAL clears any registered undo adjustments for this sem */
		for (un = sma->undo; un; un = un->id_next)
			un->semadj[semnum] = 0;
		curr->semval = val;
		sma->sem_ctime = CURRENT_TIME;
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		break;
	case IPC_SET:
		if (suser() || current->euid == ipcp->cuid || current->euid == ipcp->uid) {
			ipcp->uid = tbuf.sem_perm.uid;
			ipcp->gid = tbuf.sem_perm.gid;
			ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (tbuf.sem_perm.mode & S_IRWXUGO);
			sma->sem_ctime = CURRENT_TIME;
			return 0;
		}
		return -EPERM;
	case IPC_STAT:
		if (ipcperms (ipcp, S_IRUGO))
			return -EACCES;
		tbuf.sem_perm   = sma->sem_perm;
		tbuf.sem_otime  = sma->sem_otime;
		tbuf.sem_ctime  = sma->sem_ctime;
		tbuf.sem_nsems  = sma->sem_nsems;
		memcpy_tofs (buf, &tbuf, sizeof(*buf));
		break;
	case SETALL:
		if (ipcperms (ipcp, S_IWUGO))
			return -EACCES;
		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];
		/* like SETVAL: wipe all undo adjustments for the set */
		for (un = sma->undo; un; un = un->id_next)
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		sma->sem_ctime = CURRENT_TIME;
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
|
/* semop(2): perform `nsops` semaphore operations atomically on the set
 * identified by `semid`.  If the whole sequence cannot go through now
 * (and IPC_NOWAIT is not set), the process queues itself on the set's
 * pending FIFO and sleeps; update_queue() completes the operation and
 * wakes it later.  Returns 0 on success or a negative error code.
 */
asmlinkage int sys_semop (int semid, struct sembuf *tsops, unsigned nsops)
{
	int i, id, size, error;
	struct semid_ds *sma;
	struct sembuf sops[SEMOPM], *sop;	/* kernel copy of the op list */
	struct sem_undo *un;
	int undos = 0, alter = 0;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > SEMOPM)
		return -E2BIG;
	if (!tsops)
		return -EFAULT;
	if ((i = verify_area (VERIFY_READ, tsops, nsops * sizeof(*tsops))))
		return i;
	memcpy_fromfs (sops, tsops, nsops * sizeof(*tsops));
	/* decode slot + generation, validate the set */
	id = (unsigned int) semid % SEMMNI;
	if ((sma = semary[id]) == IPC_UNUSED || sma == IPC_NOID)
		return -EINVAL;
	if (sma->sem_perm.seq != (unsigned int) semid / SEMMNI)
		return -EIDRM;
	/* scan the ops: validate sem numbers, note whether any op needs an
	 * undo record and whether any op modifies a value */
	for (i = 0; i < nsops; i++) {
		sop = &sops[i];
		if (sop->sem_num >= sma->sem_nsems)
			return -EFBIG;
		if (sop->sem_flg & SEM_UNDO)
			undos++;
		if (sop->sem_op)
			alter++;
	}
	/* read-only sequences (all ops are wait-for-zero) only need read
	 * permission */
	if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
		return -EACCES;
	error = try_semop(sma, sops, nsops);
	if (error < 0)
		return error;
	if (undos) {
		/* Make sure we have an undo structure
		 * for this process and this semaphore set.
		 */
		for (un = current->semundo; un; un = un->proc_next)
			if (un->semid == semid)
				break;
		if (!un) {
			/* header plus one short adjustment per semaphore */
			size = sizeof(struct sem_undo) + sizeof(short)*sma->sem_nsems;
			un = (struct sem_undo *) kmalloc(size, GFP_ATOMIC);
			if (!un)
				return -ENOMEM;
			memset(un, 0, size);
			un->semadj = (short *) &un[1];
			un->semid = semid;
			/* link into both the per-process and per-set lists */
			un->proc_next = current->semundo;
			current->semundo = un;
			un->id_next = sma->undo;
			sma->undo = un;
		}
	} else
		un = NULL;
	if (error == 0) {
		/* the operations go through immediately */
		error = do_semop(sma, sops, nsops, un, current->pid);
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		return error;
	} else {
		/* We need to sleep on this operation, so we put the current
		 * task into the pending queue and go to sleep.
		 * The queue element lives on our kernel stack; it is safe
		 * because we do not return until it has been unlinked.
		 */
		struct sem_queue queue;

		queue.sma = sma;
		queue.sops = sops;
		queue.nsops = nsops;
		queue.undo = un;
		queue.pid = current->pid;
		queue.status = 0;
		insert_into_queue(sma,&queue);
		queue.sleeper = NULL;
		current->semsleeping = &queue;
		interruptible_sleep_on(&queue.sleeper);
		current->semsleeping = NULL;
		/* When we wake up, either the operation is finished,
		 * or some kind of error happened.
		 */
		if (!queue.prev) {
			/* operation is finished, update_queue() removed us */
			return queue.status;
		} else {
			/* still queued: we were woken by a signal */
			remove_from_queue(sma,&queue);
			return -EINTR;
		}
	}
}
|
/* |
* add semadj values to semaphores, free undo structures. |
* undo structures are not freed when semaphore arrays are destroyed |
* so some of them may be out of date. |
* IMPLEMENTATION NOTE: There is some confusion over whether the |
* set of adjustments that needs to be done should be done in an atomic |
* manner or not. That is, if we are attempting to decrement the semval |
* should we queue up and wait until we can do so legally? |
* The original implementation attempted to do this (queue and wait). |
* The current implementation does not do so. The POSIX standard |
* and SVID should be consulted to determine what behavior is mandated. |
*/ |
void sem_exit (void) |
{ |
struct sem_queue *q; |
struct sem_undo *u, *un = NULL, **up, **unp; |
struct semid_ds *sma; |
int nsems, i; |
|
/* If the current process was sleeping for a semaphore, |
* remove it from the queue. |
*/ |
if ((q = current->semsleeping)) { |
if (q->prev) |
remove_from_queue(q->sma,q); |
current->semsleeping = NULL; |
} |
|
for (up = ¤t->semundo; (u = *up); *up = u->proc_next, kfree(u)) { |
if (u->semid == -1) |
continue; |
sma = semary[(unsigned int) u->semid % SEMMNI]; |
if (sma == IPC_UNUSED || sma == IPC_NOID) |
continue; |
if (sma->sem_perm.seq != (unsigned int) u->semid / SEMMNI) |
continue; |
/* remove u from the sma->undo list */ |
for (unp = &sma->undo; (un = *unp); unp = &un->id_next) { |
if (u == un) |
goto found; |
} |
printk ("sem_exit undo list error id=%d\n", u->semid); |
break; |
found: |
*unp = un->id_next; |
/* perform adjustments registered in u */ |
nsems = sma->sem_nsems; |
for (i = 0; i < nsems; i++) { |
struct sem * sem = &sma->sem_base[i]; |
sem->semval += u->semadj[i]; |
if (sem->semval < 0) |
sem->semval = 0; /* shouldn't happen */ |
sem->sempid = current->pid; |
} |
sma->sem_otime = CURRENT_TIME; |
/* maybe some queued-up processes were waiting for this */ |
update_queue(sma); |
} |
current->semundo = NULL; |
} |
/msg.c
0,0 → 1,767
/* |
* linux/ipc/msg.c |
* Copyright (C) 1992 Krishna Balasubramanian |
* |
* Kerneld extensions by Bjorn Ekwall <bj0rn@blox.se> in May 1995, and May 1996 |
* |
* See <linux/kerneld.h> for the (optional) new kerneld protocol |
*/ |
|
#include <linux/config.h> |
#include <linux/errno.h> |
#include <linux/sched.h> |
#include <linux/msg.h> |
#include <linux/stat.h> |
#include <linux/malloc.h> |
#include <linux/kerneld.h> |
#include <linux/interrupt.h> |
|
#include <asm/segment.h> |
|
extern int ipcperms (struct ipc_perm *ipcp, short msgflg); |
|
static void freeque (int id); |
static int newque (key_t key, int msgflg); |
static int findkey (key_t key); |
|
static struct msqid_ds *msgque[MSGMNI]; |
static int msgbytes = 0; |
static int msghdrs = 0; |
static unsigned short msg_seq = 0; |
static int used_queues = 0; |
static int max_msqid = 0; |
static struct wait_queue *msg_lock = NULL; |
static int kerneld_msqid = -1; |
|
#define MAX_KERNELDS 20 |
static int kerneld_arr[MAX_KERNELDS]; |
static int n_kernelds = 0; |
|
void msg_init (void) |
{ |
int id; |
|
for (id = 0; id < MSGMNI; id++) |
msgque[id] = (struct msqid_ds *) IPC_UNUSED; |
msgbytes = msghdrs = msg_seq = max_msqid = used_queues = 0; |
msg_lock = NULL; |
return; |
} |
|
/* |
* If the send queue is full, try to free any old messages. |
* These are most probably unwanted, since no one has picked them up... |
*/ |
#define MSG_FLUSH_TIME 10 /* seconds */ |
static void flush_msg(struct msqid_ds *msq) |
{ |
struct msg *nmsg; |
unsigned long flags; |
int flushed = 0; |
|
save_flags(flags); |
cli(); |
|
/* messages were put on the queue in time order */ |
while ( (nmsg = msq->msg_first) && |
((CURRENT_TIME - nmsg->msg_stime) > MSG_FLUSH_TIME)) { |
msgbytes -= nmsg->msg_ts; |
msghdrs--; |
msq->msg_cbytes -= nmsg->msg_ts; |
msq->msg_qnum--; |
msq->msg_first = nmsg->msg_next; |
++flushed; |
kfree(nmsg); |
} |
|
if (msq->msg_qnum == 0) |
msq->msg_first = msq->msg_last = NULL; |
restore_flags(flags); |
if (flushed) |
printk(KERN_WARNING "flushed %d old SYSVIPC messages", flushed); |
} |
|
/* Core of msgsnd(2), shared by the user syscall and kernel-level
 * (IPC_KERNELD) senders.  Copies the message into a freshly allocated
 * header+body, waits for queue space if necessary, then links it onto
 * the queue tail with interrupts disabled.  Returns 0 or -errno.
 */
static int real_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
{
	int id, err;
	struct msqid_ds *msq;
	struct ipc_perm *ipcp;
	struct msg *msgh;
	long mtype;
	unsigned long flags;

	if (msgsz > MSGMAX || (long) msgsz < 0 || msqid < 0)
		return -EINVAL;
	if (!msgp)
		return -EFAULT;
	/*
	 * Calls from kernel level (IPC_KERNELD set)
	 * have the message somewhere in kernel space already!
	 */
	if ((msgflg & IPC_KERNELD))
		mtype = msgp->mtype;
	else {
		err = verify_area (VERIFY_READ, msgp->mtext, msgsz);
		if (err)
			return err;
		if ((mtype = get_user (&msgp->mtype)) < 1)
			return -EINVAL;	/* message types must be positive */
	}
	/* decode slot + generation */
	id = (unsigned int) msqid % MSGMNI;
	msq = msgque [id];
	if (msq == IPC_UNUSED || msq == IPC_NOID)
		return -EINVAL;
	ipcp = &msq->msg_perm;

 slept:
	/* generation re-checked after every sleep */
	if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI)
		return -EIDRM;
	/*
	 * Non-root kernel level processes may send to kerneld!
	 * i.e. no permission check if called from the kernel
	 * otoh we don't want user level non-root snoopers...
	 */
	if ((msgflg & IPC_KERNELD) == 0)
		if (ipcperms(ipcp, S_IWUGO))
			return -EACCES;

	if (msgsz + msq->msg_cbytes > msq->msg_qbytes) {
		/* no space: the kerneld channel may drop stale messages */
		if ((kerneld_msqid != -1) && (kerneld_msqid == msqid))
			flush_msg(msq);	/* flush the kerneld channel only */
		if (msgsz + msq->msg_cbytes > msq->msg_qbytes) {
			/* still no space in queue */
			if (msgflg & IPC_NOWAIT)
				return -EAGAIN;
			if (current->signal & ~current->blocked)
				return -EINTR;
			if (intr_count) {
				/* Very unlikely, but better safe than sorry:
				 * cannot sleep in interrupt context */
				printk(KERN_WARNING "Ouch, kerneld:msgsnd buffers full!\n");
				return -EINTR;
			}
			interruptible_sleep_on (&msq->wwait);
			goto slept;
		}
	}

	/* allocate message header and text space*/
	msgh = (struct msg *) kmalloc (sizeof(*msgh) + msgsz, GFP_ATOMIC);
	if (!msgh)
		return -ENOMEM;
	msgh->msg_spot = (char *) (msgh + 1);	/* body follows header */

	/*
	 * Calls from kernel level (IPC_KERNELD set)
	 * have the message somewhere in kernel space already!
	 */
	if (msgflg & IPC_KERNELD) {
		struct kerneld_msg *kdmp = (struct kerneld_msg *)msgp;

		/*
		 * Note that the kernel supplies a pointer
		 * but the user-level kerneld uses a char array...
		 */
		memcpy(msgh->msg_spot, (char *)(&(kdmp->id)), KDHDR);
		memcpy(msgh->msg_spot + KDHDR, kdmp->text, msgsz - KDHDR);
	}
	else
		memcpy_fromfs (msgh->msg_spot, msgp->mtext, msgsz);

	/* the copy above may have slept (page fault): re-validate */
	if (msgque[id] == IPC_UNUSED || msgque[id] == IPC_NOID
	    || msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) {
		kfree(msgh);
		return -EIDRM;
	}

	msgh->msg_next = NULL;
	msgh->msg_ts = msgsz;
	msgh->msg_type = mtype;
	msgh->msg_stime = CURRENT_TIME;

	/* link onto the queue tail and update counters atomically */
	save_flags(flags);
	cli();
	if (!msq->msg_first)
		msq->msg_first = msq->msg_last = msgh;
	else {
		msq->msg_last->msg_next = msgh;
		msq->msg_last = msgh;
	}
	msq->msg_cbytes += msgsz;
	msgbytes += msgsz;
	msghdrs++;
	msq->msg_qnum++;
	msq->msg_lspid = current->pid;
	msq->msg_stime = CURRENT_TIME;
	restore_flags(flags);
	wake_up (&msq->rwait);	/* a reader may be waiting for this message */
	return 0;
}
|
/* |
* Take care of missing kerneld, especially in case of multiple daemons |
*/ |
#define KERNELD_TIMEOUT 1 * (HZ) |
#define DROP_TIMER del_timer(&kd_timer) |
/*#define DROP_TIMER if ((msgflg & IPC_KERNELD) && kd_timer.next && kd_timer.prev) del_timer(&kd_timer)*/ |
|
/* Timer callback armed by real_msgrcv() for kerneld requests: if the
 * request identified by `msgid` is still sitting unanswered on the
 * kerneld queue after KERNELD_TIMEOUT, no daemon picked it up — send a
 * synthetic -ENODEV reply so the in-kernel waiter is not stuck forever.
 */
static void kd_timeout(unsigned long msgid)
{
	struct msqid_ds *msq;
	struct msg *tmsg;
	unsigned long flags;

	msq = msgque [ (unsigned int) kerneld_msqid % MSGMNI ];
	if (msq == IPC_NOID || msq == IPC_UNUSED)
		return;

	/* scan for the request; the first long of each kerneld message
	 * body is its id */
	save_flags(flags);
	cli();
	for (tmsg = msq->msg_first; tmsg; tmsg = tmsg->msg_next)
		if (*(long *)(tmsg->msg_spot) == msgid)
			break;
	restore_flags(flags);
	if (tmsg) {	/* still there! */
		struct kerneld_msg kmsp = { msgid, NULL_KDHDR, "" };

		printk(KERN_ALERT "Ouch, no kerneld for message %ld\n", msgid);
		kmsp.id = -ENODEV;	/* reply status: no daemon available */
		real_msgsnd(kerneld_msqid, (struct msgbuf *)&kmsp, KDHDR,
			    S_IRUSR | S_IWUSR | IPC_KERNELD | MSG_NOERROR);
	}
}
|
/* Core of msgrcv(2), shared by the user syscall and kernel-level
 * (IPC_KERNELD) receivers.  Searches the queue for a message matching
 * `msgtyp`, sleeping until one arrives (unless IPC_NOWAIT).  For
 * kerneld calls a watchdog timer (kd_timeout) guards against a missing
 * daemon.  Returns the number of bytes copied or -errno.
 */
static int real_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
{
	struct timer_list kd_timer = { NULL, NULL, 0, 0, 0};
	struct msqid_ds *msq;
	struct ipc_perm *ipcp;
	struct msg *tmsg, *leastp = NULL;
	struct msg *nmsg = NULL;
	int id, err;
	unsigned long flags;

	if (msqid < 0 || (long) msgsz < 0)
		return -EINVAL;
	if (!msgp || !msgp->mtext)
		return -EFAULT;
	/*
	 * Calls from kernel level (IPC_KERNELD set)
	 * wants the message put in kernel space!
	 */
	if ((msgflg & IPC_KERNELD) == 0) {
		err = verify_area (VERIFY_WRITE, msgp->mtext, msgsz);
		if (err)
			return err;
	}

	/* decode slot + generation */
	id = (unsigned int) msqid % MSGMNI;
	msq = msgque [id];
	if (msq == IPC_NOID || msq == IPC_UNUSED)
		return -EINVAL;
	ipcp = &msq->msg_perm;

	/*
	 * Start timer for missing kerneld
	 * (every return path below must cancel it via DROP_TIMER)
	 */
	if (msgflg & IPC_KERNELD) {
		kd_timer.data = (unsigned long)msgtyp;	/* msgtyp doubles as the request id */
		kd_timer.expires = jiffies + KERNELD_TIMEOUT;
		kd_timer.function = kd_timeout;
		add_timer(&kd_timer);
	}

	/*
	 * find message of correct type.
	 * msgtyp = 0 => get first.
	 * msgtyp > 0 => get first message of matching type.
	 * msgtyp < 0 => get message with least type must be < abs(msgtype).
	 */
	while (!nmsg) {
		/* re-validate after every sleep */
		if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI) {
			DROP_TIMER;
			return -EIDRM;
		}
		if ((msgflg & IPC_KERNELD) == 0) {
			/*
			 * All kernel level processes may receive from kerneld!
			 * i.e. no permission check if called from the kernel
			 * otoh we don't want user level non-root snoopers...
			 */
			if (ipcperms (ipcp, S_IRUGO)) {
				DROP_TIMER; /* Not needed, but doesn't hurt */
				return -EACCES;
			}
		}

		/* search under cli(): senders may run from interrupts */
		save_flags(flags);
		cli();
		if (msgtyp == 0)
			nmsg = msq->msg_first;
		else if (msgtyp > 0) {
			if (msgflg & MSG_EXCEPT) {
				/* first message whose type differs */
				for (tmsg = msq->msg_first; tmsg;
				     tmsg = tmsg->msg_next)
					if (tmsg->msg_type != msgtyp)
						break;
				nmsg = tmsg;
			} else {
				/* first message of exactly this type */
				for (tmsg = msq->msg_first; tmsg;
				     tmsg = tmsg->msg_next)
					if (tmsg->msg_type == msgtyp)
						break;
				nmsg = tmsg;
			}
		} else {
			/* msgtyp < 0: smallest type not exceeding -msgtyp */
			for (leastp = tmsg = msq->msg_first; tmsg;
			     tmsg = tmsg->msg_next)
				if (tmsg->msg_type < leastp->msg_type)
					leastp = tmsg;
			if (leastp && leastp->msg_type <= - msgtyp)
				nmsg = leastp;
		}
		restore_flags(flags);

		if (nmsg) { /* done finding a message */
			DROP_TIMER;
			if ((msgsz < nmsg->msg_ts) && !(msgflg & MSG_NOERROR)) {
				return -E2BIG;	/* too big and truncation not allowed */
			}
			msgsz = (msgsz > nmsg->msg_ts)? nmsg->msg_ts : msgsz;
			/* unlink the message and update counters atomically */
			save_flags(flags);
			cli();
			if (nmsg == msq->msg_first)
				msq->msg_first = nmsg->msg_next;
			else {
				/* find nmsg's predecessor to splice it out */
				for (tmsg = msq->msg_first; tmsg;
				     tmsg = tmsg->msg_next)
					if (tmsg->msg_next == nmsg)
						break;
				tmsg->msg_next = nmsg->msg_next;
				if (nmsg == msq->msg_last)
					msq->msg_last = tmsg;
			}
			if (!(--msq->msg_qnum))
				msq->msg_last = msq->msg_first = NULL;

			msq->msg_rtime = CURRENT_TIME;
			msq->msg_lrpid = current->pid;
			msgbytes -= nmsg->msg_ts;
			msghdrs--;
			msq->msg_cbytes -= nmsg->msg_ts;
			restore_flags(flags);
			wake_up (&msq->wwait);	/* writers may have room now */
			/*
			 * Calls from kernel level (IPC_KERNELD set)
			 * wants the message copied to kernel space!
			 */
			if (msgflg & IPC_KERNELD) {
				struct kerneld_msg *kdmp = (struct kerneld_msg *) msgp;

				memcpy((char *)(&(kdmp->id)),
				       nmsg->msg_spot, KDHDR);
				/*
				 * Note that kdmp->text is a pointer
				 * when called from kernel space!
				 */
				if ((msgsz > KDHDR) && kdmp->text)
					memcpy(kdmp->text,
					       nmsg->msg_spot + KDHDR,
					       msgsz - KDHDR);
			}
			else {
				put_user (nmsg->msg_type, &msgp->mtype);
				memcpy_tofs (msgp->mtext, nmsg->msg_spot, msgsz);
			}
			kfree(nmsg);
			return msgsz;
		} else { /* did not find a message */
			if (msgflg & IPC_NOWAIT) {
				DROP_TIMER;
				return -ENOMSG;
			}
			if (current->signal & ~current->blocked) {
				DROP_TIMER;
				return -EINTR;
			}
			interruptible_sleep_on (&msq->rwait);
		}
	} /* end while */
	/* not reached: the loop only exits via return */
	DROP_TIMER;
	return -1;
}
|
/* msgsnd(2) entry point: forwards to real_msgsnd() with IPC_KERNELD
 * stripped, so user space can never masquerade as a kernel-level call.
 */
asmlinkage int sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
{
	/* IPC_KERNELD is used as a marker for kernel level calls */
	return real_msgsnd(msqid, msgp, msgsz, msgflg & ~IPC_KERNELD);
}
|
/* msgrcv(2) entry point: forwards to real_msgrcv() with IPC_KERNELD
 * stripped, so user space can never masquerade as a kernel-level call.
 */
asmlinkage int sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz,
			   long msgtyp, int msgflg)
{
	/* IPC_KERNELD is used as a marker for kernel level calls */
	return real_msgrcv (msqid, msgp, msgsz, msgtyp, msgflg & ~IPC_KERNELD);
}
|
/* Look up the message queue whose key matches `key`.
 * A slot marked IPC_NOID is mid-creation by another process, so sleep
 * until it settles (re-reading the slot after every wakeup).
 * Returns the slot index on success, -1 if the key is not in use.
 */
static int findkey (key_t key)
{
	int slot;

	for (slot = 0; slot <= max_msqid; slot++) {
		struct msqid_ds *msq;

		while ((msq = msgque[slot]) == IPC_NOID)
			interruptible_sleep_on (&msg_lock);
		if (msq == IPC_UNUSED)
			continue;
		if (msq->msg_perm.key == key)
			return slot;
	}
	return -1;
}
|
/* Create a new message queue for `key`.  Returns the user-visible
 * msqid (seq * MSGMNI + slot) or a negative error code.
 *
 * Like newary(): the slot is marked IPC_NOID so findkey() callers
 * sleep on msg_lock during the (possibly sleeping) kmalloc; both exit
 * paths publish a final slot value and wake_up(&msg_lock).
 */
static int newque (key_t key, int msgflg)
{
	int id;
	struct msqid_ds *msq;
	struct ipc_perm *ipcp;

	for (id = 0; id < MSGMNI; id++)
		if (msgque[id] == IPC_UNUSED) {
			msgque[id] = (struct msqid_ds *) IPC_NOID;	/* reserve slot */
			goto found;
		}
	return -ENOSPC;

found:
	msq = (struct msqid_ds *) kmalloc (sizeof (*msq), GFP_KERNEL);	/* may sleep */
	if (!msq) {
		/* release the reservation and wake findkey() sleepers */
		msgque[id] = (struct msqid_ds *) IPC_UNUSED;
		wake_up (&msg_lock);
		return -ENOMEM;
	}
	ipcp = &msq->msg_perm;
	ipcp->mode = (msgflg & S_IRWXUGO);
	ipcp->key = key;
	ipcp->cuid = ipcp->uid = current->euid;
	ipcp->gid = ipcp->cgid = current->egid;
	msq->msg_perm.seq = msg_seq;	/* generation number guards stale ids */
	msq->msg_first = msq->msg_last = NULL;
	msq->rwait = msq->wwait = NULL;
	msq->msg_cbytes = msq->msg_qnum = 0;
	msq->msg_lspid = msq->msg_lrpid = 0;
	msq->msg_stime = msq->msg_rtime = 0;
	msq->msg_qbytes = MSGMNB;	/* default per-queue byte limit */
	msq->msg_ctime = CURRENT_TIME;
	if (id > max_msqid)
		max_msqid = id;
	msgque[id] = msq;	/* publish the finished queue */
	used_queues++;
	wake_up (&msg_lock);
	return (unsigned int) msq->msg_perm.seq * MSGMNI + id;
}
|
/* msgget(2): find or create the message queue identified by `key`.
 * Returns the user-visible msqid or a negative error code.
 */
asmlinkage int sys_msgget (key_t key, int msgflg)
{
	int id;
	struct msqid_ds *msq;

	/*
	 * If the IPC_KERNELD flag is set, the key is forced to IPC_PRIVATE,
	 * and a designated kerneld message queue is created/referred to
	 */
	if ((msgflg & IPC_KERNELD)) {
		int i;
		if (!suser())	/* only root may register as a kerneld */
			return -EPERM;
#ifdef NEW_KERNELD_PROTOCOL
		if ((msgflg & IPC_KERNELD) == OLDIPC_KERNELD) {
			printk(KERN_ALERT "Please recompile your kerneld daemons!\n");
			return -EPERM;
		}
#endif
		/* create the shared kerneld queue on first use */
		if ((kerneld_msqid == -1) && (kerneld_msqid =
				newque(IPC_PRIVATE, msgflg & S_IRWXU)) < 0)
			return -ENOSPC;
		/* record this daemon's pid in a free registry slot */
		for (i = 0; i < MAX_KERNELDS; ++i) {
			if (kerneld_arr[i] == 0) {
				kerneld_arr[i] = current->pid;
				++n_kernelds;
				return kerneld_msqid;
			}
		}
		return -ENOSPC;	/* registry full */
	}
	/* else it is a "normal" request */
	if (key == IPC_PRIVATE)
		return newque(key, msgflg);
	if ((id = findkey (key)) == -1) { /* key not used */
		if (!(msgflg & IPC_CREAT))
			return -ENOENT;
		return newque(key, msgflg);
	}
	if (msgflg & IPC_CREAT && msgflg & IPC_EXCL)
		return -EEXIST;
	msq = msgque[id];
	if (msq == IPC_UNUSED || msq == IPC_NOID)
		return -EIDRM;
	if (ipcperms(&msq->msg_perm, msgflg))
		return -EACCES;
	return (unsigned int) msq->msg_perm.seq * MSGMNI + id;
}
|
/*
 * Destroy queue slot `id': bump the per-slot and global sequence
 * numbers so stale external msqids are rejected, drain every sleeping
 * reader/writer, then free the pending messages and the descriptor.
 */
static void freeque (int id)
{
	struct msqid_ds *msq = msgque[id];
	struct msg *msgp, *msgh;

	msq->msg_perm.seq++;	/* invalidate outstanding msqids for this slot */
	msg_seq = (msg_seq+1) % ((unsigned)(1<<31)/MSGMNI); /* increment, but avoid overflow */
	msgbytes -= msq->msg_cbytes;
	if (id == max_msqid)
		while (max_msqid && (msgque[--max_msqid] == IPC_UNUSED));
	msgque[id] = (struct msqid_ds *) IPC_UNUSED;
	used_queues--;
	/* keep waking and yielding until every sleeper has left the queue;
	   only then is it safe to free the structure they were waiting on */
	while (waitqueue_active(&msq->rwait) || waitqueue_active(&msq->wwait)) {
		wake_up (&msq->rwait);
		wake_up (&msq->wwait);
		schedule();
	}
	for (msgp = msq->msg_first; msgp; msgp = msgh ) {
		msgh = msgp->msg_next;
		msghdrs--;
		kfree(msgp);
	}
	kfree(msq);
}
|
/*
 * System-call entry for msgctl().
 *   IPC_INFO/MSG_INFO - copy system limits (MSG_INFO: current usage)
 *                       to *buf; returns max_msqid
 *   MSG_STAT          - like IPC_STAT but msqid is a raw slot index;
 *                       returns the full external msqid
 *   IPC_STAT          - copy the queue's msqid_ds to *buf; returns 0
 *   IPC_SET           - update qbytes/uid/gid/mode from *buf; returns 0
 *   IPC_RMID          - destroy the queue; returns 0
 * Negative errno on failure.
 */
asmlinkage int sys_msgctl (int msqid, int cmd, struct msqid_ds *buf)
{
	int id, err;
	struct msqid_ds *msq;
	struct msqid_ds tbuf;
	struct ipc_perm *ipcp;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;
	/* first pass: commands that do not address a live queue by external
	   id, plus user-buffer validation/copy-in for IPC_SET / IPC_STAT */
	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
		if (!buf)
			return -EFAULT;
		{
			struct msginfo msginfo;
			msginfo.msgmni = MSGMNI;
			msginfo.msgmax = MSGMAX;
			msginfo.msgmnb = MSGMNB;
			msginfo.msgmap = MSGMAP;
			msginfo.msgpool = MSGPOOL;
			msginfo.msgtql = MSGTQL;
			msginfo.msgssz = MSGSSZ;
			msginfo.msgseg = MSGSEG;
			if (cmd == MSG_INFO) {
				/* MSG_INFO reuses these fields for current usage */
				msginfo.msgpool = used_queues;
				msginfo.msgmap = msghdrs;
				msginfo.msgtql = msgbytes;
			}
			err = verify_area (VERIFY_WRITE, buf, sizeof (struct msginfo));
			if (err)
				return err;
			memcpy_tofs (buf, &msginfo, sizeof(struct msginfo));
			return max_msqid;
		}
	case MSG_STAT:
		/* here msqid is a raw slot index, not an external id */
		if (!buf)
			return -EFAULT;
		err = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
		if (err)
			return err;
		if (msqid > max_msqid)
			return -EINVAL;
		msq = msgque[msqid];
		if (msq == IPC_UNUSED || msq == IPC_NOID)
			return -EINVAL;
		if (ipcperms (&msq->msg_perm, S_IRUGO))
			return -EACCES;
		id = (unsigned int) msq->msg_perm.seq * MSGMNI + msqid;
		tbuf.msg_perm = msq->msg_perm;
		tbuf.msg_stime = msq->msg_stime;
		tbuf.msg_rtime = msq->msg_rtime;
		tbuf.msg_ctime = msq->msg_ctime;
		tbuf.msg_cbytes = msq->msg_cbytes;
		tbuf.msg_qnum = msq->msg_qnum;
		tbuf.msg_qbytes = msq->msg_qbytes;
		tbuf.msg_lspid = msq->msg_lspid;
		tbuf.msg_lrpid = msq->msg_lrpid;
		memcpy_tofs (buf, &tbuf, sizeof(*buf));
		return id;
	case IPC_SET:
		/* copy the new settings in now; applied in the second pass */
		if (!buf)
			return -EFAULT;
		err = verify_area (VERIFY_READ, buf, sizeof (*buf));
		if (err)
			return err;
		memcpy_fromfs (&tbuf, buf, sizeof (*buf));
		break;
	case IPC_STAT:
		if (!buf)
			return -EFAULT;
		err = verify_area (VERIFY_WRITE, buf, sizeof(*buf));
		if (err)
			return err;
		break;
	}

	/* second pass: decode the external id into slot + sequence check */
	id = (unsigned int) msqid % MSGMNI;
	msq = msgque [id];
	if (msq == IPC_UNUSED || msq == IPC_NOID)
		return -EINVAL;
	if (msq->msg_perm.seq != (unsigned int) msqid / MSGMNI)
		return -EIDRM;		/* slot was reused since the caller got this id */
	ipcp = &msq->msg_perm;

	switch (cmd) {
	case IPC_STAT:
		if (ipcperms (ipcp, S_IRUGO))
			return -EACCES;
		tbuf.msg_perm = msq->msg_perm;
		tbuf.msg_stime = msq->msg_stime;
		tbuf.msg_rtime = msq->msg_rtime;
		tbuf.msg_ctime = msq->msg_ctime;
		tbuf.msg_cbytes = msq->msg_cbytes;
		tbuf.msg_qnum = msq->msg_qnum;
		tbuf.msg_qbytes = msq->msg_qbytes;
		tbuf.msg_lspid = msq->msg_lspid;
		tbuf.msg_lrpid = msq->msg_lrpid;
		memcpy_tofs (buf, &tbuf, sizeof (*buf));
		return 0;
	case IPC_SET:
		/* only the owner/creator (or the superuser) may change settings */
		if (!suser() && current->euid != ipcp->cuid &&
		    current->euid != ipcp->uid)
			return -EPERM;
		/* only the superuser may raise qbytes above the system limit */
		if (tbuf.msg_qbytes > MSGMNB && !suser())
			return -EPERM;
		msq->msg_qbytes = tbuf.msg_qbytes;
		ipcp->uid = tbuf.msg_perm.uid;
		ipcp->gid = tbuf.msg_perm.gid;
		ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
			(S_IRWXUGO & tbuf.msg_perm.mode);
		msq->msg_ctime = CURRENT_TIME;
		return 0;
	case IPC_RMID:
		if (!suser() && current->euid != ipcp->cuid &&
		    current->euid != ipcp->uid)
			return -EPERM;
		/*
		 * There is only one kerneld message queue,
		 * mark it as non-existent
		 */
		if ((kerneld_msqid >= 0) && (msqid == kerneld_msqid))
			kerneld_msqid = -1;
		freeque (id);
		return 0;
	default:
		return -EINVAL;
	}
}
|
/* |
* We do perhaps need a "flush" for waiting processes, |
* so that if they are terminated, a call from do_exit |
* will minimize the possibility of orphaned received |
* messages in the queue. For now we just make sure |
* that the queue is shut down whenever all kernelds have died. |
*/ |
void kerneld_exit(void) |
{ |
int i; |
|
if (kerneld_msqid == -1) |
return; |
for (i = 0; i < MAX_KERNELDS; ++i) { |
if (kerneld_arr[i] == current->pid) { |
kerneld_arr[i] = 0; |
--n_kernelds; |
if (n_kernelds == 0) |
sys_msgctl(kerneld_msqid, IPC_RMID, NULL); |
break; |
} |
} |
} |
|
/* |
* Kerneld internal message format/syntax: |
* |
* The message type from the kernel to kerneld is used to specify _what_ |
* function we want kerneld to perform. |
* |
* The "normal" message area is divided into a header, followed by a char array. |
* The header is used to hold the sequence number of the request, which will |
* be used as the return message type from kerneld back to the kernel. |
* In the return message, the header will be used to store the exit status |
* of the kerneld "job", or task. |
* The character array is used to pass parameters to kerneld and (optional) |
* return information from kerneld back to the kernel. |
* It is the responsibility of kerneld and the kernel level caller |
* to set usable sizes on the parameter/return value array, since |
* that information is _not_ included in the message format |
*/ |
|
/* |
* The basic kernel level entry point to kerneld. |
* msgtype should correspond to a task type for (a) kerneld |
* ret_size is the size of the (optional) return _value, |
* OR-ed with KERNELD_WAIT if we want an answer |
* msgsize is the size (in bytes) of the message, not including |
* the header that is always sent first in a kerneld message |
* text is the parameter for the kerneld specific task |
* ret_val is NULL or the kernel address where an expected answer |
* from kerneld should be placed. |
* |
* See <linux/kerneld.h> for usage (inline convenience functions) |
* |
*/ |
/* See the usage notes in the comment block above; returns the kerneld
 * job status (when waiting), 0/positive on success, negative errno
 * otherwise.  -ENOSYS when CONFIG_KERNELD is not compiled in. */
int kerneld_send(int msgtype, int ret_size, int msgsz,
		const char *text, const char *ret_val)
{
	int status = -ENOSYS;
#ifdef CONFIG_KERNELD
	static int id = KERNELD_MINSEQ;	/* reply sequence-number generator */
	struct kerneld_msg kmsp = { msgtype, NULL_KDHDR, (char *)text };
	int msgflg = S_IRUSR | S_IWUSR | IPC_KERNELD | MSG_NOERROR;
	unsigned long flags;

	if (kerneld_msqid == -1)
		return -ENODEV;		/* no kerneld queue exists */

	/* Do not wait for an answer at interrupt-time! */
	if (intr_count)
		ret_size &= ~KERNELD_WAIT;
#ifdef NEW_KERNELD_PROTOCOL
	else
		kmsp.pid = current->pid;
#endif

	msgsz += KDHDR;		/* the header always precedes the payload */
	if (ret_size & KERNELD_WAIT) {
		/* take a unique sequence number; interrupts off while bumping
		   the shared static counter */
		save_flags(flags);
		cli();
		if (++id <= 0)	/* overflow */
			id = KERNELD_MINSEQ;
		kmsp.id = id;
		restore_flags(flags);
	}

	status = real_msgsnd(kerneld_msqid, (struct msgbuf *)&kmsp, msgsz, msgflg);
	if ((status >= 0) && (ret_size & KERNELD_WAIT)) {
		/* block until kerneld replies with our sequence number as type */
		ret_size &= ~KERNELD_WAIT;
		kmsp.text = (char *)ret_val;
		status = real_msgrcv(kerneld_msqid, (struct msgbuf *)&kmsp,
				KDHDR + ((ret_val)?ret_size:0),
				kmsp.id, msgflg);
		if (status > 0) /* a valid answer contains at least a long */
			status = kmsp.id;	/* header now holds the job's exit status */
	}

#endif /* CONFIG_KERNELD */
	return status;
}
/shm.c
0,0 → 1,920
/* |
* linux/ipc/shm.c |
* Copyright (C) 1992, 1993 Krishna Balasubramanian |
* Many improvements/fixes by Bruno Haible. |
* Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. |
* Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. |
*/ |
|
/* |
* uClinux revisions for NO_MM |
* Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>, |
* The Silver Hammer Group, Ltd. |
*/ |
|
#include <linux/errno.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/ipc.h> |
#include <linux/shm.h> |
#include <linux/stat.h> |
#include <linux/malloc.h> |
#include <linux/swap.h> |
#include <linux/swapctl.h> |
|
#include <asm/segment.h> |
#include <asm/pgtable.h> |
|
#ifndef NO_MM |
|
extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
extern unsigned long get_swap_page (void);
/* forward declarations for the static helpers defined below */
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, int size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static pte_t shm_swap_in(struct vm_area_struct *, unsigned long, unsigned long);

static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
static int max_shmid = 0; /* every used id is <= max_shmid */
static struct wait_queue *shm_lock = NULL; /* calling findkey() may need to wait */
static struct shmid_ds *shm_segs[SHMMNI];	/* segment table; IPC_UNUSED/IPC_NOID mark free/in-setup slots */

static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */

/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;
|
void shm_init (void) |
{ |
int id; |
|
for (id = 0; id < SHMMNI; id++) |
shm_segs[id] = (struct shmid_ds *) IPC_UNUSED; |
shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0; |
shm_lock = NULL; |
return; |
} |
|
/*
 * Look up a shared-memory slot by key.
 * While a slot is in the transient IPC_NOID state (being built by
 * newseg()) we sleep on shm_lock and re-test.  Returns the slot index
 * on a key match, or -1 if the key is not in use.
 */
static int findkey (key_t key)
{
	struct shmid_ds *shp;
	int slot;

	for (slot = 0; slot <= max_shmid; slot++) {
		for (;;) {
			shp = shm_segs[slot];
			if (shp != IPC_NOID)
				break;
			sleep_on (&shm_lock);
		}
		if (shp != IPC_UNUSED && shp->shm_perm.key == key)
			return slot;
	}
	return -1;
}
|
/* |
* allocate new shmid_ds and pgtable. protected by shm_segs[id] = NOID. |
*/ |
/*
 * Allocate and initialize a new segment descriptor and its page table
 * for `key'.  The slot stays in the transient IPC_NOID state while we
 * may sleep in kmalloc(), which is what protects it (see the comment
 * above).  Returns the external shmid (seq * SHMMNI + slot) or a
 * negative errno.
 */
static int newseg (key_t key, int shmflg, int size)
{
	struct shmid_ds *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;	/* round size up to whole pages */
	int id, i;

	if (size < SHMMIN)
		return -EINVAL;
	if (shm_tot + numpages >= SHMALL)
		return -ENOSPC;		/* system-wide page limit */
	/* claim a free slot; IPC_NOID reserves it against other creators */
	for (id = 0; id < SHMMNI; id++)
		if (shm_segs[id] == IPC_UNUSED) {
			shm_segs[id] = (struct shmid_ds *) IPC_NOID;
			goto found;
		}
	return -ENOSPC;

found:
	shp = (struct shmid_ds *) kmalloc (sizeof (*shp), GFP_KERNEL);
	if (!shp) {
		/* give the slot back and wake sleepers in findkey() */
		shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
		wake_up (&shm_lock);
		return -ENOMEM;
	}

	shp->shm_pages = (ulong *) kmalloc (numpages*sizeof(ulong),GFP_KERNEL);
	if (!shp->shm_pages) {
		shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
		wake_up (&shm_lock);
		kfree(shp);
		return -ENOMEM;
	}

	/* no pages are resident yet; entries are filled on first fault */
	for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
	shm_tot += numpages;
	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->shm_perm.cuid = shp->shm_perm.uid = current->euid;
	shp->shm_perm.cgid = shp->shm_perm.gid = current->egid;
	shp->shm_perm.seq = shm_seq;		/* stamp for stale-id detection */
	shp->shm_segsz = size;
	shp->shm_cpid = current->pid;
	shp->attaches = NULL;
	shp->shm_lpid = shp->shm_nattch = 0;
	shp->shm_atime = shp->shm_dtime = 0;
	shp->shm_ctime = CURRENT_TIME;
	shp->shm_npages = numpages;

	if (id > max_shmid)
		max_shmid = id;
	shm_segs[id] = shp;			/* the segment becomes visible here */
	used_segs++;
	wake_up (&shm_lock);			/* let findkey() sleepers rescan */
	return (unsigned int) shp->shm_perm.seq * SHMMNI + id;
}
|
/*
 * System-call entry for shmget(): resolve `key' to an existing segment
 * or create one with newseg() as IPC_CREAT/IPC_EXCL dictate.
 * Returns the external shmid (seq * SHMMNI + slot) or a negative errno.
 */
asmlinkage int sys_shmget (key_t key, int size, int shmflg)
{
	struct shmid_ds *shp;
	int id = 0;

	if (size < 0 || size > SHMMAX)
		return -EINVAL;
	if (key == IPC_PRIVATE)
		return newseg(key, shmflg, size);
	if ((id = findkey (key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			return -ENOENT;
		return newseg(key, shmflg, size);
	}
	/* existing segment found */
	if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL))
		return -EEXIST;
	shp = shm_segs[id];
	if (shp->shm_perm.mode & SHM_DEST)
		return -EIDRM;		/* segment is marked for destruction */
	if (size > shp->shm_segsz)
		return -EINVAL;		/* caller wants more than the segment holds */
	if (ipcperms (&shp->shm_perm, shmflg))
		return -EACCES;
	return (unsigned int) shp->shm_perm.seq * SHMMNI + id;
}
|
/* |
* Only called after testing nattch and SHM_DEST. |
* Here pages, pgtable and shmid_ds are freed. |
*/ |
/*
 * Free segment `id' (see the precondition comment above: only called
 * when nattch has dropped and SHM_DEST is set).  Bumps the sequence
 * numbers so stale shmids are rejected, then releases every resident
 * page, every swap entry, the page array and the descriptor.
 */
static void killseg (int id)
{
	struct shmid_ds *shp;
	int i, numpages;

	shp = shm_segs[id];
	if (shp == IPC_NOID || shp == IPC_UNUSED) {
		printk ("shm nono: killseg called on unused seg id=%d\n", id);
		return;
	}
	shp->shm_perm.seq++;     /* for shmat */
	shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
	shm_segs[id] = (struct shmid_ds *) IPC_UNUSED;
	used_segs--;
	if (id == max_shmid)
		while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
	if (!shp->shm_pages) {
		printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
		return;
	}
	numpages = shp->shm_npages;
	for (i = 0; i < numpages ; i++) {
		pte_t pte;
		pte_val(pte) = shp->shm_pages[i];
		if (pte_none(pte))
			continue;	/* never faulted in */
		if (pte_present(pte)) {
			free_page (pte_page(pte));	/* resident page */
			shm_rss--;
		} else {
			swap_free(pte_val(pte));	/* page lives on swap */
			shm_swp--;
		}
	}
	kfree(shp->shm_pages);
	shm_tot -= numpages;
	kfree(shp);
	return;
}
|
/*
 * System-call entry for shmctl().
 *   IPC_INFO/SHM_INFO  - copy limits / current usage to *buf; returns max_shmid
 *   SHM_STAT           - like IPC_STAT but shmid is a raw slot index;
 *                        returns the full external shmid
 *   SHM_LOCK/SHM_UNLOCK- superuser pins/unpins the segment in memory
 *   IPC_STAT           - copy the segment's shmid_ds to *buf; returns 0
 *   IPC_SET            - update uid/gid/mode from *buf; returns 0
 *   IPC_RMID           - mark SHM_DEST, destroy once nattch drops; returns 0
 * Negative errno on failure.
 */
asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shmid_ds tbuf;
	struct shmid_ds *shp;
	struct ipc_perm *ipcp;
	int id, err;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;
	/* copy the new settings in up front; applied in the second switch */
	if (cmd == IPC_SET) {
		if (!buf)
			return -EFAULT;
		err = verify_area (VERIFY_READ, buf, sizeof (*buf));
		if (err)
			return err;
		memcpy_fromfs (&tbuf, buf, sizeof (*buf));
	}

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo shminfo;
		if (!buf)
			return -EFAULT;
		shminfo.shmmni = SHMMNI;
		shminfo.shmmax = SHMMAX;
		shminfo.shmmin = SHMMIN;
		shminfo.shmall = SHMALL;
		shminfo.shmseg = SHMSEG;
		err = verify_area (VERIFY_WRITE, buf, sizeof (struct shminfo));
		if (err)
			return err;
		memcpy_tofs (buf, &shminfo, sizeof(struct shminfo));
		return max_shmid;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;
		if (!buf)
			return -EFAULT;
		err = verify_area (VERIFY_WRITE, buf, sizeof (shm_info));
		if (err)
			return err;
		shm_info.used_ids = used_segs;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		memcpy_tofs (buf, &shm_info, sizeof(shm_info));
		return max_shmid;
	}
	case SHM_STAT:
		/* here shmid is a raw slot index, not an external id */
		if (!buf)
			return -EFAULT;
		err = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
		if (err)
			return err;
		if (shmid > max_shmid)
			return -EINVAL;
		shp = shm_segs[shmid];
		if (shp == IPC_UNUSED || shp == IPC_NOID)
			return -EINVAL;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			return -EACCES;
		id = (unsigned int) shp->shm_perm.seq * SHMMNI + shmid;
		tbuf.shm_perm   = shp->shm_perm;
		tbuf.shm_segsz  = shp->shm_segsz;
		tbuf.shm_atime  = shp->shm_atime;
		tbuf.shm_dtime  = shp->shm_dtime;
		tbuf.shm_ctime  = shp->shm_ctime;
		tbuf.shm_cpid   = shp->shm_cpid;
		tbuf.shm_lpid   = shp->shm_lpid;
		tbuf.shm_nattch = shp->shm_nattch;
		memcpy_tofs (buf, &tbuf, sizeof(*buf));
		return id;
	}

	/* remaining commands address a live segment by external id:
	   decode slot index and check the sequence stamp */
	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID)
		return -EINVAL;
	if (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)
		return -EIDRM;		/* slot was reused since the caller got this id */
	ipcp = &shp->shm_perm;

	switch (cmd) {
	case SHM_UNLOCK:
		if (!suser())
			return -EPERM;
		if (!(ipcp->mode & SHM_LOCKED))
			return -EINVAL;
		ipcp->mode &= ~SHM_LOCKED;
		break;
	case SHM_LOCK:
/* Allow superuser to lock segment in memory */
/* Should the pages be faulted in here or leave it to user? */
/* need to determine interaction with current->swappable */
		if (!suser())
			return -EPERM;
		if (ipcp->mode & SHM_LOCKED)
			return -EINVAL;
		ipcp->mode |= SHM_LOCKED;	/* shm_swap() skips locked segments */
		break;
	case IPC_STAT:
		if (ipcperms (ipcp, S_IRUGO))
			return -EACCES;
		if (!buf)
			return -EFAULT;
		err = verify_area (VERIFY_WRITE, buf, sizeof (*buf));
		if (err)
			return err;
		tbuf.shm_perm   = shp->shm_perm;
		tbuf.shm_segsz  = shp->shm_segsz;
		tbuf.shm_atime  = shp->shm_atime;
		tbuf.shm_dtime  = shp->shm_dtime;
		tbuf.shm_ctime  = shp->shm_ctime;
		tbuf.shm_cpid   = shp->shm_cpid;
		tbuf.shm_lpid   = shp->shm_lpid;
		tbuf.shm_nattch = shp->shm_nattch;
		memcpy_tofs (buf, &tbuf, sizeof(*buf));
		break;
	case IPC_SET:
		/* only the owner/creator (or the superuser) may change settings */
		if (suser() || current->euid == shp->shm_perm.uid ||
		    current->euid == shp->shm_perm.cuid) {
			ipcp->uid = tbuf.shm_perm.uid;
			ipcp->gid = tbuf.shm_perm.gid;
			ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (tbuf.shm_perm.mode & S_IRWXUGO);
			shp->shm_ctime = CURRENT_TIME;
			break;
		}
		return -EPERM;
	case IPC_RMID:
		if (suser() || current->euid == shp->shm_perm.uid ||
		    current->euid == shp->shm_perm.cuid) {
			/* defer destruction until the last attach is gone */
			shp->shm_perm.mode |= SHM_DEST;
			if (shp->shm_nattch <= 0)
				killseg (id);
			break;
		}
		return -EPERM;
	default:
		return -EINVAL;
	}
	return 0;
}
|
/* |
* The per process internal structure for managing segments is |
* `struct vm_area_struct'. |
* A shmat will add to and shmdt will remove from the list. |
* shmd->vm_mm the attacher |
* shmd->vm_start virt addr of attach, multiple of SHMLBA |
* shmd->vm_end multiple of SHMLBA |
* shmd->vm_next next attach for task |
* shmd->vm_next_share next attach for segment |
* shmd->vm_offset offset into segment |
* shmd->vm_pte signature for this attach |
*/ |
|
/* vm_operations shared by every shm attach (see vm_area_struct notes above) */
static struct vm_operations_struct shm_vm_ops = {
	shm_open,	/* open - callback for a new vm-area open */
	shm_close,	/* close - callback for when the vm-area is released */
	NULL,		/* no need to sync pages at unmap */
	NULL,		/* protect */
	NULL,		/* sync */
	NULL,		/* advise */
	NULL,		/* nopage (done with swapin) */
	NULL,		/* wppage */
	NULL,		/* swapout (hardcoded right now) */
	shm_swap_in	/* swapin */
};
|
/* Insert shmd into the circular list shp->attaches */ |
static inline void insert_attach (struct shmid_ds * shp, struct vm_area_struct * shmd)
{
	struct vm_area_struct * attaches;

	if ((attaches = shp->attaches)) {
		/* splice shmd in just before the current list head;
		   assignment order matters on this doubly-linked ring */
		shmd->vm_next_share = attaches;
		shmd->vm_prev_share = attaches->vm_prev_share;
		shmd->vm_prev_share->vm_next_share = shmd;
		attaches->vm_prev_share = shmd;
	} else
		/* first attach: a one-element ring pointing at itself */
		shp->attaches = shmd->vm_next_share = shmd->vm_prev_share = shmd;
}
|
/* Remove shmd from circular list shp->attaches */ |
static inline void remove_attach (struct shmid_ds * shp, struct vm_area_struct * shmd)
{
	if (shmd->vm_next_share == shmd) {
		/* shmd is a one-element ring, so it must be the list head;
		   anything else means the ring is corrupt — report it */
		if (shp->attaches != shmd) {
			printk("shm_close: shm segment (id=%ld) attach list inconsistent\n",
			       SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
			printk("shm_close: %08lx-%08lx %c%c%c%c %08lx %08lx\n",
			       shmd->vm_start, shmd->vm_end,
			       shmd->vm_flags & VM_READ ? 'r' : '-',
			       shmd->vm_flags & VM_WRITE ? 'w' : '-',
			       shmd->vm_flags & VM_EXEC ? 'x' : '-',
			       shmd->vm_flags & VM_MAYSHARE ? 's' : 'p',
			       shmd->vm_offset, shmd->vm_pte);
		}
		shp->attaches = NULL;
	} else {
		/* advance the head if it points at shmd, then unlink */
		if (shp->attaches == shmd)
			shp->attaches = shmd->vm_next_share;
		shmd->vm_prev_share->vm_next_share = shmd->vm_next_share;
		shmd->vm_next_share->vm_prev_share = shmd->vm_prev_share;
	}
}
|
/* |
* ensure page tables exist |
* mark page table entries with shm_sgn. |
*/ |
/* See comment above: builds page tables for the attach and marks every
 * pte with the shm signature so shm_swap_in() resolves first touches.
 * Returns 0 or -ENOMEM. */
static int shm_map (struct vm_area_struct *shmd)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table;
	unsigned long tmp, shm_sgn;
	int error;

	/* clear old mappings */
	do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);

	/* add new mapping; enforce the address-space rlimit first */
	tmp = shmd->vm_end - shmd->vm_start;
	if((current->mm->total_vm << PAGE_SHIFT) + tmp
	   > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;
	current->mm->total_vm += tmp >> PAGE_SHIFT;
	insert_vm_struct(current->mm, shmd);
	merge_segments(current->mm, shmd->vm_start, shmd->vm_end);

	/* map page range: each pte encodes (segment id, page index) */
	error = 0;
	shm_sgn = shmd->vm_pte +
	  SWP_ENTRY(0, (shmd->vm_offset >> PAGE_SHIFT) << SHM_IDX_SHIFT);
	flush_cache_range(shmd->vm_mm, shmd->vm_start, shmd->vm_end);
	for (tmp = shmd->vm_start;
	     tmp < shmd->vm_end;
	     tmp += PAGE_SIZE, shm_sgn += SWP_ENTRY(0, 1 << SHM_IDX_SHIFT))
	{
		page_dir = pgd_offset(shmd->vm_mm,tmp);
		page_middle = pmd_alloc(page_dir,tmp);
		if (!page_middle) {
			error = -ENOMEM;
			break;
		}
		page_table = pte_alloc(page_middle,tmp);
		if (!page_table) {
			error = -ENOMEM;
			break;
		}
		set_pte(page_table, __pte(shm_sgn));
	}
	flush_tlb_range(shmd->vm_mm, shmd->vm_start, shmd->vm_end);
	return error;
}
|
/* |
* Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. |
*/ |
/*
 * System-call entry for shmat() (see comment above).  Validates the
 * id, picks/validates the attach address, builds the vm_area_struct,
 * maps it via shm_map() and links it into the segment's attach ring.
 * On success *raddr receives the attach address and 0 is returned;
 * otherwise a negative errno.
 */
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_ds *shp;
	struct vm_area_struct *shmd;
	int err;
	unsigned int id;
	unsigned long addr;
	unsigned long len;

	if (shmid < 0) {
		/* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
		return -EINVAL;
	}

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		/* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
		return -EINVAL;
	}

	/* choose the attach address: caller-supplied (optionally rounded
	   down to a SHMLBA boundary) or kernel-chosen when NULL */
	if (!(addr = (ulong) shmaddr)) {
		if (shmflg & SHM_REMAP)
			return -EINVAL;	/* REMAP makes no sense without an address */
		if (!(addr = get_unmapped_area(0, shp->shm_segsz)))
			return -ENOMEM;
	} else if (addr & (SHMLBA-1)) {
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1);       /* round down */
		else
			return -EINVAL;
	}
	/*
	 * Check if addr exceeds MAX_USER_ADDR (from do_mmap)
	 */
	len = PAGE_SIZE*shp->shm_npages;
	if (addr >= MAX_USER_ADDR || len > MAX_USER_ADDR  || addr > MAX_USER_ADDR - len)
		return -EINVAL;
	/*
	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	 */
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
	{
		/* printk("shmat() -> EINVAL because segment intersects stack\n"); */
		return -EINVAL;
	}
	/* unless SHM_REMAP, refuse to clobber an existing mapping */
	if (!(shmflg & SHM_REMAP))
		if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->shm_segsz))) {
			/* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
				addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
			return -EINVAL;
		}

	if (ipcperms(&shp->shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
		return -EACCES;
	if (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)
		return -EIDRM;		/* stale external id */

	shmd = (struct vm_area_struct *) kmalloc (sizeof(*shmd), GFP_KERNEL);
	if (!shmd)
		return -ENOMEM;
	/* kmalloc() may have slept: re-check that the segment survived */
	if ((shp != shm_segs[id]) || (shp->shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
		kfree(shmd);
		return -EIDRM;
	}

	shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);	/* signature: marks this vma as shm attach `id' */
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_next_share = shmd->vm_prev_share = NULL;
	shmd->vm_inode = NULL;
	shmd->vm_offset = 0;
	shmd->vm_ops = &shm_vm_ops;

	shp->shm_nattch++;            /* prevent destruction */
	if ((err = shm_map (shmd))) {
		/* undo the attach count; destroy now if RMID was pending */
		if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
			killseg(id);
		kfree(shmd);
		return err;
	}

	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */

	shp->shm_lpid = current->pid;
	shp->shm_atime = CURRENT_TIME;

	*raddr = addr;
	return 0;
}
|
/* This is called by fork, once for every shm attach. */ |
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	unsigned int id;
	struct shmid_ds *shp;

	/* recover the segment id from the signature stored in vm_pte */
	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	shp = shm_segs[id];
	if (shp == IPC_UNUSED) {
		printk("shm_open: unused id=%d PANIC\n", id);
		return;
	}
	insert_attach(shp,shmd);  /* insert shmd into shp->attaches */
	shp->shm_nattch++;
	shp->shm_atime = CURRENT_TIME;
	shp->shm_lpid = current->pid;
}
|
/* |
* remove the attach descriptor shmd. |
* free memory for segment if it is marked destroyed. |
* The descriptor has already been removed from the current->mm->mmap list |
* and will later be kfree()d. |
*/ |
/* See comment above: detach-side counterpart of shm_open(); destroys
 * the segment when the last attach goes and SHM_DEST is pending. */
static void shm_close (struct vm_area_struct *shmd)
{
	struct shmid_ds *shp;
	int id;

	/* remove from the list of attaches of the shm segment */
	id = SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK;
	shp = shm_segs[id];
	remove_attach(shp,shmd);  /* remove from shp->attaches */
	shp->shm_lpid = current->pid;
	shp->shm_dtime = CURRENT_TIME;
	if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
		killseg (id);	/* deferred IPC_RMID takes effect now */
}
|
/* |
* detach and kill segment if marked destroyed. |
* The work is done in shm_close. |
*/ |
asmlinkage int sys_shmdt (char *shmaddr) |
{ |
struct vm_area_struct *shmd, *shmdnext; |
|
for (shmd = current->mm->mmap; shmd; shmd = shmdnext) { |
shmdnext = shmd->vm_next; |
if (shmd->vm_ops == &shm_vm_ops |
&& shmd->vm_start - shmd->vm_offset == (ulong) shmaddr) |
do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start); |
} |
return 0; |
} |
|
/* |
* page not present ... go through shm_pages |
*/ |
/*
 * Fault handler (vm_ops->swapin) for shm attaches: resolve a signature
 * pte (segment id + page index, see shm_map) into a real page,
 * swapping the page back in if needed.  Returns the new pte, or
 * BAD_PAGE on any consistency failure.
 */
static pte_t shm_swap_in(struct vm_area_struct * shmd, unsigned long offset, unsigned long code)
{
	pte_t pte;
	struct shmid_ds *shp;
	unsigned int id, idx;

	/* cross-check the faulting pte's signature against the vma's */
	id = SWP_OFFSET(code) & SHM_ID_MASK;
	if (id != (SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK)) {
		printk ("shm_swap_in: code id = %d and shmd id = %ld differ\n",
			id, SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
		return BAD_PAGE;
	}
	if (id > max_shmid) {
		printk ("shm_swap_in: id=%d too big. proc mem corrupted\n", id);
		return BAD_PAGE;
	}
	shp = shm_segs[id];
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		printk ("shm_swap_in: id=%d invalid. Race.\n", id);
		return BAD_PAGE;
	}
	idx = (SWP_OFFSET(code) >> SHM_IDX_SHIFT) & SHM_IDX_MASK;
	if (idx != (offset >> PAGE_SHIFT)) {
		printk ("shm_swap_in: code idx = %u and shmd idx = %lu differ\n",
			idx, offset >> PAGE_SHIFT);
		return BAD_PAGE;
	}
	if (idx >= shp->shm_npages) {
		printk ("shm_swap_in : too large page index. id=%d\n", id);
		return BAD_PAGE;
	}

	pte_val(pte) = shp->shm_pages[idx];
	if (!pte_present(pte)) {
		/* allocate first — get_free_page() may sleep, after which
		   the table entry must be re-read and re-checked */
		unsigned long page = get_free_page(GFP_KERNEL);
		if (!page) {
			oom(current);
			return BAD_PAGE;
		}
	repeat:
		pte_val(pte) = shp->shm_pages[idx];
		if (pte_present(pte)) {
			free_page (page); /* doesn't sleep */
			goto done;	/* someone else brought it in while we slept */
		}
		if (!pte_none(pte)) {
			/* page is on swap: read it in, then make sure the
			   entry didn't change while read_swap_page() slept */
			read_swap_page(pte_val(pte), (char *) page);
			if (pte_val(pte) != shp->shm_pages[idx])
				goto repeat;
			swap_free(pte_val(pte));
			shm_swp--;
		}
		shm_rss++;

		/* Give the physical reallocated page a bigger start */
		if (shm_rss < (MAP_NR(high_memory) >> 3))
			mem_map[MAP_NR(page)].age = (PAGE_INITIAL_AGE + PAGE_ADVANCE);

		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		shp->shm_pages[idx] = pte_val(pte);
	} else
		--current->maj_flt;  /* was incremented in do_no_page */

done:	/* pte_val(pte) == shp->shm_pages[idx] */
	current->min_flt++;
	mem_map[MAP_NR(pte_page(pte))].count++;	/* one more user of the frame */
	return pte_modify(pte, shmd->vm_page_prot);
}
|
/* |
* Goes through counter = (shm_rss >> prio) present shm pages. |
*/ |
/* scan cursors, persistent across calls so successive swap passes
   resume where the previous one stopped */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

/*
 * Try to push one resident shm page out to swap.  Examines up to
 * (shm_rss >> prio) present pages, round-robin over segments/pages via
 * the cursors above; locked segments and (for dma) non-DMA pages are
 * skipped.  Returns 1 if a page was written to swap, 0 otherwise.
 */
int shm_swap (int prio, int dma)
{
	pte_t page;
	struct page *page_map;
	struct shmid_ds *shp;
	struct vm_area_struct *shmd;
	unsigned long swap_nr;
	unsigned long id, idx;
	int loop = 0;
	int counter;

	counter = shm_rss >> prio;
	/* reserve a swap entry up front; released again on failure */
	if (!counter || !(swap_nr = get_swap_page()))
		return 0;

 check_id:
	shp = shm_segs[swap_id];
	if (shp == IPC_UNUSED || shp == IPC_NOID || shp->shm_perm.mode & SHM_LOCKED ) {
	next_id:
		swap_idx = 0;
		if (++swap_id > max_shmid) {
			if (loop)
				goto failed;	/* scanned everything once already */
			loop = 1;
			swap_id = 0;
		}
		goto check_id;
	}
	id = swap_id;

 check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	/* candidate must be resident, unlocked, and DMA-capable if needed */
	pte_val(page) = shp->shm_pages[idx];
	if (!pte_present(page))
		goto check_table;
	page_map = &mem_map[MAP_NR(pte_page(page))];
	if (PageLocked(page_map))
		goto check_table;
	if (dma && !PageDMA(page_map))
		goto check_table;
	swap_attempts++;

	if (--counter < 0) { /* failed */
	failed:
		swap_free (swap_nr);	/* give the reserved entry back */
		return 0;
	}
	/* unmap the page from every attach; a recently-used pte is only
	   aged (pte_mkold) so the page gets a second chance */
	if (shp->attaches)
	  for (shmd = shp->attaches; ; ) {
	    do {
		pgd_t *page_dir;
		pmd_t *page_middle;
		pte_t *page_table, pte;
		unsigned long tmp;

		if ((SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK) != id) {
			printk ("shm_swap: id=%ld does not match shmd->vm_pte.id=%ld\n",
				id, SWP_OFFSET(shmd->vm_pte) & SHM_ID_MASK);
			continue;
		}
		tmp = shmd->vm_start + (idx << PAGE_SHIFT) - shmd->vm_offset;
		if (!(tmp >= shmd->vm_start && tmp < shmd->vm_end))
			continue;	/* this attach doesn't cover page idx */
		page_dir = pgd_offset(shmd->vm_mm,tmp);
		if (pgd_none(*page_dir) || pgd_bad(*page_dir)) {
			printk("shm_swap: bad pgtbl! id=%ld start=%lx idx=%ld\n",
					id, shmd->vm_start, idx);
			pgd_clear(page_dir);
			continue;
		}
		page_middle = pmd_offset(page_dir,tmp);
		if (pmd_none(*page_middle) || pmd_bad(*page_middle)) {
			printk("shm_swap: bad pgmid! id=%ld start=%lx idx=%ld\n",
					id, shmd->vm_start, idx);
			pmd_clear(page_middle);
			continue;
		}
		page_table = pte_offset(page_middle,tmp);
		pte = *page_table;
		if (!pte_present(pte))
			continue;
		if (pte_young(pte)) {
			set_pte(page_table, pte_mkold(pte));
			continue;	/* recently used: age it, don't evict */
		}
		if (pte_page(pte) != pte_page(page))
			printk("shm_swap_out: page and pte mismatch\n");
		flush_cache_page(shmd, tmp);
		/* restore the shm signature so a later fault re-resolves it */
		set_pte(page_table,
		  __pte(shmd->vm_pte + SWP_ENTRY(0, idx << SHM_IDX_SHIFT)));
		mem_map[MAP_NR(pte_page(pte))].count--;
		if (shmd->vm_mm->rss > 0)
			shmd->vm_mm->rss--;
		flush_tlb_page(shmd, tmp);
		/* continue looping through circular list */
	    } while (0);
	    if ((shmd = shmd->vm_next_share) == shp->attaches)
		break;
	  }

	/* if anything still references the frame (an aged pte survived),
	   the page cannot go out yet — try the next candidate */
	if (mem_map[MAP_NR(pte_page(page))].count != 1)
		goto check_table;
	shp->shm_pages[idx] = swap_nr;	/* table entry now holds the swap location */
	write_swap_page (swap_nr, (char *) pte_page(page));
	free_page(pte_page(page));
	swap_successes++;
	shm_swp++;
	shm_rss--;
	return 1;
}
|
#else /* NO_MM */ |
|
/* FIXME: shm _is_ feasible under NO_MM, but requires more advanced memory |
accounting then we currently have available. */ |
|
/* NO_MM stubs: shared memory is unavailable without an MMU (see the
 * FIXME above); every entry point fails with -ENOSYS or does nothing. */
void shm_init (void)
{
	return;
}

asmlinkage int sys_shmget (key_t key, int size, int shmflg)
{
	return -ENOSYS;
}

asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	return -ENOSYS;
}

asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	return -ENOSYS;
}

asmlinkage int sys_shmdt (char *shmaddr)
{
	return 0;	/* nothing can be attached, so detaching trivially succeeds */
}

int shm_swap (int prio, int dma)
{
	return 0;	/* no shm pages exist, so none can be swapped out */
}
|
#endif /* NO_MM */ |
|
|
#ifndef NO_MM |
/*
 * Bring one shm page back in from the swap area identified by 'type',
 * or detach it from the swap cache if it is already resident, freeing
 * the swap entry either way.  shm_unuse() calls this for every page of
 * every segment.
 */
static void shm_unuse_page(struct shmid_ds *shp, unsigned long idx,
	unsigned long type)
{
	pte_t pte = __pte(shp->shm_pages[idx]);
	unsigned long page, entry = shp->shm_pages[idx];

	/* Slot was never populated: nothing to do. */
	if (pte_none(pte))
		return;
	if (pte_present(pte))
	{
		/*
		 * Page already in memory.  Security check; should not be
		 * needed...
		 */
		unsigned long page_nr = MAP_NR(pte_page(pte));
		if (page_nr >= MAP_NR(high_memory))
		{
			printk("shm page mapped in virtual memory\n");
			return;
		}
		/* Only act if the page sits in the swap cache of this area. */
		if (!in_swap_cache(page_nr))
			return;
		if (SWP_TYPE(in_swap_cache(page_nr)) != type)
			return;
		printk("shm page in swap cache, trying to remove it!\n");
		delete_from_swap_cache(page_nr);

		/* Store the pte back with the dirty bit set. */
		shp->shm_pages[idx] = pte_val(pte_mkdirty(pte));
		return;
	}

	/* Swapped out, but to a different swap area: leave it alone. */
	if (SWP_TYPE(pte_val(pte)) != type)
		return;

	/*
	 * Here we must swapin the pte and free the swap.
	 * NOTE(review): get_free_page() may return 0 under memory pressure
	 * and the result is used unchecked by read_swap_page() -- confirm
	 * callers guarantee this cannot happen here.
	 */
	page = get_free_page(GFP_KERNEL);
	read_swap_page(pte_val(pte), (char *) page);
	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	shp->shm_pages[idx] = pte_val(pte);
	shm_rss++;	/* one more resident shm page */

	swap_free(entry);
	shm_swp--;	/* one fewer swapped-out shm page */
}
|
/* |
* unuse_shm() search for an eventually swapped out shm page. |
*/ |
void shm_unuse(unsigned int type) |
{ |
int i, n; |
|
for (i = 0; i < SHMMNI; i++) |
if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID) |
for (n = 0; n < shm_segs[i]->shm_npages; n++) |
shm_unuse_page(shm_segs[i], n, type); |
} |
|
#endif /* NO_MM */ |
/Makefile
0,0 → 1,21
#
# Makefile for the linux ipc.
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (ie not a .c file).
#
# Note 2! The CFLAGS definition is now in the main makefile...

O_TARGET := ipc.o
O_OBJS := util.o

# Enabling kerneld switches SysV IPC on as well.
ifdef CONFIG_KERNELD
CONFIG_SYSVIPC=1
endif

# The real IPC implementations are built only when SysV IPC is configured;
# otherwise util.o alone supplies the ENOSYS stubs.
ifdef CONFIG_SYSVIPC
O_OBJS += msg.o sem.o shm.o
endif

include $(TOPDIR)/Rules.make
/util.c
0,0 → 1,124
/* |
* linux/ipc/util.c |
* Copyright (C) 1992 Krishna Balasubramanian |
*/ |
|
#include <linux/config.h> |
#include <linux/errno.h> |
#include <asm/segment.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/sem.h> |
#include <linux/msg.h> |
#include <linux/shm.h> |
#include <linux/stat.h> |
|
#if defined(CONFIG_SYSVIPC) || defined(CONFIG_KERNELD) |
|
extern void sem_init (void), msg_init (void), shm_init (void); |
|
/*
 * Bring up each SysV IPC subsystem in turn at boot time.
 */
void ipc_init (void)
{
	sem_init();
	msg_init();
	shm_init();
}
|
/* |
* Check user, group, other permissions for access |
* to ipc resources. return 0 if allowed |
*/ |
int ipcperms (struct ipc_perm *ipcp, short flag) |
{ /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */ |
int requested_mode, granted_mode; |
|
if (suser()) |
return 0; |
requested_mode = (flag >> 6) | (flag >> 3) | flag; |
granted_mode = ipcp->mode; |
if (current->euid == ipcp->cuid || current->euid == ipcp->uid) |
granted_mode >>= 6; |
else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid)) |
granted_mode >>= 3; |
/* is there some bit set in requested_mode but not in granted_mode? */ |
if (requested_mode & ~granted_mode & 0007) |
return -1; |
return 0; |
} |
|
#else |
/* |
* Dummy functions when SYSV IPC isn't configured |
*/ |
|
/* No-op: there is no semaphore state to tear down without SysV IPC. */
void sem_exit (void)
{
}
|
/* Dummy: with SysV IPC unconfigured there are no shm pages to swap out. */
int shm_swap (int prio, unsigned long limit)
{
	return 0;
}
|
/* SysV semaphores not configured: reject with ENOSYS. */
asmlinkage int sys_semget (key_t key, int nsems, int semflg)
{
	return -ENOSYS;
}
|
/* SysV semaphores not configured: reject with ENOSYS. */
asmlinkage int sys_semop (int semid, struct sembuf *sops, unsigned nsops)
{
	return -ENOSYS;
}
|
/* SysV semaphores not configured: reject with ENOSYS. */
asmlinkage int sys_semctl (int semid, int semnum, int cmd, union semun arg)
{
	return -ENOSYS;
}
|
/* SysV message queues not configured: reject with ENOSYS. */
asmlinkage int sys_msgget (key_t key, int msgflg)
{
	return -ENOSYS;
}
|
/* SysV message queues not configured: reject with ENOSYS. */
asmlinkage int sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
{
	return -ENOSYS;
}
|
/* SysV message queues not configured: reject with ENOSYS. */
asmlinkage int sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz, long msgtyp,
	int msgflg)
{
	return -ENOSYS;
}
|
/* SysV message queues not configured: reject with ENOSYS. */
asmlinkage int sys_msgctl (int msqid, int cmd, struct msqid_ds *buf)
{
	return -ENOSYS;
}
|
/* SysV shared memory not configured: reject with ENOSYS. */
asmlinkage int sys_shmget (key_t key, int size, int flag)
{
	return -ENOSYS;
}
|
/* SysV shared memory not configured: reject with ENOSYS. */
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *addr)
{
	return -ENOSYS;
}
|
/* SysV shared memory not configured: reject with ENOSYS. */
asmlinkage int sys_shmdt (char *shmaddr)
{
	return -ENOSYS;
}
|
/* SysV shared memory not configured: reject with ENOSYS. */
asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	return -ENOSYS;
}
|
/* No-op stub for builds without SysV IPC configured. */
void kerneld_exit(void)
{
}
#endif /* CONFIG_SYSVIPC */ |