OpenCores
URL https://opencores.org/ocsvn/or1k_old/or1k_old/trunk

Subversion Repositories or1k_old

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k_old/trunk/rc203soc/sw/uClinux/kernel
    from Rev 1765 to Rev 1782
    Reverse comparison

Rev 1765 → Rev 1782

/time.c
0,0 → 1,397
/*
* linux/kernel/time.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* This file contains the interface functions for the various
* time related system calls: time, stime, gettimeofday, settimeofday,
* adjtime
*/
/*
* Modification history kernel/time.c
*
* 1993-09-02 Philip Gladstone
* Created file with time related functions from sched.c and adjtimex()
* 1993-10-08 Torsten Duwe
* adjtime interface update and CMOS clock write code
* 1995-08-13 Torsten Duwe
* kernel PLL updated to 1994-12-13 specs (rfc-1589)
* 1996-10-22, 1997-09-13 Ulrich Windl
* support for external PPS signal, error checking in adjtimex()
* Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
* 1998-03-05 Ulrich Windl
* Allow time_constant larger than MAXTC(6) for NTP v4
* (debated with and blessed by Dave Mills, despite of earlier work
* and words)
*/
 
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/timex.h>
 
#include <asm/segment.h>
 
/*
* The timezone where the local system is located. Used as a default by some
* programs who obtain this value by using gettimeofday.
*/
struct timezone sys_tz = { 0, 0}; /* tz_minuteswest = 0, tz_dsttime = 0 (UTC, no DST) */
 
#ifndef __alpha__
 
/*
* sys_time() can be implemented in user-level using
* sys_gettimeofday(). Is this for backwards compatibility? If so,
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
/*
 * time(2): return the current time in seconds since the epoch.
 * If tloc is non-NULL the value is also stored at *tloc (after the
 * user pointer has been validated with verify_area()).
 */
asmlinkage int sys_time(int * tloc)
{
	int now = CURRENT_TIME;

	if (tloc) {
		int error = verify_area(VERIFY_WRITE, tloc, sizeof(*tloc));
		if (error)
			return error;
		put_user(now, tloc);
	}
	return now;
}
 
/*
* sys_stime() can be implemented in user-level using
* sys_settimeofday(). Is this for backwards compatibility? If so,
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
asmlinkage int sys_stime(int * tptr)
{
int error, value;
 
if (!suser())
return -EPERM;
error = verify_area(VERIFY_READ, tptr, sizeof(*tptr));
if (error)
return error;
value = get_user(tptr);
cli();
xtime.tv_sec = value;
xtime.tv_usec = 0;
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_state = TIME_ERROR; /* p. 24, (a) */
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
sti();
return 0;
}
 
#endif
 
asmlinkage int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
{
int error;
 
if (tv) {
struct timeval ktv;
error = verify_area(VERIFY_WRITE, tv, sizeof *tv);
if (error)
return error;
do_gettimeofday(&ktv);
memcpy_tofs(tv, &ktv, sizeof(ktv));
}
if (tz) {
error = verify_area(VERIFY_WRITE, tz, sizeof *tz);
if (error)
return error;
memcpy_tofs(tz, &sys_tz, sizeof(sys_tz));
}
return 0;
}
 
/*
* Adjust the time obtained from the CMOS to be UTC time instead of
* local time.
*
* This is ugly, but preferable to the alternatives. Otherwise we
* would either need to write a program to do it in /etc/rc (and risk
* confusion if the program gets run more than once; it would also be
* hard to make the program warp the clock precisely n hours) or
* compile in the timezone information into the kernel. Bad, bad....
*
* - TYT, 1992-01-01
*
* The best thing to do is to keep the CMOS clock in universal time (UTC)
* as real UNIX machines always do it. This avoids all headaches about
* daylight saving times and warping kernel clocks.
*/
/*
 * One-shot correction for a CMOS clock that was running local time:
 * shift xtime by the timezone offset so the kernel clock ticks UTC.
 */
static inline void warp_clock(void)
{
	long shift = sys_tz.tz_minuteswest * 60;

	cli();
	xtime.tv_sec += shift;
	sti();
}
 
/*
* In case for some reason the CMOS clock has not already been running
* in UTC, but in some local time: The first time we set the timezone,
* we will warp the clock so that it is ticking UTC time instead of
* local time. Presumably, if someone is setting the timezone then we
* are running in an environment where the programs understand about
* timezones. This should be done at boot time in the /etc/rc script,
* as soon as possible, so that the clock can be set right. Otherwise,
* various programs will get confused when the clock gets warped.
*/
asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
{
static int firsttime = 1;
struct timeval new_tv;
struct timezone new_tz;
 
if (!suser())
return -EPERM;
if (tv) {
int error = verify_area(VERIFY_READ, tv, sizeof(*tv));
if (error)
return error;
memcpy_fromfs(&new_tv, tv, sizeof(*tv));
}
if (tz) {
int error = verify_area(VERIFY_READ, tz, sizeof(*tz));
if (error)
return error;
memcpy_fromfs(&new_tz, tz, sizeof(*tz));
}
if (tz) {
sys_tz = new_tz;
if (firsttime) {
firsttime = 0;
if (!tv)
warp_clock();
}
}
if (tv)
do_settimeofday(&new_tv);
return 0;
}
 
/*
 * PPS (pulse-per-second) discipline state.  These are read out by
 * sys_adjtimex() below and maintained by a hardpps() handler, which
 * may live in a loadable module (see hardpps_ptr).
 */
long pps_offset = 0; /* pps time offset (us) */
long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */
 
long pps_freq = 0; /* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */
 
long pps_valid = PPS_VALID; /* pps signal watchdog counter */
 
int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */
 
long pps_jitcnt = 0; /* jitter limit exceeded */
long pps_calcnt = 0; /* calibration intervals */
long pps_errcnt = 0; /* calibration errors */
long pps_stbcnt = 0; /* stability limit exceeded */
 
/* hook for a loadable hardpps kernel module */
void (*hardpps_ptr)(struct timeval *) = (void (*)(struct timeval *))0;
 
/* adjtimex mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
/*
 * adjtimex(2): read and (for the superuser) write the kernel NTP
 * timekeeping variables; used by xntpd.  Returns a negative errno
 * on failure, otherwise the current clock state (TIME_OK, ...).
 * Page references ("p. 22" etc.) are to Mills' technical memorandum
 * "A Kernel Model for Precision Timekeeping" (see file header).
 */
asmlinkage int sys_adjtimex(struct timex *txc_p)
{
long ltemp, mtemp, save_adjust;
int error = 0;
 
/* Local copy of parameter */
struct timex txc;
 
error = verify_area(VERIFY_WRITE, txc_p, sizeof(struct timex));
if (error)
return error; /* do not write results */
 
/* Copy the user data space into the kernel copy
* structure. But bear in mind that the structures
* may change
*/
memcpy_fromfs(&txc, txc_p, sizeof(struct timex));
 
/* In order to modify anything, you gotta be super-user! */
if (txc.modes && !suser())
return -EPERM;
 
/* Now we validate the data before disabling interrupts
*/
if (txc.modes != ADJ_OFFSET_SINGLESHOT && (txc.modes & ADJ_OFFSET))
/* adjustment Offset limited to +- .512 seconds */
if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE )
return -EINVAL;
 
cli();
 
/* Save for later - semantics of adjtime() is to return old value */
save_adjust = time_adjust;
 
/* If there are input parameters, then process them */
#if 0 /* STA_CLOCKERR is never set yet */
time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
#endif
if (txc.modes)
{
if (time_state == TIME_ERROR)
time_state = TIME_OK; /* reset error -- why? */
 
if (txc.modes & ADJ_STATUS) /* only set allowed bits */
time_status = (txc.status & ~STA_RONLY) |
(time_status & STA_RONLY);
 
if (txc.modes & ADJ_FREQUENCY) { /* p. 22 */
if (txc.freq > MAXFREQ || txc.freq < -MAXFREQ) {
error = -EINVAL;
goto leave;
}
time_freq = txc.freq - pps_freq;
}
 
if (txc.modes & ADJ_MAXERROR) {
if (txc.maxerror < 0 || txc.maxerror >= NTP_PHASE_LIMIT) {
error = -EINVAL;
goto leave;
}
time_maxerror = txc.maxerror;
}
 
if (txc.modes & ADJ_ESTERROR) {
if (txc.esterror < 0 || txc.esterror >= NTP_PHASE_LIMIT) {
error = -EINVAL;
goto leave;
}
time_esterror = txc.esterror;
}
 
if (txc.modes & ADJ_TIMECONST) { /* p. 24 */
if (txc.constant < 0) { /* NTP v4 uses values > 6 */
error = -EINVAL;
goto leave;
}
time_constant = txc.constant;
}
 
if (txc.modes & ADJ_OFFSET) { /* values checked earlier */
if (txc.modes == ADJ_OFFSET_SINGLESHOT) {
/* adjtime() is independent from ntp_adjtime() */
time_adjust = txc.offset;
}
else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
/* use the PPS-derived offset when both PPSTIME and a
live PPS signal are present, else the caller's offset */
ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
(STA_PPSTIME | STA_PPSSIGNAL) ?
pps_offset : txc.offset;
 
/*
* Scale the phase adjustment and
* clamp to the operating range.
*/
if (ltemp > MAXPHASE)
time_offset = MAXPHASE << SHIFT_UPDATE;
else if (ltemp < -MAXPHASE)
time_offset = -(MAXPHASE << SHIFT_UPDATE);
else
time_offset = ltemp << SHIFT_UPDATE;
 
/*
* Select whether the frequency is to be controlled
* and in which mode (PLL or FLL). Clamp to the operating
* range. Ugly multiply/divide should be replaced someday.
*/
 
if (time_status & STA_FREQHOLD || time_reftime == 0)
time_reftime = xtime.tv_sec;
/* mtemp = seconds since the last update interval */
mtemp = xtime.tv_sec - time_reftime;
time_reftime = xtime.tv_sec;
if (time_status & STA_FLL) {
if (mtemp >= MINSEC) {
ltemp = (time_offset / mtemp) << (SHIFT_USEC -
SHIFT_UPDATE);
if (ltemp < 0)
time_freq -= -ltemp >> SHIFT_KH;
else
time_freq += ltemp >> SHIFT_KH;
} else /* calibration interval too short (p. 12) */
time_state = TIME_ERROR;
} else { /* PLL mode */
if (mtemp < MAXSEC) {
ltemp *= mtemp;
if (ltemp < 0)
time_freq -= -ltemp >> (time_constant +
time_constant +
SHIFT_KF - SHIFT_USEC);
else
time_freq += ltemp >> (time_constant +
time_constant +
SHIFT_KF - SHIFT_USEC);
} else /* calibration interval too long (p. 12) */
time_state = TIME_ERROR;
}
if (time_freq > time_tolerance)
time_freq = time_tolerance;
else if (time_freq < -time_tolerance)
time_freq = -time_tolerance;
} /* STA_PLL || STA_PPSTIME */
} /* txc.modes & ADJ_OFFSET */
if (txc.modes & ADJ_TICK) {
/* if the quartz is off by more than 10% something is
VERY wrong ! */
if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ) {
error = -EINVAL;
goto leave;
}
tick = txc.tick;
}
} /* txc.modes */
/* Re-derive the clock state from the status bits (p. 24, a-d),
then fill in the readout fields of the user's struct timex. */
leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
|| ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
&& (time_status & STA_PPSSIGNAL) == 0)
/* p. 24, (b) */
|| ((time_status & (STA_PPSTIME|STA_PPSJITTER))
== (STA_PPSTIME|STA_PPSJITTER))
/* p. 24, (c) */
|| ((time_status & STA_PPSFREQ) != 0
&& (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
/* p. 24, (d) */
time_state = TIME_ERROR;
if ((txc.modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc.offset = save_adjust;
else {
if (time_offset < 0)
txc.offset = -(-time_offset >> SHIFT_UPDATE);
else
txc.offset = time_offset >> SHIFT_UPDATE;
}
txc.freq = time_freq + pps_freq;
txc.maxerror = time_maxerror;
txc.esterror = time_esterror;
txc.status = time_status;
txc.constant = time_constant;
txc.precision = time_precision;
txc.tolerance = time_tolerance;
do_gettimeofday(&txc.time);
txc.tick = tick;
txc.ppsfreq = pps_freq;
txc.jitter = pps_jitter >> PPS_AVG;
txc.shift = pps_shift;
txc.stabil = pps_stabil;
txc.jitcnt = pps_jitcnt;
txc.calcnt = pps_calcnt;
txc.errcnt = pps_errcnt;
txc.stbcnt = pps_stbcnt;
 
sti();
 
memcpy_tofs(txc_p, &txc, sizeof(struct timex));
return(error < 0 ? error : time_state);
}
/dma.c
0,0 → 1,104
/* $Id: dma.c,v 1.1 2005-12-20 11:46:56 jcastillo Exp $
* linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
*
* Written by Hennus Bergman, 1992.
*
* 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma.
* In the previous version the reported device could end up being wrong,
* if a device requested a DMA channel that was already in use.
* [It also happened to remove the sizeof(char *) == sizeof(int)
* assumption introduced because of those /proc/dma patches. -- Hennus]
*/
 
#include <linux/kernel.h>
#include <linux/errno.h>
#include <asm/dma.h>
#include <asm/system.h>
 
 
/* A note on resource allocation:
*
* All drivers needing DMA channels, should allocate and release them
* through the public routines `request_dma()' and `free_dma()'.
*
* In order to avoid problems, all processes should allocate resources in
* the same sequence and release them in the reverse order.
*
* So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA.
* When releasing them, first release the DMA, then release the IRQ.
* If you don't, you may cause allocation requests to fail unnecessarily.
* This doesn't really matter now, but it will once we get real semaphores
* in the kernel.
*/
 
 
 
/* Channel n is busy iff dma_chan_busy[n].lock != 0.
* DMA0 used to be reserved for DRAM refresh, but apparently not any more...
* DMA4 is reserved for cascading.
*/
 
/* Bookkeeping for one DMA channel. */
struct dma_chan {
int lock;		/* non-zero => channel is allocated */
const char *device_id;	/* owner's name, shown in /proc/dma */
};
 
/*
 * Allocation table, indexed by channel number.  All entries start
 * free except the cascade channel on ISA-style configurations.
 */
static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
{ 0, 0 },
#ifdef CONFIG_M5307
{ 0, 0 },
{ 0, 0 },
#endif
#ifndef CONFIG_UCLINUX
{ 0, 0 },
{ 0, 0 },
{ 1, "cascade" }, /* DMA4 reserved for cascading (see note above) */
{ 0, 0 },
{ 0, 0 },
#endif
{ 0, 0 }
};
 
/*
 * Produce the /proc/dma listing: one "NN: device" line per busy
 * channel.  Returns the number of characters written into buf.
 */
int get_dma_list(char *buf)
{
	int chan;
	int pos = 0;

	for (chan = 0; chan < MAX_DMA_CHANNELS; chan++) {
		if (!dma_chan_busy[chan].lock)
			continue;
		pos += sprintf(buf + pos, "%2d: %s\n",
			       chan,
			       dma_chan_busy[chan].device_id);
	}
	return pos;
} /* get_dma_list */
 
 
/*
 * Claim DMA channel dmanr for device_id.  Returns 0 on success,
 * -EINVAL for a bad channel number, -EBUSY if already allocated.
 */
int request_dma(unsigned int dmanr, const char * device_id)
{
	if (dmanr >= MAX_DMA_CHANNELS)
		return -EINVAL;

	/* Atomically mark the channel busy; fail if it already was. */
	if (xchg(&dma_chan_busy[dmanr].lock, 1))
		return -EBUSY;

	dma_chan_busy[dmanr].device_id = device_id;

	/* old flag was 0, now contains 1 to indicate busy */
	return 0;
} /* request_dma */
 
 
/*
 * Release DMA channel dmanr.  Complains (but does nothing harmful)
 * about out-of-range channel numbers and double frees.
 */
void free_dma(unsigned int dmanr)
{
	if (dmanr >= MAX_DMA_CHANNELS) {
		printk("Trying to free DMA%d\n", dmanr);
		return;
	}

	/* Atomically mark free; warn if it was not busy. */
	if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0)
		printk("Trying to free free DMA%d\n", dmanr);
} /* free_dma */
/fork.c
0,0 → 1,414
/*
* linux/kernel/fork.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
 
/*
* 'fork.c' contains the help-routines for the 'fork' system call
* (see also system_call.s).
* Fork is rather simple, once you get the hang of it, but the memory
* management can be a bitch. See 'mm/mm.c': 'copy_page_tables()'
*/
 
/*
* uClinux revisions for NO_MM
* Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
* The Silver Hammer Group, Ltd.
*/
 
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
#include <linux/ldt.h>
#include <linux/smp.h>
 
#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>
 
int nr_tasks=1;		/* number of task slots in use (init counted) */
int nr_running=1;	/* number of runnable tasks */
unsigned long int total_forks=0; /* Handle normal Linux uptimes. */
int last_pid=0;		/* last pid handed out by get_pid() */
 
/*
 * Find a free slot in the task[] table, enforcing both the global
 * reserve kept for root and the per-user RLIMIT_NPROC limit.
 * Returns the slot index or -EAGAIN.
 */
static inline int find_empty_process(void)
{
	int slot;

	/* Keep the last few slots for root when the table is nearly full. */
	if (nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT && current->uid)
		return -EAGAIN;

	/* Enforce the per-user process limit for non-root users. */
	if (current->uid) {
		long quota = current->rlim[RLIMIT_NPROC].rlim_cur - 1; /* count the new process */

		if (quota < nr_tasks) {
			struct task_struct *p;

			for_each_task (p) {
				if (p->uid == current->uid && --quota < 0)
					return -EAGAIN;
			}
		}
	}

	for (slot = 0; slot < NR_TASKS; slot++) {
		if (!task[slot])
			return slot;
	}
	return -EAGAIN;
}
 
/*
 * Pick a pid for a new task.  With CLONE_PID the parent's pid is
 * reused; otherwise scan forward from last_pid (wrapping below
 * 0x8000) until a value is found that no task uses as a pid,
 * process group, or session id.
 */
static int get_pid(unsigned long flags)
{
	struct task_struct *p;

	if (flags & CLONE_PID)
		return current->pid;

	for (;;) {
		int in_use = 0;

		if ((++last_pid) & 0xffff8000)
			last_pid = 1;	/* wrap around, skipping pid 0 */
		for_each_task (p) {
			if (p->pid == last_pid ||
			    p->pgrp == last_pid ||
			    p->session == last_pid) {
				in_use = 1;
				break;
			}
		}
		if (!in_use)
			return last_pid;
	}
}
 
#ifndef NO_MM
 
/*
 * Duplicate the parent's vma list into the (already bit-copied) mm.
 * Each vma is allocated fresh, stripped of VM_LOCKED, spliced into
 * the inode's share ring when file-backed, and has its page tables
 * copied.  On failure the partially-built list is left linked so the
 * caller can tear it down with exit_mmap().  Returns 0 or -ENOMEM.
 */
static inline int dup_mmap(struct mm_struct * mm)
{
struct vm_area_struct * mpnt, **p, *tmp;
 
mm->mmap = NULL;
p = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
if (!tmp) {
/* exit_mmap is called by the caller */
return -ENOMEM;
}
*tmp = *mpnt;
tmp->vm_flags &= ~VM_LOCKED; /* locked status is not inherited */
tmp->vm_mm = mm;
tmp->vm_next = NULL;
if (tmp->vm_inode) {
tmp->vm_inode->i_count++;
/* insert tmp into the share list, just after mpnt */
tmp->vm_next_share->vm_prev_share = tmp;
mpnt->vm_next_share = tmp;
tmp->vm_prev_share = mpnt;
}
if (copy_page_range(mm, current->mm, tmp)) {
/* link into the linked list for exit_mmap */
*p = tmp;
p = &tmp->vm_next;
/* exit_mmap is called by the caller */
return -ENOMEM;
}
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
*p = tmp;
p = &tmp->vm_next;
}
build_mmap_avl(mm);
flush_tlb_mm(current->mm);
return 0;
}
 
/*
 * Set up tsk->mm for a new task.  With CLONE_VM the parent's address
 * space is shared (reference count bumped); otherwise a private copy
 * is made: new mm_struct, fresh page tables, duplicated vma list.
 * Returns 0 or -ENOMEM.  Note the goto targets a label inside the
 * if-block to reuse the kfree(mm) cleanup path.
 */
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
if (!(clone_flags & CLONE_VM)) {
struct mm_struct * mm = kmalloc(sizeof(*tsk->mm), GFP_KERNEL);
if (!mm)
return -ENOMEM;
*mm = *current->mm;
mm->count = 1; /* private copy: sole reference */
mm->def_flags = 0;
mm->mmap_sem = MUTEX;
tsk->mm = mm;
tsk->min_flt = tsk->maj_flt = 0;
tsk->cmin_flt = tsk->cmaj_flt = 0;
tsk->nswap = tsk->cnswap = 0;
if (new_page_tables(tsk)) {
tsk->mm = NULL;
exit_mmap(mm);
goto free_mm;
}
down(&mm->mmap_sem);
if (dup_mmap(mm)) {
up(&mm->mmap_sem);
tsk->mm = NULL;
exit_mmap(mm); /* tears down the partially-built vma list */
free_page_tables(mm);
free_mm:
kfree(mm);
return -ENOMEM;
}
up(&mm->mmap_sem);
return 0;
}
SET_PAGE_DIR(tsk, current->mm->pgd);
current->mm->count++; /* CLONE_VM: share the address space */
return 0;
}
#else /* NO_MM */
 
/*
 * NO_MM variant: duplicate the parent's chain of mm_tblock_structs
 * into the new mm, bumping the refcount on each shared rblock.  The
 * allocation-accounting globals realalloc/askedalloc are updated for
 * each new link.  Returns 0, -ENOMEM, or -1 if mm is NULL.
 */
static inline int dup_mmap(struct mm_struct * mm)
{
struct mm_tblock_struct * tmp = &current->mm->tblock;
struct mm_tblock_struct * newtmp = &mm->tblock;
/*unsigned long flags;*/
extern long realalloc, askedalloc;
if (!mm)
return -1;
mm->tblock.rblock = 0;
mm->tblock.next = 0;
/*save_flags(flags); cli();*/
while((tmp = tmp->next)) {
newtmp->next = kmalloc(sizeof(struct mm_tblock_struct), GFP_KERNEL);
if (!newtmp->next) {
/*restore_flags(flags);*/
return -ENOMEM;
}
realalloc += ksize(newtmp->next);
askedalloc += sizeof(struct mm_tblock_struct);
newtmp->next->rblock = tmp->rblock;
if (tmp->rblock)
tmp->rblock->refcount++; /* block is now shared with the child */
newtmp->next->next = 0;
newtmp = newtmp->next;
}
/*restore_flags(flags);*/
 
return 0;
 
}
 
/*
 * NO_MM variant of copy_mm(): share the mm for CLONE_VM, otherwise
 * bit-copy the mm_struct and duplicate the tblock chain via
 * dup_mmap().  Returns 0 or -ENOMEM.
 */
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
if (!(clone_flags & CLONE_VM)) {
struct mm_struct * mm = kmalloc(sizeof(*tsk->mm), GFP_KERNEL);
if (!mm)
return -ENOMEM;
*mm = *current->mm;
mm->count = 1; /* private copy: sole reference */
mm->def_flags = 0;
mm->vforkwait = 0; /* fresh vfork wait queue for this mm */
tsk->mm = mm;
if (tsk->mm->executable)
tsk->mm->executable->i_count++;
tsk->min_flt = tsk->maj_flt = 0;
tsk->cmin_flt = tsk->cmaj_flt = 0;
tsk->nswap = tsk->cnswap = 0;
if (dup_mmap(mm)) {
tsk->mm = NULL;
exit_mmap(mm);
kfree(mm);
return -ENOMEM;
}
return 0;
}
current->mm->count++; /* CLONE_VM: share the address space */
return 0;
}
 
#endif /* NO_MM */
 
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
if (clone_flags & CLONE_FS) {
current->fs->count++;
return 0;
}
tsk->fs = kmalloc(sizeof(*tsk->fs), GFP_KERNEL);
if (!tsk->fs)
return -1;
tsk->fs->count = 1;
tsk->fs->umask = current->fs->umask;
if ((tsk->fs->root = current->fs->root))
tsk->fs->root->i_count++;
if ((tsk->fs->pwd = current->fs->pwd))
tsk->fs->pwd->i_count++;
return 0;
}
 
/*
 * Give the child its file descriptor table.  With CLONE_FILES the
 * parent's table is shared; otherwise a new files_struct is made and
 * every open file's reference count is bumped.  Returns 0 on
 * success, -1 on allocation failure.
 */
static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf = current->files;
	struct files_struct *newf;
	int i;

	if (clone_flags & CLONE_FILES) {
		oldf->count++;	/* share the descriptor table */
		return 0;
	}

	newf = kmalloc(sizeof(*newf), GFP_KERNEL);
	tsk->files = newf;
	if (!newf)
		return -1;

	newf->count = 1;
	newf->close_on_exec = oldf->close_on_exec;
	newf->open_fds = oldf->open_fds;

	/* Duplicate the descriptor array, pinning each open file. */
	for (i = 0; i < NR_OPEN; i++) {
		struct file *f = oldf->fd[i];

		newf->fd[i] = f;
		if (f)
			f->f_count++;
	}
	return 0;
}
 
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
if (clone_flags & CLONE_SIGHAND) {
current->sig->count++;
return 0;
}
tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
if (!tsk->sig)
return -1;
tsk->sig->count = 1;
memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
return 0;
}
 
/*
* Ok, this is the main fork-routine. It copies the system process
* information (task[nr]) and sets up the necessary registers. It
* also copies the data segment in its entirety.
*/
/*
 * The guts of fork/clone/vfork.  Allocates a task_struct and kernel
 * stack, copies the parent, claims a task slot and pid, duplicates
 * (or shares, per clone_flags) the files/fs/signal/mm state, then
 * wakes the child.  Returns the child's pid, or a negative errno;
 * on failure the goto chain unwinds exactly what was set up.
 */
int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
int nr;
int i;
int error = -ENOMEM;
unsigned long new_stack;
struct task_struct *p;
 
p = (struct task_struct *) kmalloc(sizeof(*p), GFP_KERNEL);
if (!p)
goto bad_fork;
 
new_stack = alloc_kernel_stack();
if (!new_stack)
goto bad_fork_free_p;
 
error = -EAGAIN;
nr = find_empty_process();
if (nr < 0)
goto bad_fork_free_stack;
 
/* Start from a bit-copy of the parent, then fix up the fields
that must not be inherited. */
*p = *current;
 
if (p->exec_domain && p->exec_domain->use_count)
(*p->exec_domain->use_count)++;
if (p->binfmt && p->binfmt->use_count)
(*p->binfmt->use_count)++;
 
p->did_exec = 0;
p->swappable = 0;
p->kernel_stack_page = new_stack;
/* Poison the stack so overflow/usage can be detected later. */
*(unsigned long *) p->kernel_stack_page = STACK_MAGIC;
for(i=1;i<(PAGE_SIZE/sizeof(long));i++)
((unsigned long*)p->kernel_stack_page)[i] = STACK_UNTOUCHED_MAGIC;
p->state = TASK_UNINTERRUPTIBLE;
p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV);
p->flags |= PF_FORKNOEXEC;
p->pid = get_pid(clone_flags);
p->next_run = NULL;
p->prev_run = NULL;
p->p_pptr = p->p_opptr = current;
p->p_cptr = NULL;
init_waitqueue(&p->wait_chldexit);
p->signal = 0;
p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
init_timer(&p->real_timer);
p->real_timer.data = (unsigned long) p;
p->leader = 0; /* session leadership doesn't inherit */
p->tty_old_pgrp = 0;
p->utime = p->stime = 0;
p->cutime = p->cstime = 0;
#ifdef __SMP__
p->processor = NO_PROC_ID;
p->lock_depth = 1;
#endif
p->start_time = jiffies;
task[nr] = p;
SET_LINKS(p);
nr_tasks++;
 
error = -ENOMEM;
/* copy all the process information */
if (copy_files(clone_flags, p))
goto bad_fork_cleanup;
 
if (copy_fs(clone_flags, p))
goto bad_fork_cleanup_files;
 
if (copy_sighand(clone_flags, p))
goto bad_fork_cleanup_fs;
 
if (copy_mm(clone_flags, p))
goto bad_fork_cleanup_sighand;
 
copy_thread(nr, clone_flags, usp, p, regs);
p->semundo = NULL;
 
/* ok, now we should be set up.. */
p->swappable = 1;
p->exit_signal = clone_flags & CSIGNAL;
/* Split the parent's remaining timeslice with the child. */
p->counter = (current->counter >>= 1);
wake_up_process(p); /* do this last, just in case */
++total_forks;
#ifdef NO_MM
if (clone_flags & CLONE_WAIT) {
sleep_on(&current->mm->vforkwait);
}
#endif /*NO_MM*/
 
return p->pid;
 
/* Error unwind: each label undoes one step of the setup above. */
bad_fork_cleanup_sighand:
exit_sighand(p);
bad_fork_cleanup_fs:
exit_fs(p);
bad_fork_cleanup_files:
exit_files(p);
bad_fork_cleanup:
if (p->exec_domain && p->exec_domain->use_count)
(*p->exec_domain->use_count)--;
if (p->binfmt && p->binfmt->use_count)
(*p->binfmt->use_count)--;
task[nr] = NULL;
REMOVE_LINKS(p);
nr_tasks--;
bad_fork_free_stack:
free_kernel_stack(new_stack);
bad_fork_free_p:
kfree(p);
bad_fork:
return error;
}
/ksyms.c
0,0 → 1,402
/*
* Herein lies all the functions/variables that are "exported" for linkage
* with dynamically loaded kernel modules.
* Jon.
*
* - Stacked module support and unified symbol table added (June 1994)
* - External symbol table support added (December 1994)
* - Versions on symbols added (December 1994)
* by Bjorn Ekwall <bj0rn@blox.se>
*/
 
#include <linux/module.h>
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/malloc.h>
#include <linux/ptrace.h>
#include <linux/sys.h>
#include <linux/utsname.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/timer.h>
#include <linux/binfmts.h>
#include <linux/personality.h>
#include <linux/termios.h>
#include <linux/tqueue.h>
#include <linux/tty.h>
#include <linux/serial.h>
#include <linux/locks.h>
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/sem.h>
#include <linux/minix_fs.h>
#include <linux/ext2_fs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>
#include <linux/hdreg.h>
#include <linux/skbuff.h>
#include <linux/genhd.h>
#include <linux/swap.h>
#include <linux/ctype.h>
#include <linux/file.h>
 
extern unsigned char aux_device_present, kbd_read_mask;
 
#ifdef CONFIG_PCI
#include <linux/bios32.h>
#include <linux/pci.h>
#endif
#if defined(CONFIG_PROC_FS)
#include <linux/proc_fs.h>
#endif
#ifdef CONFIG_KERNELD
#include <linux/kerneld.h>
#endif
#include <asm/irq.h>
#ifdef __SMP__
#include <linux/smp.h>
#endif
 
extern char *get_options(char *str, int *ints);
extern void set_device_ro(kdev_t dev,int flag);
extern struct file_operations * get_blkfops(unsigned int);
extern void blkdev_release(struct inode * inode);
 
extern void *sys_call_table;
 
extern struct timezone sys_tz;
extern int request_dma(unsigned int dmanr, char * deviceID);
extern void free_dma(unsigned int dmanr);
 
extern void hard_reset_now(void);
 
extern void select_free_wait(select_table * p);
extern int select_check(int flag, select_table * wait, struct file * file);
 
/*
 * The kernel's exported symbol table, consumed by the module loader.
 * Each X(name) entry makes "name" visible to loadable modules; the
 * symtab_begin/symtab_end headers supply the surrounding machinery.
 * Order matters only for the tail (see the warning near the end).
 */
struct symbol_table symbol_table = {
#include <linux/symtab_begin.h>
#ifdef MODVERSIONS
{ (void *)1 /* Version version :-) */,
SYMBOL_NAME_STR (Using_Versions) },
#endif
 
/* stackable module support */
X(register_symtab_from),
X(get_module_symbol),
#ifdef CONFIG_KERNELD
X(kerneld_send),
#endif
X(get_options),
 
/* system info variables */
/* These check that they aren't defines (0/1) */
#ifndef EISA_bus__is_a_macro
X(EISA_bus),
#endif
#ifndef MCA_bus__is_a_macro
X(MCA_bus),
#endif
#ifndef wp_works_ok__is_a_macro
X(wp_works_ok),
#endif
 
#ifdef CONFIG_PCI
/* PCI BIOS support */
X(pcibios_present),
X(pcibios_find_class),
X(pcibios_find_device),
X(pcibios_read_config_byte),
X(pcibios_read_config_word),
X(pcibios_read_config_dword),
X(pcibios_strerror),
X(pcibios_write_config_byte),
X(pcibios_write_config_word),
X(pcibios_write_config_dword),
#endif
 
/* process memory management */
X(verify_area),
X(do_mmap),
X(do_munmap),
X(exit_mm),
 
/* internal kernel memory management */
X(__get_free_pages),
X(free_pages),
X(kmalloc),
X(kfree),
X(vmalloc),
X(vremap),
X(vfree),
X(mem_map),
X(remap_page_range),
X(high_memory),
X(update_vm_cache),
 
/* filesystem internal functions */
X(getname),
X(putname),
X(__iget),
X(iput),
X(namei),
X(lnamei),
X(open_namei),
X(sys_close),
X(close_fp),
X(check_disk_change),
X(invalidate_buffers),
X(invalidate_inodes),
X(invalidate_inode_pages),
X(fsync_dev),
X(permission),
X(inode_setattr),
X(inode_change_ok),
X(set_blocksize),
X(getblk),
X(bread),
X(breada),
 
X(select_check),
X(select_free_wait),
 
X(__brelse),
X(__bforget),
X(ll_rw_block),
X(brw_page),
X(__wait_on_buffer),
X(mark_buffer_uptodate),
X(unlock_buffer),
X(dcache_lookup),
X(dcache_add),
X(add_blkdev_randomness),
X(generic_file_read),
X(generic_file_mmap),
X(generic_readpage),
X(__fput),
X(make_bad_inode),
 
/* device registration */
X(register_chrdev),
X(unregister_chrdev),
X(register_blkdev),
X(unregister_blkdev),
X(tty_register_driver),
X(tty_unregister_driver),
X(tty_std_termios),
 
/* block device driver support */
X(block_read),
X(block_write),
X(block_fsync),
X(wait_for_request),
X(blksize_size),
X(hardsect_size),
X(blk_size),
X(blk_dev),
X(max_sectors),
X(max_segments),
X(is_read_only),
X(set_device_ro),
X(bmap),
X(sync_dev),
X(get_blkfops),
X(blkdev_open),
X(blkdev_release),
X(gendisk_head),
X(resetup_one_dev),
X(unplug_device),
X(make_request),
X(tq_disk),
 
#ifdef CONFIG_SERIAL
/* Module creation of serial units */
X(register_serial),
X(unregister_serial),
#endif
/* tty routines */
X(tty_hangup),
X(tty_wait_until_sent),
X(tty_check_change),
X(tty_hung_up_p),
X(do_SAK),
X(console_print),
 
/* filesystem registration */
X(register_filesystem),
X(unregister_filesystem),
 
/* executable format registration */
X(register_binfmt),
X(unregister_binfmt),
X(search_binary_handler),
X(prepare_binprm),
X(remove_arg_zero),
 
/* execution environment registration */
X(lookup_exec_domain),
X(register_exec_domain),
X(unregister_exec_domain),
 
/* sysctl table registration */
X(register_sysctl_table),
X(unregister_sysctl_table),
X(sysctl_string),
X(sysctl_intvec),
X(proc_dostring),
X(proc_dointvec),
X(proc_dointvec_minmax),
 
/* interrupt handling */
X(request_irq),
X(free_irq),
X(enable_irq),
X(disable_irq),
X(probe_irq_on),
X(probe_irq_off),
X(bh_active),
X(bh_mask),
X(bh_mask_count),
X(bh_base),
X(add_timer),
X(del_timer),
X(tq_timer),
X(tq_immediate),
X(tq_scheduler),
X(timer_active),
X(timer_table),
X(intr_count),
 
/* autoirq from drivers/net/auto_irq.c */
#ifdef CONFIG_NET
X(autoirq_setup),
X(autoirq_report),
#endif
 
/* dma handling */
X(request_dma),
X(free_dma),
#ifdef HAVE_DISABLE_HLT
X(disable_hlt),
X(enable_hlt),
#endif
 
/* IO port handling */
X(check_region),
X(request_region),
X(release_region),
 
/* process management */
X(wake_up),
X(wake_up_interruptible),
X(sleep_on),
X(interruptible_sleep_on),
X(schedule),
X(current_set),
X(jiffies),
X(xtime),
X(do_gettimeofday),
X(loops_per_sec),
X(need_resched),
X(kstat),
X(kill_proc),
X(kill_pg),
X(kill_sl),
X(force_sig),
 
/* misc */
X(panic),
X(printk),
X(sprintf),
X(vsprintf),
X(kdevname),
X(simple_strtoul),
X(system_utsname),
X(sys_call_table),
X(hard_reset_now),
X(_ctype),
X(_ctmp),
X(get_random_bytes),
 
/* Signal interfaces */
X(send_sig),
 
/* Program loader interfaces */
X(setup_arg_pages),
X(copy_strings),
X(do_execve),
X(flush_old_exec),
X(open_inode),
X(read_exec),
 
/* Miscellaneous access points */
X(si_meminfo),
 
/* Added to make file system as module */
X(set_writetime),
X(sys_tz),
X(__wait_on_super),
X(file_fsync),
X(clear_inode),
X(refile_buffer),
X(nr_async_pages),
X(___strtok),
X(init_fifo),
X(super_blocks),
X(fifo_inode_operations),
X(chrdev_inode_operations),
X(blkdev_inode_operations),
X(read_ahead),
X(get_hash_table),
X(get_empty_inode),
X(insert_inode_hash),
X(event),
X(__down),
X(__up),
X(securelevel),
/* all busmice */
X(add_mouse_randomness),
X(fasync_helper),
#ifndef __mc68000__
/* psaux mouse */
X(aux_device_present),
X(kbd_read_mask),
#endif
 
#ifdef CONFIG_BLK_DEV_IDE_PCMCIA
X(ide_register),
X(ide_unregister),
#endif
 
#ifdef CONFIG_BLK_DEV_MD
X(disk_name), /* for md.c */
#endif
/* binfmt_aout */
X(get_write_access),
X(put_write_access),
 
#ifdef CONFIG_PROC_FS
X(proc_dir_inode_operations),
#endif
 
/* Modular sound */
X(sys_open),
X(sys_read),
/********************************************************
* Do not add anything below this line,
* as the stacked modules depend on this!
*/
#include <linux/symtab_end.h>
};
 
/*
int symbol_table_size = sizeof (symbol_table) / sizeof (symbol_table[0]);
*/
/printk.c
0,0 → 1,276
/*
* linux/kernel/printk.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Modified to make sys_syslog() more flexible: added commands to
* return the last 4k of kernel messages, regardless of whether
* they've been read or not. Added option to suppress kernel printk's
* to the console. Added hook for sending the console messages
* elsewhere, in preparation for a serial line console (someday).
* Ted Ts'o, 2/11/93.
*/
 
#include <stdarg.h>
 
#include <asm/segment.h>
#include <asm/system.h>
 
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
 
/* NOTE: LOG_BUF_LEN must be a power of two -- ring indices below are
masked with LOG_BUF_LEN-1. */
#if defined(CONFIG_REDUCED_MEMORY) && !defined(CONFIG_DUMPTOFLASH)
#define LOG_BUF_LEN 1024 /* Originally: 8192 */
#else /* !CONFIG_REDUCED_MEMORY */
#define LOG_BUF_LEN 8192
#endif /* !CONFIG_REDUCED_MEMORY */
 
/* scratch buffer for vsprintf() inside printk() */
static char buf[1024];
 
extern void console_print(const char *);
 
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
 
/* We show everything that is MORE important than this.. */
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
 
unsigned long log_size = 0; /* unread bytes currently in the ring */
struct wait_queue * log_wait = NULL; /* readers blocked in sys_syslog */
int console_loglevel = 8;/*DEFAULT_CONSOLE_LOGLEVEL;*/
 
static void (*console_print_proc)(const char *) = 0; /* console output hook */
static char log_buf[LOG_BUF_LEN]; /* the message ring buffer */
static unsigned long log_start = 0; /* ring index of oldest unread char */
static unsigned long logged_chars = 0; /* total chars logged (caps re-reads) */
 
/*
* Commands to sys_syslog:
*
* 0 -- Close the log. Currently a NOP.
* 1 -- Open the log. Currently a NOP.
* 2 -- Read from the log.
* 3 -- Read up to the last 4k of messages in the ring buffer.
* 4 -- Read and clear last 4k of messages in the ring buffer
* 5 -- Clear ring buffer.
* 6 -- Disable printk's to console
* 7 -- Enable printk's to console
* 8 -- Set level of messages printed to console
*/
/*
 * syslog(2): operate on the kernel message ring buffer according to
 * "type" (see the command list above).  Only type 3 (non-destructive
 * read of the tail) is allowed without superuser rights.  Interrupts
 * are disabled around ring-buffer index manipulation and briefly
 * re-enabled for each put_user().
 */
asmlinkage int sys_syslog(int type, char * buf, int len)
{
unsigned long i, j, count;
int do_clear = 0;
char c;
int error;
 
if ((type != 3) && !suser())
return -EPERM;
switch (type) {
case 0: /* Close log */
return 0;
case 1: /* Open log */
return 0;
case 2: /* Read from log */
if (!buf || len < 0)
return -EINVAL;
if (!len)
return 0;
error = verify_area(VERIFY_WRITE,buf,len);
if (error)
return error;
cli();
/* Block until at least one character is available. */
while (!log_size) {
if (current->signal & ~current->blocked) {
sti();
return -ERESTARTSYS;
}
interruptible_sleep_on(&log_wait);
}
i = 0;
/* Consume characters from the ring, oldest first. */
while (log_size && i < len) {
c = *((char *) log_buf+log_start);
log_start++;
log_size--;
log_start &= LOG_BUF_LEN-1;
sti();
put_user(c,buf);
buf++;
i++;
cli();
}
sti();
return i;
case 4: /* Read/clear last kernel messages */
do_clear = 1;
/* FALL THRU */
case 3: /* Read last kernel messages */
if (!buf || len < 0)
return -EINVAL;
if (!len)
return 0;
error = verify_area(VERIFY_WRITE,buf,len);
if (error)
return error;
/* Copy out up to len chars of the tail, non-destructively. */
count = len;
if (count > LOG_BUF_LEN)
count = LOG_BUF_LEN;
if (count > logged_chars)
count = logged_chars;
j = log_start + log_size - count;
for (i = 0; i < count; i++) {
c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
put_user(c, buf++);
}
if (do_clear)
logged_chars = 0;
return i;
case 5: /* Clear ring buffer */
logged_chars = 0;
return 0;
case 6: /* Disable logging to console */
console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
return 0;
case 7: /* Enable logging to console */
console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
return 0;
case 8: /* Set console loglevel; "len" carries the new level */
if (len < 1 || len > 8)
return -EINVAL;
if (len < MINIMUM_CONSOLE_LOGLEVEL)
len = MINIMUM_CONSOLE_LOGLEVEL;
console_loglevel = len;
return 0;
}
return -EINVAL;
}
 
/*
 * printk() - format a kernel message, append it to the log ring
 * buffer and, when its loglevel is important enough, hand each line
 * to the registered console output routine.  Runs with interrupts
 * disabled throughout.  Returns the number of formatted characters.
 */
asmlinkage int printk(const char *fmt, ...)
{
	va_list args;
	int i;
	char *msg, *p, *buf_end;
	/* static: a multi-line message keeps the loglevel of its first
	   line until a '\n' terminates it */
	static char msg_level = -1;
	long flags;

	save_flags(flags);
	cli();
	va_start(args, fmt);
	/* format after buf[0..2], reserved for a "<N>" level prefix
	   that may be inserted below; vsprintf is unbounded, so this
	   relies on i < sizeof(buf)-4 */
	i = vsprintf(buf + 3, fmt, args);
	buf_end = buf + 3 + i;
	va_end(args);

	/* walk the formatted text one log line at a time */
	for (p = buf + 3; p < buf_end; p++) {
		msg = p;
		if (msg_level < 0) {
			/* no "<N>" prefix yet: either parse one, or back
			   up three chars and synthesize the default */
			if (
				p[0] != '<' ||
				p[1] < '0' ||
				p[1] > '7' ||
				p[2] != '>'
			) {
				p -= 3;
				p[0] = '<';
				p[1] = DEFAULT_MESSAGE_LOGLEVEL + '0';
				p[2] = '>';
			} else
				msg += 3;
			msg_level = p[1] - '0';
		}
		/* copy up to (and including) the next newline into the
		   ring buffer, dropping the oldest data when full */
		for (; p < buf_end; p++) {
			log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p;
			if (log_size < LOG_BUF_LEN)
				log_size++;
			else {
				log_start++;
				log_start &= LOG_BUF_LEN-1;
			}
			logged_chars++;
			if (*p == '\n')
				break;
		}

		/* emit this line to the console, temporarily
		   NUL-terminating it in place */
		if (msg_level < console_loglevel && console_print_proc) {
			char tmp = p[1];
			p[1] = '\0';
			(*console_print_proc)(msg);
			p[1] = tmp;
		}
		if (*p == '\n')
			msg_level = -1;	/* line finished: reset level */
	}
	restore_flags(flags);
	wake_up_interruptible(&log_wait);
	return i;
}
 
/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
* print any messages that were printed by the kernel before the
* console driver was initialized.
*/
void register_console(void (*proc)(const char *))
{
	int i,j;
	int p = log_start;
	char buf[16];		/* replay buffered messages in small chunks */
	char msg_level = -1;
	char *q;

	console_print_proc = proc;

	/* replay the buffered log: gather chars until a newline, the
	   end of the log, or the chunk buffer fills, then print */
	for (i=0,j=0; i < log_size; i++) {
		buf[j++] = log_buf[p];
		p++; p &= LOG_BUF_LEN-1;
		if (buf[j-1] != '\n' && i < log_size - 1 && j < sizeof(buf)-1)
			continue;
		buf[j] = 0;
		q = buf;
		if (msg_level < 0) {
			/* assumes the line starts with the "<N>" prefix
			   that printk() always writes -- skip it */
			msg_level = buf[1] - '0';
			q = buf + 3;
		}
		if (msg_level < console_loglevel)
			(*proc)(q);
		if (buf[j-1] == '\n')
			msg_level = -1;	/* next chunk starts a new line */
		j = 0;
	}
}
 
/*
* Return log buffer address and size.
*/
/*
 * Hand back the address of the kernel log buffer through *bp and
 * return the number of characters currently stored in it.
 */
unsigned long sys_getlog(char **bp)
{
	*bp = log_buf;
	return log_size;
}
 
/*
 * Discard everything in the kernel log ring buffer and reset its
 * bookkeeping counters.
 */
void sys_resetlog(void)
{
	logged_chars = 0;
	log_size = 0;
	log_start = 0;
}
 
/*
* Write a message to a certain tty, not just the console. This is used for
* messages that need to be redirected to a specific tty.
* We don't put it into the syslog queue right now maybe in the future if
* really needed.
*/
/*
 * Write a message directly to a specific tty, bypassing the syslog
 * ring buffer.  A NULL tty or a driver without a write routine is
 * silently ignored.
 */
void tty_write_message(struct tty_struct *tty, char *msg)
{
	if (!tty)
		return;
	if (tty->driver.write)
		tty->driver.write(tty, 0, msg, strlen(msg));
}
/exit.c
0,0 → 1,763
/*
* linux/kernel/exit.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
 
/*
* uClinux revisions for NO_MM
* Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
* The Silver Hammer Group, Ltd.
*/
 
#undef DEBUG_PROC_TREE
 
#include <linux/wait.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/malloc.h>
#include <linux/interrupt.h>
 
#include <asm/segment.h>
#include <asm/pgtable.h>
 
extern void sem_exit (void);
extern int acct_process (long exitcode);
extern void kerneld_exit(void);
 
int getrusage(struct task_struct *, int, struct rusage *);
 
/*
 * Post signal @sig to process @p, with the "ignore" optimization: a
 * signal that is not blocked, not traced, and would be discarded by
 * its disposition (SIG_IGN, or SIG_DFL for default-ignored signals)
 * is dropped without ever being made pending.
 */
static inline void generate(unsigned long sig, struct task_struct * p)
{
	unsigned long flags;
	unsigned long mask = 1 << (sig-1);
	struct sigaction * sa = sig + p->sig->action - 1;

	/*
	 * Optimize away the signal, if it's a signal that can
	 * be handled immediately (ie non-blocked and untraced)
	 * and that is ignored (either explicitly or by default)
	 */
	save_flags(flags); cli();
	if (!(mask & p->blocked) && !(p->flags & PF_PTRACED)) {
		/* don't bother with ignored signals (but SIGCHLD is special) */
		if (sa->sa_handler == SIG_IGN && sig != SIGCHLD) {
			restore_flags(flags);
			return;
		}
		/* some signals are ignored by default.. (but SIGCONT already did its deed) */
		if ((sa->sa_handler == SIG_DFL) &&
		    (sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH || sig == SIGURG)) {
			restore_flags(flags);
			return;
		}
	}
	/* mark the signal pending and wake the target if it can take it */
	p->signal |= mask;
	if (p->state == TASK_INTERRUPTIBLE && (p->signal & ~p->blocked))
		wake_up_process(p);
	restore_flags(flags);
}
 
/*
* Force a signal that the process can't ignore: if necessary
* we unblock the signal and change any SIG_IGN to SIG_DFL.
*/
void force_sig(unsigned long sig, struct task_struct * p)
{
	sig--;	/* convert 1-based signal number to 0-based index */
	if (p->sig) {	/* zombies have no signal state: nothing to do */
		unsigned long flags;
		unsigned long mask = 1UL << sig;
		struct sigaction *sa = p->sig->action + sig;

		save_flags(flags); cli();
		p->signal |= mask;
		p->blocked &= ~mask;	/* the signal must not stay blocked */
		if (sa->sa_handler == SIG_IGN)
			sa->sa_handler = SIG_DFL;	/* ...nor ignored */
		if (p->state == TASK_INTERRUPTIBLE)
			wake_up_process(p);
		restore_flags(flags);
	}
}
 
/*
 * Deliver signal @sig to process @p.  @priv != 0 bypasses the
 * permission check (kernel-internal senders).  Returns 0 on success
 * or -EINVAL / -EPERM.  Sending to a zombie quietly succeeds.
 */
int send_sig(unsigned long sig,struct task_struct * p,int priv)
{
	unsigned long flags;

	if (!p || sig > 32)
		return -EINVAL;
	/* unprivileged senders need a uid match (the xor terms), or
	   SIGCONT within the same session, or superuser rights */
	if (!priv && ((sig != SIGCONT) || (current->session != p->session)) &&
	    (current->euid ^ p->suid) && (current->euid ^ p->uid) &&
	    (current->uid ^ p->suid) && (current->uid ^ p->uid) &&
	    !suser())
		return -EPERM;
	if (!sig)
		return 0;	/* signal 0: permission probe only */
	/*
	 * Forget it if the process is already zombie'd.
	 */
	if (!p->sig)
		return 0;
	save_flags(flags); cli();
	if ((sig == SIGKILL) || (sig == SIGCONT)) {
		/* KILL/CONT resume a stopped task and cancel any
		   pending stop signals */
		if (p->state == TASK_STOPPED)
			wake_up_process(p);
		p->exit_code = 0;
		p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) |
				(1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) );
	}
	/* conversely, a stop signal cancels a pending SIGCONT */
	if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU)
		p->signal &= ~(1<<(SIGCONT-1));
	restore_flags(flags);

	/* Actually generate the signal */
	generate(sig,p);
	return 0;
}
 
/*
 * Tell tsk's parent that tsk changed state: deliver the requested
 * signal and wake the parent if it is sleeping in wait4().
 */
void notify_parent(struct task_struct * tsk, int signal)
{
	struct task_struct *parent = tsk->p_pptr;

	send_sig(signal, parent, 1);
	wake_up_interruptible(&parent->wait_chldexit);
}
 
/*
 * Final teardown of a dead task: remove it from the task[] array and
 * the process links, free its kernel stack and task_struct, and fold
 * its fault/swap statistics into the caller (its reaper).
 */
void release(struct task_struct * p)
{
	int i;

	if (!p)
		return;
	if (p == current) {
		printk("task releasing itself\n");
		return;
	}
	for (i=1 ; i<NR_TASKS ; i++)
		if (task[i] == p) {
			nr_tasks--;
			task[i] = NULL;
			REMOVE_LINKS(p);
			release_thread(p);
			/* the magic cookie at the stack base catches overruns */
			if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
				printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm);
			free_kernel_stack(p->kernel_stack_page);
			/* the dead child's counters are inherited by the reaper */
			current->cmin_flt += p->min_flt + p->cmin_flt;
			current->cmaj_flt += p->maj_flt + p->cmaj_flt;
			current->cnswap += p->nswap + p->cnswap;
			kfree(p);
			return;
		}
	panic("trying to release non-existent task");
}
 
#ifdef DEBUG_PROC_TREE
/*
* Check to see if a task_struct pointer is present in the task[] array
* Return 0 if found, and 1 if not found.
*/
int bad_task_ptr(struct task_struct *p)
{
int i;
 
if (!p)
return 0;
for (i=0 ; i<NR_TASKS ; i++)
if (task[i] == p)
return 0;
return 1;
}
/*
* This routine scans the pid tree and makes sure the rep invariant still
* holds. Used for debugging only, since it's very slow....
*
* It looks a lot scarier than it really is.... we're doing nothing more
* than verifying the doubly-linked list found in p_ysptr and p_osptr,
* and checking it corresponds with the process tree defined by p_cptr and
* p_pptr;
*/
/*
 * Walk the task[] array and verify the process-tree invariants: the
 * p_pptr/p_cptr parent/child links and the p_osptr/p_ysptr sibling
 * list must form a consistent doubly-linked structure.  Debug only.
 *
 * Fix vs. the original: the younger-sibling branch printed the
 * *older* sibling's pid and parent (copy-paste from the branch
 * above) and could dereference a NULL p_osptr; it now reports
 * p_ysptr as intended.
 */
void audit_ptree(void)
{
	int i;

	for (i=1 ; i<NR_TASKS ; i++) {
		if (!task[i])
			continue;
		/* every link must point at a live task... */
		if (bad_task_ptr(task[i]->p_pptr))
			printk("Warning, pid %d's parent link is bad\n",
				task[i]->pid);
		if (bad_task_ptr(task[i]->p_cptr))
			printk("Warning, pid %d's child link is bad\n",
				task[i]->pid);
		if (bad_task_ptr(task[i]->p_ysptr))
			printk("Warning, pid %d's ys link is bad\n",
				task[i]->pid);
		if (bad_task_ptr(task[i]->p_osptr))
			printk("Warning, pid %d's os link is bad\n",
				task[i]->pid);
		/* ...and never back at the task itself */
		if (task[i]->p_pptr == task[i])
			printk("Warning, pid %d parent link points to self\n",
				task[i]->pid);
		if (task[i]->p_cptr == task[i])
			printk("Warning, pid %d child link points to self\n",
				task[i]->pid);
		if (task[i]->p_ysptr == task[i])
			printk("Warning, pid %d ys link points to self\n",
				task[i]->pid);
		if (task[i]->p_osptr == task[i])
			printk("Warning, pid %d os link points to self\n",
				task[i]->pid);
		/* siblings must share our parent and mirror our links */
		if (task[i]->p_osptr) {
			if (task[i]->p_pptr != task[i]->p_osptr->p_pptr)
				printk(
			"Warning, pid %d older sibling %d parent is %d\n",
					task[i]->pid, task[i]->p_osptr->pid,
					task[i]->p_osptr->p_pptr->pid);
			if (task[i]->p_osptr->p_ysptr != task[i])
				printk(
		"Warning, pid %d older sibling %d has mismatched ys link\n",
					task[i]->pid, task[i]->p_osptr->pid);
		}
		if (task[i]->p_ysptr) {
			if (task[i]->p_pptr != task[i]->p_ysptr->p_pptr)
				printk(
			"Warning, pid %d younger sibling %d parent is %d\n",
					task[i]->pid, task[i]->p_ysptr->pid,
					task[i]->p_ysptr->p_pptr->pid);
			if (task[i]->p_ysptr->p_osptr != task[i])
				printk(
		"Warning, pid %d younger sibling %d has mismatched os link\n",
					task[i]->pid, task[i]->p_ysptr->pid);
		}
		/* the youngest child points back at us and has no ys */
		if (task[i]->p_cptr) {
			if (task[i]->p_cptr->p_pptr != task[i])
				printk(
		"Warning, pid %d youngest child %d has mismatched parent link\n",
					task[i]->pid, task[i]->p_cptr->pid);
			if (task[i]->p_cptr->p_ysptr)
				printk(
		"Warning, pid %d youngest child %d has non-NULL ys link\n",
					task[i]->pid, task[i]->p_cptr->pid);
		}
	}
}
#endif /* DEBUG_PROC_TREE */
 
/*
* This checks not only the pgrp, but falls back on the pid if no
* satisfactory pgrp is found. I dunno - gdb doesn't work correctly
* without this...
*/
/*
 * Find the session of process group @pgrp.  If no process carries
 * that pgrp, fall back on the session of the process whose pid
 * equals @pgrp (gdb relies on this).  Returns -1 when neither exists.
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *t;
	int fallback = -1;

	for_each_task(t) {
		if (t->session <= 0)
			continue;	/* no valid session */
		if (t->pgrp == pgrp)
			return t->session;
		if (t->pid == pgrp)
			fallback = t->session;
	}
	return fallback;
}
 
/*
* kill_pg() sends a signal to a process group: this is what the tty
* control characters do (^C, ^Z etc)
*/
/*
 * Send @sig to every member of process group @pgrp (what the tty
 * control characters ^C, ^Z etc. do).  Returns 0 if at least one
 * delivery succeeded, otherwise the last error (or -ESRCH when the
 * group is empty).
 */
int kill_pg(int pgrp, int sig, int priv)
{
	struct task_struct *t;
	int last_err = -ESRCH;
	int delivered = 0;
	int err;

	if (sig < 0 || sig > 32 || pgrp <= 0)
		return -EINVAL;
	for_each_task(t) {
		if (t->pgrp != pgrp)
			continue;
		err = send_sig(sig, t, priv);
		if (err != 0)
			last_err = err;
		else
			delivered++;
	}
	return delivered ? 0 : last_err;
}
 
/*
* kill_sl() sends a signal to the session leader: this is used
* to send SIGHUP to the controlling process of a terminal when
* the connection is lost.
*/
/*
 * Send @sig to the leader(s) of session @sess: used to SIGHUP the
 * controlling process of a terminal when the connection is lost.
 * Returns 0 if at least one delivery succeeded, otherwise the last
 * error (or -ESRCH when no leader was found).
 */
int kill_sl(int sess, int sig, int priv)
{
	struct task_struct *t;
	int last_err = -ESRCH;
	int delivered = 0;
	int err;

	if (sig < 0 || sig > 32 || sess <= 0)
		return -EINVAL;
	for_each_task(t) {
		if (t->session != sess || !t->leader)
			continue;
		err = send_sig(sig, t, priv);
		if (err != 0)
			last_err = err;
		else
			delivered++;
	}
	return delivered ? 0 : last_err;
}
 
int kill_proc(int pid, int sig, int priv)
{
struct task_struct *p;
 
if (sig<0 || sig>32)
return -EINVAL;
for_each_task(p) {
if (p && p->pid == pid)
return send_sig(sig,p,priv);
}
return(-ESRCH);
}
 
/*
* POSIX specifies that kill(-1,sig) is unspecified, but what we have
* is probably wrong. Should make it like BSD or SYSV.
*/
/*
 * kill(2).  pid > 0: that one process; pid == 0: the caller's own
 * process group; pid == -1: everything except init and ourselves;
 * pid < -1: process group -pid.
 */
asmlinkage int sys_kill(int pid,int sig)
{
	int err, retval = 0, count = 0;

	if (!pid)
		return(kill_pg(current->pgrp,sig,0));
	if (pid == -1) {
		struct task_struct * p;
		for_each_task(p) {
			if (p->pid > 1 && p != current) {
				++count;
				/* remember the last non-EPERM status */
				if ((err = send_sig(sig,p,0)) != -EPERM)
					retval = err;
			}
		}
		return(count ? retval : -ESRCH);
	}
	if (pid < 0)
		return(kill_pg(-pid,sig,0));
	/* Normal kill */
	return(kill_proc(pid,sig,0));
}
 
/*
* Determine if a process group is "orphaned", according to the POSIX
* definition in 2.2.2.52. Orphaned process groups are not to be affected
* by terminal-generated stop signals. Newly orphaned process groups are
* to receive a SIGHUP and a SIGCONT.
*
* "I ask you, have you ever known what it is to be an orphan?"
*/
/*
 * Would @pgrp be orphaned if @ignored_task were removed from it?
 * A pgrp stays non-orphaned while some member has a parent in a
 * different pgrp of the same session; zombies and children of init
 * don't count as qualifying members.
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
	struct task_struct *p;

	for_each_task(p) {
		if ((p == ignored_task) || (p->pgrp != pgrp) ||
		    (p->state == TASK_ZOMBIE) ||
		    (p->p_pptr->pid == 1))
			continue;
		/* a parent outside the pgrp but inside the session
		   keeps the group connected */
		if ((p->p_pptr->pgrp != pgrp) &&
		    (p->p_pptr->session == p->session))
			return 0;
	}
	return(1);	/* (sighing) "Often!" */
}
 
int is_orphaned_pgrp(int pgrp)
{
return will_become_orphaned_pgrp(pgrp, 0);
}
 
static inline int has_stopped_jobs(int pgrp)
{
struct task_struct * p;
 
for_each_task(p) {
if (p->pgrp != pgrp)
continue;
if (p->state == TASK_STOPPED)
return(1);
}
return(0);
}
 
/*
 * Reparent (in the "original parent" sense) every child of the dying
 * @father to init, so later SIGCHLD/wait bookkeeping has a live target.
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct * p;

	for_each_task(p) {
		if (p->p_opptr == father) {
			p->exit_signal = SIGCHLD;
			/* task[smp_num_cpus] is init; the GNU "?:" falls
			   back to task[0] if that slot is empty */
			p->p_opptr = task[smp_num_cpus] ? : task[0]; /* init */
		}
	}
}
 
/*
 * Close every file descriptor recorded in the open_fds bitmap,
 * walking it one long word at a time.
 */
static inline void close_files(struct files_struct * files)
{
	int i, j;

	j = 0;
	for (;;) {
		unsigned long set = files->open_fds.fds_bits[j];
		i = j * __NFDBITS;	/* first fd covered by this word */
		j++;
		if (i >= NR_OPEN)
			break;
		/* shift through the word: bit 0 corresponds to fd i */
		while (set) {
			if (set & 1) {
				struct file * file = files->fd[i];
				if (file) {
					files->fd[i] = NULL;
					close_fp(file);
				}
			}
			i++;
			set >>= 1;
		}
	}
}
 
/*
 * Detach tsk from its file table; close and free the table when the
 * last sharer drops it.
 */
static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct * files = tsk->files;

	if (!files)
		return;
	tsk->files = NULL;
	if (--files->count == 0) {
		close_files(files);
		kfree(files);
	}
}
 
/* Exported wrapper: detach and release tsk's file table. */
void exit_files(struct task_struct *tsk)
{
	__exit_files(tsk);
}
 
/*
 * Detach tsk from its fs context (root/cwd); release the inodes and
 * free the structure when the last sharer drops it.
 */
static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct * fs = tsk->fs;

	if (!fs)
		return;
	tsk->fs = NULL;
	if (--fs->count == 0) {
		iput(fs->root);
		iput(fs->pwd);
		kfree(fs);
	}
}
 
/* Exported wrapper: detach and release tsk's fs (root/cwd) context. */
void exit_fs(struct task_struct *tsk)
{
	__exit_fs(tsk);
}
 
/*
 * Detach tsk from its signal-handler table; free the table when the
 * last sharer drops it.
 */
static inline void __exit_sighand(struct task_struct *tsk)
{
	struct signal_struct * sig = tsk->sig;

	if (!sig)
		return;
	tsk->sig = NULL;
	if (--sig->count == 0)
		kfree(sig);
}
 
/* Exported wrapper: detach and release tsk's signal-handler table. */
void exit_sighand(struct task_struct *tsk)
{
	__exit_sighand(tsk);
}
 
/*
 * Detach tsk from its memory context and switch it onto the kernel's
 * init_mm, so it can keep running (as a zombie) without user pages.
 * The old mm is freed when the last user drops it.  The NO_MM
 * (uClinux) variant additionally wakes a parent blocked in vfork().
 */
static inline void __exit_mm(struct task_struct * tsk)
{
#ifndef NO_MM
	struct mm_struct * mm = tsk->mm;

	/* Set us up to use the kernel mm state */
	if (mm != &init_mm) {
		flush_cache_mm(mm);
		flush_tlb_mm(mm);
		tsk->mm = &init_mm;
		tsk->swappable = 0;
		SET_PAGE_DIR(tsk, swapper_pg_dir);

		/* free the old state - not used any more */
		if (!--mm->count) {
			exit_mmap(mm);
			free_page_tables(mm);
			kfree(mm);
		}
	}
#else /* NO_MM */
	struct mm_struct * mm = tsk->mm;

	/* Set us up to use the kernel mm state */
	if (mm != &init_mm) {

		/* Wake up parent that vforked me */
		wake_up(&tsk->p_opptr->mm->vforkwait);

		tsk->mm = &init_mm;
		tsk->swappable = 0;

		/* free the old state - not used any more */
		if (!--mm->count) {
			exit_mmap(mm);
			kfree(mm);
		}
	}
#endif /* NO_MM */
}
 
/* Exported wrapper: detach and release tsk's memory context. */
void exit_mm(struct task_struct *tsk)
{
	__exit_mm(tsk);
}
 
/*
* Send signals to all our closest relatives so that they know
* to properly mourn us..
*/
static void exit_notify(void)
{
	struct task_struct * p;

	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */
	if ((current->p_pptr->pgrp != current->pgrp) &&
	    (current->p_pptr->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp,SIGHUP,1);
		kill_pg(current->pgrp,SIGCONT,1);
	}
	/* Let father know we died */
	notify_parent(current, current->exit_signal);
	/*
	 * This loop does two things:
	 *
	 * A. Make init inherit all the child processes
	 * B. Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
	 */
	while ((p = current->p_cptr) != NULL) {
		/* unhook our youngest child... */
		current->p_cptr = p->p_osptr;
		p->p_ysptr = NULL;
		p->flags &= ~(PF_PTRACED|PF_TRACESYS);

		/* ...and splice it in as a child of its original parent */
		p->p_pptr = p->p_opptr;
		p->p_osptr = p->p_pptr->p_cptr;
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		if (p->state == TASK_ZOMBIE)
			notify_parent(p, p->exit_signal);

		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 */
		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session) &&
		    is_orphaned_pgrp(p->pgrp) &&
		    has_stopped_jobs(p->pgrp)) {
			kill_pg(p->pgrp,SIGHUP,1);
			kill_pg(p->pgrp,SIGCONT,1);
		}
	}
	/* a session leader drops its controlling tty */
	if (current->leader)
		disassociate_ctty(1);
}
 
/*
 * Core of process termination: release every resource the task owns,
 * mark it a zombie, notify its relatives, and schedule away forever.
 * @code is the wait()-status value reported to the parent.
 */
NORET_TYPE void do_exit(long code)
{
	if (intr_count) {
		/* exiting from interrupt context is a kernel bug; reset
		   intr_count so we can limp through the exit path */
		printk("Aiee, killing interrupt handler\n");
		intr_count = 0;
	}
fake_volatile:
	acct_process(code);		/* process accounting record */
	current->flags |= PF_EXITING;
	del_timer(&current->real_timer);
	sem_exit();			/* undo SysV semaphore adjustments */
	kerneld_exit();
	__exit_mm(current);
	__exit_files(current);
	__exit_fs(current);
	__exit_sighand(current);
	exit_thread();
	current->state = TASK_ZOMBIE;
	current->exit_code = code;
	exit_notify();
#ifdef DEBUG_PROC_TREE
	audit_ptree();
#endif
	/* drop the module references we held */
	if (current->exec_domain && current->exec_domain->use_count)
		(*current->exec_domain->use_count)--;
	if (current->binfmt && current->binfmt->use_count)
		(*current->binfmt->use_count)--;
	schedule();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
	goto fake_volatile;
}
 
/*
 * exit(2): pack the 8-bit exit code into wait()-status format and
 * terminate; never returns.
 */
asmlinkage int sys_exit(int error_code)
{
	do_exit((error_code&0xff)<<8);
}
 
/*
 * wait4(2): reap a child matching @pid (>0: that pid, 0: our pgrp,
 * -1: any child, <-1: pgrp -pid).  Stores the status word in
 * *stat_addr and resource usage in *ru when non-NULL.  Returns the
 * child's pid, 0 (WNOHANG and nothing ready), or a negative errno.
 */
asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	struct wait_queue wait = { current, NULL };
	struct task_struct *p;

	if (stat_addr) {
		flag = verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr));
		if (flag)
			return flag;
	}
	if (ru) {
		flag = verify_area(VERIFY_WRITE, ru, sizeof(*ru));
		if (flag)
			return flag;
	}
	if (options & ~(WNOHANG|WUNTRACED|__WCLONE))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit,&wait);
repeat:
	flag=0;	/* set when at least one child matched the pid selector */
	for (p = current->p_cptr ; p ; p = p->p_osptr) {
		if (pid>0) {
			if (p->pid != pid)
				continue;
		} else if (!pid) {
			if (p->pgrp != current->pgrp)
				continue;
		} else if (pid != -1) {
			if (p->pgrp != -pid)
				continue;
		}
		/* If you are tracing a process, then you don't need to get the
		 * WCLONE bit right -- useful for strace and gdb
		 */
		if (!(p->flags & (PF_PTRACED|PF_TRACESYS))) {
			/* wait for cloned processes iff the __WCLONE flag is set */
			if ((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
				continue;
		}
		flag = 1;
		switch (p->state) {
		case TASK_STOPPED:
			if (!p->exit_code)
				continue;	/* stop already reported */
			if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
				continue;
			if (ru != NULL)
				getrusage(p, RUSAGE_BOTH, ru);
			if (stat_addr)
				put_user((p->exit_code << 8) | 0x7f,
					stat_addr);
			p->exit_code = 0;	/* mark the stop as reported */
			retval = p->pid;
			goto end_wait4;
		case TASK_ZOMBIE:
			/* fold the dead child's times into ours */
			current->cutime += p->utime + p->cutime;
			current->cstime += p->stime + p->cstime;
			if (ru != NULL)
				getrusage(p, RUSAGE_BOTH, ru);
			if (stat_addr)
				put_user(p->exit_code, stat_addr);
			retval = p->pid;
			if (p->p_opptr != p->p_pptr) {
				/* ptraced child: hand it back to its
				   original parent to reap */
				REMOVE_LINKS(p);
				p->p_pptr = p->p_opptr;
				SET_LINKS(p);
				notify_parent(p, p->exit_signal);
			} else
				release(p);
#ifdef DEBUG_PROC_TREE
			audit_ptree();
#endif
			goto end_wait4;
		default:
			continue;
		}
	}
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (current->signal & ~current->blocked)
			goto end_wait4;
		/* matching children exist but none is ready: sleep */
		current->state=TASK_INTERRUPTIBLE;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	remove_wait_queue(&current->wait_chldexit,&wait);
	return retval;
}
 
#ifndef __alpha__
 
/*
* sys_waitpid() remains for compatibility. waitpid() should be
* implemented by calling sys_wait4() from libc.a.
*/
asmlinkage int sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
	return sys_wait4(pid, stat_addr, options, NULL);	/* no rusage */
}
 
#endif
/exec_domain.c
0,0 → 1,123
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/mm.h>
 
static asmlinkage void no_lcall7(struct pt_regs * regs);
 
 
/* Table mapping each signal number onto itself, for execution
 * domains that do no signal translation in either direction. */
static unsigned long ident_map[32] = {
	0, 1, 2, 3, 4, 5, 6, 7,
	8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};

/* The native Linux execution domain: matches every personality,
 * translates nothing, faults on lcall7. */
struct exec_domain default_exec_domain = {
	"Linux",	/* name */
	no_lcall7,	/* lcall7 causes a seg fault. */
	0, 0xff,	/* All personalities. */
	ident_map,	/* Identity map signals. */
	ident_map,	/* - both ways. */
	NULL,		/* No usage counter. */
	NULL		/* Nothing after this in the list. */
};

/* Singly linked list of registered domains; new entries are pushed
 * on the front, so the default domain stays at the tail. */
static struct exec_domain *exec_domains = &default_exec_domain;
 
 
/*
 * Default lcall7 handler, reached when a binary in the plain Linux
 * domain executes an lcall7 gate.
 */
static asmlinkage void no_lcall7(struct pt_regs * regs)
{

	/*
	 * This may have been a static linked SVr4 binary, so we would have the
	 * personality set incorrectly. Check to see whether SVr4 is available,
	 * and use it, otherwise give the user a SEGV.
	 */
	if (current->exec_domain && current->exec_domain->use_count)
		(*current->exec_domain->use_count)--;

	current->personality = PER_SVR4;
	current->exec_domain = lookup_exec_domain(current->personality);

	if (current->exec_domain && current->exec_domain->use_count)
		(*current->exec_domain->use_count)++;

	/* retry with the SVr4 domain's handler, if it has its own */
	if (current->exec_domain && current->exec_domain->handler
	    && current->exec_domain->handler != no_lcall7) {
		current->exec_domain->handler(regs);
		return;
	}

	send_sig(SIGSEGV, current, 1);
}
 
/*
 * Map a personality word onto the execution domain that handles it.
 * Only the low PER_MASK bits take part in the match.  Returns NULL
 * (after complaining) when nothing matches -- which should be
 * impossible while the default domain is registered.
 */
struct exec_domain *lookup_exec_domain(unsigned long personality)
{
	unsigned long pers = personality & PER_MASK;
	struct exec_domain *dom;

	for (dom = exec_domains; dom; dom = dom->next) {
		if (dom->pers_low <= pers && pers <= dom->pers_high)
			return dom;
	}

	printk(KERN_ERR "No execution domain for personality 0x%02lx\n", pers);
	return NULL;
}
 
int register_exec_domain(struct exec_domain *it)
{
struct exec_domain *tmp;
 
if (!it)
return -EINVAL;
if (it->next)
return -EBUSY;
for (tmp=exec_domains; tmp; tmp=tmp->next)
if (tmp == it)
return -EBUSY;
it->next = exec_domains;
exec_domains = it;
return 0;
}
 
/*
 * Unlink an execution domain from the lookup list.  Returns 0 on
 * success, -EINVAL when the entry was not registered.
 */
int unregister_exec_domain(struct exec_domain *it)
{
	struct exec_domain **link;

	for (link = &exec_domains; *link; link = &(*link)->next) {
		if (*link == it) {
			*link = it->next;
			it->next = NULL;
			return 0;
		}
	}
	return -EINVAL;
}
 
/*
 * personality(2): query (arg 0xffffffff) or switch the caller's
 * execution domain.  Returns the previous personality, or -EINVAL
 * for an unknown one.
 */
asmlinkage int sys_personality(unsigned long personality)
{
	struct exec_domain *it;
	unsigned long old_personality;

	if (personality == 0xffffffff)
		return current->personality;	/* query only */

	it = lookup_exec_domain(personality);
	if (!it)
		return -EINVAL;

	old_personality = current->personality;
	/* move the module use count from the old domain to the new */
	if (current->exec_domain && current->exec_domain->use_count)
		(*current->exec_domain->use_count)--;
	current->personality = personality;
	current->exec_domain = it;
	if (current->exec_domain->use_count)
		(*current->exec_domain->use_count)++;

	return old_personality;
}
/signal.c
0,0 → 1,182
/*
* linux/kernel/signal.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
 
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/mm.h>
 
#include <asm/segment.h>
 
#define _S(nr) (1<<((nr)-1))
 
#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
 
#ifndef __alpha__
 
/*
* This call isn't used by all ports, in particular, the Alpha
* uses osf_sigprocmask instead. Maybe it should be moved into
* arch-dependent dir?
*/
/*
 * sigprocmask(2): block/unblock/replace the caller's signal mask
 * according to @how, and optionally return the previous mask in
 * *oset.  SIGKILL and SIGSTOP can never be blocked (_BLOCKABLE).
 */
asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset)
{
	sigset_t new_set, old_set = current->blocked;
	int error;

	if (set) {
		error = verify_area(VERIFY_READ, set, sizeof(sigset_t));
		if (error)
			return error;
		new_set = get_user(set) & _BLOCKABLE;
		switch (how) {
		case SIG_BLOCK:
			current->blocked |= new_set;
			break;
		case SIG_UNBLOCK:
			current->blocked &= ~new_set;
			break;
		case SIG_SETMASK:
			current->blocked = new_set;
			break;
		default:
			return -EINVAL;
		}
	}
	if (oset) {
		error = verify_area(VERIFY_WRITE, oset, sizeof(sigset_t));
		if (error)
			return error;
		put_user(old_set, oset);	/* the mask as on entry */
	}
	return 0;
}
 
/*
* For backwards compatibility? Functionality superseded by sigprocmask.
*/
asmlinkage int sys_sgetmask(void)
{
	return current->blocked;	/* the current blocked-signal mask */
}

/*
 * Install a new blocked mask (SIGKILL and SIGSTOP can never be
 * blocked) and return the previous one.
 */
asmlinkage int sys_ssetmask(int newmask)
{
	int old=current->blocked;

	current->blocked = newmask & _BLOCKABLE;
	return old;
}
 
#endif
 
/*
 * sigpending(2): store the set of signals that are pending but
 * currently blocked into *set.  Returns 0, or the error from the
 * user-address check.
 */
asmlinkage int sys_sigpending(sigset_t *set)
{
	int error;
	/* fill in "set" with signals pending but blocked. */
	error = verify_area(VERIFY_WRITE, set, sizeof(sigset_t));
	if (!error)
		put_user(current->blocked & current->signal, set);
	return error;
}
 
/*
* POSIX 3.3.1.3:
* "Setting a signal action to SIG_IGN for a signal that is pending
* shall cause the pending signal to be discarded, whether or not
* it is blocked."
*
* "Setting a signal action to SIG_DFL for a signal that is pending
* and whose default action is to ignore the signal (for example,
* SIGCHLD), shall cause the pending signal to be discarded, whether
* or not it is blocked"
*
* Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal
* isn't actually ignored, but does automatic child reaping, while
* SIG_DFL is explicitly said by POSIX to force the signal to be ignored..
*/
static inline void check_pending(int signum)
{
	struct sigaction *p;

	p = signum - 1 + current->sig->action;
	/* newly ignored: discard a pending instance (POSIX 3.3.1.3) */
	if (p->sa_handler == SIG_IGN) {
		current->signal &= ~_S(signum);
		return;
	}
	if (p->sa_handler == SIG_DFL) {
		/* default-ignored signals are likewise discarded */
		if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH)
			return;
		current->signal &= ~_S(signum);
		return;
	}
}
 
#ifndef __alpha__
/*
* For backwards compatibility? Functionality superseded by sigaction.
*/
/*
 * signal(2): install @handler with the old SysV one-shot, no-mask
 * semantics and return the previous handler.  SIGKILL and SIGSTOP
 * may not be caught.
 */
asmlinkage unsigned long sys_signal(int signum, __sighandler_t handler)
{
	int err;
	struct sigaction tmp;

	if (signum<1 || signum>32)
		return -EINVAL;
	if (signum==SIGKILL || signum==SIGSTOP)
		return -EINVAL;
	if (handler != SIG_DFL && handler != SIG_IGN) {
		/* the handler address must be readable user memory */
		err = verify_area(VERIFY_READ, handler, 1);
		if (err)
			return err;
	}
	memset(&tmp, 0, sizeof(tmp));
	tmp.sa_handler = handler;
	tmp.sa_flags = SA_ONESHOT | SA_NOMASK;
	handler = current->sig->action[signum-1].sa_handler;
	current->sig->action[signum-1] = tmp;
	check_pending(signum);	/* honour POSIX discard-on-ignore */
	return (unsigned long) handler;
}
#endif
 
/*
 * sigaction(2): install a new disposition for @signum and/or fetch
 * the old one into *oldaction.  SIGKILL and SIGSTOP cannot be
 * changed.
 */
asmlinkage int sys_sigaction(int signum, const struct sigaction * action,
	struct sigaction * oldaction)
{
	struct sigaction new_sa, *p;

	if (signum<1 || signum>32)
		return -EINVAL;
	p = signum - 1 + current->sig->action;
	if (action) {
		int err = verify_area(VERIFY_READ, action, sizeof(*action));
		if (err)
			return err;
		if (signum==SIGKILL || signum==SIGSTOP)
			return -EINVAL;
		memcpy_fromfs(&new_sa, action, sizeof(struct sigaction));
		if (new_sa.sa_handler != SIG_DFL && new_sa.sa_handler != SIG_IGN) {
			/* the handler must point at readable user memory */
			err = verify_area(VERIFY_READ, new_sa.sa_handler, 1);
			if (err)
				return err;
		}
	}
	if (oldaction) {
		int err = verify_area(VERIFY_WRITE, oldaction, sizeof(*oldaction));
		if (err)
			return err;
		memcpy_tofs(oldaction, p, sizeof(struct sigaction));
	}
	if (action) {
		*p = new_sa;
		check_pending(signum);	/* POSIX discard-on-ignore */
	}
	return 0;
}
/sys.c
0,0 → 1,956
/*
* linux/kernel/sys.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
 
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/times.h>
#include <linux/utsname.h>
#include <linux/param.h>
#include <linux/resource.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/ptrace.h>
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/acct.h>
#include <linux/tty.h>
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
#include <linux/apm_bios.h>
#endif
 
#include <asm/segment.h>
#include <asm/io.h>
 
/*
* this indicates whether you can reboot with ctrl-alt-del: the default is yes
*/
int C_A_D = 1;

extern void adjust_clock(void);
extern void DAC960_Finalize(void);	/* shutdown hook called before reboot/halt */
extern void gdth_halt(void);		/* shutdown hook called before reboot/halt */
 
/* Stub bound to unused system-call table slots. */
asmlinkage int sys_ni_syscall(void)
{
	return -ENOSYS;
}
 
/*
 * Does task p match the (which, who) selector used by get/setpriority?
 * who == 0 means "the caller's own pid / pgrp / uid".  The idle task
 * (pid 0) never matches.
 */
static int proc_sel(struct task_struct *p, int which, int who)
{
	if (p->pid == 0)
		return 0;
	switch (which) {
	case PRIO_PROCESS:
		if (who == 0 && p == current)
			return 1;
		return p->pid == who;
	case PRIO_PGRP:
		if (who == 0)
			who = current->pgrp;
		return p->pgrp == who;
	case PRIO_USER:
		if (who == 0)
			who = current->uid;
		return p->uid == who;
	}
	return 0;
}
 
/*
 * setpriority(2): apply a nice value to every process selected by
 * (which, who).  Note the unusual convention: @error accumulates a
 * positive errno and the result is -error; 0 means at least one
 * target was updated without trouble.
 */
asmlinkage int sys_setpriority(int which, int who, int niceval)
{
	struct task_struct *p;
	int error = ESRCH;
	unsigned int priority;

	if (which > 2 || which < 0)
		return -EINVAL;

	/* normalize: avoid signed division (rounding problems) */
	priority = niceval;
	if (niceval < 0)
		priority = -niceval;
	if (priority > 20)
		priority = 20;
	/* map |nice| 0..20 onto the internal timeslice scale */
	priority = (priority * DEF_PRIORITY + 10) / 20 + DEF_PRIORITY;

	if (niceval >= 0) {
		priority = 2*DEF_PRIORITY - priority;
		if (!priority)
			priority = 1;	/* never drop to a zero timeslice */
	}

	for_each_task(p) {
		if (!proc_sel(p, which, who))
			continue;
		if (p->uid != current->euid &&
		    p->uid != current->uid && !suser()) {
			error = EPERM;
			continue;
		}
		if (error == ESRCH)
			error = 0;	/* found at least one target */
		if (priority > p->priority && !suser())
			error = EACCES;	/* raising priority needs root */
		else
			p->priority = priority;
	}
	return -error;
}
 
/*
* Ugh. To avoid negative return values, "getpriority()" will
* not return the normal nice-value, but a value that has been
* offset by 20 (ie it returns 0..40 instead of -20..20)
*/
asmlinkage int sys_getpriority(int which, int who)
{
	struct task_struct *p;
	long max_prio = -ESRCH;	/* stays -ESRCH when nothing matches */

	if (which > 2 || which < 0)
		return -EINVAL;

	/* report the highest internal priority among the selected tasks */
	for_each_task (p) {
		if (!proc_sel(p, which, who))
			continue;
		if (p->priority > max_prio)
			max_prio = p->priority;
	}

	/* scale the priority from timeslice to 0..40 */
	if (max_prio > 0)
		max_prio = (max_prio * 20 + DEF_PRIORITY/2) / DEF_PRIORITY;
	return max_prio;
}
 
#ifndef __alpha__
 
/*
* Why do these exist? Binary compatibility with some other standard?
* If so, maybe they should be moved into the appropriate arch
* directory.
*/
 
asmlinkage int sys_profil(void)
{
	return -ENOSYS;	/* not implemented */
}

asmlinkage int sys_ftime(void)
{
	return -ENOSYS;	/* not implemented */
}

asmlinkage int sys_break(void)
{
	return -ENOSYS;	/* not implemented */
}

asmlinkage int sys_stty(void)
{
	return -ENOSYS;	/* not implemented */
}

asmlinkage int sys_gtty(void)
{
	return -ENOSYS;	/* not implemented */
}

asmlinkage int sys_prof(void)
{
	return -ENOSYS;	/* not implemented */
}
 
#endif
 
extern void hard_reset_now(void);
extern asmlinkage int sys_kill(int, int);
 
/*
* Reboot system call: for obvious reasons only root may call it,
* and even root needs to set up some magic numbers in the registers
* so that some mistake won't make this reboot the whole machine.
* You can also set the meaning of the ctrl-alt-del-key here.
*
* reboot doesn't sync: do that yourself before calling this.
*/
asmlinkage int sys_reboot(int magic, int magic_too, int flag)
{
	if (!suser())
		return -EPERM;
	/* both magic numbers must match before anything happens */
	if (magic != 0xfee1dead || magic_too != 672274793)
		return -EINVAL;
	if (flag == 0x01234567) {
		/* hard reboot, after quiescing the RAID/SCSI controllers */
#ifdef CONFIG_BLK_DEV_DAC960
		DAC960_Finalize();
#endif
#ifdef CONFIG_SCSI_GDTH
		gdth_halt();
#endif
		hard_reset_now();
	} else if (flag == 0x89ABCDEF)
		C_A_D = 1;	/* enable ctrl-alt-del reboot */
	else if (!flag)
		C_A_D = 0;	/* ctrl-alt-del sends SIGINT to init instead */
	else if (flag == 0xCDEF0123) {
		/* halt: kill everything, power off if APM allows, stop */
#ifdef CONFIG_BLK_DEV_DAC960
		DAC960_Finalize();
#endif
#ifdef CONFIG_SCSI_GDTH
		gdth_halt();
#endif
		printk(KERN_EMERG "System halted\n");
		sys_kill(-1, SIGKILL);
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
		apm_power_off();
#endif
		do_exit(0);
	} else
		return -EINVAL;
	return (0);
}
 
/*
* This function gets called by ctrl-alt-del - ie the keyboard interrupt.
* As it's called within an interrupt, it may NOT sync: the only choice
* is whether to reboot at once, or just ignore the ctrl-alt-del.
*/
void ctrl_alt_del(void)
{
	if (C_A_D) {
		/* reboot at once (same controller shutdowns as sys_reboot) */
#ifdef CONFIG_BLK_DEV_DAC960
		DAC960_Finalize();
#endif
#ifdef CONFIG_SCSI_GDTH
		gdth_halt();
#endif
		hard_reset_now();
	} else
		kill_proc(1, SIGINT, 1);	/* otherwise: send SIGINT to init */
}
 
/*
* Unprivileged users may change the real gid to the effective gid
* or vice versa. (BSD-style)
*
* If you set the real gid at all, or set the effective gid to a value not
* equal to the real gid, then the saved gid is set to the new effective gid.
*
* This makes it possible for a setgid program to completely drop its
* privileges, which is often a useful assertion to make when you are doing
* a security audit over a program.
*
* The general idea is that a program which uses just setregid() will be
* 100% compatible with BSD. A program which uses just setgid() will be
* 100% compatible with POSIX w/ Saved ID's.
*/
asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
{
	int old_rgid = current->gid;
	int old_egid = current->egid;

	/* (gid_t)-1 means "leave this id unchanged" */
	if (rgid != (gid_t) -1) {
		/* real gid may be set to itself or the effective gid (BSD) */
		if ((old_rgid == rgid) ||
		    (current->egid==rgid) ||
		    suser())
			current->gid = rgid;
		else
			return(-EPERM);
	}
	if (egid != (gid_t) -1) {
		/* effective gid may be set to real, effective or saved gid */
		if ((old_rgid == egid) ||
		    (current->egid == egid) ||
		    (current->sgid == egid) ||
		    suser())
			current->fsgid = current->egid = egid;
		else {
			/* undo the real-gid change made above */
			current->gid = old_rgid;
			return(-EPERM);
		}
	}
	/* per the comment above: update the saved gid when required */
	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && egid != old_rgid))
		current->sgid = current->egid;
	current->fsgid = current->egid;
	/* a changed effective gid makes the process undumpable */
	if (current->egid != old_egid)
		current->dumpable = 0;
	return 0;
}
 
/*
* setgid() is implemented like SysV w/ SAVED_IDS
*/
/*
 * setgid() is implemented like SysV w/ SAVED_IDS: the superuser sets
 * all four gids, an ordinary user may only switch the effective/fs gid
 * between the real and saved gids.
 */
asmlinkage int sys_setgid(gid_t gid)
{
	int previous_egid = current->egid;

	if (suser()) {
		/* root: set real, effective, saved and fs gid at once */
		current->gid = gid;
		current->egid = gid;
		current->sgid = gid;
		current->fsgid = gid;
	} else {
		if (gid != current->gid && gid != current->sgid)
			return -EPERM;
		current->egid = gid;
		current->fsgid = gid;
	}
	/* changing the effective gid makes the process undumpable */
	if (previous_egid != current->egid)
		current->dumpable = 0;
	return 0;
}
static char acct_active = 0;
static struct file acct_file;
 
/*
 * Write a BSD-style accounting record for the exiting process to the
 * accounting file opened by sys_acct(). Does nothing unless accounting
 * has been switched on.
 */
int acct_process(long exitcode)
{
	struct acct ac;
	unsigned short fs;

	if (acct_active) {
		strncpy(ac.ac_comm, current->comm, ACCT_COMM);
		ac.ac_comm[ACCT_COMM-1] = '\0';
		ac.ac_utime = current->utime;
		ac.ac_stime = current->stime;
		/* begin time: boot time (xtime - uptime) + start_time in seconds */
		ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
		ac.ac_etime = CURRENT_TIME - ac.ac_btime;
		ac.ac_uid = current->uid;
		ac.ac_gid = current->gid;
		ac.ac_tty = (current)->tty == NULL ? -1 :
			kdev_t_to_nr(current->tty->device);
		/* accounting flags mirror the task's PF_* flags */
		ac.ac_flag = 0;
		if (current->flags & PF_FORKNOEXEC)
			ac.ac_flag |= AFORK;
		if (current->flags & PF_SUPERPRIV)
			ac.ac_flag |= ASU;
		if (current->flags & PF_DUMPCORE)
			ac.ac_flag |= ACORE;
		if (current->flags & PF_SIGNALED)
			ac.ac_flag |= AXSIG;
		ac.ac_minflt = current->min_flt;
		ac.ac_majflt = current->maj_flt;
		ac.ac_exitcode = exitcode;

		/* Kernel segment override: the record lives in kernel space */
		fs = get_fs();
		set_fs(KERNEL_DS);

		acct_file.f_op->write(acct_file.f_inode, &acct_file,
				      (char *)&ac, sizeof(struct acct));

		set_fs(fs);
	}
	return 0;
}
 
/*
 * acct(2): switch process accounting on (name = file to append records
 * to) or off (name = NULL). Superuser only; -EBUSY if already active.
 */
asmlinkage int sys_acct(const char *name)
{
	struct inode *inode = (struct inode *)0;
	char *tmp;
	int error;

	if (!suser())
		return -EPERM;

	if (name == (char *)0) {
		/* turn accounting off and release the accounting file */
		if (acct_active) {
			if (acct_file.f_op->release)
				acct_file.f_op->release(acct_file.f_inode, &acct_file);

			if (acct_file.f_inode != (struct inode *) 0)
				iput(acct_file.f_inode);

			acct_active = 0;
		}
		return 0;
	} else {
		if (!acct_active) {

			if ((error = getname(name, &tmp)) != 0)
				return (error);

			error = open_namei(tmp, O_RDWR, 0600, &inode, 0);
			putname(tmp);

			if (error)
				return (error);

			/* only regular files with a write op can take records */
			if (!S_ISREG(inode->i_mode)) {
				iput(inode);
				return -EACCES;
			}

			if (!inode->i_op || !inode->i_op->default_file_ops ||
			    !inode->i_op->default_file_ops->write) {
				iput(inode);
				return -EIO;
			}

			/* hand-build the struct file; start appending at EOF */
			acct_file.f_mode = 3;
			acct_file.f_flags = 0;
			acct_file.f_count = 1;
			acct_file.f_inode = inode;
			acct_file.f_pos = inode->i_size;
			acct_file.f_reada = 0;
			acct_file.f_op = inode->i_op->default_file_ops;

			if (acct_file.f_op->open)
				if (acct_file.f_op->open(acct_file.f_inode, &acct_file)) {
					iput(inode);
					return -EIO;
				}

			acct_active = 1;
			return 0;
		} else
			return -EBUSY;	/* accounting is already running */
	}
}
 
#ifndef __alpha__
 
/*
* Why do these exist? Binary compatibility with some other standard?
* If so, maybe they should be moved into the appropriate arch
* directory.
*/
 
/* Unimplemented syscall stub; the slot exists only so callers get -ENOSYS. */
asmlinkage int sys_phys(void)
{
	return -ENOSYS;
}
 
/* Unimplemented syscall stub; the slot exists only so callers get -ENOSYS. */
asmlinkage int sys_lock(void)
{
	return -ENOSYS;
}
 
/* Unimplemented syscall stub; the slot exists only so callers get -ENOSYS. */
asmlinkage int sys_mpx(void)
{
	return -ENOSYS;
}
 
/* Unimplemented syscall stub; the slot exists only so callers get -ENOSYS. */
asmlinkage int sys_ulimit(void)
{
	return -ENOSYS;
}
 
/* Unimplemented syscall stub; the slot exists only so callers get -ENOSYS. */
asmlinkage int sys_old_syscall(void)
{
	return -ENOSYS;
}
 
#endif
 
/*
* Unprivileged users may change the real uid to the effective uid
* or vice versa. (BSD-style)
*
* If you set the real uid at all, or set the effective uid to a value not
* equal to the real uid, then the saved uid is set to the new effective uid.
*
* This makes it possible for a setuid program to completely drop its
* privileges, which is often a useful assertion to make when you are doing
* a security audit over a program.
*
* The general idea is that a program which uses just setreuid() will be
* 100% compatible with BSD. A program which uses just setuid() will be
* 100% compatible with POSIX w/ Saved ID's.
*/
asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
{
	int old_ruid = current->uid;
	int old_euid = current->euid;

	/* (uid_t)-1 means "leave this id unchanged" */
	if (ruid != (uid_t) -1) {
		/* real uid may be set to itself or the effective uid (BSD) */
		if ((old_ruid == ruid) ||
		    (current->euid==ruid) ||
		    suser())
			current->uid = ruid;
		else
			return(-EPERM);
	}
	if (euid != (uid_t) -1) {
		/* effective uid may be set to real, effective or saved uid */
		if ((old_ruid == euid) ||
		    (current->euid == euid) ||
		    (current->suid == euid) ||
		    suser())
			current->fsuid = current->euid = euid;
		else {
			/* undo the real-uid change made above */
			current->uid = old_ruid;
			return(-EPERM);
		}
	}
	/* per the comment above: update the saved uid when required */
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && euid != old_ruid))
		current->suid = current->euid;
	current->fsuid = current->euid;
	/* a changed effective uid makes the process undumpable */
	if (current->euid != old_euid)
		current->dumpable = 0;
	return 0;
}
 
/*
* setuid() is implemented like SysV w/ SAVED_IDS
*
* Note that SAVED_ID's is deficient in that a setuid root program
* like sendmail, for example, cannot set its uid to be a normal
* user and then switch back, because if you're root, setuid() sets
* the saved uid too. If you don't like this, blame the bright people
* in the POSIX committee and/or USG. Note that the BSD-style setreuid()
* will allow a root program to temporarily drop privileges and be able to
* regain them by swapping the real and effective uid.
*/
/*
 * setuid() is implemented like SysV w/ SAVED_IDS: the superuser sets
 * all four uids (including the saved uid - see the comment above), an
 * ordinary user may only switch the effective/fs uid between the real
 * and saved uids.
 */
asmlinkage int sys_setuid(uid_t uid)
{
	int previous_euid = current->euid;

	if (suser()) {
		/* root: real, effective, saved and fs uid all change */
		current->uid = uid;
		current->euid = uid;
		current->suid = uid;
		current->fsuid = uid;
	} else {
		if (uid != current->uid && uid != current->suid)
			return -EPERM;
		current->euid = uid;
		current->fsuid = uid;
	}
	/* changing the effective uid makes the process undumpable */
	if (previous_euid != current->euid)
		current->dumpable = 0;
	return(0);
}
 
/*
* "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
* is used for "access()" and for the NFS daemon (letting nfsd stay at
* whatever uid it wants to). It normally shadows "euid", except when
* explicitly set by setfsuid() or for access..
*/
/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem permission
 * checks (access() and nfsd). It normally shadows euid. Returns the
 * previous fsuid regardless of success.
 */
asmlinkage int sys_setfsuid(uid_t uid)
{
	int previous_fsuid = current->fsuid;

	/*
	 * The new fsuid must match one of the process' existing ids;
	 * suser() is tried last, only when none of them match.
	 */
	if (uid == current->uid || uid == current->euid ||
	    uid == current->suid || uid == current->fsuid || suser())
		current->fsuid = uid;
	if (previous_fsuid != current->fsuid)
		current->dumpable = 0;
	return previous_fsuid;
}
 
/*
 * "setfsgid()" sets the fsgid - the gid used for filesystem checks;
 * the group analogue of setfsuid() above.
 */
asmlinkage int sys_setfsgid(gid_t gid)
{
	int previous_fsgid = current->fsgid;

	/*
	 * The new fsgid must match one of the process' existing gids;
	 * suser() is tried last, only when none of them match.
	 */
	if (gid == current->gid || gid == current->egid ||
	    gid == current->sgid || gid == current->fsgid || suser())
		current->fsgid = gid;
	if (previous_fsgid != current->fsgid)
		current->dumpable = 0;
	return previous_fsgid;
}
 
asmlinkage long sys_times(struct tms * tbuf)
{
if (tbuf) {
int error = verify_area(VERIFY_WRITE,tbuf,sizeof *tbuf);
if (error)
return error;
put_user(current->utime,&tbuf->tms_utime);
put_user(current->stime,&tbuf->tms_stime);
put_user(current->cutime,&tbuf->tms_cutime);
put_user(current->cstime,&tbuf->tms_cstime);
}
return jiffies;
}
 
/*
* This needs some heavy checking ...
* I just haven't the stomach for it. I also don't fully
* understand sessions/pgrp etc. Let somebody who does explain it.
*
* OK, I think I have the protection semantics right.... this is really
* only important on a multi-user system anyway, to make sure one user
* can't send a signal to a process owned by another. -TYT, 12/12/91
*
* Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
* LBT 04.03.94
*/
asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
{
	struct task_struct * p;

	/* pid 0 means the caller; pgid 0 means "same as pid" */
	if (!pid)
		pid = current->pid;
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	for_each_task(p) {
		if (p->pid == pid)
			goto found_task;
	}
	return -ESRCH;

found_task:
	if (p->p_pptr == current || p->p_opptr == current) {
		/* a child may be moved, but only within our session and
		 * only before it has exec'd (POSIX 'did_exec' rule) */
		if (p->session != current->session)
			return -EPERM;
		if (p->did_exec)
			return -EACCES;
	} else if (p != current)
		return -ESRCH;
	if (p->leader)
		return -EPERM;	/* session leaders may not be moved */
	if (pgid != pid) {
		/* joining an existing group: it must live in our session */
		struct task_struct * tmp;
		for_each_task (tmp) {
			if (tmp->pgrp == pgid &&
			    tmp->session == current->session)
				goto ok_pgid;
		}
		return -EPERM;
	}

ok_pgid:
	p->pgrp = pgid;
	return 0;
}
 
/* getpgid(): return the process group of pid (or of the caller if pid==0). */
asmlinkage int sys_getpgid(pid_t pid)
{
	struct task_struct * task;

	if (pid == 0)
		return current->pgrp;
	for_each_task(task) {
		if (task->pid == pid)
			return task->pgrp;
	}
	return -ESRCH;
}
 
/* getpgrp(): return the caller's own process group. */
asmlinkage int sys_getpgrp(void)
{
	return current->pgrp;
}
 
/* getsid(): return the session id of pid (or of the caller if pid==0). */
asmlinkage int sys_getsid(pid_t pid)
{
	struct task_struct * task;

	if (pid == 0)
		return current->session;
	for_each_task(task) {
		if (task->pid == pid)
			return task->session;
	}
	return -ESRCH;
}
 
/*
 * setsid(): make the caller a session (and process group) leader and
 * detach it from its controlling tty. Fails with -EPERM if any process
 * already belongs to a group whose id equals the caller's pid.
 */
asmlinkage int sys_setsid(void)
{
	struct task_struct * p;

	for_each_task(p) {
		if (p->pgrp == current->pid)
			return -EPERM;
	}

	current->leader = 1;
	current->session = current->pgrp = current->pid;
	current->tty = NULL;
	current->tty_old_pgrp = 0;
	return current->pgrp;
}
 
/*
* Supplementary group ID's
*/
/*
 * getgroups(): copy the supplementary group list to user space and
 * return its length. With gidsetsize == 0 only the count is returned;
 * -EINVAL if the supplied buffer is too small for all groups.
 */
asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
{
	int i;
	int * groups;

	/* Avoid an integer overflow on systems with 32 bit gid_t (Alpha) */
	if (gidsetsize & ~0x3FFFFFFF)
		return -EINVAL;
	groups = current->groups;
	/* count the groups: the list is NOGROUP-terminated */
	for (i = 0 ; i < NGROUPS ; i++) {
		if (groups[i] == NOGROUP)
			break;
	}
	if (gidsetsize) {
		int error;
		error = verify_area(VERIFY_WRITE, grouplist, sizeof(gid_t) * gidsetsize);
		if (error)
			return error;
		if (i > gidsetsize)
			return -EINVAL;

		for (i = 0 ; i < NGROUPS ; i++) {
			if (groups[i] == NOGROUP)
				break;
			put_user(groups[i], grouplist);
			grouplist++;
		}
	}
	return i;
}
 
/*
 * setgroups(): install a new supplementary group list (superuser only).
 * The in-kernel list is NOGROUP-terminated when shorter than NGROUPS.
 */
asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist)
{
	int i;

	if (!suser())
		return -EPERM;
	/*
	 * Reject negative counts too: a negative gidsetsize passes the
	 * plain "> NGROUPS" test and would reach verify_area with a
	 * bogus (huge once converted to an unsigned size) length.
	 */
	if (gidsetsize < 0 || gidsetsize > NGROUPS)
		return -EINVAL;
	i = verify_area(VERIFY_READ, grouplist, sizeof(gid_t) * gidsetsize);
	if (i)
		return i;
	for (i = 0; i < gidsetsize; i++, grouplist++) {
		current->groups[i] = get_user(grouplist);
	}
	if (i < NGROUPS)
		current->groups[i] = NOGROUP;	/* terminate the list */
	return 0;
}
 
/*
 * Does the current process belong to group 'grp' for filesystem
 * purposes? True for the fsgid or any supplementary group.
 */
int in_group_p(gid_t grp)
{
	int idx;

	if (grp == current->fsgid)
		return 1;

	/* supplementary list is NOGROUP-terminated */
	for (idx = 0; idx < NGROUPS && current->groups[idx] != NOGROUP; idx++) {
		if (current->groups[idx] == grp)
			return 1;
	}
	return 0;
}
 
/* uname(2), new style: copy the whole utsname structure to user space. */
asmlinkage int sys_newuname(struct new_utsname * name)
{
	int error;

	if (!name)
		return -EFAULT;
	error = verify_area(VERIFY_WRITE, name, sizeof *name);
	if (!error)
		memcpy_tofs(name,&system_utsname,sizeof *name);
	return error;
}
 
#ifndef __alpha__
 
/*
* Move these to arch dependent dir since they are for
* backward compatibility only?
*/
/* uname(2), old style: copy the utsname fields individually into the
 * smaller old_utsname layout. */
asmlinkage int sys_uname(struct old_utsname * name)
{
	int error;
	if (!name)
		return -EFAULT;
	error = verify_area(VERIFY_WRITE, name,sizeof *name);
	if (error)
		return error;
	memcpy_tofs(&name->sysname,&system_utsname.sysname,
		    sizeof (system_utsname.sysname));
	memcpy_tofs(&name->nodename,&system_utsname.nodename,
		    sizeof (system_utsname.nodename));
	memcpy_tofs(&name->release,&system_utsname.release,
		    sizeof (system_utsname.release));
	memcpy_tofs(&name->version,&system_utsname.version,
		    sizeof (system_utsname.version));
	memcpy_tofs(&name->machine,&system_utsname.machine,
		    sizeof (system_utsname.machine));
	return 0;
}
 
/* uname(2), oldest style: each field is truncated to __OLD_UTS_LEN
 * bytes and explicitly NUL-terminated in user space. */
asmlinkage int sys_olduname(struct oldold_utsname * name)
{
	int error;
	if (!name)
		return -EFAULT;
	error = verify_area(VERIFY_WRITE, name,sizeof *name);
	if (error)
		return error;
	memcpy_tofs(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
	put_user(0,name->sysname+__OLD_UTS_LEN);
	memcpy_tofs(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
	put_user(0,name->nodename+__OLD_UTS_LEN);
	memcpy_tofs(&name->release,&system_utsname.release,__OLD_UTS_LEN);
	put_user(0,name->release+__OLD_UTS_LEN);
	memcpy_tofs(&name->version,&system_utsname.version,__OLD_UTS_LEN);
	put_user(0,name->version+__OLD_UTS_LEN);
	memcpy_tofs(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
	put_user(0,name->machine+__OLD_UTS_LEN);
	return 0;
}
 
#endif
 
/* sethostname(2): superuser only; installs a NUL-terminated nodename. */
asmlinkage int sys_sethostname(char *name, int len)
{
	int rc;

	if (!suser())
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	rc = verify_area(VERIFY_READ, name, len);
	if (rc == 0) {
		memcpy_fromfs(system_utsname.nodename, name, len);
		system_utsname.nodename[len] = 0;
	}
	return rc;
}
 
/*
 * gethostname(2): copy the nodename (including its NUL when it fits)
 * into the user buffer, truncating to 'len' bytes.
 */
asmlinkage int sys_gethostname(char *name, int len)
{
	int n;

	if (len < 0)
		return -EINVAL;
	n = verify_area(VERIFY_WRITE, name, len);
	if (n)
		return n;
	n = strlen(system_utsname.nodename) + 1;
	if (n > len)
		n = len;	/* silently truncate to the buffer size */
	memcpy_tofs(name, system_utsname.nodename, n);
	return 0;
}
 
/*
* Only setdomainname; getdomainname can be implemented by calling
* uname()
*/
/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname(). Superuser only; installs a NUL-terminated domainname.
 */
asmlinkage int sys_setdomainname(char *name, int len)
{
	int rc;

	if (!suser())
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	rc = verify_area(VERIFY_READ, name, len);
	if (rc == 0) {
		memcpy_fromfs(system_utsname.domainname, name, len);
		system_utsname.domainname[len] = 0;
	}
	return rc;
}
 
/* getrlimit(2): copy the current limit pair for 'resource' to user space. */
asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
{
	int err;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	err = verify_area(VERIFY_WRITE, rlim, sizeof(*rlim));
	if (err == 0)
		memcpy_tofs(rlim, &current->rlim[resource], sizeof(*rlim));
	return err;
}
 
/*
 * setrlimit(2): install a new limit pair for 'resource'. Only the
 * superuser may raise either value above the current hard limit;
 * RLIMIT_NOFILE is additionally capped at NR_OPEN.
 */
asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
{
	struct rlimit new_rlim, *old_rlim;
	int err;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	err = verify_area(VERIFY_READ, rlim, sizeof(*rlim));
	if (err)
		return err;
	memcpy_fromfs(&new_rlim, rlim, sizeof(*rlim));
	if (new_rlim.rlim_cur < 0 || new_rlim.rlim_max < 0)
		return -EINVAL;
	old_rlim = current->rlim + resource;
	/* raising beyond the existing hard limit needs superuser rights */
	if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
	     (new_rlim.rlim_max > old_rlim->rlim_max)) &&
	    !suser())
		return -EPERM;
	if (resource == RLIMIT_NOFILE) {
		if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
			return -EPERM;
	}
	*old_rlim = new_rlim;
	return 0;
}
 
/*
* It would make sense to put struct rusage in the task_struct,
* except that would make the task_struct be *really big*. After
* task_struct gets moved into malloc'ed memory, it would
* make sense to do this. It will make moving the rest of the information
* a lot simpler! (Which we're not doing right now because we're not
* measuring them yet).
*/
/*
 * Fill *ru with resource usage for task p. 'who' selects the process
 * itself (RUSAGE_SELF), its reaped children (RUSAGE_CHILDREN), or -
 * any other value - the sum of both. Tick counts are converted to
 * timevals; fields this kernel does not measure stay zero.
 */
int getrusage(struct task_struct *p, int who, struct rusage *ru)
{
	int error;
	struct rusage r;

	error = verify_area(VERIFY_WRITE, ru, sizeof *ru);
	if (error)
		return error;
	memset((char *) &r, 0, sizeof(r));	/* unmeasured fields read as 0 */
	switch (who) {
		case RUSAGE_SELF:
			r.ru_utime.tv_sec = CT_TO_SECS(p->utime);
			r.ru_utime.tv_usec = CT_TO_USECS(p->utime);
			r.ru_stime.tv_sec = CT_TO_SECS(p->stime);
			r.ru_stime.tv_usec = CT_TO_USECS(p->stime);
			r.ru_minflt = p->min_flt;
			r.ru_majflt = p->maj_flt;
			r.ru_nswap = p->nswap;
			break;
		case RUSAGE_CHILDREN:
			r.ru_utime.tv_sec = CT_TO_SECS(p->cutime);
			r.ru_utime.tv_usec = CT_TO_USECS(p->cutime);
			r.ru_stime.tv_sec = CT_TO_SECS(p->cstime);
			r.ru_stime.tv_usec = CT_TO_USECS(p->cstime);
			r.ru_minflt = p->cmin_flt;
			r.ru_majflt = p->cmaj_flt;
			r.ru_nswap = p->cnswap;
			break;
		default:
			/* RUSAGE_BOTH: self + children combined */
			r.ru_utime.tv_sec = CT_TO_SECS(p->utime + p->cutime);
			r.ru_utime.tv_usec = CT_TO_USECS(p->utime + p->cutime);
			r.ru_stime.tv_sec = CT_TO_SECS(p->stime + p->cstime);
			r.ru_stime.tv_usec = CT_TO_USECS(p->stime + p->cstime);
			r.ru_minflt = p->min_flt + p->cmin_flt;
			r.ru_majflt = p->maj_flt + p->cmaj_flt;
			r.ru_nswap = p->nswap + p->cnswap;
			break;
	}
	memcpy_tofs(ru, &r, sizeof(r));
	return 0;
}
 
/* getrusage(2): user entry point; only SELF and CHILDREN are permitted. */
asmlinkage int sys_getrusage(int who, struct rusage *ru)
{
	switch (who) {
	case RUSAGE_SELF:
	case RUSAGE_CHILDREN:
		return getrusage(current, who, ru);
	default:
		return -EINVAL;
	}
}
 
/* umask(2): install a new file creation mask, returning the old one. */
asmlinkage int sys_umask(int mask)
{
	int previous = current->fs->umask;

	/* only the rwx permission bits are kept */
	current->fs->umask = mask & S_IRWXUGO;
	return (previous);
}
/sysctl.c
0,0 → 1,887
/*
* sysctl.c: General linux system control interface
*
* Begun 24 March 1995, Stephen Tweedie
* Added /proc support, Dec 1995
* Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
* Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
* Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
*/
 
/*
* uClinux revisions for NO_MM
* Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
* The Silver Hammer Group, Ltd.
*/
 
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/swapctl.h>
#include <linux/proc_fs.h>
#include <linux/malloc.h>
#include <linux/stat.h>
#include <linux/ctype.h>
#include <asm/bitops.h>
#include <asm/segment.h>
 
#include <linux/utsname.h>
#include <linux/swapctl.h>
 
/* External variables not in a header file. */
extern int panic_timeout;
 
 
#ifdef CONFIG_ROOT_NFS
#include <linux/nfs_fs.h>
#endif
 
static ctl_table root_table[];
static struct ctl_table_header root_table_header =
{root_table, DNODE_SINGLE(&root_table_header)};
 
static int parse_table(int *, int, void *, size_t *, void *, size_t,
ctl_table *, void **);
 
static ctl_table kern_table[];
static ctl_table vm_table[];
extern ctl_table net_table[];
 
/* /proc declarations: */
 
#ifdef CONFIG_PROC_FS
 
static int proc_readsys(struct inode * inode, struct file * file,
char * buf, int count);
static int proc_writesys(struct inode * inode, struct file * file,
const char * buf, int count);
static int proc_sys_permission(struct inode *, int);
 
/* File operations for /proc/sys entries: only read and write are supported. */
struct file_operations proc_sys_file_operations =
{
	NULL,		/* lseek */
	proc_readsys,	/* read */
	proc_writesys,	/* write */
	NULL,		/* readdir */
	NULL,		/* select */
	NULL,		/* ioctl */
	NULL,		/* mmap */
	NULL,		/* no special open code */
	NULL,		/* no special release code */
	NULL		/* can't fsync */
};
 
/* Inode operations for /proc/sys entries: file ops plus the custom
 * permission check (see proc_sys_permission below). */
struct inode_operations proc_sys_inode_operations =
{
	&proc_sys_file_operations,
	NULL,		/* create */
	NULL,		/* lookup */
	NULL,		/* link */
	NULL,		/* unlink */
	NULL,		/* symlink */
	NULL,		/* mkdir */
	NULL,		/* rmdir */
	NULL,		/* mknod */
	NULL,		/* rename */
	NULL,		/* readlink */
	NULL,		/* follow_link */
	NULL,		/* readpage */
	NULL,		/* writepage */
	NULL,		/* bmap */
	NULL,		/* truncate */
	proc_sys_permission
};
 
extern struct proc_dir_entry proc_sys_root;
 
static void register_proc_table(ctl_table *, struct proc_dir_entry *);
static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
#endif
 
extern int bdf_prm[], bdflush_min[], bdflush_max[];
 
static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *,
void *, size_t, void **);
 
extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
 
/* The default sysctl tables: */
 
/* Top level of the sysctl tree: the kernel, vm and net subdirectories. */
static ctl_table root_table[] = {
	{CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
	{CTL_VM, "vm", NULL, 0, 0555, vm_table},
	{CTL_NET, "net", NULL, 0, 0555, net_table},
	{0}
};
 
/* /proc/sys/kernel: utsname strings, inode/file table limits,
 * securelevel, panic timeout and optional initrd/NFS-root/Java entries. */
static ctl_table kern_table[] = {
	{KERN_OSTYPE, "ostype", system_utsname.sysname, 64,
	 0444, NULL, &proc_dostring, &sysctl_string},
	{KERN_OSRELEASE, "osrelease", system_utsname.release, 64,
	 0444, NULL, &proc_dostring, &sysctl_string},
	{KERN_VERSION, "version", system_utsname.version, 64,
	 0444, NULL, &proc_dostring, &sysctl_string},
	{KERN_NODENAME, "hostname", system_utsname.nodename, 64,
	 0644, NULL, &proc_dostring, &sysctl_string},
	{KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
	 0644, NULL, &proc_dostring, &sysctl_string},
	{KERN_NRINODE, "inode-nr", &nr_inodes, 2*sizeof(int),
	 0444, NULL, &proc_dointvec},
	{KERN_MAXINODE, "inode-max", &max_inodes, sizeof(int),
	 0644, NULL, &proc_dointvec},
	{KERN_NRFILE, "file-nr", &nr_files, sizeof(int),
	 0444, NULL, &proc_dointvec},
	{KERN_MAXFILE, "file-max", &max_files, sizeof(int),
	 0644, NULL, &proc_dointvec},
	/* securelevel changes go through do_securelevel_strategy */
	{KERN_SECURELVL, "securelevel", &securelevel, sizeof(int),
	 0444, NULL, &proc_dointvec, (ctl_handler *)&do_securelevel_strategy},
	{KERN_PANIC, "panic", &panic_timeout, sizeof(int),
	 0644, NULL, &proc_dointvec},
#ifdef CONFIG_BLK_DEV_INITRD
	{KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int),
	 0644, NULL, &proc_dointvec},
#endif
#ifdef CONFIG_ROOT_NFS
	{KERN_NFSRNAME, "nfs-root-name", nfs_root_name, NFS_ROOT_NAME_LEN,
	 0644, NULL, &proc_dostring, &sysctl_string },
	{KERN_NFSRADDRS, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN,
	 0644, NULL, &proc_dostring, &sysctl_string },
#endif
#ifdef CONFIG_BINFMT_JAVA
	{KERN_JAVA_INTERPRETER, "java-interpreter", binfmt_java_interpreter,
	 64, 0644, NULL, &proc_dostring, &sysctl_string },
	{KERN_JAVA_APPLETVIEWER, "java-appletviewer", binfmt_java_appletviewer,
	 64, 0644, NULL, &proc_dostring, &sysctl_string },
#endif
	{0}
};
 
/* /proc/sys/vm: paging and buffer-flush tunables (empty on NO_MM builds). */
static ctl_table vm_table[] = {
#ifndef NO_MM
	{VM_SWAPCTL, "swapctl",
	 &swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec},
	{VM_KSWAPD, "kswapd",
	 &kswapd_ctl, sizeof(kswapd_ctl), 0600, NULL, &proc_dointvec},
	{VM_FREEPG, "freepages",
	 &min_free_pages, 3*sizeof(int), 0600, NULL, &proc_dointvec},
	/* bdflush parameters are range-checked against bdflush_min/max */
	{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL,
	 &proc_dointvec_minmax, &sysctl_intvec, NULL,
	 &bdflush_min, &bdflush_max},
#endif /* !NO_MM */
	{0}
};
 
/* Called once at boot to mirror the default sysctl tables under /proc/sys. */
void sysctl_init(void)
{
#ifdef CONFIG_PROC_FS
	register_proc_table(root_table, &proc_sys_root);
#endif
}
 
 
/*
 * do_sysctl - common entry for the sysctl(2) name-vector interface.
 * Validates all user buffers up front, then tries the name against
 * every registered table (circular list rooted at root_table_header)
 * until one of them resolves it.
 */
int do_sysctl (int *name, int nlen,
	       void *oldval, size_t *oldlenp,
	       void *newval, size_t newlen)
{
	int error;
	struct ctl_table_header *tmp;
	void *context;
	if (nlen <= 0 || nlen >= CTL_MAXNAME)
		return -ENOTDIR;
	error = verify_area(VERIFY_READ,name,nlen*sizeof(int));
	if (error) return error;
	if (oldval) {
		/* a read requires a writable length and value buffer */
		if (!oldlenp)
			return -EFAULT;
		error = verify_area(VERIFY_WRITE,oldlenp,sizeof(size_t));
		if (error) return error;
		error = verify_area(VERIFY_WRITE,oldval,get_user(oldlenp));
		if (error) return error;
	}
	if (newval) {
		error = verify_area(VERIFY_READ,newval,newlen);
		if (error) return error;
	}
	tmp = &root_table_header;
	do {
		context = 0;	/* per-table scratch, freed after each try */
		error = parse_table(name, nlen, oldval, oldlenp,
				    newval, newlen, tmp->ctl_table, &context);
		if (context)
			kfree(context);
		if (error != -ENOTDIR)
			return error;
		tmp = tmp->DLIST_NEXT(ctl_entry);
	} while (tmp != &root_table_header);
	return -ENOTDIR;
}
 
extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
{
struct __sysctl_args tmp;
int error;
error = verify_area(VERIFY_READ, args, sizeof(*args));
if (error)
return error;
memcpy_fromfs(&tmp, args, sizeof(tmp));
return do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
tmp.newval, tmp.newlen);
}
 
/* Like in_group_p, but testing against egid, not fsgid */
static int in_egroup_p(gid_t grp)
{
	int idx;

	if (grp == current->egid)
		return 1;

	/* supplementary list is NOGROUP-terminated */
	for (idx = 0; idx < NGROUPS && current->groups[idx] != NOGROUP; idx++) {
		if (current->groups[idx] == grp)
			return 1;
	}
	return 0;
}
/* ctl_perm does NOT grant the superuser all rights automatically, because
some sysctl variables are readonly even to root. */
/*
 * Check requested access 'op' (rwx bits) against a mode word: uid 0
 * uses the owner bits, members of group 0 the group bits, everyone
 * else the "other" bits. Returns 0 if allowed, -EACCES otherwise.
 */
static int test_perm(int mode, int op)
{
	int shift;

	if (!current->euid)
		shift = 6;
	else if (in_egroup_p(0))
		shift = 3;
	else
		shift = 0;
	return (((mode >> shift) & op & 0007) == op) ? 0 : -EACCES;
}
/* Permission check for one sysctl entry, based on its table mode. */
static inline int ctl_perm(ctl_table *table, int op)
{
	return test_perm(table->mode, op);
}
 
/*
 * parse_table - resolve one sysctl name vector against a table tree.
 * Each element of 'name' selects an entry at the current level: a
 * directory entry (one with a child table) recurses a level down after
 * its optional strategy hook approves, a leaf entry is handed to
 * do_sysctl_strategy() for the actual read/write. Returns -ENOTDIR
 * when the name does not resolve in this table.
 */
static int parse_table(int *name, int nlen,
		       void *oldval, size_t *oldlenp,
		       void *newval, size_t newlen,
		       ctl_table *table, void **context)
{
	int error;
repeat:
	if (!nlen)
		return -ENOTDIR;

	for ( ; table->ctl_name; table++) {
		if (get_user(name) == table->ctl_name ||
		    table->ctl_name == CTL_ANY) {
			if (table->child) {
				/* descending needs "execute" permission */
				if (ctl_perm(table, 001))
					return -EPERM;
				if (table->strategy) {
					error = table->strategy(
						table, name, nlen,
						oldval, oldlenp,
						newval, newlen, context);
					if (error)
						return error;
				}
				name++;
				nlen--;
				table = table->child;
				goto repeat;
			}
			error = do_sysctl_strategy(table, name, nlen,
						   oldval, oldlenp,
						   newval, newlen, context);
			return error;
		}
	}	/* fixed: removed stray ';' that followed this brace */
	return -ENOTDIR;
}
 
/* Perform the actual read/write of a sysctl table entry. */
/* Perform the actual read/write of a sysctl table entry. */
int do_sysctl_strategy (ctl_table *table,
			int *name, int nlen,
			void *oldval, size_t *oldlenp,
			void *newval, size_t newlen, void **context)
{
	int op = 0, rc, len;

	/* build the access mask: 004 = read requested, 002 = write requested */
	if (oldval)
		op |= 004;
	if (newval)
		op |= 002;
	if (ctl_perm(table, op))
		/* securelevel is special-cased for root: the real check
		 * lives in do_securelevel_strategy */
		if( table->data != &securelevel || current->euid)
			return -EPERM;

	if (table->strategy) {
		/* strategy return: <0 error, >0 fully handled, 0 fall through */
		rc = table->strategy(table, name, nlen, oldval, oldlenp,
				     newval, newlen, context);
		if (rc < 0)
			return rc;
		if (rc > 0)
			return 0;
	}

	/* If there is no strategy routine, or if the strategy returns
	 * zero, proceed with automatic r/w */
	if (table->data && table->maxlen) {
		if (oldval && oldlenp && get_user(oldlenp)) {
			/* copy out at most maxlen bytes, report actual length */
			len = get_user(oldlenp);
			if (len > table->maxlen)
				len = table->maxlen;
			memcpy_tofs(oldval, table->data, len);
			put_user(len, oldlenp);
		}
		if (newval && newlen) {
			len = newlen;
			if (len > table->maxlen)
				len = table->maxlen;
			memcpy_fromfs(table->data, newval, len);
		}
	}
	return 0;
}
 
/*
* This function only checks permission for changing the security level
* If the tests are successful, the actual change is done by
* do_sysctl_strategy
*/
/*
 * This function only checks permission for changing the security level.
 * If the tests are successful, the actual change is done by
 * do_sysctl_strategy. Lowering the level is only allowed for init (pid 1).
 */
static int do_securelevel_strategy (ctl_table *table,
				    int *name, int nlen,
				    void *oldval, size_t *oldlenp,
				    void *newval, size_t newlen, void **context)
{
	int level;

	if (newval && newlen) {
		if (newlen != sizeof (int))
			return -EINVAL;
		memcpy_fromfs (&level, newval, newlen);
		if (level < securelevel && current->pid != 1)
			return -EPERM;
	}
	return 0;
}
 
/*
 * Register a sysctl table: allocate a header, link it into the global
 * list (head or tail as requested) and mirror the table under
 * /proc/sys. Returns NULL (0) on allocation failure.
 */
struct ctl_table_header *register_sysctl_table(ctl_table * table,
					       int insert_at_head)
{
	struct ctl_table_header *tmp;
	tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
	if (!tmp)
		return 0;
	*tmp = ((struct ctl_table_header) {table, DNODE_NULL});
	if (insert_at_head)
		DLIST_INSERT_AFTER(&root_table_header, tmp, ctl_entry);
	else
		DLIST_INSERT_BEFORE(&root_table_header, tmp, ctl_entry);
#ifdef CONFIG_PROC_FS
	register_proc_table(table, &proc_sys_root);
#endif
	return tmp;
}
 
/* Undo register_sysctl_table: unlink, remove /proc entries, free header. */
void unregister_sysctl_table(struct ctl_table_header * header)
{
	DLIST_DELETE(header, ctl_entry);
#ifdef CONFIG_PROC_FS
	unregister_proc_table(header->ctl_table, &proc_sys_root);
#endif
	kfree(header);
}
 
/*
* /proc/sys support
*/
 
#ifdef CONFIG_PROC_FS
 
/* Scan the sysctl entries in table and add them all into /proc */
/* Scan the sysctl entries in table and add them all into /proc,
 * recursing into child tables for subdirectories. */
static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
{
	struct proc_dir_entry *de, *tmp;
	int exists;
	for (; table->ctl_name; table++) {
		exists = 0;
		/* Can't do anything without a proc name. */
		if (!table->procname)
			continue;
		/* Maybe we can't do anything with it... */
		if (!table->proc_handler &&
		    !table->child)
			continue;
		de = kmalloc(sizeof(*de), GFP_KERNEL);
		if (!de) continue;	/* out of memory: skip this entry */
		de->namelen = strlen(table->procname);
		de->name = table->procname;
		de->mode = table->mode;
		de->nlink = 1;
		de->uid = 0;
		de->gid = 0;
		de->size = 0;
		de->get_info = 0;	/* For internal use if we want it */
		de->fill_inode = 0;	/* To override struct inode fields */
		de->next = de->subdir = 0;
		de->data = (void *) table;	/* back-pointer for do_rw_proc */
		/* Is it a file? */
		if (table->proc_handler) {
			de->ops = &proc_sys_inode_operations;
			de->mode |= S_IFREG;
		}
		/* Otherwise it's a subdir */
		else  {
			/* First check to see if it already exists */
			for (tmp = root->subdir; tmp; tmp = tmp->next) {
				if (tmp->namelen == de->namelen &&
				    !memcmp(tmp->name,de->name,de->namelen)) {
					/* reuse the existing directory entry */
					exists = 1;
					kfree (de);
					de = tmp;
				}
			}
			if (!exists) {
				de->ops = &proc_dir_inode_operations;
				de->nlink++;
				de->mode |= S_IFDIR;
			}
		}
		table->de = de;
		if (!exists)
			proc_register_dynamic(root, de);
		if (de->mode & S_IFDIR )
			register_proc_table(table->child, de);
	}
}
 
/* Remove a table's /proc entries again, depth-first through children. */
static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
{
	struct proc_dir_entry *de;
	for (; table->ctl_name; table++) {
		if (!(de = table->de))
			continue;
		if (de->mode & S_IFDIR) {
			if (!table->child) {
				/* a directory entry must have a child table */
				printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
				continue;
			}
			unregister_proc_table(table->child, de);
		}
		/* Don't unregister proc directories which still have
		   entries... */
		if (!((de->mode & S_IFDIR) && de->subdir)) {
			proc_unregister(root, de->low_ino);
			table->de = NULL;
			kfree(de);
		}
	}
}
 
 
/*
 * Common read/write path for /proc/sys files: validate the user
 * buffer, locate the ctl_table behind the proc entry, check access,
 * then dispatch to the entry's proc_handler. Returns the number of
 * bytes transferred or a negative errno.
 */
static int do_rw_proc(int write, struct inode * inode, struct file * file,
		      char * buf, int count)
{
	int error, op;
	struct proc_dir_entry *de;
	struct ctl_table *table;
	size_t res;
	error = verify_area(write ? VERIFY_READ : VERIFY_WRITE, buf, count);
	if (error)
		return error;

	/* the table pointer was stashed in de->data by register_proc_table */
	de = (struct proc_dir_entry*) inode->u.generic_ip;
	if (!de || !de->data)
		return -ENOTDIR;
	table = (struct ctl_table *) de->data;
	if (!table || !table->proc_handler)
		return -ENOTDIR;
	op = (write ? 002 : 004);
	if (ctl_perm(table, op))
		return -EPERM;
	res = count;
	error = (*table->proc_handler) (table, write, file, buf, &res);
	if (error)
		return error;
	return res;
}
 
/* read() entry for /proc/sys files: thin wrapper around do_rw_proc. */
static int proc_readsys(struct inode * inode, struct file * file,
			char * buf, int count)
{
	return do_rw_proc(0, inode, file, buf, count);
}
 
/* write() entry for /proc/sys files: thin wrapper around do_rw_proc. */
static int proc_writesys(struct inode * inode, struct file * file,
			 const char * buf, int count)
{
	return do_rw_proc(1, inode, file, (char *) buf, count);
}
 
/* permission() entry for /proc/sys inodes: uses the sysctl rules above. */
static int proc_sys_permission(struct inode *inode, int op)
{
	return test_perm(inode->i_mode, op);
}
 
/*
 * proc_dostring - /proc handler for string sysctl entries.
 * Write: copy at most maxlen-1 bytes (up to a newline or NUL) from
 * user space and NUL-terminate. Read: copy the string out, appending a
 * newline when room remains; a second read (f_pos != 0) returns EOF.
 */
int proc_dostring(ctl_table *table, int write, struct file *filp,
		  void *buffer, size_t *lenp)
{
	int len;
	char *p, c;
	if (!table->data || !table->maxlen || !*lenp ||
	    (filp->f_pos && !write)) {
		*lenp = 0;
		return 0;
	}
	if (write) {
		/* measure the input up to newline/NUL */
		len = 0;
		p = buffer;
		while (len < *lenp &&
		       (c = get_user(p++)) != 0 && c != '\n')
			len++;
		if (len >= table->maxlen)
			len = table->maxlen-1;	/* leave room for the NUL */
		memcpy_fromfs(table->data, buffer, len);
		((char *) table->data)[len] = 0;
		filp->f_pos += *lenp;
	} else {
		len = strlen(table->data);
		if (len > table->maxlen)
			len = table->maxlen;
		if (len > *lenp)
			len = *lenp;
		if (len)
			memcpy_tofs(buffer, table->data, len);
		if (len < *lenp) {
			/* room left: terminate the output with a newline */
			put_user('\n', ((char *) buffer) + len);
			len++;
		}
		*lenp = len;
		filp->f_pos += len;
	}
	return 0;
}
 
/*
 * proc_dointvec - /proc handler for a vector of ints.
 * Write: parse whitespace-separated, optionally negative, integers
 * from user space into table->data (at most maxlen/sizeof(int) of
 * them). Read: print the values tab-separated, newline-terminated.
 */
int proc_dointvec(ctl_table *table, int write, struct file *filp,
		  void *buffer, size_t *lenp)
{
	int *i, vleft, first=1, len, left, neg, val;
#define TMPBUFLEN 20
	char buf[TMPBUFLEN], *p;
	if (!table->data || !table->maxlen || !*lenp ||
	    (filp->f_pos && !write)) {
		*lenp = 0;
		return 0;
	}
	i = (int *) table->data;
	vleft = table->maxlen / sizeof(int);
	left = *lenp;
	for (; left && vleft--; i++, first=0) {
		if (write) {
			/* skip leading whitespace */
			while (left && isspace(get_user((char *) buffer)))
				left--, ((char *) buffer)++;
			if (!left)
				break;
			neg = 0;
			/* copy a bounded chunk in so we can parse it */
			len = left;
			if (len > TMPBUFLEN-1)
				len = TMPBUFLEN-1;
			memcpy_fromfs(buf, buffer, len);
			buf[len] = 0;
			p = buf;
			if (*p == '-' && left > 1) {
				neg = 1;
				left--, p++;
			}
			if (*p < '0' || *p > '9')
				break;
			val = simple_strtoul(p, &p, 0);
			len = p-buf;
			/* stop on a non-numeric, non-space trailer */
			if ((len < left) && *p && !isspace(*p))
				break;
			if (neg)
				val = -val;
			buffer += len;
			left -= len;
			*i = val;
		} else {
			p = buf;
			if (!first)
				*p++ = '\t';	/* separator between values */
			sprintf(p, "%d", *i);
			len = strlen(buf);
			if (len > left)
				len = left;
			memcpy_tofs(buffer, buf, len);
			left -= len;
			buffer += len;
		}
	}

	if (!write && !first && left) {
		put_user('\n', (char *) buffer);
		left--, buffer++;
	}
	if (write) {
		/* consume trailing whitespace so *lenp accounts for it */
		p = (char *) buffer;
		while (left && isspace(get_user(p++)))
			left--;
	}
	if (write && first)
		return -EINVAL;	/* nothing could be parsed at all */
	*lenp -= left;
	filp->f_pos += *lenp;
	return 0;
}
 
/*
 * proc_dointvec_minmax - like proc_dointvec, but each written value is
 * checked against per-element lower/upper bounds taken from
 * table->extra1/extra2; out-of-range values are silently skipped.
 */
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
			 void *buffer, size_t *lenp)
{
	int *i, *min, *max, vleft, first=1, len, left, neg, val;
#define TMPBUFLEN 20
	char buf[TMPBUFLEN], *p;
	if (!table->data || !table->maxlen || !*lenp ||
	    (filp->f_pos && !write)) {
		*lenp = 0;
		return 0;
	}
	i = (int *) table->data;
	min = (int *) table->extra1;	/* per-element minimums (may be NULL) */
	max = (int *) table->extra2;	/* per-element maximums (may be NULL) */
	vleft = table->maxlen / sizeof(int);
	left = *lenp;
	for (; left && vleft--; i++, first=0) {
		if (write) {
			/* skip leading whitespace */
			while (left && isspace(get_user((char *) buffer)))
				left--, ((char *) buffer)++;
			if (!left)
				break;
			neg = 0;
			/* copy a bounded chunk in so we can parse it */
			len = left;
			if (len > TMPBUFLEN-1)
				len = TMPBUFLEN-1;
			memcpy_fromfs(buf, buffer, len);
			buf[len] = 0;
			p = buf;
			if (*p == '-' && left > 1) {
				neg = 1;
				left--, p++;
			}
			if (*p < '0' || *p > '9')
				break;
			val = simple_strtoul(p, &p, 0);
			len = p-buf;
			/* stop on a non-numeric, non-space trailer */
			if ((len < left) && *p && !isspace(*p))
				break;
			if (neg)
				val = -val;
			buffer += len;
			left -= len;

			/* out-of-range value: skip it, keep the old one */
			if (min && val < *min++)
				continue;
			if (max && val > *max++)
				continue;
			*i = val;
		} else {
			p = buf;
			if (!first)
				*p++ = '\t';	/* separator between values */
			sprintf(p, "%d", *i);
			len = strlen(buf);
			if (len > left)
				len = left;
			memcpy_tofs(buffer, buf, len);
			left -= len;
			buffer += len;
		}
	}

	if (!write && !first && left) {
		put_user('\n', (char *) buffer);
		left--, buffer++;
	}
	if (write) {
		/* consume trailing whitespace so *lenp accounts for it */
		p = (char *) buffer;
		while (left && isspace(get_user(p++)))
			left--;
	}
	if (write && first)
		return -EINVAL;	/* nothing could be parsed at all */
	*lenp -= left;
	filp->f_pos += *lenp;
	return 0;
}
 
#else /* CONFIG_PROC_FS */
 
/* CONFIG_PROC_FS is not set: /proc sysctl handlers are unimplemented. */
int proc_dostring(ctl_table *table, int write, struct file *filp,
	void *buffer, size_t *lenp)
{
	return -ENOSYS;
}
 
/* CONFIG_PROC_FS is not set: /proc sysctl handlers are unimplemented. */
int proc_dointvec(ctl_table *table, int write, struct file *filp,
	void *buffer, size_t *lenp)
{
	return -ENOSYS;
}
 
/* CONFIG_PROC_FS is not set: /proc sysctl handlers are unimplemented. */
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
	void *buffer, size_t *lenp)
{
	return -ENOSYS;
}
 
#endif /* CONFIG_PROC_FS */
 
 
/*
* General sysctl support routines
*/
 
/* The generic string strategy routine: */
/*
 * sysctl_string - binary sysctl(2) strategy routine for string values.
 *
 * Read side: copies out min(strlen(data), maxlen, *oldlenp-as-read) bytes
 * plus a terminating NUL, and writes the copied length back to *oldlenp.
 * NOTE(review): the NUL is stored at oldval[len], one byte beyond the
 * reported length, and nothing at all is copied when the user's *oldlenp
 * is 0 — confirm callers expect both quirks.
 * Write side: copies in at most maxlen bytes and forces NUL termination
 * within the buffer.
 *
 * Returns 0 on success, -ENOTDIR if the table has no string storage.
 */
int sysctl_string(ctl_table *table, int *name, int nlen,
	void *oldval, size_t *oldlenp,
	void *newval, size_t newlen, void **context)
{
	int l, len;
	if (!table->data || !table->maxlen)
		return -ENOTDIR;
	/* copy the current value out (skipped if the user asked for 0 bytes) */
	if (oldval && oldlenp && get_user(oldlenp)) {
		len = get_user(oldlenp);
		l = strlen(table->data);
		if (len > l) len = l;
		if (len >= table->maxlen)
			len = table->maxlen;
		memcpy_tofs(oldval, table->data, len);
		put_user(0, ((char *) oldval) + len);	/* NUL beyond len */
		put_user(len, oldlenp);
	}
	/* install the new value, clamped and NUL-terminated */
	if (newval && newlen) {
		len = newlen;
		if (len > table->maxlen)
			len = table->maxlen;
		memcpy_fromfs(table->data, newval, len);
		if (len == table->maxlen)
			len--;
		((char *) table->data)[len] = 0;
	}
	return 0;
}
 
/*
* This function makes sure that all of the integers in the vector
* are between the minimum and maximum values given in the arrays
* table->extra1 and table->extra2, respectively.
*/
/*
 * sysctl_intvec - binary sysctl(2) strategy routine that validates a
 * written int vector against the parallel bounds arrays table->extra1
 * (minima) and table->extra2 (maxima).  Read requests are not handled
 * here; with no bounds configured every write is accepted.
 *
 * Returns 0 when acceptable, -EINVAL on a misaligned length or an
 * out-of-range element.
 * NOTE(review): elements are fetched with the old single-argument
 * get_user() without a verify_area() here — presumably the generic
 * sysctl path verified the buffer; confirm.
 */
int sysctl_intvec(ctl_table *table, int *name, int nlen,
	void *oldval, size_t *oldlenp,
	void *newval, size_t newlen, void **context)
{
	int i, length, *vec, *min, *max;
 
	if (newval && newlen) {
		/* length must be a whole number of ints */
		if (newlen % sizeof(int) != 0)
			return -EINVAL;
 
		/* no bounds configured: accept anything */
		if (!table->extra1 && !table->extra2)
			return 0;
 
		if (newlen > table->maxlen)
			newlen = table->maxlen;
		length = newlen / sizeof(int);
 
		vec = (int *) newval;
		min = (int *) table->extra1;
		max = (int *) table->extra2;
 
		/* per-element range check against the parallel arrays */
		for (i = 0; i < length; i++) {
			int value = get_user(vec + i);
			if (min && value < min[i])
				return -EINVAL;
			if (max && value > max[i])
				return -EINVAL;
		}
	}
	return 0;
}
 
/*
 * do_string - sysctl(2) strategy helper for a single string value.
 * @rdwr: nonzero if writing the value is permitted.
 * @data: kernel buffer holding the string, capacity @max bytes.
 *
 * Copies the current string (including its NUL, length l) to oldval and
 * stores l in *oldlenp; if writing is allowed, replaces it from newval.
 * Returns -EPERM on a write to a read-only value, -EINVAL if the new
 * string does not fit, -ENOMEM if the user's buffer is too small.
 */
int do_string (
	void *oldval, size_t *oldlenp, void *newval, size_t newlen,
	int rdwr, char *data, size_t max)
{
	int l = strlen(data) + 1;	/* length including the NUL */
	if (newval && !rdwr)
		return -EPERM;
	if (newval && newlen >= max)
		return -EINVAL;
	if (oldval) {
		/* user buffer must hold the whole string */
		if (l > get_user(oldlenp))
			return -ENOMEM;
		put_user(l, oldlenp);
		memcpy_tofs(oldval, data, l);
	}
	if (newval) {
		memcpy_fromfs(data, newval, newlen);
		data[newlen] = 0;	/* force termination */
	}
	return 0;
}
 
/*
 * do_int - sysctl(2) strategy helper for a single int value.
 * @rdwr: nonzero if writing the value is permitted.
 *
 * Copies *data to oldval (reporting sizeof(int) via oldlenp) and, if
 * allowed, replaces it from newval.  Returns -EPERM on a write to a
 * read-only value, -EINVAL for a wrong-sized write, -ENOMEM if the
 * user's buffer is too small.
 */
int do_int (
	void *oldval, size_t *oldlenp, void *newval, size_t newlen,
	int rdwr, int *data)
{
	if (newval && !rdwr)
		return -EPERM;
	if (newval && newlen != sizeof(int))
		return -EINVAL;
	if (oldval) {
		if (get_user(oldlenp) < sizeof(int))
			return -ENOMEM;
		put_user(sizeof(int), oldlenp);
		memcpy_tofs(oldval, data, sizeof(int));
	}
	if (newval)
		memcpy_fromfs(data, newval, sizeof(int));
	return 0;
}
 
/*
 * do_struct - sysctl(2) strategy helper for a fixed-size binary value.
 * @rdwr: nonzero if writing the value is permitted.
 * @data/@len: kernel object and its exact size.
 *
 * Copies the object to oldval (reporting len via oldlenp) and, if
 * allowed, replaces it from newval.  Returns -EPERM on a write to a
 * read-only value, -EINVAL for a wrong-sized write, -ENOMEM if the
 * user's buffer is too small.
 */
int do_struct (
	void *oldval, size_t *oldlenp, void *newval, size_t newlen,
	int rdwr, void *data, size_t len)
{
	if (newval && !rdwr)
		return -EPERM;
	if (newval && newlen != len)
		return -EINVAL;
	if (oldval) {
		if (get_user(oldlenp) < len)
			return -ENOMEM;
		put_user(len, oldlenp);
		memcpy_tofs(oldval, data, len);
	}
	if (newval)
		memcpy_fromfs(data, newval, len);
	return 0;
}
 
/module.c
0,0 → 1,818
#include <linux/errno.h>
#include <linux/kernel.h>
#include <asm/segment.h>
#include <linux/mm.h> /* defines GFP_KERNEL */
#include <linux/string.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/malloc.h>
#include <linux/config.h>
#include <asm/pgtable.h>
/*
* Originally by Anonymous (as far as I know...)
* Linux version by Bas Laarhoven <bas@vimec.nl>
* 0.99.14 version by Jon Tombs <jon@gtex02.us.es>,
*
* Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C)
* This source is covered by the GNU GPL, the same as all kernel sources.
*
* Features:
* - Supports stacked modules (removable only of there are no dependents).
* - Supports table of symbols defined by the modules.
* - Supports /proc/ksyms, showing value, name and owner of all
* the symbols defined by all modules (in stack order).
* - Added module dependencies information into /proc/modules
* - Supports redefines of all symbols, for streams-like behaviour.
* - Compatible with older versions of insmod.
*
* New addition in December 1994: (Bjorn Ekwall, idea from Jacques Gelinas)
* - Externally callable function:
*
* "int register_symtab(struct symbol_table *)"
*
* This function can be called from within the kernel,
* and ALSO from loadable modules.
* The goal is to assist in modularizing the kernel even more,
* and finally: reducing the number of entries in ksyms.c
* since every subsystem should now be able to decide and
* control exactly what symbols it wants to export, locally!
*
* On 1-Aug-95: <Matti.Aarnio@utu.fi> altered code to use same style as
* do /proc/net/XXX "files". Namely allow more than 4kB
* (or what the block size is) output.
*
* - Use dummy syscall functions for users who disable all
* module support. Similar to kernel/sys.c (Paul Gortmaker)
*/
 
#ifdef CONFIG_MODULES /* a *big* #ifdef block... */
 
static struct module kernel_module;
static struct module *module_list = &kernel_module;
 
static int freeing_modules; /* true if some modules are marked for deletion */
 
static struct module *find_module( const char *name);
static int get_mod_name( char *user_name, char *buf);
static int free_modules( void);
 
extern struct symbol_table symbol_table; /* in kernel/ksyms.c */
 
/*
* Called at boot time
*/
/*
 * init_modules - boot-time setup of the resident "kernel" pseudo-module.
 * Counts the entries of the statically built kernel symbol table (the
 * array is terminated by a NULL name) and wires that table into
 * kernel_module, which is permanently MOD_RUNNING and has the empty name.
 */
void init_modules(void) {
	struct internal_symbol *sym = symbol_table.symbol;
	int count = 0;

	while (sym->name) {
		++sym;
		++count;
	}
	symbol_table.n_symbols = count;

	kernel_module.symtab = &symbol_table;
	kernel_module.state = MOD_RUNNING;	/* resident, never unloaded */
	kernel_module.name = "";
}
 
/*
* Allocate space for a module.
*/
/*
 * sys_create_module - allocate space for a new module named @module_name
 * holding @size bytes of code/data, and link it (MOD_UNINITIALIZED) into
 * the module list.
 *
 * The allocation reserves an extra long at the start of the vmalloc'd
 * area for the module use count.  Returns the user-visible load address
 * on success, or -EPERM / -EINVAL / -EEXIST / -ENOMEM (and whatever
 * get_mod_name() returns) on failure.
 * NOTE(review): the return value doubles as an error code; callers
 * presumably distinguish errors by the small-negative range — confirm.
 */
asmlinkage unsigned long
sys_create_module(char *module_name, unsigned long size)
{
	struct module *mp;
	void* addr;
	int error;
	int npages;
	/* struct module and its name are carved from one allocation */
	int sspace = sizeof(struct module) + MOD_MAX_NAME;
	char name[MOD_MAX_NAME];
 
	if (!suser() || securelevel > 0)
		return -EPERM;
	if (module_name == NULL || size == 0)
		return -EINVAL;
	if ((error = get_mod_name(module_name, name)) != 0)
		return error;
	if (find_module(name) != NULL) {
		return -EEXIST;
	}
 
	if ((mp = (struct module*) kmalloc(sspace, GFP_KERNEL)) == NULL) {
		return -ENOMEM;
	}
	strcpy((char *)(mp + 1), name); /* why not? */
 
	/* room for the code plus the leading use-count long */
	npages = (size + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE;
	if ((addr = vmalloc(npages * PAGE_SIZE)) == 0) {
		kfree_s(mp, sspace);
		return -ENOMEM;
	}
 
	mp->next = module_list;
	mp->ref = NULL;
	mp->symtab = NULL;
	mp->name = (char *)(mp + 1);
	mp->size = npages;
	mp->addr = addr;
	mp->state = MOD_UNINITIALIZED;
	mp->cleanup = NULL;
 
	* (long *) addr = 0;	/* set use count to zero */
	module_list = mp;	/* link it in */
 
	pr_debug("module `%s' (%lu pages @ 0x%08lx) created\n",
		mp->name, (unsigned long) mp->size, (unsigned long) mp->addr);
	return (unsigned long) addr;
}
 
/*
* Initialize a module.
*/
/*
 * sys_init_module - copy in a module's code and symbol table, fix up the
 * symbol/reference tables, and run the module's init routine.
 *
 * @codesize may carry the MOD_AUTOCLEAN flag in its high bits; it is
 * stripped and recorded in the use count.  Returns 0 on success, or
 * -EPERM / -ENOENT / -EINVAL / -ENOMEM / -EBUSY on failure.
 *
 * NOTE(review): on the "Non-module reference" error path below, newtab
 * has already been installed as mp->symtab, so it is not freed here —
 * presumably reclaimed when the module itself is deleted; confirm.
 */
asmlinkage int
sys_init_module(char *module_name, char *code, unsigned codesize,
	struct mod_routines *routines,
	struct symbol_table *symtab)
{
	struct module *mp;
	struct symbol_table *newtab;
	char name[MOD_MAX_NAME];
	int error;
	struct mod_routines rt;
 
	if (!suser() || securelevel > 0)
		return -EPERM;
 
#ifdef __i386__
	/* A little bit of protection... we "know" where the user stack is... */
 
	if (symtab && ((unsigned long)symtab > 0xb0000000)) {
		printk(KERN_WARNING "warning: you are using an old insmod, no symbols will be inserted!\n");
		symtab = NULL;
	}
#endif
	if ((error = get_mod_name(module_name, name)) != 0)
		return error;
	pr_debug("initializing module `%s', %d (0x%x) bytes\n",
		name, codesize, codesize);
	memcpy_fromfs(&rt, routines, sizeof rt);
	if ((mp = find_module(name)) == NULL)
		return -ENOENT;
	if (codesize & MOD_AUTOCLEAN) {
		/*
		 * set autoclean marker from codesize...
		 * set usage count to "zero"
		 */
		codesize &= ~MOD_AUTOCLEAN;
		GET_USE_COUNT(mp) = MOD_AUTOCLEAN;
	}
	/* code must fit in the area reserved by sys_create_module() */
	if ((codesize + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE > mp->size)
		return -EINVAL;
	/* copy the code in after the leading use-count long, zero the rest */
	memcpy_fromfs((char *)mp->addr + sizeof (long), code, codesize);
	memset((char *)mp->addr + sizeof (long) + codesize, 0,
		mp->size * PAGE_SIZE - (codesize + sizeof (long)));
	pr_debug("module init entry = 0x%08lx, cleanup entry = 0x%08lx\n",
		(unsigned long) rt.init, (unsigned long) rt.cleanup);
	mp->cleanup = rt.cleanup;
 
	/* update kernel symbol table */
	if (symtab) { /* symtab == NULL means no new entries to handle */
		struct internal_symbol *sym;
		struct module_ref *ref;
		int size;
		int i;
		int legal_start;
 
		/* the table's own size field tells how much to copy */
		if ((error = verify_area(VERIFY_READ, &symtab->size, sizeof(symtab->size))))
			return error;
		size = get_user(&symtab->size);
 
		if ((newtab = (struct symbol_table*) kmalloc(size, GFP_KERNEL)) == NULL) {
			return -ENOMEM;
		}
 
		if ((error = verify_area(VERIFY_READ, symtab, size))) {
			kfree_s(newtab, size);
			return error;
		}
		memcpy_fromfs((char *)(newtab), symtab, size);
 
		/* sanity check */
		legal_start = sizeof(struct symbol_table) +
			newtab->n_symbols * sizeof(struct internal_symbol) +
			newtab->n_refs * sizeof(struct module_ref);
 
		if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) || (legal_start > size)) {
			printk(KERN_WARNING "Rejecting illegal symbol table (n_symbols=%d,n_refs=%d)\n",
				newtab->n_symbols, newtab->n_refs);
			kfree_s(newtab, size);
			return -EINVAL;
		}
 
		/* relocate name pointers, index referred from start of table */
		for (sym = &(newtab->symbol[0]), i = 0; i < newtab->n_symbols; ++sym, ++i) {
			/* names must point into the string area of the table */
			if ((unsigned long)sym->name < legal_start || size <= (unsigned long)sym->name) {
				printk(KERN_WARNING "Rejecting illegal symbol table\n");
				kfree_s(newtab, size);
				return -EINVAL;
			}
			/* else */
			sym->name += (long)newtab;
		}
		mp->symtab = newtab;
 
		/* Update module references.
		 * On entry, from "insmod", ref->module points to
		 * the referenced module!
		 * Now it will point to the current module instead!
		 * The ref structure becomes the first link in the linked
		 * list of references to the referenced module.
		 * Also, "sym" from above, points to the first ref entry!!!
		 */
		for (ref = (struct module_ref *)sym, i = 0;
			i < newtab->n_refs; ++ref, ++i) {
 
			/* Check for valid reference */
			struct module *link = module_list;
			while (link && (ref->module != link))
				link = link->next;
 
			if (link == (struct module *)0) {
				printk(KERN_WARNING "Non-module reference! Rejected!\n");
				return -EINVAL;
			}
 
			ref->next = ref->module->ref;
			ref->module->ref = ref;
			ref->module = mp;
		}
	}
 
	/* make sure the copied-in code is visible before executing it */
	flush_pages_to_ram((unsigned long)mp->addr,
		(codesize+sizeof(long)+PAGE_SIZE-1)/PAGE_SIZE);
 
	/* hold a temporary reference while the init routine runs */
	GET_USE_COUNT(mp) += 1;
	if ((*rt.init)() != 0) {
		GET_USE_COUNT(mp) = 0;
		return -EBUSY;
	}
	GET_USE_COUNT(mp) -= 1;
	mp->state = MOD_RUNNING;
 
	return 0;
}
 
/*
 * sys_delete_module - unload the named module, or, when @module_name is
 * NULL, reap all idle MOD_AUTOCLEAN modules (called periodically by user
 * space for automatic cleanup).
 *
 * A named module is refused (-EBUSY) while other modules reference it or
 * its use count (ignoring the AUTOCLEAN/VISITED flag bits) is nonzero.
 * Autoclean reaping waits one extra "cycle" after the last use: the
 * VISITED bit is cleared on the first pass and the module is only
 * unloaded if it is still clear on the next.
 */
asmlinkage int
sys_delete_module(char *module_name)
{
	struct module *mp;
	char name[MOD_MAX_NAME];
	int error;
 
	if (!suser() || securelevel > 0)
		return -EPERM;
	/* else */
	if (module_name != NULL) {
		if ((error = get_mod_name(module_name, name)) != 0)
			return error;
		if ((mp = find_module(name)) == NULL)
			return -ENOENT;
		/* busy if referenced, or in use beyond the flag bits */
		if ((mp->ref != NULL) ||
			((GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED)) != 0))
			return -EBUSY;
		GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
		if (mp->state == MOD_RUNNING)
			(*mp->cleanup)();
		mp->state = MOD_DELETED;
		free_modules();
	}
	/* for automatic reaping */
	else {
		struct module *mp_next;
		for (mp = module_list; mp != &kernel_module; mp = mp_next) {
			mp_next = mp->next;	/* mp may be freed below */
			if ((mp->ref == NULL) && (mp->state == MOD_RUNNING) &&
				((GET_USE_COUNT(mp) & ~MOD_VISITED) == MOD_AUTOCLEAN)) {
				if ((GET_USE_COUNT(mp) & MOD_VISITED)) {
					/* Don't reap until one "cycle" after last _use_ */
					GET_USE_COUNT(mp) &= ~MOD_VISITED;
				}
				else {
					GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
					(*mp->cleanup)();
					mp->state = MOD_DELETED;
					free_modules();
				}
			}
		}
	}
	return 0;
}
 
 
/*
* Copy the kernel symbol table to user space. If the argument is null,
* just return the size of the table.
*
* Note that the transient module symbols are copied _first_,
* in lifo order!!!
*
* The symbols to "insmod" are according to the "old" format: struct kernel_sym,
* which is actually quite handy for this purpose.
* Note that insmod inserts a struct symbol_table later on...
* (as that format is quite handy for the kernel...)
*
* For every module, the first (pseudo)symbol copied is the module name
* and the address of the module struct.
* This lets "insmod" keep track of references, and build the array of
* struct module_refs in the symbol table.
* The format of the module name is "#module", so that "insmod" can easily
* notice when a module name comes along. Also, this will make it possible
* to use old versions of "insmod", albeit with reduced functionality...
* The "kernel" module has an empty name.
*/
/*
 * sys_get_kernel_syms - copy all exported symbols (old struct kernel_sym
 * format) to user space; with a NULL @table just return the total count.
 *
 * Every module contributes one "#name" pseudo-symbol (value = address of
 * its struct module) followed by its real symbols, in LIFO module order.
 * NOTE(review): the count includes modules in any state, but the copy
 * loop only emits MOD_RUNNING modules — with non-running modules present
 * the tail of the user buffer is left untouched; confirm callers cope.
 */
asmlinkage int
sys_get_kernel_syms(struct kernel_sym *table)
{
	struct internal_symbol *from;
	struct kernel_sym isym;	/* staging area for each copied entry */
	struct kernel_sym *to;
	struct module *mp = module_list;
	int i;
	int nmodsyms = 0;
 
	/* first pass: count what would be copied */
	for (mp = module_list; mp; mp = mp->next) {
		if (mp->symtab && mp->symtab->n_symbols) {
			/* include the count for the module name! */
			nmodsyms += mp->symtab->n_symbols + 1;
		}
		else
			/* include the count for the module name! */
			nmodsyms += 1; /* return modules without symbols too */
	}
 
	if (table != NULL) {
		to = table;
 
		if ((i = verify_area(VERIFY_WRITE, to, nmodsyms * sizeof(*table))))
			return i;
 
		/* copy all module symbols first (always LIFO order) */
		for (mp = module_list; mp; mp = mp->next) {
			if (mp->state == MOD_RUNNING) {
				/* magic: write module info as a pseudo symbol */
				isym.value = (unsigned long)mp;
				sprintf(isym.name, "#%s", mp->name);
				memcpy_tofs(to, &isym, sizeof isym);
				++to;
 
				if (mp->symtab != NULL) {
					for (i = mp->symtab->n_symbols,
						from = mp->symtab->symbol;
						i > 0; --i, ++from, ++to) {
 
						isym.value = (unsigned long)from->addr;
						strncpy(isym.name, from->name, sizeof isym.name);
						memcpy_tofs(to, &isym, sizeof isym);
					}
				}
			}
		}
	}
 
	return nmodsyms;
}
 
 
/*
* Copy the name of a module from user space.
*/
/*
 * get_mod_name - copy a NUL-terminated module name from user space.
 * @user_name: user-space string (fetched byte-wise with the old
 *             single-argument get_user(); caller supplies a valid pointer)
 * @buf: kernel buffer of at least MOD_MAX_NAME bytes
 *
 * Returns 0 on success, -E2BIG if the name plus its terminating NUL does
 * not fit in MOD_MAX_NAME bytes.  (Removed a dead `i = 0;` store that was
 * immediately overwritten by the for-loop initializer.)
 */
int
get_mod_name(char *user_name, char *buf)
{
	int i;
 
	/* copy byte by byte until the terminating NUL lands in buf[i] */
	for (i = 0 ; (buf[i] = get_user(user_name + i)) != '\0' ; ) {
		if (++i >= MOD_MAX_NAME)
			return -E2BIG;
	}
	return 0;
}
 
 
/*
* Look for a module by name, ignoring modules marked for deletion.
*/
struct module *
find_module( const char *name)
{
struct module *mp;
 
for (mp = module_list ; mp ; mp = mp->next) {
if (mp->state == MOD_DELETED)
continue;
if (!strcmp(mp->name, name))
break;
}
return mp;
}
 
/*
 * drop_refs - unlink every reference to module @mp from all other
 * modules' reference lists.  Used when @mp is finally freed.  Each
 * module holds at most one reference to any given module, so the inner
 * scan can stop at the first match.
 */
static void
drop_refs(struct module *mp)
{
	struct module *step;
	struct module_ref *prev;
	struct module_ref *ref;
 
	for (step = module_list; step; step = step->next) {
		for (prev = ref = step->ref; ref; ref = prev->next) {
			if (ref->module == mp) {
				if (ref == step->ref)
					step->ref = ref->next;	/* was the list head */
				else
					prev->next = ref->next;	/* splice out of the middle */
				break; /* every module only references once! */
			}
			else
				prev = ref;
		}
	}
}
 
/*
* Try to free modules which have been marked for deletion. Returns nonzero
* if a module was actually freed.
*/
/*
 * free_modules - release every module marked MOD_DELETED whose use count
 * is zero and which nothing references.  Modules still busy set the
 * freeing_modules flag so the sweep can be retried later.
 * Returns nonzero if at least one module was actually freed.
 */
int
free_modules( void)
{
	struct module *mp;
	struct module **mpp;	/* link to patch when unchaining */
	int did_deletion;
 
	did_deletion = 0;
	freeing_modules = 0;	/* set again below if something must wait */
	mpp = &module_list;
	while ((mp = *mpp) != NULL) {
		if (mp->state != MOD_DELETED) {
			mpp = &mp->next;
		} else {
			if ((GET_USE_COUNT(mp) != 0) || (mp->ref != NULL)) {
				freeing_modules = 1;	/* still busy: retry later */
				mpp = &mp->next;
			} else { /* delete it */
				*mpp = mp->next;	/* unchain */
				if (mp->symtab) {
					if (mp->symtab->n_refs)
						drop_refs(mp);
					/* size != 0 means the table was kmalloc'ed */
					if (mp->symtab->size)
						kfree_s(mp->symtab, mp->symtab->size);
				}
				vfree(mp->addr);
				kfree_s(mp, sizeof(struct module) + MOD_MAX_NAME);
				did_deletion = 1;
			}
		}
	}
	return did_deletion;
}
 
 
/*
* Called by the /proc file system to return a current list of modules.
*/
/*
 * get_module_list - format the loaded-module table for /proc/modules.
 * For each module: name padded to a 20-column field shared with the page
 * count, state tag, "[referencing modules]" list, and (when running) the
 * use count with an "(autoclean)" marker.  Writes into @buf (assumed to
 * be at least one page) and returns the number of bytes produced.
 */
int get_module_list(char *buf)
{
	char *p;
	const char *q;
	int i;
	struct module *mp;
	struct module_ref *ref;
	char size[32];	/* scratch for number formatting */
 
	p = buf;
	/* Do not show the kernel pseudo module */
	for (mp = module_list ; mp && mp->next; mp = mp->next) {
		if (p - buf > 4096 - 100)
			break; /* avoid overflowing buffer */
		q = mp->name;
		if (*q == '\0' && mp->size == 0 && mp->ref == NULL)
			continue; /* don't list modules for kernel syms */
		/* name and page count share a 20-column field */
		i = 20;
		while (*q) {
			*p++ = *q++;
			i--;
		}
		sprintf(size, "%d", mp->size);
		i -= strlen(size);
		if (i <= 0)
			i = 1;	/* always at least one space */
		while (--i >= 0)
			*p++ = ' ';
		q = size;
		while (*q)
			*p++ = *q++;
		if (mp->state == MOD_UNINITIALIZED)
			q = " (uninitialized)";
		else if (mp->state == MOD_RUNNING)
			q = "";
		else if (mp->state == MOD_DELETED)
			q = " (deleted)";
		else
			q = " (bad state)";
		while (*q)
			*p++ = *q++;
 
		*p++ = '\t';
		/* list the modules that reference this one */
		if ((ref = mp->ref) != NULL) {
			*p++ = '[';
			for (; ref; ref = ref->next) {
				q = ref->module->name;
				while (*q)
					*p++ = *q++;
				if (ref->next)
					*p++ = ' ';
			}
			*p++ = ']';
		}
		/* use count (flag bits masked off) for running modules */
		if (mp->state == MOD_RUNNING) {
			sprintf(size,"\t%ld%s",
				GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED),
				((GET_USE_COUNT(mp) & MOD_AUTOCLEAN)?
				" (autoclean)":""));
			q = size;
			while (*q)
				*p++ = *q++;
		}
		*p++ = '\n';
	}
	return p - buf;
}
 
 
/*
* Called by the /proc file system to return a current list of ksyms.
*/
/*
 * get_ksyms_list - format module symbols for /proc/ksyms, one
 * "address name\t[module]" line each (the module tag is omitted for the
 * kernel's own symbols, whose pseudo-module has the empty name).
 *
 * Implements the standard /proc windowing protocol: only the slice
 * [offset, offset+length) of the virtual output is kept; *start and the
 * return value describe where that slice landed in @buf.
 */
int get_ksyms_list(char *buf, char **start, off_t offset, int length)
{
	struct module *mp;
	struct internal_symbol *sym;
	int i;
	char *p = buf;
	int len = 0; /* code from net/ipv4/proc.c */
	off_t pos = 0;
	off_t begin = 0;
 
	for (mp = module_list; mp; mp = mp->next) {
		if ((mp->state == MOD_RUNNING) &&
			(mp->symtab != NULL) &&
			(mp->symtab->n_symbols > 0)) {
			for (i = mp->symtab->n_symbols,
				sym = mp->symtab->symbol;
				i > 0; --i, ++sym) {
 
				p = buf + len;
				if (mp->name[0]) {
					len += sprintf(p, "%08lx %s\t[%s]\n",
						(long)sym->addr,
						sym->name, mp->name);
				} else {
					len += sprintf(p, "%08lx %s\n",
						(long)sym->addr,
						sym->name);
				}
				/* discard output entirely before the window */
				pos = begin + len;
				if (pos < offset) {
					len = 0;
					begin = pos;
				}
				pos = begin + len;
				if (pos > offset+length)
					goto leave_the_loop;
			}
		}
	}
leave_the_loop:
	*start = buf + (offset - begin);
	len -= (offset - begin);
	if (len > length)
		len = length;
	return len;
}
 
/*
* Gets the address for a symbol in the given module. If modname is
* NULL, it looks for the name in any registered symbol table. If the
* modname is an empty string, it looks for the symbol in kernel exported
* symbol tables.
*/
/*
 * get_module_symbol - resolve @symname to an address.
 * @modname == NULL searches every running module's table; an empty
 * string matches the kernel's own exported symbols (the pseudo-module
 * with the empty name); otherwise only the named module is searched.
 * Returns the symbol's address, or NULL if not found.
 */
void *get_module_symbol(char *modname, char *symname)
{
	struct module *mod;
	struct internal_symbol *entry;
	int remaining;

	for (mod = module_list; mod; mod = mod->next) {
		if (modname != NULL && strcmp(mod->name, modname) != 0)
			continue;
		if (mod->state != MOD_RUNNING)
			continue;
		if (mod->symtab == NULL || mod->symtab->n_symbols <= 0)
			continue;
		entry = mod->symtab->symbol;
		for (remaining = mod->symtab->n_symbols;
		     remaining > 0;
		     --remaining, ++entry) {
			if (strcmp(entry->name, symname) == 0)
				return entry->addr;
		}
	}
	return NULL;
}
 
/*
* Rules:
* - The new symbol table should be statically allocated, or else you _have_
* to set the "size" field of the struct to the number of bytes allocated.
*
* - The strings that name the symbols will not be copied, maybe the pointers
*
* - For a loadable module, the function should only be called in the
* context of init_module
*
* Those are the only restrictions! (apart from not being reentrant...)
*
* If you want to remove a symbol table for a loadable module,
* the call looks like: "register_symtab(0)".
*
* The look of the code is mostly dictated by the format of
* the frozen struct symbol_table, due to compatibility demands.
*/
#define INTSIZ sizeof(struct internal_symbol)
#define REFSIZ sizeof(struct module_ref)
#define SYMSIZ sizeof(struct symbol_table)
#define MODSIZ sizeof(struct module)
static struct symbol_table nulltab;
 
/*
 * register_symtab_from - install (or replace) the symbol table @intab for
 * the module whose load address is @from; @from == 0 (or any address not
 * matching a loaded module) means the caller is resident kernel code, for
 * which a pseudo-module is created to carry the table.
 *
 * When the module already has a table with live references from other
 * modules, a merged table is kmalloc'ed so the reference entries survive,
 * and all referencing modules are re-pointed at the new entries.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int
register_symtab_from(struct symbol_table *intab, long *from)
{
	struct module *mp;
	struct module *link;
	struct symbol_table *oldtab;
	struct symbol_table *newtab;
	struct module_ref *newref;
	int size;
 
	/* a statically built table may leave n_symbols == 0: count it now */
	if (intab && (intab->n_symbols == 0)) {
		struct internal_symbol *sym;
		/* How many symbols, really? */
 
		for (sym = intab->symbol; sym->name; ++sym)
			intab->n_symbols +=1;
	}
 
	for (mp = module_list; mp != &kernel_module; mp = mp->next) {
		/*
		 * "from" points to "mod_use_count_" (== start of module)
		 * or is == 0 if called from a non-module
		 */
		if ((unsigned long)(mp->addr) == (unsigned long)from)
			break;
	}
 
	if (mp == &kernel_module) {
		/* Aha! Called from an "internal" module */
		if (!intab)
			return 0; /* or -ESILLY_PROGRAMMER :-) */
 
		/* create a pseudo module! */
		if (!(mp = (struct module*) kmalloc(MODSIZ, GFP_KERNEL))) {
			/* panic time! */
			printk(KERN_ERR "Out of memory for new symbol table!\n");
			return -ENOMEM;
		}
		/* else OK */
		memset(mp, 0, MODSIZ);
		mp->state = MOD_RUNNING; /* Since it is resident... */
		mp->name = ""; /* This is still the "kernel" symbol table! */
		mp->symtab = intab;
 
		/* link it in _after_ the resident symbol table */
		mp->next = kernel_module.next;
		kernel_module.next = mp;
 
		return 0;
	}
 
	/* else ******** Called from a loadable module **********/
 
	/*
	 * This call should _only_ be done in the context of the
	 * call to init_module i.e. when loading the module!!
	 * Or else...
	 */
 
	/* Any table there before? */
	if ((oldtab = mp->symtab) == (struct symbol_table*)0) {
		/* No, just insert it! */
		mp->symtab = intab;
		return 0;
	}
 
	/* else ****** we have to replace the module symbol table ******/
 
	if (oldtab->n_refs == 0) { /* no problems! */
		mp->symtab = intab;
		/* if the old table was kmalloc-ed, drop it */
		if (oldtab->size > 0)
			kfree_s(oldtab, oldtab->size);
 
		return 0;
	}
 
	/* else */
	/***** The module references other modules... insmod said so! *****/
	/* We have to allocate a new symbol table, or we lose them! */
	if (intab == (struct symbol_table*)0)
		intab = &nulltab; /* easier code with zeroes in place */
 
	/* the input symbol table space does not include the string table */
	/* (it does for symbol tables that insmod creates) */
 
	if (!(newtab = (struct symbol_table*)kmalloc(
		size = SYMSIZ + intab->n_symbols * INTSIZ +
			oldtab->n_refs * REFSIZ,
		GFP_KERNEL))) {
		/* panic time! */
		printk(KERN_ERR "Out of memory for new symbol table!\n");
		return -ENOMEM;
	}
 
	/* copy up to, and including, the new symbols */
	memcpy(newtab, intab, SYMSIZ + intab->n_symbols * INTSIZ);
 
	newtab->size = size;
	newtab->n_refs = oldtab->n_refs;
 
	/* copy references */
	memcpy( ((char *)newtab) + SYMSIZ + intab->n_symbols * INTSIZ,
		((char *)oldtab) + SYMSIZ + oldtab->n_symbols * INTSIZ,
		oldtab->n_refs * REFSIZ);
 
	/* relink references from the old table to the new one */
 
	/* pointer to the first reference entry in newtab! Really! */
	newref = (struct module_ref*) &(newtab->symbol[newtab->n_symbols]);
 
	/* check for reference links from previous modules */
	for ( link = module_list;
		link && (link != &kernel_module);
		link = link->next) {
 
		if (link->ref && (link->ref->module == mp))
			link->ref = newref++;
	}
 
	mp->symtab = newtab;
 
	/* all references (if any) have been handled */
 
	/* if the old table was kmalloc-ed, drop it */
	if (oldtab->size > 0)
		kfree_s(oldtab, oldtab->size);
 
	return 0;
}
 
#else /* CONFIG_MODULES */
 
/* Dummy syscalls for people who don't want modules */
 
/* Module support disabled (CONFIG_MODULES unset): syscall stub. */
asmlinkage unsigned long sys_create_module(void)
{
	return -ENOSYS;
}
 
/* Module support disabled (CONFIG_MODULES unset): syscall stub. */
asmlinkage int sys_init_module(void)
{
	return -ENOSYS;
}
 
/* Module support disabled (CONFIG_MODULES unset): syscall stub. */
asmlinkage int sys_delete_module(void)
{
	return -ENOSYS;
}
 
/* Module support disabled (CONFIG_MODULES unset): syscall stub. */
asmlinkage int sys_get_kernel_syms(void)
{
	return -ENOSYS;
}
 
/* With modules disabled, registering a symbol table is a successful no-op. */
int register_symtab_from(struct symbol_table *intab, long *from)
{
	return 0;
}
 
#endif /* CONFIG_MODULES */
 
/sched.c
0,0 → 1,1816
/*
* linux/kernel/sched.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* 1996-04-21 Modified by Ulrich Windl to make NTP work
* 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
* make semaphores SMP safe
* 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
*/
 
/*
* 'sched.c' is the main kernel file. It contains scheduling primitives
* (sleep_on, wakeup, schedule etc) as well as a number of simple system
* call functions (type getpid()), which just extract a field from
* current-task
*/
 
/*
* uClinux revisions for NO_MM
* Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
* The Silver Hammer Group, Ltd.
*/
 
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/fdreg.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/ptrace.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tqueue.h>
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>
 
#include <asm/system.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
 
#include <linux/timex.h>
 
/* SIMON - for reasons not yet understood, this prototype breaks the build
   when placed in the header, so it is declared locally here. */
extern void switch_to(struct task_struct *prev, struct task_struct *next);
 
 
/*
* kernel variables
*/
 
int securelevel = 0; /* system security level */
 
long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
volatile struct timeval xtime; /* The current time */
int tickadj = 500/HZ ? 500/HZ : 1; /* microsecs */
 
DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);
 
/*
* phase-lock loop variables
*/
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_ERROR; /* clock synchronization status */
int time_status = STA_UNSYNC; /* clock status bits */
long time_offset = 0; /* time adjustment (us) */
long time_constant = 2; /* pll time constant */
long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
long time_precision = 1; /* clock precision (us) */
long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
long time_phase = 0; /* phase offset (scaled us) */
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; /* frequency offset (scaled ppm) */
long time_adj = 0; /* tick adjust (scaled 1 / HZ) */
long time_reftime = 0; /* time at last adjustment (s) */
 
long time_adjust = 0;
long time_adjust_step = 0;
 
int need_resched = 0;
unsigned long event = 0;
 
extern int _setitimer(int, struct itimerval *, struct itimerval *);
unsigned int * prof_buffer = NULL;
unsigned long prof_len = 0;
unsigned long prof_shift = 0;
 
#define _S(nr) (1<<((nr)-1))
 
extern void mem_use(void);
 
unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
#ifndef NO_MM
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
#endif /* !NO_MM */
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
 
struct mm_struct init_mm = INIT_MM;
struct task_struct init_task = INIT_TASK;
 
unsigned long volatile jiffies=0;
 
struct task_struct *current_set[NR_CPUS];
struct task_struct *last_task_used_math = NULL;
 
struct task_struct * task[NR_TASKS] = {&init_task, };
 
struct kernel_stat kstat = { 0 };
 
/*
 * add_to_runqueue - append @p to the circular run queue (anchored at
 * init_task) and flag a reschedule if @p is realtime or has noticeably
 * more ticks left than the current task.
 * NOTE(review): the SMP section's own comment says this is safe only
 * with interrupts disabled (cli) — callers are presumed to guarantee
 * that; confirm.
 */
static inline void add_to_runqueue(struct task_struct * p)
{
#ifdef __SMP__
	int cpu=smp_processor_id();
#endif
#if 1 /* sanity tests */
	if (p->next_run || p->prev_run) {
		printk("task already on run-queue\n");
		return;
	}
#endif
	if (p->policy != SCHED_OTHER || p->counter > current->counter + 3)
		need_resched = 1;
	nr_running++;
	/* insert just before the init_task anchor (end of the queue) */
	(p->prev_run = init_task.prev_run)->next_run = p;
	p->next_run = &init_task;
	init_task.prev_run = p;
#ifdef __SMP__
	/* this is safe only if called with cli()*/
	/* bit 31 of smp_process_available acts as a lock; while spinning,
	   service any pending TLB-invalidate request for this CPU */
	while(set_bit(31,&smp_process_available))
	{
		while(test_bit(31,&smp_process_available))
		{
			if(clear_bit(cpu,&smp_invalidate_needed))
			{
				local_flush_tlb();
				set_bit(cpu,&cpu_callin_map[0]);
			}
		}
	}
	smp_process_available++;
	clear_bit(31,&smp_process_available);
	/* kick an idle CPU to pick up the new work */
	if ((0!=p->pid) && smp_threads_ready)
	{
		int i;
		for (i=0;i<smp_num_cpus;i++)
		{
			if (0==current_set[cpu_logical_map[i]]->pid)
			{
				smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
				break;
			}
		}
	}
#endif
}
 
/*
 * del_from_runqueue - unlink @p from the circular run queue.  Refuses to
 * remove the idle task (init_task is the queue anchor and must stay),
 * printing a rate-limited warning instead.
 */
static inline void del_from_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;
 
#if 1 /* sanity tests */
	if (!next || !prev) {
		printk("task not on run-queue\n");
		return;
	}
#endif
	if (p == &init_task) {
		static int nr = 0;	/* warn at most 5 times */
		if (nr < 5) {
			nr++;
			printk("idle task may not sleep\n");
		}
		return;
	}
	nr_running--;
	next->prev_run = prev;
	prev->next_run = next;
	p->next_run = NULL;
	p->prev_run = NULL;
}
 
/*
 * move_last_runqueue - move @p to the tail of the run queue (just before
 * the init_task anchor), e.g. after it has used up its slice.  Assumes
 * @p is currently queued.
 */
static inline void move_last_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;
 
	/* remove from list */
	next->prev_run = prev;
	prev->next_run = next;
	/* add back to list */
	p->next_run = &init_task;
	prev = init_task.prev_run;
	init_task.prev_run = p;
	p->prev_run = prev;
	prev->next_run = p;
}
 
/*
* Wake up a process. Put it on the run-queue if it's not
* already there. The "current" process is always on the
* run-queue (except when the actual re-schedule is in
* progress), and as such you're allowed to do the simpler
* "current->state = TASK_RUNNING" to mark yourself runnable
* without the overhead of this.
*/
/*
 * wake_up_process - mark @p runnable and queue it if it is not already
 * on the run queue.  Interrupts are disabled around the queue update,
 * which also satisfies add_to_runqueue()'s cli() requirement.
 */
inline void wake_up_process(struct task_struct * p)
{
	unsigned long flags;
 
	save_flags(flags);
	cli();
	p->state = TASK_RUNNING;
	if (!p->next_run)	/* not already queued? */
		add_to_runqueue(p);
	restore_flags(flags);
}
 
/*
 * process_timeout - kernel timer callback for a sleeping task's timeout;
 * __data carries the task pointer.  Clears the pending timeout and wakes
 * the task.
 */
static void process_timeout(unsigned long __data)
{
	struct task_struct * p = (struct task_struct *) __data;
 
	p->timeout = 0;
	wake_up_process(p);
}
 
/*
* This is the function that decides how desirable a process is..
* You can weigh different processes against each other depending
* on what CPU they've run on lately etc to try to handle cache
* and TLB miss penalties.
*
* Return values:
* -1000: never select this
* 0: out of time, recalculate counters (but it might still be
* selected)
* +ve: "goodness" value (the larger, the better)
* +1000: realtime process, select this.
*/
/*
 * goodness - scheduling desirability of @p (see the block comment above
 * for the value ranges).  Bug fix: the PAST_2_0 processor-mask test was
 * missing its closing parenthesis and could not have compiled with
 * PAST_2_0 defined; the parentheses are now balanced.  No behavior
 * change in any configuration that previously compiled.
 */
static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
{
	int weight;
 
#ifdef __SMP__
	/* We are not permitted to run a task someone else is running */
	if (p->processor != NO_PROC_ID)
		return -1000;
#ifdef PAST_2_0
	/* This process is locked to a processor group */
	if (p->processor_mask && !(p->processor_mask & (1<<this_cpu)))
		return -1000;
#endif
#endif
 
	/*
	 * Realtime process, select the first one on the
	 * runqueue (taking priorities within processes
	 * into account).
	 */
	if (p->policy != SCHED_OTHER)
		return 1000 + p->rt_priority;
 
	/*
	 * Give the process a first-approximation goodness value
	 * according to the number of clock-ticks it has left.
	 *
	 * Don't do any other calculations if the time slice is
	 * over..
	 */
	weight = p->counter;
	if (weight) {
#ifdef __SMP__
	/* Give a largish advantage to the same processor... */
	/* (this is equivalent to penalizing other processors) */
		if (p->last_processor == this_cpu)
			weight += PROC_CHANGE_PENALTY;
#endif
 
		/* .. and a slight advantage to the current process */
		if (p == prev)
			weight += 1;
	}
 
	return weight;
}
 
 
/*
The following allow_interrupts function is used to workaround a rare but
nasty deadlock situation that is possible for 2.0.x Intel SMP because it uses
a single kernel lock and interrupts are only routed to the boot CPU. There
are two deadlock scenarios this code protects against.
 
The first scenario is that if a CPU other than the boot CPU holds the kernel
lock and needs to wait for an operation to complete that itself requires an
interrupt, there is a deadlock since the boot CPU may be able to accept the
interrupt but will not be able to acquire the kernel lock to process it.
 
The workaround for this deadlock requires adding calls to allow_interrupts to
places where this deadlock is possible. These places are known to be present
in buffer.c and keyboard.c. It is also possible that there are other such
places which have not been identified yet. In order to break the deadlock,
the code in allow_interrupts temporarily yields the kernel lock directly to
the boot CPU to allow the interrupt to be processed. The boot CPU interrupt
entry code indicates that it is spinning waiting for the kernel lock by
setting the smp_blocked_interrupt_pending variable. This code notices that
and manipulates the active_kernel_processor variable to yield the kernel lock
without ever clearing it. When the interrupt has been processed, the
saved_active_kernel_processor variable contains the value for the interrupt
exit code to restore, either the APICID of the CPU that granted it the kernel
lock, or NO_PROC_ID in the normal case where no yielding occurred. Restoring
active_kernel_processor from saved_active_kernel_processor returns the kernel
lock back to the CPU that yielded it.
 
The second form of deadlock is even more insidious. Suppose the boot CPU
takes a page fault and then the previous scenario ensues. In this case, the
boot CPU would spin with interrupts disabled waiting to acquire the kernel
lock. To resolve this deadlock, the kernel lock acquisition code must enable
interrupts briefly so that the pending interrupt can be handled as in the
case above.
 
An additional form of deadlock is where kernel code running on a non-boot CPU
waits for the jiffies variable to be incremented. This deadlock is avoided
by having the spin loops in ENTER_KERNEL increment jiffies approximately
every 10 milliseconds. Finally, if approximately 60 seconds elapse waiting
for the kernel lock, a message will be printed if possible to indicate that a
deadlock has been detected.
 
Leonard N. Zubkoff
4 August 1997
*/
 
#if defined(__SMP__) && defined(__i386__)

/* Set by the boot CPU's interrupt entry while it spins for the kernel lock. */
volatile unsigned char smp_blocked_interrupt_pending = 0;

/* APICID of the CPU that yielded the kernel lock, or NO_PROC_ID if none. */
volatile unsigned char saved_active_kernel_processor = NO_PROC_ID;

/*
 * Temporarily hand the kernel lock to the boot CPU so a pending
 * interrupt can be serviced (see the deadlock discussion above).
 * No-op on the boot CPU itself, or when no interrupt is blocked.
 */
void allow_interrupts(void)
{
	if (smp_processor_id() == boot_cpu_id) return;
	if (smp_blocked_interrupt_pending)
	{
		unsigned long saved_kernel_counter;
		long timeout_counter;
		saved_active_kernel_processor = active_kernel_processor;
		saved_kernel_counter = kernel_counter;
		kernel_counter = 0;
		active_kernel_processor = boot_cpu_id;
		/* 6000000 iterations * 10us = ~60s before declaring deadlock */
		timeout_counter = 6000000;
		while (active_kernel_processor != saved_active_kernel_processor &&
		       --timeout_counter >= 0)
		{
			udelay(10);
			barrier();
		}
		if (timeout_counter < 0)
			panic("FORWARDED INTERRUPT TIMEOUT (AKP = %d, Saved AKP = %d)\n",
			      active_kernel_processor, saved_active_kernel_processor);
		kernel_counter = saved_kernel_counter;
		saved_active_kernel_processor = NO_PROC_ID;
	}
}

#else

/* Uniprocessor or non-i386: the forwarded-interrupt deadlock cannot occur. */
void allow_interrupts(void) {}

#endif
 
 
/*
 * 'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 * NOTE!! Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
	int c;
	struct task_struct * p;
	struct task_struct * prev, * next;
	unsigned long timeout = 0;
	int this_cpu=smp_processor_id();
/* check alarm, wake up any interruptible tasks that have got a signal */

	allow_interrupts();

	/* Scheduling from interrupt context is a bug: complain and bail. */
	if (intr_count)
		goto scheduling_in_interrupt;

	/* Run pending bottom halves first; intr_count guards recursion. */
	if (bh_active & bh_mask) {
		intr_count = 1;
		do_bottom_half();
		intr_count = 0;
	}

	run_task_queue(&tq_scheduler);

	need_resched = 0;
	prev = current;
	cli();
	/* move an exhausted RR process to be last.. */
	if (!prev->counter && prev->policy == SCHED_RR) {
		prev->counter = prev->priority;
		move_last_runqueue(prev);
	}
	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			/* A pending unblocked signal makes us runnable again. */
			if (prev->signal & ~prev->blocked)
				goto makerunnable;
			timeout = prev->timeout;
			if (timeout && (timeout <= jiffies)) {
				/* Timeout already expired: no wakeup timer needed. */
				prev->timeout = 0;
				timeout = 0;
		makerunnable:
				prev->state = TASK_RUNNING;
				break;
			}
			/* still sleeping: fall through and dequeue */
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}
	p = init_task.next_run;
	sti();
#ifdef __SMP__
	/*
	 * This is safe as we do not permit re-entry of schedule()
	 */
	prev->processor = NO_PROC_ID;
#define idle_task (task[cpu_number_map[this_cpu]])
#else
#define idle_task (&init_task)
#endif

	/*
	 * Note! there may appear new tasks on the run-queue during this, as
	 * interrupts are enabled. However, they will be put on front of the
	 * list, so our list starting at "p" is essentially fixed.
	 */
/* this is the scheduler proper: pick the runnable task with best goodness */
	c = -1000;
	next = idle_task;
	while (p != &init_task) {
		int weight = goodness(p, prev, this_cpu);
		if (weight > c)
			c = weight, next = p;
		p = p->next_run;
	}

	/* if all runnable processes have "counter == 0", re-calculate counters */
	if (!c) {
		for_each_task(p)
			p->counter = (p->counter >> 1) + p->priority;
	}
#ifdef __SMP__
	/*
	 * Allocate process to CPU
	 */
	next->processor = this_cpu;
	next->last_processor = this_cpu;
#endif
#ifdef __SMP_PROF__
	/* mark processor running an idle thread */
	if (0==next->pid)
		set_bit(this_cpu,&smp_idle_map);
	else
		clear_bit(this_cpu,&smp_idle_map);
#endif
	if (prev != next) {
		struct timer_list timer;

		kstat.context_swtch++;
		/* Arm a one-shot wakeup for a task sleeping with a timeout. */
		if (timeout) {
			init_timer(&timer);
			timer.expires = timeout;
			timer.data = (unsigned long) prev;
			timer.function = process_timeout;
			add_timer(&timer);
		}
		get_mmu_context(next);
		switch_to(prev,next);
		/* We run again only as 'prev': cancel the wakeup timer. */
		if (timeout)
			del_timer(&timer);
	}
	return;

scheduling_in_interrupt:
	printk("Aiee: scheduling in interrupt %p\n",
		__builtin_return_address(0));
}
 
#ifndef __alpha__
 
/*
* For backwards compatibility? This can be done in libc so Alpha
* and all newer ports shouldn't need it.
*/
asmlinkage int sys_pause(void)
{
	/* Sleep until a signal arrives; signal delivery handles the
	 * -ERESTARTNOHAND conversion to -EINTR for user space. */
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	return -ERESTARTNOHAND;
}
 
#endif
 
/*
 * wake_up doesn't wake up stopped processes - they have to be awakened
 * with signals or similar.
 *
 * Note that this doesn't need cli-sti pairs: interrupts may not change
 * the wait-queue structures directly, but only call wake_up() to wake
 * a process. The process itself must remove the queue once it has woken.
 */
void wake_up(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	/* Walk the circular list until we reach the sentinel head. */
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;
		if (p != NULL) {
			if ((p->state == TASK_UNINTERRUPTIBLE) ||
			    (p->state == TASK_INTERRUPTIBLE))
				wake_up_process(p);
		}
		if (!next)	/* corrupt queue: NULL link before head */
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk("        q = %p\n",q);
	printk("       *q = %p\n",*q);
}
 
/*
 * Like wake_up(), but only wakes tasks in TASK_INTERRUPTIBLE;
 * uninterruptible sleepers are left alone.
 */
void wake_up_interruptible(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;
		if (p != NULL) {
			if (p->state == TASK_INTERRUPTIBLE)
				wake_up_process(p);
		}
		if (!next)	/* corrupt queue: NULL link before head */
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk("        q = %p\n",q);
	printk("       *q = %p\n",*q);
}
 
 
/*
 * Semaphores are implemented using a two-way counter:
 * The "count" variable is decremented for each process
 * that tries to sleep, while the "waking" variable is
 * incremented when the "up()" code goes to wake up waiting
 * processes.
 *
 * Notably, the inline "up()" and "down()" functions can
 * efficiently test if they need to do any extra work (up
 * needs to do something only if count was negative before
 * the increment operation.
 *
 * This routine must execute atomically.
 */
/*
 * Atomically test-and-consume a wakeup credit: returns 1 (and
 * decrements sem->waking) if one was available, else 0.  The buzz
 * lock serializes CPUs; cli() shuts out local interrupts.
 */
static inline int waking_non_zero(struct semaphore *sem)
{
	int ret ;
	long flags ;

	get_buzz_lock(&sem->lock) ;
	save_flags(flags) ;
	cli() ;

	if ((ret = (sem->waking > 0)))
		sem->waking-- ;

	restore_flags(flags) ;
	give_buzz_lock(&sem->lock) ;
	return(ret) ;
}
 
/*
 * When __up() is called, the count was negative before
 * incrementing it, and we need to wake up somebody.
 *
 * This routine adds one to the count of processes that need to
 * wake up and exit.  ALL waiting processes actually wake up but
 * only the one that gets to the "waking" field first will gate
 * through and acquire the semaphore.  The others will go back
 * to sleep.
 *
 * Note that these functions are only called when there is
 * contention on the lock, and as such all this is the
 * "non-critical" part of the whole semaphore business. The
 * critical part is the inline stuff in <asm/semaphore.h>
 * where we want to avoid any extra jumps and calls.
 */
void __up(struct semaphore *sem)
{
	atomic_inc(&sem->waking) ;	/* grant one wakeup credit */
	wake_up(&sem->wait);		/* let the waiters race for it */
}
 
/*
 * Perform the "down" function.  Return zero for semaphore acquired,
 * return negative for signalled out of the function.
 *
 * If called from __down, the return is ignored and the wait loop is
 * not interruptible.  This means that a task waiting on a semaphore
 * using "down()" cannot be killed until someone does an "up()" on
 * the semaphore.
 *
 * If called from __down_interruptible, the return value gets checked
 * upon return.  If the return value is negative then the task continues
 * with the negative value in the return register (it can be tested by
 * the caller).
 *
 * Either form may be used in conjunction with "up()".
 *
 */
int __do_down(struct semaphore * sem, int task_state)
{
	struct task_struct *tsk = current;
	struct wait_queue wait = { tsk, NULL };	/* on-stack wait entry */
	int ret = 0 ;

	/* Mark ourselves sleeping BEFORE queueing, so a wakeup between
	 * the two cannot be lost. */
	tsk->state = task_state;
	add_wait_queue(&sem->wait, &wait);

	/*
	 * Ok, we're set up.  sem->count is known to be less than zero
	 * so we must wait.
	 *
	 * We can let go the lock for purposes of waiting.
	 * We re-acquire it after awaking so as to protect
	 * all semaphore operations.
	 *
	 * If "up()" is called before we call waking_non_zero() then
	 * we will catch it right away.  If it is called later then
	 * we will have to go through a wakeup cycle to catch it.
	 *
	 * Multiple waiters contend for the semaphore lock to see
	 * who gets to gate through and who has to wait some more.
	 */
	for (;;)
	{
		if (waking_non_zero(sem))	/* are we waking up?  */
			break ;			/* yes, exit loop */

		if (   task_state == TASK_INTERRUPTIBLE
		    && (tsk->signal & ~tsk->blocked)	/* signalled */
		   )
		{
			ret = -EINTR ;			/* interrupted */
			atomic_inc(&sem->count) ;	/* give up on down operation */
			break ;
		}

		schedule();
		/* Re-mark sleeping before the next credit check. */
		tsk->state = task_state;
	}

	tsk->state = TASK_RUNNING;
	remove_wait_queue(&sem->wait, &wait);
	return(ret) ;

} /* __do_down */
 
/* Uninterruptible down: sleeps until up() grants the semaphore. */
void __down(struct semaphore * sem)
{
	__do_down(sem,TASK_UNINTERRUPTIBLE) ;
}
 
/* Interruptible down: returns 0 on acquire, -EINTR when signalled. */
int __down_interruptible(struct semaphore * sem)
{
	return(__do_down(sem,TASK_INTERRUPTIBLE)) ;
}
 
 
/*
 * Queue the current task on wait-queue 'p' in the given sleep state
 * and call schedule(); on wakeup, dequeue again.  The state is set
 * before queueing so a racing wake_up() cannot be lost.
 */
static inline void __sleep_on(struct wait_queue **p, int state)
{
	unsigned long flags;
	struct wait_queue wait = { current, NULL };	/* on-stack entry */

	if (!p)
		return;
	if (current == task[0])
		panic("task[0] trying to sleep");
	current->state = state;
	save_flags(flags);
	cli();
	__add_wait_queue(p, &wait);
	sti();
	schedule();
	cli();
	__remove_wait_queue(p, &wait);
	restore_flags(flags);
}
 
/* Sleep on 'p', waking on either wake_up_interruptible() or a signal. */
void interruptible_sleep_on(struct wait_queue **p)
{
	__sleep_on(p,TASK_INTERRUPTIBLE);
}
 
/* Sleep on 'p' uninterruptibly: only an explicit wake_up() ends it. */
void sleep_on(struct wait_queue **p)
{
	__sleep_on(p,TASK_UNINTERRUPTIBLE);
}
 
/*
 * New-style timer wheel: tv1 is the root vector with one slot per tick
 * for the next TVR_SIZE ticks; tv2..tv5 hold ever more distant timers
 * at coarser granularity, cascaded down one level as time advances.
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

#define SLOW_BUT_DEBUGGING_TIMERS 0

struct timer_vec {
	int index;	/* next slot to cascade */
	struct timer_list *vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;	/* next slot to expire */
	struct timer_list *vec[TVR_SIZE];
};

static struct timer_vec tv5 = { 0 };
static struct timer_vec tv4 = { 0 };
static struct timer_vec tv3 = { 0 };
static struct timer_vec tv2 = { 0 };
static struct timer_vec_root tv1 = { 0 };

/* tv1 is cast so all levels can be iterated through one array. */
static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

/* Tick up to which all timers have already been processed. */
static unsigned long timer_jiffies = 0;
 
/*
 * Link 'timer' at the head of slot vec[idx].  The head element's prev
 * pointer aims at the slot itself so detach_timer() works uniformly
 * for head and interior elements.
 */
static inline void insert_timer(struct timer_list *timer,
				struct timer_list **vec, int idx)
{
	if ((timer->next = vec[idx]))
		vec[idx]->prev = timer;
	vec[idx] = timer;
	timer->prev = (struct timer_list *)&vec[idx];
}
 
/*
 * Hash 'timer' into the appropriate wheel level by how far in the
 * future it expires.  Must be called with interrupts disabled (cli).
 */
static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		insert_timer(timer, tv1.vec, i);
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		insert_timer(timer, tv2.vec, i);
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv3.vec, i);
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv4.vec, i);
	} else if (expires < timer_jiffies) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		insert_timer(timer, tv1.vec, tv1.index);
	} else if (idx <= 0xffffffffUL) {
		/* Fixed off-by-one: the original tested 'idx < 0xffffffffUL',
		 * so a timer exactly 0xffffffff ticks away fell into the
		 * never-runs branch below even on 32-bit jiffies. */
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv5.vec, i);
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		timer->next = timer->prev = timer;
	}
}
 
/*
 * Add a timer to the wheel with interrupt protection.  With the debug
 * build flag set, a timer that still looks linked is rejected loudly.
 */
void add_timer(struct timer_list *timer)
{
	unsigned long flags;
	save_flags(flags);
	cli();
#if SLOW_BUT_DEBUGGING_TIMERS
	if (timer->next || timer->prev) {
		printk("add_timer() called with non-zero list from %p\n",
			__builtin_return_address(0));
		goto out;
	}
#endif
	internal_add_timer(timer);
#if SLOW_BUT_DEBUGGING_TIMERS
out:
#endif
	restore_flags(flags);
}
 
/*
 * Unlink 'timer' from whatever wheel slot it sits in.  Returns 1 when
 * the timer was actually linked (its prev pointer was set), 0 when it
 * was detached already.  The timer's own pointers are left untouched.
 */
static inline int detach_timer(struct timer_list *timer)
{
	struct timer_list *succ = timer->next;
	struct timer_list *pred = timer->prev;

	if (succ)
		succ->prev = pred;
	if (!pred)
		return 0;	/* was not on any list */
	pred->next = succ;
	return 1;
}
 
 
/*
 * Remove an armed timer.  Returns 1 if the timer was pending, 0 if it
 * had already expired or was never added.  Safe against interrupts.
 */
int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;
	save_flags(flags);
	cli();
	ret = detach_timer(timer);
	timer->next = timer->prev = 0;	/* mark fully detached */
	restore_flags(flags);
	return ret;
}
 
/*
 * Re-hash every timer in the current slot of 'tv' down into the finer
 * wheel levels, then advance the slot index.
 */
static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer;
	timer = tv->vec[tv->index];
	/*
	 * We are removing _all_ timers from the list, so we don't  have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (timer) {
		struct timer_list *tmp = timer;
		timer = timer->next;
		internal_add_timer(tmp);	/* re-inserted at a finer level */
	}
	tv->vec[tv->index] = NULL;
	tv->index = (tv->index + 1) & TVN_MASK;
}
 
/*
 * Run all wheel timers that have expired up to the current jiffies.
 * Whenever tv1 wraps (index 0), higher levels are cascaded down.
 * Handlers run with interrupts enabled; the lists are manipulated
 * with interrupts off.
 */
static inline void run_timer_list(void)
{
	cli();
	/* signed comparison handles jiffies wraparound */
	while ((long)(jiffies - timer_jiffies) >= 0) {
		struct timer_list *timer;
		if (!tv1.index) {
			int n = 1;
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
		while ((timer = tv1.vec[tv1.index])) {
			void (*fn)(unsigned long) = timer->function;
			unsigned long data = timer->data;
			detach_timer(timer);
			timer->next = timer->prev = NULL;
			sti();		/* run the handler with interrupts on */
			fn(data);
			cli();
		}
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;
	}
	sti();
}
 
/*
 * Run expired old-style timers: timer_table entries whose bit is set
 * in the timer_active mask.  The bit is cleared before the handler
 * runs so the handler may re-arm itself.
 */
static inline void run_old_timers(void)
{
	struct timer_struct *tp;
	unsigned long mask;

	for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
		if (mask > timer_active)
			break;		/* no higher bits can be set */
		if (!(mask & timer_active))
			continue;
		if (tp->expires > jiffies)
			continue;
		timer_active &= ~mask;
		tp->fn();
		sti();
	}
}
 
/* Bottom half: drain tasks queued for "next timer tick" (tq_timer). */
void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}
 
/* Bottom half: drain tasks queued for "as soon as possible" (tq_immediate). */
void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}
 
/* Bitmask of live entries in timer_table[] (old-style timers). */
unsigned long timer_active = 0;
struct timer_struct timer_table[32];

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };	/* 1-, 5-, 15-minute fixed-point load */
 
/*
 * Nr of active tasks - counted in fixed-point numbers
 *
 * "Active" means running, in uninterruptible sleep, or swapping.
 * On SMP the per-CPU idle tasks are subtracted back out.
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct **p;
	unsigned long nr = 0;

	for(p = &LAST_TASK; p > &FIRST_TASK; --p)
		if (*p && ((*p)->state == TASK_RUNNING ||
			   (*p)->state == TASK_UNINTERRUPTIBLE ||
			   (*p)->state == TASK_SWAPPING))
			nr += FIXED_1;
#ifdef __SMP__
	nr-=(smp_num_cpus-1)*FIXED_1;
#endif
	return nr;
}
 
/*
 * Update the exponentially-decaying load averages once every
 * LOAD_FREQ ticks, consuming 'ticks' elapsed ticks per call.
 */
static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;	/* ticks until next recalculation */

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
 
/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 */
static void second_overflow(void)
{
	long ltemp;

	/* Bump the maxerror field */
	time_maxerror += time_tolerance >> SHIFT_USEC;
	if ( time_maxerror > NTP_PHASE_LIMIT ) {
		time_maxerror = NTP_PHASE_LIMIT;
		time_state = TIME_ERROR;	/* p. 17, sect. 4.3, (b) */
		time_status |= STA_UNSYNC;
	}

	/*
	 * Leap second processing. If in leap-insert state at
	 * the end of the day, the system clock is set back one
	 * second; if in leap-delete state, the system clock is
	 * set ahead one second. The microtime() routine or
	 * external clock driver will ensure that reported time
	 * is always monotonic. The ugly divides should be
	 * replaced.
	 */
	switch (time_state) {

	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;

	case TIME_INS:
		/* at midnight UTC: repeat the last second (23:59:60) */
		if (xtime.tv_sec % 86400 == 0) {
			xtime.tv_sec--;
			time_state = TIME_OOP;
			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;

	case TIME_DEL:
		/* at midnight UTC: skip one second (23:59:59 never shown) */
		if ((xtime.tv_sec + 1) % 86400 == 0) {
			xtime.tv_sec++;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;

	case TIME_OOP:
		time_state = TIME_WAIT;
		break;

	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
	}

	/*
	 * Compute the phase adjustment for the next second. In
	 * PLL mode, the offset is reduced by a fixed factor
	 * times the time constant. In FLL mode the offset is
	 * used directly. In either mode, the maximum phase
	 * adjustment for each second is clamped so as to spread
	 * the adjustment over not more than the number of
	 * seconds between updates.
	 */
	if (time_offset < 0) {
		ltemp = -time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset += ltemp;
		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	} else {
		ltemp = time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset -= ltemp;
		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	}

	/*
	 * Compute the frequency estimate and additional phase
	 * adjustment due to frequency error for the next
	 * second. When the PPS signal is engaged, gnaw on the
	 * watchdog counter and update the frequency computed by
	 * the pll and the PPS signal.
	 */
	pps_valid++;
	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
		pps_jitter = MAXTIME;
		pps_stabil = MAXFREQ;
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
	}
	ltemp = time_freq + pps_freq;
	if (ltemp < 0)
		time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
	else
		time_adj += ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
	/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
	 */
	if (time_adj < 0)
		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
	else
		time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}
 
/* in the NTP reference this is called "hardclock()" */
/*
 * Advance xtime by one tick, folding in any adjtime() slew
 * (time_adjust) and the accumulated NTP phase adjustment (time_adj).
 */
static void update_wall_time_one_tick(void)
{
	if ( (time_adjust_step = time_adjust) != 0 ) {
	    /* We are doing an adjtime thing. 
	     *
	     * Prepare time_adjust_step to be within bounds.
	     * Note that a positive time_adjust means we want the clock
	     * to run faster.
	     *
	     * Limit the amount of the step to be in the range
	     * -tickadj .. +tickadj
	     */
	     if (time_adjust > tickadj)
		time_adjust_step = tickadj;
	     else if (time_adjust < -tickadj)
		time_adjust_step = -tickadj;
	     
	    /* Reduce by this step the amount of time left  */
	    time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}
 
/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing this this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks
 *
 * Caller guarantees ticks >= 1 (update_times() only calls us when
 * at least one tick was recorded).
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	/* carry microseconds into seconds, running NTP bookkeeping */
	if (xtime.tv_usec >= 1000000) {
	    xtime.tv_usec -= 1000000;
	    xtime.tv_sec++;
	    second_overflow();
	}
}
 
/*
 * Charge 'user' and 'system' ticks to task 'p' and enforce the
 * RLIMIT_CPU soft (SIGXCPU once per second over) and hard (SIGKILL)
 * limits.
 */
static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	long psecs;

	p->utime += user;
	p->stime += system;

	psecs = (p->stime + p->utime) / HZ;
	if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (psecs * HZ == p->stime + p->utime)
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}
 
/*
 * Account 'ticks' against the ITIMER_VIRTUAL timer of 'p'; on expiry
 * deliver SIGVTALRM and reload from it_virt_incr.
 */
static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		if (it_virt <= ticks) {
			/* reload so the subtraction below leaves it_virt_incr */
			it_virt = ticks + p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt - ticks;
	}
}
 
/*
 * Account 'ticks' against the ITIMER_PROF timer of 'p'; on expiry
 * deliver SIGPROF and reload from it_prof_incr.
 */
static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (it_prof <= ticks) {
			/* reload so the subtraction below leaves it_prof_incr */
			it_prof = ticks + p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof - ticks;
	}
}
 
/*
 * Per-task tick accounting: CPU time / rlimits, the virtual itimer
 * (user ticks only) and the profiling itimer (all ticks).
 */
static __inline__ void update_one_process(struct task_struct *p,
	unsigned long ticks, unsigned long user, unsigned long system)
{
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p, ticks);
}
 
/*
 * Distribute 'ticks' elapsed ticks (of which 'system' were spent in
 * kernel mode) to the running task(s): time-slice countdown, kstat
 * CPU accounting, itimers and rescheduling requests.  The SMP branch
 * walks every CPU's current task.
 */
static void update_process_times(unsigned long ticks, unsigned long system)
{
#ifndef  __SMP__
	struct task_struct * p = current;
	unsigned long user = ticks - system;
	if (p->pid) {	/* pid 0 is the idle task: no accounting */
		p->counter -= ticks;
		if (p->counter < 0) {
			p->counter = 0;
			need_resched = 1;
		}
		if (p->priority < DEF_PRIORITY)
			kstat.cpu_nice += user;
		else
			kstat.cpu_user += user;
		kstat.cpu_system += system;
	}
	update_one_process(p, ticks, user, system);
#else
	int cpu,j;
	cpu = smp_processor_id();
	for (j=0;j<smp_num_cpus;j++)
	{
		int i = cpu_logical_map[j];
		struct task_struct *p;

#ifdef __SMP_PROF__
		if (test_bit(i,&smp_idle_map)) 
			smp_idle_count[i]++;
#endif
		p = current_set[i];
		/*
		 * Do we have a real process?
		 */
		if (p->pid) {
			/* assume user-mode process */
			unsigned long utime = ticks;
			unsigned long stime = 0;
			if (cpu == i) {
				/* only the ticking CPU knows its system split */
				utime = ticks-system;
				stime = system;
			} else if (smp_proc_in_lock[j]) {
				/* holding the kernel lock: charge as system */
				utime = 0;
				stime = ticks;
			}
			update_one_process(p, ticks, utime, stime);

			if (p->priority < DEF_PRIORITY)
				kstat.cpu_nice += utime;
			else
				kstat.cpu_user += utime;
			kstat.cpu_system += stime;

			p->counter -= ticks;
			if (p->counter >= 0)
				continue;
			p->counter = 0;
		} else {
			/*
			 * Idle processor found, do we have anything
			 * we could run?
			 */
			if (!(0x7fffffff & smp_process_available))
				continue;
		}
		/* Ok, we should reschedule, do the magic */
		if (i==cpu)
			need_resched = 1;
		else
			smp_message_pass(i, MSG_RESCHEDULE, 0L, 0);
	}
#endif
}
 
/* Ticks recorded by do_timer() and consumed later by update_times(). */
static unsigned long lost_ticks = 0;
static unsigned long lost_ticks_system = 0;	/* of which, in kernel mode */
 
/*
 * Bottom-half side of the tick: atomically claim the ticks recorded
 * by do_timer() and feed them to load, wall-clock and per-process
 * accounting.
 */
static inline void update_times(void)
{
	unsigned long ticks;

	/* xchg makes us race-free against do_timer() incrementing it */
	ticks = xchg(&lost_ticks, 0);

	if (ticks) {
		unsigned long system;

		system = xchg(&lost_ticks_system, 0);
		calc_load(ticks);
		update_wall_time(ticks);
		update_process_times(ticks, system);
	}
}
 
/* The TIMER_BH bottom half: accounting, then both timer mechanisms. */
void timer_bh(void)
{
	update_times();
	run_old_timers();
	run_timer_list();
}
 
/*
 * Hardware timer tick: bump jiffies, record the tick for the bottom
 * half, sample the kernel PC for profiling, and mark the timer BHs.
 */
void do_timer(struct pt_regs * regs)
{
	(*(unsigned long *)&jiffies)++;	/* sidestep volatile read-modify-write */
	lost_ticks++;
	mark_bh(TIMER_BH);
	if (!user_mode(regs)) {
		lost_ticks_system++;
		/* Kernel profiling: bucket the interrupted PC (skip idle). */
		if (prof_buffer && current->pid) {
			extern int _stext;
			unsigned long ip = instruction_pointer(regs);
			ip -= (unsigned long) &_stext;
			ip >>= prof_shift;
			if (ip < prof_len)
				prof_buffer[ip]++;
		}
	}
	if (tq_timer)
		mark_bh(TQUEUE_BH);
}
 
#ifndef __alpha__
 
/*
* For backwards compatibility? This can be done in libc so Alpha
* and all newer ports shouldn't need it.
*/
/*
 * alarm(2), implemented on top of ITIMER_REAL.  Returns the number of
 * seconds remaining on any previously set alarm, rounded up.
 */
asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}
 
/*
* The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
* should be moved into arch/i386 instead?
*/
asmlinkage int sys_getpid(void)
{
return current->pid;
}
 
asmlinkage int sys_getppid(void)
{
return current->p_opptr->pid;
}
 
asmlinkage int sys_getuid(void)
{
return current->uid;
}
 
asmlinkage int sys_geteuid(void)
{
return current->euid;
}
 
asmlinkage int sys_getgid(void)
{
return current->gid;
}
 
asmlinkage int sys_getegid(void)
{
return current->egid;
}
 
/*
* This has been replaced by sys_setpriority. Maybe it should be
* moved into the arch dependent tree for those ports that require
* it for backward compatibility?
*/
/*
 * nice(2): adjust the caller's priority by 'increment' (traditional
 * range -20..20, mapped onto the kernel's time-slice based priority).
 * Lowering nice (raising priority) requires superuser.
 */
asmlinkage int sys_nice(int increment)
{
	unsigned long newprio;
	int increase = 0;

	/* work with the magnitude; remember the sign in 'increase' */
	newprio = increment;
	if (increment < 0) {
		if (!suser())
			return -EPERM;
		newprio = -increment;
		increase = 1;
	}
	if (newprio > 40)
		newprio = 40;
	/*
	 * do a "normalization" of the priority (traditionally
	 * unix nice values are -20..20, linux doesn't really
	 * use that kind of thing, but uses the length of the
	 * timeslice instead (default 150 msec). The rounding is
	 * why we want to avoid negative values.
	 */
	newprio = (newprio * DEF_PRIORITY + 10) / 20;
	increment = newprio;
	if (increase)
		increment = -increment;
	newprio = current->priority - increment;
	/* clamp to [1, 2*DEF_PRIORITY]; the cast catches underflow */
	if ((signed) newprio < 1)
		newprio = 1;
	if (newprio > DEF_PRIORITY*2)
		newprio = DEF_PRIORITY*2;
	current->priority = newprio;
	return 0;
}
 
#endif
 
/*
 * Map a pid to its task_struct.  pid 0 means the calling process;
 * returns NULL (0) when no task with that pid exists.
 */
static struct task_struct *find_process_by_pid(pid_t pid) {
	struct task_struct *t;

	if (pid == 0)
		return current;
	for_each_task(t) {
		if (t && t->pid == pid)
			return t;
	}
	return 0;
}
 
/*
 * Common worker for sched_setscheduler()/sched_setparam().  A policy
 * of -1 means "keep the task's current policy".  Validates the
 * user-supplied sched_param, checks permissions, then installs the
 * policy/priority and requeues the task.
 */
static int setscheduler(pid_t pid, int policy, 
			struct sched_param *param)
{
	int error;
	struct sched_param lp;
	struct task_struct *p;

	if (!param || pid < 0)
		return -EINVAL;

	error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
	if (error)
		return error;
	memcpy_fromfs(&lp, param, sizeof(struct sched_param));

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	if (policy < 0)
		policy = p->policy;	/* sched_setparam(): keep policy */
	else if (policy != SCHED_FIFO && policy != SCHED_RR &&
		 policy != SCHED_OTHER)
		return -EINVAL;

	/*
	 * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
	 * priority for SCHED_OTHER is 0.
	 */
	if (lp.sched_priority < 0 || lp.sched_priority > 99)
		return -EINVAL;
	if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
		return -EINVAL;

	/* realtime policies need superuser; others need uid match or root */
	if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
		return -EPERM;
	if ((current->euid != p->euid) && (current->euid != p->uid) &&
	    !suser())
		return -EPERM;

	p->policy = policy;
	p->rt_priority = lp.sched_priority;
	cli();
	if (p->next_run)	/* runnable: requeue at the tail */
		move_last_runqueue(p);
	sti();
	need_resched = 1;
	return 0;
}
 
/* sched_setscheduler(2): set both policy and parameters. */
asmlinkage int sys_sched_setscheduler(pid_t pid, int policy, 
				      struct sched_param *param)
{
	return setscheduler(pid, policy, param);
}
 
/* sched_setparam(2): change parameters, keep the current policy (-1). */
asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
{
	return setscheduler(pid, -1, param);
}
 
/*
 * sched_getscheduler(2): report the scheduling policy of process
 * 'pid' (0 = caller).  -ESRCH if no such process exists.
 */
asmlinkage int sys_sched_getscheduler(pid_t pid)
{
	struct task_struct *p;

	if (pid < 0)
		return -EINVAL;

	p = find_process_by_pid(pid);
	return p ? p->policy : -ESRCH;
}
 
/*
 * sched_getparam(2): copy the realtime priority of process 'pid'
 * (0 = caller) out to the user-supplied sched_param.
 */
asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
{
	int error;
	struct task_struct *p;
	struct sched_param lp;

	if (!param || pid < 0)
		return -EINVAL;

	error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
	if (error)
		return error;

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	lp.sched_priority = p->rt_priority;
	memcpy_tofs(param, &lp, sizeof(struct sched_param));

	return 0;
}
 
/*
 * sched_yield(2): surrender the remainder of the caller's time slice
 * and move to the back of the run-queue; the reschedule happens on
 * return to user mode via need_resched.
 */
asmlinkage int sys_sched_yield(void)
{
	cli();
	move_last_runqueue(current);
	current->counter = 0;	/* time slice used up */
	need_resched = 1;
	sti();
	return 0;
}
 
/*
 * sched_get_priority_max(2): highest static priority for a policy
 * (99 for the realtime policies, 0 for SCHED_OTHER).
 */
asmlinkage int sys_sched_get_priority_max(int policy)
{
	if (policy == SCHED_FIFO || policy == SCHED_RR)
		return 99;
	if (policy == SCHED_OTHER)
		return 0;
	return -EINVAL;
}
 
/*
 * sched_get_priority_min(2): lowest static priority for a policy
 * (1 for the realtime policies, 0 for SCHED_OTHER).
 */
asmlinkage int sys_sched_get_priority_min(int policy)
{
	if (policy == SCHED_FIFO || policy == SCHED_RR)
		return 1;
	if (policy == SCHED_OTHER)
		return 0;
	return -EINVAL;
}
 
/*
 * sched_rr_get_interval(2): report the round-robin time slice.
 * The pid is ignored; a fixed nominal quantum is returned.
 */
asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
	int error;
	struct timespec t;

	error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
	if (error)
		return error;

	/* Values taken from 2.1.38 */
	t.tv_sec = 0;
	t.tv_nsec = 150000;   /* is this right for non-intel architecture too?*/
	memcpy_tofs(interval, &t, sizeof(struct timespec));

	return 0;
}
 
/*
 * Convert a timespec into a tick count.  The nanosecond part is
 * rounded UP to a whole tick so sleeps never finish early, and the
 * result saturates at LONG_MAX to dodge the most obvious overflows.
 */
static unsigned long timespectojiffies(struct timespec *value)
{
	const long nsec_per_tick = 1000000000L / HZ;
	unsigned long sec = (unsigned) value->tv_sec;
	long nsec = value->tv_nsec;

	if (sec > (LONG_MAX / HZ))
		return LONG_MAX;
	nsec = (nsec + nsec_per_tick - 1) / nsec_per_tick;
	return HZ * sec + nsec;
}
 
/*
 * Convert a tick count back into a timespec (whole seconds plus the
 * remainder scaled to nanoseconds).
 */
static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
{
	unsigned long sec = jiffies / HZ;
	unsigned long rem = jiffies % HZ;

	value->tv_sec = sec;
	value->tv_nsec = rem * (1000000000L / HZ);
}
 
/*
 * nanosleep(2): sleep for the requested interval.  Realtime tasks get
 * a precise busy-wait for very short delays; everyone else sleeps on
 * the scheduler timeout.  On early wakeup (-EINTR) the remaining time
 * is written to *rmtp when supplied.
 */
asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	int error;
	struct timespec t;
	unsigned long expire;

	error = verify_area(VERIFY_READ, rqtp, sizeof(struct timespec));
	if (error)
		return error;
	memcpy_fromfs(&t, rqtp, sizeof(struct timespec));
	if (rmtp) {
		error = verify_area(VERIFY_WRITE, rmtp,
				    sizeof(struct timespec));
		if (error)
			return error;
	}

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER) {
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

	/* the "+ (sec||nsec)" adds one tick for any nonzero request */
	expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
	current->timeout = expire;
	current->state = TASK_INTERRUPTIBLE;
	schedule();

	/* woke before the deadline: a signal interrupted us */
	if (expire > jiffies) {
		if (rmtp) {
			jiffiestotimespec(expire - jiffies -
					  (expire > jiffies + 1), &t);
			memcpy_tofs(rmtp, &t, sizeof(struct timespec));
		}
		return -EINTR;
	}

	return 0;
}
 
/* Used in fs/proc/array.c */
/*
 * Best-effort "wait channel": the kernel PC where a sleeping task is
 * blocked, found by walking its saved stack frames.  Returns 0 for
 * running tasks, the caller itself, or unsupported architectures.
 */
unsigned long get_wchan(struct task_struct *p)
{
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
#if defined(__i386__)
	{
		unsigned long ebp, eip;
		unsigned long stack_page;
		int count = 0;

		stack_page = p->kernel_stack_page;
		if (!stack_page)
			return 0;
		ebp = p->tss.ebp;
		do {
			/* frame pointer must stay inside the kernel stack page */
			if (ebp < stack_page || ebp >= 4092+stack_page)
				return 0;
			eip = *(unsigned long *) (ebp+4);
			/* first return address outside the sleep primitives
			 * (assumes they are laid out contiguously in text) */
			if (eip < (unsigned long) interruptible_sleep_on
			    || eip >= (unsigned long) add_timer)
				return eip;
			ebp = *(unsigned long *) ebp;
		} while (count++ < 16);
	}
#elif defined(__alpha__)
	/*
	 * This one depends on the frame size of schedule().  Do a
	 * "disass schedule" in gdb to find the frame size.  Also, the
	 * code assumes that sleep_on() follows immediately after
	 * interruptible_sleep_on() and that add_timer() follows
	 * immediately after interruptible_sleep().  Ugly, isn't it?
	 * Maybe adding a wchan field to task_struct would be better,
	 * after all...
	 */
	{
		unsigned long schedule_frame;
		unsigned long pc;

		pc = thread_saved_pc(&p->tss);
		if (pc >= (unsigned long) interruptible_sleep_on && pc < (unsigned long) add_timer) {
			schedule_frame = ((unsigned long *)p->tss.ksp)[6];
			return ((unsigned long *)schedule_frame)[12];
		}
		return pc;
	}
#endif
	return 0;
}
 
/*
 * Print one row of the show_state() table for task p: command,
 * slot number (negated when p is the current task), state letter,
 * saved PC, wchan, free kernel stack estimate, and family pids.
 */
static void show_task(int nr,struct task_struct * p)
{
	unsigned long free;
	static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

	printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
	if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
		printk(stat_nam[p->state]);
	else
		printk(" ");
#if ((~0UL) == 0xffffffff)
	/* 32-bit column widths */
	if (p == current)
		printk(" current ");
	else
		printk(" %08lX ", thread_saved_pc(&p->tss));
	printk("%08lX ", get_wchan(p));
#else
	/* 64-bit column widths */
	if (p == current)
		printk(" current task ");
	else
		printk(" %016lx ", thread_saved_pc(&p->tss));
	printk("%08lX ", get_wchan(p) & 0xffffffffL);
#endif
	/* stack sentinels painted at task creation; a clobbered base
	 * magic means the stack overflowed at some point */
	if (((unsigned long *)p->kernel_stack_page)[0] != STACK_MAGIC)
		printk(" bad-");
	/* count never-written longs above the magic word to estimate
	 * the low-water mark of free stack */
	for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
		if (((unsigned long *)p->kernel_stack_page)[free] != STACK_UNTOUCHED_MAGIC)
			break;
	}
	printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
	if (p->p_cptr)
		printk("%5d ", p->p_cptr->pid);
	else
		printk(" ");
	if (p->p_ysptr)
		printk("%7d", p->p_ysptr->pid);
	else
		printk(" ");
	if (p->p_osptr)
		printk(" %5d\n", p->p_osptr->pid);
	else
		printk("\n");
}
 
/*
 * Dump a one-line summary of every task slot to the console;
 * see show_task() for the column layout.
 */
void show_state(void)
{
	int i;

#if ((~0UL) == 0xffffffff)
	/* 32-bit header */
	printk("\n"
	" free sibling\n");
	printk(" task PC wchan stack pid father child younger older\n");
#else
	/* 64-bit header */
	printk("\n"
	" free sibling\n");
	printk(" task PC wchan stack pid father child younger older\n");
#endif
	for (i=0 ; i<NR_TASKS ; i++)
		if (task[i])
			show_task(i,task[i]);
}
 
/*
 * sched_init(): one-time scheduler initialisation.  Installs the
 * init task as the running task on every CPU, paints the initial
 * kernel stack with sentinel values for overflow detection, and
 * registers the standard bottom-half handlers.
 */
void sched_init(void)
{
	/*
	 * We have to do a little magic to get the first
	 * process right in SMP mode.
	 */
	int cpu=smp_processor_id();
	int i;
#ifndef __SMP__
	current_set[cpu]=&init_task;
#else
	/* every CPU starts out running the init task */
	init_task.processor=cpu;
	for(cpu = 0; cpu < NR_CPUS; cpu++)
		current_set[cpu] = &init_task;
#endif

	/* STACK_MAGIC at the base detects overflow; the untouched
	 * pattern lets show_task() estimate free stack.
	 * NOTE(review): the hard-coded 1024 assumes a 4K stack of
	 * longs -- confirm it matches sizeof(init_kernel_stack). */
	init_kernel_stack[0] = STACK_MAGIC;
	for(i=1;i<1024;i++)
		init_kernel_stack[i] = STACK_UNTOUCHED_MAGIC;
	init_bh(TIMER_BH, timer_bh);
	init_bh(TQUEUE_BH, tqueue_bh);
	init_bh(IMMEDIATE_BH, immediate_bh);
}
/resource.c
0,0 → 1,138
/*
* linux/kernel/resource.c
*
* Copyright (C) 1995 Linus Torvalds
* David Hinds
*
* Kernel io-region resource management
*/
 
/*
* Revisions for CONFIG_REDUCED_MEMORY by Kenneth Albanowski <kjahds@kjahds.com>,
* Copyright (C) 1997, 1998 The Silver Hammer Group, Ltd.
*/
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/ioport.h>
 
#ifdef CONFIG_REDUCED_MEMORY
#define IOTABLE_SIZE 1 /* Originally 128 */
#else /* !CONFIG_REDUCED_MEMORY */
#define IOTABLE_SIZE 128
#endif /* !CONFIG_REDUCED_MEMORY */

/* One registered I/O port region [from, from+num), plus the
 * owner's name for the /proc/ioports listing. */
typedef struct resource_entry_t {
	u_long from, num;
	const char *name;
	struct resource_entry_t *next;
} resource_entry_t;

/* Sorted singly-linked list of live regions; the head is a dummy. */
static resource_entry_t iolist = { 0, 0, "", NULL };

/* Static backing store for list entries (num == 0 means free). */
static resource_entry_t iotable[IOTABLE_SIZE];
 
/*
 * Generate the /proc/ioports report into buf, one line per
 * registered region.  Stops once the output approaches the 4K
 * page limit.  Returns the number of characters written.
 */
int get_ioport_list(char *buf)
{
	resource_entry_t *entry = iolist.next;
	int chars = 0;

	while (entry && chars < 4000) {
		chars += sprintf(buf + chars, "%04lx-%04lx : %s\n",
			entry->from, entry->from + entry->num - 1,
			entry->name);
		entry = entry->next;
	}
	if (entry)
		chars += sprintf(buf + chars, "4K limit reached!\n");
	return chars;
}
 
/*
 * The workhorse function: find the list entry after which a new
 * region [from, from+num-1] can be inserted, keeping the list
 * sorted and overlap-free.  Returns NULL when the range wraps
 * past the top of the address space or collides with an existing
 * entry.  Interrupts are disabled so the list cannot change
 * while it is being scanned.
 */
static resource_entry_t *find_gap(resource_entry_t *root,
    u_long from, u_long num)
{
	unsigned long flags;
	resource_entry_t *p;
	/* reject a range whose end wraps around zero */
	if (from > from+num-1)
		return NULL;
	save_flags(flags);
	cli();
	for (p = root; ; p = p->next) {
		/* does the entry we stand on already reach into the
		 * requested range? (the dummy head is exempt) */
		if ((p != root) && (p->from+p->num-1 >= from)) {
			p = NULL;
			break;
		}
		/* gap before the next entry, or end of list */
		if ((p->next == NULL) || (p->next->from > from+num-1))
			break;
	}
	restore_flags(flags);
	return p;
}
 
/*
 * Register the I/O port region [from, from+num) under the given
 * name.  Silently does nothing when the region overlaps an
 * existing registration; warns when the static table is full.
 */
void request_region(unsigned int from, unsigned int num, const char *name)
{
	resource_entry_t *prev;
	int slot;

	/* locate a free slot in the static table */
	for (slot = 0; slot < IOTABLE_SIZE; slot++)
		if (iotable[slot].num == 0)
			break;
	if (slot == IOTABLE_SIZE) {
		printk("warning: ioport table is full\n");
		return;
	}
	prev = find_gap(&iolist, from, num);
	if (prev == NULL)
		return;	/* overlaps an existing region */
	iotable[slot].name = name;
	iotable[slot].from = from;
	iotable[slot].num = num;
	iotable[slot].next = prev->next;
	prev->next = &iotable[slot];
}
 
/*
 * Unregister an I/O port region.  The (from, num) pair must match
 * the original registration exactly; unknown regions are ignored.
 */
void release_region(unsigned int from, unsigned int num)
{
	resource_entry_t *prev = &iolist;
	resource_entry_t *cur;

	while ((cur = prev->next) != NULL) {
		if (cur->from == from && cur->num == num) {
			cur->num = 0;	/* marks the table slot free */
			prev->next = cur->next;
			return;
		}
		prev = cur;
	}
}
 
/*
 * Probe helper: returns 0 when [from, from+num) is free to claim,
 * -EBUSY when it collides with a registered region.
 */
int check_region(unsigned int from, unsigned int num)
{
	if (find_gap(&iolist, from, num) == NULL)
		return -EBUSY;
	return 0;
}
 
/*
 * Handle the "reserve=" boot argument from init/main.c: ints[]
 * holds (count, start1, extent1, start2, extent2, ...); each pair
 * is registered as a "reserved" I/O region.
 */
void reserve_setup(char *str, int *ints)
{
	int idx = 1;

	while (idx < ints[0]) {
		request_region(ints[idx], ints[idx + 1], "reserved");
		idx += 2;
	}
}
/softirq.c
0,0 → 1,60
/*
* linux/kernel/softirq.c
*
* Copyright (C) 1992 Linus Torvalds
*
* do_bottom_half() runs at normal kernel priority: all interrupts
* enabled. do_bottom_half() is atomic with respect to itself: a
* bottom_half handler need not be re-entrant.
*/
 
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/kernel_stat.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
 
#include <asm/system.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/bitops.h>
 
/* Non-zero while interrupt/bottom-half code is executing. */
unsigned long intr_count = 0;

/* Per-handler disable nesting counts. */
int bh_mask_count[32];
/* Bit n set: bottom half n is pending / enabled, respectively. */
unsigned long bh_active = 0;
unsigned long bh_mask = 0;
/* Handler table, indexed by bottom-half number. */
void (*bh_base[32])(void);
 
 
/*
 * Run every pending, enabled bottom half with interrupts on.
 * `mask` walks bh_active one bit at a time; `left` starts all-ones
 * and drops one low bit per iteration (left += left == left << 1),
 * so the loop ends as soon as no active bit at or above the
 * current position remains.
 */
asmlinkage void do_bottom_half(void)
{
	unsigned long active;
	unsigned long mask, left;
	void (**bh)(void);

	sti();
	bh = bh_base;
	active = bh_active & bh_mask;
	for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) {
		if (mask & active) {
			void (*fn)(void);
			/* clear pending before calling, so the handler
			 * may re-mark itself active */
			bh_active &= ~mask;
			fn = *bh;
			if (!fn)
				goto bad_bh;
			fn();
		}
	}
	/* NOTE(review): this cli() was flagged in the original as
	 * "SIMON: forbidden change of source file - execution error";
	 * it is not in the stock kernel and leaves interrupts disabled
	 * on return -- confirm the caller expects that. */
	cli();

	return;
bad_bh:
	printk ("irq.c:bad bottom half entry %08lx\n", mask);
	/* NOTE(review): same injected cli() as above. */
	cli();

}
/itimer.c
0,0 → 1,181
/*
* linux/kernel/itimer.c
*
* Copyright (C) 1992 Darren Senn
*/
 
/* These are all the functions necessary to implement itimers */
 
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/mm.h>
 
#include <asm/segment.h>
 
/*
 * Convert a timeval to a jiffy count, avoiding the most obvious
 * overflows.
 *
 * The tv_*sec fields are signed, but nothing says whether itimers
 * should honour that; since alarm() builds on itimers without
 * range checks, both fields are treated as unsigned here.  The
 * microsecond part is rounded up to whole ticks, and the result
 * saturates at ULONG_MAX when the seconds value alone overflows.
 */
static unsigned long tvtojiffies(struct timeval *value)
{
	unsigned long seconds = (unsigned) value->tv_sec;
	unsigned long ticks = (unsigned) value->tv_usec;

	if (seconds > (ULONG_MAX / HZ))
		return ULONG_MAX;
	/* round partial ticks up to a whole tick */
	ticks = (ticks + (1000000 / HZ - 1)) / (1000000 / HZ);
	return seconds * HZ + ticks;
}
 
/* Inverse of tvtojiffies: express a jiffy count as a timeval. */
static void jiffiestotv(unsigned long jiffies, struct timeval *value)
{
	unsigned long leftover = jiffies % HZ;

	value->tv_sec = jiffies / HZ;
	value->tv_usec = leftover * (1000000 / HZ);
}
 
/*
 * Read the current value of one of the caller's interval timers
 * into *value (kernel memory).  For ITIMER_REAL the remaining
 * time is derived from the pending real_timer, which is briefly
 * removed and immediately re-armed.  Returns -EINVAL for an
 * unknown `which`.
 */
static int _getitimer(int which, struct itimerval *value)
{
	register unsigned long val, interval;

	switch (which) {
	case ITIMER_REAL:
		interval = current->it_real_incr;
		val = 0;
		/* del_timer() returns non-zero only if the timer was
		 * pending; re-adding it right away makes this a peek */
		if (del_timer(&current->real_timer)) {
			unsigned long now = jiffies;
			val = current->real_timer.expires;
			add_timer(&current->real_timer);
			/* look out for negative/zero itimer.. */
			if (val <= now)
				val = now+1;
			val -= now;
		}
		break;
	case ITIMER_VIRTUAL:
		val = current->it_virt_value;
		interval = current->it_virt_incr;
		break;
	case ITIMER_PROF:
		val = current->it_prof_value;
		interval = current->it_prof_incr;
		break;
	default:
		return(-EINVAL);
	}
	/* convert both jiffy counts back to timevals for the caller */
	jiffiestotv(val, &value->it_value);
	jiffiestotv(interval, &value->it_interval);
	return 0;
}
 
asmlinkage int sys_getitimer(int which, struct itimerval *value)
{
int error;
struct itimerval get_buffer;
 
if (!value)
return -EFAULT;
error = _getitimer(which, &get_buffer);
if (error)
return error;
error = verify_area(VERIFY_WRITE, value, sizeof(struct itimerval));
if (error)
return error;
memcpy_tofs(value, &get_buffer, sizeof(get_buffer));
return 0;
}
 
/*
 * Timer callback for ITIMER_REAL: deliver SIGALRM to the owning
 * task and, when a repeat interval is set, re-arm the timer
 * (clamping the expiry to ULONG_MAX on jiffies wraparound).
 */
void it_real_fn(unsigned long __data)
{
	struct task_struct *tsk = (struct task_struct *) __data;
	unsigned long incr;

	send_sig(SIGALRM, tsk, 1);
	incr = tsk->it_real_incr;
	if (!incr)
		return;
	tsk->real_timer.expires = jiffies + incr;
	/* check for overflow */
	if (tsk->real_timer.expires < incr)
		tsk->real_timer.expires = ULONG_MAX;
	add_timer(&tsk->real_timer);
}
 
/*
 * Install new settings for one of the caller's interval timers
 * (value is in kernel memory) and optionally return the previous
 * settings via *ovalue.  Returns -EINVAL for an unknown `which`.
 */
int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
	register unsigned long i, j;
	int k;

	i = tvtojiffies(&value->it_interval);
	j = tvtojiffies(&value->it_value);
	/* read the old value first; for ITIMER_REAL this re-arms the
	 * real timer, which is deleted again just below */
	if (ovalue && (k = _getitimer(which, ovalue)) < 0)
		return k;
	switch (which) {
	case ITIMER_REAL:
		del_timer(&current->real_timer);
		current->it_real_value = j;
		current->it_real_incr = i;
		if (!j)
			break;
		i = j + jiffies;
		/* check for overflow.. */
		if (i < j)
			i = ULONG_MAX;
		current->real_timer.expires = i;
		add_timer(&current->real_timer);
		break;
	case ITIMER_VIRTUAL:
		/* bias by one tick so a non-zero value never fires early */
		if (j)
			j++;
		current->it_virt_value = j;
		current->it_virt_incr = i;
		break;
	case ITIMER_PROF:
		/* same one-tick bias as ITIMER_VIRTUAL */
		if (j)
			j++;
		current->it_prof_value = j;
		current->it_prof_incr = i;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
 
asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
int error;
struct itimerval set_buffer, get_buffer;
 
if (value) {
error = verify_area(VERIFY_READ, value, sizeof(*value));
if (error)
return error;
memcpy_fromfs(&set_buffer, value, sizeof(set_buffer));
} else
memset((char *) &set_buffer, 0, sizeof(set_buffer));
 
if (ovalue) {
error = verify_area(VERIFY_WRITE, ovalue, sizeof(struct itimerval));
if (error)
return error;
}
 
error = _setitimer(which, &set_buffer, ovalue ? &get_buffer : 0);
if (error || !ovalue)
return error;
 
memcpy_tofs(ovalue, &get_buffer, sizeof(get_buffer));
return error;
}
/panic.c
0,0 → 1,93
/*
* linux/kernel/panic.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Revisions for CONFIG_CONSOLE by Kenneth Albanowski
* Copyright (c) 1997, 1998 The Silver Hammer Group, Ltd.
*
*/
 
/*
* This function is used through-out the kernel (including mm and fs)
* to indicate a major problem.
*/
#include <stdarg.h>
 
#include <linux/config.h> /* CONFIG_SCSI_GDTH */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/delay.h>
 
#ifdef CONFIG_NETtel
#include <asm/coldfire.h>
#include <asm/mcfsim.h>
#include <asm/nettel.h>
#endif
 
asmlinkage void sys_sync(void); /* it's really int */
extern void hard_reset_now(void);
extern void do_unblank_screen(void);
extern void DAC960_Finalize(void);
extern void gdth_halt(void);
extern int C_A_D;
 
int panic_timeout = 0;
 
/*
 * Handle the "panic=" boot argument: a single integer giving the
 * delay in seconds before panic() reboots the machine.
 */
void panic_setup(char *str, int *ints)
{
	if (ints[0] != 1)
		return;
	panic_timeout = ints[1];
}
 
/*
 * panic(): report an unrecoverable kernel error.  Formats the
 * message, syncs file systems (unless called from the swapper,
 * which has nothing to sync), optionally reboots after
 * panic_timeout seconds, and otherwise spins forever.
 */
NORET_TYPE void panic(const char * fmt, ...)
{
	/* NOTE(review): vsprintf() is unbounded -- a message expanding
	 * past 1024 bytes overruns buf.  vsnprintf() does not exist in
	 * this kernel generation; keep panic formats short. */
	static char buf[1024];
	va_list args;
	int i;

	va_start(args, fmt);
	vsprintf(buf, fmt, args);
	va_end(args);
	printk(KERN_EMERG "Kernel panic: %s\n",buf);
	if (current == task[0])
		printk(KERN_EMERG "In swapper task - not syncing\n");
	else
		sys_sync();

#ifdef CONFIG_CONSOLE
	do_unblank_screen();
#endif /* CONFIG_CONSOLE */

	if (panic_timeout > 0)
	{
		/*
		 * Delay timeout seconds before rebooting the machine.
		 * We can't use the "normal" timers since we just panicked..
		 */
		printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
		for(i = 0; i < (panic_timeout*1000); i++)
			udelay(1000);
		/* give RAID/SCSI controllers a chance to flush caches */
#ifdef CONFIG_BLK_DEV_DAC960
		DAC960_Finalize();
#endif
#ifdef CONFIG_SCSI_GDTH
		gdth_halt();
#endif
		hard_reset_now();
	}

#ifdef CONFIG_NETtel
	nettel_panic();
#endif
	/* nothing left to do: hang here */
	for(;;);
}
 
/*
 * GCC 2.5.8 doesn't always optimize correctly; see include/asm/segment.h
 */

/*
 * Called from the user-access helpers in include/asm/segment.h
 * when they are reached with an unsupported transfer length.
 * Always fatal.
 */
int bad_user_access_length(void)
{
	panic("bad_user_access_length executed (not cool, dude)");
	/* panic() never returns, but the function is declared int:
	 * an explicit return avoids falling off the end (UB if the
	 * value were ever used) and silences the compiler. */
	return 0;
}
/info.c
0,0 → 1,41
/*
* linux/kernel/info.c
*
* Copyright (C) 1992 Darren Senn
*/
 
/* This implements the sysinfo() system call */
 
#include <asm/segment.h>
 
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/unistd.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/swap.h>
 
/*
 * sysinfo(2): fill a struct sysinfo with uptime, load averages,
 * process count and memory/swap statistics, then copy it out to
 * userspace.
 */
asmlinkage int sys_sysinfo(struct sysinfo *info)
{
	struct sysinfo val;
	int error;

	error = verify_area(VERIFY_WRITE, info, sizeof(struct sysinfo));
	if (error)
		return error;

	memset((char *) &val, 0, sizeof(struct sysinfo));
	val.uptime = jiffies / HZ;
	/* rescale the scheduler's fixed-point load averages */
	val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
	val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
	val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
	val.procs = nr_tasks - 1;	/* presumably excludes the idle task -- confirm */
	si_meminfo(&val);
	si_swapinfo(&val);

	memcpy_tofs(info, &val, sizeof(struct sysinfo));
	return 0;
}
/Makefile
0,0 → 1,25
#
# Makefile for the linux kernel.
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (ie not a .c file).
#
# Note 2! The CFLAGS definitions are now in the main makefile...

# Suffix rule: preprocess .S assembly sources into plain .s files.
.S.s:
	$(CPP) -traditional $< -o $*.s

O_TARGET := kernel.o
O_OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \
	module.o exit.o signal.o itimer.o info.o time.o softirq.o \
	resource.o sysctl.o

# ksyms.o exports symbols, so it is only built with module support.
ifeq ($(CONFIG_MODULES),y)
OX_OBJS = ksyms.o
endif

include $(TOPDIR)/Rules.make

# sched.o keeps its frame pointer so stack walks (get_wchan) work.
sched.o: sched.c
	$(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $<

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.