URL: https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
Compare Revisions
This comparison shows the changes necessary to convert path /or1k/trunk/linux/linux-2.4/kernel from Rev 1275 to Rev 1765.
Rev 1275 → Rev 1765
/time.c
0,0 → 1,411
/* |
* linux/kernel/time.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
* |
* This file contains the interface functions for the various |
* time related system calls: time, stime, gettimeofday, settimeofday, |
* adjtime |
*/ |
/* |
* Modification history kernel/time.c |
* |
* 1993-09-02 Philip Gladstone |
* Created file with time related functions from sched.c and adjtimex() |
* 1993-10-08 Torsten Duwe |
* adjtime interface update and CMOS clock write code |
* 1995-08-13 Torsten Duwe |
* kernel PLL updated to 1994-12-13 specs (rfc-1589) |
* 1999-01-16 Ulrich Windl |
* Introduced error checking for many cases in adjtimex(). |
* Updated NTP code according to technical memorandum Jan '96 |
* "A Kernel Model for Precision Timekeeping" by Dave Mills |
* Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10) |
* (Even though the technical memorandum forbids it) |
*/ |
|
#include <linux/mm.h> |
#include <linux/timex.h> |
#include <linux/smp_lock.h> |
|
#include <asm/uaccess.h> |
|
/* |
* The timezone where the local system is located. Used as a default by some |
* programs who obtain this value by using gettimeofday. |
*/ |
struct timezone sys_tz; |
|
/* The xtime_lock is not only serializing the xtime read/writes but it's also |
serializing all accesses to the global NTP variables now. */ |
extern rwlock_t xtime_lock; |
|
#if !defined(__alpha__) && !defined(__ia64__) |
|
/* |
* sys_time() can be implemented in user-level using |
* sys_gettimeofday(). Is this for backwards compatibility? If so, |
* why not move it into the appropriate arch directory (for those |
* architectures that need it). |
* |
* XXX This function is NOT 64-bit clean! |
*/ |
asmlinkage long sys_time(int * tloc) |
{ |
struct timeval now; |
int i; |
|
do_gettimeofday(&now); |
i = now.tv_sec; |
if (tloc) { |
if (put_user(i,tloc)) |
i = -EFAULT; |
} |
return i; |
} |
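
The comment above observes that sys_time() is essentially gettimeofday() with the sub-second part dropped. A minimal user-level sketch of that equivalence (hypothetical helper name, assuming <sys/time.h>):

	#include <sys/time.h>
	#include <time.h>
	#include <stddef.h>

	/* behaves like time(2): seconds since the Epoch, optionally stored in *tloc */
	time_t time_via_gettimeofday(time_t *tloc)
	{
		struct timeval now;

		gettimeofday(&now, NULL);	/* error handling omitted in this sketch */
		if (tloc)
			*tloc = now.tv_sec;
		return now.tv_sec;
	}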
|
/* |
* sys_stime() can be implemented in user-level using |
* sys_settimeofday(). Is this for backwards compatibility? If so, |
* why not move it into the appropriate arch directory (for those |
* architectures that need it). |
*/ |
|
asmlinkage long sys_stime(int * tptr) |
{ |
int value; |
|
if (!capable(CAP_SYS_TIME)) |
return -EPERM; |
if (get_user(value, tptr)) |
return -EFAULT; |
write_lock_irq(&xtime_lock); |
vxtime_lock(); |
xtime.tv_sec = value; |
xtime.tv_usec = 0; |
vxtime_unlock(); |
time_adjust = 0; /* stop active adjtime() */ |
time_status |= STA_UNSYNC; |
time_maxerror = NTP_PHASE_LIMIT; |
time_esterror = NTP_PHASE_LIMIT; |
write_unlock_irq(&xtime_lock); |
return 0; |
} |
|
#endif |
|
asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz) |
{ |
if (tv) { |
struct timeval ktv; |
do_gettimeofday(&ktv); |
if (copy_to_user(tv, &ktv, sizeof(ktv))) |
return -EFAULT; |
} |
if (tz) { |
if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) |
return -EFAULT; |
} |
return 0; |
} |
|
/* |
* Adjust the time obtained from the CMOS to be UTC time instead of |
* local time. |
* |
* This is ugly, but preferable to the alternatives. Otherwise we |
* would either need to write a program to do it in /etc/rc (and risk |
* confusion if the program gets run more than once; it would also be |
* hard to make the program warp the clock precisely n hours) or |
* compile in the timezone information into the kernel. Bad, bad.... |
* |
* - TYT, 1992-01-01 |
* |
* The best thing to do is to keep the CMOS clock in universal time (UTC) |
* as real UNIX machines always do it. This avoids all headaches about |
* daylight saving times and warping kernel clocks. |
*/ |
inline static void warp_clock(void) |
{ |
write_lock_irq(&xtime_lock); |
vxtime_lock(); |
xtime.tv_sec += sys_tz.tz_minuteswest * 60; |
vxtime_unlock(); |
write_unlock_irq(&xtime_lock); |
} |
|
/* |
* In case for some reason the CMOS clock has not already been running |
* in UTC, but in some local time: The first time we set the timezone, |
* we will warp the clock so that it is ticking UTC time instead of |
* local time. Presumably, if someone is setting the timezone then we |
* are running in an environment where the programs understand about |
* timezones. This should be done at boot time in the /etc/rc script, |
* as soon as possible, so that the clock can be set right. Otherwise, |
* various programs will get confused when the clock gets warped. |
*/ |
|
int do_sys_settimeofday(struct timeval *tv, struct timezone *tz) |
{ |
static int firsttime = 1; |
|
if (!capable(CAP_SYS_TIME)) |
return -EPERM; |
|
if (tz) { |
/* SMP safe, global irq locking makes it work. */ |
sys_tz = *tz; |
if (firsttime) { |
firsttime = 0; |
if (!tv) |
warp_clock(); |
} |
} |
if (tv) |
{ |
/* SMP safe, again the code in arch/foo/time.c should |
* globally block out interrupts when it runs. |
*/ |
do_settimeofday(tv); |
} |
return 0; |
} |
|
asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz) |
{ |
struct timeval new_tv; |
struct timezone new_tz; |
|
if (tv) { |
if (copy_from_user(&new_tv, tv, sizeof(*tv))) |
return -EFAULT; |
} |
if (tz) { |
if (copy_from_user(&new_tz, tz, sizeof(*tz))) |
return -EFAULT; |
} |
|
return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL); |
} |
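
To make the warp_clock() path above concrete: the first settimeofday() call since boot that supplies a timezone but no time value makes the kernel add tz_minuteswest minutes to xtime, compensating for a CMOS clock kept in local time. A hedged userspace sketch (the offset value is purely illustrative and the call requires CAP_SYS_TIME):

	#include <sys/time.h>
	#include <stddef.h>

	int set_timezone_at_boot(void)
	{
		struct timezone tz;

		tz.tz_minuteswest = 300;	/* e.g. 5 hours west of UTC (hypothetical) */
		tz.tz_dsttime = 0;
		/* tv == NULL and this is the first timezone set, so
		 * do_sys_settimeofday() calls warp_clock() and xtime
		 * jumps forward by 300 * 60 seconds. */
		return settimeofday(NULL, &tz);
	}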
|
long pps_offset; /* pps time offset (us) */ |
long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */ |
|
long pps_freq; /* frequency offset (scaled ppm) */ |
long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */ |
|
long pps_valid = PPS_VALID; /* pps signal watchdog counter */ |
|
int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ |
|
long pps_jitcnt; /* jitter limit exceeded */ |
long pps_calcnt; /* calibration intervals */ |
long pps_errcnt; /* calibration errors */ |
long pps_stbcnt; /* stability limit exceeded */ |
|
/* hook for a loadable hardpps kernel module */ |
void (*hardpps_ptr)(struct timeval *); |
|
/* adjtimex mainly allows reading (and writing, if superuser) of |
* kernel time-keeping variables. used by xntpd. |
*/ |
int do_adjtimex(struct timex *txc) |
{ |
long ltemp, mtemp, save_adjust; |
int result; |
|
/* In order to modify anything, you gotta be super-user! */ |
if (txc->modes && !capable(CAP_SYS_TIME)) |
return -EPERM; |
|
/* Now we validate the data before disabling interrupts */ |
|
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) |
/* singleshot must not be used with any other mode bits */ |
if (txc->modes != ADJ_OFFSET_SINGLESHOT) |
return -EINVAL; |
|
if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) |
/* adjustment Offset limited to +- .512 seconds */ |
if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) |
return -EINVAL; |
|
/* if the quartz is off by more than 10% something is VERY wrong ! */ |
if (txc->modes & ADJ_TICK) |
if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) |
return -EINVAL; |
|
write_lock_irq(&xtime_lock); |
result = time_state; /* mostly `TIME_OK' */ |
|
/* Save for later - semantics of adjtime is to return old value */ |
save_adjust = time_adjust; |
|
#if 0 /* STA_CLOCKERR is never set yet */ |
time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ |
#endif |
/* If there are input parameters, then process them */ |
if (txc->modes) |
{ |
if (txc->modes & ADJ_STATUS) /* only set allowed bits */ |
time_status = (txc->status & ~STA_RONLY) | |
(time_status & STA_RONLY); |
|
if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ |
if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { |
result = -EINVAL; |
goto leave; |
} |
time_freq = txc->freq - pps_freq; |
} |
|
if (txc->modes & ADJ_MAXERROR) { |
if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { |
result = -EINVAL; |
goto leave; |
} |
time_maxerror = txc->maxerror; |
} |
|
if (txc->modes & ADJ_ESTERROR) { |
if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { |
result = -EINVAL; |
goto leave; |
} |
time_esterror = txc->esterror; |
} |
|
if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ |
if (txc->constant < 0) { /* NTP v4 uses values > 6 */ |
result = -EINVAL; |
goto leave; |
} |
time_constant = txc->constant; |
} |
|
if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ |
if (txc->modes == ADJ_OFFSET_SINGLESHOT) { |
/* adjtime() is independent from ntp_adjtime() */ |
time_adjust = txc->offset; |
} |
else if ( time_status & (STA_PLL | STA_PPSTIME) ) { |
ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == |
(STA_PPSTIME | STA_PPSSIGNAL) ? |
pps_offset : txc->offset; |
|
/* |
* Scale the phase adjustment and |
* clamp to the operating range. |
*/ |
if (ltemp > MAXPHASE) |
time_offset = MAXPHASE << SHIFT_UPDATE; |
else if (ltemp < -MAXPHASE) |
time_offset = -(MAXPHASE << SHIFT_UPDATE); |
else |
time_offset = ltemp << SHIFT_UPDATE; |
|
/* |
* Select whether the frequency is to be controlled |
* and in which mode (PLL or FLL). Clamp to the operating |
* range. Ugly multiply/divide should be replaced someday. |
*/ |
|
if (time_status & STA_FREQHOLD || time_reftime == 0) |
time_reftime = xtime.tv_sec; |
mtemp = xtime.tv_sec - time_reftime; |
time_reftime = xtime.tv_sec; |
if (time_status & STA_FLL) { |
if (mtemp >= MINSEC) { |
ltemp = (time_offset / mtemp) << (SHIFT_USEC - |
SHIFT_UPDATE); |
if (ltemp < 0) |
time_freq -= -ltemp >> SHIFT_KH; |
else |
time_freq += ltemp >> SHIFT_KH; |
} else /* calibration interval too short (p. 12) */ |
result = TIME_ERROR; |
} else { /* PLL mode */ |
if (mtemp < MAXSEC) { |
ltemp *= mtemp; |
if (ltemp < 0) |
time_freq -= -ltemp >> (time_constant + |
time_constant + |
SHIFT_KF - SHIFT_USEC); |
else |
time_freq += ltemp >> (time_constant + |
time_constant + |
SHIFT_KF - SHIFT_USEC); |
} else /* calibration interval too long (p. 12) */ |
result = TIME_ERROR; |
} |
if (time_freq > time_tolerance) |
time_freq = time_tolerance; |
else if (time_freq < -time_tolerance) |
time_freq = -time_tolerance; |
} /* STA_PLL || STA_PPSTIME */ |
} /* txc->modes & ADJ_OFFSET */ |
if (txc->modes & ADJ_TICK) { |
/* if the quartz is off by more than 10% something is |
VERY wrong ! */ |
if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) { |
result = -EINVAL; |
goto leave; |
} |
tick = txc->tick; |
} |
} /* txc->modes */ |
leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 |
|| ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0 |
&& (time_status & STA_PPSSIGNAL) == 0) |
/* p. 24, (b) */ |
|| ((time_status & (STA_PPSTIME|STA_PPSJITTER)) |
== (STA_PPSTIME|STA_PPSJITTER)) |
/* p. 24, (c) */ |
|| ((time_status & STA_PPSFREQ) != 0 |
&& (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0)) |
/* p. 24, (d) */ |
result = TIME_ERROR; |
|
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) |
txc->offset = save_adjust; |
else { |
if (time_offset < 0) |
txc->offset = -(-time_offset >> SHIFT_UPDATE); |
else |
txc->offset = time_offset >> SHIFT_UPDATE; |
} |
txc->freq = time_freq + pps_freq; |
txc->maxerror = time_maxerror; |
txc->esterror = time_esterror; |
txc->status = time_status; |
txc->constant = time_constant; |
txc->precision = time_precision; |
txc->tolerance = time_tolerance; |
txc->tick = tick; |
txc->ppsfreq = pps_freq; |
txc->jitter = pps_jitter >> PPS_AVG; |
txc->shift = pps_shift; |
txc->stabil = pps_stabil; |
txc->jitcnt = pps_jitcnt; |
txc->calcnt = pps_calcnt; |
txc->errcnt = pps_errcnt; |
txc->stbcnt = pps_stbcnt; |
write_unlock_irq(&xtime_lock); |
do_gettimeofday(&txc->time); |
return(result); |
} |
|
asmlinkage long sys_adjtimex(struct timex *txc_p) |
{ |
struct timex txc; /* Local copy of parameter */ |
int ret; |
|
/* Copy the user data space into the kernel copy |
* structure. But bear in mind that the structures |
* may change |
*/ |
if(copy_from_user(&txc, txc_p, sizeof(struct timex))) |
return -EFAULT; |
ret = do_adjtimex(&txc); |
return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; |
} |
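
A minimal userspace sketch of the ADJ_OFFSET_SINGLESHOT path validated above, i.e. the adjtime()-style slew that do_adjtimex() parks in time_adjust (the offset value is illustrative; assumes <sys/timex.h>):

	#include <sys/timex.h>
	#include <string.h>

	int slew_clock_by_100ms(void)
	{
		struct timex tx;

		memset(&tx, 0, sizeof(tx));
		tx.modes  = ADJ_OFFSET_SINGLESHOT;	/* must not be combined with other mode bits */
		tx.offset = 100000;			/* microseconds to slew gradually */
		/* on return, tx.offset holds the previously outstanding adjustment */
		return adjtimex(&tx);
	}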
/dma.c
0,0 → 1,128
/* $Id: dma.c,v 1.1.1.1 2004-04-15 02:30:21 phoenix Exp $ |
* linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c. |
* |
* Written by Hennus Bergman, 1992. |
* |
* 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma. |
* In the previous version the reported device could end up being wrong, |
* if a device requested a DMA channel that was already in use. |
* [It also happened to remove the sizeof(char *) == sizeof(int) |
* assumption introduced because of those /proc/dma patches. -- Hennus] |
*/ |
|
#include <linux/kernel.h> |
#include <linux/errno.h> |
#include <linux/spinlock.h> |
#include <linux/string.h> |
#include <asm/dma.h> |
#include <asm/system.h> |
|
|
|
/* A note on resource allocation: |
* |
* All drivers needing DMA channels, should allocate and release them |
* through the public routines `request_dma()' and `free_dma()'. |
* |
* In order to avoid problems, all processes should allocate resources in |
* the same sequence and release them in the reverse order. |
* |
* So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA. |
* When releasing them, first release the DMA, then release the IRQ. |
* If you don't, you may cause allocation requests to fail unnecessarily. |
* This doesn't really matter now, but it will once we get real semaphores |
* in the kernel. |
*/ |
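
A sketch of that ordering as it would look in a hypothetical ISA driver's setup and teardown paths (MY_IRQ, MY_DMA and my_handler are placeholders):

	/* acquire: IRQ first, then the DMA channel */
	if (request_irq(MY_IRQ, my_handler, SA_INTERRUPT, "mydev", NULL))
		return -EBUSY;
	if (request_dma(MY_DMA, "mydev")) {
		free_irq(MY_IRQ, NULL);
		return -EBUSY;
	}

	/* ... device operates ... */

	/* release in reverse order: DMA channel first, then the IRQ */
	free_dma(MY_DMA);
	free_irq(MY_IRQ, NULL);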
|
|
spinlock_t dma_spin_lock = SPIN_LOCK_UNLOCKED; |
|
/* |
* If our port doesn't define this it has no PC like DMA |
*/ |
|
#ifdef MAX_DMA_CHANNELS |
|
|
/* Channel n is busy iff dma_chan_busy[n].lock != 0. |
* DMA0 used to be reserved for DRAM refresh, but apparently not any more... |
* DMA4 is reserved for cascading. |
*/ |
|
struct dma_chan { |
int lock; |
const char *device_id; |
}; |
|
static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = { |
{ 0, 0 }, |
{ 0, 0 }, |
{ 0, 0 }, |
{ 0, 0 }, |
{ 1, "cascade" }, |
{ 0, 0 }, |
{ 0, 0 }, |
{ 0, 0 } |
}; |
|
int get_dma_list(char *buf) |
{ |
int i, len = 0; |
|
for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) { |
if (dma_chan_busy[i].lock) { |
len += sprintf(buf+len, "%2d: %s\n", |
i, |
dma_chan_busy[i].device_id); |
} |
} |
return len; |
} /* get_dma_list */ |
|
|
int request_dma(unsigned int dmanr, const char * device_id) |
{ |
if (dmanr >= MAX_DMA_CHANNELS) |
return -EINVAL; |
|
if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0) |
return -EBUSY; |
|
dma_chan_busy[dmanr].device_id = device_id; |
|
/* old flag was 0, now contains 1 to indicate busy */ |
return 0; |
} /* request_dma */ |
|
|
void free_dma(unsigned int dmanr) |
{ |
if (dmanr >= MAX_DMA_CHANNELS) { |
printk("Trying to free DMA%d\n", dmanr); |
return; |
} |
|
if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) { |
printk("Trying to free free DMA%d\n", dmanr); |
return; |
} |
|
} /* free_dma */ |
|
#else |
|
int request_dma(unsigned int dmanr, const char *device_id) |
{ |
return -EINVAL; |
} |
|
void free_dma(unsigned int dmanr) |
{ |
} |
|
int get_dma_list(char *buf) |
{ |
strcpy(buf, "No DMA\n"); |
return 7; |
} |
#endif |
/fork.c
0,0 → 1,895
/* |
* linux/kernel/fork.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
*/ |
|
/* |
* 'fork.c' contains the help-routines for the 'fork' system call |
* (see also entry.S and others). |
* Fork is rather simple, once you get the hang of it, but the memory |
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()' |
*/ |
|
#include <linux/config.h> |
#include <linux/slab.h> |
#include <linux/init.h> |
#include <linux/unistd.h> |
#include <linux/smp_lock.h> |
#include <linux/module.h> |
#include <linux/vmalloc.h> |
#include <linux/completion.h> |
#include <linux/namespace.h> |
#include <linux/personality.h> |
#include <linux/compiler.h> |
|
#include <asm/pgtable.h> |
#include <asm/pgalloc.h> |
#include <asm/uaccess.h> |
#include <asm/mmu_context.h> |
#include <asm/processor.h> |
|
/* The idle threads do not count.. */ |
int nr_threads; |
int nr_running; |
|
int max_threads; |
unsigned long total_forks; /* Handle normal Linux uptimes. */ |
int last_pid; |
|
struct task_struct *pidhash[PIDHASH_SZ]; |
|
void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait) |
{ |
unsigned long flags; |
|
wait->flags &= ~WQ_FLAG_EXCLUSIVE; |
wq_write_lock_irqsave(&q->lock, flags); |
__add_wait_queue(q, wait); |
wq_write_unlock_irqrestore(&q->lock, flags); |
} |
|
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait) |
{ |
unsigned long flags; |
|
wait->flags |= WQ_FLAG_EXCLUSIVE; |
wq_write_lock_irqsave(&q->lock, flags); |
__add_wait_queue_tail(q, wait); |
wq_write_unlock_irqrestore(&q->lock, flags); |
} |
|
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait) |
{ |
unsigned long flags; |
|
wq_write_lock_irqsave(&q->lock, flags); |
__remove_wait_queue(q, wait); |
wq_write_unlock_irqrestore(&q->lock, flags); |
} |
|
void __init fork_init(unsigned long mempages) |
{ |
/* |
* The default maximum number of threads is set to a safe |
* value: the thread structures can take up at most half |
* of memory. |
*/ |
max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; |
|
init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; |
init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2; |
} |
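
As a worked example of the limit computed above, assuming a 4 KiB PAGE_SIZE and an 8 KiB THREAD_SIZE (two pages per thread): a 128 MiB machine has mempages = 32768, so max_threads = 32768 / 2 / 8 = 2048, and init's RLIMIT_NPROC defaults to half of that, 1024.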
|
/* Protects next_safe and last_pid. */ |
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED; |
|
static int get_pid(unsigned long flags) |
{ |
static int next_safe = PID_MAX; |
struct task_struct *p; |
int pid, beginpid; |
|
if (flags & CLONE_PID) |
return current->pid; |
|
spin_lock(&lastpid_lock); |
beginpid = last_pid; |
if((++last_pid) & 0xffff8000) { |
last_pid = 300; /* Skip daemons etc. */ |
goto inside; |
} |
if(last_pid >= next_safe) { |
inside: |
next_safe = PID_MAX; |
read_lock(&tasklist_lock); |
repeat: |
for_each_task(p) { |
if(p->pid == last_pid || |
p->pgrp == last_pid || |
p->tgid == last_pid || |
p->session == last_pid) { |
if(++last_pid >= next_safe) { |
if(last_pid & 0xffff8000) |
last_pid = 300; |
next_safe = PID_MAX; |
} |
if(unlikely(last_pid == beginpid)) { |
next_safe = 0; |
goto nomorepids; |
} |
goto repeat; |
} |
if(p->pid > last_pid && next_safe > p->pid) |
next_safe = p->pid; |
if(p->pgrp > last_pid && next_safe > p->pgrp) |
next_safe = p->pgrp; |
if(p->tgid > last_pid && next_safe > p->tgid) |
next_safe = p->tgid; |
if(p->session > last_pid && next_safe > p->session) |
next_safe = p->session; |
} |
read_unlock(&tasklist_lock); |
} |
pid = last_pid; |
spin_unlock(&lastpid_lock); |
|
return pid; |
|
nomorepids: |
read_unlock(&tasklist_lock); |
spin_unlock(&lastpid_lock); |
return 0; |
} |
|
static inline int dup_mmap(struct mm_struct * mm) |
{ |
struct vm_area_struct * mpnt, *tmp, **pprev; |
int retval; |
|
flush_cache_mm(current->mm); |
mm->locked_vm = 0; |
mm->mmap = NULL; |
mm->mmap_cache = NULL; |
mm->map_count = 0; |
mm->rss = 0; |
mm->cpu_vm_mask = 0; |
mm->swap_address = 0; |
pprev = &mm->mmap; |
|
/* |
* Add it to the mmlist after the parent. |
* Doing it this way means that we can order the list, |
* and fork() won't mess up the ordering significantly. |
* Add it first so that swapoff can see any swap entries. |
*/ |
spin_lock(&mmlist_lock); |
list_add(&mm->mmlist, ¤t->mm->mmlist); |
mmlist_nr++; |
spin_unlock(&mmlist_lock); |
|
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { |
struct file *file; |
|
retval = -ENOMEM; |
if(mpnt->vm_flags & VM_DONTCOPY) |
continue; |
tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); |
if (!tmp) |
goto fail_nomem; |
*tmp = *mpnt; |
tmp->vm_flags &= ~VM_LOCKED; |
tmp->vm_mm = mm; |
tmp->vm_next = NULL; |
file = tmp->vm_file; |
if (file) { |
struct inode *inode = file->f_dentry->d_inode; |
get_file(file); |
if (tmp->vm_flags & VM_DENYWRITE) |
atomic_dec(&inode->i_writecount); |
|
/* insert tmp into the share list, just after mpnt */ |
spin_lock(&inode->i_mapping->i_shared_lock); |
if((tmp->vm_next_share = mpnt->vm_next_share) != NULL) |
mpnt->vm_next_share->vm_pprev_share = |
&tmp->vm_next_share; |
mpnt->vm_next_share = tmp; |
tmp->vm_pprev_share = &mpnt->vm_next_share; |
spin_unlock(&inode->i_mapping->i_shared_lock); |
} |
|
/* |
* Link in the new vma and copy the page table entries: |
* link in first so that swapoff can see swap entries. |
*/ |
spin_lock(&mm->page_table_lock); |
*pprev = tmp; |
pprev = &tmp->vm_next; |
mm->map_count++; |
retval = copy_page_range(mm, current->mm, tmp); |
spin_unlock(&mm->page_table_lock); |
|
if (tmp->vm_ops && tmp->vm_ops->open) |
tmp->vm_ops->open(tmp); |
|
if (retval) |
goto fail_nomem; |
} |
retval = 0; |
build_mmap_rb(mm); |
|
fail_nomem: |
flush_tlb_mm(current->mm); |
return retval; |
} |
|
spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; |
int mmlist_nr; |
|
#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) |
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) |
|
static struct mm_struct * mm_init(struct mm_struct * mm) |
{ |
atomic_set(&mm->mm_users, 1); |
atomic_set(&mm->mm_count, 1); |
init_rwsem(&mm->mmap_sem); |
mm->page_table_lock = SPIN_LOCK_UNLOCKED; |
mm->pgd = pgd_alloc(mm); |
mm->def_flags = 0; |
if (mm->pgd) |
return mm; |
free_mm(mm); |
return NULL; |
} |
|
|
/* |
* Allocate and initialize an mm_struct. |
*/ |
struct mm_struct * mm_alloc(void) |
{ |
struct mm_struct * mm; |
|
mm = allocate_mm(); |
if (mm) { |
memset(mm, 0, sizeof(*mm)); |
return mm_init(mm); |
} |
return NULL; |
} |
|
/* |
* Called when the last reference to the mm |
* is dropped: either by a lazy thread or by |
* mmput. Free the page directory and the mm. |
*/ |
inline void __mmdrop(struct mm_struct *mm) |
{ |
BUG_ON(mm == &init_mm); |
pgd_free(mm->pgd); |
check_pgt_cache(); |
destroy_context(mm); |
free_mm(mm); |
} |
|
/* |
* Decrement the use count and release all resources for an mm. |
*/ |
void mmput(struct mm_struct *mm) |
{ |
if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) { |
extern struct mm_struct *swap_mm; |
if (swap_mm == mm) |
swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist); |
list_del(&mm->mmlist); |
mmlist_nr--; |
spin_unlock(&mmlist_lock); |
exit_mmap(mm); |
mmdrop(mm); |
} |
} |
|
/* Please note the differences between mmput and mm_release. |
* mmput is called whenever we stop holding onto a mm_struct, |
* error success whatever. |
* |
* mm_release is called after a mm_struct has been removed |
* from the current process. |
* |
* This difference is important for error handling, when we |
* only half set up a mm_struct for a new process and need to restore |
* the old one. Because we mmput the new mm_struct before |
* restoring the old one. . . |
* Eric Biederman 10 January 1998 |
*/ |
void mm_release(void) |
{ |
struct task_struct *tsk = current; |
struct completion *vfork_done = tsk->vfork_done; |
|
/* notify parent sleeping on vfork() */ |
if (vfork_done) { |
tsk->vfork_done = NULL; |
complete(vfork_done); |
} |
} |
|
static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) |
{ |
struct mm_struct * mm, *oldmm; |
int retval; |
|
tsk->min_flt = tsk->maj_flt = 0; |
tsk->cmin_flt = tsk->cmaj_flt = 0; |
tsk->nswap = tsk->cnswap = 0; |
|
tsk->mm = NULL; |
tsk->active_mm = NULL; |
|
/* |
* Are we cloning a kernel thread? |
* |
	 * We need to steal an active VM for that.. |
*/ |
oldmm = current->mm; |
if (!oldmm) |
return 0; |
|
if (clone_flags & CLONE_VM) { |
atomic_inc(&oldmm->mm_users); |
mm = oldmm; |
goto good_mm; |
} |
|
retval = -ENOMEM; |
mm = allocate_mm(); |
if (!mm) |
goto fail_nomem; |
|
/* Copy the current MM stuff.. */ |
memcpy(mm, oldmm, sizeof(*mm)); |
if (!mm_init(mm)) |
goto fail_nomem; |
|
if (init_new_context(tsk,mm)) |
goto free_pt; |
|
down_write(&oldmm->mmap_sem); |
retval = dup_mmap(mm); |
up_write(&oldmm->mmap_sem); |
|
if (retval) |
goto free_pt; |
|
/* |
* child gets a private LDT (if there was an LDT in the parent) |
*/ |
copy_segments(tsk, mm); |
|
good_mm: |
tsk->mm = mm; |
tsk->active_mm = mm; |
return 0; |
|
free_pt: |
mmput(mm); |
fail_nomem: |
return retval; |
} |
|
static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) |
{ |
struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); |
/* We don't need to lock fs - think why ;-) */ |
if (fs) { |
atomic_set(&fs->count, 1); |
fs->lock = RW_LOCK_UNLOCKED; |
fs->umask = old->umask; |
read_lock(&old->lock); |
fs->rootmnt = mntget(old->rootmnt); |
fs->root = dget(old->root); |
fs->pwdmnt = mntget(old->pwdmnt); |
fs->pwd = dget(old->pwd); |
if (old->altroot) { |
fs->altrootmnt = mntget(old->altrootmnt); |
fs->altroot = dget(old->altroot); |
} else { |
fs->altrootmnt = NULL; |
fs->altroot = NULL; |
} |
read_unlock(&old->lock); |
} |
return fs; |
} |
|
struct fs_struct *copy_fs_struct(struct fs_struct *old) |
{ |
return __copy_fs_struct(old); |
} |
|
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) |
{ |
if (clone_flags & CLONE_FS) { |
atomic_inc(¤t->fs->count); |
return 0; |
} |
tsk->fs = __copy_fs_struct(current->fs); |
if (!tsk->fs) |
return -1; |
return 0; |
} |
|
static int count_open_files(struct files_struct *files, int size) |
{ |
int i; |
|
/* Find the last open fd */ |
for (i = size/(8*sizeof(long)); i > 0; ) { |
if (files->open_fds->fds_bits[--i]) |
break; |
} |
i = (i+1) * 8 * sizeof(long); |
return i; |
} |
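
A worked example of the scan above, assuming 32-bit longs: with size = 256 there are 256/32 = 8 bitmap words; if the highest open descriptor is fd 37 the loop stops at word 1, and the function returns (1 + 1) * 8 * 4 = 64. The count is thus rounded up to the word boundary covering the last open fd, which is exactly the granularity the fd-bitmap memcpy() calls in copy_files() below rely on.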
|
static int copy_files(unsigned long clone_flags, struct task_struct * tsk) |
{ |
struct files_struct *oldf, *newf; |
struct file **old_fds, **new_fds; |
int open_files, nfds, size, i, error = 0; |
|
/* |
* A background process may not have any files ... |
*/ |
oldf = current->files; |
if (!oldf) |
goto out; |
|
if (clone_flags & CLONE_FILES) { |
atomic_inc(&oldf->count); |
goto out; |
} |
|
/* |
* Note: we may be using current for both targets (See exec.c) |
* This works because we cache current->files (old) as oldf. Don't |
* break this. |
*/ |
tsk->files = NULL; |
error = -ENOMEM; |
newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); |
if (!newf) |
goto out; |
|
atomic_set(&newf->count, 1); |
|
newf->file_lock = RW_LOCK_UNLOCKED; |
newf->next_fd = 0; |
newf->max_fds = NR_OPEN_DEFAULT; |
newf->max_fdset = __FD_SETSIZE; |
newf->close_on_exec = &newf->close_on_exec_init; |
newf->open_fds = &newf->open_fds_init; |
newf->fd = &newf->fd_array[0]; |
|
/* We don't yet have the oldf readlock, but even if the old |
fdset gets grown now, we'll only copy up to "size" fds */ |
size = oldf->max_fdset; |
if (size > __FD_SETSIZE) { |
newf->max_fdset = 0; |
write_lock(&newf->file_lock); |
error = expand_fdset(newf, size-1); |
write_unlock(&newf->file_lock); |
if (error) |
goto out_release; |
} |
read_lock(&oldf->file_lock); |
|
open_files = count_open_files(oldf, size); |
|
/* |
* Check whether we need to allocate a larger fd array. |
* Note: we're not a clone task, so the open count won't |
* change. |
*/ |
nfds = NR_OPEN_DEFAULT; |
if (open_files > nfds) { |
read_unlock(&oldf->file_lock); |
newf->max_fds = 0; |
write_lock(&newf->file_lock); |
error = expand_fd_array(newf, open_files-1); |
write_unlock(&newf->file_lock); |
if (error) |
goto out_release; |
nfds = newf->max_fds; |
read_lock(&oldf->file_lock); |
} |
|
old_fds = oldf->fd; |
new_fds = newf->fd; |
|
memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); |
memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); |
|
for (i = open_files; i != 0; i--) { |
struct file *f = *old_fds++; |
if (f) |
get_file(f); |
*new_fds++ = f; |
} |
read_unlock(&oldf->file_lock); |
|
/* compute the remainder to be cleared */ |
size = (newf->max_fds - open_files) * sizeof(struct file *); |
|
	/* This is long word aligned thus could use an optimized version */ |
memset(new_fds, 0, size); |
|
if (newf->max_fdset > open_files) { |
int left = (newf->max_fdset-open_files)/8; |
int start = open_files / (8 * sizeof(unsigned long)); |
|
memset(&newf->open_fds->fds_bits[start], 0, left); |
memset(&newf->close_on_exec->fds_bits[start], 0, left); |
} |
|
tsk->files = newf; |
error = 0; |
out: |
return error; |
|
out_release: |
free_fdset (newf->close_on_exec, newf->max_fdset); |
free_fdset (newf->open_fds, newf->max_fdset); |
kmem_cache_free(files_cachep, newf); |
goto out; |
} |
|
/* |
* Helper to unshare the files of the current task. |
* We don't want to expose copy_files internals to |
* the exec layer of the kernel. |
*/ |
|
int unshare_files(void) |
{ |
struct files_struct *files = current->files; |
int rc; |
|
if(!files) |
BUG(); |
|
/* This can race but the race causes us to copy when we don't |
need to and drop the copy */ |
if(atomic_read(&files->count) == 1) |
{ |
atomic_inc(&files->count); |
return 0; |
} |
rc = copy_files(0, current); |
if(rc) |
current->files = files; |
return rc; |
} |
|
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) |
{ |
struct signal_struct *sig; |
|
if (clone_flags & CLONE_SIGHAND) { |
atomic_inc(¤t->sig->count); |
return 0; |
} |
sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL); |
tsk->sig = sig; |
if (!sig) |
return -1; |
spin_lock_init(&sig->siglock); |
atomic_set(&sig->count, 1); |
memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action)); |
return 0; |
} |
|
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) |
{ |
unsigned long new_flags = p->flags; |
|
new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU); |
new_flags |= PF_FORKNOEXEC; |
if (!(clone_flags & CLONE_PTRACE)) |
p->ptrace = 0; |
p->flags = new_flags; |
} |
|
long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) |
{ |
struct task_struct *task = current; |
unsigned old_task_dumpable; |
long ret; |
|
/* lock out any potential ptracer */ |
task_lock(task); |
if (task->ptrace) { |
task_unlock(task); |
return -EPERM; |
} |
|
old_task_dumpable = task->task_dumpable; |
task->task_dumpable = 0; |
task_unlock(task); |
|
ret = arch_kernel_thread(fn, arg, flags); |
|
/* never reached in child process, only in parent */ |
current->task_dumpable = old_task_dumpable; |
|
return ret; |
} |
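
A typical 2.4-era use of kernel_thread() from a driver or module, sketched with hypothetical names (daemonize() is the helper exported later in ksyms.c; kernel context, so <linux/sched.h> and friends are assumed):

	static int my_worker(void *unused)
	{
		daemonize();				/* drop the user mm, share init's fs and files */
		strcpy(current->comm, "my_worker");	/* name shown by ps */
		/* ... loop doing the module's work until asked to stop ... */
		return 0;
	}

	/* from the module's init path (hypothetical): */
	kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);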
|
/* |
* Ok, this is the main fork-routine. It copies the system process |
* information (task[nr]) and sets up the necessary registers. It also |
* copies the data segment in its entirety. The "stack_start" and |
* "stack_top" arguments are simply passed along to the platform |
* specific copy_thread() routine. Most platforms ignore stack_top. |
* For an example that's using stack_top, see |
* arch/ia64/kernel/process.c. |
*/ |
int do_fork(unsigned long clone_flags, unsigned long stack_start, |
struct pt_regs *regs, unsigned long stack_size) |
{ |
int retval; |
struct task_struct *p; |
struct completion vfork; |
|
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) |
return -EINVAL; |
|
retval = -EPERM; |
|
/* |
* CLONE_PID is only allowed for the initial SMP swapper |
* calls |
*/ |
if (clone_flags & CLONE_PID) { |
if (current->pid) |
goto fork_out; |
} |
|
retval = -ENOMEM; |
p = alloc_task_struct(); |
if (!p) |
goto fork_out; |
|
*p = *current; |
|
retval = -EAGAIN; |
/* |
* Check if we are over our maximum process limit, but be sure to |
* exclude root. This is needed to make it possible for login and |
* friends to set the per-user process limit to something lower |
* than the amount of processes root is running. -- Rik |
*/ |
if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur |
&& p->user != &root_user |
&& !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE)) |
goto bad_fork_free; |
|
atomic_inc(&p->user->__count); |
atomic_inc(&p->user->processes); |
|
/* |
* Counter increases are protected by |
* the kernel lock so nr_threads can't |
* increase under us (but it may decrease). |
*/ |
if (nr_threads >= max_threads) |
goto bad_fork_cleanup_count; |
|
get_exec_domain(p->exec_domain); |
|
if (p->binfmt && p->binfmt->module) |
__MOD_INC_USE_COUNT(p->binfmt->module); |
|
p->did_exec = 0; |
p->swappable = 0; |
p->state = TASK_UNINTERRUPTIBLE; |
|
copy_flags(clone_flags, p); |
p->pid = get_pid(clone_flags); |
if (p->pid == 0 && current->pid != 0) |
goto bad_fork_cleanup; |
|
p->run_list.next = NULL; |
p->run_list.prev = NULL; |
|
p->p_cptr = NULL; |
init_waitqueue_head(&p->wait_chldexit); |
p->vfork_done = NULL; |
if (clone_flags & CLONE_VFORK) { |
p->vfork_done = &vfork; |
init_completion(&vfork); |
} |
spin_lock_init(&p->alloc_lock); |
|
p->sigpending = 0; |
init_sigpending(&p->pending); |
|
p->it_real_value = p->it_virt_value = p->it_prof_value = 0; |
p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; |
init_timer(&p->real_timer); |
p->real_timer.data = (unsigned long) p; |
|
p->leader = 0; /* session leadership doesn't inherit */ |
p->tty_old_pgrp = 0; |
p->times.tms_utime = p->times.tms_stime = 0; |
p->times.tms_cutime = p->times.tms_cstime = 0; |
#ifdef CONFIG_SMP |
{ |
int i; |
p->cpus_runnable = ~0UL; |
p->processor = current->processor; |
/* ?? should we just memset this ?? */ |
for(i = 0; i < smp_num_cpus; i++) |
p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0; |
spin_lock_init(&p->sigmask_lock); |
} |
#endif |
p->lock_depth = -1; /* -1 = no lock */ |
p->start_time = jiffies; |
|
INIT_LIST_HEAD(&p->local_pages); |
|
retval = -ENOMEM; |
/* copy all the process information */ |
if (copy_files(clone_flags, p)) |
goto bad_fork_cleanup; |
if (copy_fs(clone_flags, p)) |
goto bad_fork_cleanup_files; |
if (copy_sighand(clone_flags, p)) |
goto bad_fork_cleanup_fs; |
if (copy_mm(clone_flags, p)) |
goto bad_fork_cleanup_sighand; |
retval = copy_namespace(clone_flags, p); |
if (retval) |
goto bad_fork_cleanup_mm; |
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); |
if (retval) |
goto bad_fork_cleanup_namespace; |
p->semundo = NULL; |
|
/* Our parent execution domain becomes current domain |
These must match for thread signalling to apply */ |
|
p->parent_exec_id = p->self_exec_id; |
|
/* ok, now we should be set up.. */ |
p->swappable = 1; |
p->exit_signal = clone_flags & CSIGNAL; |
p->pdeath_signal = 0; |
|
/* |
* "share" dynamic priority between parent and child, thus the |
* total amount of dynamic priorities in the system doesn't change, |
* more scheduling fairness. This is only important in the first |
* timeslice, on the long run the scheduling behaviour is unchanged. |
*/ |
p->counter = (current->counter + 1) >> 1; |
current->counter >>= 1; |
if (!current->counter) |
current->need_resched = 1; |
|
/* |
* Ok, add it to the run-queues and make it |
* visible to the rest of the system. |
* |
* Let it rip! |
*/ |
retval = p->pid; |
p->tgid = retval; |
INIT_LIST_HEAD(&p->thread_group); |
|
/* Need tasklist lock for parent etc handling! */ |
write_lock_irq(&tasklist_lock); |
|
/* CLONE_PARENT re-uses the old parent */ |
p->p_opptr = current->p_opptr; |
p->p_pptr = current->p_pptr; |
if (!(clone_flags & CLONE_PARENT)) { |
p->p_opptr = current; |
if (!(p->ptrace & PT_PTRACED)) |
p->p_pptr = current; |
} |
|
if (clone_flags & CLONE_THREAD) { |
p->tgid = current->tgid; |
list_add(&p->thread_group, ¤t->thread_group); |
} |
|
SET_LINKS(p); |
hash_pid(p); |
nr_threads++; |
write_unlock_irq(&tasklist_lock); |
|
if (p->ptrace & PT_PTRACED) |
send_sig(SIGSTOP, p, 1); |
|
wake_up_process(p); /* do this last */ |
++total_forks; |
if (clone_flags & CLONE_VFORK) |
wait_for_completion(&vfork); |
|
fork_out: |
return retval; |
|
bad_fork_cleanup_namespace: |
exit_namespace(p); |
bad_fork_cleanup_mm: |
exit_mm(p); |
if (p->active_mm) |
mmdrop(p->active_mm); |
bad_fork_cleanup_sighand: |
exit_sighand(p); |
bad_fork_cleanup_fs: |
exit_fs(p); /* blocking */ |
bad_fork_cleanup_files: |
exit_files(p); /* blocking */ |
bad_fork_cleanup: |
put_exec_domain(p->exec_domain); |
if (p->binfmt && p->binfmt->module) |
__MOD_DEC_USE_COUNT(p->binfmt->module); |
bad_fork_cleanup_count: |
atomic_dec(&p->user->processes); |
free_uid(p->user); |
bad_fork_free: |
free_task_struct(p); |
goto fork_out; |
} |
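
For context, the clone_flags arriving here come from thin per-arch wrappers; on i386 they are roughly the following (a sketch, not verbatim):

	/* fork(2): copy everything, notify the parent with SIGCHLD */
	do_fork(SIGCHLD, regs.esp, &regs, 0);

	/* vfork(2): share the address space and block the parent until exec/exit */
	do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0);

	/* clone(2): flags come from user space; a pthread-style thread would pass
	 * CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD */
	do_fork(clone_flags, newsp, &regs, 0);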
|
/* SLAB cache for signal_struct structures (tsk->sig) */ |
kmem_cache_t *sigact_cachep; |
|
/* SLAB cache for files_struct structures (tsk->files) */ |
kmem_cache_t *files_cachep; |
|
/* SLAB cache for fs_struct structures (tsk->fs) */ |
kmem_cache_t *fs_cachep; |
|
/* SLAB cache for vm_area_struct structures */ |
kmem_cache_t *vm_area_cachep; |
|
/* SLAB cache for mm_struct structures (tsk->mm) */ |
kmem_cache_t *mm_cachep; |
|
void __init proc_caches_init(void) |
{ |
sigact_cachep = kmem_cache_create("signal_act", |
sizeof(struct signal_struct), 0, |
SLAB_HWCACHE_ALIGN, NULL, NULL); |
if (!sigact_cachep) |
panic("Cannot create signal action SLAB cache"); |
|
files_cachep = kmem_cache_create("files_cache", |
sizeof(struct files_struct), 0, |
SLAB_HWCACHE_ALIGN, NULL, NULL); |
if (!files_cachep) |
panic("Cannot create files SLAB cache"); |
|
fs_cachep = kmem_cache_create("fs_cache", |
sizeof(struct fs_struct), 0, |
SLAB_HWCACHE_ALIGN, NULL, NULL); |
if (!fs_cachep) |
panic("Cannot create fs_struct SLAB cache"); |
|
vm_area_cachep = kmem_cache_create("vm_area_struct", |
sizeof(struct vm_area_struct), 0, |
SLAB_HWCACHE_ALIGN, NULL, NULL); |
if(!vm_area_cachep) |
panic("vma_init: Cannot alloc vm_area_struct SLAB cache"); |
|
mm_cachep = kmem_cache_create("mm_struct", |
sizeof(struct mm_struct), 0, |
SLAB_HWCACHE_ALIGN, NULL, NULL); |
if(!mm_cachep) |
panic("vma_init: Cannot alloc mm_struct SLAB cache"); |
} |
/ksyms.c
0,0 → 1,624
/* |
* Herein lies all the functions/variables that are "exported" for linkage |
* with dynamically loaded kernel modules. |
* Jon. |
* |
* - Stacked module support and unified symbol table added (June 1994) |
* - External symbol table support added (December 1994) |
* - Versions on symbols added (December 1994) |
* by Bjorn Ekwall <bj0rn@blox.se> |
*/ |
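
As a sketch of how this export table is consumed: a minimal (hypothetical) 2.4 module can call any symbol listed below directly and can publish its own symbols the same way with EXPORT_SYMBOL:

	#include <linux/module.h>
	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/errno.h>
	#include <linux/slab.h>

	int my_shared_helper(int x)		/* becomes visible to other modules */
	{
		return x * 2;
	}
	EXPORT_SYMBOL(my_shared_helper);

	static int __init my_init(void)
	{
		void *p = kmalloc(64, GFP_KERNEL);	/* kmalloc is exported below */
		if (!p)
			return -ENOMEM;
		kfree(p);
		return 0;
	}

	static void __exit my_exit(void)
	{
	}

	module_init(my_init);
	module_exit(my_exit);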
|
#include <linux/config.h> |
#include <linux/slab.h> |
#include <linux/module.h> |
#include <linux/blkdev.h> |
#include <linux/cdrom.h> |
#include <linux/kernel_stat.h> |
#include <linux/vmalloc.h> |
#include <linux/sys.h> |
#include <linux/utsname.h> |
#include <linux/interrupt.h> |
#include <linux/ioport.h> |
#include <linux/serial.h> |
#include <linux/locks.h> |
#include <linux/delay.h> |
#include <linux/random.h> |
#include <linux/reboot.h> |
#include <linux/pagemap.h> |
#include <linux/sysctl.h> |
#include <linux/hdreg.h> |
#include <linux/skbuff.h> |
#include <linux/genhd.h> |
#include <linux/blkpg.h> |
#include <linux/swap.h> |
#include <linux/ctype.h> |
#include <linux/file.h> |
#include <linux/iobuf.h> |
#include <linux/console.h> |
#include <linux/poll.h> |
#include <linux/mmzone.h> |
#include <linux/mm.h> |
#include <linux/capability.h> |
#include <linux/highuid.h> |
#include <linux/brlock.h> |
#include <linux/fs.h> |
#include <linux/tty.h> |
#include <linux/in6.h> |
#include <linux/completion.h> |
#include <linux/seq_file.h> |
#include <linux/dnotify.h> |
#include <linux/crc32.h> |
#include <linux/firmware.h> |
#include <asm/checksum.h> |
|
#if defined(CONFIG_PROC_FS) |
#include <linux/proc_fs.h> |
#endif |
#ifdef CONFIG_KMOD |
#include <linux/kmod.h> |
#endif |
|
extern void set_device_ro(kdev_t dev,int flag); |
|
extern void *sys_call_table; |
|
extern struct timezone sys_tz; |
extern int request_dma(unsigned int dmanr, char * deviceID); |
extern void free_dma(unsigned int dmanr); |
extern spinlock_t dma_spin_lock; |
extern int panic_timeout; |
|
#ifdef CONFIG_MODVERSIONS |
const struct module_symbol __export_Using_Versions |
__attribute__((section("__ksymtab"))) = { |
1 /* Version version */, "Using_Versions" |
}; |
#endif |
|
|
EXPORT_SYMBOL(inter_module_register); |
EXPORT_SYMBOL(inter_module_unregister); |
EXPORT_SYMBOL(inter_module_get); |
EXPORT_SYMBOL(inter_module_get_request); |
EXPORT_SYMBOL(inter_module_put); |
EXPORT_SYMBOL(try_inc_mod_count); |
|
/* process memory management */ |
EXPORT_SYMBOL(do_mmap_pgoff); |
EXPORT_SYMBOL(do_munmap); |
EXPORT_SYMBOL(do_brk); |
EXPORT_SYMBOL(exit_mm); |
EXPORT_SYMBOL(exit_files); |
EXPORT_SYMBOL(exit_fs); |
EXPORT_SYMBOL(exit_sighand); |
|
/* internal kernel memory management */ |
EXPORT_SYMBOL(_alloc_pages); |
EXPORT_SYMBOL(__alloc_pages); |
EXPORT_SYMBOL(alloc_pages_node); |
EXPORT_SYMBOL(__get_free_pages); |
EXPORT_SYMBOL(get_zeroed_page); |
EXPORT_SYMBOL(__free_pages); |
EXPORT_SYMBOL(free_pages); |
EXPORT_SYMBOL(num_physpages); |
EXPORT_SYMBOL(kmem_find_general_cachep); |
EXPORT_SYMBOL(kmem_cache_create); |
EXPORT_SYMBOL(kmem_cache_destroy); |
EXPORT_SYMBOL(kmem_cache_shrink); |
EXPORT_SYMBOL(kmem_cache_alloc); |
EXPORT_SYMBOL(kmem_cache_free); |
EXPORT_SYMBOL(kmem_cache_size); |
EXPORT_SYMBOL(kmalloc); |
EXPORT_SYMBOL(kfree); |
EXPORT_SYMBOL(vfree); |
EXPORT_SYMBOL(__vmalloc); |
EXPORT_SYMBOL(vmap); |
EXPORT_SYMBOL(vmalloc_to_page); |
EXPORT_SYMBOL(mem_map); |
EXPORT_SYMBOL(remap_page_range); |
EXPORT_SYMBOL(max_mapnr); |
EXPORT_SYMBOL(high_memory); |
EXPORT_SYMBOL(vmtruncate); |
EXPORT_SYMBOL(find_vma); |
EXPORT_SYMBOL(get_unmapped_area); |
EXPORT_SYMBOL(init_mm); |
#ifdef CONFIG_HIGHMEM |
EXPORT_SYMBOL(kmap_high); |
EXPORT_SYMBOL(kunmap_high); |
EXPORT_SYMBOL(highmem_start_page); |
EXPORT_SYMBOL(create_bounce); |
EXPORT_SYMBOL(kmap_prot); |
EXPORT_SYMBOL(kmap_pte); |
#endif |
|
/* filesystem internal functions */ |
EXPORT_SYMBOL(def_blk_fops); |
EXPORT_SYMBOL(update_atime); |
EXPORT_SYMBOL(get_fs_type); |
EXPORT_SYMBOL(get_super); |
EXPORT_SYMBOL(drop_super); |
EXPORT_SYMBOL(getname); |
EXPORT_SYMBOL(names_cachep); |
EXPORT_SYMBOL(fput); |
EXPORT_SYMBOL(fget); |
EXPORT_SYMBOL(igrab); |
EXPORT_SYMBOL(iunique); |
EXPORT_SYMBOL(ilookup); |
EXPORT_SYMBOL(iget4_locked); |
EXPORT_SYMBOL(unlock_new_inode); |
EXPORT_SYMBOL(iput); |
EXPORT_SYMBOL(inode_init_once); |
EXPORT_SYMBOL(__inode_init_once); |
EXPORT_SYMBOL(force_delete); |
EXPORT_SYMBOL(follow_up); |
EXPORT_SYMBOL(follow_down); |
EXPORT_SYMBOL(lookup_mnt); |
EXPORT_SYMBOL(path_init); |
EXPORT_SYMBOL(path_walk); |
EXPORT_SYMBOL(path_lookup); |
EXPORT_SYMBOL(path_release); |
EXPORT_SYMBOL(__user_walk); |
EXPORT_SYMBOL(lookup_one_len); |
EXPORT_SYMBOL(lookup_hash); |
EXPORT_SYMBOL(sys_close); |
EXPORT_SYMBOL(dcache_lock); |
EXPORT_SYMBOL(d_alloc_root); |
EXPORT_SYMBOL(d_delete); |
EXPORT_SYMBOL(dget_locked); |
EXPORT_SYMBOL(d_validate); |
EXPORT_SYMBOL(d_rehash); |
EXPORT_SYMBOL(d_invalidate); /* May be it will be better in dcache.h? */ |
EXPORT_SYMBOL(d_move); |
EXPORT_SYMBOL(d_instantiate); |
EXPORT_SYMBOL(d_alloc); |
EXPORT_SYMBOL(d_lookup); |
EXPORT_SYMBOL(__d_path); |
EXPORT_SYMBOL(mark_buffer_dirty); |
EXPORT_SYMBOL(set_buffer_async_io); /* for reiserfs_writepage */ |
EXPORT_SYMBOL(end_buffer_io_async); |
EXPORT_SYMBOL(__mark_buffer_dirty); |
EXPORT_SYMBOL(__mark_inode_dirty); |
EXPORT_SYMBOL(fd_install); |
EXPORT_SYMBOL(get_empty_filp); |
EXPORT_SYMBOL(init_private_file); |
EXPORT_SYMBOL(filp_open); |
EXPORT_SYMBOL(filp_close); |
EXPORT_SYMBOL(put_filp); |
EXPORT_SYMBOL(files_lock); |
EXPORT_SYMBOL(check_disk_change); |
EXPORT_SYMBOL(__invalidate_buffers); |
EXPORT_SYMBOL(invalidate_bdev); |
EXPORT_SYMBOL(invalidate_inodes); |
EXPORT_SYMBOL(invalidate_device); |
EXPORT_SYMBOL(invalidate_inode_pages); |
EXPORT_SYMBOL(truncate_inode_pages); |
EXPORT_SYMBOL(fsync_dev); |
EXPORT_SYMBOL(fsync_no_super); |
EXPORT_SYMBOL(permission); |
EXPORT_SYMBOL(vfs_permission); |
EXPORT_SYMBOL(inode_setattr); |
EXPORT_SYMBOL(inode_change_ok); |
EXPORT_SYMBOL(write_inode_now); |
EXPORT_SYMBOL(notify_change); |
EXPORT_SYMBOL(set_blocksize); |
EXPORT_SYMBOL(sb_set_blocksize); |
EXPORT_SYMBOL(sb_min_blocksize); |
EXPORT_SYMBOL(getblk); |
EXPORT_SYMBOL(cdget); |
EXPORT_SYMBOL(cdput); |
EXPORT_SYMBOL(bdget); |
EXPORT_SYMBOL(bdput); |
EXPORT_SYMBOL(bread); |
EXPORT_SYMBOL(__brelse); |
EXPORT_SYMBOL(__bforget); |
EXPORT_SYMBOL(ll_rw_block); |
EXPORT_SYMBOL(submit_bh); |
EXPORT_SYMBOL(unlock_buffer); |
EXPORT_SYMBOL(__wait_on_buffer); |
EXPORT_SYMBOL(___wait_on_page); |
EXPORT_SYMBOL(generic_direct_IO); |
EXPORT_SYMBOL(discard_bh_page); |
EXPORT_SYMBOL(block_write_full_page); |
EXPORT_SYMBOL(block_read_full_page); |
EXPORT_SYMBOL(block_prepare_write); |
EXPORT_SYMBOL(block_sync_page); |
EXPORT_SYMBOL(generic_cont_expand); |
EXPORT_SYMBOL(cont_prepare_write); |
EXPORT_SYMBOL(generic_commit_write); |
EXPORT_SYMBOL(block_truncate_page); |
EXPORT_SYMBOL(generic_block_bmap); |
EXPORT_SYMBOL(generic_file_read); |
EXPORT_SYMBOL(do_generic_file_read); |
EXPORT_SYMBOL(do_generic_file_write); |
EXPORT_SYMBOL(do_generic_direct_read); |
EXPORT_SYMBOL(do_generic_direct_write); |
EXPORT_SYMBOL(generic_file_write); |
EXPORT_SYMBOL(generic_file_mmap); |
EXPORT_SYMBOL(generic_ro_fops); |
EXPORT_SYMBOL(generic_buffer_fdatasync); |
EXPORT_SYMBOL(page_hash_bits); |
EXPORT_SYMBOL(page_hash_table); |
EXPORT_SYMBOL(file_lock_list); |
EXPORT_SYMBOL(locks_init_lock); |
EXPORT_SYMBOL(locks_copy_lock); |
EXPORT_SYMBOL(posix_lock_file); |
EXPORT_SYMBOL(posix_test_lock); |
EXPORT_SYMBOL(posix_block_lock); |
EXPORT_SYMBOL(posix_unblock_lock); |
EXPORT_SYMBOL(posix_locks_deadlock); |
EXPORT_SYMBOL(locks_mandatory_area); |
EXPORT_SYMBOL(dput); |
EXPORT_SYMBOL(have_submounts); |
EXPORT_SYMBOL(d_find_alias); |
EXPORT_SYMBOL(d_prune_aliases); |
EXPORT_SYMBOL(prune_dcache); |
EXPORT_SYMBOL(shrink_dcache_sb); |
EXPORT_SYMBOL(shrink_dcache_parent); |
EXPORT_SYMBOL(find_inode_number); |
EXPORT_SYMBOL(is_subdir); |
EXPORT_SYMBOL(get_unused_fd); |
EXPORT_SYMBOL(put_unused_fd); |
EXPORT_SYMBOL(vfs_create); |
EXPORT_SYMBOL(vfs_mkdir); |
EXPORT_SYMBOL(vfs_mknod); |
EXPORT_SYMBOL(vfs_symlink); |
EXPORT_SYMBOL(vfs_link); |
EXPORT_SYMBOL(vfs_rmdir); |
EXPORT_SYMBOL(vfs_unlink); |
EXPORT_SYMBOL(vfs_rename); |
EXPORT_SYMBOL(vfs_statfs); |
EXPORT_SYMBOL(generic_read_dir); |
EXPORT_SYMBOL(generic_file_llseek); |
EXPORT_SYMBOL(no_llseek); |
EXPORT_SYMBOL(__pollwait); |
EXPORT_SYMBOL(poll_freewait); |
EXPORT_SYMBOL(ROOT_DEV); |
EXPORT_SYMBOL(__find_get_page); |
EXPORT_SYMBOL(__find_lock_page); |
EXPORT_SYMBOL(find_trylock_page); |
EXPORT_SYMBOL(find_or_create_page); |
EXPORT_SYMBOL(grab_cache_page_nowait); |
EXPORT_SYMBOL(read_cache_page); |
EXPORT_SYMBOL(set_page_dirty); |
EXPORT_SYMBOL(mark_page_accessed); |
EXPORT_SYMBOL(vfs_readlink); |
EXPORT_SYMBOL(vfs_follow_link); |
EXPORT_SYMBOL(page_readlink); |
EXPORT_SYMBOL(page_follow_link); |
EXPORT_SYMBOL(page_symlink_inode_operations); |
EXPORT_SYMBOL(block_symlink); |
EXPORT_SYMBOL(vfs_readdir); |
EXPORT_SYMBOL(__get_lease); |
EXPORT_SYMBOL(lease_get_mtime); |
EXPORT_SYMBOL(lock_may_read); |
EXPORT_SYMBOL(lock_may_write); |
EXPORT_SYMBOL(dcache_dir_open); |
EXPORT_SYMBOL(dcache_dir_close); |
EXPORT_SYMBOL(dcache_dir_lseek); |
EXPORT_SYMBOL(dcache_dir_fsync); |
EXPORT_SYMBOL(dcache_readdir); |
EXPORT_SYMBOL(dcache_dir_ops); |
|
/* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ |
EXPORT_SYMBOL(default_llseek); |
EXPORT_SYMBOL(dentry_open); |
EXPORT_SYMBOL(filemap_nopage); |
EXPORT_SYMBOL(filemap_sync); |
EXPORT_SYMBOL(filemap_fdatawrite); |
EXPORT_SYMBOL(filemap_fdatasync); |
EXPORT_SYMBOL(filemap_fdatawait); |
EXPORT_SYMBOL(lock_page); |
EXPORT_SYMBOL(unlock_page); |
EXPORT_SYMBOL(wakeup_page_waiters); |
|
/* device registration */ |
EXPORT_SYMBOL(register_chrdev); |
EXPORT_SYMBOL(unregister_chrdev); |
EXPORT_SYMBOL(register_blkdev); |
EXPORT_SYMBOL(unregister_blkdev); |
EXPORT_SYMBOL(tty_register_driver); |
EXPORT_SYMBOL(tty_unregister_driver); |
EXPORT_SYMBOL(tty_std_termios); |
|
/* block device driver support */ |
EXPORT_SYMBOL(blksize_size); |
EXPORT_SYMBOL(hardsect_size); |
EXPORT_SYMBOL(blk_size); |
EXPORT_SYMBOL(blk_dev); |
EXPORT_SYMBOL(is_read_only); |
EXPORT_SYMBOL(set_device_ro); |
EXPORT_SYMBOL(bmap); |
EXPORT_SYMBOL(sync_dev); |
EXPORT_SYMBOL(devfs_register_partitions); |
EXPORT_SYMBOL(blkdev_open); |
EXPORT_SYMBOL(blkdev_get); |
EXPORT_SYMBOL(blkdev_put); |
EXPORT_SYMBOL(ioctl_by_bdev); |
EXPORT_SYMBOL(grok_partitions); |
EXPORT_SYMBOL(register_disk); |
EXPORT_SYMBOL(tq_disk); |
EXPORT_SYMBOL(init_buffer); |
EXPORT_SYMBOL(refile_buffer); |
EXPORT_SYMBOL(max_sectors); |
EXPORT_SYMBOL(max_readahead); |
|
/* tty routines */ |
EXPORT_SYMBOL(tty_hangup); |
EXPORT_SYMBOL(tty_wait_until_sent); |
EXPORT_SYMBOL(tty_check_change); |
EXPORT_SYMBOL(tty_hung_up_p); |
EXPORT_SYMBOL(tty_flip_buffer_push); |
EXPORT_SYMBOL(tty_get_baud_rate); |
EXPORT_SYMBOL(do_SAK); |
|
/* filesystem registration */ |
EXPORT_SYMBOL(register_filesystem); |
EXPORT_SYMBOL(unregister_filesystem); |
EXPORT_SYMBOL(kern_mount); |
EXPORT_SYMBOL(__mntput); |
EXPORT_SYMBOL(may_umount); |
|
/* executable format registration */ |
EXPORT_SYMBOL(register_binfmt); |
EXPORT_SYMBOL(unregister_binfmt); |
EXPORT_SYMBOL(search_binary_handler); |
EXPORT_SYMBOL(prepare_binprm); |
EXPORT_SYMBOL(compute_creds); |
EXPORT_SYMBOL(remove_arg_zero); |
EXPORT_SYMBOL(set_binfmt); |
|
/* sysctl table registration */ |
EXPORT_SYMBOL(register_sysctl_table); |
EXPORT_SYMBOL(unregister_sysctl_table); |
EXPORT_SYMBOL(sysctl_string); |
EXPORT_SYMBOL(sysctl_intvec); |
EXPORT_SYMBOL(sysctl_jiffies); |
EXPORT_SYMBOL(proc_dostring); |
EXPORT_SYMBOL(proc_dointvec); |
EXPORT_SYMBOL(proc_dointvec_jiffies); |
EXPORT_SYMBOL(proc_dointvec_minmax); |
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); |
EXPORT_SYMBOL(proc_doulongvec_minmax); |
|
/* interrupt handling */ |
EXPORT_SYMBOL(add_timer); |
EXPORT_SYMBOL(del_timer); |
EXPORT_SYMBOL(request_irq); |
EXPORT_SYMBOL(free_irq); |
#if !defined(CONFIG_IA64) /* irq_stat is part of struct cpuinfo_ia64 */ |
EXPORT_SYMBOL(irq_stat); |
#endif |
|
/* waitqueue handling */ |
EXPORT_SYMBOL(add_wait_queue); |
EXPORT_SYMBOL(add_wait_queue_exclusive); |
EXPORT_SYMBOL(remove_wait_queue); |
|
/* completion handling */ |
EXPORT_SYMBOL(wait_for_completion); |
EXPORT_SYMBOL(complete); |
|
/* The notion of irq probe/assignment is foreign to S/390 */ |
|
#if !defined(CONFIG_ARCH_S390) |
EXPORT_SYMBOL(probe_irq_on); |
EXPORT_SYMBOL(probe_irq_off); |
#endif |
|
#ifdef CONFIG_SMP |
EXPORT_SYMBOL(del_timer_sync); |
#endif |
EXPORT_SYMBOL(mod_timer); |
EXPORT_SYMBOL(tq_timer); |
EXPORT_SYMBOL(tq_immediate); |
|
#ifdef CONFIG_SMP |
/* Various random spinlocks we want to export */ |
EXPORT_SYMBOL(tqueue_lock); |
|
/* Big-Reader lock implementation */ |
EXPORT_SYMBOL(__brlock_array); |
#ifndef __BRLOCK_USE_ATOMICS |
EXPORT_SYMBOL(__br_write_locks); |
#endif |
EXPORT_SYMBOL(__br_write_lock); |
EXPORT_SYMBOL(__br_write_unlock); |
#endif |
|
/* Kiobufs */ |
EXPORT_SYMBOL(alloc_kiovec); |
EXPORT_SYMBOL(free_kiovec); |
EXPORT_SYMBOL(expand_kiobuf); |
|
EXPORT_SYMBOL(map_user_kiobuf); |
EXPORT_SYMBOL(unmap_kiobuf); |
EXPORT_SYMBOL(lock_kiovec); |
EXPORT_SYMBOL(unlock_kiovec); |
EXPORT_SYMBOL(brw_kiovec); |
EXPORT_SYMBOL(kiobuf_wait_for_io); |
|
/* dma handling */ |
EXPORT_SYMBOL(request_dma); |
EXPORT_SYMBOL(free_dma); |
EXPORT_SYMBOL(dma_spin_lock); |
#ifdef HAVE_DISABLE_HLT |
EXPORT_SYMBOL(disable_hlt); |
EXPORT_SYMBOL(enable_hlt); |
#endif |
|
/* resource handling */ |
EXPORT_SYMBOL(request_resource); |
EXPORT_SYMBOL(release_resource); |
EXPORT_SYMBOL(allocate_resource); |
EXPORT_SYMBOL(check_resource); |
EXPORT_SYMBOL(__request_region); |
EXPORT_SYMBOL(__check_region); |
EXPORT_SYMBOL(__release_region); |
EXPORT_SYMBOL(ioport_resource); |
EXPORT_SYMBOL(iomem_resource); |
|
/* process management */ |
EXPORT_SYMBOL(complete_and_exit); |
EXPORT_SYMBOL(__wake_up); |
EXPORT_SYMBOL(__wake_up_sync); |
EXPORT_SYMBOL(wake_up_process); |
EXPORT_SYMBOL(sleep_on); |
EXPORT_SYMBOL(sleep_on_timeout); |
EXPORT_SYMBOL(interruptible_sleep_on); |
EXPORT_SYMBOL(interruptible_sleep_on_timeout); |
EXPORT_SYMBOL(schedule); |
EXPORT_SYMBOL(schedule_timeout); |
#if CONFIG_SMP |
EXPORT_SYMBOL(set_cpus_allowed); |
#endif |
EXPORT_SYMBOL(yield); |
EXPORT_SYMBOL(__cond_resched); |
EXPORT_SYMBOL(jiffies); |
EXPORT_SYMBOL(xtime); |
EXPORT_SYMBOL(do_gettimeofday); |
EXPORT_SYMBOL(do_settimeofday); |
|
#if !defined(__ia64__) |
EXPORT_SYMBOL(loops_per_jiffy); |
#endif |
|
EXPORT_SYMBOL(kstat); |
EXPORT_SYMBOL(nr_running); |
|
/* misc */ |
EXPORT_SYMBOL(panic); |
EXPORT_SYMBOL(panic_notifier_list); |
EXPORT_SYMBOL(panic_timeout); |
EXPORT_SYMBOL(__out_of_line_bug); |
EXPORT_SYMBOL(sprintf); |
EXPORT_SYMBOL(snprintf); |
EXPORT_SYMBOL(sscanf); |
EXPORT_SYMBOL(vsprintf); |
EXPORT_SYMBOL(vsnprintf); |
EXPORT_SYMBOL(vsscanf); |
EXPORT_SYMBOL(kdevname); |
EXPORT_SYMBOL(bdevname); |
EXPORT_SYMBOL(cdevname); |
EXPORT_SYMBOL(simple_strtol); |
EXPORT_SYMBOL(simple_strtoul); |
EXPORT_SYMBOL(simple_strtoull); |
EXPORT_SYMBOL(system_utsname); /* UTS data */ |
EXPORT_SYMBOL(uts_sem); /* UTS semaphore */ |
#ifndef __mips__ |
EXPORT_SYMBOL(sys_call_table); |
#endif |
EXPORT_SYMBOL(machine_restart); |
EXPORT_SYMBOL(machine_halt); |
EXPORT_SYMBOL(machine_power_off); |
EXPORT_SYMBOL(_ctype); |
EXPORT_SYMBOL(secure_tcp_sequence_number); |
EXPORT_SYMBOL(get_random_bytes); |
EXPORT_SYMBOL(securebits); |
EXPORT_SYMBOL(cap_bset); |
EXPORT_SYMBOL(reparent_to_init); |
EXPORT_SYMBOL(daemonize); |
EXPORT_SYMBOL(csum_partial); /* for networking and md */ |
EXPORT_SYMBOL(seq_escape); |
EXPORT_SYMBOL(seq_printf); |
EXPORT_SYMBOL(seq_open); |
EXPORT_SYMBOL(seq_release); |
EXPORT_SYMBOL(seq_read); |
EXPORT_SYMBOL(seq_lseek); |
EXPORT_SYMBOL(single_open); |
EXPORT_SYMBOL(single_release); |
EXPORT_SYMBOL(seq_release_private); |
|
/* Program loader interfaces */ |
EXPORT_SYMBOL(setup_arg_pages); |
EXPORT_SYMBOL(copy_strings_kernel); |
EXPORT_SYMBOL(do_execve); |
EXPORT_SYMBOL(flush_old_exec); |
EXPORT_SYMBOL(kernel_read); |
EXPORT_SYMBOL(open_exec); |
|
/* Miscellaneous access points */ |
EXPORT_SYMBOL(si_meminfo); |
|
/* Added to make file system as module */ |
EXPORT_SYMBOL(sys_tz); |
EXPORT_SYMBOL(file_fsync); |
EXPORT_SYMBOL(fsync_buffers_list); |
EXPORT_SYMBOL(clear_inode); |
EXPORT_SYMBOL(___strtok); |
EXPORT_SYMBOL(init_special_inode); |
EXPORT_SYMBOL(read_ahead); |
EXPORT_SYMBOL(get_hash_table); |
EXPORT_SYMBOL(new_inode); |
EXPORT_SYMBOL(insert_inode_hash); |
EXPORT_SYMBOL(remove_inode_hash); |
EXPORT_SYMBOL(buffer_insert_list); |
EXPORT_SYMBOL(make_bad_inode); |
EXPORT_SYMBOL(is_bad_inode); |
EXPORT_SYMBOL(event); |
EXPORT_SYMBOL(brw_page); |
EXPORT_SYMBOL(__inode_dir_notify); |
|
#ifdef CONFIG_UID16 |
EXPORT_SYMBOL(overflowuid); |
EXPORT_SYMBOL(overflowgid); |
#endif |
EXPORT_SYMBOL(fs_overflowuid); |
EXPORT_SYMBOL(fs_overflowgid); |
|
/* all busmice */ |
EXPORT_SYMBOL(fasync_helper); |
EXPORT_SYMBOL(kill_fasync); |
|
EXPORT_SYMBOL(disk_name); /* for md.c */ |
|
/* binfmt_aout */ |
EXPORT_SYMBOL(get_write_access); |
|
/* library functions */ |
EXPORT_SYMBOL(strnicmp); |
EXPORT_SYMBOL(strspn); |
EXPORT_SYMBOL(strsep); |
|
#ifdef CONFIG_CRC32 |
EXPORT_SYMBOL(crc32_le); |
EXPORT_SYMBOL(crc32_be); |
EXPORT_SYMBOL(bitreverse); |
#endif |
|
#ifdef CONFIG_FW_LOADER |
EXPORT_SYMBOL(release_firmware); |
EXPORT_SYMBOL(request_firmware); |
EXPORT_SYMBOL(request_firmware_nowait); |
EXPORT_SYMBOL(register_firmware); |
#endif |
|
/* software interrupts */ |
EXPORT_SYMBOL(tasklet_hi_vec); |
EXPORT_SYMBOL(tasklet_vec); |
EXPORT_SYMBOL(bh_task_vec); |
EXPORT_SYMBOL(init_bh); |
EXPORT_SYMBOL(remove_bh); |
EXPORT_SYMBOL(tasklet_init); |
EXPORT_SYMBOL(tasklet_kill); |
EXPORT_SYMBOL(__run_task_queue); |
EXPORT_SYMBOL(do_softirq); |
EXPORT_SYMBOL(raise_softirq); |
EXPORT_SYMBOL(cpu_raise_softirq); |
EXPORT_SYMBOL(__tasklet_schedule); |
EXPORT_SYMBOL(__tasklet_hi_schedule); |
|
/* init task, for moving kthread roots - ought to export a function ?? */ |
|
EXPORT_SYMBOL(init_task_union); |
|
EXPORT_SYMBOL(tasklist_lock); |
EXPORT_SYMBOL(pidhash); |
EXPORT_SYMBOL(unshare_files); |
|
/* debug */ |
EXPORT_SYMBOL(dump_stack); |
|
/* To match ksyms with System.map */ |
extern const char _end[]; |
EXPORT_SYMBOL(_end); |
/printk.c
0,0 → 1,698
/* |
* linux/kernel/printk.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
* |
* Modified to make sys_syslog() more flexible: added commands to |
* return the last 4k of kernel messages, regardless of whether |
* they've been read or not. Added option to suppress kernel printk's |
* to the console. Added hook for sending the console messages |
* elsewhere, in preparation for a serial line console (someday). |
* Ted Ts'o, 2/11/93. |
* Modified for sysctl support, 1/8/97, Chris Horn. |
* Fixed SMP synchronization, 08/08/99, Manfred Spraul |
* manfreds@colorfullife.com |
* Rewrote bits to get rid of console_lock |
* 01Mar01 Andrew Morton <andrewm@uow.edu.au> |
*/ |
|
#include <linux/kernel.h> |
#include <linux/mm.h> |
#include <linux/tty.h> |
#include <linux/tty_driver.h> |
#include <linux/smp_lock.h> |
#include <linux/console.h> |
#include <linux/init.h> |
#include <linux/module.h> |
#include <linux/interrupt.h> /* For in_interrupt() */ |
#include <linux/config.h> |
|
#include <asm/uaccess.h> |
|
#if !defined(CONFIG_LOG_BUF_SHIFT) || (CONFIG_LOG_BUF_SHIFT == 0) |
#if defined(CONFIG_MULTIQUAD) || defined(CONFIG_IA64) |
#define LOG_BUF_LEN (65536) |
#elif defined(CONFIG_ARCH_S390) |
#define LOG_BUF_LEN (131072) |
#elif defined(CONFIG_SMP) |
#define LOG_BUF_LEN (32768) |
#else |
#define LOG_BUF_LEN (16384) /* This must be a power of two */ |
#endif |
#else /* CONFIG_LOG_BUF_SHIFT */ |
#define LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) |
#endif |
|
#define LOG_BUF_MASK (LOG_BUF_LEN-1) |
|
#ifndef arch_consoles_callable |
#define arch_consoles_callable() (1) |
#endif |
|
/* printk's without a loglevel use this.. */ |
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ |
|
/* We show everything that is MORE important than this.. */ |
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ |
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ |
|
DECLARE_WAIT_QUEUE_HEAD(log_wait); |
|
int console_printk[4] = { |
DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ |
DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ |
MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ |
DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ |
}; |
|
int oops_in_progress; |
|
/* |
* console_sem protects the console_drivers list, and also |
* provides serialisation for access to the entire console |
* driver system. |
*/ |
static DECLARE_MUTEX(console_sem); |
struct console *console_drivers; |
|
/* |
* logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars |
* It is also used in interesting ways to provide interlocking in |
* release_console_sem(). |
*/ |
static spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED; |
|
static char log_buf[LOG_BUF_LEN]; |
#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) |
|
/* |
* The indices into log_buf are not constrained to LOG_BUF_LEN - they |
* must be masked before subscripting |
*/ |
static unsigned long log_start; /* Index into log_buf: next char to be read by syslog() */ |
static unsigned long con_start; /* Index into log_buf: next char to be sent to consoles */ |
static unsigned long log_end; /* Index into log_buf: most-recently-written-char + 1 */ |
static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */ |
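/* |
 * Worked example (illustrative, not part of the original source): with |
 * LOG_BUF_LEN = 16384 the mask is 16383, so an unmasked index of 16386 |
 * refers to log_buf[2], since 16386 & LOG_BUF_MASK == 2.  The LOG_BUF() |
 * macro above applies exactly this masking, which is why the indices |
 * declared here may keep incrementing and simply wrap around the ring. |
 */ |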
|
struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; |
static int preferred_console = -1; |
|
/* Flag: console code may call schedule() */ |
static int console_may_schedule; |
|
/* |
* Setup a list of consoles. Called from init/main.c |
*/ |
static int __init console_setup(char *str) |
{ |
struct console_cmdline *c; |
char name[sizeof(c->name)]; |
char *s, *options; |
int i, idx; |
|
/* |
* Decode str into name, index, options. |
*/ |
if (str[0] >= '0' && str[0] <= '9') { |
strcpy(name, "ttyS"); |
strncpy(name + 4, str, sizeof(name) - 5); |
} else |
strncpy(name, str, sizeof(name) - 1); |
name[sizeof(name) - 1] = 0; |
if ((options = strchr(str, ',')) != NULL) |
*(options++) = 0; |
#ifdef __sparc__ |
if (!strcmp(str, "ttya")) |
strcpy(name, "ttyS0"); |
if (!strcmp(str, "ttyb")) |
strcpy(name, "ttyS1"); |
#endif |
for(s = name; *s; s++) |
if (*s >= '0' && *s <= '9') |
break; |
idx = simple_strtoul(s, NULL, 10); |
*s = 0; |
|
/* |
* See if this tty is not yet registered, and |
* if we have a slot free. |
*/ |
for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) |
if (strcmp(console_cmdline[i].name, name) == 0 && |
console_cmdline[i].index == idx) { |
preferred_console = i; |
return 1; |
} |
if (i == MAX_CMDLINECONSOLES) |
return 1; |
preferred_console = i; |
c = &console_cmdline[i]; |
memcpy(c->name, name, sizeof(c->name)); |
c->options = options; |
c->index = idx; |
return 1; |
} |
|
__setup("console=", console_setup); |
|
/* |
* Commands to do_syslog: |
* |
* 0 -- Close the log. Currently a NOP. |
* 1 -- Open the log. Currently a NOP. |
* 2 -- Read from the log. |
* 3 -- Read all messages remaining in the ring buffer. |
* 4 -- Read and clear all messages remaining in the ring buffer |
* 5 -- Clear ring buffer. |
* 6 -- Disable printk's to console |
* 7 -- Enable printk's to console |
* 8 -- Set level of messages printed to console |
* 9 -- Return number of unread characters in the log buffer |
*/ |
int do_syslog(int type, char * buf, int len) |
{ |
unsigned long i, j, limit, count; |
int do_clear = 0; |
char c; |
int error = 0; |
|
switch (type) { |
case 0: /* Close log */ |
break; |
case 1: /* Open log */ |
break; |
case 2: /* Read from log */ |
error = -EINVAL; |
if (!buf || len < 0) |
goto out; |
error = 0; |
if (!len) |
goto out; |
error = verify_area(VERIFY_WRITE,buf,len); |
if (error) |
goto out; |
error = wait_event_interruptible(log_wait, (log_start - log_end)); |
if (error) |
goto out; |
i = 0; |
spin_lock_irq(&logbuf_lock); |
while ((log_start != log_end) && i < len) { |
c = LOG_BUF(log_start); |
log_start++; |
spin_unlock_irq(&logbuf_lock); |
__put_user(c,buf); |
buf++; |
i++; |
spin_lock_irq(&logbuf_lock); |
} |
spin_unlock_irq(&logbuf_lock); |
error = i; |
break; |
case 4: /* Read/clear last kernel messages */ |
do_clear = 1; |
/* FALL THRU */ |
case 3: /* Read last kernel messages */ |
error = -EINVAL; |
if (!buf || len < 0) |
goto out; |
error = 0; |
if (!len) |
goto out; |
error = verify_area(VERIFY_WRITE,buf,len); |
if (error) |
goto out; |
count = len; |
if (count > LOG_BUF_LEN) |
count = LOG_BUF_LEN; |
spin_lock_irq(&logbuf_lock); |
if (count > logged_chars) |
count = logged_chars; |
if (do_clear) |
logged_chars = 0; |
limit = log_end; |
/* |
* __put_user() could sleep, and while we sleep |
* printk() could overwrite the messages |
* we try to copy to user space. Therefore |
* the messages are copied in reverse. <manfreds> |
*/ |
for(i=0;i < count;i++) { |
j = limit-1-i; |
if (j+LOG_BUF_LEN < log_end) |
break; |
c = LOG_BUF(j); |
spin_unlock_irq(&logbuf_lock); |
__put_user(c,&buf[count-1-i]); |
spin_lock_irq(&logbuf_lock); |
} |
spin_unlock_irq(&logbuf_lock); |
error = i; |
if(i != count) { |
int offset = count-error; |
/* buffer overflow during copy, correct user buffer. */ |
for(i=0;i<error;i++) { |
__get_user(c,&buf[i+offset]); |
__put_user(c,&buf[i]); |
} |
} |
|
break; |
case 5: /* Clear ring buffer */ |
spin_lock_irq(&logbuf_lock); |
logged_chars = 0; |
spin_unlock_irq(&logbuf_lock); |
break; |
case 6: /* Disable logging to console */ |
spin_lock_irq(&logbuf_lock); |
console_loglevel = minimum_console_loglevel; |
spin_unlock_irq(&logbuf_lock); |
break; |
case 7: /* Enable logging to console */ |
spin_lock_irq(&logbuf_lock); |
console_loglevel = default_console_loglevel; |
spin_unlock_irq(&logbuf_lock); |
break; |
case 8: /* Set level of messages printed to console */ |
error = -EINVAL; |
if (len < 1 || len > 8) |
goto out; |
if (len < minimum_console_loglevel) |
len = minimum_console_loglevel; |
spin_lock_irq(&logbuf_lock); |
console_loglevel = len; |
spin_unlock_irq(&logbuf_lock); |
error = 0; |
break; |
case 9: /* Number of chars in the log buffer */ |
spin_lock_irq(&logbuf_lock); |
error = log_end - log_start; |
spin_unlock_irq(&logbuf_lock); |
break; |
default: |
error = -EINVAL; |
break; |
} |
out: |
return error; |
} |
|
asmlinkage long sys_syslog(int type, char * buf, int len) |
{ |
if ((type != 3) && !capable(CAP_SYS_ADMIN)) |
return -EPERM; |
return do_syslog(type, buf, len); |
} |
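/* |
 * Minimal user-space sketch of the interface above (illustrative only; |
 * glibc exposes the syscall as klogctl(), declared in <sys/klog.h>): |
 * |
 *	char buf[4096]; |
 *	int n = klogctl(3, buf, sizeof(buf));	read all buffered messages |
 *	if (n > 0) |
 *		write(1, buf, n); |
 * |
 * Only type 3 is allowed without CAP_SYS_ADMIN, as enforced by |
 * sys_syslog() above. |
 */ |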
|
/* |
* Call the console drivers on a range of log_buf |
*/ |
static void __call_console_drivers(unsigned long start, unsigned long end) |
{ |
struct console *con; |
|
for (con = console_drivers; con; con = con->next) { |
if ((con->flags & CON_ENABLED) && con->write) |
con->write(con, &LOG_BUF(start), end - start); |
} |
} |
|
/* |
* Write out chars from start to end - 1 inclusive |
*/ |
static void _call_console_drivers(unsigned long start, unsigned long end, int msg_log_level) |
{ |
if (msg_log_level < console_loglevel && console_drivers && start != end) { |
if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { |
/* wrapped write */ |
__call_console_drivers(start & LOG_BUF_MASK, LOG_BUF_LEN); |
__call_console_drivers(0, end & LOG_BUF_MASK); |
} else { |
__call_console_drivers(start, end); |
} |
} |
} |
|
/* |
* Call the console drivers, asking them to write out |
* log_buf[start] to log_buf[end - 1]. |
* The console_sem must be held. |
*/ |
static void call_console_drivers(unsigned long start, unsigned long end) |
{ |
unsigned long cur_index, start_print; |
static int msg_level = -1; |
|
if (((long)(start - end)) > 0) |
BUG(); |
|
cur_index = start; |
start_print = start; |
while (cur_index != end) { |
if ( msg_level < 0 && |
((end - cur_index) > 2) && |
LOG_BUF(cur_index + 0) == '<' && |
LOG_BUF(cur_index + 1) >= '0' && |
LOG_BUF(cur_index + 1) <= '7' && |
LOG_BUF(cur_index + 2) == '>') |
{ |
msg_level = LOG_BUF(cur_index + 1) - '0'; |
cur_index += 3; |
start_print = cur_index; |
} |
while (cur_index != end) { |
char c = LOG_BUF(cur_index); |
cur_index++; |
|
if (c == '\n') { |
if (msg_level < 0) { |
/* |
* printk() has already given us loglevel tags in |
* the buffer. This code is here in case the |
* log buffer has wrapped right round and scribbled |
* on those tags |
*/ |
msg_level = default_message_loglevel; |
} |
_call_console_drivers(start_print, cur_index, msg_level); |
msg_level = -1; |
start_print = cur_index; |
break; |
} |
} |
} |
_call_console_drivers(start_print, end, msg_level); |
} |
|
static void emit_log_char(char c) |
{ |
LOG_BUF(log_end) = c; |
log_end++; |
if (log_end - log_start > LOG_BUF_LEN) |
log_start = log_end - LOG_BUF_LEN; |
if (log_end - con_start > LOG_BUF_LEN) |
con_start = log_end - LOG_BUF_LEN; |
if (logged_chars < LOG_BUF_LEN) |
logged_chars++; |
} |
|
/* |
* This is printk. It can be called from any context. We want it to work. |
* |
* We try to grab the console_sem. If we succeed, it's easy - we log the output and |
* call the console drivers. If we fail to get the semaphore we place the output |
* into the log buffer and return. The current holder of the console_sem will |
* notice the new output in release_console_sem() and will send it to the |
* consoles before releasing the semaphore. |
* |
* One effect of this deferred printing is that code which calls printk() and |
* then changes console_loglevel may break. This is because console_loglevel |
* is inspected when the actual printing occurs. |
*/ |
asmlinkage int printk(const char *fmt, ...) |
{ |
va_list args; |
unsigned long flags; |
int printed_len; |
char *p; |
static char printk_buf[1024]; |
static int log_level_unknown = 1; |
|
if (oops_in_progress) { |
/* If a crash is occurring, make sure we can't deadlock */ |
spin_lock_init(&logbuf_lock); |
/* And make sure that we print immediately */ |
init_MUTEX(&console_sem); |
} |
|
/* This stops the holder of console_sem just where we want him */ |
spin_lock_irqsave(&logbuf_lock, flags); |
|
/* Emit the output into the temporary buffer */ |
va_start(args, fmt); |
printed_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); |
va_end(args); |
|
/* |
* Copy the output into log_buf. If the caller didn't provide |
* appropriate log level tags, we insert them here |
*/ |
for (p = printk_buf; *p; p++) { |
if (log_level_unknown) { |
if (p[0] != '<' || p[1] < '0' || p[1] > '7' || p[2] != '>') { |
emit_log_char('<'); |
emit_log_char(default_message_loglevel + '0'); |
emit_log_char('>'); |
} |
log_level_unknown = 0; |
} |
emit_log_char(*p); |
if (*p == '\n') |
log_level_unknown = 1; |
} |
|
if (!arch_consoles_callable()) { |
/* |
* On some architectures, the consoles are not usable |
* on secondary CPUs early in the boot process. |
*/ |
spin_unlock_irqrestore(&logbuf_lock, flags); |
goto out; |
} |
if (!down_trylock(&console_sem)) { |
/* |
* We own the drivers. We can drop the spinlock and let |
* release_console_sem() print the text |
*/ |
spin_unlock_irqrestore(&logbuf_lock, flags); |
console_may_schedule = 0; |
release_console_sem(); |
} else { |
/* |
* Someone else owns the drivers. We drop the spinlock, which |
* allows the semaphore holder to proceed and to call the |
* console drivers with the output which we just produced. |
*/ |
spin_unlock_irqrestore(&logbuf_lock, flags); |
} |
out: |
return printed_len; |
} |
EXPORT_SYMBOL(printk); |
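/* |
 * Usage note (illustrative): callers normally prefix messages with a |
 * loglevel tag such as KERN_INFO ("<6>"); untagged text has the default |
 * level inserted by the copy loop above, e.g.: |
 * |
 *	printk(KERN_INFO "eth0: link up\n");	logged as "<6>eth0: link up\n" |
 *	printk("no tag here\n");		logged as "<4>no tag here\n" |
 */ |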
|
/** |
* acquire_console_sem - lock the console system for exclusive use. |
* |
* Acquires a semaphore which guarantees that the caller has |
* exclusive access to the console system and the console_drivers list. |
* |
* Can sleep, returns nothing. |
*/ |
void acquire_console_sem(void) |
{ |
if (in_interrupt()) |
BUG(); |
down(&console_sem); |
console_may_schedule = 1; |
} |
EXPORT_SYMBOL(acquire_console_sem); |
|
/** |
* release_console_sem - unlock the console system |
* |
* Releases the semaphore which the caller holds on the console system |
* and the console driver list. |
* |
* While the semaphore was held, console output may have been buffered |
* by printk(). If this is the case, release_console_sem() emits |
* the output prior to releasing the semaphore. |
* |
* If there is output waiting for klogd, we wake it up. |
* |
* release_console_sem() may be called from any context. |
*/ |
void release_console_sem(void) |
{ |
unsigned long flags; |
unsigned long _con_start, _log_end; |
unsigned long must_wake_klogd = 0; |
|
for ( ; ; ) { |
spin_lock_irqsave(&logbuf_lock, flags); |
must_wake_klogd |= log_start - log_end; |
if (con_start == log_end) |
break; /* Nothing to print */ |
_con_start = con_start; |
_log_end = log_end; |
con_start = log_end; /* Flush */ |
spin_unlock_irqrestore(&logbuf_lock, flags); |
call_console_drivers(_con_start, _log_end); |
} |
console_may_schedule = 0; |
up(&console_sem); |
spin_unlock_irqrestore(&logbuf_lock, flags); |
if (must_wake_klogd && !oops_in_progress) |
wake_up_interruptible(&log_wait); |
} |
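/* |
 * Typical usage of the pair above (an illustrative sketch): code that |
 * needs to walk or modify console_drivers brackets the access like this: |
 * |
 *	struct console *c; |
 * |
 *	acquire_console_sem(); |
 *	for (c = console_drivers; c; c = c->next) |
 *		... inspect or update c ... |
 *	release_console_sem(); |
 * |
 * release_console_sem() then flushes any printk() output that was |
 * buffered while the semaphore was held. |
 */ |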
|
/** |
 * console_conditional_schedule - yield the CPU if required |
* |
* If the console code is currently allowed to sleep, and |
* if this CPU should yield the CPU to another task, do |
* so here. |
* |
* Must be called within acquire_console_sem(). |
*/ |
void console_conditional_schedule(void) |
{ |
if (console_may_schedule && current->need_resched) { |
set_current_state(TASK_RUNNING); |
schedule(); |
} |
} |
|
void console_print(const char *s) |
{ |
printk(KERN_EMERG "%s", s); |
} |
EXPORT_SYMBOL(console_print); |
|
void console_unblank(void) |
{ |
struct console *c; |
|
/* |
* Try to get the console semaphore. If someone else owns it |
* we have to return without unblanking because console_unblank |
* may be called in interrupt context. |
*/ |
if (down_trylock(&console_sem) != 0) |
return; |
console_may_schedule = 0; |
for (c = console_drivers; c != NULL; c = c->next) |
if ((c->flags & CON_ENABLED) && c->unblank) |
c->unblank(); |
release_console_sem(); |
} |
EXPORT_SYMBOL(console_unblank); |
|
/* |
* The console driver calls this routine during kernel initialization |
* to register the console printing procedure with printk() and to |
* print any messages that were printed by the kernel before the |
* console driver was initialized. |
*/ |
void register_console(struct console * console) |
{ |
int i; |
unsigned long flags; |
|
/* |
* See if we want to use this console driver. If we |
* didn't select a console we take the first one |
* that registers here. |
*/ |
if (preferred_console < 0) { |
if (console->index < 0) |
console->index = 0; |
if (console->setup == NULL || |
console->setup(console, NULL) == 0) { |
console->flags |= CON_ENABLED | CON_CONSDEV; |
preferred_console = 0; |
} |
} |
|
/* |
* See if this console matches one we selected on |
* the command line. |
*/ |
for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) { |
if (strcmp(console_cmdline[i].name, console->name) != 0) |
continue; |
if (console->index >= 0 && |
console->index != console_cmdline[i].index) |
continue; |
if (console->index < 0) |
console->index = console_cmdline[i].index; |
if (console->setup && |
console->setup(console, console_cmdline[i].options) != 0) |
break; |
console->flags |= CON_ENABLED; |
console->index = console_cmdline[i].index; |
if (i == preferred_console) |
console->flags |= CON_CONSDEV; |
break; |
} |
|
if (!(console->flags & CON_ENABLED)) |
return; |
|
/* |
* Put this console in the list - keep the |
* preferred driver at the head of the list. |
*/ |
acquire_console_sem(); |
if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { |
console->next = console_drivers; |
console_drivers = console; |
} else { |
console->next = console_drivers->next; |
console_drivers->next = console; |
} |
if (console->flags & CON_PRINTBUFFER) { |
/* |
* release_console_sem() will print out the buffered messages for us. |
*/ |
spin_lock_irqsave(&logbuf_lock, flags); |
con_start = log_start; |
spin_unlock_irqrestore(&logbuf_lock, flags); |
} |
release_console_sem(); |
} |
EXPORT_SYMBOL(register_console); |
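/* |
 * Minimal registration sketch (illustrative only; "mycon" and |
 * mycon_write() are hypothetical names, not taken from a real driver): |
 * |
 *	static void mycon_write(struct console *con, const char *s, |
 *				unsigned count) |
 *	{ |
 *		... hand "count" bytes at "s" to the hardware ... |
 *	} |
 * |
 *	static struct console mycon = { |
 *		name:	"mycon", |
 *		write:	mycon_write, |
 *		flags:	CON_PRINTBUFFER, |
 *		index:	-1, |
 *	}; |
 * |
 *	register_console(&mycon); |
 * |
 * CON_PRINTBUFFER asks register_console() to replay the messages already |
 * in log_buf through the new console, as handled above. |
 */ |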
|
int unregister_console(struct console * console) |
{ |
struct console *a,*b; |
int res = 1; |
|
acquire_console_sem(); |
if (console_drivers == console) { |
console_drivers=console->next; |
res = 0; |
} else { |
for (a=console_drivers->next, b=console_drivers ; |
a; b=a, a=b->next) { |
if (a == console) { |
b->next = a->next; |
res = 0; |
break; |
} |
} |
} |
|
/* If last console is removed, we re-enable picking the first |
* one that gets registered. Without that, pmac early boot console |
* would prevent fbcon from taking over. |
*/ |
if (console_drivers == NULL) |
preferred_console = -1; |
|
|
release_console_sem(); |
return res; |
} |
EXPORT_SYMBOL(unregister_console); |
|
/** |
* tty_write_message - write a message to a certain tty, not just the console. |
* |
* This is used for messages that need to be redirected to a specific tty. |
 * We don't put it into the syslog queue right now; maybe we will in |
 * the future, if it is really needed. |
*/ |
void tty_write_message(struct tty_struct *tty, char *msg) |
{ |
if (tty && tty->driver.write) |
tty->driver.write(tty, 0, msg, strlen(msg)); |
return; |
} |
/exit.c
0,0 → 1,601
/* |
* linux/kernel/exit.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
*/ |
|
#include <linux/config.h> |
#include <linux/slab.h> |
#include <linux/interrupt.h> |
#include <linux/smp_lock.h> |
#include <linux/module.h> |
#include <linux/completion.h> |
#include <linux/personality.h> |
#include <linux/tty.h> |
#include <linux/namespace.h> |
#ifdef CONFIG_BSD_PROCESS_ACCT |
#include <linux/acct.h> |
#endif |
|
#include <asm/uaccess.h> |
#include <asm/pgtable.h> |
#include <asm/mmu_context.h> |
|
extern void sem_exit (void); |
extern struct task_struct *child_reaper; |
|
int getrusage(struct task_struct *, int, struct rusage *); |
|
static void release_task(struct task_struct * p) |
{ |
if (p != current) { |
#ifdef CONFIG_SMP |
/* |
* Wait to make sure the process isn't on the |
* runqueue (active on some other CPU still) |
*/ |
for (;;) { |
task_lock(p); |
if (!task_has_cpu(p)) |
break; |
task_unlock(p); |
do { |
cpu_relax(); |
barrier(); |
} while (task_has_cpu(p)); |
} |
task_unlock(p); |
#endif |
atomic_dec(&p->user->processes); |
free_uid(p->user); |
unhash_process(p); |
|
release_thread(p); |
current->cmin_flt += p->min_flt + p->cmin_flt; |
current->cmaj_flt += p->maj_flt + p->cmaj_flt; |
current->cnswap += p->nswap + p->cnswap; |
/* |
* Potentially available timeslices are retrieved |
* here - this way the parent does not get penalized |
* for creating too many processes. |
* |
* (this cannot be used to artificially 'generate' |
* timeslices, because any timeslice recovered here |
* was given away by the parent in the first place.) |
*/ |
current->counter += p->counter; |
if (current->counter >= MAX_COUNTER) |
current->counter = MAX_COUNTER; |
p->pid = 0; |
free_task_struct(p); |
} else { |
printk("task releasing itself\n"); |
} |
} |
|
/* |
* This checks not only the pgrp, but falls back on the pid if no |
* satisfactory pgrp is found. I dunno - gdb doesn't work correctly |
* without this... |
*/ |
int session_of_pgrp(int pgrp) |
{ |
struct task_struct *p; |
int fallback; |
|
fallback = -1; |
read_lock(&tasklist_lock); |
for_each_task(p) { |
if (p->session <= 0) |
continue; |
if (p->pgrp == pgrp) { |
fallback = p->session; |
break; |
} |
if (p->pid == pgrp) |
fallback = p->session; |
} |
read_unlock(&tasklist_lock); |
return fallback; |
} |
|
/* |
* Determine if a process group is "orphaned", according to the POSIX |
* definition in 2.2.2.52. Orphaned process groups are not to be affected |
* by terminal-generated stop signals. Newly orphaned process groups are |
* to receive a SIGHUP and a SIGCONT. |
* |
* "I ask you, have you ever known what it is to be an orphan?" |
*/ |
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task) |
{ |
struct task_struct *p; |
|
read_lock(&tasklist_lock); |
for_each_task(p) { |
if ((p == ignored_task) || (p->pgrp != pgrp) || |
(p->state == TASK_ZOMBIE) || |
(p->p_pptr->pid == 1)) |
continue; |
if ((p->p_pptr->pgrp != pgrp) && |
(p->p_pptr->session == p->session)) { |
read_unlock(&tasklist_lock); |
return 0; |
} |
} |
read_unlock(&tasklist_lock); |
return 1; /* (sighing) "Often!" */ |
} |
|
int is_orphaned_pgrp(int pgrp) |
{ |
return will_become_orphaned_pgrp(pgrp, 0); |
} |
|
static inline int has_stopped_jobs(int pgrp) |
{ |
int retval = 0; |
struct task_struct * p; |
|
read_lock(&tasklist_lock); |
for_each_task(p) { |
if (p->pgrp != pgrp) |
continue; |
if (p->state != TASK_STOPPED) |
continue; |
retval = 1; |
break; |
} |
read_unlock(&tasklist_lock); |
return retval; |
} |
|
/* |
* When we die, we re-parent all our children. |
* Try to give them to another thread in our thread |
* group, and if no such member exists, give it to |
* the global child reaper process (ie "init") |
*/ |
static inline void forget_original_parent(struct task_struct * father) |
{ |
struct task_struct * p; |
|
read_lock(&tasklist_lock); |
|
for_each_task(p) { |
if (p->p_opptr == father) { |
			/* We don't want people slaying init */ |
p->exit_signal = SIGCHLD; |
p->self_exec_id++; |
|
/* Make sure we're not reparenting to ourselves */ |
p->p_opptr = child_reaper; |
|
if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0); |
} |
} |
read_unlock(&tasklist_lock); |
} |
|
static inline void close_files(struct files_struct * files) |
{ |
int i, j; |
|
j = 0; |
for (;;) { |
unsigned long set; |
i = j * __NFDBITS; |
if (i >= files->max_fdset || i >= files->max_fds) |
break; |
set = files->open_fds->fds_bits[j++]; |
while (set) { |
if (set & 1) { |
struct file * file = xchg(&files->fd[i], NULL); |
if (file) |
filp_close(file, files); |
} |
i++; |
set >>= 1; |
} |
} |
} |
|
void put_files_struct(struct files_struct *files) |
{ |
if (atomic_dec_and_test(&files->count)) { |
close_files(files); |
/* |
* Free the fd and fdset arrays if we expanded them. |
*/ |
if (files->fd != &files->fd_array[0]) |
free_fd_array(files->fd, files->max_fds); |
if (files->max_fdset > __FD_SETSIZE) { |
free_fdset(files->open_fds, files->max_fdset); |
free_fdset(files->close_on_exec, files->max_fdset); |
} |
kmem_cache_free(files_cachep, files); |
} |
} |
|
static inline void __exit_files(struct task_struct *tsk) |
{ |
struct files_struct * files = tsk->files; |
|
if (files) { |
task_lock(tsk); |
tsk->files = NULL; |
task_unlock(tsk); |
put_files_struct(files); |
} |
} |
|
void exit_files(struct task_struct *tsk) |
{ |
__exit_files(tsk); |
} |
|
static inline void __put_fs_struct(struct fs_struct *fs) |
{ |
/* No need to hold fs->lock if we are killing it */ |
if (atomic_dec_and_test(&fs->count)) { |
dput(fs->root); |
mntput(fs->rootmnt); |
dput(fs->pwd); |
mntput(fs->pwdmnt); |
if (fs->altroot) { |
dput(fs->altroot); |
mntput(fs->altrootmnt); |
} |
kmem_cache_free(fs_cachep, fs); |
} |
} |
|
void put_fs_struct(struct fs_struct *fs) |
{ |
__put_fs_struct(fs); |
} |
|
static inline void __exit_fs(struct task_struct *tsk) |
{ |
struct fs_struct * fs = tsk->fs; |
|
if (fs) { |
task_lock(tsk); |
tsk->fs = NULL; |
task_unlock(tsk); |
__put_fs_struct(fs); |
} |
} |
|
void exit_fs(struct task_struct *tsk) |
{ |
__exit_fs(tsk); |
} |
|
/* |
* We can use these to temporarily drop into |
* "lazy TLB" mode and back. |
*/ |
struct mm_struct * start_lazy_tlb(void) |
{ |
struct mm_struct *mm = current->mm; |
current->mm = NULL; |
/* active_mm is still 'mm' */ |
atomic_inc(&mm->mm_count); |
enter_lazy_tlb(mm, current, smp_processor_id()); |
return mm; |
} |
|
void end_lazy_tlb(struct mm_struct *mm) |
{ |
struct mm_struct *active_mm = current->active_mm; |
|
current->mm = mm; |
if (mm != active_mm) { |
current->active_mm = mm; |
activate_mm(active_mm, mm); |
} |
mmdrop(active_mm); |
} |
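/* |
 * Usage sketch (illustrative): kernel code that wants to run for a while |
 * without a user address space brackets the region like this: |
 * |
 *	struct mm_struct *mm = start_lazy_tlb(); |
 *	... no user-space accesses in here ... |
 *	end_lazy_tlb(mm); |
 */ |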
|
/* |
* Turn us into a lazy TLB process if we |
* aren't already.. |
*/ |
static inline void __exit_mm(struct task_struct * tsk) |
{ |
struct mm_struct * mm = tsk->mm; |
|
mm_release(); |
if (mm) { |
atomic_inc(&mm->mm_count); |
BUG_ON(mm != tsk->active_mm); |
/* more a memory barrier than a real lock */ |
task_lock(tsk); |
tsk->mm = NULL; |
task_unlock(tsk); |
enter_lazy_tlb(mm, current, smp_processor_id()); |
mmput(mm); |
} |
} |
|
void exit_mm(struct task_struct *tsk) |
{ |
__exit_mm(tsk); |
} |
|
/* |
* Send signals to all our closest relatives so that they know |
* to properly mourn us.. |
*/ |
static void exit_notify(void) |
{ |
struct task_struct * p, *t; |
|
forget_original_parent(current); |
/* |
* Check to see if any process groups have become orphaned |
* as a result of our exiting, and if they have any stopped |
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) |
* |
* Case i: Our father is in a different pgrp than we are |
* and we were the only connection outside, so our pgrp |
* is about to become orphaned. |
*/ |
|
t = current->p_pptr; |
|
if ((t->pgrp != current->pgrp) && |
(t->session == current->session) && |
will_become_orphaned_pgrp(current->pgrp, current) && |
has_stopped_jobs(current->pgrp)) { |
kill_pg(current->pgrp,SIGHUP,1); |
kill_pg(current->pgrp,SIGCONT,1); |
} |
|
/* Let father know we died |
* |
* Thread signals are configurable, but you aren't going to use |
 * that to send signals to arbitrary processes. |
* That stops right now. |
* |
* If the parent exec id doesn't match the exec id we saved |
* when we started then we know the parent has changed security |
* domain. |
* |
* If our self_exec id doesn't match our parent_exec_id then |
* we have changed execution domain as these two values started |
* the same after a fork. |
* |
*/ |
|
if(current->exit_signal != SIGCHLD && |
( current->parent_exec_id != t->self_exec_id || |
current->self_exec_id != current->parent_exec_id) |
&& !capable(CAP_KILL)) |
current->exit_signal = SIGCHLD; |
|
|
/* |
* This loop does two things: |
* |
* A. Make init inherit all the child processes |
* B. Check to see if any process groups have become orphaned |
* as a result of our exiting, and if they have any stopped |
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) |
*/ |
|
write_lock_irq(&tasklist_lock); |
current->state = TASK_ZOMBIE; |
do_notify_parent(current, current->exit_signal); |
while (current->p_cptr != NULL) { |
p = current->p_cptr; |
current->p_cptr = p->p_osptr; |
p->p_ysptr = NULL; |
p->ptrace = 0; |
|
p->p_pptr = p->p_opptr; |
p->p_osptr = p->p_pptr->p_cptr; |
if (p->p_osptr) |
p->p_osptr->p_ysptr = p; |
p->p_pptr->p_cptr = p; |
if (p->state == TASK_ZOMBIE) |
do_notify_parent(p, p->exit_signal); |
/* |
* process group orphan check |
* Case ii: Our child is in a different pgrp |
* than we are, and it was the only connection |
* outside, so the child pgrp is now orphaned. |
*/ |
if ((p->pgrp != current->pgrp) && |
(p->session == current->session)) { |
int pgrp = p->pgrp; |
|
write_unlock_irq(&tasklist_lock); |
if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) { |
kill_pg(pgrp,SIGHUP,1); |
kill_pg(pgrp,SIGCONT,1); |
} |
write_lock_irq(&tasklist_lock); |
} |
} |
write_unlock_irq(&tasklist_lock); |
} |
|
NORET_TYPE void do_exit(long code) |
{ |
struct task_struct *tsk = current; |
|
if (in_interrupt()) |
panic("Aiee, killing interrupt handler!"); |
if (!tsk->pid) |
panic("Attempted to kill the idle task!"); |
if (tsk->pid == 1) |
panic("Attempted to kill init!"); |
tsk->flags |= PF_EXITING; |
del_timer_sync(&tsk->real_timer); |
|
fake_volatile: |
#ifdef CONFIG_BSD_PROCESS_ACCT |
acct_process(code); |
#endif |
__exit_mm(tsk); |
|
lock_kernel(); |
sem_exit(); |
__exit_files(tsk); |
__exit_fs(tsk); |
exit_namespace(tsk); |
exit_sighand(tsk); |
exit_thread(); |
|
if (current->leader) |
disassociate_ctty(1); |
|
put_exec_domain(tsk->exec_domain); |
if (tsk->binfmt && tsk->binfmt->module) |
__MOD_DEC_USE_COUNT(tsk->binfmt->module); |
|
tsk->exit_code = code; |
exit_notify(); |
schedule(); |
BUG(); |
/* |
* In order to get rid of the "volatile function does return" message |
* I did this little loop that confuses gcc to think do_exit really |
* is volatile. In fact it's schedule() that is volatile in some |
* circumstances: when current->state = ZOMBIE, schedule() never |
* returns. |
* |
* In fact the natural way to do all this is to have the label and the |
* goto right after each other, but I put the fake_volatile label at |
* the start of the function just in case something /really/ bad |
* happens, and the schedule returns. This way we can try again. I'm |
* not paranoid: it's just that everybody is out to get me. |
*/ |
goto fake_volatile; |
} |
|
NORET_TYPE void complete_and_exit(struct completion *comp, long code) |
{ |
if (comp) |
complete(comp); |
|
do_exit(code); |
} |
|
asmlinkage long sys_exit(int error_code) |
{ |
do_exit((error_code&0xff)<<8); |
} |
|
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) |
{ |
int flag, retval; |
DECLARE_WAITQUEUE(wait, current); |
struct task_struct *tsk; |
|
if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) |
return -EINVAL; |
|
	add_wait_queue(&current->wait_chldexit,&wait); |
repeat: |
flag = 0; |
current->state = TASK_INTERRUPTIBLE; |
read_lock(&tasklist_lock); |
tsk = current; |
do { |
struct task_struct *p; |
for (p = tsk->p_cptr ; p ; p = p->p_osptr) { |
if (pid>0) { |
if (p->pid != pid) |
continue; |
} else if (!pid) { |
if (p->pgrp != current->pgrp) |
continue; |
} else if (pid != -1) { |
if (p->pgrp != -pid) |
continue; |
} |
/* Wait for all children (clone and not) if __WALL is set; |
* otherwise, wait for clone children *only* if __WCLONE is |
* set; otherwise, wait for non-clone children *only*. (Note: |
* A "clone" child here is one that reports to its parent |
* using a signal other than SIGCHLD.) */ |
if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) |
&& !(options & __WALL)) |
continue; |
flag = 1; |
switch (p->state) { |
case TASK_STOPPED: |
if (!p->exit_code) |
continue; |
if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED)) |
continue; |
read_unlock(&tasklist_lock); |
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; |
if (!retval && stat_addr) |
retval = put_user((p->exit_code << 8) | 0x7f, stat_addr); |
if (!retval) { |
p->exit_code = 0; |
retval = p->pid; |
} |
goto end_wait4; |
case TASK_ZOMBIE: |
current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime; |
current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime; |
read_unlock(&tasklist_lock); |
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; |
if (!retval && stat_addr) |
retval = put_user(p->exit_code, stat_addr); |
if (retval) |
goto end_wait4; |
retval = p->pid; |
if (p->p_opptr != p->p_pptr) { |
write_lock_irq(&tasklist_lock); |
REMOVE_LINKS(p); |
p->p_pptr = p->p_opptr; |
SET_LINKS(p); |
do_notify_parent(p, SIGCHLD); |
write_unlock_irq(&tasklist_lock); |
} else |
release_task(p); |
goto end_wait4; |
default: |
continue; |
} |
} |
if (options & __WNOTHREAD) |
break; |
tsk = next_thread(tsk); |
} while (tsk != current); |
read_unlock(&tasklist_lock); |
if (flag) { |
retval = 0; |
if (options & WNOHANG) |
goto end_wait4; |
retval = -ERESTARTSYS; |
if (signal_pending(current)) |
goto end_wait4; |
schedule(); |
goto repeat; |
} |
retval = -ECHILD; |
end_wait4: |
current->state = TASK_RUNNING; |
	remove_wait_queue(&current->wait_chldexit,&wait); |
return retval; |
} |
|
#if !defined(__alpha__) && !defined(__ia64__) |
|
/* |
* sys_waitpid() remains for compatibility. waitpid() should be |
* implemented by calling sys_wait4() from libc.a. |
*/ |
asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options) |
{ |
return sys_wait4(pid, stat_addr, options, NULL); |
} |
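/* |
 * User-space sketch (illustrative; assumes the kernel wait flags are |
 * visible, e.g. via <linux/wait.h>): a plain waitpid() reaps only |
 * children that report via SIGCHLD, while a thread library reaping clone |
 * children would pass __WCLONE, or __WALL for both kinds: |
 * |
 *	int status; |
 *	pid_t pid = waitpid(-1, &status, __WALL | WNOHANG); |
 */ |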
|
#endif |
/acct.c
0,0 → 1,381
/* |
* linux/kernel/acct.c |
* |
* BSD Process Accounting for Linux |
* |
* Author: Marco van Wieringen <mvw@planets.elm.net> |
* |
* Some code based on ideas and code from: |
* Thomas K. Dyas <tdyas@eden.rutgers.edu> |
* |
* This file implements BSD-style process accounting. Whenever any |
* process exits, an accounting record of type "struct acct" is |
* written to the file specified with the acct() system call. It is |
* up to user-level programs to do useful things with the accounting |
* log. The kernel just provides the raw accounting information. |
* |
* (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V. |
* |
* Plugged two leaks. 1) It didn't return acct_file into the free_filps if |
* the file happened to be read-only. 2) If the accounting was suspended |
* due to the lack of space it happily allowed to reopen it and completely |
* lost the old acct_file. 3/10/98, Al Viro. |
* |
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct(). |
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV. |
* |
 * Fixed a nasty interaction with sys_umount(). If the accounting |
 * was suspended we failed to stop it on umount(). Messy. |
* Another one: remount to readonly didn't stop accounting. |
* Question: what should we do if we have CAP_SYS_ADMIN but not |
* CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY |
* unless we are messing with the root. In that case we are getting a |
* real mess with do_remount_sb(). 9/11/98, AV. |
* |
* Fixed a bunch of races (and pair of leaks). Probably not the best way, |
* but this one obviously doesn't introduce deadlocks. Later. BTW, found |
* one race (and leak) in BSD implementation. |
* OK, that's better. ANOTHER race and leak in BSD variant. There always |
* is one more bug... 10/11/98, AV. |
* |
* Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold |
* ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks |
* a struct file opened for write. Fixed. 2/6/2000, AV. |
*/ |
|
#include <linux/config.h> |
#include <linux/errno.h> |
#include <linux/kernel.h> |
|
#ifdef CONFIG_BSD_PROCESS_ACCT |
#include <linux/mm.h> |
#include <linux/slab.h> |
#include <linux/acct.h> |
#include <linux/smp_lock.h> |
#include <linux/file.h> |
#include <linux/tty.h> |
|
#include <asm/uaccess.h> |
|
/* |
* These constants control the amount of freespace that suspend and |
* resume the process accounting system, and the time delay between |
* each check. |
* Turned into sysctl-controllable parameters. AV, 12/11/98 |
*/ |
|
int acct_parm[3] = {4, 2, 30}; |
#define RESUME (acct_parm[0]) /* >foo% free space - resume */ |
#define SUSPEND (acct_parm[1]) /* <foo% free space - suspend */ |
#define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */ |
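/* |
 * Tuning example (illustrative, assuming acct_parm is wired up as the |
 * "kernel.acct" sysctl): the defaults suspend accounting when free space |
 * on the log's filesystem drops below 2% and resume it above 4%, with a |
 * re-check every 30 seconds, e.g.: |
 * |
 *	sysctl -w kernel.acct="4 2 30" |
 */ |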
|
/* |
* External references and all of the globals. |
*/ |
|
static volatile int acct_active; |
static volatile int acct_needcheck; |
static struct file *acct_file; |
static struct timer_list acct_timer; |
static void do_acct_process(long, struct file *); |
|
/* |
* Called whenever the timer says to check the free space. |
*/ |
static void acct_timeout(unsigned long unused) |
{ |
acct_needcheck = 1; |
} |
|
/* |
* Check the amount of free space and suspend/resume accordingly. |
*/ |
static int check_free_space(struct file *file) |
{ |
struct statfs sbuf; |
int res; |
int act; |
|
lock_kernel(); |
res = acct_active; |
if (!file || !acct_needcheck) |
goto out; |
unlock_kernel(); |
|
/* May block */ |
if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf)) |
return res; |
|
if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100) |
act = -1; |
else if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100) |
act = 1; |
else |
act = 0; |
|
/* |
 * If some joker switched acct_file under us we'd better be |
* silent and _not_ touch anything. |
*/ |
lock_kernel(); |
if (file != acct_file) { |
if (act) |
res = act>0; |
goto out; |
} |
|
if (acct_active) { |
if (act < 0) { |
acct_active = 0; |
printk(KERN_INFO "Process accounting paused\n"); |
} |
} else { |
if (act > 0) { |
acct_active = 1; |
printk(KERN_INFO "Process accounting resumed\n"); |
} |
} |
|
del_timer(&acct_timer); |
acct_needcheck = 0; |
acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ; |
add_timer(&acct_timer); |
res = acct_active; |
out: |
unlock_kernel(); |
return res; |
} |
|
/* |
* sys_acct() is the only system call needed to implement process |
* accounting. It takes the name of the file where accounting records |
* should be written. If the filename is NULL, accounting will be |
* shutdown. |
*/ |
asmlinkage long sys_acct(const char *name) |
{ |
struct file *file = NULL, *old_acct = NULL; |
char *tmp; |
int error; |
|
if (!capable(CAP_SYS_PACCT)) |
return -EPERM; |
|
if (name) { |
tmp = getname(name); |
error = PTR_ERR(tmp); |
if (IS_ERR(tmp)) |
goto out; |
/* Difference from BSD - they don't do O_APPEND */ |
file = filp_open(tmp, O_WRONLY|O_APPEND, 0); |
putname(tmp); |
if (IS_ERR(file)) { |
error = PTR_ERR(file); |
goto out; |
} |
error = -EACCES; |
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) |
goto out_err; |
|
error = -EIO; |
if (!file->f_op->write) |
goto out_err; |
} |
|
error = 0; |
lock_kernel(); |
if (acct_file) { |
old_acct = acct_file; |
del_timer(&acct_timer); |
acct_active = 0; |
acct_needcheck = 0; |
acct_file = NULL; |
} |
if (name) { |
acct_file = file; |
acct_needcheck = 0; |
acct_active = 1; |
/* It's been deleted if it was used before so this is safe */ |
init_timer(&acct_timer); |
acct_timer.function = acct_timeout; |
acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ; |
add_timer(&acct_timer); |
} |
unlock_kernel(); |
if (old_acct) { |
do_acct_process(0,old_acct); |
filp_close(old_acct, NULL); |
} |
out: |
return error; |
out_err: |
filp_close(file, NULL); |
goto out; |
} |
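/* |
 * User-space usage sketch (illustrative; the log file path is arbitrary): |
 * |
 *	acct("/var/log/pacct");		start appending records |
 *	...				records written as tasks exit |
 *	acct(NULL);			stop accounting again |
 * |
 * Both calls need CAP_SYS_PACCT, as checked at the top of sys_acct(). |
 */ |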
|
void acct_auto_close(kdev_t dev) |
{ |
lock_kernel(); |
if (acct_file && acct_file->f_dentry->d_inode->i_dev == dev) |
sys_acct(NULL); |
unlock_kernel(); |
} |
|
/* |
* encode an unsigned long into a comp_t |
* |
 * This routine has been adapted from the encode_comp_t() function in |
* the kern_acct.c file of the FreeBSD operating system. The encoding |
* is a 13-bit fraction with a 3-bit (base 8) exponent. |
*/ |
|
#define MANTSIZE 13 /* 13 bit mantissa. */ |
#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */ |
#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ |
|
static comp_t encode_comp_t(unsigned long value) |
{ |
int exp, rnd; |
|
exp = rnd = 0; |
while (value > MAXFRACT) { |
rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */ |
value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */ |
exp++; |
} |
|
/* |
* If we need to round up, do it (and handle overflow correctly). |
*/ |
if (rnd && (++value > MAXFRACT)) { |
value >>= EXPSIZE; |
exp++; |
} |
|
/* |
* Clean it up and polish it off. |
*/ |
exp <<= MANTSIZE; /* Shift the exponent into place */ |
exp += value; /* and add on the mantissa. */ |
return exp; |
} |
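/* |
 * Worked example (illustrative): encode_comp_t(10000) - 10000 exceeds |
 * MAXFRACT (8191), so it is shifted right by EXPSIZE once, leaving a |
 * mantissa of 1250 and an exponent of 1; the stored value is therefore |
 * (1 << MANTSIZE) + 1250 = 0x24e2.  Decoding reverses this (a sketch |
 * only, not part of this file): |
 * |
 *	unsigned long decode_comp_t(comp_t c) |
 *	{ |
 *		return (unsigned long)(c & MAXFRACT) << (3 * (c >> MANTSIZE)); |
 *	} |
 */ |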
|
/* |
* Write an accounting entry for an exiting process |
* |
* The acct_process() call is the workhorse of the process |
* accounting system. The struct acct is built here and then written |
* into the accounting file. This function should only be called from |
* do_exit(). |
*/ |
|
/* |
* do_acct_process does all actual work. Caller holds the reference to file. |
*/ |
static void do_acct_process(long exitcode, struct file *file) |
{ |
struct acct ac; |
mm_segment_t fs; |
unsigned long vsize; |
unsigned long flim; |
|
/* |
* First check to see if there is enough free_space to continue |
* the process accounting system. |
*/ |
if (!check_free_space(file)) |
return; |
|
/* |
* Fill the accounting struct with the needed info as recorded |
* by the different kernel functions. |
*/ |
memset((caddr_t)&ac, 0, sizeof(struct acct)); |
|
strncpy(ac.ac_comm, current->comm, ACCT_COMM); |
ac.ac_comm[ACCT_COMM - 1] = '\0'; |
|
ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ)); |
ac.ac_etime = encode_comp_t(jiffies - current->start_time); |
ac.ac_utime = encode_comp_t(current->times.tms_utime); |
ac.ac_stime = encode_comp_t(current->times.tms_stime); |
ac.ac_uid = current->uid; |
ac.ac_gid = current->gid; |
ac.ac_tty = (current->tty) ? kdev_t_to_nr(current->tty->device) : 0; |
|
ac.ac_flag = 0; |
if (current->flags & PF_FORKNOEXEC) |
ac.ac_flag |= AFORK; |
if (current->flags & PF_SUPERPRIV) |
ac.ac_flag |= ASU; |
if (current->flags & PF_DUMPCORE) |
ac.ac_flag |= ACORE; |
if (current->flags & PF_SIGNALED) |
ac.ac_flag |= AXSIG; |
|
vsize = 0; |
if (current->mm) { |
struct vm_area_struct *vma; |
		down_read(&current->mm->mmap_sem); |
vma = current->mm->mmap; |
while (vma) { |
vsize += vma->vm_end - vma->vm_start; |
vma = vma->vm_next; |
} |
		up_read(&current->mm->mmap_sem); |
} |
vsize = vsize / 1024; |
ac.ac_mem = encode_comp_t(vsize); |
ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ |
ac.ac_rw = encode_comp_t(ac.ac_io / 1024); |
ac.ac_minflt = encode_comp_t(current->min_flt); |
ac.ac_majflt = encode_comp_t(current->maj_flt); |
ac.ac_swaps = encode_comp_t(current->nswap); |
ac.ac_exitcode = exitcode; |
|
/* |
* Kernel segment override to datasegment and write it |
* to the accounting file. |
*/ |
fs = get_fs(); |
set_fs(KERNEL_DS); |
/* |
* Accounting records are not subject to resource limits. |
*/ |
flim = current->rlim[RLIMIT_FSIZE].rlim_cur; |
current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; |
file->f_op->write(file, (char *)&ac, |
sizeof(struct acct), &file->f_pos); |
current->rlim[RLIMIT_FSIZE].rlim_cur = flim; |
set_fs(fs); |
} |
|
/* |
* acct_process - now just a wrapper around do_acct_process |
*/ |
int acct_process(long exitcode) |
{ |
struct file *file = NULL; |
lock_kernel(); |
if (acct_file) { |
file = acct_file; |
get_file(file); |
unlock_kernel(); |
do_acct_process(exitcode, file); |
fput(file); |
} else |
unlock_kernel(); |
return 0; |
} |
|
#else |
/* |
* Dummy system call when BSD process accounting is not configured |
* into the kernel. |
*/ |
|
asmlinkage long sys_acct(const char * filename) |
{ |
return -ENOSYS; |
} |
#endif |
/exec_domain.c
0,0 → 1,290
/* |
* Handling of different ABIs (personalities). |
* |
* We group personalities into execution domains which have their |
* own handlers for kernel entry points, signal mapping, etc... |
* |
* 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org) |
*/ |
|
#include <linux/config.h> |
#include <linux/init.h> |
#include <linux/kernel.h> |
#include <linux/kmod.h> |
#include <linux/module.h> |
#include <linux/personality.h> |
#include <linux/sched.h> |
#include <linux/sysctl.h> |
#include <linux/types.h> |
|
|
static void default_handler(int, struct pt_regs *); |
|
static struct exec_domain *exec_domains = &default_exec_domain; |
static rwlock_t exec_domains_lock = RW_LOCK_UNLOCKED; |
|
|
static u_long ident_map[32] = { |
0, 1, 2, 3, 4, 5, 6, 7, |
8, 9, 10, 11, 12, 13, 14, 15, |
16, 17, 18, 19, 20, 21, 22, 23, |
24, 25, 26, 27, 28, 29, 30, 31 |
}; |
|
struct exec_domain default_exec_domain = { |
"Linux", /* name */ |
default_handler, /* lcall7 causes a seg fault. */ |
0, 0, /* PER_LINUX personality. */ |
ident_map, /* Identity map signals. */ |
ident_map, /* - both ways. */ |
}; |
|
|
static void |
default_handler(int segment, struct pt_regs *regp) |
{ |
u_long pers = 0; |
|
/* |
* This may have been a static linked SVr4 binary, so we would |
* have the personality set incorrectly. Or it might have been |
* a Solaris/x86 binary. We can tell which because the former |
* uses lcall7, while the latter used lcall 0x27. |
* Try to find or load the appropriate personality, and fall back |
* to just forcing a SEGV. |
* |
* XXX: this is IA32-specific and should be moved to the MD-tree. |
*/ |
switch (segment) { |
#ifdef __i386__ |
case 0x07: |
pers = abi_defhandler_lcall7; |
break; |
case 0x27: |
pers = PER_SOLARIS; |
break; |
#endif |
} |
set_personality(pers); |
|
if (current->exec_domain->handler != default_handler) |
current->exec_domain->handler(segment, regp); |
else |
send_sig(SIGSEGV, current, 1); |
} |
|
static struct exec_domain * |
lookup_exec_domain(u_long personality) |
{ |
struct exec_domain * ep; |
u_long pers = personality(personality); |
|
read_lock(&exec_domains_lock); |
for (ep = exec_domains; ep; ep = ep->next) { |
if (pers >= ep->pers_low && pers <= ep->pers_high) |
if (try_inc_mod_count(ep->module)) |
goto out; |
} |
|
#ifdef CONFIG_KMOD |
read_unlock(&exec_domains_lock); |
{ |
char buffer[30]; |
sprintf(buffer, "personality-%ld", pers); |
request_module(buffer); |
} |
read_lock(&exec_domains_lock); |
|
for (ep = exec_domains; ep; ep = ep->next) { |
if (pers >= ep->pers_low && pers <= ep->pers_high) |
if (try_inc_mod_count(ep->module)) |
goto out; |
} |
#endif |
|
ep = &default_exec_domain; |
out: |
read_unlock(&exec_domains_lock); |
return (ep); |
} |
|
int |
register_exec_domain(struct exec_domain *ep) |
{ |
struct exec_domain *tmp; |
int err = -EBUSY; |
|
if (ep == NULL) |
return -EINVAL; |
|
if (ep->next != NULL) |
return -EBUSY; |
|
write_lock(&exec_domains_lock); |
for (tmp = exec_domains; tmp; tmp = tmp->next) { |
if (tmp == ep) |
goto out; |
} |
|
ep->next = exec_domains; |
exec_domains = ep; |
err = 0; |
|
out: |
write_unlock(&exec_domains_lock); |
return (err); |
} |
|
int |
unregister_exec_domain(struct exec_domain *ep) |
{ |
struct exec_domain **epp; |
|
epp = &exec_domains; |
write_lock(&exec_domains_lock); |
for (epp = &exec_domains; *epp; epp = &(*epp)->next) { |
if (ep == *epp) |
goto unregister; |
} |
write_unlock(&exec_domains_lock); |
return -EINVAL; |
|
unregister: |
*epp = ep->next; |
ep->next = NULL; |
write_unlock(&exec_domains_lock); |
return 0; |
} |
|
int |
__set_personality(u_long personality) |
{ |
struct exec_domain *ep, *oep; |
|
ep = lookup_exec_domain(personality); |
if (ep == current->exec_domain) { |
current->personality = personality; |
return 0; |
} |
|
	if (atomic_read(&current->fs->count) != 1) { |
struct fs_struct *fsp, *ofsp; |
|
fsp = copy_fs_struct(current->fs); |
if (fsp == NULL) { |
put_exec_domain(ep); |
			return -ENOMEM; |
} |
|
task_lock(current); |
ofsp = current->fs; |
current->fs = fsp; |
task_unlock(current); |
|
put_fs_struct(ofsp); |
} |
|
/* |
* At that point we are guaranteed to be the sole owner of |
* current->fs. |
*/ |
|
current->personality = personality; |
oep = current->exec_domain; |
current->exec_domain = ep; |
set_fs_altroot(); |
|
put_exec_domain(oep); |
|
return 0; |
} |
|
int |
get_exec_domain_list(char *page) |
{ |
struct exec_domain *ep; |
int len = 0; |
|
read_lock(&exec_domains_lock); |
for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next) |
len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n", |
ep->pers_low, ep->pers_high, ep->name, |
ep->module ? ep->module->name : "kernel"); |
read_unlock(&exec_domains_lock); |
return (len); |
} |
|
asmlinkage long |
sys_personality(u_long personality) |
{ |
	u_long old = current->personality; |
|
if (personality != 0xffffffff) { |
set_personality(personality); |
if (current->personality != personality) |
return -EINVAL; |
} |
|
return (long)old; |
} |
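/* |
 * User-space usage sketch (illustrative): |
 * |
 *	unsigned long old = personality(0xffffffff);	query only |
 *	personality(PER_LINUX);				switch to plain Linux |
 * |
 * Passing 0xffffffff never changes the personality; any other value is |
 * looked up (and possibly loaded as a module) via __set_personality(). |
 */ |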
|
|
EXPORT_SYMBOL(register_exec_domain); |
EXPORT_SYMBOL(unregister_exec_domain); |
EXPORT_SYMBOL(__set_personality); |
|
/* |
* We have to have all sysctl handling for the Linux-ABI |
* in one place as the dynamic registration of sysctls is |
* horribly crufty in Linux <= 2.4. |
* |
* I hope the new sysctl schemes discussed for future versions |
* will obsolete this. |
* |
* --hch |
*/ |
|
u_long abi_defhandler_coff = PER_SCOSVR3; |
u_long abi_defhandler_elf = PER_LINUX; |
u_long abi_defhandler_lcall7 = PER_SVR4; |
u_long abi_defhandler_libcso = PER_SVR4; |
u_int abi_traceflg; |
int abi_fake_utsname; |
|
static struct ctl_table abi_table[] = { |
{ABI_DEFHANDLER_COFF, "defhandler_coff", &abi_defhandler_coff, |
sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, |
{ABI_DEFHANDLER_ELF, "defhandler_elf", &abi_defhandler_elf, |
sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, |
{ABI_DEFHANDLER_LCALL7, "defhandler_lcall7", &abi_defhandler_lcall7, |
sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, |
{ABI_DEFHANDLER_LIBCSO, "defhandler_libcso", &abi_defhandler_libcso, |
sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, |
{ABI_TRACE, "trace", &abi_traceflg, |
sizeof(u_int), 0644, NULL, &proc_dointvec}, |
{ABI_FAKE_UTSNAME, "fake_utsname", &abi_fake_utsname, |
sizeof(int), 0644, NULL, &proc_dointvec}, |
{0} |
}; |
|
static struct ctl_table abi_root_table[] = { |
{CTL_ABI, "abi", NULL, 0, 0555, abi_table}, |
{0} |
}; |
|
static int __init |
abi_register_sysctl(void) |
{ |
register_sysctl_table(abi_root_table, 1); |
return 0; |
} |
|
__initcall(abi_register_sysctl); |
|
|
EXPORT_SYMBOL(abi_defhandler_coff); |
EXPORT_SYMBOL(abi_defhandler_elf); |
EXPORT_SYMBOL(abi_defhandler_lcall7); |
EXPORT_SYMBOL(abi_defhandler_libcso); |
EXPORT_SYMBOL(abi_traceflg); |
EXPORT_SYMBOL(abi_fake_utsname); |
/pm.c
0,0 → 1,293
/* |
* pm.c - Power management interface |
* |
* Copyright (C) 2000 Andrew Henroid |
* |
* This program is free software; you can redistribute it and/or modify |
* it under the terms of the GNU General Public License as published by |
* the Free Software Foundation; either version 2 of the License, or |
* (at your option) any later version. |
* |
* This program is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with this program; if not, write to the Free Software |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
*/ |
|
#include <linux/module.h> |
#include <linux/spinlock.h> |
#include <linux/slab.h> |
#include <linux/pm.h> |
#include <linux/interrupt.h> |
|
int pm_active; |
|
/* |
* Locking notes: |
* pm_devs_lock can be a semaphore providing pm ops are not called |
* from an interrupt handler (already a bad idea so no change here). Each |
* change must be protected so that an unlink of an entry doesn't clash |
* with a pm send - which is permitted to sleep in the current architecture |
* |
 * Module unloads clashing with pm events now work out safely; the module |
* unload path will block until the event has been sent. It may well block |
* until a resume but that will be fine. |
*/ |
|
static DECLARE_MUTEX(pm_devs_lock); |
static LIST_HEAD(pm_devs); |
|
/** |
* pm_register - register a device with power management |
* @type: device type |
* @id: device ID |
* @callback: callback function |
* |
* Add a device to the list of devices that wish to be notified about |
* power management events. A &pm_dev structure is returned on success, |
* on failure the return is %NULL. |
* |
* The callback function will be called in process context and |
* it may sleep. |
*/ |
|
struct pm_dev *pm_register(pm_dev_t type, |
unsigned long id, |
pm_callback callback) |
{ |
struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); |
if (dev) { |
memset(dev, 0, sizeof(*dev)); |
dev->type = type; |
dev->id = id; |
dev->callback = callback; |
|
down(&pm_devs_lock); |
list_add(&dev->entry, &pm_devs); |
up(&pm_devs_lock); |
} |
return dev; |
} |
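/* |
 * Minimal driver-side sketch (illustrative only; mydev_pm_callback(), |
 * mydev_stop() and mydev_restart() are hypothetical names): |
 * |
 *	static int mydev_pm_callback(struct pm_dev *dev, |
 *				     pm_request_t rqst, void *data) |
 *	{ |
 *		if (rqst == PM_SUSPEND) |
 *			return mydev_stop();	non-zero vetoes the suspend |
 *		if (rqst == PM_RESUME) |
 *			mydev_restart(); |
 *		return 0; |
 *	} |
 * |
 *	struct pm_dev *pm = pm_register(PM_UNKNOWN_DEV, 0, mydev_pm_callback); |
 * |
 * The callback runs in process context and may sleep, as noted above. |
 */ |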
|
/** |
* pm_unregister - unregister a device with power management |
* @dev: device to unregister |
* |
* Remove a device from the power management notification lists. The |
* dev passed must be a handle previously returned by pm_register. |
*/ |
|
void pm_unregister(struct pm_dev *dev) |
{ |
if (dev) { |
down(&pm_devs_lock); |
list_del(&dev->entry); |
up(&pm_devs_lock); |
|
kfree(dev); |
} |
} |
|
static void __pm_unregister(struct pm_dev *dev) |
{ |
if (dev) { |
list_del(&dev->entry); |
kfree(dev); |
} |
} |
|
/** |
* pm_unregister_all - unregister all devices with matching callback |
* @callback: callback function pointer |
* |
* Unregister every device that would call the callback passed. This |
* is primarily meant as a helper function for loadable modules. It |
* enables a module to give up all its managed devices without keeping |
* its own private list. |
*/ |
|
void pm_unregister_all(pm_callback callback) |
{ |
struct list_head *entry; |
|
if (!callback) |
return; |
|
down(&pm_devs_lock); |
entry = pm_devs.next; |
while (entry != &pm_devs) { |
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); |
entry = entry->next; |
if (dev->callback == callback) |
__pm_unregister(dev); |
} |
up(&pm_devs_lock); |
} |
|
/** |
* pm_send - send request to a single device |
* @dev: device to send to |
* @rqst: power management request |
* @data: data for the callback |
* |
* Issue a power management request to a given device. The |
* %PM_SUSPEND and %PM_RESUME events are handled specially. The |
* data field must hold the intended next state. No call is made |
* if the state matches. |
* |
 * BUGS: what stops two power management requests occurring in parallel |
 * and conflicting? |
* |
* WARNING: Calling pm_send directly is not generally recommended, in |
 * particular there is no locking against the pm_dev going away. The |
* caller must maintain all needed locking or have 'inside knowledge' |
* on the safety. Also remember that this function is not locked against |
* pm_unregister. This means that you must handle SMP races on callback |
* execution and unload yourself. |
*/ |
|
int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) |
{ |
int status = 0; |
int prev_state, next_state; |
|
if (in_interrupt()) |
BUG(); |
|
switch (rqst) { |
case PM_SUSPEND: |
case PM_RESUME: |
prev_state = dev->state; |
next_state = (unsigned long) data; |
if (prev_state != next_state) { |
if (dev->callback) |
status = (*dev->callback)(dev, rqst, data); |
if (!status) { |
dev->state = next_state; |
dev->prev_state = prev_state; |
} |
} |
else { |
dev->prev_state = prev_state; |
} |
break; |
default: |
if (dev->callback) |
status = (*dev->callback)(dev, rqst, data); |
break; |
} |
return status; |
} |
|
/* |
* Undo incomplete request |
*/ |
static void pm_undo_all(struct pm_dev *last) |
{ |
struct list_head *entry = last->entry.prev; |
while (entry != &pm_devs) { |
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); |
if (dev->state != dev->prev_state) { |
			/* previous state was zero (running): resume; |
			 * previous state was non-zero (suspended): suspend |
			 */ |
pm_request_t undo = (dev->prev_state |
? PM_SUSPEND:PM_RESUME); |
pm_send(dev, undo, (void*) dev->prev_state); |
} |
entry = entry->prev; |
} |
} |
|
/** |
* pm_send_all - send request to all managed devices |
* @rqst: power management request |
* @data: data for the callback |
* |
 * Issue a power management request to all devices. The |
* %PM_SUSPEND events are handled specially. Any device is |
* permitted to fail a suspend by returning a non zero (error) |
* value from its callback function. If any device vetoes a |
* suspend request then all other devices that have suspended |
* during the processing of this request are restored to their |
* previous state. |
* |
* WARNING: This function takes the pm_devs_lock. The lock is not dropped until |
* the callbacks have completed. This prevents races against pm locking |
 * functions and against the module-unload pm_unregister code. It does |
* mean however that you must not issue pm_ functions within the callback |
* or you will deadlock and users will hate you. |
* |
* Zero is returned on success. If a suspend fails then the status |
* from the device that vetoes the suspend is returned. |
* |
 * BUGS: what stops two power management requests occurring in parallel |
 * and conflicting? |
*/ |
|
int pm_send_all(pm_request_t rqst, void *data) |
{ |
struct list_head *entry; |
|
down(&pm_devs_lock); |
entry = pm_devs.next; |
while (entry != &pm_devs) { |
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); |
if (dev->callback) { |
int status = pm_send(dev, rqst, data); |
if (status) { |
/* return devices to previous state on |
* failed suspend request |
*/ |
if (rqst == PM_SUSPEND) |
pm_undo_all(dev); |
up(&pm_devs_lock); |
return status; |
} |
} |
entry = entry->next; |
} |
up(&pm_devs_lock); |
return 0; |
} |
|
/** |
* pm_find - find a device |
* @type: type of device |
* @from: where to start looking |
* |
* Scan the power management list for devices of a specific type. The |
* return value for a matching device may be passed to further calls |
* to this function to find further matches. A %NULL indicates the end |
* of the list. |
* |
* To search from the beginning pass %NULL as the @from value. |
* |
* The caller MUST hold the pm_devs_lock lock when calling this |
* function. The instant that the lock is dropped all pointers returned |
* may become invalid. |
*/ |
|
struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from) |
{ |
struct list_head *entry = from ? from->entry.next:pm_devs.next; |
while (entry != &pm_devs) { |
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); |
if (type == PM_UNKNOWN_DEV || dev->type == type) |
return dev; |
entry = entry->next; |
} |
return 0; |
} |
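 |
/* |
 * Illustrative sketch (not part of the original source, kept inside |
 * #if 0 so it is documentation only): walking the device list with |
 * pm_find(). The whole walk must happen with pm_devs_lock held, since |
 * the returned pointers are only valid while the lock is held. The |
 * example_* name is hypothetical. |
 */ |
#if 0 |
static void example_count_managed_devices(void) |
{ |
	struct pm_dev *dev = NULL; |
	int count = 0; |
 |
	down(&pm_devs_lock); |
	while ((dev = pm_find(PM_UNKNOWN_DEV, dev)) != NULL) |
		count++;			/* PM_UNKNOWN_DEV matches every type */ |
	up(&pm_devs_lock); |
 |
	printk(KERN_DEBUG "pm: %d managed devices\n", count); |
} |
#endif |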
|
EXPORT_SYMBOL(pm_register); |
EXPORT_SYMBOL(pm_unregister); |
EXPORT_SYMBOL(pm_unregister_all); |
EXPORT_SYMBOL(pm_send); |
EXPORT_SYMBOL(pm_send_all); |
EXPORT_SYMBOL(pm_find); |
EXPORT_SYMBOL(pm_active); |
/signal.c
0,0 → 1,1325
/* |
* linux/kernel/signal.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
* |
* 1997-11-02 Modified for POSIX.1b signals by Richard Henderson |
*/ |
|
#include <linux/config.h> |
#include <linux/slab.h> |
#include <linux/module.h> |
#include <linux/unistd.h> |
#include <linux/smp_lock.h> |
#include <linux/init.h> |
#include <linux/sched.h> |
|
#include <asm/uaccess.h> |
|
/* |
* SLAB caches for signal bits. |
*/ |
|
#define DEBUG_SIG 0 |
|
#if DEBUG_SIG |
#define SIG_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */) |
#else |
#define SIG_SLAB_DEBUG 0 |
#endif |
|
static kmem_cache_t *sigqueue_cachep; |
|
atomic_t nr_queued_signals; |
int max_queued_signals = 1024; |
|
void __init signals_init(void) |
{ |
sigqueue_cachep = |
kmem_cache_create("sigqueue", |
sizeof(struct sigqueue), |
__alignof__(struct sigqueue), |
SIG_SLAB_DEBUG, NULL, NULL); |
if (!sigqueue_cachep) |
panic("signals_init(): cannot create sigqueue SLAB cache"); |
} |
|
|
/* Given the mask, find the first available signal that should be serviced. */ |
|
static int |
next_signal(struct task_struct *tsk, sigset_t *mask) |
{ |
unsigned long i, *s, *m, x; |
int sig = 0; |
|
s = tsk->pending.signal.sig; |
m = mask->sig; |
switch (_NSIG_WORDS) { |
default: |
for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m) |
if ((x = *s &~ *m) != 0) { |
sig = ffz(~x) + i*_NSIG_BPW + 1; |
break; |
} |
break; |
|
case 2: if ((x = s[0] &~ m[0]) != 0) |
sig = 1; |
else if ((x = s[1] &~ m[1]) != 0) |
sig = _NSIG_BPW + 1; |
else |
break; |
sig += ffz(~x); |
break; |
|
case 1: if ((x = *s &~ *m) != 0) |
sig = ffz(~x) + 1; |
break; |
} |
|
return sig; |
} |
|
static void flush_sigqueue(struct sigpending *queue) |
{ |
struct sigqueue *q, *n; |
|
sigemptyset(&queue->signal); |
q = queue->head; |
queue->head = NULL; |
queue->tail = &queue->head; |
|
while (q) { |
n = q->next; |
kmem_cache_free(sigqueue_cachep, q); |
atomic_dec(&nr_queued_signals); |
q = n; |
} |
} |
|
/* |
* Flush all pending signals for a task. |
*/ |
|
void |
flush_signals(struct task_struct *t) |
{ |
t->sigpending = 0; |
flush_sigqueue(&t->pending); |
} |
|
void exit_sighand(struct task_struct *tsk) |
{ |
struct signal_struct * sig = tsk->sig; |
|
spin_lock_irq(&tsk->sigmask_lock); |
if (sig) { |
tsk->sig = NULL; |
if (atomic_dec_and_test(&sig->count)) |
kmem_cache_free(sigact_cachep, sig); |
} |
tsk->sigpending = 0; |
flush_sigqueue(&tsk->pending); |
spin_unlock_irq(&tsk->sigmask_lock); |
} |
|
/* |
* Flush all handlers for a task. |
*/ |
|
void |
flush_signal_handlers(struct task_struct *t) |
{ |
int i; |
struct k_sigaction *ka = &t->sig->action[0]; |
for (i = _NSIG ; i != 0 ; i--) { |
if (ka->sa.sa_handler != SIG_IGN) |
ka->sa.sa_handler = SIG_DFL; |
ka->sa.sa_flags = 0; |
sigemptyset(&ka->sa.sa_mask); |
ka++; |
} |
} |
|
/* |
* sig_exit - cause the current task to exit due to a signal. |
*/ |
|
void |
sig_exit(int sig, int exit_code, struct siginfo *info) |
{ |
struct task_struct *t; |
|
sigaddset(¤t->pending.signal, sig); |
recalc_sigpending(current); |
current->flags |= PF_SIGNALED; |
|
/* Propagate the signal to all the tasks in |
* our thread group |
*/ |
if (info && (unsigned long)info != 1 |
&& info->si_code != SI_TKILL) { |
read_lock(&tasklist_lock); |
for_each_thread(t) { |
force_sig_info(sig, info, t); |
} |
read_unlock(&tasklist_lock); |
} |
|
do_exit(exit_code); |
/* NOTREACHED */ |
} |
|
/* Notify the system that a driver wants to block all signals for this |
 * process, and wants to be notified if any signals at all were to be |
 * sent/acted upon. If the notifier routine returns non-zero, then the |
 * signal will be acted upon after all. If the notifier routine returns 0, |
 * then the signal will be blocked. Only one block per process is |
 * allowed. priv is a pointer to private data that the notifier routine |
 * can use to determine if the signal should be blocked or not. */ |
|
void |
block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) |
{ |
unsigned long flags; |
|
spin_lock_irqsave(¤t->sigmask_lock, flags); |
current->notifier_mask = mask; |
current->notifier_data = priv; |
current->notifier = notifier; |
spin_unlock_irqrestore(¤t->sigmask_lock, flags); |
} |
|
/* Notify the system that blocking has ended. */ |
|
void |
unblock_all_signals(void) |
{ |
unsigned long flags; |
|
spin_lock_irqsave(¤t->sigmask_lock, flags); |
current->notifier = NULL; |
current->notifier_data = NULL; |
recalc_sigpending(current); |
spin_unlock_irqrestore(¤t->sigmask_lock, flags); |
} |
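 |
/* |
 * Illustrative sketch (not part of the original source, kept inside |
 * #if 0 so it is documentation only): how a driver could use the |
 * notifier hook described above. While example_in_critical_section is |
 * set, every signal named in example_mask is held back from the |
 * current process; returning non-zero from the notifier lets a signal |
 * through after all. All example_* names are hypothetical. |
 */ |
#if 0 |
static int example_in_critical_section; |
static sigset_t example_mask; |
 |
static int example_signal_notifier(void *priv) |
{ |
	int *in_critical = priv; |
 |
	/* 0 = keep blocking the signal, non-zero = deliver it after all */ |
	return !*in_critical; |
} |
 |
static void example_enter_critical(void) |
{ |
	sigfillset(&example_mask); |
	example_in_critical_section = 1; |
	block_all_signals(example_signal_notifier, |
			  &example_in_critical_section, &example_mask); |
} |
 |
static void example_leave_critical(void) |
{ |
	example_in_critical_section = 0; |
	unblock_all_signals(); |
} |
#endif |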
|
static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) |
{ |
if (sigismember(&list->signal, sig)) { |
/* Collect the siginfo appropriate to this signal. */ |
struct sigqueue *q, **pp; |
pp = &list->head; |
while ((q = *pp) != NULL) { |
if (q->info.si_signo == sig) |
goto found_it; |
pp = &q->next; |
} |
|
/* Ok, it wasn't in the queue. We must have |
been out of queue space. So zero out the |
info. */ |
sigdelset(&list->signal, sig); |
info->si_signo = sig; |
info->si_errno = 0; |
info->si_code = 0; |
info->si_pid = 0; |
info->si_uid = 0; |
return 1; |
|
found_it: |
if ((*pp = q->next) == NULL) |
list->tail = pp; |
|
/* Copy the sigqueue information and free the queue entry */ |
copy_siginfo(info, &q->info); |
kmem_cache_free(sigqueue_cachep,q); |
atomic_dec(&nr_queued_signals); |
|
		/* RT signals can be queued multiple times, so only clear |
		   the pending bit if no other instance remains. */ |
if (sig >= SIGRTMIN) { |
while ((q = *pp) != NULL) { |
if (q->info.si_signo == sig) |
goto found_another; |
pp = &q->next; |
} |
} |
|
sigdelset(&list->signal, sig); |
found_another: |
return 1; |
} |
return 0; |
} |
|
/* |
* Dequeue a signal and return the element to the caller, which is |
* expected to free it. |
* |
* All callers must be holding current->sigmask_lock. |
*/ |
|
int |
dequeue_signal(sigset_t *mask, siginfo_t *info) |
{ |
int sig = 0; |
|
#if DEBUG_SIG |
printk("SIG dequeue (%s:%d): %d ", current->comm, current->pid, |
signal_pending(current)); |
#endif |
|
sig = next_signal(current, mask); |
if (sig) { |
if (current->notifier) { |
if (sigismember(current->notifier_mask, sig)) { |
if (!(current->notifier)(current->notifier_data)) { |
current->sigpending = 0; |
return 0; |
} |
} |
} |
|
if (!collect_signal(sig, ¤t->pending, info)) |
sig = 0; |
|
/* XXX: Once POSIX.1b timers are in, if si_code == SI_TIMER, |
we need to xchg out the timer overrun values. */ |
} |
recalc_sigpending(current); |
|
#if DEBUG_SIG |
printk(" %d -> %d\n", signal_pending(current), sig); |
#endif |
|
return sig; |
} |
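 |
/* |
 * Illustrative sketch (not part of the original source, kept inside |
 * #if 0 so it is documentation only): the calling convention the |
 * comment above describes, roughly what the per-architecture |
 * do_signal() loops do. The lock must be held around the call and the |
 * caller owns the returned siginfo. The example_* name is hypothetical. |
 */ |
#if 0 |
static int example_fetch_one_signal(siginfo_t *info) |
{ |
	int signr; |
 |
	spin_lock_irq(&current->sigmask_lock); |
	signr = dequeue_signal(&current->blocked, info); |
	spin_unlock_irq(&current->sigmask_lock); |
 |
	return signr;	/* 0 if nothing was pending */ |
} |
#endif |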
|
static int rm_from_queue(int sig, struct sigpending *s) |
{ |
struct sigqueue *q, **pp; |
|
if (!sigismember(&s->signal, sig)) |
return 0; |
|
sigdelset(&s->signal, sig); |
|
pp = &s->head; |
|
while ((q = *pp) != NULL) { |
if (q->info.si_signo == sig) { |
if ((*pp = q->next) == NULL) |
s->tail = pp; |
kmem_cache_free(sigqueue_cachep,q); |
atomic_dec(&nr_queued_signals); |
continue; |
} |
pp = &q->next; |
} |
return 1; |
} |
|
/* |
* Remove signal sig from t->pending. |
* Returns 1 if sig was found. |
* |
* All callers must be holding t->sigmask_lock. |
*/ |
static int rm_sig_from_queue(int sig, struct task_struct *t) |
{ |
return rm_from_queue(sig, &t->pending); |
} |
|
/* |
* Bad permissions for sending the signal |
*/ |
int bad_signal(int sig, struct siginfo *info, struct task_struct *t) |
{ |
return (!info || ((unsigned long)info != 1 && SI_FROMUSER(info))) |
&& ((sig != SIGCONT) || (current->session != t->session)) |
&& (current->euid ^ t->suid) && (current->euid ^ t->uid) |
&& (current->uid ^ t->suid) && (current->uid ^ t->uid) |
&& !capable(CAP_KILL); |
} |
|
/* |
* Signal type: |
* < 0 : global action (kill - spread to all non-blocked threads) |
* = 0 : ignored |
* > 0 : wake up. |
*/ |
static int signal_type(int sig, struct signal_struct *signals) |
{ |
unsigned long handler; |
|
if (!signals) |
return 0; |
|
handler = (unsigned long) signals->action[sig-1].sa.sa_handler; |
if (handler > 1) |
return 1; |
|
/* "Ignore" handler.. Illogical, but that has an implicit handler for SIGCHLD */ |
if (handler == 1) |
return sig == SIGCHLD; |
|
/* Default handler. Normally lethal, but.. */ |
switch (sig) { |
|
/* Ignored */ |
case SIGCONT: case SIGWINCH: |
case SIGCHLD: case SIGURG: |
return 0; |
|
/* Implicit behaviour */ |
case SIGTSTP: case SIGTTIN: case SIGTTOU: |
return 1; |
|
/* Implicit actions (kill or do special stuff) */ |
default: |
return -1; |
} |
} |
|
|
/* |
* Determine whether a signal should be posted or not. |
* |
* Signals with SIG_IGN can be ignored, except for the |
* special case of a SIGCHLD. |
* |
* Some signals with SIG_DFL default to a non-action. |
*/ |
static int ignored_signal(int sig, struct task_struct *t) |
{ |
/* Don't ignore traced or blocked signals */ |
if ((t->ptrace & PT_PTRACED) || sigismember(&t->blocked, sig)) |
return 0; |
|
return signal_type(sig, t->sig) == 0; |
} |
|
/* |
* Handle TASK_STOPPED cases etc implicit behaviour |
* of certain magical signals. |
* |
* SIGKILL gets spread out to every thread. |
*/ |
static void handle_stop_signal(int sig, struct task_struct *t) |
{ |
switch (sig) { |
case SIGKILL: case SIGCONT: |
/* Wake up the process if stopped. */ |
if (t->state == TASK_STOPPED) |
wake_up_process(t); |
t->exit_code = 0; |
rm_sig_from_queue(SIGSTOP, t); |
rm_sig_from_queue(SIGTSTP, t); |
rm_sig_from_queue(SIGTTOU, t); |
rm_sig_from_queue(SIGTTIN, t); |
break; |
|
case SIGSTOP: case SIGTSTP: |
case SIGTTIN: case SIGTTOU: |
/* If we're stopping again, cancel SIGCONT */ |
rm_sig_from_queue(SIGCONT, t); |
break; |
} |
} |
|
static int send_signal(int sig, struct siginfo *info, struct sigpending *signals) |
{ |
struct sigqueue * q = NULL; |
|
/* Real-time signals must be queued if sent by sigqueue, or |
some other real-time mechanism. It is implementation |
defined whether kill() does so. We attempt to do so, on |
the principle of least surprise, but since kill is not |
allowed to fail with EAGAIN when low on memory we just |
make sure at least one signal gets delivered and don't |
pass on the info struct. */ |
|
if (atomic_read(&nr_queued_signals) < max_queued_signals) { |
q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC); |
} |
|
if (q) { |
atomic_inc(&nr_queued_signals); |
q->next = NULL; |
*signals->tail = q; |
signals->tail = &q->next; |
switch ((unsigned long) info) { |
case 0: |
q->info.si_signo = sig; |
q->info.si_errno = 0; |
q->info.si_code = SI_USER; |
q->info.si_pid = current->pid; |
q->info.si_uid = current->uid; |
break; |
case 1: |
q->info.si_signo = sig; |
q->info.si_errno = 0; |
q->info.si_code = SI_KERNEL; |
q->info.si_pid = 0; |
q->info.si_uid = 0; |
break; |
default: |
copy_siginfo(&q->info, info); |
break; |
} |
} else if (sig >= SIGRTMIN && info && (unsigned long)info != 1 |
&& info->si_code != SI_USER) { |
/* |
* Queue overflow, abort. We may abort if the signal was rt |
* and sent by user using something other than kill(). |
*/ |
return -EAGAIN; |
} |
|
sigaddset(&signals->signal, sig); |
return 0; |
} |
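 |
/* |
 * Illustrative user-space view of the queueing rules above (not part |
 * of the original source, shown only as documentation). Real-time |
 * signals sent with sigqueue() are queued individually, up to the |
 * max_queued_signals limit, so each send below is eventually delivered |
 * with its own payload; a plain kill() with a non-RT signal collapses |
 * into a single pending instance while it remains undelivered: |
 * |
 *	#include <signal.h> |
 *	union sigval v = { .sival_int = 42 }; |
 *	sigqueue(pid, SIGRTMIN, v);	three queued entries, |
 *	sigqueue(pid, SIGRTMIN, v);	each carrying its own |
 *	sigqueue(pid, SIGRTMIN, v);	siginfo payload |
 *	kill(pid, SIGUSR1);		at most one pending SIGUSR1; |
 *	kill(pid, SIGUSR1);		the second send is merged |
 */ |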
|
/* |
* Tell a process that it has a new active signal.. |
* |
* NOTE! we rely on the previous spin_lock to |
* lock interrupts for us! We can only be called with |
* "sigmask_lock" held, and the local interrupt must |
* have been disabled when that got acquired! |
* |
* No need to set need_resched since signal event passing |
* goes through ->blocked |
*/ |
static inline void signal_wake_up(struct task_struct *t) |
{ |
t->sigpending = 1; |
|
#ifdef CONFIG_SMP |
/* |
* If the task is running on a different CPU |
* force a reschedule on the other CPU to make |
* it notice the new signal quickly. |
* |
* The code below is a tad loose and might occasionally |
* kick the wrong CPU if we catch the process in the |
* process of changing - but no harm is done by that |
* other than doing an extra (lightweight) IPI interrupt. |
*/ |
spin_lock(&runqueue_lock); |
if (task_has_cpu(t) && t->processor != smp_processor_id()) |
smp_send_reschedule(t->processor); |
spin_unlock(&runqueue_lock); |
#endif /* CONFIG_SMP */ |
|
if (t->state & TASK_INTERRUPTIBLE) { |
wake_up_process(t); |
return; |
} |
} |
|
static int deliver_signal(int sig, struct siginfo *info, struct task_struct *t) |
{ |
int retval = send_signal(sig, info, &t->pending); |
|
if (!retval && !sigismember(&t->blocked, sig)) |
signal_wake_up(t); |
|
return retval; |
} |
|
int |
send_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
{ |
unsigned long flags; |
int ret; |
|
|
#if DEBUG_SIG |
printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig); |
#endif |
|
ret = -EINVAL; |
if (sig < 0 || sig > _NSIG) |
goto out_nolock; |
/* The somewhat baroque permissions check... */ |
ret = -EPERM; |
if (bad_signal(sig, info, t)) |
goto out_nolock; |
|
/* The null signal is a permissions and process existence probe. |
No signal is actually delivered. Same goes for zombies. */ |
ret = 0; |
if (!sig || !t->sig) |
goto out_nolock; |
|
spin_lock_irqsave(&t->sigmask_lock, flags); |
handle_stop_signal(sig, t); |
|
/* Optimize away the signal, if it's a signal that can be |
handled immediately (ie non-blocked and untraced) and |
that is ignored (either explicitly or by default). */ |
|
if (ignored_signal(sig, t)) |
goto out; |
|
/* Support queueing exactly one non-rt signal, so that we |
can get more detailed information about the cause of |
the signal. */ |
if (sig < SIGRTMIN && sigismember(&t->pending.signal, sig)) |
goto out; |
|
ret = deliver_signal(sig, info, t); |
out: |
spin_unlock_irqrestore(&t->sigmask_lock, flags); |
out_nolock: |
#if DEBUG_SIG |
printk(" %d -> %d\n", signal_pending(t), ret); |
#endif |
|
return ret; |
} |
|
/* |
* Force a signal that the process can't ignore: if necessary |
* we unblock the signal and change any SIG_IGN to SIG_DFL. |
*/ |
|
int |
force_sig_info(int sig, struct siginfo *info, struct task_struct *t) |
{ |
unsigned long int flags; |
|
spin_lock_irqsave(&t->sigmask_lock, flags); |
if (t->sig == NULL) { |
spin_unlock_irqrestore(&t->sigmask_lock, flags); |
return -ESRCH; |
} |
|
if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN) |
t->sig->action[sig-1].sa.sa_handler = SIG_DFL; |
sigdelset(&t->blocked, sig); |
recalc_sigpending(t); |
spin_unlock_irqrestore(&t->sigmask_lock, flags); |
|
return send_sig_info(sig, info, t); |
} |
|
/* |
* kill_pg_info() sends a signal to a process group: this is what the tty |
* control characters do (^C, ^Z etc) |
*/ |
|
int |
kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) |
{ |
int retval = -EINVAL; |
if (pgrp > 0) { |
struct task_struct *p; |
|
retval = -ESRCH; |
read_lock(&tasklist_lock); |
for_each_task(p) { |
if (p->pgrp == pgrp && thread_group_leader(p)) { |
int err = send_sig_info(sig, info, p); |
if (retval) |
retval = err; |
} |
} |
read_unlock(&tasklist_lock); |
} |
return retval; |
} |
|
/* |
* kill_sl_info() sends a signal to the session leader: this is used |
* to send SIGHUP to the controlling process of a terminal when |
* the connection is lost. |
*/ |
|
int |
kill_sl_info(int sig, struct siginfo *info, pid_t sess) |
{ |
int retval = -EINVAL; |
if (sess > 0) { |
struct task_struct *p; |
|
retval = -ESRCH; |
read_lock(&tasklist_lock); |
for_each_task(p) { |
if (p->leader && p->session == sess) { |
int err = send_sig_info(sig, info, p); |
if (retval) |
retval = err; |
} |
} |
read_unlock(&tasklist_lock); |
} |
return retval; |
} |
|
inline int |
kill_proc_info(int sig, struct siginfo *info, pid_t pid) |
{ |
int error; |
struct task_struct *p; |
|
read_lock(&tasklist_lock); |
p = find_task_by_pid(pid); |
error = -ESRCH; |
if (p) { |
if (!thread_group_leader(p)) { |
struct task_struct *tg; |
tg = find_task_by_pid(p->tgid); |
if (tg) |
p = tg; |
} |
error = send_sig_info(sig, info, p); |
} |
read_unlock(&tasklist_lock); |
return error; |
} |
|
|
/* |
* kill_something_info() interprets pid in interesting ways just like kill(2). |
* |
* POSIX specifies that kill(-1,sig) is unspecified, but what we have |
* is probably wrong. Should make it like BSD or SYSV. |
*/ |
|
static int kill_something_info(int sig, struct siginfo *info, int pid) |
{ |
if (!pid) { |
return kill_pg_info(sig, info, current->pgrp); |
} else if (pid == -1) { |
int retval = 0, count = 0; |
struct task_struct * p; |
|
read_lock(&tasklist_lock); |
for_each_task(p) { |
if (p->pid > 1 && p != current && thread_group_leader(p)) { |
int err = send_sig_info(sig, info, p); |
++count; |
if (err != -EPERM) |
retval = err; |
} |
} |
read_unlock(&tasklist_lock); |
return count ? retval : -ESRCH; |
} else if (pid < 0) { |
return kill_pg_info(sig, info, -pid); |
} else { |
return kill_proc_info(sig, info, pid); |
} |
} |
|
/* |
* These are for backward compatibility with the rest of the kernel source. |
*/ |
|
int |
send_sig(int sig, struct task_struct *p, int priv) |
{ |
return send_sig_info(sig, (void*)(long)(priv != 0), p); |
} |
|
void |
force_sig(int sig, struct task_struct *p) |
{ |
force_sig_info(sig, (void*)1L, p); |
} |
|
int |
kill_pg(pid_t pgrp, int sig, int priv) |
{ |
return kill_pg_info(sig, (void *)(long)(priv != 0), pgrp); |
} |
|
int |
kill_sl(pid_t sess, int sig, int priv) |
{ |
return kill_sl_info(sig, (void *)(long)(priv != 0), sess); |
} |
|
int |
kill_proc(pid_t pid, int sig, int priv) |
{ |
return kill_proc_info(sig, (void *)(long)(priv != 0), pid); |
} |
|
/* |
* Joy. Or not. Pthread wants us to wake up every thread |
* in our parent group. |
*/ |
static void wake_up_parent(struct task_struct *parent) |
{ |
struct task_struct *tsk = parent; |
|
do { |
wake_up_interruptible(&tsk->wait_chldexit); |
tsk = next_thread(tsk); |
} while (tsk != parent); |
} |
|
/* |
* Let a parent know about a status change of a child. |
*/ |
|
void do_notify_parent(struct task_struct *tsk, int sig) |
{ |
struct siginfo info; |
int why, status; |
|
info.si_signo = sig; |
info.si_errno = 0; |
info.si_pid = tsk->pid; |
info.si_uid = tsk->uid; |
|
/* FIXME: find out whether or not this is supposed to be c*time. */ |
info.si_utime = tsk->times.tms_utime; |
info.si_stime = tsk->times.tms_stime; |
|
status = tsk->exit_code & 0x7f; |
why = SI_KERNEL; /* shouldn't happen */ |
switch (tsk->state) { |
case TASK_STOPPED: |
/* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */ |
if (tsk->ptrace & PT_PTRACED) |
why = CLD_TRAPPED; |
else |
why = CLD_STOPPED; |
break; |
|
default: |
if (tsk->exit_code & 0x80) |
why = CLD_DUMPED; |
else if (tsk->exit_code & 0x7f) |
why = CLD_KILLED; |
else { |
why = CLD_EXITED; |
status = tsk->exit_code >> 8; |
} |
break; |
} |
info.si_code = why; |
info.si_status = status; |
|
send_sig_info(sig, &info, tsk->p_pptr); |
wake_up_parent(tsk->p_pptr); |
} |
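 |
/* |
 * Illustrative user-space counterpart (not part of the original |
 * source, shown only as documentation): a parent that installs a |
 * SIGCHLD handler with SA_SIGINFO sees the si_code and si_status |
 * values filled in above. The handler name is hypothetical: |
 * |
 *	static void chld(int sig, siginfo_t *si, void *ctx) |
 *	{ |
 *		if (si->si_code == CLD_EXITED) |
 *			;	si_status is the child's exit code |
 *		else if (si->si_code == CLD_KILLED || si->si_code == CLD_DUMPED) |
 *			;	si_status is the killing signal |
 *		else if (si->si_code == CLD_STOPPED || si->si_code == CLD_TRAPPED) |
 *			;	si_status is the stop signal |
 *	} |
 *	struct sigaction sa = { .sa_sigaction = chld, .sa_flags = SA_SIGINFO }; |
 *	sigaction(SIGCHLD, &sa, NULL); |
 */ |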
|
|
/* |
* We need the tasklist lock because it's the only |
* thing that protects out "parent" pointer. |
* |
* exit.c calls "do_notify_parent()" directly, because |
* it already has the tasklist lock. |
*/ |
void |
notify_parent(struct task_struct *tsk, int sig) |
{ |
read_lock(&tasklist_lock); |
do_notify_parent(tsk, sig); |
read_unlock(&tasklist_lock); |
} |
|
EXPORT_SYMBOL(dequeue_signal); |
EXPORT_SYMBOL(flush_signals); |
EXPORT_SYMBOL(force_sig); |
EXPORT_SYMBOL(force_sig_info); |
EXPORT_SYMBOL(kill_pg); |
EXPORT_SYMBOL(kill_pg_info); |
EXPORT_SYMBOL(kill_proc); |
EXPORT_SYMBOL(kill_proc_info); |
EXPORT_SYMBOL(kill_sl); |
EXPORT_SYMBOL(kill_sl_info); |
EXPORT_SYMBOL(notify_parent); |
EXPORT_SYMBOL(recalc_sigpending); |
EXPORT_SYMBOL(send_sig); |
EXPORT_SYMBOL(send_sig_info); |
EXPORT_SYMBOL(block_all_signals); |
EXPORT_SYMBOL(unblock_all_signals); |
|
|
/* |
* System call entry points. |
*/ |
|
/* |
* We don't need to get the kernel lock - this is all local to this |
* particular thread.. (and that's good, because this is _heavily_ |
* used by various programs) |
*/ |
|
asmlinkage long |
sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize) |
{ |
int error = -EINVAL; |
sigset_t old_set, new_set; |
|
/* XXX: Don't preclude handling different sized sigset_t's. */ |
if (sigsetsize != sizeof(sigset_t)) |
goto out; |
|
if (set) { |
error = -EFAULT; |
if (copy_from_user(&new_set, set, sizeof(*set))) |
goto out; |
sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
|
spin_lock_irq(¤t->sigmask_lock); |
old_set = current->blocked; |
|
error = 0; |
switch (how) { |
default: |
error = -EINVAL; |
break; |
case SIG_BLOCK: |
sigorsets(¤t->blocked, &old_set, &new_set); |
break; |
case SIG_UNBLOCK: |
signandsets(¤t->blocked, &old_set, &new_set); |
break; |
case SIG_SETMASK: |
current->blocked = new_set; |
break; |
} |
|
recalc_sigpending(current); |
spin_unlock_irq(¤t->sigmask_lock); |
if (error) |
goto out; |
if (oset) |
goto set_old; |
} else if (oset) { |
spin_lock_irq(¤t->sigmask_lock); |
old_set = current->blocked; |
spin_unlock_irq(¤t->sigmask_lock); |
|
set_old: |
error = -EFAULT; |
if (copy_to_user(oset, &old_set, sizeof(*oset))) |
goto out; |
} |
error = 0; |
out: |
return error; |
} |
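 |
/* |
 * Illustrative user-space usage of the call above (not part of the |
 * original source, shown only as documentation). SIG_BLOCK ors the new |
 * set into the blocked mask, SIG_UNBLOCK removes it, SIG_SETMASK |
 * replaces it; SIGKILL and SIGSTOP are silently stripped from the set: |
 * |
 *	sigset_t set, old; |
 *	sigemptyset(&set); |
 *	sigaddset(&set, SIGINT); |
 *	sigprocmask(SIG_BLOCK, &set, &old);	hold SIGINT back |
 *	... critical region ... |
 *	sigprocmask(SIG_SETMASK, &old, NULL);	restore the previous mask |
 */ |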
|
long do_sigpending(void *set, unsigned long sigsetsize) |
{ |
long error = -EINVAL; |
sigset_t pending; |
|
if (sigsetsize > sizeof(sigset_t)) |
goto out; |
|
spin_lock_irq(¤t->sigmask_lock); |
sigandsets(&pending, ¤t->blocked, ¤t->pending.signal); |
spin_unlock_irq(¤t->sigmask_lock); |
|
error = -EFAULT; |
if (!copy_to_user(set, &pending, sigsetsize)) |
error = 0; |
out: |
return error; |
} |
|
asmlinkage long |
sys_rt_sigpending(sigset_t *set, size_t sigsetsize) |
{ |
return do_sigpending(set, sigsetsize); |
} |
|
asmlinkage long |
sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, |
const struct timespec *uts, size_t sigsetsize) |
{ |
int ret, sig; |
sigset_t these; |
struct timespec ts; |
siginfo_t info; |
long timeout = 0; |
|
/* XXX: Don't preclude handling different sized sigset_t's. */ |
if (sigsetsize != sizeof(sigset_t)) |
return -EINVAL; |
|
if (copy_from_user(&these, uthese, sizeof(these))) |
return -EFAULT; |
|
/* |
* Invert the set of allowed signals to get those we |
* want to block. |
*/ |
sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
signotset(&these); |
|
if (uts) { |
if (copy_from_user(&ts, uts, sizeof(ts))) |
return -EFAULT; |
if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0 |
|| ts.tv_sec < 0) |
return -EINVAL; |
} |
|
spin_lock_irq(¤t->sigmask_lock); |
sig = dequeue_signal(&these, &info); |
if (!sig) { |
timeout = MAX_SCHEDULE_TIMEOUT; |
if (uts) |
timeout = (timespec_to_jiffies(&ts) |
+ (ts.tv_sec || ts.tv_nsec)); |
|
if (timeout) { |
			/* None ready -- temporarily unblock those we're |
			 * interested in while we sleep, so that we'll be |
			 * awakened when they arrive. */ |
sigset_t oldblocked = current->blocked; |
sigandsets(¤t->blocked, ¤t->blocked, &these); |
recalc_sigpending(current); |
spin_unlock_irq(¤t->sigmask_lock); |
|
current->state = TASK_INTERRUPTIBLE; |
timeout = schedule_timeout(timeout); |
|
spin_lock_irq(¤t->sigmask_lock); |
sig = dequeue_signal(&these, &info); |
current->blocked = oldblocked; |
recalc_sigpending(current); |
} |
} |
spin_unlock_irq(¤t->sigmask_lock); |
|
if (sig) { |
ret = sig; |
if (uinfo) { |
if (copy_siginfo_to_user(uinfo, &info)) |
ret = -EFAULT; |
} |
} else { |
ret = -EAGAIN; |
if (timeout) |
ret = -EINTR; |
} |
|
return ret; |
} |
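 |
/* |
 * Illustrative user-space usage of the call above (not part of the |
 * original source, shown only as documentation): block a signal, then |
 * receive it synchronously with a timeout instead of via a handler. |
 * sigtimedwait() returns the signal number, or -1 with errno EAGAIN |
 * when the timeout expires: |
 * |
 *	sigset_t set; |
 *	siginfo_t info; |
 *	struct timespec ts = { 5, 0 };		wait at most five seconds |
 *	sigemptyset(&set); |
 *	sigaddset(&set, SIGUSR1); |
 *	sigprocmask(SIG_BLOCK, &set, NULL);	keep it away from handlers |
 *	if (sigtimedwait(&set, &info, &ts) == SIGUSR1) |
 *		...	info.si_pid tells us who sent it |
 */ |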
|
asmlinkage long |
sys_kill(int pid, int sig) |
{ |
struct siginfo info; |
|
info.si_signo = sig; |
info.si_errno = 0; |
info.si_code = SI_USER; |
info.si_pid = current->pid; |
info.si_uid = current->uid; |
|
return kill_something_info(sig, &info, pid); |
} |
|
/* |
* Kill only one task, even if it's a CLONE_THREAD task. |
*/ |
asmlinkage long |
sys_tkill(int pid, int sig) |
{ |
struct siginfo info; |
int error; |
struct task_struct *p; |
|
/* This is only valid for single tasks */ |
if (pid <= 0) |
return -EINVAL; |
|
info.si_signo = sig; |
info.si_errno = 0; |
info.si_code = SI_TKILL; |
info.si_pid = current->pid; |
info.si_uid = current->uid; |
|
read_lock(&tasklist_lock); |
p = find_task_by_pid(pid); |
error = -ESRCH; |
if (p) { |
error = send_sig_info(sig, &info, p); |
} |
read_unlock(&tasklist_lock); |
return error; |
} |
|
asmlinkage long |
sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo) |
{ |
siginfo_t info; |
|
if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) |
return -EFAULT; |
|
/* Not even root can pretend to send signals from the kernel. |
Nor can they impersonate a kill(), which adds source info. */ |
if (info.si_code >= 0) |
return -EPERM; |
info.si_signo = sig; |
|
/* POSIX.1b doesn't mention process groups. */ |
return kill_proc_info(sig, &info, pid); |
} |
|
int |
do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) |
{ |
struct k_sigaction *k; |
|
if (sig < 1 || sig > _NSIG || |
(act && (sig == SIGKILL || sig == SIGSTOP))) |
return -EINVAL; |
|
k = ¤t->sig->action[sig-1]; |
|
spin_lock(¤t->sig->siglock); |
|
if (oact) |
*oact = *k; |
|
if (act) { |
*k = *act; |
sigdelsetmask(&k->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); |
|
/* |
* POSIX 3.3.1.3: |
* "Setting a signal action to SIG_IGN for a signal that is |
* pending shall cause the pending signal to be discarded, |
* whether or not it is blocked." |
* |
* "Setting a signal action to SIG_DFL for a signal that is |
* pending and whose default action is to ignore the signal |
* (for example, SIGCHLD), shall cause the pending signal to |
* be discarded, whether or not it is blocked" |
* |
* Note the silly behaviour of SIGCHLD: SIG_IGN means that the |
* signal isn't actually ignored, but does automatic child |
* reaping, while SIG_DFL is explicitly said by POSIX to force |
* the signal to be ignored. |
*/ |
|
if (k->sa.sa_handler == SIG_IGN |
|| (k->sa.sa_handler == SIG_DFL |
&& (sig == SIGCONT || |
sig == SIGCHLD || |
sig == SIGURG || |
sig == SIGWINCH))) { |
spin_lock_irq(¤t->sigmask_lock); |
if (rm_sig_from_queue(sig, current)) |
recalc_sigpending(current); |
spin_unlock_irq(¤t->sigmask_lock); |
} |
} |
|
spin_unlock(¤t->sig->siglock); |
return 0; |
} |
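 |
/* |
 * Illustrative user-space consequence of the POSIX rule quoted above |
 * (not part of the original source, shown only as documentation): a |
 * pending signal disappears the moment its action becomes SIG_IGN, |
 * even while it is blocked: |
 * |
 *	sigset_t set; |
 *	sigemptyset(&set); |
 *	sigaddset(&set, SIGUSR1); |
 *	sigprocmask(SIG_BLOCK, &set, NULL); |
 *	raise(SIGUSR1);			now pending (blocked) |
 *	signal(SIGUSR1, SIG_IGN);	the pending instance is discarded |
 *	signal(SIGUSR1, SIG_DFL); |
 *	sigprocmask(SIG_UNBLOCK, &set, NULL);	nothing is delivered |
 */ |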
|
int |
do_sigaltstack (const stack_t *uss, stack_t *uoss, unsigned long sp) |
{ |
stack_t oss; |
int error; |
|
if (uoss) { |
oss.ss_sp = (void *) current->sas_ss_sp; |
oss.ss_size = current->sas_ss_size; |
oss.ss_flags = sas_ss_flags(sp); |
} |
|
if (uss) { |
void *ss_sp; |
size_t ss_size; |
int ss_flags; |
|
error = -EFAULT; |
if (verify_area(VERIFY_READ, uss, sizeof(*uss)) |
|| __get_user(ss_sp, &uss->ss_sp) |
|| __get_user(ss_flags, &uss->ss_flags) |
|| __get_user(ss_size, &uss->ss_size)) |
goto out; |
|
error = -EPERM; |
if (on_sig_stack (sp)) |
goto out; |
|
error = -EINVAL; |
		/* |
		 * Note: this code used to test ss_flags incorrectly. |
		 * Old code may have been written using ss_flags==0 to |
		 * mean ss_flags==SS_ONSTACK (as this was the only way |
		 * that worked), so this fix preserves that older |
		 * mechanism. |
		 */ |
if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0) |
goto out; |
|
if (ss_flags == SS_DISABLE) { |
ss_size = 0; |
ss_sp = NULL; |
} else { |
error = -ENOMEM; |
if (ss_size < MINSIGSTKSZ) |
goto out; |
} |
|
current->sas_ss_sp = (unsigned long) ss_sp; |
current->sas_ss_size = ss_size; |
} |
|
if (uoss) { |
error = -EFAULT; |
if (copy_to_user(uoss, &oss, sizeof(oss))) |
goto out; |
} |
|
error = 0; |
out: |
return error; |
} |
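 |
/* |
 * Illustrative user-space usage of the call above (not part of the |
 * original source, shown only as documentation): installing an |
 * alternate stack so a SIGSEGV handler can still run after the main |
 * stack has overflowed. The handler must be installed with SA_ONSTACK; |
 * segv_handler is a hypothetical handler: |
 * |
 *	static char altstack[SIGSTKSZ]; |
 *	stack_t ss = { .ss_sp = altstack, .ss_size = sizeof(altstack), |
 *		       .ss_flags = 0 }; |
 *	struct sigaction sa = { .sa_sigaction = segv_handler, |
 *				.sa_flags = SA_SIGINFO | SA_ONSTACK }; |
 *	sigaltstack(&ss, NULL); |
 *	sigaction(SIGSEGV, &sa, NULL); |
 */ |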
|
asmlinkage long |
sys_sigpending(old_sigset_t *set) |
{ |
return do_sigpending(set, sizeof(*set)); |
} |
|
#if !defined(__alpha__) |
/* Alpha has its own versions with special arguments. */ |
|
asmlinkage long |
sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset) |
{ |
int error; |
old_sigset_t old_set, new_set; |
|
if (set) { |
error = -EFAULT; |
if (copy_from_user(&new_set, set, sizeof(*set))) |
goto out; |
new_set &= ~(sigmask(SIGKILL)|sigmask(SIGSTOP)); |
|
spin_lock_irq(¤t->sigmask_lock); |
old_set = current->blocked.sig[0]; |
|
error = 0; |
switch (how) { |
default: |
error = -EINVAL; |
break; |
case SIG_BLOCK: |
sigaddsetmask(¤t->blocked, new_set); |
break; |
case SIG_UNBLOCK: |
sigdelsetmask(¤t->blocked, new_set); |
break; |
case SIG_SETMASK: |
current->blocked.sig[0] = new_set; |
break; |
} |
|
recalc_sigpending(current); |
spin_unlock_irq(¤t->sigmask_lock); |
if (error) |
goto out; |
if (oset) |
goto set_old; |
} else if (oset) { |
old_set = current->blocked.sig[0]; |
set_old: |
error = -EFAULT; |
if (copy_to_user(oset, &old_set, sizeof(*oset))) |
goto out; |
} |
error = 0; |
out: |
return error; |
} |
|
#ifndef __sparc__ |
asmlinkage long |
sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact, |
size_t sigsetsize) |
{ |
struct k_sigaction new_sa, old_sa; |
int ret = -EINVAL; |
|
/* XXX: Don't preclude handling different sized sigset_t's. */ |
if (sigsetsize != sizeof(sigset_t)) |
goto out; |
|
if (act) { |
if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) |
return -EFAULT; |
} |
|
ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); |
|
if (!ret && oact) { |
if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) |
return -EFAULT; |
} |
out: |
return ret; |
} |
#endif /* __sparc__ */ |
#endif |
|
#if !defined(__alpha__) && !defined(__ia64__) |
/* |
* For backwards compatibility. Functionality superseded by sigprocmask. |
*/ |
asmlinkage long |
sys_sgetmask(void) |
{ |
/* SMP safe */ |
return current->blocked.sig[0]; |
} |
|
asmlinkage long |
sys_ssetmask(int newmask) |
{ |
int old; |
|
spin_lock_irq(¤t->sigmask_lock); |
old = current->blocked.sig[0]; |
|
siginitset(¤t->blocked, newmask & ~(sigmask(SIGKILL)| |
sigmask(SIGSTOP))); |
recalc_sigpending(current); |
spin_unlock_irq(¤t->sigmask_lock); |
|
return old; |
} |
#endif /* !defined(__alpha__) && !defined(__ia64__) */ |
|
#if !defined(__alpha__) && !defined(__ia64__) && !defined(__mips__) |
/* |
* For backwards compatibility. Functionality superseded by sigaction. |
*/ |
asmlinkage unsigned long |
sys_signal(int sig, __sighandler_t handler) |
{ |
struct k_sigaction new_sa, old_sa; |
int ret; |
|
new_sa.sa.sa_handler = handler; |
new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; |
|
ret = do_sigaction(sig, &new_sa, &old_sa); |
|
return ret ? ret : (unsigned long)old_sa.sa.sa_handler; |
} |
#endif /* !alpha && !__ia64__ && !defined(__mips__) */ |
/sys.c
0,0 → 1,1292
/* |
* linux/kernel/sys.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
*/ |
|
#include <linux/module.h> |
#include <linux/mm.h> |
#include <linux/utsname.h> |
#include <linux/mman.h> |
#include <linux/smp_lock.h> |
#include <linux/notifier.h> |
#include <linux/reboot.h> |
#include <linux/prctl.h> |
#include <linux/init.h> |
#include <linux/highuid.h> |
|
#include <asm/uaccess.h> |
#include <asm/io.h> |
|
#ifndef SET_UNALIGN_CTL |
# define SET_UNALIGN_CTL(a,b) (-EINVAL) |
#endif |
#ifndef GET_UNALIGN_CTL |
# define GET_UNALIGN_CTL(a,b) (-EINVAL) |
#endif |
#ifndef SET_FPEMU_CTL |
# define SET_FPEMU_CTL(a,b) (-EINVAL) |
#endif |
#ifndef GET_FPEMU_CTL |
# define GET_FPEMU_CTL(a,b) (-EINVAL) |
#endif |
#ifndef SET_FPEXC_CTL |
# define SET_FPEXC_CTL(a,b) (-EINVAL) |
#endif |
#ifndef GET_FPEXC_CTL |
# define GET_FPEXC_CTL(a,b) (-EINVAL) |
#endif |
|
/* |
* this is where the system-wide overflow UID and GID are defined, for |
* architectures that now have 32-bit UID/GID but didn't in the past |
*/ |
|
int overflowuid = DEFAULT_OVERFLOWUID; |
int overflowgid = DEFAULT_OVERFLOWGID; |
|
/* |
* the same as above, but for filesystems which can only store a 16-bit |
* UID and GID. as such, this is needed on all architectures |
*/ |
|
int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; |
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID; |
|
/* |
* this indicates whether you can reboot with ctrl-alt-del: the default is yes |
*/ |
|
int C_A_D = 1; |
int cad_pid = 1; |
|
|
/* |
* Notifier list for kernel code which wants to be called |
* at shutdown. This is used to stop any idling DMA operations |
* and the like. |
*/ |
|
static struct notifier_block *reboot_notifier_list; |
rwlock_t notifier_lock = RW_LOCK_UNLOCKED; |
|
/** |
* notifier_chain_register - Add notifier to a notifier chain |
* @list: Pointer to root list pointer |
* @n: New entry in notifier chain |
* |
* Adds a notifier to a notifier chain. |
* |
* Currently always returns zero. |
*/ |
|
int notifier_chain_register(struct notifier_block **list, struct notifier_block *n) |
{ |
write_lock(¬ifier_lock); |
while(*list) |
{ |
if(n->priority > (*list)->priority) |
break; |
list= &((*list)->next); |
} |
n->next = *list; |
*list=n; |
write_unlock(¬ifier_lock); |
return 0; |
} |
|
/** |
* notifier_chain_unregister - Remove notifier from a notifier chain |
* @nl: Pointer to root list pointer |
* @n: New entry in notifier chain |
* |
* Removes a notifier from a notifier chain. |
* |
* Returns zero on success, or %-ENOENT on failure. |
*/ |
|
int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) |
{ |
write_lock(¬ifier_lock); |
while((*nl)!=NULL) |
{ |
if((*nl)==n) |
{ |
*nl=n->next; |
write_unlock(¬ifier_lock); |
return 0; |
} |
nl=&((*nl)->next); |
} |
write_unlock(¬ifier_lock); |
return -ENOENT; |
} |
|
/** |
* notifier_call_chain - Call functions in a notifier chain |
* @n: Pointer to root pointer of notifier chain |
* @val: Value passed unmodified to notifier function |
* @v: Pointer passed unmodified to notifier function |
* |
* Calls each function in a notifier chain in turn. |
* |
* If the return value of the notifier can be and'd |
* with %NOTIFY_STOP_MASK, then notifier_call_chain |
* will return immediately, with the return value of |
* the notifier function which halted execution. |
* Otherwise, the return value is the return value |
* of the last notifier function called. |
*/ |
|
int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) |
{ |
int ret=NOTIFY_DONE; |
struct notifier_block *nb = *n; |
|
while(nb) |
{ |
ret=nb->notifier_call(nb,val,v); |
if(ret&NOTIFY_STOP_MASK) |
{ |
return ret; |
} |
nb=nb->next; |
} |
return ret; |
} |
|
/** |
* register_reboot_notifier - Register function to be called at reboot time |
* @nb: Info about notifier function to be called |
* |
* Registers a function with the list of functions |
* to be called at reboot time. |
* |
* Currently always returns zero, as notifier_chain_register |
* always returns zero. |
*/ |
|
int register_reboot_notifier(struct notifier_block * nb) |
{ |
return notifier_chain_register(&reboot_notifier_list, nb); |
} |
|
/** |
* unregister_reboot_notifier - Unregister previously registered reboot notifier |
* @nb: Hook to be unregistered |
* |
* Unregisters a previously registered reboot |
* notifier function. |
* |
* Returns zero on success, or %-ENOENT on failure. |
*/ |
|
int unregister_reboot_notifier(struct notifier_block * nb) |
{ |
return notifier_chain_unregister(&reboot_notifier_list, nb); |
} |
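 |
/* |
 * Illustrative sketch (not part of the original source, kept inside |
 * #if 0 so it is documentation only): a driver hooking into the reboot |
 * chain documented above so it can quiesce its hardware before the |
 * machine restarts or halts. All example_* names are hypothetical. |
 */ |
#if 0 |
static int example_reboot_event(struct notifier_block *self, |
				unsigned long event, void *ptr) |
{ |
	switch (event) { |
	case SYS_RESTART: |
	case SYS_HALT: |
	case SYS_POWER_OFF: |
		/* ... stop DMA, park the hardware ... */ |
		break; |
	} |
	return NOTIFY_DONE; |
} |
 |
static struct notifier_block example_reboot_notifier = { |
	notifier_call:	example_reboot_event, |
}; |
 |
static int __init example_reboot_init(void) |
{ |
	return register_reboot_notifier(&example_reboot_notifier); |
} |
#endif |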
|
asmlinkage long sys_ni_syscall(void) |
{ |
return -ENOSYS; |
} |
|
static int proc_sel(struct task_struct *p, int which, int who) |
{ |
if(p->pid) |
{ |
switch (which) { |
case PRIO_PROCESS: |
if (!who && p == current) |
return 1; |
return(p->pid == who); |
case PRIO_PGRP: |
if (!who) |
who = current->pgrp; |
return(p->pgrp == who); |
case PRIO_USER: |
if (!who) |
who = current->uid; |
return(p->uid == who); |
} |
} |
return 0; |
} |
|
asmlinkage long sys_setpriority(int which, int who, int niceval) |
{ |
struct task_struct *p; |
int error; |
|
if (which > 2 || which < 0) |
return -EINVAL; |
|
/* normalize: avoid signed division (rounding problems) */ |
error = -ESRCH; |
if (niceval < -20) |
niceval = -20; |
if (niceval > 19) |
niceval = 19; |
|
read_lock(&tasklist_lock); |
for_each_task(p) { |
if (!proc_sel(p, which, who)) |
continue; |
if (p->uid != current->euid && |
p->uid != current->uid && !capable(CAP_SYS_NICE)) { |
error = -EPERM; |
continue; |
} |
if (error == -ESRCH) |
error = 0; |
if (niceval < p->nice && !capable(CAP_SYS_NICE)) |
error = -EACCES; |
else |
p->nice = niceval; |
} |
read_unlock(&tasklist_lock); |
|
return error; |
} |
|
/* |
* Ugh. To avoid negative return values, "getpriority()" will |
* not return the normal nice-value, but a negated value that |
* has been offset by 20 (ie it returns 40..1 instead of -20..19) |
* to stay compatible. |
*/ |
asmlinkage long sys_getpriority(int which, int who) |
{ |
struct task_struct *p; |
long retval = -ESRCH; |
|
if (which > 2 || which < 0) |
return -EINVAL; |
|
read_lock(&tasklist_lock); |
for_each_task (p) { |
long niceval; |
if (!proc_sel(p, which, who)) |
continue; |
niceval = 20 - p->nice; |
if (niceval > retval) |
retval = niceval; |
} |
read_unlock(&tasklist_lock); |
|
return retval; |
} |
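 |
/* |
 * Illustrative note on the encoding above (not part of the original |
 * source): the raw system call returns 20 - nice, i.e. a value in the |
 * range 1..40, and the C library (glibc, for instance) is expected to |
 * convert it back before handing it to a caller of getpriority(3). |
 * Reading the kernel's value directly from user space: |
 * |
 *	long raw = syscall(SYS_getpriority, PRIO_PROCESS, 0); |
 *	int nice_value = 20 - raw;	back in the usual -20..19 range |
 */ |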
|
|
/* |
* Reboot system call: for obvious reasons only root may call it, |
* and even root needs to set up some magic numbers in the registers |
* so that some mistake won't make this reboot the whole machine. |
* You can also set the meaning of the ctrl-alt-del-key here. |
* |
* reboot doesn't sync: do that yourself before calling this. |
*/ |
asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg) |
{ |
char buffer[256]; |
|
/* We only trust the superuser with rebooting the system. */ |
if (!capable(CAP_SYS_BOOT)) |
return -EPERM; |
|
/* For safety, we require "magic" arguments. */ |
if (magic1 != LINUX_REBOOT_MAGIC1 || |
(magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A && |
magic2 != LINUX_REBOOT_MAGIC2B)) |
return -EINVAL; |
|
lock_kernel(); |
switch (cmd) { |
case LINUX_REBOOT_CMD_RESTART: |
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); |
printk(KERN_EMERG "Restarting system.\n"); |
machine_restart(NULL); |
break; |
|
case LINUX_REBOOT_CMD_CAD_ON: |
C_A_D = 1; |
break; |
|
case LINUX_REBOOT_CMD_CAD_OFF: |
C_A_D = 0; |
break; |
|
case LINUX_REBOOT_CMD_HALT: |
notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); |
printk(KERN_EMERG "System halted.\n"); |
machine_halt(); |
do_exit(0); |
break; |
|
case LINUX_REBOOT_CMD_POWER_OFF: |
notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); |
printk(KERN_EMERG "Power down.\n"); |
machine_power_off(); |
do_exit(0); |
break; |
|
case LINUX_REBOOT_CMD_RESTART2: |
if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) { |
unlock_kernel(); |
return -EFAULT; |
} |
buffer[sizeof(buffer) - 1] = '\0'; |
|
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer); |
printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer); |
machine_restart(buffer); |
break; |
|
default: |
unlock_kernel(); |
return -EINVAL; |
} |
unlock_kernel(); |
return 0; |
} |
|
static void deferred_cad(void *dummy) |
{ |
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); |
machine_restart(NULL); |
} |
|
/* |
* This function gets called by ctrl-alt-del - ie the keyboard interrupt. |
* As it's called within an interrupt, it may NOT sync: the only choice |
* is whether to reboot at once, or just ignore the ctrl-alt-del. |
*/ |
void ctrl_alt_del(void) |
{ |
static struct tq_struct cad_tq = { |
routine: deferred_cad, |
}; |
|
if (C_A_D) |
schedule_task(&cad_tq); |
else |
kill_proc(cad_pid, SIGINT, 1); |
} |
|
|
/* |
* Unprivileged users may change the real gid to the effective gid |
* or vice versa. (BSD-style) |
* |
* If you set the real gid at all, or set the effective gid to a value not |
* equal to the real gid, then the saved gid is set to the new effective gid. |
* |
* This makes it possible for a setgid program to completely drop its |
* privileges, which is often a useful assertion to make when you are doing |
* a security audit over a program. |
* |
* The general idea is that a program which uses just setregid() will be |
* 100% compatible with BSD. A program which uses just setgid() will be |
* 100% compatible with POSIX with saved IDs. |
* |
 * SMP: There are no races; the GIDs are checked only by filesystem |
 * operations (as far as semantic preservation is concerned). |
*/ |
asmlinkage long sys_setregid(gid_t rgid, gid_t egid) |
{ |
int old_rgid = current->gid; |
int old_egid = current->egid; |
int new_rgid = old_rgid; |
int new_egid = old_egid; |
|
if (rgid != (gid_t) -1) { |
if ((old_rgid == rgid) || |
(current->egid==rgid) || |
capable(CAP_SETGID)) |
new_rgid = rgid; |
else |
return -EPERM; |
} |
if (egid != (gid_t) -1) { |
if ((old_rgid == egid) || |
(current->egid == egid) || |
(current->sgid == egid) || |
capable(CAP_SETGID)) |
new_egid = egid; |
else { |
return -EPERM; |
} |
} |
if (new_egid != old_egid) |
{ |
current->mm->dumpable = 0; |
wmb(); |
} |
if (rgid != (gid_t) -1 || |
(egid != (gid_t) -1 && egid != old_rgid)) |
current->sgid = new_egid; |
current->fsgid = new_egid; |
current->egid = new_egid; |
current->gid = new_rgid; |
return 0; |
} |
|
/* |
* setgid() is implemented like SysV w/ SAVED_IDS |
* |
* SMP: Same implicit races as above. |
*/ |
asmlinkage long sys_setgid(gid_t gid) |
{ |
int old_egid = current->egid; |
|
if (capable(CAP_SETGID)) |
{ |
if(old_egid != gid) |
{ |
current->mm->dumpable=0; |
wmb(); |
} |
current->gid = current->egid = current->sgid = current->fsgid = gid; |
} |
else if ((gid == current->gid) || (gid == current->sgid)) |
{ |
if(old_egid != gid) |
{ |
current->mm->dumpable=0; |
wmb(); |
} |
current->egid = current->fsgid = gid; |
} |
else |
return -EPERM; |
return 0; |
} |
|
/* |
* cap_emulate_setxuid() fixes the effective / permitted capabilities of |
* a process after a call to setuid, setreuid, or setresuid. |
* |
* 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of |
* {r,e,s}uid != 0, the permitted and effective capabilities are |
* cleared. |
* |
* 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective |
* capabilities of the process are cleared. |
* |
* 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective |
* capabilities are set to the permitted capabilities. |
* |
* fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should |
* never happen. |
* |
* -astor |
* |
* cevans - New behaviour, Oct '99 |
* A process may, via prctl(), elect to keep its capabilities when it |
* calls setuid() and switches away from uid==0. Both permitted and |
* effective sets will be retained. |
* Without this change, it was impossible for a daemon to drop only some |
* of its privilege. The call to setuid(!=0) would drop all privileges! |
* Keeping uid 0 is not an option because uid 0 owns too many vital |
* files.. |
* Thanks to Olaf Kirch and Peter Benie for spotting this. |
*/ |
static inline void cap_emulate_setxuid(int old_ruid, int old_euid, |
int old_suid) |
{ |
if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && |
(current->uid != 0 && current->euid != 0 && current->suid != 0) && |
!current->keep_capabilities) { |
cap_clear(current->cap_permitted); |
cap_clear(current->cap_effective); |
} |
if (old_euid == 0 && current->euid != 0) { |
cap_clear(current->cap_effective); |
} |
if (old_euid != 0 && current->euid == 0) { |
current->cap_effective = current->cap_permitted; |
} |
} |
|
static int set_user(uid_t new_ruid, int dumpclear) |
{ |
struct user_struct *new_user; |
|
new_user = alloc_uid(new_ruid); |
if (!new_user) |
return -EAGAIN; |
switch_uid(new_user); |
|
if(dumpclear) |
{ |
current->mm->dumpable = 0; |
wmb(); |
} |
current->uid = new_ruid; |
return 0; |
} |
|
/* |
* Unprivileged users may change the real uid to the effective uid |
* or vice versa. (BSD-style) |
* |
* If you set the real uid at all, or set the effective uid to a value not |
* equal to the real uid, then the saved uid is set to the new effective uid. |
* |
* This makes it possible for a setuid program to completely drop its |
* privileges, which is often a useful assertion to make when you are doing |
* a security audit over a program. |
* |
* The general idea is that a program which uses just setreuid() will be |
* 100% compatible with BSD. A program which uses just setuid() will be |
* 100% compatible with POSIX with saved IDs. |
*/ |
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) |
{ |
int old_ruid, old_euid, old_suid, new_ruid, new_euid; |
|
new_ruid = old_ruid = current->uid; |
new_euid = old_euid = current->euid; |
old_suid = current->suid; |
|
if (ruid != (uid_t) -1) { |
new_ruid = ruid; |
if ((old_ruid != ruid) && |
(current->euid != ruid) && |
!capable(CAP_SETUID)) |
return -EPERM; |
} |
|
if (euid != (uid_t) -1) { |
new_euid = euid; |
if ((old_ruid != euid) && |
(current->euid != euid) && |
(current->suid != euid) && |
!capable(CAP_SETUID)) |
return -EPERM; |
} |
|
if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) |
return -EAGAIN; |
|
if (new_euid != old_euid) |
{ |
current->mm->dumpable=0; |
wmb(); |
} |
current->fsuid = current->euid = new_euid; |
if (ruid != (uid_t) -1 || |
(euid != (uid_t) -1 && euid != old_ruid)) |
current->suid = current->euid; |
current->fsuid = current->euid; |
|
if (!issecure(SECURE_NO_SETUID_FIXUP)) { |
cap_emulate_setxuid(old_ruid, old_euid, old_suid); |
} |
|
return 0; |
} |
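 |
/* |
 * Illustrative user-space usage of the semantics described above (not |
 * part of the original source, shown only as documentation): a |
 * set-uid-root program dropping its privileges for good. Because the |
 * real uid is passed explicitly, the saved uid is overwritten with the |
 * new effective uid too, so root cannot be regained afterwards: |
 * |
 *	uid_t real = getuid(); |
 *	if (setreuid(real, real) < 0) |
 *		...	still privileged, bail out |
 */ |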
|
|
|
/* |
* setuid() is implemented like SysV with SAVED_IDS |
* |
 * Note that SAVED_IDS is deficient in that a setuid root program |
* like sendmail, for example, cannot set its uid to be a normal |
* user and then switch back, because if you're root, setuid() sets |
* the saved uid too. If you don't like this, blame the bright people |
* in the POSIX committee and/or USG. Note that the BSD-style setreuid() |
* will allow a root program to temporarily drop privileges and be able to |
* regain them by swapping the real and effective uid. |
*/ |
asmlinkage long sys_setuid(uid_t uid) |
{ |
int old_euid = current->euid; |
int old_ruid, old_suid, new_ruid, new_suid; |
|
old_ruid = new_ruid = current->uid; |
old_suid = current->suid; |
new_suid = old_suid; |
|
if (capable(CAP_SETUID)) { |
if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) |
return -EAGAIN; |
new_suid = uid; |
} else if ((uid != current->uid) && (uid != new_suid)) |
return -EPERM; |
|
if (old_euid != uid) |
{ |
current->mm->dumpable = 0; |
wmb(); |
} |
current->fsuid = current->euid = uid; |
current->suid = new_suid; |
|
if (!issecure(SECURE_NO_SETUID_FIXUP)) { |
cap_emulate_setxuid(old_ruid, old_euid, old_suid); |
} |
|
return 0; |
} |
|
|
/* |
* This function implements a generic ability to update ruid, euid, |
* and suid. This allows you to implement the 4.4 compatible seteuid(). |
*/ |
asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) |
{ |
int old_ruid = current->uid; |
int old_euid = current->euid; |
int old_suid = current->suid; |
|
if (!capable(CAP_SETUID)) { |
if ((ruid != (uid_t) -1) && (ruid != current->uid) && |
(ruid != current->euid) && (ruid != current->suid)) |
return -EPERM; |
if ((euid != (uid_t) -1) && (euid != current->uid) && |
(euid != current->euid) && (euid != current->suid)) |
return -EPERM; |
if ((suid != (uid_t) -1) && (suid != current->uid) && |
(suid != current->euid) && (suid != current->suid)) |
return -EPERM; |
} |
if (ruid != (uid_t) -1) { |
if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) |
return -EAGAIN; |
} |
if (euid != (uid_t) -1) { |
if (euid != current->euid) |
{ |
current->mm->dumpable = 0; |
wmb(); |
} |
current->euid = euid; |
} |
current->fsuid = current->euid; |
if (suid != (uid_t) -1) |
current->suid = suid; |
|
if (!issecure(SECURE_NO_SETUID_FIXUP)) { |
cap_emulate_setxuid(old_ruid, old_euid, old_suid); |
} |
|
return 0; |
} |
|
asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) |
{ |
int retval; |
|
if (!(retval = put_user(current->uid, ruid)) && |
!(retval = put_user(current->euid, euid))) |
retval = put_user(current->suid, suid); |
|
return retval; |
} |
|
/* |
* Same as above, but for rgid, egid, sgid. |
*/ |
asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) |
{ |
if (!capable(CAP_SETGID)) { |
if ((rgid != (gid_t) -1) && (rgid != current->gid) && |
(rgid != current->egid) && (rgid != current->sgid)) |
return -EPERM; |
if ((egid != (gid_t) -1) && (egid != current->gid) && |
(egid != current->egid) && (egid != current->sgid)) |
return -EPERM; |
if ((sgid != (gid_t) -1) && (sgid != current->gid) && |
(sgid != current->egid) && (sgid != current->sgid)) |
return -EPERM; |
} |
if (egid != (gid_t) -1) { |
if (egid != current->egid) |
{ |
current->mm->dumpable = 0; |
wmb(); |
} |
current->egid = egid; |
} |
current->fsgid = current->egid; |
if (rgid != (gid_t) -1) |
current->gid = rgid; |
if (sgid != (gid_t) -1) |
current->sgid = sgid; |
return 0; |
} |
|
asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid) |
{ |
int retval; |
|
if (!(retval = put_user(current->gid, rgid)) && |
!(retval = put_user(current->egid, egid))) |
retval = put_user(current->sgid, sgid); |
|
return retval; |
} |
|
|
/* |
* "setfsuid()" sets the fsuid - the uid used for filesystem checks. This |
* is used for "access()" and for the NFS daemon (letting nfsd stay at |
* whatever uid it wants to). It normally shadows "euid", except when |
* explicitly set by setfsuid() or for access.. |
*/ |
asmlinkage long sys_setfsuid(uid_t uid) |
{ |
int old_fsuid; |
|
old_fsuid = current->fsuid; |
if (uid == current->uid || uid == current->euid || |
uid == current->suid || uid == current->fsuid || |
capable(CAP_SETUID)) |
{ |
if (uid != old_fsuid) |
{ |
current->mm->dumpable = 0; |
wmb(); |
} |
current->fsuid = uid; |
} |
|
/* We emulate fsuid by essentially doing a scaled-down version |
* of what we did in setresuid and friends. However, we only |
* operate on the fs-specific bits of the process' effective |
* capabilities |
* |
* FIXME - is fsuser used for all CAP_FS_MASK capabilities? |
* if not, we might be a bit too harsh here. |
*/ |
|
if (!issecure(SECURE_NO_SETUID_FIXUP)) { |
if (old_fsuid == 0 && current->fsuid != 0) { |
cap_t(current->cap_effective) &= ~CAP_FS_MASK; |
} |
if (old_fsuid != 0 && current->fsuid == 0) { |
cap_t(current->cap_effective) |= |
(cap_t(current->cap_permitted) & CAP_FS_MASK); |
} |
} |
|
return old_fsuid; |
} |
|
/* |
 * Same as setfsuid(), but for the filesystem gid. |
 */ |
asmlinkage long sys_setfsgid(gid_t gid) |
{ |
int old_fsgid; |
|
old_fsgid = current->fsgid; |
if (gid == current->gid || gid == current->egid || |
gid == current->sgid || gid == current->fsgid || |
capable(CAP_SETGID)) |
{ |
if (gid != old_fsgid) |
{ |
current->mm->dumpable = 0; |
wmb(); |
} |
current->fsgid = gid; |
} |
return old_fsgid; |
} |
|
asmlinkage long sys_times(struct tms * tbuf) |
{ |
/* |
* In the SMP world we might just be unlucky and have one of |
* the times increment as we use it. Since the value is an |
	 * atomically safe type this is just fine. Conceptually it's |
* as if the syscall took an instant longer to occur. |
*/ |
if (tbuf) |
if (copy_to_user(tbuf, ¤t->times, sizeof(struct tms))) |
return -EFAULT; |
return jiffies; |
} |
|
/* |
* This needs some heavy checking ... |
* I just haven't the stomach for it. I also don't fully |
* understand sessions/pgrp etc. Let somebody who does explain it. |
* |
* OK, I think I have the protection semantics right.... this is really |
* only important on a multi-user system anyway, to make sure one user |
* can't send a signal to a process owned by another. -TYT, 12/12/91 |
* |
* Auch. Had to add the 'did_exec' flag to conform completely to POSIX. |
* LBT 04.03.94 |
*/ |
|
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) |
{ |
struct task_struct * p; |
int err = -EINVAL; |
|
if (!pid) |
pid = current->pid; |
if (!pgid) |
pgid = pid; |
if (pgid < 0) |
return -EINVAL; |
|
/* From this point forward we keep holding onto the tasklist lock |
* so that our parent does not change from under us. -DaveM |
*/ |
read_lock(&tasklist_lock); |
|
err = -ESRCH; |
p = find_task_by_pid(pid); |
if (!p) |
goto out; |
|
if (p->p_pptr == current || p->p_opptr == current) { |
err = -EPERM; |
if (p->session != current->session) |
goto out; |
err = -EACCES; |
if (p->did_exec) |
goto out; |
} else if (p != current) |
goto out; |
err = -EPERM; |
if (p->leader) |
goto out; |
if (pgid != pid) { |
struct task_struct * tmp; |
for_each_task (tmp) { |
if (tmp->pgrp == pgid && |
tmp->session == current->session) |
goto ok_pgid; |
} |
goto out; |
} |
|
ok_pgid: |
p->pgrp = pgid; |
err = 0; |
out: |
/* All paths lead to here, thus we are safe. -DaveM */ |
read_unlock(&tasklist_lock); |
return err; |
} |
|
asmlinkage long sys_getpgid(pid_t pid) |
{ |
if (!pid) { |
return current->pgrp; |
} else { |
int retval; |
struct task_struct *p; |
|
read_lock(&tasklist_lock); |
p = find_task_by_pid(pid); |
|
retval = -ESRCH; |
if (p) |
retval = p->pgrp; |
read_unlock(&tasklist_lock); |
return retval; |
} |
} |
|
asmlinkage long sys_getpgrp(void) |
{ |
/* SMP - assuming writes are word atomic this is fine */ |
return current->pgrp; |
} |
|
asmlinkage long sys_getsid(pid_t pid) |
{ |
if (!pid) { |
return current->session; |
} else { |
int retval; |
struct task_struct *p; |
|
read_lock(&tasklist_lock); |
p = find_task_by_pid(pid); |
|
retval = -ESRCH; |
if(p) |
retval = p->session; |
read_unlock(&tasklist_lock); |
return retval; |
} |
} |
|
asmlinkage long sys_setsid(void) |
{ |
struct task_struct * p; |
int err = -EPERM; |
|
read_lock(&tasklist_lock); |
for_each_task(p) { |
if (p->pgrp == current->pid) |
goto out; |
} |
|
current->leader = 1; |
current->session = current->pgrp = current->pid; |
current->tty = NULL; |
current->tty_old_pgrp = 0; |
err = current->pgrp; |
out: |
read_unlock(&tasklist_lock); |
return err; |
} |
|
/* |
* Supplementary group IDs |
*/ |
asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist) |
{ |
int i; |
|
/* |
* SMP: Nobody else can change our grouplist. Thus we are |
* safe. |
*/ |
|
if (gidsetsize < 0) |
return -EINVAL; |
i = current->ngroups; |
if (gidsetsize) { |
if (i > gidsetsize) |
return -EINVAL; |
if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i)) |
return -EFAULT; |
} |
return i; |
} |
|
/* |
* SMP: Our groups are not shared. We can copy to/from them safely |
* without another task interfering. |
*/ |
|
asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist) |
{ |
if (!capable(CAP_SETGID)) |
return -EPERM; |
if ((unsigned) gidsetsize > NGROUPS) |
return -EINVAL; |
if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t))) |
return -EFAULT; |
current->ngroups = gidsetsize; |
return 0; |
} |
|
static int supplemental_group_member(gid_t grp) |
{ |
int i = current->ngroups; |
|
if (i) { |
gid_t *groups = current->groups; |
do { |
if (*groups == grp) |
return 1; |
groups++; |
i--; |
} while (i); |
} |
return 0; |
} |
|
/* |
* Check whether we're fsgid/egid or in the supplemental group.. |
*/ |
int in_group_p(gid_t grp) |
{ |
int retval = 1; |
if (grp != current->fsgid) |
retval = supplemental_group_member(grp); |
return retval; |
} |
|
int in_egroup_p(gid_t grp) |
{ |
int retval = 1; |
if (grp != current->egid) |
retval = supplemental_group_member(grp); |
return retval; |
} |
|
DECLARE_RWSEM(uts_sem); |
|
asmlinkage long sys_newuname(struct new_utsname * name) |
{ |
int errno = 0; |
|
down_read(&uts_sem); |
if (copy_to_user(name,&system_utsname,sizeof *name)) |
errno = -EFAULT; |
up_read(&uts_sem); |
return errno; |
} |
|
asmlinkage long sys_sethostname(char *name, int len) |
{ |
int errno; |
char tmp[__NEW_UTS_LEN]; |
|
if (!capable(CAP_SYS_ADMIN)) |
return -EPERM; |
if (len < 0 || len > __NEW_UTS_LEN) |
return -EINVAL; |
down_write(&uts_sem); |
errno = -EFAULT; |
if (!copy_from_user(tmp, name, len)) { |
memcpy(system_utsname.nodename, tmp, len); |
system_utsname.nodename[len] = 0; |
errno = 0; |
} |
up_write(&uts_sem); |
return errno; |
} |
|
asmlinkage long sys_gethostname(char *name, int len) |
{ |
int i, errno; |
|
if (len < 0) |
return -EINVAL; |
down_read(&uts_sem); |
i = 1 + strlen(system_utsname.nodename); |
if (i > len) |
i = len; |
errno = 0; |
if (copy_to_user(name, system_utsname.nodename, i)) |
errno = -EFAULT; |
up_read(&uts_sem); |
return errno; |
} |
|
/* |
* Only setdomainname; getdomainname can be implemented by calling |
* uname() |
*/ |
asmlinkage long sys_setdomainname(char *name, int len) |
{ |
int errno; |
char tmp[__NEW_UTS_LEN]; |
|
if (!capable(CAP_SYS_ADMIN)) |
return -EPERM; |
if (len < 0 || len > __NEW_UTS_LEN) |
return -EINVAL; |
|
down_write(&uts_sem); |
errno = -EFAULT; |
if (!copy_from_user(tmp, name, len)) { |
memcpy(system_utsname.domainname, tmp, len); |
system_utsname.domainname[len] = 0; |
errno = 0; |
} |
up_write(&uts_sem); |
return errno; |
} |
|
asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim) |
{ |
if (resource >= RLIM_NLIMITS) |
return -EINVAL; |
else |
return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim)) |
? -EFAULT : 0; |
} |
|
#if !defined(__ia64__) |
|
/* |
* Back compatibility for getrlimit. Needed for some apps. |
*/ |
|
asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim) |
{ |
struct rlimit x; |
if (resource >= RLIM_NLIMITS) |
return -EINVAL; |
|
memcpy(&x, current->rlim + resource, sizeof(*rlim)); |
if(x.rlim_cur > 0x7FFFFFFF) |
x.rlim_cur = 0x7FFFFFFF; |
if(x.rlim_max > 0x7FFFFFFF) |
x.rlim_max = 0x7FFFFFFF; |
return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; |
} |
|
#endif |
|
asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim) |
{ |
struct rlimit new_rlim, *old_rlim; |
|
if (resource >= RLIM_NLIMITS) |
return -EINVAL; |
if(copy_from_user(&new_rlim, rlim, sizeof(*rlim))) |
return -EFAULT; |
if (new_rlim.rlim_cur > new_rlim.rlim_max) |
return -EINVAL; |
old_rlim = current->rlim + resource; |
if (((new_rlim.rlim_cur > old_rlim->rlim_max) || |
(new_rlim.rlim_max > old_rlim->rlim_max)) && |
!capable(CAP_SYS_RESOURCE)) |
return -EPERM; |
if (resource == RLIMIT_NOFILE) { |
if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN) |
return -EPERM; |
} |
*old_rlim = new_rlim; |
return 0; |
} |
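 
/* |
 * Illustrative user-space sketch (not part of this file): raising the |
 * soft RLIMIT_NOFILE limit up to the hard limit, which the checks above |
 * allow without CAP_SYS_RESOURCE. |
 */ |
#if 0 |
#include <sys/resource.h> |
 |
static int raise_nofile(void) |
{ |
	struct rlimit rl; |
 |
	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) |
		return -1; |
	rl.rlim_cur = rl.rlim_max;	/* soft limit up to the hard limit */ |
	return setrlimit(RLIMIT_NOFILE, &rl); |
} |
#endif |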
|
/* |
* It would make sense to put struct rusage in the task_struct, |
* except that would make the task_struct be *really big*. After |
* task_struct gets moved into malloc'ed memory, it would |
* make sense to do this. It will make moving the rest of the information |
* a lot simpler! (Which we're not doing right now because we're not |
* measuring them yet). |
* |
* This is SMP safe. Either we are called from sys_getrusage on ourselves |
* below (we know we aren't going to exit/disappear and only we change our |
* rusage counters), or we are called from wait4() on a process which is |
* either stopped or zombied. In the zombied case the task won't get |
 * reaped till shortly after the call to getrusage(). In both cases the |
 * task being examined is in a frozen state, so the counters won't change. |
* |
* FIXME! Get the fault counts properly! |
*/ |
int getrusage(struct task_struct *p, int who, struct rusage *ru) |
{ |
struct rusage r; |
|
memset((char *) &r, 0, sizeof(r)); |
switch (who) { |
case RUSAGE_SELF: |
r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime); |
r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime); |
r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime); |
r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime); |
r.ru_minflt = p->min_flt; |
r.ru_majflt = p->maj_flt; |
r.ru_nswap = p->nswap; |
break; |
case RUSAGE_CHILDREN: |
r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime); |
r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime); |
r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime); |
r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime); |
r.ru_minflt = p->cmin_flt; |
r.ru_majflt = p->cmaj_flt; |
r.ru_nswap = p->cnswap; |
break; |
default: |
r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime); |
r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime); |
r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime); |
r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime); |
r.ru_minflt = p->min_flt + p->cmin_flt; |
r.ru_majflt = p->maj_flt + p->cmaj_flt; |
r.ru_nswap = p->nswap + p->cnswap; |
break; |
} |
return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; |
} |
|
asmlinkage long sys_getrusage(int who, struct rusage *ru) |
{ |
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) |
return -EINVAL; |
return getrusage(current, who, ru); |
} |
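 
/* |
 * Illustrative user-space sketch (not part of this file): reading the |
 * counters that getrusage() above fills in for the calling process. |
 */ |
#if 0 |
#include <sys/resource.h> |
#include <stdio.h> |
 |
static void show_rusage(void) |
{ |
	struct rusage ru; |
 |
	if (getrusage(RUSAGE_SELF, &ru) == 0) |
		printf("user %ld.%06lds sys %ld.%06lds minflt %ld majflt %ld\n", |
		       (long) ru.ru_utime.tv_sec, (long) ru.ru_utime.tv_usec, |
		       (long) ru.ru_stime.tv_sec, (long) ru.ru_stime.tv_usec, |
		       ru.ru_minflt, ru.ru_majflt); |
} |
#endif |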
|
asmlinkage long sys_umask(int mask) |
{ |
mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); |
return mask; |
} |
|
asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, |
unsigned long arg4, unsigned long arg5) |
{ |
int error = 0; |
int sig; |
|
switch (option) { |
case PR_SET_PDEATHSIG: |
sig = arg2; |
if (sig < 0 || sig > _NSIG) { |
error = -EINVAL; |
break; |
} |
current->pdeath_signal = sig; |
break; |
case PR_GET_PDEATHSIG: |
error = put_user(current->pdeath_signal, (int *)arg2); |
break; |
case PR_GET_DUMPABLE: |
if (is_dumpable(current)) |
error = 1; |
break; |
case PR_SET_DUMPABLE: |
if (arg2 != 0 && arg2 != 1) { |
error = -EINVAL; |
break; |
} |
current->mm->dumpable = arg2; |
break; |
|
case PR_SET_UNALIGN: |
error = SET_UNALIGN_CTL(current, arg2); |
break; |
case PR_GET_UNALIGN: |
error = GET_UNALIGN_CTL(current, arg2); |
break; |
case PR_SET_FPEMU: |
error = SET_FPEMU_CTL(current, arg2); |
break; |
case PR_GET_FPEMU: |
error = GET_FPEMU_CTL(current, arg2); |
break; |
case PR_SET_FPEXC: |
error = SET_FPEXC_CTL(current, arg2); |
break; |
case PR_GET_FPEXC: |
error = GET_FPEXC_CTL(current, arg2); |
break; |
|
case PR_GET_KEEPCAPS: |
if (current->keep_capabilities) |
error = 1; |
break; |
case PR_SET_KEEPCAPS: |
if (arg2 != 0 && arg2 != 1) { |
error = -EINVAL; |
break; |
} |
current->keep_capabilities = arg2; |
break; |
default: |
error = -EINVAL; |
break; |
} |
return error; |
} |
|
EXPORT_SYMBOL(notifier_chain_register); |
EXPORT_SYMBOL(notifier_chain_unregister); |
EXPORT_SYMBOL(notifier_call_chain); |
EXPORT_SYMBOL(register_reboot_notifier); |
EXPORT_SYMBOL(unregister_reboot_notifier); |
EXPORT_SYMBOL(in_group_p); |
EXPORT_SYMBOL(in_egroup_p); |
/ptrace.c
0,0 → 1,234
/* |
* linux/kernel/ptrace.c |
* |
* (C) Copyright 1999 Linus Torvalds |
* |
* Common interfaces for "ptrace()" which we do not want |
* to continually duplicate across every architecture. |
*/ |
|
#include <linux/sched.h> |
#include <linux/errno.h> |
#include <linux/mm.h> |
#include <linux/highmem.h> |
#include <linux/smp_lock.h> |
|
#include <asm/pgtable.h> |
#include <asm/uaccess.h> |
|
/* |
* Check that we have indeed attached to the thing.. |
*/ |
int ptrace_check_attach(struct task_struct *child, int kill) |
{ |
|
if (!(child->ptrace & PT_PTRACED)) |
return -ESRCH; |
|
if (child->p_pptr != current) |
return -ESRCH; |
|
if (!kill) { |
if (child->state != TASK_STOPPED) |
return -ESRCH; |
#ifdef CONFIG_SMP |
/* Make sure the child gets off its CPU.. */ |
for (;;) { |
task_lock(child); |
if (!task_has_cpu(child)) |
break; |
task_unlock(child); |
do { |
if (child->state != TASK_STOPPED) |
return -ESRCH; |
barrier(); |
cpu_relax(); |
} while (task_has_cpu(child)); |
} |
task_unlock(child); |
#endif |
} |
|
/* All systems go.. */ |
return 0; |
} |
|
int ptrace_attach(struct task_struct *task) |
{ |
task_lock(task); |
if (task->pid <= 1) |
goto bad; |
if (task == current) |
goto bad; |
if (!task->mm) |
goto bad; |
if(((current->uid != task->euid) || |
(current->uid != task->suid) || |
(current->uid != task->uid) || |
(current->gid != task->egid) || |
(current->gid != task->sgid) || |
(!cap_issubset(task->cap_permitted, current->cap_permitted)) || |
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) |
goto bad; |
rmb(); |
if (!is_dumpable(task) && !capable(CAP_SYS_PTRACE)) |
goto bad; |
/* the same process cannot be attached many times */ |
if (task->ptrace & PT_PTRACED) |
goto bad; |
|
/* Go */ |
task->ptrace |= PT_PTRACED; |
if (capable(CAP_SYS_PTRACE)) |
task->ptrace |= PT_PTRACE_CAP; |
task_unlock(task); |
|
write_lock_irq(&tasklist_lock); |
if (task->p_pptr != current) { |
REMOVE_LINKS(task); |
task->p_pptr = current; |
SET_LINKS(task); |
} |
write_unlock_irq(&tasklist_lock); |
|
send_sig(SIGSTOP, task, 1); |
return 0; |
|
bad: |
task_unlock(task); |
return -EPERM; |
} |
|
int ptrace_detach(struct task_struct *child, unsigned int data) |
{ |
if ((unsigned long) data > _NSIG) |
return -EIO; |
|
/* Architecture-specific hardware disable .. */ |
ptrace_disable(child); |
|
/* .. re-parent .. */ |
child->ptrace = 0; |
child->exit_code = data; |
write_lock_irq(&tasklist_lock); |
REMOVE_LINKS(child); |
child->p_pptr = child->p_opptr; |
SET_LINKS(child); |
write_unlock_irq(&tasklist_lock); |
|
/* .. and wake it up. */ |
wake_up_process(child); |
return 0; |
} |
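 
/* |
 * Illustrative user-space sketch (not part of this file, error handling |
 * omitted): the attach / wait / peek / detach sequence that exercises |
 * ptrace_attach(), access_process_vm() and ptrace_detach() above/below. |
 */ |
#if 0 |
#include <sys/ptrace.h> |
#include <sys/types.h> |
#include <sys/wait.h> |
 |
static long peek_word(pid_t pid, void *addr) |
{ |
	int status; |
	long word; |
 |
	ptrace(PTRACE_ATTACH, pid, NULL, NULL);	/* target gets SIGSTOP */ |
	waitpid(pid, &status, 0);		/* wait until it stops */ |
	word = ptrace(PTRACE_PEEKDATA, pid, addr, NULL); |
	ptrace(PTRACE_DETACH, pid, NULL, NULL);	/* resume the target */ |
	return word; |
} |
#endif |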
|
/* |
* Access another process' address space. |
 * Source/target buffer must be in kernel space. |
 * Do not walk the page tables directly; use get_user_pages(). |
*/ |
|
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) |
{ |
struct mm_struct *mm; |
struct vm_area_struct *vma; |
struct page *page; |
void *old_buf = buf; |
|
/* Worry about races with exit() */ |
task_lock(tsk); |
mm = tsk->mm; |
if (mm) |
atomic_inc(&mm->mm_users); |
task_unlock(tsk); |
if (!mm) |
return 0; |
|
down_read(&mm->mmap_sem); |
/* ignore errors, just check how much was successfully transferred */ |
while (len) { |
int bytes, ret, offset; |
void *maddr; |
|
ret = get_user_pages(current, mm, addr, 1, |
write, 1, &page, &vma); |
if (ret <= 0) |
break; |
|
bytes = len; |
offset = addr & (PAGE_SIZE-1); |
if (bytes > PAGE_SIZE-offset) |
bytes = PAGE_SIZE-offset; |
|
flush_cache_page(vma, addr); |
|
maddr = kmap(page); |
if (write) { |
memcpy(maddr + offset, buf, bytes); |
flush_page_to_ram(page); |
flush_icache_user_range(vma, page, addr, len); |
set_page_dirty(page); |
} else { |
memcpy(buf, maddr + offset, bytes); |
flush_page_to_ram(page); |
} |
kunmap(page); |
put_page(page); |
len -= bytes; |
buf += bytes; |
addr += bytes; |
} |
up_read(&mm->mmap_sem); |
mmput(mm); |
|
return buf - old_buf; |
} |
|
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len) |
{ |
int copied = 0; |
|
while (len > 0) { |
char buf[128]; |
int this_len, retval; |
|
this_len = (len > sizeof(buf)) ? sizeof(buf) : len; |
retval = access_process_vm(tsk, src, buf, this_len, 0); |
if (!retval) { |
if (copied) |
break; |
return -EIO; |
} |
if (copy_to_user(dst, buf, retval)) |
return -EFAULT; |
copied += retval; |
src += retval; |
dst += retval; |
len -= retval; |
} |
return copied; |
} |
|
int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len) |
{ |
int copied = 0; |
|
while (len > 0) { |
char buf[128]; |
int this_len, retval; |
|
this_len = (len > sizeof(buf)) ? sizeof(buf) : len; |
if (copy_from_user(buf, src, this_len)) |
return -EFAULT; |
retval = access_process_vm(tsk, dst, buf, this_len, 1); |
if (!retval) { |
if (copied) |
break; |
return -EIO; |
} |
copied += retval; |
src += retval; |
dst += retval; |
len -= retval; |
} |
return copied; |
} |
/user.c
0,0 → 1,154
/* |
* The "user cache". |
* |
* (C) Copyright 1991-2000 Linus Torvalds |
* |
* We have a per-user structure to keep track of how many |
* processes, files etc the user has claimed, in order to be |
* able to have per-user limits for system resources. |
*/ |
|
#include <linux/init.h> |
#include <linux/sched.h> |
#include <linux/slab.h> |
|
/* |
* UID task count cache, to get fast user lookup in "alloc_uid" |
 * when changing user IDs (i.e. setuid() and friends). |
*/ |
#define UIDHASH_BITS 8 |
#define UIDHASH_SZ (1 << UIDHASH_BITS) |
#define UIDHASH_MASK (UIDHASH_SZ - 1) |
#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) ^ uid) & UIDHASH_MASK) |
#define uidhashentry(uid) (uidhash_table + __uidhashfn(uid)) |
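 
/* |
 * Example (with UIDHASH_BITS == 8): uid 1000 hashes to |
 * ((1000 >> 8) ^ 1000) & 255 == (3 ^ 1000) & 255 == 1003 & 255 == 235, |
 * so uidhashentry(1000) is &uidhash_table[235]. |
 */ |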
|
static kmem_cache_t *uid_cachep; |
static struct user_struct *uidhash_table[UIDHASH_SZ]; |
static spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED; |
|
struct user_struct root_user = { |
__count: ATOMIC_INIT(1), |
processes: ATOMIC_INIT(1), |
files: ATOMIC_INIT(0) |
}; |
|
/* |
* These routines must be called with the uidhash spinlock held! |
*/ |
static inline void uid_hash_insert(struct user_struct *up, struct user_struct **hashent) |
{ |
struct user_struct *next = *hashent; |
|
up->next = next; |
if (next) |
next->pprev = &up->next; |
up->pprev = hashent; |
*hashent = up; |
} |
|
static inline void uid_hash_remove(struct user_struct *up) |
{ |
struct user_struct *next = up->next; |
struct user_struct **pprev = up->pprev; |
|
if (next) |
next->pprev = pprev; |
*pprev = next; |
} |
|
static inline struct user_struct *uid_hash_find(uid_t uid, struct user_struct **hashent) |
{ |
struct user_struct *next; |
|
next = *hashent; |
for (;;) { |
struct user_struct *up = next; |
if (next) { |
next = up->next; |
if (up->uid != uid) |
continue; |
atomic_inc(&up->__count); |
} |
return up; |
} |
} |
|
void free_uid(struct user_struct *up) |
{ |
if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { |
uid_hash_remove(up); |
kmem_cache_free(uid_cachep, up); |
spin_unlock(&uidhash_lock); |
} |
} |
|
struct user_struct * alloc_uid(uid_t uid) |
{ |
struct user_struct **hashent = uidhashentry(uid); |
struct user_struct *up; |
|
spin_lock(&uidhash_lock); |
up = uid_hash_find(uid, hashent); |
spin_unlock(&uidhash_lock); |
|
if (!up) { |
struct user_struct *new; |
|
new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL); |
if (!new) |
return NULL; |
new->uid = uid; |
atomic_set(&new->__count, 1); |
atomic_set(&new->processes, 0); |
atomic_set(&new->files, 0); |
|
/* |
* Before adding this, check whether we raced |
* on adding the same user already.. |
*/ |
spin_lock(&uidhash_lock); |
up = uid_hash_find(uid, hashent); |
if (up) { |
kmem_cache_free(uid_cachep, new); |
} else { |
uid_hash_insert(new, hashent); |
up = new; |
} |
spin_unlock(&uidhash_lock); |
|
} |
return up; |
} |
|
void switch_uid(struct user_struct *new_user) |
{ |
struct user_struct *old_user; |
|
/* What if a process setreuid()'s and this brings the |
* new uid over his NPROC rlimit? We can check this now |
* cheaply with the new uid cache, so if it matters |
* we should be checking for it. -DaveM |
*/ |
old_user = current->user; |
atomic_inc(&new_user->__count); |
atomic_inc(&new_user->processes); |
atomic_dec(&old_user->processes); |
current->user = new_user; |
free_uid(old_user); |
} |
|
|
static int __init uid_cache_init(void) |
{ |
uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), |
0, |
SLAB_HWCACHE_ALIGN, NULL, NULL); |
if(!uid_cachep) |
panic("Cannot create uid taskcount SLAB cache\n"); |
|
/* Insert the root user immediately - init already runs with this */ |
uid_hash_insert(&root_user, uidhashentry(0)); |
return 0; |
} |
|
module_init(uid_cache_init); |
/timer.c
0,0 → 1,876
/* |
* linux/kernel/timer.c |
* |
* Kernel internal timers, kernel timekeeping, basic process system calls |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
* |
* 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. |
* |
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96 |
* "A Kernel Model for Precision Timekeeping" by Dave Mills |
* 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to |
* serialize accesses to xtime/lost_ticks). |
* Copyright (C) 1998 Andrea Arcangeli |
* 1999-03-10 Improved NTP compatibility by Ulrich Windl |
*/ |
|
#include <linux/config.h> |
#include <linux/mm.h> |
#include <linux/timex.h> |
#include <linux/delay.h> |
#include <linux/smp_lock.h> |
#include <linux/interrupt.h> |
#include <linux/kernel_stat.h> |
|
#include <asm/uaccess.h> |
|
/* |
* Timekeeping variables |
*/ |
|
long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ |
|
/* The current time */ |
struct timeval xtime __attribute__ ((aligned (16))); |
|
/* Don't completely fail for HZ > 500. */ |
int tickadj = 500/HZ ? : 1; /* microsecs */ |
|
DECLARE_TASK_QUEUE(tq_timer); |
DECLARE_TASK_QUEUE(tq_immediate); |
|
/* |
* phase-lock loop variables |
*/ |
/* TIME_ERROR prevents overwriting the CMOS clock */ |
int time_state = TIME_OK; /* clock synchronization status */ |
int time_status = STA_UNSYNC; /* clock status bits */ |
long time_offset; /* time adjustment (us) */ |
long time_constant = 2; /* pll time constant */ |
long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ |
long time_precision = 1; /* clock precision (us) */ |
long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ |
long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ |
long time_phase; /* phase offset (scaled us) */ |
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; |
/* frequency offset (scaled ppm)*/ |
long time_adj; /* tick adjust (scaled 1 / HZ) */ |
long time_reftime; /* time at last adjustment (s) */ |
|
long time_adjust; |
long time_adjust_step; |
|
unsigned long event; |
|
extern int do_setitimer(int, struct itimerval *, struct itimerval *); |
|
unsigned long volatile jiffies; |
|
unsigned int * prof_buffer; |
unsigned long prof_len; |
unsigned long prof_shift; |
|
/* |
* Event timer code |
*/ |
#define TVN_BITS 6 |
#define TVR_BITS 8 |
#define TVN_SIZE (1 << TVN_BITS) |
#define TVR_SIZE (1 << TVR_BITS) |
#define TVN_MASK (TVN_SIZE - 1) |
#define TVR_MASK (TVR_SIZE - 1) |
|
struct timer_vec { |
int index; |
struct list_head vec[TVN_SIZE]; |
}; |
|
struct timer_vec_root { |
int index; |
struct list_head vec[TVR_SIZE]; |
}; |
|
static struct timer_vec tv5; |
static struct timer_vec tv4; |
static struct timer_vec tv3; |
static struct timer_vec tv2; |
static struct timer_vec_root tv1; |
|
static struct timer_vec * const tvecs[] = { |
(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 |
}; |
|
static struct list_head * run_timer_list_running; |
|
#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) |
|
void init_timervecs (void) |
{ |
int i; |
|
for (i = 0; i < TVN_SIZE; i++) { |
INIT_LIST_HEAD(tv5.vec + i); |
INIT_LIST_HEAD(tv4.vec + i); |
INIT_LIST_HEAD(tv3.vec + i); |
INIT_LIST_HEAD(tv2.vec + i); |
} |
for (i = 0; i < TVR_SIZE; i++) |
INIT_LIST_HEAD(tv1.vec + i); |
} |
|
static unsigned long timer_jiffies; |
|
static inline void internal_add_timer(struct timer_list *timer) |
{ |
/* |
* must be cli-ed when calling this |
*/ |
unsigned long expires = timer->expires; |
unsigned long idx = expires - timer_jiffies; |
struct list_head * vec; |
|
if (run_timer_list_running) |
vec = run_timer_list_running; |
else if (idx < TVR_SIZE) { |
int i = expires & TVR_MASK; |
vec = tv1.vec + i; |
} else if (idx < 1 << (TVR_BITS + TVN_BITS)) { |
int i = (expires >> TVR_BITS) & TVN_MASK; |
vec = tv2.vec + i; |
} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { |
int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; |
vec = tv3.vec + i; |
} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { |
int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; |
vec = tv4.vec + i; |
} else if ((signed long) idx < 0) { |
/* can happen if you add a timer with expires == jiffies, |
* or you set a timer to go off in the past |
*/ |
vec = tv1.vec + tv1.index; |
} else if (idx <= 0xffffffffUL) { |
int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; |
vec = tv5.vec + i; |
} else { |
/* Can only get here on architectures with 64-bit jiffies */ |
INIT_LIST_HEAD(&timer->list); |
return; |
} |
/* |
* Timers are FIFO! |
*/ |
list_add(&timer->list, vec->prev); |
} |
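 
/* |
 * Worked example: with timer_jiffies == 1000 and timer->expires == 1300, |
 * idx == 300, which is >= TVR_SIZE (256) but < 1 << (TVR_BITS + TVN_BITS), |
 * so the timer is queued on tv2.vec[(1300 >> TVR_BITS) & TVN_MASK], i.e. |
 * tv2.vec[5], and is later cascaded down into tv1 by cascade_timers(). |
 */ |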
|
/* Initialize both explicitly - let's try to have them in the same cache line */ |
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; |
|
#ifdef CONFIG_SMP |
volatile struct timer_list * volatile running_timer; |
#define timer_enter(t) do { running_timer = t; mb(); } while (0) |
#define timer_exit() do { running_timer = NULL; } while (0) |
#define timer_is_running(t) (running_timer == t) |
#define timer_synchronize(t) while (timer_is_running(t)) barrier() |
#else |
#define timer_enter(t) do { } while (0) |
#define timer_exit() do { } while (0) |
#endif |
|
void add_timer(struct timer_list *timer) |
{ |
unsigned long flags; |
|
spin_lock_irqsave(&timerlist_lock, flags); |
if (timer_pending(timer)) |
goto bug; |
internal_add_timer(timer); |
spin_unlock_irqrestore(&timerlist_lock, flags); |
return; |
bug: |
spin_unlock_irqrestore(&timerlist_lock, flags); |
printk("bug: kernel timer added twice at %p.\n", |
__builtin_return_address(0)); |
} |
|
static inline int detach_timer (struct timer_list *timer) |
{ |
if (!timer_pending(timer)) |
return 0; |
list_del(&timer->list); |
return 1; |
} |
|
int mod_timer(struct timer_list *timer, unsigned long expires) |
{ |
int ret; |
unsigned long flags; |
|
spin_lock_irqsave(&timerlist_lock, flags); |
timer->expires = expires; |
ret = detach_timer(timer); |
internal_add_timer(timer); |
spin_unlock_irqrestore(&timerlist_lock, flags); |
return ret; |
} |
|
int del_timer(struct timer_list * timer) |
{ |
int ret; |
unsigned long flags; |
|
spin_lock_irqsave(&timerlist_lock, flags); |
ret = detach_timer(timer); |
timer->list.next = timer->list.prev = NULL; |
spin_unlock_irqrestore(&timerlist_lock, flags); |
return ret; |
} |
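 
/* |
 * Illustrative driver-style sketch (not from this file): typical use of |
 * the add_timer()/mod_timer()/del_timer() interface; my_timer, my_arm() |
 * and my_timeout() are hypothetical names. |
 */ |
#if 0 |
#include <linux/timer.h> |
#include <linux/sched.h>	/* jiffies, HZ */ |
 |
static struct timer_list my_timer; |
 |
static void my_timeout(unsigned long data) |
{ |
	/* runs in timer bottom-half context about one second after arming */ |
} |
 |
static void my_arm(void) |
{ |
	init_timer(&my_timer); |
	my_timer.function = my_timeout; |
	my_timer.data = 0; |
	my_timer.expires = jiffies + HZ;	/* one second from now */ |
	add_timer(&my_timer); |
	/* later: mod_timer(&my_timer, jiffies + 2*HZ) or del_timer(&my_timer) */ |
} |
#endif |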
|
#ifdef CONFIG_SMP |
void sync_timers(void) |
{ |
spin_unlock_wait(&global_bh_lock); |
} |
|
/* |
 * SMP-specific function to delete a periodic timer. |
 * The caller must, by some means, prevent the timer from being |
 * restarted. Upon exit the timer is not queued and its handler is not |
 * running on any CPU. It returns the number of times the timer was |
 * deleted (for reference counting). |
*/ |
|
int del_timer_sync(struct timer_list * timer) |
{ |
int ret = 0; |
|
for (;;) { |
unsigned long flags; |
int running; |
|
spin_lock_irqsave(&timerlist_lock, flags); |
ret += detach_timer(timer); |
timer->list.next = timer->list.prev = 0; |
running = timer_is_running(timer); |
spin_unlock_irqrestore(&timerlist_lock, flags); |
|
if (!running) |
break; |
|
timer_synchronize(timer); |
} |
|
return ret; |
} |
#endif |
|
|
static inline void cascade_timers(struct timer_vec *tv) |
{ |
/* cascade all the timers from tv up one level */ |
struct list_head *head, *curr, *next; |
|
head = tv->vec + tv->index; |
curr = head->next; |
/* |
* We are removing _all_ timers from the list, so we don't have to |
* detach them individually, just clear the list afterwards. |
*/ |
while (curr != head) { |
struct timer_list *tmp; |
|
tmp = list_entry(curr, struct timer_list, list); |
next = curr->next; |
list_del(curr); // not needed |
internal_add_timer(tmp); |
curr = next; |
} |
INIT_LIST_HEAD(head); |
tv->index = (tv->index + 1) & TVN_MASK; |
} |
|
static inline void run_timer_list(void) |
{ |
spin_lock_irq(&timerlist_lock); |
while ((long)(jiffies - timer_jiffies) >= 0) { |
LIST_HEAD(queued); |
struct list_head *head, *curr; |
if (!tv1.index) { |
int n = 1; |
do { |
cascade_timers(tvecs[n]); |
} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); |
} |
run_timer_list_running = &queued; |
repeat: |
head = tv1.vec + tv1.index; |
curr = head->next; |
if (curr != head) { |
struct timer_list *timer; |
void (*fn)(unsigned long); |
unsigned long data; |
|
timer = list_entry(curr, struct timer_list, list); |
fn = timer->function; |
data= timer->data; |
|
detach_timer(timer); |
timer->list.next = timer->list.prev = NULL; |
timer_enter(timer); |
spin_unlock_irq(&timerlist_lock); |
fn(data); |
spin_lock_irq(&timerlist_lock); |
timer_exit(); |
goto repeat; |
} |
run_timer_list_running = NULL; |
++timer_jiffies; |
tv1.index = (tv1.index + 1) & TVR_MASK; |
|
curr = queued.next; |
while (curr != &queued) { |
struct timer_list *timer; |
|
timer = list_entry(curr, struct timer_list, list); |
curr = curr->next; |
internal_add_timer(timer); |
} |
} |
spin_unlock_irq(&timerlist_lock); |
} |
|
spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; |
|
void tqueue_bh(void) |
{ |
run_task_queue(&tq_timer); |
} |
|
void immediate_bh(void) |
{ |
run_task_queue(&tq_immediate); |
} |
|
/* |
* this routine handles the overflow of the microsecond field |
* |
* The tricky bits of code to handle the accurate clock support |
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. |
* They were originally developed for SUN and DEC kernels. |
* All the kudos should go to Dave for this stuff. |
* |
*/ |
static void second_overflow(void) |
{ |
long ltemp; |
|
/* Bump the maxerror field */ |
time_maxerror += time_tolerance >> SHIFT_USEC; |
if ( time_maxerror > NTP_PHASE_LIMIT ) { |
time_maxerror = NTP_PHASE_LIMIT; |
time_status |= STA_UNSYNC; |
} |
|
/* |
* Leap second processing. If in leap-insert state at |
* the end of the day, the system clock is set back one |
* second; if in leap-delete state, the system clock is |
* set ahead one second. The microtime() routine or |
 * external clock driver will ensure that reported time |
* is always monotonic. The ugly divides should be |
* replaced. |
*/ |
switch (time_state) { |
|
case TIME_OK: |
if (time_status & STA_INS) |
time_state = TIME_INS; |
else if (time_status & STA_DEL) |
time_state = TIME_DEL; |
break; |
|
case TIME_INS: |
if (xtime.tv_sec % 86400 == 0) { |
xtime.tv_sec--; |
time_state = TIME_OOP; |
printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); |
} |
break; |
|
case TIME_DEL: |
if ((xtime.tv_sec + 1) % 86400 == 0) { |
xtime.tv_sec++; |
time_state = TIME_WAIT; |
printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); |
} |
break; |
|
case TIME_OOP: |
time_state = TIME_WAIT; |
break; |
|
case TIME_WAIT: |
if (!(time_status & (STA_INS | STA_DEL))) |
time_state = TIME_OK; |
} |
|
/* |
* Compute the phase adjustment for the next second. In |
* PLL mode, the offset is reduced by a fixed factor |
* times the time constant. In FLL mode the offset is |
* used directly. In either mode, the maximum phase |
* adjustment for each second is clamped so as to spread |
* the adjustment over not more than the number of |
* seconds between updates. |
*/ |
if (time_offset < 0) { |
ltemp = -time_offset; |
if (!(time_status & STA_FLL)) |
ltemp >>= SHIFT_KG + time_constant; |
if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) |
ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; |
time_offset += ltemp; |
time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); |
} else { |
ltemp = time_offset; |
if (!(time_status & STA_FLL)) |
ltemp >>= SHIFT_KG + time_constant; |
if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) |
ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; |
time_offset -= ltemp; |
time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); |
} |
|
/* |
* Compute the frequency estimate and additional phase |
* adjustment due to frequency error for the next |
* second. When the PPS signal is engaged, gnaw on the |
* watchdog counter and update the frequency computed by |
* the pll and the PPS signal. |
*/ |
pps_valid++; |
if (pps_valid == PPS_VALID) { /* PPS signal lost */ |
pps_jitter = MAXTIME; |
pps_stabil = MAXFREQ; |
time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | |
STA_PPSWANDER | STA_PPSERROR); |
} |
ltemp = time_freq + pps_freq; |
if (ltemp < 0) |
time_adj -= -ltemp >> |
(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); |
else |
time_adj += ltemp >> |
(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); |
|
#if HZ == 100 |
/* Compensate for (HZ==100) != (1 << SHIFT_HZ). |
* Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14) |
*/ |
if (time_adj < 0) |
time_adj -= (-time_adj >> 2) + (-time_adj >> 5); |
else |
time_adj += (time_adj >> 2) + (time_adj >> 5); |
#endif |
} |
|
/* in the NTP reference this is called "hardclock()" */ |
static void update_wall_time_one_tick(void) |
{ |
if ( (time_adjust_step = time_adjust) != 0 ) { |
/* We are doing an adjtime thing. |
* |
* Prepare time_adjust_step to be within bounds. |
* Note that a positive time_adjust means we want the clock |
* to run faster. |
* |
* Limit the amount of the step to be in the range |
* -tickadj .. +tickadj |
*/ |
if (time_adjust > tickadj) |
time_adjust_step = tickadj; |
else if (time_adjust < -tickadj) |
time_adjust_step = -tickadj; |
|
/* Reduce by this step the amount of time left */ |
time_adjust -= time_adjust_step; |
} |
xtime.tv_usec += tick + time_adjust_step; |
/* |
 * Advance the phase; once it accumulates to a whole microsecond |
 * (or more), fold that amount into xtime.tv_usec. |
*/ |
time_phase += time_adj; |
if (time_phase <= -FINEUSEC) { |
long ltemp = -time_phase >> SHIFT_SCALE; |
time_phase += ltemp << SHIFT_SCALE; |
xtime.tv_usec -= ltemp; |
} |
else if (time_phase >= FINEUSEC) { |
long ltemp = time_phase >> SHIFT_SCALE; |
time_phase -= ltemp << SHIFT_SCALE; |
xtime.tv_usec += ltemp; |
} |
} |
|
/* |
* Using a loop looks inefficient, but "ticks" is |
 * usually just one (we shouldn't be losing ticks; |
 * we're doing it this way mainly for interrupt |
 * latency reasons, not because we think we'll |
 * have lots of lost timer ticks). |
*/ |
static void update_wall_time(unsigned long ticks) |
{ |
do { |
ticks--; |
update_wall_time_one_tick(); |
} while (ticks); |
|
if (xtime.tv_usec >= 1000000) { |
xtime.tv_usec -= 1000000; |
xtime.tv_sec++; |
second_overflow(); |
} |
} |
|
static inline void do_process_times(struct task_struct *p, |
unsigned long user, unsigned long system) |
{ |
unsigned long psecs; |
|
psecs = (p->times.tms_utime += user); |
psecs += (p->times.tms_stime += system); |
if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) { |
/* Send SIGXCPU every second.. */ |
if (!(psecs % HZ)) |
send_sig(SIGXCPU, p, 1); |
/* and SIGKILL when we go over max.. */ |
if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max) |
send_sig(SIGKILL, p, 1); |
} |
} |
|
static inline void do_it_virt(struct task_struct * p, unsigned long ticks) |
{ |
unsigned long it_virt = p->it_virt_value; |
|
if (it_virt) { |
it_virt -= ticks; |
if (!it_virt) { |
it_virt = p->it_virt_incr; |
send_sig(SIGVTALRM, p, 1); |
} |
p->it_virt_value = it_virt; |
} |
} |
|
static inline void do_it_prof(struct task_struct *p) |
{ |
unsigned long it_prof = p->it_prof_value; |
|
if (it_prof) { |
if (--it_prof == 0) { |
it_prof = p->it_prof_incr; |
send_sig(SIGPROF, p, 1); |
} |
p->it_prof_value = it_prof; |
} |
} |
|
void update_one_process(struct task_struct *p, unsigned long user, |
unsigned long system, int cpu) |
{ |
p->per_cpu_utime[cpu] += user; |
p->per_cpu_stime[cpu] += system; |
do_process_times(p, user, system); |
do_it_virt(p, user); |
do_it_prof(p); |
} |
|
/* |
* Called from the timer interrupt handler to charge one tick to the current |
* process. user_tick is 1 if the tick is user time, 0 for system. |
*/ |
void update_process_times(int user_tick) |
{ |
struct task_struct *p = current; |
int cpu = smp_processor_id(), system = user_tick ^ 1; |
|
update_one_process(p, user_tick, system, cpu); |
if (p->pid) { |
if (--p->counter <= 0) { |
p->counter = 0; |
/* |
* SCHED_FIFO is priority preemption, so this is |
* not the place to decide whether to reschedule a |
* SCHED_FIFO task or not - Bhavesh Davda |
*/ |
if (p->policy != SCHED_FIFO) { |
p->need_resched = 1; |
} |
} |
if (p->nice > 0) |
kstat.per_cpu_nice[cpu] += user_tick; |
else |
kstat.per_cpu_user[cpu] += user_tick; |
kstat.per_cpu_system[cpu] += system; |
} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) |
kstat.per_cpu_system[cpu] += system; |
} |
|
/* |
* Nr of active tasks - counted in fixed-point numbers |
*/ |
static unsigned long count_active_tasks(void) |
{ |
struct task_struct *p; |
unsigned long nr = 0; |
|
read_lock(&tasklist_lock); |
for_each_task(p) { |
if ((p->state == TASK_RUNNING || |
(p->state & TASK_UNINTERRUPTIBLE))) |
nr += FIXED_1; |
} |
read_unlock(&tasklist_lock); |
return nr; |
} |
|
/* |
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to |
* imply that avenrun[] is the standard name for this kind of thing. |
* Nothing else seems to be standardized: the fractional size etc |
* all seem to differ on different machines. |
*/ |
unsigned long avenrun[3]; |
|
static inline void calc_load(unsigned long ticks) |
{ |
unsigned long active_tasks; /* fixed-point */ |
static int count = LOAD_FREQ; |
|
count -= ticks; |
if (count < 0) { |
count += LOAD_FREQ; |
active_tasks = count_active_tasks(); |
CALC_LOAD(avenrun[0], EXP_1, active_tasks); |
CALC_LOAD(avenrun[1], EXP_5, active_tasks); |
CALC_LOAD(avenrun[2], EXP_15, active_tasks); |
} |
} |
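 
/* |
 * Illustrative sketch (not from this file): turning the fixed-point |
 * avenrun[] values back into the familiar "0.42" form; LOAD_INT() and |
 * LOAD_FRAC() are local helpers defined here only for the example. |
 */ |
#if 0 |
#include <linux/kernel.h> |
#include <linux/sched.h>	/* FSHIFT, FIXED_1, avenrun[] */ |
 |
#define LOAD_INT(x)  ((x) >> FSHIFT) |
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100) |
 |
static void show_loadavg(void) |
{ |
	printk("load: %lu.%02lu\n", |
	       LOAD_INT(avenrun[0]), LOAD_FRAC(avenrun[0])); |
} |
#endif |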
|
/* jiffies at the most recent update of wall time */ |
unsigned long wall_jiffies; |
|
/* |
 * This spinlock protects us from races in SMP while playing with xtime. -arca |
*/ |
rwlock_t xtime_lock = RW_LOCK_UNLOCKED; |
|
static inline void update_times(void) |
{ |
unsigned long ticks; |
|
/* |
* update_times() is run from the raw timer_bh handler so we |
* just know that the irqs are locally enabled and so we don't |
* need to save/restore the flags of the local CPU here. -arca |
*/ |
write_lock_irq(&xtime_lock); |
vxtime_lock(); |
|
ticks = jiffies - wall_jiffies; |
if (ticks) { |
wall_jiffies += ticks; |
update_wall_time(ticks); |
} |
vxtime_unlock(); |
write_unlock_irq(&xtime_lock); |
calc_load(ticks); |
} |
|
void timer_bh(void) |
{ |
update_times(); |
run_timer_list(); |
} |
|
void do_timer(struct pt_regs *regs) |
{ |
(*(unsigned long *)&jiffies)++; |
#ifndef CONFIG_SMP |
/* SMP process accounting uses the local APIC timer */ |
|
update_process_times(user_mode(regs)); |
#endif |
mark_bh(TIMER_BH); |
if (TQ_ACTIVE(tq_timer)) |
mark_bh(TQUEUE_BH); |
} |
|
#if !defined(__alpha__) && !defined(__ia64__) |
|
/* |
* For backwards compatibility? This can be done in libc so Alpha |
* and all newer ports shouldn't need it. |
*/ |
asmlinkage unsigned long sys_alarm(unsigned int seconds) |
{ |
struct itimerval it_new, it_old; |
unsigned int oldalarm; |
|
it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; |
it_new.it_value.tv_sec = seconds; |
it_new.it_value.tv_usec = 0; |
do_setitimer(ITIMER_REAL, &it_new, &it_old); |
oldalarm = it_old.it_value.tv_sec; |
/* ehhh.. We can't return 0 if we have an alarm pending.. */ |
/* And we'd better return too much than too little anyway */ |
if (it_old.it_value.tv_usec) |
oldalarm++; |
return oldalarm; |
} |
|
#endif |
|
#ifndef __alpha__ |
|
/* |
* The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this |
* should be moved into arch/i386 instead? |
*/ |
|
/** |
* sys_getpid - return the thread group id of the current process |
* |
* Note, despite the name, this returns the tgid not the pid. The tgid and |
* the pid are identical unless CLONE_THREAD was specified on clone() in |
* which case the tgid is the same in all threads of the same group. |
* |
* This is SMP safe as current->tgid does not change. |
*/ |
asmlinkage long sys_getpid(void) |
{ |
return current->tgid; |
} |
|
/* |
* This is not strictly SMP safe: p_opptr could change |
* from under us. However, rather than getting any lock |
* we can use an optimistic algorithm: get the parent |
* pid, and go back and check that the parent is still |
* the same. If it has changed (which is extremely unlikely |
* indeed), we just try again.. |
* |
* NOTE! This depends on the fact that even if we _do_ |
* get an old value of "parent", we can happily dereference |
* the pointer: we just can't necessarily trust the result |
* until we know that the parent pointer is valid. |
* |
* The "mb()" macro is a memory barrier - a synchronizing |
* event. It also makes sure that gcc doesn't optimize |
* away the necessary memory references.. The barrier doesn't |
* have to have all that strong semantics: on x86 we don't |
* really require a synchronizing instruction, for example. |
* The barrier is more important for code generation than |
* for any real memory ordering semantics (even if there is |
* a small window for a race, using the old pointer is |
* harmless for a while). |
*/ |
asmlinkage long sys_getppid(void) |
{ |
int pid; |
struct task_struct * me = current; |
struct task_struct * parent; |
|
parent = me->p_opptr; |
for (;;) { |
pid = parent->pid; |
#if CONFIG_SMP |
{ |
struct task_struct *old = parent; |
mb(); |
parent = me->p_opptr; |
if (old != parent) |
continue; |
} |
#endif |
break; |
} |
return pid; |
} |
|
asmlinkage long sys_getuid(void) |
{ |
/* Only we change this so SMP safe */ |
return current->uid; |
} |
|
asmlinkage long sys_geteuid(void) |
{ |
/* Only we change this so SMP safe */ |
return current->euid; |
} |
|
asmlinkage long sys_getgid(void) |
{ |
/* Only we change this so SMP safe */ |
return current->gid; |
} |
|
asmlinkage long sys_getegid(void) |
{ |
/* Only we change this so SMP safe */ |
return current->egid; |
} |
|
#endif |
|
/* Thread ID - the internal kernel "pid" */ |
asmlinkage long sys_gettid(void) |
{ |
return current->pid; |
} |
|
asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) |
{ |
struct timespec t; |
unsigned long expire; |
|
if(copy_from_user(&t, rqtp, sizeof(struct timespec))) |
return -EFAULT; |
|
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0) |
return -EINVAL; |
|
|
if (t.tv_sec == 0 && t.tv_nsec <= 2000000L && |
current->policy != SCHED_OTHER) |
{ |
/* |
* Short delay requests up to 2 ms will be handled with |
* high precision by a busy wait for all real-time processes. |
* |
 * It's important on SMP not to do this while holding locks. |
*/ |
udelay((t.tv_nsec + 999) / 1000); |
return 0; |
} |
|
expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); |
|
current->state = TASK_INTERRUPTIBLE; |
expire = schedule_timeout(expire); |
|
if (expire) { |
if (rmtp) { |
jiffies_to_timespec(expire, &t); |
if (copy_to_user(rmtp, &t, sizeof(struct timespec))) |
return -EFAULT; |
} |
return -EINTR; |
} |
return 0; |
} |
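 
/* |
 * Illustrative user-space sketch (not part of this file): using the |
 * rmtp remaining-time value to keep sleeping after a signal interrupts |
 * nanosleep(), which returns -EINTR above. |
 */ |
#if 0 |
#include <time.h> |
#include <errno.h> |
 |
static void sleep_fully(struct timespec req) |
{ |
	struct timespec rem; |
 |
	while (nanosleep(&req, &rem) == -1 && errno == EINTR) |
		req = rem;		/* retry with the unslept remainder */ |
} |
#endif |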
|
/sysctl.c
0,0 → 1,1524
/* |
* sysctl.c: General linux system control interface |
* |
* Begun 24 March 1995, Stephen Tweedie |
* Added /proc support, Dec 1995 |
* Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas. |
* Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver. |
* Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver. |
* Dynamic registration fixes, Stephen Tweedie. |
* Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn. |
* Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris |
* Horn. |
* Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer. |
* Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer. |
* Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill |
* Wendling. |
* The list_for_each() macro wasn't appropriate for the sysctl loop. |
* Removed it and replaced it with older style, 03/23/00, Bill Wendling |
*/ |
|
#include <linux/config.h> |
#include <linux/slab.h> |
#include <linux/sysctl.h> |
#include <linux/swapctl.h> |
#include <linux/proc_fs.h> |
#include <linux/ctype.h> |
#include <linux/utsname.h> |
#include <linux/capability.h> |
#include <linux/smp_lock.h> |
#include <linux/init.h> |
#include <linux/sysrq.h> |
#include <linux/highuid.h> |
#include <linux/swap.h> |
|
#include <asm/uaccess.h> |
|
#ifdef CONFIG_ROOT_NFS |
#include <linux/nfs_fs.h> |
#endif |
|
#if defined(CONFIG_SYSCTL) |
|
/* External variables not in a header file. */ |
extern int panic_timeout; |
extern int C_A_D; |
extern int bdf_prm[], bdflush_min[], bdflush_max[]; |
extern int sysctl_overcommit_memory; |
extern int max_threads; |
extern atomic_t nr_queued_signals; |
extern int max_queued_signals; |
extern int sysrq_enabled; |
extern int core_uses_pid; |
extern int core_setuid_ok; |
extern char core_pattern[]; |
extern int cad_pid; |
extern int laptop_mode; |
extern int block_dump; |
|
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
static int maxolduid = 65535; |
static int minolduid; |
|
#ifdef CONFIG_KMOD |
extern char modprobe_path[]; |
#endif |
#ifdef CONFIG_HOTPLUG |
extern char hotplug_path[]; |
#endif |
#ifdef CONFIG_CHR_DEV_SG |
extern int sg_big_buff; |
#endif |
#ifdef CONFIG_SYSVIPC |
extern size_t shm_ctlmax; |
extern size_t shm_ctlall; |
extern int shm_ctlmni; |
extern int msg_ctlmax; |
extern int msg_ctlmnb; |
extern int msg_ctlmni; |
extern int sem_ctls[]; |
#endif |
|
extern int exception_trace; |
|
#ifdef __sparc__ |
extern char reboot_command []; |
extern int stop_a_enabled; |
#endif |
|
#ifdef CONFIG_ARCH_S390 |
#ifdef CONFIG_MATHEMU |
extern int sysctl_ieee_emulation_warnings; |
#endif |
extern int sysctl_userprocess_debug; |
#endif |
|
#ifdef CONFIG_PPC32 |
extern unsigned long zero_paged_on, powersave_nap; |
int proc_dol2crvec(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp); |
int proc_dol3crvec(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp); |
#endif |
|
#ifdef CONFIG_BSD_PROCESS_ACCT |
extern int acct_parm[]; |
#endif |
|
extern int pgt_cache_water[]; |
|
static int parse_table(int *, int, void *, size_t *, void *, size_t, |
ctl_table *, void **); |
static int proc_doutsstring(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp); |
|
static ctl_table root_table[]; |
static struct ctl_table_header root_table_header = |
{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; |
|
static ctl_table kern_table[]; |
static ctl_table vm_table[]; |
#ifdef CONFIG_NET |
extern ctl_table net_table[]; |
#endif |
static ctl_table proc_table[]; |
static ctl_table fs_table[]; |
static ctl_table debug_table[]; |
static ctl_table dev_table[]; |
extern ctl_table random_table[]; |
|
/* /proc declarations: */ |
|
#ifdef CONFIG_PROC_FS |
|
static ssize_t proc_readsys(struct file *, char *, size_t, loff_t *); |
static ssize_t proc_writesys(struct file *, const char *, size_t, loff_t *); |
static int proc_sys_permission(struct inode *, int); |
|
struct file_operations proc_sys_file_operations = { |
read: proc_readsys, |
write: proc_writesys, |
}; |
|
static struct inode_operations proc_sys_inode_operations = { |
permission: proc_sys_permission, |
}; |
|
extern struct proc_dir_entry *proc_sys_root; |
|
static void register_proc_table(ctl_table *, struct proc_dir_entry *); |
static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); |
#endif |
|
/* The default sysctl tables: */ |
|
static ctl_table root_table[] = { |
{CTL_KERN, "kernel", NULL, 0, 0555, kern_table}, |
{CTL_VM, "vm", NULL, 0, 0555, vm_table}, |
#ifdef CONFIG_NET |
{CTL_NET, "net", NULL, 0, 0555, net_table}, |
#endif |
{CTL_PROC, "proc", NULL, 0, 0555, proc_table}, |
{CTL_FS, "fs", NULL, 0, 0555, fs_table}, |
{CTL_DEBUG, "debug", NULL, 0, 0555, debug_table}, |
{CTL_DEV, "dev", NULL, 0, 0555, dev_table}, |
{0} |
}; |
|
static ctl_table kern_table[] = { |
{KERN_OSTYPE, "ostype", system_utsname.sysname, 64, |
0444, NULL, &proc_doutsstring, &sysctl_string}, |
{KERN_OSRELEASE, "osrelease", system_utsname.release, 64, |
0444, NULL, &proc_doutsstring, &sysctl_string}, |
{KERN_VERSION, "version", system_utsname.version, 64, |
0444, NULL, &proc_doutsstring, &sysctl_string}, |
{KERN_NODENAME, "hostname", system_utsname.nodename, 64, |
0644, NULL, &proc_doutsstring, &sysctl_string}, |
{KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64, |
0644, NULL, &proc_doutsstring, &sysctl_string}, |
{KERN_PANIC, "panic", &panic_timeout, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_CORE_USES_PID, "core_uses_pid", &core_uses_pid, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_CORE_SETUID, "core_setuid_ok", &core_setuid_ok, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_CORE_PATTERN, "core_pattern", core_pattern, 64, |
0644, NULL, &proc_dostring, &sysctl_string}, |
{KERN_TAINTED, "tainted", &tainted, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t), |
0600, NULL, &proc_dointvec_bset}, |
#ifdef CONFIG_BLK_DEV_INITRD |
{KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int), |
0644, NULL, &proc_dointvec}, |
#endif |
#ifdef __sparc__ |
{KERN_SPARC_REBOOT, "reboot-cmd", reboot_command, |
256, 0644, NULL, &proc_dostring, &sysctl_string }, |
{KERN_SPARC_STOP_A, "stop-a", &stop_a_enabled, sizeof (int), |
0644, NULL, &proc_dointvec}, |
#endif |
#ifdef CONFIG_PPC32 |
{KERN_PPC_ZEROPAGED, "zero-paged", &zero_paged_on, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_PPC_POWERSAVE_NAP, "powersave-nap", &powersave_nap, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_PPC_L2CR, "l2cr", NULL, 0, |
0644, NULL, &proc_dol2crvec}, |
{KERN_PPC_L3CR, "l3cr", NULL, 0, |
0644, NULL, &proc_dol3crvec}, |
#endif |
{KERN_CTLALTDEL, "ctrl-alt-del", &C_A_D, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_PRINTK, "printk", &console_loglevel, 4*sizeof(int), |
0644, NULL, &proc_dointvec}, |
#ifdef CONFIG_KMOD |
{KERN_MODPROBE, "modprobe", &modprobe_path, 256, |
0644, NULL, &proc_dostring, &sysctl_string }, |
#endif |
#ifdef CONFIG_HOTPLUG |
{KERN_HOTPLUG, "hotplug", &hotplug_path, 256, |
0644, NULL, &proc_dostring, &sysctl_string }, |
#endif |
#ifdef CONFIG_CHR_DEV_SG |
{KERN_SG_BIG_BUFF, "sg-big-buff", &sg_big_buff, sizeof (int), |
0444, NULL, &proc_dointvec}, |
#endif |
#ifdef CONFIG_BSD_PROCESS_ACCT |
{KERN_ACCT, "acct", &acct_parm, 3*sizeof(int), |
0644, NULL, &proc_dointvec}, |
#endif |
{KERN_RTSIGNR, "rtsig-nr", &nr_queued_signals, sizeof(int), |
0444, NULL, &proc_dointvec}, |
{KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int), |
0644, NULL, &proc_dointvec}, |
#ifdef CONFIG_SYSVIPC |
{KERN_SHMMAX, "shmmax", &shm_ctlmax, sizeof (size_t), |
0644, NULL, &proc_doulongvec_minmax}, |
{KERN_SHMALL, "shmall", &shm_ctlall, sizeof (size_t), |
0644, NULL, &proc_doulongvec_minmax}, |
{KERN_SHMMNI, "shmmni", &shm_ctlmni, sizeof (int), |
0644, NULL, &proc_dointvec}, |
{KERN_MSGMAX, "msgmax", &msg_ctlmax, sizeof (int), |
0644, NULL, &proc_dointvec}, |
{KERN_MSGMNI, "msgmni", &msg_ctlmni, sizeof (int), |
0644, NULL, &proc_dointvec}, |
{KERN_MSGMNB, "msgmnb", &msg_ctlmnb, sizeof (int), |
0644, NULL, &proc_dointvec}, |
{KERN_SEM, "sem", &sem_ctls, 4*sizeof (int), |
0644, NULL, &proc_dointvec}, |
#endif |
#ifdef CONFIG_MAGIC_SYSRQ |
{KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int), |
0644, NULL, &proc_dointvec}, |
#endif |
{KERN_CADPID, "cad_pid", &cad_pid, sizeof (int), |
0600, NULL, &proc_dointvec}, |
{KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{KERN_RANDOM, "random", NULL, 0, 0555, random_table}, |
{KERN_OVERFLOWUID, "overflowuid", &overflowuid, sizeof(int), 0644, NULL, |
&proc_dointvec_minmax, &sysctl_intvec, NULL, |
&minolduid, &maxolduid}, |
{KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL, |
&proc_dointvec_minmax, &sysctl_intvec, NULL, |
&minolduid, &maxolduid}, |
#ifdef CONFIG_ARCH_S390 |
#ifdef CONFIG_MATHEMU |
{KERN_IEEE_EMULATION_WARNINGS,"ieee_emulation_warnings", |
&sysctl_ieee_emulation_warnings,sizeof(int),0644,NULL,&proc_dointvec}, |
#endif |
{KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug", |
&sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec}, |
#endif |
#ifdef __x86_64__ |
{KERN_EXCEPTION_TRACE,"exception-trace", |
&exception_trace,sizeof(int),0644,NULL,&proc_dointvec}, |
#endif |
{0} |
}; |
|
static ctl_table vm_table[] = { |
{VM_GFP_DEBUG, "vm_gfp_debug", |
&vm_gfp_debug, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_VFS_SCAN_RATIO, "vm_vfs_scan_ratio", |
&vm_vfs_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_CACHE_SCAN_RATIO, "vm_cache_scan_ratio", |
&vm_cache_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_MAPPED_RATIO, "vm_mapped_ratio", |
&vm_mapped_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_LRU_BALANCE_RATIO, "vm_lru_balance_ratio", |
&vm_lru_balance_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_PASSES, "vm_passes", |
&vm_passes, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL, |
&proc_dointvec_minmax, &sysctl_intvec, NULL, |
&bdflush_min, &bdflush_max}, |
{VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory, |
sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec}, |
{VM_PAGERDAEMON, "kswapd", |
&pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec}, |
{VM_PGT_CACHE, "pagetable_cache", |
&pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_PAGE_CLUSTER, "page-cluster", |
&page_cluster, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_MIN_READAHEAD, "min-readahead", |
&vm_min_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_MAX_READAHEAD, "max-readahead", |
&vm_max_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_MAX_MAP_COUNT, "max_map_count", |
&max_map_count, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_LAPTOP_MODE, "laptop_mode", |
&laptop_mode, sizeof(int), 0644, NULL, &proc_dointvec}, |
{VM_BLOCK_DUMP, "block_dump", |
&block_dump, sizeof(int), 0644, NULL, &proc_dointvec}, |
{0} |
}; |
|
static ctl_table proc_table[] = { |
{0} |
}; |
|
static ctl_table fs_table[] = { |
{FS_NRINODE, "inode-nr", &inodes_stat, 2*sizeof(int), |
0444, NULL, &proc_dointvec}, |
{FS_STATINODE, "inode-state", &inodes_stat, 7*sizeof(int), |
0444, NULL, &proc_dointvec}, |
{FS_NRFILE, "file-nr", &files_stat, 3*sizeof(int), |
0444, NULL, &proc_dointvec}, |
{FS_MAXFILE, "file-max", &files_stat.max_files, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{FS_DENTRY, "dentry-state", &dentry_stat, 6*sizeof(int), |
0444, NULL, &proc_dointvec}, |
{FS_OVERFLOWUID, "overflowuid", &fs_overflowuid, sizeof(int), 0644, NULL, |
&proc_dointvec_minmax, &sysctl_intvec, NULL, |
&minolduid, &maxolduid}, |
{FS_OVERFLOWGID, "overflowgid", &fs_overflowgid, sizeof(int), 0644, NULL, |
&proc_dointvec_minmax, &sysctl_intvec, NULL, |
&minolduid, &maxolduid}, |
{FS_LEASES, "leases-enable", &leases_enable, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{FS_DIR_NOTIFY, "dir-notify-enable", &dir_notify_enable, |
sizeof(int), 0644, NULL, &proc_dointvec}, |
{FS_LEASE_TIME, "lease-break-time", &lease_break_time, sizeof(int), |
0644, NULL, &proc_dointvec}, |
{0} |
}; |
|
static ctl_table debug_table[] = { |
{0} |
}; |
|
static ctl_table dev_table[] = { |
{0} |
}; |
|
extern void init_irq_proc (void); |
|
void __init sysctl_init(void) |
{ |
#ifdef CONFIG_PROC_FS |
register_proc_table(root_table, proc_sys_root); |
init_irq_proc(); |
#endif |
} |
|
int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, |
void *newval, size_t newlen) |
{ |
struct list_head *tmp; |
|
if (nlen <= 0 || nlen >= CTL_MAXNAME) |
return -ENOTDIR; |
if (oldval) { |
int old_len; |
if (!oldlenp || get_user(old_len, oldlenp)) |
return -EFAULT; |
} |
tmp = &root_table_header.ctl_entry; |
do { |
struct ctl_table_header *head = |
list_entry(tmp, struct ctl_table_header, ctl_entry); |
void *context = NULL; |
int error = parse_table(name, nlen, oldval, oldlenp, |
newval, newlen, head->ctl_table, |
&context); |
if (context) |
kfree(context); |
if (error != -ENOTDIR) |
return error; |
tmp = tmp->next; |
} while (tmp != &root_table_header.ctl_entry); |
return -ENOTDIR; |
} |
|
extern asmlinkage long sys_sysctl(struct __sysctl_args *args) |
{ |
struct __sysctl_args tmp; |
int error; |
|
if (copy_from_user(&tmp, args, sizeof(tmp))) |
return -EFAULT; |
|
lock_kernel(); |
error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp, |
tmp.newval, tmp.newlen); |
unlock_kernel(); |
return error; |
} |
|
/* |
* ctl_perm does NOT grant the superuser all rights automatically, because |
* some sysctl variables are readonly even to root. |
*/ |
|
static int test_perm(int mode, int op) |
{ |
if (!current->euid) |
mode >>= 6; |
else if (in_egroup_p(0)) |
mode >>= 3; |
if ((mode & op & 0007) == op) |
return 0; |
return -EACCES; |
} |
|
static inline int ctl_perm(ctl_table *table, int op) |
{ |
return test_perm(table->mode, op); |
} |
|
static int parse_table(int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, |
ctl_table *table, void **context) |
{ |
int n; |
repeat: |
if (!nlen) |
return -ENOTDIR; |
if (get_user(n, name)) |
return -EFAULT; |
for ( ; table->ctl_name; table++) { |
if (n == table->ctl_name || table->ctl_name == CTL_ANY) { |
int error; |
if (table->child) { |
if (ctl_perm(table, 001)) |
return -EPERM; |
if (table->strategy) { |
error = table->strategy( |
table, name, nlen, |
oldval, oldlenp, |
newval, newlen, context); |
if (error) |
return error; |
} |
name++; |
nlen--; |
table = table->child; |
goto repeat; |
} |
error = do_sysctl_strategy(table, name, nlen, |
oldval, oldlenp, |
newval, newlen, context); |
return error; |
} |
} |
return -ENOTDIR; |
} |
|
/* Perform the actual read/write of a sysctl table entry. */ |
int do_sysctl_strategy (ctl_table *table, |
int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, void **context) |
{ |
int op = 0, rc; |
size_t len; |
|
if (oldval) |
op |= 004; |
if (newval) |
op |= 002; |
if (ctl_perm(table, op)) |
return -EPERM; |
|
if (table->strategy) { |
rc = table->strategy(table, name, nlen, oldval, oldlenp, |
newval, newlen, context); |
if (rc < 0) |
return rc; |
if (rc > 0) |
return 0; |
} |
|
/* If there is no strategy routine, or if the strategy returns |
* zero, proceed with automatic r/w */ |
if (table->data && table->maxlen) { |
if (oldval && oldlenp) { |
if (get_user(len, oldlenp)) |
return -EFAULT; |
if (len) { |
if (len > table->maxlen) |
len = table->maxlen; |
if(copy_to_user(oldval, table->data, len)) |
return -EFAULT; |
if(put_user(len, oldlenp)) |
return -EFAULT; |
} |
} |
if (newval && newlen) { |
len = newlen; |
if (len > table->maxlen) |
len = table->maxlen; |
if(copy_from_user(table->data, newval, len)) |
return -EFAULT; |
} |
} |
return 0; |
} |
|
/** |
* register_sysctl_table - register a sysctl hierarchy |
* @table: the top-level table structure |
* @insert_at_head: whether the entry should be inserted in front or at the end |
* |
* Register a sysctl table hierarchy. @table should be a filled in ctl_table |
* array. An entry with a ctl_name of 0 terminates the table. |
* |
* The members of the &ctl_table structure are used as follows: |
* |
* ctl_name - This is the numeric sysctl value used by sysctl(2). The number |
* must be unique within that level of sysctl |
* |
* procname - the name of the sysctl file under /proc/sys. Set to %NULL to not |
* enter a sysctl file |
* |
* data - a pointer to data for use by proc_handler |
* |
* maxlen - the maximum size in bytes of the data |
* |
* mode - the file permissions for the /proc/sys file, and for sysctl(2) |
* |
* child - a pointer to the child sysctl table if this entry is a directory, or |
* %NULL. |
* |
* proc_handler - the text handler routine (described below) |
* |
* strategy - the strategy routine (described below) |
* |
* de - for internal use by the sysctl routines |
* |
* extra1, extra2 - extra pointers usable by the proc handler routines |
* |
* Leaf nodes in the sysctl tree will be represented by a single file |
* under /proc; non-leaf nodes will be represented by directories. |
* |
* sysctl(2) can automatically manage read and write requests through |
* the sysctl table. The data and maxlen fields of the ctl_table |
* struct enable minimal validation of the values being written to be |
* performed, and the mode field allows minimal authentication. |
* |
* More sophisticated management can be enabled by the provision of a |
* strategy routine with the table entry. This will be called before |
* any automatic read or write of the data is performed. |
* |
* The strategy routine may return |
* |
* < 0 - Error occurred (error is passed to user process) |
* |
* 0 - OK - proceed with automatic read or write. |
* |
* > 0 - OK - read or write has been done by the strategy routine, so |
* return immediately. |
* |
* There must be a proc_handler routine for any terminal nodes |
* mirrored under /proc/sys (non-terminals are handled by a built-in |
* directory handler). Several default handlers are available to |
* cover common cases - |
* |
* proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), |
* proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(), |
* proc_doulongvec_minmax() |
* |
* It is the handler's job to read the input buffer from user memory |
* and process it. The handler should return 0 on success. |
* |
* This routine returns %NULL on a failure to register, and a pointer |
* to the table header on success. |
*/ |
struct ctl_table_header *register_sysctl_table(ctl_table * table, |
int insert_at_head) |
{ |
struct ctl_table_header *tmp; |
tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); |
if (!tmp) |
return NULL; |
tmp->ctl_table = table; |
INIT_LIST_HEAD(&tmp->ctl_entry); |
if (insert_at_head) |
list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); |
else |
list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); |
#ifdef CONFIG_PROC_FS |
register_proc_table(table, proc_sys_root); |
#endif |
return tmp; |
} |
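 
/* |
 * Illustrative module-style sketch (not from this file): registering a |
 * single integer tunable as /proc/sys/kernel/my_knob. The ctl_name value |
 * 99 and the my_* names are placeholders, not real sysctl identifiers. |
 */ |
#if 0 |
#include <linux/sysctl.h> |
#include <linux/errno.h> |
 |
static int my_knob; |
 |
static ctl_table my_kern_table[] = { |
	{99, "my_knob", &my_knob, sizeof(int), 0644, NULL, &proc_dointvec}, |
	{0} |
}; |
static ctl_table my_root_table[] = { |
	{CTL_KERN, "kernel", NULL, 0, 0555, my_kern_table}, |
	{0} |
}; |
static struct ctl_table_header *my_header; |
 |
static int my_sysctl_register(void) |
{ |
	my_header = register_sysctl_table(my_root_table, 0); |
	return my_header ? 0 : -ENOMEM; |
} |
 |
static void my_sysctl_unregister(void) |
{ |
	unregister_sysctl_table(my_header); |
} |
#endif |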
|
/** |
* unregister_sysctl_table - unregister a sysctl table hierarchy |
* @header: the header returned from register_sysctl_table |
* |
* Unregisters the sysctl table and all children. proc entries may not |
* actually be removed until they are no longer used by anyone. |
*/ |
void unregister_sysctl_table(struct ctl_table_header * header) |
{ |
list_del(&header->ctl_entry); |
#ifdef CONFIG_PROC_FS |
unregister_proc_table(header->ctl_table, proc_sys_root); |
#endif |
kfree(header); |
} |
|
/* |
* /proc/sys support |
*/ |
|
#ifdef CONFIG_PROC_FS |
|
/* Scan the sysctl entries in table and add them all into /proc */ |
static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) |
{ |
struct proc_dir_entry *de; |
int len; |
mode_t mode; |
|
for (; table->ctl_name; table++) { |
/* Can't do anything without a proc name. */ |
if (!table->procname) |
continue; |
/* Maybe we can't do anything with it... */ |
if (!table->proc_handler && !table->child) { |
printk(KERN_WARNING "SYSCTL: Can't register %s\n", |
table->procname); |
continue; |
} |
|
len = strlen(table->procname); |
mode = table->mode; |
|
de = NULL; |
if (table->proc_handler) |
mode |= S_IFREG; |
else { |
mode |= S_IFDIR; |
for (de = root->subdir; de; de = de->next) { |
if (proc_match(len, table->procname, de)) |
break; |
} |
/* If the subdir exists already, de is non-NULL */ |
} |
|
if (!de) { |
de = create_proc_entry(table->procname, mode, root); |
if (!de) |
continue; |
de->data = (void *) table; |
if (table->proc_handler) { |
de->proc_fops = &proc_sys_file_operations; |
de->proc_iops = &proc_sys_inode_operations; |
} |
} |
table->de = de; |
if (de->mode & S_IFDIR) |
register_proc_table(table->child, de); |
} |
} |
|
/* |
* Unregister a /proc sysctl table and any subdirectories. |
*/ |
static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) |
{ |
struct proc_dir_entry *de; |
for (; table->ctl_name; table++) { |
if (!(de = table->de)) |
continue; |
if (de->mode & S_IFDIR) { |
if (!table->child) { |
printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); |
continue; |
} |
unregister_proc_table(table->child, de); |
|
/* Don't unregister directories which still have entries.. */ |
if (de->subdir) |
continue; |
} |
|
/* Don't unregister proc entries that are still being used.. */ |
if (atomic_read(&de->count)) |
continue; |
|
table->de = NULL; |
remove_proc_entry(table->procname, root); |
} |
} |
|
static ssize_t do_rw_proc(int write, struct file * file, char * buf, |
size_t count, loff_t *ppos) |
{ |
int op; |
struct proc_dir_entry *de; |
struct ctl_table *table; |
size_t res; |
ssize_t error; |
|
de = (struct proc_dir_entry*) file->f_dentry->d_inode->u.generic_ip; |
if (!de || !de->data) |
return -ENOTDIR; |
table = (struct ctl_table *) de->data; |
if (!table || !table->proc_handler) |
return -ENOTDIR; |
op = (write ? 002 : 004); |
if (ctl_perm(table, op)) |
return -EPERM; |
|
res = count; |
|
/* |
* FIXME: we need to pass on ppos to the handler. |
*/ |
|
error = (*table->proc_handler) (table, write, file, buf, &res); |
if (error) |
return error; |
return res; |
} |
|
static ssize_t proc_readsys(struct file * file, char * buf, |
size_t count, loff_t *ppos) |
{ |
return do_rw_proc(0, file, buf, count, ppos); |
} |
|
static ssize_t proc_writesys(struct file * file, const char * buf, |
size_t count, loff_t *ppos) |
{ |
return do_rw_proc(1, file, (char *) buf, count, ppos); |
} |
|
static int proc_sys_permission(struct inode *inode, int op) |
{ |
return test_perm(inode->i_mode, op); |
} |
|
/** |
* proc_dostring - read a string sysctl |
* @table: the sysctl table |
* @write: %TRUE if this is a write to the sysctl file |
* @filp: the file structure |
* @buffer: the user buffer |
* @lenp: the size of the user buffer |
* |
* Reads/writes a string from/to the user buffer. If the kernel |
* buffer provided is not large enough to hold the string, the |
* string is truncated. The copied string is %NULL-terminated. |
* If the string is being read by the user process, it is copied |
* and a newline '\n' is added. It is truncated if the buffer is |
* not large enough. |
* |
* Returns 0 on success. |
*/ |
int proc_dostring(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
size_t len; |
char *p, c; |
|
if (!table->data || !table->maxlen || !*lenp || |
(filp->f_pos && !write)) { |
*lenp = 0; |
return 0; |
} |
|
if (write) { |
len = 0; |
p = buffer; |
while (len < *lenp) { |
if (get_user(c, p++)) |
return -EFAULT; |
if (c == 0 || c == '\n') |
break; |
len++; |
} |
if (len >= table->maxlen) |
len = table->maxlen-1; |
if(copy_from_user(table->data, buffer, len)) |
return -EFAULT; |
((char *) table->data)[len] = 0; |
filp->f_pos += *lenp; |
} else { |
len = strlen(table->data); |
if (len > table->maxlen) |
len = table->maxlen; |
if (len > *lenp) |
len = *lenp; |
if (len) |
if(copy_to_user(buffer, table->data, len)) |
return -EFAULT; |
if (len < *lenp) { |
if(put_user('\n', ((char *) buffer) + len)) |
return -EFAULT; |
len++; |
} |
*lenp = len; |
filp->f_pos += len; |
} |
return 0; |
} |
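/* |
 * Usage sketch (hypothetical names): a table entry whose data is a |
 * fixed-size string handled by proc_dostring(). |
 * |
 *	static char my_string[64] = "default"; |
 * |
 *	... {1, "my_string", my_string, sizeof(my_string), 0644, NULL, |
 *	     &proc_dostring, &sysctl_string} ... |
 * |
 * Writing "hello" to the /proc file stores a NUL-terminated "hello" |
 * (truncated to maxlen-1 characters if longer); reading it back |
 * returns "hello" followed by a newline. |
 */ |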
|
/* |
* Special case of dostring for the UTS structure. This has locks |
* to observe. Should this be in kernel/sys.c ???? |
*/ |
|
static int proc_doutsstring(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
int r; |
|
if (!write) { |
down_read(&uts_sem); |
r=proc_dostring(table,0,filp,buffer,lenp); |
up_read(&uts_sem); |
} else { |
down_write(&uts_sem); |
r=proc_dostring(table,1,filp,buffer,lenp); |
up_write(&uts_sem); |
} |
return r; |
} |
|
#define OP_SET 0 |
#define OP_AND 1 |
#define OP_OR 2 |
#define OP_MAX 3 |
#define OP_MIN 4 |
|
static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp, int conv, int op) |
{ |
int *i, vleft, first=1, neg, val; |
size_t left, len; |
|
#define TMPBUFLEN 20 |
char buf[TMPBUFLEN], *p; |
|
if (!table->data || !table->maxlen || !*lenp || |
(filp->f_pos && !write)) { |
*lenp = 0; |
return 0; |
} |
|
i = (int *) table->data; |
vleft = table->maxlen / sizeof(int); |
left = *lenp; |
|
for (; left && vleft--; i++, first=0) { |
if (write) { |
while (left) { |
char c; |
if (get_user(c, (char *) buffer)) |
return -EFAULT; |
if (!isspace(c)) |
break; |
left--; |
((char *) buffer)++; |
} |
if (!left) |
break; |
neg = 0; |
len = left; |
if (len > TMPBUFLEN-1) |
len = TMPBUFLEN-1; |
if(copy_from_user(buf, buffer, len)) |
return -EFAULT; |
buf[len] = 0; |
p = buf; |
if (*p == '-' && left > 1) { |
neg = 1; |
left--, p++; |
} |
if (*p < '0' || *p > '9') |
break; |
val = simple_strtoul(p, &p, 0) * conv; |
len = p-buf; |
if ((len < left) && *p && !isspace(*p)) |
break; |
if (neg) |
val = -val; |
buffer += len; |
left -= len; |
switch(op) { |
case OP_SET: *i = val; break; |
case OP_AND: *i &= val; break; |
case OP_OR: *i |= val; break; |
case OP_MAX: if(*i < val) |
*i = val; |
break; |
case OP_MIN: if(*i > val) |
*i = val; |
break; |
} |
} else { |
p = buf; |
if (!first) |
*p++ = '\t'; |
sprintf(p, "%d", (*i) / conv); |
len = strlen(buf); |
if (len > left) |
len = left; |
if(copy_to_user(buffer, buf, len)) |
return -EFAULT; |
left -= len; |
buffer += len; |
} |
} |
|
if (!write && !first && left) { |
if(put_user('\n', (char *) buffer)) |
return -EFAULT; |
left--, buffer++; |
} |
if (write) { |
p = (char *) buffer; |
while (left) { |
char c; |
if (get_user(c, p++)) |
return -EFAULT; |
if (!isspace(c)) |
break; |
left--; |
} |
} |
if (write && first) |
return -EINVAL; |
*lenp -= left; |
filp->f_pos += *lenp; |
return 0; |
} |
|
/** |
* proc_dointvec - read a vector of integers |
* @table: the sysctl table |
* @write: %TRUE if this is a write to the sysctl file |
* @filp: the file structure |
* @buffer: the user buffer |
* @lenp: the size of the user buffer |
* |
* Reads/writes up to table->maxlen/sizeof(unsigned int) integer |
* values from/to the user buffer, treated as an ASCII string. |
* |
* Returns 0 on success. |
*/ |
int proc_dointvec(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET); |
} |
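/* |
 * Usage sketch (hypothetical names): with an entry whose data is an |
 * array of three ints, |
 * |
 *	static int my_vec[3] = {10, 20, 30}; |
 * |
 *	... {1, "my_vec", my_vec, sizeof(my_vec), 0644, NULL, |
 *	     &proc_dointvec} ... |
 * |
 * reading the /proc file yields "10\t20\t30\n", and writing "1 2 3" |
 * stores 1, 2 and 3 into the array (whitespace-separated values, |
 * optionally negative, in any base accepted by simple_strtoul()). |
 */ |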
|
/* |
* init may raise the set. |
*/ |
|
int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
if (!capable(CAP_SYS_MODULE)) { |
return -EPERM; |
} |
return do_proc_dointvec(table,write,filp,buffer,lenp,1, |
(current->pid == 1) ? OP_SET : OP_AND); |
} |
|
/** |
* proc_dointvec_minmax - read a vector of integers with min/max values |
* @table: the sysctl table |
* @write: %TRUE if this is a write to the sysctl file |
* @filp: the file structure |
* @buffer: the user buffer |
* @lenp: the size of the user buffer |
* |
* Reads/writes up to table->maxlen/sizeof(unsigned int) integer |
* values from/to the user buffer, treated as an ASCII string. |
* |
* This routine will ensure the values are within the range specified by |
* table->extra1 (min) and table->extra2 (max). |
* |
* Returns 0 on success. |
*/ |
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
int *i, *min, *max, vleft, first=1, neg, val; |
size_t len, left; |
#define TMPBUFLEN 20 |
char buf[TMPBUFLEN], *p; |
|
if (!table->data || !table->maxlen || !*lenp || |
(filp->f_pos && !write)) { |
*lenp = 0; |
return 0; |
} |
|
i = (int *) table->data; |
min = (int *) table->extra1; |
max = (int *) table->extra2; |
vleft = table->maxlen / sizeof(int); |
left = *lenp; |
|
for (; left && vleft--; i++, min++, max++, first=0) { |
if (write) { |
while (left) { |
char c; |
if (get_user(c, (char *) buffer)) |
return -EFAULT; |
if (!isspace(c)) |
break; |
left--; |
((char *) buffer)++; |
} |
if (!left) |
break; |
neg = 0; |
len = left; |
if (len > TMPBUFLEN-1) |
len = TMPBUFLEN-1; |
if(copy_from_user(buf, buffer, len)) |
return -EFAULT; |
buf[len] = 0; |
p = buf; |
if (*p == '-' && left > 1) { |
neg = 1; |
left--, p++; |
} |
if (*p < '0' || *p > '9') |
break; |
val = simple_strtoul(p, &p, 0); |
len = p-buf; |
if ((len < left) && *p && !isspace(*p)) |
break; |
if (neg) |
val = -val; |
buffer += len; |
left -= len; |
|
if ((min && val < *min) || (max && val > *max)) |
continue; |
*i = val; |
} else { |
p = buf; |
if (!first) |
*p++ = '\t'; |
sprintf(p, "%d", *i); |
len = strlen(buf); |
if (len > left) |
len = left; |
if(copy_to_user(buffer, buf, len)) |
return -EFAULT; |
left -= len; |
buffer += len; |
} |
} |
|
if (!write && !first && left) { |
if(put_user('\n', (char *) buffer)) |
return -EFAULT; |
left--, buffer++; |
} |
if (write) { |
p = (char *) buffer; |
while (left) { |
char c; |
if (get_user(c, p++)) |
return -EFAULT; |
if (!isspace(c)) |
break; |
left--; |
} |
} |
if (write && first) |
return -EINVAL; |
*lenp -= left; |
filp->f_pos += *lenp; |
return 0; |
} |
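/* |
 * Usage sketch (hypothetical names): clamping a one-element vector to |
 * the range 1..100.  Note that an out-of-range write is silently |
 * skipped for that element rather than rejected with an error. |
 * |
 *	static int my_level = 50; |
 *	static int my_min = 1, my_max = 100; |
 * |
 *	... {1, "my_level", &my_level, sizeof(int), 0644, NULL, |
 *	     &proc_dointvec_minmax, &sysctl_intvec, NULL, |
 *	     &my_min, &my_max} ... |
 */ |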
|
static int do_proc_doulongvec_minmax(ctl_table *table, int write, |
struct file *filp, |
void *buffer, size_t *lenp, |
unsigned long convmul, |
unsigned long convdiv) |
{ |
#define TMPBUFLEN 20 |
unsigned long *i, *min, *max, val; |
int vleft, first=1, neg; |
size_t len, left; |
char buf[TMPBUFLEN], *p; |
|
if (!table->data || !table->maxlen || !*lenp || |
(filp->f_pos && !write)) { |
*lenp = 0; |
return 0; |
} |
|
i = (unsigned long *) table->data; |
min = (unsigned long *) table->extra1; |
max = (unsigned long *) table->extra2; |
vleft = table->maxlen / sizeof(unsigned long); |
left = *lenp; |
|
for (; left && vleft--; i++, first=0) { |
if (write) { |
while (left) { |
char c; |
if (get_user(c, (char *) buffer)) |
return -EFAULT; |
if (!isspace(c)) |
break; |
left--; |
((char *) buffer)++; |
} |
if (!left) |
break; |
neg = 0; |
len = left; |
if (len > TMPBUFLEN-1) |
len = TMPBUFLEN-1; |
if(copy_from_user(buf, buffer, len)) |
return -EFAULT; |
buf[len] = 0; |
p = buf; |
if (*p == '-' && left > 1) { |
neg = 1; |
left--, p++; |
} |
if (*p < '0' || *p > '9') |
break; |
val = simple_strtoul(p, &p, 0) * convmul / convdiv ; |
len = p-buf; |
if ((len < left) && *p && !isspace(*p)) |
break; |
if (neg) |
val = -val; |
buffer += len; |
left -= len; |
|
if(neg) |
continue; |
if (min && val < *min++) |
continue; |
if (max && val > *max++) |
continue; |
*i = val; |
} else { |
p = buf; |
if (!first) |
*p++ = '\t'; |
sprintf(p, "%lu", convdiv * (*i) / convmul); |
len = strlen(buf); |
if (len > left) |
len = left; |
if(copy_to_user(buffer, buf, len)) |
return -EFAULT; |
left -= len; |
buffer += len; |
} |
} |
|
if (!write && !first && left) { |
if(put_user('\n', (char *) buffer)) |
return -EFAULT; |
left--, buffer++; |
} |
if (write) { |
p = (char *) buffer; |
while (left) { |
char c; |
if (get_user(c, p++)) |
return -EFAULT; |
if (!isspace(c)) |
break; |
left--; |
} |
} |
if (write && first) |
return -EINVAL; |
*lenp -= left; |
filp->f_pos += *lenp; |
return 0; |
#undef TMPBUFLEN |
} |
|
/** |
* proc_doulongvec_minmax - read a vector of long integers with min/max values |
* @table: the sysctl table |
* @write: %TRUE if this is a write to the sysctl file |
* @filp: the file structure |
* @buffer: the user buffer |
* @lenp: the size of the user buffer |
* |
* Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long |
* values from/to the user buffer, treated as an ASCII string. |
* |
* This routine will ensure the values are within the range specified by |
* table->extra1 (min) and table->extra2 (max). |
* |
* Returns 0 on success. |
*/ |
int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l); |
} |
|
/** |
* proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values |
* @table: the sysctl table |
* @write: %TRUE if this is a write to the sysctl file |
* @filp: the file structure |
* @buffer: the user buffer |
* @lenp: the size of the user buffer |
* |
* Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long |
* values from/to the user buffer, treated as an ASCII string. The values |
* are treated as milliseconds, and converted to jiffies when they are stored. |
* |
* This routine will ensure the values are within the range specified by |
* table->extra1 (min) and table->extra2 (max). |
* |
* Returns 0 on success. |
*/ |
int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, |
struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return do_proc_doulongvec_minmax(table, write, filp, buffer, |
lenp, HZ, 1000l); |
} |
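/* |
 * Worked example (assuming HZ == 100): writing "250" (milliseconds) |
 * through this handler stores 250 * HZ / 1000 == 25 jiffies, and |
 * reading back reports 1000 * 25 / HZ == 250.  Any min/max bounds in |
 * extra1/extra2 are unsigned longs compared against the converted |
 * (jiffies) value. |
 */ |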
|
|
/** |
* proc_dointvec_jiffies - read a vector of integers as seconds |
* @table: the sysctl table |
* @write: %TRUE if this is a write to the sysctl file |
* @filp: the file structure |
* @buffer: the user buffer |
* @lenp: the size of the user buffer |
* |
* Reads/writes up to table->maxlen/sizeof(unsigned int) integer |
* values from/to the user buffer, treated as an ASCII string. |
* The values read are assumed to be in seconds, and are converted into |
* jiffies. |
* |
* Returns 0 on success. |
*/ |
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return do_proc_dointvec(table,write,filp,buffer,lenp,HZ,OP_SET); |
} |
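/* |
 * Worked example (assuming HZ == 100): writing "5" (seconds) through |
 * proc_dointvec_jiffies() stores 5 * HZ == 500 jiffies; reading back |
 * reports 500 / HZ == 5. |
 */ |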
|
#else /* CONFIG_PROC_FS */ |
|
int proc_dostring(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
static int proc_doutsstring(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, |
struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
|
#endif /* CONFIG_PROC_FS */ |
|
|
/* |
* General sysctl support routines |
*/ |
|
/* The generic string strategy routine: */ |
int sysctl_string(ctl_table *table, int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, void **context) |
{ |
size_t l, len; |
|
if (!table->data || !table->maxlen) |
return -ENOTDIR; |
|
if (oldval && oldlenp) { |
if (get_user(len, oldlenp)) |
return -EFAULT; |
if (len) { |
l = strlen(table->data); |
if (len > l) len = l; |
if (len >= table->maxlen) |
len = table->maxlen; |
if(copy_to_user(oldval, table->data, len)) |
return -EFAULT; |
if(put_user(0, ((char *) oldval) + len)) |
return -EFAULT; |
if(put_user(len, oldlenp)) |
return -EFAULT; |
} |
} |
if (newval && newlen) { |
len = newlen; |
if (len > table->maxlen) |
len = table->maxlen; |
if(copy_from_user(table->data, newval, len)) |
return -EFAULT; |
if (len == table->maxlen) |
len--; |
((char *) table->data)[len] = 0; |
} |
return 0; |
} |
|
/* |
* This function makes sure that all of the integers in the vector |
* are between the minimum and maximum values given in the arrays |
* table->extra1 and table->extra2, respectively. |
*/ |
int sysctl_intvec(ctl_table *table, int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, void **context) |
{ |
int i, *vec, *min, *max; |
size_t length; |
|
if (newval && newlen) { |
if (newlen % sizeof(int) != 0) |
return -EINVAL; |
|
if (!table->extra1 && !table->extra2) |
return 0; |
|
if (newlen > table->maxlen) |
newlen = table->maxlen; |
length = newlen / sizeof(int); |
|
vec = (int *) newval; |
min = (int *) table->extra1; |
max = (int *) table->extra2; |
|
for (i = 0; i < length; i++) { |
int value; |
if (get_user(value, vec + i)) |
return -EFAULT; |
if (min && value < min[i]) |
return -EINVAL; |
if (max && value > max[i]) |
return -EINVAL; |
} |
} |
return 0; |
} |
|
/* Strategy function to convert jiffies to seconds */ |
int sysctl_jiffies(ctl_table *table, int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, void **context) |
{ |
if (oldval) { |
size_t olen; |
if (oldlenp) { |
if (get_user(olen, oldlenp)) |
return -EFAULT; |
if (olen!=sizeof(int)) |
return -EINVAL; |
} |
if (put_user(*(int *)(table->data) / HZ, (int *)oldval) || |
(oldlenp && put_user(sizeof(int),oldlenp))) |
return -EFAULT; |
} |
if (newval && newlen) { |
int new; |
if (newlen != sizeof(int)) |
return -EINVAL; |
if (get_user(new, (int *)newval)) |
return -EFAULT; |
*(int *)(table->data) = new*HZ; |
} |
return 1; |
} |
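/* |
 * Note on the strategy return convention (see the comment above |
 * register_sysctl_table()): sysctl_string() and sysctl_intvec() return |
 * 0, so the generic sysctl(2) code goes on to copy the data itself, |
 * while sysctl_jiffies() returns 1 because it has already copied the |
 * converted value and the automatic read/write must be skipped. |
 */ |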
|
|
#else /* CONFIG_SYSCTL */ |
|
|
extern asmlinkage long sys_sysctl(struct __sysctl_args *args) |
{ |
return -ENOSYS; |
} |
|
int sysctl_string(ctl_table *table, int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, void **context) |
{ |
return -ENOSYS; |
} |
|
int sysctl_intvec(ctl_table *table, int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, void **context) |
{ |
return -ENOSYS; |
} |
|
int sysctl_jiffies(ctl_table *table, int *name, int nlen, |
void *oldval, size_t *oldlenp, |
void *newval, size_t newlen, void **context) |
{ |
return -ENOSYS; |
} |
|
int proc_dostring(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, |
struct file *filp, |
void *buffer, size_t *lenp) |
{ |
return -ENOSYS; |
} |
|
struct ctl_table_header * register_sysctl_table(ctl_table * table, |
int insert_at_head) |
{ |
return 0; |
} |
|
void unregister_sysctl_table(struct ctl_table_header * table) |
{ |
} |
|
#endif /* CONFIG_SYSCTL */ |
/capability.c
0,0 → 1,216
/* |
* linux/kernel/capability.c |
* |
* Copyright (C) 1997 Andrew Main <zefram@fysh.org> |
* Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com> |
*/ |
|
#include <linux/mm.h> |
#include <asm/uaccess.h> |
|
kernel_cap_t cap_bset = CAP_INIT_EFF_SET; |
|
/* Note: never hold tasklist_lock while spinning for this one */ |
spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED; |
|
/* |
 * For sys_capget() and sys_capset(), any of the three |
* capability set pointers may be NULL -- indicating that that set is |
* uninteresting and/or not to be changed. |
*/ |
|
asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) |
{ |
int error, pid; |
__u32 version; |
struct task_struct *target; |
struct __user_cap_data_struct data; |
|
if (get_user(version, &header->version)) |
return -EFAULT; |
|
error = -EINVAL; |
if (version != _LINUX_CAPABILITY_VERSION) { |
version = _LINUX_CAPABILITY_VERSION; |
if (put_user(version, &header->version)) |
error = -EFAULT; |
return error; |
} |
|
if (get_user(pid, &header->pid)) |
return -EFAULT; |
|
if (pid < 0) |
return -EINVAL; |
|
error = 0; |
|
spin_lock(&task_capability_lock); |
|
if (pid && pid != current->pid) { |
read_lock(&tasklist_lock); |
target = find_task_by_pid(pid); /* identify target of query */ |
if (!target) |
error = -ESRCH; |
} else { |
target = current; |
} |
|
if (!error) { |
data.permitted = cap_t(target->cap_permitted); |
data.inheritable = cap_t(target->cap_inheritable); |
data.effective = cap_t(target->cap_effective); |
} |
|
if (target != current) |
read_unlock(&tasklist_lock); |
spin_unlock(&task_capability_lock); |
|
if (!error) { |
if (copy_to_user(dataptr, &data, sizeof data)) |
return -EFAULT; |
} |
|
return error; |
} |
|
/* set capabilities for all processes in a given process group */ |
|
static void cap_set_pg(int pgrp, |
kernel_cap_t *effective, |
kernel_cap_t *inheritable, |
kernel_cap_t *permitted) |
{ |
struct task_struct *target; |
|
/* FIXME: do we need to have a write lock here..? */ |
read_lock(&tasklist_lock); |
for_each_task(target) { |
if (target->pgrp != pgrp) |
continue; |
target->cap_effective = *effective; |
target->cap_inheritable = *inheritable; |
target->cap_permitted = *permitted; |
} |
read_unlock(&tasklist_lock); |
} |
|
/* set capabilities for all processes other than 1 and self */ |
|
static void cap_set_all(kernel_cap_t *effective, |
kernel_cap_t *inheritable, |
kernel_cap_t *permitted) |
{ |
struct task_struct *target; |
|
/* FIXME: do we need to have a write lock here..? */ |
read_lock(&tasklist_lock); |
/* ALL means everyone other than self or 'init' */ |
for_each_task(target) { |
if (target == current || target->pid == 1) |
continue; |
target->cap_effective = *effective; |
target->cap_inheritable = *inheritable; |
target->cap_permitted = *permitted; |
} |
read_unlock(&tasklist_lock); |
} |
|
/* |
* The restrictions on setting capabilities are specified as: |
* |
* [pid is for the 'target' task. 'current' is the calling task.] |
* |
* I: any raised capabilities must be a subset of the (old current) Permitted |
 * P: any raised capabilities must be a subset of the (old current) Permitted |
* E: must be set to a subset of (new target) Permitted |
*/ |
|
asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) |
{ |
kernel_cap_t inheritable, permitted, effective; |
__u32 version; |
struct task_struct *target; |
int error, pid; |
|
if (get_user(version, &header->version)) |
return -EFAULT; |
|
if (version != _LINUX_CAPABILITY_VERSION) { |
version = _LINUX_CAPABILITY_VERSION; |
if (put_user(version, &header->version)) |
return -EFAULT; |
return -EINVAL; |
} |
|
if (get_user(pid, &header->pid)) |
return -EFAULT; |
|
if (pid && !capable(CAP_SETPCAP)) |
return -EPERM; |
|
if (copy_from_user(&effective, &data->effective, sizeof(effective)) || |
copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) || |
copy_from_user(&permitted, &data->permitted, sizeof(permitted))) |
return -EFAULT; |
|
error = -EPERM; |
spin_lock(&task_capability_lock); |
|
if (pid > 0 && pid != current->pid) { |
read_lock(&tasklist_lock); |
target = find_task_by_pid(pid); /* identify target of query */ |
if (!target) { |
error = -ESRCH; |
goto out; |
} |
} else { |
target = current; |
} |
|
|
/* verify restrictions on target's new Inheritable set */ |
if (!cap_issubset(inheritable, |
cap_combine(target->cap_inheritable, |
current->cap_permitted))) { |
goto out; |
} |
|
/* verify restrictions on target's new Permitted set */ |
if (!cap_issubset(permitted, |
cap_combine(target->cap_permitted, |
current->cap_permitted))) { |
goto out; |
} |
|
/* verify the _new_Effective_ is a subset of the _new_Permitted_ */ |
if (!cap_issubset(effective, permitted)) { |
goto out; |
} |
|
/* having verified that the proposed changes are legal, |
we now put them into effect. */ |
error = 0; |
|
if (pid < 0) { |
if (pid == -1) /* all procs other than current and init */ |
cap_set_all(&effective, &inheritable, &permitted); |
|
else /* all procs in process group */ |
cap_set_pg(-pid, &effective, &inheritable, &permitted); |
goto spin_out; |
} else { |
/* FIXME: do we need to have a write lock here..? */ |
target->cap_effective = effective; |
target->cap_inheritable = inheritable; |
target->cap_permitted = permitted; |
} |
|
out: |
if (target != current) { |
read_unlock(&tasklist_lock); |
} |
spin_out: |
spin_unlock(&task_capability_lock); |
return error; |
} |
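/* |
 * Worked example of the three checks above, for a hypothetical |
 * capability word: with target->cap_inheritable == 0x01 and |
 * current->cap_permitted == 0x06, cap_combine() yields 0x07, so a |
 * requested Inheritable set of 0x05 passes cap_issubset(0x05, 0x07), |
 * while a request of 0x09 fails because bit 3 is raised in neither the |
 * old target Inheritable set nor the caller's Permitted set. |
 */ |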
/context.c
0,0 → 1,165
/* |
* linux/kernel/context.c |
* |
* Mechanism for running arbitrary tasks in process context |
* |
* dwmw2@redhat.com: Genesis |
* |
* andrewm@uow.edu.au: 2.4.0-test12 |
* - Child reaping |
* - Support for tasks which re-add themselves |
* - flush_scheduled_tasks. |
*/ |
|
#define __KERNEL_SYSCALLS__ |
|
#include <linux/module.h> |
#include <linux/kernel.h> |
#include <linux/sched.h> |
#include <linux/init.h> |
#include <linux/unistd.h> |
#include <linux/signal.h> |
#include <linux/completion.h> |
|
static DECLARE_TASK_QUEUE(tq_context); |
static DECLARE_WAIT_QUEUE_HEAD(context_task_wq); |
static DECLARE_WAIT_QUEUE_HEAD(context_task_done); |
static int keventd_running; |
static struct task_struct *keventd_task; |
|
static int need_keventd(const char *who) |
{ |
if (keventd_running == 0) |
printk(KERN_ERR "%s(): keventd has not started\n", who); |
return keventd_running; |
} |
|
int current_is_keventd(void) |
{ |
int ret = 0; |
if (need_keventd(__FUNCTION__)) |
ret = (current == keventd_task); |
return ret; |
} |
|
/** |
* schedule_task - schedule a function for subsequent execution in process context. |
* @task: pointer to a &tq_struct which defines the function to be scheduled. |
* |
* May be called from interrupt context. The scheduled function is run at some |
* time in the near future by the keventd kernel thread. If it can sleep, it |
* should be designed to do so for the minimum possible time, as it will be |
* stalling all other scheduled tasks. |
* |
* schedule_task() returns non-zero if the task was successfully scheduled. |
* If @task is already residing on a task queue then schedule_task() fails |
* to schedule your task and returns zero. |
*/ |
int schedule_task(struct tq_struct *task) |
{ |
int ret; |
need_keventd(__FUNCTION__); |
ret = queue_task(task, &tq_context); |
wake_up(&context_task_wq); |
return ret; |
} |
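/* |
 * Usage sketch (hypothetical names): deferring work from interrupt |
 * context to keventd. |
 * |
 *	static void my_deferred_work(void *data) |
 *	{ |
 *		... runs in process context, may sleep briefly ... |
 *	} |
 * |
 *	static struct tq_struct my_task = { |
 *		routine:	my_deferred_work, |
 *		data:		NULL, |
 *	}; |
 * |
 *	... from an interrupt handler ... |
 *	schedule_task(&my_task); |
 */ |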
|
static int context_thread(void *startup) |
{ |
struct task_struct *curtask = current; |
DECLARE_WAITQUEUE(wait, curtask); |
struct k_sigaction sa; |
|
daemonize(); |
strcpy(curtask->comm, "keventd"); |
keventd_running = 1; |
keventd_task = curtask; |
|
spin_lock_irq(&curtask->sigmask_lock); |
siginitsetinv(&curtask->blocked, sigmask(SIGCHLD)); |
recalc_sigpending(curtask); |
spin_unlock_irq(&curtask->sigmask_lock); |
|
complete((struct completion *)startup); |
|
/* Install a handler so SIGCLD is delivered */ |
sa.sa.sa_handler = SIG_IGN; |
sa.sa.sa_flags = 0; |
siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); |
do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0); |
|
/* |
* If one of the functions on a task queue re-adds itself |
* to the task queue we call schedule() in state TASK_RUNNING |
*/ |
for (;;) { |
set_task_state(curtask, TASK_INTERRUPTIBLE); |
add_wait_queue(&context_task_wq, &wait); |
if (TQ_ACTIVE(tq_context)) |
set_task_state(curtask, TASK_RUNNING); |
schedule(); |
remove_wait_queue(&context_task_wq, &wait); |
run_task_queue(&tq_context); |
wake_up(&context_task_done); |
if (signal_pending(curtask)) { |
while (waitpid(-1, (unsigned int *)0, __WALL|WNOHANG) > 0) |
; |
spin_lock_irq(&curtask->sigmask_lock); |
flush_signals(curtask); |
recalc_sigpending(curtask); |
spin_unlock_irq(&curtask->sigmask_lock); |
} |
} |
} |
|
/** |
* flush_scheduled_tasks - ensure that any scheduled tasks have run to completion. |
* |
* Forces execution of the schedule_task() queue and blocks until its completion. |
* |
* If a kernel subsystem uses schedule_task() and wishes to flush any pending |
* tasks, it should use this function. This is typically used in driver shutdown |
* handlers. |
* |
* The caller should hold no spinlocks and should hold no semaphores which could |
* cause the scheduled tasks to block. |
*/ |
static struct tq_struct dummy_task; |
|
void flush_scheduled_tasks(void) |
{ |
int count; |
DECLARE_WAITQUEUE(wait, current); |
|
/* |
* Do it twice. It's possible, albeit highly unlikely, that |
* the caller queued a task immediately before calling us, |
 * and that the keventd thread was already past the run_task_queue() |
* but not yet into wake_up(), so it woke us up before completing |
* the caller's queued task or our new dummy task. |
*/ |
add_wait_queue(&context_task_done, &wait); |
for (count = 0; count < 2; count++) { |
set_current_state(TASK_UNINTERRUPTIBLE); |
|
/* Queue a dummy task to make sure we get kicked */ |
schedule_task(&dummy_task); |
|
/* Wait for it to complete */ |
schedule(); |
} |
remove_wait_queue(&context_task_done, &wait); |
} |
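/* |
 * Usage sketch: a driver that queued work with schedule_task(), such |
 * as the hypothetical my_task above, would typically call |
 * flush_scheduled_tasks() from its shutdown path, after ensuring no |
 * new tasks can be queued, so the deferred routine is not left running |
 * against freed data. |
 */ |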
|
int start_context_thread(void) |
{ |
static struct completion startup __initdata = COMPLETION_INITIALIZER(startup); |
|
kernel_thread(context_thread, &startup, CLONE_FS | CLONE_FILES); |
wait_for_completion(&startup); |
return 0; |
} |
|
EXPORT_SYMBOL(schedule_task); |
EXPORT_SYMBOL(flush_scheduled_tasks); |
|
/module.c
0,0 → 1,1296
#include <linux/config.h> |
#include <linux/mm.h> |
#include <linux/module.h> |
#include <asm/module.h> |
#include <asm/uaccess.h> |
#include <linux/vmalloc.h> |
#include <linux/smp_lock.h> |
#include <asm/pgalloc.h> |
#include <linux/init.h> |
#include <linux/slab.h> |
#include <linux/kmod.h> |
#include <linux/seq_file.h> |
|
/* |
* Originally by Anonymous (as far as I know...) |
* Linux version by Bas Laarhoven <bas@vimec.nl> |
* 0.99.14 version by Jon Tombs <jon@gtex02.us.es>, |
* Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C) |
* Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 |
* Add MOD_INITIALIZING Keith Owens <kaos@ocs.com.au> Nov 1999 |
* Add kallsyms support, Keith Owens <kaos@ocs.com.au> Apr 2000 |
* Add asm/module support, IA64 has special requirements. Keith Owens <kaos@ocs.com.au> Sep 2000 |
* Fix assorted bugs in module verification. Keith Owens <kaos@ocs.com.au> Sep 2000 |
* Fix sys_init_module race, Andrew Morton <andrewm@uow.edu.au> Oct 2000 |
* http://www.uwsg.iu.edu/hypermail/linux/kernel/0008.3/0379.html |
* Replace xxx_module_symbol with inter_module_xxx. Keith Owens <kaos@ocs.com.au> Oct 2000 |
* Add a module list lock for kernel fault race fixing. Alan Cox <alan@redhat.com> |
* |
* This source is covered by the GNU GPL, the same as all kernel sources. |
*/ |
|
#if defined(CONFIG_MODULES) || defined(CONFIG_KALLSYMS) |
|
extern struct module_symbol __start___ksymtab[]; |
extern struct module_symbol __stop___ksymtab[]; |
|
extern const struct exception_table_entry __start___ex_table[]; |
extern const struct exception_table_entry __stop___ex_table[]; |
|
extern const char __start___kallsyms[] __attribute__ ((weak)); |
extern const char __stop___kallsyms[] __attribute__ ((weak)); |
|
struct module kernel_module = |
{ |
size_of_struct: sizeof(struct module), |
name: "", |
uc: {ATOMIC_INIT(1)}, |
flags: MOD_RUNNING, |
syms: __start___ksymtab, |
ex_table_start: __start___ex_table, |
ex_table_end: __stop___ex_table, |
kallsyms_start: __start___kallsyms, |
kallsyms_end: __stop___kallsyms, |
}; |
|
struct module *module_list = &kernel_module; |
|
#endif /* defined(CONFIG_MODULES) || defined(CONFIG_KALLSYMS) */ |
|
/* inter_module functions are always available, even when the kernel is |
 * compiled without modules.  Consumers of the inter_module_xxx routines |
 * will always work, even when both provider and consumer are built into |
 * the kernel; this approach removes lots of #ifdefs in mainline code. |
 */ |
|
static struct list_head ime_list = LIST_HEAD_INIT(ime_list); |
static spinlock_t ime_lock = SPIN_LOCK_UNLOCKED; |
static int kmalloc_failed; |
|
/* |
* This lock prevents modifications that might race the kernel fault |
* fixups. It does not prevent reader walks that the modules code |
* does. The kernel lock does that. |
* |
* Since vmalloc fault fixups occur in any context this lock is taken |
* irqsave at all times. |
*/ |
|
spinlock_t modlist_lock = SPIN_LOCK_UNLOCKED; |
|
/** |
* inter_module_register - register a new set of inter module data. |
* @im_name: an arbitrary string to identify the data, must be unique |
* @owner: module that is registering the data, always use THIS_MODULE |
* @userdata: pointer to arbitrary userdata to be registered |
* |
* Description: Check that the im_name has not already been registered, |
* complain if it has. For new data, add it to the inter_module_entry |
* list. |
*/ |
void inter_module_register(const char *im_name, struct module *owner, const void *userdata) |
{ |
struct list_head *tmp; |
struct inter_module_entry *ime, *ime_new; |
|
if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) { |
/* Overloaded kernel, not fatal */ |
printk(KERN_ERR |
"Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", |
im_name); |
kmalloc_failed = 1; |
return; |
} |
memset(ime_new, 0, sizeof(*ime_new)); |
ime_new->im_name = im_name; |
ime_new->owner = owner; |
ime_new->userdata = userdata; |
|
spin_lock(&ime_lock); |
list_for_each(tmp, &ime_list) { |
ime = list_entry(tmp, struct inter_module_entry, list); |
if (strcmp(ime->im_name, im_name) == 0) { |
spin_unlock(&ime_lock); |
kfree(ime_new); |
/* Program logic error, fatal */ |
printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name); |
BUG(); |
} |
} |
list_add(&(ime_new->list), &ime_list); |
spin_unlock(&ime_lock); |
} |
|
/** |
* inter_module_unregister - unregister a set of inter module data. |
* @im_name: an arbitrary string to identify the data, must be unique |
* |
* Description: Check that the im_name has been registered, complain if |
* it has not. For existing data, remove it from the |
* inter_module_entry list. |
*/ |
void inter_module_unregister(const char *im_name) |
{ |
struct list_head *tmp; |
struct inter_module_entry *ime; |
|
spin_lock(&ime_lock); |
list_for_each(tmp, &ime_list) { |
ime = list_entry(tmp, struct inter_module_entry, list); |
if (strcmp(ime->im_name, im_name) == 0) { |
list_del(&(ime->list)); |
spin_unlock(&ime_lock); |
kfree(ime); |
return; |
} |
} |
spin_unlock(&ime_lock); |
if (kmalloc_failed) { |
printk(KERN_ERR |
"inter_module_unregister: no entry for '%s', " |
"probably caused by previous kmalloc failure\n", |
im_name); |
return; |
} |
else { |
/* Program logic error, fatal */ |
printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name); |
BUG(); |
} |
} |
|
/** |
* inter_module_get - return arbitrary userdata from another module. |
* @im_name: an arbitrary string to identify the data, must be unique |
* |
* Description: If the im_name has not been registered, return NULL. |
* Try to increment the use count on the owning module, if that fails |
* then return NULL. Otherwise return the userdata. |
*/ |
const void *inter_module_get(const char *im_name) |
{ |
struct list_head *tmp; |
struct inter_module_entry *ime; |
const void *result = NULL; |
|
spin_lock(&ime_lock); |
list_for_each(tmp, &ime_list) { |
ime = list_entry(tmp, struct inter_module_entry, list); |
if (strcmp(ime->im_name, im_name) == 0) { |
if (try_inc_mod_count(ime->owner)) |
result = ime->userdata; |
break; |
} |
} |
spin_unlock(&ime_lock); |
return(result); |
} |
|
/** |
* inter_module_get_request - im get with automatic request_module. |
* @im_name: an arbitrary string to identify the data, must be unique |
* @modname: module that is expected to register im_name |
* |
* Description: If inter_module_get fails, do request_module then retry. |
*/ |
const void *inter_module_get_request(const char *im_name, const char *modname) |
{ |
const void *result = inter_module_get(im_name); |
if (!result) { |
request_module(modname); |
result = inter_module_get(im_name); |
} |
return(result); |
} |
|
/** |
* inter_module_put - release use of data from another module. |
* @im_name: an arbitrary string to identify the data, must be unique |
* |
* Description: If the im_name has not been registered, complain, |
* otherwise decrement the use count on the owning module. |
*/ |
void inter_module_put(const char *im_name) |
{ |
struct list_head *tmp; |
struct inter_module_entry *ime; |
|
spin_lock(&ime_lock); |
list_for_each(tmp, &ime_list) { |
ime = list_entry(tmp, struct inter_module_entry, list); |
if (strcmp(ime->im_name, im_name) == 0) { |
if (ime->owner) |
__MOD_DEC_USE_COUNT(ime->owner); |
spin_unlock(&ime_lock); |
return; |
} |
} |
spin_unlock(&ime_lock); |
printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name); |
BUG(); |
} |
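/* |
 * Usage sketch (hypothetical names): a producer/consumer pair built on |
 * the inter_module calls above. |
 * |
 * Producer module: |
 * |
 *	static struct my_ops my_exported_ops = { ... }; |
 * |
 *	inter_module_register("my-ops", THIS_MODULE, &my_exported_ops); |
 *	... |
 *	inter_module_unregister("my-ops"); |
 * |
 * Consumer module: |
 * |
 *	const struct my_ops *ops; |
 * |
 *	ops = inter_module_get_request("my-ops", "my_producer"); |
 *	if (ops) { |
 *		... use ops ... |
 *		inter_module_put("my-ops"); |
 *	} |
 * |
 * inter_module_get_request() loads "my_producer" via request_module() |
 * if "my-ops" has not been registered yet, and the producer's use |
 * count stays raised until inter_module_put(). |
 */ |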
|
|
#if defined(CONFIG_MODULES) /* The rest of the source */ |
|
static long get_mod_name(const char *user_name, char **buf); |
static void put_mod_name(char *buf); |
struct module *find_module(const char *name); |
void free_module(struct module *, int tag_freed); |
|
|
/* |
* Called at boot time |
*/ |
|
void __init init_modules(void) |
{ |
kernel_module.nsyms = __stop___ksymtab - __start___ksymtab; |
|
arch_init_modules(&kernel_module); |
} |
|
/* |
* Copy the name of a module from user space. |
*/ |
|
static inline long |
get_mod_name(const char *user_name, char **buf) |
{ |
unsigned long page; |
long retval; |
|
page = __get_free_page(GFP_KERNEL); |
if (!page) |
return -ENOMEM; |
|
retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE); |
if (retval > 0) { |
if (retval < PAGE_SIZE) { |
*buf = (char *)page; |
return retval; |
} |
retval = -ENAMETOOLONG; |
} else if (!retval) |
retval = -EINVAL; |
|
free_page(page); |
return retval; |
} |
|
static inline void |
put_mod_name(char *buf) |
{ |
free_page((unsigned long)buf); |
} |
|
/* |
* Allocate space for a module. |
*/ |
|
asmlinkage unsigned long |
sys_create_module(const char *name_user, size_t size) |
{ |
char *name; |
long namelen, error; |
struct module *mod; |
unsigned long flags; |
|
if (!capable(CAP_SYS_MODULE)) |
return -EPERM; |
lock_kernel(); |
if ((namelen = get_mod_name(name_user, &name)) < 0) { |
error = namelen; |
goto err0; |
} |
if (size < sizeof(struct module)+namelen+1) { |
error = -EINVAL; |
goto err1; |
} |
if (find_module(name) != NULL) { |
error = -EEXIST; |
goto err1; |
} |
if ((mod = (struct module *)module_map(size)) == NULL) { |
error = -ENOMEM; |
goto err1; |
} |
|
memset(mod, 0, sizeof(*mod)); |
mod->size_of_struct = sizeof(*mod); |
mod->name = (char *)(mod + 1); |
mod->size = size; |
memcpy((char*)(mod+1), name, namelen+1); |
|
put_mod_name(name); |
|
spin_lock_irqsave(&modlist_lock, flags); |
mod->next = module_list; |
module_list = mod; /* link it in */ |
spin_unlock_irqrestore(&modlist_lock, flags); |
|
error = (long) mod; |
goto err0; |
err1: |
put_mod_name(name); |
err0: |
unlock_kernel(); |
return error; |
} |
|
/* |
* Initialize a module. |
*/ |
|
asmlinkage long |
sys_init_module(const char *name_user, struct module *mod_user) |
{ |
struct module mod_tmp, *mod, *mod2 = NULL; |
char *name, *n_name, *name_tmp = NULL; |
long namelen, n_namelen, i, error; |
unsigned long mod_user_size, flags; |
struct module_ref *dep; |
|
if (!capable(CAP_SYS_MODULE)) |
return -EPERM; |
lock_kernel(); |
if ((namelen = get_mod_name(name_user, &name)) < 0) { |
error = namelen; |
goto err0; |
} |
if ((mod = find_module(name)) == NULL) { |
error = -ENOENT; |
goto err1; |
} |
|
/* Check module header size. We allow a bit of slop over the |
size we are familiar with to cope with a version of insmod |
	   for a newer kernel.  But don't overdo it. */ |
if ((error = get_user(mod_user_size, &mod_user->size_of_struct)) != 0) |
goto err1; |
if (mod_user_size < (unsigned long)&((struct module *)0L)->persist_start |
|| mod_user_size > sizeof(struct module) + 16*sizeof(void*)) { |
printk(KERN_ERR "init_module: Invalid module header size.\n" |
KERN_ERR "A new version of the modutils is likely " |
"needed.\n"); |
error = -EINVAL; |
goto err1; |
} |
|
/* Hold the current contents while we play with the user's idea |
of righteousness. */ |
mod_tmp = *mod; |
name_tmp = kmalloc(strlen(mod->name) + 1, GFP_KERNEL); /* Where's kstrdup()? */ |
if (name_tmp == NULL) { |
error = -ENOMEM; |
goto err1; |
} |
strcpy(name_tmp, mod->name); |
|
/* Copying mod_user directly over mod breaks the module_list chain and |
* races against search_exception_table. copy_from_user may sleep so it |
 * cannot be called under modlist_lock; do the copy in two stages. |
*/ |
if (!(mod2 = vmalloc(mod_user_size))) { |
error = -ENOMEM; |
goto err2; |
} |
error = copy_from_user(mod2, mod_user, mod_user_size); |
if (error) { |
error = -EFAULT; |
goto err2; |
} |
spin_lock_irqsave(&modlist_lock, flags); |
memcpy(mod, mod2, mod_user_size); |
mod->next = mod_tmp.next; |
spin_unlock_irqrestore(&modlist_lock, flags); |
|
/* Sanity check the size of the module. */ |
error = -EINVAL; |
|
if (mod->size > mod_tmp.size) { |
printk(KERN_ERR "init_module: Size of initialized module " |
"exceeds size of created module.\n"); |
goto err2; |
} |
|
/* Make sure all interesting pointers are sane. */ |
|
if (!mod_bound(mod->name, namelen, mod)) { |
printk(KERN_ERR "init_module: mod->name out of bounds.\n"); |
goto err2; |
} |
if (mod->nsyms && !mod_bound(mod->syms, mod->nsyms, mod)) { |
printk(KERN_ERR "init_module: mod->syms out of bounds.\n"); |
goto err2; |
} |
if (mod->ndeps && !mod_bound(mod->deps, mod->ndeps, mod)) { |
printk(KERN_ERR "init_module: mod->deps out of bounds.\n"); |
goto err2; |
} |
if (mod->init && !mod_bound(mod->init, 0, mod)) { |
printk(KERN_ERR "init_module: mod->init out of bounds.\n"); |
goto err2; |
} |
if (mod->cleanup && !mod_bound(mod->cleanup, 0, mod)) { |
printk(KERN_ERR "init_module: mod->cleanup out of bounds.\n"); |
goto err2; |
} |
if (mod->ex_table_start > mod->ex_table_end |
|| (mod->ex_table_start && |
!((unsigned long)mod->ex_table_start >= ((unsigned long)mod + mod->size_of_struct) |
&& ((unsigned long)mod->ex_table_end |
< (unsigned long)mod + mod->size))) |
|| (((unsigned long)mod->ex_table_start |
- (unsigned long)mod->ex_table_end) |
% sizeof(struct exception_table_entry))) { |
printk(KERN_ERR "init_module: mod->ex_table_* invalid.\n"); |
goto err2; |
} |
if (mod->flags & ~MOD_AUTOCLEAN) { |
printk(KERN_ERR "init_module: mod->flags invalid.\n"); |
goto err2; |
} |
if (mod_member_present(mod, can_unload) |
&& mod->can_unload && !mod_bound(mod->can_unload, 0, mod)) { |
printk(KERN_ERR "init_module: mod->can_unload out of bounds.\n"); |
goto err2; |
} |
if (mod_member_present(mod, kallsyms_end)) { |
if (mod->kallsyms_end && |
(!mod_bound(mod->kallsyms_start, 0, mod) || |
!mod_bound(mod->kallsyms_end, 0, mod))) { |
printk(KERN_ERR "init_module: mod->kallsyms out of bounds.\n"); |
goto err2; |
} |
if (mod->kallsyms_start > mod->kallsyms_end) { |
printk(KERN_ERR "init_module: mod->kallsyms invalid.\n"); |
goto err2; |
} |
} |
if (mod_member_present(mod, archdata_end)) { |
if (mod->archdata_end && |
(!mod_bound(mod->archdata_start, 0, mod) || |
!mod_bound(mod->archdata_end, 0, mod))) { |
printk(KERN_ERR "init_module: mod->archdata out of bounds.\n"); |
goto err2; |
} |
if (mod->archdata_start > mod->archdata_end) { |
printk(KERN_ERR "init_module: mod->archdata invalid.\n"); |
goto err2; |
} |
} |
if (mod_member_present(mod, kernel_data) && mod->kernel_data) { |
printk(KERN_ERR "init_module: mod->kernel_data must be zero.\n"); |
goto err2; |
} |
|
/* Check that the user isn't doing something silly with the name. */ |
|
if ((n_namelen = get_mod_name(mod->name - (unsigned long)mod |
+ (unsigned long)mod_user, |
&n_name)) < 0) { |
printk(KERN_ERR "init_module: get_mod_name failure.\n"); |
error = n_namelen; |
goto err2; |
} |
if (namelen != n_namelen || strcmp(n_name, name_tmp) != 0) { |
printk(KERN_ERR "init_module: changed module name to " |
"`%s' from `%s'\n", |
n_name, name_tmp); |
goto err3; |
} |
|
/* Ok, that's about all the sanity we can stomach; copy the rest. */ |
|
if (copy_from_user((char *)mod+mod_user_size, |
(char *)mod_user+mod_user_size, |
mod->size-mod_user_size)) { |
error = -EFAULT; |
goto err3; |
} |
|
if (module_arch_init(mod)) |
goto err3; |
|
/* On some machines it is necessary to do something here |
to make the I and D caches consistent. */ |
flush_icache_range((unsigned long)mod, (unsigned long)mod + mod->size); |
|
mod->refs = NULL; |
|
/* Sanity check the module's dependents */ |
for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { |
struct module *o, *d = dep->dep; |
|
/* Make sure the indicated dependencies are really modules. */ |
if (d == mod) { |
printk(KERN_ERR "init_module: self-referential " |
"dependency in mod->deps.\n"); |
goto err3; |
} |
|
/* Scan the current modules for this dependency */ |
for (o = module_list; o != &kernel_module && o != d; o = o->next) |
; |
|
if (o != d) { |
printk(KERN_ERR "init_module: found dependency that is " |
"(no longer?) a module.\n"); |
goto err3; |
} |
} |
|
/* Update module references. */ |
for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { |
struct module *d = dep->dep; |
|
dep->ref = mod; |
dep->next_ref = d->refs; |
d->refs = dep; |
/* Being referenced by a dependent module counts as a |
use as far as kmod is concerned. */ |
d->flags |= MOD_USED_ONCE; |
} |
|
/* Free our temporary memory. */ |
put_mod_name(n_name); |
put_mod_name(name); |
|
/* Initialize the module. */ |
atomic_set(&mod->uc.usecount,1); |
mod->flags |= MOD_INITIALIZING; |
if (mod->init && (error = mod->init()) != 0) { |
atomic_set(&mod->uc.usecount,0); |
mod->flags &= ~MOD_INITIALIZING; |
if (error > 0) /* Buggy module */ |
error = -EBUSY; |
goto err0; |
} |
atomic_dec(&mod->uc.usecount); |
|
/* And set it running. */ |
mod->flags = (mod->flags | MOD_RUNNING) & ~MOD_INITIALIZING; |
error = 0; |
goto err0; |
|
err3: |
put_mod_name(n_name); |
err2: |
*mod = mod_tmp; |
strcpy((char *)mod->name, name_tmp); /* We know there is room for this */ |
err1: |
put_mod_name(name); |
err0: |
if (mod2) |
vfree(mod2); |
unlock_kernel(); |
kfree(name_tmp); |
return error; |
} |
|
static spinlock_t unload_lock = SPIN_LOCK_UNLOCKED; |
int try_inc_mod_count(struct module *mod) |
{ |
int res = 1; |
if (mod) { |
spin_lock(&unload_lock); |
if (mod->flags & MOD_DELETED) |
res = 0; |
else |
__MOD_INC_USE_COUNT(mod); |
spin_unlock(&unload_lock); |
} |
return res; |
} |
|
asmlinkage long |
sys_delete_module(const char *name_user) |
{ |
struct module *mod, *next; |
char *name; |
long error; |
int something_changed; |
|
if (!capable(CAP_SYS_MODULE)) |
return -EPERM; |
|
lock_kernel(); |
if (name_user) { |
if ((error = get_mod_name(name_user, &name)) < 0) |
goto out; |
error = -ENOENT; |
if ((mod = find_module(name)) == NULL) { |
put_mod_name(name); |
goto out; |
} |
put_mod_name(name); |
error = -EBUSY; |
if (mod->refs != NULL) |
goto out; |
|
spin_lock(&unload_lock); |
if (!__MOD_IN_USE(mod)) { |
mod->flags |= MOD_DELETED; |
spin_unlock(&unload_lock); |
free_module(mod, 0); |
error = 0; |
} else { |
spin_unlock(&unload_lock); |
} |
goto out; |
} |
|
/* Do automatic reaping */ |
restart: |
something_changed = 0; |
|
for (mod = module_list; mod != &kernel_module; mod = next) { |
next = mod->next; |
spin_lock(&unload_lock); |
if (mod->refs == NULL |
&& (mod->flags & MOD_AUTOCLEAN) |
&& (mod->flags & MOD_RUNNING) |
&& !(mod->flags & MOD_DELETED) |
&& (mod->flags & MOD_USED_ONCE) |
&& !__MOD_IN_USE(mod)) { |
if ((mod->flags & MOD_VISITED) |
&& !(mod->flags & MOD_JUST_FREED)) { |
spin_unlock(&unload_lock); |
mod->flags &= ~MOD_VISITED; |
} else { |
mod->flags |= MOD_DELETED; |
spin_unlock(&unload_lock); |
free_module(mod, 1); |
something_changed = 1; |
} |
} else { |
spin_unlock(&unload_lock); |
} |
} |
|
if (something_changed) |
goto restart; |
|
for (mod = module_list; mod != &kernel_module; mod = mod->next) |
mod->flags &= ~MOD_JUST_FREED; |
|
error = 0; |
out: |
unlock_kernel(); |
return error; |
} |
|
/* Query various bits about modules. */ |
|
static int |
qm_modules(char *buf, size_t bufsize, size_t *ret) |
{ |
struct module *mod; |
size_t nmod, space, len; |
|
nmod = space = 0; |
|
for (mod=module_list; mod != &kernel_module; mod=mod->next, ++nmod) { |
len = strlen(mod->name)+1; |
if (len > bufsize) |
goto calc_space_needed; |
if (copy_to_user(buf, mod->name, len)) |
return -EFAULT; |
buf += len; |
bufsize -= len; |
space += len; |
} |
|
if (put_user(nmod, ret)) |
return -EFAULT; |
else |
return 0; |
|
calc_space_needed: |
space += len; |
while ((mod = mod->next) != &kernel_module) |
space += strlen(mod->name)+1; |
|
if (put_user(space, ret)) |
return -EFAULT; |
else |
return -ENOSPC; |
} |
|
static int |
qm_deps(struct module *mod, char *buf, size_t bufsize, size_t *ret) |
{ |
size_t i, space, len; |
|
if (mod == &kernel_module) |
return -EINVAL; |
if (!MOD_CAN_QUERY(mod)) |
if (put_user(0, ret)) |
return -EFAULT; |
else |
return 0; |
|
space = 0; |
for (i = 0; i < mod->ndeps; ++i) { |
const char *dep_name = mod->deps[i].dep->name; |
|
len = strlen(dep_name)+1; |
if (len > bufsize) |
goto calc_space_needed; |
if (copy_to_user(buf, dep_name, len)) |
return -EFAULT; |
buf += len; |
bufsize -= len; |
space += len; |
} |
|
if (put_user(i, ret)) |
return -EFAULT; |
else |
return 0; |
|
calc_space_needed: |
space += len; |
while (++i < mod->ndeps) |
space += strlen(mod->deps[i].dep->name)+1; |
|
if (put_user(space, ret)) |
return -EFAULT; |
else |
return -ENOSPC; |
} |
|
static int |
qm_refs(struct module *mod, char *buf, size_t bufsize, size_t *ret) |
{ |
size_t nrefs, space, len; |
struct module_ref *ref; |
|
if (mod == &kernel_module) |
return -EINVAL; |
if (!MOD_CAN_QUERY(mod)) |
if (put_user(0, ret)) |
return -EFAULT; |
else |
return 0; |
|
space = 0; |
for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) { |
const char *ref_name = ref->ref->name; |
|
len = strlen(ref_name)+1; |
if (len > bufsize) |
goto calc_space_needed; |
if (copy_to_user(buf, ref_name, len)) |
return -EFAULT; |
buf += len; |
bufsize -= len; |
space += len; |
} |
|
if (put_user(nrefs, ret)) |
return -EFAULT; |
else |
return 0; |
|
calc_space_needed: |
space += len; |
while ((ref = ref->next_ref) != NULL) |
space += strlen(ref->ref->name)+1; |
|
if (put_user(space, ret)) |
return -EFAULT; |
else |
return -ENOSPC; |
} |
|
static int |
qm_symbols(struct module *mod, char *buf, size_t bufsize, size_t *ret) |
{ |
size_t i, space, len; |
struct module_symbol *s; |
char *strings; |
unsigned long *vals; |
|
if (!MOD_CAN_QUERY(mod)) |
if (put_user(0, ret)) |
return -EFAULT; |
else |
return 0; |
|
space = mod->nsyms * 2*sizeof(void *); |
|
i = len = 0; |
s = mod->syms; |
|
if (space > bufsize) |
goto calc_space_needed; |
|
if (!access_ok(VERIFY_WRITE, buf, space)) |
return -EFAULT; |
|
bufsize -= space; |
vals = (unsigned long *)buf; |
strings = buf+space; |
|
for (; i < mod->nsyms ; ++i, ++s, vals += 2) { |
len = strlen(s->name)+1; |
if (len > bufsize) |
goto calc_space_needed; |
|
if (copy_to_user(strings, s->name, len) |
|| __put_user(s->value, vals+0) |
|| __put_user(space, vals+1)) |
return -EFAULT; |
|
strings += len; |
bufsize -= len; |
space += len; |
} |
if (put_user(i, ret)) |
return -EFAULT; |
else |
return 0; |
|
calc_space_needed: |
for (; i < mod->nsyms; ++i, ++s) |
space += strlen(s->name)+1; |
|
if (put_user(space, ret)) |
return -EFAULT; |
else |
return -ENOSPC; |
} |
|
static int |
qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret) |
{ |
int error = 0; |
|
if (mod == &kernel_module) |
return -EINVAL; |
|
if (sizeof(struct module_info) <= bufsize) { |
struct module_info info; |
info.addr = (unsigned long)mod; |
info.size = mod->size; |
info.flags = mod->flags; |
|
/* usecount is one too high here - report appropriately to |
compensate for locking */ |
info.usecount = (mod_member_present(mod, can_unload) |
&& mod->can_unload ? -1 : atomic_read(&mod->uc.usecount)-1); |
|
if (copy_to_user(buf, &info, sizeof(struct module_info))) |
return -EFAULT; |
} else |
error = -ENOSPC; |
|
if (put_user(sizeof(struct module_info), ret)) |
return -EFAULT; |
|
return error; |
} |
|
asmlinkage long |
sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, |
size_t *ret) |
{ |
struct module *mod; |
int err; |
|
lock_kernel(); |
if (name_user == NULL) |
mod = &kernel_module; |
else { |
long namelen; |
char *name; |
|
if ((namelen = get_mod_name(name_user, &name)) < 0) { |
err = namelen; |
goto out; |
} |
err = -ENOENT; |
if ((mod = find_module(name)) == NULL) { |
put_mod_name(name); |
goto out; |
} |
put_mod_name(name); |
} |
|
/* __MOD_ touches the flags. We must avoid that */ |
|
atomic_inc(&mod->uc.usecount); |
|
switch (which) |
{ |
case 0: |
err = 0; |
break; |
case QM_MODULES: |
err = qm_modules(buf, bufsize, ret); |
break; |
case QM_DEPS: |
err = qm_deps(mod, buf, bufsize, ret); |
break; |
case QM_REFS: |
err = qm_refs(mod, buf, bufsize, ret); |
break; |
case QM_SYMBOLS: |
err = qm_symbols(mod, buf, bufsize, ret); |
break; |
case QM_INFO: |
err = qm_info(mod, buf, bufsize, ret); |
break; |
default: |
err = -EINVAL; |
break; |
} |
atomic_dec(&mod->uc.usecount); |
|
out: |
unlock_kernel(); |
return err; |
} |
|
/* |
* Copy the kernel symbol table to user space. If the argument is |
* NULL, just return the size of the table. |
* |
* This call is obsolete. New programs should use query_module+QM_SYMBOLS |
* which does not arbitrarily limit the length of symbols. |
*/ |
|
asmlinkage long |
sys_get_kernel_syms(struct kernel_sym *table) |
{ |
struct module *mod; |
int i; |
struct kernel_sym ksym; |
|
lock_kernel(); |
for (mod = module_list, i = 0; mod; mod = mod->next) { |
/* include the count for the module name! */ |
i += mod->nsyms + 1; |
} |
|
if (table == NULL) |
goto out; |
|
/* So that we don't give the user our stack content */ |
memset (&ksym, 0, sizeof (ksym)); |
|
for (mod = module_list, i = 0; mod; mod = mod->next) { |
struct module_symbol *msym; |
unsigned int j; |
|
if (!MOD_CAN_QUERY(mod)) |
continue; |
|
/* magic: write module info as a pseudo symbol */ |
ksym.value = (unsigned long)mod; |
ksym.name[0] = '#'; |
strncpy(ksym.name+1, mod->name, sizeof(ksym.name)-1); |
ksym.name[sizeof(ksym.name)-1] = '\0'; |
|
if (copy_to_user(table, &ksym, sizeof(ksym)) != 0) |
goto out; |
++i, ++table; |
|
if (mod->nsyms == 0) |
continue; |
|
for (j = 0, msym = mod->syms; j < mod->nsyms; ++j, ++msym) { |
ksym.value = msym->value; |
strncpy(ksym.name, msym->name, sizeof(ksym.name)); |
ksym.name[sizeof(ksym.name)-1] = '\0'; |
|
if (copy_to_user(table, &ksym, sizeof(ksym)) != 0) |
goto out; |
++i, ++table; |
} |
} |
out: |
unlock_kernel(); |
return i; |
} |
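/* |
 * Usage sketch (hypothetical buffer size): a user-space caller using |
 * query_module() instead of this obsolete call.  QM_SYMBOLS fills the |
 * buffer with {value, name-offset} pairs followed by the NUL-terminated |
 * names, as laid out by qm_symbols() above; a NULL module name queries |
 * the kernel proper. |
 * |
 *	char buf[16384]; |
 *	size_t ret; |
 * |
 *	if (query_module(NULL, QM_SYMBOLS, buf, sizeof(buf), &ret) < 0) |
 *		... on ENOSPC, ret holds the buffer size required ... |
 */ |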
|
/* |
* Look for a module by name, ignoring modules marked for deletion. |
*/ |
|
struct module * |
find_module(const char *name) |
{ |
struct module *mod; |
|
for (mod = module_list; mod ; mod = mod->next) { |
if (mod->flags & MOD_DELETED) |
continue; |
if (!strcmp(mod->name, name)) |
break; |
} |
|
return mod; |
} |
|
/* |
* Free the given module. |
*/ |
|
void |
free_module(struct module *mod, int tag_freed) |
{ |
struct module_ref *dep; |
unsigned i; |
unsigned long flags; |
|
/* Let the module clean up. */ |
|
if (mod->flags & MOD_RUNNING) |
{ |
if(mod->cleanup) |
mod->cleanup(); |
mod->flags &= ~MOD_RUNNING; |
} |
|
/* Remove the module from the dependency lists. */ |
|
for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { |
struct module_ref **pp; |
for (pp = &dep->dep->refs; *pp != dep; pp = &(*pp)->next_ref) |
continue; |
*pp = dep->next_ref; |
if (tag_freed && dep->dep->refs == NULL) |
dep->dep->flags |= MOD_JUST_FREED; |
} |
|
/* And from the main module list. */ |
|
spin_lock_irqsave(&modlist_lock, flags); |
if (mod == module_list) { |
module_list = mod->next; |
} else { |
struct module *p; |
for (p = module_list; p->next != mod; p = p->next) |
continue; |
p->next = mod->next; |
} |
spin_unlock_irqrestore(&modlist_lock, flags); |
|
/* And free the memory. */ |
|
module_unmap(mod); |
} |
|
/* |
* Called by the /proc file system to return a current list of modules. |
*/ |
|
int get_module_list(char *p) |
{ |
size_t left = PAGE_SIZE; |
struct module *mod; |
char tmpstr[64]; |
struct module_ref *ref; |
|
for (mod = module_list; mod != &kernel_module; mod = mod->next) { |
long len; |
const char *q; |
|
#define safe_copy_str(str, len) \ |
do { \ |
if (left < len) \ |
goto fini; \ |
memcpy(p, str, len); p += len, left -= len; \ |
} while (0) |
#define safe_copy_cstr(str) safe_copy_str(str, sizeof(str)-1) |
|
len = strlen(mod->name); |
safe_copy_str(mod->name, len); |
|
if ((len = 20 - len) > 0) { |
if (left < len) |
goto fini; |
memset(p, ' ', len); |
p += len; |
left -= len; |
} |
|
len = sprintf(tmpstr, "%8lu", mod->size); |
safe_copy_str(tmpstr, len); |
|
if (mod->flags & MOD_RUNNING) { |
len = sprintf(tmpstr, "%4ld", |
(mod_member_present(mod, can_unload) |
&& mod->can_unload |
? -1L : (long)atomic_read(&mod->uc.usecount))); |
safe_copy_str(tmpstr, len); |
} |
|
if (mod->flags & MOD_DELETED) |
safe_copy_cstr(" (deleted)"); |
else if (mod->flags & MOD_RUNNING) { |
if (mod->flags & MOD_AUTOCLEAN) |
safe_copy_cstr(" (autoclean)"); |
if (!(mod->flags & MOD_USED_ONCE)) |
safe_copy_cstr(" (unused)"); |
} |
else if (mod->flags & MOD_INITIALIZING) |
safe_copy_cstr(" (initializing)"); |
else |
safe_copy_cstr(" (uninitialized)"); |
|
if ((ref = mod->refs) != NULL) { |
safe_copy_cstr(" ["); |
while (1) { |
q = ref->ref->name; |
len = strlen(q); |
safe_copy_str(q, len); |
|
if ((ref = ref->next_ref) != NULL) |
safe_copy_cstr(" "); |
else |
break; |
} |
safe_copy_cstr("]"); |
} |
safe_copy_cstr("\n"); |
|
#undef safe_copy_str |
#undef safe_copy_cstr |
} |
|
fini: |
return PAGE_SIZE - left; |
} |
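|
/* |
 * A rough sketch of a resulting /proc/modules line (field widths vary; |
 * "mymodule" and "othermod" are hypothetical module names): |
 * |
 *	mymodule               12024   1 (autoclean) [othermod] |
 * |
 * i.e. module name, size in bytes, use count, status flags and, in |
 * brackets, the modules that reference it. |
 */ |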
|
/* |
* Called by the /proc file system to return a current list of ksyms. |
*/ |
|
struct mod_sym { |
struct module *mod; |
int index; |
}; |
|
/* iterator */ |
|
static void *s_start(struct seq_file *m, loff_t *pos) |
{ |
struct mod_sym *p = kmalloc(sizeof(*p), GFP_KERNEL); |
struct module *v; |
loff_t n = *pos; |
|
if (!p) |
return ERR_PTR(-ENOMEM); |
lock_kernel(); |
for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) { |
if (n < v->nsyms) { |
p->mod = v; |
p->index = n; |
return p; |
} |
} |
unlock_kernel(); |
kfree(p); |
return NULL; |
} |
|
static void *s_next(struct seq_file *m, void *p, loff_t *pos) |
{ |
struct mod_sym *v = p; |
(*pos)++; |
if (++v->index >= v->mod->nsyms) { |
do { |
v->mod = v->mod->next; |
if (!v->mod) { |
unlock_kernel(); |
kfree(p); |
return NULL; |
} |
} while (!v->mod->nsyms); |
v->index = 0; |
} |
return p; |
} |
|
static void s_stop(struct seq_file *m, void *p) |
{ |
if (p && !IS_ERR(p)) { |
unlock_kernel(); |
kfree(p); |
} |
} |
|
static int s_show(struct seq_file *m, void *p) |
{ |
struct mod_sym *v = p; |
struct module_symbol *sym; |
|
if (!MOD_CAN_QUERY(v->mod)) |
return 0; |
sym = &v->mod->syms[v->index]; |
if (*v->mod->name) |
seq_printf(m, "%0*lx %s\t[%s]\n", (int)(2*sizeof(void*)), |
sym->value, sym->name, v->mod->name); |
else |
seq_printf(m, "%0*lx %s\n", (int)(2*sizeof(void*)), |
sym->value, sym->name); |
return 0; |
} |
|
struct seq_operations ksyms_op = { |
start: s_start, |
next: s_next, |
stop: s_stop, |
show: s_show |
}; |
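|
/* |
 * The seq_file core drives these four callbacks as, roughly, |
 * |
 *	p = start(m, &pos); |
 *	while (p) { show(m, p); p = next(m, p, &pos); } |
 *	stop(m, p); |
 * |
 * so each /proc/ksyms line comes out as "<hex address> <symbol> [module]", |
 * with the bracketed module name omitted for the kernel proper. |
 */ |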
|
#else /* CONFIG_MODULES */ |
|
/* Dummy syscalls for people who don't want modules */ |
|
asmlinkage unsigned long |
sys_create_module(const char *name_user, size_t size) |
{ |
return -ENOSYS; |
} |
|
asmlinkage long |
sys_init_module(const char *name_user, struct module *mod_user) |
{ |
return -ENOSYS; |
} |
|
asmlinkage long |
sys_delete_module(const char *name_user) |
{ |
return -ENOSYS; |
} |
|
asmlinkage long |
sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, |
size_t *ret) |
{ |
/* Let the program know about the new interface. Not that |
it'll do them much good. */ |
if (which == 0) |
return 0; |
|
return -ENOSYS; |
} |
|
asmlinkage long |
sys_get_kernel_syms(struct kernel_sym *table) |
{ |
return -ENOSYS; |
} |
|
int try_inc_mod_count(struct module *mod) |
{ |
return 1; |
} |
|
#endif /* CONFIG_MODULES */ |
/sched.c
0,0 → 1,1397
/* |
* linux/kernel/sched.c |
* |
* Kernel scheduler and related syscalls |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
* |
* 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and |
* make semaphores SMP safe |
* 1998-11-19 Implemented schedule_timeout() and related stuff |
* by Andrea Arcangeli |
* 1998-12-28 Implemented better SMP scheduling by Ingo Molnar |
*/ |
|
/* |
* 'sched.c' is the main kernel file. It contains scheduling primitives |
* (sleep_on, wakeup, schedule etc) as well as a number of simple system |
 * call functions (such as getpid()), which just extract a field from |
 * the current task. |
*/ |
|
#include <linux/config.h> |
#include <linux/mm.h> |
#include <linux/init.h> |
#include <linux/smp_lock.h> |
#include <linux/nmi.h> |
#include <linux/interrupt.h> |
#include <linux/kernel_stat.h> |
#include <linux/completion.h> |
#include <linux/prefetch.h> |
#include <linux/compiler.h> |
|
#include <asm/uaccess.h> |
#include <asm/mmu_context.h> |
|
extern void timer_bh(void); |
extern void tqueue_bh(void); |
extern void immediate_bh(void); |
|
/* |
* scheduler variables |
*/ |
|
unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ |
|
extern void mem_use(void); |
|
/* |
* Scheduling quanta. |
* |
* NOTE! The unix "nice" value influences how long a process |
* gets. The nice value ranges from -20 to +19, where a -20 |
 * is a "high-priority" task, and a "+19" is a low-priority |
* task. |
* |
* We want the time-slice to be around 50ms or so, so this |
* calculation depends on the value of HZ. |
*/ |
#if HZ < 200 |
#define TICK_SCALE(x) ((x) >> 2) |
#elif HZ < 400 |
#define TICK_SCALE(x) ((x) >> 1) |
#elif HZ < 800 |
#define TICK_SCALE(x) (x) |
#elif HZ < 1600 |
#define TICK_SCALE(x) ((x) << 1) |
#else |
#define TICK_SCALE(x) ((x) << 2) |
#endif |
|
#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1) |
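|
/* |
 * A worked example (assuming HZ = 100, so TICK_SCALE(x) is x >> 2): |
 * |
 *	NICE_TO_TICKS(0)   = (20 >> 2) + 1 = 6 ticks, i.e. ~60ms |
 *	NICE_TO_TICKS(-20) = (40 >> 2) + 1 = 11 ticks, i.e. ~110ms |
 *	NICE_TO_TICKS(19)  = ( 1 >> 2) + 1 = 1 tick, i.e. ~10ms |
 * |
 * so the default time slice is close to the 50ms mentioned above, and |
 * nicer tasks get shorter slices. |
 */ |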
|
|
/* |
* Init task must be ok at boot for the ix86 as we will check its signals |
* via the SMP irq return path. |
*/ |
|
struct task_struct * init_tasks[NR_CPUS] = {&init_task, }; |
|
/* |
* The tasklist_lock protects the linked list of processes. |
* |
* The runqueue_lock locks the parts that actually access |
* and change the run-queues, and have to be interrupt-safe. |
* |
* If both locks are to be concurrently held, the runqueue_lock |
* nests inside the tasklist_lock. |
* |
* task->alloc_lock nests inside tasklist_lock. |
*/ |
spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */ |
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ |
|
static LIST_HEAD(runqueue_head); |
|
/* |
* We align per-CPU scheduling data on cacheline boundaries, |
* to prevent cacheline ping-pong. |
*/ |
static union { |
struct schedule_data { |
struct task_struct * curr; |
cycles_t last_schedule; |
} schedule_data; |
char __pad [SMP_CACHE_BYTES]; |
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}}; |
|
#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr |
#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule |
|
struct kernel_stat kstat; |
extern struct task_struct *child_reaper; |
|
#ifdef CONFIG_SMP |
|
#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)]) |
#define can_schedule(p,cpu) \ |
((p)->cpus_runnable & (p)->cpus_allowed & (1UL << cpu)) |
|
#else |
|
#define idle_task(cpu) (&init_task) |
#define can_schedule(p,cpu) (1) |
|
#endif |
|
void scheduling_functions_start_here(void) { } |
|
/* |
* This is the function that decides how desirable a process is.. |
* You can weigh different processes against each other depending |
* on what CPU they've run on lately etc to try to handle cache |
* and TLB miss penalties. |
* |
* Return values: |
* -1000: never select this |
* 0: out of time, recalculate counters (but it might still be |
* selected) |
* +ve: "goodness" value (the larger, the better) |
* +1000: realtime process, select this. |
*/ |
|
static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm) |
{ |
int weight; |
|
/* |
* select the current process after every other |
* runnable process, but before the idle thread. |
	 * Also, don't trigger a counter recalculation. |
*/ |
weight = -1; |
if (p->policy & SCHED_YIELD) |
goto out; |
|
/* |
* Non-RT process - normal case first. |
*/ |
if (p->policy == SCHED_OTHER) { |
/* |
* Give the process a first-approximation goodness value |
* according to the number of clock-ticks it has left. |
* |
* Don't do any other calculations if the time slice is |
* over.. |
*/ |
weight = p->counter; |
if (!weight) |
goto out; |
|
#ifdef CONFIG_SMP |
/* Give a largish advantage to the same processor... */ |
/* (this is equivalent to penalizing other processors) */ |
if (p->processor == this_cpu) |
weight += PROC_CHANGE_PENALTY; |
#endif |
|
/* .. and a slight advantage to the current MM */ |
if (p->mm == this_mm || !p->mm) |
weight += 1; |
weight += 20 - p->nice; |
goto out; |
} |
|
/* |
* Realtime process, select the first one on the |
* runqueue (taking priorities within processes |
* into account). |
*/ |
weight = 1000 + p->rt_priority; |
out: |
return weight; |
} |
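|
/* |
 * A small worked example of the calculation above: a SCHED_OTHER task |
 * with counter = 6 and nice = 0, sharing the mm of the previous task, |
 * scores 6 + 1 + 20 = 27 on UP. On SMP, if it last ran on this CPU, it |
 * additionally gets PROC_CHANGE_PENALTY (an arch-defined constant, e.g. |
 * 15 on i386), making it strongly preferred over a task that would have |
 * to migrate. |
 */ |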
|
/* |
* the 'goodness value' of replacing a process on a given CPU. |
 * positive value means 'replace', zero or negative means 'don't'. |
*/ |
static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu) |
{ |
return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm); |
} |
|
/* |
* This is ugly, but reschedule_idle() is very timing-critical. |
* We are called with the runqueue spinlock held and we must |
* not claim the tasklist_lock. |
*/ |
static FASTCALL(void reschedule_idle(struct task_struct * p)); |
|
static void reschedule_idle(struct task_struct * p) |
{ |
#ifdef CONFIG_SMP |
int this_cpu = smp_processor_id(); |
struct task_struct *tsk, *target_tsk; |
int cpu, best_cpu, i, max_prio; |
cycles_t oldest_idle; |
|
/* |
* shortcut if the woken up task's last CPU is |
* idle now. |
*/ |
best_cpu = p->processor; |
if (can_schedule(p, best_cpu)) { |
tsk = idle_task(best_cpu); |
if (cpu_curr(best_cpu) == tsk) { |
int need_resched; |
send_now_idle: |
/* |
* If need_resched == -1 then we can skip sending |
* the IPI altogether, tsk->need_resched is |
* actively watched by the idle thread. |
*/ |
need_resched = tsk->need_resched; |
tsk->need_resched = 1; |
if ((best_cpu != this_cpu) && !need_resched) |
smp_send_reschedule(best_cpu); |
return; |
} |
} |
|
/* |
* We know that the preferred CPU has a cache-affine current |
	 * process, let's try to find a new idle CPU for the woken-up |
* process. Select the least recently active idle CPU. (that |
* one will have the least active cache context.) Also find |
* the executing process which has the least priority. |
*/ |
oldest_idle = (cycles_t) -1; |
target_tsk = NULL; |
max_prio = 0; |
|
for (i = 0; i < smp_num_cpus; i++) { |
cpu = cpu_logical_map(i); |
if (!can_schedule(p, cpu)) |
continue; |
tsk = cpu_curr(cpu); |
/* |
* We use the first available idle CPU. This creates |
* a priority list between idle CPUs, but this is not |
* a problem. |
*/ |
if (tsk == idle_task(cpu)) { |
#if defined(__i386__) && defined(CONFIG_SMP) |
/* |
* Check if two siblings are idle in the same |
* physical package. Use them if found. |
*/ |
if (smp_num_siblings == 2) { |
if (cpu_curr(cpu_sibling_map[cpu]) == |
idle_task(cpu_sibling_map[cpu])) { |
oldest_idle = last_schedule(cpu); |
target_tsk = tsk; |
break; |
} |
|
} |
#endif |
if (last_schedule(cpu) < oldest_idle) { |
oldest_idle = last_schedule(cpu); |
target_tsk = tsk; |
} |
} else { |
if (oldest_idle == (cycles_t)-1) { |
int prio = preemption_goodness(tsk, p, cpu); |
|
if (prio > max_prio) { |
max_prio = prio; |
target_tsk = tsk; |
} |
} |
} |
} |
tsk = target_tsk; |
if (tsk) { |
if (oldest_idle != (cycles_t)-1) { |
best_cpu = tsk->processor; |
goto send_now_idle; |
} |
tsk->need_resched = 1; |
if (tsk->processor != this_cpu) |
smp_send_reschedule(tsk->processor); |
} |
return; |
|
|
#else /* UP */ |
int this_cpu = smp_processor_id(); |
struct task_struct *tsk; |
|
tsk = cpu_curr(this_cpu); |
if (preemption_goodness(tsk, p, this_cpu) > 0) |
tsk->need_resched = 1; |
#endif |
} |
|
/* |
* Careful! |
* |
* This has to add the process to the _end_ of the |
* run-queue, not the beginning. The goodness value will |
* determine whether this process will run next. This is |
* important to get SCHED_FIFO and SCHED_RR right, where |
* a process that is either pre-empted or its time slice |
* has expired, should be moved to the tail of the run |
* queue for its priority - Bhavesh Davda |
*/ |
static inline void add_to_runqueue(struct task_struct * p) |
{ |
list_add_tail(&p->run_list, &runqueue_head); |
nr_running++; |
} |
|
static inline void move_last_runqueue(struct task_struct * p) |
{ |
list_del(&p->run_list); |
list_add_tail(&p->run_list, &runqueue_head); |
} |
|
/* |
* Wake up a process. Put it on the run-queue if it's not |
* already there. The "current" process is always on the |
* run-queue (except when the actual re-schedule is in |
* progress), and as such you're allowed to do the simpler |
* "current->state = TASK_RUNNING" to mark yourself runnable |
* without the overhead of this. |
*/ |
static inline int try_to_wake_up(struct task_struct * p, int synchronous) |
{ |
unsigned long flags; |
int success = 0; |
|
/* |
	 * We want the common case to fall straight through, thus the goto. |
*/ |
spin_lock_irqsave(&runqueue_lock, flags); |
p->state = TASK_RUNNING; |
if (task_on_runqueue(p)) |
goto out; |
add_to_runqueue(p); |
if (!synchronous || !(p->cpus_allowed & (1UL << smp_processor_id()))) |
reschedule_idle(p); |
success = 1; |
out: |
spin_unlock_irqrestore(&runqueue_lock, flags); |
return success; |
} |
|
inline int wake_up_process(struct task_struct * p) |
{ |
return try_to_wake_up(p, 0); |
} |
|
static void process_timeout(unsigned long __data) |
{ |
struct task_struct * p = (struct task_struct *) __data; |
|
wake_up_process(p); |
} |
|
/** |
* schedule_timeout - sleep until timeout |
* @timeout: timeout value in jiffies |
* |
* Make the current task sleep until @timeout jiffies have |
* elapsed. The routine will return immediately unless |
* the current task state has been set (see set_current_state()). |
* |
* You can set the task state as follows - |
* |
* %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to |
 * pass before the routine returns. The routine will return 0. |
* |
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is |
* delivered to the current task. In this case the remaining time |
 * in jiffies will be returned, or 0 if the timer expired in time. |
* |
* The current task state is guaranteed to be TASK_RUNNING when this |
* routine returns. |
* |
* Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule |
* the CPU away without a bound on the timeout. In this case the return |
* value will be %MAX_SCHEDULE_TIMEOUT. |
* |
* In all cases the return value is guaranteed to be non-negative. |
*/ |
signed long schedule_timeout(signed long timeout) |
{ |
struct timer_list timer; |
unsigned long expire; |
|
switch (timeout) |
{ |
case MAX_SCHEDULE_TIMEOUT: |
/* |
* These two special cases are useful to be comfortable |
* in the caller. Nothing more. We could take |
			 * MAX_SCHEDULE_TIMEOUT from one of the negative values, |
			 * but I'd like to return a valid offset (>=0) to allow |
			 * the caller to do whatever it wants with the retval. |
*/ |
schedule(); |
goto out; |
default: |
/* |
			 * Another bit of paranoia. Note that the retval will be |
			 * 0, since no piece of the kernel is supposed to check |
			 * for a negative retval from schedule_timeout() (it |
			 * should never happen anyway). You just have the printk() |
			 * that will tell you if something has gone wrong, and where. |
*/ |
if (timeout < 0) |
{ |
printk(KERN_ERR "schedule_timeout: wrong timeout " |
"value %lx from %p\n", timeout, |
__builtin_return_address(0)); |
current->state = TASK_RUNNING; |
goto out; |
} |
} |
|
expire = timeout + jiffies; |
|
init_timer(&timer); |
timer.expires = expire; |
timer.data = (unsigned long) current; |
timer.function = process_timeout; |
|
add_timer(&timer); |
schedule(); |
del_timer_sync(&timer); |
|
timeout = expire - jiffies; |
|
out: |
return timeout < 0 ? 0 : timeout; |
} |
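|
/* |
 * A minimal usage sketch: a caller that wants to sleep for roughly one |
 * second would typically do |
 * |
 *	set_current_state(TASK_INTERRUPTIBLE); |
 *	schedule_timeout(HZ); |
 * |
 * Setting the task state first is essential; as noted above, with the |
 * state left at TASK_RUNNING the routine returns without sleeping. |
 */ |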
|
/* |
* schedule_tail() is getting called from the fork return path. This |
* cleans up all remaining scheduler things, without impacting the |
* common case. |
*/ |
static inline void __schedule_tail(struct task_struct *prev) |
{ |
#ifdef CONFIG_SMP |
int policy; |
|
/* |
* prev->policy can be written from here only before `prev' |
* can be scheduled (before setting prev->cpus_runnable to ~0UL). |
* Of course it must also be read before allowing prev |
* to be rescheduled, but since the write depends on the read |
	 * to complete, wmb() is enough. (The spin_lock() acquired |
	 * before setting cpus_runnable is not enough, because the common |
	 * spin_lock() semantics allow code outside the critical section |
	 * to be moved inside the critical section.) |
*/ |
policy = prev->policy; |
prev->policy = policy & ~SCHED_YIELD; |
wmb(); |
|
/* |
* fast path falls through. We have to clear cpus_runnable before |
* checking prev->state to avoid a wakeup race. Protect against |
* the task exiting early. |
*/ |
task_lock(prev); |
task_release_cpu(prev); |
mb(); |
if (prev->state == TASK_RUNNING) |
goto needs_resched; |
|
out_unlock: |
task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */ |
return; |
|
/* |
* Slow path - we 'push' the previous process and |
* reschedule_idle() will attempt to find a new |
* processor for it. (but it might preempt the |
* current process as well.) We must take the runqueue |
* lock and re-check prev->state to be correct. It might |
* still happen that this process has a preemption |
* 'in progress' already - but this is not a problem and |
* might happen in other circumstances as well. |
*/ |
needs_resched: |
{ |
unsigned long flags; |
|
/* |
* Avoid taking the runqueue lock in cases where |
		 * no preemption-check is necessary: |
*/ |
if ((prev == idle_task(smp_processor_id())) || |
(policy & SCHED_YIELD)) |
goto out_unlock; |
|
spin_lock_irqsave(&runqueue_lock, flags); |
if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev)) |
reschedule_idle(prev); |
spin_unlock_irqrestore(&runqueue_lock, flags); |
goto out_unlock; |
} |
#else |
prev->policy &= ~SCHED_YIELD; |
#endif /* CONFIG_SMP */ |
} |
|
asmlinkage void schedule_tail(struct task_struct *prev) |
{ |
__schedule_tail(prev); |
} |
|
/* |
* 'schedule()' is the scheduler function. It's a very simple and nice |
* scheduler: it's not perfect, but certainly works for most things. |
* |
* The goto is "interesting". |
* |
* NOTE!! Task 0 is the 'idle' task, which gets called when no other |
* tasks can run. It can not be killed, and it cannot sleep. The 'state' |
* information in task[0] is never used. |
*/ |
asmlinkage void schedule(void) |
{ |
struct schedule_data * sched_data; |
struct task_struct *prev, *next, *p; |
struct list_head *tmp; |
int this_cpu, c; |
|
|
spin_lock_prefetch(&runqueue_lock); |
|
BUG_ON(!current->active_mm); |
need_resched_back: |
prev = current; |
this_cpu = prev->processor; |
|
if (unlikely(in_interrupt())) { |
printk("Scheduling in interrupt\n"); |
BUG(); |
} |
|
release_kernel_lock(prev, this_cpu); |
|
/* |
* 'sched_data' is protected by the fact that we can run |
* only one process per CPU. |
*/ |
sched_data = & aligned_data[this_cpu].schedule_data; |
|
spin_lock_irq(&runqueue_lock); |
|
/* move an exhausted RR process to be last.. */ |
if (unlikely(prev->policy == SCHED_RR)) |
if (!prev->counter) { |
prev->counter = NICE_TO_TICKS(prev->nice); |
move_last_runqueue(prev); |
} |
|
switch (prev->state) { |
case TASK_INTERRUPTIBLE: |
if (signal_pending(prev)) { |
prev->state = TASK_RUNNING; |
break; |
} |
default: |
del_from_runqueue(prev); |
case TASK_RUNNING:; |
} |
prev->need_resched = 0; |
|
/* |
* this is the scheduler proper: |
*/ |
|
repeat_schedule: |
/* |
* Default process to select.. |
*/ |
next = idle_task(this_cpu); |
c = -1000; |
list_for_each(tmp, &runqueue_head) { |
p = list_entry(tmp, struct task_struct, run_list); |
if (can_schedule(p, this_cpu)) { |
int weight = goodness(p, this_cpu, prev->active_mm); |
if (weight > c) |
c = weight, next = p; |
} |
} |
|
/* Do we need to re-calculate counters? */ |
if (unlikely(!c)) { |
struct task_struct *p; |
|
spin_unlock_irq(&runqueue_lock); |
read_lock(&tasklist_lock); |
for_each_task(p) |
p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); |
read_unlock(&tasklist_lock); |
spin_lock_irq(&runqueue_lock); |
goto repeat_schedule; |
} |
|
/* |
* from this point on nothing can prevent us from |
* switching to the next task, save this fact in |
* sched_data. |
*/ |
sched_data->curr = next; |
task_set_cpu(next, this_cpu); |
spin_unlock_irq(&runqueue_lock); |
|
if (unlikely(prev == next)) { |
/* We won't go through the normal tail, so do this by hand */ |
prev->policy &= ~SCHED_YIELD; |
goto same_process; |
} |
|
#ifdef CONFIG_SMP |
/* |
* maintain the per-process 'last schedule' value. |
* (this has to be recalculated even if we reschedule to |
* the same process) Currently this is only used on SMP, |
* and it's approximate, so we do not have to maintain |
* it while holding the runqueue spinlock. |
*/ |
sched_data->last_schedule = get_cycles(); |
|
/* |
* We drop the scheduler lock early (it's a global spinlock), |
* thus we have to lock the previous process from getting |
* rescheduled during switch_to(). |
*/ |
|
#endif /* CONFIG_SMP */ |
|
kstat.context_swtch++; |
/* |
* there are 3 processes which are affected by a context switch: |
* |
* prev == .... ==> (last => next) |
* |
* It's the 'much more previous' 'prev' that is on next's stack, |
* but prev is set to (the just run) 'last' process by switch_to(). |
* This might sound slightly confusing but makes tons of sense. |
*/ |
prepare_to_switch(); |
{ |
struct mm_struct *mm = next->mm; |
struct mm_struct *oldmm = prev->active_mm; |
if (!mm) { |
BUG_ON(next->active_mm); |
next->active_mm = oldmm; |
atomic_inc(&oldmm->mm_count); |
enter_lazy_tlb(oldmm, next, this_cpu); |
} else { |
BUG_ON(next->active_mm != mm); |
switch_mm(oldmm, mm, next, this_cpu); |
} |
|
if (!prev->mm) { |
prev->active_mm = NULL; |
mmdrop(oldmm); |
} |
} |
|
/* |
* This just switches the register state and the |
* stack. |
*/ |
switch_to(prev, next, prev); |
__schedule_tail(prev); |
|
same_process: |
reacquire_kernel_lock(current); |
if (current->need_resched) |
goto need_resched_back; |
return; |
} |
|
/* |
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything |
* up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the |
* non-exclusive tasks and one exclusive task. |
* |
* There are circumstances in which we can try to wake a task which has already |
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero |
 * in this (rare) case, and we handle it by continuing to scan the queue. |
*/ |
static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, |
int nr_exclusive, const int sync) |
{ |
struct list_head *tmp; |
struct task_struct *p; |
|
CHECK_MAGIC_WQHEAD(q); |
WQ_CHECK_LIST_HEAD(&q->task_list); |
|
list_for_each(tmp,&q->task_list) { |
unsigned int state; |
wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); |
|
CHECK_MAGIC(curr->__magic); |
p = curr->task; |
state = p->state; |
if (state & mode) { |
WQ_NOTE_WAKER(curr); |
if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) |
break; |
} |
} |
} |
|
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr) |
{ |
if (q) { |
unsigned long flags; |
wq_read_lock_irqsave(&q->lock, flags); |
__wake_up_common(q, mode, nr, 0); |
wq_read_unlock_irqrestore(&q->lock, flags); |
} |
} |
|
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr) |
{ |
if (q) { |
unsigned long flags; |
wq_read_lock_irqsave(&q->lock, flags); |
__wake_up_common(q, mode, nr, 1); |
wq_read_unlock_irqrestore(&q->lock, flags); |
} |
} |
|
void complete(struct completion *x) |
{ |
unsigned long flags; |
|
spin_lock_irqsave(&x->wait.lock, flags); |
x->done++; |
__wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0); |
spin_unlock_irqrestore(&x->wait.lock, flags); |
} |
|
void wait_for_completion(struct completion *x) |
{ |
spin_lock_irq(&x->wait.lock); |
if (!x->done) { |
DECLARE_WAITQUEUE(wait, current); |
|
wait.flags |= WQ_FLAG_EXCLUSIVE; |
__add_wait_queue_tail(&x->wait, &wait); |
do { |
__set_current_state(TASK_UNINTERRUPTIBLE); |
spin_unlock_irq(&x->wait.lock); |
schedule(); |
spin_lock_irq(&x->wait.lock); |
} while (!x->done); |
__remove_wait_queue(&x->wait, &wait); |
} |
x->done--; |
spin_unlock_irq(&x->wait.lock); |
} |
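|
/* |
 * A minimal usage sketch of the completion API, with hypothetical names: |
 * |
 *	static DECLARE_COMPLETION(setup_done); |
 * |
 *	// thread A, waiting for the event: |
 *	wait_for_completion(&setup_done); |
 * |
 *	// thread B (or an interrupt handler), signalling it: |
 *	complete(&setup_done); |
 * |
 * complete() may be called before wait_for_completion(); the ->done |
 * counter makes sure the wakeup is not lost. |
 */ |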
|
#define SLEEP_ON_VAR \ |
unsigned long flags; \ |
wait_queue_t wait; \ |
init_waitqueue_entry(&wait, current); |
|
#define SLEEP_ON_HEAD \ |
wq_write_lock_irqsave(&q->lock,flags); \ |
__add_wait_queue(q, &wait); \ |
wq_write_unlock(&q->lock); |
|
#define SLEEP_ON_TAIL \ |
wq_write_lock_irq(&q->lock); \ |
__remove_wait_queue(q, &wait); \ |
wq_write_unlock_irqrestore(&q->lock,flags); |
|
void interruptible_sleep_on(wait_queue_head_t *q) |
{ |
SLEEP_ON_VAR |
|
current->state = TASK_INTERRUPTIBLE; |
|
SLEEP_ON_HEAD |
schedule(); |
SLEEP_ON_TAIL |
} |
|
long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) |
{ |
SLEEP_ON_VAR |
|
current->state = TASK_INTERRUPTIBLE; |
|
SLEEP_ON_HEAD |
timeout = schedule_timeout(timeout); |
SLEEP_ON_TAIL |
|
return timeout; |
} |
|
void sleep_on(wait_queue_head_t *q) |
{ |
SLEEP_ON_VAR |
|
current->state = TASK_UNINTERRUPTIBLE; |
|
SLEEP_ON_HEAD |
schedule(); |
SLEEP_ON_TAIL |
} |
|
long sleep_on_timeout(wait_queue_head_t *q, long timeout) |
{ |
SLEEP_ON_VAR |
|
current->state = TASK_UNINTERRUPTIBLE; |
|
SLEEP_ON_HEAD |
timeout = schedule_timeout(timeout); |
SLEEP_ON_TAIL |
|
return timeout; |
} |
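|
/* |
 * A minimal usage sketch for the sleep_on family, with hypothetical |
 * names. One side sleeps on a wait queue head, the other wakes it: |
 * |
 *	static DECLARE_WAIT_QUEUE_HEAD(my_wq); |
 * |
 *	// sleeper: |
 *	interruptible_sleep_on(&my_wq); |
 * |
 *	// waker: |
 *	wake_up_interruptible(&my_wq); |
 * |
 * Note the race inherent in sleep_on(): a wakeup that arrives between |
 * the caller's check of its condition and the call itself is lost, |
 * which is why the wait_event()-style interfaces are generally safer. |
 */ |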
|
void scheduling_functions_end_here(void) { } |
|
#if CONFIG_SMP |
/** |
* set_cpus_allowed() - change a given task's processor affinity |
* @p: task to bind |
* @new_mask: bitmask of allowed processors |
* |
* Upon return, the task is running on a legal processor. Note the caller |
* must have a valid reference to the task: it must not exit() prematurely. |
* This call can sleep; do not hold locks on call. |
*/ |
void set_cpus_allowed(struct task_struct *p, unsigned long new_mask) |
{ |
new_mask &= cpu_online_map; |
BUG_ON(!new_mask); |
|
p->cpus_allowed = new_mask; |
|
/* |
* If the task is on a no-longer-allowed processor, we need to move |
* it. If the task is not current, then set need_resched and send |
* its processor an IPI to reschedule. |
*/ |
if (!(p->cpus_runnable & p->cpus_allowed)) { |
if (p != current) { |
p->need_resched = 1; |
smp_send_reschedule(p->processor); |
} |
/* |
* Wait until we are on a legal processor. If the task is |
* current, then we should be on a legal processor the next |
* time we reschedule. Otherwise, we need to wait for the IPI. |
*/ |
while (!(p->cpus_runnable & p->cpus_allowed)) |
schedule(); |
} |
} |
#endif /* CONFIG_SMP */ |
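|
/* |
 * A minimal usage sketch (SMP only): binding the current task to the |
 * boot CPU could look like |
 * |
 *	set_cpus_allowed(current, 1UL << 0); |
 * |
 * The mask is intersected with cpu_online_map, and the call may sleep |
 * until the task is actually running on an allowed processor. |
 */ |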
|
#ifndef __alpha__ |
|
/* |
* This has been replaced by sys_setpriority. Maybe it should be |
* moved into the arch dependent tree for those ports that require |
* it for backward compatibility? |
*/ |
|
asmlinkage long sys_nice(int increment) |
{ |
long newprio; |
|
/* |
* Setpriority might change our priority at the same moment. |
* We don't have to worry. Conceptually one call occurs first |
* and we have a single winner. |
*/ |
if (increment < 0) { |
if (!capable(CAP_SYS_NICE)) |
return -EPERM; |
if (increment < -40) |
increment = -40; |
} |
if (increment > 40) |
increment = 40; |
|
newprio = current->nice + increment; |
if (newprio < -20) |
newprio = -20; |
if (newprio > 19) |
newprio = 19; |
current->nice = newprio; |
return 0; |
} |
|
#endif |
|
static inline struct task_struct *find_process_by_pid(pid_t pid) |
{ |
struct task_struct *tsk = current; |
|
if (pid) |
tsk = find_task_by_pid(pid); |
return tsk; |
} |
|
static int setscheduler(pid_t pid, int policy, |
struct sched_param *param) |
{ |
struct sched_param lp; |
struct task_struct *p; |
int retval; |
|
retval = -EINVAL; |
if (!param || pid < 0) |
goto out_nounlock; |
|
retval = -EFAULT; |
if (copy_from_user(&lp, param, sizeof(struct sched_param))) |
goto out_nounlock; |
|
/* |
* We play safe to avoid deadlocks. |
*/ |
read_lock_irq(&tasklist_lock); |
spin_lock(&runqueue_lock); |
|
p = find_process_by_pid(pid); |
|
retval = -ESRCH; |
if (!p) |
goto out_unlock; |
|
if (policy < 0) |
policy = p->policy; |
else { |
retval = -EINVAL; |
if (policy != SCHED_FIFO && policy != SCHED_RR && |
policy != SCHED_OTHER) |
goto out_unlock; |
} |
|
/* |
* Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid |
* priority for SCHED_OTHER is 0. |
*/ |
retval = -EINVAL; |
if (lp.sched_priority < 0 || lp.sched_priority > 99) |
goto out_unlock; |
if ((policy == SCHED_OTHER) != (lp.sched_priority == 0)) |
goto out_unlock; |
|
retval = -EPERM; |
if ((policy == SCHED_FIFO || policy == SCHED_RR) && |
!capable(CAP_SYS_NICE)) |
goto out_unlock; |
if ((current->euid != p->euid) && (current->euid != p->uid) && |
!capable(CAP_SYS_NICE)) |
goto out_unlock; |
|
retval = 0; |
p->policy = policy; |
p->rt_priority = lp.sched_priority; |
|
current->need_resched = 1; |
|
out_unlock: |
spin_unlock(&runqueue_lock); |
read_unlock_irq(&tasklist_lock); |
|
out_nounlock: |
return retval; |
} |
|
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, |
struct sched_param *param) |
{ |
return setscheduler(pid, policy, param); |
} |
|
asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param) |
{ |
return setscheduler(pid, -1, param); |
} |
|
asmlinkage long sys_sched_getscheduler(pid_t pid) |
{ |
struct task_struct *p; |
int retval; |
|
retval = -EINVAL; |
if (pid < 0) |
goto out_nounlock; |
|
retval = -ESRCH; |
read_lock(&tasklist_lock); |
p = find_process_by_pid(pid); |
if (p) |
retval = p->policy & ~SCHED_YIELD; |
read_unlock(&tasklist_lock); |
|
out_nounlock: |
return retval; |
} |
|
asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param) |
{ |
struct task_struct *p; |
struct sched_param lp; |
int retval; |
|
retval = -EINVAL; |
if (!param || pid < 0) |
goto out_nounlock; |
|
read_lock(&tasklist_lock); |
p = find_process_by_pid(pid); |
retval = -ESRCH; |
if (!p) |
goto out_unlock; |
lp.sched_priority = p->rt_priority; |
read_unlock(&tasklist_lock); |
|
/* |
* This one might sleep, we cannot do it with a spinlock held ... |
*/ |
retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; |
|
out_nounlock: |
return retval; |
|
out_unlock: |
read_unlock(&tasklist_lock); |
return retval; |
} |
|
asmlinkage long sys_sched_yield(void) |
{ |
/* |
* Trick. sched_yield() first counts the number of truly |
* 'pending' runnable processes, then returns if it's |
	 * only the current process. (This test does not have |
* to be atomic.) In threaded applications this optimization |
* gets triggered quite often. |
*/ |
|
int nr_pending = nr_running; |
|
#if CONFIG_SMP |
int i; |
|
// Subtract non-idle processes running on other CPUs. |
for (i = 0; i < smp_num_cpus; i++) { |
int cpu = cpu_logical_map(i); |
if (aligned_data[cpu].schedule_data.curr != idle_task(cpu)) |
nr_pending--; |
} |
#else |
// on UP this process is on the runqueue as well |
nr_pending--; |
#endif |
if (nr_pending) { |
/* |
* This process can only be rescheduled by us, |
* so this is safe without any locking. |
*/ |
if (current->policy == SCHED_OTHER) |
current->policy |= SCHED_YIELD; |
current->need_resched = 1; |
|
spin_lock_irq(&runqueue_lock); |
move_last_runqueue(current); |
spin_unlock_irq(&runqueue_lock); |
} |
return 0; |
} |
|
/** |
* yield - yield the current processor to other threads. |
* |
* this is a shortcut for kernel-space yielding - it marks the |
* thread runnable and calls sys_sched_yield(). |
*/ |
void yield(void) |
{ |
set_current_state(TASK_RUNNING); |
sys_sched_yield(); |
schedule(); |
} |
|
void __cond_resched(void) |
{ |
set_current_state(TASK_RUNNING); |
schedule(); |
} |
|
asmlinkage long sys_sched_get_priority_max(int policy) |
{ |
int ret = -EINVAL; |
|
switch (policy) { |
case SCHED_FIFO: |
case SCHED_RR: |
ret = 99; |
break; |
case SCHED_OTHER: |
ret = 0; |
break; |
} |
return ret; |
} |
|
asmlinkage long sys_sched_get_priority_min(int policy) |
{ |
int ret = -EINVAL; |
|
switch (policy) { |
case SCHED_FIFO: |
case SCHED_RR: |
ret = 1; |
break; |
case SCHED_OTHER: |
ret = 0; |
} |
return ret; |
} |
|
asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) |
{ |
struct timespec t; |
struct task_struct *p; |
int retval = -EINVAL; |
|
if (pid < 0) |
goto out_nounlock; |
|
retval = -ESRCH; |
read_lock(&tasklist_lock); |
p = find_process_by_pid(pid); |
if (p) |
jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice), |
&t); |
read_unlock(&tasklist_lock); |
if (p) |
retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
out_nounlock: |
return retval; |
} |
|
static void show_task(struct task_struct * p) |
{ |
unsigned long free = 0; |
int state; |
static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; |
|
printk("%-13.13s ", p->comm); |
state = p->state ? ffz(~p->state) + 1 : 0; |
if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *)) |
printk(stat_nam[state]); |
else |
printk(" "); |
#if (BITS_PER_LONG == 32) |
if (p == current) |
printk(" current "); |
else |
printk(" %08lX ", thread_saved_pc(&p->thread)); |
#else |
if (p == current) |
printk(" current task "); |
else |
printk(" %016lx ", thread_saved_pc(&p->thread)); |
#endif |
{ |
unsigned long * n = (unsigned long *) (p+1); |
while (!*n) |
n++; |
free = (unsigned long) n - (unsigned long)(p+1); |
} |
printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid); |
if (p->p_cptr) |
printk("%5d ", p->p_cptr->pid); |
else |
printk(" "); |
if (p->p_ysptr) |
printk("%7d", p->p_ysptr->pid); |
else |
printk(" "); |
if (p->p_osptr) |
printk(" %5d", p->p_osptr->pid); |
else |
printk(" "); |
if (!p->mm) |
printk(" (L-TLB)\n"); |
else |
printk(" (NOTLB)\n"); |
|
{ |
extern void show_trace_task(struct task_struct *tsk); |
show_trace_task(p); |
} |
} |
|
char * render_sigset_t(sigset_t *set, char *buffer) |
{ |
int i = _NSIG, x; |
do { |
i -= 4, x = 0; |
if (sigismember(set, i+1)) x |= 1; |
if (sigismember(set, i+2)) x |= 2; |
if (sigismember(set, i+3)) x |= 4; |
if (sigismember(set, i+4)) x |= 8; |
*buffer++ = (x < 10 ? '0' : 'a' - 10) + x; |
} while (i >= 4); |
*buffer = 0; |
return buffer; |
} |
|
void show_state(void) |
{ |
struct task_struct *p; |
|
#if (BITS_PER_LONG == 32) |
printk("\n" |
" free sibling\n"); |
printk(" task PC stack pid father child younger older\n"); |
#else |
printk("\n" |
" free sibling\n"); |
printk(" task PC stack pid father child younger older\n"); |
#endif |
read_lock(&tasklist_lock); |
for_each_task(p) { |
/* |
		 * reset the NMI-timeout; listing all tasks on a slow |
		 * console might take a lot of time: |
*/ |
touch_nmi_watchdog(); |
show_task(p); |
} |
read_unlock(&tasklist_lock); |
} |
|
/** |
* reparent_to_init() - Reparent the calling kernel thread to the init task. |
* |
* If a kernel thread is launched as a result of a system call, or if |
* it ever exits, it should generally reparent itself to init so that |
* it is correctly cleaned up on exit. |
* |
 * Various task state fields such as scheduling policy and priority may have |
 * been inherited from a user process, so we reset them to sane values here. |
* |
* NOTE that reparent_to_init() gives the caller full capabilities. |
*/ |
void reparent_to_init(void) |
{ |
struct task_struct *this_task = current; |
|
write_lock_irq(&tasklist_lock); |
|
/* Reparent to init */ |
REMOVE_LINKS(this_task); |
this_task->p_pptr = child_reaper; |
this_task->p_opptr = child_reaper; |
SET_LINKS(this_task); |
|
/* Set the exit signal to SIGCHLD so we signal init on exit */ |
this_task->exit_signal = SIGCHLD; |
|
/* We also take the runqueue_lock while altering task fields |
* which affect scheduling decisions */ |
spin_lock(&runqueue_lock); |
|
this_task->ptrace = 0; |
this_task->nice = DEF_NICE; |
this_task->policy = SCHED_OTHER; |
/* cpus_allowed? */ |
/* rt_priority? */ |
/* signals? */ |
this_task->cap_effective = CAP_INIT_EFF_SET; |
this_task->cap_inheritable = CAP_INIT_INH_SET; |
this_task->cap_permitted = CAP_FULL_SET; |
this_task->keep_capabilities = 0; |
memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim))); |
switch_uid(INIT_USER); |
|
spin_unlock(&runqueue_lock); |
write_unlock_irq(&tasklist_lock); |
} |
|
/* |
* Put all the gunge required to become a kernel thread without |
* attached user resources in one place where it belongs. |
*/ |
|
void daemonize(void) |
{ |
struct fs_struct *fs; |
|
|
/* |
	 * If we were started as a result of loading a module, close all of the |
* user space pages. We don't need them, and if we didn't close them |
* they would be locked into memory. |
*/ |
exit_mm(current); |
|
current->session = 1; |
current->pgrp = 1; |
current->tty = NULL; |
|
/* Become as one with the init task */ |
|
exit_fs(current); /* current->fs->count--; */ |
fs = init_task.fs; |
current->fs = fs; |
atomic_inc(&fs->count); |
exit_files(current); |
current->files = init_task.files; |
atomic_inc(¤t->files->count); |
} |
|
extern unsigned long wait_init_idle; |
|
void __init init_idle(void) |
{ |
struct schedule_data * sched_data; |
sched_data = &aligned_data[smp_processor_id()].schedule_data; |
|
if (current != &init_task && task_on_runqueue(current)) { |
printk("UGH! (%d:%d) was on the runqueue, removing.\n", |
smp_processor_id(), current->pid); |
del_from_runqueue(current); |
} |
sched_data->curr = current; |
sched_data->last_schedule = get_cycles(); |
clear_bit(current->processor, &wait_init_idle); |
} |
|
extern void init_timervecs (void); |
|
void __init sched_init(void) |
{ |
/* |
* We have to do a little magic to get the first |
* process right in SMP mode. |
*/ |
int cpu = smp_processor_id(); |
int nr; |
|
init_task.processor = cpu; |
|
for(nr = 0; nr < PIDHASH_SZ; nr++) |
pidhash[nr] = NULL; |
|
init_timervecs(); |
|
init_bh(TIMER_BH, timer_bh); |
init_bh(TQUEUE_BH, tqueue_bh); |
init_bh(IMMEDIATE_BH, immediate_bh); |
|
/* |
* The boot idle thread does lazy MMU switching as well: |
*/ |
atomic_inc(&init_mm.mm_count); |
enter_lazy_tlb(&init_mm, current, cpu); |
} |
/softirq.c
0,0 → 1,415
/* |
* linux/kernel/softirq.c |
* |
* Copyright (C) 1992 Linus Torvalds |
* |
* Fixed a disable_bh()/enable_bh() race (was causing a console lockup) |
* due bh_mask_count not atomic handling. Copyright (C) 1998 Andrea Arcangeli |
* |
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) |
*/ |
|
#include <linux/config.h> |
#include <linux/mm.h> |
#include <linux/kernel_stat.h> |
#include <linux/interrupt.h> |
#include <linux/smp_lock.h> |
#include <linux/init.h> |
#include <linux/tqueue.h> |
|
/* |
- No shared variables, all the data are CPU local. |
- If a softirq needs serialization, let it serialize itself |
by its own spinlocks. |
   - Even if a softirq is serialized, only the local cpu is marked for |
     execution. Hence, we get a sort of weak cpu binding. It is still |
     not clear whether this will result in better locality or not. |
   - These softirqs are not masked by the global cli() and start_bh_atomic() |
     (for obvious reasons). Hence, old parts of the code still using global |
     locks MUST NOT use softirqs directly, but must insert interfacing |
     routines that acquire the global locks. E.g. look at the BH |
     implementation. |
|
Examples: |
- NET RX softirq. It is multithreaded and does not require |
any global serialization. |
- NET TX softirq. It kicks software netdevice queues, hence |
it is logically serialized per device, but this serialization |
is invisible to common code. |
   - Tasklets: serialized with respect to themselves. |
- Bottom halves: globally serialized, grr... |
*/ |
|
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned; |
|
static struct softirq_action softirq_vec[32] __cacheline_aligned; |
|
/* |
 * We cannot loop indefinitely here, to avoid userspace starvation, |
 * but we also don't want to introduce a worst-case 1/HZ latency |
 * for the pending events, so we let the scheduler balance |
 * the softirq load for us. |
*/ |
static inline void wakeup_softirqd(unsigned cpu) |
{ |
struct task_struct * tsk = ksoftirqd_task(cpu); |
|
if (tsk && tsk->state != TASK_RUNNING) |
wake_up_process(tsk); |
} |
|
asmlinkage void do_softirq() |
{ |
int cpu = smp_processor_id(); |
__u32 pending; |
unsigned long flags; |
__u32 mask; |
|
if (in_interrupt()) |
return; |
|
local_irq_save(flags); |
|
pending = softirq_pending(cpu); |
|
if (pending) { |
struct softirq_action *h; |
|
mask = ~pending; |
local_bh_disable(); |
restart: |
/* Reset the pending bitmask before enabling irqs */ |
softirq_pending(cpu) = 0; |
|
local_irq_enable(); |
|
h = softirq_vec; |
|
do { |
if (pending & 1) |
h->action(h); |
h++; |
pending >>= 1; |
} while (pending); |
|
local_irq_disable(); |
|
pending = softirq_pending(cpu); |
if (pending & mask) { |
mask &= ~pending; |
goto restart; |
} |
__local_bh_enable(); |
|
if (pending) |
wakeup_softirqd(cpu); |
} |
|
local_irq_restore(flags); |
} |
|
/* |
* This function must run with irq disabled! |
*/ |
inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) |
{ |
__cpu_raise_softirq(cpu, nr); |
|
/* |
* If we're in an interrupt or bh, we're done |
* (this also catches bh-disabled code). We will |
* actually run the softirq once we return from |
* the irq or bh. |
* |
* Otherwise we wake up ksoftirqd to make sure we |
* schedule the softirq soon. |
*/ |
if (!(local_irq_count(cpu) | local_bh_count(cpu))) |
wakeup_softirqd(cpu); |
} |
|
void raise_softirq(unsigned int nr) |
{ |
unsigned long flags; |
|
local_irq_save(flags); |
cpu_raise_softirq(smp_processor_id(), nr); |
local_irq_restore(flags); |
} |
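|
/* |
 * A minimal usage sketch: a subsystem that registered a handler with |
 * open_softirq() marks it pending with, e.g., |
 * |
 *	raise_softirq(NET_TX_SOFTIRQ); |
 * |
 * from any context; the handler then runs on this CPU either on the |
 * next return from interrupt or in ksoftirqd. |
 */ |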
|
void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) |
{ |
softirq_vec[nr].data = data; |
softirq_vec[nr].action = action; |
} |
|
|
/* Tasklets */ |
|
struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned; |
struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned; |
|
void __tasklet_schedule(struct tasklet_struct *t) |
{ |
int cpu = smp_processor_id(); |
unsigned long flags; |
|
local_irq_save(flags); |
t->next = tasklet_vec[cpu].list; |
tasklet_vec[cpu].list = t; |
cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); |
local_irq_restore(flags); |
} |
|
void __tasklet_hi_schedule(struct tasklet_struct *t) |
{ |
int cpu = smp_processor_id(); |
unsigned long flags; |
|
local_irq_save(flags); |
t->next = tasklet_hi_vec[cpu].list; |
tasklet_hi_vec[cpu].list = t; |
cpu_raise_softirq(cpu, HI_SOFTIRQ); |
local_irq_restore(flags); |
} |
|
static void tasklet_action(struct softirq_action *a) |
{ |
int cpu = smp_processor_id(); |
struct tasklet_struct *list; |
|
local_irq_disable(); |
list = tasklet_vec[cpu].list; |
tasklet_vec[cpu].list = NULL; |
local_irq_enable(); |
|
while (list) { |
struct tasklet_struct *t = list; |
|
list = list->next; |
|
if (tasklet_trylock(t)) { |
if (!atomic_read(&t->count)) { |
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) |
BUG(); |
t->func(t->data); |
tasklet_unlock(t); |
continue; |
} |
tasklet_unlock(t); |
} |
|
local_irq_disable(); |
t->next = tasklet_vec[cpu].list; |
tasklet_vec[cpu].list = t; |
__cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); |
local_irq_enable(); |
} |
} |
|
static void tasklet_hi_action(struct softirq_action *a) |
{ |
int cpu = smp_processor_id(); |
struct tasklet_struct *list; |
|
local_irq_disable(); |
list = tasklet_hi_vec[cpu].list; |
tasklet_hi_vec[cpu].list = NULL; |
local_irq_enable(); |
|
while (list) { |
struct tasklet_struct *t = list; |
|
list = list->next; |
|
if (tasklet_trylock(t)) { |
if (!atomic_read(&t->count)) { |
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) |
BUG(); |
t->func(t->data); |
tasklet_unlock(t); |
continue; |
} |
tasklet_unlock(t); |
} |
|
local_irq_disable(); |
t->next = tasklet_hi_vec[cpu].list; |
tasklet_hi_vec[cpu].list = t; |
__cpu_raise_softirq(cpu, HI_SOFTIRQ); |
local_irq_enable(); |
} |
} |
|
|
void tasklet_init(struct tasklet_struct *t, |
void (*func)(unsigned long), unsigned long data) |
{ |
t->next = NULL; |
t->state = 0; |
atomic_set(&t->count, 0); |
t->func = func; |
t->data = data; |
} |
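|
/* |
 * A minimal usage sketch, with hypothetical names: |
 * |
 *	static void my_handler(unsigned long data); |
 *	static struct tasklet_struct my_tasklet; |
 * |
 *	tasklet_init(&my_tasklet, my_handler, 0); |
 *	... |
 *	tasklet_schedule(&my_tasklet);	// typically from an irq handler |
 * |
 * my_handler() then runs once in softirq context on the scheduling CPU; |
 * DECLARE_TASKLET() can replace the explicit tasklet_init() call. |
 */ |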
|
void tasklet_kill(struct tasklet_struct *t) |
{ |
if (in_interrupt()) |
printk("Attempt to kill tasklet from interrupt\n"); |
|
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { |
current->state = TASK_RUNNING; |
do { |
yield(); |
} while (test_bit(TASKLET_STATE_SCHED, &t->state)); |
} |
tasklet_unlock_wait(t); |
clear_bit(TASKLET_STATE_SCHED, &t->state); |
} |
|
|
|
/* Old style BHs */ |
|
static void (*bh_base[32])(void); |
struct tasklet_struct bh_task_vec[32]; |
|
/* BHs are serialized by spinlock global_bh_lock. |
|
   It is still possible to implement synchronize_bh() as |
   spin_unlock_wait(&global_bh_lock). This operation is not used |
   by the kernel now; the only reason this lock is not made private |
   is wait_on_irq(). |
|
It can be removed only after auditing all the BHs. |
*/ |
spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED; |
|
static void bh_action(unsigned long nr) |
{ |
int cpu = smp_processor_id(); |
|
if (!spin_trylock(&global_bh_lock)) |
goto resched; |
|
if (!hardirq_trylock(cpu)) |
goto resched_unlock; |
|
if (bh_base[nr]) |
bh_base[nr](); |
|
hardirq_endlock(cpu); |
spin_unlock(&global_bh_lock); |
return; |
|
resched_unlock: |
spin_unlock(&global_bh_lock); |
resched: |
mark_bh(nr); |
} |
|
void init_bh(int nr, void (*routine)(void)) |
{ |
bh_base[nr] = routine; |
mb(); |
} |
|
void remove_bh(int nr) |
{ |
tasklet_kill(bh_task_vec+nr); |
bh_base[nr] = NULL; |
} |
|
void __init softirq_init() |
{ |
int i; |
|
for (i=0; i<32; i++) |
tasklet_init(bh_task_vec+i, bh_action, i); |
|
open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); |
open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); |
} |
|
void __run_task_queue(task_queue *list) |
{ |
struct list_head head, *next; |
unsigned long flags; |
|
spin_lock_irqsave(&tqueue_lock, flags); |
list_add(&head, list); |
list_del_init(list); |
spin_unlock_irqrestore(&tqueue_lock, flags); |
|
next = head.next; |
while (next != &head) { |
void (*f) (void *); |
struct tq_struct *p; |
void *data; |
|
p = list_entry(next, struct tq_struct, list); |
next = next->next; |
f = p->routine; |
data = p->data; |
wmb(); |
p->sync = 0; |
if (f) |
f(data); |
} |
} |
|
static int ksoftirqd(void * __bind_cpu) |
{ |
int bind_cpu = (int) (long) __bind_cpu; |
int cpu = cpu_logical_map(bind_cpu); |
|
daemonize(); |
current->nice = 19; |
sigfillset(¤t->blocked); |
|
/* Migrate to the right CPU */ |
current->cpus_allowed = 1UL << cpu; |
while (smp_processor_id() != cpu) |
schedule(); |
|
sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu); |
|
__set_current_state(TASK_INTERRUPTIBLE); |
mb(); |
|
ksoftirqd_task(cpu) = current; |
|
for (;;) { |
if (!softirq_pending(cpu)) |
schedule(); |
|
__set_current_state(TASK_RUNNING); |
|
while (softirq_pending(cpu)) { |
do_softirq(); |
if (current->need_resched) |
schedule(); |
} |
|
__set_current_state(TASK_INTERRUPTIBLE); |
} |
} |
|
static __init int spawn_ksoftirqd(void) |
{ |
int cpu; |
|
for (cpu = 0; cpu < smp_num_cpus; cpu++) { |
if (kernel_thread(ksoftirqd, (void *) (long) cpu, |
CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0) |
printk("spawn_ksoftirqd() failed for cpu %d\n", cpu); |
else { |
while (!ksoftirqd_task(cpu_logical_map(cpu))) |
yield(); |
} |
} |
|
return 0; |
} |
|
__initcall(spawn_ksoftirqd); |
/resource.c
0,0 → 1,372
/* |
* linux/kernel/resource.c |
* |
* Copyright (C) 1999 Linus Torvalds |
* Copyright (C) 1999 Martin Mares <mj@ucw.cz> |
* |
* Arbitrary resource management. |
*/ |
|
#include <linux/sched.h> |
#include <linux/errno.h> |
#include <linux/ioport.h> |
#include <linux/init.h> |
#include <linux/slab.h> |
#include <linux/spinlock.h> |
#include <linux/seq_file.h> |
#include <asm/io.h> |
|
struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO }; |
struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM }; |
|
static rwlock_t resource_lock = RW_LOCK_UNLOCKED; |
|
enum { MAX_IORES_LEVEL = 5 }; |
|
static void *r_next(struct seq_file *m, void *v, loff_t *pos) |
{ |
struct resource *p = v; |
(*pos)++; |
if (p->child) |
return p->child; |
while (!p->sibling && p->parent) |
p = p->parent; |
return p->sibling; |
} |
|
static void *r_start(struct seq_file *m, loff_t *pos) |
{ |
struct resource *p = m->private; |
loff_t l = 0; |
read_lock(&resource_lock); |
for (p = p->child; p && l < *pos; p = r_next(m, p, &l)) |
; |
return p; |
} |
|
static void r_stop(struct seq_file *m, void *v) |
{ |
read_unlock(&resource_lock); |
} |
|
static int r_show(struct seq_file *m, void *v) |
{ |
struct resource *root = m->private; |
struct resource *r = v, *p; |
int width = root->end < 0x10000 ? 4 : 8; |
int depth; |
|
for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent) |
if (p->parent == root) |
break; |
seq_printf(m, "%*s%0*lx-%0*lx : %s\n", |
depth * 2, "", |
width, r->start, |
width, r->end, |
r->name ? r->name : "<BAD>"); |
return 0; |
} |
|
static struct seq_operations resource_op = { |
.start = r_start, |
.next = r_next, |
.stop = r_stop, |
.show = r_show, |
}; |
|
static int ioports_open(struct inode *inode, struct file *file) |
{ |
int res = seq_open(file, &resource_op); |
if (!res) { |
struct seq_file *m = file->private_data; |
m->private = &ioport_resource; |
} |
return res; |
} |
|
static int iomem_open(struct inode *inode, struct file *file) |
{ |
int res = seq_open(file, &resource_op); |
if (!res) { |
struct seq_file *m = file->private_data; |
m->private = &iomem_resource; |
} |
return res; |
} |
|
struct file_operations proc_ioports_operations = { |
.open = ioports_open, |
.read = seq_read, |
.llseek = seq_lseek, |
.release = seq_release, |
}; |
|
struct file_operations proc_iomem_operations = { |
.open = iomem_open, |
.read = seq_read, |
.llseek = seq_lseek, |
.release = seq_release, |
}; |
|
/* Return the conflict entry if you can't request it */ |
static struct resource * __request_resource(struct resource *root, struct resource *new) |
{ |
unsigned long start = new->start; |
unsigned long end = new->end; |
struct resource *tmp, **p; |
|
if (end < start) |
return root; |
if (start < root->start) |
return root; |
if (end > root->end) |
return root; |
p = &root->child; |
for (;;) { |
tmp = *p; |
if (!tmp || tmp->start > end) { |
new->sibling = tmp; |
*p = new; |
new->parent = root; |
return NULL; |
} |
p = &tmp->sibling; |
if (tmp->end < start) |
continue; |
return tmp; |
} |
} |
|
static int __release_resource(struct resource *old) |
{ |
struct resource *tmp, **p; |
|
p = &old->parent->child; |
for (;;) { |
tmp = *p; |
if (!tmp) |
break; |
if (tmp == old) { |
*p = tmp->sibling; |
old->parent = NULL; |
return 0; |
} |
p = &tmp->sibling; |
} |
return -EINVAL; |
} |
|
int request_resource(struct resource *root, struct resource *new) |
{ |
struct resource *conflict; |
|
write_lock(&resource_lock); |
conflict = __request_resource(root, new); |
write_unlock(&resource_lock); |
return conflict ? -EBUSY : 0; |
} |
|
int release_resource(struct resource *old) |
{ |
int retval; |
|
write_lock(&resource_lock); |
retval = __release_resource(old); |
write_unlock(&resource_lock); |
return retval; |
} |
|
int check_resource(struct resource *root, unsigned long start, unsigned long len) |
{ |
struct resource *conflict, tmp; |
|
tmp.start = start; |
tmp.end = start + len - 1; |
write_lock(&resource_lock); |
conflict = __request_resource(root, &tmp); |
if (!conflict) |
__release_resource(&tmp); |
write_unlock(&resource_lock); |
return conflict ? -EBUSY : 0; |
} |
|
/* |
* Find empty slot in the resource tree given range and alignment. |
*/ |
static int find_resource(struct resource *root, struct resource *new, |
unsigned long size, |
unsigned long min, unsigned long max, |
unsigned long align, |
void (*alignf)(void *, struct resource *, |
unsigned long, unsigned long), |
void *alignf_data) |
{ |
struct resource *this = root->child; |
|
new->start = root->start; |
for(;;) { |
if (this) |
new->end = this->start; |
else |
new->end = root->end; |
if (new->start < min) |
new->start = min; |
if (new->end > max) |
new->end = max; |
new->start = (new->start + align - 1) & ~(align - 1); |
if (alignf) |
alignf(alignf_data, new, size, align); |
if (new->start < new->end && new->end - new->start + 1 >= size) { |
new->end = new->start + size - 1; |
return 0; |
} |
if (!this) |
break; |
new->start = this->end + 1; |
this = this->sibling; |
} |
return -EBUSY; |
} |
|
/* |
* Allocate empty slot in the resource tree given range and alignment. |
*/ |
int allocate_resource(struct resource *root, struct resource *new, |
unsigned long size, |
unsigned long min, unsigned long max, |
unsigned long align, |
void (*alignf)(void *, struct resource *, |
unsigned long, unsigned long), |
void *alignf_data) |
{ |
int err; |
|
write_lock(&resource_lock); |
err = find_resource(root, new, size, min, max, align, alignf, alignf_data); |
if (err >= 0 && __request_resource(root, new)) |
err = -EBUSY; |
write_unlock(&resource_lock); |
return err; |
} |
|
/* |
* This is compatibility stuff for IO resources. |
* |
* Note how this, unlike the above, knows about |
* the IO flag meanings (busy etc). |
* |
* Request-region creates a new busy region. |
* |
* Check-region returns non-zero if the area is already busy |
* |
* Release-region releases a matching busy region. |
*/ |
struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) |
{ |
struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL); |
|
if (res) { |
memset(res, 0, sizeof(*res)); |
res->name = name; |
res->start = start; |
res->end = start + n - 1; |
res->flags = IORESOURCE_BUSY; |
|
write_lock(&resource_lock); |
|
for (;;) { |
struct resource *conflict; |
|
conflict = __request_resource(parent, res); |
if (!conflict) |
break; |
if (conflict != parent) { |
parent = conflict; |
if (!(conflict->flags & IORESOURCE_BUSY)) |
continue; |
} |
|
/* Uhhuh, that didn't work out.. */ |
kfree(res); |
res = NULL; |
break; |
} |
write_unlock(&resource_lock); |
} |
return res; |
} |
|
int __check_region(struct resource *parent, unsigned long start, unsigned long n) |
{ |
struct resource * res; |
|
res = __request_region(parent, start, n, "check-region"); |
if (!res) |
return -EBUSY; |
|
release_resource(res); |
kfree(res); |
return 0; |
} |
|
void __release_region(struct resource *parent, unsigned long start, unsigned long n) |
{ |
struct resource **p; |
unsigned long end; |
|
p = &parent->child; |
end = start + n - 1; |
|
for (;;) { |
struct resource *res = *p; |
|
if (!res) |
break; |
if (res->start <= start && res->end >= end) { |
if (!(res->flags & IORESOURCE_BUSY)) { |
p = &res->child; |
continue; |
} |
if (res->start != start || res->end != end) |
break; |
*p = res->sibling; |
kfree(res); |
return; |
} |
p = &res->sibling; |
} |
printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end); |
} |
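|
/* |
 * A minimal usage sketch of the compatibility wrappers (hypothetical |
 * driver values): |
 * |
 *	if (!request_region(0x330, 16, "mydriver")) |
 *		return -EBUSY; |
 *	... |
 *	release_region(0x330, 16); |
 * |
 * request_region() and release_region() are macros that call |
 * __request_region()/__release_region() on ioport_resource. |
 */ |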
|
/* |
* Called from init/main.c to reserve IO ports. |
*/ |
#define MAXRESERVE 4 |
static int __init reserve_setup(char *str) |
{ |
static int reserved = 0; |
static struct resource reserve[MAXRESERVE]; |
|
for (;;) { |
int io_start, io_num; |
int x = reserved; |
|
if (get_option (&str, &io_start) != 2) |
break; |
if (get_option (&str, &io_num) == 0) |
break; |
if (x < MAXRESERVE) { |
struct resource *res = reserve + x; |
res->name = "reserved"; |
res->start = io_start; |
res->end = io_start + io_num - 1; |
res->flags = IORESOURCE_BUSY; |
res->child = NULL; |
if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0) |
reserved = x+1; |
} |
} |
return 1; |
} |
|
__setup("reserve=", reserve_setup); |
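|
/* |
 * For example, booting with "reserve=0x330,0x10" marks I/O ports |
 * 0x330-0x33f as busy, so probing drivers will skip them; values at or |
 * above 0x10000 are reserved in iomem_resource instead. |
 */ |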
/uid16.c
0,0 → 1,163
/* |
* Wrapper functions for 16bit uid back compatibility. All nicely tied |
 * together in the faint hope we can take them out in five years' time. |
*/ |
|
#include <linux/mm.h> |
#include <linux/utsname.h> |
#include <linux/mman.h> |
#include <linux/smp_lock.h> |
#include <linux/notifier.h> |
#include <linux/reboot.h> |
#include <linux/prctl.h> |
#include <linux/init.h> |
#include <linux/highuid.h> |
|
#include <asm/uaccess.h> |
|
extern asmlinkage long sys_chown(const char *, uid_t,gid_t); |
extern asmlinkage long sys_lchown(const char *, uid_t,gid_t); |
extern asmlinkage long sys_fchown(unsigned int, uid_t,gid_t); |
extern asmlinkage long sys_setregid(gid_t, gid_t); |
extern asmlinkage long sys_setgid(gid_t); |
extern asmlinkage long sys_setreuid(uid_t, uid_t); |
extern asmlinkage long sys_setuid(uid_t); |
extern asmlinkage long sys_setresuid(uid_t, uid_t, uid_t); |
extern asmlinkage long sys_setresgid(gid_t, gid_t, gid_t); |
extern asmlinkage long sys_setfsuid(uid_t); |
extern asmlinkage long sys_setfsgid(gid_t); |
|
asmlinkage long sys_chown16(const char * filename, old_uid_t user, old_gid_t group) |
{ |
return sys_chown(filename, low2highuid(user), low2highgid(group)); |
} |
|
asmlinkage long sys_lchown16(const char * filename, old_uid_t user, old_gid_t group) |
{ |
return sys_lchown(filename, low2highuid(user), low2highgid(group)); |
} |
|
asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) |
{ |
return sys_fchown(fd, low2highuid(user), low2highgid(group)); |
} |
|
asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) |
{ |
return sys_setregid(low2highgid(rgid), low2highgid(egid)); |
} |
|
asmlinkage long sys_setgid16(old_gid_t gid) |
{ |
return sys_setgid((gid_t)gid); |
} |
|
asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) |
{ |
return sys_setreuid(low2highuid(ruid), low2highuid(euid)); |
} |
|
asmlinkage long sys_setuid16(old_uid_t uid) |
{ |
return sys_setuid((uid_t)uid); |
} |
|
asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) |
{ |
return sys_setresuid(low2highuid(ruid), low2highuid(euid), |
low2highuid(suid)); |
} |
|
asmlinkage long sys_getresuid16(old_uid_t *ruid, old_uid_t *euid, old_uid_t *suid) |
{ |
int retval; |
|
if (!(retval = put_user(high2lowuid(current->uid), ruid)) && |
!(retval = put_user(high2lowuid(current->euid), euid))) |
retval = put_user(high2lowuid(current->suid), suid); |
|
return retval; |
} |
|
asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) |
{ |
return sys_setresgid(low2highgid(rgid), low2highgid(egid), |
low2highgid(sgid)); |
} |
|
asmlinkage long sys_getresgid16(old_gid_t *rgid, old_gid_t *egid, old_gid_t *sgid) |
{ |
int retval; |
|
if (!(retval = put_user(high2lowgid(current->gid), rgid)) && |
!(retval = put_user(high2lowgid(current->egid), egid))) |
retval = put_user(high2lowgid(current->sgid), sgid); |
|
return retval; |
} |
|
asmlinkage long sys_setfsuid16(old_uid_t uid) |
{ |
return sys_setfsuid((uid_t)uid); |
} |
|
asmlinkage long sys_setfsgid16(old_gid_t gid) |
{ |
return sys_setfsgid((gid_t)gid); |
} |
|
asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t *grouplist) |
{ |
old_gid_t groups[NGROUPS]; |
int i,j; |
|
if (gidsetsize < 0) |
return -EINVAL; |
i = current->ngroups; |
if (gidsetsize) { |
if (i > gidsetsize) |
return -EINVAL; |
for(j=0;j<i;j++) |
groups[j] = current->groups[j]; |
if (copy_to_user(grouplist, groups, sizeof(old_gid_t)*i)) |
return -EFAULT; |
} |
return i; |
} |
|
asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t *grouplist) |
{ |
old_gid_t groups[NGROUPS]; |
int i; |
|
if (!capable(CAP_SETGID)) |
return -EPERM; |
if ((unsigned) gidsetsize > NGROUPS) |
return -EINVAL; |
if (copy_from_user(groups, grouplist, gidsetsize * sizeof(old_gid_t))) |
return -EFAULT; |
for (i = 0 ; i < gidsetsize ; i++) |
current->groups[i] = (gid_t)groups[i]; |
current->ngroups = gidsetsize; |
return 0; |
} |
|
asmlinkage long sys_getuid16(void) |
{ |
return high2lowuid(current->uid); |
} |
|
asmlinkage long sys_geteuid16(void) |
{ |
return high2lowuid(current->euid); |
} |
|
asmlinkage long sys_getgid16(void) |
{ |
return high2lowgid(current->gid); |
} |
|
asmlinkage long sys_getegid16(void) |
{ |
return high2lowgid(current->egid); |
} |
/itimer.c
0,0 → 1,170
/* |
* linux/kernel/itimer.c |
* |
* Copyright (C) 1992 Darren Senn |
*/ |
|
/* These are all the functions necessary to implement itimers */ |
|
#include <linux/mm.h> |
#include <linux/smp_lock.h> |
#include <linux/interrupt.h> |
|
#include <asm/uaccess.h> |
|
/* |
* change timeval to jiffies, trying to avoid the |
* most obvious overflows.. |
* |
* The tv_*sec values are signed, but nothing seems to |
* indicate whether we really should use them as signed values |
* when doing itimers. POSIX doesn't mention this (but if |
* alarm() uses itimers without checking, we have to use unsigned |
* arithmetic). |
*/ |
static unsigned long tvtojiffies(struct timeval *value) |
{ |
unsigned long sec = (unsigned) value->tv_sec; |
unsigned long usec = (unsigned) value->tv_usec; |
|
if (sec > (ULONG_MAX / HZ)) |
return ULONG_MAX; |
usec += 1000000 / HZ - 1; |
usec /= 1000000 / HZ; |
return HZ*sec+usec; |
} |
|
static void jiffiestotv(unsigned long jiffies, struct timeval *value) |
{ |
value->tv_usec = (jiffies % HZ) * (1000000 / HZ); |
value->tv_sec = jiffies / HZ; |
} |
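A worked example of the conversion above, assuming HZ=100 (the usual 2.4-era x86 value, so one tick is 10000 us): the microsecond part is rounded up to whole ticks, and going back through jiffiestotv() loses anything finer than a tick.

#include <stdio.h>

#define HZ 100                       /* assumption: typical 2.4-era x86 setting */

int main(void)
{
        unsigned long sec = 1, usec = 5000;          /* 1.005 s requested */
        usec += 1000000 / HZ - 1;                    /* 5000 + 9999 = 14999 */
        usec /= 1000000 / HZ;                        /* -> 1 whole tick */
        printf("%lu jiffies\n", HZ * sec + usec);    /* prints "101 jiffies" = 1.01 s */
        return 0;
}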
|
int do_getitimer(int which, struct itimerval *value) |
{ |
register unsigned long val, interval; |
|
switch (which) { |
case ITIMER_REAL: |
interval = current->it_real_incr; |
val = 0; |
/* |
* FIXME! This needs to be atomic, in case the kernel timer happens! |
*/ |
if (timer_pending(¤t->real_timer)) { |
val = current->real_timer.expires - jiffies; |
|
/* look out for negative/zero itimer.. */ |
if ((long) val <= 0) |
val = 1; |
} |
break; |
case ITIMER_VIRTUAL: |
val = current->it_virt_value; |
interval = current->it_virt_incr; |
break; |
case ITIMER_PROF: |
val = current->it_prof_value; |
interval = current->it_prof_incr; |
break; |
default: |
return(-EINVAL); |
} |
jiffiestotv(val, &value->it_value); |
jiffiestotv(interval, &value->it_interval); |
return 0; |
} |
|
/* SMP: Only we modify our itimer values. */ |
asmlinkage long sys_getitimer(int which, struct itimerval *value) |
{ |
int error = -EFAULT; |
struct itimerval get_buffer; |
|
if (value) { |
error = do_getitimer(which, &get_buffer); |
if (!error && |
copy_to_user(value, &get_buffer, sizeof(get_buffer))) |
error = -EFAULT; |
} |
return error; |
} |
|
void it_real_fn(unsigned long __data) |
{ |
struct task_struct * p = (struct task_struct *) __data; |
unsigned long interval; |
|
send_sig(SIGALRM, p, 1); |
interval = p->it_real_incr; |
if (interval) { |
if (interval > (unsigned long) LONG_MAX) |
interval = LONG_MAX; |
p->real_timer.expires = jiffies + interval; |
add_timer(&p->real_timer); |
} |
} |
|
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) |
{ |
register unsigned long i, j; |
int k; |
|
i = tvtojiffies(&value->it_interval); |
j = tvtojiffies(&value->it_value); |
if (ovalue && (k = do_getitimer(which, ovalue)) < 0) |
return k; |
switch (which) { |
case ITIMER_REAL: |
del_timer_sync(¤t->real_timer); |
current->it_real_value = j; |
current->it_real_incr = i; |
if (!j) |
break; |
if (j > (unsigned long) LONG_MAX) |
j = LONG_MAX; |
i = j + jiffies; |
current->real_timer.expires = i; |
add_timer(¤t->real_timer); |
break; |
case ITIMER_VIRTUAL: |
if (j) |
j++; |
current->it_virt_value = j; |
current->it_virt_incr = i; |
break; |
case ITIMER_PROF: |
if (j) |
j++; |
current->it_prof_value = j; |
current->it_prof_incr = i; |
break; |
default: |
return -EINVAL; |
} |
return 0; |
} |
|
/* SMP: Again, only we play with our itimers, and signals are SMP safe |
* now so that is not an issue at all anymore. |
*/ |
asmlinkage long sys_setitimer(int which, struct itimerval *value, |
struct itimerval *ovalue) |
{ |
struct itimerval set_buffer, get_buffer; |
int error; |
|
if (value) { |
if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) |
return -EFAULT; |
} else |
memset((char *) &set_buffer, 0, sizeof(set_buffer)); |
|
error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : 0); |
if (error || !ovalue) |
return error; |
|
if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer))) |
return -EFAULT; |
return 0; |
} |
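A userspace view of the interface implemented above, using standard POSIX setitimer() with nothing kernel-internal assumed: arming ITIMER_REAL ends up in do_setitimer(), and expiry is delivered as SIGALRM by it_real_fn().

#include <signal.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

static void on_alarm(int sig)
{
        (void)sig;
        write(1, "tick\n", 5);                       /* async-signal-safe */
}

int main(void)
{
        struct itimerval it = { { 0, 0 }, { 0, 500000 } };  /* one-shot, 0.5 s */

        signal(SIGALRM, on_alarm);
        setitimer(ITIMER_REAL, &it, NULL);
        pause();                                     /* returns once SIGALRM arrives */
        return 0;
}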
/kmod.c
0,0 → 1,376
/* |
kmod, the new module loader (replaces kerneld) |
Kirk Petersen |
|
Reorganized not to be a daemon by Adam Richter, with guidance |
from Greg Zornetzer. |
|
Modified to avoid chroot and file sharing problems. |
Mikael Pettersson |
|
Limit the number of concurrent kmod modprobes to catch loops from |
"modprobe needs a service that is in a module". |
Keith Owens <kaos@ocs.com.au> December 1999 |
|
Unblock all signals when we exec a usermode process. |
Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000 |
*/ |
|
#define __KERNEL_SYSCALLS__ |
|
#include <linux/config.h> |
#include <linux/module.h> |
#include <linux/sched.h> |
#include <linux/unistd.h> |
#include <linux/kmod.h> |
#include <linux/smp_lock.h> |
#include <linux/slab.h> |
#include <linux/namespace.h> |
#include <linux/completion.h> |
|
#include <asm/uaccess.h> |
|
extern int max_threads; |
|
static inline void |
use_init_fs_context(void) |
{ |
struct fs_struct *our_fs, *init_fs; |
struct dentry *root, *pwd; |
struct vfsmount *rootmnt, *pwdmnt; |
struct namespace *our_ns, *init_ns; |
|
/* |
* Make modprobe's fs context be a copy of init's. |
* |
* We cannot use the user's fs context, because it |
* may have a different root than init. |
* Since init was created with CLONE_FS, we can grab |
* its fs context from "init_task". |
* |
* The fs context has to be a copy. If it is shared |
* with init, then any chdir() call in modprobe will |
* also affect init and the other threads sharing |
* init_task's fs context. |
* |
* We created the exec_modprobe thread without CLONE_FS, |
* so we can update the fields in our fs context freely. |
*/ |
|
init_fs = init_task.fs; |
init_ns = init_task.namespace; |
get_namespace(init_ns); |
our_ns = current->namespace; |
current->namespace = init_ns; |
put_namespace(our_ns); |
read_lock(&init_fs->lock); |
rootmnt = mntget(init_fs->rootmnt); |
root = dget(init_fs->root); |
pwdmnt = mntget(init_fs->pwdmnt); |
pwd = dget(init_fs->pwd); |
read_unlock(&init_fs->lock); |
|
/* FIXME - unsafe ->fs access */ |
our_fs = current->fs; |
our_fs->umask = init_fs->umask; |
set_fs_root(our_fs, rootmnt, root); |
set_fs_pwd(our_fs, pwdmnt, pwd); |
write_lock(&our_fs->lock); |
if (our_fs->altroot) { |
struct vfsmount *mnt = our_fs->altrootmnt; |
struct dentry *dentry = our_fs->altroot; |
our_fs->altrootmnt = NULL; |
our_fs->altroot = NULL; |
write_unlock(&our_fs->lock); |
dput(dentry); |
mntput(mnt); |
} else |
write_unlock(&our_fs->lock); |
dput(root); |
mntput(rootmnt); |
dput(pwd); |
mntput(pwdmnt); |
} |
|
int exec_usermodehelper(char *program_path, char *argv[], char *envp[]) |
{ |
int i; |
struct task_struct *curtask = current; |
|
curtask->session = 1; |
curtask->pgrp = 1; |
|
use_init_fs_context(); |
|
/* Prevent parent user process from sending signals to child. |
Otherwise, if the modprobe program does not exist, it might |
be possible to get a user defined signal handler to execute |
as the super user right after the execve fails if you time |
the signal just right. |
*/ |
spin_lock_irq(&curtask->sigmask_lock); |
sigemptyset(&curtask->blocked); |
flush_signals(curtask); |
flush_signal_handlers(curtask); |
recalc_sigpending(curtask); |
spin_unlock_irq(&curtask->sigmask_lock); |
|
for (i = 0; i < curtask->files->max_fds; i++ ) { |
if (curtask->files->fd[i]) close(i); |
} |
|
switch_uid(INIT_USER); |
|
/* Give kmod all effective privileges.. */ |
curtask->euid = curtask->uid = curtask->suid = curtask->fsuid = 0; |
curtask->egid = curtask->gid = curtask->sgid = curtask->fsgid = 0; |
|
curtask->ngroups = 0; |
|
cap_set_full(curtask->cap_effective); |
|
/* Allow execve args to be in kernel space. */ |
set_fs(KERNEL_DS); |
|
/* Go, go, go... */ |
if (execve(program_path, argv, envp) < 0) |
return -errno; |
return 0; |
} |
|
#ifdef CONFIG_KMOD |
|
/* |
modprobe_path is set via /proc/sys. |
*/ |
char modprobe_path[256] = "/sbin/modprobe"; |
|
static int exec_modprobe(void * module_name) |
{ |
static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; |
char *argv[] = { modprobe_path, "-s", "-k", "--", (char*)module_name, NULL }; |
int ret; |
|
ret = exec_usermodehelper(modprobe_path, argv, envp); |
if (ret) { |
printk(KERN_ERR |
"kmod: failed to exec %s -s -k %s, errno = %d\n", |
modprobe_path, (char*) module_name, errno); |
} |
return ret; |
} |
|
/** |
* request_module - try to load a kernel module |
* @module_name: Name of module |
* |
* Load a module using the user mode module loader. The function returns |
* zero on success or a negative errno code on failure. Note that a |
* successful module load does not mean the module did not then unload |
* and exit on an error of its own. Callers must check that the service |
* they requested is now available, not blindly invoke it. |
* |
* If module auto-loading support is disabled then this function |
* becomes a no-operation. |
*/ |
int request_module(const char * module_name) |
{ |
pid_t pid; |
int waitpid_result; |
sigset_t tmpsig; |
int i; |
static atomic_t kmod_concurrent = ATOMIC_INIT(0); |
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ |
static int kmod_loop_msg; |
|
/* Don't allow request_module() before the root fs is mounted! */ |
if ( ! current->fs->root ) { |
printk(KERN_ERR "request_module[%s]: Root fs not mounted\n", |
module_name); |
return -EPERM; |
} |
|
/* If modprobe needs a service that is in a module, we get a recursive |
* loop. Limit the number of running kmod threads to max_threads/2 or |
* MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method |
* would be to run the parents of this process, counting how many times |
* kmod was invoked. That would mean accessing the internals of the |
* process tables to get the command line; proc_pid_cmdline is static |
* and it is not worth changing the proc code just to handle this case. |
* KAO. |
*/ |
i = max_threads/2; |
if (i > MAX_KMOD_CONCURRENT) |
i = MAX_KMOD_CONCURRENT; |
atomic_inc(&kmod_concurrent); |
if (atomic_read(&kmod_concurrent) > i) { |
if (kmod_loop_msg++ < 5) |
printk(KERN_ERR |
"kmod: runaway modprobe loop assumed and stopped\n"); |
atomic_dec(&kmod_concurrent); |
return -ENOMEM; |
} |
|
pid = kernel_thread(exec_modprobe, (void*) module_name, 0); |
if (pid < 0) { |
printk(KERN_ERR "request_module[%s]: fork failed, errno %d\n", module_name, -pid); |
atomic_dec(&kmod_concurrent); |
return pid; |
} |
|
/* Block everything but SIGKILL/SIGSTOP */ |
spin_lock_irq(¤t->sigmask_lock); |
tmpsig = current->blocked; |
siginitsetinv(¤t->blocked, sigmask(SIGKILL) | sigmask(SIGSTOP)); |
recalc_sigpending(current); |
spin_unlock_irq(¤t->sigmask_lock); |
|
waitpid_result = waitpid(pid, NULL, __WCLONE); |
atomic_dec(&kmod_concurrent); |
|
/* Allow signals again.. */ |
spin_lock_irq(¤t->sigmask_lock); |
current->blocked = tmpsig; |
recalc_sigpending(current); |
spin_unlock_irq(¤t->sigmask_lock); |
|
if (waitpid_result != pid) { |
printk(KERN_ERR "request_module[%s]: waitpid(%d,...) failed, errno %d\n", |
module_name, pid, -waitpid_result); |
} |
return 0; |
} |
#endif /* CONFIG_KMOD */ |
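A hedged sketch of the caller pattern the kernel-doc above asks for: request the module, then look the service up again rather than assuming it is now present. The lookup helper and structure names here are hypothetical.

/* Illustrative only -- my_proto and my_proto_lookup() are made-up names. */
static struct my_proto *find_proto(const char *name)
{
        struct my_proto *p = my_proto_lookup(name);
        if (!p) {
                request_module(name);        /* may load it, may silently fail */
                p = my_proto_lookup(name);   /* re-check; never trust the load alone */
        }
        return p;
}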
|
|
#ifdef CONFIG_HOTPLUG |
/* |
hotplug path is set via /proc/sys |
invoked by hotplug-aware bus drivers, |
with exec_usermodehelper and some thread-spawner |
|
argv [0] = hotplug_path; |
argv [1] = "usb", "scsi", "pci", "network", etc; |
... plus optional type-specific parameters |
argv [n] = 0; |
|
envp [*] = HOME, PATH; optional type-specific parameters |
|
a hotplug bus should invoke this for device add/remove |
events. the command is expected to load drivers when |
necessary, and may perform additional system setup. |
*/ |
char hotplug_path[256] = "/sbin/hotplug"; |
|
EXPORT_SYMBOL(hotplug_path); |
|
#endif /* CONFIG_HOTPLUG */ |
|
struct subprocess_info { |
struct completion *complete; |
char *path; |
char **argv; |
char **envp; |
pid_t retval; |
}; |
|
/* |
* This is the task which runs the usermode application |
*/ |
static int ____call_usermodehelper(void *data) |
{ |
struct subprocess_info *sub_info = data; |
int retval; |
|
retval = -EPERM; |
if (current->fs->root) |
retval = exec_usermodehelper(sub_info->path, sub_info->argv, sub_info->envp); |
|
/* Exec failed? */ |
sub_info->retval = (pid_t)retval; |
do_exit(0); |
} |
|
/* |
* This is run by keventd. |
*/ |
static void __call_usermodehelper(void *data) |
{ |
struct subprocess_info *sub_info = data; |
pid_t pid; |
|
/* |
* CLONE_VFORK: wait until the usermode helper has execve'd successfully |
* We need the data structures to stay around until that is done. |
*/ |
pid = kernel_thread(____call_usermodehelper, sub_info, CLONE_VFORK | SIGCHLD); |
if (pid < 0) |
sub_info->retval = pid; |
complete(sub_info->complete); |
} |
|
/** |
* call_usermodehelper - start a usermode application |
* @path: pathname for the application |
* @argv: null-terminated argument list |
* @envp: null-terminated environment list |
* |
* Runs a user-space application. The application is started asynchronously. It |
* runs as a child of keventd. It runs with full root capabilities. keventd silently |
* reaps the child when it exits. |
* |
* Must be called from process context. Returns zero on success, else a negative |
* error code. |
*/ |
int call_usermodehelper(char *path, char **argv, char **envp) |
{ |
DECLARE_COMPLETION(work); |
struct subprocess_info sub_info = { |
complete: &work, |
path: path, |
argv: argv, |
envp: envp, |
retval: 0, |
}; |
struct tq_struct tqs = { |
routine: __call_usermodehelper, |
data: &sub_info, |
}; |
|
if (path[0] == '\0') |
goto out; |
|
if (current_is_keventd()) { |
/* We can't wait on keventd! */ |
__call_usermodehelper(&sub_info); |
} else { |
schedule_task(&tqs); |
wait_for_completion(&work); |
} |
out: |
return sub_info.retval; |
} |
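A hedged sketch of calling call_usermodehelper() from process context, per the kernel-doc above; the helper path and argument are hypothetical, and the environment mirrors the minimal set used by exec_modprobe() earlier in this file.

/* Illustrative only -- /sbin/my-agent is a made-up helper. */
static int run_my_agent(char *action)
{
        char *argv[] = { "/sbin/my-agent", action, NULL };
        char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

        return call_usermodehelper(argv[0], argv, envp);
}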
|
/* |
* This is for the serialisation of device probe() functions |
* against device open() functions |
*/ |
static DECLARE_MUTEX(dev_probe_sem); |
|
void dev_probe_lock(void) |
{ |
down(&dev_probe_sem); |
} |
|
void dev_probe_unlock(void) |
{ |
up(&dev_probe_sem); |
} |
|
EXPORT_SYMBOL(exec_usermodehelper); |
EXPORT_SYMBOL(call_usermodehelper); |
|
#ifdef CONFIG_KMOD |
EXPORT_SYMBOL(request_module); |
#endif |
|
/panic.c
0,0 → 1,154
/* |
* linux/kernel/panic.c |
* |
* Copyright (C) 1991, 1992 Linus Torvalds |
*/ |
|
/* |
* This function is used throughout the kernel (including mm and fs) |
* to indicate a major problem. |
*/ |
#include <linux/config.h> |
#include <linux/sched.h> |
#include <linux/delay.h> |
#include <linux/reboot.h> |
#include <linux/notifier.h> |
#include <linux/init.h> |
#include <linux/sysrq.h> |
#include <linux/interrupt.h> |
#include <linux/console.h> |
|
asmlinkage void sys_sync(void); /* it's really int */ |
|
int panic_timeout; |
|
struct notifier_block *panic_notifier_list; |
|
static int __init panic_setup(char *str) |
{ |
panic_timeout = simple_strtoul(str, NULL, 0); |
return 1; |
} |
|
__setup("panic=", panic_setup); |
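A usage note for the option parsed above (the value is hypothetical): booting with

panic=30

makes panic() below call machine_restart() thirty seconds after printing the panic message, while the default panic_timeout of 0 leaves the machine spinning in the panic loop forever.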
|
int machine_paniced; |
|
/** |
* panic - halt the system |
* @fmt: The text string to print |
* |
* Display a message, then perform cleanups. Functions in the panic |
* notifier list are called after the filesystem cache is flushed (when possible). |
* |
* This function never returns. |
*/ |
|
NORET_TYPE void panic(const char * fmt, ...) |
{ |
static char buf[1024]; |
va_list args; |
#if defined(CONFIG_ARCH_S390) |
unsigned long caller = (unsigned long) __builtin_return_address(0); |
#endif |
|
#ifdef CONFIG_VT |
disable_console_blank(); |
#endif |
machine_paniced = 1; |
|
bust_spinlocks(1); |
va_start(args, fmt); |
vsprintf(buf, fmt, args); |
va_end(args); |
printk(KERN_EMERG "Kernel panic: %s\n",buf); |
if (in_interrupt()) |
printk(KERN_EMERG "In interrupt handler - not syncing\n"); |
else if (!current->pid) |
printk(KERN_EMERG "In idle task - not syncing\n"); |
else |
sys_sync(); |
bust_spinlocks(0); |
|
#ifdef CONFIG_SMP |
smp_send_stop(); |
#endif |
|
notifier_call_chain(&panic_notifier_list, 0, NULL); |
|
if (panic_timeout > 0) |
{ |
/* |
* Delay timeout seconds before rebooting the machine. |
* We can't use the "normal" timers since we just panicked.. |
*/ |
printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); |
mdelay(panic_timeout*1000); |
/* |
* Should we run the reboot notifier? For the moment I'm |
* choosing not to. It might crash, be corrupt or do |
* more harm than good for other reasons. |
*/ |
machine_restart(NULL); |
} |
#ifdef __sparc__ |
{ |
extern int stop_a_enabled; |
/* Make sure the user can actually press L1-A */ |
stop_a_enabled = 1; |
printk("Press L1-A to return to the boot prom\n"); |
} |
#endif |
#if defined(CONFIG_ARCH_S390) |
disabled_wait(caller); |
#endif |
sti(); |
for(;;) { |
#if defined(CONFIG_X86) && defined(CONFIG_VT) |
extern void panic_blink(void); |
panic_blink(); |
#endif |
CHECK_EMERGENCY_SYNC |
} |
} |
|
/** |
* print_tainted - return a string to represent the kernel taint state. |
* |
* The string is overwritten by the next call to print_tainted(). |
*/ |
|
const char *print_tainted() |
{ |
static char buf[20]; |
if (tainted) { |
snprintf(buf, sizeof(buf), "Tainted: %c%c", |
tainted & 1 ? 'P' : 'G', |
tainted & 2 ? 'F' : ' '); |
} |
else |
snprintf(buf, sizeof(buf), "Not tainted"); |
return(buf); |
} |
|
int tainted = 0; |
|
/* |
* A BUG() call in an inline function in a header should be avoided, |
* because it can seriously bloat the kernel. So here we have |
* helper functions. |
* We lose the BUG()-time file-and-line info this way, but it's |
* usually not very useful from an inline anyway. The backtrace |
* tells us what we want to know. |
*/ |
|
void __out_of_line_bug(int line) |
{ |
printk("kernel BUG in header file at line %d\n", line); |
|
BUG(); |
|
/* Satisfy __attribute__((noreturn)) */ |
for ( ; ; ) |
; |
} |
/info.c
0,0 → 1,79
/* |
* linux/kernel/info.c |
* |
* Copyright (C) 1992 Darren Senn |
*/ |
|
/* This implements the sysinfo() system call */ |
|
#include <linux/mm.h> |
#include <linux/unistd.h> |
#include <linux/swap.h> |
#include <linux/smp_lock.h> |
|
#include <asm/uaccess.h> |
|
asmlinkage long sys_sysinfo(struct sysinfo *info) |
{ |
struct sysinfo val; |
|
memset((char *)&val, 0, sizeof(struct sysinfo)); |
|
cli(); |
val.uptime = jiffies / HZ; |
|
val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); |
val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); |
val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); |
|
val.procs = nr_threads-1; |
sti(); |
|
si_meminfo(&val); |
si_swapinfo(&val); |
|
{ |
unsigned long mem_total, sav_total; |
unsigned int mem_unit, bitcount; |
|
/* If the sum of all the available memory (i.e. ram + swap) |
* is less than can be stored in a 32 bit unsigned long then |
* we can be binary compatible with 2.2.x kernels. If not, |
* well, in that case 2.2.x was broken anyways... |
* |
* -Erik Andersen <andersee@debian.org> */ |
|
mem_total = val.totalram + val.totalswap; |
if (mem_total < val.totalram || mem_total < val.totalswap) |
goto out; |
bitcount = 0; |
mem_unit = val.mem_unit; |
while (mem_unit > 1) { |
bitcount++; |
mem_unit >>= 1; |
sav_total = mem_total; |
mem_total <<= 1; |
if (mem_total < sav_total) |
goto out; |
} |
|
/* If mem_total did not overflow, multiply all memory values by |
* val.mem_unit and set it to 1. This leaves things compatible |
* with 2.2.x, and also retains compatibility with earlier 2.4.x |
* kernels... */ |
|
val.mem_unit = 1; |
val.totalram <<= bitcount; |
val.freeram <<= bitcount; |
val.sharedram <<= bitcount; |
val.bufferram <<= bitcount; |
val.totalswap <<= bitcount; |
val.freeswap <<= bitcount; |
val.totalhigh <<= bitcount; |
val.freehigh <<= bitcount; |
} |
out: |
if (copy_to_user(info, &val, sizeof(struct sysinfo))) |
return -EFAULT; |
return 0; |
} |
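A userspace view of the call above through the standard glibc wrapper. Because the kernel folds mem_unit into the totals here, mem_unit comes back as 1, but multiplying by it anyway stays correct on kernels that report page counts instead.

#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
        struct sysinfo si;

        if (sysinfo(&si) != 0)
                return 1;
        printf("up %ld s, ram %llu bytes, %hu processes\n",
               si.uptime,
               (unsigned long long)si.totalram * si.mem_unit,
               si.procs);
        return 0;
}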
/Makefile
0,0 → 1,34
# |
# Makefile for the linux kernel. |
# |
# Note! Dependencies are done automagically by 'make dep', which also |
# removes any old dependencies. DON'T put your own dependencies here |
# unless it's something special (ie not a .c file). |
# |
# Note 2! The CFLAGS definitions are now in the main makefile... |
|
O_TARGET := kernel.o |
|
export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o |
|
obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ |
module.o exit.o itimer.o info.o time.o softirq.o resource.o \ |
sysctl.o acct.o capability.o ptrace.o timer.o user.o \ |
signal.o sys.o kmod.o context.o |
|
obj-$(CONFIG_UID16) += uid16.o |
obj-$(CONFIG_MODULES) += ksyms.o |
obj-$(CONFIG_PM) += pm.o |
|
ifneq ($(CONFIG_IA64),y) |
ifneq ($(CONFIG_OR32),y) |
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
# needed for x86 only. Why this used to be enabled for all architectures is beyond |
# me. I suspect most platforms don't need this, but until we know that for sure |
# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k |
# to get a correct value for the wait-channel (WCHAN in ps). --davidm |
CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer |
endif |
endif |
|
include $(TOPDIR)/Rules.make |