/*
 * linux/kernel/sched.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * 1996-04-21 Modified by Ulrich Windl to make NTP work
 * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
 * make semaphores SMP safe
 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
 * "A Kernel Model for Precision Timekeeping" by Dave Mills
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (type getpid()), which just extract a field from
 * current-task
 */

/*
 * uClinux revisions for NO_MM
 * Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
 * The Silver Hammer Group, Ltd.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/fdreg.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/ptrace.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tqueue.h>
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

#include <linux/timex.h>

/* SIMON - I don't know why the fuck this prototype can't be in header but it won't work */
extern void switch_to(struct task_struct *prev, struct task_struct *next);

/*
 * kernel variables
 */

int securelevel = 0;			/* system security level */

long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */
volatile struct timeval xtime;		/* The current time */
int tickadj = 500/HZ ? 500/HZ : 1;	/* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);

/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_ERROR;	/* clock synchronization status */
int time_status = STA_UNSYNC;	/* clock status bits */
long time_offset = 0;		/* time adjustment (us) */
long time_constant = 2;		/* pll time constant */
long time_tolerance = MAXFREQ;	/* frequency tolerance (ppm) */
long time_precision = 1;	/* clock precision (us) */
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us) */
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us) */
long time_phase = 0;		/* phase offset (scaled us) */
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;	/* frequency offset (scaled ppm) */
long time_adj = 0;		/* tick adjust (scaled 1 / HZ) */
long time_reftime = 0;		/* time at last adjustment (s) */

long time_adjust = 0;
long time_adjust_step = 0;

int need_resched = 0;
unsigned long event = 0;

extern int _setitimer(int, struct itimerval *, struct itimerval *);
unsigned int * prof_buffer = NULL;
unsigned long prof_len = 0;
unsigned long prof_shift = 0;

#define _S(nr) (1<<((nr)-1))

extern void mem_use(void);

unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
#ifndef NO_MM
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
#endif /* !NO_MM */
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;

struct mm_struct init_mm = INIT_MM;
struct task_struct init_task = INIT_TASK;

unsigned long volatile jiffies=0;

struct task_struct *current_set[NR_CPUS];
struct task_struct *last_task_used_math = NULL;

struct task_struct * task[NR_TASKS] = {&init_task, };

struct kernel_stat kstat = { 0 };

static inline void add_to_runqueue(struct task_struct * p)
{
#ifdef __SMP__
	int cpu=smp_processor_id();
#endif
#if 1 /* sanity tests */
	if (p->next_run || p->prev_run) {
		printk("task already on run-queue\n");
		return;
	}
#endif
	if (p->policy != SCHED_OTHER || p->counter > current->counter + 3)
		need_resched = 1;
	nr_running++;
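	/* Link p in just before init_task: the run-queue is a circular,
	 * doubly-linked list with init_task as its head, so this appends
	 * p at the tail. */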
	(p->prev_run = init_task.prev_run)->next_run = p;
	p->next_run = &init_task;
	init_task.prev_run = p;
#ifdef __SMP__
	/* this is safe only if called with cli() */
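	/* Bit 31 of smp_process_available serves as a spin lock around the
	 * counter update below; set_bit() returns the previous bit value. */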
	while(set_bit(31,&smp_process_available))
	{
		while(test_bit(31,&smp_process_available))
		{
			if(clear_bit(cpu,&smp_invalidate_needed))
			{
				local_flush_tlb();
				set_bit(cpu,&cpu_callin_map[0]);
			}
		}
	}
	smp_process_available++;
	clear_bit(31,&smp_process_available);
	if ((0!=p->pid) && smp_threads_ready)
	{
		int i;
		for (i=0;i<smp_num_cpus;i++)
		{
			if (0==current_set[cpu_logical_map[i]]->pid)
			{
				smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
				break;
			}
		}
	}
#endif
}

static inline void del_from_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;

#if 1 /* sanity tests */
	if (!next || !prev) {
		printk("task not on run-queue\n");
		return;
	}
#endif
	if (p == &init_task) {
		static int nr = 0;
		if (nr < 5) {
			nr++;
			printk("idle task may not sleep\n");
		}
		return;
	}
	nr_running--;
	next->prev_run = prev;
	prev->next_run = next;
	p->next_run = NULL;
	p->prev_run = NULL;
}

static inline void move_last_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;

	/* remove from list */
	next->prev_run = prev;
	prev->next_run = next;
	/* add back to list */
	p->next_run = &init_task;
	prev = init_task.prev_run;
	init_task.prev_run = p;
	p->prev_run = prev;
	prev->next_run = p;
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there. The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
inline void wake_up_process(struct task_struct * p)
{
	unsigned long flags;

	save_flags(flags);
	cli();
	p->state = TASK_RUNNING;
	if (!p->next_run)
		add_to_runqueue(p);
	restore_flags(flags);
}

static void process_timeout(unsigned long __data)
{
	struct task_struct * p = (struct task_struct *) __data;

	p->timeout = 0;
	wake_up_process(p);
}

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *	 -1000: never select this
 *	     0: out of time, recalculate counters (but it might still be
 *		selected)
 *	   +ve: "goodness" value (the larger, the better)
 *	 +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
{
	int weight;

#ifdef __SMP__
	/* We are not permitted to run a task someone else is running */
	if (p->processor != NO_PROC_ID)
		return -1000;
#ifdef PAST_2_0
	/* This process is locked to a processor group */
	if (p->processor_mask && !(p->processor_mask & (1<<this_cpu)))
		return -1000;
#endif
#endif

	/*
	 * Realtime process, select the first one on the
	 * runqueue (taking priorities within processes
	 * into account).
	 */
	if (p->policy != SCHED_OTHER)
		return 1000 + p->rt_priority;

	/*
	 * Give the process a first-approximation goodness value
	 * according to the number of clock-ticks it has left.
	 *
	 * Don't do any other calculations if the time slice is
	 * over..
	 */
	weight = p->counter;
	if (weight) {

#ifdef __SMP__
		/* Give a largish advantage to the same processor... */
		/* (this is equivalent to penalizing other processors) */
		if (p->last_processor == this_cpu)
			weight += PROC_CHANGE_PENALTY;
#endif

		/* .. and a slight advantage to the current process */
		if (p == prev)
			weight += 1;
	}

	return weight;
}


/*
  The following allow_interrupts function is used to workaround a rare but
  nasty deadlock situation that is possible for 2.0.x Intel SMP because it uses
  a single kernel lock and interrupts are only routed to the boot CPU. There
  are two deadlock scenarios this code protects against.

  The first scenario is that if a CPU other than the boot CPU holds the kernel
  lock and needs to wait for an operation to complete that itself requires an
  interrupt, there is a deadlock since the boot CPU may be able to accept the
  interrupt but will not be able to acquire the kernel lock to process it.

  The workaround for this deadlock requires adding calls to allow_interrupts to
  places where this deadlock is possible. These places are known to be present
  in buffer.c and keyboard.c. It is also possible that there are other such
  places which have not been identified yet. In order to break the deadlock,
  the code in allow_interrupts temporarily yields the kernel lock directly to
  the boot CPU to allow the interrupt to be processed. The boot CPU interrupt
  entry code indicates that it is spinning waiting for the kernel lock by
  setting the smp_blocked_interrupt_pending variable. This code notices that
  and manipulates the active_kernel_processor variable to yield the kernel lock
  without ever clearing it. When the interrupt has been processed, the
  saved_active_kernel_processor variable contains the value for the interrupt
  exit code to restore, either the APICID of the CPU that granted it the kernel
  lock, or NO_PROC_ID in the normal case where no yielding occurred. Restoring
  active_kernel_processor from saved_active_kernel_processor returns the kernel
  lock back to the CPU that yielded it.

  The second form of deadlock is even more insidious. Suppose the boot CPU
  takes a page fault and then the previous scenario ensues. In this case, the
  boot CPU would spin with interrupts disabled waiting to acquire the kernel
  lock. To resolve this deadlock, the kernel lock acquisition code must enable
  interrupts briefly so that the pending interrupt can be handled as in the
  case above.

  An additional form of deadlock is where kernel code running on a non-boot CPU
  waits for the jiffies variable to be incremented. This deadlock is avoided
  by having the spin loops in ENTER_KERNEL increment jiffies approximately
  every 10 milliseconds. Finally, if approximately 60 seconds elapse waiting
  for the kernel lock, a message will be printed if possible to indicate that a
  deadlock has been detected.

		Leonard N. Zubkoff
		4 August 1997
*/

#if defined(__SMP__) && defined(__i386__)

volatile unsigned char smp_blocked_interrupt_pending = 0;

volatile unsigned char saved_active_kernel_processor = NO_PROC_ID;

void allow_interrupts(void)
{
	if (smp_processor_id() == boot_cpu_id) return;
	if (smp_blocked_interrupt_pending)
	{
		unsigned long saved_kernel_counter;
		long timeout_counter;
		saved_active_kernel_processor = active_kernel_processor;
		saved_kernel_counter = kernel_counter;
		kernel_counter = 0;
		active_kernel_processor = boot_cpu_id;
		timeout_counter = 6000000;
		while (active_kernel_processor != saved_active_kernel_processor &&
		       --timeout_counter >= 0)
		{
			udelay(10);
			barrier();
		}
		if (timeout_counter < 0)
			panic("FORWARDED INTERRUPT TIMEOUT (AKP = %d, Saved AKP = %d)\n",
			      active_kernel_processor, saved_active_kernel_processor);
		kernel_counter = saved_kernel_counter;
		saved_active_kernel_processor = NO_PROC_ID;
	}
}

#else

void allow_interrupts(void) {}

#endif


/*
 * 'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 * NOTE!! Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
	int c;
	struct task_struct * p;
	struct task_struct * prev, * next;
	unsigned long timeout = 0;
	int this_cpu=smp_processor_id();
	/* check alarm, wake up any interruptible tasks that have got a signal */

	allow_interrupts();

	if (intr_count)
		goto scheduling_in_interrupt;

	if (bh_active & bh_mask) {
		intr_count = 1;
		do_bottom_half();
		intr_count = 0;
	}

	run_task_queue(&tq_scheduler);

	need_resched = 0;
	prev = current;
	cli();
	/* move an exhausted RR process to be last.. */
	if (!prev->counter && prev->policy == SCHED_RR) {
		prev->counter = prev->priority;
		move_last_runqueue(prev);
	}
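	/* An interruptible sleep with a pending signal or an expired timeout
	 * becomes runnable again; any other non-running state is removed
	 * from the run-queue, and TASK_RUNNING falls through untouched. */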
	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (prev->signal & ~prev->blocked)
				goto makerunnable;
			timeout = prev->timeout;
			if (timeout && (timeout <= jiffies)) {
				prev->timeout = 0;
				timeout = 0;
		makerunnable:
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}
	p = init_task.next_run;
	sti();

#ifdef __SMP__
	/*
	 * This is safe as we do not permit re-entry of schedule()
	 */
	prev->processor = NO_PROC_ID;
#define idle_task (task[cpu_number_map[this_cpu]])
#else
#define idle_task (&init_task)
#endif

	/*
	 * Note! there may appear new tasks on the run-queue during this, as
	 * interrupts are enabled. However, they will be put on front of the
	 * list, so our list starting at "p" is essentially fixed.
	 */
	/* this is the scheduler proper: */
	c = -1000;
	next = idle_task;
	while (p != &init_task) {
		int weight = goodness(p, prev, this_cpu);
		if (weight > c)
			c = weight, next = p;
		p = p->next_run;
	}

	/* if all runnable processes have "counter == 0", re-calculate counters */
	if (!c) {
		for_each_task(p)
			p->counter = (p->counter >> 1) + p->priority;
	}
#ifdef __SMP__
	/*
	 * Allocate process to CPU
	 */

	next->processor = this_cpu;
	next->last_processor = this_cpu;
#endif
#ifdef __SMP_PROF__
	/* mark processor running an idle thread */
	if (0==next->pid)
		set_bit(this_cpu,&smp_idle_map);
	else
		clear_bit(this_cpu,&smp_idle_map);
#endif
	if (prev != next) {
		struct timer_list timer;

		kstat.context_swtch++;
		if (timeout) {
			init_timer(&timer);
			timer.expires = timeout;
			timer.data = (unsigned long) prev;
			timer.function = process_timeout;
			add_timer(&timer);
		}
		get_mmu_context(next);
		switch_to(prev,next);
		if (timeout)
			del_timer(&timer);
	}
	return;

scheduling_in_interrupt:
	printk("Aiee: scheduling in interrupt %p\n",
		__builtin_return_address(0));
}

#ifndef __alpha__

/*
 * For backwards compatibility? This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage int sys_pause(void)
{
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	return -ERESTARTNOHAND;
}

#endif

/*
 * wake_up doesn't wake up stopped processes - they have to be awakened
 * with signals or similar.
 *
 * Note that this doesn't need cli-sti pairs: interrupts may not change
 * the wait-queue structures directly, but only call wake_up() to wake
 * a process. The process itself must remove the queue once it has woken.
 */
void wake_up(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;
		if (p != NULL) {
			if ((p->state == TASK_UNINTERRUPTIBLE) ||
			    (p->state == TASK_INTERRUPTIBLE))
				wake_up_process(p);
		}
		if (!next)
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk("        q = %p\n",q);
	printk("       *q = %p\n",*q);
}

void wake_up_interruptible(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;
		if (p != NULL) {
			if (p->state == TASK_INTERRUPTIBLE)
				wake_up_process(p);
		}
		if (!next)
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk("        q = %p\n",q);
	printk("       *q = %p\n",*q);
}


/*
 * Semaphores are implemented using a two-way counter:
 * The "count" variable is decremented for each process
 * that tries to sleep, while the "waking" variable is
 * incremented when the "up()" code goes to wake up waiting
 * processes.
 *
 * Notably, the inline "up()" and "down()" functions can
 * efficiently test if they need to do any extra work (up
 * needs to do something only if count was negative before
 * the increment operation).
 *
 * This routine must execute atomically.
 */
static inline int waking_non_zero(struct semaphore *sem)
{
	int ret ;
	long flags ;

	get_buzz_lock(&sem->lock) ;
	save_flags(flags) ;
	cli() ;

	if ((ret = (sem->waking > 0)))
		sem->waking-- ;

	restore_flags(flags) ;
	give_buzz_lock(&sem->lock) ;
	return(ret) ;
}

/*
 * When __up() is called, the count was negative before
 * incrementing it, and we need to wake up somebody.
 *
 * This routine adds one to the count of processes that need to
 * wake up and exit. ALL waiting processes actually wake up but
 * only the one that gets to the "waking" field first will gate
 * through and acquire the semaphore. The others will go back
 * to sleep.
 *
 * Note that these functions are only called when there is
 * contention on the lock, and as such all this is the
 * "non-critical" part of the whole semaphore business. The
 * critical part is the inline stuff in <asm/semaphore.h>
 * where we want to avoid any extra jumps and calls.
 */
void __up(struct semaphore *sem)
{
	atomic_inc(&sem->waking) ;
	wake_up(&sem->wait);
}

/*
 * Perform the "down" function. Return zero for semaphore acquired,
 * return negative for signalled out of the function.
 *
 * If called from __down, the return is ignored and the wait loop is
 * not interruptible. This means that a task waiting on a semaphore
 * using "down()" cannot be killed until someone does an "up()" on
 * the semaphore.
 *
 * If called from __down_interruptible, the return value gets checked
 * upon return. If the return value is negative then the task continues
 * with the negative value in the return register (it can be tested by
 * the caller).
 *
 * Either form may be used in conjunction with "up()".
 *
 */
int __do_down(struct semaphore * sem, int task_state)
{
	struct task_struct *tsk = current;
	struct wait_queue wait = { tsk, NULL };
	int ret = 0 ;

	tsk->state = task_state;
	add_wait_queue(&sem->wait, &wait);

	/*
	 * Ok, we're set up. sem->count is known to be less than zero
	 * so we must wait.
	 *
	 * We can let go the lock for purposes of waiting.
	 * We re-acquire it after awaking so as to protect
	 * all semaphore operations.
	 *
	 * If "up()" is called before we call waking_non_zero() then
	 * we will catch it right away. If it is called later then
	 * we will have to go through a wakeup cycle to catch it.
	 *
	 * Multiple waiters contend for the semaphore lock to see
	 * who gets to gate through and who has to wait some more.
	 */
	for (;;)
	{
		if (waking_non_zero(sem))	/* are we waking up? */
			break ;			/* yes, exit loop */

		if (   task_state == TASK_INTERRUPTIBLE
		    && (tsk->signal & ~tsk->blocked)	/* signalled */
		   )
		{
			ret = -EINTR ;			/* interrupted */
			atomic_inc(&sem->count) ;	/* give up on down operation */
			break ;
		}

		schedule();
		tsk->state = task_state;
	}

	tsk->state = TASK_RUNNING;
	remove_wait_queue(&sem->wait, &wait);
	return(ret) ;

} /* __do_down */

void __down(struct semaphore * sem)
{
	__do_down(sem,TASK_UNINTERRUPTIBLE) ;
}

int __down_interruptible(struct semaphore * sem)
{
	return(__do_down(sem,TASK_INTERRUPTIBLE)) ;
}


static inline void __sleep_on(struct wait_queue **p, int state)
{
	unsigned long flags;
	struct wait_queue wait = { current, NULL };

	if (!p)
		return;
	if (current == task[0])
		panic("task[0] trying to sleep");
	current->state = state;
	save_flags(flags);
	cli();
	__add_wait_queue(p, &wait);
	sti();
	schedule();
	cli();
	__remove_wait_queue(p, &wait);
	restore_flags(flags);
}

void interruptible_sleep_on(struct wait_queue **p)
{
	__sleep_on(p,TASK_INTERRUPTIBLE);
}

void sleep_on(struct wait_queue **p)
{
	__sleep_on(p,TASK_UNINTERRUPTIBLE);
}

#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

#define SLOW_BUT_DEBUGGING_TIMERS 0

struct timer_vec {
	int index;
	struct timer_list *vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;
	struct timer_list *vec[TVR_SIZE];
};

static struct timer_vec tv5 = { 0 };
static struct timer_vec tv4 = { 0 };
static struct timer_vec tv3 = { 0 };
static struct timer_vec tv2 = { 0 };
static struct timer_vec_root tv1 = { 0 };

static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

static unsigned long timer_jiffies = 0;

static inline void insert_timer(struct timer_list *timer,
				struct timer_list **vec, int idx)
{
	if ((timer->next = vec[idx]))
		vec[idx]->prev = timer;
	vec[idx] = timer;
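	/* The head pointer doubles as a list node here: "next" is the first
	 * member of struct timer_list, so &vec[idx] can stand in for a node
	 * whose next field is vec[idx]. */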
	timer->prev = (struct timer_list *)&vec[idx];
}

static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;

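	/* Pick a wheel level by how far in the future the timer expires:
	 * the low TVR_BITS of "expires" index tv1, and each further group
	 * of TVN_BITS indexes the next level, tv2..tv5. */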
	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		insert_timer(timer, tv1.vec, i);
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		insert_timer(timer, tv2.vec, i);
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv3.vec, i);
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv4.vec, i);
	} else if (expires < timer_jiffies) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		insert_timer(timer, tv1.vec, tv1.index);
	} else if (idx < 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv5.vec, i);
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		timer->next = timer->prev = timer;
	}
}

void add_timer(struct timer_list *timer)
{
	unsigned long flags;
	save_flags(flags);
	cli();
#if SLOW_BUT_DEBUGGING_TIMERS
	if (timer->next || timer->prev) {
		printk("add_timer() called with non-zero list from %p\n",
			__builtin_return_address(0));
		goto out;
	}
#endif
	internal_add_timer(timer);
#if SLOW_BUT_DEBUGGING_TIMERS
out:
#endif
	restore_flags(flags);
}

static inline int detach_timer(struct timer_list *timer)
{
	int ret = 0;
	struct timer_list *next, *prev;
	next = timer->next;
	prev = timer->prev;
	if (next) {
		next->prev = prev;
	}
	if (prev) {
		ret = 1;
		prev->next = next;
	}
	return ret;
}


int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;
	save_flags(flags);
	cli();
	ret = detach_timer(timer);
	timer->next = timer->prev = 0;
	restore_flags(flags);
	return ret;
}

static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer;
	timer = tv->vec[tv->index];
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (timer) {
		struct timer_list *tmp = timer;
		timer = timer->next;
		internal_add_timer(tmp);
	}
	tv->vec[tv->index] = NULL;
	tv->index = (tv->index + 1) & TVN_MASK;
}

static inline void run_timer_list(void)
{
	cli();
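	/* The signed comparison of the jiffies difference (instead of
	 * "jiffies >= timer_jiffies") stays correct when jiffies wraps. */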
	while ((long)(jiffies - timer_jiffies) >= 0) {
		struct timer_list *timer;
		if (!tv1.index) {
			int n = 1;
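			/* An index of 1 after cascading means that level just
			 * emptied slot 0, i.e. it wrapped, so the next level
			 * up must be cascaded as well. */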
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
		while ((timer = tv1.vec[tv1.index])) {
			void (*fn)(unsigned long) = timer->function;
			unsigned long data = timer->data;
			detach_timer(timer);
			timer->next = timer->prev = NULL;
			sti();
			fn(data);
			cli();
		}
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;
	}
	sti();
}

static inline void run_old_timers(void)
{
	struct timer_struct *tp;
	unsigned long mask;

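	/* timer_active is a bitmask with one bit per timer_table slot; the
	 * scan stops as soon as no bit at or above "mask" is still set. */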
	for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
		if (mask > timer_active)
			break;
		if (!(mask & timer_active))
			continue;
		if (tp->expires > jiffies)
			continue;
		timer_active &= ~mask;
		tp->fn();
		sti();
	}
}

void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}

unsigned long timer_active = 0;
struct timer_struct timer_table[32];

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct **p;
	unsigned long nr = 0;

	for(p = &LAST_TASK; p > &FIRST_TASK; --p)
		if (*p && ((*p)->state == TASK_RUNNING ||
			   (*p)->state == TASK_UNINTERRUPTIBLE ||
			   (*p)->state == TASK_SWAPPING))
			nr += FIXED_1;
#ifdef __SMP__
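	/* Each additional CPU runs an idle thread that was counted as
	 * active above, so discount one task per extra CPU. */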
	nr-=(smp_num_cpus-1)*FIXED_1;
#endif
	return nr;
}

static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

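	/* Sample the number of active tasks once every LOAD_FREQ ticks
	 * and fold it into the 1, 5 and 15 minute load averages. */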
count -= ticks;
|
count -= ticks;
|
if (count < 0) {
|
if (count < 0) {
|
count += LOAD_FREQ;
|
count += LOAD_FREQ;
|
active_tasks = count_active_tasks();
|
active_tasks = count_active_tasks();
|
CALC_LOAD(avenrun[0], EXP_1, active_tasks);
|
CALC_LOAD(avenrun[0], EXP_1, active_tasks);
|
CALC_LOAD(avenrun[1], EXP_5, active_tasks);
|
CALC_LOAD(avenrun[1], EXP_5, active_tasks);
|
CALC_LOAD(avenrun[2], EXP_15, active_tasks);
|
CALC_LOAD(avenrun[2], EXP_15, active_tasks);
|
}
|
}
|
}
|
}
|
|
|
/*
|
/*
|
* this routine handles the overflow of the microsecond field
|
* this routine handles the overflow of the microsecond field
|
*
|
*
|
* The tricky bits of code to handle the accurate clock support
|
* The tricky bits of code to handle the accurate clock support
|
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
* They were originally developed for SUN and DEC kernels.
|
* They were originally developed for SUN and DEC kernels.
|
* All the kudos should go to Dave for this stuff.
|
* All the kudos should go to Dave for this stuff.
|
*
|
*
|
*/
|
*/
|
static void second_overflow(void)
|
static void second_overflow(void)
|
{
|
{
|
long ltemp;
|
long ltemp;
|
|
|
/* Bump the maxerror field */
|
/* Bump the maxerror field */
|
time_maxerror += time_tolerance >> SHIFT_USEC;
|
time_maxerror += time_tolerance >> SHIFT_USEC;
|
if ( time_maxerror > NTP_PHASE_LIMIT ) {
|
if ( time_maxerror > NTP_PHASE_LIMIT ) {
|
time_maxerror = NTP_PHASE_LIMIT;
|
time_maxerror = NTP_PHASE_LIMIT;
|
time_state = TIME_ERROR; /* p. 17, sect. 4.3, (b) */
|
time_state = TIME_ERROR; /* p. 17, sect. 4.3, (b) */
|
time_status |= STA_UNSYNC;
|
time_status |= STA_UNSYNC;
|
}
|
}
|
|
|
/*
|
/*
|
* Leap second processing. If in leap-insert state at
|
* Leap second processing. If in leap-insert state at
|
* the end of the day, the system clock is set back one
|
* the end of the day, the system clock is set back one
|
* second; if in leap-delete state, the system clock is
|
* second; if in leap-delete state, the system clock is
|
* set ahead one second. The microtime() routine or
|
* set ahead one second. The microtime() routine or
|
* external clock driver will insure that reported time
|
* external clock driver will insure that reported time
|
* is always monotonic. The ugly divides should be
|
* is always monotonic. The ugly divides should be
|
* replaced.
|
* replaced.
|
*/
|
*/
	switch (time_state) {

	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;

	case TIME_INS:
		if (xtime.tv_sec % 86400 == 0) {
			xtime.tv_sec--;
			time_state = TIME_OOP;
			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;

	case TIME_DEL:
		if ((xtime.tv_sec + 1) % 86400 == 0) {
			xtime.tv_sec++;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;

	case TIME_OOP:
		time_state = TIME_WAIT;
		break;

	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
	}

	/*
	 * Compute the phase adjustment for the next second. In
	 * PLL mode, the offset is reduced by a fixed factor
	 * times the time constant. In FLL mode the offset is
	 * used directly. In either mode, the maximum phase
	 * adjustment for each second is clamped so as to spread
	 * the adjustment over not more than the number of
	 * seconds between updates.
	 */
	if (time_offset < 0) {
		ltemp = -time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset += ltemp;
		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	} else {
		ltemp = time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset -= ltemp;
		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	}
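	/*
	 * Illustrative numbers, assuming the HZ == 100 constants from
	 * <linux/timex.h> (SHIFT_KG 6, SHIFT_HZ 7, SHIFT_UPDATE 12,
	 * SHIFT_SCALE 22): in PLL mode with time_constant 0, a 1000 usec
	 * offset is reduced by 1000 >> 6, about 15 usec, this second.
	 * The final shift rescales that amount into the per-tick
	 * 1/(1 << SHIFT_SCALE) usec units and spreads it across the
	 * 1 << SHIFT_HZ ticks of the coming second.
	 */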

	/*
	 * Compute the frequency estimate and additional phase
	 * adjustment due to frequency error for the next
	 * second. When the PPS signal is engaged, gnaw on the
	 * watchdog counter and update the frequency computed by
	 * the pll and the PPS signal.
	 */
	pps_valid++;
	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
		pps_jitter = MAXTIME;
		pps_stabil = MAXFREQ;
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
	}
	ltemp = time_freq + pps_freq;
	if (ltemp < 0)
		time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
	else
		time_adj += ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
	/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
	 */
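	/*
	 * That is: time_adj + time_adj/4 + time_adj/32 scales by
	 * 1.28125, nearly the 128/100 = 1.28 needed to go from the
	 * assumed 1 << SHIFT_HZ = 128 ticks per second to the real 100.
	 */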
	if (time_adj < 0)
		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
	else
		time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}

/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	if ( (time_adjust_step = time_adjust) != 0 ) {
		/* We are doing an adjtime thing.
		 *
		 * Prepare time_adjust_step to be within bounds.
		 * Note that a positive time_adjust means we want the clock
		 * to run faster.
		 *
		 * Limit the amount of the step to be in the range
		 * -tickadj .. +tickadj
		 */
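		/*
		 * Illustration (assuming the usual default tickadj of
		 * 500/HZ, i.e. 5 usec at HZ == 100): an adjtime() slew
		 * of +1000 usec is applied 5 usec per tick, 500 usec
		 * per second, and so completes in about two seconds.
		 */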
		if (time_adjust > tickadj)
			time_adjust_step = tickadj;
		else if (time_adjust < -tickadj)
			time_adjust_step = -tickadj;

		/* Reduce by this step the amount of time left */
		time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
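	/*
	 * (time_adj is in units of 1/(1 << SHIFT_SCALE) usec per tick;
	 * time_phase accumulates it, and only whole microseconds, i.e.
	 * multiples of FINEUSEC, are moved into xtime.tv_usec.)
	 */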
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}

/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing it this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks).
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_usec >= 1000000) {
		xtime.tv_usec -= 1000000;
		xtime.tv_sec++;
		second_overflow();
	}
}

static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	long psecs;

	p->utime += user;
	p->stime += system;

	psecs = (p->stime + p->utime) / HZ;
	if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
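		/*
		 * (The equality below holds only on the tick where the
		 * combined time crosses a whole-second boundary, so the
		 * signal fires at most once per second of overrun, and
		 * may be skipped if several ticks are accounted at once.)
		 */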
		if (psecs * HZ == p->stime + p->utime)
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}

static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		if (it_virt <= ticks) {
			it_virt = ticks + p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt - ticks;
	}
}

static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (it_prof <= ticks) {
			it_prof = ticks + p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof - ticks;
	}
}

static __inline__ void update_one_process(struct task_struct *p,
	unsigned long ticks, unsigned long user, unsigned long system)
{
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p, ticks);
}

static void update_process_times(unsigned long ticks, unsigned long system)
{
#ifndef __SMP__
	struct task_struct * p = current;
	unsigned long user = ticks - system;
	if (p->pid) {
		p->counter -= ticks;
		if (p->counter < 0) {
			p->counter = 0;
			need_resched = 1;
		}
		if (p->priority < DEF_PRIORITY)
			kstat.cpu_nice += user;
		else
			kstat.cpu_user += user;
		kstat.cpu_system += system;
	}
	update_one_process(p, ticks, user, system);
#else
	int cpu,j;
	cpu = smp_processor_id();
	for (j=0;j<smp_num_cpus;j++)
	{
		int i = cpu_logical_map[j];
		struct task_struct *p;

#ifdef __SMP_PROF__
		if (test_bit(i,&smp_idle_map))
			smp_idle_count[i]++;
#endif
		p = current_set[i];
		/*
		 * Do we have a real process?
		 */
		if (p->pid) {
			/* assume user-mode process */
			unsigned long utime = ticks;
			unsigned long stime = 0;
			if (cpu == i) {
				utime = ticks-system;
				stime = system;
			} else if (smp_proc_in_lock[j]) {
				utime = 0;
				stime = ticks;
			}
			update_one_process(p, ticks, utime, stime);

			if (p->priority < DEF_PRIORITY)
				kstat.cpu_nice += utime;
			else
				kstat.cpu_user += utime;
			kstat.cpu_system += stime;

			p->counter -= ticks;
			if (p->counter >= 0)
				continue;
			p->counter = 0;
		} else {
			/*
			 * Idle processor found, do we have anything
			 * we could run?
			 */
			if (!(0x7fffffff & smp_process_available))
				continue;
		}
		/* Ok, we should reschedule, do the magic */
		if (i==cpu)
			need_resched = 1;
		else
			smp_message_pass(i, MSG_RESCHEDULE, 0L, 0);
	}
#endif
}
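
/*
 * do_timer() below only counts ticks from interrupt context; timer_bh()
 * later claims them atomically with xchg() and does the heavier
 * accounting with interrupts enabled.
 */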

static unsigned long lost_ticks = 0;
static unsigned long lost_ticks_system = 0;

static inline void update_times(void)
{
	unsigned long ticks;

	ticks = xchg(&lost_ticks, 0);

	if (ticks) {
		unsigned long system;

		system = xchg(&lost_ticks_system, 0);
		calc_load(ticks);
		update_wall_time(ticks);
		update_process_times(ticks, system);
	}
}

void timer_bh(void)
{
	update_times();
	run_old_timers();
	run_timer_list();
}

void do_timer(struct pt_regs * regs)
{
	(*(unsigned long *)&jiffies)++;
	lost_ticks++;
	mark_bh(TIMER_BH);
	if (!user_mode(regs)) {
		lost_ticks_system++;
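		/*
		 * Kernel profiling: bucket the interrupted kernel PC,
		 * taken relative to _stext and scaled down by prof_shift,
		 * into the profil()-style histogram.
		 */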
		if (prof_buffer && current->pid) {
			extern int _stext;
			unsigned long ip = instruction_pointer(regs);
			ip -= (unsigned long) &_stext;
			ip >>= prof_shift;
			if (ip < prof_len)
				prof_buffer[ip]++;
		}
	}
	if (tq_timer)
		mark_bh(TQUEUE_BH);
}

#ifndef __alpha__

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */
asmlinkage int sys_getpid(void)
{
	return current->pid;
}

asmlinkage int sys_getppid(void)
{
	return current->p_opptr->pid;
}

asmlinkage int sys_getuid(void)
{
	return current->uid;
}

asmlinkage int sys_geteuid(void)
{
	return current->euid;
}

asmlinkage int sys_getgid(void)
{
	return current->gid;
}

asmlinkage int sys_getegid(void)
{
	return current->egid;
}

/*
 * This has been replaced by sys_setpriority.  Maybe it should be
 * moved into the arch dependent tree for those ports that require
 * it for backward compatibility?
 */
asmlinkage int sys_nice(int increment)
{
	unsigned long newprio;
	int increase = 0;

	newprio = increment;
	if (increment < 0) {
		if (!suser())
			return -EPERM;
		newprio = -increment;
		increase = 1;
	}
	if (newprio > 40)
		newprio = 40;
	/*
	 * do a "normalization" of the priority (traditionally
	 * unix nice values are -20..20, linux doesn't really
	 * use that kind of thing, but uses the length of the
	 * timeslice instead (default 150 msec). The rounding is
	 * why we want to avoid negative values.
	 */
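	/*
	 * Worked example, assuming HZ == 100 so that DEF_PRIORITY is 20:
	 * nice(10) gives (10*20 + 10)/20 = 10, so the priority drops
	 * from the default 20 to 10 and the timeslice is halved.
	 */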
	newprio = (newprio * DEF_PRIORITY + 10) / 20;
	increment = newprio;
	if (increase)
		increment = -increment;
	newprio = current->priority - increment;
	if ((signed) newprio < 1)
		newprio = 1;
	if (newprio > DEF_PRIORITY*2)
		newprio = DEF_PRIORITY*2;
	current->priority = newprio;
	return 0;
}

#endif
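
/*
 * A pid of 0 means the calling process, matching the POSIX sched_*
 * interfaces served below; any other pid is looked up in the task list.
 */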

static struct task_struct *find_process_by_pid(pid_t pid)
{
	struct task_struct *p, *q;

	if (pid == 0)
		p = current;
	else {
		p = 0;
		for_each_task(q) {
			if (q && q->pid == pid) {
				p = q;
				break;
			}
		}
	}
	return p;
}

static int setscheduler(pid_t pid, int policy,
			struct sched_param *param)
{
	int error;
	struct sched_param lp;
	struct task_struct *p;

	if (!param || pid < 0)
		return -EINVAL;

	error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
	if (error)
		return error;
	memcpy_fromfs(&lp, param, sizeof(struct sched_param));

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	if (policy < 0)
		policy = p->policy;
	else if (policy != SCHED_FIFO && policy != SCHED_RR &&
		 policy != SCHED_OTHER)
		return -EINVAL;

	/*
	 * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
	 * priority for SCHED_OTHER is 0.
	 */
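	/*
	 * (The inequality below rejects exactly the mismatched cases:
	 * SCHED_OTHER with a nonzero priority, or a real-time policy
	 * with priority 0.)
	 */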
	if (lp.sched_priority < 0 || lp.sched_priority > 99)
		return -EINVAL;
	if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
		return -EINVAL;

	if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
		return -EPERM;
	if ((current->euid != p->euid) && (current->euid != p->uid) &&
	    !suser())
		return -EPERM;

	p->policy = policy;
	p->rt_priority = lp.sched_priority;
	cli();
	if (p->next_run)
		move_last_runqueue(p);
	sti();
	need_resched = 1;
	return 0;
}

asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
				      struct sched_param *param)
{
	return setscheduler(pid, policy, param);
}

asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
{
	return setscheduler(pid, -1, param);
}

asmlinkage int sys_sched_getscheduler(pid_t pid)
{
	struct task_struct *p;

	if (pid < 0)
		return -EINVAL;

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	return p->policy;
}

asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
{
	int error;
	struct task_struct *p;
	struct sched_param lp;

	if (!param || pid < 0)
		return -EINVAL;

	error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
	if (error)
		return error;

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	lp.sched_priority = p->rt_priority;
	memcpy_tofs(param, &lp, sizeof(struct sched_param));

	return 0;
}

asmlinkage int sys_sched_yield(void)
{
	cli();
	move_last_runqueue(current);
	current->counter = 0;
	need_resched = 1;
	sti();
	return 0;
}

asmlinkage int sys_sched_get_priority_max(int policy)
{
	switch (policy) {
	case SCHED_FIFO:
	case SCHED_RR:
		return 99;
	case SCHED_OTHER:
		return 0;
	}

	return -EINVAL;
}

asmlinkage int sys_sched_get_priority_min(int policy)
{
	switch (policy) {
	case SCHED_FIFO:
	case SCHED_RR:
		return 1;
	case SCHED_OTHER:
		return 0;
	}

	return -EINVAL;
}

asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
	int error;
	struct timespec t;

	error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
	if (error)
		return error;

	/* Values taken from 2.1.38 */
	t.tv_sec = 0;
	t.tv_nsec = 150000;	/* is this right for non-Intel architectures too? */
	memcpy_tofs(interval, &t, sizeof(struct timespec));

	return 0;
}

/*
 * change timespec to jiffies, trying to avoid the
 * most obvious overflows..
 */
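/*
 * E.g. at HZ == 100 a jiffy is 10000000 nsec, so a 15 msec request
 * becomes (15000000 + 9999999) / 10000000 = 2 jiffies: the conversion
 * rounds up so a sleep is never shorter than asked for.
 */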
static unsigned long timespectojiffies(struct timespec *value)
{
	unsigned long sec = (unsigned) value->tv_sec;
	long nsec = value->tv_nsec;

	if (sec > (LONG_MAX / HZ))
		return LONG_MAX;
	nsec += 1000000000L / HZ - 1;
	nsec /= 1000000000L / HZ;
	return HZ * sec + nsec;
}

static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
{
	value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
	value->tv_sec = jiffies / HZ;
	return;
}

asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	int error;
	struct timespec t;
	unsigned long expire;

	error = verify_area(VERIFY_READ, rqtp, sizeof(struct timespec));
	if (error)
		return error;
	memcpy_fromfs(&t, rqtp, sizeof(struct timespec));
	if (rmtp) {
		error = verify_area(VERIFY_WRITE, rmtp,
				    sizeof(struct timespec));
		if (error)
			return error;
	}

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER) {
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

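	/*
	 * (The (t.tv_sec || t.tv_nsec) term adds one extra jiffy for any
	 * nonzero request, since the current tick is already partly over
	 * and the sleep must not end early.)
	 */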
	expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
	current->timeout = expire;
	current->state = TASK_INTERRUPTIBLE;
	schedule();

	if (expire > jiffies) {
		if (rmtp) {
			jiffiestotimespec(expire - jiffies -
					  (expire > jiffies + 1), &t);
			memcpy_tofs(rmtp, &t, sizeof(struct timespec));
		}
		return -EINTR;
	}

	return 0;
}

/* Used in fs/proc/array.c */
unsigned long get_wchan(struct task_struct *p)
{
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
#if defined(__i386__)
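	/*
	 * Walk the saved frame-pointer chain: each frame holds the
	 * caller's ebp at [ebp] and its return address at [ebp+4].
	 * The first return address outside the interruptible_sleep_on()
	 * .. add_timer() text range is reported as the wait channel;
	 * this relies on the link order of those functions.
	 */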
	{
		unsigned long ebp, eip;
		unsigned long stack_page;
		int count = 0;

		stack_page = p->kernel_stack_page;
		if (!stack_page)
			return 0;
		ebp = p->tss.ebp;
		do {
			if (ebp < stack_page || ebp >= 4092+stack_page)
				return 0;
			eip = *(unsigned long *) (ebp+4);
			if (eip < (unsigned long) interruptible_sleep_on
			    || eip >= (unsigned long) add_timer)
				return eip;
			ebp = *(unsigned long *) ebp;
		} while (count++ < 16);
	}
#elif defined(__alpha__)
	/*
	 * This one depends on the frame size of schedule().  Do a
	 * "disass schedule" in gdb to find the frame size.  Also, the
	 * code assumes that sleep_on() follows immediately after
	 * interruptible_sleep_on() and that add_timer() follows
	 * immediately after sleep_on().  Ugly, isn't it?
	 * Maybe adding a wchan field to task_struct would be better,
	 * after all...
	 */
	{
		unsigned long schedule_frame;
		unsigned long pc;

		pc = thread_saved_pc(&p->tss);
		if (pc >= (unsigned long) interruptible_sleep_on && pc < (unsigned long) add_timer) {
			schedule_frame = ((unsigned long *)p->tss.ksp)[6];
			return ((unsigned long *)schedule_frame)[12];
		}
		return pc;
	}
#endif
	return 0;
}

static void show_task(int nr,struct task_struct * p)
{
	unsigned long free;
	static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

	printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
	if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
		printk(stat_nam[p->state]);
	else
		printk(" ");
#if ((~0UL) == 0xffffffff)
	if (p == current)
		printk(" current ");
	else
		printk(" %08lX ", thread_saved_pc(&p->tss));
	printk("%08lX ", get_wchan(p));
#else
	if (p == current)
		printk(" current task ");
	else
		printk(" %016lx ", thread_saved_pc(&p->tss));
	printk("%08lX ", get_wchan(p) & 0xffffffffL);
#endif
	if (((unsigned long *)p->kernel_stack_page)[0] != STACK_MAGIC)
		printk(" bad-");

	for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
		if (((unsigned long *)p->kernel_stack_page)[free] != STACK_UNTOUCHED_MAGIC)
			break;
	}
	printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
	if (p->p_cptr)
		printk("%5d ", p->p_cptr->pid);
	else
		printk(" ");
	if (p->p_ysptr)
		printk("%7d", p->p_ysptr->pid);
	else
		printk(" ");
	if (p->p_osptr)
		printk(" %5d\n", p->p_osptr->pid);
	else
		printk("\n");
}

void show_state(void)
{
	int i;

#if ((~0UL) == 0xffffffff)
	printk("\n"
	       " free sibling\n");
	printk(" task PC wchan stack pid father child younger older\n");
#else
	printk("\n"
	       " free sibling\n");
	printk(" task PC wchan stack pid father child younger older\n");
#endif
	for (i=0 ; i<NR_TASKS ; i++)
		if (task[i])
			show_task(i,task[i]);
}

void sched_init(void)
{
	/*
	 * We have to do a little magic to get the first
	 * process right in SMP mode.
	 */
	int cpu=smp_processor_id();
	int i;
#ifndef __SMP__
	current_set[cpu]=&init_task;
#else
	init_task.processor=cpu;
	for(cpu = 0; cpu < NR_CPUS; cpu++)
		current_set[cpu] = &init_task;
#endif

	init_kernel_stack[0] = STACK_MAGIC;
	for(i=1;i<1024;i++)
		init_kernel_stack[i] = STACK_UNTOUCHED_MAGIC;

	init_bh(TIMER_BH, timer_bh);
	init_bh(TQUEUE_BH, tqueue_bh);
	init_bh(IMMEDIATE_BH, immediate_bh);
}