/*
 * linux/kernel/sched.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * 1996-04-21 Modified by Ulrich Windl to make NTP work
 * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
 * make semaphores SMP safe
 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
 * "A Kernel Model for Precision Timekeeping" by Dave Mills
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (type getpid()), which just extract a field from
 * current-task
 */

/*
 * uClinux revisions for NO_MM
 * Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
 * The Silver Hammer Group, Ltd.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/fdreg.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/ptrace.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tqueue.h>
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

#include <linux/timex.h>

/* SIMON - I don't know why the fuck this prototype can't be in header but it won't work */
extern void switch_to(struct task_struct *prev, struct task_struct *next);

/*
 * kernel variables
 */

int securelevel = 0;			/* system security level */

long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */
volatile struct timeval xtime;		/* The current time */
int tickadj = 500/HZ ? 500/HZ : 1;	/* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);

/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_ERROR;	/* clock synchronization status */
int time_status = STA_UNSYNC;	/* clock status bits */
long time_offset = 0;		/* time adjustment (us) */
long time_constant = 2;		/* pll time constant */
long time_tolerance = MAXFREQ;	/* frequency tolerance (ppm) */
long time_precision = 1;	/* clock precision (us) */
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us) */
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us) */
long time_phase = 0;		/* phase offset (scaled us) */
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;	/* frequency offset (scaled ppm) */
long time_adj = 0;		/* tick adjust (scaled 1 / HZ) */
long time_reftime = 0;		/* time at last adjustment (s) */

long time_adjust = 0;
long time_adjust_step = 0;

int need_resched = 0;
unsigned long event = 0;

extern int _setitimer(int, struct itimerval *, struct itimerval *);
unsigned int * prof_buffer = NULL;
unsigned long prof_len = 0;
unsigned long prof_shift = 0;

#define _S(nr) (1<<((nr)-1))

extern void mem_use(void);

unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
#ifndef NO_MM
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
#endif /* !NO_MM */
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;

struct mm_struct init_mm = INIT_MM;
struct task_struct init_task = INIT_TASK;

unsigned long volatile jiffies=0;

struct task_struct *current_set[NR_CPUS];
struct task_struct *last_task_used_math = NULL;

struct task_struct * task[NR_TASKS] = {&init_task, };

struct kernel_stat kstat = { 0 };

static inline void add_to_runqueue(struct task_struct * p)
{
#ifdef __SMP__
	int cpu=smp_processor_id();
#endif
#if 1 /* sanity tests */
	if (p->next_run || p->prev_run) {
		printk("task already on run-queue\n");
		return;
	}
#endif
	if (p->policy != SCHED_OTHER || p->counter > current->counter + 3)
		need_resched = 1;
	nr_running++;
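	/* Link p in just before init_task: the run-queue is a circular,
	 * doubly-linked list with init_task as its head, so this appends
	 * p at the tail. */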
	(p->prev_run = init_task.prev_run)->next_run = p;
	p->next_run = &init_task;
	init_task.prev_run = p;
#ifdef __SMP__
	/* this is safe only if called with cli() */
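	/* Bit 31 of smp_process_available serves as a spin lock around the
	 * counter update below; set_bit() returns the previous bit value. */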
	while(set_bit(31,&smp_process_available))
	{
		while(test_bit(31,&smp_process_available))
		{
			if(clear_bit(cpu,&smp_invalidate_needed))
			{
				local_flush_tlb();
				set_bit(cpu,&cpu_callin_map[0]);
			}
		}
	}
	smp_process_available++;
	clear_bit(31,&smp_process_available);
	if ((0!=p->pid) && smp_threads_ready)
	{
		int i;
		for (i=0;i<smp_num_cpus;i++)
		{
			if (0==current_set[cpu_logical_map[i]]->pid)
			{
				smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
				break;
			}
		}
	}
#endif
}

static inline void del_from_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;

#if 1 /* sanity tests */
	if (!next || !prev) {
		printk("task not on run-queue\n");
		return;
	}
#endif
	if (p == &init_task) {
		static int nr = 0;
		if (nr < 5) {
			nr++;
			printk("idle task may not sleep\n");
		}
		return;
	}
	nr_running--;
	next->prev_run = prev;
	prev->next_run = next;
	p->next_run = NULL;
	p->prev_run = NULL;
}

static inline void move_last_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;

	/* remove from list */
	next->prev_run = prev;
	prev->next_run = next;
	/* add back to list */
	p->next_run = &init_task;
	prev = init_task.prev_run;
	init_task.prev_run = p;
	p->prev_run = prev;
	prev->next_run = p;
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there. The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
inline void wake_up_process(struct task_struct * p)
{
	unsigned long flags;

	save_flags(flags);
	cli();
	p->state = TASK_RUNNING;
	if (!p->next_run)
		add_to_runqueue(p);
	restore_flags(flags);
}

static void process_timeout(unsigned long __data)
{
	struct task_struct * p = (struct task_struct *) __data;

	p->timeout = 0;
	wake_up_process(p);
}

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *	 -1000: never select this
 *	     0: out of time, recalculate counters (but it might still be
 *		selected)
 *	   +ve: "goodness" value (the larger, the better)
 *	 +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
{
	int weight;

#ifdef __SMP__
	/* We are not permitted to run a task someone else is running */
	if (p->processor != NO_PROC_ID)
		return -1000;
#ifdef PAST_2_0
	/* This process is locked to a processor group */
	if (p->processor_mask && !(p->processor_mask & (1<<this_cpu)))
		return -1000;
#endif
#endif

	/*
	 * Realtime process, select the first one on the
	 * runqueue (taking priorities within processes
	 * into account).
	 */
	if (p->policy != SCHED_OTHER)
		return 1000 + p->rt_priority;

	/*
	 * Give the process a first-approximation goodness value
	 * according to the number of clock-ticks it has left.
	 *
	 * Don't do any other calculations if the time slice is
	 * over..
	 */
	weight = p->counter;
	if (weight) {

#ifdef __SMP__
		/* Give a largish advantage to the same processor... */
		/* (this is equivalent to penalizing other processors) */
		if (p->last_processor == this_cpu)
			weight += PROC_CHANGE_PENALTY;
#endif

		/* .. and a slight advantage to the current process */
		if (p == prev)
			weight += 1;
	}

	return weight;
}


/*
  The following allow_interrupts function is used to workaround a rare but
  nasty deadlock situation that is possible for 2.0.x Intel SMP because it uses
  a single kernel lock and interrupts are only routed to the boot CPU. There
  are two deadlock scenarios this code protects against.

  The first scenario is that if a CPU other than the boot CPU holds the kernel
  lock and needs to wait for an operation to complete that itself requires an
  interrupt, there is a deadlock since the boot CPU may be able to accept the
  interrupt but will not be able to acquire the kernel lock to process it.

  The workaround for this deadlock requires adding calls to allow_interrupts to
  places where this deadlock is possible. These places are known to be present
  in buffer.c and keyboard.c. It is also possible that there are other such
  places which have not been identified yet. In order to break the deadlock,
  the code in allow_interrupts temporarily yields the kernel lock directly to
  the boot CPU to allow the interrupt to be processed. The boot CPU interrupt
  entry code indicates that it is spinning waiting for the kernel lock by
  setting the smp_blocked_interrupt_pending variable. This code notices that
  and manipulates the active_kernel_processor variable to yield the kernel lock
  without ever clearing it. When the interrupt has been processed, the
  saved_active_kernel_processor variable contains the value for the interrupt
  exit code to restore, either the APICID of the CPU that granted it the kernel
  lock, or NO_PROC_ID in the normal case where no yielding occurred. Restoring
  active_kernel_processor from saved_active_kernel_processor returns the kernel
  lock back to the CPU that yielded it.

  The second form of deadlock is even more insidious. Suppose the boot CPU
  takes a page fault and then the previous scenario ensues. In this case, the
  boot CPU would spin with interrupts disabled waiting to acquire the kernel
  lock. To resolve this deadlock, the kernel lock acquisition code must enable
  interrupts briefly so that the pending interrupt can be handled as in the
  case above.

  An additional form of deadlock is where kernel code running on a non-boot CPU
  waits for the jiffies variable to be incremented. This deadlock is avoided
  by having the spin loops in ENTER_KERNEL increment jiffies approximately
  every 10 milliseconds. Finally, if approximately 60 seconds elapse waiting
  for the kernel lock, a message will be printed if possible to indicate that a
  deadlock has been detected.

		Leonard N. Zubkoff
		4 August 1997
*/

#if defined(__SMP__) && defined(__i386__)

volatile unsigned char smp_blocked_interrupt_pending = 0;

volatile unsigned char saved_active_kernel_processor = NO_PROC_ID;

void allow_interrupts(void)
{
	if (smp_processor_id() == boot_cpu_id) return;
	if (smp_blocked_interrupt_pending)
	{
		unsigned long saved_kernel_counter;
		long timeout_counter;
		saved_active_kernel_processor = active_kernel_processor;
		saved_kernel_counter = kernel_counter;
		kernel_counter = 0;
		active_kernel_processor = boot_cpu_id;
		timeout_counter = 6000000;
		while (active_kernel_processor != saved_active_kernel_processor &&
		       --timeout_counter >= 0)
		{
			udelay(10);
			barrier();
		}
		if (timeout_counter < 0)
			panic("FORWARDED INTERRUPT TIMEOUT (AKP = %d, Saved AKP = %d)\n",
			      active_kernel_processor, saved_active_kernel_processor);
		kernel_counter = saved_kernel_counter;
		saved_active_kernel_processor = NO_PROC_ID;
	}
}

#else

void allow_interrupts(void) {}

#endif


/*
 * 'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 * NOTE!! Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
	int c;
	struct task_struct * p;
	struct task_struct * prev, * next;
	unsigned long timeout = 0;
	int this_cpu=smp_processor_id();
	/* check alarm, wake up any interruptible tasks that have got a signal */

	allow_interrupts();

	if (intr_count)
		goto scheduling_in_interrupt;

	if (bh_active & bh_mask) {
		intr_count = 1;
		do_bottom_half();
		intr_count = 0;
	}

	run_task_queue(&tq_scheduler);

	need_resched = 0;
	prev = current;
	cli();
	/* move an exhausted RR process to be last.. */
	if (!prev->counter && prev->policy == SCHED_RR) {
		prev->counter = prev->priority;
		move_last_runqueue(prev);
	}
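	/* An interruptible sleep with a pending signal or an expired timeout
	 * becomes runnable again; any other non-running state is removed
	 * from the run-queue, and TASK_RUNNING falls through untouched. */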
	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (prev->signal & ~prev->blocked)
				goto makerunnable;
			timeout = prev->timeout;
			if (timeout && (timeout <= jiffies)) {
				prev->timeout = 0;
				timeout = 0;
		makerunnable:
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}
	p = init_task.next_run;
	sti();

#ifdef __SMP__
	/*
	 * This is safe as we do not permit re-entry of schedule()
	 */
	prev->processor = NO_PROC_ID;
#define idle_task (task[cpu_number_map[this_cpu]])
#else
#define idle_task (&init_task)
#endif

	/*
	 * Note! there may appear new tasks on the run-queue during this, as
	 * interrupts are enabled. However, they will be put on front of the
	 * list, so our list starting at "p" is essentially fixed.
	 */
	/* this is the scheduler proper: */
	c = -1000;
	next = idle_task;
	while (p != &init_task) {
		int weight = goodness(p, prev, this_cpu);
		if (weight > c)
			c = weight, next = p;
		p = p->next_run;
	}

	/* if all runnable processes have "counter == 0", re-calculate counters */
	if (!c) {
		for_each_task(p)
			p->counter = (p->counter >> 1) + p->priority;
	}
#ifdef __SMP__
	/*
	 * Allocate process to CPU
	 */

	next->processor = this_cpu;
	next->last_processor = this_cpu;
#endif
#ifdef __SMP_PROF__
	/* mark processor running an idle thread */
	if (0==next->pid)
		set_bit(this_cpu,&smp_idle_map);
	else
		clear_bit(this_cpu,&smp_idle_map);
#endif
	if (prev != next) {
		struct timer_list timer;

		kstat.context_swtch++;
		if (timeout) {
			init_timer(&timer);
			timer.expires = timeout;
			timer.data = (unsigned long) prev;
			timer.function = process_timeout;
			add_timer(&timer);
		}
		get_mmu_context(next);
		switch_to(prev,next);
		if (timeout)
			del_timer(&timer);
	}
	return;

scheduling_in_interrupt:
	printk("Aiee: scheduling in interrupt %p\n",
		__builtin_return_address(0));
}

#ifndef __alpha__

/*
 * For backwards compatibility? This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage int sys_pause(void)
{
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	return -ERESTARTNOHAND;
}

#endif

/*
 * wake_up doesn't wake up stopped processes - they have to be awakened
 * with signals or similar.
 *
 * Note that this doesn't need cli-sti pairs: interrupts may not change
 * the wait-queue structures directly, but only call wake_up() to wake
 * a process. The process itself must remove the queue once it has woken.
 */
void wake_up(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;
		if (p != NULL) {
			if ((p->state == TASK_UNINTERRUPTIBLE) ||
			    (p->state == TASK_INTERRUPTIBLE))
				wake_up_process(p);
		}
		if (!next)
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk("        q = %p\n",q);
	printk("       *q = %p\n",*q);
}

void wake_up_interruptible(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;
		if (p != NULL) {
			if (p->state == TASK_INTERRUPTIBLE)
				wake_up_process(p);
		}
		if (!next)
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk("        q = %p\n",q);
	printk("       *q = %p\n",*q);
}


/*
 * Semaphores are implemented using a two-way counter:
 * The "count" variable is decremented for each process
 * that tries to sleep, while the "waking" variable is
 * incremented when the "up()" code goes to wake up waiting
 * processes.
 *
 * Notably, the inline "up()" and "down()" functions can
 * efficiently test if they need to do any extra work (up
 * needs to do something only if count was negative before
 * the increment operation).
 *
 * This routine must execute atomically.
 */
static inline int waking_non_zero(struct semaphore *sem)
{
	int ret ;
	long flags ;

	get_buzz_lock(&sem->lock) ;
	save_flags(flags) ;
	cli() ;

	if ((ret = (sem->waking > 0)))
		sem->waking-- ;

	restore_flags(flags) ;
	give_buzz_lock(&sem->lock) ;
	return(ret) ;
}

/*
 * When __up() is called, the count was negative before
 * incrementing it, and we need to wake up somebody.
 *
 * This routine adds one to the count of processes that need to
 * wake up and exit. ALL waiting processes actually wake up but
 * only the one that gets to the "waking" field first will gate
 * through and acquire the semaphore. The others will go back
 * to sleep.
 *
 * Note that these functions are only called when there is
 * contention on the lock, and as such all this is the
 * "non-critical" part of the whole semaphore business. The
 * critical part is the inline stuff in <asm/semaphore.h>
 * where we want to avoid any extra jumps and calls.
 */
void __up(struct semaphore *sem)
{
	atomic_inc(&sem->waking) ;
	wake_up(&sem->wait);
}

/*
 * Perform the "down" function. Return zero for semaphore acquired,
 * return negative for signalled out of the function.
 *
 * If called from __down, the return is ignored and the wait loop is
 * not interruptible. This means that a task waiting on a semaphore
 * using "down()" cannot be killed until someone does an "up()" on
 * the semaphore.
 *
 * If called from __down_interruptible, the return value gets checked
 * upon return. If the return value is negative then the task continues
 * with the negative value in the return register (it can be tested by
 * the caller).
 *
 * Either form may be used in conjunction with "up()".
 *
 */
int __do_down(struct semaphore * sem, int task_state)
{
	struct task_struct *tsk = current;
	struct wait_queue wait = { tsk, NULL };
	int ret = 0 ;

	tsk->state = task_state;
	add_wait_queue(&sem->wait, &wait);

	/*
	 * Ok, we're set up. sem->count is known to be less than zero
	 * so we must wait.
	 *
	 * We can let go the lock for purposes of waiting.
	 * We re-acquire it after awaking so as to protect
	 * all semaphore operations.
	 *
	 * If "up()" is called before we call waking_non_zero() then
	 * we will catch it right away. If it is called later then
	 * we will have to go through a wakeup cycle to catch it.
	 *
	 * Multiple waiters contend for the semaphore lock to see
	 * who gets to gate through and who has to wait some more.
	 */
	for (;;)
	{
		if (waking_non_zero(sem))	/* are we waking up? */
			break ;			/* yes, exit loop */

		if (   task_state == TASK_INTERRUPTIBLE
		    && (tsk->signal & ~tsk->blocked)	/* signalled */
		   )
		{
			ret = -EINTR ;			/* interrupted */
			atomic_inc(&sem->count) ;	/* give up on down operation */
			break ;
		}

		schedule();
		tsk->state = task_state;
	}

	tsk->state = TASK_RUNNING;
	remove_wait_queue(&sem->wait, &wait);
	return(ret) ;

} /* __do_down */

void __down(struct semaphore * sem)
{
	__do_down(sem,TASK_UNINTERRUPTIBLE) ;
}

int __down_interruptible(struct semaphore * sem)
{
	return(__do_down(sem,TASK_INTERRUPTIBLE)) ;
}


static inline void __sleep_on(struct wait_queue **p, int state)
{
	unsigned long flags;
	struct wait_queue wait = { current, NULL };

	if (!p)
		return;
	if (current == task[0])
		panic("task[0] trying to sleep");
	current->state = state;
	save_flags(flags);
	cli();
	__add_wait_queue(p, &wait);
	sti();
	schedule();
	cli();
	__remove_wait_queue(p, &wait);
	restore_flags(flags);
}

void interruptible_sleep_on(struct wait_queue **p)
{
	__sleep_on(p,TASK_INTERRUPTIBLE);
}

void sleep_on(struct wait_queue **p)
{
	__sleep_on(p,TASK_UNINTERRUPTIBLE);
}

#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

#define SLOW_BUT_DEBUGGING_TIMERS 0

struct timer_vec {
	int index;
	struct timer_list *vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;
	struct timer_list *vec[TVR_SIZE];
};

static struct timer_vec tv5 = { 0 };
static struct timer_vec tv4 = { 0 };
static struct timer_vec tv3 = { 0 };
static struct timer_vec tv2 = { 0 };
static struct timer_vec_root tv1 = { 0 };

static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

static unsigned long timer_jiffies = 0;

static inline void insert_timer(struct timer_list *timer,
				struct timer_list **vec, int idx)
{
	if ((timer->next = vec[idx]))
		vec[idx]->prev = timer;
	vec[idx] = timer;
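	/* The head pointer doubles as a list node here: "next" is the first
	 * member of struct timer_list, so &vec[idx] can stand in for a node
	 * whose next field is vec[idx]. */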
	timer->prev = (struct timer_list *)&vec[idx];
}

static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;

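	/* Pick a wheel level by how far in the future the timer expires:
	 * the low TVR_BITS of "expires" index tv1, and each further group
	 * of TVN_BITS indexes the next level, tv2..tv5. */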
	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		insert_timer(timer, tv1.vec, i);
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		insert_timer(timer, tv2.vec, i);
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv3.vec, i);
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv4.vec, i);
	} else if (expires < timer_jiffies) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		insert_timer(timer, tv1.vec, tv1.index);
	} else if (idx < 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv5.vec, i);
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		timer->next = timer->prev = timer;
	}
}

void add_timer(struct timer_list *timer)
{
	unsigned long flags;
	save_flags(flags);
	cli();
#if SLOW_BUT_DEBUGGING_TIMERS
	if (timer->next || timer->prev) {
		printk("add_timer() called with non-zero list from %p\n",
			__builtin_return_address(0));
		goto out;
	}
#endif
	internal_add_timer(timer);
#if SLOW_BUT_DEBUGGING_TIMERS
out:
#endif
	restore_flags(flags);
}

static inline int detach_timer(struct timer_list *timer)
{
	int ret = 0;
	struct timer_list *next, *prev;
	next = timer->next;
	prev = timer->prev;
	if (next) {
		next->prev = prev;
	}
	if (prev) {
		ret = 1;
		prev->next = next;
	}
	return ret;
}


int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;
	save_flags(flags);
	cli();
	ret = detach_timer(timer);
	timer->next = timer->prev = 0;
	restore_flags(flags);
	return ret;
}

static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer;
	timer = tv->vec[tv->index];
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (timer) {
		struct timer_list *tmp = timer;
		timer = timer->next;
		internal_add_timer(tmp);
	}
	tv->vec[tv->index] = NULL;
	tv->index = (tv->index + 1) & TVN_MASK;
}

static inline void run_timer_list(void)
{
	cli();
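	/* The signed comparison of the jiffies difference (instead of
	 * "jiffies >= timer_jiffies") stays correct when jiffies wraps. */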
	while ((long)(jiffies - timer_jiffies) >= 0) {
		struct timer_list *timer;
		if (!tv1.index) {
			int n = 1;
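			/* An index of 1 after cascading means that level just
			 * emptied slot 0, i.e. it wrapped, so the next level
			 * up must be cascaded as well. */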
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
		while ((timer = tv1.vec[tv1.index])) {
			void (*fn)(unsigned long) = timer->function;
			unsigned long data = timer->data;
			detach_timer(timer);
			timer->next = timer->prev = NULL;
			sti();
			fn(data);
			cli();
		}
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;
	}
	sti();
}

static inline void run_old_timers(void)
{
	struct timer_struct *tp;
	unsigned long mask;

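	/* timer_active is a bitmask with one bit per timer_table slot; the
	 * scan stops as soon as no bit at or above "mask" is still set. */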
	for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
		if (mask > timer_active)
			break;
		if (!(mask & timer_active))
			continue;
		if (tp->expires > jiffies)
			continue;
		timer_active &= ~mask;
		tp->fn();
		sti();
	}
}

void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}

unsigned long timer_active = 0;
struct timer_struct timer_table[32];

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct **p;
	unsigned long nr = 0;

	for(p = &LAST_TASK; p > &FIRST_TASK; --p)
		if (*p && ((*p)->state == TASK_RUNNING ||
			   (*p)->state == TASK_UNINTERRUPTIBLE ||
			   (*p)->state == TASK_SWAPPING))
			nr += FIXED_1;
#ifdef __SMP__
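	/* Each additional CPU runs an idle thread that was counted as
	 * active above, so discount one task per extra CPU. */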
	nr-=(smp_num_cpus-1)*FIXED_1;
#endif
	return nr;
}

static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

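	/* Sample the number of active tasks once every LOAD_FREQ ticks
	 * and fold it into the 1, 5 and 15 minute load averages. */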
count -= ticks;
|
count -= ticks;
|
if (count < 0) {
|
if (count < 0) {
|
count += LOAD_FREQ;
|
count += LOAD_FREQ;
|
active_tasks = count_active_tasks();
|
active_tasks = count_active_tasks();
|
CALC_LOAD(avenrun[0], EXP_1, active_tasks);
|
CALC_LOAD(avenrun[0], EXP_1, active_tasks);
|
CALC_LOAD(avenrun[1], EXP_5, active_tasks);
|
CALC_LOAD(avenrun[1], EXP_5, active_tasks);
|
CALC_LOAD(avenrun[2], EXP_15, active_tasks);
|
CALC_LOAD(avenrun[2], EXP_15, active_tasks);
|
}
|
}
|
}
|
}
|
|
|
/*
|
/*
|
* this routine handles the overflow of the microsecond field
|
* this routine handles the overflow of the microsecond field
|
*
|
*
|
* The tricky bits of code to handle the accurate clock support
|
* The tricky bits of code to handle the accurate clock support
|
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
* They were originally developed for SUN and DEC kernels.
|
* They were originally developed for SUN and DEC kernels.
|
* All the kudos should go to Dave for this stuff.
|
* All the kudos should go to Dave for this stuff.
|
*
|
*
|
*/
|
*/
|
static void second_overflow(void)
|
static void second_overflow(void)
|
{
|
{
|
long ltemp;
|
long ltemp;
|
|
|
/* Bump the maxerror field */
|
/* Bump the maxerror field */
|
time_maxerror += time_tolerance >> SHIFT_USEC;
|
time_maxerror += time_tolerance >> SHIFT_USEC;
|
if ( time_maxerror > NTP_PHASE_LIMIT ) {
|
if ( time_maxerror > NTP_PHASE_LIMIT ) {
|
time_maxerror = NTP_PHASE_LIMIT;
|
time_maxerror = NTP_PHASE_LIMIT;
|
time_state = TIME_ERROR; /* p. 17, sect. 4.3, (b) */
|
time_state = TIME_ERROR; /* p. 17, sect. 4.3, (b) */
|
time_status |= STA_UNSYNC;
|
time_status |= STA_UNSYNC;
|
}
|
}
|
|
|
/*
|
/*
|
* Leap second processing. If in leap-insert state at
|
* Leap second processing. If in leap-insert state at
|
* the end of the day, the system clock is set back one
|
* the end of the day, the system clock is set back one
|
* second; if in leap-delete state, the system clock is
|
* second; if in leap-delete state, the system clock is
|
* set ahead one second. The microtime() routine or
|
* set ahead one second. The microtime() routine or
|
* external clock driver will insure that reported time
|
* external clock driver will insure that reported time
|
* is always monotonic. The ugly divides should be
|
* is always monotonic. The ugly divides should be
|
* replaced.
|
* replaced.
|
*/
|
*/
	switch (time_state) {

	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;

	case TIME_INS:
		if (xtime.tv_sec % 86400 == 0) {
			xtime.tv_sec--;
			time_state = TIME_OOP;
			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;

	case TIME_DEL:
		if ((xtime.tv_sec + 1) % 86400 == 0) {
			xtime.tv_sec++;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;

	case TIME_OOP:
		time_state = TIME_WAIT;
		break;

	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
	}

	/*
	 * Compute the phase adjustment for the next second. In
	 * PLL mode, the offset is reduced by a fixed factor
	 * times the time constant. In FLL mode the offset is
	 * used directly. In either mode, the maximum phase
	 * adjustment for each second is clamped so as to spread
	 * the adjustment over not more than the number of
	 * seconds between updates.
	 */
	if (time_offset < 0) {
		ltemp = -time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset += ltemp;
		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	} else {
		ltemp = time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset -= ltemp;
		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	}
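	/*
	 * Illustrative numbers, assuming the HZ == 100 constants from
	 * <linux/timex.h> (SHIFT_KG 6, SHIFT_HZ 7, SHIFT_UPDATE 12,
	 * SHIFT_SCALE 22): in PLL mode with time_constant 0, a 1000 usec
	 * offset is reduced by 1000 >> 6, about 15 usec, this second.
	 * The final shift rescales that amount into the per-tick
	 * 1/(1 << SHIFT_SCALE) usec units and spreads it across the
	 * 1 << SHIFT_HZ ticks of the coming second.
	 */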

	/*
	 * Compute the frequency estimate and additional phase
	 * adjustment due to frequency error for the next
	 * second. When the PPS signal is engaged, gnaw on the
	 * watchdog counter and update the frequency computed by
	 * the pll and the PPS signal.
	 */
	pps_valid++;
	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
		pps_jitter = MAXTIME;
		pps_stabil = MAXFREQ;
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
	}
	ltemp = time_freq + pps_freq;
	if (ltemp < 0)
		time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
	else
		time_adj += ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
	/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
	 */
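	/*
	 * That is: time_adj + time_adj/4 + time_adj/32 scales by
	 * 1.28125, nearly the 128/100 = 1.28 needed to go from the
	 * assumed 1 << SHIFT_HZ = 128 ticks per second to the real 100.
	 */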
	if (time_adj < 0)
		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
	else
		time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}

/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	if ( (time_adjust_step = time_adjust) != 0 ) {
		/* We are doing an adjtime thing.
		 *
		 * Prepare time_adjust_step to be within bounds.
		 * Note that a positive time_adjust means we want the clock
		 * to run faster.
		 *
		 * Limit the amount of the step to be in the range
		 * -tickadj .. +tickadj
		 */
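		/*
		 * Illustration (assuming the usual default tickadj of
		 * 500/HZ, i.e. 5 usec at HZ == 100): an adjtime() slew
		 * of +1000 usec is applied 5 usec per tick, 500 usec
		 * per second, and so completes in about two seconds.
		 */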
		if (time_adjust > tickadj)
			time_adjust_step = tickadj;
		else if (time_adjust < -tickadj)
			time_adjust_step = -tickadj;

		/* Reduce by this step the amount of time left */
		time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
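	/*
	 * (time_adj is in units of 1/(1 << SHIFT_SCALE) usec per tick;
	 * time_phase accumulates it, and only whole microseconds, i.e.
	 * multiples of FINEUSEC, are moved into xtime.tv_usec.)
	 */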
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}

/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing it this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks).
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_usec >= 1000000) {
		xtime.tv_usec -= 1000000;
		xtime.tv_sec++;
		second_overflow();
	}
}

static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	long psecs;

	p->utime += user;
	p->stime += system;

	psecs = (p->stime + p->utime) / HZ;
	if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
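		/*
		 * (The equality below holds only on the tick where the
		 * combined time crosses a whole-second boundary, so the
		 * signal fires at most once per second of overrun, and
		 * may be skipped if several ticks are accounted at once.)
		 */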
		if (psecs * HZ == p->stime + p->utime)
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}

static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		if (it_virt <= ticks) {
			it_virt = ticks + p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt - ticks;
	}
}

static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (it_prof <= ticks) {
			it_prof = ticks + p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof - ticks;
	}
}

static __inline__ void update_one_process(struct task_struct *p,
	unsigned long ticks, unsigned long user, unsigned long system)
{
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p, ticks);
}

static void update_process_times(unsigned long ticks, unsigned long system)
{
#ifndef __SMP__
	struct task_struct * p = current;
	unsigned long user = ticks - system;
	if (p->pid) {
		p->counter -= ticks;
		if (p->counter < 0) {
			p->counter = 0;
			need_resched = 1;
		}
		if (p->priority < DEF_PRIORITY)
			kstat.cpu_nice += user;
		else
			kstat.cpu_user += user;
		kstat.cpu_system += system;
	}
	update_one_process(p, ticks, user, system);
#else
	int cpu,j;
	cpu = smp_processor_id();
	for (j=0;j<smp_num_cpus;j++)
	{
		int i = cpu_logical_map[j];
		struct task_struct *p;

#ifdef __SMP_PROF__
		if (test_bit(i,&smp_idle_map))
			smp_idle_count[i]++;
#endif
		p = current_set[i];
		/*
		 * Do we have a real process?
		 */
		if (p->pid) {
			/* assume user-mode process */
			unsigned long utime = ticks;
			unsigned long stime = 0;
			if (cpu == i) {
				utime = ticks-system;
				stime = system;
			} else if (smp_proc_in_lock[j]) {
				utime = 0;
				stime = ticks;
			}
			update_one_process(p, ticks, utime, stime);

			if (p->priority < DEF_PRIORITY)
				kstat.cpu_nice += utime;
			else
				kstat.cpu_user += utime;
			kstat.cpu_system += stime;

			p->counter -= ticks;
			if (p->counter >= 0)
				continue;
			p->counter = 0;
		} else {
			/*
			 * Idle processor found, do we have anything
			 * we could run?
			 */
			if (!(0x7fffffff & smp_process_available))
				continue;
		}
		/* Ok, we should reschedule, do the magic */
		if (i==cpu)
			need_resched = 1;
		else
			smp_message_pass(i, MSG_RESCHEDULE, 0L, 0);
	}
#endif
}
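
/*
 * do_timer() below only counts ticks from interrupt context; timer_bh()
 * later claims them atomically with xchg() and does the heavier
 * accounting with interrupts enabled.
 */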

static unsigned long lost_ticks = 0;
static unsigned long lost_ticks_system = 0;

static inline void update_times(void)
{
	unsigned long ticks;

	ticks = xchg(&lost_ticks, 0);

	if (ticks) {
		unsigned long system;

		system = xchg(&lost_ticks_system, 0);
		calc_load(ticks);
		update_wall_time(ticks);
		update_process_times(ticks, system);
	}
}

void timer_bh(void)
{
	update_times();
	run_old_timers();
	run_timer_list();
}

void do_timer(struct pt_regs * regs)
{
	(*(unsigned long *)&jiffies)++;
	lost_ticks++;
	mark_bh(TIMER_BH);
	if (!user_mode(regs)) {
		lost_ticks_system++;
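		/*
		 * Kernel profiling: bucket the interrupted kernel PC,
		 * taken relative to _stext and scaled down by prof_shift,
		 * into the profil()-style histogram.
		 */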
		if (prof_buffer && current->pid) {
			extern int _stext;
			unsigned long ip = instruction_pointer(regs);
			ip -= (unsigned long) &_stext;
			ip >>= prof_shift;
			if (ip < prof_len)
				prof_buffer[ip]++;
		}
	}
	if (tq_timer)
		mark_bh(TQUEUE_BH);
}

#ifndef __alpha__

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */
asmlinkage int sys_getpid(void)
{
	return current->pid;
}

asmlinkage int sys_getppid(void)
{
	return current->p_opptr->pid;
}

asmlinkage int sys_getuid(void)
{
	return current->uid;
}

asmlinkage int sys_geteuid(void)
{
	return current->euid;
}

asmlinkage int sys_getgid(void)
{
	return current->gid;
}

asmlinkage int sys_getegid(void)
{
	return current->egid;
}

/*
 * This has been replaced by sys_setpriority.  Maybe it should be
 * moved into the arch dependent tree for those ports that require
 * it for backward compatibility?
 */
asmlinkage int sys_nice(int increment)
{
	unsigned long newprio;
	int increase = 0;

	newprio = increment;
	if (increment < 0) {
		if (!suser())
			return -EPERM;
		newprio = -increment;
		increase = 1;
	}
	if (newprio > 40)
		newprio = 40;
	/*
	 * do a "normalization" of the priority (traditionally
	 * unix nice values are -20..20, linux doesn't really
	 * use that kind of thing, but uses the length of the
	 * timeslice instead (default 150 msec). The rounding is
	 * why we want to avoid negative values.
	 */
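	/*
	 * Worked example, assuming HZ == 100 so that DEF_PRIORITY is 20:
	 * nice(10) gives (10*20 + 10)/20 = 10, so the priority drops
	 * from the default 20 to 10 and the timeslice is halved.
	 */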
	newprio = (newprio * DEF_PRIORITY + 10) / 20;
	increment = newprio;
	if (increase)
		increment = -increment;
	newprio = current->priority - increment;
	if ((signed) newprio < 1)
		newprio = 1;
	if (newprio > DEF_PRIORITY*2)
		newprio = DEF_PRIORITY*2;
	current->priority = newprio;
	return 0;
}

#endif
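
/*
 * A pid of 0 means the calling process, matching the POSIX sched_*
 * interfaces served below; any other pid is looked up in the task list.
 */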

static struct task_struct *find_process_by_pid(pid_t pid)
{
	struct task_struct *p, *q;

	if (pid == 0)
		p = current;
	else {
		p = 0;
		for_each_task(q) {
			if (q && q->pid == pid) {
				p = q;
				break;
			}
		}
	}
	return p;
}

static int setscheduler(pid_t pid, int policy,
			struct sched_param *param)
{
	int error;
	struct sched_param lp;
	struct task_struct *p;

	if (!param || pid < 0)
		return -EINVAL;

	error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
	if (error)
		return error;
	memcpy_fromfs(&lp, param, sizeof(struct sched_param));

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	if (policy < 0)
		policy = p->policy;
	else if (policy != SCHED_FIFO && policy != SCHED_RR &&
		 policy != SCHED_OTHER)
		return -EINVAL;

	/*
	 * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
	 * priority for SCHED_OTHER is 0.
	 */
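	/*
	 * (The inequality below rejects exactly the mismatched cases:
	 * SCHED_OTHER with a nonzero priority, or a real-time policy
	 * with priority 0.)
	 */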
	if (lp.sched_priority < 0 || lp.sched_priority > 99)
		return -EINVAL;
	if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
		return -EINVAL;

	if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
		return -EPERM;
	if ((current->euid != p->euid) && (current->euid != p->uid) &&
	    !suser())
		return -EPERM;

	p->policy = policy;
	p->rt_priority = lp.sched_priority;
	cli();
	if (p->next_run)
		move_last_runqueue(p);
	sti();
	need_resched = 1;
	return 0;
}

asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
				      struct sched_param *param)
{
	return setscheduler(pid, policy, param);
}

asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
{
	return setscheduler(pid, -1, param);
}

asmlinkage int sys_sched_getscheduler(pid_t pid)
{
	struct task_struct *p;

	if (pid < 0)
		return -EINVAL;

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	return p->policy;
}

asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
{
	int error;
	struct task_struct *p;
	struct sched_param lp;

	if (!param || pid < 0)
		return -EINVAL;

	error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
	if (error)
		return error;

	p = find_process_by_pid(pid);
	if (!p)
		return -ESRCH;

	lp.sched_priority = p->rt_priority;
	memcpy_tofs(param, &lp, sizeof(struct sched_param));

	return 0;
}

asmlinkage int sys_sched_yield(void)
{
	cli();
	move_last_runqueue(current);
	current->counter = 0;
	need_resched = 1;
	sti();
	return 0;
}

asmlinkage int sys_sched_get_priority_max(int policy)
{
	switch (policy) {
	case SCHED_FIFO:
	case SCHED_RR:
		return 99;
	case SCHED_OTHER:
		return 0;
	}

	return -EINVAL;
}

asmlinkage int sys_sched_get_priority_min(int policy)
{
	switch (policy) {
	case SCHED_FIFO:
	case SCHED_RR:
		return 1;
	case SCHED_OTHER:
		return 0;
	}

	return -EINVAL;
}

asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
	int error;
	struct timespec t;

	error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
	if (error)
		return error;

	/* Values taken from 2.1.38 */
	t.tv_sec = 0;
	t.tv_nsec = 150000;	/* is this right for non-Intel architectures too? */
	memcpy_tofs(interval, &t, sizeof(struct timespec));

	return 0;
}

/*
 * change timespec to jiffies, trying to avoid the
 * most obvious overflows..
 */
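/*
 * E.g. at HZ == 100 a jiffy is 10000000 nsec, so a 15 msec request
 * becomes (15000000 + 9999999) / 10000000 = 2 jiffies: the conversion
 * rounds up so a sleep is never shorter than asked for.
 */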
static unsigned long timespectojiffies(struct timespec *value)
{
	unsigned long sec = (unsigned) value->tv_sec;
	long nsec = value->tv_nsec;

	if (sec > (LONG_MAX / HZ))
		return LONG_MAX;
	nsec += 1000000000L / HZ - 1;
	nsec /= 1000000000L / HZ;
	return HZ * sec + nsec;
}

static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
{
	value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
	value->tv_sec = jiffies / HZ;
	return;
}

asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	int error;
	struct timespec t;
	unsigned long expire;

	error = verify_area(VERIFY_READ, rqtp, sizeof(struct timespec));
	if (error)
		return error;
	memcpy_fromfs(&t, rqtp, sizeof(struct timespec));
	if (rmtp) {
		error = verify_area(VERIFY_WRITE, rmtp,
				    sizeof(struct timespec));
		if (error)
			return error;
	}

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER) {
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

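	/*
	 * (The (t.tv_sec || t.tv_nsec) term adds one extra jiffy for any
	 * nonzero request, since the current tick is already partly over
	 * and the sleep must not end early.)
	 */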
	expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
	current->timeout = expire;
	current->state = TASK_INTERRUPTIBLE;
	schedule();

	if (expire > jiffies) {
		if (rmtp) {
			jiffiestotimespec(expire - jiffies -
					  (expire > jiffies + 1), &t);
			memcpy_tofs(rmtp, &t, sizeof(struct timespec));
		}
		return -EINTR;
	}

	return 0;
}

/* Used in fs/proc/array.c */
unsigned long get_wchan(struct task_struct *p)
{
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
#if defined(__i386__)
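	/*
	 * Walk the saved frame-pointer chain: each frame holds the
	 * caller's ebp at [ebp] and its return address at [ebp+4].
	 * The first return address outside the interruptible_sleep_on()
	 * .. add_timer() text range is reported as the wait channel;
	 * this relies on the link order of those functions.
	 */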
	{
		unsigned long ebp, eip;
		unsigned long stack_page;
		int count = 0;

		stack_page = p->kernel_stack_page;
		if (!stack_page)
			return 0;
		ebp = p->tss.ebp;
		do {
			if (ebp < stack_page || ebp >= 4092+stack_page)
				return 0;
			eip = *(unsigned long *) (ebp+4);
			if (eip < (unsigned long) interruptible_sleep_on
			    || eip >= (unsigned long) add_timer)
				return eip;
			ebp = *(unsigned long *) ebp;
		} while (count++ < 16);
	}
#elif defined(__alpha__)
	/*
	 * This one depends on the frame size of schedule().  Do a
	 * "disass schedule" in gdb to find the frame size.  Also, the
	 * code assumes that sleep_on() follows immediately after
	 * interruptible_sleep_on() and that add_timer() follows
	 * immediately after sleep_on().  Ugly, isn't it?
	 * Maybe adding a wchan field to task_struct would be better,
	 * after all...
	 */
	{
		unsigned long schedule_frame;
		unsigned long pc;

		pc = thread_saved_pc(&p->tss);
		if (pc >= (unsigned long) interruptible_sleep_on && pc < (unsigned long) add_timer) {
			schedule_frame = ((unsigned long *)p->tss.ksp)[6];
			return ((unsigned long *)schedule_frame)[12];
		}
		return pc;
	}
#endif
	return 0;
}

static void show_task(int nr,struct task_struct * p)
{
	unsigned long free;
	static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

	printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
	if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
		printk(stat_nam[p->state]);
	else
		printk(" ");
#if ((~0UL) == 0xffffffff)
	if (p == current)
		printk(" current ");
	else
		printk(" %08lX ", thread_saved_pc(&p->tss));
	printk("%08lX ", get_wchan(p));
#else
	if (p == current)
		printk(" current task ");
	else
		printk(" %016lx ", thread_saved_pc(&p->tss));
	printk("%08lX ", get_wchan(p) & 0xffffffffL);
#endif
	if (((unsigned long *)p->kernel_stack_page)[0] != STACK_MAGIC)
		printk(" bad-");

	for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
		if (((unsigned long *)p->kernel_stack_page)[free] != STACK_UNTOUCHED_MAGIC)
			break;
	}
	printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
	if (p->p_cptr)
		printk("%5d ", p->p_cptr->pid);
	else
		printk(" ");
	if (p->p_ysptr)
		printk("%7d", p->p_ysptr->pid);
	else
		printk(" ");
	if (p->p_osptr)
		printk(" %5d\n", p->p_osptr->pid);
	else
		printk("\n");
}

void show_state(void)
{
	int i;

#if ((~0UL) == 0xffffffff)
	printk("\n"
	       " free sibling\n");
	printk(" task PC wchan stack pid father child younger older\n");
#else
	printk("\n"
	       " free sibling\n");
	printk(" task PC wchan stack pid father child younger older\n");
#endif
	for (i=0 ; i<NR_TASKS ; i++)
		if (task[i])
			show_task(i,task[i]);
}

void sched_init(void)
{
	/*
	 * We have to do a little magic to get the first
	 * process right in SMP mode.
	 */
	int cpu=smp_processor_id();
	int i;
#ifndef __SMP__
	current_set[cpu]=&init_task;
#else
	init_task.processor=cpu;
	for(cpu = 0; cpu < NR_CPUS; cpu++)
		current_set[cpu] = &init_task;
#endif

	init_kernel_stack[0] = STACK_MAGIC;
	for(i=1;i<1024;i++)
		init_kernel_stack[i] = STACK_UNTOUCHED_MAGIC;

	init_bh(TIMER_BH, timer_bh);
	init_bh(TQUEUE_BH, tqueue_bh);
	init_bh(IMMEDIATE_BH, immediate_bh);
}