OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [uclinux/] [uClinux-2.0.x/] [kernel/] [sched.c] - Blame information for rev 1780

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 199 simons
/*
2
 *  linux/kernel/sched.c
3
 *
4
 *  Copyright (C) 1991, 1992  Linus Torvalds
5
 *
6
 *  1996-04-21  Modified by Ulrich Windl to make NTP work
7
 *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
8
 *              make semaphores SMP safe
9
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
10
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
11
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
12
 */
13
 
14
/*
15
 * 'sched.c' is the main kernel file. It contains scheduling primitives
16
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
17
 * call functions (type getpid()), which just extract a field from
18
 * current-task
19
 */
20
 
21
/*
22
 * uClinux revisions for NO_MM
23
 * Copyright (C) 1998  Kenneth Albanowski <kjahds@kjahds.com>,
24
 *                     The Silver Hammer Group, Ltd.
25
 */
26
 
27
#include <linux/signal.h>
28
#include <linux/sched.h>
29
#include <linux/timer.h>
30
#include <linux/kernel.h>
31
#include <linux/kernel_stat.h>
32
#include <linux/fdreg.h>
33
#include <linux/errno.h>
34
#include <linux/time.h>
35
#include <linux/ptrace.h>
36
#include <linux/delay.h>
37
#include <linux/interrupt.h>
38
#include <linux/tqueue.h>
39
#include <linux/resource.h>
40
#include <linux/mm.h>
41
#include <linux/smp.h>
42
 
43
#include <asm/system.h>
44
#include <asm/io.h>
45
#include <asm/segment.h>
46
#include <asm/pgtable.h>
47
#include <asm/mmu_context.h>
48
 
49
#include <linux/timex.h>
50
 
51
/* SIMON - for reasons not yet understood this prototype does not work when placed in a header, so it is declared here */
52
extern void switch_to(struct task_struct *prev, struct task_struct *next);
53
 
54
 
55
/*
56
 * kernel variables
57
 */
58
 
59
int securelevel = 0;                     /* system security level */
60
 
61
long tick = (1000000 + HZ/2) / HZ;      /* timer interrupt period */
62
volatile struct timeval xtime;          /* The current time */
63
int tickadj = 500/HZ ? 500/HZ : 1;      /* microsecs */
64
 
65
DECLARE_TASK_QUEUE(tq_timer);
66
DECLARE_TASK_QUEUE(tq_immediate);
67
DECLARE_TASK_QUEUE(tq_scheduler);
68
 
69
/*
70
 * phase-lock loop variables
71
 */
72
/* TIME_ERROR prevents overwriting the CMOS clock */
73
int time_state = TIME_ERROR;    /* clock synchronization status */
74
int time_status = STA_UNSYNC;   /* clock status bits */
75
long time_offset = 0;            /* time adjustment (us) */
76
long time_constant = 2;         /* pll time constant */
77
long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
78
long time_precision = 1;        /* clock precision (us) */
79
long time_maxerror = NTP_PHASE_LIMIT;   /* maximum error (us) */
80
long time_esterror = NTP_PHASE_LIMIT;   /* estimated error (us) */
81
long time_phase = 0;             /* phase offset (scaled us) */
82
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;  /* frequency offset (scaled ppm) */
83
long time_adj = 0;               /* tick adjust (scaled 1 / HZ) */
84
long time_reftime = 0;           /* time at last adjustment (s) */
85
 
86
long time_adjust = 0;
87
long time_adjust_step = 0;
88
 
89
int need_resched = 0;
90
unsigned long event = 0;
91
 
92
extern int _setitimer(int, struct itimerval *, struct itimerval *);
93
unsigned int * prof_buffer = NULL;
94
unsigned long prof_len = 0;
95
unsigned long prof_shift = 0;
96
 
97
#define _S(nr) (1<<((nr)-1))
98
 
99
extern void mem_use(void);
100
 
101
unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
102
#ifndef NO_MM
103
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
104
static struct vm_area_struct init_mmap = INIT_MMAP;
105
#endif /* !NO_MM */
106
static struct fs_struct init_fs = INIT_FS;
107
static struct files_struct init_files = INIT_FILES;
108
static struct signal_struct init_signals = INIT_SIGNALS;
109
 
110
struct mm_struct init_mm = INIT_MM;
111
struct task_struct init_task = INIT_TASK;
112
 
113
unsigned long volatile jiffies=0;
114
 
115
struct task_struct *current_set[NR_CPUS];
116
struct task_struct *last_task_used_math = NULL;
117
 
118
struct task_struct * task[NR_TASKS] = {&init_task, };
119
 
120
struct kernel_stat kstat = { 0 };
121
 
122
static inline void add_to_runqueue(struct task_struct * p)
123
{
124
#ifdef __SMP__
125
        int cpu=smp_processor_id();
126
#endif  
127
#if 1   /* sanity tests */
128
        if (p->next_run || p->prev_run) {
129
                printk("task already on run-queue\n");
130
                return;
131
        }
132
#endif
133
        if (p->policy != SCHED_OTHER || p->counter > current->counter + 3)
134
                need_resched = 1;
135
        nr_running++;
136
        (p->prev_run = init_task.prev_run)->next_run = p;
137
        p->next_run = &init_task;
138
        init_task.prev_run = p;
139
#ifdef __SMP__
140
        /* this is safe only if called with cli()*/
141
        while(set_bit(31,&smp_process_available))
142
        {
143
                while(test_bit(31,&smp_process_available))
144
                {
145
                        if(clear_bit(cpu,&smp_invalidate_needed))
146
                        {
147
                                local_flush_tlb();
148
                                set_bit(cpu,&cpu_callin_map[0]);
149
                        }
150
                }
151
        }
152
        smp_process_available++;
153
        clear_bit(31,&smp_process_available);
154
        if ((0!=p->pid) && smp_threads_ready)
155
        {
156
                int i;
157
                for (i=0;i<smp_num_cpus;i++)
158
                {
159
                        if (0==current_set[cpu_logical_map[i]]->pid)
160
                        {
161
                                smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
162
                                break;
163
                        }
164
                }
165
        }
166
#endif
167
}
168
 
169
static inline void del_from_runqueue(struct task_struct * p)
170
{
171
        struct task_struct *next = p->next_run;
172
        struct task_struct *prev = p->prev_run;
173
 
174
#if 1   /* sanity tests */
175
        if (!next || !prev) {
176
                printk("task not on run-queue\n");
177
                return;
178
        }
179
#endif
180
        if (p == &init_task) {
181
                static int nr = 0;
182
                if (nr < 5) {
183
                        nr++;
184
                        printk("idle task may not sleep\n");
185
                }
186
                return;
187
        }
188
        nr_running--;
189
        next->prev_run = prev;
190
        prev->next_run = next;
191
        p->next_run = NULL;
192
        p->prev_run = NULL;
193
}
194
 
195
static inline void move_last_runqueue(struct task_struct * p)
196
{
197
        struct task_struct *next = p->next_run;
198
        struct task_struct *prev = p->prev_run;
199
 
200
        /* remove from list */
201
        next->prev_run = prev;
202
        prev->next_run = next;
203
        /* add back to list */
204
        p->next_run = &init_task;
205
        prev = init_task.prev_run;
206
        init_task.prev_run = p;
207
        p->prev_run = prev;
208
        prev->next_run = p;
209
}
210
 
211
/*
212
 * Wake up a process. Put it on the run-queue if it's not
213
 * already there.  The "current" process is always on the
214
 * run-queue (except when the actual re-schedule is in
215
 * progress), and as such you're allowed to do the simpler
216
 * "current->state = TASK_RUNNING" to mark yourself runnable
217
 * without the overhead of this.
218
 */
219
inline void wake_up_process(struct task_struct * p)
220
{
221
        unsigned long flags;
222
 
223
        save_flags(flags);
224
        cli();
225
        p->state = TASK_RUNNING;
226
        if (!p->next_run)
227
                add_to_runqueue(p);
228
        restore_flags(flags);
229
}
230
 
231
static void process_timeout(unsigned long __data)
232
{
233
        struct task_struct * p = (struct task_struct *) __data;
234
 
235
        p->timeout = 0;
236
        wake_up_process(p);
237
}
238
 
239
/*
240
 * This is the function that decides how desirable a process is..
241
 * You can weigh different processes against each other depending
242
 * on what CPU they've run on lately etc to try to handle cache
243
 * and TLB miss penalties.
244
 *
245
 * Return values:
246
 *       -1000: never select this
247
 *           0: out of time, recalculate counters (but it might still be
248
 *              selected)
249
 *         +ve: "goodness" value (the larger, the better)
250
 *       +1000: realtime process, select this.
251
 */
252
static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
253
{
254
        int weight;
255
 
256
#ifdef __SMP__  
257
        /* We are not permitted to run a task someone else is running */
258
        if (p->processor != NO_PROC_ID)
259
                return -1000;
260
#ifdef PAST_2_0         
261
        /* This process is locked to a processor group */
262
        if (p->processor_mask && !(p->processor_mask & (1<<this_cpu))
263
                return -1000;
264
#endif          
265
#endif
266
 
267
        /*
268
         * Realtime process, select the first one on the
269
         * runqueue (taking priorities within processes
270
         * into account).
271
         */
272
        if (p->policy != SCHED_OTHER)
273
                return 1000 + p->rt_priority;
274
 
275
        /*
276
         * Give the process a first-approximation goodness value
277
         * according to the number of clock-ticks it has left.
278
         *
279
         * Don't do any other calculations if the time slice is
280
         * over..
281
         */
282
        weight = p->counter;
283
        if (weight) {
284
 
285
#ifdef __SMP__
286
                /* Give a largish advantage to the same processor...   */
287
                /* (this is equivalent to penalizing other processors) */
288
                if (p->last_processor == this_cpu)
289
                        weight += PROC_CHANGE_PENALTY;
290
#endif
291
 
292
                /* .. and a slight advantage to the current process */
293
                if (p == prev)
294
                        weight += 1;
295
        }
296
 
297
        return weight;
298
}
299
 
300
 
301
/*
302
  The following allow_interrupts function is used to workaround a rare but
303
  nasty deadlock situation that is possible for 2.0.x Intel SMP because it uses
304
  a single kernel lock and interrupts are only routed to the boot CPU.  There
305
  are two deadlock scenarios this code protects against.
306
 
307
  The first scenario is that if a CPU other than the boot CPU holds the kernel
308
  lock and needs to wait for an operation to complete that itself requires an
309
  interrupt, there is a deadlock since the boot CPU may be able to accept the
310
  interrupt but will not be able to acquire the kernel lock to process it.
311
 
312
  The workaround for this deadlock requires adding calls to allow_interrupts to
313
  places where this deadlock is possible.  These places are known to be present
314
  in buffer.c and keyboard.c.  It is also possible that there are other such
315
  places which have not been identified yet.  In order to break the deadlock,
316
  the code in allow_interrupts temporarily yields the kernel lock directly to
317
  the boot CPU to allow the interrupt to be processed.  The boot CPU interrupt
318
  entry code indicates that it is spinning waiting for the kernel lock by
319
  setting the smp_blocked_interrupt_pending variable.  This code notices that
320
  and manipulates the active_kernel_processor variable to yield the kernel lock
321
  without ever clearing it.  When the interrupt has been processed, the
322
  saved_active_kernel_processor variable contains the value for the interrupt
323
  exit code to restore, either the APICID of the CPU that granted it the kernel
324
  lock, or NO_PROC_ID in the normal case where no yielding occurred.  Restoring
325
  active_kernel_processor from saved_active_kernel_processor returns the kernel
326
  lock back to the CPU that yielded it.
327
 
328
  The second form of deadlock is even more insidious.  Suppose the boot CPU
329
  takes a page fault and then the previous scenario ensues.  In this case, the
330
  boot CPU would spin with interrupts disabled waiting to acquire the kernel
331
  lock.  To resolve this deadlock, the kernel lock acquisition code must enable
332
  interrupts briefly so that the pending interrupt can be handled as in the
333
  case above.
334
 
335
  An additional form of deadlock is where kernel code running on a non-boot CPU
336
  waits for the jiffies variable to be incremented.  This deadlock is avoided
337
  by having the spin loops in ENTER_KERNEL increment jiffies approximately
338
  every 10 milliseconds.  Finally, if approximately 60 seconds elapse waiting
339
  for the kernel lock, a message will be printed if possible to indicate that a
340
  deadlock has been detected.
341
 
342
                Leonard N. Zubkoff
343
                   4 August 1997
344
*/
345
 
346
#if defined(__SMP__) && defined(__i386__)

volatile unsigned char smp_blocked_interrupt_pending = 0;

volatile unsigned char saved_active_kernel_processor = NO_PROC_ID;

/*
 * On a non-boot CPU with smp_blocked_interrupt_pending set, hand
 * active_kernel_processor to the boot CPU and poll (10us per step,
 * at most 6000000 steps) until it is handed back; panic if it never
 * is.  kernel_counter is zeroed for the duration and restored
 * afterwards.  Does nothing on the boot CPU or when no blocked
 * interrupt is pending.
 */
void allow_interrupts(void)
{
        unsigned long held_counter;
        long polls_left;

        if (smp_processor_id() == boot_cpu_id)
                return;
        if (!smp_blocked_interrupt_pending)
                return;

        /* yield the kernel lock to the boot CPU */
        saved_active_kernel_processor = active_kernel_processor;
        held_counter = kernel_counter;
        kernel_counter = 0;
        active_kernel_processor = boot_cpu_id;

        /* wait for active_kernel_processor to return to the saved value */
        polls_left = 6000000;
        while (active_kernel_processor != saved_active_kernel_processor &&
               --polls_left >= 0) {
                udelay(10);
                barrier();
        }
        if (polls_left < 0)
                panic("FORWARDED INTERRUPT TIMEOUT (AKP = %d, Saved AKP = %d)\n",
                      active_kernel_processor, saved_active_kernel_processor);

        kernel_counter = held_counter;
        saved_active_kernel_processor = NO_PROC_ID;
}

#else

/* Nothing to do outside the i386 SMP configuration. */
void allow_interrupts(void) {}

#endif
383
 
384
 
385
/*
386
 *  'schedule()' is the scheduler function. It's a very simple and nice
387
 * scheduler: it's not perfect, but certainly works for most things.
388
 *
389
 * The goto is "interesting".
390
 *
391
 *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
392
 * tasks can run. It can not be killed, and it cannot sleep. The 'state'
393
 * information in task[0] is never used.
394
 */
395
asmlinkage void schedule(void)
396
{
397
        int c;
398
        struct task_struct * p;
399
        struct task_struct * prev, * next;
400
        unsigned long timeout = 0;
401
        int this_cpu=smp_processor_id();
402
/* check alarm, wake up any interruptible tasks that have got a signal */
403
 
404
        allow_interrupts();
405
 
406
        if (intr_count)
407
                goto scheduling_in_interrupt;
408
 
409
        if (bh_active & bh_mask) {
410
                intr_count = 1;
411
                do_bottom_half();
412
                intr_count = 0;
413
        }
414
 
415
        run_task_queue(&tq_scheduler);
416
 
417
        need_resched = 0;
418
        prev = current;
419
        cli();
420
        /* move an exhausted RR process to be last.. */
421
        if (!prev->counter && prev->policy == SCHED_RR) {
422
                prev->counter = prev->priority;
423
                move_last_runqueue(prev);
424
        }
425
        switch (prev->state) {
426
                case TASK_INTERRUPTIBLE:
427
                        if (prev->signal & ~prev->blocked)
428
                                goto makerunnable;
429
                        timeout = prev->timeout;
430
                        if (timeout && (timeout <= jiffies)) {
431
                                prev->timeout = 0;
432
                                timeout = 0;
433
                makerunnable:
434
                                prev->state = TASK_RUNNING;
435
                                break;
436
                        }
437
                default:
438
                        del_from_runqueue(prev);
439
                case TASK_RUNNING:
440
        }
441
        p = init_task.next_run;
442
        sti();
443
 
444
#ifdef __SMP__
445
        /*
446
         *      This is safe as we do not permit re-entry of schedule()
447
         */
448
        prev->processor = NO_PROC_ID;
449
#define idle_task (task[cpu_number_map[this_cpu]])
450
#else
451
#define idle_task (&init_task)
452
#endif  
453
 
454
/*
455
 * Note! there may appear new tasks on the run-queue during this, as
456
 * interrupts are enabled. However, they will be put on front of the
457
 * list, so our list starting at "p" is essentially fixed.
458
 */
459
/* this is the scheduler proper: */
460
        c = -1000;
461
        next = idle_task;
462
        while (p != &init_task) {
463
                int weight = goodness(p, prev, this_cpu);
464
                if (weight > c)
465
                        c = weight, next = p;
466
                p = p->next_run;
467
        }
468
 
469
        /* if all runnable processes have "counter == 0", re-calculate counters */
470
        if (!c) {
471
                for_each_task(p)
472
                        p->counter = (p->counter >> 1) + p->priority;
473
        }
474
#ifdef __SMP__
475
        /*
476
         *      Allocate process to CPU
477
         */
478
 
479
         next->processor = this_cpu;
480
         next->last_processor = this_cpu;
481
#endif   
482
#ifdef __SMP_PROF__ 
483
        /* mark processor running an idle thread */
484
        if (0==next->pid)
485
                set_bit(this_cpu,&smp_idle_map);
486
        else
487
                clear_bit(this_cpu,&smp_idle_map);
488
#endif
489
        if (prev != next) {
490
                struct timer_list timer;
491
 
492
                kstat.context_swtch++;
493
                if (timeout) {
494
                        init_timer(&timer);
495
                        timer.expires = timeout;
496
                        timer.data = (unsigned long) prev;
497
                        timer.function = process_timeout;
498
                        add_timer(&timer);
499
                }
500
                get_mmu_context(next);
501
                switch_to(prev,next);
502
                if (timeout)
503
                        del_timer(&timer);
504
        }
505
        return;
506
 
507
scheduling_in_interrupt:
508
        printk("Aiee: scheduling in interrupt %p\n",
509
                __builtin_return_address(0));
510
}
511
 
512
#ifndef __alpha__
513
 
514
/*
515
 * For backwards compatibility?  This can be done in libc so Alpha
516
 * and all newer ports shouldn't need it.
517
 */
518
asmlinkage int sys_pause(void)
519
{
520
        current->state = TASK_INTERRUPTIBLE;
521
        schedule();
522
        return -ERESTARTNOHAND;
523
}
524
 
525
#endif
526
 
527
/*
528
 * wake_up doesn't wake up stopped processes - they have to be awakened
529
 * with signals or similar.
530
 *
531
 * Note that this doesn't need cli-sti pairs: interrupts may not change
532
 * the wait-queue structures directly, but only call wake_up() to wake
533
 * a process. The process itself must remove the queue once it has woken.
534
 */
535
void wake_up(struct wait_queue **q)
536
{
537
        struct wait_queue *next;
538
        struct wait_queue *head;
539
 
540
        if (!q || !(next = *q))
541
                return;
542
        head = WAIT_QUEUE_HEAD(q);
543
        while (next != head) {
544
                struct task_struct *p = next->task;
545
                next = next->next;
546
                if (p != NULL) {
547
                        if ((p->state == TASK_UNINTERRUPTIBLE) ||
548
                            (p->state == TASK_INTERRUPTIBLE))
549
                                wake_up_process(p);
550
                }
551
                if (!next)
552
                        goto bad;
553
        }
554
        return;
555
bad:
556
        printk("wait_queue is bad (eip = %p)\n",
557
                __builtin_return_address(0));
558
        printk("        q = %p\n",q);
559
        printk("       *q = %p\n",*q);
560
}
561
 
562
void wake_up_interruptible(struct wait_queue **q)
563
{
564
        struct wait_queue *next;
565
        struct wait_queue *head;
566
 
567
        if (!q || !(next = *q))
568
                return;
569
        head = WAIT_QUEUE_HEAD(q);
570
        while (next != head) {
571
                struct task_struct *p = next->task;
572
                next = next->next;
573
                if (p != NULL) {
574
                        if (p->state == TASK_INTERRUPTIBLE)
575
                                wake_up_process(p);
576
                }
577
                if (!next)
578
                        goto bad;
579
        }
580
        return;
581
bad:
582
        printk("wait_queue is bad (eip = %p)\n",
583
                __builtin_return_address(0));
584
        printk("        q = %p\n",q);
585
        printk("       *q = %p\n",*q);
586
}
587
 
588
 
589
/*
590
 * Semaphores are implemented using a two-way counter:
591
 * The "count" variable is decremented for each process
592
 * that tries to sleep, while the "waking" variable is
593
 * incremented when the "up()" code goes to wake up waiting
594
 * processes.
595
 *
596
 * Notably, the inline "up()" and "down()" functions can
597
 * efficiently test if they need to do any extra work (up
598
 * needs to do something only if count was negative before
599
 * the increment operation.
600
 *
601
 * This routine must execute atomically.
602
 */
603
static inline int waking_non_zero(struct semaphore *sem)
604
{
605
        int     ret ;
606
        long    flags ;
607
 
608
        get_buzz_lock(&sem->lock) ;
609
        save_flags(flags) ;
610
        cli() ;
611
 
612
        if ((ret = (sem->waking > 0)))
613
                sem->waking-- ;
614
 
615
        restore_flags(flags) ;
616
        give_buzz_lock(&sem->lock) ;
617
        return(ret) ;
618
}
619
 
620
/*
621
 * When __up() is called, the count was negative before
622
 * incrementing it, and we need to wake up somebody.
623
 *
624
 * This routine adds one to the count of processes that need to
625
 * wake up and exit.  ALL waiting processes actually wake up but
626
 * only the one that gets to the "waking" field first will gate
627
 * through and acquire the semaphore.  The others will go back
628
 * to sleep.
629
 *
630
 * Note that these functions are only called when there is
631
 * contention on the lock, and as such all this is the
632
 * "non-critical" part of the whole semaphore business. The
633
 * critical part is the inline stuff in <asm/semaphore.h>
634
 * where we want to avoid any extra jumps and calls.
635
 */
636
void __up(struct semaphore *sem)
637
{
638
        atomic_inc(&sem->waking) ;
639
        wake_up(&sem->wait);
640
}
641
 
642
/*
643
 * Perform the "down" function.  Return zero for semaphore acquired,
644
 * return negative for signalled out of the function.
645
 *
646
 * If called from __down, the return is ignored and the wait loop is
647
 * not interruptible.  This means that a task waiting on a semaphore
648
 * using "down()" cannot be killed until someone does an "up()" on
649
 * the semaphore.
650
 *
651
 * If called from __down_interruptible, the return value gets checked
652
 * upon return.  If the return value is negative then the task continues
653
 * with the negative value in the return register (it can be tested by
654
 * the caller).
655
 *
656
 * Either form may be used in conjunction with "up()".
657
 *
658
 */
659
int __do_down(struct semaphore * sem, int task_state)
660
{
661
        struct task_struct *tsk = current;
662
        struct wait_queue wait = { tsk, NULL };
663
        int               ret = 0 ;
664
 
665
        tsk->state = task_state;
666
        add_wait_queue(&sem->wait, &wait);
667
 
668
        /*
669
         * Ok, we're set up.  sem->count is known to be less than zero
670
         * so we must wait.
671
         *
672
         * We can let go the lock for purposes of waiting.
673
         * We re-acquire it after awaking so as to protect
674
         * all semaphore operations.
675
         *
676
         * If "up()" is called before we call waking_non_zero() then
677
         * we will catch it right away.  If it is called later then
678
         * we will have to go through a wakeup cycle to catch it.
679
         *
680
         * Multiple waiters contend for the semaphore lock to see
681
         * who gets to gate through and who has to wait some more.
682
         */
683
        for (;;)
684
        {
685
                if (waking_non_zero(sem))       /* are we waking up?  */
686
                    break ;                     /* yes, exit loop */
687
 
688
                if (   task_state == TASK_INTERRUPTIBLE
689
                    && (tsk->signal & ~tsk->blocked)    /* signalled */
690
                   )
691
                {
692
                    ret = -EINTR ;              /* interrupted */
693
                    atomic_inc(&sem->count) ;   /* give up on down operation */
694
                    break ;
695
                }
696
 
697
                schedule();
698
                tsk->state = task_state;
699
        }
700
 
701
        tsk->state = TASK_RUNNING;
702
        remove_wait_queue(&sem->wait, &wait);
703
        return(ret) ;
704
 
705
} /* __do_down */
706
 
707
void __down(struct semaphore * sem)
708
{
709
        __do_down(sem,TASK_UNINTERRUPTIBLE) ;
710
}
711
 
712
int __down_interruptible(struct semaphore * sem)
713
{
714
        return(__do_down(sem,TASK_INTERRUPTIBLE)) ;
715
}
716
 
717
 
718
static inline void __sleep_on(struct wait_queue **p, int state)
719
{
720
        unsigned long flags;
721
        struct wait_queue wait = { current, NULL };
722
 
723
        if (!p)
724
                return;
725
        if (current == task[0])
726
                panic("task[0] trying to sleep");
727
        current->state = state;
728
        save_flags(flags);
729
        cli();
730
        __add_wait_queue(p, &wait);
731
        sti();
732
        schedule();
733
        cli();
734
        __remove_wait_queue(p, &wait);
735
        restore_flags(flags);
736
}
737
 
738
void interruptible_sleep_on(struct wait_queue **p)
739
{
740
        __sleep_on(p,TASK_INTERRUPTIBLE);
741
}
742
 
743
void sleep_on(struct wait_queue **p)
744
{
745
        __sleep_on(p,TASK_UNINTERRUPTIBLE);
746
}
747
 
748
/*
 * Timer vectors: a root vector (tv1) of TVR_SIZE slots plus four
 * further vectors (tv2..tv5) of TVN_SIZE slots each.
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

/* non-zero enables the list sanity check in add_timer() */
#define SLOW_BUT_DEBUGGING_TIMERS 0

/* one of the upper vectors: current slot index and per-slot timer lists */
struct timer_vec {
        int index;
        struct timer_list *vec[TVN_SIZE];
};

/* the root vector: same layout, but TVR_SIZE slots */
struct timer_vec_root {
        int index;
        struct timer_list *vec[TVR_SIZE];
};

static struct timer_vec tv5 = { 0 };
static struct timer_vec tv4 = { 0 };
static struct timer_vec tv3 = { 0 };
static struct timer_vec tv2 = { 0 };
static struct timer_vec_root tv1 = { 0 };

/* all vectors in cascade order; tv1 is viewed through a timer_vec pointer */
static struct timer_vec * const tvecs[] = {
        (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

/* the tick up to which run_timer_list() has processed the vectors */
static unsigned long timer_jiffies = 0;
780
 
781
static inline void insert_timer(struct timer_list *timer,
782
                                struct timer_list **vec, int idx)
783
{
784
        if ((timer->next = vec[idx]))
785
                vec[idx]->prev = timer;
786
        vec[idx] = timer;
787
        timer->prev = (struct timer_list *)&vec[idx];
788
}
789
 
790
static inline void internal_add_timer(struct timer_list *timer)
791
{
792
        /*
793
         * must be cli-ed when calling this
794
         */
795
        unsigned long expires = timer->expires;
796
        unsigned long idx = expires - timer_jiffies;
797
 
798
        if (idx < TVR_SIZE) {
799
                int i = expires & TVR_MASK;
800
                insert_timer(timer, tv1.vec, i);
801
        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
802
                int i = (expires >> TVR_BITS) & TVN_MASK;
803
                insert_timer(timer, tv2.vec, i);
804
        } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
805
                int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
806
                insert_timer(timer, tv3.vec, i);
807
        } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
808
                int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
809
                insert_timer(timer, tv4.vec, i);
810
        } else if (expires < timer_jiffies) {
811
                /* can happen if you add a timer with expires == jiffies,
812
                 * or you set a timer to go off in the past
813
                 */
814
                insert_timer(timer, tv1.vec, tv1.index);
815
        } else if (idx < 0xffffffffUL) {
816
                int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
817
                insert_timer(timer, tv5.vec, i);
818
        } else {
819
                /* Can only get here on architectures with 64-bit jiffies */
820
                timer->next = timer->prev = timer;
821
        }
822
}
823
 
824
/*
 * Queue a timer.  Interrupts are disabled while the timer is filed
 * and the caller's flags are restored afterwards.  When
 * SLOW_BUT_DEBUGGING_TIMERS is enabled, a timer whose list links are
 * already set is reported and not queued.
 */
void add_timer(struct timer_list *timer)
{
        unsigned long flags;

        save_flags(flags);
        cli();
#if SLOW_BUT_DEBUGGING_TIMERS
        if (timer->next || timer->prev) {
                printk("add_timer() called with non-zero list from %p\n",
                       __builtin_return_address(0));
                restore_flags(flags);
                return;
        }
#endif
        internal_add_timer(timer);
        restore_flags(flags);
}
842
 
843
static inline int detach_timer(struct timer_list *timer)
844
{
845
        int ret = 0;
846
        struct timer_list *next, *prev;
847
        next = timer->next;
848
        prev = timer->prev;
849
        if (next) {
850
                next->prev = prev;
851
        }
852
        if (prev) {
853
                ret = 1;
854
                prev->next = next;
855
        }
856
        return ret;
857
}
858
 
859
 
860
int del_timer(struct timer_list * timer)
861
{
862
        int ret;
863
        unsigned long flags;
864
        save_flags(flags);
865
        cli();
866
        ret = detach_timer(timer);
867
        timer->next = timer->prev = 0;
868
        restore_flags(flags);
869
        return ret;
870
}
871
 
872
static inline void cascade_timers(struct timer_vec *tv)
873
{
874
        /* cascade all the timers from tv up one level */
875
        struct timer_list *timer;
876
        timer = tv->vec[tv->index];
877
        /*
878
         * We are removing _all_ timers from the list, so we don't  have to
879
         * detach them individually, just clear the list afterwards.
880
         */
881
        while (timer) {
882
                struct timer_list *tmp = timer;
883
                timer = timer->next;
884
                internal_add_timer(tmp);
885
        }
886
        tv->vec[tv->index] = NULL;
887
        tv->index = (tv->index + 1) & TVN_MASK;
888
}
889
 
890
static inline void run_timer_list(void)
891
{
892
        cli();
893
        while ((long)(jiffies - timer_jiffies) >= 0) {
894
                struct timer_list *timer;
895
                if (!tv1.index) {
896
                        int n = 1;
897
                        do {
898
                                cascade_timers(tvecs[n]);
899
                        } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
900
                }
901
                while ((timer = tv1.vec[tv1.index])) {
902
                        void (*fn)(unsigned long) = timer->function;
903
                        unsigned long data = timer->data;
904
                        detach_timer(timer);
905
                        timer->next = timer->prev = NULL;
906
                        sti();
907
                        fn(data);
908
                        cli();
909
                }
910
                ++timer_jiffies;
911
                tv1.index = (tv1.index + 1) & TVR_MASK;
912
        }
913
        sti();
914
}
915
 
916
static inline void run_old_timers(void)
917
{
918
        struct timer_struct *tp;
919
        unsigned long mask;
920
 
921
        for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
922
                if (mask > timer_active)
923
                        break;
924
                if (!(mask & timer_active))
925
                        continue;
926
                if (tp->expires > jiffies)
927
                        continue;
928
                timer_active &= ~mask;
929
                tp->fn();
930
                sti();
931
        }
932
}
933
 
934
void tqueue_bh(void)
935
{
936
        run_task_queue(&tq_timer);
937
}
938
 
939
void immediate_bh(void)
940
{
941
        run_task_queue(&tq_immediate);
942
}
943
 
944
unsigned long timer_active = 0;
945
struct timer_struct timer_table[32];
946
 
947
/*
 * Load averages, updated by calc_load() with the EXP_1, EXP_5 and
 * EXP_15 decay constants respectively.
 *
 * The name was changed to avenrun[] because the GNU make sources
 * (load.c) seem to imply that this is the standard name for this kind
 * of thing. Nothing else seems to be standardized: the fractional
 * size etc. all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0, 0, 0 };
954
 
955
/*
956
 * Nr of active tasks - counted in fixed-point numbers
957
 */
958
static unsigned long count_active_tasks(void)
959
{
960
        struct task_struct **p;
961
        unsigned long nr = 0;
962
 
963
        for(p = &LAST_TASK; p > &FIRST_TASK; --p)
964
                if (*p && ((*p)->state == TASK_RUNNING ||
965
                           (*p)->state == TASK_UNINTERRUPTIBLE ||
966
                           (*p)->state == TASK_SWAPPING))
967
                        nr += FIXED_1;
968
#ifdef __SMP__
969
        nr-=(smp_num_cpus-1)*FIXED_1;
970
#endif                  
971
        return nr;
972
}
973
 
974
static inline void calc_load(unsigned long ticks)
975
{
976
        unsigned long active_tasks; /* fixed-point */
977
        static int count = LOAD_FREQ;
978
 
979
        count -= ticks;
980
        if (count < 0) {
981
                count += LOAD_FREQ;
982
                active_tasks = count_active_tasks();
983
                CALC_LOAD(avenrun[0], EXP_1, active_tasks);
984
                CALC_LOAD(avenrun[1], EXP_5, active_tasks);
985
                CALC_LOAD(avenrun[2], EXP_15, active_tasks);
986
        }
987
}
988
 
989
/*
990
 * this routine handles the overflow of the microsecond field
991
 *
992
 * The tricky bits of code to handle the accurate clock support
993
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
994
 * They were originally developed for SUN and DEC kernels.
995
 * All the kudos should go to Dave for this stuff.
996
 *
997
 */
998
static void second_overflow(void)
999
{
1000
    long ltemp;
1001
 
1002
    /* Bump the maxerror field */
1003
    time_maxerror += time_tolerance >> SHIFT_USEC;
1004
    if ( time_maxerror > NTP_PHASE_LIMIT ) {
1005
        time_maxerror = NTP_PHASE_LIMIT;
1006
        time_state = TIME_ERROR;        /* p. 17, sect. 4.3, (b) */
1007
        time_status |= STA_UNSYNC;
1008
    }
1009
 
1010
    /*
1011
     * Leap second processing. If in leap-insert state at
1012
     * the end of the day, the system clock is set back one
1013
     * second; if in leap-delete state, the system clock is
1014
     * set ahead one second. The microtime() routine or
1015
     * external clock driver will insure that reported time
1016
     * is always monotonic. The ugly divides should be
1017
     * replaced.
1018
     */
1019
    switch (time_state) {
1020
 
1021
    case TIME_OK:
1022
        if (time_status & STA_INS)
1023
            time_state = TIME_INS;
1024
        else if (time_status & STA_DEL)
1025
            time_state = TIME_DEL;
1026
        break;
1027
 
1028
    case TIME_INS:
1029
        if (xtime.tv_sec % 86400 == 0) {
1030
            xtime.tv_sec--;
1031
            time_state = TIME_OOP;
1032
            printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
1033
        }
1034
        break;
1035
 
1036
    case TIME_DEL:
1037
        if ((xtime.tv_sec + 1) % 86400 == 0) {
1038
            xtime.tv_sec++;
1039
            time_state = TIME_WAIT;
1040
            printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
1041
        }
1042
        break;
1043
 
1044
    case TIME_OOP:
1045
        time_state = TIME_WAIT;
1046
        break;
1047
 
1048
    case TIME_WAIT:
1049
        if (!(time_status & (STA_INS | STA_DEL)))
1050
            time_state = TIME_OK;
1051
    }
1052
 
1053
    /*
1054
     * Compute the phase adjustment for the next second. In
1055
     * PLL mode, the offset is reduced by a fixed factor
1056
     * times the time constant. In FLL mode the offset is
1057
     * used directly. In either mode, the maximum phase
1058
     * adjustment for each second is clamped so as to spread
1059
     * the adjustment over not more than the number of
1060
     * seconds between updates.
1061
     */
1062
    if (time_offset < 0) {
1063
        ltemp = -time_offset;
1064
        if (!(time_status & STA_FLL))
1065
            ltemp >>= SHIFT_KG + time_constant;
1066
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
1067
            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
1068
        time_offset += ltemp;
1069
        time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
1070
    } else {
1071
        ltemp = time_offset;
1072
        if (!(time_status & STA_FLL))
1073
            ltemp >>= SHIFT_KG + time_constant;
1074
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
1075
            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
1076
        time_offset -= ltemp;
1077
        time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
1078
    }
1079
 
1080
    /*
1081
     * Compute the frequency estimate and additional phase
1082
     * adjustment due to frequency error for the next
1083
     * second. When the PPS signal is engaged, gnaw on the
1084
     * watchdog counter and update the frequency computed by
1085
     * the pll and the PPS signal.
1086
     */
1087
    pps_valid++;
1088
    if (pps_valid == PPS_VALID) {       /* PPS signal lost */
1089
        pps_jitter = MAXTIME;
1090
        pps_stabil = MAXFREQ;
1091
        time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
1092
                         STA_PPSWANDER | STA_PPSERROR);
1093
    }
1094
    ltemp = time_freq + pps_freq;
1095
    if (ltemp < 0)
1096
        time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
1097
    else
1098
        time_adj +=  ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
1099
 
1100
#if HZ == 100
1101
    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
1102
     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
1103
     */
1104
    if (time_adj < 0)
1105
        time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
1106
    else
1107
        time_adj += (time_adj >> 2) + (time_adj >> 5);
1108
#endif
1109
}
1110
 
1111
/* in the NTP reference this is called "hardclock()" */
1112
static void update_wall_time_one_tick(void)
1113
{
1114
        if ( (time_adjust_step = time_adjust) != 0 ) {
1115
            /* We are doing an adjtime thing.
1116
             *
1117
             * Prepare time_adjust_step to be within bounds.
1118
             * Note that a positive time_adjust means we want the clock
1119
             * to run faster.
1120
             *
1121
             * Limit the amount of the step to be in the range
1122
             * -tickadj .. +tickadj
1123
             */
1124
             if (time_adjust > tickadj)
1125
                time_adjust_step = tickadj;
1126
             else if (time_adjust < -tickadj)
1127
                time_adjust_step = -tickadj;
1128
 
1129
            /* Reduce by this step the amount of time left  */
1130
            time_adjust -= time_adjust_step;
1131
        }
1132
        xtime.tv_usec += tick + time_adjust_step;
1133
        /*
1134
         * Advance the phase, once it gets to one microsecond, then
1135
         * advance the tick more.
1136
         */
1137
        time_phase += time_adj;
1138
        if (time_phase <= -FINEUSEC) {
1139
                long ltemp = -time_phase >> SHIFT_SCALE;
1140
                time_phase += ltemp << SHIFT_SCALE;
1141
                xtime.tv_usec -= ltemp;
1142
        }
1143
        else if (time_phase >= FINEUSEC) {
1144
                long ltemp = time_phase >> SHIFT_SCALE;
1145
                time_phase -= ltemp << SHIFT_SCALE;
1146
                xtime.tv_usec += ltemp;
1147
        }
1148
}
1149
 
1150
/*
1151
 * Using a loop looks inefficient, but "ticks" is
1152
 * usually just one (we shouldn't be losing ticks,
1153
 * we're doing this this way mainly for interrupt
1154
 * latency reasons, not because we think we'll
1155
 * have lots of lost timer ticks
1156
 */
1157
static void update_wall_time(unsigned long ticks)
1158
{
1159
        do {
1160
                ticks--;
1161
                update_wall_time_one_tick();
1162
        } while (ticks);
1163
 
1164
        if (xtime.tv_usec >= 1000000) {
1165
            xtime.tv_usec -= 1000000;
1166
            xtime.tv_sec++;
1167
            second_overflow();
1168
        }
1169
}
1170
 
1171
static inline void do_process_times(struct task_struct *p,
1172
        unsigned long user, unsigned long system)
1173
{
1174
        long psecs;
1175
 
1176
        p->utime += user;
1177
        p->stime += system;
1178
 
1179
        psecs = (p->stime + p->utime) / HZ;
1180
        if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) {
1181
                /* Send SIGXCPU every second.. */
1182
                if (psecs * HZ == p->stime + p->utime)
1183
                        send_sig(SIGXCPU, p, 1);
1184
                /* and SIGKILL when we go over max.. */
1185
                if (psecs > p->rlim[RLIMIT_CPU].rlim_max)
1186
                        send_sig(SIGKILL, p, 1);
1187
        }
1188
}
1189
 
1190
static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
1191
{
1192
        unsigned long it_virt = p->it_virt_value;
1193
 
1194
        if (it_virt) {
1195
                if (it_virt <= ticks) {
1196
                        it_virt = ticks + p->it_virt_incr;
1197
                        send_sig(SIGVTALRM, p, 1);
1198
                }
1199
                p->it_virt_value = it_virt - ticks;
1200
        }
1201
}
1202
 
1203
static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
1204
{
1205
        unsigned long it_prof = p->it_prof_value;
1206
 
1207
        if (it_prof) {
1208
                if (it_prof <= ticks) {
1209
                        it_prof = ticks + p->it_prof_incr;
1210
                        send_sig(SIGPROF, p, 1);
1211
                }
1212
                p->it_prof_value = it_prof - ticks;
1213
        }
1214
}
1215
 
1216
/*
 * Per-task tick accounting: CPU times and limits, then the virtual
 * interval timer (driven by user ticks only) and the profiling
 * interval timer (driven by all ticks).
 */
static __inline__ void update_one_process(struct task_struct *p,
	unsigned long ticks, unsigned long user, unsigned long system)
{
	do_process_times(p, user, system);
	do_it_virt(p, user);	/* virtual timer: user time only */
	do_it_prof(p, ticks);	/* profiling timer: user + system */
}
1223
 
1224
static void update_process_times(unsigned long ticks, unsigned long system)
1225
{
1226
#ifndef  __SMP__
1227
        struct task_struct * p = current;
1228
        unsigned long user = ticks - system;
1229
        if (p->pid) {
1230
                p->counter -= ticks;
1231
                if (p->counter < 0) {
1232
                        p->counter = 0;
1233
                        need_resched = 1;
1234
                }
1235
                if (p->priority < DEF_PRIORITY)
1236
                        kstat.cpu_nice += user;
1237
                else
1238
                        kstat.cpu_user += user;
1239
                kstat.cpu_system += system;
1240
        }
1241
        update_one_process(p, ticks, user, system);
1242
#else
1243
        int cpu,j;
1244
        cpu = smp_processor_id();
1245
        for (j=0;j<smp_num_cpus;j++)
1246
        {
1247
                int i = cpu_logical_map[j];
1248
                struct task_struct *p;
1249
 
1250
#ifdef __SMP_PROF__
1251
                if (test_bit(i,&smp_idle_map))
1252
                        smp_idle_count[i]++;
1253
#endif
1254
                p = current_set[i];
1255
                /*
1256
                 * Do we have a real process?
1257
                 */
1258
                if (p->pid) {
1259
                        /* assume user-mode process */
1260
                        unsigned long utime = ticks;
1261
                        unsigned long stime = 0;
1262
                        if (cpu == i) {
1263
                                utime = ticks-system;
1264
                                stime = system;
1265
                        } else if (smp_proc_in_lock[j]) {
1266
                                utime = 0;
1267
                                stime = ticks;
1268
                        }
1269
                        update_one_process(p, ticks, utime, stime);
1270
 
1271
                        if (p->priority < DEF_PRIORITY)
1272
                                kstat.cpu_nice += utime;
1273
                        else
1274
                                kstat.cpu_user += utime;
1275
                        kstat.cpu_system += stime;
1276
 
1277
                        p->counter -= ticks;
1278
                        if (p->counter >= 0)
1279
                                continue;
1280
                        p->counter = 0;
1281
                } else {
1282
                        /*
1283
                         * Idle processor found, do we have anything
1284
                         * we could run?
1285
                         */
1286
                        if (!(0x7fffffff & smp_process_available))
1287
                                continue;
1288
                }
1289
                /* Ok, we should reschedule, do the magic */
1290
                if (i==cpu)
1291
                        need_resched = 1;
1292
                else
1293
                        smp_message_pass(i, MSG_RESCHEDULE, 0L, 0);
1294
        }
1295
#endif
1296
}
1297
 
1298
/* Ticks counted by do_timer() and not yet consumed by update_times(). */
static unsigned long lost_ticks = 0;
/* The subset of those ticks that arrived while not in user mode. */
static unsigned long lost_ticks_system = 0;
1300
 
1301
static inline void update_times(void)
1302
{
1303
        unsigned long ticks;
1304
 
1305
        ticks = xchg(&lost_ticks, 0);
1306
 
1307
        if (ticks) {
1308
                unsigned long system;
1309
 
1310
                system = xchg(&lost_ticks_system, 0);
1311
                calc_load(ticks);
1312
                update_wall_time(ticks);
1313
                update_process_times(ticks, system);
1314
        }
1315
}
1316
 
1317
/*
 * Bottom half for TIMER_BH (registered in sched_init()): time and
 * accounting updates first, then the old-style timers, then the
 * timer lists.
 */
void timer_bh(void)
{
	update_times();
	run_old_timers();
	run_timer_list();
}
1323
 
1324
void do_timer(struct pt_regs * regs)
1325
{
1326
        (*(unsigned long *)&jiffies)++;
1327
        lost_ticks++;
1328
        mark_bh(TIMER_BH);
1329
        if (!user_mode(regs)) {
1330
                lost_ticks_system++;
1331
                if (prof_buffer && current->pid) {
1332
                        extern int _stext;
1333
                        unsigned long ip = instruction_pointer(regs);
1334
                        ip -= (unsigned long) &_stext;
1335
                        ip >>= prof_shift;
1336
                        if (ip < prof_len)
1337
                                prof_buffer[ip]++;
1338
                }
1339
        }
1340
        if (tq_timer)
1341
                mark_bh(TQUEUE_BH);
1342
}
1343
 
1344
#ifndef __alpha__
1345
 
1346
/*
1347
 * For backwards compatibility?  This can be done in libc so Alpha
1348
 * and all newer ports shouldn't need it.
1349
 */
1350
asmlinkage unsigned int sys_alarm(unsigned int seconds)
1351
{
1352
        struct itimerval it_new, it_old;
1353
        unsigned int oldalarm;
1354
 
1355
        it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
1356
        it_new.it_value.tv_sec = seconds;
1357
        it_new.it_value.tv_usec = 0;
1358
        _setitimer(ITIMER_REAL, &it_new, &it_old);
1359
        oldalarm = it_old.it_value.tv_sec;
1360
        /* ehhh.. We can't return 0 if we have an alarm pending.. */
1361
        /* And we'd better return too much than too little anyway */
1362
        if (it_old.it_value.tv_usec)
1363
                oldalarm++;
1364
        return oldalarm;
1365
}
1366
 
1367
/*
1368
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
1369
 * should be moved into arch/i386 instead?
1370
 */
1371
asmlinkage int sys_getpid(void)
1372
{
1373
        return current->pid;
1374
}
1375
 
1376
asmlinkage int sys_getppid(void)
1377
{
1378
        return current->p_opptr->pid;
1379
}
1380
 
1381
asmlinkage int sys_getuid(void)
1382
{
1383
        return current->uid;
1384
}
1385
 
1386
asmlinkage int sys_geteuid(void)
1387
{
1388
        return current->euid;
1389
}
1390
 
1391
asmlinkage int sys_getgid(void)
1392
{
1393
        return current->gid;
1394
}
1395
 
1396
asmlinkage int sys_getegid(void)
1397
{
1398
        return current->egid;
1399
}
1400
 
1401
/*
1402
 * This has been replaced by sys_setpriority.  Maybe it should be
1403
 * moved into the arch dependent tree for those ports that require
1404
 * it for backward compatibility?
1405
 */
1406
asmlinkage int sys_nice(int increment)
1407
{
1408
        unsigned long newprio;
1409
        int increase = 0;
1410
 
1411
        newprio = increment;
1412
        if (increment < 0) {
1413
                if (!suser())
1414
                        return -EPERM;
1415
                newprio = -increment;
1416
                increase = 1;
1417
        }
1418
        if (newprio > 40)
1419
                newprio = 40;
1420
        /*
1421
         * do a "normalization" of the priority (traditionally
1422
         * unix nice values are -20..20, linux doesn't really
1423
         * use that kind of thing, but uses the length of the
1424
         * timeslice instead (default 150 msec). The rounding is
1425
         * why we want to avoid negative values.
1426
         */
1427
        newprio = (newprio * DEF_PRIORITY + 10) / 20;
1428
        increment = newprio;
1429
        if (increase)
1430
                increment = -increment;
1431
        newprio = current->priority - increment;
1432
        if ((signed) newprio < 1)
1433
                newprio = 1;
1434
        if (newprio > DEF_PRIORITY*2)
1435
                newprio = DEF_PRIORITY*2;
1436
        current->priority = newprio;
1437
        return 0;
1438
}
1439
 
1440
#endif
1441
 
1442
/*
 * Look up a task by pid. A pid of 0 means the current task. Returns
 * NULL when no task in the for_each_task() walk has that pid.
 */
static struct task_struct *find_process_by_pid(pid_t pid) {
	struct task_struct *t;

	if (pid == 0)
		return current;

	for_each_task(t) {
		if (t && t->pid == pid)
			return t;
	}
	return NULL;
}
1458
 
1459
/*
 * Common worker for sys_sched_setscheduler() and sys_sched_setparam().
 *
 * pid    - target task (0 = current); negative values give -EINVAL
 * policy - SCHED_FIFO, SCHED_RR or SCHED_OTHER; a negative value keeps
 *          the target's current policy
 * param  - user-space sched_param holding the new priority
 *
 * Returns 0 on success, or -EINVAL, the verify_area() error, -ESRCH
 * or -EPERM.
 */
static int setscheduler(pid_t pid, int policy,
			struct sched_param *param)
{
	struct sched_param req;
	struct task_struct *target;
	int err;

	if (pid < 0 || !param)
		return -EINVAL;

	err = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
	if (err)
		return err;
	memcpy_fromfs(&req, param, sizeof(struct sched_param));

	target = find_process_by_pid(pid);
	if (!target)
		return -ESRCH;

	if (policy < 0) {
		policy = target->policy;
	} else {
		switch (policy) {
		case SCHED_FIFO:
		case SCHED_RR:
		case SCHED_OTHER:
			break;
		default:
			return -EINVAL;
		}
	}

	/*
	 * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
	 * priority for SCHED_OTHER is 0.
	 */
	if (req.sched_priority < 0 || req.sched_priority > 99)
		return -EINVAL;
	if (policy == SCHED_OTHER ? req.sched_priority != 0
				  : req.sched_priority == 0)
		return -EINVAL;

	/* real-time policies are reserved for the superuser */
	if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
		return -EPERM;
	/* otherwise the caller's euid must match the target's euid or uid */
	if (current->euid != target->euid && current->euid != target->uid &&
	    !suser())
		return -EPERM;

	target->policy = policy;
	target->rt_priority = req.sched_priority;
	cli();
	if (target->next_run)
		move_last_runqueue(target);
	sti();
	need_resched = 1;
	return 0;
}
1508
 
1509
/* Set both policy and priority of 'pid'; see setscheduler(). */
asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
				      struct sched_param *param)
{
	return setscheduler(pid, policy, param);
}
1514
 
1515
/*
 * Set the priority of 'pid' while keeping its policy: setscheduler()
 * treats the negative policy argument as "use the current policy".
 */
asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
{
	return setscheduler(pid, -1, param);
}
1519
 
1520
/*
 * Return the scheduling policy of task 'pid' (0 = current), -EINVAL
 * for a negative pid, or -ESRCH when no such task exists.
 */
asmlinkage int sys_sched_getscheduler(pid_t pid)
{
	struct task_struct *target;

	if (pid < 0)
		return -EINVAL;

	target = find_process_by_pid(pid);
	if (target)
		return target->policy;

	return -ESRCH;
}
1533
 
1534
/*
 * Copy the rt_priority of task 'pid' (0 = current) into the user-space
 * sched_param at 'param'.
 *
 * Returns 0 on success, -EINVAL for a NULL param or negative pid, the
 * verify_area() error for an unwritable buffer, or -ESRCH when no such
 * task exists.
 */
asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
{
	struct sched_param out;
	struct task_struct *target;
	int err;

	if (pid < 0 || !param)
		return -EINVAL;

	err = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
	if (err)
		return err;

	target = find_process_by_pid(pid);
	if (!target)
		return -ESRCH;

	out.sched_priority = target->rt_priority;
	memcpy_tofs(param, &out, sizeof(struct sched_param));
	return 0;
}
1556
 
1557
asmlinkage int sys_sched_yield(void)
1558
{
1559
        cli();
1560
        move_last_runqueue(current);
1561
        current->counter = 0;
1562
        need_resched = 1;
1563
        sti();
1564
        return 0;
1565
}
1566
 
1567
asmlinkage int sys_sched_get_priority_max(int policy)
1568
{
1569
        switch (policy) {
1570
              case SCHED_FIFO:
1571
              case SCHED_RR:
1572
                return 99;
1573
              case SCHED_OTHER:
1574
                return 0;
1575
        }
1576
 
1577
        return -EINVAL;
1578
}
1579
 
1580
asmlinkage int sys_sched_get_priority_min(int policy)
1581
{
1582
        switch (policy) {
1583
              case SCHED_FIFO:
1584
              case SCHED_RR:
1585
                return 1;
1586
              case SCHED_OTHER:
1587
                return 0;
1588
        }
1589
 
1590
        return -EINVAL;
1591
}
1592
 
1593
/*
 * Report the round-robin time quantum in the user-space timespec at
 * 'interval'.
 *
 * A fixed value of 150 msec is reported; 'pid' is not examined.
 * Returns 0 on success or the verify_area() error when 'interval' is
 * not writable.
 */
asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
	int error;
	struct timespec t;

	error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
	if (error)
		return error;

	/*
	 * The 150 figure was taken from 2.1.38. tv_nsec is in
	 * nanoseconds, so the 150 msec timeslice is 150000000 ns (the
	 * previous value of 150000 was only 150 microseconds).
	 */
	t.tv_sec = 0;
	t.tv_nsec = 150000000L;   /* is this right for non-intel architecture too?*/
	memcpy_tofs(interval, &t, sizeof(struct timespec));

	return 0;
}
1609
 
1610
/*
1611
 * change timeval to jiffies, trying to avoid the
1612
 * most obvious overflows..
1613
 */
1614
static unsigned long timespectojiffies(struct timespec *value)
1615
{
1616
        unsigned long sec = (unsigned) value->tv_sec;
1617
        long nsec = value->tv_nsec;
1618
 
1619
        if (sec > (LONG_MAX / HZ))
1620
                return LONG_MAX;
1621
        nsec += 1000000000L / HZ - 1;
1622
        nsec /= 1000000000L / HZ;
1623
        return HZ * sec + nsec;
1624
}
1625
 
1626
static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
1627
{
1628
        value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
1629
        value->tv_sec = jiffies / HZ;
1630
        return;
1631
}
1632
 
1633
asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
1634
{
1635
        int error;
1636
        struct timespec t;
1637
        unsigned long expire;
1638
 
1639
        error = verify_area(VERIFY_READ, rqtp, sizeof(struct timespec));
1640
        if (error)
1641
                return error;
1642
        memcpy_fromfs(&t, rqtp, sizeof(struct timespec));
1643
        if (rmtp) {
1644
                error = verify_area(VERIFY_WRITE, rmtp,
1645
                                    sizeof(struct timespec));
1646
                if (error)
1647
                        return error;
1648
        }
1649
 
1650
        if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
1651
                return -EINVAL;
1652
 
1653
        if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
1654
            current->policy != SCHED_OTHER) {
1655
                /*
1656
                 * Short delay requests up to 2 ms will be handled with
1657
                 * high precision by a busy wait for all real-time processes.
1658
                 */
1659
                udelay((t.tv_nsec + 999) / 1000);
1660
                return 0;
1661
        }
1662
 
1663
        expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
1664
        current->timeout = expire;
1665
        current->state = TASK_INTERRUPTIBLE;
1666
        schedule();
1667
 
1668
        if (expire > jiffies) {
1669
                if (rmtp) {
1670
                        jiffiestotimespec(expire - jiffies -
1671
                                          (expire > jiffies + 1), &t);
1672
                        memcpy_tofs(rmtp, &t, sizeof(struct timespec));
1673
                }
1674
                return -EINTR;
1675
        }
1676
 
1677
        return 0;
1678
}
1679
 
1680
/* Used in fs/proc/array.c */
1681
unsigned long get_wchan(struct task_struct *p)
1682
{
1683
        if (!p || p == current || p->state == TASK_RUNNING)
1684
                return 0;
1685
#if defined(__i386__)
1686
        {
1687
                unsigned long ebp, eip;
1688
                unsigned long stack_page;
1689
                int count = 0;
1690
 
1691
                stack_page = p->kernel_stack_page;
1692
                if (!stack_page)
1693
                        return 0;
1694
                ebp = p->tss.ebp;
1695
                do {
1696
                        if (ebp < stack_page || ebp >= 4092+stack_page)
1697
                                return 0;
1698
                        eip = *(unsigned long *) (ebp+4);
1699
                        if (eip < (unsigned long) interruptible_sleep_on
1700
                            || eip >= (unsigned long) add_timer)
1701
                                return eip;
1702
                        ebp = *(unsigned long *) ebp;
1703
                } while (count++ < 16);
1704
        }
1705
#elif defined(__alpha__)
1706
        /*
1707
         * This one depends on the frame size of schedule().  Do a
1708
         * "disass schedule" in gdb to find the frame size.  Also, the
1709
         * code assumes that sleep_on() follows immediately after
1710
         * interruptible_sleep_on() and that add_timer() follows
1711
         * immediately after interruptible_sleep().  Ugly, isn't it?
1712
         * Maybe adding a wchan field to task_struct would be better,
1713
         * after all...
1714
         */
1715
        {
1716
            unsigned long schedule_frame;
1717
            unsigned long pc;
1718
 
1719
            pc = thread_saved_pc(&p->tss);
1720
            if (pc >= (unsigned long) interruptible_sleep_on && pc < (unsigned long) add_timer) {
1721
                schedule_frame = ((unsigned long *)p->tss.ksp)[6];
1722
                return ((unsigned long *)schedule_frame)[12];
1723
            }
1724
            return pc;
1725
        }
1726
#endif
1727
        return 0;
1728
}
1729
 
1730
static void show_task(int nr,struct task_struct * p)
1731
{
1732
        unsigned long free;
1733
        static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
1734
 
1735
        printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
1736
        if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
1737
                printk(stat_nam[p->state]);
1738
        else
1739
                printk(" ");
1740
#if ((~0UL) == 0xffffffff)
1741
        if (p == current)
1742
                printk(" current  ");
1743
        else
1744
                printk(" %08lX ", thread_saved_pc(&p->tss));
1745
        printk("%08lX ", get_wchan(p));
1746
#else
1747
        if (p == current)
1748
                printk("   current task   ");
1749
        else
1750
                printk(" %016lx ", thread_saved_pc(&p->tss));
1751
        printk("%08lX ", get_wchan(p) & 0xffffffffL);
1752
#endif
1753
        if (((unsigned long *)p->kernel_stack_page)[0] != STACK_MAGIC)
1754
                printk(" bad-");
1755
 
1756
        for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
1757
                if (((unsigned long *)p->kernel_stack_page)[free] != STACK_UNTOUCHED_MAGIC)
1758
                        break;
1759
        }
1760
        printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
1761
        if (p->p_cptr)
1762
                printk("%5d ", p->p_cptr->pid);
1763
        else
1764
                printk("      ");
1765
        if (p->p_ysptr)
1766
                printk("%7d", p->p_ysptr->pid);
1767
        else
1768
                printk("       ");
1769
        if (p->p_osptr)
1770
                printk(" %5d\n", p->p_osptr->pid);
1771
        else
1772
                printk("\n");
1773
}
1774
 
1775
void show_state(void)
1776
{
1777
        int i;
1778
 
1779
#if ((~0UL) == 0xffffffff)
1780
        printk("\n"
1781
               "                                  free                        sibling\n");
1782
        printk("  task             PC     wchan   stack   pid father child younger older\n");
1783
#else
1784
        printk("\n"
1785
               "                                           free                        sibling\n");
1786
        printk("  task                 PC         wchan    stack   pid father child younger older\n");
1787
#endif
1788
        for (i=0 ; i<NR_TASKS ; i++)
1789
                if (task[i])
1790
                        show_task(i,task[i]);
1791
}
1792
 
1793
void sched_init(void)
1794
{
1795
        /*
1796
         *      We have to do a little magic to get the first
1797
         *      process right in SMP mode.
1798
         */
1799
        int cpu=smp_processor_id();
1800
        int i;
1801
#ifndef __SMP__ 
1802
        current_set[cpu]=&init_task;
1803
#else
1804
        init_task.processor=cpu;
1805
        for(cpu = 0; cpu < NR_CPUS; cpu++)
1806
                current_set[cpu] = &init_task;
1807
#endif
1808
 
1809
        init_kernel_stack[0] = STACK_MAGIC;
1810
        for(i=1;i<1024;i++)
1811
                init_kernel_stack[i] = STACK_UNTOUCHED_MAGIC;
1812
 
1813
        init_bh(TIMER_BH, timer_bh);
1814
        init_bh(TQUEUE_BH, tqueue_bh);
1815
        init_bh(IMMEDIATE_BH, immediate_bh);
1816
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.