OpenCores
URL: https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion repository: or1k

or1k/trunk/linux/linux-2.4/kernel/sched.c - blame information for rev 1765 (rev 1275, author phoenix)

/*
 *  linux/kernel/sched.c
 *
 *  Kernel scheduler and related syscalls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
 *              make semaphores SMP safe
 *  1998-11-19  Implemented schedule_timeout() and related stuff
 *              by Andrea Arcangeli
 *  1998-12-28  Implemented better SMP scheduling by Ingo Molnar
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule, etc.) as well as a number of simple system
 * call functions (like getpid()) which just extract a field from
 * the current task.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/nmi.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/completion.h>
#include <linux/prefetch.h>
#include <linux/compiler.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>

extern void timer_bh(void);
extern void tqueue_bh(void);
extern void immediate_bh(void);

/*
 * scheduler variables
 */

unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */

extern void mem_use(void);

/*
 * Scheduling quanta.
 *
 * NOTE! The unix "nice" value influences how long a process
 * gets. The nice value ranges from -20 to +19, where a -20
 * is a "high-priority" task, and a "+19" is a low-priority
 * task.
 *
 * We want the time-slice to be around 50ms or so, so this
 * calculation depends on the value of HZ.
 */
#if HZ < 200
#define TICK_SCALE(x)   ((x) >> 2)
#elif HZ < 400
#define TICK_SCALE(x)   ((x) >> 1)
#elif HZ < 800
#define TICK_SCALE(x)   (x)
#elif HZ < 1600
#define TICK_SCALE(x)   ((x) << 1)
#else
#define TICK_SCALE(x)   ((x) << 2)
#endif

#define NICE_TO_TICKS(nice)     (TICK_SCALE(20-(nice))+1)
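
/*
 * Worked example (editor's note, not in the original source): with HZ=100,
 * TICK_SCALE(x) is (x) >> 2, so
 *
 *      nice   0  ->  NICE_TO_TICKS = (20 >> 2) + 1 =  6 ticks ~  60ms
 *      nice -20  ->  NICE_TO_TICKS = (40 >> 2) + 1 = 11 ticks ~ 110ms
 *      nice +19  ->  NICE_TO_TICKS = ( 1 >> 2) + 1 =  1 tick  ~  10ms
 *
 * i.e. the default time-slice lands near the 50ms target mentioned above.
 */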


/*
 *      Init task must be ok at boot for the ix86 as we will check its signals
 *      via the SMP irq return path.
 */

struct task_struct * init_tasks[NR_CPUS] = {&init_task, };

/*
 * The tasklist_lock protects the linked list of processes.
 *
 * The runqueue_lock locks the parts that actually access
 * and change the run-queues, and have to be interrupt-safe.
 *
 * If both locks are to be concurrently held, the runqueue_lock
 * nests inside the tasklist_lock.
 *
 * task->alloc_lock nests inside tasklist_lock.
 */
spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;  /* inner */
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;  /* outer */

static LIST_HEAD(runqueue_head);

/*
 * We align per-CPU scheduling data on cacheline boundaries,
 * to prevent cacheline ping-pong.
 */
static union {
        struct schedule_data {
                struct task_struct * curr;
                cycles_t last_schedule;
        } schedule_data;
        char __pad [SMP_CACHE_BYTES];
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};

#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule

struct kernel_stat kstat;
extern struct task_struct *child_reaper;

#ifdef CONFIG_SMP

#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
#define can_schedule(p,cpu) \
        ((p)->cpus_runnable & (p)->cpus_allowed & (1UL << cpu))

#else

#define idle_task(cpu) (&init_task)
#define can_schedule(p,cpu) (1)

#endif

void scheduling_functions_start_here(void) { }

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *       -1000: never select this
 *           0: out of time, recalculate counters (but it might still be
 *              selected)
 *         +ve: "goodness" value (the larger, the better)
 *       +1000: realtime process, select this.
 */

static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
{
        int weight;

        /*
         * select the current process after every other
         * runnable process, but before the idle thread.
         * Also, don't trigger a counter recalculation.
         */
        weight = -1;
        if (p->policy & SCHED_YIELD)
                goto out;

        /*
         * Non-RT process - normal case first.
         */
        if (p->policy == SCHED_OTHER) {
                /*
                 * Give the process a first-approximation goodness value
                 * according to the number of clock-ticks it has left.
                 *
                 * Don't do any other calculations if the time slice is
                 * over..
                 */
                weight = p->counter;
                if (!weight)
                        goto out;

#ifdef CONFIG_SMP
                /* Give a largish advantage to the same processor...   */
                /* (this is equivalent to penalizing other processors) */
                if (p->processor == this_cpu)
                        weight += PROC_CHANGE_PENALTY;
#endif

                /* .. and a slight advantage to the current MM */
                if (p->mm == this_mm || !p->mm)
                        weight += 1;
                weight += 20 - p->nice;
                goto out;
        }

        /*
         * Realtime process, select the first one on the
         * runqueue (taking priorities within processes
         * into account).
         */
        weight = 1000 + p->rt_priority;
out:
        return weight;
}
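
/*
 * Worked example (editor's note, not in the original source): on UP, a
 * SCHED_OTHER task with counter == 6 and nice == 0 that shares the mm of
 * the previously running task gets
 *
 *      weight = 6 (counter) + 1 (same mm) + (20 - 0) (nice) = 27
 *
 * while a realtime task with rt_priority == 50 always gets 1000 + 50 = 1050
 * and therefore beats any SCHED_OTHER task.
 */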

/*
 * the 'goodness value' of replacing a process on a given CPU.
 * positive value means 'replace', zero or negative means 'don't'.
 */
static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
{
        return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
}

/*
 * This is ugly, but reschedule_idle() is very timing-critical.
 * We are called with the runqueue spinlock held and we must
 * not claim the tasklist_lock.
 */
static FASTCALL(void reschedule_idle(struct task_struct * p));

static void reschedule_idle(struct task_struct * p)
{
#ifdef CONFIG_SMP
        int this_cpu = smp_processor_id();
        struct task_struct *tsk, *target_tsk;
        int cpu, best_cpu, i, max_prio;
        cycles_t oldest_idle;

        /*
         * shortcut if the woken up task's last CPU is
         * idle now.
         */
        best_cpu = p->processor;
        if (can_schedule(p, best_cpu)) {
                tsk = idle_task(best_cpu);
                if (cpu_curr(best_cpu) == tsk) {
                        int need_resched;
send_now_idle:
                        /*
                         * If need_resched == -1 then we can skip sending
                         * the IPI altogether, tsk->need_resched is
                         * actively watched by the idle thread.
                         */
                        need_resched = tsk->need_resched;
                        tsk->need_resched = 1;
                        if ((best_cpu != this_cpu) && !need_resched)
                                smp_send_reschedule(best_cpu);
                        return;
                }
        }

        /*
         * We know that the preferred CPU has a cache-affine current
         * process, let's try to find a new idle CPU for the woken-up
         * process. Select the least recently active idle CPU. (that
         * one will have the least active cache context.) Also find
         * the executing process which has the least priority.
         */
        oldest_idle = (cycles_t) -1;
        target_tsk = NULL;
        max_prio = 0;

        for (i = 0; i < smp_num_cpus; i++) {
                cpu = cpu_logical_map(i);
                if (!can_schedule(p, cpu))
                        continue;
                tsk = cpu_curr(cpu);
                /*
                 * We use the first available idle CPU. This creates
                 * a priority list between idle CPUs, but this is not
                 * a problem.
                 */
                if (tsk == idle_task(cpu)) {
#if defined(__i386__) && defined(CONFIG_SMP)
                        /*
                         * Check if two siblings are idle in the same
                         * physical package. Use them if found.
                         */
                        if (smp_num_siblings == 2) {
                                if (cpu_curr(cpu_sibling_map[cpu]) ==
                                    idle_task(cpu_sibling_map[cpu])) {
                                        oldest_idle = last_schedule(cpu);
                                        target_tsk = tsk;
                                        break;
                                }

                        }
#endif
                        if (last_schedule(cpu) < oldest_idle) {
                                oldest_idle = last_schedule(cpu);
                                target_tsk = tsk;
                        }
                } else {
                        if (oldest_idle == (cycles_t)-1) {
                                int prio = preemption_goodness(tsk, p, cpu);

                                if (prio > max_prio) {
                                        max_prio = prio;
                                        target_tsk = tsk;
                                }
                        }
                }
        }
        tsk = target_tsk;
        if (tsk) {
                if (oldest_idle != (cycles_t)-1) {
                        best_cpu = tsk->processor;
                        goto send_now_idle;
                }
                tsk->need_resched = 1;
                if (tsk->processor != this_cpu)
                        smp_send_reschedule(tsk->processor);
        }
        return;


#else /* UP */
        int this_cpu = smp_processor_id();
        struct task_struct *tsk;

        tsk = cpu_curr(this_cpu);
        if (preemption_goodness(tsk, p, this_cpu) > 0)
                tsk->need_resched = 1;
#endif
}
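
/*
 * Summary of the decision order above (editor's note, not in the original
 * source):
 *
 *   1. If the task's last CPU is currently idle, just poke that CPU.
 *   2. Otherwise prefer the allowed CPU that has been idle the longest
 *      (it holds the least useful cache state).
 *   3. If no allowed CPU is idle, preempt the running task with the
 *      largest positive preemption_goodness(), if any.
 */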

/*
 * Careful!
 *
 * This has to add the process to the _end_ of the
 * run-queue, not the beginning. The goodness value will
 * determine whether this process will run next. This is
 * important to get SCHED_FIFO and SCHED_RR right, where
 * a process that has either been pre-empted or whose time
 * slice has expired should be moved to the tail of the run
 * queue for its priority - Bhavesh Davda
 */
static inline void add_to_runqueue(struct task_struct * p)
{
        list_add_tail(&p->run_list, &runqueue_head);
        nr_running++;
}

static inline void move_last_runqueue(struct task_struct * p)
{
        list_del(&p->run_list);
        list_add_tail(&p->run_list, &runqueue_head);
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there.  The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
static inline int try_to_wake_up(struct task_struct * p, int synchronous)
{
        unsigned long flags;
        int success = 0;

        /*
         * We want the common case to fall straight through, thus the goto.
         */
        spin_lock_irqsave(&runqueue_lock, flags);
        p->state = TASK_RUNNING;
        if (task_on_runqueue(p))
                goto out;
        add_to_runqueue(p);
        if (!synchronous || !(p->cpus_allowed & (1UL << smp_processor_id())))
                reschedule_idle(p);
        success = 1;
out:
        spin_unlock_irqrestore(&runqueue_lock, flags);
        return success;
}

inline int wake_up_process(struct task_struct * p)
{
        return try_to_wake_up(p, 0);
}

static void process_timeout(unsigned long __data)
{
        struct task_struct * p = (struct task_struct *) __data;

        wake_up_process(p);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long schedule_timeout(signed long timeout)
{
        struct timer_list timer;
        unsigned long expire;

        switch (timeout)
        {
        case MAX_SCHEDULE_TIMEOUT:
                /*
                 * These special cases are here purely for the caller's
                 * convenience. Nothing more. We could have taken
                 * MAX_SCHEDULE_TIMEOUT from one of the negative values,
                 * but I'd like to return a valid offset (>=0) to allow
                 * the caller to do whatever it wants with the retval.
                 */
                schedule();
                goto out;
        default:
                /*
                 * Another bit of paranoia. Note that the retval will be
                 * 0 since no piece of the kernel is supposed to check
                 * for a negative retval of schedule_timeout() (it
                 * should never happen anyway). You just have the printk()
                 * that will tell you if something has gone wrong and where.
                 */
                if (timeout < 0)
                {
                        printk(KERN_ERR "schedule_timeout: wrong timeout "
                               "value %lx from %p\n", timeout,
                               __builtin_return_address(0));
                        current->state = TASK_RUNNING;
                        goto out;
                }
        }

        expire = timeout + jiffies;

        init_timer(&timer);
        timer.expires = expire;
        timer.data = (unsigned long) current;
        timer.function = process_timeout;

        add_timer(&timer);
        schedule();
        del_timer_sync(&timer);

        timeout = expire - jiffies;

 out:
        return timeout < 0 ? 0 : timeout;
}
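
/*
 * Typical usage (editor's note, not in the original source): a caller that
 * wants to sleep for about one second but wake early on a signal would do
 *
 *      set_current_state(TASK_INTERRUPTIBLE);
 *      remaining = schedule_timeout(HZ);
 *
 * where a non-zero 'remaining' means the sleep was interrupted before the
 * full second elapsed.
 */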

/*
 * schedule_tail() is getting called from the fork return path. This
 * cleans up all remaining scheduler things, without impacting the
 * common case.
 */
static inline void __schedule_tail(struct task_struct *prev)
{
#ifdef CONFIG_SMP
        int policy;

        /*
         * prev->policy can be written from here only before `prev'
         * can be scheduled (before setting prev->cpus_runnable to ~0UL).
         * Of course it must also be read before allowing prev
         * to be rescheduled, but since the write depends on the read
         * to complete, wmb() is enough. (the spin_lock() acquired
         * before setting cpus_runnable is not enough because the spin_lock()
         * common code semantics allow code outside the critical section
         * to be reordered into the critical section)
         */
        policy = prev->policy;
        prev->policy = policy & ~SCHED_YIELD;
        wmb();

        /*
         * fast path falls through. We have to clear cpus_runnable before
         * checking prev->state to avoid a wakeup race. Protect against
         * the task exiting early.
         */
        task_lock(prev);
        task_release_cpu(prev);
        mb();
        if (prev->state == TASK_RUNNING)
                goto needs_resched;

out_unlock:
        task_unlock(prev);      /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
        return;

        /*
         * Slow path - we 'push' the previous process and
         * reschedule_idle() will attempt to find a new
         * processor for it. (but it might preempt the
         * current process as well.) We must take the runqueue
         * lock and re-check prev->state to be correct. It might
         * still happen that this process has a preemption
         * 'in progress' already - but this is not a problem and
         * might happen in other circumstances as well.
         */
needs_resched:
        {
                unsigned long flags;

                /*
                 * Avoid taking the runqueue lock in cases where
                 * no preemption-check is necessary:
                 */
                if ((prev == idle_task(smp_processor_id())) ||
                                                (policy & SCHED_YIELD))
                        goto out_unlock;

                spin_lock_irqsave(&runqueue_lock, flags);
                if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev))
                        reschedule_idle(prev);
                spin_unlock_irqrestore(&runqueue_lock, flags);
                goto out_unlock;
        }
#else
        prev->policy &= ~SCHED_YIELD;
#endif /* CONFIG_SMP */
}

asmlinkage void schedule_tail(struct task_struct *prev)
{
        __schedule_tail(prev);
}

/*
 *  'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
        struct schedule_data * sched_data;
        struct task_struct *prev, *next, *p;
        struct list_head *tmp;
        int this_cpu, c;


        spin_lock_prefetch(&runqueue_lock);

        BUG_ON(!current->active_mm);
need_resched_back:
        prev = current;
        this_cpu = prev->processor;

        if (unlikely(in_interrupt())) {
                printk("Scheduling in interrupt\n");
                BUG();
        }

        release_kernel_lock(prev, this_cpu);

        /*
         * 'sched_data' is protected by the fact that we can run
         * only one process per CPU.
         */
        sched_data = & aligned_data[this_cpu].schedule_data;

        spin_lock_irq(&runqueue_lock);

        /* move an exhausted RR process to be last.. */
        if (unlikely(prev->policy == SCHED_RR))
                if (!prev->counter) {
                        prev->counter = NICE_TO_TICKS(prev->nice);
                        move_last_runqueue(prev);
                }

        switch (prev->state) {
                case TASK_INTERRUPTIBLE:
                        if (signal_pending(prev)) {
                                prev->state = TASK_RUNNING;
                                break;
                        }
                default:
                        del_from_runqueue(prev);
                case TASK_RUNNING:;
        }
        prev->need_resched = 0;

        /*
         * this is the scheduler proper:
         */

repeat_schedule:
        /*
         * Default process to select..
         */
        next = idle_task(this_cpu);
        c = -1000;
        list_for_each(tmp, &runqueue_head) {
                p = list_entry(tmp, struct task_struct, run_list);
                if (can_schedule(p, this_cpu)) {
                        int weight = goodness(p, this_cpu, prev->active_mm);
                        if (weight > c)
                                c = weight, next = p;
                }
        }

        /* Do we need to re-calculate counters? */
        if (unlikely(!c)) {
                struct task_struct *p;

                spin_unlock_irq(&runqueue_lock);
                read_lock(&tasklist_lock);
                for_each_task(p)
                        p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
                read_unlock(&tasklist_lock);
                spin_lock_irq(&runqueue_lock);
                goto repeat_schedule;
        }

        /*
         * from this point on nothing can prevent us from
         * switching to the next task, save this fact in
         * sched_data.
         */
        sched_data->curr = next;
        task_set_cpu(next, this_cpu);
        spin_unlock_irq(&runqueue_lock);

        if (unlikely(prev == next)) {
                /* We won't go through the normal tail, so do this by hand */
                prev->policy &= ~SCHED_YIELD;
                goto same_process;
        }

#ifdef CONFIG_SMP
        /*
         * maintain the per-process 'last schedule' value.
         * (this has to be recalculated even if we reschedule to
         * the same process) Currently this is only used on SMP,
         * and it's approximate, so we do not have to maintain
         * it while holding the runqueue spinlock.
         */
        sched_data->last_schedule = get_cycles();

        /*
         * We drop the scheduler lock early (it's a global spinlock),
         * thus we have to lock the previous process from getting
         * rescheduled during switch_to().
         */

#endif /* CONFIG_SMP */

        kstat.context_swtch++;
        /*
         * there are 3 processes which are affected by a context switch:
         *
         * prev == .... ==> (last => next)
         *
         * It's the 'much more previous' 'prev' that is on next's stack,
         * but prev is set to (the just run) 'last' process by switch_to().
         * This might sound slightly confusing but makes tons of sense.
         */
        prepare_to_switch();
        {
                struct mm_struct *mm = next->mm;
                struct mm_struct *oldmm = prev->active_mm;
                if (!mm) {
                        BUG_ON(next->active_mm);
                        next->active_mm = oldmm;
                        atomic_inc(&oldmm->mm_count);
                        enter_lazy_tlb(oldmm, next, this_cpu);
                } else {
                        BUG_ON(next->active_mm != mm);
                        switch_mm(oldmm, mm, next, this_cpu);
                }

                if (!prev->mm) {
                        prev->active_mm = NULL;
                        mmdrop(oldmm);
                }
        }

        /*
         * This just switches the register state and the
         * stack.
         */
        switch_to(prev, next, prev);
        __schedule_tail(prev);

same_process:
        reacquire_kernel_lock(current);
        if (current->need_resched)
                goto need_resched_back;
        return;
}
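
/*
 * Note on the recalculation loop above (editor's note, not in the original
 * source): when every runnable task has exhausted its counter, each task
 * gets counter = (counter >> 1) + NICE_TO_TICKS(nice). A task that keeps
 * sleeping therefore accumulates credit that converges towards roughly
 * twice its base time-slice, which is how interactive tasks end up with a
 * goodness() bonus over CPU hogs.
 */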

/*
 * The core wakeup function.  Non-exclusive wakeups (nr_exclusive == 0) just wake everything
 * up.  If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the
 * non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING.  try_to_wake_up() returns zero
 * in this (rare) case, and we handle it by continuing to scan the queue.
 */
static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
                                     int nr_exclusive, const int sync)
{
        struct list_head *tmp;
        struct task_struct *p;

        CHECK_MAGIC_WQHEAD(q);
        WQ_CHECK_LIST_HEAD(&q->task_list);

        list_for_each(tmp,&q->task_list) {
                unsigned int state;
                wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);

                CHECK_MAGIC(curr->__magic);
                p = curr->task;
                state = p->state;
                if (state & mode) {
                        WQ_NOTE_WAKER(curr);
                        if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                                break;
                }
        }
}

void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
{
        if (q) {
                unsigned long flags;
                wq_read_lock_irqsave(&q->lock, flags);
                __wake_up_common(q, mode, nr, 0);
                wq_read_unlock_irqrestore(&q->lock, flags);
        }
}

void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
{
        if (q) {
                unsigned long flags;
                wq_read_lock_irqsave(&q->lock, flags);
                __wake_up_common(q, mode, nr, 1);
                wq_read_unlock_irqrestore(&q->lock, flags);
        }
}

void complete(struct completion *x)
{
        unsigned long flags;

        spin_lock_irqsave(&x->wait.lock, flags);
        x->done++;
        __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
        spin_unlock_irqrestore(&x->wait.lock, flags);
}

void wait_for_completion(struct completion *x)
{
        spin_lock_irq(&x->wait.lock);
        if (!x->done) {
                DECLARE_WAITQUEUE(wait, current);

                wait.flags |= WQ_FLAG_EXCLUSIVE;
                __add_wait_queue_tail(&x->wait, &wait);
                do {
                        __set_current_state(TASK_UNINTERRUPTIBLE);
                        spin_unlock_irq(&x->wait.lock);
                        schedule();
                        spin_lock_irq(&x->wait.lock);
                } while (!x->done);
                __remove_wait_queue(&x->wait, &wait);
        }
        x->done--;
        spin_unlock_irq(&x->wait.lock);
}
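
/*
 * Typical usage (editor's note, not in the original source): a thread that
 * needs to wait for another context to finish some work might do
 *
 *      DECLARE_COMPLETION(done);
 *      ...                             start the other context, pass &done
 *      wait_for_completion(&done);     sleeps until the other side calls
 *                                      complete(&done);
 */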

#define SLEEP_ON_VAR                            \
        unsigned long flags;                    \
        wait_queue_t wait;                      \
        init_waitqueue_entry(&wait, current);

#define SLEEP_ON_HEAD                                   \
        wq_write_lock_irqsave(&q->lock,flags);          \
        __add_wait_queue(q, &wait);                     \
        wq_write_unlock(&q->lock);

#define SLEEP_ON_TAIL                                           \
        wq_write_lock_irq(&q->lock);                            \
        __remove_wait_queue(q, &wait);                          \
        wq_write_unlock_irqrestore(&q->lock,flags);

void interruptible_sleep_on(wait_queue_head_t *q)
{
        SLEEP_ON_VAR

        current->state = TASK_INTERRUPTIBLE;

        SLEEP_ON_HEAD
        schedule();
        SLEEP_ON_TAIL
}

long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
        SLEEP_ON_VAR

        current->state = TASK_INTERRUPTIBLE;

        SLEEP_ON_HEAD
        timeout = schedule_timeout(timeout);
        SLEEP_ON_TAIL

        return timeout;
}

void sleep_on(wait_queue_head_t *q)
{
        SLEEP_ON_VAR

        current->state = TASK_UNINTERRUPTIBLE;

        SLEEP_ON_HEAD
        schedule();
        SLEEP_ON_TAIL
}

long sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
        SLEEP_ON_VAR

        current->state = TASK_UNINTERRUPTIBLE;

        SLEEP_ON_HEAD
        timeout = schedule_timeout(timeout);
        SLEEP_ON_TAIL

        return timeout;
}

void scheduling_functions_end_here(void) { }

#if CONFIG_SMP
/**
 * set_cpus_allowed() - change a given task's processor affinity
 * @p: task to bind
 * @new_mask: bitmask of allowed processors
 *
 * Upon return, the task is running on a legal processor.  Note the caller
 * must have a valid reference to the task: it must not exit() prematurely.
 * This call can sleep; do not hold locks on call.
 */
void set_cpus_allowed(struct task_struct *p, unsigned long new_mask)
{
        new_mask &= cpu_online_map;
        BUG_ON(!new_mask);

        p->cpus_allowed = new_mask;

        /*
         * If the task is on a no-longer-allowed processor, we need to move
         * it.  If the task is not current, then set need_resched and send
         * its processor an IPI to reschedule.
         */
        if (!(p->cpus_runnable & p->cpus_allowed)) {
                if (p != current) {
                        p->need_resched = 1;
                        smp_send_reschedule(p->processor);
                }
                /*
                 * Wait until we are on a legal processor.  If the task is
                 * current, then we should be on a legal processor the next
                 * time we reschedule.  Otherwise, we need to wait for the IPI.
                 */
                while (!(p->cpus_runnable & p->cpus_allowed))
                        schedule();
        }
}
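
/*
 * Example (editor's note, not in the original source): a kernel thread
 * that wants to run only on CPU 0 would call
 *
 *      set_cpus_allowed(current, 1UL << 0);
 *
 * The mask is a plain bitmask of CPUs, the same representation used by
 * cpus_allowed and cpus_runnable above.
 */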
#endif /* CONFIG_SMP */

#ifndef __alpha__

/*
 * This has been replaced by sys_setpriority.  Maybe it should be
 * moved into the arch dependent tree for those ports that require
 * it for backward compatibility?
 */

asmlinkage long sys_nice(int increment)
{
        long newprio;

        /*
         *      Setpriority might change our priority at the same moment.
         *      We don't have to worry. Conceptually one call occurs first
         *      and we have a single winner.
         */
        if (increment < 0) {
                if (!capable(CAP_SYS_NICE))
                        return -EPERM;
                if (increment < -40)
                        increment = -40;
        }
        if (increment > 40)
                increment = 40;

        newprio = current->nice + increment;
        if (newprio < -20)
                newprio = -20;
        if (newprio > 19)
                newprio = 19;
        current->nice = newprio;
        return 0;
}
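
/*
 * Example (editor's note, not in the original source): an unprivileged task
 * currently at nice 0 calling sys_nice(25) ends up at nice +19, because the
 * result is clamped to the valid -20..19 range; a negative increment
 * (raising priority) requires CAP_SYS_NICE.
 */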

#endif

static inline struct task_struct *find_process_by_pid(pid_t pid)
{
        struct task_struct *tsk = current;

        if (pid)
                tsk = find_task_by_pid(pid);
        return tsk;
}

static int setscheduler(pid_t pid, int policy,
                        struct sched_param *param)
{
        struct sched_param lp;
        struct task_struct *p;
        int retval;

        retval = -EINVAL;
        if (!param || pid < 0)
                goto out_nounlock;

        retval = -EFAULT;
        if (copy_from_user(&lp, param, sizeof(struct sched_param)))
                goto out_nounlock;

        /*
         * We play safe to avoid deadlocks.
         */
        read_lock_irq(&tasklist_lock);
        spin_lock(&runqueue_lock);

        p = find_process_by_pid(pid);

        retval = -ESRCH;
        if (!p)
                goto out_unlock;

        if (policy < 0)
                policy = p->policy;
        else {
                retval = -EINVAL;
                if (policy != SCHED_FIFO && policy != SCHED_RR &&
                                policy != SCHED_OTHER)
                        goto out_unlock;
        }

        /*
         * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
         * priority for SCHED_OTHER is 0.
         */
        retval = -EINVAL;
        if (lp.sched_priority < 0 || lp.sched_priority > 99)
                goto out_unlock;
        if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
                goto out_unlock;

        retval = -EPERM;
        if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
            !capable(CAP_SYS_NICE))
                goto out_unlock;
        if ((current->euid != p->euid) && (current->euid != p->uid) &&
            !capable(CAP_SYS_NICE))
                goto out_unlock;

        retval = 0;
        p->policy = policy;
        p->rt_priority = lp.sched_priority;

        current->need_resched = 1;

out_unlock:
        spin_unlock(&runqueue_lock);
        read_unlock_irq(&tasklist_lock);

out_nounlock:
        return retval;
}
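
/*
 * Example (editor's note, not in the original source): from userspace, the
 * corresponding call to make a process round-robin realtime at priority 50
 * would be
 *
 *      struct sched_param sp;
 *      sp.sched_priority = 50;
 *      sched_setscheduler(pid, SCHED_RR, &sp);
 *
 * which requires CAP_SYS_NICE, per the checks above.
 */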

asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
                                      struct sched_param *param)
{
        return setscheduler(pid, policy, param);
}

asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param)
{
        return setscheduler(pid, -1, param);
}

asmlinkage long sys_sched_getscheduler(pid_t pid)
{
        struct task_struct *p;
        int retval;

        retval = -EINVAL;
        if (pid < 0)
                goto out_nounlock;

        retval = -ESRCH;
        read_lock(&tasklist_lock);
        p = find_process_by_pid(pid);
        if (p)
                retval = p->policy & ~SCHED_YIELD;
        read_unlock(&tasklist_lock);

out_nounlock:
        return retval;
}

asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
{
        struct task_struct *p;
        struct sched_param lp;
        int retval;

        retval = -EINVAL;
        if (!param || pid < 0)
                goto out_nounlock;

        read_lock(&tasklist_lock);
        p = find_process_by_pid(pid);
        retval = -ESRCH;
        if (!p)
                goto out_unlock;
        lp.sched_priority = p->rt_priority;
        read_unlock(&tasklist_lock);

        /*
         * This one might sleep, we cannot do it with a spinlock held ...
         */
        retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;

out_nounlock:
        return retval;

out_unlock:
        read_unlock(&tasklist_lock);
        return retval;
}

asmlinkage long sys_sched_yield(void)
{
        /*
         * Trick. sched_yield() first counts the number of truly
         * 'pending' runnable processes, then returns if it's
         * only the current process. (This test does not have
         * to be atomic.) In threaded applications this optimization
         * gets triggered quite often.
         */

        int nr_pending = nr_running;

#if CONFIG_SMP
        int i;

        // Subtract non-idle processes running on other CPUs.
        for (i = 0; i < smp_num_cpus; i++) {
                int cpu = cpu_logical_map(i);
                if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
                        nr_pending--;
        }
#else
        // on UP this process is on the runqueue as well
        nr_pending--;
#endif
        if (nr_pending) {
                /*
                 * This process can only be rescheduled by us,
                 * so this is safe without any locking.
                 */
                if (current->policy == SCHED_OTHER)
                        current->policy |= SCHED_YIELD;
                current->need_resched = 1;

                spin_lock_irq(&runqueue_lock);
                move_last_runqueue(current);
                spin_unlock_irq(&runqueue_lock);
        }
        return 0;
}

/**
 * yield - yield the current processor to other threads.
 *
 * this is a shortcut for kernel-space yielding - it marks the
 * thread runnable and calls sys_sched_yield().
 */
void yield(void)
{
        set_current_state(TASK_RUNNING);
        sys_sched_yield();
        schedule();
}

void __cond_resched(void)
{
        set_current_state(TASK_RUNNING);
        schedule();
}

asmlinkage long sys_sched_get_priority_max(int policy)
{
        int ret = -EINVAL;

        switch (policy) {
        case SCHED_FIFO:
        case SCHED_RR:
                ret = 99;
                break;
        case SCHED_OTHER:
                ret = 0;
                break;
        }
        return ret;
}

asmlinkage long sys_sched_get_priority_min(int policy)
{
        int ret = -EINVAL;

        switch (policy) {
        case SCHED_FIFO:
        case SCHED_RR:
                ret = 1;
                break;
        case SCHED_OTHER:
                ret = 0;
        }
        return ret;
}

asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
        struct timespec t;
        struct task_struct *p;
        int retval = -EINVAL;

        if (pid < 0)
                goto out_nounlock;

        retval = -ESRCH;
        read_lock(&tasklist_lock);
        p = find_process_by_pid(pid);
        if (p)
                jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
                                    &t);
        read_unlock(&tasklist_lock);
        if (p)
                retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
out_nounlock:
        return retval;
}
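
/*
 * Worked example (editor's note, not in the original source): for a
 * SCHED_RR task at nice 0 on a HZ=100 kernel, NICE_TO_TICKS(0) is 6 ticks,
 * so the reported round-robin interval is 60ms; SCHED_FIFO tasks report 0
 * because they have no time slice.
 */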

static void show_task(struct task_struct * p)
{
        unsigned long free = 0;
        int state;
        static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

        printk("%-13.13s ", p->comm);
        state = p->state ? ffz(~p->state) + 1 : 0;
        if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
                printk(stat_nam[state]);
        else
                printk(" ");
#if (BITS_PER_LONG == 32)
        if (p == current)
                printk(" current  ");
        else
                printk(" %08lX ", thread_saved_pc(&p->thread));
#else
        if (p == current)
                printk("   current task   ");
        else
                printk(" %016lx ", thread_saved_pc(&p->thread));
#endif
        {
                unsigned long * n = (unsigned long *) (p+1);
                while (!*n)
                        n++;
                free = (unsigned long) n - (unsigned long)(p+1);
        }
        printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid);
        if (p->p_cptr)
                printk("%5d ", p->p_cptr->pid);
        else
                printk("      ");
        if (p->p_ysptr)
                printk("%7d", p->p_ysptr->pid);
        else
                printk("       ");
        if (p->p_osptr)
                printk(" %5d", p->p_osptr->pid);
        else
                printk("      ");
        if (!p->mm)
                printk(" (L-TLB)\n");
        else
                printk(" (NOTLB)\n");

        {
                extern void show_trace_task(struct task_struct *tsk);
                show_trace_task(p);
        }
}

char * render_sigset_t(sigset_t *set, char *buffer)
{
        int i = _NSIG, x;
        do {
                i -= 4, x = 0;
                if (sigismember(set, i+1)) x |= 1;
                if (sigismember(set, i+2)) x |= 2;
                if (sigismember(set, i+3)) x |= 4;
                if (sigismember(set, i+4)) x |= 8;
                *buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
        } while (i >= 4);
        *buffer = 0;
        return buffer;
}
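
/*
 * Example (editor's note, not in the original source): render_sigset_t()
 * emits one hex digit per group of four signals, highest-numbered signals
 * first. With _NSIG == 64, a set containing only SIGINT (signal 2) renders
 * as fifteen '0' characters followed by '2'.
 */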

void show_state(void)
{
        struct task_struct *p;

#if (BITS_PER_LONG == 32)
        printk("\n"
               "                         free                        sibling\n");
        printk("  task             PC    stack   pid father child younger older\n");
#else
        printk("\n"
               "                                 free                        sibling\n");
        printk("  task                 PC        stack   pid father child younger older\n");
#endif
        read_lock(&tasklist_lock);
        for_each_task(p) {
                /*
                 * reset the NMI-timeout, listing all tasks on a slow
                 * console might take a lot of time:
                 */
                touch_nmi_watchdog();
                show_task(p);
        }
        read_unlock(&tasklist_lock);
}

/**
 * reparent_to_init() - Reparent the calling kernel thread to the init task.
 *
 * If a kernel thread is launched as a result of a system call, or if
 * it ever exits, it should generally reparent itself to init so that
 * it is correctly cleaned up on exit.
 *
 * The various task state such as scheduling policy and priority may have
 * been inherited from a user process, so we reset them to sane values here.
 *
 * NOTE that reparent_to_init() gives the caller full capabilities.
 */
void reparent_to_init(void)
{
        struct task_struct *this_task = current;

        write_lock_irq(&tasklist_lock);

        /* Reparent to init */
        REMOVE_LINKS(this_task);
        this_task->p_pptr = child_reaper;
        this_task->p_opptr = child_reaper;
        SET_LINKS(this_task);

        /* Set the exit signal to SIGCHLD so we signal init on exit */
        this_task->exit_signal = SIGCHLD;

        /* We also take the runqueue_lock while altering task fields
         * which affect scheduling decisions */
        spin_lock(&runqueue_lock);

        this_task->ptrace = 0;
        this_task->nice = DEF_NICE;
        this_task->policy = SCHED_OTHER;
        /* cpus_allowed? */
        /* rt_priority? */
        /* signals? */
        this_task->cap_effective = CAP_INIT_EFF_SET;
        this_task->cap_inheritable = CAP_INIT_INH_SET;
        this_task->cap_permitted = CAP_FULL_SET;
        this_task->keep_capabilities = 0;
        memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
        switch_uid(INIT_USER);

        spin_unlock(&runqueue_lock);
        write_unlock_irq(&tasklist_lock);
}

/*
 *      Put all the gunge required to become a kernel thread without
 *      attached user resources in one place where it belongs.
 */

void daemonize(void)
{
        struct fs_struct *fs;


        /*
         * If we were started as a result of loading a module, close all of the
         * user space pages.  We don't need them, and if we didn't close them
         * they would be locked into memory.
         */
        exit_mm(current);

        current->session = 1;
        current->pgrp = 1;
        current->tty = NULL;

        /* Become as one with the init task */

        exit_fs(current);       /* current->fs->count--; */
        fs = init_task.fs;
        current->fs = fs;
        atomic_inc(&fs->count);
        exit_files(current);
        current->files = init_task.files;
        atomic_inc(&current->files->count);
}
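
/*
 * Typical usage (editor's note, not in the original source): a 2.4-style
 * kernel thread usually detaches itself right at the top of its thread
 * function, roughly
 *
 *      static int my_thread(void *unused)
 *      {
 *              daemonize();
 *              strcpy(current->comm, "mythread");
 *              ...
 *      }
 *
 * 'my_thread' and "mythread" are illustrative names, not part of this file.
 */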

extern unsigned long wait_init_idle;

void __init init_idle(void)
{
        struct schedule_data * sched_data;
        sched_data = &aligned_data[smp_processor_id()].schedule_data;

        if (current != &init_task && task_on_runqueue(current)) {
                printk("UGH! (%d:%d) was on the runqueue, removing.\n",
                        smp_processor_id(), current->pid);
                del_from_runqueue(current);
        }
        sched_data->curr = current;
        sched_data->last_schedule = get_cycles();
        clear_bit(current->processor, &wait_init_idle);
}

extern void init_timervecs (void);

void __init sched_init(void)
{
        /*
         * We have to do a little magic to get the first
         * process right in SMP mode.
         */
        int cpu = smp_processor_id();
        int nr;

        init_task.processor = cpu;

        for(nr = 0; nr < PIDHASH_SZ; nr++)
                pidhash[nr] = NULL;

        init_timervecs();

        init_bh(TIMER_BH, timer_bh);
        init_bh(TQUEUE_BH, tqueue_bh);
        init_bh(IMMEDIATE_BH, immediate_bh);

        /*
         * The boot idle thread does lazy MMU switching as well:
         */
        atomic_inc(&init_mm.mm_count);
        enter_lazy_tlb(&init_mm, current, cpu);
}