OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [arch/] [sparc64/] [kernel/] [smp.c] - Blame information for rev 1275

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/* smp.c: Sparc64 SMP support.
2
 *
3
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
4
 */
5
 
6
#include <linux/kernel.h>
7
#include <linux/sched.h>
8
#include <linux/mm.h>
9
#include <linux/pagemap.h>
10
#include <linux/threads.h>
11
#include <linux/smp.h>
12
#include <linux/smp_lock.h>
13
#include <linux/interrupt.h>
14
#include <linux/kernel_stat.h>
15
#include <linux/delay.h>
16
#include <linux/init.h>
17
#include <linux/spinlock.h>
18
#include <linux/fs.h>
19
#include <linux/seq_file.h>
20
#include <linux/cache.h>
21
#include <linux/timer.h>
22
 
23
#include <asm/head.h>
24
#include <asm/ptrace.h>
25
#include <asm/atomic.h>
26
 
27
#include <asm/irq.h>
28
#include <asm/page.h>
29
#include <asm/pgtable.h>
30
#include <asm/oplib.h>
31
#include <asm/hardirq.h>
32
#include <asm/softirq.h>
33
#include <asm/uaccess.h>
34
#include <asm/timer.h>
35
#include <asm/starfire.h>
36
 
37
#define __KERNEL_SYSCALLS__
38
#include <linux/unistd.h>
39
 
40
extern int linux_num_cpus;
41
extern void calibrate_delay(void);
42
extern unsigned prom_cpu_nodes[];
43
 
44
cpuinfo_sparc cpu_data[NR_CPUS];
45
 
46
volatile int __cpu_number_map[NR_CPUS]  __attribute__ ((aligned (SMP_CACHE_BYTES)));
47
volatile int __cpu_logical_map[NR_CPUS] __attribute__ ((aligned (SMP_CACHE_BYTES)));
48
 
49
/* Please don't make this stuff initdata!!!  --DaveM */
50
static unsigned char boot_cpu_id;
51
static int smp_activated;
52
 
53
/* Kernel spinlock */
54
spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
55
 
56
volatile int smp_processors_ready = 0;
57
unsigned long cpu_present_map = 0;
58
int smp_num_cpus = 1;
59
int smp_threads_ready = 0;
60
 
61
/* Boot-time SMP setup hook.  Still a stub: both arguments are ignored. */
void __init smp_setup(char *str, int *ints)
{
	/* XXX not implemented yet XXX */
}
65
 
66
static int max_cpus = NR_CPUS;
67
/* Handler for the "maxcpus=" option: parse the integer that follows into
 * max_cpus.  Always returns 1.
 */
static int __init maxcpus(char *str)
{
	char *arg = str;

	get_option(&arg, &max_cpus);
	return 1;
}
72
 
73
__setup("maxcpus=", maxcpus);
74
 
75
void smp_info(struct seq_file *m)
76
{
77
        int i;
78
 
79
        seq_printf(m, "State:\n");
80
        for (i = 0; i < NR_CPUS; i++) {
81
                if (cpu_present_map & (1UL << i))
82
                        seq_printf(m,
83
                                   "CPU%d:\t\tonline\n", i);
84
        }
85
}
86
 
87
void smp_bogo(struct seq_file *m)
88
{
89
        int i;
90
 
91
        for (i = 0; i < NR_CPUS; i++)
92
                if (cpu_present_map & (1UL << i))
93
                        seq_printf(m,
94
                                   "Cpu%dBogo\t: %lu.%02lu\n"
95
                                   "Cpu%dClkTck\t: %016lx\n",
96
                                   i, cpu_data[i].udelay_val / (500000/HZ),
97
                                   (cpu_data[i].udelay_val / (5000/HZ)) % 100,
98
                                   i, cpu_data[i].clock_tick);
99
}
100
 
101
/* Initialise the cpu_data[] slot of cpu ID: record the calibrated delay
 * loop value, look up the cpu's "clock-frequency" PROM property, and
 * reset the page-table caches and irq worklists.
 */
void __init smp_store_cpu_info(int id)
{
	int i, no;

	/* multiplier and counter set by
	   smp_setup_percpu_timer()  */
	cpu_data[id].udelay_val			= loops_per_jiffy;

	/* Find the PROM cpu table entry whose module id matches ID. */
	for (no = 0; no < linux_num_cpus; no++)
		if (linux_cpus[no].mid == id)
			break;

	/* Only consult the PROM when the lookup succeeded.  Otherwise NO is
	 * linux_num_cpus and indexing linux_cpus[] with it would read past
	 * the populated entries; use the same default (0) that is passed to
	 * prom_getintdefault() instead.
	 */
	if (no < linux_num_cpus)
		cpu_data[id].clock_tick =
			prom_getintdefault(linux_cpus[no].prom_node,
					   "clock-frequency", 0);
	else
		cpu_data[id].clock_tick = 0;

	cpu_data[id].pgcache_size		= 0;
	cpu_data[id].pte_cache[0]		= NULL;
	cpu_data[id].pte_cache[1]		= NULL;
	cpu_data[id].pgdcache_size		= 0;
	cpu_data[id].pgd_cache			= NULL;
	cpu_data[id].idle_volume		= 1;

	for (i = 0; i < 16; i++)
		cpu_data[id].irq_worklists[i] = 0;
}
126
 
127
/* Intentionally empty in this file. */
void __init smp_commence(void)
{
}
130
 
131
static void smp_setup_percpu_timer(void);
132
 
133
static volatile unsigned long callin_flag = 0;
134
 
135
extern void inherit_locked_prom_mappings(int save_p);
136
 
137
/* Bring-up path run by a cpu after it has been started.  It sets up its
 * TLB and per-cpu timer, calibrates the delay loop, signals the starting
 * cpu through callin_flag, then spins until smp_threads_ready is set.
 */
void __init smp_callin(void)
{
	int this_cpu = hard_smp_processor_id();
	extern int bigkernel;
	extern unsigned long kern_locked_tte_data;

	/* With a big kernel, also load the mapping at KERNBASE + 0x400000
	 * into both the D-TLB and the I-TLB.
	 */
	if (bigkernel) {
		prom_dtlb_load(sparc64_highest_locked_tlbent()-1,
			kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);
		prom_itlb_load(sparc64_highest_locked_tlbent()-1,
			kern_locked_tte_data + 0x400000, KERNBASE + 0x400000);
	}

	inherit_locked_prom_mappings(0);

	__flush_cache_all();
	__flush_tlb_all();

	smp_setup_percpu_timer();

	__sti();

	calibrate_delay();
	smp_store_cpu_info(this_cpu);

	/* Let the starting cpu know we made it this far. */
	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush  %%g6" : : : "memory");

	/* This flag must be cleared, otherwise we die instantly
	 * when we schedule back to this idler...
	 */
	current->thread.flags &= ~(SPARC_FLAG_NEWCHILD);

	/* Borrow init_mm as our active address space. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	while (smp_threads_ready == 0)
		membar("#LoadLoad");
}
177
 
178
extern int cpu_idle(void);
179
extern void init_IRQ(void);
180
 
181
/* Thread function for a started cpu: initialise traps and IRQs, then
 * enter the idle loop and hand back whatever it returns.
 */
int start_secondary(void *unused)
{
	int ret;

	trap_init();
	init_IRQ();
	ret = cpu_idle();

	return ret;
}
187
 
188
/* Report which cpu came back from cpu_idle() and then panic. */
void cpu_panic(void)
{
	int cpu = smp_processor_id();

	printk("CPU[%d]: Returns from cpu_idle!\n", cpu);
	panic("SMP bolixed\n");
}
193
 
194
static unsigned long current_tick_offset;
195
 
196
/* This tick register synchronization scheme is taken entirely from
197
 * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
198
 *
199
 * The only change I've made is to rework it so that the master
200
 * initiates the synchronization instead of the slave. -DaveM
201
 */
202
 
203
#define MASTER  0
204
#define SLAVE   (SMP_CACHE_BYTES/sizeof(unsigned long))
205
 
206
#define NUM_ROUNDS      64      /* magic value */
207
#define NUM_ITERS       5       /* likewise */
208
 
209
static spinlock_t itc_sync_lock = SPIN_LOCK_UNLOCKED;
210
static unsigned long go[SLAVE + 1];
211
 
212
#define DEBUG_TICK_SYNC 0
213
 
214
static inline long get_delta (long *rt, long *master)
215
{
216
        unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
217
        unsigned long tcenter, t0, t1, tm;
218
        unsigned long i;
219
 
220
        for (i = 0; i < NUM_ITERS; i++) {
221
                t0 = tick_ops->get_tick();
222
                go[MASTER] = 1;
223
                membar("#StoreLoad");
224
                while (!(tm = go[SLAVE]))
225
                        membar("#LoadLoad");
226
                go[SLAVE] = 0;
227
                membar("#StoreStore");
228
                t1 = tick_ops->get_tick();
229
 
230
                if (t1 - t0 < best_t1 - best_t0)
231
                        best_t0 = t0, best_t1 = t1, best_tm = tm;
232
        }
233
 
234
        *rt = best_t1 - best_t0;
235
        *master = best_tm - best_t0;
236
 
237
        /* average best_t0 and best_t1 without overflow: */
238
        tcenter = (best_t0/2 + best_t1/2);
239
        if (best_t0 % 2 + best_t1 % 2 == 2)
240
                tcenter++;
241
        return tcenter - best_tm;
242
}
243
 
244
void smp_synchronize_tick_client(void)
245
{
246
        long i, delta, adj, adjust_latency = 0, done = 0;
247
        unsigned long flags, rt, master_time_stamp, bound;
248
#if DEBUG_TICK_SYNC
249
        struct {
250
                long rt;        /* roundtrip time */
251
                long master;    /* master's timestamp */
252
                long diff;      /* difference between midpoint and master's timestamp */
253
                long lat;       /* estimate of itc adjustment latency */
254
        } t[NUM_ROUNDS];
255
#endif
256
 
257
        go[MASTER] = 1;
258
 
259
        while (go[MASTER])
260
                membar("#LoadLoad");
261
 
262
        local_irq_save(flags);
263
        {
264
                for (i = 0; i < NUM_ROUNDS; i++) {
265
                        delta = get_delta(&rt, &master_time_stamp);
266
                        if (delta == 0) {
267
                                done = 1;       /* let's lock on to this... */
268
                                bound = rt;
269
                        }
270
 
271
                        if (!done) {
272
                                if (i > 0) {
273
                                        adjust_latency += -delta;
274
                                        adj = -delta + adjust_latency/4;
275
                                } else
276
                                        adj = -delta;
277
 
278
                                tick_ops->add_tick(adj, current_tick_offset);
279
                        }
280
#if DEBUG_TICK_SYNC
281
                        t[i].rt = rt;
282
                        t[i].master = master_time_stamp;
283
                        t[i].diff = delta;
284
                        t[i].lat = adjust_latency/4;
285
#endif
286
                }
287
        }
288
        local_irq_restore(flags);
289
 
290
#if DEBUG_TICK_SYNC
291
        for (i = 0; i < NUM_ROUNDS; i++)
292
                printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
293
                       t[i].rt, t[i].master, t[i].diff, t[i].lat);
294
#endif
295
 
296
        printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles,"
297
               "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);
298
}
299
 
300
static void smp_start_sync_tick_client(int cpu);
301
 
302
static void smp_synchronize_one_tick(int cpu)
303
{
304
        unsigned long flags, i;
305
 
306
        go[MASTER] = 0;
307
 
308
        smp_start_sync_tick_client(cpu);
309
 
310
        /* wait for client to be ready */
311
        while (!go[MASTER])
312
                membar("#LoadLoad");
313
 
314
        /* now let the client proceed into his loop */
315
        go[MASTER] = 0;
316
        membar("#StoreLoad");
317
 
318
        spin_lock_irqsave(&itc_sync_lock, flags);
319
        {
320
                for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
321
                        while (!go[MASTER])
322
                                membar("#LoadLoad");
323
                        go[MASTER] = 0;
324
                        membar("#StoreStore");
325
                        go[SLAVE] = tick_ops->get_tick();
326
                        membar("#StoreLoad");
327
                }
328
        }
329
        spin_unlock_irqrestore(&itc_sync_lock, flags);
330
}
331
 
332
static void smp_synchronize_tick(void)
333
{
334
        int cpu = smp_processor_id();
335
        int i;
336
 
337
        for (i = 0; i < NR_CPUS; i++) {
338
                if (cpu_present_map & (1UL << i)) {
339
                        if (i == cpu)
340
                                continue;
341
                        smp_synchronize_one_tick(i);
342
                }
343
        }
344
}
345
 
346
extern struct prom_cpuinfo linux_cpus[64];
347
 
348
extern unsigned long sparc64_cpu_startup;
349
 
350
/* The OBP cpu startup callback truncates the 3rd arg cookie to
351
 * 32-bits (I think) so to be safe we have it read the pointer
352
 * contained here so we work on >4GB machines. -DaveM
353
 */
354
static struct task_struct *cpu_new_task = NULL;
355
 
356
/* Start every other cpu listed in cpu_present_map (up to max_cpus in
 * total), record the logical<->physical cpu numbering for those that
 * call in, print a summary, and finally synchronize the tick registers.
 */
void __init smp_boot_cpus(void)
{
	int nr_started = 0, cpu;

	printk("Entering UltraSMPenguin Mode...\n");
	__sti();
	smp_store_cpu_info(boot_cpu_id);
	init_idle();

	if (linux_num_cpus == 1)
		return;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (cpu == boot_cpu_id)
			continue;

		/* Limit reached: drop this cpu without trying to start it. */
		if ((nr_started + 1) == max_cpus) {
			cpu_present_map &= ~(1UL << cpu);
			__cpu_number_map[cpu] = -1;
			continue;
		}

		if (cpu_present_map & (1UL << cpu)) {
			unsigned long entry = (unsigned long)(&sparc64_cpu_startup);
			unsigned long cookie = (unsigned long)(&cpu_new_task);
			struct task_struct *idle;
			int spins;
			int slot;

			prom_printf("Starting CPU %d... ", cpu);
			kernel_thread(start_secondary, NULL, CLONE_PID);
			nr_started++;

			idle = init_task.prev_task;
			init_tasks[nr_started] = idle;

			idle->processor = cpu;
			idle->cpus_runnable = 1UL << cpu; /* we schedule the first task manually */

			del_from_runqueue(idle);
			unhash_process(idle);

			callin_flag = 0;

			/* Locate the PROM cpu table entry for this module id. */
			for (slot = 0; slot < linux_num_cpus; slot++)
				if (linux_cpus[slot].mid == cpu)
					break;

			cpu_new_task = idle;
			prom_startcpu(linux_cpus[slot].prom_node,
				      entry, cookie);

			/* Poll for the new cpu to set callin_flag. */
			spins = 0;
			while (spins < 5000000 && !callin_flag) {
				udelay(100);
				spins++;
			}

			if (!callin_flag) {
				nr_started--;
				printk("Processor %d is stuck.\n", cpu);
				prom_printf("FAILED\n");
			} else {
				__cpu_number_map[cpu] = nr_started;
				__cpu_logical_map[nr_started] = cpu;
				prom_cpu_nodes[cpu] = linux_cpus[slot].prom_node;
				prom_printf("OK\n");
			}
		}

		/* callin_flag is only reset when a start is attempted, so for
		 * a cpu that was not present this tests the result of the
		 * most recent attempt.
		 */
		if (!callin_flag) {
			cpu_present_map &= ~(1UL << cpu);
			__cpu_number_map[cpu] = -1;
		}
	}
	cpu_new_task = NULL;

	if (nr_started != 0) {
		unsigned long bogosum = 0;

		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_present_map & (1UL << cpu))
				bogosum += cpu_data[cpu].udelay_val;
		}
		printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		       nr_started + 1,
		       bogosum/(500000/HZ),
		       (bogosum/(5000/HZ))%100);
		smp_activated = 1;
		smp_num_cpus = nr_started + 1;
	} else {
		if (max_cpus != 1)
			printk("Error: only one processor found.\n");
		cpu_present_map = (1UL << smp_processor_id());
	}

	smp_processors_ready = 1;
	membar("#StoreStore | #StoreLoad");

	smp_synchronize_tick();
}
447
 
448
/* Dispatch one three-word interrupt packet to a single cpu.
 *
 * The dispatch is repeated (after a 2us delay) until the dispatch status
 * register reads zero.  If the low status bit is still set after 100000
 * polls, a "mondo stuckage" message is printed and the function returns.
 * PSTATE is restored from the caller-supplied value on every exit path.
 */
static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, scratch;

	if (this_is_starfire) {
		/* map to real upaid */
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;

	for (;;) {
		/* Ok, this is the real Spitfire Errata #54.
		 * One must read back from a UDB internal register
		 * after writes to the UDB interrupt dispatch, but
		 * before the membar Sync for that write.
		 * So we use the high UDB control register (ASI 0x7f,
		 * ADDR 0x20) for the dummy read. -DaveM
		 */
		scratch = 0x40;
		__asm__ __volatile__(
		"wrpr	%1, %2, %%pstate\n\t"
		"stxa	%4, [%0] %3\n\t"
		"stxa	%5, [%0+%8] %3\n\t"
		"add	%0, %8, %0\n\t"
		"stxa	%6, [%0+%8] %3\n\t"
		"membar	#Sync\n\t"
		"stxa	%%g0, [%7] %3\n\t"
		"membar	#Sync\n\t"
		"mov	0x20, %%g1\n\t"
		"ldxa	[%%g1] 0x7f, %%g0\n\t"
		"membar	#Sync"
		: "=r" (scratch)
		: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
		  "r" (data0), "r" (data1), "r" (data2), "r" (target), "r" (0x10), "0" (scratch)
		: "g1");

		/* NOTE: PSTATE_IE is still clear. */
		stuck = 100000;
		do {
			__asm__ __volatile__("ldxa [%%g0] %1, %0"
				: "=r" (result)
				: "i" (ASI_INTR_DISPATCH_STAT));
			if (result == 0) {
				/* Status is clear: restore pstate, done. */
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				return;
			}
			stuck -= 1;
			if (stuck == 0)
				break;
		} while (result & 0x1);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if (stuck == 0) {
			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
			       smp_processor_id(), result);
			return;
		}

		/* Back off briefly and dispatch again. */
		udelay(2);
	}
}
512
 
513
/* Send the packet to each cpu whose bit is set in MASK, one at a time,
 * stopping once smp_num_cpus - 1 targets have been handled.
 */
static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, unsigned long mask)
{
	int remaining = smp_num_cpus - 1;
	int cpu;
	u64 pstate;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!remaining)
			break;
		if (!(mask & (1UL << cpu)))
			continue;

		spitfire_xcall_helper(data0, data1, data2, pstate, cpu);
		remaining--;
	}
}
527
 
528
/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
529
 * packet, but we have no use for that.  However we do take advantage of
530
 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
531
 */
532
#if NR_CPUS > 32
533
#error Fixup cheetah_xcall_deliver Dave...
534
#endif
535
/* Dispatch one three-word interrupt packet to every cpu in MASK in a
 * single pipelined pass, then poll the dispatch status register.
 *
 * Status zero means everything was delivered.  If only bits inside the
 * 0x5555... pattern remain set when polling ends, a "mondo stuckage"
 * message is printed and the function gives up.  Otherwise cpus whose
 * 0x2-position status bit is clear are removed from MASK and the
 * dispatch is repeated for the rest.
 */
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, unsigned long mask)
{
	u64 pstate, ver;
	int nack_busy_id, is_jalapeno;

	if (!mask)
		return;

	/* Unfortunately, someone at Sun had the brilliant idea to make the
	 * busy/nack fields hard-coded by ITID number for this Ultra-III
	 * derivative processor.
	 */
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jalapeno = ((ver >> 32) == 0x003e0016);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

	for (;;) {
		u64 dispatch_stat;
		long stuck;
		int cpu, remaining, this_busy_nack;

		__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
				     : : "r" (pstate), "i" (PSTATE_IE));

		/* Setup the dispatch data registers. */
		__asm__ __volatile__("stxa	%0, [%3] %6\n\t"
				     "stxa	%1, [%4] %6\n\t"
				     "stxa	%2, [%5] %6\n\t"
				     "membar	#Sync\n\t"
				     : /* no outputs */
				     : "r" (data0), "r" (data1), "r" (data2),
				       "r" (0x40), "r" (0x50), "r" (0x60),
				       "i" (ASI_INTR_W));

		/* Fire off the dispatch to every target still in the mask. */
		nack_busy_id = 0;
		remaining = smp_num_cpus - 1;
		for (cpu = 0; (cpu < NR_CPUS) && remaining; cpu++) {
			u64 target;

			if (!(mask & (1UL << cpu)))
				continue;

			target = (cpu << 14) | 0x70;
			if (!is_jalapeno)
				target |= (nack_busy_id << 24);
			__asm__ __volatile__("stxa	%%g0, [%0] %1\n\t"
					     "membar	#Sync\n\t"
					     : /* no outputs */
					     : "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
			remaining--;
		}

		/* Now, poll for completion. */
		stuck = 100000 * nack_busy_id;
		do {
			__asm__ __volatile__("ldxa	[%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (dispatch_stat == 0UL) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & 0x5555555555555555UL);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
			/* Busy bits will not clear, continue instead
			 * of freezing up on this cpu.
			 */
			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
			       smp_processor_id(), dispatch_stat);
			return;
		}

		/* Delay some random time with interrupts enabled
		 * to prevent deadlock.
		 */
		udelay(2 * nack_busy_id);

		/* Clear out the mask bits for cpus which did not
		 * NACK us.
		 */
		this_busy_nack = 0;
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			u64 check_mask;

			if (!(mask & (1UL << cpu)))
				continue;

			if (is_jalapeno)
				check_mask = (0x2UL << (2*cpu));
			else
				check_mask = (0x2UL << this_busy_nack);

			if ((dispatch_stat & check_mask) == 0)
				mask &= ~(1UL << cpu);
			this_busy_nack += 2;
		}
		/* ...and go around again for whoever is left. */
	}
}
644
 
645
/* Send cross call to all processors mentioned in MASK
646
 * except self.
647
 */
648
/* Deliver a cross call to the cpus in MASK, never to the caller itself.
 * data0 carries CTX in its upper 32 bits and the low 32 bits of FUNC's
 * address in its lower half.  Does nothing until smp_processors_ready
 * is set.
 */
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, unsigned long mask)
{
	u64 data0;

	if (!smp_processors_ready)
		return;

	data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

	mask &= ~(1UL<<smp_processor_id());

	if (tlb_type != spitfire)
		cheetah_xcall_deliver(data0, data1, data2, mask);
	else
		spitfire_xcall_deliver(data0, data1, data2, mask);

	/* NOTE: Caller runs local copy on master. */
}
663
 
664
extern unsigned long xcall_sync_tick;
665
 
666
static void smp_start_sync_tick_client(int cpu)
667
{
668
        smp_cross_call_masked(&xcall_sync_tick,
669
                              0, 0, 0,
670
                              (1UL << cpu));
671
}
672
 
673
/* Send cross call to all processors except self. */
674
#define smp_cross_call(func, ctx, data1, data2) \
675
        smp_cross_call_masked(func, ctx, data1, data2, cpu_present_map)
676
 
677
struct call_data_struct {
678
        void (*func) (void *info);
679
        void *info;
680
        atomic_t finished;
681
        int wait;
682
};
683
 
684
static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
685
static struct call_data_struct *call_data;
686
 
687
extern unsigned long xcall_call_function;
688
 
689
int smp_call_function(void (*func)(void *info), void *info,
690
                      int nonatomic, int wait)
691
{
692
        struct call_data_struct data;
693
        int cpus = smp_num_cpus - 1;
694
        long timeout;
695
 
696
        if (!cpus)
697
                return 0;
698
 
699
        data.func = func;
700
        data.info = info;
701
        atomic_set(&data.finished, 0);
702
        data.wait = wait;
703
 
704
        spin_lock_bh(&call_lock);
705
 
706
        call_data = &data;
707
 
708
        smp_cross_call(&xcall_call_function, 0, 0, 0);
709
 
710
        /*
711
         * Wait for other cpus to complete function or at
712
         * least snap the call data.
713
         */
714
        timeout = 1000000;
715
        while (atomic_read(&data.finished) != cpus) {
716
                if (--timeout <= 0)
717
                        goto out_timeout;
718
                barrier();
719
                udelay(1);
720
        }
721
 
722
        spin_unlock_bh(&call_lock);
723
 
724
        return 0;
725
 
726
out_timeout:
727
        spin_unlock_bh(&call_lock);
728
        printk("XCALL: Remote cpus not responding, ncpus=%d finished=%d\n",
729
               smp_num_cpus - 1, atomic_read(&data.finished));
730
        return 0;
731
}
732
 
733
void smp_call_function_client(int irq, struct pt_regs *regs)
734
{
735
        void (*func) (void *info) = call_data->func;
736
        void *info = call_data->info;
737
 
738
        clear_softint(1 << irq);
739
        if (call_data->wait) {
740
                /* let initiator proceed only after completion */
741
                func(info);
742
                atomic_inc(&call_data->finished);
743
        } else {
744
                /* let initiator proceed after getting data */
745
                atomic_inc(&call_data->finished);
746
                func(info);
747
        }
748
}
749
 
750
extern unsigned long xcall_flush_tlb_page;
751
extern unsigned long xcall_flush_tlb_mm;
752
extern unsigned long xcall_flush_tlb_range;
753
extern unsigned long xcall_flush_tlb_all_spitfire;
754
extern unsigned long xcall_flush_tlb_all_cheetah;
755
extern unsigned long xcall_flush_cache_all_spitfire;
756
extern unsigned long xcall_report_regs;
757
extern unsigned long xcall_receive_signal;
758
extern unsigned long xcall_flush_dcache_page_cheetah;
759
extern unsigned long xcall_flush_dcache_page_spitfire;
760
 
761
#ifdef CONFIG_DEBUG_DCFLUSH
762
extern atomic_t dcpage_flushes;
763
extern atomic_t dcpage_flushes_xcall;
764
#endif
765
 
766
static __inline__ void __local_flush_dcache_page(struct page *page)
767
{
768
#if (L1DCACHE_SIZE > PAGE_SIZE)
769
        __flush_dcache_page(page->virtual,
770
                            ((tlb_type == spitfire) &&
771
                             page->mapping != NULL));
772
#else
773
        if (page->mapping != NULL &&
774
            tlb_type == spitfire)
775
                __flush_icache_page(__pa(page->virtual));
776
#endif
777
}
778
 
779
void smp_flush_dcache_page_impl(struct page *page, int cpu)
780
{
781
        if (smp_processors_ready) {
782
                unsigned long mask = 1UL << cpu;
783
 
784
#ifdef CONFIG_DEBUG_DCFLUSH
785
                atomic_inc(&dcpage_flushes);
786
#endif
787
                if (cpu == smp_processor_id()) {
788
                        __local_flush_dcache_page(page);
789
                } else if ((cpu_present_map & mask) != 0) {
790
                        u64 data0;
791
 
792
                        if (tlb_type == spitfire) {
793
                                data0 = ((u64)&xcall_flush_dcache_page_spitfire);
794
                                if (page->mapping != NULL)
795
                                        data0 |= ((u64)1 << 32);
796
                                spitfire_xcall_deliver(data0,
797
                                                       __pa(page->virtual),
798
                                                       (u64) page->virtual,
799
                                                       mask);
800
                        } else {
801
                                data0 = ((u64)&xcall_flush_dcache_page_cheetah);
802
                                cheetah_xcall_deliver(data0,
803
                                                      __pa(page->virtual),
804
                                                      0, mask);
805
                        }
806
#ifdef CONFIG_DEBUG_DCFLUSH
807
                        atomic_inc(&dcpage_flushes_xcall);
808
#endif
809
                }
810
        }
811
}
812
 
813
void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
814
{
815
        if (smp_processors_ready) {
816
                unsigned long mask = cpu_present_map & ~(1UL << smp_processor_id());
817
                u64 data0;
818
 
819
#ifdef CONFIG_DEBUG_DCFLUSH
820
                atomic_inc(&dcpage_flushes);
821
#endif
822
                if (mask == 0UL)
823
                        goto flush_self;
824
                if (tlb_type == spitfire) {
825
                        data0 = ((u64)&xcall_flush_dcache_page_spitfire);
826
                        if (page->mapping != NULL)
827
                                data0 |= ((u64)1 << 32);
828
                        spitfire_xcall_deliver(data0,
829
                                               __pa(page->virtual),
830
                                               (u64) page->virtual,
831
                                               mask);
832
                } else {
833
                        data0 = ((u64)&xcall_flush_dcache_page_cheetah);
834
                        cheetah_xcall_deliver(data0,
835
                                              __pa(page->virtual),
836
                                              0, mask);
837
                }
838
#ifdef CONFIG_DEBUG_DCFLUSH
839
                atomic_inc(&dcpage_flushes_xcall);
840
#endif
841
        flush_self:
842
                __local_flush_dcache_page(page);
843
        }
844
}
845
 
846
void smp_receive_signal(int cpu)
847
{
848
        if (smp_processors_ready) {
849
                unsigned long mask = 1UL << cpu;
850
 
851
                if ((cpu_present_map & mask) != 0) {
852
                        u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);
853
 
854
                        if (tlb_type == spitfire)
855
                                spitfire_xcall_deliver(data0, 0, 0, mask);
856
                        else
857
                                cheetah_xcall_deliver(data0, 0, 0, mask);
858
                }
859
        }
860
}
861
 
862
/* Receiving side of smp_receive_signal(): acknowledge the soft
 * interrupt for IRQ and return.  Per the original note, rtrap takes
 * care of the rest.  REGS is not used.
 */
void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
        int softint_bit = 1 << irq;

        clear_softint(softint_bit);
}
867
 
868
void smp_report_regs(void)
869
{
870
        smp_cross_call(&xcall_report_regs, 0, 0, 0);
871
}
872
 
873
void smp_flush_cache_all(void)
874
{
875
        /* Cheetah need do nothing. */
876
        if (tlb_type == spitfire) {
877
                smp_cross_call(&xcall_flush_cache_all_spitfire, 0, 0, 0);
878
                __flush_cache_all();
879
        }
880
}
881
 
882
void smp_flush_tlb_all(void)
883
{
884
        if (tlb_type == spitfire)
885
                smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0);
886
        else
887
                smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0);
888
        __flush_tlb_all();
889
}
890
 
891
/* We know that the window frames of the user have been flushed
892
 * to the stack before we get here because all callers of us
893
 * are flush_tlb_*() routines, and these run after flush_cache_*()
894
 * which performs the flushw.
895
 *
896
 * The SMP TLB coherency scheme we use works as follows:
897
 *
898
 * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
899
 *    space has (potentially) executed on, this is the heuristic
900
 *    we use to avoid doing cross calls.
901
 *
902
 *    Also, for flushing from kswapd and also for clones, we
903
 *    use cpu_vm_mask as the list of cpus which must run the TLB flush.
904
 *
905
 * 2) TLB context numbers are shared globally across all processors
906
 *    in the system, this allows us to play several games to avoid
907
 *    cross calls.
908
 *
909
 *    One invariant is that when a cpu switches to a process, and
910
 *    that process's tsk->active_mm->cpu_vm_mask does not have the
911
 *    current cpu's bit set, that tlb context is flushed locally.
912
 *
913
 *    If the address space is non-shared (ie. mm->mm_users == 1) we avoid
914
 *    cross calls when we want to flush the currently running process's
915
 *    tlb state.  This is done by clearing all cpu bits except the current
916
 *    processor's in current->active_mm->cpu_vm_mask and performing the
917
 *    flush locally only.  This will force any subsequent cpus which run
918
 *    this task to flush the context from the local tlb if the process
919
 *    migrates to another cpu (again).
920
 *
921
 * 3) For shared address spaces (threads) and swapping we bite the
922
 *    bullet for most cases and perform the cross call (but only to
923
 *    the cpus listed in cpu_vm_mask).
924
 *
925
 *    The performance gain from "optimizing" away the cross call for threads is
926
 *    questionable (in theory the big win for threads is the massive sharing of
927
 *    address space state across processors).
928
 */
929
void smp_flush_tlb_mm(struct mm_struct *mm)
930
{
931
        /*
932
         * This code is called from two places, dup_mmap and exit_mmap. In the
933
         * former case, we really need a flush. In the later case, the callers
934
         * are single threaded exec_mmap (really need a flush), multithreaded
935
         * exec_mmap case (do not need to flush, since the caller gets a new
936
         * context via activate_mm), and all other callers of mmput() whence
937
         * the flush can be optimized since the associated threads are dead and
938
         * the mm is being torn down (__exit_mm and other mmput callers) or the
939
         * owning thread is dissociating itself from the mm. The
940
         * (atomic_read(&mm->mm_users) == 0) check ensures real work is done
941
         * for single thread exec and dup_mmap cases. An alternate check might
942
         * have been (current->mm != mm).
943
         *                                              Kanoj Sarcar
944
         */
945
        if (atomic_read(&mm->mm_users) == 0)
946
                return;
947
 
948
        {
949
                u32 ctx = CTX_HWBITS(mm->context);
950
                int cpu = smp_processor_id();
951
 
952
                if (atomic_read(&mm->mm_users) == 1) {
953
                        /* See smp_flush_tlb_page for info about this. */
954
                        mm->cpu_vm_mask = (1UL << cpu);
955
                        goto local_flush_and_out;
956
                }
957
 
958
                smp_cross_call_masked(&xcall_flush_tlb_mm,
959
                                      ctx, 0, 0,
960
                                      mm->cpu_vm_mask);
961
 
962
        local_flush_and_out:
963
                __flush_tlb_mm(ctx, SECONDARY_CONTEXT);
964
        }
965
}
966
 
967
void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
968
                         unsigned long end)
969
{
970
        {
971
                u32 ctx = CTX_HWBITS(mm->context);
972
                int cpu = smp_processor_id();
973
 
974
                start &= PAGE_MASK;
975
                end    = PAGE_ALIGN(end);
976
 
977
                if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
978
                        mm->cpu_vm_mask = (1UL << cpu);
979
                        goto local_flush_and_out;
980
                }
981
 
982
                smp_cross_call_masked(&xcall_flush_tlb_range,
983
                                      ctx, start, end,
984
                                      mm->cpu_vm_mask);
985
 
986
        local_flush_and_out:
987
                __flush_tlb_range(ctx, start, SECONDARY_CONTEXT, end, PAGE_SIZE, (end-start));
988
        }
989
}
990
 
991
void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page)
992
{
993
        {
994
                u32 ctx = CTX_HWBITS(mm->context);
995
                int cpu = smp_processor_id();
996
 
997
                page &= PAGE_MASK;
998
                if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
999
                        /* By virtue of being the current address space, and
1000
                         * having the only reference to it, the following operation
1001
                         * is safe.
1002
                         *
1003
                         * It would not be a win to perform the xcall tlb flush in
1004
                         * this case, because even if we switch back to one of the
1005
                         * other processors in cpu_vm_mask it is almost certain that
1006
                         * all TLB entries for this context will be replaced by the
1007
                         * time that happens.
1008
                         */
1009
                        mm->cpu_vm_mask = (1UL << cpu);
1010
                        goto local_flush_and_out;
1011
                } else {
1012
                        /* By virtue of running under the mm->page_table_lock,
1013
                         * and mmu_context.h:switch_mm doing the same, the following
1014
                         * operation is safe.
1015
                         */
1016
                        if (mm->cpu_vm_mask == (1UL << cpu))
1017
                                goto local_flush_and_out;
1018
                }
1019
 
1020
                /* OK, we have to actually perform the cross call.  Most likely
1021
                 * this is a cloned mm or kswapd is kicking out pages for a task
1022
                 * which has run recently on another cpu.
1023
                 */
1024
                smp_cross_call_masked(&xcall_flush_tlb_page,
1025
                                      ctx, page, 0,
1026
                                      mm->cpu_vm_mask);
1027
                if (!(mm->cpu_vm_mask & (1UL << cpu)))
1028
                        return;
1029
 
1030
        local_flush_and_out:
1031
                __flush_tlb_page(ctx, page, SECONDARY_CONTEXT);
1032
        }
1033
}
1034
 
1035
/* CPU capture. */
1036
/* #define CAPTURE_DEBUG */
1037
extern unsigned long xcall_capture;
1038
 
1039
static atomic_t smp_capture_depth = ATOMIC_INIT(0);
1040
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
1041
static unsigned long penguins_are_doing_time;
1042
 
1043
void smp_capture(void)
1044
{
1045
        if (smp_processors_ready) {
1046
                int result = __atomic_add(1, &smp_capture_depth);
1047
 
1048
                membar("#StoreStore | #LoadStore");
1049
                if (result == 1) {
1050
                        int ncpus = smp_num_cpus;
1051
 
1052
#ifdef CAPTURE_DEBUG
1053
                        printk("CPU[%d]: Sending penguins to jail...",
1054
                               smp_processor_id());
1055
#endif
1056
                        penguins_are_doing_time = 1;
1057
                        membar("#StoreStore | #LoadStore");
1058
                        atomic_inc(&smp_capture_registry);
1059
                        smp_cross_call(&xcall_capture, 0, 0, 0);
1060
                        while (atomic_read(&smp_capture_registry) != ncpus)
1061
                                membar("#LoadLoad");
1062
#ifdef CAPTURE_DEBUG
1063
                        printk("done\n");
1064
#endif
1065
                }
1066
        }
1067
}
1068
 
1069
void smp_release(void)
1070
{
1071
        if (smp_processors_ready) {
1072
                if (atomic_dec_and_test(&smp_capture_depth)) {
1073
#ifdef CAPTURE_DEBUG
1074
                        printk("CPU[%d]: Giving pardon to imprisoned penguins\n",
1075
                               smp_processor_id());
1076
#endif
1077
                        penguins_are_doing_time = 0;
1078
                        membar("#StoreStore | #StoreLoad");
1079
                        atomic_dec(&smp_capture_registry);
1080
                }
1081
        }
1082
}
1083
 
1084
/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
1085
 * can service tlb flush xcalls...
1086
 */
1087
extern void prom_world(int);
1088
extern void save_alternate_globals(unsigned long *);
1089
extern void restore_alternate_globals(unsigned long *);
1090
void smp_penguin_jailcell(int irq, struct pt_regs *regs)
1091
{
1092
        unsigned long global_save[24];
1093
 
1094
        clear_softint(1 << irq);
1095
 
1096
        __asm__ __volatile__("flushw");
1097
        save_alternate_globals(global_save);
1098
        prom_world(1);
1099
        atomic_inc(&smp_capture_registry);
1100
        membar("#StoreLoad | #StoreStore");
1101
        while (penguins_are_doing_time)
1102
                membar("#LoadLoad");
1103
        restore_alternate_globals(global_save);
1104
        atomic_dec(&smp_capture_registry);
1105
        prom_world(0);
1106
}
1107
 
1108
extern unsigned long xcall_promstop;
1109
 
1110
void smp_promstop_others(void)
1111
{
1112
        if (smp_processors_ready)
1113
                smp_cross_call(&xcall_promstop, 0, 0, 0);
1114
}
1115
 
1116
extern void sparc64_do_profile(unsigned long pc, unsigned long o7);
1117
 
1118
#define prof_multiplier(__cpu)          cpu_data[(__cpu)].multiplier
1119
#define prof_counter(__cpu)             cpu_data[(__cpu)].counter
1120
 
1121
void smp_percpu_timer_interrupt(struct pt_regs *regs)
1122
{
1123
        unsigned long compare, tick, pstate;
1124
        int cpu = smp_processor_id();
1125
        int user = user_mode(regs);
1126
 
1127
        /*
1128
         * Check for level 14 softint.
1129
         */
1130
        {
1131
                unsigned long tick_mask = tick_ops->softint_mask;
1132
 
1133
                if (!(get_softint() & tick_mask)) {
1134
                        extern void handler_irq(int, struct pt_regs *);
1135
 
1136
                        handler_irq(14, regs);
1137
                        return;
1138
                }
1139
                clear_softint(tick_mask);
1140
        }
1141
 
1142
        do {
1143
                if (!user)
1144
                        sparc64_do_profile(regs->tpc, regs->u_regs[UREG_RETPC]);
1145
                if (!--prof_counter(cpu)) {
1146
                        irq_enter(cpu, 0);
1147
 
1148
                        if (cpu == boot_cpu_id) {
1149
                                kstat.irqs[cpu][0]++;
1150
                                timer_tick_interrupt(regs);
1151
                        }
1152
 
1153
                        update_process_times(user);
1154
 
1155
                        irq_exit(cpu, 0);
1156
 
1157
                        prof_counter(cpu) = prof_multiplier(cpu);
1158
                }
1159
 
1160
                /* Guarentee that the following sequences execute
1161
                 * uninterrupted.
1162
                 */
1163
                __asm__ __volatile__("rdpr      %%pstate, %0\n\t"
1164
                                     "wrpr      %0, %1, %%pstate"
1165
                                     : "=r" (pstate)
1166
                                     : "i" (PSTATE_IE));
1167
 
1168
                compare = tick_ops->add_compare(current_tick_offset);
1169
                tick = tick_ops->get_tick();
1170
 
1171
                /* Restore PSTATE_IE. */
1172
                __asm__ __volatile__("wrpr      %0, 0x0, %%pstate"
1173
                                     : /* no outputs */
1174
                                     : "r" (pstate));
1175
        } while (time_after_eq(tick, compare));
1176
}
1177
 
1178
/* Set up the tick timer of the calling cpu: the profiling multiplier
 * and counter both start at 1, and tick_ops->init_tick() is called
 * with current_tick_offset.
 */
static void __init smp_setup_percpu_timer(void)
{
        unsigned long saved_pstate;
        int this_cpu = smp_processor_id();

        prof_multiplier(this_cpu) = 1;
        prof_counter(this_cpu) = 1;

        /* Guarantee that the timer initialisation below executes
         * uninterrupted.
         */
        __asm__ __volatile__("rdpr      %%pstate, %0\n\t"
                             "wrpr      %0, %1, %%pstate"
                             : "=r" (saved_pstate)
                             : "i" (PSTATE_IE));

        tick_ops->init_tick(current_tick_offset);

        /* Put back the %pstate value saved above. */
        __asm__ __volatile__("wrpr      %0, 0x0, %%pstate"
                             : /* no outputs */
                             : "r" (saved_pstate));
}
1200
 
1201
/* Boot-time SMP bookkeeping.
 *
 * Records the boot cpu id, seeds current_tick_offset from
 * timer_tick_offset, rebuilds cpu_present_map from the mid of every
 * linux_cpus[] entry, marks every cpu number/logical map slot as
 * unused (-1) and then installs the boot cpu as logical cpu 0.
 */
void __init smp_tick_init(void)
{
        int n;

        boot_cpu_id = hard_smp_processor_id();
        current_tick_offset = timer_tick_offset;

        /* One bit per cpu listed in linux_cpus[]. */
        cpu_present_map = 0;
        for (n = 0; n < linux_num_cpus; n++) {
                cpu_present_map |= (1UL << linux_cpus[n].mid);
        }

        /* No mapping is known yet for any cpu ... */
        for (n = 0; n < NR_CPUS; n++) {
                __cpu_number_map[n] = -1;
                __cpu_logical_map[n] = -1;
        }

        /* ... except for the boot cpu, which becomes logical cpu 0. */
        __cpu_number_map[boot_cpu_id] = 0;
        prom_cpu_nodes[boot_cpu_id] = linux_cpus[0].prom_node;
        __cpu_logical_map[0] = boot_cpu_id;
        current->processor = boot_cpu_id;

        prof_multiplier(boot_cpu_id) = 1;
        prof_counter(boot_cpu_id) = 1;
}
1220
 
1221
static inline unsigned long find_flush_base(unsigned long size)
1222
{
1223
        struct page *p = mem_map;
1224
        unsigned long found, base;
1225
 
1226
        size = PAGE_ALIGN(size);
1227
        found = size;
1228
        base = (unsigned long) page_address(p);
1229
        while (found != 0) {
1230
                /* Failure. */
1231
                if (p >= (mem_map + max_mapnr))
1232
                        return 0UL;
1233
                if (PageReserved(p)) {
1234
                        found = size;
1235
                        base = (unsigned long) page_address(p);
1236
                } else {
1237
                        found -= PAGE_SIZE;
1238
                }
1239
                p++;
1240
        }
1241
        return base;
1242
}
1243
 
1244
/* /proc/profile writes can call this, don't __init it please. */
1245
int setup_profiling_timer(unsigned int multiplier)
1246
{
1247
        unsigned long flags;
1248
        int i;
1249
 
1250
        if ((!multiplier) || (timer_tick_offset / multiplier) < 1000)
1251
                return -EINVAL;
1252
 
1253
        save_and_cli(flags);
1254
        for (i = 0; i < NR_CPUS; i++) {
1255
                if (cpu_present_map & (1UL << i))
1256
                        prof_multiplier(i) = multiplier;
1257
        }
1258
        current_tick_offset = (timer_tick_offset / multiplier);
1259
        restore_flags(flags);
1260
 
1261
        return 0;
1262
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.