/*
 *  linux/arch/x86_64/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson   : AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson   : Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson   : Pentium 4 support for local APIC NMI watchdog.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>

#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/proto.h>
#include <asm/kdebug.h>

unsigned int nmi_watchdog = NMI_LOCAL_APIC;
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr;   /* the MSR to reset in NMI handler */
int panic_on_timeout;

int nmi_watchdog_disabled;

/* Small problem with these events is that they stop ticking
   when the CPU is idling. This means you get varying NMI watchdog
   frequencies depending on the CPU load.

   I doubt it can be fixed because it's unlikely that the CPU does
   performance counters while being in C* states. -AK */
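
/* Illustration of the point above: a CPU parked in the idle loop's HLT
   stops the "running" events programmed below, CYCLES_PROCESSOR_IS_RUNNING
   (0x76) on K7 and CPU_CLOCKS_NOT_HALTED (0x79) on P6, so the interval
   between watchdog NMIs stretches while the CPU idles. */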

#define K7_EVNTSEL_ENABLE       (1 << 22)
#define K7_EVNTSEL_INT          (1 << 20)
#define K7_EVNTSEL_OS           (1 << 17)
#define K7_EVNTSEL_USR          (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
#define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

#define P6_EVNTSEL0_ENABLE      (1 << 22)
#define P6_EVNTSEL_INT          (1 << 20)
#define P6_EVNTSEL_OS           (1 << 17)
#define P6_EVNTSEL_USR          (1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
#define P6_NMI_EVENT            P6_EVENT_CPU_CLOCKS_NOT_HALTED

#define MSR_P4_MISC_ENABLE      0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
#define MSR_P4_PERFCTR0         0x300
#define MSR_P4_CCCR0            0x360
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
#define P4_ESCR_OS              (1<<3)
#define P4_ESCR_USR             (1<<2)
#define P4_CCCR_OVF_PMI         (1<<26)
#define P4_CCCR_THRESHOLD(N)    ((N)<<20)
#define P4_CCCR_COMPLEMENT      (1<<19)
#define P4_CCCR_COMPARE         (1<<18)
#define P4_CCCR_REQUIRED        (3<<16)
#define P4_CCCR_ESCR_SELECT(N)  ((N)<<13)
#define P4_CCCR_ENABLE          (1<<12)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
#define MSR_P4_IQ_COUNTER0      0x30C
#define MSR_P4_IQ_CCCR0         0x36C
#define MSR_P4_CRU_ESCR0        0x3B8
#define P4_NMI_CRU_ESCR0        (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0 \
        (P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|      \
         P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
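/* Worked out numerically (a sketch derived from the macros above):
   P4_NMI_CRU_ESCR0 == (0x3F<<25)|(1<<3)|(1<<2) == 0x7E00000C, and
   P4_NMI_IQ_CCCR0  == (1<<26)|(15<<20)|(1<<19)|(1<<18)|(3<<16)|(4<<13)|(1<<12)
                    == 0x04FF9000.
   The complemented max threshold makes the CCCR's "count <= 15" compare
   succeed on every cycle, which is what turns IQ_COUNTER0 into a clock. */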

/* Why is there no CPUID flag for this? */
static __init int cpu_has_lapic(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_INTEL:
        case X86_VENDOR_AMD:
                return boot_cpu_data.x86 >= 6;
                /* add more cpus here or find a better way to figure this out. */
        default:
                return 0;
        }
}

int __init check_nmi_watchdog (void)
{
        irq_cpustat_t tmp[NR_CPUS];
        int j, cpu;

        if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic())
                return -1;

        printk(KERN_INFO "testing NMI watchdog ... ");

        memcpy(tmp, irq_stat, sizeof(tmp));
        sti();
        mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

        for (j = 0; j < smp_num_cpus; j++) {
                cpu = cpu_logical_map(j);
                if (nmi_count(cpu) - tmp[cpu].__nmi_count <= 5) {
                        printk("CPU#%d: NMI appears to be stuck!\n", cpu);
                        return -1;
                }
        }
        printk("OK.\n");

        /* now that we know it works we can reduce NMI frequency to
           something more reasonable; makes a difference in some configs */
        if (nmi_watchdog == NMI_LOCAL_APIC)
                nmi_hz = 1;

        return 0;
}
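/* In the boot log, a healthy system shows "testing NMI watchdog ... OK.";
   a CPU whose NMI count advanced by five or fewer during the ~10-tick
   delay is flagged with "CPU#N: NMI appears to be stuck!" and the
   watchdog setup is abandoned. */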

static int __init setup_nmi_watchdog(char *str)
{
        int nmi;

        if (!strncmp(str,"panic",5)) {
                panic_on_timeout = 1;
                str = strchr(str, ',');
                if (!str)
                        return 1;
                ++str;
        }
        get_option(&str, &nmi);

        if (nmi >= NMI_INVALID)
                return 0;
        nmi_watchdog = nmi;
        return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
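/* Boot-line usage sketch, assuming the NMI_* constants from the 2.4
   headers (NMI_NONE == 0, NMI_IO_APIC == 1, NMI_LOCAL_APIC == 2):

        nmi_watchdog=2          use the local APIC watchdog
        nmi_watchdog=0          turn the watchdog off
        nmi_watchdog=panic,2    also panic, instead of oopsing, when a
                                lockup is detected                       */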

#ifdef CONFIG_PM

#include <linux/pm.h>

struct pm_dev *nmi_pmdev;

static void disable_apic_nmi_watchdog(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
                        return;
                wrmsr(MSR_K7_EVNTSEL0, 0, 0);
                break;
        case X86_VENDOR_INTEL:
                switch (boot_cpu_data.x86) {
                case 6:
                        wrmsr(MSR_P6_EVNTSEL0, 0, 0);
                        break;
                case 15:
                        wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
                        wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
                        break;
                }
                break;
        }
}

static int nmi_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
{
        switch (rqst) {
        case PM_SUSPEND:
                disable_apic_nmi_watchdog();
                break;
        case PM_RESUME:
                setup_apic_nmi_watchdog();
                break;
        }
        return 0;
}

static void nmi_pm_init(void)
{
        if (!nmi_pmdev)
                nmi_pmdev = apic_pm_register(PM_SYS_DEV, 0, nmi_pm_callback);
}

#define __pminit        /* empty */

#else   /* CONFIG_PM */

static inline void nmi_pm_init(void) { }

#define __pminit        __init

#endif  /* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

static void __pminit clear_msr_range(unsigned int base, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i)
                wrmsr(base+i, 0, 0);
}

static void __pminit setup_k7_watchdog(void)
{
        unsigned int evntsel;

#if 0
        /* No check, so can start with slow frequency */
        nmi_hz = 1;
#endif

        nmi_perfctr_msr = MSR_K7_PERFCTR0;

        clear_msr_range(MSR_K7_EVNTSEL0, 4);
        clear_msr_range(MSR_K7_PERFCTR0, 4);

        evntsel = K7_EVNTSEL_INT
                | K7_EVNTSEL_OS
                | K7_EVNTSEL_USR
                | K7_NMI_EVENT;

        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
        Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
        wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}
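/* The reload value -(cpu_khz/nmi_hz*1000) arms the counter to overflow
   after exactly one watchdog period.  A sketch with assumed numbers: a
   hypothetical 1.2 GHz K7 has cpu_khz == 1200000; with nmi_hz == HZ == 100
   the MSR is written with -12000000, so the overflow NMI arrives after 12M
   unhalted cycles, i.e. every 10 ms, until check_nmi_watchdog() later
   drops nmi_hz to 1. */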

#ifndef CONFIG_MK8
static void __pminit setup_p6_watchdog(void)
{
        unsigned int evntsel;

        nmi_perfctr_msr = MSR_P6_PERFCTR0;

        clear_msr_range(MSR_P6_EVNTSEL0, 2);
        clear_msr_range(MSR_P6_PERFCTR0, 2);

        evntsel = P6_EVNTSEL_INT
                | P6_EVNTSEL_OS
                | P6_EVNTSEL_USR
                | P6_NMI_EVENT;

        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
        Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
        wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
}

static int __pminit setup_p4_watchdog(void)
{
        unsigned int misc_enable, dummy;

        rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
                return 0;

        nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;

        if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
                clear_msr_range(0x3F1, 2);
        /* MSR 0x3F0 seems to have a default value of 0xFC00, but the
           current docs don't fully define it, so leave it alone for now. */
        clear_msr_range(0x3A0, 31);
        clear_msr_range(0x3C0, 6);
        clear_msr_range(0x3C8, 6);
        clear_msr_range(0x3E0, 2);
        clear_msr_range(MSR_P4_CCCR0, 18);
        clear_msr_range(MSR_P4_PERFCTR0, 18);

        wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
        Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
        wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
        return 1;
}
#endif
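/* Note the enable ordering in setup_p4_watchdog() above: the ESCR is
   programmed first, the CCCR is written with ENABLE still clear, the
   counter is armed, LVTPC is pointed at NMI delivery, and only then is
   P4_CCCR_ENABLE switched on, presumably so that no overflow can fire
   before the APIC is ready to route it. */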

void __pminit setup_apic_nmi_watchdog (void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                if (boot_cpu_data.x86 < 6)
                        return;
                if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
                        return;
                setup_k7_watchdog();
                break;
#ifndef CONFIG_MK8
        case X86_VENDOR_INTEL:
                switch (boot_cpu_data.x86) {
                case 6:
                        setup_p6_watchdog();
                        break;
                case 15:
                        if (!setup_p4_watchdog())
                                return;
                        break;
                default:
                        return;
                }
                break;
#endif
        default:
                return;
        }
        nmi_pm_init();
}

static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;

/*
 * The best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * As these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * Since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
        last_irq_sums [NR_CPUS],
        alert_counter [NR_CPUS];

void touch_nmi_watchdog (void)
{
        int i;

        /*
         * Just reset the alert counters; other CPUs might be
         * spinning on locks we hold:
         */
        for (i = 0; i < smp_num_cpus; i++)
                alert_counter[i] = 0;
}
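/* Callers that legitimately keep a CPU away from timer interrupts for
   seconds at a time (a long console dump, say) should poke the watchdog
   periodically so the stuck-CPU check below never trips.  A hypothetical
   caller, for illustration only:

        while (more_output()) {
                touch_nmi_watchdog();
                emit_chunk();
        }
*/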

void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
{
        /*
         * Since current-> is always on the stack, and we always switch
         * the stack NMI-atomically, it's safe to use smp_processor_id().
         */
        int sum, cpu = safe_smp_processor_id();

        if (nmi_watchdog_disabled)
                return;

        sum = apic_timer_irqs[cpu];

        if (last_irq_sums[cpu] == sum) {
                /*
                 * Ayiee, looks like this CPU is stuck ...
                 * wait a few IRQs (5 seconds) before doing the oops ...
                 */
                alert_counter[cpu]++;
                if (alert_counter[cpu] == 5*nmi_hz) {

                        if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_BAD) {
                                alert_counter[cpu] = 0;
                                return;
                        }

                        spin_lock(&nmi_print_lock);
                        /*
                         * We are in trouble anyway, let's at least try
                         * to get a message out.
                         */
                        bust_spinlocks(1);
                        printk("NMI Watchdog detected LOCKUP on CPU%d, eip %16lx, registers:\n", cpu, regs->rip);
                        show_registers(regs);
                        if (panic_on_timeout)
                                panic("NMI lockup");
                        printk("console shuts up ...\n");
                        console_silent();
                        spin_unlock(&nmi_print_lock);
                        bust_spinlocks(0);
                        do_exit(SIGSEGV);
                }
        } else {
                last_irq_sums[cpu] = sum;
                alert_counter[cpu] = 0;
        }
        if (nmi_perfctr_msr) {
#ifndef CONFIG_MK8
                if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
                        /*
                         * P4 quirks:
                         * - An overflowed perfctr will assert its interrupt
                         *   until the OVF flag in its CCCR is cleared.
                         * - LVTPC is masked on interrupt and must be
                         *   unmasked by the LVTPC handler.
                         */
                        wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
                        apic_write(APIC_LVTPC, APIC_DM_NMI);
                }
#endif
                wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
        }
}
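/* Timing sketch: once check_nmi_watchdog() has reduced nmi_hz to 1, every
   CPU takes one watchdog NMI per second.  If apic_timer_irqs[cpu] stops
   moving, alert_counter[cpu] grows by one per NMI and hits the
   5*nmi_hz == 5 threshold after roughly five seconds, the "wait a few
   IRQs (5 seconds)" mentioned above, at which point the lockup path
   runs. */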
