aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGraf Yang <graf.yang@analog.com>2010-01-20 10:56:24 +0000
committerMike Frysinger <vapier@gentoo.org>2010-03-09 00:30:49 -0500
commit60ffdb36547da2397d6cfefe9c752ebad16524f6 (patch)
treece188bc0ccd1d8d0e6ff0f49937dca95e1f571c0
parent726e96561e4704278bc5197238f6459e1a63aa77 (diff)
Blackfin: implement nmi_watchdog for SMP on BF561
Signed-off-by: Graf Yang <graf.yang@analog.com> Signed-off-by: Mike Frysinger <vapier@gentoo.org>
-rw-r--r--arch/blackfin/Kconfig.debug9
-rw-r--r--arch/blackfin/include/asm/irq.h4
-rw-r--r--arch/blackfin/include/asm/nmi.h12
-rw-r--r--arch/blackfin/include/asm/smp.h1
-rw-r--r--arch/blackfin/kernel/Makefile1
-rw-r--r--arch/blackfin/kernel/nmi.c313
-rw-r--r--arch/blackfin/kernel/time-ts.c4
-rw-r--r--arch/blackfin/mach-common/interrupt.S18
8 files changed, 361 insertions, 1 deletions
diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index 87f195ee2e0..1460d7b5edc 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -238,6 +238,15 @@ config EARLY_PRINTK
all of this lives in the init section and is thrown away after the
kernel boots completely.
+config NMI_WATCHDOG
+ bool "Enable NMI watchdog to help debugging lockup on SMP"
+ default n
+ depends on (SMP && !BFIN_SCRATCH_REG_RETN)
+ help
+ If any CPU in the system does not execute the periodic local timer
+ interrupt for more than 5 seconds, then the NMI handler dumps debug
+ information. This information can be used to debug the lockup.
+
config CPLB_INFO
bool "Display the CPLB information"
help
diff --git a/arch/blackfin/include/asm/irq.h b/arch/blackfin/include/asm/irq.h
index 89de539ed01..12f4060a31b 100644
--- a/arch/blackfin/include/asm/irq.h
+++ b/arch/blackfin/include/asm/irq.h
@@ -38,4 +38,8 @@
#include <asm-generic/irq.h>
+/*
+ * Defined only when the NMI watchdog is configured in.
+ * NOTE(review): presumably tells generic code (<linux/nmi.h>) that this
+ * architecture supplies its own touch_nmi_watchdog() - confirm.
+ */
+#ifdef CONFIG_NMI_WATCHDOG
+# define ARCH_HAS_NMI_WATCHDOG
+#endif
+
#endif /* _BFIN_IRQ_H_ */
diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h
new file mode 100644
index 00000000000..b9caac4fcfd
--- /dev/null
+++ b/arch/blackfin/include/asm/nmi.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright 2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2
+ */
+
+#ifndef _BFIN_NMI_H_
+#define _BFIN_NMI_H_
+
+#include <linux/nmi.h>
+
+#endif
diff --git a/arch/blackfin/include/asm/smp.h b/arch/blackfin/include/asm/smp.h
index 29fb8821947..7f26de09ca9 100644
--- a/arch/blackfin/include/asm/smp.h
+++ b/arch/blackfin/include/asm/smp.h
@@ -22,6 +22,7 @@ extern char coreb_trampoline_start, coreb_trampoline_end;
struct corelock_slot {
int lock;
};
+extern struct corelock_slot corelock;
void smp_icache_flush_range_others(unsigned long start,
unsigned long end);
diff --git a/arch/blackfin/kernel/Makefile b/arch/blackfin/kernel/Makefile
index a8ddbc8ed5a..346a421f156 100644
--- a/arch/blackfin/kernel/Makefile
+++ b/arch/blackfin/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_CPLB_INFO) += cplbinfo.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KGDB_TESTS) += kgdb_test.o
+obj-$(CONFIG_NMI_WATCHDOG) += nmi.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_EARLY_PRINTK) += shadow_console.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
new file mode 100644
index 00000000000..19093c17632
--- /dev/null
+++ b/arch/blackfin/kernel/nmi.c
@@ -0,0 +1,313 @@
+/*
+ * Blackfin nmi_watchdog Driver
+ *
+ * Originally based on bfin_wdt.c
+ * Copyright 2010-2010 Analog Devices Inc.
+ * Graff Yang <graf.yang@analog.com>
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/bitops.h>
+#include <linux/hardirq.h>
+#include <linux/sysdev.h>
+#include <linux/pm.h>
+#include <linux/nmi.h>
+#include <linux/smp.h>
+#include <linux/timer.h>
+#include <asm/blackfin.h>
+#include <asm/atomic.h>
+#include <asm/cacheflush.h>
+
+/* Bit in WDOG_CTL that indicates watchdog has expired (WDR0) */
+#define WDOG_EXPIRED 0x8000
+
+/* Masks for WDEV field in WDOG_CTL register */
+#define ICTL_RESET 0x0
+#define ICTL_NMI 0x2
+#define ICTL_GPI 0x4
+#define ICTL_NONE 0x6
+#define ICTL_MASK 0x6
+
+/* Masks for WDEN field in WDOG_CTL register */
+#define WDEN_MASK 0x0FF0
+#define WDEN_ENABLE 0x0000
+#define WDEN_DISABLE 0x0AD0
+
+#define DRV_NAME "nmi-wdt"
+
+#define NMI_WDT_TIMEOUT 5 /* 5 seconds */
+#define NMI_CHECK_TIMEOUT (4 * HZ) /* 4 seconds in jiffies */
+/* CPU whose NMI handler coordinates the dump (CoreB, see do_nmi()) */
+static int nmi_wdt_cpu = 1;
+
+/* last timeout accepted by nmi_wdt_set_timeout() */
+static unsigned int timeout = NMI_WDT_TIMEOUT;
+/* set once init_nmi_wdt() has started the watchdog */
+static int nmi_active;
+
+/* WDOGA_CTL/WDOGA_CNT saved by send_corea_nmi(), restored by restore_corea_nmi() */
+static unsigned short wdoga_ctl;
+static unsigned int wdoga_cnt;
+/* copy of corelock taken by save_corelock() */
+static struct corelock_slot saved_corelock;
+/* per-CPU flag: set by touch_nmi_watchdog(), consumed by check_nmi_wdt_touched() */
+static atomic_t nmi_touched[NR_CPUS];
+/* periodic timer that runs nmi_wdt_timer() */
+static struct timer_list ntimer;
+
+/*
+ * Bit numbers within nmi_event, used by do_nmi() to hand-shake between
+ * the two cores while they are both inside the NMI handler.
+ */
+enum {
+ COREA_ENTER_NMI = 0,
+ COREA_EXIT_NMI,
+ COREB_EXIT_NMI,
+
+ NMI_EVENT_NR,
+};
+/* event bitmap, explicitly placed in the .l2.bss section */
+static unsigned long nmi_event __attribute__ ((__section__(".l2.bss")));
+
+/*
+ * Raise @event (one of the *_NMI bit numbers) in nmi_event.
+ * we are in nmi, non-atomic bit ops is safe
+ */
+static inline void set_nmi_event(int event)
+{
+ __set_bit(event, &nmi_event);
+}
+
+/*
+ * Busy-wait until @event is set in nmi_event, then consume it by
+ * clearing the bit (non-atomically). There is no timeout: this spins
+ * forever if the event is never raised.
+ */
+static inline void wait_nmi_event(int event)
+{
+ while (!test_bit(event, &nmi_event))
+ barrier();
+ __clear_bit(event, &nmi_event);
+}
+
+/*
+ * Borrow watchdog A to deliver an NMI to CoreA.
+ *
+ * The current WDOGA_CTL/WDOGA_CNT values are saved in wdoga_ctl/wdoga_cnt
+ * so that restore_corea_nmi() can put them back. The watchdog is then
+ * disabled, its count set to 0 and re-enabled with the NMI event selected.
+ * NOTE(review): presumably a zero count makes it expire at once - confirm
+ * against the hardware reference.
+ */
+static inline void send_corea_nmi(void)
+{
+ wdoga_ctl = bfin_read_WDOGA_CTL();
+ wdoga_cnt = bfin_read_WDOGA_CNT();
+
+ bfin_write_WDOGA_CTL(WDEN_DISABLE);
+ bfin_write_WDOGA_CNT(0);
+ bfin_write_WDOGA_CTL(WDEN_ENABLE | ICTL_NMI);
+}
+
+/*
+ * Undo send_corea_nmi(): stop watchdog A, write the same
+ * "expired | disabled | no event" value that nmi_wdt_clear() uses for
+ * watchdog B, then restore the count and control values saved earlier.
+ */
+static inline void restore_corea_nmi(void)
+{
+ bfin_write_WDOGA_CTL(WDEN_DISABLE);
+ bfin_write_WDOGA_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
+
+ bfin_write_WDOGA_CNT(wdoga_cnt);
+ bfin_write_WDOGA_CTL(wdoga_ctl);
+}
+
+/*
+ * Snapshot the global corelock into saved_corelock and then zero its
+ * lock field.
+ * NOTE(review): presumably this lets the dump code run even if the lock
+ * was held when the lockup happened - confirm.
+ */
+static inline void save_corelock(void)
+{
+ saved_corelock = corelock;
+ corelock.lock = 0;
+}
+
+/* Put back the corelock contents captured by save_corelock(). */
+static inline void restore_corelock(void)
+{
+ corelock = saved_corelock;
+}
+
+
+/*
+ * Kick watchdog B by writing to WDOGB_STAT (do_nmi() describes this write
+ * as reloading WDOG_STAT).
+ */
+static inline void nmi_wdt_keepalive(void)
+{
+ bfin_write_WDOGB_STAT(0);
+}
+
+/* Disable watchdog B by writing the WDEN disable pattern to WDOGB_CTL. */
+static inline void nmi_wdt_stop(void)
+{
+ bfin_write_WDOGB_CTL(WDEN_DISABLE);
+}
+
+/*
+ * Acknowledge an expired watchdog B and leave it disabled with no event
+ * selected.
+ * before calling this function, you must stop the WDT
+ */
+static inline void nmi_wdt_clear(void)
+{
+ /* clear TRO bit, disable event generation */
+ bfin_write_WDOGB_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
+}
+
+/* Enable watchdog B with the NMI event selected in the WDEV field. */
+static inline void nmi_wdt_start(void)
+{
+ bfin_write_WDOGB_CTL(WDEN_ENABLE | ICTL_NMI);
+}
+
+/*
+ * Tell whether watchdog B is currently enabled.
+ *
+ * Returns non-zero when the WDEN field of WDOGB_CTL holds anything other
+ * than the disable pattern, 0 otherwise.
+ */
+static inline int nmi_wdt_running(void)
+{
+	unsigned short wden = bfin_read_WDOGB_CTL() & WDEN_MASK;
+
+	return wden != WDEN_DISABLE;
+}
+
+/*
+ * Program watchdog B for a timeout of @t (seconds, going by
+ * NMI_WDT_TIMEOUT and the boot message).
+ *
+ * The count written to WDOGB_CNT is @t multiplied by get_sclk(); values
+ * of @t whose product would not fit in 32 bits are rejected. The watchdog
+ * is stopped while the count is changed and restarted only if it was
+ * running on entry.
+ *
+ * Returns 0 on success, -EINVAL if @t is too large.
+ */
+static inline int nmi_wdt_set_timeout(unsigned long t)
+{
+	u32 sclk = get_sclk();
+	/* -1 is converted to unsigned here, i.e. the largest u32 value */
+	u32 limit = -1 / sclk;
+	u32 ticks;
+	int was_running;
+
+	if (t > limit) {
+		pr_warning("NMI: timeout value is too large\n");
+		return -EINVAL;
+	}
+	ticks = t * sclk;
+
+	was_running = nmi_wdt_running();
+	nmi_wdt_stop();
+	bfin_write_WDOGB_CNT(ticks);
+	if (was_running)
+		nmi_wdt_start();
+
+	timeout = t;
+	return 0;
+}
+
+/*
+ * Check whether every online CPU has touched the watchdog since the
+ * previous check, clearing each flag that is found set.
+ *
+ * The calling CPU's flag is examined first, then the flags of the other
+ * online CPUs. The scan stops at the first flag that is clear; flags not
+ * yet examined are left untouched.
+ *
+ * Returns 1 if all online CPUs had their flag set, 0 otherwise.
+ */
+int check_nmi_wdt_touched(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	cpumask_t mask = cpu_online_map;
+
+	if (!atomic_read(&nmi_touched[this_cpu]))
+		return 0;
+
+	atomic_set(&nmi_touched[this_cpu], 0);
+
+	cpu_clear(this_cpu, mask);
+	for_each_cpu_mask(cpu, mask) {
+		/*
+		 * Invalidate the cached copy of the other CPU's flag before
+		 * reading it. The end address is one element past the start
+		 * so the range is never empty, whatever the flag's alignment.
+		 */
+		invalidate_dcache_range((unsigned long)(&nmi_touched[cpu]),
+				(unsigned long)(&nmi_touched[cpu] + 1));
+		if (!atomic_read(&nmi_touched[cpu]))
+			return 0;
+		atomic_set(&nmi_touched[cpu], 0);
+	}
+
+	return 1;
+}
+
+/*
+ * Timer callback: kick watchdog B only if check_nmi_wdt_touched() reports
+ * that all online CPUs touched it, then re-arm the timer. The timer period
+ * (NMI_CHECK_TIMEOUT, 4 s) is shorter than the default watchdog timeout
+ * (NMI_WDT_TIMEOUT, 5 s). @data is unused.
+ */
+static void nmi_wdt_timer(unsigned long data)
+{
+ if (check_nmi_wdt_touched())
+ nmi_wdt_keepalive();
+
+ mod_timer(&ntimer, jiffies + NMI_CHECK_TIMEOUT);
+}
+
+/*
+ * Boot-time setup: program the watchdog count, start watchdog B and arm
+ * the periodic timer that kicks it.
+ *
+ * Returns 0 on success, or the error from nmi_wdt_set_timeout() - in which
+ * case the watchdog is left stopped and nmi_active stays 0, rather than
+ * being started with a count that was never programmed.
+ */
+static int __init init_nmi_wdt(void)
+{
+	int ret;
+
+	ret = nmi_wdt_set_timeout(timeout);
+	if (ret)
+		return ret;
+
+	nmi_wdt_start();
+	nmi_active = true;
+
+	init_timer(&ntimer);
+	ntimer.function = nmi_wdt_timer;
+	ntimer.expires = jiffies + NMI_CHECK_TIMEOUT;
+	add_timer(&ntimer);
+
+	/* timeout is unsigned int, so print it with %u */
+	pr_info("nmi_wdt: initialized: timeout=%u sec\n", timeout);
+	return 0;
+}
+device_initcall(init_nmi_wdt);
+
+/*
+ * Mark the calling CPU as alive: sets its nmi_touched flag, which
+ * check_nmi_wdt_touched() later consumes.
+ */
+void touch_nmi_watchdog(void)
+{
+	atomic_t *flag = &nmi_touched[smp_processor_id()];
+
+	atomic_set(flag, 1);
+}
+
+/* Suspend/resume support */
+#ifdef CONFIG_PM
+/* sysdev suspend hook: stop watchdog B. @dev and @state are unused; always returns 0. */
+static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state)
+{
+ nmi_wdt_stop();
+ return 0;
+}
+
+/*
+ * sysdev resume hook: restart watchdog B, but only if init_nmi_wdt() had
+ * started it (nmi_active). @dev is unused; always returns 0.
+ */
+static int nmi_wdt_resume(struct sys_device *dev)
+{
+	if (!nmi_active)
+		return 0;
+
+	nmi_wdt_start();
+	return 0;
+}
+
+/* sysdev class named DRV_NAME that carries the suspend/resume hooks */
+static struct sysdev_class nmi_sysclass = {
+ .name = DRV_NAME,
+ .resume = nmi_wdt_resume,
+ .suspend = nmi_wdt_suspend,
+};
+
+/* the single device instance (id 0) belonging to nmi_sysclass */
+static struct sys_device device_nmi_wdt = {
+ .id = 0,
+ .cls = &nmi_sysclass,
+};
+
+/*
+ * Register the sysdev class and device that provide suspend/resume
+ * handling. Does nothing (and returns 0) when the watchdog was never
+ * activated.
+ *
+ * Returns 0 on success or the error from whichever registration failed.
+ */
+static int __init init_nmi_wdt_sysfs(void)
+{
+	int ret;
+
+	if (!nmi_active)
+		return 0;
+
+	ret = sysdev_class_register(&nmi_sysclass);
+	if (ret)
+		return ret;
+
+	return sysdev_register(&device_nmi_wdt);
+}
+late_initcall(init_nmi_wdt_sysfs);
+
+#endif /* CONFIG_PM */
+
+
+/*
+ * C-level NMI handler, called from _evt_nmi with @fp pointing at the
+ * saved registers.
+ *
+ * Sequence, as implemented below:
+ *  1. CoreB (cpu == nmi_wdt_cpu) takes the watchdog B NMI, silences
+ *     watchdog B and uses watchdog A to raise an NMI on CoreA.
+ *  2. CoreA enters here, signals COREA_ENTER_NMI and dumps its state
+ *     while CoreB waits for COREA_EXIT_NMI.
+ *  3. CoreA signals COREA_EXIT_NMI and waits for COREB_EXIT_NMI.
+ *  4. CoreB dumps its own state, restores the corelock and signals
+ *     COREB_EXIT_NMI.
+ * The waits have no timeout.
+ */
+asmlinkage notrace void do_nmi(struct pt_regs *fp)
+{
+ unsigned int cpu = smp_processor_id();
+ nmi_enter();
+
+ /* per-CPU count of NMIs taken */
+ cpu_pda[cpu].__nmi_count += 1;
+
+ if (cpu == nmi_wdt_cpu) {
+ /* CoreB goes here first */
+
+ /* reload the WDOG_STAT */
+ nmi_wdt_keepalive();
+
+ /* clear nmi interrupt for CoreB */
+ nmi_wdt_stop();
+ nmi_wdt_clear();
+
+ /* trigger NMI interrupt of CoreA */
+ send_corea_nmi();
+
+ /* wait for CoreA to enter NMI */
+ wait_nmi_event(COREA_ENTER_NMI);
+
+ /* recover WDOGA's settings */
+ restore_corea_nmi();
+
+ save_corelock();
+
+ /* corelock is saved/cleared, CoreA is dumping messages */
+
+ wait_nmi_event(COREA_EXIT_NMI);
+ } else {
+ /* OK, CoreA entered NMI */
+ set_nmi_event(COREA_ENTER_NMI);
+ }
+
+ /* both cores run this dump: CoreA first, CoreB once CoreA is done */
+ pr_emerg("\nNMI Watchdog detected LOCKUP, dump for CPU %d\n", cpu);
+ dump_bfin_process(fp);
+ dump_bfin_mem(fp);
+ show_regs(fp);
+ dump_bfin_trace_buffer();
+ show_stack(current, (unsigned long *)fp);
+
+ if (cpu == nmi_wdt_cpu) {
+ pr_emerg("This fault is not recoverable, sorry!\n");
+
+ /* both dumps are finished (CoreB ran last), restore the corelock */
+ restore_corelock();
+
+ set_nmi_event(COREB_EXIT_NMI);
+ } else {
+ /* CoreA dump finished, notify CoreB that we are done */
+ set_nmi_event(COREA_EXIT_NMI);
+
+ /* wait until CoreB has finished as well */
+ wait_nmi_event(COREB_EXIT_NMI);
+ }
+
+ nmi_exit();
+}
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index a351f97c87a..41a907596c7 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -21,6 +21,7 @@
#include <asm/blackfin.h>
#include <asm/time.h>
#include <asm/gptimers.h>
+#include <asm/nmi.h>
/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
@@ -309,6 +310,9 @@ irqreturn_t bfin_coretmr_interrupt(int irq, void *dev_id)
smp_mb();
evt->event_handler(evt);
+
+ touch_nmi_watchdog();
+
return IRQ_HANDLED;
}
diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S
index 0a0c088ead8..cee62cf4acd 100644
--- a/arch/blackfin/mach-common/interrupt.S
+++ b/arch/blackfin/mach-common/interrupt.S
@@ -194,12 +194,28 @@ ENTRY(_evt_ivhw)
ENDPROC(_evt_ivhw)
/* Interrupt routine for evt2 (NMI).
- * We don't actually use this, so just return.
* For inner circle type details, please see:
* http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:nmi
*/
ENTRY(_evt_nmi)
+#ifndef CONFIG_NMI_WATCHDOG
.weak _evt_nmi
+#else
+ /* CPLBs are not taken into account here; this handler does not return */
+ /* NOTE(review): SAVE_ALL_SYS presumably pushes the register frame that do_nmi() receives - confirm */
+ SAVE_ALL_SYS
+ /* r0 = sp: presumably the struct pt_regs * argument of do_nmi() - confirm against the ABI */
+ r0 = sp;
+ /* store RETN in the PT_PC slot of the frame */
+ r1 = retn;
+ [sp + PT_PC] = r1;
+ trace_buffer_save(p4,r5);
+
+ ANOMALY_283_315_WORKAROUND(p4, r5)
+
+ /* 12 bytes of stack are reserved around the call to the C handler */
+ SP += -12;
+ call _do_nmi;
+ SP += 12;
+ /* spin forever once do_nmi() comes back; the rtn below is not reached in this configuration */
+1:
+ jump 1b;
+#endif
rtn;
ENDPROC(_evt_nmi)