aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/alpha/include/asm/thread_info.h2
-rw-r--r--arch/avr32/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/irqflags.h56
-rw-r--r--arch/x86/include/asm/paravirt.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h1
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/entry_32.S30
-rw-r--r--arch/x86/kernel/entry_64.S33
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c6
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c6
-rw-r--r--arch/x86/lguest/boot.c1
-rw-r--r--arch/x86/xen/enlighten.c1
15 files changed, 143 insertions, 6 deletions
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 6f32f9c84a2..1500fa4db88 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -56,7 +56,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (2*PAGE_SIZE)
-#define PREEMPT_ACTIVE 0x40000000
+#define PREEMPT_ACTIVE 0x10000000
/*
* Thread information flags:
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 7a9c03dcb0b..845b14c6ab7 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -66,7 +66,7 @@ static inline struct thread_info *current_thread_info(void)
#endif /* !__ASSEMBLY__ */
-#define PREEMPT_ACTIVE 0x40000000
+#define PREEMPT_ACTIVE 0x10000000
/*
* Thread information flags
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 5745ce8bf10..fdf897373e1 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -56,6 +56,61 @@ static inline void native_halt(void)
#endif
+#ifdef CONFIG_X86_64
+/*
+ * Only returns from a trap or exception to a NMI context (intra-privilege
+ * level near return) to the same SS and CS segments. Should be used
+ * upon trap or exception return when nested over a NMI context so no iret is
+ * issued. It takes care of modifying the eflags, rsp and returning to the
+ * previous function.
+ *
+ * The stack, at that point, looks like :
+ *
+ * 0(rsp) RIP
+ * 8(rsp) CS
+ * 16(rsp) EFLAGS
+ * 24(rsp) RSP
+ * 32(rsp) SS
+ *
+ * Upon execution :
+ * Copy EIP to the top of the return stack
+ * Update top of return stack address
+ * Pop eflags into the eflags register
+ * Make the return stack current
+ * Near return (popping the return address from the return stack)
+ */
+#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushq %rax; \
+ movq %rsp, %rax; \
+ movq 24+8(%rax), %rsp; \
+ pushq 0+8(%rax); \
+ pushq 16+8(%rax); \
+ movq (%rax), %rax; \
+ popfq; \
+ ret
+#else
+/*
+ * Protected mode only, no V8086. Implies that protected mode must
+ * be entered before NMIs or MCEs are enabled. Only returns from a trap or
+ * exception to a NMI context (intra-privilege level far return). Should be used
+ * upon trap or exception return when nested over a NMI context so no iret is
+ * issued.
+ *
+ * The stack, at that point, looks like :
+ *
+ * 0(esp) EIP
+ * 4(esp) CS
+ * 8(esp) EFLAGS
+ *
+ * Upon execution :
+ * Copy the stack eflags to top of stack
+ * Pop eflags into the eflags register
+ * Far return: pop EIP and CS into their register, and additionally pop EFLAGS.
+ */
+#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushl 8(%esp); \
+ popfl; \
+ lret $4
+#endif
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -112,6 +167,7 @@ static inline unsigned long arch_local_irq_save(void)
#define ENABLE_INTERRUPTS(x) sti
#define DISABLE_INTERRUPTS(x) cli
+#define INTERRUPT_RETURN_NMI_SAFE NATIVE_INTERRUPT_RETURN_NMI_SAFE
#ifdef CONFIG_X86_64
#define SWAPGS swapgs
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ebbc4d8ab17..1ef6906c179 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -962,6 +962,10 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+#define INTERRUPT_RETURN_NMI_SAFE \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_nmi_return), CLBR_NONE, \
+ jmp *%cs:pv_cpu_ops+PV_CPU_nmi_return)
+
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 82885099c86..3e0634cc127 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -181,6 +181,7 @@ struct pv_cpu_ops {
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);
+ void (*nmi_return)(void);
void (*swapgs)(void);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..677f8475d9d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,6 +111,7 @@ void foo(void)
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..1aea11cd840 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -58,6 +58,7 @@ int main(void)
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1..6679e16fc31 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -253,6 +253,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
if (!signr)
return;
+ if (in_nmi())
+ panic("Fatal exception in non-maskable interrupt");
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c8b4efad7eb..2fae6c570fd 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -80,6 +80,8 @@
#define nr_syscalls ((syscall_table_size)/4)
+#define NMI_MASK 0x04000000
+
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
@@ -321,8 +323,32 @@ END(ret_from_fork)
# userspace resumption stub bypassing syscall exit tracing
ALIGN
RING0_PTREGS_FRAME
+
ret_from_exception:
preempt_stop(CLBR_ANY)
+ GET_THREAD_INFO(%ebp)
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb PT_CS(%esp), %al
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jae resume_userspace # returning to v8086 or userspace
+ testl $NMI_MASK,TI_preempt_count(%ebp)
+ jz resume_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF, PT_EFLAGS(%esp)
+ jnz resume_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. It should not
+ * happen : it will reactivate NMIs
+ * prematurely.
+ */
+ TRACE_IRQS_IRET
+ RESTORE_REGS
+ addl $4, %esp # skip orig_eax/error_code
+ CFI_ADJUST_CFA_OFFSET -4
+ INTERRUPT_RETURN_NMI_SAFE
+
ret_from_intr:
GET_THREAD_INFO(%ebp)
check_userspace:
@@ -906,6 +932,10 @@ ENTRY(native_iret)
.previous
END(native_iret)
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE # Should we deal with popf exception ?
+END(native_nmi_return)
+
ENTRY(native_irq_enable_sysexit)
sti
sysexit
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c..9da8f4da7b3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -163,6 +163,8 @@ GLOBAL(return_to_handler)
#endif
+#define NMI_MASK 0x04000000
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -872,6 +874,9 @@ ENTRY(native_iret)
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
+
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE
#endif
.section .fixup,"ax"
@@ -924,6 +929,24 @@ retint_signal:
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule
+ /* Returning to kernel space from exception. */
+ /* rcx: threadinfo. interrupts off. */
+ENTRY(retexc_kernel)
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz retint_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp) /* trap flag? */
+ jnz retint_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. It should not
+ * happen : it will reactivate NMIs
+ * prematurely.
+ */
+ RESTORE_ARGS 0,8,0
+ TRACE_IRQS_IRETQ
+ INTERRUPT_RETURN_NMI_SAFE
+
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
@@ -1361,12 +1384,18 @@ ENTRY(paranoid_exit)
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
+paranoid_restore_no_nmi:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
+ GET_THREAD_INFO(%rcx)
TRACE_IRQS_IRETQ 0
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz paranoid_restore_no_nmi /* Nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-0(%rsp) /* trap flag? */
+ jnz paranoid_restore_no_nmi
RESTORE_ALL 8
- jmp irq_return
+ INTERRUPT_RETURN_NMI_SAFE
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
@@ -1465,7 +1494,7 @@ ENTRY(error_exit)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
- jne retint_kernel
+ jne retexc_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1aeeb71..1fc5da98373 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -156,6 +156,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+ type == PARAVIRT_PATCH(pv_cpu_ops.nmi_return) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
@@ -204,6 +205,7 @@ static void native_flush_tlb_single(unsigned long addr)
/* These are in entry.S */
extern void native_iret(void);
+extern void native_nmi_return(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
@@ -373,6 +375,7 @@ struct pv_cpu_ops pv_cpu_ops = {
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
+ .nmi_return = native_nmi_return,
.swapgs = native_swapgs,
.set_iopl_mask = native_set_iopl_mask,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab..ac372778bbc 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,10 +1,13 @@
-#include <asm/paravirt.h>
+#include <linux/stringify.h>
+#include <linux/irqflags.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
@@ -41,6 +44,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34f93e..5339e67dc15 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -1,12 +1,15 @@
+#include <linux/irqflags.h>
+#include <linux/stringify.h>
#include <asm/paravirt.h>
#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_cpu_ops, iret, "iretq");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0..07f7a272226 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1276,6 +1276,7 @@ __init void lguest_init(void)
pv_cpu_ops.cpuid = lguest_cpuid;
pv_cpu_ops.load_idt = lguest_load_idt;
pv_cpu_ops.iret = lguest_iret;
+ pv_cpu_ops.nmi_return = lguest_iret;
pv_cpu_ops.load_sp0 = lguest_load_sp0;
pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
pv_cpu_ops.set_ldt = lguest_set_ldt;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45f..e3839c74ec4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -974,6 +974,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_pmc = native_read_pmc,
.iret = xen_iret,
+ .nmi_return = xen_iret,
.irq_enable_sysexit = xen_sysexit,
#ifdef CONFIG_X86_64
.usergs_sysret32 = xen_sysret32,