aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>2011-03-16 19:04:09 -0400
committerMathieu Desnoyers <mathieu.desnoyers@polymtl.ca>2011-03-16 19:04:09 -0400
commit51faf68d803933510e8fa91b375eb0680d36b8e8 (patch)
tree33d504e5781ab35e0dfbf56991e024d79304ef5e /arch
parent4fb1aee3cb1b2acc3e841dbfda85a233d5b607ad (diff)
downloadlinux-linaro-android-51faf68d803933510e8fa91b375eb0680d36b8e8.tar.gz
nmi-safe-kernel/x86-nmi-safe-int3-and-page-fault
x86 NMI-safe INT3 and Page Fault Implements an alternative iret with popf and return so trap and exception handlers can return to the NMI handler without issuing iret. iret would cause NMIs to be reenabled prematurely. x86_32 uses popf and far return. x86_64 has to copy the return instruction pointer to the top of the previous stack, issue a popf, loads the previous esp and issue a near return (ret). It allows placing immediate values (and therefore optimized trace_marks) in NMI code since returning from a breakpoint would be valid. Accessing vmalloc'd memory, which allows executing module code or accessing vmapped or vmalloc'd areas from NMI context, would also be valid. This is very useful to tracers like LTTng. This patch makes all faults, traps and exception safe to be called from NMI context *except* single-stepping, which requires iret to restore the TF (trap flag) and jump to the return address in a single instruction. Sorry, no kprobes support in NMI handlers because of this limitation. We cannot single-step an NMI handler, because iret must set the TF flag and return back to the instruction to single-step in a single instruction. This cannot be emulated with popf/lret, because lret would be single-stepped. It does not apply to immediate values because they do not use single-stepping. This code detects if the TF flag is set and uses the iret path for single-stepping, even if it reactivates NMIs prematurely. Test to detect if nested under a NMI handler is only done upon the return from trap/exception to kernel, which is not frequent. Other return paths (return from trap/exception to userspace, return from interrupt) keep the exact same behavior (no slowdown). alpha and avr32 use the active count bit 31. This patch moves them to 28. TODO : test alpha and avr32 active count modification TODO : test with lguest, xen, kvm. ** This patch depends on the "Stringify support commas" patchset ** ** Also depends on fix-x86_64-page-fault-scheduler-race patch ** tested on x86_32 (tests implemented in a separate patch) : - instrumented the return path to export the EIP, CS and EFLAGS values when taken so we know the return path code has been executed. - trace_mark, using immediate values, with 10ms delay with the breakpoint activated. Runs well through the return path. - tested vmalloc faults in NMI handler by placing a non-optimized marker in the NMI handler (so no breakpoint is executed) and connecting a probe which touches every pages of a 20MB vmalloc'd buffer. It executes trough the return path without problem. - Tested with and without preemption tested on x86_64 - instrumented the return path to export the EIP, CS and EFLAGS values when taken so we know the return path code has been executed. - trace_mark, using immediate values, with 10ms delay with the breakpoint activated. Runs well through the return path. To test on x86_64 : - Test without preemption - Test vmalloc faults - Test on Intel 64 bits CPUs. (AMD64 was fine) Changelog since v1 : - x86_64 fixes. Changelog since v2 : - fix paravirt build Changelog since v3 : - Include modifications suggested by Jeremy Changelog since v4 : - including hardirq.h in entry_32/64.S is a bad idea (non ifndef'd C code), define NMI_MASK in the .S files directly. Changelog since v5 : - Add NMI_MASK to irq_count() and make die() more verbose for NMIs. Changelog since v7 : - Implement paravirtualized nmi_return. Changelog since v8 : - refreshed the patch for asm-offsets. Those were left out of v8. - now depends on "Stringify support commas" patch. Changelog since v9 : - Only test the nmi nested preempt count flag upon return from exceptions, not on return from interrupts. Only the kernel return path has this test. - Add Xen, VMI, lguest support. Use their iret pavavirt ops in lieu of nmi_return. - update for 2.6.30-rc1 Follow NMI_MASK bits merged in mainline. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> CC: akpm@osdl.org CC: mingo@elte.hu CC: "H. Peter Anvin" <hpa@zytor.com> CC: Jeremy Fitzhardinge <jeremy@goop.org> CC: Steven Rostedt <rostedt@goodmis.org> CC: "Frank Ch. Eigler" <fche@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/thread_info.h2
-rw-r--r--arch/avr32/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/irqflags.h56
-rw-r--r--arch/x86/include/asm/paravirt.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h1
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/entry_32.S30
-rw-r--r--arch/x86/kernel/entry_64.S33
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c6
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c6
-rw-r--r--arch/x86/lguest/boot.c1
-rw-r--r--arch/x86/xen/enlighten.c1
15 files changed, 143 insertions, 6 deletions
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 6f32f9c84a2..1500fa4db88 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -56,7 +56,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (2*PAGE_SIZE)
-#define PREEMPT_ACTIVE 0x40000000
+#define PREEMPT_ACTIVE 0x10000000
/*
* Thread information flags:
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 7a9c03dcb0b..845b14c6ab7 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -66,7 +66,7 @@ static inline struct thread_info *current_thread_info(void)
#endif /* !__ASSEMBLY__ */
-#define PREEMPT_ACTIVE 0x40000000
+#define PREEMPT_ACTIVE 0x10000000
/*
* Thread information flags
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 5745ce8bf10..fdf897373e1 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -56,6 +56,61 @@ static inline void native_halt(void)
#endif
+#ifdef CONFIG_X86_64
+/*
+ * Only returns from a trap or exception to a NMI context (intra-privilege
+ * level near return) to the same SS and CS segments. Should be used
+ * upon trap or exception return when nested over a NMI context so no iret is
+ * issued. It takes care of modifying the eflags, rsp and returning to the
+ * previous function.
+ *
+ * The stack, at that point, looks like :
+ *
+ * 0(rsp) RIP
+ * 8(rsp) CS
+ * 16(rsp) EFLAGS
+ * 24(rsp) RSP
+ * 32(rsp) SS
+ *
+ * Upon execution :
+ * Copy EIP to the top of the return stack
+ * Update top of return stack address
+ * Pop eflags into the eflags register
+ * Make the return stack current
+ * Near return (popping the return address from the return stack)
+ */
+#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushq %rax; \
+ movq %rsp, %rax; \
+ movq 24+8(%rax), %rsp; \
+ pushq 0+8(%rax); \
+ pushq 16+8(%rax); \
+ movq (%rax), %rax; \
+ popfq; \
+ ret
+#else
+/*
+ * Protected mode only, no V8086. Implies that protected mode must
+ * be entered before NMIs or MCEs are enabled. Only returns from a trap or
+ * exception to a NMI context (intra-privilege level far return). Should be used
+ * upon trap or exception return when nested over a NMI context so no iret is
+ * issued.
+ *
+ * The stack, at that point, looks like :
+ *
+ * 0(esp) EIP
+ * 4(esp) CS
+ * 8(esp) EFLAGS
+ *
+ * Upon execution :
+ * Copy the stack eflags to top of stack
+ * Pop eflags into the eflags register
+ * Far return: pop EIP and CS into their register, and additionally pop EFLAGS.
+ */
+#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushl 8(%esp); \
+ popfl; \
+ lret $4
+#endif
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -112,6 +167,7 @@ static inline unsigned long arch_local_irq_save(void)
#define ENABLE_INTERRUPTS(x) sti
#define DISABLE_INTERRUPTS(x) cli
+#define INTERRUPT_RETURN_NMI_SAFE NATIVE_INTERRUPT_RETURN_NMI_SAFE
#ifdef CONFIG_X86_64
#define SWAPGS swapgs
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ebbc4d8ab17..1ef6906c179 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -962,6 +962,10 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+#define INTERRUPT_RETURN_NMI_SAFE \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_nmi_return), CLBR_NONE, \
+ jmp *%cs:pv_cpu_ops+PV_CPU_nmi_return)
+
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 82885099c86..3e0634cc127 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -181,6 +181,7 @@ struct pv_cpu_ops {
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);
+ void (*nmi_return)(void);
void (*swapgs)(void);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..677f8475d9d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,6 +111,7 @@ void foo(void)
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..1aea11cd840 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -58,6 +58,7 @@ int main(void)
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1..6679e16fc31 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -253,6 +253,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
if (!signr)
return;
+ if (in_nmi())
+ panic("Fatal exception in non-maskable interrupt");
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c8b4efad7eb..2fae6c570fd 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -80,6 +80,8 @@
#define nr_syscalls ((syscall_table_size)/4)
+#define NMI_MASK 0x04000000
+
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
@@ -321,8 +323,32 @@ END(ret_from_fork)
# userspace resumption stub bypassing syscall exit tracing
ALIGN
RING0_PTREGS_FRAME
+
ret_from_exception:
preempt_stop(CLBR_ANY)
+ GET_THREAD_INFO(%ebp)
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb PT_CS(%esp), %al
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jae resume_userspace # returning to v8086 or userspace
+ testl $NMI_MASK,TI_preempt_count(%ebp)
+ jz resume_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF, PT_EFLAGS(%esp)
+ jnz resume_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. It should not
+ * happen : it will reactivate NMIs
+ * prematurely.
+ */
+ TRACE_IRQS_IRET
+ RESTORE_REGS
+ addl $4, %esp # skip orig_eax/error_code
+ CFI_ADJUST_CFA_OFFSET -4
+ INTERRUPT_RETURN_NMI_SAFE
+
ret_from_intr:
GET_THREAD_INFO(%ebp)
check_userspace:
@@ -906,6 +932,10 @@ ENTRY(native_iret)
.previous
END(native_iret)
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE # Should we deal with popf exception ?
+END(native_nmi_return)
+
ENTRY(native_irq_enable_sysexit)
sti
sysexit
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c..9da8f4da7b3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -163,6 +163,8 @@ GLOBAL(return_to_handler)
#endif
+#define NMI_MASK 0x04000000
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -872,6 +874,9 @@ ENTRY(native_iret)
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
+
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE
#endif
.section .fixup,"ax"
@@ -924,6 +929,24 @@ retint_signal:
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule
+ /* Returning to kernel space from exception. */
+ /* rcx: threadinfo. interrupts off. */
+ENTRY(retexc_kernel)
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz retint_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp) /* trap flag? */
+ jnz retint_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. It should not
+ * happen : it will reactivate NMIs
+ * prematurely.
+ */
+ RESTORE_ARGS 0,8,0
+ TRACE_IRQS_IRETQ
+ INTERRUPT_RETURN_NMI_SAFE
+
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
@@ -1361,12 +1384,18 @@ ENTRY(paranoid_exit)
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
+paranoid_restore_no_nmi:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
+ GET_THREAD_INFO(%rcx)
TRACE_IRQS_IRETQ 0
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz paranoid_restore_no_nmi /* Nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-0(%rsp) /* trap flag? */
+ jnz paranoid_restore_no_nmi
RESTORE_ALL 8
- jmp irq_return
+ INTERRUPT_RETURN_NMI_SAFE
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
@@ -1465,7 +1494,7 @@ ENTRY(error_exit)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
- jne retint_kernel
+ jne retexc_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1aeeb71..1fc5da98373 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -156,6 +156,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+ type == PARAVIRT_PATCH(pv_cpu_ops.nmi_return) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
@@ -204,6 +205,7 @@ static void native_flush_tlb_single(unsigned long addr)
/* These are in entry.S */
extern void native_iret(void);
+extern void native_nmi_return(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
@@ -373,6 +375,7 @@ struct pv_cpu_ops pv_cpu_ops = {
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
+ .nmi_return = native_nmi_return,
.swapgs = native_swapgs,
.set_iopl_mask = native_set_iopl_mask,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab..ac372778bbc 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,10 +1,13 @@
-#include <asm/paravirt.h>
+#include <linux/stringify.h>
+#include <linux/irqflags.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
@@ -41,6 +44,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34f93e..5339e67dc15 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -1,12 +1,15 @@
+#include <linux/irqflags.h>
+#include <linux/stringify.h>
#include <asm/paravirt.h>
#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_cpu_ops, iret, "iretq");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0..07f7a272226 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1276,6 +1276,7 @@ __init void lguest_init(void)
pv_cpu_ops.cpuid = lguest_cpuid;
pv_cpu_ops.load_idt = lguest_load_idt;
pv_cpu_ops.iret = lguest_iret;
+ pv_cpu_ops.nmi_return = lguest_iret;
pv_cpu_ops.load_sp0 = lguest_load_sp0;
pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
pv_cpu_ops.set_ldt = lguest_set_ldt;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45f..e3839c74ec4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -974,6 +974,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_pmc = native_read_pmc,
.iret = xen_iret,
+ .nmi_return = xen_iret,
.irq_enable_sysexit = xen_sysexit,
#ifdef CONFIG_X86_64
.usergs_sysret32 = xen_sysret32,