diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/alpha/include/asm/thread_info.h | 2 | ||||
-rw-r--r-- | arch/avr32/include/asm/thread_info.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/irqflags.h | 56 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_32.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_64.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 30 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 33 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt_patch_32.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt_patch_64.c | 6 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 1 |
15 files changed, 143 insertions, 6 deletions
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 6f32f9c84a2..1500fa4db88 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -56,7 +56,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (2*PAGE_SIZE) -#define PREEMPT_ACTIVE 0x40000000 +#define PREEMPT_ACTIVE 0x10000000 /* * Thread information flags: diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h index 7a9c03dcb0b..845b14c6ab7 100644 --- a/arch/avr32/include/asm/thread_info.h +++ b/arch/avr32/include/asm/thread_info.h @@ -66,7 +66,7 @@ static inline struct thread_info *current_thread_info(void) #endif /* !__ASSEMBLY__ */ -#define PREEMPT_ACTIVE 0x40000000 +#define PREEMPT_ACTIVE 0x10000000 /* * Thread information flags diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 5745ce8bf10..fdf897373e1 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -56,6 +56,61 @@ static inline void native_halt(void) #endif +#ifdef CONFIG_X86_64 +/* + * Only returns from a trap or exception to a NMI context (intra-privilege + * level near return) to the same SS and CS segments. Should be used + * upon trap or exception return when nested over a NMI context so no iret is + * issued. It takes care of modifying the eflags, rsp and returning to the + * previous function. 
+ * + * The stack, at that point, looks like : + * + * 0(rsp) RIP + * 8(rsp) CS + * 16(rsp) EFLAGS + * 24(rsp) RSP + * 32(rsp) SS + * + * Upon execution : + * Copy RIP to the top of the return stack + * Update top of return stack address + * Pop eflags into the eflags register + * Make the return stack current + * Near return (popping the return address from the return stack) + */ +#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushq %rax; \ + movq %rsp, %rax; \ + movq 24+8(%rax), %rsp; \ + pushq 0+8(%rax); \ + pushq 16+8(%rax); \ + movq (%rax), %rax; \ + popfq; \ + ret +#else +/* + * Protected mode only, no V8086. Implies that protected mode must + * be entered before NMIs or MCEs are enabled. Only returns from a trap or + * exception to an NMI context (intra-privilege level far return). Should be used + * upon trap or exception return when nested over an NMI context so no iret is + * issued. + * + * The stack, at that point, looks like : + * + * 0(esp) EIP + * 4(esp) CS + * 8(esp) EFLAGS + * + * Upon execution : + * Copy the stack eflags to top of stack + * Pop eflags into the eflags register + * Far return: pop EIP and CS into their registers, and additionally discard the stacked EFLAGS (lret $4). 
+ */ +#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushl 8(%esp); \ + popfl; \ + lret $4 +#endif + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -112,6 +167,7 @@ static inline unsigned long arch_local_irq_save(void) #define ENABLE_INTERRUPTS(x) sti #define DISABLE_INTERRUPTS(x) cli +#define INTERRUPT_RETURN_NMI_SAFE NATIVE_INTERRUPT_RETURN_NMI_SAFE #ifdef CONFIG_X86_64 #define SWAPGS swapgs diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ebbc4d8ab17..1ef6906c179 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -962,6 +962,10 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) +#define INTERRUPT_RETURN_NMI_SAFE \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_nmi_return), CLBR_NONE, \ + jmp *%cs:pv_cpu_ops+PV_CPU_nmi_return) + #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 82885099c86..3e0634cc127 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -181,6 +181,7 @@ struct pv_cpu_ops { /* Normal iret. Jump to this with the standard iret stack frame set up. 
*/ void (*iret)(void); + void (*nmi_return)(void); void (*swapgs)(void); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a4088dda37..677f8475d9d 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -111,6 +111,7 @@ void foo(void) OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return); OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4a6aeedcd96..1aea11cd840 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -58,6 +58,7 @@ int main(void) OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return); OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index df20723a6a1..6679e16fc31 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -253,6 +253,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) if (!signr) return; + if (in_nmi()) + panic("Fatal exception in non-maskable interrupt"); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c8b4efad7eb..2fae6c570fd 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -80,6 +80,8 @@ #define nr_syscalls ((syscall_table_size)/4) +#define NMI_MASK 0x04000000 + #ifdef CONFIG_PREEMPT #define 
preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else @@ -321,8 +323,32 @@ END(ret_from_fork) # userspace resumption stub bypassing syscall exit tracing ALIGN RING0_PTREGS_FRAME + ret_from_exception: preempt_stop(CLBR_ANY) + GET_THREAD_INFO(%ebp) + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jae resume_userspace # returning to v8086 or userspace + testl $NMI_MASK,TI_preempt_count(%ebp) + jz resume_kernel /* Not nested over NMI ? */ + testw $X86_EFLAGS_TF, PT_EFLAGS(%esp) + jnz resume_kernel /* + * If single-stepping an NMI handler, + * use the normal iret path instead of + * the popf/lret because lret would be + * single-stepped. It should not + * happen : it will reactivate NMIs + * prematurely. + */ + TRACE_IRQS_IRET + RESTORE_REGS + addl $4, %esp # skip orig_eax/error_code + CFI_ADJUST_CFA_OFFSET -4 + INTERRUPT_RETURN_NMI_SAFE + ret_from_intr: GET_THREAD_INFO(%ebp) check_userspace: @@ -906,6 +932,10 @@ ENTRY(native_iret) .previous END(native_iret) +ENTRY(native_nmi_return) + NATIVE_INTERRUPT_RETURN_NMI_SAFE # Should we deal with popf exception ? +END(native_nmi_return) + ENTRY(native_irq_enable_sysexit) sti sysexit diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index aed1ffbeb0c..9da8f4da7b3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -163,6 +163,8 @@ GLOBAL(return_to_handler) #endif +#define NMI_MASK 0x04000000 + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif @@ -872,6 +874,9 @@ ENTRY(native_iret) .section __ex_table,"a" .quad native_iret, bad_iret .previous + +ENTRY(native_nmi_return) + NATIVE_INTERRUPT_RETURN_NMI_SAFE #endif .section .fixup,"ax" @@ -924,6 +929,24 @@ retint_signal: GET_THREAD_INFO(%rcx) jmp retint_with_reschedule + /* Returning to kernel space from exception. */ + /* rcx: threadinfo. interrupts off. 
*/ +ENTRY(retexc_kernel) + testl $NMI_MASK,TI_preempt_count(%rcx) + jz retint_kernel /* Not nested over NMI ? */ + testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp) /* trap flag? */ + jnz retint_kernel /* + * If single-stepping an NMI handler, + * use the normal iret path instead of + * the popf/ret because ret would be + * single-stepped. It should not + * happen : it will reactivate NMIs + * prematurely. + */ + RESTORE_ARGS 0,8,0 + TRACE_IRQS_IRETQ + INTERRUPT_RETURN_NMI_SAFE + #ifdef CONFIG_PREEMPT /* Returning to kernel space. Check if we need preemption */ /* rcx: threadinfo. interrupts off. */ @@ -1361,12 +1384,18 @@ ENTRY(paranoid_exit) paranoid_swapgs: TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK +paranoid_restore_no_nmi: RESTORE_ALL 8 jmp irq_return paranoid_restore: + GET_THREAD_INFO(%rcx) TRACE_IRQS_IRETQ 0 + testl $NMI_MASK,TI_preempt_count(%rcx) + jz paranoid_restore_no_nmi /* Not nested over NMI ? */ + testw $X86_EFLAGS_TF,EFLAGS-0(%rsp) /* trap flag? */ + jnz paranoid_restore_no_nmi RESTORE_ALL 8 - jmp irq_return + INTERRUPT_RETURN_NMI_SAFE paranoid_userspace: GET_THREAD_INFO(%rcx) movl TI_flags(%rcx),%ebx @@ -1465,7 +1494,7 @@ ENTRY(error_exit) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax - jne retint_kernel + jne retexc_kernel LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 869e1aeeb71..1fc5da98373 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -156,6 +156,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || + type == PARAVIRT_PATCH(pv_cpu_ops.nmi_return) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) @@ -204,6 +205,7 @@ static void native_flush_tlb_single(unsigned long addr) /* These are in 
entry.S */ extern void native_iret(void); +extern void native_nmi_return(void); extern void native_irq_enable_sysexit(void); extern void native_usergs_sysret32(void); extern void native_usergs_sysret64(void); @@ -373,6 +375,7 @@ struct pv_cpu_ops pv_cpu_ops = { .usergs_sysret64 = native_usergs_sysret64, #endif .iret = native_iret, + .nmi_return = native_nmi_return, .swapgs = native_swapgs, .set_iopl_mask = native_set_iopl_mask, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index d9f32e6d6ab..ac372778bbc 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -1,10 +1,13 @@ -#include <asm/paravirt.h> +#include <linux/stringify.h> +#include <linux/irqflags.h> DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); +DEF_NATIVE(pv_cpu_ops, nmi_return, + __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE)); DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); @@ -41,6 +44,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_cpu_ops, iret); + PATCH_SITE(pv_cpu_ops, nmi_return); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 3f08f34f93e..5339e67dc15 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -1,12 +1,15 @@ +#include <linux/irqflags.h> +#include <linux/stringify.h> #include <asm/paravirt.h> #include <asm/asm-offsets.h> -#include <linux/stringify.h> DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, 
irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_cpu_ops, iret, "iretq"); +DEF_NATIVE(pv_cpu_ops, nmi_return, + __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE)); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_cpu_ops, iret); + PATCH_SITE(pv_cpu_ops, nmi_return); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index eba687f0cc0..07f7a272226 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1276,6 +1276,7 @@ __init void lguest_init(void) pv_cpu_ops.cpuid = lguest_cpuid; pv_cpu_ops.load_idt = lguest_load_idt; pv_cpu_ops.iret = lguest_iret; + pv_cpu_ops.nmi_return = lguest_iret; pv_cpu_ops.load_sp0 = lguest_load_sp0; pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; pv_cpu_ops.set_ldt = lguest_set_ldt; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 50542efe45f..e3839c74ec4 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -974,6 +974,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .read_pmc = native_read_pmc, .iret = xen_iret, + .nmi_return = xen_iret, .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, |