Diffstat (limited to 'arch/x86/kernel')

 arch/x86/kernel/Makefile                 |   4
 arch/x86/kernel/apic/apic.c              |   7
 arch/x86/kernel/apm_32.c                 |   9
 arch/x86/kernel/asm-offsets_32.c         |   1
 arch/x86/kernel/asm-offsets_64.c         |   1
 arch/x86/kernel/cpu/common.c             |   1
 arch/x86/kernel/cpu/mcheck/therm_throt.c |   3
 arch/x86/kernel/dumpstack.c              |   7
 arch/x86/kernel/entry_32.S               |  30
 arch/x86/kernel/entry_64.S               |  45
 arch/x86/kernel/paravirt.c               |   3
 arch/x86/kernel/paravirt_patch_32.c      |   6
 arch/x86/kernel/paravirt_patch_64.c      |   6
 arch/x86/kernel/process.c                |   9
 arch/x86/kernel/process_32.c             |  43
 arch/x86/kernel/process_64.c             |  34
 arch/x86/kernel/ptrace.c                 |   8
 arch/x86/kernel/syscall_64.c             |  18
 arch/x86/kernel/trace-clock.c            | 302
 arch/x86/kernel/traps.c                  |  80
 arch/x86/kernel/tsc_sync.c               | 198
 arch/x86/kernel/vsyscall_64.c            |  14

 22 files changed, 599 insertions(+), 230 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2cd88..717cf9c620b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,6 +46,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y += tsc.o io_delay.o rtc.o
 obj-y += pci-iommu_table.o
 obj-y += resource.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-y += process.o
@@ -66,9 +67,8 @@ obj-$(CONFIG_PCI) += early-quirks.o
 apm-y := apm_32.o
 obj-$(CONFIG_APM) += apm.o
 obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += smpboot.o
 obj-$(CONFIG_SMP) += setup_percpu.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-y += apic/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978..c604d23b4f3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -33,6 +33,7 @@
 #include <linux/dmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <trace/irq.h>
 
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
@@ -868,7 +869,9 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
	 */
	exit_idle();
	irq_enter();
+	trace_irq_entry(LOCAL_TIMER_VECTOR, regs, NULL);
	local_apic_timer_interrupt();
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
 
	set_irq_regs(old_regs);
@@ -1788,6 +1791,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
	exit_idle();
	irq_enter();
+	trace_irq_entry(SPURIOUS_APIC_VECTOR, NULL, NULL);
	/*
	 * Check if this really is a spurious interrupt and ACK it
	 * if it is a vectored one.  Just in case...
@@ -1802,6 +1806,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
	pr_info("spurious APIC interrupt on CPU#%d, "
		"should never happen.\n", smp_processor_id());
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
 }
@@ -1814,6 +1819,7 @@ void smp_error_interrupt(struct pt_regs *regs)
	exit_idle();
	irq_enter();
+	trace_irq_entry(ERROR_APIC_VECTOR, NULL, NULL);
	/* First tickle the hardware, only then report what went on. -- REW */
	v = apic_read(APIC_ESR);
	apic_write(APIC_ESR, 0);
@@ -1834,6 +1840,7 @@ void smp_error_interrupt(struct pt_regs *regs)
	 */
	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
		smp_processor_id(), v , v1);
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
 }
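Note: the apic.c hunks bracket each vectored interrupt handler with trace_irq_entry()/trace_irq_exit() from the trace/irq.h instrumentation added elsewhere in this series. A hedged sketch of how a tracer might consume these events, assuming the three-argument probe signature implied by the call sites above (vector id, pt_regs, opaque action pointer) and the register_trace_irq_entry() stub that DEFINE_TRACE()/DECLARE_TRACE() conventionally generate; none of these names are confirmed by this patch alone:

	/* Hypothetical probe: count local-timer interrupts per CPU. */
	static DEFINE_PER_CPU(unsigned long, timer_irq_count);

	static void probe_irq_entry(unsigned int id, struct pt_regs *regs,
				    void *action)
	{
		if (id == LOCAL_TIMER_VECTOR)
			__get_cpu_var(timer_irq_count)++;
	}

	/* in tracer init code: register_trace_irq_entry(probe_irq_entry); */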
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0e4f24c2a74..60939d5f226 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
 #include <linux/suspend.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
+#include <linux/idle.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -235,6 +236,7 @@
 #include <asm/olpc.h>
 #include <asm/paravirt.h>
 #include <asm/reboot.h>
+#include <asm/idle.h>
 
 #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
 extern int (*console_blank_hook)(int);
@@ -947,10 +949,17 @@ recalc:
				break;
			}
		}
+		enter_idle();
		if (original_pm_idle)
			original_pm_idle();
		else
			default_idle();
+		/*
+		 * In many cases the interrupt that ended idle
+		 * has already called exit_idle. But some idle
+		 * loops can be woken up without interrupt.
+		 */
+		__exit_idle();
		local_irq_disable();
		jiffies_since_last_check = jiffies - last_jiffies;
		if (jiffies_since_last_check > idle_period)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..677f8475d9d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,6 +111,7 @@ void foo(void)
	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+	OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..1aea11cd840 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -58,6 +58,7 @@ int main(void)
	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
	OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+	OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
	OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396b..6052f6f65a6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1069,6 +1069,7 @@ unsigned long kernel_eflags;
  * debugging, no special alignment required.
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
+EXPORT_PER_CPU_SYMBOL_GPL(orig_ist);
 
 #else	/* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97..c8a6411d8ba 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
+#include <trace/irq.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -402,8 +403,10 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
 {
	exit_idle();
	irq_enter();
+	trace_irq_entry(THERMAL_APIC_VECTOR, regs, NULL);
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1..6bed23e1c74 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,6 +15,7 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
+#include <linux/ltt-core.h>
 
 #include <asm/stacktrace.h>
@@ -253,6 +254,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
	if (!signr)
		return;
+	if (in_nmi())
+		panic("Fatal exception in non-maskable interrupt");
	if (in_interrupt())
		panic("Fatal exception in interrupt");
	if (panic_on_oops)
@@ -277,6 +280,10 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
	printk("DEBUG_PAGEALLOC");
 #endif
	printk("\n");
+#ifdef CONFIG_LTT
+	printk(KERN_EMERG "LTT NESTING LEVEL : %u", __get_cpu_var(ltt_nesting));
+	printk("\n");
+#endif
	sysfs_printk_last_file();
	if (notify_die(DIE_OOPS, str, regs, err,
			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
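Note: the new in_nmi() check in oops_end() makes a fatal exception nested over an NMI panic explicitly, mirroring the existing in_interrupt() case. It relies on the NMI nesting bit the kernel keeps in the high bits of preempt_count; the NMI_MASK constant that the entry_*.S hunks below test by hand is that same bit. For reference, from the 2.6.38-era linux/hardirq.h:

	#define NMI_OFFSET	(1UL << 26)	/* == NMI_MASK 0x04000000 */
	#define in_nmi()	(preempt_count() & NMI_MASK)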
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ca3b0e343e..afd6d8ef78c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -80,6 +80,8 @@
 #define nr_syscalls ((syscall_table_size)/4)
 
+#define NMI_MASK 0x04000000
+
 #ifdef CONFIG_PREEMPT
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
@@ -321,8 +323,32 @@ END(ret_from_fork)
	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
	RING0_PTREGS_FRAME
+
 ret_from_exception:
	preempt_stop(CLBR_ANY)
+	GET_THREAD_INFO(%ebp)
+	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
+	movb PT_CS(%esp), %al
+	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+	cmpl $USER_RPL, %eax
+	jae resume_userspace		# returning to v8086 or userspace
+	testl $NMI_MASK,TI_preempt_count(%ebp)
+	jz resume_kernel		/* Not nested over NMI ? */
+	testw $X86_EFLAGS_TF, PT_EFLAGS(%esp)
+	jnz resume_kernel		/*
+					 * If single-stepping an NMI handler,
+					 * use the normal iret path instead of
+					 * the popf/lret because lret would be
+					 * single-stepped. It should not
+					 * happen : it will reactivate NMIs
+					 * prematurely.
+					 */
+	TRACE_IRQS_IRET
+	RESTORE_REGS
+	addl $4, %esp			# skip orig_eax/error_code
+	CFI_ADJUST_CFA_OFFSET -4
+	INTERRUPT_RETURN_NMI_SAFE
+
 ret_from_intr:
	GET_THREAD_INFO(%ebp)
 check_userspace:
@@ -906,6 +932,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
+ENTRY(native_nmi_return)
+	NATIVE_INTERRUPT_RETURN_NMI_SAFE	# Should we deal with popf exception ?
+END(native_nmi_return)
+
 ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
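Note: both entry files return through INTERRUPT_RETURN_NMI_SAFE when they detect, via NMI_MASK in preempt_count, that the exception being unwound is nested over an NMI handler. The comments above give the rationale: iret ends the CPU's NMI-blocking window as a side effect, so returning from a fault taken inside an NMI handler with iret would let another NMI nest prematurely; a popf plus far-return sequence restores EFLAGS and control flow without reopening that window. The single-step case falls back to the normal iret path because the lret itself would otherwise be single-stepped. A minimal 32-bit illustration of the idea; this is not the series' actual NATIVE_INTERRUPT_RETURN_NMI_SAFE definition (which lives in an irqflags.h hunk outside this section), just its shape:

	/* Sketch only: assumes the return frame has been rewritten so the
	 * stack top is EFLAGS, then the return EIP, then CS. */
	popfl			/* restore EFLAGS; unlike iret, does not
				 * end the NMI-blocking window */
	lret			/* far return: pops EIP, then CS */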
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bbd5c80cb09..7800ff65aab 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -163,6 +163,8 @@ GLOBAL(return_to_handler)
 #endif
 
+#define NMI_MASK 0x04000000
+
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif
@@ -515,6 +517,8 @@ sysret_check:
	/* Handle reschedules */
	/* edx: work, edi: workmask */
 sysret_careful:
+	testl $_TIF_KERNEL_TRACE,%edx	/* Re-read : concurrently changed */
+	jnz ret_from_sys_call_trace
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
@@ -524,6 +528,16 @@ sysret_careful:
	popq_cfi %rdi
	jmp sysret_check
 
+ret_from_sys_call_trace:
+	TRACE_IRQS_ON
+	sti
+	SAVE_REST
+	FIXUP_TOP_OF_STACK %rdi
+	movq %rsp,%rdi
+	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
+	RESTORE_REST
+	jmp int_ret_from_sys_call
+
	/* Handle a signal */
 sysret_signal:
	TRACE_IRQS_ON
@@ -872,6 +886,9 @@ ENTRY(native_iret)
	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
+
+ENTRY(native_nmi_return)
+	NATIVE_INTERRUPT_RETURN_NMI_SAFE
 #endif
 
	.section .fixup,"ax"
@@ -924,6 +941,24 @@ retint_signal:
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule
 
+	/* Returning to kernel space from exception. */
+	/* rcx: threadinfo. interrupts off. */
+ENTRY(retexc_kernel)
+	testl $NMI_MASK,TI_preempt_count(%rcx)
+	jz retint_kernel		/* Not nested over NMI ? */
+	testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp)	/* trap flag? */
+	jnz retint_kernel		/*
+					 * If single-stepping an NMI handler,
+					 * use the normal iret path instead of
+					 * the popf/lret because lret would be
+					 * single-stepped. It should not
+					 * happen : it will reactivate NMIs
+					 * prematurely.
+					 */
+	RESTORE_ARGS 0,8,0
+	TRACE_IRQS_IRETQ
+	INTERRUPT_RETURN_NMI_SAFE
+
 #ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
@@ -1361,12 +1396,18 @@ ENTRY(paranoid_exit)
 paranoid_swapgs:
	TRACE_IRQS_IRETQ 0
	SWAPGS_UNSAFE_STACK
+paranoid_restore_no_nmi:
	RESTORE_ALL 8
	jmp irq_return
 paranoid_restore:
+	GET_THREAD_INFO(%rcx)
	TRACE_IRQS_IRETQ 0
+	testl $NMI_MASK,TI_preempt_count(%rcx)
+	jz paranoid_restore_no_nmi	/* Nested over NMI ? */
+	testw $X86_EFLAGS_TF,EFLAGS-0(%rsp)	/* trap flag? */
+	jnz paranoid_restore_no_nmi
	RESTORE_ALL 8
-	jmp irq_return
+	INTERRUPT_RETURN_NMI_SAFE
 paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
@@ -1465,7 +1506,7 @@ ENTRY(error_exit)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
-	jne retint_kernel
+	jne retexc_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
"iretq"); +DEF_NATIVE(pv_cpu_ops, nmi_return, + __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE)); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_cpu_ops, iret); + PATCH_SITE(pv_cpu_ops, nmi_return); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff455419898..e0e4ffcad48 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -13,6 +13,7 @@ #include <linux/dmi.h> #include <linux/utsname.h> #include <trace/events/power.h> +#include <trace/sched.h> #include <linux/hw_breakpoint.h> #include <asm/cpu.h> #include <asm/system.h> @@ -23,6 +24,8 @@ #include <asm/i387.h> #include <asm/debugreg.h> +DEFINE_TRACE(sched_kthread_create); + struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -278,6 +281,7 @@ extern void kernel_thread_helper(void); int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct pt_regs regs; + long pid; memset(®s, 0, sizeof(regs)); @@ -299,7 +303,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.flags = X86_EFLAGS_IF | 0x2; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, ®s, 0, NULL, NULL); + trace_sched_kthread_create(fn, pid); + return pid; } EXPORT_SYMBOL(kernel_thread); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af4..256ba23211d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -38,6 +38,9 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> +#include <linux/notifier.h> +#include <linux/idle.h> +#include <trace/pm.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -59,6 +62,38 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +DEFINE_TRACE(pm_idle_exit); +DEFINE_TRACE(pm_idle_entry); + +static DEFINE_PER_CPU(unsigned char, is_idle); + +void enter_idle(void) +{ + percpu_write(is_idle, 1); + trace_pm_idle_entry(); + notify_idle(IDLE_START); +} +EXPORT_SYMBOL_GPL(enter_idle); + +void __exit_idle(void) +{ + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) + return; + notify_idle(IDLE_END); + trace_pm_idle_exit(); +} +EXPORT_SYMBOL_GPL(__exit_idle); + +/* Called from interrupts to signify idle end */ +void exit_idle(void) +{ + /* idle loop has pid 0 */ + if (current->pid) + return; + __exit_idle(); +} +EXPORT_SYMBOL_GPL(exit_idle); + /* * Return saved PC of a blocked thread. */ @@ -107,10 +142,18 @@ void cpu_idle(void) play_dead(); local_irq_disable(); + enter_idle(); /* Don't trace irqs off for idle */ stop_critical_timings(); pm_idle(); start_critical_timings(); + + /* + * In many cases the interrupt that ended idle + * has already called exit_idle. But some idle + * loops can be woken up without interrupt. 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff455419898..e0e4ffcad48 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -13,6 +13,7 @@
 #include <linux/dmi.h>
 #include <linux/utsname.h>
 #include <trace/events/power.h>
+#include <trace/sched.h>
 #include <linux/hw_breakpoint.h>
 #include <asm/cpu.h>
 #include <asm/system.h>
@@ -23,6 +24,8 @@
 #include <asm/i387.h>
 #include <asm/debugreg.h>
 
+DEFINE_TRACE(sched_kthread_create);
+
 struct kmem_cache *task_xstate_cachep;
 EXPORT_SYMBOL_GPL(task_xstate_cachep);
@@ -278,6 +281,7 @@ extern void kernel_thread_helper(void);
 int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
	struct pt_regs regs;
+	long pid;
 
	memset(&regs, 0, sizeof(regs));
@@ -299,7 +303,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
	regs.flags = X86_EFLAGS_IF | 0x2;
 
	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED,
+		      0, &regs, 0, NULL, NULL);
+	trace_sched_kthread_create(fn, pid);
+	return pid;
 }
 EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af4..256ba23211d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,9 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/idle.h>
+#include <trace/pm.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -59,6 +62,38 @@
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
+static DEFINE_PER_CPU(unsigned char, is_idle);
+
+void enter_idle(void)
+{
+	percpu_write(is_idle, 1);
+	trace_pm_idle_entry();
+	notify_idle(IDLE_START);
+}
+EXPORT_SYMBOL_GPL(enter_idle);
+
+void __exit_idle(void)
+{
+	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
+		return;
+	notify_idle(IDLE_END);
+	trace_pm_idle_exit();
+}
+EXPORT_SYMBOL_GPL(__exit_idle);
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+	/* idle loop has pid 0 */
+	if (current->pid)
+		return;
+	__exit_idle();
+}
+EXPORT_SYMBOL_GPL(exit_idle);
+
 /*
  * Return saved PC of a blocked thread.
  */
@@ -107,10 +142,18 @@ void cpu_idle(void)
				play_dead();
 
			local_irq_disable();
+			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
+
+			/*
+			 * In many cases the interrupt that ended idle
+			 * has already called exit_idle. But some idle
+			 * loops can be woken up without interrupt.
+			 */
+			__exit_idle();
		}
		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bd387e8f73b..fbde94f1447 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,8 +35,10 @@
 #include <linux/tick.h>
 #include <linux/prctl.h>
 #include <linux/uaccess.h>
+#include <linux/idle.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <trace/pm.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -51,37 +53,34 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
 asmlinkage extern void ret_from_fork(void);
 
 DEFINE_PER_CPU(unsigned long, old_rsp);
 static DEFINE_PER_CPU(unsigned char, is_idle);
 
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-	atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
-
 void enter_idle(void)
 {
	percpu_write(is_idle, 1);
-	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+	/*
+	 * Trace last event before calling notifiers. Notifiers flush
+	 * data from buffers before going to idle.
+	 */
+	trace_pm_idle_entry();
+	notify_idle(IDLE_START);
 }
+EXPORT_SYMBOL_GPL(enter_idle);
 
-static void __exit_idle(void)
+void __exit_idle(void)
 {
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
-	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+	notify_idle(IDLE_END);
+	trace_pm_idle_exit();
 }
+EXPORT_SYMBOL_GPL(__exit_idle);
 
 /* Called from interrupts to signify idle end */
 void exit_idle(void)
@@ -91,6 +90,7 @@ void exit_idle(void)
		return;
	__exit_idle();
 }
+EXPORT_SYMBOL_GPL(exit_idle);
 
 #ifndef CONFIG_SMP
 static inline void play_dead(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72..ee3024d4f61 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <trace/syscall.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -152,6 +153,9 @@ static const int arg_offs_table[] = {
			 X86_EFLAGS_DF | X86_EFLAGS_OF | \
			 X86_EFLAGS_RF | X86_EFLAGS_AC))
 
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
 /*
  * Determines whether a value may be installed in a segment register.
  */
@@ -1361,6 +1365,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
	if (test_thread_flag(TIF_SINGLESTEP))
		regs->flags |= X86_EFLAGS_TF;
 
+	trace_syscall_entry(regs, regs->orig_ax);
+
	/* do the secure computing check first */
	secure_computing(regs->orig_ax);
@@ -1396,6 +1402,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
 {
	bool step;
 
+	trace_syscall_exit(regs->ax);
+
	if (unlikely(current->audit_context))
		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index de87d600829..5e74f6aa3c0 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -1,8 +1,11 @@
 /* System call table for x86-64. */
 
 #include <linux/linkage.h>
+#include <linux/module.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
+#include <linux/marker.h>
+#include <linux/kallsyms.h>
 #include <asm/asm-offsets.h>
 
 #define __NO_STUBS
@@ -27,3 +30,18 @@ const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/unistd_64.h>
 };
+
+void ltt_dump_sys_call_table(void *call_data)
+{
+	int i;
+	char namebuf[KSYM_NAME_LEN];
+
+	for (i = 0; i < __NR_syscall_max + 1; i++) {
+		sprint_symbol(namebuf, (unsigned long)sys_call_table[i]);
+		__trace_mark(0, syscall_state, sys_call_table,
+			call_data,
+			"id %d address %p symbol %s",
+			i, (void*)sys_call_table[i], namebuf);
+	}
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
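Note: ptrace.c now emits syscall_entry/syscall_exit events from the slow syscall path, and syscall_64.c gains ltt_dump_sys_call_table() so a tracer can record the id-to-symbol mapping once at trace start instead of resolving names per event. A sketch of a consumer, with the probe signature inferred from the call sites above (pt_regs plus syscall id on entry, return value on exit) rather than from any header shown here:

	/* Hypothetical probe: inspect each syscall id as it enters. */
	static void probe_syscall_entry(struct pt_regs *regs, long id)
	{
		/* record "syscall id entered" in the trace buffers */
	}

	/* in tracer init code: register_trace_syscall_entry(probe_syscall_entry); */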
diff --git a/arch/x86/kernel/trace-clock.c b/arch/x86/kernel/trace-clock.c
new file mode 100644
index 00000000000..47539e28276
--- /dev/null
+++ b/arch/x86/kernel/trace-clock.c
@@ -0,0 +1,302 @@
+/*
+ * arch/x86/kernel/trace-clock.c
+ *
+ * Trace clock for x86.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>, October 2008
+ */
+
+#include <linux/module.h>
+#include <linux/trace-clock.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/cpu.h>
+#include <linux/posix-timers.h>
+#include <asm/vgtod.h>
+
+static cycles_t trace_clock_last_tsc;
+static DEFINE_PER_CPU(struct timer_list, update_timer);
+static DEFINE_SPINLOCK(async_tsc_lock);
+static int async_tsc_refcount;	/* Number of readers */
+static int async_tsc_enabled;	/* Async TSC enabled on all online CPUs */
+
+int _trace_clock_is_sync = 1;
+EXPORT_SYMBOL_GPL(_trace_clock_is_sync);
+
+/*
+ * Is the trace clock being used by user-space ? We leave the trace clock active
+ * as soon as user-space starts using it. We never unref the trace clock
+ * reference taken by user-space.
+ */
+static atomic_t user_trace_clock_ref;
+
+/*
+ * Called by check_tsc_sync_source from CPU hotplug.
+ */
+void set_trace_clock_is_sync(int state)
+{
+	_trace_clock_is_sync = state;
+	update_trace_clock_is_sync_vdso();
+}
+
+#if BITS_PER_LONG == 64
+static cycles_t read_last_tsc(void)
+{
+	return trace_clock_last_tsc;
+}
+#else
+/*
+ * A cmpxchg64 update can happen concurrently. Based on the assumption that
+ * two cmpxchg64 will never update it to the same value (the count always
+ * increases), reading it twice insures that we read a coherent value with the
+ * same "sequence number".
+ */
+static cycles_t read_last_tsc(void)
+{
+	cycles_t val1, val2;
+
+	val1 = trace_clock_last_tsc;
+	for (;;) {
+		val2 = val1;
+		barrier();
+		val1 = trace_clock_last_tsc;
+		if (likely(val1 == val2))
+			break;
+	}
+	return val1;
+}
+#endif
+
+/*
+ * Support for architectures with non-sync TSCs.
+ * When the local TSC is discovered to lag behind the highest TSC counter, we
+ * increment the TSC count of an amount that should be, ideally, lower than the
+ * execution time of this routine, in cycles : this is the granularity we look
+ * for : we must be able to order the events.
+ */
+notrace cycles_t trace_clock_async_tsc_read(void)
+{
+	cycles_t new_tsc, last_tsc;
+
+	WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+	new_tsc = get_cycles();
+	last_tsc = read_last_tsc();
+	do {
+		if (new_tsc < last_tsc)
+			new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+		/*
+		 * If cmpxchg fails with a value higher than the new_tsc, don't
+		 * retry : the value has been incremented and the events
+		 * happened almost at the same time.
+		 * We must retry if cmpxchg fails with a lower value :
+		 * it means that we are the CPU with highest frequency and
+		 * therefore MUST update the value.
+		 */
+		last_tsc = cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc);
+	} while (unlikely(last_tsc < new_tsc));
+	return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
+
+static void update_timer_ipi(void *info)
+{
+	(void)trace_clock_async_tsc_read();
+}
+
+/*
+ * update_timer_fct : - Timer function to resync the clocks
+ * @data: unused
+ *
+ * Fires every jiffy.
+ */
+static void update_timer_fct(unsigned long data)
+{
+	(void)trace_clock_async_tsc_read();
+	mod_timer_pinned(&per_cpu(update_timer, smp_processor_id()),
+			 jiffies + 1);
+}
+
+static void enable_trace_clock(int cpu)
+{
+	init_timer(&per_cpu(update_timer, cpu));
+	per_cpu(update_timer, cpu).function = update_timer_fct;
+	per_cpu(update_timer, cpu).expires = jiffies + 1;
+	smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
+	add_timer_on(&per_cpu(update_timer, cpu), cpu);
+}
+
+static void disable_trace_clock(int cpu)
+{
+	del_timer_sync(&per_cpu(update_timer, cpu));
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD)
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+				unsigned long action,
+				void *hcpu)
+{
+	unsigned int hotcpu = (unsigned long)hcpu;
+	int cpu;
+
+	spin_lock(&async_tsc_lock);
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		/*
+		 * trace_clock_is_sync() is updated by set_trace_clock_is_sync()
+		 * code, protected by cpu hotplug disable.
+		 * It is ok to let the hotplugged CPU read the timebase before
+		 * the CPU_ONLINE notification. It's just there to give a
+		 * maximum bound to the TSC error.
+		 */
+		if (async_tsc_refcount && !trace_clock_is_sync()) {
+			if (!async_tsc_enabled) {
+				async_tsc_enabled = 1;
+				for_each_online_cpu(cpu)
+					enable_trace_clock(cpu);
+			} else {
+				enable_trace_clock(hotcpu);
+			}
+		}
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (!async_tsc_refcount && num_online_cpus() == 1)
+			set_trace_clock_is_sync(1);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		/*
+		 * We cannot stop the trace clock on other CPUs when readers are
+		 * active even if we go back to a synchronized state (1 CPU)
+		 * because the CPU left could be the one lagging behind.
+		 */
+		if (async_tsc_refcount && async_tsc_enabled)
+			disable_trace_clock(hotcpu);
+		if (!async_tsc_refcount && num_online_cpus() == 1)
+			set_trace_clock_is_sync(1);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	spin_unlock(&async_tsc_lock);
+
+	return NOTIFY_OK;
+}
+
+int get_trace_clock(void)
+{
+	int cpu;
+
+	if (!trace_clock_is_sync()) {
+		printk(KERN_WARNING
+			"Trace clock falls back on cache-line bouncing\n"
+			"workaround due to non-synchronized TSCs.\n"
+			"This workaround preserves event order across CPUs.\n"
+			"Please consider disabling Speedstep or PowerNow and\n"
+			"using kernel parameters "
+			"\"force_tsc_sync=1 idle=poll\"\n"
+			"for accurate and fast tracing clock source.\n");
+	}
+
+	get_online_cpus();
+	spin_lock(&async_tsc_lock);
+	if (async_tsc_refcount++ || trace_clock_is_sync())
+		goto end;
+
+	async_tsc_enabled = 1;
+	for_each_online_cpu(cpu)
+		enable_trace_clock(cpu);
+end:
+	spin_unlock(&async_tsc_lock);
+	put_online_cpus();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+	int cpu;
+
+	get_online_cpus();
+	spin_lock(&async_tsc_lock);
+	WARN_ON(async_tsc_refcount <= 0);
+	if (async_tsc_refcount != 1 || !async_tsc_enabled)
+		goto end;
+
+	for_each_online_cpu(cpu)
+		disable_trace_clock(cpu);
+	async_tsc_enabled = 0;
+end:
+	async_tsc_refcount--;
+	if (!async_tsc_refcount && num_online_cpus() == 1)
+		set_trace_clock_is_sync(1);
+	spin_unlock(&async_tsc_lock);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
+
+static int posix_get_trace(clockid_t which_clock, struct timespec *tp)
+{
+	union lttng_timespec *lts = (union lttng_timespec *) tp;
+	int ret;
+
+	/*
+	 * Yes, there is a race here that would lead to refcount being
+	 * incremented more than once, but all we care is to leave the trace
+	 * clock active forever, so precise accounting is not needed.
+	 */
+	if (unlikely(!atomic_read(&user_trace_clock_ref))) {
+		ret = get_trace_clock();
+		if (ret)
+			return ret;
+		atomic_inc(&user_trace_clock_ref);
+	}
+	lts->lttng_ts = trace_clock_read64();
+	return 0;
+}
+
+static int posix_get_trace_freq(clockid_t which_clock, struct timespec *tp)
+{
+	union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+	lts->lttng_ts = trace_clock_frequency();
+	return 0;
+}
+
+static int posix_get_trace_res(const clockid_t which_clock, struct timespec *tp)
+{
+	union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+	lts->lttng_ts = TRACE_CLOCK_RES;
+	return 0;
+}
+
+static __init int init_unsync_trace_clock(void)
+{
+	struct k_clock clock_trace = {
+		.clock_getres = posix_get_trace_res,
+		.clock_get = posix_get_trace,
+	};
+	struct k_clock clock_trace_freq = {
+		.clock_getres = posix_get_trace_res,
+		.clock_get = posix_get_trace_freq,
+	};
+
+	register_posix_clock(CLOCK_TRACE, &clock_trace);
+	register_posix_clock(CLOCK_TRACE_FREQ, &clock_trace_freq);
+
+	hotcpu_notifier(hotcpu_callback, 4);
+	return 0;
+}
+early_initcall(init_unsync_trace_clock);
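Note: for readers who want to experiment with the cache-line-bouncing fallback outside the kernel, here is a user-space analogue of trace_clock_async_tsc_read() built on GCC atomics. __rdtsc() is the compiler intrinsic; MIN_PROBE_TICKS stands in for TRACE_CLOCK_MIN_PROBE_DURATION, whose actual definition lives in a header not shown in this patch:

	#include <stdint.h>
	#include <x86intrin.h>			/* __rdtsc() */

	#define MIN_PROBE_TICKS 200UL		/* assumed probe-duration bound */

	static uint64_t last_ts;		/* highest timestamp published */

	uint64_t monotonic_tsc_read(void)
	{
		uint64_t last = __atomic_load_n(&last_ts, __ATOMIC_RELAXED);
		uint64_t new_ts = __rdtsc();

		for (;;) {
			if (new_ts < last)	/* local TSC lags: step past max */
				new_ts = last + MIN_PROBE_TICKS;
			if (__atomic_compare_exchange_n(&last_ts, &last, new_ts,
					0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
				break;	/* we published the new maximum */
			if (last >= new_ts)
				break;	/* another CPU won with a value at
					 * least as high; order is preserved */
			/* else our value is still the highest seen: retry */
		}
		return new_ts;
	}

The kernel version additionally pins a per-CPU timer that touches the shared value every jiffy, so a mostly-idle CPU's TSC cannot drift arbitrarily far behind before its next read; that is what enable_trace_clock() above sets up.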
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b67166f9d..d45030679f9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -31,6 +31,7 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <trace/trap.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -52,6 +53,7 @@
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/traps.h>
+#include <asm/unistd.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/mce.h>
@@ -76,11 +78,21 @@ char ignore_fpu_irq;
  * F0 0F bug workaround.
  */
 gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
+
+extern unsigned long sys_call_table[];
+extern unsigned long syscall_table_size;
+
 #endif
 
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
 EXPORT_SYMBOL_GPL(used_vectors);
 
+/*
+ * Also used in arch/x86/mm/fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
 static int ignore_nmis;
 
 int unknown_nmi_panic;
@@ -122,6 +134,8 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 {
	struct task_struct *tsk = current;
 
+	trace_trap_entry(regs, trapnr);
+
 #ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK) {
		/*
@@ -168,7 +182,7 @@ trap_signal:
		force_sig_info(signr, info, tsk);
	else
		force_sig(signr, tsk);
-	return;
+	goto end;
 
 kernel_trap:
	if (!fixup_exception(regs)) {
@@ -176,15 +190,17 @@ kernel_trap:
		tsk->thread.trap_no = trapnr;
		die(str, regs, error_code);
	}
-	return;
+	goto end;
 
 #ifdef CONFIG_X86_32
 vm86_trap:
	if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
						error_code, trapnr))
		goto trap_signal;
-	return;
+	goto end;
 #endif
+end:
+	trace_trap_exit();
 }
 
 #define DO_ERROR(trapnr, signr, str, name)				\
@@ -285,7 +301,9 @@ do_general_protection(struct pt_regs *regs, long error_code)
		printk("\n");
	}
 
+	trace_trap_entry(regs, 13);
	force_sig(SIGSEGV, tsk);
+	trace_trap_exit();
	return;
 
 #ifdef CONFIG_X86_32
@@ -371,9 +389,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 static notrace __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
+	trace_trap_entry(regs, 2);
+
	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
			NOTIFY_STOP)
-		return;
+		goto end;
 #ifdef CONFIG_MCA
	/*
	 * Might actually be able to figure out what the guilty party
@@ -381,7 +401,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
	 */
	if (MCA_bus) {
		mca_handle_nmi();
-		return;
+		goto end;
	}
 #endif
	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
@@ -392,19 +412,23 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
		panic("NMI: Not continuing");
 
	pr_emerg("Dazed and confused, but trying to continue\n");
+end:
+	trace_trap_exit();
 }
 
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
	unsigned char reason = 0;
 
+	trace_trap_entry(regs, 2);
+
	/*
	 * CPU-specific NMI must be processed before non-CPU-specific
	 * NMI, otherwise we may lose it, because the CPU-specific
	 * NMI can not be detected/processed on other CPUs.
	 */
	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
+		goto end;
 
	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
	raw_spin_lock(&nmi_reason_lock);
@@ -423,11 +447,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
		reassert_nmi();
 #endif
		raw_spin_unlock(&nmi_reason_lock);
-		return;
+		goto end;
	}
	raw_spin_unlock(&nmi_reason_lock);
 
	unknown_nmi_error(reason, regs);
+end:
+	trace_trap_exit();
 }
 
 dotraplinkage notrace __kprobes void
@@ -570,8 +596,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
	preempt_conditional_sti(regs);
 
	if (regs->flags & X86_VM_MASK) {
+		trace_trap_entry(regs, 1);
		handle_vm86_trap((struct kernel_vm86_regs *) regs,
				error_code, 1);
+		trace_trap_exit();
		preempt_conditional_cli(regs);
		return;
	}
@@ -589,13 +617,32 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
		regs->flags &= ~X86_EFLAGS_TF;
	}
	si_code = get_si_code(tsk->thread.debugreg6);
-	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
+	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) {
+		trace_trap_entry(regs, 1);
		send_sigtrap(tsk, regs, error_code, si_code);
+		trace_trap_exit();
+	}
	preempt_conditional_cli(regs);
 
	return;
 }
 
+#ifdef CONFIG_X86_32
+void ltt_dump_sys_call_table(void *call_data)
+{
+	int i;
+	char namebuf[KSYM_NAME_LEN];
+
+	for (i = 0; i < NR_syscalls; i++) {
+		sprint_symbol(namebuf, sys_call_table[i]);
+		__trace_mark(0, syscall_state, sys_call_table, call_data,
+			"id %d address %p symbol %s",
+			i, (void*)sys_call_table[i], namebuf);
+	}
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
+#endif
+
 /*
  * Note that we play around with the 'TS' bit in an attempt to get
  * the correct behaviour even in the presence of the asynchronous
@@ -701,11 +748,13 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 dotraplinkage void
 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 {
+	trace_trap_entry(regs, 16);
	conditional_sti(regs);
 #if 0
	/* No need to warn about this any longer. */
	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
 #endif
+	trace_trap_exit();
 }
 
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
@@ -738,6 +787,21 @@ void __math_state_restore(void)
	tsk->fpu_counter++;
 }
 
+void ltt_dump_idt_table(void *call_data)
+{
+	int i;
+	char namebuf[KSYM_NAME_LEN];
+
+	for (i = 0; i < IDT_ENTRIES; i++) {
+		unsigned long address = gate_offset(idt_table[i]);
+		sprint_symbol(namebuf, address);
+		__trace_mark(0, irq_state, idt_table, call_data,
+			"irq %d address %p symbol %s",
+			i, (void *)address, namebuf);
+	}
+}
+EXPORT_SYMBOL_GPL(ltt_dump_idt_table);
+
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
deleted file mode 100644
index 0aa5fed8b9e..00000000000
--- a/arch/x86/kernel/tsc_sync.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * check TSC synchronization.
- *
- * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
- *
- * We check whether all boot CPUs have their TSC's synchronized,
- * print a warning if not and turn off the TSC clock-source.
- *
- * The warp-check is point-to-point between two CPUs, the CPU
- * initiating the bootup is the 'source CPU', the freshly booting
- * CPU is the 'target CPU'.
- *
- * Only two CPUs may participate - they can enter in any order.
- * ( The serial nature of the boot logic and the CPU hotplug lock
- *   protects against more than 2 CPUs entering this code. )
- */
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/nmi.h>
-#include <asm/tsc.h>
-
-/*
- * Entry/exit counters that make sure that both CPUs
- * run the measurement code at once:
- */
-static __cpuinitdata atomic_t start_count;
-static __cpuinitdata atomic_t stop_count;
-
-/*
- * We use a raw spinlock in this exceptional case, because
- * we want to have the fastest, inlined, non-debug version
- * of a critical section, to be able to prove TSC time-warps:
- */
-static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-
-static __cpuinitdata cycles_t last_tsc;
-static __cpuinitdata cycles_t max_warp;
-static __cpuinitdata int nr_warps;
-
-/*
- * TSC-warp measurement loop running on both CPUs:
- */
-static __cpuinit void check_tsc_warp(void)
-{
-	cycles_t start, now, prev, end;
-	int i;
-
-	rdtsc_barrier();
-	start = get_cycles();
-	rdtsc_barrier();
-	/*
-	 * The measurement runs for 20 msecs:
-	 */
-	end = start + tsc_khz * 20ULL;
-	now = start;
-
-	for (i = 0; ; i++) {
-		/*
-		 * We take the global lock, measure TSC, save the
-		 * previous TSC that was measured (possibly on
-		 * another CPU) and update the previous TSC timestamp.
-		 */
-		arch_spin_lock(&sync_lock);
-		prev = last_tsc;
-		rdtsc_barrier();
-		now = get_cycles();
-		rdtsc_barrier();
-		last_tsc = now;
-		arch_spin_unlock(&sync_lock);
-
-		/*
-		 * Be nice every now and then (and also check whether
-		 * measurement is done [we also insert a 10 million
-		 * loops safety exit, so we dont lock up in case the
-		 * TSC readout is totally broken]):
-		 */
-		if (unlikely(!(i & 7))) {
-			if (now > end || i > 10000000)
-				break;
-			cpu_relax();
-			touch_nmi_watchdog();
-		}
-		/*
-		 * Outside the critical section we can now see whether
-		 * we saw a time-warp of the TSC going backwards:
-		 */
-		if (unlikely(prev > now)) {
-			arch_spin_lock(&sync_lock);
-			max_warp = max(max_warp, prev - now);
-			nr_warps++;
-			arch_spin_unlock(&sync_lock);
-		}
-	}
-	WARN(!(now-start),
-		"Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
-		now-start, end-start);
-}
-
-/*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
- */
-void __cpuinit check_tsc_sync_source(int cpu)
-{
-	int cpus = 2;
-
-	/*
-	 * No need to check if we already know that the TSC is not
-	 * synchronized:
-	 */
-	if (unsynchronized_tsc())
-		return;
-
-	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
-		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
-			pr_info(
-			"Skipped synchronization checks as TSC is reliable.\n");
-		return;
-	}
-
-	/*
-	 * Reset it - in case this is a second bootup:
-	 */
-	atomic_set(&stop_count, 0);
-
-	/*
-	 * Wait for the target to arrive:
-	 */
-	while (atomic_read(&start_count) != cpus-1)
-		cpu_relax();
-	/*
-	 * Trigger the target to continue into the measurement too:
-	 */
-	atomic_inc(&start_count);
-
-	check_tsc_warp();
-
-	while (atomic_read(&stop_count) != cpus-1)
-		cpu_relax();
-
-	if (nr_warps) {
-		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
-			smp_processor_id(), cpu);
-		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
-			   "turning off TSC clock.\n", max_warp);
-		mark_tsc_unstable("check_tsc_sync_source failed");
-	} else {
-		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
-			smp_processor_id(), cpu);
-	}
-
-	/*
-	 * Reset it - just in case we boot another CPU later:
-	 */
-	atomic_set(&start_count, 0);
-	nr_warps = 0;
-	max_warp = 0;
-	last_tsc = 0;
-
-	/*
-	 * Let the target continue with the bootup:
-	 */
-	atomic_inc(&stop_count);
-}
-
-/*
- * Freshly booted CPUs call into this:
- */
-void __cpuinit check_tsc_sync_target(void)
-{
-	int cpus = 2;
-
-	if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
-		return;
-
-	/*
-	 * Register this CPU's participation and wait for the
-	 * source CPU to start the measurement:
-	 */
-	atomic_inc(&start_count);
-	while (atomic_read(&start_count) != cpus)
-		cpu_relax();
-
-	check_tsc_warp();
-
-	/*
-	 * Ok, we are done:
-	 */
-	atomic_inc(&stop_count);
-
-	/*
-	 * Wait for the source CPU to print stuff:
-	 */
-	while (atomic_read(&stop_count) != cpus)
-		cpu_relax();
-}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c4b69..df18f14c473 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,6 +44,8 @@
 #include <asm/desc.h>
 #include <asm/topology.h>
 #include <asm/vgtod.h>
+#include <asm/trace-clock.h>
+#include <asm/timer.h>
 
 #define __vsyscall(nr) \
		__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
@@ -61,6 +63,7 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
 {
	.lock = SEQLOCK_UNLOCKED,
	.sysctl_enabled = 1,
+	.trace_clock_is_sync = 1,
 };
 
 void update_vsyscall_tz(void)
@@ -73,6 +76,16 @@ void update_vsyscall_tz(void)
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
+void update_trace_clock_is_sync_vdso(void)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+	vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
+	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+EXPORT_SYMBOL_GPL(update_trace_clock_is_sync_vdso);
+
 void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
			struct clocksource *clock, u32 mult)
 {
@@ -89,6 +102,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
	vsyscall_gtod_data.wall_to_monotonic = *wtm;
	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
+	vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
 
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
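Note: trace_clock_is_sync is published under vsyscall_gtod_data.lock so that clock reads racing with an update still see a coherent snapshot. The read side (in the vDSO/vsyscall page, not part of this hunk) would follow the usual seqlock pattern; a sketch using the kernel's seqlock API:

	unsigned seq;
	int is_sync;

	do {
		seq = read_seqbegin(&vsyscall_gtod_data.lock);
		is_sync = vsyscall_gtod_data.trace_clock_is_sync;
	} while (read_seqretry(&vsyscall_gtod_data.lock, seq));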