Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                     4
-rw-r--r--  arch/x86/kernel/apic/apic.c                  7
-rw-r--r--  arch/x86/kernel/apm_32.c                     9
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c             1
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c             1
-rw-r--r--  arch/x86/kernel/cpu/common.c                 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c     3
-rw-r--r--  arch/x86/kernel/dumpstack.c                  7
-rw-r--r--  arch/x86/kernel/entry_32.S                  30
-rw-r--r--  arch/x86/kernel/entry_64.S                  45
-rw-r--r--  arch/x86/kernel/paravirt.c                   3
-rw-r--r--  arch/x86/kernel/paravirt_patch_32.c          6
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c          6
-rw-r--r--  arch/x86/kernel/process.c                    9
-rw-r--r--  arch/x86/kernel/process_32.c                43
-rw-r--r--  arch/x86/kernel/process_64.c                34
-rw-r--r--  arch/x86/kernel/ptrace.c                     8
-rw-r--r--  arch/x86/kernel/syscall_64.c                18
-rw-r--r--  arch/x86/kernel/trace-clock.c              302
-rw-r--r--  arch/x86/kernel/traps.c                     80
-rw-r--r--  arch/x86/kernel/tsc_sync.c                 198
-rw-r--r--  arch/x86/kernel/vsyscall_64.c               14
22 files changed, 599 insertions, 230 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2cd88..717cf9c620b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,6 +46,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
@@ -66,9 +67,8 @@ obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += setup_percpu.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978..c604d23b4f3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -33,6 +33,7 @@
#include <linux/dmi.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <trace/irq.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
@@ -868,7 +869,9 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
*/
exit_idle();
irq_enter();
+ trace_irq_entry(LOCAL_TIMER_VECTOR, regs, NULL);
local_apic_timer_interrupt();
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
set_irq_regs(old_regs);
@@ -1788,6 +1791,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
exit_idle();
irq_enter();
+ trace_irq_entry(SPURIOUS_APIC_VECTOR, NULL, NULL);
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1802,6 +1806,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
pr_info("spurious APIC interrupt on CPU#%d, "
"should never happen.\n", smp_processor_id());
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
}
@@ -1814,6 +1819,7 @@ void smp_error_interrupt(struct pt_regs *regs)
exit_idle();
irq_enter();
+ trace_irq_entry(ERROR_APIC_VECTOR, NULL, NULL);
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
@@ -1834,6 +1840,7 @@ void smp_error_interrupt(struct pt_regs *regs)
*/
pr_debug("APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
}
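
The trace_irq_entry()/trace_irq_exit() calls added in this file are ordinary kernel tracepoints declared in <trace/irq.h> of this tree. As a rough, non-authoritative sketch (the prototypes are inferred from the call sites above: vector id, pt_regs and irqaction on entry, irqreturn_t on exit; the real header may differ), the declarations and a probe hook would look like:

/*
 * Hypothetical sketch only; the authoritative declarations live in
 * <trace/irq.h>.  Prototypes are inferred from the call sites above.
 */
#include <linux/tracepoint.h>
#include <linux/interrupt.h>

DECLARE_TRACE(irq_entry,
	TP_PROTO(unsigned int id, struct pt_regs *regs,
		 struct irqaction *action),
	TP_ARGS(id, regs, action));

DECLARE_TRACE(irq_exit,
	TP_PROTO(irqreturn_t retval),
	TP_ARGS(retval));

/* A tracer module attaches a probe with the data-passing tracepoint API: */
static void probe_irq_entry(void *data, unsigned int id,
			    struct pt_regs *regs, struct irqaction *action)
{
	/* record the vector id, timestamp, ... */
}

static int __init irq_probe_init(void)
{
	return register_trace_irq_entry(probe_irq_entry, NULL);
}

A matching DEFINE_TRACE(irq_entry)/DEFINE_TRACE(irq_exit) still has to appear once in a .c file, the same way this series does for its other tracepoints (see process.c, ptrace.c and traps.c below).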
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0e4f24c2a74..60939d5f226 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
+#include <linux/idle.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -235,6 +236,7 @@
#include <asm/olpc.h>
#include <asm/paravirt.h>
#include <asm/reboot.h>
+#include <asm/idle.h>
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
extern int (*console_blank_hook)(int);
@@ -947,10 +949,17 @@ recalc:
break;
}
}
+ enter_idle();
if (original_pm_idle)
original_pm_idle();
else
default_idle();
+ /*
+ * In many cases the interrupt that ended idle
+ * has already called exit_idle. But some idle
+ * loops can be woken up without an interrupt.
+ */
+ __exit_idle();
local_irq_disable();
jiffies_since_last_check = jiffies - last_jiffies;
if (jiffies_since_last_check > idle_period)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..677f8475d9d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,6 +111,7 @@ void foo(void)
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..1aea11cd840 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -58,6 +58,7 @@ int main(void)
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396b..6052f6f65a6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1069,6 +1069,7 @@ unsigned long kernel_eflags;
* debugging, no special alignment required.
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
+EXPORT_PER_CPU_SYMBOL_GPL(orig_ist);
#else /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97..c8a6411d8ba 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#include <trace/irq.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -402,8 +403,10 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
exit_idle();
irq_enter();
+ trace_irq_entry(THERMAL_APIC_VECTOR, regs, NULL);
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
/* Ack only at the end to avoid potential reentry */
ack_APIC_irq();
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1..6bed23e1c74 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/ltt-core.h>
#include <asm/stacktrace.h>
@@ -253,6 +254,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
if (!signr)
return;
+ if (in_nmi())
+ panic("Fatal exception in non-maskable interrupt");
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
@@ -277,6 +280,10 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
+#ifdef CONFIG_LTT
+ printk(KERN_EMERG "LTT NESTING LEVEL : %u", __get_cpu_var(ltt_nesting));
+ printk("\n");
+#endif
sysfs_printk_last_file();
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ca3b0e343e..afd6d8ef78c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -80,6 +80,8 @@
#define nr_syscalls ((syscall_table_size)/4)
+#define NMI_MASK 0x04000000
+
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
@@ -321,8 +323,32 @@ END(ret_from_fork)
# userspace resumption stub bypassing syscall exit tracing
ALIGN
RING0_PTREGS_FRAME
+
ret_from_exception:
preempt_stop(CLBR_ANY)
+ GET_THREAD_INFO(%ebp)
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb PT_CS(%esp), %al
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jae resume_userspace # returning to v8086 or userspace
+ testl $NMI_MASK,TI_preempt_count(%ebp)
+ jz resume_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF, PT_EFLAGS(%esp)
+ jnz resume_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. It should not
+ * happen: it would reactivate NMIs
+ * prematurely.
+ */
+ TRACE_IRQS_IRET
+ RESTORE_REGS
+ addl $4, %esp # skip orig_eax/error_code
+ CFI_ADJUST_CFA_OFFSET -4
+ INTERRUPT_RETURN_NMI_SAFE
+
ret_from_intr:
GET_THREAD_INFO(%ebp)
check_userspace:
@@ -906,6 +932,10 @@ ENTRY(native_iret)
.previous
END(native_iret)
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE # Should we deal with popf exception?
+END(native_nmi_return)
+
ENTRY(native_irq_enable_sysexit)
sti
sysexit
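
For readability, here is a C rendering of the decision the new ret_from_exception code makes before taking the NMI-safe return path; it is illustrative only, not code from this patch. NMI_MASK (0x04000000) matches the single NMI nesting bit that <linux/hardirq.h> keeps at bit 26 of preempt_count() in kernels of this generation, i.e. the same bit tested by in_nmi().

/* Illustrative C equivalent of the new assembly test above. */
#include <linux/types.h>
#include <linux/hardirq.h>		/* preempt_count(), NMI_MASK */
#include <asm/ptrace.h>
#include <asm/processor-flags.h>	/* X86_EFLAGS_TF */

static bool use_nmi_safe_return(struct pt_regs *regs)
{
	/* Not nested over an NMI: the ordinary iret is fine. */
	if (!(preempt_count() & NMI_MASK))
		return false;
	/*
	 * Single-stepping: the popf/lret sequence would itself be
	 * single-stepped and would re-enable NMIs too early, so fall
	 * back to the ordinary iret path.
	 */
	if (regs->flags & X86_EFLAGS_TF)
		return false;
	return true;	/* use the popf/lret (NMI-safe) return */
}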
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bbd5c80cb09..7800ff65aab 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -163,6 +163,8 @@ GLOBAL(return_to_handler)
#endif
+#define NMI_MASK 0x04000000
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -515,6 +517,8 @@ sysret_check:
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
+ testl $_TIF_KERNEL_TRACE,%edx /* Re-read: concurrently changed */
+ jnz ret_from_sys_call_trace
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
@@ -524,6 +528,16 @@ sysret_careful:
popq_cfi %rdi
jmp sysret_check
+ret_from_sys_call_trace:
+ TRACE_IRQS_ON
+ sti
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %rdi
+ movq %rsp,%rdi
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ jmp int_ret_from_sys_call
+
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
@@ -872,6 +886,9 @@ ENTRY(native_iret)
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
+
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE
#endif
.section .fixup,"ax"
@@ -924,6 +941,24 @@ retint_signal:
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule
+ /* Returning to kernel space from exception. */
+ /* rcx: threadinfo. interrupts off. */
+ENTRY(retexc_kernel)
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz retint_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp) /* trap flag? */
+ jnz retint_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. It should not
+ * happen: it would reactivate NMIs
+ * prematurely.
+ */
+ RESTORE_ARGS 0,8,0
+ TRACE_IRQS_IRETQ
+ INTERRUPT_RETURN_NMI_SAFE
+
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
@@ -1361,12 +1396,18 @@ ENTRY(paranoid_exit)
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
+paranoid_restore_no_nmi:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
+ GET_THREAD_INFO(%rcx)
TRACE_IRQS_IRETQ 0
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz paranoid_restore_no_nmi /* Nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-0(%rsp) /* trap flag? */
+ jnz paranoid_restore_no_nmi
RESTORE_ALL 8
- jmp irq_return
+ INTERRUPT_RETURN_NMI_SAFE
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
@@ -1465,7 +1506,7 @@ ENTRY(error_exit)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
- jne retint_kernel
+ jne retexc_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1aeeb71..1fc5da98373 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -156,6 +156,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+ type == PARAVIRT_PATCH(pv_cpu_ops.nmi_return) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
@@ -204,6 +205,7 @@ static void native_flush_tlb_single(unsigned long addr)
/* These are in entry.S */
extern void native_iret(void);
+extern void native_nmi_return(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
@@ -373,6 +375,7 @@ struct pv_cpu_ops pv_cpu_ops = {
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
+ .nmi_return = native_nmi_return,
.swapgs = native_swapgs,
.set_iopl_mask = native_set_iopl_mask,
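
pv_cpu_ops.nmi_return now defaults to native_nmi_return, mirroring .iret. A hypervisor backend that already provides its own iret would be expected to override nmi_return the same way; the sketch below is hypothetical (the function names are illustrative and not part of this patch):

/* Hypothetical paravirt backend hook-up; names are illustrative only. */
#include <asm/paravirt.h>

extern void example_hv_iret(void);
extern void example_hv_nmi_return(void);

static void __init example_hv_init_cpu_ops(void)
{
	pv_cpu_ops.iret       = example_hv_iret;
	pv_cpu_ops.nmi_return = example_hv_nmi_return;
}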
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab..ac372778bbc 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,10 +1,13 @@
-#include <asm/paravirt.h>
+#include <linux/stringify.h>
+#include <linux/irqflags.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
@@ -41,6 +44,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34f93e..5339e67dc15 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -1,12 +1,15 @@
+#include <linux/irqflags.h>
+#include <linux/stringify.h>
#include <asm/paravirt.h>
#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_cpu_ops, iret, "iretq");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff455419898..e0e4ffcad48 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -13,6 +13,7 @@
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <trace/events/power.h>
+#include <trace/sched.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/system.h>
@@ -23,6 +24,8 @@
#include <asm/i387.h>
#include <asm/debugreg.h>
+DEFINE_TRACE(sched_kthread_create);
+
struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);
@@ -278,6 +281,7 @@ extern void kernel_thread_helper(void);
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct pt_regs regs;
+ long pid;
memset(&regs, 0, sizeof(regs));
@@ -299,7 +303,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.flags = X86_EFLAGS_IF | 0x2;
/* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+ pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED,
+ 0, &regs, 0, NULL, NULL);
+ trace_sched_kthread_create(fn, pid);
+ return pid;
}
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af4..256ba23211d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,9 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/idle.h>
+#include <trace/pm.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -59,6 +62,38 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
+static DEFINE_PER_CPU(unsigned char, is_idle);
+
+void enter_idle(void)
+{
+ percpu_write(is_idle, 1);
+ trace_pm_idle_entry();
+ notify_idle(IDLE_START);
+}
+EXPORT_SYMBOL_GPL(enter_idle);
+
+void __exit_idle(void)
+{
+ if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
+ return;
+ notify_idle(IDLE_END);
+ trace_pm_idle_exit();
+}
+EXPORT_SYMBOL_GPL(__exit_idle);
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+ /* idle loop has pid 0 */
+ if (current->pid)
+ return;
+ __exit_idle();
+}
+EXPORT_SYMBOL_GPL(exit_idle);
+
/*
* Return saved PC of a blocked thread.
*/
@@ -107,10 +142,18 @@ void cpu_idle(void)
play_dead();
local_irq_disable();
+ enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
pm_idle();
start_critical_timings();
+
+ /*
+ * In many cases the interrupt that ended idle
+ * has already called exit_idle. But some idle
+ * loops can be woken up without an interrupt.
+ */
+ __exit_idle();
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
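
The per-cpu is_idle byte and the test-and-clear in __exit_idle() are what keep IDLE_END/pm_idle_exit from firing twice when both the interrupt path (exit_idle()) and the idle loop itself run the exit path, as the comment above notes. The same "at most one exit per entry" pattern, written with a generic atomic flag instead of x86_test_and_clear_bit_percpu(), looks like this (illustration only, C11 user-space code):

#include <stdatomic.h>

static atomic_int cpu_is_idle;

static void example_enter_idle(void)
{
	atomic_store(&cpu_is_idle, 1);
	/* emit pm_idle_entry, run IDLE_START notifiers */
}

static void example_exit_idle(void)
{
	/* Only the first caller since example_enter_idle() sees 1 here. */
	if (atomic_exchange(&cpu_is_idle, 0) == 0)
		return;
	/* run IDLE_END notifiers, emit pm_idle_exit */
}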
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bd387e8f73b..fbde94f1447 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,8 +35,10 @@
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
+#include <linux/idle.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <trace/pm.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -51,37 +53,34 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
-
void enter_idle(void)
{
percpu_write(is_idle, 1);
- atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+ /*
+ * Trace last event before calling notifiers. Notifiers flush
+ * data from buffers before going to idle.
+ */
+ trace_pm_idle_entry();
+ notify_idle(IDLE_START);
}
+EXPORT_SYMBOL_GPL(enter_idle);
-static void __exit_idle(void)
+void __exit_idle(void)
{
if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
- atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+ notify_idle(IDLE_END);
+ trace_pm_idle_exit();
}
+EXPORT_SYMBOL_GPL(__exit_idle);
/* Called from interrupts to signify idle end */
void exit_idle(void)
@@ -91,6 +90,7 @@ void exit_idle(void)
return;
__exit_idle();
}
+EXPORT_SYMBOL_GPL(exit_idle);
#ifndef CONFIG_SMP
static inline void play_dead(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72..ee3024d4f61 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
#include <linux/signal.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
+#include <trace/syscall.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -152,6 +153,9 @@ static const int arg_offs_table[] = {
X86_EFLAGS_DF | X86_EFLAGS_OF | \
X86_EFLAGS_RF | X86_EFLAGS_AC))
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
/*
* Determines whether a value may be installed in a segment register.
*/
@@ -1361,6 +1365,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
if (test_thread_flag(TIF_SINGLESTEP))
regs->flags |= X86_EFLAGS_TF;
+ trace_syscall_entry(regs, regs->orig_ax);
+
/* do the secure computing check first */
secure_computing(regs->orig_ax);
@@ -1396,6 +1402,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
+ trace_syscall_exit(regs->ax);
+
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index de87d600829..5e74f6aa3c0 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -1,8 +1,11 @@
/* System call table for x86-64. */
#include <linux/linkage.h>
+#include <linux/module.h>
#include <linux/sys.h>
#include <linux/cache.h>
+#include <linux/marker.h>
+#include <linux/kallsyms.h>
#include <asm/asm-offsets.h>
#define __NO_STUBS
@@ -27,3 +30,18 @@ const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/unistd_64.h>
};
+
+void ltt_dump_sys_call_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < __NR_syscall_max + 1; i++) {
+ sprint_symbol(namebuf, (unsigned long)sys_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table,
+ call_data,
+ "id %d address %p symbol %s",
+ i, (void*)sys_call_table[i], namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
diff --git a/arch/x86/kernel/trace-clock.c b/arch/x86/kernel/trace-clock.c
new file mode 100644
index 00000000000..47539e28276
--- /dev/null
+++ b/arch/x86/kernel/trace-clock.c
@@ -0,0 +1,302 @@
+/*
+ * arch/x86/kernel/trace-clock.c
+ *
+ * Trace clock for x86.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>, October 2008
+ */
+
+#include <linux/module.h>
+#include <linux/trace-clock.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/cpu.h>
+#include <linux/posix-timers.h>
+#include <asm/vgtod.h>
+
+static cycles_t trace_clock_last_tsc;
+static DEFINE_PER_CPU(struct timer_list, update_timer);
+static DEFINE_SPINLOCK(async_tsc_lock);
+static int async_tsc_refcount; /* Number of readers */
+static int async_tsc_enabled; /* Async TSC enabled on all online CPUs */
+
+int _trace_clock_is_sync = 1;
+EXPORT_SYMBOL_GPL(_trace_clock_is_sync);
+
+/*
+ * Is the trace clock being used by user-space? We leave the trace clock active
+ * as soon as user-space starts using it. We never unref the trace clock
+ * reference taken by user-space.
+ */
+static atomic_t user_trace_clock_ref;
+
+/*
+ * Called by check_tsc_sync_source from CPU hotplug.
+ */
+void set_trace_clock_is_sync(int state)
+{
+ _trace_clock_is_sync = state;
+ update_trace_clock_is_sync_vdso();
+}
+
+#if BITS_PER_LONG == 64
+static cycles_t read_last_tsc(void)
+{
+ return trace_clock_last_tsc;
+}
+#else
+/*
+ * A cmpxchg64 update can happen concurrently. Based on the assumption that
+ * two cmpxchg64 updates will never set it to the same value (the count always
+ * increases), reading it twice ensures that we read a coherent value with the
+ * same "sequence number".
+ */
+static cycles_t read_last_tsc(void)
+{
+ cycles_t val1, val2;
+
+ val1 = trace_clock_last_tsc;
+ for (;;) {
+ val2 = val1;
+ barrier();
+ val1 = trace_clock_last_tsc;
+ if (likely(val1 == val2))
+ break;
+ }
+ return val1;
+}
+#endif
+
+/*
+ * Support for architectures with non-sync TSCs.
+ * When the local TSC is discovered to lag behind the highest TSC counter, we
+ * increment the TSC count by an amount that should ideally be lower than the
+ * execution time of this routine, in cycles: this is the granularity we look
+ * for, since we must be able to order the events.
+ */
+notrace cycles_t trace_clock_async_tsc_read(void)
+{
+ cycles_t new_tsc, last_tsc;
+
+ WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+ new_tsc = get_cycles();
+ last_tsc = read_last_tsc();
+ do {
+ if (new_tsc < last_tsc)
+ new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+ /*
+ * If cmpxchg fails with a value higher than the new_tsc, don't
+ * retry: the value has been incremented and the events
+ * happened almost at the same time.
+ * We must retry if cmpxchg fails with a lower value:
+ * it means that we are the CPU with the highest frequency and
+ * therefore MUST update the value.
+ */
+ last_tsc = cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc);
+ } while (unlikely(last_tsc < new_tsc));
+ return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
+
+static void update_timer_ipi(void *info)
+{
+ (void)trace_clock_async_tsc_read();
+}
+
+/*
+ * update_timer_fct - Timer function to resync the clocks
+ * @data: unused
+ *
+ * Fires every jiffy.
+ */
+static void update_timer_fct(unsigned long data)
+{
+ (void)trace_clock_async_tsc_read();
+ mod_timer_pinned(&per_cpu(update_timer, smp_processor_id()),
+ jiffies + 1);
+}
+
+static void enable_trace_clock(int cpu)
+{
+ init_timer(&per_cpu(update_timer, cpu));
+ per_cpu(update_timer, cpu).function = update_timer_fct;
+ per_cpu(update_timer, cpu).expires = jiffies + 1;
+ smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
+ add_timer_on(&per_cpu(update_timer, cpu), cpu);
+}
+
+static void disable_trace_clock(int cpu)
+{
+ del_timer_sync(&per_cpu(update_timer, cpu));
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD)
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int hotcpu = (unsigned long)hcpu;
+ int cpu;
+
+ spin_lock(&async_tsc_lock);
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ /*
+ * trace_clock_is_sync() is updated by set_trace_clock_is_sync()
+ * code, protected by cpu hotplug disable.
+ * It is ok to let the hotplugged CPU read the timebase before
+ * the CPU_ONLINE notification. It's just there to give a
+ * maximum bound to the TSC error.
+ */
+ if (async_tsc_refcount && !trace_clock_is_sync()) {
+ if (!async_tsc_enabled) {
+ async_tsc_enabled = 1;
+ for_each_online_cpu(cpu)
+ enable_trace_clock(cpu);
+ } else {
+ enable_trace_clock(hotcpu);
+ }
+ }
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ /*
+ * We cannot stop the trace clock on other CPUs when readers are
+ * active even if we go back to a synchronized state (1 CPU)
+ * because the remaining CPU could be the one lagging behind.
+ */
+ if (async_tsc_refcount && async_tsc_enabled)
+ disable_trace_clock(hotcpu);
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ break;
+#endif /* CONFIG_HOTPLUG_CPU */
+ }
+ spin_unlock(&async_tsc_lock);
+
+ return NOTIFY_OK;
+}
+
+int get_trace_clock(void)
+{
+ int cpu;
+
+ if (!trace_clock_is_sync()) {
+ printk(KERN_WARNING
+ "Trace clock falls back on cache-line bouncing\n"
+ "workaround due to non-synchronized TSCs.\n"
+ "This workaround preserves event order across CPUs.\n"
+ "Please consider disabling Speedstep or PowerNow and\n"
+ "using kernel parameters "
+ "\"force_tsc_sync=1 idle=poll\"\n"
+ "for accurate and fast tracing clock source.\n");
+ }
+
+ get_online_cpus();
+ spin_lock(&async_tsc_lock);
+ if (async_tsc_refcount++ || trace_clock_is_sync())
+ goto end;
+
+ async_tsc_enabled = 1;
+ for_each_online_cpu(cpu)
+ enable_trace_clock(cpu);
+end:
+ spin_unlock(&async_tsc_lock);
+ put_online_cpus();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+ int cpu;
+
+ get_online_cpus();
+ spin_lock(&async_tsc_lock);
+ WARN_ON(async_tsc_refcount <= 0);
+ if (async_tsc_refcount != 1 || !async_tsc_enabled)
+ goto end;
+
+ for_each_online_cpu(cpu)
+ disable_trace_clock(cpu);
+ async_tsc_enabled = 0;
+end:
+ async_tsc_refcount--;
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ spin_unlock(&async_tsc_lock);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
+
+static int posix_get_trace(clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+ int ret;
+
+ /*
+ * Yes, there is a race here that would lead to refcount being
+ * incremented more than once, but all we care about is leaving the trace
+ * clock active forever, so precise accounting is not needed.
+ */
+ if (unlikely(!atomic_read(&user_trace_clock_ref))) {
+ ret = get_trace_clock();
+ if (ret)
+ return ret;
+ atomic_inc(&user_trace_clock_ref);
+ }
+ lts->lttng_ts = trace_clock_read64();
+ return 0;
+}
+
+static int posix_get_trace_freq(clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+ lts->lttng_ts = trace_clock_frequency();
+ return 0;
+}
+
+static int posix_get_trace_res(const clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+ lts->lttng_ts = TRACE_CLOCK_RES;
+ return 0;
+}
+
+static __init int init_unsync_trace_clock(void)
+{
+ struct k_clock clock_trace = {
+ .clock_getres = posix_get_trace_res,
+ .clock_get = posix_get_trace,
+ };
+ struct k_clock clock_trace_freq = {
+ .clock_getres = posix_get_trace_res,
+ .clock_get = posix_get_trace_freq,
+ };
+
+ register_posix_clock(CLOCK_TRACE, &clock_trace);
+ register_posix_clock(CLOCK_TRACE_FREQ, &clock_trace_freq);
+
+ hotcpu_notifier(hotcpu_callback, 4);
+ return 0;
+}
+early_initcall(init_unsync_trace_clock);
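
The core of trace_clock_async_tsc_read() above is a compare-and-swap loop that never lets the published timestamp go backwards, so events stay ordered across CPUs whose TSCs drift. A simplified user-space rendering of the same idea is shown below; it is an illustration only (GCC builtins stand in for the kernel's cmpxchg64, and MIN_PROBE_DURATION is an assumed stand-in for TRACE_CLOCK_MIN_PROBE_DURATION):

#include <stdint.h>

static uint64_t last_published;		/* shared across CPUs/threads */
#define MIN_PROBE_DURATION 200		/* assumed stand-in value */

static inline uint64_t rdtsc(void)
{
	uint32_t lo, hi;
	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
	return ((uint64_t)hi << 32) | lo;
}

uint64_t monotonic_tsc_read(void)
{
	uint64_t now = rdtsc();
	uint64_t last = __atomic_load_n(&last_published, __ATOMIC_RELAXED);
	uint64_t old;

	for (;;) {
		if (now < last)			/* never go backwards */
			now = last + MIN_PROBE_DURATION;
		old = __sync_val_compare_and_swap(&last_published, last, now);
		if (old == last || old >= now)
			return now;	/* published, or someone is already ahead */
		last = old;		/* lost to a smaller value: retry */
	}
}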
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b67166f9d..d45030679f9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -31,6 +31,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/io.h>
+#include <trace/trap.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -52,6 +53,7 @@
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/traps.h>
+#include <asm/unistd.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mce.h>
@@ -76,11 +78,21 @@ char ignore_fpu_irq;
* F0 0F bug workaround.
*/
gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
+
+extern unsigned long sys_call_table[];
+extern unsigned long syscall_table_size;
+
#endif
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
+/*
+ * Also used in arch/x86/mm/fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
static int ignore_nmis;
int unknown_nmi_panic;
@@ -122,6 +134,8 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
{
struct task_struct *tsk = current;
+ trace_trap_entry(regs, trapnr);
+
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
/*
@@ -168,7 +182,7 @@ trap_signal:
force_sig_info(signr, info, tsk);
else
force_sig(signr, tsk);
- return;
+ goto end;
kernel_trap:
if (!fixup_exception(regs)) {
@@ -176,15 +190,17 @@ kernel_trap:
tsk->thread.trap_no = trapnr;
die(str, regs, error_code);
}
- return;
+ goto end;
#ifdef CONFIG_X86_32
vm86_trap:
if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, trapnr))
goto trap_signal;
- return;
+ goto end;
#endif
+end:
+ trace_trap_exit();
}
#define DO_ERROR(trapnr, signr, str, name) \
@@ -285,7 +301,9 @@ do_general_protection(struct pt_regs *regs, long error_code)
printk("\n");
}
+ trace_trap_entry(regs, 13);
force_sig(SIGSEGV, tsk);
+ trace_trap_exit();
return;
#ifdef CONFIG_X86_32
@@ -371,9 +389,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
static notrace __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
+ trace_trap_entry(regs, 2);
+
if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
NOTIFY_STOP)
- return;
+ goto end;
#ifdef CONFIG_MCA
/*
* Might actually be able to figure out what the guilty party
@@ -381,7 +401,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
*/
if (MCA_bus) {
mca_handle_nmi();
- return;
+ goto end;
}
#endif
pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
@@ -392,19 +412,23 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
panic("NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
+end:
+ trace_trap_exit();
}
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
+ trace_trap_entry(regs, 2);
+
/*
* CPU-specific NMI must be processed before non-CPU-specific
* NMI, otherwise we may lose it, because the CPU-specific
* NMI can not be detected/processed on other CPUs.
*/
if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
+ goto end;
/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
raw_spin_lock(&nmi_reason_lock);
@@ -423,11 +447,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
reassert_nmi();
#endif
raw_spin_unlock(&nmi_reason_lock);
- return;
+ goto end;
}
raw_spin_unlock(&nmi_reason_lock);
unknown_nmi_error(reason, regs);
+end:
+ trace_trap_exit();
}
dotraplinkage notrace __kprobes void
@@ -570,8 +596,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
preempt_conditional_sti(regs);
if (regs->flags & X86_VM_MASK) {
+ trace_trap_entry(regs, 1);
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
+ trace_trap_exit();
preempt_conditional_cli(regs);
return;
}
@@ -589,13 +617,32 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(tsk->thread.debugreg6);
- if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) {
+ trace_trap_entry(regs, 1);
send_sigtrap(tsk, regs, error_code, si_code);
+ trace_trap_exit();
+ }
preempt_conditional_cli(regs);
return;
}
+#ifdef CONFIG_X86_32
+void ltt_dump_sys_call_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < NR_syscalls; i++) {
+ sprint_symbol(namebuf, sys_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i, (void*)sys_call_table[i], namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
+#endif
+
/*
* Note that we play around with the 'TS' bit in an attempt to get
* the correct behaviour even in the presence of the asynchronous
@@ -701,11 +748,13 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
+ trace_trap_entry(regs, 16);
conditional_sti(regs);
#if 0
/* No need to warn about this any longer. */
printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
+ trace_trap_exit();
}
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
@@ -738,6 +787,21 @@ void __math_state_restore(void)
tsk->fpu_counter++;
}
+void ltt_dump_idt_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < IDT_ENTRIES; i++) {
+ unsigned long address = gate_offset(idt_table[i]);
+ sprint_symbol(namebuf, address);
+ __trace_mark(0, irq_state, idt_table, call_data,
+ "irq %d address %p symbol %s",
+ i, (void *)address, namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_idt_table);
+
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
deleted file mode 100644
index 0aa5fed8b9e..00000000000
--- a/arch/x86/kernel/tsc_sync.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * check TSC synchronization.
- *
- * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
- *
- * We check whether all boot CPUs have their TSC's synchronized,
- * print a warning if not and turn off the TSC clock-source.
- *
- * The warp-check is point-to-point between two CPUs, the CPU
- * initiating the bootup is the 'source CPU', the freshly booting
- * CPU is the 'target CPU'.
- *
- * Only two CPUs may participate - they can enter in any order.
- * ( The serial nature of the boot logic and the CPU hotplug lock
- * protects against more than 2 CPUs entering this code. )
- */
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/nmi.h>
-#include <asm/tsc.h>
-
-/*
- * Entry/exit counters that make sure that both CPUs
- * run the measurement code at once:
- */
-static __cpuinitdata atomic_t start_count;
-static __cpuinitdata atomic_t stop_count;
-
-/*
- * We use a raw spinlock in this exceptional case, because
- * we want to have the fastest, inlined, non-debug version
- * of a critical section, to be able to prove TSC time-warps:
- */
-static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-
-static __cpuinitdata cycles_t last_tsc;
-static __cpuinitdata cycles_t max_warp;
-static __cpuinitdata int nr_warps;
-
-/*
- * TSC-warp measurement loop running on both CPUs:
- */
-static __cpuinit void check_tsc_warp(void)
-{
- cycles_t start, now, prev, end;
- int i;
-
- rdtsc_barrier();
- start = get_cycles();
- rdtsc_barrier();
- /*
- * The measurement runs for 20 msecs:
- */
- end = start + tsc_khz * 20ULL;
- now = start;
-
- for (i = 0; ; i++) {
- /*
- * We take the global lock, measure TSC, save the
- * previous TSC that was measured (possibly on
- * another CPU) and update the previous TSC timestamp.
- */
- arch_spin_lock(&sync_lock);
- prev = last_tsc;
- rdtsc_barrier();
- now = get_cycles();
- rdtsc_barrier();
- last_tsc = now;
- arch_spin_unlock(&sync_lock);
-
- /*
- * Be nice every now and then (and also check whether
- * measurement is done [we also insert a 10 million
- * loops safety exit, so we dont lock up in case the
- * TSC readout is totally broken]):
- */
- if (unlikely(!(i & 7))) {
- if (now > end || i > 10000000)
- break;
- cpu_relax();
- touch_nmi_watchdog();
- }
- /*
- * Outside the critical section we can now see whether
- * we saw a time-warp of the TSC going backwards:
- */
- if (unlikely(prev > now)) {
- arch_spin_lock(&sync_lock);
- max_warp = max(max_warp, prev - now);
- nr_warps++;
- arch_spin_unlock(&sync_lock);
- }
- }
- WARN(!(now-start),
- "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
- now-start, end-start);
-}
-
-/*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
- */
-void __cpuinit check_tsc_sync_source(int cpu)
-{
- int cpus = 2;
-
- /*
- * No need to check if we already know that the TSC is not
- * synchronized:
- */
- if (unsynchronized_tsc())
- return;
-
- if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
- if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
- pr_info(
- "Skipped synchronization checks as TSC is reliable.\n");
- return;
- }
-
- /*
- * Reset it - in case this is a second bootup:
- */
- atomic_set(&stop_count, 0);
-
- /*
- * Wait for the target to arrive:
- */
- while (atomic_read(&start_count) != cpus-1)
- cpu_relax();
- /*
- * Trigger the target to continue into the measurement too:
- */
- atomic_inc(&start_count);
-
- check_tsc_warp();
-
- while (atomic_read(&stop_count) != cpus-1)
- cpu_relax();
-
- if (nr_warps) {
- pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
- smp_processor_id(), cpu);
- pr_warning("Measured %Ld cycles TSC warp between CPUs, "
- "turning off TSC clock.\n", max_warp);
- mark_tsc_unstable("check_tsc_sync_source failed");
- } else {
- pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
- smp_processor_id(), cpu);
- }
-
- /*
- * Reset it - just in case we boot another CPU later:
- */
- atomic_set(&start_count, 0);
- nr_warps = 0;
- max_warp = 0;
- last_tsc = 0;
-
- /*
- * Let the target continue with the bootup:
- */
- atomic_inc(&stop_count);
-}
-
-/*
- * Freshly booted CPUs call into this:
- */
-void __cpuinit check_tsc_sync_target(void)
-{
- int cpus = 2;
-
- if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
- return;
-
- /*
- * Register this CPU's participation and wait for the
- * source CPU to start the measurement:
- */
- atomic_inc(&start_count);
- while (atomic_read(&start_count) != cpus)
- cpu_relax();
-
- check_tsc_warp();
-
- /*
- * Ok, we are done:
- */
- atomic_inc(&stop_count);
-
- /*
- * Wait for the source CPU to print stuff:
- */
- while (atomic_read(&stop_count) != cpus)
- cpu_relax();
-}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c4b69..df18f14c473 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,6 +44,8 @@
#include <asm/desc.h>
#include <asm/topology.h>
#include <asm/vgtod.h>
+#include <asm/trace-clock.h>
+#include <asm/timer.h>
#define __vsyscall(nr) \
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
@@ -61,6 +63,7 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
{
.lock = SEQLOCK_UNLOCKED,
.sysctl_enabled = 1,
+ .trace_clock_is_sync = 1,
};
void update_vsyscall_tz(void)
@@ -73,6 +76,16 @@ void update_vsyscall_tz(void)
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
+void update_trace_clock_is_sync_vdso(void)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+EXPORT_SYMBOL_GPL(update_trace_clock_is_sync_vdso);
+
void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
{
@@ -89,6 +102,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.wall_to_monotonic = *wtm;
vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
+ vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}