Diffstat (limited to 'arch/x86/kernel')

 arch/x86/kernel/Makefile                 |   4
 arch/x86/kernel/apic/apic.c              |   7
 arch/x86/kernel/apm_32.c                 |   9
 arch/x86/kernel/asm-offsets_32.c         |   1
 arch/x86/kernel/asm-offsets_64.c         |   1
 arch/x86/kernel/cpu/common.c             |   1
 arch/x86/kernel/cpu/mcheck/therm_throt.c |   3
 arch/x86/kernel/dumpstack.c              |   7
 arch/x86/kernel/entry_32.S               |  30
 arch/x86/kernel/entry_64.S               |  45
 arch/x86/kernel/paravirt.c               |   3
 arch/x86/kernel/paravirt_patch_32.c      |   6
 arch/x86/kernel/paravirt_patch_64.c      |   6
 arch/x86/kernel/process.c                |   9
 arch/x86/kernel/process_32.c             |  43
 arch/x86/kernel/process_64.c             |  34
 arch/x86/kernel/ptrace.c                 |   8
 arch/x86/kernel/syscall_64.c             |  18
 arch/x86/kernel/trace-clock.c            | 302
 arch/x86/kernel/traps.c                  |  80
 arch/x86/kernel/tsc_sync.c               | 198
 arch/x86/kernel/vsyscall_64.c            |  14

 22 files changed, 599 insertions(+), 230 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2cd88..717cf9c620b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,6 +46,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y += tsc.o io_delay.o rtc.o
 obj-y += pci-iommu_table.o
 obj-y += resource.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-y += process.o
@@ -66,9 +67,8 @@ obj-$(CONFIG_PCI) += early-quirks.o
 apm-y := apm_32.o
 obj-$(CONFIG_APM) += apm.o
 obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += smpboot.o
 obj-$(CONFIG_SMP) += setup_percpu.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-y += apic/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978..c604d23b4f3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -33,6 +33,7 @@
 #include <linux/dmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <trace/irq.h>
 
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
@@ -868,7 +869,9 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
	 */
	exit_idle();
	irq_enter();
+	trace_irq_entry(LOCAL_TIMER_VECTOR, regs, NULL);
	local_apic_timer_interrupt();
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
 
	set_irq_regs(old_regs);
@@ -1788,6 +1791,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
	exit_idle();
	irq_enter();
+	trace_irq_entry(SPURIOUS_APIC_VECTOR, NULL, NULL);
	/*
	 * Check if this really is a spurious interrupt and ACK it
	 * if it is a vectored one.  Just in case...
@@ -1802,6 +1806,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
	pr_info("spurious APIC interrupt on CPU#%d, "
		"should never happen.\n", smp_processor_id());
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
 }
@@ -1814,6 +1819,7 @@ void smp_error_interrupt(struct pt_regs *regs)
	exit_idle();
	irq_enter();
+	trace_irq_entry(ERROR_APIC_VECTOR, NULL, NULL);
	/* First tickle the hardware, only then report what went on. -- REW */
	v = apic_read(APIC_ESR);
	apic_write(APIC_ESR, 0);
@@ -1834,6 +1840,7 @@ void smp_error_interrupt(struct pt_regs *regs)
	 */
	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
		smp_processor_id(), v , v1);
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
 }
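Note: the apic.c hunks bracket each vectored interrupt handler with trace_irq_entry()/trace_irq_exit() from the trace/irq.h instrumentation added elsewhere in this series. A hedged sketch of how a tracer might consume these events, assuming the three-argument probe signature implied by the call sites above (vector id, pt_regs, opaque action pointer) and the register_trace_irq_entry() stub that DEFINE_TRACE()/DECLARE_TRACE() conventionally generate; none of these names are confirmed by this patch alone:

	/* Hypothetical probe: count local-timer interrupts per CPU. */
	static DEFINE_PER_CPU(unsigned long, timer_irq_count);

	static void probe_irq_entry(unsigned int id, struct pt_regs *regs,
				    void *action)
	{
		if (id == LOCAL_TIMER_VECTOR)
			__get_cpu_var(timer_irq_count)++;
	}

	/* in tracer init code: register_trace_irq_entry(probe_irq_entry); */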
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0e4f24c2a74..60939d5f226 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
 #include <linux/suspend.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
+#include <linux/idle.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -235,6 +236,7 @@
 #include <asm/olpc.h>
 #include <asm/paravirt.h>
 #include <asm/reboot.h>
+#include <asm/idle.h>
 
 #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
 extern int (*console_blank_hook)(int);
@@ -947,10 +949,17 @@ recalc:
				break;
			}
		}
+		enter_idle();
		if (original_pm_idle)
			original_pm_idle();
		else
			default_idle();
+		/*
+		 * In many cases the interrupt that ended idle
+		 * has already called exit_idle. But some idle
+		 * loops can be woken up without interrupt.
+		 */
+		__exit_idle();
		local_irq_disable();
		jiffies_since_last_check = jiffies - last_jiffies;
		if (jiffies_since_last_check > idle_period)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..677f8475d9d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,6 +111,7 @@ void foo(void)
	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+	OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..1aea11cd840 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -58,6 +58,7 @@ int main(void)
	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
	OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+	OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
	OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396b..6052f6f65a6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1069,6 +1069,7 @@ unsigned long kernel_eflags;
  * debugging, no special alignment required.
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
+EXPORT_PER_CPU_SYMBOL_GPL(orig_ist);
 
 #else	/* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97..c8a6411d8ba 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
+#include <trace/irq.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -402,8 +403,10 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
 {
	exit_idle();
	irq_enter();
+	trace_irq_entry(THERMAL_APIC_VECTOR, regs, NULL);
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
+	trace_irq_exit(IRQ_HANDLED);
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1..6bed23e1c74 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,6 +15,7 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
+#include <linux/ltt-core.h>
 
 #include <asm/stacktrace.h>
@@ -253,6 +254,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
	if (!signr)
		return;
+	if (in_nmi())
+		panic("Fatal exception in non-maskable interrupt");
	if (in_interrupt())
		panic("Fatal exception in interrupt");
	if (panic_on_oops)
@@ -277,6 +280,10 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
	printk("DEBUG_PAGEALLOC");
 #endif
	printk("\n");
+#ifdef CONFIG_LTT
+	printk(KERN_EMERG "LTT NESTING LEVEL : %u", __get_cpu_var(ltt_nesting));
+	printk("\n");
+#endif
	sysfs_printk_last_file();
	if (notify_die(DIE_OOPS, str, regs, err,
			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
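Note: the new in_nmi() check in oops_end() makes a fatal exception nested over an NMI panic explicitly, mirroring the existing in_interrupt() case. It relies on the NMI nesting bit the kernel keeps in the high bits of preempt_count; the NMI_MASK constant that the entry_*.S hunks below test by hand is that same bit. For reference, from the 2.6.38-era linux/hardirq.h:

	#define NMI_OFFSET	(1UL << 26)	/* == NMI_MASK 0x04000000 */
	#define in_nmi()	(preempt_count() & NMI_MASK)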
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ca3b0e343e..afd6d8ef78c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -80,6 +80,8 @@
 #define nr_syscalls ((syscall_table_size)/4)
 
+#define NMI_MASK 0x04000000
+
 #ifdef CONFIG_PREEMPT
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
@@ -321,8 +323,32 @@ END(ret_from_fork)
	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
	RING0_PTREGS_FRAME
+
 ret_from_exception:
	preempt_stop(CLBR_ANY)
+	GET_THREAD_INFO(%ebp)
+	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
+	movb PT_CS(%esp), %al
+	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+	cmpl $USER_RPL, %eax
+	jae resume_userspace		# returning to v8086 or userspace
+	testl $NMI_MASK,TI_preempt_count(%ebp)
+	jz resume_kernel		/* Not nested over NMI ? */
+	testw $X86_EFLAGS_TF, PT_EFLAGS(%esp)
+	jnz resume_kernel		/*
+					 * If single-stepping an NMI handler,
+					 * use the normal iret path instead of
+					 * the popf/lret because lret would be
+					 * single-stepped. It should not
+					 * happen : it will reactivate NMIs
+					 * prematurely.
+					 */
+	TRACE_IRQS_IRET
+	RESTORE_REGS
+	addl $4, %esp			# skip orig_eax/error_code
+	CFI_ADJUST_CFA_OFFSET -4
+	INTERRUPT_RETURN_NMI_SAFE
+
 ret_from_intr:
	GET_THREAD_INFO(%ebp)
 check_userspace:
@@ -906,6 +932,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
+ENTRY(native_nmi_return)
+	NATIVE_INTERRUPT_RETURN_NMI_SAFE	# Should we deal with popf exception ?
+END(native_nmi_return)
+
 ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
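Note: both entry files return through INTERRUPT_RETURN_NMI_SAFE when they detect, via NMI_MASK in preempt_count, that the exception being unwound is nested over an NMI handler. The comments above give the rationale: iret ends the CPU's NMI-blocking window as a side effect, so returning from a fault taken inside an NMI handler with iret would let another NMI nest prematurely; a popf plus far-return sequence restores EFLAGS and control flow without reopening that window. The single-step case falls back to the normal iret path because the lret itself would otherwise be single-stepped. A minimal 32-bit illustration of the idea; this is not the series' actual NATIVE_INTERRUPT_RETURN_NMI_SAFE definition (which lives in an irqflags.h hunk outside this section), just its shape:

	/* Sketch only: assumes the return frame has been rewritten so the
	 * stack top is EFLAGS, then the return EIP, then CS. */
	popfl			/* restore EFLAGS; unlike iret, does not
				 * end the NMI-blocking window */
	lret			/* far return: pops EIP, then CS */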
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bbd5c80cb09..7800ff65aab 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -163,6 +163,8 @@ GLOBAL(return_to_handler)
 #endif
 
+#define NMI_MASK 0x04000000
+
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif
@@ -515,6 +517,8 @@ sysret_check:
	/* Handle reschedules */
	/* edx: work, edi: workmask */
 sysret_careful:
+	testl $_TIF_KERNEL_TRACE,%edx	/* Re-read : concurrently changed */
+	jnz ret_from_sys_call_trace
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
@@ -524,6 +528,16 @@ sysret_careful:
	popq_cfi %rdi
	jmp sysret_check
 
+ret_from_sys_call_trace:
+	TRACE_IRQS_ON
+	sti
+	SAVE_REST
+	FIXUP_TOP_OF_STACK %rdi
+	movq %rsp,%rdi
+	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
+	RESTORE_REST
+	jmp int_ret_from_sys_call
+
	/* Handle a signal */
 sysret_signal:
	TRACE_IRQS_ON
@@ -872,6 +886,9 @@ ENTRY(native_iret)
	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
+
+ENTRY(native_nmi_return)
+	NATIVE_INTERRUPT_RETURN_NMI_SAFE
 #endif
 
	.section .fixup,"ax"
@@ -924,6 +941,24 @@ retint_signal:
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule
 
+	/* Returning to kernel space from exception. */
+	/* rcx: threadinfo. interrupts off. */
+ENTRY(retexc_kernel)
+	testl $NMI_MASK,TI_preempt_count(%rcx)
+	jz retint_kernel		/* Not nested over NMI ? */
+	testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp)	/* trap flag? */
+	jnz retint_kernel		/*
+					 * If single-stepping an NMI handler,
+					 * use the normal iret path instead of
+					 * the popf/lret because lret would be
+					 * single-stepped. It should not
+					 * happen : it will reactivate NMIs
+					 * prematurely.
+					 */
+	RESTORE_ARGS 0,8,0
+	TRACE_IRQS_IRETQ
+	INTERRUPT_RETURN_NMI_SAFE
+
 #ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
@@ -1361,12 +1396,18 @@ ENTRY(paranoid_exit)
 paranoid_swapgs:
	TRACE_IRQS_IRETQ 0
	SWAPGS_UNSAFE_STACK
+paranoid_restore_no_nmi:
	RESTORE_ALL 8
	jmp irq_return
 paranoid_restore:
+	GET_THREAD_INFO(%rcx)
	TRACE_IRQS_IRETQ 0
+	testl $NMI_MASK,TI_preempt_count(%rcx)
+	jz paranoid_restore_no_nmi	/* Nested over NMI ? */
+	testw $X86_EFLAGS_TF,EFLAGS-0(%rsp)	/* trap flag? */
+	jnz paranoid_restore_no_nmi
	RESTORE_ALL 8
-	jmp irq_return
+	INTERRUPT_RETURN_NMI_SAFE
 paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
@@ -1465,7 +1506,7 @@ ENTRY(error_exit)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
-	jne retint_kernel
+	jne retexc_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
"iretq"); +DEF_NATIVE(pv_cpu_ops, nmi_return, + __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE)); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_cpu_ops, iret); + PATCH_SITE(pv_cpu_ops, nmi_return); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff455419898..e0e4ffcad48 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -13,6 +13,7 @@ #include <linux/dmi.h> #include <linux/utsname.h> #include <trace/events/power.h> +#include <trace/sched.h> #include <linux/hw_breakpoint.h> #include <asm/cpu.h> #include <asm/system.h> @@ -23,6 +24,8 @@ #include <asm/i387.h> #include <asm/debugreg.h> +DEFINE_TRACE(sched_kthread_create); + struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -278,6 +281,7 @@ extern void kernel_thread_helper(void); int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct pt_regs regs; + long pid; memset(®s, 0, sizeof(regs)); @@ -299,7 +303,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.flags = X86_EFLAGS_IF | 0x2; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, ®s, 0, NULL, NULL); + trace_sched_kthread_create(fn, pid); + return pid; } EXPORT_SYMBOL(kernel_thread); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af4..256ba23211d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -38,6 +38,9 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> +#include <linux/notifier.h> +#include <linux/idle.h> +#include <trace/pm.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -59,6 +62,38 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +DEFINE_TRACE(pm_idle_exit); +DEFINE_TRACE(pm_idle_entry); + +static DEFINE_PER_CPU(unsigned char, is_idle); + +void enter_idle(void) +{ + percpu_write(is_idle, 1); + trace_pm_idle_entry(); + notify_idle(IDLE_START); +} +EXPORT_SYMBOL_GPL(enter_idle); + +void __exit_idle(void) +{ + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) + return; + notify_idle(IDLE_END); + trace_pm_idle_exit(); +} +EXPORT_SYMBOL_GPL(__exit_idle); + +/* Called from interrupts to signify idle end */ +void exit_idle(void) +{ + /* idle loop has pid 0 */ + if (current->pid) + return; + __exit_idle(); +} +EXPORT_SYMBOL_GPL(exit_idle); + /* * Return saved PC of a blocked thread. */ @@ -107,10 +142,18 @@ void cpu_idle(void) play_dead(); local_irq_disable(); + enter_idle(); /* Don't trace irqs off for idle */ stop_critical_timings(); pm_idle(); start_critical_timings(); + + /* + * In many cases the interrupt that ended idle + * has already called exit_idle. But some idle + * loops can be woken up without interrupt. 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff455419898..e0e4ffcad48 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -13,6 +13,7 @@
 #include <linux/dmi.h>
 #include <linux/utsname.h>
 #include <trace/events/power.h>
+#include <trace/sched.h>
 #include <linux/hw_breakpoint.h>
 #include <asm/cpu.h>
 #include <asm/system.h>
@@ -23,6 +24,8 @@
 #include <asm/i387.h>
 #include <asm/debugreg.h>
 
+DEFINE_TRACE(sched_kthread_create);
+
 struct kmem_cache *task_xstate_cachep;
 EXPORT_SYMBOL_GPL(task_xstate_cachep);
@@ -278,6 +281,7 @@ extern void kernel_thread_helper(void);
 int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
	struct pt_regs regs;
+	long pid;
 
	memset(&regs, 0, sizeof(regs));
@@ -299,7 +303,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
	regs.flags = X86_EFLAGS_IF | 0x2;
 
	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED,
+		      0, &regs, 0, NULL, NULL);
+	trace_sched_kthread_create(fn, pid);
+	return pid;
 }
 EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af4..256ba23211d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,9 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/idle.h>
+#include <trace/pm.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -59,6 +62,38 @@
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
+static DEFINE_PER_CPU(unsigned char, is_idle);
+
+void enter_idle(void)
+{
+	percpu_write(is_idle, 1);
+	trace_pm_idle_entry();
+	notify_idle(IDLE_START);
+}
+EXPORT_SYMBOL_GPL(enter_idle);
+
+void __exit_idle(void)
+{
+	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
+		return;
+	notify_idle(IDLE_END);
+	trace_pm_idle_exit();
+}
+EXPORT_SYMBOL_GPL(__exit_idle);
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+	/* idle loop has pid 0 */
+	if (current->pid)
+		return;
+	__exit_idle();
+}
+EXPORT_SYMBOL_GPL(exit_idle);
+
 /*
  * Return saved PC of a blocked thread.
  */
@@ -107,10 +142,18 @@ void cpu_idle(void)
				play_dead();
 
			local_irq_disable();
+			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
+
+			/*
+			 * In many cases the interrupt that ended idle
+			 * has already called exit_idle. But some idle
+			 * loops can be woken up without interrupt.
+			 */
+			__exit_idle();
		}
		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bd387e8f73b..fbde94f1447 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,8 +35,10 @@
 #include <linux/tick.h>
 #include <linux/prctl.h>
 #include <linux/uaccess.h>
+#include <linux/idle.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <trace/pm.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -51,37 +53,34 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
 asmlinkage extern void ret_from_fork(void);
 
 DEFINE_PER_CPU(unsigned long, old_rsp);
 static DEFINE_PER_CPU(unsigned char, is_idle);
 
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-	atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
-
 void enter_idle(void)
 {
	percpu_write(is_idle, 1);
-	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+	/*
+	 * Trace last event before calling notifiers. Notifiers flush
+	 * data from buffers before going to idle.
+	 */
+	trace_pm_idle_entry();
+	notify_idle(IDLE_START);
 }
+EXPORT_SYMBOL_GPL(enter_idle);
 
-static void __exit_idle(void)
+void __exit_idle(void)
 {
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
-	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+	notify_idle(IDLE_END);
+	trace_pm_idle_exit();
 }
+EXPORT_SYMBOL_GPL(__exit_idle);
 
 /* Called from interrupts to signify idle end */
 void exit_idle(void)
@@ -91,6 +90,7 @@ void exit_idle(void)
		return;
	__exit_idle();
 }
+EXPORT_SYMBOL_GPL(exit_idle);
 
 #ifndef CONFIG_SMP
 static inline void play_dead(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72..ee3024d4f61 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <trace/syscall.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -152,6 +153,9 @@ static const int arg_offs_table[] = {
			 X86_EFLAGS_DF | X86_EFLAGS_OF | \
			 X86_EFLAGS_RF | X86_EFLAGS_AC))
 
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
 /*
  * Determines whether a value may be installed in a segment register.
  */
@@ -1361,6 +1365,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
	if (test_thread_flag(TIF_SINGLESTEP))
		regs->flags |= X86_EFLAGS_TF;
 
+	trace_syscall_entry(regs, regs->orig_ax);
+
	/* do the secure computing check first */
	secure_computing(regs->orig_ax);
@@ -1396,6 +1402,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
 {
	bool step;
 
+	trace_syscall_exit(regs->ax);
+
	if (unlikely(current->audit_context))
		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index de87d600829..5e74f6aa3c0 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -1,8 +1,11 @@
 /* System call table for x86-64. */
 
 #include <linux/linkage.h>
+#include <linux/module.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
+#include <linux/marker.h>
+#include <linux/kallsyms.h>
 #include <asm/asm-offsets.h>
 
 #define __NO_STUBS
@@ -27,3 +30,18 @@ const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/unistd_64.h>
 };
+
+void ltt_dump_sys_call_table(void *call_data)
+{
+	int i;
+	char namebuf[KSYM_NAME_LEN];
+
+	for (i = 0; i < __NR_syscall_max + 1; i++) {
+		sprint_symbol(namebuf, (unsigned long)sys_call_table[i]);
+		__trace_mark(0, syscall_state, sys_call_table,
+			call_data,
+			"id %d address %p symbol %s",
+			i, (void*)sys_call_table[i], namebuf);
+	}
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
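Note: ptrace.c now emits syscall_entry/syscall_exit events from the slow syscall path, and syscall_64.c gains ltt_dump_sys_call_table() so a tracer can record the id-to-symbol mapping once at trace start instead of resolving names per event. A sketch of a consumer, with the probe signature inferred from the call sites above (pt_regs plus syscall id on entry, return value on exit) rather than from any header shown here:

	/* Hypothetical probe: inspect each syscall id as it enters. */
	static void probe_syscall_entry(struct pt_regs *regs, long id)
	{
		/* record "syscall id entered" in the trace buffers */
	}

	/* in tracer init code: register_trace_syscall_entry(probe_syscall_entry); */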
diff --git a/arch/x86/kernel/trace-clock.c b/arch/x86/kernel/trace-clock.c
new file mode 100644
index 00000000000..47539e28276
--- /dev/null
+++ b/arch/x86/kernel/trace-clock.c
@@ -0,0 +1,302 @@
+/*
+ * arch/x86/kernel/trace-clock.c
+ *
+ * Trace clock for x86.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>, October 2008
+ */
+
+#include <linux/module.h>
+#include <linux/trace-clock.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/cpu.h>
+#include <linux/posix-timers.h>
+#include <asm/vgtod.h>
+
+static cycles_t trace_clock_last_tsc;
+static DEFINE_PER_CPU(struct timer_list, update_timer);
+static DEFINE_SPINLOCK(async_tsc_lock);
+static int async_tsc_refcount;	/* Number of readers */
+static int async_tsc_enabled;	/* Async TSC enabled on all online CPUs */
+
+int _trace_clock_is_sync = 1;
+EXPORT_SYMBOL_GPL(_trace_clock_is_sync);
+
+/*
+ * Is the trace clock being used by user-space ? We leave the trace clock active
+ * as soon as user-space starts using it. We never unref the trace clock
+ * reference taken by user-space.
+ */
+static atomic_t user_trace_clock_ref;
+
+/*
+ * Called by check_tsc_sync_source from CPU hotplug.
+ */
+void set_trace_clock_is_sync(int state)
+{
+	_trace_clock_is_sync = state;
+	update_trace_clock_is_sync_vdso();
+}
+
+#if BITS_PER_LONG == 64
+static cycles_t read_last_tsc(void)
+{
+	return trace_clock_last_tsc;
+}
+#else
+/*
+ * A cmpxchg64 update can happen concurrently. Based on the assumption that
+ * two cmpxchg64 will never update it to the same value (the count always
+ * increases), reading it twice insures that we read a coherent value with the
+ * same "sequence number".
+ */
+static cycles_t read_last_tsc(void)
+{
+	cycles_t val1, val2;
+
+	val1 = trace_clock_last_tsc;
+	for (;;) {
+		val2 = val1;
+		barrier();
+		val1 = trace_clock_last_tsc;
+		if (likely(val1 == val2))
+			break;
+	}
+	return val1;
+}
+#endif
+
+/*
+ * Support for architectures with non-sync TSCs.
+ * When the local TSC is discovered to lag behind the highest TSC counter, we
+ * increment the TSC count of an amount that should be, ideally, lower than the
+ * execution time of this routine, in cycles : this is the granularity we look
+ * for : we must be able to order the events.
+ */
+notrace cycles_t trace_clock_async_tsc_read(void)
+{
+	cycles_t new_tsc, last_tsc;
+
+	WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+	new_tsc = get_cycles();
+	last_tsc = read_last_tsc();
+	do {
+		if (new_tsc < last_tsc)
+			new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+		/*
+		 * If cmpxchg fails with a value higher than the new_tsc, don't
+		 * retry : the value has been incremented and the events
+		 * happened almost at the same time.
+		 * We must retry if cmpxchg fails with a lower value :
+		 * it means that we are the CPU with highest frequency and
+		 * therefore MUST update the value.
+		 */
+		last_tsc = cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc);
+	} while (unlikely(last_tsc < new_tsc));
+	return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
+
+static void update_timer_ipi(void *info)
+{
+	(void)trace_clock_async_tsc_read();
+}
+
+/*
+ * update_timer_fct : - Timer function to resync the clocks
+ * @data: unused
+ *
+ * Fires every jiffy.
+ */
+static void update_timer_fct(unsigned long data)
+{
+	(void)trace_clock_async_tsc_read();
+	mod_timer_pinned(&per_cpu(update_timer, smp_processor_id()),
+			 jiffies + 1);
+}
+
+static void enable_trace_clock(int cpu)
+{
+	init_timer(&per_cpu(update_timer, cpu));
+	per_cpu(update_timer, cpu).function = update_timer_fct;
+	per_cpu(update_timer, cpu).expires = jiffies + 1;
+	smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
+	add_timer_on(&per_cpu(update_timer, cpu), cpu);
+}
+
+static void disable_trace_clock(int cpu)
+{
+	del_timer_sync(&per_cpu(update_timer, cpu));
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD)
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+				unsigned long action,
+				void *hcpu)
+{
+	unsigned int hotcpu = (unsigned long)hcpu;
+	int cpu;
+
+	spin_lock(&async_tsc_lock);
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		/*
+		 * trace_clock_is_sync() is updated by set_trace_clock_is_sync()
+		 * code, protected by cpu hotplug disable.
+		 * It is ok to let the hotplugged CPU read the timebase before
+		 * the CPU_ONLINE notification. It's just there to give a
+		 * maximum bound to the TSC error.
+		 */
+		if (async_tsc_refcount && !trace_clock_is_sync()) {
+			if (!async_tsc_enabled) {
+				async_tsc_enabled = 1;
+				for_each_online_cpu(cpu)
+					enable_trace_clock(cpu);
+			} else {
+				enable_trace_clock(hotcpu);
+			}
+		}
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (!async_tsc_refcount && num_online_cpus() == 1)
+			set_trace_clock_is_sync(1);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		/*
+		 * We cannot stop the trace clock on other CPUs when readers are
+		 * active even if we go back to a synchronized state (1 CPU)
+		 * because the CPU left could be the one lagging behind.
+		 */
+		if (async_tsc_refcount && async_tsc_enabled)
+			disable_trace_clock(hotcpu);
+		if (!async_tsc_refcount && num_online_cpus() == 1)
+			set_trace_clock_is_sync(1);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	spin_unlock(&async_tsc_lock);
+
+	return NOTIFY_OK;
+}
+
+int get_trace_clock(void)
+{
+	int cpu;
+
+	if (!trace_clock_is_sync()) {
+		printk(KERN_WARNING
+			"Trace clock falls back on cache-line bouncing\n"
+			"workaround due to non-synchronized TSCs.\n"
+			"This workaround preserves event order across CPUs.\n"
+			"Please consider disabling Speedstep or PowerNow and\n"
+			"using kernel parameters "
+			"\"force_tsc_sync=1 idle=poll\"\n"
+			"for accurate and fast tracing clock source.\n");
+	}
+
+	get_online_cpus();
+	spin_lock(&async_tsc_lock);
+	if (async_tsc_refcount++ || trace_clock_is_sync())
+		goto end;
+
+	async_tsc_enabled = 1;
+	for_each_online_cpu(cpu)
+		enable_trace_clock(cpu);
+end:
+	spin_unlock(&async_tsc_lock);
+	put_online_cpus();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+	int cpu;
+
+	get_online_cpus();
+	spin_lock(&async_tsc_lock);
+	WARN_ON(async_tsc_refcount <= 0);
+	if (async_tsc_refcount != 1 || !async_tsc_enabled)
+		goto end;
+
+	for_each_online_cpu(cpu)
+		disable_trace_clock(cpu);
+	async_tsc_enabled = 0;
+end:
+	async_tsc_refcount--;
+	if (!async_tsc_refcount && num_online_cpus() == 1)
+		set_trace_clock_is_sync(1);
+	spin_unlock(&async_tsc_lock);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
+
+static int posix_get_trace(clockid_t which_clock, struct timespec *tp)
+{
+	union lttng_timespec *lts = (union lttng_timespec *) tp;
+	int ret;
+
+	/*
+	 * Yes, there is a race here that would lead to refcount being
+	 * incremented more than once, but all we care is to leave the trace
+	 * clock active forever, so precise accounting is not needed.
+	 */
+	if (unlikely(!atomic_read(&user_trace_clock_ref))) {
+		ret = get_trace_clock();
+		if (ret)
+			return ret;
+		atomic_inc(&user_trace_clock_ref);
+	}
+	lts->lttng_ts = trace_clock_read64();
+	return 0;
+}
+
+static int posix_get_trace_freq(clockid_t which_clock, struct timespec *tp)
+{
+	union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+	lts->lttng_ts = trace_clock_frequency();
+	return 0;
+}
+
+static int posix_get_trace_res(const clockid_t which_clock, struct timespec *tp)
+{
+	union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+	lts->lttng_ts = TRACE_CLOCK_RES;
+	return 0;
+}
+
+static __init int init_unsync_trace_clock(void)
+{
+	struct k_clock clock_trace = {
+		.clock_getres = posix_get_trace_res,
+		.clock_get = posix_get_trace,
+	};
+	struct k_clock clock_trace_freq = {
+		.clock_getres = posix_get_trace_res,
+		.clock_get = posix_get_trace_freq,
+	};
+
+	register_posix_clock(CLOCK_TRACE, &clock_trace);
+	register_posix_clock(CLOCK_TRACE_FREQ, &clock_trace_freq);
+
+	hotcpu_notifier(hotcpu_callback, 4);
+	return 0;
+}
+early_initcall(init_unsync_trace_clock);
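Note: for readers who want to experiment with the cache-line-bouncing fallback outside the kernel, here is a user-space analogue of trace_clock_async_tsc_read() built on GCC atomics. __rdtsc() is the compiler intrinsic; MIN_PROBE_TICKS stands in for TRACE_CLOCK_MIN_PROBE_DURATION, whose actual definition lives in a header not shown in this patch:

	#include <stdint.h>
	#include <x86intrin.h>			/* __rdtsc() */

	#define MIN_PROBE_TICKS 200UL		/* assumed probe-duration bound */

	static uint64_t last_ts;		/* highest timestamp published */

	uint64_t monotonic_tsc_read(void)
	{
		uint64_t last = __atomic_load_n(&last_ts, __ATOMIC_RELAXED);
		uint64_t new_ts = __rdtsc();

		for (;;) {
			if (new_ts < last)	/* local TSC lags: step past max */
				new_ts = last + MIN_PROBE_TICKS;
			if (__atomic_compare_exchange_n(&last_ts, &last, new_ts,
					0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
				break;	/* we published the new maximum */
			if (last >= new_ts)
				break;	/* another CPU won with a value at
					 * least as high; order is preserved */
			/* else our value is still the highest seen: retry */
		}
		return new_ts;
	}

The kernel version additionally pins a per-CPU timer that touches the shared value every jiffy, so a mostly-idle CPU's TSC cannot drift arbitrarily far behind before its next read; that is what enable_trace_clock() above sets up.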
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b67166f9d..d45030679f9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -31,6 +31,7 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <trace/trap.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -52,6 +53,7 @@
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/traps.h>
+#include <asm/unistd.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/mce.h>
@@ -76,11 +78,21 @@ char ignore_fpu_irq;
  * F0 0F bug workaround.
  */
 gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
+
+extern unsigned long sys_call_table[];
+extern unsigned long syscall_table_size;
+
 #endif
 
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
 EXPORT_SYMBOL_GPL(used_vectors);
 
+/*
+ * Also used in arch/x86/mm/fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
 static int ignore_nmis;
 
 int unknown_nmi_panic;
@@ -122,6 +134,8 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 {
	struct task_struct *tsk = current;
 
+	trace_trap_entry(regs, trapnr);
+
 #ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK) {
		/*
@@ -168,7 +182,7 @@ trap_signal:
		force_sig_info(signr, info, tsk);
	else
		force_sig(signr, tsk);
-	return;
+	goto end;
 
 kernel_trap:
	if (!fixup_exception(regs)) {
@@ -176,15 +190,17 @@ kernel_trap:
		tsk->thread.trap_no = trapnr;
		die(str, regs, error_code);
	}
-	return;
+	goto end;
 
 #ifdef CONFIG_X86_32
 vm86_trap:
	if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
						error_code, trapnr))
		goto trap_signal;
-	return;
+	goto end;
 #endif
+end:
+	trace_trap_exit();
 }
 
 #define DO_ERROR(trapnr, signr, str, name)				\
@@ -285,7 +301,9 @@ do_general_protection(struct pt_regs *regs, long error_code)
		printk("\n");
	}
 
+	trace_trap_entry(regs, 13);
	force_sig(SIGSEGV, tsk);
+	trace_trap_exit();
	return;
 
 #ifdef CONFIG_X86_32
@@ -371,9 +389,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 static notrace __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
+	trace_trap_entry(regs, 2);
+
	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
			NOTIFY_STOP)
-		return;
+		goto end;
 #ifdef CONFIG_MCA
	/*
	 * Might actually be able to figure out what the guilty party
@@ -381,7 +401,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
	 */
	if (MCA_bus) {
		mca_handle_nmi();
-		return;
+		goto end;
	}
 #endif
	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
@@ -392,19 +412,23 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
		panic("NMI: Not continuing");
 
	pr_emerg("Dazed and confused, but trying to continue\n");
+end:
+	trace_trap_exit();
 }
 
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
	unsigned char reason = 0;
 
+	trace_trap_entry(regs, 2);
+
	/*
	 * CPU-specific NMI must be processed before non-CPU-specific
	 * NMI, otherwise we may lose it, because the CPU-specific
	 * NMI can not be detected/processed on other CPUs.
	 */
	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
+		goto end;
 
	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
	raw_spin_lock(&nmi_reason_lock);
@@ -423,11 +447,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
		reassert_nmi();
 #endif
		raw_spin_unlock(&nmi_reason_lock);
-		return;
+		goto end;
	}
	raw_spin_unlock(&nmi_reason_lock);
 
	unknown_nmi_error(reason, regs);
+end:
+	trace_trap_exit();
 }
 
 dotraplinkage notrace __kprobes void
@@ -570,8 +596,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
	preempt_conditional_sti(regs);
 
	if (regs->flags & X86_VM_MASK) {
+		trace_trap_entry(regs, 1);
		handle_vm86_trap((struct kernel_vm86_regs *) regs,
				error_code, 1);
+		trace_trap_exit();
		preempt_conditional_cli(regs);
		return;
	}
@@ -589,13 +617,32 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
		regs->flags &= ~X86_EFLAGS_TF;
	}
	si_code = get_si_code(tsk->thread.debugreg6);
-	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
+	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) {
+		trace_trap_entry(regs, 1);
		send_sigtrap(tsk, regs, error_code, si_code);
+		trace_trap_exit();
+	}
	preempt_conditional_cli(regs);
 
	return;
 }
 
+#ifdef CONFIG_X86_32
+void ltt_dump_sys_call_table(void *call_data)
+{
+	int i;
+	char namebuf[KSYM_NAME_LEN];
+
+	for (i = 0; i < NR_syscalls; i++) {
+		sprint_symbol(namebuf, sys_call_table[i]);
+		__trace_mark(0, syscall_state, sys_call_table, call_data,
+			"id %d address %p symbol %s",
+			i, (void*)sys_call_table[i], namebuf);
+	}
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
+#endif
+
 /*
  * Note that we play around with the 'TS' bit in an attempt to get
  * the correct behaviour even in the presence of the asynchronous
@@ -701,11 +748,13 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 dotraplinkage void
 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 {
+	trace_trap_entry(regs, 16);
	conditional_sti(regs);
 #if 0
	/* No need to warn about this any longer. */
	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
 #endif
+	trace_trap_exit();
 }
 
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
@@ -738,6 +787,21 @@ void __math_state_restore(void)
	tsk->fpu_counter++;
 }
 
+void ltt_dump_idt_table(void *call_data)
+{
+	int i;
+	char namebuf[KSYM_NAME_LEN];
+
+	for (i = 0; i < IDT_ENTRIES; i++) {
+		unsigned long address = gate_offset(idt_table[i]);
+		sprint_symbol(namebuf, address);
+		__trace_mark(0, irq_state, idt_table, call_data,
+			"irq %d address %p symbol %s",
+			i, (void *)address, namebuf);
+	}
+}
+EXPORT_SYMBOL_GPL(ltt_dump_idt_table);
+
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
deleted file mode 100644
index 0aa5fed8b9e..00000000000
--- a/arch/x86/kernel/tsc_sync.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * check TSC synchronization.
- *
- * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
- *
- * We check whether all boot CPUs have their TSC's synchronized,
- * print a warning if not and turn off the TSC clock-source.
- *
- * The warp-check is point-to-point between two CPUs, the CPU
- * initiating the bootup is the 'source CPU', the freshly booting
- * CPU is the 'target CPU'.
- *
- * Only two CPUs may participate - they can enter in any order.
- * ( The serial nature of the boot logic and the CPU hotplug lock
- *   protects against more than 2 CPUs entering this code. )
- */
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/nmi.h>
-#include <asm/tsc.h>
-
-/*
- * Entry/exit counters that make sure that both CPUs
- * run the measurement code at once:
- */
-static __cpuinitdata atomic_t start_count;
-static __cpuinitdata atomic_t stop_count;
-
-/*
- * We use a raw spinlock in this exceptional case, because
- * we want to have the fastest, inlined, non-debug version
- * of a critical section, to be able to prove TSC time-warps:
- */
-static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-
-static __cpuinitdata cycles_t last_tsc;
-static __cpuinitdata cycles_t max_warp;
-static __cpuinitdata int nr_warps;
-
-/*
- * TSC-warp measurement loop running on both CPUs:
- */
-static __cpuinit void check_tsc_warp(void)
-{
-	cycles_t start, now, prev, end;
-	int i;
-
-	rdtsc_barrier();
-	start = get_cycles();
-	rdtsc_barrier();
-	/*
-	 * The measurement runs for 20 msecs:
-	 */
-	end = start + tsc_khz * 20ULL;
-	now = start;
-
-	for (i = 0; ; i++) {
-		/*
-		 * We take the global lock, measure TSC, save the
-		 * previous TSC that was measured (possibly on
-		 * another CPU) and update the previous TSC timestamp.
-		 */
-		arch_spin_lock(&sync_lock);
-		prev = last_tsc;
-		rdtsc_barrier();
-		now = get_cycles();
-		rdtsc_barrier();
-		last_tsc = now;
-		arch_spin_unlock(&sync_lock);
-
-		/*
-		 * Be nice every now and then (and also check whether
-		 * measurement is done [we also insert a 10 million
-		 * loops safety exit, so we dont lock up in case the
-		 * TSC readout is totally broken]):
-		 */
-		if (unlikely(!(i & 7))) {
-			if (now > end || i > 10000000)
-				break;
-			cpu_relax();
-			touch_nmi_watchdog();
-		}
-		/*
-		 * Outside the critical section we can now see whether
-		 * we saw a time-warp of the TSC going backwards:
-		 */
-		if (unlikely(prev > now)) {
-			arch_spin_lock(&sync_lock);
-			max_warp = max(max_warp, prev - now);
-			nr_warps++;
-			arch_spin_unlock(&sync_lock);
-		}
-	}
-	WARN(!(now-start),
-		"Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
-		now-start, end-start);
-}
-
-/*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
- */
-void __cpuinit check_tsc_sync_source(int cpu)
-{
-	int cpus = 2;
-
-	/*
-	 * No need to check if we already know that the TSC is not
-	 * synchronized:
-	 */
-	if (unsynchronized_tsc())
-		return;
-
-	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
-		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
-			pr_info(
-			"Skipped synchronization checks as TSC is reliable.\n");
-		return;
-	}
-
-	/*
-	 * Reset it - in case this is a second bootup:
-	 */
-	atomic_set(&stop_count, 0);
-
-	/*
-	 * Wait for the target to arrive:
-	 */
-	while (atomic_read(&start_count) != cpus-1)
-		cpu_relax();
-	/*
-	 * Trigger the target to continue into the measurement too:
-	 */
-	atomic_inc(&start_count);
-
-	check_tsc_warp();
-
-	while (atomic_read(&stop_count) != cpus-1)
-		cpu_relax();
-
-	if (nr_warps) {
-		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
-			smp_processor_id(), cpu);
-		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
-			   "turning off TSC clock.\n", max_warp);
-		mark_tsc_unstable("check_tsc_sync_source failed");
-	} else {
-		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
-			smp_processor_id(), cpu);
-	}
-
-	/*
-	 * Reset it - just in case we boot another CPU later:
-	 */
-	atomic_set(&start_count, 0);
-	nr_warps = 0;
-	max_warp = 0;
-	last_tsc = 0;
-
-	/*
-	 * Let the target continue with the bootup:
-	 */
-	atomic_inc(&stop_count);
-}
-
-/*
- * Freshly booted CPUs call into this:
- */
-void __cpuinit check_tsc_sync_target(void)
-{
-	int cpus = 2;
-
-	if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
-		return;
-
-	/*
-	 * Register this CPU's participation and wait for the
-	 * source CPU to start the measurement:
-	 */
-	atomic_inc(&start_count);
-	while (atomic_read(&start_count) != cpus)
-		cpu_relax();
-
-	check_tsc_warp();
-
-	/*
-	 * Ok, we are done:
-	 */
-	atomic_inc(&stop_count);
-
-	/*
-	 * Wait for the source CPU to print stuff:
-	 */
-	while (atomic_read(&stop_count) != cpus)
-		cpu_relax();
-}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c4b69..df18f14c473 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,6 +44,8 @@
 #include <asm/desc.h>
 #include <asm/topology.h>
 #include <asm/vgtod.h>
+#include <asm/trace-clock.h>
+#include <asm/timer.h>
 
 #define __vsyscall(nr) \
		__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
@@ -61,6 +63,7 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
 {
	.lock = SEQLOCK_UNLOCKED,
	.sysctl_enabled = 1,
+	.trace_clock_is_sync = 1,
 };
 
 void update_vsyscall_tz(void)
@@ -73,6 +76,16 @@ void update_vsyscall_tz(void)
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
+void update_trace_clock_is_sync_vdso(void)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+	vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
+	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+EXPORT_SYMBOL_GPL(update_trace_clock_is_sync_vdso);
+
 void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
			struct clocksource *clock, u32 mult)
 {
@@ -89,6 +102,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
	vsyscall_gtod_data.wall_to_monotonic = *wtm;
	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
+	vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
 
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
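Note: trace_clock_is_sync is published under vsyscall_gtod_data.lock so that clock reads racing with an update still see a coherent snapshot. The read side (in the vDSO/vsyscall page, not part of this hunk) would follow the usual seqlock pattern; a sketch using the kernel's seqlock API:

	unsigned seq;
	int is_sync;

	do {
		seq = read_seqbegin(&vsyscall_gtod_data.lock);
		is_sync = vsyscall_gtod_data.trace_clock_is_sync;
	} while (read_seqretry(&vsyscall_gtod_data.lock, seq));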