Diffstat (limited to 'arch')
146 files changed, 2838 insertions, 359 deletions
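Nearly every architecture touched below follows the same recipe: reserve a TIF_KERNEL_TRACE thread-info flag, define the matching _TIF_KERNEL_TRACE mask bit, and fold that bit into the work masks tested on syscall entry/exit so that an active tracer forces the slow path. A minimal sketch of the recipe (bit positions are illustrative; each architecture below picks its own free bit):

/* Illustrative only: flag numbering differs per architecture. */
#define TIF_SYSCALL_TRACE	8	/* ptrace syscall tracing */
#define TIF_KERNEL_TRACE	7	/* kernel trace active */

#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
#define _TIF_KERNEL_TRACE	(1 << TIF_KERNEL_TRACE)

/* Entry/exit code takes the tracing slow path when either bit is set. */
#define _TIF_TRACE_WORK		(_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE)

static inline int syscall_tracing_active(unsigned long ti_flags)
{
	return (ti_flags & _TIF_TRACE_WORK) != 0;
}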
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 6f32f9c84a2..1ba67b14578 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -56,7 +56,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (2*PAGE_SIZE) -#define PREEMPT_ACTIVE 0x40000000 +#define PREEMPT_ACTIVE 0x10000000 /* * Thread information flags: @@ -79,6 +79,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TIF_UAC_SIGBUS 12 #define TIF_MEMDIE 13 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */ +#define TIF_KERNEL_TRACE 15 /* Kernel tracing of syscalls */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -87,6 +88,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) #define _TIF_FREEZE (1<<TIF_FREEZE) /* Work to do on interrupt/exception return. */ @@ -95,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); /* Work to do on any return to userspace. */ #define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ - | _TIF_SYSCALL_TRACE) + | _TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE) #define ALPHA_UAC_SHIFT 10 #define ALPHA_UAC_MASK (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a53245a9ef5..d8cdd7b00ac 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -13,6 +13,7 @@ config ARM select HAVE_KPROBES if (!XIP_KERNEL && !THUMB2_KERNEL) select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) + select HAVE_LTT_DUMP_TABLES select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 494224a9b45..23f8c4b764e 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -147,4 +147,8 @@ config DEBUG_S3C_UART The uncompressor code port configuration is now handled by CONFIG_S3C_LOWLEVEL_UART_PORT. 
+config DEBUG_TRACE_CLOCK + bool "Debug trace clock" + depends on HAVE_TRACE_CLOCK + endmenu diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h index 93d04acaa31..92f10cb5c70 100644 --- a/arch/arm/include/asm/a.out-core.h +++ b/arch/arm/include/asm/a.out-core.h @@ -32,11 +32,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT; dump->u_ssize = 0; - dump->u_debugreg[0] = tsk->thread.debug.bp[0].address; - dump->u_debugreg[1] = tsk->thread.debug.bp[1].address; - dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn.arm; - dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn.arm; - dump->u_debugreg[4] = tsk->thread.debug.nsaved; + memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg)); if (dump->start_stack < 0x04000000) dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT; diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 9a87823642d..cf7dc925c63 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -395,7 +395,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, break; case 2: do { - asm volatile("@ __cmpxchg1\n" + asm volatile("@ __cmpxchg2\n" " ldrexh %1, [%2]\n" " mov %0, #0\n" " teq %1, %3\n" diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 7b5cc8dae06..1f925b8bcd5 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -129,6 +129,7 @@ extern void vfp_flush_hwstate(struct thread_info *); /* * thread information flags: * TIF_SYSCALL_TRACE - syscall trace active + * TIF_KERNEL_TRACE - kernel trace active * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -138,6 +139,7 @@ extern void vfp_flush_hwstate(struct thread_info *); #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_KERNEL_TRACE 7 #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_USING_IWMMXT 17 @@ -149,6 +151,7 @@ extern void vfp_flush_hwstate(struct thread_info *); #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) diff --git a/arch/arm/include/asm/trace-clock.h b/arch/arm/include/asm/trace-clock.h new file mode 100644 index 00000000000..8a13b7dedde --- /dev/null +++ b/arch/arm/include/asm/trace-clock.h @@ -0,0 +1 @@ +#include <plat/trace-clock.h> diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index c891eb76c0e..92684d2e905 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -397,6 +397,8 @@ #define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) #define __NR_prlimit64 (__NR_SYSCALL_BASE+369) +#define __NR_syscall_max 370 + /* * The following SWIs are ARM private. */ diff --git a/arch/arm/include/asm/user.h b/arch/arm/include/asm/user.h index 05ac4b06876..35917b3a97f 100644 --- a/arch/arm/include/asm/user.h +++ b/arch/arm/include/asm/user.h @@ -71,7 +71,7 @@ struct user{ /* the registers. 
*/ unsigned long magic; /* To uniquely identify a core file */ char u_comm[32]; /* User command that was responsible */ - int u_debugreg[8]; + int u_debugreg[8]; /* No longer used */ struct user_fp u_fp; /* FP state */ struct user_fp_struct * u_fp0;/* Used by gdb to help find the values for */ /* the FP registers. */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 1e7b04a40a3..1edf1deadf8 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -43,6 +43,8 @@ ret_fast_syscall: * Ok, we need to do extra processing, enter the slow path. */ fast_work_pending: + tst r1, #_TIF_KERNEL_TRACE @ flag can be set asynchronously + bne __sys_trace_return str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: tst r1, #_TIF_NEED_RESCHED @@ -85,8 +87,8 @@ ENTRY(ret_from_fork) get_thread_info tsk ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing mov why, #1 - tst r1, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? - beq ret_slow_syscall + tst r1, #_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE + beq ret_slow_syscall @ are we tracing syscalls? mov r1, sp mov r0, #1 @ trace exit [IP = 1] bl syscall_trace @@ -441,8 +443,8 @@ ENTRY(vector_swi) 1: #endif - tst r10, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? - bne __sys_trace + tst r10, #_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE + bne __sys_trace @ are we tracing syscalls? cmp scno, #NR_syscalls @ check upper syscall limit adr lr, BSYM(ret_fast_syscall) @ return address diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 94bbedbed63..fe2277c5d8c 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -30,6 +30,7 @@ #include <linux/uaccess.h> #include <linux/random.h> #include <linux/hw_breakpoint.h> +#include <trace/sched.h> #include <asm/cacheflush.h> #include <asm/leds.h> @@ -45,6 +46,8 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif +DEFINE_TRACE(sched_kthread_create); + static const char *processor_modes[] = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" , "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26", @@ -442,6 +445,7 @@ asm( ".pushsection .text\n" pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct pt_regs regs; + long pid; memset(®s, 0, sizeof(regs)); @@ -452,7 +456,10 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.ARM_pc = (unsigned long)kernel_thread_helper; regs.ARM_cpsr = regs.ARM_r7 | PSR_I_BIT; - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + pid = do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + + trace_sched_kthread_create(fn, pid); + return pid; } EXPORT_SYMBOL(kernel_thread); diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2bf27f364d0..03438e9cc06 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -21,10 +21,15 @@ #include <linux/uaccess.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/module.h> +#include <linux/marker.h> +#include <linux/kallsyms.h> +#include <trace/syscall.h> #include <asm/pgtable.h> #include <asm/system.h> #include <asm/traps.h> +#include <asm/unistd.h> #define REG_PC 15 #define REG_PSR 16 @@ -52,6 +57,30 @@ #define BREAKINST_THUMB 0xde01 #endif +DEFINE_TRACE(syscall_entry); +DEFINE_TRACE(syscall_exit); + +extern unsigned long sys_call_table[]; + +void ltt_dump_sys_call_table(void *call_data) +{ + int i; + char namebuf[KSYM_NAME_LEN]; 
+ + for (i = 0; i < __NR_syscall_max + 1; i++) { + sprint_symbol(namebuf, sys_call_table[i]); + __trace_mark(0, syscall_state, sys_call_table, call_data, + "id %d address %p symbol %s", + i, (void*)sys_call_table[i], namebuf); + } +} +EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table); + +void ltt_dump_idt_table(void *call_data) +{ +} +EXPORT_SYMBOL_GPL(ltt_dump_idt_table); + struct pt_regs_offset { const char *name; int offset; @@ -788,6 +817,11 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) { unsigned long ip; + if (!why) + trace_syscall_entry(regs, scno); + else + trace_syscall_exit(regs->ARM_r0); + if (!test_thread_flag(TIF_SYSCALL_TRACE)) return scno; if (!(current->ptrace & PT_PTRACED)) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 21ac43f1c2d..41eb77da882 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -23,6 +23,7 @@ #include <linux/kexec.h> #include <linux/delay.h> #include <linux/init.h> +#include <trace/trap.h> #include <linux/sched.h> #include <asm/atomic.h> @@ -35,6 +36,9 @@ #include "signal.h" +DEFINE_TRACE(trap_entry); +DEFINE_TRACE(trap_exit); + static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; void *vectors_page; @@ -296,7 +300,11 @@ void arm_notify_die(const char *str, struct pt_regs *regs, current->thread.error_code = err; current->thread.trap_no = trap; + trace_trap_entry(regs, current->thread.trap_no); + force_sig_info(info->si_signo, info, current); + + trace_trap_exit(); } else { die(str, regs, err); } diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 898fffe0e9c..1d6f14a584f 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -180,6 +180,7 @@ obj-$(CONFIG_MACH_OMAP_3430SDP) += board-3430sdp.o \ hsmmc.o \ board-flash.o obj-$(CONFIG_MACH_NOKIA_N8X0) += board-n8x0.o +obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o obj-$(CONFIG_MACH_NOKIA_RM680) += board-rm680.o \ sdram-nokia.o \ hsmmc.o diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c index b2b1e37bb6b..b10d9efd6db 100644 --- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c +++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c @@ -24,6 +24,7 @@ #include <plat/clock.h> #include <plat/sram.h> #include <plat/sdrc.h> +#include <asm/trace-clock.h> #include "clock.h" #include "clock3xxx.h" @@ -79,6 +80,8 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate) unlock_dll = 1; } + cpu_hz = arm_fck_p->rate; + /* * XXX This only needs to be done when the CPU frequency changes */ diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c index 287abc48092..8971015538a 100644 --- a/arch/arm/mach-omap2/clock34xx.c +++ b/arch/arm/mach-omap2/clock34xx.c @@ -18,6 +18,7 @@ #undef DEBUG #include <linux/kernel.h> +#include <linux/module.h> #include <linux/clk.h> #include <linux/io.h> @@ -94,6 +95,9 @@ const struct clkops clkops_omap3430es2_dss_usbhost_wait = { .find_companion = omap2_clk_dflt_find_companion, }; +unsigned long long cpu_hz; +EXPORT_SYMBOL(cpu_hz); + /** * omap3430es2_clk_hsotgusb_find_idlest - return CM_IDLEST info for HSOTGUSB * @clk: struct clk * being enabled diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 2f864e4b085..dcb1dd36c24 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -29,6 +29,7 @@ #include <linux/delay.h> #include <linux/slab.h> #include <linux/console.h> +#include <trace/pm.h> #include <plat/sram.h> #include 
"clockdomain.h" @@ -41,6 +42,8 @@ #include <asm/tlbflush.h> +#include <asm/trace-clock.h> + #include "cm2xxx_3xxx.h" #include "cm-regbits-34xx.h" #include "prm-regbits-34xx.h" @@ -80,6 +83,11 @@ struct power_state { struct list_head node; }; +DEFINE_TRACE(pm_idle_entry); +DEFINE_TRACE(pm_idle_exit); +DEFINE_TRACE(pm_suspend_entry); +DEFINE_TRACE(pm_suspend_exit); + static LIST_HEAD(pwrst_list); static void (*_omap_sram_idle)(u32 *addr, int save_state); @@ -519,8 +527,23 @@ static void omap3_pm_idle(void) if (omap_irq_pending() || need_resched()) goto out; + trace_pm_idle_entry(); + save_sync_trace_clock(); + omap_sram_idle(); + /* + * Resyncing the trace clock should ideally be done much sooner. When + * we arrive here, there are already some interrupt handlers which have + * run before us, using potentially wrong timestamps. This leads + * to problems when restarting the clock (and synchronizing on the 32k + * clock) if the cycle counter was still active. + * resync_track_clock must ensure that timestamps never ever go + * backward. + */ + resync_trace_clock(); + trace_pm_idle_exit(); + out: local_fiq_enable(); local_irq_enable(); @@ -550,7 +573,11 @@ static int omap3_pm_suspend(void) omap_uart_prepare_suspend(); omap3_intc_suspend(); - omap_sram_idle(); + trace_pm_suspend_entry(); + save_sync_trace_clock(); + omap_sram_idle(); + resync_trace_clock(); + trace_pm_suspend_exit(); restore: /* Restore next_pwrsts */ diff --git a/arch/arm/mach-omap2/trace-clock.c b/arch/arm/mach-omap2/trace-clock.c new file mode 100644 index 00000000000..3db1cdb8d59 --- /dev/null +++ b/arch/arm/mach-omap2/trace-clock.c @@ -0,0 +1,726 @@ +/* + * arch/arm/mach-omap2/trace-clock.c + * + * Trace clock for ARM OMAP3 + * + * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> 2009 + */ + +#include <linux/module.h> +#include <linux/clocksource.h> +#include <linux/timer.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/cpufreq.h> +#include <linux/err.h> + +#include <plat/clock.h> +#include <asm/trace-clock.h> +#include <asm/pmu.h> + +/* depends on CONFIG_OMAP_32K_TIMER */ +/* Need direct access to the clock from arch/arm/mach-omap2/timer-gp.c */ +static struct clocksource *clock; + +DEFINE_PER_CPU(struct pm_save_count, pm_save_count); +EXPORT_PER_CPU_SYMBOL_GPL(pm_save_count); + +static void clear_ccnt_ms(unsigned long data); + +/* According to timer32k.c, this is a 32768Hz clock, not a 32000Hz clock. */ +#define TIMER_32K_FREQ 32768 +#define TIMER_32K_SHIFT 15 + +/* + * Clear ccnt twice per 31-bit overflow, or 4 times per 32-bits period. + */ +static u32 clear_ccnt_interval; + +static DEFINE_SPINLOCK(trace_clock_lock); +static int trace_clock_refcount; + +static int print_info_done; + +static struct platform_device *reserved_pmu; + +static u32 get_mul_fact(u64 max_freq, u64 cur_freq) +{ + u64 rem; + + BUG_ON(cur_freq == 0); + return __iter_div_u64_rem(max_freq << 10, cur_freq, &rem); +} + +/* + * Cycle counter management. 
+ */ + +static inline void write_pmnc(u32 val) +{ + __asm__ __volatile__ ("mcr p15, 0, %0, c9, c12, 0" : : "r" (val)); +} + +static inline u32 read_pmnc(void) +{ + u32 val; + __asm__ __volatile__ ("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + return val; +} + +static inline void write_ctens(u32 val) +{ + __asm__ __volatile__ ("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); +} + +static inline u32 read_ctens(void) +{ + u32 val; + __asm__ __volatile__ ("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + return val; +} + +static inline void write_intenc(u32 val) +{ + __asm__ __volatile__ ("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); +} + +static inline u32 read_intenc(void) +{ + u32 val; + __asm__ __volatile__ ("mrc p15, 0, %0, c9, c14, 2" : "=r" (val)); + return val; +} + +static inline void write_useren(u32 val) +{ + __asm__ __volatile__ ("mcr p15, 0, %0, c9, c14, 0" : : "r" (val)); +} + +static inline u32 read_useren(void) +{ + u32 val; + __asm__ __volatile__ ("mrc p15, 0, %0, c9, c14, 0" : "=r" (val)); + return val; +} + +/* + * Must disable counter before writing to it. + */ +static inline void write_ccnt(u32 val) +{ + __asm__ __volatile__ ("mcr p15, 0, %0, c9, c13, 0" : : "r" (val)); +} + +/* + * Periodical timer handler, clears ccnt most significant bit each half-period + * of 31-bit overflow. Makes sure the ccnt never overflows. + */ +static void clear_ccnt_ms(unsigned long data) +{ + struct pm_save_count *pm_count; + unsigned int cycles; + unsigned long flags; + int cpu; + + cpu = smp_processor_id(); + pm_count = &per_cpu(pm_save_count, cpu); + + local_irq_save(flags); + + if (!pm_count->fast_clock_ready) + goto end; + + isb(); /* clear the pipeline so we can execute ASAP */ + write_ctens(read_ctens() & ~(1 << 31)); /* disable counter */ + cycles = read_ccnt(); + write_ccnt(cycles & ~(1 << 31)); + isb(); + write_ctens(read_ctens() | (1 << 31)); /* enable counter */ + isb(); +end: + local_irq_restore(flags); + + mod_timer_pinned(&pm_count->clear_ccnt_ms_timer, + jiffies + clear_ccnt_interval); +} + +/* + * disabling interrupts to protect against concurrent IPI save/resync. + */ +void save_sync_trace_clock(void) +{ + struct pm_save_count *pm_count; + unsigned long flags; + int cpu; + + local_irq_save(flags); + cpu = smp_processor_id(); + pm_count = &per_cpu(pm_save_count, cpu); + raw_spin_lock(&pm_count->lock); + + if (!pm_count->refcount) + goto end; + + pm_count->ext_32k = clock->read(clock); + pm_count->int_fast_clock = trace_clock_read64(); +end: + raw_spin_unlock(&pm_count->lock); + + /* + * Only enable slow read after saving the clock values. + */ + barrier(); + pm_count->fast_clock_ready = 0; + + /* + * Disable counter to ensure there is no overflow while we are + * keeping track of time with ext. clock. + */ + write_ctens(read_ctens() & ~(1 << 31)); /* disable counter */ + local_irq_restore(flags); +} + +/* + * Called with preemption disabled. Read the external clock source directly + * and return corresponding time in fast clock source time frame. + * Called after time is saved and before it is resynced. + * Also used to periodically resync the drifting dvfs clock on external clock. + */ +u64 _trace_clock_read_slow(void) +{ + struct pm_save_count *pm_count; + u64 ref_time; + unsigned int count_32k; + int cpu; + + cpu = smp_processor_id(); + pm_count = &per_cpu(pm_save_count, cpu); + WARN_ON_ONCE(!pm_count->refcount); + + /* + * Set the timer's value MSBs to the same as current 32K timer. 
+ */ + ref_time = pm_count->int_fast_clock; + if (!pm_count->init_clock) + count_32k = clock->read(clock); + else + count_32k = pm_count->init_clock; + + /* + * Delta done on 32 bits, then cast to u64. Must guarantee + * that we are called often enough so the difference does not + * overflow 32 bits anyway. + */ + ref_time += (u64)(count_32k - pm_count->ext_32k) + * (cpu_hz >> TIMER_32K_SHIFT); + return ref_time; +} +EXPORT_SYMBOL_GPL(_trace_clock_read_slow); + +/* + * Resynchronize the per-cpu fast clock with the last save_sync values and the + * external clock. Called from PM (thread) context and IPI context. + */ +void resync_trace_clock(void) +{ + struct pm_save_count *pm_count; + struct tc_cur_freq *new_cf, *cf; + unsigned int new_index, index; + u64 ref_time; + unsigned long flags; + u32 regval; + int cpu; + + local_irq_save(flags); + cpu = smp_processor_id(); + pm_count = &per_cpu(pm_save_count, cpu); + raw_spin_lock(&pm_count->lock); + + if (!pm_count->refcount) + goto end; + + /* Let userspace access performance counter registers */ + regval = read_useren(); + regval |= (1 << 0); /* User mode enable */ + write_useren(regval); + + regval = read_intenc(); + regval |= (1 << 31); /* CCNT overflow interrupt disable */ + write_intenc(regval); + + regval = read_pmnc(); + regval |= (1 << 0); /* Enable all counters */ + regval &= ~(1 << 3); /* count every cycle */ + regval &= ~(1 << 5); /* Enable even in non-invasive debug prohib. */ + write_pmnc(regval); + + ref_time = _trace_clock_read_slow(); + + if (pm_count->init_clock) + pm_count->init_clock = 0; + + write_ctens(read_ctens() & ~(1 << 31)); /* disable counter */ + write_ccnt((u32)ref_time & ~(1 << 31)); + write_ctens(read_ctens() | (1 << 31)); /* enable counter */ + + _trace_clock_write_synthetic_tsc(ref_time); + + index = pm_count->index; + new_index = 1 - index; + cf = &pm_count->cf[index]; + new_cf = &pm_count->cf[new_index]; + new_cf->hw_base = ref_time; + new_cf->virt_base = ref_time; + new_cf->cur_cpu_freq = cpufreq_quick_get(cpu); + if (new_cf->cur_cpu_freq == 0) + new_cf->cur_cpu_freq = pm_count->max_cpu_freq; + new_cf->mul_fact = get_mul_fact(pm_count->max_cpu_freq, + new_cf->cur_cpu_freq); + new_cf->floor = max(ref_time, cf->floor); + barrier(); + pm_count->index = new_index; + barrier(); /* make clock ready before enabling */ + pm_count->fast_clock_ready = 1; + + /* Delete resync timer if present. Just done its job anyway. */ + if (pm_count->dvfs_count) + del_timer(&pm_count->clock_resync_timer); + pm_count->dvfs_count = 0; + + if (unlikely(!print_info_done)) { + printk(KERN_INFO "Trace clock using cycle counter at %llu HZ\n" + "saved 32k clk value 0x%08X, " + "saved cycle counter value 0x%016llX\n" + "synthetic value (write, read) 0x%016llX, 0x%016llX\n", + cpu_hz, + pm_count->ext_32k, + pm_count->int_fast_clock, + ref_time, trace_clock_read64()); + printk(KERN_INFO "Reference clock used : %s\n", clock->name); + print_info_done = 1; + } +end: + raw_spin_unlock(&pm_count->lock); + local_irq_restore(flags); +} + +/* + * Called with IRQ and FIQ off.
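resync_on_32k() below, and trace_clock_read64() in plat/trace-clock.h later in this patch, both apply the fixed-point rescaling that get_mul_fact() sets up. Restated as a sketch (field names follow struct tc_cur_freq; this is an illustrative helper, not the patch's literal code): with a 600000 kHz maximum and a current DVFS frequency of 300000 kHz, mul_fact = (600000 << 10) / 300000 = 2048, so each hardware cycle counts as exactly two max-frequency virtual cycles.

/* Sketch of the per-cpu scaled clock read; names are illustrative. */
static u64 scaled_trace_clock(u64 hw_now, const struct tc_cur_freq *cf)
{
	/* rescale hw cycles to max-frequency cycles, 10-bit fixed point */
	u64 v = (((hw_now - cf->hw_base) * cf->mul_fact) >> 10)
		+ cf->virt_base;

	/* clamp to the recorded floor so time never goes backward */
	return max(v, cf->floor);
}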
+ */ +static void resync_on_32k(struct pm_save_count *pm_count, int cpu, + unsigned int cached_freq, int new_freq) +{ + struct tc_cur_freq *new_cf, *cf; + u64 ref_time; + unsigned int new_index, index; + + index = pm_count->index; + + new_index = 1 - index; + cf = &pm_count->cf[index]; + new_cf = &pm_count->cf[new_index]; + ref_time = _trace_clock_read_slow(); + new_cf->hw_base = trace_clock_read_synthetic_tsc(); + new_cf->virt_base = ref_time; + if (cached_freq) + new_cf->cur_cpu_freq = cf->cur_cpu_freq; + else { + new_cf->cur_cpu_freq = new_freq; + if (new_cf->cur_cpu_freq == 0) + new_cf->cur_cpu_freq = pm_count->max_cpu_freq; + } + new_cf->mul_fact = get_mul_fact(pm_count->max_cpu_freq, + new_cf->cur_cpu_freq); + new_cf->floor = max((((new_cf->hw_base - cf->hw_base) + * cf->mul_fact) >> 10) + cf->virt_base, + cf->floor); + barrier(); + pm_count->index = new_index; +} + +/* + * Timer to resynchronize with ext. 32k clock after DVFS update (but not too + * often if flooded by DVFS updates). + * Necessary to deal with drift caused by DVFS updates. + * Per-cpu timer added by cpu freq events, single-shot. + */ +static void clock_resync_timer_fct(unsigned long data) +{ + struct pm_save_count *pm_count; + unsigned long flags; + int cpu; + + cpu = smp_processor_id(); + pm_count = &per_cpu(pm_save_count, cpu); + + local_irq_save(flags); + local_fiq_disable(); /* disable fiqs for floor value */ + + /* Need to resync if we had more than 1 dvfs event in period */ + if (pm_count->dvfs_count > 1) + resync_on_32k(pm_count, cpu, 1, 0); + pm_count->dvfs_count = 0; + + local_fiq_enable(); + local_irq_restore(flags); +} + +static void prepare_timer(int cpu) +{ + struct pm_save_count *pm_count; + + pm_count = &per_cpu(pm_save_count, cpu); + init_timer_deferrable(&pm_count->clear_ccnt_ms_timer); + pm_count->clear_ccnt_ms_timer.function = clear_ccnt_ms; + pm_count->clear_ccnt_ms_timer.expires = jiffies + clear_ccnt_interval; + + init_timer_deferrable(&pm_count->clock_resync_timer); + pm_count->clock_resync_timer.function = clock_resync_timer_fct; +} + +static void enable_timer(int cpu) +{ + struct pm_save_count *pm_count; + + pm_count = &per_cpu(pm_save_count, cpu); + add_timer_on(&pm_count->clear_ccnt_ms_timer, cpu); +} + +static void disable_timer_ipi(void *info) +{ + save_sync_trace_clock(); +} + +static void disable_timer(int cpu) +{ + struct pm_save_count *pm_count; + + pm_count = &per_cpu(pm_save_count, cpu); + del_timer_sync(&pm_count->clear_ccnt_ms_timer); + if (pm_count->dvfs_count) + del_timer_sync(&pm_count->clock_resync_timer); + smp_call_function_single(cpu, disable_timer_ipi, NULL, 1); +} + +static void resync_ipi(void *info) +{ + resync_trace_clock(); +} + +void _start_trace_clock(void) +{ + struct pm_save_count *pm_count; + u32 ext_32k; + u64 old_fast_clock; + int cpu; + + ext_32k = clock->read(clock); + old_fast_clock = per_cpu(pm_save_count, 0).int_fast_clock; + + for_each_online_cpu(cpu) { + pm_count = &per_cpu(pm_save_count, cpu); + pm_count->ext_32k = ext_32k; + pm_count->int_fast_clock = old_fast_clock; + pm_count->refcount = 1; + pm_count->init_clock = ext_32k; + pm_count->dvfs_count = 0; + } + + on_each_cpu(resync_ipi, NULL, 1); + + get_synthetic_tsc(); + + for_each_online_cpu(cpu) { + prepare_timer(cpu); + enable_timer(cpu); + } +} + +void _stop_trace_clock(void) +{ + struct pm_save_count *pm_count; + int cpu; + + per_cpu(pm_save_count, 0).int_fast_clock = trace_clock_read64(); + + for_each_online_cpu(cpu) { + pm_count = &per_cpu(pm_save_count, cpu); + disable_timer(cpu); + 
pm_count->refcount = 0; + } + put_synthetic_tsc(); +} + +void start_trace_clock(void) +{ + spin_lock(&trace_clock_lock); + if (!trace_clock_refcount) + goto end; + _start_trace_clock(); +end: + spin_unlock(&trace_clock_lock); +} + +void stop_trace_clock(void) +{ + spin_lock(&trace_clock_lock); + if (!trace_clock_refcount) + goto end; + _stop_trace_clock(); +end: + spin_unlock(&trace_clock_lock); +} + +/* + * hotcpu_callback - CPU hotplug callback + * @nb: notifier block + * @action: hotplug action to take + * @hcpu: CPU number + * + * Start/stop timers for trace clock upon cpu hotplug. + * Also resync the clock. + * + * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD) + */ +static int __cpuinit hotcpu_callback(struct notifier_block *nb, + unsigned long action, + void *hcpu) +{ + struct pm_save_count *pm_count; + unsigned int hotcpu = (unsigned long)hcpu; + unsigned long flags; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + spin_lock(&trace_clock_lock); + if (trace_clock_refcount) { + pm_count = &per_cpu(pm_save_count, hotcpu); + local_irq_save(flags); + pm_count->ext_32k = clock->read(clock); + pm_count->int_fast_clock = trace_clock_read64(); + local_irq_restore(flags); + pm_count->refcount = 1; + pm_count->dvfs_count = 0; + prepare_timer(hotcpu); + } + spin_unlock(&trace_clock_lock); + break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + spin_lock(&trace_clock_lock); + if (trace_clock_refcount) { + resync_trace_clock(); + enable_timer(hotcpu); + } + spin_unlock(&trace_clock_lock); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + spin_lock(&trace_clock_lock); + if (trace_clock_refcount) + disable_timer(hotcpu); + spin_unlock(&trace_clock_lock); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + spin_lock(&trace_clock_lock); + if (trace_clock_refcount) { + pm_count = &per_cpu(pm_save_count, hotcpu); + pm_count->refcount = 0; + } + spin_unlock(&trace_clock_lock); + break; +#endif /* CONFIG_HOTPLUG_CPU */ + } + return NOTIFY_OK; +} + +int get_trace_clock(void) +{ + int ret = 0; + + spin_lock(&trace_clock_lock); + if (trace_clock_refcount) + goto end; + reserved_pmu = reserve_pmu(ARM_PMU_DEVICE_CPU); + if (IS_ERR_OR_NULL(reserved_pmu) && PTR_ERR(reserved_pmu) != -ENODEV) { + ret = -EBUSY; + goto end; + } + trace_clock_refcount++; + _start_trace_clock(); +end: + spin_unlock(&trace_clock_lock); + return ret; +} +EXPORT_SYMBOL_GPL(get_trace_clock); + +void put_trace_clock(void) +{ + spin_lock(&trace_clock_lock); + WARN_ON(trace_clock_refcount <= 0); + if (trace_clock_refcount != 1) + goto end; + _stop_trace_clock(); + release_pmu(reserved_pmu); +end: + trace_clock_refcount--; + spin_unlock(&trace_clock_lock); +} +EXPORT_SYMBOL_GPL(put_trace_clock); + +/* + * We do not use prechange hook to sample 2 clock values and average because + * locking wrt other timers can be difficult to get right. + * A bit more imprecision just increases the drift. We have a periodic timer + * in place to resynchronize periodically on the 32k clock anyway. + */ +static int cpufreq_trace_clock(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct pm_save_count *pm_count; + struct tc_cur_freq *new_cf, *cf; + unsigned long flags; + unsigned int new_index, index; + u64 post_val; + int cpu; + +#if 0 /* debug trace_mark */ + trace_mark(test, freq_change, + "%s cpu %u oldfreq %u newfreq %u const %u", + (val != CPUFREQ_POSTCHANGE) ? 
"prechange" : "postchange", + freq->cpu, freq->old, freq->new, + (freq->flags & CPUFREQ_CONST_LOOPS) ? 1 : 0); +#endif + + if (freq->flags & CPUFREQ_CONST_LOOPS) + return 0; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + local_irq_save(flags); + cpu = smp_processor_id(); + WARN_ON_ONCE(cpu != freq->cpu); + pm_count = &per_cpu(pm_save_count, cpu); + raw_spin_lock(&pm_count->lock); + + if (!pm_count->refcount) + goto end; + + /* + * Disable FIQs to ensure the floor value is indeed the + * floor. + */ + local_fiq_disable(); + + if (!pm_count->dvfs_count) { + resync_on_32k(pm_count, cpu, 0, freq->new); + pm_count->clock_resync_timer.expires = jiffies + + (TC_RESYNC_PERIOD * HZ / 1000); + add_timer_on(&pm_count->clock_resync_timer, cpu); + } else { + post_val = trace_clock_read_synthetic_tsc(); + /* disable irqs to ensure we are the only value modifier */ + index = pm_count->index; + new_index = 1 - index; + cf = &pm_count->cf[index]; + new_cf = &pm_count->cf[new_index]; + new_cf->hw_base = post_val; + new_cf->virt_base = (((post_val - cf->hw_base) + * cf->mul_fact) >> 10) + cf->virt_base; + new_cf->cur_cpu_freq = freq->new; + new_cf->mul_fact = get_mul_fact(pm_count->max_cpu_freq, + freq->new); + new_cf->floor = max((((post_val - cf->hw_base) + * cf->mul_fact) >> 10) + cf->virt_base, + cf->floor); + barrier(); + pm_count->index = new_index; + } + + local_fiq_enable(); + pm_count->dvfs_count++; +end: + raw_spin_unlock(&pm_count->lock); + local_irq_restore(flags); + return 0; +} + +static struct notifier_block cpufreq_trace_clock_nb = { + .notifier_call = cpufreq_trace_clock, +}; + +#ifdef CONFIG_DEBUG_TRACE_CLOCK +/* + * Clock expected to never overflow and never go backward. + */ +static DEFINE_PER_CPU(u64, last_clock_value); +static DEFINE_PER_CPU(u32, last_ccnt_value); +DEFINE_PER_CPU(unsigned int, last_clock_nest); +EXPORT_PER_CPU_SYMBOL_GPL(last_clock_nest); + +static int tc_print_done; + +/* + * Called with interrupts disabled. + */ +void trace_clock_debug(u64 value) +{ + int cpu; + + cpu = smp_processor_id(); + if (unlikely(per_cpu(last_clock_nest, cpu) != 1)) + return; /* fiq nesting, don't perform racy check */ + if (unlikely(!tc_print_done + && (per_cpu(last_clock_value, cpu) > value))) { + printk(KERN_WARNING "Trace clock going back last %llu new %llu " + "diff %llu last_ccnt %u ccnt %u\n", + (unsigned long long) per_cpu(last_clock_value, cpu), + (unsigned long long) value, + (unsigned long long) per_cpu(last_clock_value, cpu) + - value, + per_cpu(last_ccnt_value, cpu), + trace_clock_read32()); + tc_print_done = 1; + } + per_cpu(last_clock_value, cpu) = value; + per_cpu(last_ccnt_value, cpu) = trace_clock_read32();; +} +EXPORT_SYMBOL_GPL(trace_clock_debug); +#endif + +static __init int init_trace_clock(void) +{ + int cpu, ret; + u64 rem; + + ret = init_pmu(ARM_PMU_DEVICE_CPU); + if (ret && ret != -ENODEV) + return ret; + clock = get_clocksource_32k(); + /* + * clear_ccnt_interval based on the cpu fastest frequency. Never + * recomputed. 
+ */ + clear_ccnt_interval = __iter_div_u64_rem(HZ * (1ULL << 30), cpu_hz, + &rem); + printk(KERN_INFO "LTTng will clear ccnt top bit every %u jiffies.\n", + clear_ccnt_interval); + for_each_possible_cpu(cpu) { + per_cpu(pm_save_count, cpu).max_cpu_freq = + __iter_div_u64_rem(cpu_hz, 1000, &rem); + per_cpu(pm_save_count, cpu).lock = + __RAW_SPIN_LOCK_UNLOCKED(per_cpu(pm_save_count, + cpu).lock); + } + hotcpu_notifier(hotcpu_callback, 4); + cpufreq_register_notifier(&cpufreq_trace_clock_nb, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} +__initcall(init_trace_clock); diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index b6333ae3f92..99593d71a85 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -17,6 +17,10 @@ config ARCH_OMAP1 config ARCH_OMAP2PLUS bool "TI OMAP2/3/4" + select COMMON_CLKDEV + select HAVE_TRACE_CLOCK + select HAVE_TRACE_CLOCK_32_TO_64 + select OMAP_32K_TIMER select CLKDEV_LOOKUP select OMAP_DM_TIMER help diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 862dda95d61..8627a516688 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -107,6 +107,11 @@ static struct clocksource clocksource_32k = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +struct clocksource *get_clocksource_32k(void) +{ + return &clocksource_32k; +} + /* * Returns current time from boot in nsecs. It's OK for this to wrap * around for now, as it's just a relative time stamp. diff --git a/arch/arm/plat-omap/include/plat/clock.h b/arch/arm/plat-omap/include/plat/clock.h index 8eb0adab19e..2ad01f37a3c 100644 --- a/arch/arm/plat-omap/include/plat/clock.h +++ b/arch/arm/plat-omap/include/plat/clock.h @@ -297,4 +297,6 @@ extern const struct clkops clkops_null; extern struct clk dummy_ck; +struct clocksource *get_clocksource_32k(void); + #endif diff --git a/arch/arm/plat-omap/include/plat/trace-clock.h b/arch/arm/plat-omap/include/plat/trace-clock.h new file mode 100644 index 00000000000..7fcdbf98063 --- /dev/null +++ b/arch/arm/plat-omap/include/plat/trace-clock.h @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2009 Mathieu Desnoyers + * + * Trace clock ARM OMAP3 definitions. + */ + +#ifndef _ASM_ARM_TRACE_CLOCK_OMAP3_H +#define _ASM_ARM_TRACE_CLOCK_OMAP3_H + +#include <linux/clk.h> +#include <linux/timer.h> +#include <linux/percpu.h> +#include <plat/clock.h> + +/* + * Number of hardware clock bits. The higher order bits are expected to be 0. + * If the hardware clock source has more than 32 bits, the bits higher than the + * 32nd will be truncated by a cast to a 32 bits unsigned. Range : 1 - 32. + * (too few bits would be unrealistic though, since we depend on the timer to + * detect the overflows). + * OMAP3-specific : we clear bit 31 periodically so it never overflows. There + * is a hardware bug with CP14 and CP15 being executed at the same time a ccnt + * overflow occurs. + * + * Siarhei Siamashka <siarhei.siamashka@nokia.com> : + * Performance monitoring unit breaks if somebody is accessing CP14/CP15 + * coprocessor register exactly at the same time as CCNT overflows (regardless + * of the fact if generation of interrupts is enabled or not). A workaround + * suggested by ARM was to never allow it to overflow and reset it + * periodically. 
+ */ +#define TC_HW_BITS 31 + +/* Expected maximum interrupt latency in ms : 15ms, *2 for security */ +#define TC_EXPECTED_INTERRUPT_LATENCY 30 + +/* Resync with 32k clock each 100ms */ +#define TC_RESYNC_PERIOD 100 + +struct tc_cur_freq { + u64 cur_cpu_freq; /* in khz */ + /* cur time : (now - base) * (max_freq / cur_freq) + base */ + u32 mul_fact; /* (max_cpu_freq << 10) / cur_freq */ + u64 hw_base; /* stamp of last cpufreq change, hw cycles */ + u64 virt_base; /* same as above, virtual trace clock cycles */ + u64 floor; /* floor value, so time never goes back */ +}; + +/* 32KHz counter per-cpu count save upon PM sleep and cpufreq management */ +struct pm_save_count { + struct tc_cur_freq cf[2]; /* rcu-protected */ + unsigned int index; /* tc_cur_freq current read index */ + /* + * Is fast clock ready to be read ? Read with preemption off. Modified + * only by local CPU in thread and interrupt context or by start/stop + * when time is not read concurrently. + */ + int fast_clock_ready; + + u64 int_fast_clock; + struct timer_list clear_ccnt_ms_timer; + struct timer_list clock_resync_timer; + u32 ext_32k; + int refcount; + u32 init_clock; + raw_spinlock_t lock; /* spinlock only syncs the refcount */ + unsigned int dvfs_count; /* Number of DVFS updates in period */ + /* cpufreq management */ + u64 max_cpu_freq; /* in khz */ +}; + +DECLARE_PER_CPU(struct pm_save_count, pm_save_count); + +extern u64 trace_clock_read_synthetic_tsc(void); +extern void _trace_clock_write_synthetic_tsc(u64 value); +extern unsigned long long cpu_hz; + +DECLARE_PER_CPU(int, fast_clock_ready); +extern u64 _trace_clock_read_slow(void); + +/* + * ARM OMAP3 timers only return 32-bit values. We need to extend them to a + * 64-bit value, which is provided by trace-clock-32-to-64. + */ +extern u64 trace_clock_async_tsc_read(void); +/* + * Update done by the architecture upon wakeup.
+ */ +extern void _trace_clock_write_synthetic_tsc(u64 value); + +#ifdef CONFIG_DEBUG_TRACE_CLOCK +DECLARE_PER_CPU(unsigned int, last_clock_nest); +extern void trace_clock_debug(u64 value); +#else +static inline void trace_clock_debug(u64 value) +{ +} +#endif + +static inline u32 read_ccnt(void) +{ + u32 val; + __asm__ __volatile__ ("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + return val & ~(1 << TC_HW_BITS); +} + +static inline u32 trace_clock_read32(void) +{ + u32 val; + + isb(); + val = read_ccnt(); + isb(); + return val; +} + +static inline u64 trace_clock_read64(void) +{ + struct pm_save_count *pm_count; + struct tc_cur_freq *cf; + u64 val; +#ifdef CONFIG_DEBUG_TRACE_CLOCK + unsigned long flags; + + local_irq_save(flags); + per_cpu(last_clock_nest, smp_processor_id())++; + barrier(); +#endif + + preempt_disable(); + pm_count = &per_cpu(pm_save_count, smp_processor_id()); + if (likely(pm_count->fast_clock_ready)) { + cf = &pm_count->cf[ACCESS_ONCE(pm_count->index)]; + val = max((((trace_clock_read_synthetic_tsc() - cf->hw_base) + * cf->mul_fact) >> 10) + cf->virt_base, cf->floor); + } else + val = _trace_clock_read_slow(); + trace_clock_debug(val); + preempt_enable(); + +#ifdef CONFIG_DEBUG_TRACE_CLOCK + barrier(); + per_cpu(last_clock_nest, smp_processor_id())--; + local_irq_restore(flags); +#endif + return val; +} + +static inline u64 trace_clock_frequency(void) +{ + return cpu_hz; +} + +static inline u32 trace_clock_freq_scale(void) +{ + return 1; +} + +extern int get_trace_clock(void); +extern void put_trace_clock(void); +extern void get_synthetic_tsc(void); +extern void put_synthetic_tsc(void); + +extern void resync_trace_clock(void); +extern void save_sync_trace_clock(void); +extern void start_trace_clock(void); +extern void stop_trace_clock(void); + +static inline void set_trace_clock_is_sync(int state) +{ +} +#endif /* _ASM_ARM_TRACE_CLOCK_OMAP3_H */ diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h index 7a9c03dcb0b..6e882a9584e 100644 --- a/arch/avr32/include/asm/thread_info.h +++ b/arch/avr32/include/asm/thread_info.h @@ -66,7 +66,7 @@ static inline struct thread_info *current_thread_info(void) #endif /* !__ASSEMBLY__ */ -#define PREEMPT_ACTIVE 0x40000000 +#define PREEMPT_ACTIVE 0x10000000 /* * Thread information flags @@ -85,6 +85,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 /* restore signal mask in do_signal */ #define TIF_CPU_GOING_TO_SLEEP 8 /* CPU is entering sleep 0 mode */ #define TIF_NOTIFY_RESUME 9 /* callback before returning to user */ +#define TIF_KERNEL_TRACE 10 /* kernel trace active */ #define TIF_FREEZE 29 #define TIF_DEBUG 30 /* debugging enabled */ #define TIF_USERSPACE 31 /* true if FS sets userspace */ @@ -93,28 +94,32 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_BREAKPOINT (1 << TIF_BREAKPOINT) #define _TIF_SINGLE_STEP (1 << TIF_SINGLE_STEP) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_USERSPACE (1 << TIF_USERSPACE) /* Note: The masks below must never span more than 16 bits!
*/ /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ - ((1 << TIF_SIGPENDING) \ + (_TIF_SIGPENDING \ | _TIF_NOTIFY_RESUME \ - | (1 << TIF_NEED_RESCHED) \ - | (1 << TIF_POLLING_NRFLAG) \ - | (1 << TIF_BREAKPOINT) \ - | (1 << TIF_RESTORE_SIGMASK)) + | _TIF_NEED_RESCHED \ + | _TIF_POLLING_NRFLAG \ + | _TIF_BREAKPOINT \ + | _TIF_RESTORE_SIGMASK) /* work to do on any return to userspace */ -#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \ - _TIF_NOTIFY_RESUME) +#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | _TIF_SYSCALL_TRACE | \ + _TIF_NOTIFY_RESUME | _TIF_KERNEL_TRACE) /* work to do on return from debug mode */ -#define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT)) +#define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~_TIF_BREAKPOINT) #endif /* __ASM_AVR32_THREAD_INFO_H */ diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h index 02560fd8a12..510e54bfd0a 100644 --- a/arch/blackfin/include/asm/thread_info.h +++ b/arch/blackfin/include/asm/thread_info.h @@ -102,8 +102,9 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ #define TIF_FREEZE 6 /* is freezing for suspend */ #define TIF_IRQ_SYNC 7 /* sync pipeline stage */ -#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ -#define TIF_SINGLESTEP 9 +#define TIF_KERNEL_TRACE 8 /* kernel trace active */ +#define TIF_NOTIFY_RESUME 9 /* callback before returning to user */ +#define TIF_SINGLESTEP 10 /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -115,8 +116,9 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_IRQ_SYNC (1<<TIF_IRQ_SYNC) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) -#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK 0x0000FEFE /* work to do on interrupt/exception return */ #endif /* __KERNEL__ */ diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 91776069ca8..bc2024dbe32 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_KERNEL_TRACE 4 /* kernel trace active */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ @@ -92,12 +93,16 @@ struct thread_info { #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_FREEZE (1<<TIF_FREEZE) -#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ -#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK (0x0000FFFF & \ + ~(_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE)) +/* work to do on any return to u-space */ +#define _TIF_ALLWORK_MASK 0x0000FFFF #endif /* __KERNEL__ */ diff
--git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index 11f33ead29b..8adf256d213 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -112,6 +112,7 @@ register struct thread_info *__current_thread_info asm("gr15"); #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ +#define TIF_KERNEL_TRACE 6 /* kernel trace active */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_FREEZE 18 /* freezing for suspend */ @@ -122,10 +123,11 @@ register struct thread_info *__current_thread_info asm("gr15"); #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK 0x0000FFBE /* work to do on interrupt/exception return */ #define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ /* diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index d6f1784bfde..65685fa4554 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -90,18 +90,20 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 4 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ #define TIF_NOTIFY_RESUME 6 /* callback before returning to user */ +#define TIF_KERNEL_TRACE 7 /* kernel trace active */ #define TIF_FREEZE 16 /* is freezing for suspend */ /* as above, but as bit values */ -#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) -#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_FREEZE (1<<TIF_FREEZE) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) +#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK 0x0000FF7E /* work to do on interrupt/exception return */ #endif /* __KERNEL__ */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index b6a5ba2aca3..3206bb5575b 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -100,6 +100,7 @@ struct thread_info { #define TIF_SYSCALL_TRACE 2 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ +#define TIF_KERNEL_TRACE 5 /* kernel trace active */ #define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* is
terminating due to OOM killer */ @@ -111,7 +112,9 @@ struct thread_info { #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) +#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|\ + _TIF_SINGLESTEP|_TIF_KERNEL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -124,8 +127,9 @@ struct thread_info { /* "work to do on user-return" bits */ #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE) -/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ -#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) +/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE, TIF_KERNEL_TRACE or TIF_SYSCALL_AUDIT */ +#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE|\ + _TIF_SYSCALL_AUDIT)) #define TS_POLLING 1 /* true if in idle loop and not sleeping */ #define TS_RESTORE_SIGMASK 2 /* restore signal mask in do_signal() */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 244704a174d..56c4de30197 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -620,9 +620,11 @@ GLOBAL_ENTRY(ia64_ret_from_clone) ;; ld4 r2=[r2] ;; + movl r8=_TIF_SYSCALL_TRACEAUDIT + ;; // added stop bits to prevent r8 dependency + and r2=r8,r2 mov r8=0 - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 - ;; + ;; // added stop bits to prevent r2 dependency cmp.ne p6,p0=r2,r0 (p6) br.cond.spnt .strace_check_retval ;; // added stop bits to prevent r8 dependency diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index 71faff5bcc2..8538b1a0eaf 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -139,6 +139,7 @@ static inline unsigned int get_thread_fault_code(void) #define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ #define TIF_IRET 4 /* return with iret */ #define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ +#define TIF_KERNEL_TRACE 6 /* kernel trace active */ #define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -150,14 +151,19 @@ static inline unsigned int get_thread_fault_code(void) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) #define _TIF_IRET (1<<TIF_IRET) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_USEDFPU (1<<TIF_USEDFPU) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_FREEZE (1<<TIF_FREEZE) -#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ -#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ +/* work to do on any return to u-space */ +#define _TIF_ALLWORK_MASK 0x0000FFFF + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE)) /* * Thread-synchronous status. 
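The literal work masks used by frv, blackfin and h8300 earlier are the same expression m32r spells out above, just pre-evaluated. A worked check against frv's value:

/* frv: TIF_SYSCALL_TRACE == 0, TIF_KERNEL_TRACE == 6 */
0x0000FFFF & ~((1 << 0) | (1 << 6))
	== 0x0000FFFF & ~0x0041
	== 0x0000FFBE	/* hence 0xFFFE -> 0xFFBE in the frv hunk */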
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 790988967ba..fa8256a17eb 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void) */ #define TIF_SIGPENDING 6 /* signal pending */ #define TIF_NEED_RESCHED 7 /* rescheduling necessary */ +#define TIF_KERNEL_TRACE 13 /* kernel trace active */ #define TIF_DELAYED_TRACE 14 /* single step a syscall */ #define TIF_SYSCALL_TRACE 15 /* syscall trace active */ #define TIF_MEMDIE 16 /* is terminating due to OOM killer */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f5ecc0566bc..e0c246d26e1 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -17,6 +17,7 @@ config MIPS select HAVE_KRETPROBES select RTC_LIB if !MACH_LOONGSON select GENERIC_ATOMIC64 if !64BIT + select HAVE_LTT_DUMP_TABLES select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_GENERIC_HARDIRQS @@ -1963,6 +1964,20 @@ config CPU_R4000_WORKAROUNDS config CPU_R4400_WORKAROUNDS bool +config HAVE_GET_CYCLES_32 + def_bool y + depends on !CPU_R4400_WORKAROUNDS + depends on !CPU_CAVIUM_OCTEON + select HAVE_TRACE_CLOCK + select HAVE_TRACE_CLOCK_32_TO_64 + select HAVE_UNSYNCHRONIZED_TSC + +config HAVE_GET_CYCLES + def_bool y + depends on CPU_CAVIUM_OCTEON + select HAVE_TRACE_CLOCK + select HAVE_UNSYNCHRONIZED_TSC + # # - Highmem only makes sense for the 32-bit kernel. # - The current highmem code will only work properly on physically indexed diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index c0884f02d3a..1419b787e1e 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -178,4 +178,10 @@ #define nudge_writes() mb() #endif +/* + * MIPS does not have any instruction to serialize instruction execution on the + * core. + */ +#define sync_core() + #endif /* __ASM_BARRIER_H */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 4d987097538..44f631b39ff 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -438,6 +438,7 @@ */ #define CAUSEB_EXCCODE 2 #define CAUSEF_EXCCODE (_ULCAST_(31) << 2) +#define CAUSE_EXCCODE(cause) (((cause) & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE) #define CAUSEB_IP 8 #define CAUSEF_IP (_ULCAST_(255) << 8) #define CAUSEB_IP0 8 diff --git a/arch/mips/include/asm/octeon/trace-clock.h b/arch/mips/include/asm/octeon/trace-clock.h new file mode 100644 index 00000000000..062662b732a --- /dev/null +++ b/arch/mips/include/asm/octeon/trace-clock.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2005,2008 Mathieu Desnoyers + * + * Trace clock MIPS Octeon definitions. 
+ */ + +#ifndef _ASM_MIPS_OCTEON_TRACE_CLOCK_H +#define _ASM_MIPS_OCTEON_TRACE_CLOCK_H + +#include <asm/octeon/octeon.h> + +#define TC_HW_BITS 64 + +static inline u32 trace_clock_read32(void) +{ + return (u32)read_c0_cvmcount(); /* only need the 32 LSB */ +} + +static inline u64 trace_clock_read64(void) +{ + return read_c0_cvmcount(); +} + +static inline u64 trace_clock_frequency(void) +{ + return octeon_get_clock_rate(); +} + +static inline u32 trace_clock_freq_scale(void) +{ + return 1; +} + +static inline int get_trace_clock(void) +{ + return 0; +} + +static inline void put_trace_clock(void) +{ + return; +} +#endif /* _ASM_MIPS_OCTEON_TRACE_CLOCK_H */ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index d309556cacf..eb7f7b99038 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -122,6 +122,7 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */ #define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */ #define TIF_LOAD_WATCH 25 /* If set, load watch registers */ +#define TIF_KERNEL_TRACE 30 /* kernel trace active */ #define TIF_SYSCALL_TRACE 31 /* syscall trace active */ #ifdef CONFIG_MIPS32_O32 @@ -131,6 +132,7 @@ register struct thread_info *__current_thread_info __asm__("$28"); #endif /* CONFIG_MIPS32_O32 */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) @@ -151,7 +153,7 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define _TIF_WORK_MASK (0x0000ffef & \ ~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT)) /* work to do on any return to u-space */ -#define _TIF_ALLWORK_MASK (0x8000ffff & ~_TIF_SECCOMP) +#define _TIF_ALLWORK_MASK (0xc000ffff & ~_TIF_SECCOMP) #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 6529704aa73..6c150979fa2 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -20,6 +20,8 @@ */ #define CLOCK_TICK_RATE 1193182 +extern unsigned int mips_hpt_frequency; + /* * Standard way to access the cycle counter. * Currently only used on SMP for scheduling. @@ -29,14 +31,109 @@ * which isn't an evil thing. * * We know that all SMP capable CPUs have cycle counters. + * + * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> + * HAVE_GET_CYCLES makes sure that this case is handled properly : + * + * Ralf Baechle <ralf@linux-mips.org> : + * This avoids us executing an mfc0 c0_count instruction on processors which + * don't have one, but also on certain R4000 and R4400 versions where reading from + * the count register just in the very moment when its value equals c0_compare + * will result in the timer interrupt getting lost.
 */
+#ifdef CONFIG_HAVE_GET_CYCLES
+# ifdef CONFIG_CPU_CAVIUM_OCTEON
+typedef unsigned long cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+	return read_c0_cvmcount();
+}
+
+static inline void get_cycles_barrier(void)
+{
+}
+
+static inline cycles_t get_cycles_rate(void)
+{
+	return mips_hpt_frequency;
+}
+
+extern int test_tsc_synchronization(void);
+extern int _tsc_is_sync;
+static inline int tsc_is_sync(void)
+{
+	return _tsc_is_sync;
+}
+# else /* #ifdef CONFIG_CPU_CAVIUM_OCTEON */
+# error "64-bit get_cycles() supported only on Cavium Octeon MIPS architectures"
+# endif /* #else #ifdef CONFIG_CPU_CAVIUM_OCTEON */
+#elif defined(CONFIG_HAVE_GET_CYCLES_32)
 typedef unsigned int cycles_t;
 
 static inline cycles_t get_cycles(void)
 {
+	return read_c0_count();
+}
+
+static inline void get_cycles_barrier(void)
+{
+}
+
+static inline cycles_t get_cycles_rate(void)
+{
+	return mips_hpt_frequency;
+}
+
+extern int test_tsc_synchronization(void);
+extern int _tsc_is_sync;
+static inline int tsc_is_sync(void)
+{
+	return _tsc_is_sync;
+}
+#else
+typedef unsigned int cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
 	return 0;
 }
+static inline int test_tsc_synchronization(void)
+{
+	return 0;
+}
+static inline int tsc_is_sync(void)
+{
+	return 0;
+}
+#endif
+
+#define DELAY_INTERRUPT 100
+/*
+ * Only updates 32 LSB.
+ */
+static inline void write_tsc(u32 val1, u32 val2)
+{
+	write_c0_count(val1);
+	/* Arrange for an interrupt in a short while */
+	write_c0_compare(read_c0_count() + DELAY_INTERRUPT);
+}
+
+/*
+ * Currently unused, should update internal tsc-related timekeeping sources.
+ */
+static inline void mark_tsc_unstable(char *reason)
+{
+}
+
+/*
+ * Currently simply use the tsc_is_sync value.
+ */
+static inline int unsynchronized_tsc(void)
+{
+	return !tsc_is_sync();
+}
 
 #endif /* __KERNEL__ */
diff --git a/arch/mips/include/asm/trace-clock.h b/arch/mips/include/asm/trace-clock.h
new file mode 100644
index 00000000000..9bbcf999bef
--- /dev/null
+++ b/arch/mips/include/asm/trace-clock.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2005,2008 Mathieu Desnoyers
+ *
+ * Trace clock MIPS definitions.
+ */
+
+#ifndef _ASM_MIPS_TRACE_CLOCK_H
+#define _ASM_MIPS_TRACE_CLOCK_H
+
+#include <linux/timex.h>
+#include <asm/processor.h>
+
+#define TRACE_CLOCK_MIN_PROBE_DURATION 200
+
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+# include <asm/octeon/trace-clock.h>
+#else /* !CONFIG_CPU_CAVIUM_OCTEON */
+/*
+ * Number of hardware clock bits. The higher order bits are expected to be 0.
+ * If the hardware clock source has more than 32 bits, the bits higher than the
+ * 32nd will be truncated by a cast to a 32 bits unsigned. Range : 1 - 32.
+ * (too few bits would be unrealistic though, since we depend on the timer to
+ * detect the overflows).
+ */
+#define TC_HW_BITS 32
+
+/* Expected maximum interrupt latency in ms : 15ms, *2 for security */
+#define TC_EXPECTED_INTERRUPT_LATENCY 30
+
+extern u64 trace_clock_read_synthetic_tsc(void);
+
+/*
+ * MIPS get_cycles only returns a 32-bit TSC (see timex.h). The assumption
+ * there is that the reschedule is done every 8 seconds or so. Given that
+ * tracing needs to detect delays longer than 8 seconds, we need a full
+ * 64-bit TSC, which is provided by trace-clock-32-to-64.
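Editorial aside: the 32-to-64 extension referred to here can be sketched as follows. This is not the actual trace-clock-32-to-64 code (which is per-CPU and lock-less); the sketch assumes a single updater, e.g. a periodic timer, called at least once per 2^32-cycle wrap:

	static u64 synthetic_tsc;	/* last extended timestamp */

	static u64 update_synthetic_tsc(void)
	{
		u32 hw = trace_clock_read32();

		/* The 32 LSBs moving backwards means the hardware counter
		 * wrapped: carry into the software-kept high word. */
		if (hw < (u32)synthetic_tsc)
			synthetic_tsc += 1ULL << 32;
		synthetic_tsc = (synthetic_tsc & ~0xffffffffULL) | hw;
		return synthetic_tsc;
	}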
+*/ + +static inline u32 trace_clock_read32(void) +{ + return (u32)get_cycles(); /* only need the 32 LSB */ +} + +static inline u64 trace_clock_read64(void) +{ + return trace_clock_read_synthetic_tsc(); +} + +static inline u64 trace_clock_frequency(void) +{ + return get_cycles_rate(); +} + +static inline u32 trace_clock_freq_scale(void) +{ + return 1; +} + +extern void get_synthetic_tsc(void); +extern void put_synthetic_tsc(void); + +static inline int get_trace_clock(void) +{ + get_synthetic_tsc(); + return 0; +} + +static inline void put_trace_clock(void) +{ + put_synthetic_tsc(); +} +#endif /* CONFIG_CPU_CAVIUM_OCTEON */ + +static inline void set_trace_clock_is_sync(int state) +{ +} +#endif /* _ASM_MIPS_TRACE_CLOCK_H */ diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index ffa331029e0..8c5410f97e7 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -167,7 +167,7 @@ work_notifysig: # deal with pending signals and FEXPORT(syscall_exit_work_partial) SAVE_STATIC syscall_exit_work: - li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE and t0, a2 # a2 is preloaded with TI_FLAGS beqz t0, work_pending # trace bit set? local_irq_enable # could let do_syscall_trace() diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 876a75cc376..76a82609626 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -34,6 +34,7 @@ #include <linux/vfs.h> #include <linux/ipc.h> #include <linux/slab.h> +#include <trace/ipc.h> #include <net/sock.h> #include <net/scm.h> @@ -44,6 +45,8 @@ #include <asm/mmu_context.h> #include <asm/mman.h> +DEFINE_TRACE(ipc_call); + /* Use this to get at 32-bit user passed pointers. */ /* A() macro should be used for places where you e.g. have some internal variable u32 and just want to get @@ -166,6 +169,8 @@ SYSCALL_DEFINE6(32_ipc, u32, call, long, first, long, second, long, third, version = call >> 16; /* hack for backward compatibility */ call &= 0xffff; + trace_ipc_call(call, first); + switch (call) { case SEMOP: /* struct sembuf is the same on 32 and 64bit :)) */ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index ae167df73dd..7d9bb1cdd7f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/completion.h> #include <linux/kallsyms.h> #include <linux/random.h> +#include <trace/sched.h> #include <asm/asm.h> #include <asm/bootinfo.h> @@ -42,6 +43,8 @@ #include <asm/inst.h> #include <asm/stacktrace.h> +DEFINE_TRACE(sched_kthread_create); + /* * The idle thread. There's no useful work to be done, so just try to conserve * power and have a low exit latency (ie sit in a loop waiting for somebody to @@ -234,6 +237,7 @@ static void __noreturn kernel_thread_helper(void *arg, int (*fn)(void *)) long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct pt_regs regs; + long pid; memset(®s, 0, sizeof(regs)); @@ -249,7 +253,10 @@ long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) #endif /* Ok, create the new process.. 
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, ®s, 0, NULL, NULL); + trace_sched_kthread_create(fn, pid); + return pid; } /* diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index d21c388c011..79e1750cc7c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -25,6 +25,7 @@ #include <linux/security.h> #include <linux/audit.h> #include <linux/seccomp.h> +#include <trace/syscall.h> #include <asm/byteorder.h> #include <asm/cpu.h> @@ -39,6 +40,9 @@ #include <asm/bootinfo.h> #include <asm/reg.h> +DEFINE_TRACE(syscall_entry); +DEFINE_TRACE(syscall_exit); + /* * Called by kernel/ptrace.c when detaching.. * @@ -535,6 +539,11 @@ static inline int audit_arch(void) */ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) { + if (!entryexit) + trace_syscall_entry(regs, regs->regs[2]); + else + trace_syscall_exit(regs->regs[2]); + /* do the secure computing check first */ if (!entryexit) secure_computing(regs->regs[2]); diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index fbaabad0e6e..1b90e8255da 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -52,7 +52,7 @@ NESTED(handle_sys, PT_SIZE, sp) stack_done: lw t0, TI_FLAGS($28) # syscall tracing enabled? - li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE and t0, t1 bnez t0, syscall_trace_entry # -> yes diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 3f417928320..c574a1a12f2 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -54,7 +54,7 @@ NESTED(handle_sys64, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting - li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 bnez t0, syscall_trace_entry @@ -126,7 +126,8 @@ illegal_syscall: END(handle_sys64) .align 3 -sys_call_table: + .type sys_call_table,@object +EXPORT(sys_call_table) PTR sys_read /* 5000 */ PTR sys_write PTR sys_open diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index f08ece6d8ac..0d312c2d54d 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -53,7 +53,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting - li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? 
and t0, t1, t0 bnez t0, n32_syscall_trace_entry @@ -121,6 +121,8 @@ not_n32_scall: END(handle_sysn32) + .align 3 + .type sysn32_call_table,@object EXPORT(sysn32_call_table) PTR sys_read /* 6000 */ PTR sys_write diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 78d768a3e19..635d0d84344 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -53,7 +53,7 @@ NESTED(handle_sys, PT_SIZE, sp) sll a3, a3, 0 dsll t0, v0, 3 # offset into table - ld t2, (sys_call_table - (__NR_O32_Linux * 8))(t0) + ld t2, (syso32_call_table - (__NR_O32_Linux * 8))(t0) sd a3, PT_R26(sp) # save a3 for syscall restarting @@ -81,7 +81,7 @@ NESTED(handle_sys, PT_SIZE, sp) PTR 4b, bad_stack .previous - li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 bnez t0, trace_a_syscall @@ -180,7 +180,7 @@ LEAF(sys32_syscall) beqz t0, einval # do not recurse dsll t1, t0, 3 beqz v0, einval - ld t2, sys_call_table(t1) # syscall routine + ld t2, syso32_call_table(t1) # syscall routine move a0, a1 # shift argument registers move a1, a2 @@ -202,8 +202,8 @@ einval: li v0, -ENOSYS END(sys32_syscall) .align 3 - .type sys_call_table,@object -sys_call_table: + .type syso32_call_table,@object +EXPORT(syso32_call_table) PTR sys32_syscall /* 4000 */ PTR sys_exit PTR sys_fork diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 383aeb95cb4..3faf9d20ee6 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -164,6 +164,9 @@ void __init smp_cpus_done(unsigned int max_cpus) { mp_ops->cpus_done(); synchronise_count_master(); +#ifdef CONFIG_HAVE_UNSYNCHRONIZED_TSC + test_tsc_synchronization(); +#endif } /* called from main before smp_init() */ diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 1dc6edff45e..9965dedbcc1 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -31,6 +31,8 @@ #include <linux/slab.h> #include <linux/random.h> #include <linux/elf.h> +#include <linux/ipc.h> +#include <linux/kallsyms.h> #include <asm/asm.h> #include <asm/branch.h> @@ -464,3 +466,67 @@ int kernel_execve(const char *filename, return -__v0; } + +void ltt_dump_sys_call_table(void *call_data) +{ + int i; + char namebuf[KSYM_NAME_LEN]; + +#ifdef CONFIG_32BIT + for (i = 0; i < __NR_O32_Linux_syscalls; i++) { + extern struct { + unsigned long ptr; + long j; + } sys_call_table[]; + + sprint_symbol(namebuf, sys_call_table[i].ptr); + __trace_mark(0, syscall_state, sys_call_table, call_data, + "id %d address %p symbol %s", + i + __NR_O32_Linux, (void *)sys_call_table[i].ptr, + namebuf); + } +#endif +#ifdef CONFIG_64BIT +# ifdef CONFIG_MIPS32_O32 + for (i = 0; i < __NR_O32_Linux_syscalls; i++) { + extern unsigned long syso32_call_table[]; + + sprint_symbol(namebuf, syso32_call_table[i]); + __trace_mark(0, syscall_state, sys_call_table, call_data, + "id %d address %p symbol %s", + i + __NR_O32_Linux, (void *)syso32_call_table[i], + namebuf); + } +# endif + + for (i = 0; i < __NR_64_Linux_syscalls; i++) { + extern unsigned long sys_call_table[]; + + sprint_symbol(namebuf, sys_call_table[i]); + __trace_mark(0, syscall_state, sys_call_table, call_data, + "id %d address %p symbol %s", + i + __NR_64_Linux, (void *)sys_call_table[i], + namebuf); + } + +# ifdef CONFIG_MIPS32_N32 + for (i = 0; i < __NR_N32_Linux_syscalls; i++) { + extern unsigned long sysn32_call_table[]; + + sprint_symbol(namebuf, sysn32_call_table[i]); + 
__trace_mark(0, syscall_state, sys_call_table, call_data, + "id %d address %p symbol %s", + i + __NR_N32_Linux, (void *)sysn32_call_table[i], + namebuf); + } +# endif +#endif +} +EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table); + +void ltt_dump_idt_table(void *call_data) +{ + /* No IDT information yet. */ + return; +} +EXPORT_SYMBOL_GPL(ltt_dump_idt_table); diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index fb749740551..51561a75dcf 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -70,6 +70,7 @@ EXPORT_SYMBOL(perf_irq); */ unsigned int mips_hpt_frequency; +EXPORT_SYMBOL(mips_hpt_frequency); /* * This function exists in order to cause an error due to a duplicate diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 71350f7f2d8..b6a12d70e8c 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -30,6 +30,7 @@ #include <linux/kdb.h> #include <linux/irq.h> #include <linux/perf_event.h> +#include <trace/trap.h> #include <asm/bootinfo.h> #include <asm/branch.h> @@ -55,6 +56,12 @@ #include <asm/stacktrace.h> #include <asm/uasm.h> +/* + * Also used in unaligned.c and fault.c. + */ +DEFINE_TRACE(trap_entry); +DEFINE_TRACE(trap_exit); + extern void check_wait(void); extern asmlinkage void r4k_wait(void); extern asmlinkage void rollback_handle_int(void); @@ -321,7 +328,7 @@ static void __show_regs(const struct pt_regs *regs) printk("Cause : %08x\n", cause); - cause = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; + cause = CAUSE_EXCCODE(cause); if (1 <= cause && cause <= 5) printk("BadVA : %0*lx\n", field, regs->cp0_badvaddr); @@ -698,6 +705,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) return; die_if_kernel("FP exception in kernel code", regs); + trace_trap_entry(regs, CAUSE_EXCCODE(regs->cp0_cause)); if (fcr31 & FPU_CSR_UNI_X) { int sig; void __user *fault_addr = NULL; @@ -730,7 +738,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) /* If something went wrong, signal */ process_fpemu_return(sig, fault_addr); - + trace_trap_exit(); return; } else if (fcr31 & FPU_CSR_INV_X) info.si_code = FPE_FLTINV; @@ -748,6 +756,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) info.si_errno = 0; info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); + trace_trap_exit(); } static void do_trap_or_bp(struct pt_regs *regs, unsigned int code, @@ -979,6 +988,8 @@ asmlinkage void do_cpu(struct pt_regs *regs) int status; unsigned long __maybe_unused flags; + trace_trap_entry(regs, CAUSE_EXCCODE(regs->cp0_cause)); + die_if_kernel("do_cpu invoked from kernel context!", regs); cpid = (regs->cp0_cause >> CAUSEB_CE) & 3; @@ -990,8 +1001,10 @@ asmlinkage void do_cpu(struct pt_regs *regs) opcode = 0; status = -1; - if (unlikely(compute_return_epc(regs) < 0)) + if (unlikely(compute_return_epc(regs) < 0)) { + trace_trap_exit(); return; + } if (unlikely(get_user(opcode, epc) < 0)) status = SIGSEGV; @@ -1009,7 +1022,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) regs->cp0_epc = old_epc; /* Undo skip-over. 
*/ force_sig(status, current); } - + trace_trap_exit(); return; case 1: @@ -1029,11 +1042,12 @@ asmlinkage void do_cpu(struct pt_regs *regs) if (!process_fpemu_return(sig, fault_addr)) mt_ase_fp_affinity(); } - + trace_trap_exit(); return; case 2: raw_notifier_call_chain(&cu2_chain, CU2_EXCEPTION, regs); + trace_trap_exit(); return; case 3: @@ -1041,6 +1055,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) } force_sig(SIGILL, current); + trace_trap_exit(); } asmlinkage void do_mdmx(struct pt_regs *regs) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index cfea1adfa15..d3af94de240 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -79,6 +79,7 @@ #include <linux/sched.h> #include <linux/debugfs.h> #include <linux/perf_event.h> +#include <trace/trap.h> #include <asm/asm.h> #include <asm/branch.h> @@ -518,6 +519,7 @@ asmlinkage void do_ade(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, regs->cp0_badvaddr); + trace_trap_entry(regs, CAUSE_EXCCODE(regs->cp0_cause)); /* * Did we catch a fault trying to load an instruction? * Or are we running in MIPS16 mode? @@ -543,6 +545,8 @@ asmlinkage void do_ade(struct pt_regs *regs) emulate_load_store_insn(regs, (void __user *)regs->cp0_badvaddr, pc); set_fs(seg); + trace_trap_exit(); + return; sigbus: @@ -552,6 +556,8 @@ sigbus: /* * XXX On return from the signal handler we should advance the epc */ + + trace_trap_exit(); } #ifdef CONFIG_DEBUG_FS diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 137ee76a004..1a5bd7b9018 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/kprobes.h> #include <linux/perf_event.h> +#include <trace/fault.h> #include <asm/branch.h> #include <asm/mmu_context.h> @@ -28,6 +29,9 @@ #include <asm/highmem.h> /* For VMALLOC_END */ #include <linux/kdebug.h> +DEFINE_TRACE(page_fault_entry); +DEFINE_TRACE(page_fault_exit); + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -144,7 +148,10 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ + trace_page_fault_entry(regs, CAUSE_EXCCODE(regs->cp0_cause), mm, vma, + address, write); fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); + trace_page_fault_exit(fault); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index aa8de727e90..cfcb2e70eba 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -62,6 +62,7 @@ struct thread_info { #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ #define TIF_SINGLESTEP 9 /* single stepping? */ #define TIF_BLOCKSTEP 10 /* branch stepping? 
*/ +#define TIF_KERNEL_TRACE 11 /* kernel trace active */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -73,6 +74,7 @@ struct thread_info { #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7d69e9bf5e6..b13eeab289a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -117,11 +117,13 @@ config PPC select HAVE_IOREMAP_PROT select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_KPROBES + select HAVE_TRACE_CLOCK select HAVE_ARCH_KGDB select HAVE_KRETPROBES select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK select HAVE_DMA_ATTRS + select HAVE_GET_CYCLES if PPC64 select HAVE_DMA_API_DEBUG select USE_GENERIC_SMP_HELPERS if SMP select HAVE_OPROFILE diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 65eb85976a0..34ba6e0a4d8 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -100,7 +100,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ -#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */ +#define TIF_KERNEL_TRACE 5 /* kernel trace active */ #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ @@ -111,6 +111,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ #define TIF_FREEZE 14 /* Freezing for suspend */ #define TIF_RUNLATCH 15 /* Is the runlatch enabled? 
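Editorial note on the renumbering in this hunk: TIF_PERFMON_WORK moves from bit 5 to bit 16 so that TIF_KERNEL_TRACE can take bit 5, and every _TIF_* mask is simply 1 shifted by the corresponding bit number. The same arithmetic explains the MIPS _TIF_ALLWORK_MASK change earlier in this diff, where TIF_KERNEL_TRACE is bit 30:

	  0x8000ffff	/* old mask: bit 31 plus the low 16 bits */
	| 0x40000000	/* _TIF_KERNEL_TRACE = 1 << 30 */
	= 0xc000ffff	/* new mask */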
 */
+#define TIF_PERFMON_WORK	16	/* work for pfm_handle_work() */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -118,7 +119,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_32BIT		(1<<TIF_32BIT)
-#define _TIF_PERFMON_WORK	(1<<TIF_PERFMON_WORK)
+#define _TIF_KERNEL_TRACE	(1<<TIF_KERNEL_TRACE)
 #define _TIF_PERFMON_CTXSW	(1<<TIF_PERFMON_CTXSW)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SINGLESTEP	(1<<TIF_SINGLESTEP)
@@ -128,7 +129,8 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_RUNLATCH		(1<<TIF_RUNLATCH)
-#define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
+#define _TIF_PERFMON_WORK	(1<<TIF_PERFMON_WORK)
+#define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP|_TIF_KERNEL_TRACE)
 
 #define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
 				 _TIF_NOTIFY_RESUME)
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index c55e14f7ef4..2fe7460cbf9 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -14,6 +14,8 @@
 
 typedef unsigned long cycles_t;
 
+extern unsigned long tb_ticks_per_sec;
+
 static inline cycles_t get_cycles(void)
 {
 #ifdef __powerpc64__
@@ -46,5 +48,15 @@ static inline cycles_t get_cycles(void)
 #endif
 }
 
+static inline cycles_t get_cycles_rate(void)
+{
+	return tb_ticks_per_sec;
+}
+
+static inline void get_cycles_barrier(void)
+{
+	isync();
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* _ASM_POWERPC_TIMEX_H */
diff --git a/arch/powerpc/include/asm/trace-clock.h b/arch/powerpc/include/asm/trace-clock.h
new file mode 100644
index 00000000000..05facc3e372
--- /dev/null
+++ b/arch/powerpc/include/asm/trace-clock.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2005,2008 Mathieu Desnoyers
+ *
+ * Trace clock PowerPC definitions.
+ *
+ * Use get_tb() directly to ensure reading a 64-bit value on 32-bit powerpc.
+ */
+
+#ifndef _ASM_TRACE_CLOCK_H
+#define _ASM_TRACE_CLOCK_H
+
+#include <linux/timex.h>
+#include <linux/time.h>
+#include <asm/time.h>
+
+static inline u32 trace_clock_read32(void)
+{
+	return get_tbl();
+}
+
+static inline u64 trace_clock_read64(void)
+{
+	return get_tb();
+}
+
+static inline unsigned int trace_clock_frequency(void)
+{
+	return get_cycles_rate();
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+	return 1;
+}
+
+static inline int get_trace_clock(void)
+{
+	return 0;
+}
+
+static inline void put_trace_clock(void)
+{
+}
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_TRACE_CLOCK_H */
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index cbe2297d68b..d1c27723f84 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -7,7 +7,7 @@
 #include <linux/tracepoint.h>
 
 struct pt_regs;
-
+#if 0 /* disabled by Mathieu Desnoyers. Belongs to generic IRQS.
*/ TRACE_EVENT(irq_entry, TP_PROTO(struct pt_regs *regs), @@ -41,6 +41,7 @@ TRACE_EVENT(irq_exit, TP_printk("pt_regs=%p", __entry->regs) ); +#endif //0 TRACE_EVENT(timer_interrupt_entry, diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ce557f6f00f..d21cf5bc503 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -401,8 +401,6 @@ void do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; - trace_irq_entry(regs); - irq_enter(); check_stack_overflow(); @@ -425,8 +423,6 @@ void do_IRQ(struct pt_regs *regs) timer_interrupt(regs); } #endif - - trace_irq_exit(regs); } void __init init_IRQ(void) diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 094bd9821ad..8294f73feac 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -665,7 +665,7 @@ _GLOBAL(abs) * Create a kernel thread * kernel_thread(fn, arg, flags) */ -_GLOBAL(kernel_thread) +_GLOBAL(original_kernel_thread) stwu r1,-16(r1) stw r30,8(r1) stw r31,12(r1) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 206a321a71d..1e10e579922 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -415,7 +415,7 @@ _GLOBAL(scom970_write) * Create a kernel thread * kernel_thread(fn, arg, flags) */ -_GLOBAL(kernel_thread) +_GLOBAL(original_kernel_thread) std r29,-24(r1) std r30,-16(r1) stdu r1,-STACK_FRAME_OVERHEAD(r1) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ef3ef566235..046b16e4a57 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -161,6 +161,9 @@ EXPORT_SYMBOL(screen_info); #ifdef CONFIG_PPC32 EXPORT_SYMBOL(timer_interrupt); +#ifndef CONFIG_SPARSE_IRQ +EXPORT_SYMBOL(irq_desc); +#endif EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(cacheable_memcpy); EXPORT_SYMBOL(cacheable_memzero); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8303a6c65ef..0a85886a784 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -38,6 +38,7 @@ #include <linux/personality.h> #include <linux/random.h> #include <linux/hw_breakpoint.h> +#include <trace/sched.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -55,6 +56,8 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> +DEFINE_TRACE(sched_kthread_create); + extern unsigned long _get_SP(void); #ifndef CONFIG_SMP @@ -663,6 +666,17 @@ void show_regs(struct pt_regs * regs) show_instructions(regs); } +long original_kernel_thread(int (*fn) (void *), void *arg, unsigned long flags); + +long kernel_thread(int (fn) (void *), void *arg, unsigned long flags) +{ + long retval; + + retval = original_kernel_thread(fn, arg, flags); + trace_sched_kthread_create(fn, retval); + return retval; +} + void exit_thread(void) { discard_lazy_cpu_state(); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 90653699829..fb8924c5fdf 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -34,12 +34,16 @@ #endif #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <trace/syscall.h> #include <asm/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/system.h> +DEFINE_TRACE(syscall_entry); +DEFINE_TRACE(syscall_exit); + /* * The parameter save area on the stack is used to store arguments being passed * to callee function and is located at fixed offset from stack pointer. 
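Editorial aside: with DEFINE_TRACE(syscall_entry) above, a tracer attaches a probe through the register_trace_syscall_entry() helper generated by DECLARE_TRACE. A sketch, assuming the tracepoint prototype is (struct pt_regs *regs, long id) as the call sites below suggest; on tracepoint revisions that thread a data pointer through registration the probe takes a leading void * argument as shown here, while older revisions omit it:

	/* Illustrative probe; a real tracer would write to a trace buffer. */
	static void probe_syscall_entry(void *data, struct pt_regs *regs,
					long id)
	{
		pr_debug("syscall entry: id %ld\n", id);
	}

	static int __init syscall_probe_init(void)
	{
		return register_trace_syscall_entry(probe_syscall_entry, NULL);
	}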
@@ -1680,6 +1684,8 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret = 0;
 
+	trace_syscall_entry(regs, regs->gpr[0]);
+
 	secure_computing(regs->gpr[0]);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
@@ -1715,6 +1721,8 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
+	trace_syscall_exit(regs->result);
+
 	if (unlikely(current->audit_context))
 		audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
 				   regs->result);
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 4e5bf1edc0f..5fe3cb1f38b 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -41,6 +41,7 @@
 #include <linux/elf.h>
 #include <linux/ipc.h>
 #include <linux/slab.h>
+#include <trace/ipc.h>
 
 #include <asm/ptrace.h>
 #include <asm/types.h>
@@ -51,6 +52,7 @@
 #include <asm/ppc-pci.h>
 #include <asm/syscalls.h>
 
+DEFINE_TRACE(ipc_call);
 
 asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
@@ -79,6 +81,8 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt
 	version = call >> 16; /* hack for backward compatibility */
 	call &= 0xffff;
 
+	trace_ipc_call(call, first);
+
 	switch (call) {
 
 	case SEMTIMEDOP:
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 09d31dbf43f..fcbe3f5c074 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -54,6 +54,7 @@
 #include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/irq_work.h>
+#include <trace/trap.h>
 
 #include <asm/trace.h>
 #include <asm/io.h>
@@ -585,6 +586,8 @@ void timer_interrupt(struct pt_regs * regs)
 	 * some CPUs will continue to take decrementer exceptions */
 	set_dec(DECREMENTER_MAX);
 
+	trace_trap_entry(regs, regs->trap);
+
 #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
@@ -631,6 +634,7 @@ void timer_interrupt(struct pt_regs * regs)
 	set_irq_regs(old_regs);
 
 	trace_timer_interrupt_exit(regs);
+	trace_trap_exit();
 }
 
 #ifdef CONFIG_SUSPEND
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bd74fac169b..62ae8cad792 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -34,6 +34,8 @@
 #include <linux/bug.h>
 #include <linux/kdebug.h>
 #include <linux/debugfs.h>
+#include <linux/ltt-core.h>
+#include <trace/trap.h>
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
@@ -75,6 +77,12 @@ EXPORT_SYMBOL(__debugger_fault_handler);
 #endif
 
 /*
+ * Also used in time.c and fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
+/*
  * Trap & Exception support
  */
 
@@ -141,6 +149,10 @@ int die(const char *str, struct pt_regs *regs, long err)
 #ifdef CONFIG_NUMA
 	printk("NUMA ");
 #endif
+#ifdef CONFIG_LTT
+	printk("LTT NESTING LEVEL : %u ", __get_cpu_var(ltt_nesting));
+	printk("\n");
+#endif
 	printk("%s\n", ppc_md.name ?
ppc_md.name : ""); sysfs_printk_last_file(); @@ -204,11 +216,14 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) addr, regs->nip, regs->link, code); } + trace_trap_entry(regs, regs->trap); + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; info.si_addr = (void __user *) addr; force_sig_info(signr, &info, current); + trace_trap_exit(); } #ifdef CONFIG_PPC64 @@ -1087,7 +1102,9 @@ void performance_monitor_exception(struct pt_regs *regs) { __get_cpu_var(irq_stat).pmu_irqs++; + trace_trap_entry(regs, regs->trap); perf_irq(regs); + trace_trap_exit(); } #ifdef CONFIG_8xx @@ -1308,12 +1325,14 @@ void altivec_assist_exception(struct pt_regs *regs) /* got an error reading the instruction */ _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); } else { + trace_trap_entry(regs, regs->trap); /* didn't recognize the instruction */ /* XXX quick hack for now: set the non-Java bit in the VSCR */ if (printk_ratelimit()) printk(KERN_ERR "Unrecognized altivec instruction " "in %s at %lx\n", current->comm, regs->nip); current->thread.vscr.u[3] |= 0x10000; + trace_trap_exit(); } } #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 54f4fb994e9..2fb3d0ce222 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -31,6 +31,7 @@ #include <linux/kdebug.h> #include <linux/perf_event.h> #include <linux/magic.h> +#include <trace/fault.h> #include <asm/firmware.h> #include <asm/page.h> @@ -43,6 +44,9 @@ #include <asm/siginfo.h> #include <mm/mmu_decl.h> +DEFINE_TRACE(page_fault_entry); +DEFINE_TRACE(page_fault_exit); + #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs) { @@ -309,7 +313,9 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ + trace_page_fault_entry(regs, regs->trap, mm, vma, address, is_write); ret = handle_mm_fault(mm, vma, address, is_write ? 
FAULT_FLAG_WRITE : 0); + trace_page_fault_exit(ret); if (unlikely(ret & VM_FAULT_ERROR)) { if (ret & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 3c7c3f82d84..3f098525259 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -30,6 +30,7 @@ #include <linux/ptrace.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/marker.h> #include <asm/io.h> #include <asm/time.h> diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 0b046628493..a7b4b883863 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -40,6 +40,7 @@ #include <linux/pid_namespace.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/marker.h> #include <asm/io.h> #include <asm/mmu_context.h> diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index ad1382f7932..637ce13517a 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -94,6 +94,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ +#define TIF_KERNEL_TRACE 12 /* kernel trace active */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_31BIT 17 /* 32bit process */ @@ -113,6 +114,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) #define _TIF_SINGLE_STEP (1<<TIF_FREEZE) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 648f64239a9..831874ffbc6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,7 +52,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) + _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8 | \ + _TIF_KERNEL_TRACE>>8) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9d3603d6c51..7b5255e3b94 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -55,7 +55,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) + _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8 | \ + _TIF_KERNEL_TRACE>>8) #define BASED(name) name-system_call(%r13) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ef86ad24398..df0af62a76d 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -45,6 +45,9 @@ enum s390_regset { REGSET_GENERAL_EXTENDED, }; +DEFINE_TRACE(syscall_entry); +DEFINE_TRACE(syscall_exit); + void update_per_regs(struct task_struct *task) { 
static const struct per_regs per_single_step = { @@ -723,6 +726,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) /* Do the secure computing check first. */ secure_computing(regs->gprs[2]); + trace_syscall_entry(regs, regs->gprs[2]); /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. @@ -753,6 +757,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) { + trace_syscall_exit(regs->gprs[2]); if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 476081440df..dcc9d509af0 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -29,9 +29,12 @@ #include <linux/personality.h> #include <linux/unistd.h> #include <linux/ipc.h> +#include <trace/ipc.h> #include <asm/uaccess.h> #include "entry.h" +DEFINE_TRACE(ipc_call); + /* * Perform the mmap() system call. Linux for S/390 isn't able to handle more * than 5 system call parameters, so this system call uses a memory block @@ -70,6 +73,8 @@ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, struct ipc_kludge tmp; int ret; + trace_ipc_call(call, first); + switch (call) { case SEMOP: return sys_semtimedop(first, (struct sembuf __user *)ptr, diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index b5a4a739b47..ff0dc02adc3 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -5,6 +5,7 @@ * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Portions added by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds @@ -33,6 +34,7 @@ #include <linux/kprobes.h> #include <linux/bug.h> #include <linux/utsname.h> +#include <trace/trap.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -65,6 +67,12 @@ static int kstack_depth_to_print = 20; #endif /* CONFIG_64BIT */ /* + * Also used in fault.c. 
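Editorial note: the invariant throughout these handlers is that every trace_trap_entry() is matched by exactly one trace_trap_exit() on every return path, which is why several early returns below are rewritten as goto end. Schematically, with bad_op() standing in for any early-exit condition:

	static void example_trap_handler(struct pt_regs *regs,
					 long pgm_int_code)
	{
		trace_trap_entry(regs, pgm_int_code & 0xffff);
		if (bad_op(regs))
			goto end;	/* early exit still emits trap_exit */
		/* ... normal handling ... */
	end:
		trace_trap_exit();
	}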
+ */ +DEFINE_TRACE(trap_entry); +DEFINE_TRACE(trap_exit); + +/* * For show_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown * - the asynchronous interrupt stack (cpu related) @@ -299,6 +307,8 @@ static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, pgm_int_code, signr) == NOTIFY_STOP) return; + trace_trap_entry(regs, pgm_int_code & 0xffff); + if (regs->psw.mask & PSW_MASK_PSTATE) { struct task_struct *tsk = current; @@ -314,11 +324,14 @@ static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, enum bug_trap_type btt; btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); - if (btt == BUG_TRAP_TYPE_WARN) + if (btt == BUG_TRAP_TYPE_WARN) { + trace_trap_exit(); return; + } die(str, regs, pgm_int_code); } } + trace_trap_exit(); } static inline void __user *get_psw_address(struct pt_regs *regs, @@ -422,9 +435,11 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, location = get_psw_address(regs, pgm_int_code); + trace_trap_entry(regs, pgm_int_code & 0xffff); + if (regs->psw.mask & PSW_MASK_PSTATE) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) - return; + goto end; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { if (tracehook_consider_fatal_signal(current, SIGTRAP)) force_sig(SIGTRAP, current); @@ -433,24 +448,24 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) { if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; + goto end; signal = math_emu_b3(opcode, regs); } else if (opcode[0] == 0xed) { if (get_user(*((__u32 *) (opcode+2)), (__u32 __user *)(location+1))) - return; + goto end; signal = math_emu_ed(opcode, regs); } else if (*((__u16 *) opcode) == 0xb299) { if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; + goto end; signal = math_emu_srnm(opcode, regs); } else if (*((__u16 *) opcode) == 0xb29c) { if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; + goto end; signal = math_emu_stfpc(opcode, regs); } else if (*((__u16 *) opcode) == 0xb29d) { if (get_user(*((__u16 *) (opcode+2)), location+1)) - return; + goto end; signal = math_emu_lfpc(opcode, regs); #endif } else @@ -486,6 +501,8 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, do_trap(pgm_int_code, signal, "illegal operation", regs, &info); } +end: + trace_trap_exit(); } @@ -500,6 +517,8 @@ asmlinkage void specification_exception(struct pt_regs *regs, location = (__u16 __user *) get_psw_address(regs, pgm_int_code); + trace_trap_entry(regs, pgm_int_code & 0xffff); + if (regs->psw.mask & PSW_MASK_PSTATE) { get_user(*((__u16 *) opcode), location); switch (opcode[0]) { @@ -544,6 +563,7 @@ asmlinkage void specification_exception(struct pt_regs *regs, do_trap(pgm_int_code, signal, "specification exception", regs, &info); } + trace_trap_exit(); } #else DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, @@ -558,6 +578,8 @@ static void data_exception(struct pt_regs *regs, long pgm_int_code, location = get_psw_address(regs, pgm_int_code); + trace_trap_entry(regs, pgm_int_code & 0xffff); + if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); @@ -631,6 +653,7 @@ static void data_exception(struct pt_regs *regs, long pgm_int_code, info.si_addr = location; do_trap(pgm_int_code, signal, "data exception", regs, &info); } + trace_trap_exit(); } static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, @@ -638,6 
+661,7 @@ static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, { siginfo_t info; + trace_trap_entry(regs, pgm_int_code & 0xffff); /* Set user psw back to home space mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) regs->psw.mask |= PSW_ASC_HOME; @@ -647,6 +671,7 @@ static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, info.si_code = ILL_PRVOPC; info.si_addr = get_psw_address(regs, pgm_int_code); do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info); + trace_trap_exit(); } asmlinkage void __kprobes kernel_stack_overflow(struct pt_regs * regs) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2c57806c085..f07b4d2cb53 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -5,6 +5,7 @@ * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Hartmut Penner (hp@de.ibm.com) * Ulrich Weigand (uweigand@de.ibm.com) + * Portions added by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation * * Derived from "arch/i386/mm/fault.c" * Copyright (C) 1995 Linus Torvalds @@ -31,6 +32,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/hugetlb.h> +#include <trace/fault.h> #include <asm/asm-offsets.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -39,6 +41,11 @@ #include <asm/compat.h> #include "../kernel/entry.h" +DEFINE_TRACE(page_fault_entry); +DEFINE_TRACE(page_fault_exit); +DEFINE_TRACE(page_fault_nosem_entry); +DEFINE_TRACE(page_fault_nosem_exit); + #ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 #define __SUBCODE_MASK 0x0200 @@ -272,7 +279,10 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, /* User mode accesses just cause a SIGSEGV */ si_code = (fault == VM_FAULT_BADMAP) ? SEGV_MAPERR : SEGV_ACCERR; + trace_page_fault_nosem_entry(regs, int_code & 0xffff, + trans_exc_code); do_sigsegv(regs, int_code, si_code, trans_exc_code); + trace_page_fault_nosem_exit(); return; } case VM_FAULT_BADCONTEXT: diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 8a9011dced1..fb4ef6a78db 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -48,6 +48,7 @@ config SUPERH32 select PERF_EVENTS select ARCH_HIBERNATION_POSSIBLE if MMU select SPARSE_IRQ + select HAVE_LTT_DUMP_TABLES config SUPERH64 def_bool ARCH = "sh64" @@ -211,6 +212,8 @@ config CPU_SH4 select CPU_HAS_FPU if !CPU_SH4AL_DSP select SYS_SUPPORTS_TMU select SYS_SUPPORTS_HUGETLBFS if MMU + select HAVE_TRACE_CLOCK + select HAVE_TRACE_CLOCK_32_TO_64 config CPU_SH4A bool diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index c228946926e..2d94b81add3 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -120,6 +120,7 @@ extern void init_thread_xstate(void); #define TIF_SECCOMP 6 /* secure computing */ #define TIF_NOTIFY_RESUME 7 /* callback before returning to user */ #define TIF_SYSCALL_TRACEPOINT 8 /* for ftrace syscall instrumentation */ +#define TIF_KERNEL_TRACE 9 /* kernel trace active */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 /* Freezing for suspend */ @@ -132,6 +133,7 @@ extern void init_thread_xstate(void); #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_FREEZE (1 << 
TIF_FREEZE) @@ -144,17 +146,18 @@ extern void init_thread_xstate(void); /* work to do in syscall trace */ #define _TIF_WORK_SYSCALL_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_KERNEL_TRACE) /* work to do on any return to u-space */ #define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \ _TIF_SINGLESTEP | _TIF_NOTIFY_RESUME | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_KERNEL_TRACE) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \ - _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)) + _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ + _TIF_KERNEL_TRACE)) /* * Thread-synchronous status. diff --git a/arch/sh/include/asm/timex.h b/arch/sh/include/asm/timex.h index 18bf06d9c76..5249bee819c 100644 --- a/arch/sh/include/asm/timex.h +++ b/arch/sh/include/asm/timex.h @@ -12,6 +12,8 @@ * can be used for accurately setting CLOCK_TICK_RATE, otherwise we * simply fall back on the i8253 PIT value. */ + +#if 0 #ifdef CONFIG_SH_PCLK_FREQ #define CLOCK_TICK_RATE (CONFIG_SH_PCLK_FREQ / 4) /* Underlying HZ */ #else @@ -19,5 +21,18 @@ #endif #include <asm-generic/timex.h> +#endif //0 + +#include <linux/io.h> +#include <cpu/timer.h> + +#define CLOCK_TICK_RATE (HZ * 100000UL) + +typedef unsigned long long cycles_t; + +static __inline__ cycles_t get_cycles (void) +{ + return 0xffffffff - ctrl_inl(TMU1_TCNT); +} #endif /* __ASM_SH_TIMEX_H */ diff --git a/arch/sh/include/asm/trace-clock.h b/arch/sh/include/asm/trace-clock.h new file mode 100644 index 00000000000..152d54c4181 --- /dev/null +++ b/arch/sh/include/asm/trace-clock.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2007,2008 Giuseppe Cavallaro <peppe.cavallaro@st.com> + * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> + * + * Trace clock definitions for SuperH. + */ + +#ifndef _ASM_SH_TRACE_CLOCK_H +#define _ASM_SH_TRACE_CLOCK_H + +#include <linux/clocksource.h> +#include <asm/clock.h> + +/* + * Number of hardware clock bits. The higher order bits are expected to be 0. + * If the hardware clock source has more than 32 bits, the bits higher than the + * 32nd will be truncated by a cast to a 32 bits unsigned. Range : 1 - 32. + * (too few bits would be unrealistic though, since we depend on the timer to + * detect the overflows). 
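A worked example of the margin involved (editorial; the 100 MHz rate is made up): a 32-bit counter at 100 MHz wraps every 2^32 / 10^8 s, i.e. roughly every 43 seconds, so an overflow-detection update bounded by the 30 ms TC_EXPECTED_INTERRUPT_LATENCY defined just below has ample slack:

	u64 wrap_ns = (1ULL << 32) * NSEC_PER_SEC / 100000000ULL; /* ~42.9 s */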
+ */ +#define TC_HW_BITS 32 + +/* Expected maximum interrupt latency in ms : 15ms, *2 for security */ +#define TC_EXPECTED_INTERRUPT_LATENCY 30 + +extern u64 trace_clock_read_synthetic_tsc(void); +extern u64 sh_get_clock_frequency(void); +extern u32 sh_read_timer_count(void); +extern void get_synthetic_tsc(void); +extern void put_synthetic_tsc(void); + +static inline u32 trace_clock_read32(void) +{ + return sh_read_timer_count(); +} + +static inline u64 trace_clock_read64(void) +{ + return trace_clock_read_synthetic_tsc(); +} + +static inline u64 trace_clock_frequency(void) +{ + return sh_get_clock_frequency(); +} + +static inline u32 trace_clock_freq_scale(void) +{ + return 1; +} + +static inline int get_trace_clock(void) +{ + get_synthetic_tsc(); + return 0; +} + +static inline void put_trace_clock(void) +{ + put_synthetic_tsc(); +} + +static inline void set_trace_clock_is_sync(int state) +{ +} +#endif /* _ASM_SH_TRACE_CLOCK_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 77f7ae1d464..fcb0da93c42 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -47,5 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_callchain.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o +obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o ccflags-y := -Werror diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 762a13984bb..ddffe37d968 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -21,12 +21,15 @@ #include <linux/fs.h> #include <linux/ftrace.h> #include <linux/hw_breakpoint.h> +#include <trace/sched.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> #include <asm/system.h> #include <asm/fpu.h> #include <asm/syscalls.h> +DEFINE_TRACE(sched_kthread_create); + void show_regs(struct pt_regs * regs) { printk("\n"); @@ -94,6 +97,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + trace_sched_kthread_create(fn, pid); + return pid; } EXPORT_SYMBOL(kernel_thread); diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 210c1cabcb7..4b17fd9ed79 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -25,12 +25,15 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/io.h> +#include <trace/sched.h> #include <asm/syscalls.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> #include <asm/fpu.h> +DEFINE_TRACE(sched_kthread_create); + struct task_struct *last_task_used_math = NULL; void show_regs(struct pt_regs *regs) @@ -300,6 +303,7 @@ ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *)) */ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { + int pid; struct pt_regs regs; memset(®s, 0, sizeof(regs)); @@ -310,8 +314,12 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.sr = (1 << 30); /* Ok, create the new process.. 
*/ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, + pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + + trace_sched_kthread_create(fn, pid); + + return pid; } EXPORT_SYMBOL(kernel_thread); diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 90a15d29fee..0373238e0d5 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -26,6 +26,10 @@ #include <linux/elf.h> #include <linux/regset.h> #include <linux/hw_breakpoint.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/marker.h> +#include <trace/syscall.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -33,10 +37,34 @@ #include <asm/mmu_context.h> #include <asm/syscalls.h> #include <asm/fpu.h> +#include <asm/unistd.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +DEFINE_TRACE(syscall_entry); +DEFINE_TRACE(syscall_exit); + +extern unsigned long sys_call_table[]; +void ltt_dump_sys_call_table(void *call_data) +{ + int i; + char namebuf[KSYM_NAME_LEN]; + + for (i = 0; i < NR_syscalls; i++) { + sprint_symbol(namebuf, sys_call_table[i]); + __trace_mark(0, syscall_state, sys_call_table, call_data, + "id %d address %p symbol %s", + i, (void *)sys_call_table[i], namebuf); + } +} +EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table); + +void ltt_dump_idt_table(void *call_data) +{ +} +EXPORT_SYMBOL_GPL(ltt_dump_idt_table); + /* * This routine will get a word off of the process kernel stack. */ @@ -491,6 +519,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + trace_syscall_entry(regs, regs->regs[3]); + secure_computing(regs->regs[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) && @@ -517,6 +547,8 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) { int step; + trace_syscall_exit(regs->regs[0]); + if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->regs[0]), regs->regs[0]); diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index 4436eacddb1..c893d20483b 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -31,6 +31,7 @@ #include <linux/tracehook.h> #include <linux/elf.h> #include <linux/regset.h> +#include <trace/syscall.h> #include <asm/io.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -43,6 +44,9 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +DEFINE_TRACE(syscall_entry); +DEFINE_TRACE(syscall_exit); + /* This mask defines the bits of the SR which the user is not allowed to change, which are everything except S, Q, M, PR, SZ, FR. */ #define SR_MASK (0xffff8cfd) diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index 8c6a350df75..b519b22b575 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c @@ -27,6 +27,9 @@ #include <asm/unistd.h> #include <asm/cacheflush.h> #include <asm/cachectl.h> +#include <trace/ipc.h> + +DEFINE_TRACE(ipc_call); asmlinkage int old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, diff --git a/arch/sh/kernel/trace-clock.c b/arch/sh/kernel/trace-clock.c new file mode 100644 index 00000000000..0c5509b9667 --- /dev/null +++ b/arch/sh/kernel/trace-clock.c @@ -0,0 +1,55 @@ +/* + * arch/sh/kernel/trace-clock.c + * + * Trace clock for SuperH. + * + * Copyright (C) 2010 STMicroelectronics Ltd + * + * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> + * + * Note: currently only tested and supported on SH4 CPU + * (TODO: tests on other SuperH architectures). 
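Editorial note: the TMU counts down, which is why the get_cycles() added to asm/timex.h earlier in this diff computes 0xffffffff - TCNT instead of returning TCNT directly; the subtraction turns the down-count into a value that increases monotonically between reloads:

	/* Restating the inversion from asm/timex.h, for emphasis. */
	cycles_t cycles = 0xffffffff - ctrl_inl(TMU1_TCNT); /* up-counting */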
+ */ + +#include <linux/module.h> +#include <linux/clocksource.h> +#include <asm/clock.h> + +static struct clocksource *clksrc; + +/* In case of the TMU, for SH4 architectures, it returns + * the value of timer counter register (TCNT). */ +u32 sh_read_timer_count(void) +{ + u32 value = 0; + + if (likely(clksrc)) + value = (u32) clksrc->read(clksrc); + + return value; +} + +/* Get the clock rate for the timer (e.g. TMU for SH4) */ +u64 sh_get_clock_frequency(void) +{ + u64 rate = 0; + struct clk *clk; + + clk = clk_get(NULL, "module_clk"); + if (likely(clk)) + rate = clk_get_rate(clk) / 4; + + return rate; +} + +/* Get the clock source needed to read the timer counter. + * For example a TMU channel for SH4 architectures. */ +static __init int init_sh_clocksource(void) +{ + clksrc = clocksource_get_next(); + if (unlikely(!clksrc)) + pr_err("%s: no clocksource found\n", __func__); + + return 0; +} +early_initcall(init_sh_clocksource); diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 3484c2f65ab..5abd87752eb 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -27,6 +27,7 @@ #include <linux/sysfs.h> #include <linux/uaccess.h> #include <linux/perf_event.h> +#include <trace/trap.h> #include <asm/system.h> #include <asm/alignment.h> #include <asm/fpu.h> @@ -47,6 +48,9 @@ #define TRAP_ILLEGAL_SLOT_INST 13 #endif +DEFINE_TRACE(trap_entry); +DEFINE_TRACE(trap_exit); + static void dump_mem(const char *str, unsigned long bottom, unsigned long top) { unsigned long p; @@ -545,6 +549,8 @@ asmlinkage void do_address_error(struct pt_regs *regs, error_code = lookup_exception_vector(); #endif + trace_trap_entry(regs, error_code >> 5); + oldfs = get_fs(); if (user_mode(regs)) { @@ -589,8 +595,10 @@ fixup: address); set_fs(oldfs); - if (tmp == 0) + if (!tmp) { + trace_trap_exit(); return; /* sorted */ + } uspace_segv: printk(KERN_NOTICE "Sending SIGBUS to \"%s\" due to unaligned " "access (PC %lx PR %lx)\n", current->comm, regs->pc, @@ -623,6 +631,7 @@ uspace_segv: 0, address); set_fs(oldfs); } + trace_trap_exit(); } #ifdef CONFIG_SH_DSP diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c index d4c34d757f0..22695763403 100644 --- a/arch/sh/mm/fault_32.c +++ b/arch/sh/mm/fault_32.c @@ -16,11 +16,17 @@ #include <linux/hardirq.h> #include <linux/kprobes.h> #include <linux/perf_event.h> +#include <trace/fault.h> #include <asm/io_trapped.h> #include <asm/system.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> +DEFINE_TRACE(page_fault_entry); +DEFINE_TRACE(page_fault_exit); +DEFINE_TRACE(page_fault_nosem_entry); +DEFINE_TRACE(page_fault_nosem_exit); + static inline int notify_page_fault(struct pt_regs *regs, int trap) { int ret = 0; @@ -200,7 +206,14 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ + trace_page_fault_entry(regs, + ({ + unsigned long trapnr; + asm volatile("stc r2_bank,%0": "=r" (trapnr)); + trapnr; + }) >> 5, mm, vma, address, writeaccess); fault = handle_mm_fault(mm, vma, address, writeaccess ? 
FAULT_FLAG_WRITE : 0); + trace_page_fault_exit(fault); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -230,11 +243,18 @@ bad_area: bad_area_nosemaphore: if (user_mode(regs)) { + trace_page_fault_nosem_entry(regs, + ({ + unsigned long trapnr; + asm volatile("stc r2_bank,%0": "=r" (trapnr)); + trapnr; + }) >> 5, address); info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = si_code; info.si_addr = (void *) address; force_sig_info(SIGSEGV, &info, tsk); + trace_page_fault_nosem_exit(); return; } @@ -324,6 +344,11 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess, pmd_t *pmd; pte_t *pte; pte_t entry; + int ret; + int irqvec; + + irqvec = lookup_exception_vector(); + trace_page_fault_nosem_entry(regs, irqvec, address); /* * We don't take page faults for P1, P2, and parts of P4, these @@ -333,24 +358,34 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess, if (address >= P3SEG && address < P3_ADDR_MAX) { pgd = pgd_offset_k(address); } else { - if (unlikely(address >= TASK_SIZE || !current->mm)) - return 1; + if (unlikely(address >= TASK_SIZE || !current->mm)) { + ret = 1; + goto out; + } pgd = pgd_offset(current->mm, address); } pud = pud_offset(pgd, address); - if (pud_none_or_clear_bad(pud)) - return 1; + if (pud_none_or_clear_bad(pud)) { + ret = 1; + goto out; + } pmd = pmd_offset(pud, address); - if (pmd_none_or_clear_bad(pmd)) - return 1; + if (pmd_none_or_clear_bad(pmd)) { + ret = 1; + goto out; + } pte = pte_offset_kernel(pmd, address); entry = *pte; - if (unlikely(pte_none(entry) || pte_not_present(entry))) - return 1; - if (unlikely(writeaccess && !pte_write(entry))) - return 1; + if (unlikely(pte_none(entry) || pte_not_present(entry))) { + ret = 1; + goto out; + } + if (unlikely(writeaccess && !pte_write(entry))) { + ret = 1; + goto out; + } if (writeaccess) entry = pte_mkdirty(entry); @@ -370,5 +405,8 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess, update_mmu_cache(NULL, address, pte); - return 0; + ret = 0; +out: + trace_page_fault_nosem_exit(); + return ret; } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 95695e97703..76f95707b97 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -40,6 +40,7 @@ config SPARC64 select HAVE_KPROBES select HAVE_MEMBLOCK select HAVE_SYSCALL_WRAPPERS + select HAVE_GET_CYCLES select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS @@ -50,6 +51,7 @@ config SPARC64 select RTC_DRV_STARFIRE select HAVE_PERF_EVENTS select PERF_USE_VMALLOC + select HAVE_TRACE_CLOCK select HAVE_GENERIC_HARDIRQS config ARCH_DEFCONFIG diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 9dd0318d3dd..65489902244 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -128,6 +128,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *) #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ +#define TIF_KERNEL_TRACE 5 /* kernel trace active */ #define TIF_USEDFPU 8 /* FPU was used by this task * this quantum (SMP) */ #define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling @@ -137,6 +138,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *) /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) #define _TIF_NOTIFY_RESUME 
(1<<TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index fb2ea7705a4..9de58956ace 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -214,7 +214,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */ /* flag bit 6 is available */ #define TIF_32BIT 7 /* 32-bit binary */ -/* flag bit 8 is available */ +#define TIF_KERNEL_TRACE 8 /* kernel trace active */ #define TIF_SECCOMP 9 /* secure computing */ #define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ @@ -233,6 +233,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_UNALIGNED (1<<TIF_UNALIGNED) #define _TIF_32BIT (1<<TIF_32BIT) +#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE) #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) diff --git a/arch/sparc/include/asm/timex_64.h b/arch/sparc/include/asm/timex_64.h index 18b30bc9823..905443a8889 100644 --- a/arch/sparc/include/asm/timex_64.h +++ b/arch/sparc/include/asm/timex_64.h @@ -12,7 +12,24 @@ /* Getting on the cycle counter on sparc64. */ typedef unsigned long cycles_t; -#define get_cycles() tick_ops->get_tick() + +static inline cycles_t get_cycles(void) +{ + return tick_ops->get_tick(); +} + +/* get_cycles instruction is synchronized on sparc64 */ +static inline void get_cycles_barrier(void) +{ + return; +} + +extern unsigned long tb_ticks_per_usec; + +static inline cycles_t get_cycles_rate(void) +{ + return (cycles_t)tb_ticks_per_usec * 1000000UL; +} #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/sparc/include/asm/trace-clock.h b/arch/sparc/include/asm/trace-clock.h new file mode 100644 index 00000000000..306fdf7b7ba --- /dev/null +++ b/arch/sparc/include/asm/trace-clock.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2008, Mathieu Desnoyers + * + * Trace clock definitions for Sparc64. 
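Aside, not part of the patch (the sparc64 trace-clock header comment resumes just below): the timex_64.h additions above pair get_cycles() with get_cycles_rate(), which is all a tracer needs to turn a cycle delta into wall time. A minimal sketch under that assumption; cycles_to_ns() and time_something() are hypothetical names, and a production version would precompute a mult/shift pair rather than divide on every call:

#include <linux/kernel.h>	/* pr_info() */
#include <linux/math64.h>	/* div64_u64() */
#include <linux/time.h>		/* NSEC_PER_SEC */
#include <asm/timex.h>		/* get_cycles(), get_cycles_rate() */

/* Hypothetical helper: convert an elapsed cycle count to nanoseconds.
 * Safe only for short intervals: delta * NSEC_PER_SEC must fit in
 * 64 bits. */
static inline u64 cycles_to_ns(cycles_t delta)
{
	cycles_t rate = get_cycles_rate();	/* timebase ticks per second */

	if (!rate)
		return 0;
	return div64_u64((u64)delta * NSEC_PER_SEC, rate);
}

static void time_something(void)
{
	cycles_t t0, t1;

	t0 = get_cycles();
	get_cycles_barrier();	/* no-op on sparc64, kept for portability */
	/* ... code under measurement ... */
	get_cycles_barrier();
	t1 = get_cycles();
	pr_info("section took %llu ns\n",
		(unsigned long long)cycles_to_ns(t1 - t0));
}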
+ */ + +#ifndef _ASM_SPARC_TRACE_CLOCK_H +#define _ASM_SPARC_TRACE_CLOCK_H + +#include <linux/timex.h> + +static inline u32 trace_clock_read32(void) +{ + return get_cycles(); +} + +static inline u64 trace_clock_read64(void) +{ + return get_cycles(); +} + +static inline unsigned int trace_clock_frequency(void) +{ + return get_cycles_rate(); +} + +static inline u32 trace_clock_freq_scale(void) +{ + return 1; +} + +static inline int get_trace_clock(void) +{ + return 0; +} + +static inline void put_trace_clock(void) +{ +} + +static inline void set_trace_clock_is_sync(int state) +{ +} +#endif /* _ASM_SPARC_TRACE_CLOCK_H */ diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 1504df8ddf7..54210d48dba 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -1151,7 +1151,7 @@ sys_sigreturn: add %sp, STACKFRAME_SZ, %o0 ld [%curptr + TI_FLAGS], %l5 - andcc %l5, _TIF_SYSCALL_TRACE, %g0 + andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0 be 1f nop @@ -1171,7 +1171,7 @@ sys_rt_sigreturn: add %sp, STACKFRAME_SZ, %o0 ld [%curptr + TI_FLAGS], %l5 - andcc %l5, _TIF_SYSCALL_TRACE, %g0 + andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0 be 1f nop @@ -1313,7 +1313,7 @@ syscall_is_too_hard: ld [%curptr + TI_FLAGS], %l5 mov %i3, %o3 - andcc %l5, _TIF_SYSCALL_TRACE, %g0 + andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0 mov %i4, %o4 bne linux_syscall_trace mov %i0, %l5 @@ -1330,7 +1330,7 @@ ret_sys_call: ld [%sp + STACKFRAME_SZ + PT_PSR], %g3 set PSR_C, %g2 bgeu 1f - andcc %l6, _TIF_SYSCALL_TRACE, %g0 + andcc %l6, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0 /* System call success, clear Carry condition code. */ andn %g3, %g2, %g3 diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 17529298c50..57c0d67b5c4 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -24,6 +24,7 @@ #include <linux/pm.h> #include <linux/init.h> #include <linux/slab.h> +#include <trace/sched.h> #include <asm/auxio.h> #include <asm/oplib.h> @@ -39,6 +40,8 @@ #include <asm/prom.h> #include <asm/unistd.h> +DEFINE_TRACE(sched_kthread_create); + /* * Power management idle function * Set in pm platform drivers (apc.c and pmc.c) @@ -674,6 +677,7 @@ pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED), "i" (__NR_exit), "r" (fn), "r" (arg) : "g1", "g2", "g3", "o0", "o1", "memory", "cc"); + trace_sched_kthread_create(fn, retval); return retval; } EXPORT_SYMBOL(kernel_thread); diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3bc9c9979b9..d57935a0245 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -782,7 +782,8 @@ static struct clocksource clocksource_tick = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static unsigned long tb_ticks_per_usec __read_mostly; +unsigned long tb_ticks_per_usec __read_mostly; +EXPORT_SYMBOL_GPL(tb_ticks_per_usec); void __delay(unsigned long loops) { diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index e2cf786bda0..510bf5ba964 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -68,6 +68,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ #define TIF_SYSCALL_AUDIT 6 #define TIF_RESTORE_SIGMASK 7 +#define TIF_KERNEL_TRACE 8 /* kernel trace active */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1 << 
TIF_SYSCALL_TRACE) @@ -77,6 +78,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) #define _TIF_FREEZE (1 << TIF_FREEZE) #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d5ed94d30aa..b0389519b6d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -19,6 +19,7 @@ config X86 select HAVE_READQ select HAVE_WRITEQ select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_GET_CYCLES select HAVE_IDE select HAVE_OPROFILE select HAVE_PERF_EVENTS @@ -27,9 +28,11 @@ config X86 select HAVE_KPROBES select HAVE_MEMBLOCK select ARCH_WANT_OPTIONAL_GPIOLIB + select HAVE_LTT_DUMP_TABLES select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS select HAVE_KRETPROBES + select HAVE_TRACE_CLOCK select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD select HAVE_C_RECORDMCOUNT @@ -208,10 +211,12 @@ config HAVE_INTEL_TXT config X86_32_SMP def_bool y depends on X86_32 && SMP + select HAVE_UNSYNCHRONIZED_TSC config X86_64_SMP def_bool y depends on X86_64 && SMP + select HAVE_UNSYNCHRONIZED_TSC config X86_HT def_bool y diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c index 29cdcd02ead..accd6b42bd2 100644 --- a/arch/x86/ia32/ipc32.c +++ b/arch/x86/ia32/ipc32.c @@ -8,8 +8,11 @@ #include <linux/shm.h> #include <linux/ipc.h> #include <linux/compat.h> +#include <trace/ipc.h> #include <asm/sys_ia32.h> +DEFINE_TRACE(ipc_call); + asmlinkage long sys32_ipc(u32 call, int first, int second, int third, compat_uptr_t ptr, u32 fifth) { @@ -18,6 +21,8 @@ asmlinkage long sys32_ipc(u32 call, int first, int second, int third, version = call >> 16; /* hack for backward compatibility */ call &= 0xffff; + trace_ipc_call(call, first); + switch (call) { case SEMOP: /* struct sembuf is the same on 32 and 64bit :)) */ diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index 38d87379e27..9b1db108f9e 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -1,20 +1,9 @@ #ifndef _ASM_X86_IDLE_H #define _ASM_X86_IDLE_H -#define IDLE_START 1 -#define IDLE_END 2 - -struct notifier_block; -void idle_notifier_register(struct notifier_block *n); -void idle_notifier_unregister(struct notifier_block *n); - -#ifdef CONFIG_X86_64 -void enter_idle(void); -void exit_idle(void); -#else /* !CONFIG_X86_64 */ -static inline void enter_idle(void) { } -static inline void exit_idle(void) { } -#endif /* CONFIG_X86_64 */ +extern void enter_idle(void); +extern void __exit_idle(void); +extern void exit_idle(void); void c1e_remove_cpu(int cpu); diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 5745ce8bf10..fdf897373e1 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -56,6 +56,61 @@ static inline void native_halt(void) #endif +#ifdef CONFIG_X86_64 +/* + * Only returns from a trap or exception to a NMI context (intra-privilege + * level near return) to the same SS and CS segments. Should be used + * upon trap or exception return when nested over a NMI context so no iret is + * issued. It takes care of modifying the eflags, rsp and returning to the + * previous function. 
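Before the stack layout spelled out below, one cross-cutting note: every architecture in this series wires TIF_KERNEL_TRACE the same way, defining the flag bit and folding it into the syscall work masks so traced tasks take the slow (tracing) path. How a tracer flips the bit is not part of these hunks; the helper below is a hypothetical sketch of the obvious approach:

#include <linux/sched.h>

/* Hypothetical, not in this patch: make every existing task take the
 * traced syscall slow path by setting TIF_KERNEL_TRACE. */
static void kernel_trace_set_all_tasks(int enable)
{
	struct task_struct *g, *t;

	read_lock(&tasklist_lock);
	do_each_thread(g, t) {
		if (enable)
			set_tsk_thread_flag(t, TIF_KERNEL_TRACE);
		else
			clear_tsk_thread_flag(t, TIF_KERNEL_TRACE);
	} while_each_thread(g, t);
	read_unlock(&tasklist_lock);
}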
+ * + * The stack, at that point, looks like : + * + * 0(rsp) RIP + * 8(rsp) CS + * 16(rsp) EFLAGS + * 24(rsp) RSP + * 32(rsp) SS + * + * Upon execution : + * Copy EIP to the top of the return stack + * Update top of return stack address + * Pop eflags into the eflags register + * Make the return stack current + * Near return (popping the return address from the return stack) + */ +#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushq %rax; \ + movq %rsp, %rax; \ + movq 24+8(%rax), %rsp; \ + pushq 0+8(%rax); \ + pushq 16+8(%rax); \ + movq (%rax), %rax; \ + popfq; \ + ret +#else +/* + * Protected mode only, no V8086. Implies that protected mode must + * be entered before NMIs or MCEs are enabled. Only returns from a trap or + * exception to a NMI context (intra-privilege level far return). Should be used + * upon trap or exception return when nested over a NMI context so no iret is + * issued. + * + * The stack, at that point, looks like : + * + * 0(esp) EIP + * 4(esp) CS + * 8(esp) EFLAGS + * + * Upon execution : + * Copy the stack eflags to top of stack + * Pop eflags into the eflags register + * Far return: pop EIP and CS into their register, and additionally pop EFLAGS. + */ +#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushl 8(%esp); \ + popfl; \ + lret $4 +#endif + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -112,6 +167,7 @@ static inline unsigned long arch_local_irq_save(void) #define ENABLE_INTERRUPTS(x) sti #define DISABLE_INTERRUPTS(x) cli +#define INTERRUPT_RETURN_NMI_SAFE NATIVE_INTERRUPT_RETURN_NMI_SAFE #ifdef CONFIG_X86_64 #define SWAPGS swapgs diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/include/asm/kvm-mmutrace.h index b60b4fdb3ed..42d117d0418 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/include/asm/kvm-mmutrace.h @@ -217,9 +217,9 @@ TRACE_EVENT( #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_PATH asm #undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mmutrace +#define TRACE_INCLUDE_FILE kvm-mmutrace /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/arch/x86/kvm/trace.h b/arch/x86/include/asm/kvm-trace.h index 1357d7cf4ec..c1e151c092b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/include/asm/kvm-trace.h @@ -701,9 +701,9 @@ TRACE_EVENT(kvm_emulate_insn, #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH arch/x86/kvm +#define TRACE_INCLUDE_PATH asm #undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace +#define TRACE_INCLUDE_FILE kvm-trace /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ebbc4d8ab17..1ef6906c179 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -962,6 +962,10 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) +#define INTERRUPT_RETURN_NMI_SAFE \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_nmi_return), CLBR_NONE, \ + jmp *%cs:pv_cpu_ops+PV_CPU_nmi_return) + #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 82885099c86..3e0634cc127 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -181,6 +181,7 @@ struct pv_cpu_ops { /* Normal iret. 
Jump to this with the standard iret stack frame set up. */ void (*iret)(void); + void (*nmi_return)(void); void (*swapgs)(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f0b6e5dbc5a..58a37ae7565 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -82,6 +82,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_KERNEL_TRACE 9 /* kernel trace active */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -105,6 +106,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -121,18 +123,19 @@ struct thread_info { /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_KERNEL_TRACE) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_KERNEL_TRACE) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ (0x0000FFFF & \ ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \ - _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) + _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU|_TIF_KERNEL_TRACE)) /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ diff --git a/arch/x86/include/asm/trace-clock.h b/arch/x86/include/asm/trace-clock.h new file mode 100644 index 00000000000..8ca73323366 --- /dev/null +++ b/arch/x86/include/asm/trace-clock.h @@ -0,0 +1,73 @@ +#ifndef _ASM_X86_TRACE_CLOCK_H +#define _ASM_X86_TRACE_CLOCK_H + +/* + * linux/arch/x86/include/asm/trace-clock.h + * + * Copyright (C) 2005,2006,2008 + * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) + * + * Trace clock definitions for x86. 
+ */ + +#include <linux/timex.h> +#include <linux/time.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/atomic.h> + +/* Minimum duration of a probe, in cycles */ +#define TRACE_CLOCK_MIN_PROBE_DURATION 200 +#define TRACE_CLOCK_RES TRACE_CLOCK_MIN_PROBE_DURATION + +union lttng_timespec { + struct timespec ts; + u64 lttng_ts; +}; + +extern cycles_t trace_clock_async_tsc_read(void); + +extern int _trace_clock_is_sync; +static inline int trace_clock_is_sync(void) +{ + return _trace_clock_is_sync; +} + +static inline u32 trace_clock_read32(void) +{ + u32 cycles; + + if (likely(trace_clock_is_sync())) + cycles = (u32)get_cycles(); /* only need the 32 LSB */ + else + cycles = (u32)trace_clock_async_tsc_read(); + return cycles; +} + +static inline u64 trace_clock_read64(void) +{ + u64 cycles; + + if (likely(trace_clock_is_sync())) + cycles = get_cycles(); + else + cycles = trace_clock_async_tsc_read(); + return cycles; +} + +static inline u64 trace_clock_frequency(void) +{ + return (u64)cpu_khz * 1000; +} + +static inline u32 trace_clock_freq_scale(void) +{ + return 1; +} + +extern int get_trace_clock(void); +extern void put_trace_clock(void); + +extern void set_trace_clock_is_sync(int state); + +#endif /* _ASM_X86_TRACE_CLOCK_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 1ca132fc0d0..28e56e1ec3c 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -51,6 +51,18 @@ extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern unsigned long native_calibrate_tsc(void); +static inline cycles_t get_cycles_rate(void) +{ + if (check_tsc_unstable()) + return 0; + return (cycles_t)tsc_khz * 1000; +} + +static inline void get_cycles_barrier(void) +{ + rdtsc_barrier(); +} + /* * Boot-time check whether the TSCs are synchronized across * all CPUs/cores: @@ -62,4 +74,10 @@ extern int notsc_setup(char *); extern void save_sched_clock_state(void); extern void restore_sched_clock_state(void); +extern int test_tsc_synchronization(void); +extern int _tsc_is_sync; +static inline int tsc_is_sync(void) +{ + return _tsc_is_sync; +} #endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 3d61e204826..06abe8f409a 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -12,6 +12,7 @@ struct vsyscall_gtod_data { u32 wall_time_nsec; int sysctl_enabled; + int trace_clock_is_sync; struct timezone sys_tz; struct { /* extract of a clocksource struct */ cycle_t (*vread)(void); diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d0983d255fb..47b80f3ba4d 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -39,6 +39,14 @@ extern struct timezone sys_tz; extern void map_vsyscall(void); +#ifdef CONFIG_X86_64 +extern void update_trace_clock_is_sync_vdso(void); +#else +static inline void update_trace_clock_is_sync_vdso(void) +{ +} +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 34244b2cd88..717cf9c620b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -46,6 +46,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o @@ -66,9 +67,8 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o 
obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o +obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += setup_percpu.o -obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-y += apic/ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 76b96d74978..c604d23b4f3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -33,6 +33,7 @@ #include <linux/dmi.h> #include <linux/smp.h> #include <linux/mm.h> +#include <trace/irq.h> #include <asm/perf_event.h> #include <asm/x86_init.h> @@ -868,7 +869,9 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) */ exit_idle(); irq_enter(); + trace_irq_entry(LOCAL_TIMER_VECTOR, regs, NULL); local_apic_timer_interrupt(); + trace_irq_exit(IRQ_HANDLED); irq_exit(); set_irq_regs(old_regs); @@ -1788,6 +1791,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) exit_idle(); irq_enter(); + trace_irq_entry(SPURIOUS_APIC_VECTOR, NULL, NULL); /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... @@ -1802,6 +1806,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) /* see sw-dev-man vol 3, chapter 7.4.13.5 */ pr_info("spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); + trace_irq_exit(IRQ_HANDLED); irq_exit(); } @@ -1814,6 +1819,7 @@ void smp_error_interrupt(struct pt_regs *regs) exit_idle(); irq_enter(); + trace_irq_entry(ERROR_APIC_VECTOR, NULL, NULL); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); @@ -1834,6 +1840,7 @@ void smp_error_interrupt(struct pt_regs *regs) */ pr_debug("APIC error on CPU%d: %02x(%02x)\n", smp_processor_id(), v , v1); + trace_irq_exit(IRQ_HANDLED); irq_exit(); } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 0e4f24c2a74..60939d5f226 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -227,6 +227,7 @@ #include <linux/suspend.h> #include <linux/kthread.h> #include <linux/jiffies.h> +#include <linux/idle.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -235,6 +236,7 @@ #include <asm/olpc.h> #include <asm/paravirt.h> #include <asm/reboot.h> +#include <asm/idle.h> #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -947,10 +949,17 @@ recalc: break; } } + enter_idle(); if (original_pm_idle) original_pm_idle(); else default_idle(); + /* + * In many cases the interrupt that ended idle + * has already called exit_idle. But some idle + * loops can be woken up without interrupt. 
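Aside on the trace_irq_entry()/trace_irq_exit() pairs added around the APIC handlers above (the apm_32.c comment resumes below): a tracer attaches through tracepoint registration. The sketch infers the probe prototypes from the call sites; trace/irq.h itself is not in this diff, and the single-argument registration form is an assumption (trees whose tracepoints carry a private-data pointer differ):

#include <linux/module.h>
#include <linux/ptrace.h>	/* struct pt_regs */
#include <linux/interrupt.h>	/* struct irqaction, irqreturn_t */
#include <trace/irq.h>

/* Prototypes inferred from trace_irq_entry(vector, regs, action)
 * and trace_irq_exit(retval) as called in this patch. */
static void probe_irq_entry(unsigned int id, struct pt_regs *regs,
			    struct irqaction *action)
{
	/* record (id, instruction pointer) into a tracer buffer */
}

static void probe_irq_exit(irqreturn_t retval)
{
	/* record whether the handler returned IRQ_HANDLED */
}

static int __init my_irq_probes_init(void)
{
	int ret;

	ret = register_trace_irq_entry(probe_irq_entry);
	if (ret)
		return ret;
	return register_trace_irq_exit(probe_irq_exit);
}
module_init(my_irq_probes_init);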
+ */ + __exit_idle(); local_irq_disable(); jiffies_since_last_check = jiffies - last_jiffies; if (jiffies_since_last_check > idle_period) diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a4088dda37..677f8475d9d 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -111,6 +111,7 @@ void foo(void) OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return); OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4a6aeedcd96..1aea11cd840 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -58,6 +58,7 @@ int main(void) OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return); OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1d59834396b..6052f6f65a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1069,6 +1069,7 @@ unsigned long kernel_eflags; * debugging, no special alignment required. */ DEFINE_PER_CPU(struct orig_ist, orig_ist); +EXPORT_PER_CPU_SYMBOL_GPL(orig_ist); #else /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6f8c5e9da97..c8a6411d8ba 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -23,6 +23,7 @@ #include <linux/init.h> #include <linux/smp.h> #include <linux/cpu.h> +#include <trace/irq.h> #include <asm/processor.h> #include <asm/system.h> @@ -402,8 +403,10 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) { exit_idle(); irq_enter(); + trace_irq_entry(THERMAL_APIC_VECTOR, regs, NULL); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); + trace_irq_exit(IRQ_HANDLED); irq_exit(); /* Ack only at the end to avoid potential reentry */ ack_APIC_irq(); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index df20723a6a1..6bed23e1c74 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -15,6 +15,7 @@ #include <linux/bug.h> #include <linux/nmi.h> #include <linux/sysfs.h> +#include <linux/ltt-core.h> #include <asm/stacktrace.h> @@ -253,6 +254,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) if (!signr) return; + if (in_nmi()) + panic("Fatal exception in non-maskable interrupt"); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) @@ -277,6 +280,10 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); +#ifdef CONFIG_LTT + printk(KERN_EMERG "LTT NESTING LEVEL : %u", __get_cpu_var(ltt_nesting)); + printk("\n"); +#endif sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c8b4efad7eb..2fae6c570fd 100644 --- a/arch/x86/kernel/entry_32.S +++ 
b/arch/x86/kernel/entry_32.S @@ -80,6 +80,8 @@ #define nr_syscalls ((syscall_table_size)/4) +#define NMI_MASK 0x04000000 + #ifdef CONFIG_PREEMPT #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else @@ -321,8 +323,32 @@ END(ret_from_fork) # userspace resumption stub bypassing syscall exit tracing ALIGN RING0_PTREGS_FRAME + ret_from_exception: preempt_stop(CLBR_ANY) + GET_THREAD_INFO(%ebp) + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS + movb PT_CS(%esp), %al + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jae resume_userspace # returning to v8086 or userspace + testl $NMI_MASK,TI_preempt_count(%ebp) + jz resume_kernel /* Not nested over NMI ? */ + testw $X86_EFLAGS_TF, PT_EFLAGS(%esp) + jnz resume_kernel /* + * If single-stepping an NMI handler, + * use the normal iret path instead of + * the popf/lret because lret would be + * single-stepped. It should not + * happen : it will reactivate NMIs + * prematurely. + */ + TRACE_IRQS_IRET + RESTORE_REGS + addl $4, %esp # skip orig_eax/error_code + CFI_ADJUST_CFA_OFFSET -4 + INTERRUPT_RETURN_NMI_SAFE + ret_from_intr: GET_THREAD_INFO(%ebp) check_userspace: @@ -906,6 +932,10 @@ ENTRY(native_iret) .previous END(native_iret) +ENTRY(native_nmi_return) + NATIVE_INTERRUPT_RETURN_NMI_SAFE # Should we deal with popf exception ? +END(native_nmi_return) + ENTRY(native_irq_enable_sysexit) sti sysexit diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index aed1ffbeb0c..c841c0fb5cc 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -163,6 +163,8 @@ GLOBAL(return_to_handler) #endif +#define NMI_MASK 0x04000000 + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif @@ -515,6 +517,8 @@ sysret_check: /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: + testl $_TIF_KERNEL_TRACE,%edx /* Re-read : concurrently changed */ + jnz ret_from_sys_call_trace bt $TIF_NEED_RESCHED,%edx jnc sysret_signal TRACE_IRQS_ON @@ -524,6 +528,16 @@ sysret_careful: popq_cfi %rdi jmp sysret_check +ret_from_sys_call_trace: + TRACE_IRQS_ON + sti + SAVE_REST + FIXUP_TOP_OF_STACK %rdi + movq %rsp,%rdi + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + jmp int_ret_from_sys_call + /* Handle a signal */ sysret_signal: TRACE_IRQS_ON @@ -872,6 +886,9 @@ ENTRY(native_iret) .section __ex_table,"a" .quad native_iret, bad_iret .previous + +ENTRY(native_nmi_return) + NATIVE_INTERRUPT_RETURN_NMI_SAFE #endif .section .fixup,"ax" @@ -924,6 +941,24 @@ retint_signal: GET_THREAD_INFO(%rcx) jmp retint_with_reschedule + /* Returning to kernel space from exception. */ + /* rcx: threadinfo. interrupts off. */ +ENTRY(retexc_kernel) + testl $NMI_MASK,TI_preempt_count(%rcx) + jz retint_kernel /* Not nested over NMI ? */ + testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp) /* trap flag? */ + jnz retint_kernel /* + * If single-stepping an NMI handler, + * use the normal iret path instead of + * the popf/lret because lret would be + * single-stepped. It should not + * happen : it will reactivate NMIs + * prematurely. + */ + RESTORE_ARGS 0,8,0 + TRACE_IRQS_IRETQ + INTERRUPT_RETURN_NMI_SAFE + #ifdef CONFIG_PREEMPT /* Returning to kernel space. Check if we need preemption */ /* rcx: threadinfo. interrupts off. 
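To make the new return paths easier to follow, here is the decision the entry code above implements, rendered as C. NMI_MASK mirrors the value defined at the top of entry_32.S/entry_64.S, and normal_iret()/nmi_safe_return() are stand-ins for the two assembly exits, not real symbols:

#include <linux/preempt.h>		/* preempt_count() */
#include <asm/ptrace.h>			/* struct pt_regs */
#include <asm/processor-flags.h>	/* X86_EFLAGS_TF */

#define NMI_MASK 0x04000000	/* mirrors entry_32.S / entry_64.S */

/* Stand-ins for the assembly return paths (illustrative only). */
extern void normal_iret(struct pt_regs *regs);
extern void nmi_safe_return(struct pt_regs *regs);	/* popf + ret/lret */

static void exception_return(struct pt_regs *regs)
{
	if (!(preempt_count() & NMI_MASK))
		normal_iret(regs);	/* not nested over an NMI */
	else if (regs->flags & X86_EFLAGS_TF)
		normal_iret(regs);	/* single-stepping: the lret would
					 * itself be single-stepped and
					 * re-enable NMIs prematurely */
	else
		nmi_safe_return(regs);	/* returns without unmasking NMIs */
}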
*/ @@ -1361,12 +1396,18 @@ ENTRY(paranoid_exit) paranoid_swapgs: TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK +paranoid_restore_no_nmi: RESTORE_ALL 8 jmp irq_return paranoid_restore: + GET_THREAD_INFO(%rcx) TRACE_IRQS_IRETQ 0 + testl $NMI_MASK,TI_preempt_count(%rcx) + jz paranoid_restore_no_nmi /* Nested over NMI ? */ + testw $X86_EFLAGS_TF,EFLAGS-0(%rsp) /* trap flag? */ + jnz paranoid_restore_no_nmi RESTORE_ALL 8 - jmp irq_return + INTERRUPT_RETURN_NMI_SAFE paranoid_userspace: GET_THREAD_INFO(%rcx) movl TI_flags(%rcx),%ebx @@ -1465,7 +1506,7 @@ ENTRY(error_exit) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax - jne retint_kernel + jne retexc_kernel LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 869e1aeeb71..1fc5da98373 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -156,6 +156,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || + type == PARAVIRT_PATCH(pv_cpu_ops.nmi_return) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) @@ -204,6 +205,7 @@ static void native_flush_tlb_single(unsigned long addr) /* These are in entry.S */ extern void native_iret(void); +extern void native_nmi_return(void); extern void native_irq_enable_sysexit(void); extern void native_usergs_sysret32(void); extern void native_usergs_sysret64(void); @@ -373,6 +375,7 @@ struct pv_cpu_ops pv_cpu_ops = { .usergs_sysret64 = native_usergs_sysret64, #endif .iret = native_iret, + .nmi_return = native_nmi_return, .swapgs = native_swapgs, .set_iopl_mask = native_set_iopl_mask, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index d9f32e6d6ab..ac372778bbc 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -1,10 +1,13 @@ -#include <asm/paravirt.h> +#include <linux/stringify.h> +#include <linux/irqflags.h> DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); +DEF_NATIVE(pv_cpu_ops, nmi_return, + __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE)); DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); @@ -41,6 +44,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_cpu_ops, iret); + PATCH_SITE(pv_cpu_ops, nmi_return); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 3f08f34f93e..5339e67dc15 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -1,12 +1,15 @@ +#include <linux/irqflags.h> +#include <linux/stringify.h> #include <asm/paravirt.h> #include <asm/asm-offsets.h> -#include <linux/stringify.h> DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_cpu_ops, iret, 
"iretq"); +DEF_NATIVE(pv_cpu_ops, nmi_return, + __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE)); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_cpu_ops, iret); + PATCH_SITE(pv_cpu_ops, nmi_return); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff455419898..e0e4ffcad48 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -13,6 +13,7 @@ #include <linux/dmi.h> #include <linux/utsname.h> #include <trace/events/power.h> +#include <trace/sched.h> #include <linux/hw_breakpoint.h> #include <asm/cpu.h> #include <asm/system.h> @@ -23,6 +24,8 @@ #include <asm/i387.h> #include <asm/debugreg.h> +DEFINE_TRACE(sched_kthread_create); + struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -278,6 +281,7 @@ extern void kernel_thread_helper(void); int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct pt_regs regs; + long pid; memset(®s, 0, sizeof(regs)); @@ -299,7 +303,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.flags = X86_EFLAGS_IF | 0x2; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, + 0, ®s, 0, NULL, NULL); + trace_sched_kthread_create(fn, pid); + return pid; } EXPORT_SYMBOL(kernel_thread); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af4..3a8c9ee0fc6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -38,6 +38,9 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> +#include <linux/notifier.h> +#include <linux/idle.h> +#include <trace/pm.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -59,6 +62,38 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +DEFINE_TRACE(pm_idle_exit); +DEFINE_TRACE(pm_idle_entry); + +static DEFINE_PER_CPU(unsigned char, is_idle); + +void enter_idle(void) +{ + percpu_write(is_idle, 1); + trace_pm_idle_entry(); + notify_idle(IDLE_START); +} +EXPORT_SYMBOL_GPL(enter_idle); + +void __exit_idle(void) +{ + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) + return; + notify_idle(IDLE_END); + trace_pm_idle_exit(); +} +EXPORT_SYMBOL_GPL(__exit_idle); + +/* Called from interrupts to signify idle end */ +void exit_idle(void) +{ + /* idle loop has pid 0 */ + if (current->pid) + return; + __exit_idle(); +} +EXPORT_SYMBOL_GPL(exit_idle); + /* * Return saved PC of a blocked thread. */ @@ -107,10 +142,18 @@ void cpu_idle(void) play_dead(); local_irq_disable(); + enter_idle(); /* Don't trace irqs off for idle */ stop_critical_timings(); pm_idle(); start_critical_timings(); + /* + * In many cases the interrupt that ended idle + * has already called exit_idle. But some idle + * loops can be woken up without interrupt. 
+ */ + __exit_idle(); + trace_power_end(smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bd387e8f73b..b21b379013e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -35,8 +35,10 @@ #include <linux/tick.h> #include <linux/prctl.h> #include <linux/uaccess.h> +#include <linux/idle.h> #include <linux/io.h> #include <linux/ftrace.h> +#include <trace/pm.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -51,37 +53,36 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> +#include <trace/events/power.h> + +DEFINE_TRACE(pm_idle_exit); +DEFINE_TRACE(pm_idle_entry); + asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); static DEFINE_PER_CPU(unsigned char, is_idle); -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); - void enter_idle(void) { percpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); + /* + * Trace last event before calling notifiers. Notifiers flush + * data from buffers before going to idle. + */ + trace_pm_idle_entry(); + notify_idle(IDLE_START); } +EXPORT_SYMBOL_GPL(enter_idle); -static void __exit_idle(void) +void __exit_idle(void) { if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); + notify_idle(IDLE_END); + trace_pm_idle_exit(); } +EXPORT_SYMBOL_GPL(__exit_idle); /* Called from interrupts to signify idle end */ void exit_idle(void) @@ -91,6 +92,7 @@ void exit_idle(void) return; __exit_idle(); } +EXPORT_SYMBOL_GPL(exit_idle); #ifndef CONFIG_SMP static inline void play_dead(void) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45892dc4b72..ee3024d4f61 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <trace/syscall.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -152,6 +153,9 @@ static const int arg_offs_table[] = { X86_EFLAGS_DF | X86_EFLAGS_OF | \ X86_EFLAGS_RF | X86_EFLAGS_AC)) +DEFINE_TRACE(syscall_entry); +DEFINE_TRACE(syscall_exit); + /* * Determines whether a value may be installed in a segment register. */ @@ -1361,6 +1365,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) if (test_thread_flag(TIF_SINGLESTEP)) regs->flags |= X86_EFLAGS_TF; + trace_syscall_entry(regs, regs->orig_ax); + /* do the secure computing check first */ secure_computing(regs->orig_ax); @@ -1396,6 +1402,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) { bool step; + trace_syscall_exit(regs->ax); + if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index de87d600829..5e74f6aa3c0 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -1,8 +1,11 @@ /* System call table for x86-64. 
*/ #include <linux/linkage.h> +#include <linux/module.h> #include <linux/sys.h> #include <linux/cache.h> +#include <linux/marker.h> +#include <linux/kallsyms.h> #include <asm/asm-offsets.h> #define __NO_STUBS @@ -27,3 +30,18 @@ const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { [0 ... __NR_syscall_max] = &sys_ni_syscall, #include <asm/unistd_64.h> }; + +void ltt_dump_sys_call_table(void *call_data) +{ + int i; + char namebuf[KSYM_NAME_LEN]; + + for (i = 0; i < __NR_syscall_max + 1; i++) { + sprint_symbol(namebuf, (unsigned long)sys_call_table[i]); + __trace_mark(0, syscall_state, sys_call_table, + call_data, + "id %d address %p symbol %s", + i, (void*)sys_call_table[i], namebuf); + } +} +EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table); diff --git a/arch/x86/kernel/trace-clock.c b/arch/x86/kernel/trace-clock.c new file mode 100644 index 00000000000..47539e28276 --- /dev/null +++ b/arch/x86/kernel/trace-clock.c @@ -0,0 +1,302 @@ +/* + * arch/x86/kernel/trace-clock.c + * + * Trace clock for x86. + * + * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>, October 2008 + */ + +#include <linux/module.h> +#include <linux/trace-clock.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <linux/cpu.h> +#include <linux/posix-timers.h> +#include <asm/vgtod.h> + +static cycles_t trace_clock_last_tsc; +static DEFINE_PER_CPU(struct timer_list, update_timer); +static DEFINE_SPINLOCK(async_tsc_lock); +static int async_tsc_refcount; /* Number of readers */ +static int async_tsc_enabled; /* Async TSC enabled on all online CPUs */ + +int _trace_clock_is_sync = 1; +EXPORT_SYMBOL_GPL(_trace_clock_is_sync); + +/* + * Is the trace clock being used by user-space ? We leave the trace clock active + * as soon as user-space starts using it. We never unref the trace clock + * reference taken by user-space. + */ +static atomic_t user_trace_clock_ref; + +/* + * Called by check_tsc_sync_source from CPU hotplug. + */ +void set_trace_clock_is_sync(int state) +{ + _trace_clock_is_sync = state; + update_trace_clock_is_sync_vdso(); +} + +#if BITS_PER_LONG == 64 +static cycles_t read_last_tsc(void) +{ + return trace_clock_last_tsc; +} +#else +/* + * A cmpxchg64 update can happen concurrently. Based on the assumption that + * two cmpxchg64 will never update it to the same value (the count always + * increases), reading it twice insures that we read a coherent value with the + * same "sequence number". + */ +static cycles_t read_last_tsc(void) +{ + cycles_t val1, val2; + + val1 = trace_clock_last_tsc; + for (;;) { + val2 = val1; + barrier(); + val1 = trace_clock_last_tsc; + if (likely(val1 == val2)) + break; + } + return val1; +} +#endif + +/* + * Support for architectures with non-sync TSCs. + * When the local TSC is discovered to lag behind the highest TSC counter, we + * increment the TSC count of an amount that should be, ideally, lower than the + * execution time of this routine, in cycles : this is the granularity we look + * for : we must be able to order the events. + */ +notrace cycles_t trace_clock_async_tsc_read(void) +{ + cycles_t new_tsc, last_tsc; + + WARN_ON(!async_tsc_refcount || !async_tsc_enabled); + new_tsc = get_cycles(); + last_tsc = read_last_tsc(); + do { + if (new_tsc < last_tsc) + new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION; + /* + * If cmpxchg fails with a value higher than the new_tsc, don't + * retry : the value has been incremented and the events + * happened almost at the same time. 
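A side note on read_last_tsc() above (the cmpxchg retry rule continues just below): on 32-bit, a 64-bit load can tear, so the value is read twice until two consecutive reads agree; since the counter never stores the same value twice, equality proves both halves came from one update. The same idea in a self-contained form, under that never-repeats assumption:

#include <linux/compiler.h>	/* barrier(), likely() */
#include <linux/types.h>

/* Torn-read avoidance for a monotonically increasing u64 that is
 * updated with cmpxchg64 on a 32-bit machine. */
static u64 coherent_read_u64(const volatile u64 *p)
{
	u64 v1, v2;

	v1 = *p;
	for (;;) {
		v2 = v1;
		barrier();	/* force a fresh load from memory */
		v1 = *p;
		if (likely(v1 == v2))
			break;	/* both 32-bit halves match one update */
	}
	return v1;
}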
+ * We must retry if cmpxchg fails with a lower value : + * it means that we are the CPU with highest frequency and + * therefore MUST update the value. + */ + last_tsc = cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc); + } while (unlikely(last_tsc < new_tsc)); + return new_tsc; +} +EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read); + +static void update_timer_ipi(void *info) +{ + (void)trace_clock_async_tsc_read(); +} + +/* + * update_timer_fct : - Timer function to resync the clocks + * @data: unused + * + * Fires every jiffy. + */ +static void update_timer_fct(unsigned long data) +{ + (void)trace_clock_async_tsc_read(); + mod_timer_pinned(&per_cpu(update_timer, smp_processor_id()), + jiffies + 1); +} + +static void enable_trace_clock(int cpu) +{ + init_timer(&per_cpu(update_timer, cpu)); + per_cpu(update_timer, cpu).function = update_timer_fct; + per_cpu(update_timer, cpu).expires = jiffies + 1; + smp_call_function_single(cpu, update_timer_ipi, NULL, 1); + add_timer_on(&per_cpu(update_timer, cpu), cpu); +} + +static void disable_trace_clock(int cpu) +{ + del_timer_sync(&per_cpu(update_timer, cpu)); +} + +/* + * hotcpu_callback - CPU hotplug callback + * @nb: notifier block + * @action: hotplug action to take + * @hcpu: CPU number + * + * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD) + */ +static int __cpuinit hotcpu_callback(struct notifier_block *nb, + unsigned long action, + void *hcpu) +{ + unsigned int hotcpu = (unsigned long)hcpu; + int cpu; + + spin_lock(&async_tsc_lock); + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + /* + * trace_clock_is_sync() is updated by set_trace_clock_is_sync() + * code, protected by cpu hotplug disable. + * It is ok to let the hotplugged CPU read the timebase before + * the CPU_ONLINE notification. It's just there to give a + * maximum bound to the TSC error. + */ + if (async_tsc_refcount && !trace_clock_is_sync()) { + if (!async_tsc_enabled) { + async_tsc_enabled = 1; + for_each_online_cpu(cpu) + enable_trace_clock(cpu); + } else { + enable_trace_clock(hotcpu); + } + } + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + if (!async_tsc_refcount && num_online_cpus() == 1) + set_trace_clock_is_sync(1); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + /* + * We cannot stop the trace clock on other CPUs when readers are + * active even if we go back to a synchronized state (1 CPU) + * because the CPU left could be the one lagging behind. 
+ */ + if (async_tsc_refcount && async_tsc_enabled) + disable_trace_clock(hotcpu); + if (!async_tsc_refcount && num_online_cpus() == 1) + set_trace_clock_is_sync(1); + break; +#endif /* CONFIG_HOTPLUG_CPU */ + } + spin_unlock(&async_tsc_lock); + + return NOTIFY_OK; +} + +int get_trace_clock(void) +{ + int cpu; + + if (!trace_clock_is_sync()) { + printk(KERN_WARNING + "Trace clock falls back on cache-line bouncing\n" + "workaround due to non-synchronized TSCs.\n" + "This workaround preserves event order across CPUs.\n" + "Please consider disabling Speedstep or PowerNow and\n" + "using kernel parameters " + "\"force_tsc_sync=1 idle=poll\"\n" + "for accurate and fast tracing clock source.\n"); + } + + get_online_cpus(); + spin_lock(&async_tsc_lock); + if (async_tsc_refcount++ || trace_clock_is_sync()) + goto end; + + async_tsc_enabled = 1; + for_each_online_cpu(cpu) + enable_trace_clock(cpu); +end: + spin_unlock(&async_tsc_lock); + put_online_cpus(); + return 0; +} +EXPORT_SYMBOL_GPL(get_trace_clock); + +void put_trace_clock(void) +{ + int cpu; + + get_online_cpus(); + spin_lock(&async_tsc_lock); + WARN_ON(async_tsc_refcount <= 0); + if (async_tsc_refcount != 1 || !async_tsc_enabled) + goto end; + + for_each_online_cpu(cpu) + disable_trace_clock(cpu); + async_tsc_enabled = 0; +end: + async_tsc_refcount--; + if (!async_tsc_refcount && num_online_cpus() == 1) + set_trace_clock_is_sync(1); + spin_unlock(&async_tsc_lock); + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(put_trace_clock); + +static int posix_get_trace(clockid_t which_clock, struct timespec *tp) +{ + union lttng_timespec *lts = (union lttng_timespec *) tp; + int ret; + + /* + * Yes, there is a race here that would lead to refcount being + * incremented more than once, but all we care is to leave the trace + * clock active forever, so precise accounting is not needed. + */ + if (unlikely(!atomic_read(&user_trace_clock_ref))) { + ret = get_trace_clock(); + if (ret) + return ret; + atomic_inc(&user_trace_clock_ref); + } + lts->lttng_ts = trace_clock_read64(); + return 0; +} + +static int posix_get_trace_freq(clockid_t which_clock, struct timespec *tp) +{ + union lttng_timespec *lts = (union lttng_timespec *) tp; + + lts->lttng_ts = trace_clock_frequency(); + return 0; +} + +static int posix_get_trace_res(const clockid_t which_clock, struct timespec *tp) +{ + union lttng_timespec *lts = (union lttng_timespec *) tp; + + lts->lttng_ts = TRACE_CLOCK_RES; + return 0; +} + +static __init int init_unsync_trace_clock(void) +{ + struct k_clock clock_trace = { + .clock_getres = posix_get_trace_res, + .clock_get = posix_get_trace, + }; + struct k_clock clock_trace_freq = { + .clock_getres = posix_get_trace_res, + .clock_get = posix_get_trace_freq, + }; + + register_posix_clock(CLOCK_TRACE, &clock_trace); + register_posix_clock(CLOCK_TRACE_FREQ, &clock_trace_freq); + + hotcpu_notifier(hotcpu_callback, 4); + return 0; +} +early_initcall(init_unsync_trace_clock); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b9b67166f9d..bc618945fb1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -31,6 +31,7 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/io.h> +#include <trace/trap.h> #ifdef CONFIG_EISA #include <linux/ioport.h> @@ -52,6 +53,7 @@ #include <asm/atomic.h> #include <asm/system.h> #include <asm/traps.h> +#include <asm/unistd.h> #include <asm/desc.h> #include <asm/i387.h> #include <asm/mce.h> @@ -76,11 +78,21 @@ char ignore_fpu_irq; * F0 0F bug workaround. 
*/ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; + +extern unsigned long sys_call_table[]; +extern unsigned long syscall_table_size; + #endif DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); +/* + * Also used in arch/x86/mm/fault.c. + */ +DEFINE_TRACE(trap_entry); +DEFINE_TRACE(trap_exit); + static int ignore_nmis; int unknown_nmi_panic; @@ -122,6 +134,8 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, { struct task_struct *tsk = current; + trace_trap_entry(regs, trapnr); + #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* @@ -168,7 +182,7 @@ trap_signal: force_sig_info(signr, info, tsk); else force_sig(signr, tsk); - return; + goto end; kernel_trap: if (!fixup_exception(regs)) { @@ -176,15 +190,17 @@ kernel_trap: tsk->thread.trap_no = trapnr; die(str, regs, error_code); } - return; + goto end; #ifdef CONFIG_X86_32 vm86_trap: if (handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr)) goto trap_signal; - return; + goto end; #endif +end: + trace_trap_exit(); } #define DO_ERROR(trapnr, signr, str, name) \ @@ -285,7 +301,9 @@ do_general_protection(struct pt_regs *regs, long error_code) printk("\n"); } + trace_trap_entry(regs, 13); force_sig(SIGSEGV, tsk); + trace_trap_exit(); return; #ifdef CONFIG_X86_32 @@ -398,13 +416,15 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; + trace_trap_entry(regs, 2); + /* * CPU-specific NMI must be processed before non-CPU-specific * NMI, otherwise we may lose it, because the CPU-specific * NMI can not be detected/processed on other CPUs. */ if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; + goto end; /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ raw_spin_lock(&nmi_reason_lock); @@ -423,11 +443,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) reassert_nmi(); #endif raw_spin_unlock(&nmi_reason_lock); - return; + goto end; } raw_spin_unlock(&nmi_reason_lock); unknown_nmi_error(reason, regs); +end: + trace_trap_exit(); } dotraplinkage notrace __kprobes void @@ -570,8 +592,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) preempt_conditional_sti(regs); if (regs->flags & X86_VM_MASK) { + trace_trap_entry(regs, 1); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); + trace_trap_exit(); preempt_conditional_cli(regs); return; } @@ -589,13 +613,32 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) regs->flags &= ~X86_EFLAGS_TF; } si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) { + trace_trap_entry(regs, 1); send_sigtrap(tsk, regs, error_code, si_code); + trace_trap_exit(); + } preempt_conditional_cli(regs); return; } +#ifdef CONFIG_X86_32 +void ltt_dump_sys_call_table(void *call_data) +{ + int i; + char namebuf[KSYM_NAME_LEN]; + + for (i = 0; i < NR_syscalls; i++) { + sprint_symbol(namebuf, sys_call_table[i]); + __trace_mark(0, syscall_state, sys_call_table, call_data, + "id %d address %p symbol %s", + i, (void*)sys_call_table[i], namebuf); + } +} +EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table); +#endif + /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous @@ -701,11 +744,13 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) dotraplinkage void 
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { + trace_trap_entry(regs, 16); conditional_sti(regs); #if 0 /* No need to warn about this any longer. */ printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); #endif + trace_trap_exit(); } asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) @@ -738,6 +783,21 @@ void __math_state_restore(void) tsk->fpu_counter++; } +void ltt_dump_idt_table(void *call_data) +{ + int i; + char namebuf[KSYM_NAME_LEN]; + + for (i = 0; i < IDT_ENTRIES; i++) { + unsigned long address = gate_offset(idt_table[i]); + sprint_symbol(namebuf, address); + __trace_mark(0, irq_state, idt_table, call_data, + "irq %d address %p symbol %s", + i, (void *)address, namebuf); + } +} +EXPORT_SYMBOL_GPL(ltt_dump_idt_table); + /* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c deleted file mode 100644 index 0aa5fed8b9e..00000000000 --- a/arch/x86/kernel/tsc_sync.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * check TSC synchronization. - * - * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar - * - * We check whether all boot CPUs have their TSC's synchronized, - * print a warning if not and turn off the TSC clock-source. - * - * The warp-check is point-to-point between two CPUs, the CPU - * initiating the bootup is the 'source CPU', the freshly booting - * CPU is the 'target CPU'. - * - * Only two CPUs may participate - they can enter in any order. - * ( The serial nature of the boot logic and the CPU hotplug lock - * protects against more than 2 CPUs entering this code. ) - */ -#include <linux/spinlock.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/nmi.h> -#include <asm/tsc.h> - -/* - * Entry/exit counters that make sure that both CPUs - * run the measurement code at once: - */ -static __cpuinitdata atomic_t start_count; -static __cpuinitdata atomic_t stop_count; - -/* - * We use a raw spinlock in this exceptional case, because - * we want to have the fastest, inlined, non-debug version - * of a critical section, to be able to prove TSC time-warps: - */ -static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED; - -static __cpuinitdata cycles_t last_tsc; -static __cpuinitdata cycles_t max_warp; -static __cpuinitdata int nr_warps; - -/* - * TSC-warp measurement loop running on both CPUs: - */ -static __cpuinit void check_tsc_warp(void) -{ - cycles_t start, now, prev, end; - int i; - - rdtsc_barrier(); - start = get_cycles(); - rdtsc_barrier(); - /* - * The measurement runs for 20 msecs: - */ - end = start + tsc_khz * 20ULL; - now = start; - - for (i = 0; ; i++) { - /* - * We take the global lock, measure TSC, save the - * previous TSC that was measured (possibly on - * another CPU) and update the previous TSC timestamp. 
-		 */
-		arch_spin_lock(&sync_lock);
-		prev = last_tsc;
-		rdtsc_barrier();
-		now = get_cycles();
-		rdtsc_barrier();
-		last_tsc = now;
-		arch_spin_unlock(&sync_lock);
-
-		/*
-		 * Be nice every now and then (and also check whether
-		 * measurement is done [we also insert a 10 million
-		 * loops safety exit, so we dont lock up in case the
-		 * TSC readout is totally broken]):
-		 */
-		if (unlikely(!(i & 7))) {
-			if (now > end || i > 10000000)
-				break;
-			cpu_relax();
-			touch_nmi_watchdog();
-		}
-		/*
-		 * Outside the critical section we can now see whether
-		 * we saw a time-warp of the TSC going backwards:
-		 */
-		if (unlikely(prev > now)) {
-			arch_spin_lock(&sync_lock);
-			max_warp = max(max_warp, prev - now);
-			nr_warps++;
-			arch_spin_unlock(&sync_lock);
-		}
-	}
-	WARN(!(now-start),
-		"Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
-			now-start, end-start);
-}
-
-/*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
- */
-void __cpuinit check_tsc_sync_source(int cpu)
-{
-	int cpus = 2;
-
-	/*
-	 * No need to check if we already know that the TSC is not
-	 * synchronized:
-	 */
-	if (unsynchronized_tsc())
-		return;
-
-	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
-		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
-			pr_info(
-			"Skipped synchronization checks as TSC is reliable.\n");
-		return;
-	}
-
-	/*
-	 * Reset it - in case this is a second bootup:
-	 */
-	atomic_set(&stop_count, 0);
-
-	/*
-	 * Wait for the target to arrive:
-	 */
-	while (atomic_read(&start_count) != cpus-1)
-		cpu_relax();
-	/*
-	 * Trigger the target to continue into the measurement too:
-	 */
-	atomic_inc(&start_count);
-
-	check_tsc_warp();
-
-	while (atomic_read(&stop_count) != cpus-1)
-		cpu_relax();
-
-	if (nr_warps) {
-		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
-			smp_processor_id(), cpu);
-		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
-			   "turning off TSC clock.\n", max_warp);
-		mark_tsc_unstable("check_tsc_sync_source failed");
-	} else {
-		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
-			smp_processor_id(), cpu);
-	}
-
-	/*
-	 * Reset it - just in case we boot another CPU later:
-	 */
-	atomic_set(&start_count, 0);
-	nr_warps = 0;
-	max_warp = 0;
-	last_tsc = 0;
-
-	/*
-	 * Let the target continue with the bootup:
-	 */
-	atomic_inc(&stop_count);
-}
-
-/*
- * Freshly booted CPUs call into this:
- */
-void __cpuinit check_tsc_sync_target(void)
-{
-	int cpus = 2;
-
-	if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
-		return;
-
-	/*
-	 * Register this CPU's participation and wait for the
-	 * source CPU to start the measurement:
-	 */
-	atomic_inc(&start_count);
-	while (atomic_read(&start_count) != cpus)
-		cpu_relax();
-
-	check_tsc_warp();
-
-	/*
-	 * Ok, we are done:
-	 */
-	atomic_inc(&stop_count);
-
-	/*
-	 * Wait for the source CPU to print stuff:
-	 */
-	while (atomic_read(&stop_count) != cpus)
-		cpu_relax();
-}
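For review of the deletion above, the heart of the removed warp check condenses to a few lines: both CPUs funnel TSC reads through one spinlocked slot, and if any read comes back smaller than the previously recorded value, the TSCs are not synchronized. A condensed sketch of the deleted logic, drawn directly from check_tsc_warp() above; the function name is made up and this is not a drop-in replacement for the deleted file:

/* Condensed from the deleted check_tsc_warp(); shared by both CPUs. */
#include <linux/spinlock.h>
#include <asm/tsc.h>

static cycles_t last_tsc;
static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;

static int tsc_warp_seen(void)
{
	cycles_t prev, now;

	arch_spin_lock(&sync_lock);
	prev = last_tsc;		/* possibly recorded on the other CPU */
	rdtsc_barrier();
	now = get_cycles();
	rdtsc_barrier();
	last_tsc = now;
	arch_spin_unlock(&sync_lock);

	return prev > now;		/* time went backwards across CPUs */
}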
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c4b69..df18f14c473 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,6 +44,8 @@
 #include <asm/desc.h>
 #include <asm/topology.h>
 #include <asm/vgtod.h>
+#include <asm/trace-clock.h>
+#include <asm/timer.h>

 #define __vsyscall(nr) \
 	__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
@@ -61,6 +63,7 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
 {
 	.lock = SEQLOCK_UNLOCKED,
 	.sysctl_enabled = 1,
+	.trace_clock_is_sync = 1,
 };

 void update_vsyscall_tz(void)
@@ -73,6 +76,16 @@ void update_vsyscall_tz(void)
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }

+void update_trace_clock_is_sync_vdso(void)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+	vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
+	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+EXPORT_SYMBOL_GPL(update_trace_clock_is_sync_vdso);
+
 void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
 			struct clocksource *clock, u32 mult)
 {
@@ -89,6 +102,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
 	vsyscall_gtod_data.wall_to_monotonic = *wtm;
 	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
+	vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;

 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 3cece05e4ac..f894af174b8 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,7 +32,7 @@
 #include "irq.h"

 #include <linux/kvm_host.h>
-#include "trace.h"
+#include <asm/kvm-trace.h>

 static void pic_irq_request(struct kvm *kvm, int level);

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 93cf9d0d365..58bcbce5b02 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -36,7 +36,7 @@
 #include <asm/atomic.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
-#include "trace.h"
+#include <asm/kvm-trace.h>
 #include "x86.h"

 #ifndef CONFIG_X86_64
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f02b8edc3d4..3612044ed1f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -163,7 +163,7 @@ module_param(oos_shadow, bool, 0644);
 #include <trace/events/kvm.h>

 #define CREATE_TRACE_POINTS
-#include "mmutrace.h"
+#include <asm/kvm-mmutrace.h>

 #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 63fec1531e8..b14429dda24 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -34,7 +34,7 @@
 #include <asm/kvm_para.h>
 #include <asm/virtext.h>

-#include "trace.h"
+#include <asm/kvm-trace.h>

 #define __ex(x) __kvm_handle_fault_on_reboot(x)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf89ec2cfb8..d12b42e234b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -40,7 +40,7 @@
 #include <asm/i387.h>
 #include <asm/xcr.h>

-#include "trace.h"
+#include <asm/kvm-trace.h>

 #define __ex(x) __kvm_handle_fault_on_reboot(x)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bcc0efce85b..6a8cb6fe5c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -47,7 +47,7 @@
 #include <trace/events/kvm.h>

 #define CREATE_TRACE_POINTS
-#include "trace.h"
+#include <asm/kvm-trace.h>

 #include <asm/debugreg.h>
 #include <asm/msr.h>
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0..07f7a272226 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1276,6 +1276,7 @@ __init void lguest_init(void)
 	pv_cpu_ops.cpuid = lguest_cpuid;
 	pv_cpu_ops.load_idt = lguest_load_idt;
 	pv_cpu_ops.iret = lguest_iret;
+	pv_cpu_ops.nmi_return = lguest_iret;
 	pv_cpu_ops.load_sp0 = lguest_load_sp0;
 	pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
 	pv_cpu_ops.set_ldt = lguest_set_ldt;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a..00309849aa1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -12,6 +12,7 @@
 #include <linux/mmiotrace.h>	/* kmmio_handler, ... */
 #include <linux/perf_event.h>	/* perf_sw_event */
 #include <linux/hugetlb.h>	/* hstate_index_to_shift */
+#include <trace/fault.h>

 #include <asm/traps.h>		/* dotraplinkage, ... */
 #include <asm/pgalloc.h>	/* pgd_*(), ... */
@@ -35,6 +36,11 @@ enum x86_pf_error_code {
 	PF_INSTR	=	1 << 4,
 };

+DEFINE_TRACE(page_fault_entry);
+DEFINE_TRACE(page_fault_exit);
+DEFINE_TRACE(page_fault_nosem_entry);
+DEFINE_TRACE(page_fault_nosem_exit);
+
 /*
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
@@ -720,6 +726,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	if (is_errata100(regs, address))
 		return;

+	trace_page_fault_nosem_entry(regs, 14, address);
 	if (unlikely(show_unhandled_signals))
 		show_signal_msg(regs, error_code, address, tsk);

@@ -729,6 +736,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	tsk->thread.trap_no = 14;

 	force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
+	trace_page_fault_nosem_exit();

 	return;
 }
@@ -1124,7 +1132,9 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault:
 	 */
+	trace_page_fault_entry(regs, 14, mm, vma, address, write);
 	fault = handle_mm_fault(mm, vma, address, flags);
+	trace_page_fault_exit(fault);

 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, error_code, address, fault);
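The new page_fault_entry/page_fault_exit pair brackets handle_mm_fault(), so a probe can classify fault outcomes straight from the return value. A minimal consumer sketch, not part of the patch: it assumes trace/fault.h declares trace_page_fault_exit(int) as the call site above implies, plus the (probe, data) tracepoint registration API; the module and function names are hypothetical.

/* pfcount.c -- hypothetical module counting major faults. */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>		/* VM_FAULT_MAJOR */
#include <trace/fault.h>

static atomic_long_t major_faults = ATOMIC_LONG_INIT(0);

/* Receives handle_mm_fault()'s return value, per the call site above. */
static void probe_pf_exit(void *data, int fault)
{
	if (fault & VM_FAULT_MAJOR)
		atomic_long_inc(&major_faults);
}

static int __init pfcount_init(void)
{
	return register_trace_page_fault_exit(probe_pf_exit, NULL);
}

static void __exit pfcount_exit(void)
{
	unregister_trace_page_fault_exit(probe_pf_exit, NULL);
	tracepoint_synchronize_unregister();
	pr_info("pfcount: %ld major faults\n", atomic_long_read(&major_faults));
}

module_init(pfcount_init);
module_exit(pfcount_exit);
MODULE_LICENSE("GPL");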
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8..14b9317eccb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <trace/irq.h>

 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -141,6 +142,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
 	f = &flush_state[sender];

+	trace_irq_entry(sender, regs, NULL);
+
 	if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
 		goto out;
 	/*
@@ -167,6 +170,7 @@ out:
 	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
 	smp_mb__after_clear_bit();
 	inc_irq_stat(irq_tlb_count);
+	trace_irq_exit(IRQ_HANDLED);
 }

 static void flush_tlb_others_ipi(const struct cpumask *cpumask,
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index ee55754cc3c..7bc481508d0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -22,6 +22,8 @@
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
+#include <asm/trace-clock.h>
+#include <asm/timer.h>
 #include "vextern.h"

 #define gtod vdso_vsyscall_gtod_data
@@ -111,6 +113,46 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts)
 	return 0;
 }

+/*
+ * If the TSC is synchronized across all CPUs, read the current TSC
+ * and export its value in the nsec field of the timespec.
+ */
+notrace static noinline int do_trace_clock(struct timespec *ts)
+{
+	unsigned long seq;
+	union lttng_timespec *lts = (union lttng_timespec *) ts;
+
+	do {
+		seq = read_seqbegin(&gtod->lock);
+		if (unlikely(!gtod->trace_clock_is_sync))
+			return vdso_fallback_gettime(CLOCK_TRACE, ts);
+		/*
+		 * We don't protect the rdtsc with the rdtsc_barrier
+		 * because we cannot achieve that level of precision
+		 * under tracing anyway.  Recording an event is not an
+		 * atomic operation, so the small chance of imprecision
+		 * doesn't justify the overhead of a barrier.
+		 */
+		/*
+		 * TODO: check that vget_cycles(), using paravirt ops,
+		 * will match the TSC read by get_cycles() at the
+		 * kernel level.
+		 */
+		lts->lttng_ts = vget_cycles();
+	} while (unlikely(read_seqretry(&gtod->lock, seq)));
+
+	return 0;
+}
+
+/*
+ * Returns the cpu_khz.  It needs to be a syscall because we can't
+ * access this value from userspace; it is only called at the
+ * beginning of a tracing session.
+ */
+notrace static noinline int do_trace_clock_freq(struct timespec *ts)
+{
+	return vdso_fallback_gettime(CLOCK_TRACE_FREQ, ts);
+}
+
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
 	if (likely(gtod->sysctl_enabled))
@@ -127,6 +169,12 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 			return do_realtime_coarse(ts);
 		case CLOCK_MONOTONIC_COARSE:
 			return do_monotonic_coarse(ts);
+		case CLOCK_TRACE:
+			return do_trace_clock(ts);
+		case CLOCK_TRACE_FREQ:
+			return do_trace_clock_freq(ts);
+		default:
+			return -EINVAL;
 		}
 	return vdso_fallback_gettime(clock, ts);
 }
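From userspace the new clock ids go through the ordinary clock_gettime() entry point, taking the vDSO fast path only while gtod->trace_clock_is_sync holds and falling back to a real syscall otherwise. A sketch of a caller (build with -lrt), not part of the patch: CLOCK_TRACE and CLOCK_TRACE_FREQ exist only in this patch set's headers, and how the cycle count and frequency are packed into the timespec is defined by union lttng_timespec rather than POSIX, so the fields are printed raw here.

/* Userspace sketch: read the LTTng trace clock through the vDSO. */
#include <stdio.h>
#include <time.h>

#ifndef CLOCK_TRACE
#error "CLOCK_TRACE requires the LTTng trace-clock kernel patches"
#endif

int main(void)
{
	struct timespec cycles, freq;

	/* Fast path in the vDSO while the TSCs are synchronized. */
	if (clock_gettime(CLOCK_TRACE, &cycles) != 0)
		return 1;
	/* Always a syscall: cpu_khz is not visible from userspace. */
	if (clock_gettime(CLOCK_TRACE_FREQ, &freq) != 0)
		return 1;

	printf("raw trace clock: %ld/%ld, raw freq fields: %ld/%ld\n",
	       (long)cycles.tv_sec, (long)cycles.tv_nsec,
	       (long)freq.tv_sec, (long)freq.tv_nsec);
	return 0;
}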
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45f..e3839c74ec4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -974,6 +974,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,

 	.iret = xen_iret,
+	.nmi_return = xen_iret,
 	.irq_enable_sysexit = xen_sysexit,
 #ifdef CONFIG_X86_64
 	.usergs_sysret32 = xen_sysret32,
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 7be8accb0b0..a380dcf32a5 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -131,6 +131,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_IRET		4	/* return with iret */
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
+#define TIF_KERNEL_TRACE	7	/* kernel trace active */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_FREEZE		17	/* is freezing for suspend */

@@ -139,11 +140,12 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_IRET		(1<<TIF_IRET)
+#define _TIF_KERNEL_TRACE	(1<<TIF_KERNEL_TRACE)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)

-#define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK		0x0000FF7E	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
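One detail worth spelling out in the xtensa hunk: the new _TIF_WORK_MASK is simply the old mask with bit 7 cleared, so the freshly added TIF_KERNEL_TRACE flag stops forcing the slow work-pending path on every interrupt/exception return, while _TIF_ALLWORK_MASK still catches it on any return to userspace. A compile-time check one could add to verify the arithmetic (hypothetical, not in the patch):

#include <linux/kernel.h>	/* BUILD_BUG_ON() */

static inline void tif_work_mask_sanity_check(void)
{
	/* 0x0000FFFE with the TIF_KERNEL_TRACE bit (7) cleared is 0x0000FF7E. */
	BUILD_BUG_ON((0x0000FFFE & ~(1UL << 7)) != 0x0000FF7E);
}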