author     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>  2011-03-16 19:04:20 -0400
committer  Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>    2011-03-16 19:04:20 -0400
commit     83922c9929caba21a5bbdfa1a560a5f1c6eddb47 (patch)
tree       de7cd7171b0a9ff35a647f90b512f4ff613eaa7e /arch/x86
parent     638aa0c32d106e18cb99059d47ea0b4360b15b06 (diff)
trace-clock/x86-trace-clock
x86 trace clock
x86 trace clock. It depends on tsc_sync to detect whether the timestamp
counters are synchronized across the machine.
I am keeping this poorly scalable solution for now because it is the simplest
working solution I have found (compared to using the HPET, which also scales
poorly, probably due to bus contention). It should be a good start and should
let us trace a good number of the machines out there.
A "Big Fat" (TM) warning is shown on the console when the trace clock is used on
systems without synchronized TSCs to tell the user to
- use force_tsc_sync=1
- use idle=poll
- disable Powernow or Speedstep
In order to get accurate and fast timestamps.
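As a concrete example, these options would be appended to the kernel command
line at boot; the image name and root device below are illustrative
(force_tsc_sync is introduced by the companion tsc_sync patch):

	linux /boot/vmlinuz root=/dev/sda1 force_tsc_sync=1 idle=poll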
This leaves room for further improvement in a second phase.
Changelog:
- freq_scale is now used as a divisor rather than a multiplier, to support
systems with frequency < 1 Hz.
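For reference, a consumer of this clock would convert cycles to nanoseconds
roughly as sketched below. This helper is not part of the patch; it assumes,
per the changelog above, that the effective clock rate is
trace_clock_frequency() / trace_clock_freq_scale() (on x86 the scale is 1, so
it reduces to cycles * 10^9 / (cpu_khz * 1000)):

	#include <linux/math64.h>	/* div64_u64() */

	/* Hypothetical helper, not part of this patch: cycles -> ns. */
	static inline u64 trace_clock_cycles_to_ns(u64 cycles)
	{
		/*
		 * ns = cycles * 1e9 * freq_scale / frequency.  The
		 * multiplication can overflow u64 for long cycle counts; a
		 * production version would use a pre-computed shifted
		 * mult factor instead.
		 */
		return div64_u64(cycles * 1000000000ULL *
				 trace_clock_freq_scale(),
				 trace_clock_frequency());
	}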
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                      1
-rw-r--r--  arch/x86/include/asm/trace-clock.h   70
-rw-r--r--  arch/x86/kernel/Makefile              1
-rw-r--r--  arch/x86/kernel/trace-clock.c       248
4 files changed, 320 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a87190ccf47..70b1eb3b8fd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_DMA_ATTRS
 	select HAVE_KRETPROBES
+	select HAVE_TRACE_CLOCK
 	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_C_RECORDMCOUNT
diff --git a/arch/x86/include/asm/trace-clock.h b/arch/x86/include/asm/trace-clock.h
new file mode 100644
index 00000000000..ccdcb677aa5
--- /dev/null
+++ b/arch/x86/include/asm/trace-clock.h
@@ -0,0 +1,70 @@
+#ifndef _ASM_X86_TRACE_CLOCK_H
+#define _ASM_X86_TRACE_CLOCK_H
+
+/*
+ * linux/arch/x86/include/asm/trace-clock.h
+ *
+ * Copyright (C) 2005,2006,2008
+ *   Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Trace clock definitions for x86.
+ */
+
+#include <linux/timex.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+
+/* Minimum duration of a probe, in cycles */
+#define TRACE_CLOCK_MIN_PROBE_DURATION 200
+
+extern cycles_t trace_clock_async_tsc_read(void);
+
+extern int _trace_clock_is_sync;
+static inline int trace_clock_is_sync(void)
+{
+	return _trace_clock_is_sync;
+}
+
+static inline u32 trace_clock_read32(void)
+{
+	u32 cycles;
+
+	if (likely(trace_clock_is_sync())) {
+		get_cycles_barrier();
+		cycles = (u32)get_cycles(); /* only need the 32 LSB */
+		get_cycles_barrier();
+	} else
+		cycles = (u32)trace_clock_async_tsc_read();
+	return cycles;
+}
+
+static inline u64 trace_clock_read64(void)
+{
+	u64 cycles;
+
+	if (likely(trace_clock_is_sync())) {
+		get_cycles_barrier();
+		cycles = get_cycles();
+		get_cycles_barrier();
+	} else
+		cycles = trace_clock_async_tsc_read();
+	return cycles;
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+	return (u64)cpu_khz * 1000;
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+	return 1;
+}
+
+extern void get_trace_clock(void);
+extern void put_trace_clock(void);
+
+extern void set_trace_clock_is_sync(int state);
+
+#endif /* _ASM_X86_TRACE_CLOCK_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c8664925391..717cf9c620b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,6 +46,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK)	+= trace-clock.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
diff --git a/arch/x86/kernel/trace-clock.c b/arch/x86/kernel/trace-clock.c
new file mode 100644
index 00000000000..a5e45cb466e
--- /dev/null
+++ b/arch/x86/kernel/trace-clock.c
@@ -0,0 +1,248 @@
+/*
+ * arch/x86/kernel/trace-clock.c
+ *
+ * Trace clock for x86.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>, October 2008
+ */
+
+#include <linux/module.h>
+#include <linux/trace-clock.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/cpu.h>
+
+static cycles_t trace_clock_last_tsc;
+static DEFINE_PER_CPU(struct timer_list, update_timer);
+static DEFINE_MUTEX(async_tsc_mutex);
+static int async_tsc_refcount;	/* Number of readers */
+static int async_tsc_enabled;	/* Async TSC enabled on all online CPUs */
+
+int _trace_clock_is_sync = 1;
+EXPORT_SYMBOL_GPL(_trace_clock_is_sync);
+
+/*
+ * Called by check_tsc_sync_source from CPU hotplug.
+ */
+void set_trace_clock_is_sync(int state)
+{
+	_trace_clock_is_sync = state;
+}
+
+#if BITS_PER_LONG == 64
+static cycles_t read_last_tsc(void)
+{
+	return trace_clock_last_tsc;
+}
+#else
+/*
+ * A cmpxchg64 update can happen concurrently. Based on the assumption that
+ * two cmpxchg64 will never update it to the same value (the count always
+ * increases), reading it twice insures that we read a coherent value with the
+ * same "sequence number".
+ */
+static cycles_t read_last_tsc(void)
+{
+	cycles_t val1, val2;
+
+	val1 = trace_clock_last_tsc;
+	for (;;) {
+		val2 = val1;
+		barrier();
+		val1 = trace_clock_last_tsc;
+		if (likely(val1 == val2))
+			break;
+	}
+	return val1;
+}
+#endif
+
+/*
+ * Support for architectures with non-sync TSCs.
+ * When the local TSC is discovered to lag behind the highest TSC counter, we
+ * increment the TSC count of an amount that should be, ideally, lower than the
+ * execution time of this routine, in cycles : this is the granularity we look
+ * for : we must be able to order the events.
+ */
+notrace cycles_t trace_clock_async_tsc_read(void)
+{
+	cycles_t new_tsc, last_tsc;
+
+	WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+	rdtsc_barrier();
+	new_tsc = get_cycles();
+	rdtsc_barrier();
+	last_tsc = read_last_tsc();
+	do {
+		if (new_tsc < last_tsc)
+			new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+		/*
+		 * If cmpxchg fails with a value higher than the new_tsc, don't
+		 * retry : the value has been incremented and the events
+		 * happened almost at the same time.
+		 * We must retry if cmpxchg fails with a lower value :
+		 * it means that we are the CPU with highest frequency and
+		 * therefore MUST update the value.
+		 */
+		last_tsc = cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc);
+	} while (unlikely(last_tsc < new_tsc));
+	return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
+
+static void update_timer_ipi(void *info)
+{
+	(void)trace_clock_async_tsc_read();
+}
+
+/*
+ * update_timer_fct : - Timer function to resync the clocks
+ * @data: unused
+ *
+ * Fires every jiffy.
+ */
+static void update_timer_fct(unsigned long data)
+{
+	(void)trace_clock_async_tsc_read();
+
+	per_cpu(update_timer, smp_processor_id()).expires = jiffies + 1;
+	add_timer_on(&per_cpu(update_timer, smp_processor_id()),
+		     smp_processor_id());
+}
+
+static void enable_trace_clock(int cpu)
+{
+	init_timer(&per_cpu(update_timer, cpu));
+	per_cpu(update_timer, cpu).function = update_timer_fct;
+	per_cpu(update_timer, cpu).expires = jiffies + 1;
+	smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
+	add_timer_on(&per_cpu(update_timer, cpu), cpu);
+}
+
+static void disable_trace_clock(int cpu)
+{
+	del_timer_sync(&per_cpu(update_timer, cpu));
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD)
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+				     unsigned long action,
+				     void *hcpu)
+{
+	unsigned int hotcpu = (unsigned long)hcpu;
+	int cpu;
+
+	mutex_lock(&async_tsc_mutex);
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		/*
+		 * trace_clock_is_sync() is updated by set_trace_clock_is_sync()
+		 * code, protected by cpu hotplug disable.
+		 * It is ok to let the hotplugged CPU read the timebase before
+		 * the CPU_ONLINE notification. It's just there to give a
+		 * maximum bound to the TSC error.
+		 */
+		if (async_tsc_refcount && !trace_clock_is_sync()) {
+			if (!async_tsc_enabled) {
+				async_tsc_enabled = 1;
+				for_each_online_cpu(cpu)
+					enable_trace_clock(cpu);
+			} else {
+				enable_trace_clock(hotcpu);
+			}
+		}
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (!async_tsc_refcount && num_online_cpus() == 1)
+			set_trace_clock_is_sync(1);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		/*
+		 * We cannot stop the trace clock on other CPUs when readers are
+		 * active even if we go back to a synchronized state (1 CPU)
+		 * because the CPU left could be the one lagging behind.
+		 */
+		if (async_tsc_refcount && async_tsc_enabled)
+			disable_trace_clock(hotcpu);
+		if (!async_tsc_refcount && num_online_cpus() == 1)
+			set_trace_clock_is_sync(1);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	mutex_unlock(&async_tsc_mutex);
+
+	return NOTIFY_OK;
+}
+
+void get_trace_clock(void)
+{
+	int cpu;
+
+	if (!trace_clock_is_sync()) {
+		printk(KERN_WARNING
+		       "Trace clock falls back on cache-line bouncing\n"
+		       "workaround due to non-synchronized TSCs.\n"
+		       "This workaround preserves event order across CPUs.\n"
+		       "Please consider disabling Speedstep or PowerNow and\n"
+		       "using kernel parameters "
+		       "\"force_tsc_sync=1 idle=poll\"\n"
+		       "for accurate and fast tracing clock source.\n");
+	}
+
+	get_online_cpus();
+	mutex_lock(&async_tsc_mutex);
+	if (async_tsc_refcount++ || trace_clock_is_sync())
+		goto end;
+
+	async_tsc_enabled = 1;
+	for_each_online_cpu(cpu)
+		enable_trace_clock(cpu);
+end:
+	mutex_unlock(&async_tsc_mutex);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+	int cpu;
+
+	get_online_cpus();
+	mutex_lock(&async_tsc_mutex);
+	WARN_ON(async_tsc_refcount <= 0);
+	if (async_tsc_refcount != 1 || !async_tsc_enabled)
+		goto end;
+
+	for_each_online_cpu(cpu)
+		disable_trace_clock(cpu);
+	async_tsc_enabled = 0;
+end:
+	async_tsc_refcount--;
+	if (!async_tsc_refcount && num_online_cpus() == 1)
+		set_trace_clock_is_sync(1);
+	mutex_unlock(&async_tsc_mutex);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
+
+static __init int init_unsync_trace_clock(void)
+{
+	hotcpu_notifier(hotcpu_callback, 4);
+	return 0;
+}
+early_initcall(init_unsync_trace_clock);
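For readers who want to experiment with the ordering algorithm outside the
kernel, here is a minimal userspace sketch of the last-TSC compare-and-swap
technique used in trace_clock_async_tsc_read() above. It substitutes GCC
__atomic builtins and __rdtsc() for the kernel's cmpxchg64()/get_cycles();
all names and the probe-duration constant are illustrative, not part of the
patch:

/*
 * Minimal userspace sketch of the async-TSC ordering technique: every
 * reader pushes a shared last-timestamp value forward with a
 * compare-and-swap, so timestamps taken on CPUs with unsynchronized
 * TSCs never go backwards.  Build with: gcc -O2 sketch.c
 */
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>			/* __rdtsc() */

#define MIN_PROBE_DURATION	200	/* mirrors TRACE_CLOCK_MIN_PROBE_DURATION */

static uint64_t last_tsc;		/* shared across threads */

static uint64_t async_tsc_read(void)
{
	uint64_t new_tsc = __rdtsc();
	uint64_t old = __atomic_load_n(&last_tsc, __ATOMIC_RELAXED);

	for (;;) {
		/* If we lag behind the highest TSC seen, jump ahead of it. */
		if (new_tsc < old)
			new_tsc = old + MIN_PROBE_DURATION;
		/* On success, new_tsc is published and returned. */
		if (__atomic_compare_exchange_n(&last_tsc, &old, new_tsc,
						0, __ATOMIC_RELAXED,
						__ATOMIC_RELAXED))
			return new_tsc;
		/*
		 * CAS failed and 'old' now holds the newer shared value.
		 * If it is already >= new_tsc, the concurrent events were
		 * nearly simultaneous: keep our value.  Otherwise we hold
		 * the highest TSC and must retry to publish it.
		 */
		if (old >= new_tsc)
			return new_tsc;
	}
}

int main(void)
{
	uint64_t t1 = async_tsc_read();
	uint64_t t2 = async_tsc_read();

	printf("t1=%llu t2=%llu\n",
	       (unsigned long long)t1, (unsigned long long)t2);
	return 0;
}

As in the kernel version, the bump by MIN_PROBE_DURATION trades a small,
bounded timestamp error for cross-CPU event ordering, at the cost of all
readers bouncing the cache line holding last_tsc.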