Diffstat (limited to 'arch/x86/kernel/trace-clock.c')
-rw-r--r--  arch/x86/kernel/trace-clock.c  302
1 file changed, 302 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/trace-clock.c b/arch/x86/kernel/trace-clock.c
new file mode 100644
index 00000000000..47539e28276
--- /dev/null
+++ b/arch/x86/kernel/trace-clock.c
@@ -0,0 +1,302 @@
+/*
+ * arch/x86/kernel/trace-clock.c
+ *
+ * Trace clock for x86.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>, October 2008
+ */
+
+#include <linux/module.h>
+#include <linux/trace-clock.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/cpu.h>
+#include <linux/posix-timers.h>
+#include <asm/vgtod.h>
+
+static cycles_t trace_clock_last_tsc;
+static DEFINE_PER_CPU(struct timer_list, update_timer);
+static DEFINE_SPINLOCK(async_tsc_lock);
+static int async_tsc_refcount; /* Number of readers */
+static int async_tsc_enabled; /* Async TSC enabled on all online CPUs */
+
+int _trace_clock_is_sync = 1;
+EXPORT_SYMBOL_GPL(_trace_clock_is_sync);
+
+/*
+ * Is the trace clock being used by user-space? We leave the trace clock
+ * active as soon as user-space starts using it. We never drop the trace clock
+ * reference taken by user-space.
+ */
+static atomic_t user_trace_clock_ref;
+
+/*
+ * Called by check_tsc_sync_source from CPU hotplug.
+ */
+void set_trace_clock_is_sync(int state)
+{
+ _trace_clock_is_sync = state;
+ update_trace_clock_is_sync_vdso();
+}
+
+#if BITS_PER_LONG == 64
+static cycles_t read_last_tsc(void)
+{
+ return trace_clock_last_tsc;
+}
+#else
+/*
+ * A cmpxchg64 update can happen concurrently. Based on the assumption that
+ * two cmpxchg64 operations will never store the same value (the count always
+ * increases), reading the variable twice ensures that we read a coherent
+ * value with the same "sequence number".
+ */
+static cycles_t read_last_tsc(void)
+{
+ cycles_t val1, val2;
+
+ val1 = trace_clock_last_tsc;
+ for (;;) {
+ val2 = val1;
+ barrier();
+ val1 = trace_clock_last_tsc;
+ if (likely(val1 == val2))
+ break;
+ }
+ return val1;
+}
+#endif
+
+/*
+ * Support for architectures with non-synchronized TSCs.
+ * When the local TSC is discovered to lag behind the highest TSC counter, we
+ * increment the TSC count by an amount that should ideally be lower than the
+ * execution time of this routine, in cycles: this is the granularity we aim
+ * for, since we must be able to order the events.
+ */
+notrace cycles_t trace_clock_async_tsc_read(void)
+{
+ cycles_t new_tsc, last_tsc;
+
+ WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+ new_tsc = get_cycles();
+ last_tsc = read_last_tsc();
+ do {
+ if (new_tsc < last_tsc)
+ new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+ /*
+		 * If cmpxchg fails with a value higher than new_tsc, don't
+		 * retry: the value has already been incremented and the events
+		 * happened almost at the same time.
+		 * We must retry if cmpxchg fails with a lower value: it means
+		 * that we are the CPU with the highest frequency and therefore
+		 * MUST update the value.
+ */
+ last_tsc = cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc);
+ } while (unlikely(last_tsc < new_tsc));
+ return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
+
+static void update_timer_ipi(void *info)
+{
+ (void)trace_clock_async_tsc_read();
+}
+
+/*
+ * update_timer_fct - Timer function to resynchronize the clocks
+ * @data: unused
+ *
+ * Fires every jiffy.
+ */
+static void update_timer_fct(unsigned long data)
+{
+ (void)trace_clock_async_tsc_read();
+ mod_timer_pinned(&per_cpu(update_timer, smp_processor_id()),
+ jiffies + 1);
+}
+
+static void enable_trace_clock(int cpu)
+{
+ init_timer(&per_cpu(update_timer, cpu));
+ per_cpu(update_timer, cpu).function = update_timer_fct;
+ per_cpu(update_timer, cpu).expires = jiffies + 1;
+ smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
+ add_timer_on(&per_cpu(update_timer, cpu), cpu);
+}
+
+static void disable_trace_clock(int cpu)
+{
+ del_timer_sync(&per_cpu(update_timer, cpu));
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns the success/failure of the operation (NOTIFY_OK, NOTIFY_BAD).
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int hotcpu = (unsigned long)hcpu;
+ int cpu;
+
+ spin_lock(&async_tsc_lock);
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ /*
+		 * trace_clock_is_sync() is updated by set_trace_clock_is_sync(),
+		 * which is protected by cpu hotplug disable.
+		 * It is OK to let the hotplugged CPU read the timebase before
+		 * the CPU_ONLINE notification; it only serves to give a
+		 * maximum bound to the TSC error.
+ */
+ if (async_tsc_refcount && !trace_clock_is_sync()) {
+ if (!async_tsc_enabled) {
+ async_tsc_enabled = 1;
+ for_each_online_cpu(cpu)
+ enable_trace_clock(cpu);
+ } else {
+ enable_trace_clock(hotcpu);
+ }
+ }
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ /*
+ * We cannot stop the trace clock on other CPUs when readers are
+		 * active, even if we go back to a synchronized state (1 CPU),
+		 * because the remaining CPU could be the one lagging behind.
+ */
+ if (async_tsc_refcount && async_tsc_enabled)
+ disable_trace_clock(hotcpu);
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ break;
+#endif /* CONFIG_HOTPLUG_CPU */
+ }
+ spin_unlock(&async_tsc_lock);
+
+ return NOTIFY_OK;
+}
+
+int get_trace_clock(void)
+{
+ int cpu;
+
+ if (!trace_clock_is_sync()) {
+		printk(KERN_WARNING
+		       "Trace clock falls back on a cache-line bouncing\n"
+		       "workaround due to non-synchronized TSCs.\n"
+		       "This workaround preserves event order across CPUs.\n"
+		       "Please consider disabling SpeedStep or PowerNow and\n"
+		       "using the kernel parameters "
+		       "\"force_tsc_sync=1 idle=poll\"\n"
+		       "for an accurate and fast trace clock source.\n");
+ }
+
+ get_online_cpus();
+ spin_lock(&async_tsc_lock);
+ if (async_tsc_refcount++ || trace_clock_is_sync())
+ goto end;
+
+ async_tsc_enabled = 1;
+ for_each_online_cpu(cpu)
+ enable_trace_clock(cpu);
+end:
+ spin_unlock(&async_tsc_lock);
+ put_online_cpus();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+ int cpu;
+
+ get_online_cpus();
+ spin_lock(&async_tsc_lock);
+ WARN_ON(async_tsc_refcount <= 0);
+ if (async_tsc_refcount != 1 || !async_tsc_enabled)
+ goto end;
+
+ for_each_online_cpu(cpu)
+ disable_trace_clock(cpu);
+ async_tsc_enabled = 0;
+end:
+ async_tsc_refcount--;
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ spin_unlock(&async_tsc_lock);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
+
+static int posix_get_trace(clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+ int ret;
+
+ /*
+	 * Yes, there is a race here that could lead to the refcount being
+	 * incremented more than once, but all we care about is leaving the
+	 * trace clock active forever, so precise accounting is not needed.
+ */
+ if (unlikely(!atomic_read(&user_trace_clock_ref))) {
+ ret = get_trace_clock();
+ if (ret)
+ return ret;
+ atomic_inc(&user_trace_clock_ref);
+ }
+ lts->lttng_ts = trace_clock_read64();
+ return 0;
+}
+
+static int posix_get_trace_freq(clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+ lts->lttng_ts = trace_clock_frequency();
+ return 0;
+}
+
+static int posix_get_trace_res(const clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+ lts->lttng_ts = TRACE_CLOCK_RES;
+ return 0;
+}
+
+static __init int init_unsync_trace_clock(void)
+{
+ struct k_clock clock_trace = {
+ .clock_getres = posix_get_trace_res,
+ .clock_get = posix_get_trace,
+ };
+ struct k_clock clock_trace_freq = {
+ .clock_getres = posix_get_trace_res,
+ .clock_get = posix_get_trace_freq,
+ };
+
+ register_posix_clock(CLOCK_TRACE, &clock_trace);
+ register_posix_clock(CLOCK_TRACE_FREQ, &clock_trace_freq);
+
+ hotcpu_notifier(hotcpu_callback, 4);
+ return 0;
+}
+early_initcall(init_unsync_trace_clock);
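
The retry loop in trace_clock_async_tsc_read() is a "monotonic max" update:
each CPU publishes the highest timestamp it has seen, so timestamps never go
backwards across CPUs except for reads that race so closely that their order
does not matter (the trade-off described in the comment above the loop). A
minimal user-space sketch of the same technique, using GCC/Clang atomic
builtins in place of the kernel's cmpxchg64; the function name and the +1
minimum step are illustrative stand-ins, not part of the patch:

	#include <stdint.h>

	static uint64_t last_ts;

	/* Return a value no lower than any previously published one,
	 * modulo near-simultaneous racing reads. */
	static uint64_t monotonic_max_read(uint64_t now)
	{
		uint64_t seen = __atomic_load_n(&last_ts, __ATOMIC_RELAXED);

		for (;;) {
			if (now < seen)
				now = seen + 1; /* stand-in for
						   TRACE_CLOCK_MIN_PROBE_DURATION */
			/* On failure, 'seen' is refreshed with the current
			 * value. Losing to a higher value means another
			 * thread already ordered the events: stop retrying. */
			if (__atomic_compare_exchange_n(&last_ts, &seen, now,
							0, __ATOMIC_RELAXED,
							__ATOMIC_RELAXED))
				return now;
			if (seen >= now)
				return now;
		}
	}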
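On the kernel side, the intended usage pattern is the refcounted get/put pair
around a tracing session. A minimal sketch, assuming only what the patch
exports (get_trace_clock(), put_trace_clock() and trace_clock_read64() from
<linux/trace-clock.h>); the module itself is hypothetical:

	#include <linux/module.h>
	#include <linux/trace-clock.h>

	static int __init sample_tracer_init(void)
	{
		int ret;

		/* Keeps the trace clock active; starts the per-CPU resync
		 * timers when the TSCs are not synchronized. */
		ret = get_trace_clock();
		if (ret)
			return ret;

		printk(KERN_INFO "trace clock now: %llu\n",
		       (unsigned long long)trace_clock_read64());
		return 0;
	}

	static void __exit sample_tracer_exit(void)
	{
		/* Dropping the last reader reference stops the timers. */
		put_trace_clock();
	}

	module_init(sample_tracer_init);
	module_exit(sample_tracer_exit);
	MODULE_LICENSE("GPL");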
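From user-space, the two clocks registered in init_unsync_trace_clock() are
reachable through clock_gettime(). A sketch under stated assumptions: the real
CLOCK_TRACE/CLOCK_TRACE_FREQ clockid values come from uapi headers not shown
in this patch (the ids below are placeholders), and the raw 64-bit payload
overlaying the timespec mirrors union lttng_timespec with lttng_ts at offset
zero:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <time.h>

	#ifndef CLOCK_TRACE		/* placeholder ids; the real values
					   come from the patched headers */
	#define CLOCK_TRACE		((clockid_t)14)
	#define CLOCK_TRACE_FREQ	((clockid_t)15)
	#endif

	int main(void)
	{
		struct timespec ts;
		uint64_t cycles, freq;

		/* The 64-bit cycle count is packed into the timespec
		 * (union lttng_timespec), not a sec/nsec pair. */
		if (clock_gettime(CLOCK_TRACE, &ts))
			return 1;
		memcpy(&cycles, &ts, sizeof(cycles));

		if (clock_gettime(CLOCK_TRACE_FREQ, &ts))
			return 1;
		memcpy(&freq, &ts, sizeof(freq));

		printf("%llu cycles @ %llu Hz\n",
		       (unsigned long long)cycles,
		       (unsigned long long)freq);
		return 0;
	}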