Diffstat (limited to 'kernel/time/tsc-sync.c')
-rw-r--r-- | kernel/time/tsc-sync.c | 313
1 file changed, 313 insertions, 0 deletions
diff --git a/kernel/time/tsc-sync.c b/kernel/time/tsc-sync.c
new file mode 100644
index 00000000000..2ac1544ee22
--- /dev/null
+++ b/kernel/time/tsc-sync.c
@@ -0,0 +1,313 @@
+/*
+ * kernel/time/tsc-sync.c
+ *
+ * Test TSC synchronization
+ *
+ * Marks the TSC as unstable _and_ keeps a simple "_tsc_is_sync" variable,
+ * which is fast to read when a simple test must determine which clock source
+ * to use for kernel tracing.
+ *
+ * - CPU init :
+ *
+ * We check whether all boot CPUs have their TSCs synchronized,
+ * print a warning if not and turn off the TSC clock-source.
+ *
+ * Only two CPUs may participate - they can enter in any order.
+ * ( The serial nature of the boot logic and the CPU hotplug lock
+ *   protects against more than 2 CPUs entering this code. )
+ *
+ * - When CPUs are up :
+ *
+ * TSC synchronicity of all CPUs can be checked later at run-time by calling
+ * test_tsc_synchronization().
+ *
+ * Copyright 2007, 2008
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/timex.h>
+#include <linux/jiffies.h>
+#include <linux/trace-clock.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+
+#define MAX_CYCLES_DELTA	3000ULL
+
+/*
+ * Number of loops to take care of MCEs, NMIs, SMIs.
+ */
+#define NR_LOOPS	200
+
+static DEFINE_MUTEX(tscsync_mutex);
+
+struct sync_data {
+	int nr_waits;
+	int wait_sync;
+	cycles_t tsc_count;
+} ____cacheline_aligned;
+
+/* 0 is master, 1 is slave */
+static struct sync_data sync_data[2] = {
+	[0 ... 1] = {
+		.nr_waits = 3 * NR_LOOPS + 1,
+		.wait_sync = 3 * NR_LOOPS + 1,
+	},
+};
+
+int _tsc_is_sync = 1;
+EXPORT_SYMBOL(_tsc_is_sync);
+
+static int force_tsc_sync;
+static cycles_t slave_offset;
+static int slave_offset_ready;	/* for 32-bit architectures */
+
+static int __init force_tsc_sync_setup(char *str)
+{
+	force_tsc_sync = simple_strtoul(str, NULL, 0);
+	return 1;
+}
+__setup("force_tsc_sync=", force_tsc_sync_setup);
+
+/*
+ * Mark it noinline so we make sure it is not inlined (and thus not unrolled
+ * into its callers). Wait until the other CPU reaches the expected value.
+ */
+static noinline void tsc_barrier(long this_cpu)
+{
+	sync_core();
+	sync_data[this_cpu].wait_sync--;
+	smp_mb();	/* order master/slave sync_data read/write */
+	while (unlikely(sync_data[1 - this_cpu].wait_sync >=
+			sync_data[this_cpu].nr_waits))
+		barrier();	/*
+				 * barrier is used because it is faster and
+				 * more predictable than cpu_idle().
+				 */
+	smp_mb();	/* order master/slave sync_data read/write */
+	sync_data[this_cpu].nr_waits--;
+	get_cycles_barrier();
+	sync_data[this_cpu].tsc_count = get_cycles();
+	get_cycles_barrier();
+}
+
+/*
+ * Worker function called on each CPU.
+ * First wait with interrupts enabled, then wait with interrupts disabled,
+ * for precision. We are already bound to one CPU.
+ * this_cpu 0 : master
+ * this_cpu 1 : slave
+ */
+static void test_sync(void *arg)
+{
+	long this_cpu = (long)arg;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	/* Make sure the instructions are in I-cache */
+	tsc_barrier(this_cpu);
+	tsc_barrier(this_cpu);
+	sync_data[this_cpu].wait_sync--;
+	smp_mb();	/* order master/slave sync_data read/write */
+	while (unlikely(sync_data[1 - this_cpu].wait_sync >=
+			sync_data[this_cpu].nr_waits))
+		barrier();	/*
+				 * barrier is used because it is faster and
+				 * more predictable than cpu_idle().
+				 */
+	smp_mb();	/* order master/slave sync_data read/write */
+	sync_data[this_cpu].nr_waits--;
+	/*
+	 * Here, only the master will wait for the slave to reach this barrier.
+	 * This makes sure that the master, which holds the mutex and will
+	 * reset the barriers, waits for the slave to stop using the barrier
+	 * values before it continues. This is only done at the complete end of
+	 * all the loops, which is why there is a + 1 in the initial wait_sync
+	 * value.
+	 */
+	if (sync_data[this_cpu].nr_waits == 1)
+		sync_data[this_cpu].wait_sync--;
+	local_irq_restore(flags);
+}
+
+/*
+ * Each CPU (master and slave) must decrement the wait_sync value three times
+ * per loop: twice in the cache-priming tsc_barrier() calls and once after the
+ * get_cycles() measurement. After all the loops, one last synchronization is
+ * required to make sure the master waits for the slave before resetting the
+ * barriers.
+ */
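+/*
+ * Worked example of the accounting above (illustrative sketch, not part of
+ * the algorithm): starting from the initial value of both counters,
+ *
+ *	wait_sync = 3 * NR_LOOPS + 1
+ *	            - 3 * NR_LOOPS	(3 decrements per test_sync() call,
+ *					 NR_LOOPS calls per CPU)
+ *	            - 1			(final master/slave hand-off)
+ *	          = 0
+ *
+ * while nr_waits is decremented 3 * NR_LOOPS times and thus ends at 1;
+ * these are exactly the values reset_barriers() checks with its WARN_ON()s
+ * before re-arming both counters.
+ */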
+static void reset_barriers(void)
+{
+	int i;
+
+	/*
+	 * Wait until the slave is done, so that we don't overwrite
+	 * wait_sync prematurely.
+	 */
+	smp_mb();	/* order master/slave sync_data read/write */
+	while (unlikely(sync_data[1].wait_sync >= sync_data[0].nr_waits))
+		barrier();	/*
+				 * barrier is used because it is faster and
+				 * more predictable than cpu_idle().
+				 */
+	smp_mb();	/* order master/slave sync_data read/write */
+
+	for (i = 0; i < 2; i++) {
+		WARN_ON(sync_data[i].wait_sync != 0);
+		WARN_ON(sync_data[i].nr_waits != 1);
+		sync_data[i].wait_sync = 3 * NR_LOOPS + 1;
+		sync_data[i].nr_waits = 3 * NR_LOOPS + 1;
+	}
+}
+
+/*
+ * Do NR_LOOPS loops (making sure no unexpected event changes the timing) and
+ * keep the best one. For each slave CPU, the result is the lowest TSC delta
+ * observed against the master over all loops; the overall result is the
+ * highest of these per-CPU deltas. Stop CPU hotplug while this code executes
+ * to make sure we are concurrency-safe wrt the CPU hotplug path, which also
+ * uses this code. Test TSC synchronization even if we already "know" CPUs
+ * were not synchronized: this can be used as a test to check if, for some
+ * reason, the CPUs eventually got in sync after a CPU has been unplugged.
+ * This code is kept separate from the CPU hotplug code because the slave CPU
+ * executes in an IPI, which we want to keep as short as possible (this is
+ * happening while the system is running). Therefore, we do not send a single
+ * IPI for all the test loops, but rather send one IPI per loop.
+ */
+int test_tsc_synchronization(void)
+{
+	long cpu, master;
+	cycles_t max_diff = 0, diff, best_loop, worse_loop = 0;
+	int i;
+
+	mutex_lock(&tscsync_mutex);
+	get_online_cpus();
+
+	printk(KERN_INFO
+		"checking TSC synchronization across all online CPUs:");
+
+	preempt_disable();
+	master = smp_processor_id();
+	for_each_online_cpu(cpu) {
+		if (master == cpu)
+			continue;
+		best_loop = (cycles_t)ULLONG_MAX;
+		for (i = 0; i < NR_LOOPS; i++) {
+			smp_call_function_single(cpu, test_sync,
+						 (void *)1UL, 0);
+			test_sync((void *)0UL);
+			diff = abs(sync_data[1].tsc_count
+				   - sync_data[0].tsc_count);
+			best_loop = min(best_loop, diff);
+			worse_loop = max(worse_loop, diff);
+		}
+		reset_barriers();
+		max_diff = max(best_loop, max_diff);
+	}
+	preempt_enable();
+	if (max_diff >= MAX_CYCLES_DELTA) {
+		printk(KERN_WARNING
+		       "Measured %llu cycles TSC offset between CPUs,"
+		       " turning off TSC clock.\n", (u64)max_diff);
+		mark_tsc_unstable("check_tsc_sync_source failed");
+		_tsc_is_sync = 0;
+	} else {
+		printk(" passed.\n");
+	}
+	put_online_cpus();
+	mutex_unlock(&tscsync_mutex);
+	return max_diff < MAX_CYCLES_DELTA;
+}
+EXPORT_SYMBOL_GPL(test_tsc_synchronization);
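+/*
+ * Usage sketch (illustrative only, not part of this patch): the export above
+ * allows a module to re-test synchronicity at run time. The function sleeps
+ * (mutex and CPU hotplug lock), so call it from process context, e.g.:
+ *
+ *	static int __init tsc_sync_selftest_init(void)
+ *	{
+ *		if (!test_tsc_synchronization())
+ *			printk(KERN_WARNING "TSCs are not synchronized\n");
+ *		return 0;
+ *	}
+ *	module_init(tsc_sync_selftest_init);
+ */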
+/*
+ * Test the synchronicity of a single core when it is hotplugged.
+ * The source CPU calls into this, waits for the freshly booted target CPU to
+ * arrive and then starts the measurement:
+ */
+void __cpuinit check_tsc_sync_source(int cpu)
+{
+	cycles_t diff, abs_diff,
+		 best_loop = (cycles_t)ULLONG_MAX, worse_loop = 0;
+	int i;
+
+	/*
+	 * No need to check if we already know that the TSC is not
+	 * synchronized:
+	 */
+	if (!force_tsc_sync && unsynchronized_tsc()) {
+		/*
+		 * Make sure we set _tsc_is_sync to 0 if the TSC is found
+		 * unsynchronized for causes other than non-synchronized
+		 * TSCs across CPUs.
+		 */
+		_tsc_is_sync = 0;
+		set_trace_clock_is_sync(0);
+		return;
+	}
+
+	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
+	       smp_processor_id(), cpu);
+
+	for (i = 0; i < NR_LOOPS; i++) {
+		test_sync((void *)0UL);
+		diff = sync_data[1].tsc_count - sync_data[0].tsc_count;
+		abs_diff = abs(diff);
+		best_loop = min(best_loop, abs_diff);
+		worse_loop = max(worse_loop, abs_diff);
+		if (force_tsc_sync && best_loop == abs_diff)
+			slave_offset = diff;
+	}
+	reset_barriers();
+
+	if (!force_tsc_sync && best_loop >= MAX_CYCLES_DELTA) {
+		printk(" failed.\n");
+		printk(KERN_WARNING
+		       "Measured %llu cycles TSC offset between CPUs,"
+		       " turning off TSC clock.\n", (u64)best_loop);
+		mark_tsc_unstable("check_tsc_sync_source failed");
+		_tsc_is_sync = 0;
+		set_trace_clock_is_sync(0);
+	} else {
+		printk(" %s.\n", !force_tsc_sync ? "passed" : "forced");
+	}
+	if (force_tsc_sync) {
+		/* order slave_offset and slave_offset_ready writes */
+		smp_wmb();
+		slave_offset_ready = 1;
+	}
+}
+
+/*
+ * Freshly booted CPUs call into this:
+ */
+void __cpuinit check_tsc_sync_target(void)
+{
+	int i;
+
+	if (!force_tsc_sync && unsynchronized_tsc())
+		return;
+
+	for (i = 0; i < NR_LOOPS; i++)
+		test_sync((void *)1UL);
+
+	/*
+	 * Force slave synchronization if requested.
+	 */
+	if (force_tsc_sync) {
+		unsigned long flags;
+		cycles_t new_tsc;
+
+		while (!slave_offset_ready)
+			cpu_relax();
+		/* order slave_offset and slave_offset_ready reads */
+		smp_rmb();
+		local_irq_save(flags);
+		/*
+		 * slave_offset is read when the master has finished writing
+		 * to it, and is protected by CPU hotplug serialization.
+		 */
+		new_tsc = get_cycles() - slave_offset;
+		write_tsc((u32)new_tsc, (u32)((u64)new_tsc >> 32));
+		local_irq_restore(flags);
+	}
+}
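+/*
+ * Resynchronization sketch (illustrative): booting with "force_tsc_sync=1"
+ * on the kernel command line makes check_tsc_sync_source() record the signed
+ * delta of the best (lowest) loop in slave_offset, which the target CPU then
+ * subtracts from its own TSC. For example, if the slave runs 1000 cycles
+ * ahead of the master, slave_offset == 1000 and the slave executes:
+ *
+ *	new_tsc = get_cycles() - 1000;
+ *	write_tsc((u32)new_tsc, (u32)((u64)new_tsc >> 32));
+ *
+ * bringing the two TSCs within MAX_CYCLES_DELTA of each other.
+ */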