From ec44bc7acc3687ba6ae8154b4b5a845b70279237 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:57 +0000 Subject: timers: Create detach_if_pending() and use it Most callers of detach_timer() have the same pattern around them. Check whether the timer is pending and eventually updating base->next_timer. Create detach_if_pending() and replace the duplicated code. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gilad Ben-Yossef Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.131246037@linutronix.de --- kernel/timer.c | 56 +++++++++++++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 6ec7e7e0db43..0f70deb20151 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -654,8 +654,7 @@ void init_timer_deferrable_key(struct timer_list *timer, } EXPORT_SYMBOL(init_timer_deferrable_key); -static inline void detach_timer(struct timer_list *timer, - int clear_pending) +static inline void detach_timer(struct timer_list *timer, bool clear_pending) { struct list_head *entry = &timer->entry; @@ -667,6 +666,19 @@ static inline void detach_timer(struct timer_list *timer, entry->prev = LIST_POISON2; } +static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, + bool clear_pending) +{ + if (!timer_pending(timer)) + return 0; + + detach_timer(timer, clear_pending); + if (timer->expires == base->next_timer && + !tbase_get_deferrable(timer->base)) + base->next_timer = base->timer_jiffies; + return 1; +} + /* * We are using hashed locking: holding per_cpu(tvec_bases).lock * means that all timers which are tied to this base via timer->base are @@ -712,16 +724,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, base = lock_timer_base(timer, &flags); - if (timer_pending(timer)) { - detach_timer(timer, 0); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; - } else { - if (pending_only) - goto out_unlock; - } + ret = detach_if_pending(timer, base, false); + if (!ret && pending_only) + goto out_unlock; debug_activate(timer, expires); @@ -959,13 +964,7 @@ int del_timer(struct timer_list *timer) timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); - if (timer_pending(timer)) { - detach_timer(timer, 1); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; - } + ret = detach_if_pending(timer, base, true); spin_unlock_irqrestore(&base->lock, flags); } @@ -990,19 +989,10 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer == timer) - goto out; - - timer_stats_timer_clear_start_info(timer); - ret = 0; - if (timer_pending(timer)) { - detach_timer(timer, 1); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; - ret = 1; + if (base->running_timer != timer) { + timer_stats_timer_clear_start_info(timer); + ret = detach_if_pending(timer, base, true); } -out: spin_unlock_irqrestore(&base->lock, flags); return ret; @@ -1178,7 +1168,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); base->running_timer = timer; - detach_timer(timer, 1); + detach_timer(timer, true); spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, data); @@ -1714,7 
+1704,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea while (!list_empty(head)) { timer = list_first_entry(head, struct timer_list, entry); - detach_timer(timer, 0); + detach_timer(timer, false); timer_set_base(timer, new_base); if (time_before(timer->expires, new_base->next_timer) && !tbase_get_deferrable(timer->base)) -- cgit v1.2.3 From facbb4a7efbd658046bf615f03cd97a1504785d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:57 +0000 Subject: timers: Consolidate base->next_timer update Another bunch of mindlessly copied code. All callers of internal_add_timer() except the recascading code updates base->next_timer. Move this into internal_add_timer() and let the cascading code call __internal_add_timer(). Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gilad Ben-Yossef Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.189946224@linutronix.de --- kernel/timer.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 0f70deb20151..7207690b5353 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -330,7 +330,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +static void +__internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; unsigned long idx = expires - base->timer_jiffies; @@ -372,6 +373,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) list_add_tail(&timer->entry, vec); } +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +{ + __internal_add_timer(base, timer); + /* + * Update base->next_timer if this is the earliest one. 
+ */ + if (time_before(timer->expires, base->next_timer) && + !tbase_get_deferrable(timer->base)) + base->next_timer = timer->expires; +} + #ifdef CONFIG_TIMER_STATS void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) { @@ -757,9 +769,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, } timer->expires = expires; - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; internal_add_timer(base, timer); out_unlock: @@ -925,9 +934,6 @@ void add_timer_on(struct timer_list *timer, int cpu) spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); debug_activate(timer, timer->expires); - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; internal_add_timer(base, timer); /* * Check whether the other CPU is idle and needs to be @@ -1079,7 +1085,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) */ list_for_each_entry_safe(timer, tmp, &tv_list, entry) { BUG_ON(tbase_get_base(timer->base) != base); - internal_add_timer(base, timer); + /* No accounting, while moving them */ + __internal_add_timer(base, timer); } return index; @@ -1706,9 +1713,6 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea timer = list_first_entry(head, struct timer_list, entry); detach_timer(timer, false); timer_set_base(timer, new_base); - if (time_before(timer->expires, new_base->next_timer) && - !tbase_get_deferrable(timer->base)) - new_base->next_timer = timer->expires; internal_add_timer(new_base, timer); } } -- cgit v1.2.3 From 99d5f3aac674fe081ffddd2dbb8946ccbc14c410 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:58 +0000 Subject: timers: Add accounting of non deferrable timers The code in get_next_timer_interrupt() is suboptimal as it has to run through the cascade to find the next expiring timer. On a completely idle core we should only do that when there is an active timer enqueued and base->next_timer does not give us a fast answer. Add accounting of the active timers to the now consolidated attach/detach code. I deliberately avoided sanity checks because the code is fully symmetric and any fiddling with timers w/o using the API functions will lead to cute explosions anyway. ulong is big enough even on 32bit and if we really run into the situation to have more than 1<<32 timers enqueued there, then we are definitely not in a state to go idle and run through that code. This allows us to fix another shortcoming of get_next_timer_interrupt(). Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gilad Ben-Yossef Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.236377028@linutronix.de --- kernel/timer.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 7207690b5353..7fada698bd1a 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -77,6 +77,7 @@ struct tvec_base { struct timer_list *running_timer; unsigned long timer_jiffies; unsigned long next_timer; + unsigned long active_timers; struct tvec_root tv1; struct tvec tv2; struct tvec tv3; @@ -377,11 +378,13 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { __internal_add_timer(base, timer); /* - * Update base->next_timer if this is the earliest one.
+ * Update base->active_timers and base->next_timer */ - if (time_before(timer->expires, base->next_timer) && - !tbase_get_deferrable(timer->base)) - base->next_timer = timer->expires; + if (!tbase_get_deferrable(timer->base)) { + if (time_before(timer->expires, base->next_timer)) + base->next_timer = timer->expires; + base->active_timers++; + } } #ifdef CONFIG_TIMER_STATS @@ -678,6 +681,14 @@ static inline void detach_timer(struct timer_list *timer, bool clear_pending) entry->prev = LIST_POISON2; } +static inline void +detach_expired_timer(struct timer_list *timer, struct tvec_base *base) +{ + detach_timer(timer, true); + if (!tbase_get_deferrable(timer->base)) + timer->base->active_timers--; +} + static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, bool clear_pending) { @@ -685,9 +696,11 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, return 0; detach_timer(timer, clear_pending); - if (timer->expires == base->next_timer && - !tbase_get_deferrable(timer->base)) - base->next_timer = base->timer_jiffies; + if (!tbase_get_deferrable(timer->base)) { + timer->base->active_timers--; + if (timer->expires == base->next_timer) + base->next_timer = base->timer_jiffies; + } return 1; } @@ -1175,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); base->running_timer = timer; - detach_timer(timer, true); + detach_expired_timer(timer, base); spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, data); @@ -1701,6 +1714,7 @@ static int __cpuinit init_timers_cpu(int cpu) base->timer_jiffies = jiffies; base->next_timer = base->timer_jiffies; + base->active_timers = 0; return 0; } @@ -1711,6 +1725,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea while (!list_empty(head)) { timer = list_first_entry(head, struct timer_list, entry); + /* We ignore the accounting on the dying cpu */ detach_timer(timer, false); timer_set_base(timer, new_base); internal_add_timer(new_base, timer); -- cgit v1.2.3 From e40468a54882ef7411fb178dbf2e465ec2349af7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 May 2012 22:08:59 +0000 Subject: timers: Improve get_next_timer_interrupt() Gilad reported at http://lkml.kernel.org/r/1336056962-10465-2-git-send-email-gilad@benyossef.com "Current timer code fails to correctly return a value meaning that there is no future timer event, with the result that the timer keeps getting re-armed in HZ one shot mode even when we could turn it off, generating unneeded interrupts. What is happening is that when __next_timer_interrupt() wishes to return a value that signifies "there is no future timer event", it returns (base->timer_jiffies + NEXT_TIMER_MAX_DELTA). However, the code in tick_nohz_stop_sched_tick(), which called __next_timer_interrupt() via get_next_timer_interrupt(), compares the return value to (last_jiffies + NEXT_TIMER_MAX_DELTA) to see if the timer needs to be re-armed. base->timer_jiffies != last_jiffies and so tick_nohz_stop_sched_tick() interperts the return value as indication that there is a distant future event 12 days from now and programs the timer to fire next after KTIME_MAX nsecs instead of avoiding to arm it. This ends up causing a needless interrupt once every KTIME_MAX nsecs." Fix this by using the new active timer accounting. This avoids scans when no active timer is enqueued completely, so we don't have to rely on base->timer_next and base->timer_jiffies anymore. 
Reported-by: Gilad Ben-Yossef Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20120525214819.317535385@linutronix.de --- kernel/timer.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 7fada698bd1a..a61c09374eba 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1326,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, unsigned long get_next_timer_interrupt(unsigned long now) { struct tvec_base *base = __this_cpu_read(tvec_bases); - unsigned long expires; + unsigned long expires = now + NEXT_TIMER_MAX_DELTA; /* * Pretend that there is no timer pending if the cpu is offline. * Possible pending timers will be migrated later to an active cpu. */ if (cpu_is_offline(smp_processor_id())) - return now + NEXT_TIMER_MAX_DELTA; + return expires; + spin_lock(&base->lock); - if (time_before_eq(base->next_timer, base->timer_jiffies)) - base->next_timer = __next_timer_interrupt(base); - expires = base->next_timer; + if (base->active_timers) { + if (time_before_eq(base->next_timer, base->timer_jiffies)) + base->next_timer = __next_timer_interrupt(base); + expires = base->next_timer; + } spin_unlock(&base->lock); if (time_before_eq(expires, now)) -- cgit v1.2.3 From 19f5f7364a1cc770b14692f609bb9b802fc334d5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 Jul 2011 17:29:28 +0200 Subject: nohz: Separate idle sleeping time accounting from nohz logic As we plan to be able to stop the tick outside the idle task, we need to prepare for separating nohz logic from idle. As a start, this pulls the idle sleeping time accounting out of the tick stop/restart API to the callers on idle entry/exit. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 77 ++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index da70c6db496c..81409bba2425 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -271,10 +271,10 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts) +static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; - ktime_t last_update, expires, now; + ktime_t last_update, expires; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; int cpu; @@ -282,8 +282,6 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); - now = tick_nohz_start_idle(cpu, ts); - /* * If this cpu is offline and it is the one which updates * jiffies, then give up the assignment and let it be taken by @@ -444,6 +442,14 @@ out: ts->sleep_length = ktime_sub(dev->next_event, now); } +static void __tick_nohz_idle_enter(struct tick_sched *ts) +{ + ktime_t now; + + now = tick_nohz_start_idle(smp_processor_id(), ts); + tick_nohz_stop_sched_tick(ts, now); +} + /** * tick_nohz_idle_enter - stop the idle tick from the idle task * @@ -479,7 +485,7 @@ void tick_nohz_idle_enter(void) * update of the idle time accounting in tick_nohz_start_idle(). */ ts->inidle = 1; - tick_nohz_stop_sched_tick(ts); + __tick_nohz_idle_enter(ts); local_irq_enable(); } @@ -499,7 +505,7 @@ void tick_nohz_irq_exit(void) if (!ts->inidle) return; - tick_nohz_stop_sched_tick(ts); + __tick_nohz_idle_enter(ts); } /** @@ -540,39 +546,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) } } -/** - * tick_nohz_idle_exit - restart the idle tick from the idle task - * - * Restart the idle tick when the CPU is woken up from idle - * This also exit the RCU extended quiescent state. The CPU - * can use RCU again after this function is called. - */ -void tick_nohz_idle_exit(void) +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { - int cpu = smp_processor_id(); - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); #ifndef CONFIG_VIRT_CPU_ACCOUNTING unsigned long ticks; #endif - ktime_t now; - - local_irq_disable(); - - WARN_ON_ONCE(!ts->inidle); - - ts->inidle = 0; - - if (ts->idle_active || ts->tick_stopped) - now = ktime_get(); - - if (ts->idle_active) - tick_nohz_stop_idle(cpu, now); - - if (!ts->tick_stopped) { - local_irq_enable(); - return; - } - /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); @@ -600,6 +578,35 @@ void tick_nohz_idle_exit(void) ts->idle_exittime = now; tick_nohz_restart(ts, now); +} + +/** + * tick_nohz_idle_exit - restart the idle tick from the idle task + * + * Restart the idle tick when the CPU is woken up from idle + * This also exit the RCU extended quiescent state. The CPU + * can use RCU again after this function is called. 
+ */ +void tick_nohz_idle_exit(void) +{ + int cpu = smp_processor_id(); + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now; + + local_irq_disable(); + + WARN_ON_ONCE(!ts->inidle); + + ts->inidle = 0; + + if (ts->idle_active || ts->tick_stopped) + now = ktime_get(); + + if (ts->idle_active) + tick_nohz_stop_idle(cpu, now); + + if (ts->tick_stopped) + tick_nohz_restart_sched_tick(ts, now); local_irq_enable(); } -- cgit v1.2.3 From 2ac0d98fd624ae50f5e6ae9c800977a9dbbfcde6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 28 Jul 2011 04:00:47 +0200 Subject: nohz: Make nohz API agnostic against idle ticks cputime accounting When the timer tick fires, it accounts the new jiffy as either part of system, user or idle time. This is how we record the cputime statistics. But when the tick is stopped from the idle task, we still need to record the number of jiffies spent tickless until we restart the tick and fall back to traditional tick-based cputime accounting. To do this, we take a snapshot of jiffies when the tick is stopped and compute the difference against the new value of jiffies when the tick is restarted. Then we account this whole difference to the idle cputime. However we are preparing to be able to stop the tick from other places than idle. So this idle time accounting needs to be performed from the callers of nohz APIs, not from the nohz APIs themselves because we now want them to be agnostic against places that stop/restart tick. Therefore, we pull the tickless idle time accounting out of generic nohz helpers up to idle entry/exit callers. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 81409bba2425..911834b33b8a 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -402,7 +402,6 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; - ts->idle_jiffies = last_jiffies; } ts->idle_sleeps++; @@ -445,9 +444,13 @@ out: static void __tick_nohz_idle_enter(struct tick_sched *ts) { ktime_t now; + int was_stopped = ts->tick_stopped; now = tick_nohz_start_idle(smp_processor_id(), ts); tick_nohz_stop_sched_tick(ts, now); + + if (!was_stopped && ts->tick_stopped) + ts->idle_jiffies = ts->last_jiffies; } /** @@ -548,15 +551,25 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - unsigned long ticks; -#endif /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); update_cpu_load_nohz(); + touch_softlockup_watchdog(); + /* + * Cancel the scheduled timer and restore the tick + */ + ts->tick_stopped = 0; + ts->idle_exittime = now; + + tick_nohz_restart(ts, now); +} + +static void tick_nohz_account_idle_ticks(struct tick_sched *ts) +{ #ifndef CONFIG_VIRT_CPU_ACCOUNTING + unsigned long ticks; /* * We stopped the tick in idle. 
Update process times would miss the * time we slept as update_process_times does only a 1 tick @@ -569,15 +582,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) if (ticks && ticks < LONG_MAX) account_idle_ticks(ticks); #endif - - touch_softlockup_watchdog(); - /* - * Cancel the scheduled timer and restore the tick - */ - ts->tick_stopped = 0; - ts->idle_exittime = now; - - tick_nohz_restart(ts, now); } /** @@ -605,8 +609,10 @@ void tick_nohz_idle_exit(void) if (ts->idle_active) tick_nohz_stop_idle(cpu, now); - if (ts->tick_stopped) + if (ts->tick_stopped) { tick_nohz_restart_sched_tick(ts, now); + tick_nohz_account_idle_ticks(ts); + } local_irq_enable(); } @@ -811,7 +817,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) */ if (ts->tick_stopped) { touch_softlockup_watchdog(); - ts->idle_jiffies++; + if (idle_cpu(cpu)) + ts->idle_jiffies++; } update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); -- cgit v1.2.3 From f5d411c91ede162240f34e05a233f2759412988e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 31 Jul 2011 17:44:12 +0200 Subject: nohz: Rename ts->idle_tick to ts->last_tick Now that the idle and nohz logics are going to be independent of each other, ts->idle_tick becomes too biased a name to describe the field that saves the last scheduled tick on top of which we re-calculate the next tick to schedule when the timer is restarted. We want to reuse this even to stop the tick outside idle cases. So let's rename it to a more generic name: ts->last_tick. This changes a bit the timer list stat export so we need to increase its version. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E.
McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 4 ++-- kernel/time/timer_list.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 911834b33b8a..73cc4901336d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -400,7 +400,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) if (!ts->tick_stopped) { select_nohz_load_balancer(1); - ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); + ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; } @@ -526,7 +526,7 @@ ktime_t tick_nohz_get_sleep_length(void) static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); - hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); + hrtimer_set_expires(&ts->sched_timer, ts->last_tick); while (1) { /* Forward the time to expire in the future */ diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3258455549f4..af5a7e9f164b 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) { struct tick_sched *ts = tick_get_tick_sched(cpu); P(nohz_mode); - P_ns(idle_tick); + P_ns(last_tick); P(tick_stopped); P(idle_jiffies); P(idle_calls); @@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.6\n"); + SEQ_printf(m, "Timer List Version: v0.7\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); -- cgit v1.2.3 From 5b39939a40801f0c17e31adaf643d6e974227856 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 1 Aug 2011 00:06:10 +0200 Subject: nohz: Move ts->idle_calls incrementation into strict idle logic Since we want to prepare for making the nohz API work beyond the idle case, we need to pull ts->idle_calls incrementation up to the callers in idle. To perform this, we split tick_nohz_stop_sched_tick() into two parts: a first one that checks if we can really stop the tick for idle, and another that actually stops it. Then from the callers in idle, we check if we can stop the tick and only then we increment idle_calls and finally relay to the nohz API that won't care about these details anymore. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E.
McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 86 ++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 39 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 73cc4901336d..430e1b6901cc 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -271,47 +271,15 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) +static void tick_nohz_stop_sched_tick(struct tick_sched *ts, + ktime_t now, int cpu) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; ktime_t last_update, expires; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; - int cpu; - - cpu = smp_processor_id(); - ts = &per_cpu(tick_cpu_sched, cpu); - - /* - * If this cpu is offline and it is the one which updates - * jiffies, then give up the assignment and let it be taken by - * the cpu which runs the tick timer next. If we don't drop - * this here the jiffies might be stale and do_timer() never - * invoked. - */ - if (unlikely(!cpu_online(cpu))) { - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; - } - - if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) - return; - - if (need_resched()) - return; - if (unlikely(local_softirq_pending() && cpu_online(cpu))) { - static int ratelimit; - - if (ratelimit < 10) { - printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } - return; - } - ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { seq = read_seqbegin(&xtime_lock); @@ -441,16 +409,56 @@ out: ts->sleep_length = ktime_sub(dev->next_event, now); } +static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) +{ + /* + * If this cpu is offline and it is the one which updates + * jiffies, then give up the assignment and let it be taken by + * the cpu which runs the tick timer next. If we don't drop + * this here the jiffies might be stale and do_timer() never + * invoked. 
+ */ + if (unlikely(!cpu_online(cpu))) { + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = TICK_DO_TIMER_NONE; + } + + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) + return false; + + if (need_resched()) + return false; + + if (unlikely(local_softirq_pending() && cpu_online(cpu))) { + static int ratelimit; + + if (ratelimit < 10) { + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", + (unsigned int) local_softirq_pending()); + ratelimit++; + } + return false; + } + + return true; +} + static void __tick_nohz_idle_enter(struct tick_sched *ts) { ktime_t now; - int was_stopped = ts->tick_stopped; + int cpu = smp_processor_id(); - now = tick_nohz_start_idle(smp_processor_id(), ts); - tick_nohz_stop_sched_tick(ts, now); + now = tick_nohz_start_idle(cpu, ts); - if (!was_stopped && ts->tick_stopped) - ts->idle_jiffies = ts->last_jiffies; + if (can_stop_idle_tick(cpu, ts)) { + int was_stopped = ts->tick_stopped; + + ts->idle_calls++; + tick_nohz_stop_sched_tick(ts, now, cpu); + + if (!was_stopped && ts->tick_stopped) + ts->idle_jiffies = ts->last_jiffies; + } } /** -- cgit v1.2.3 From 84bf1bccc60cc64376125ea2eae05e4ba12f795b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 1 Aug 2011 01:25:38 +0200 Subject: nohz: Move next idle expiry time record into idle logic area The next idle expiry time record and idle sleeps tracking are statistics that only concern idle. Since we want the nohz APIs to become usable further idle context, let's pull up the handling of these statistics to the callers in idle. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 430e1b6901cc..60c9c60e9108 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -271,11 +271,11 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts, - ktime_t now, int cpu) +static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, + ktime_t now, int cpu) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; - ktime_t last_update, expires; + ktime_t last_update, expires, ret = { .tv64 = 0 }; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; @@ -358,6 +358,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; + ret = expires; + /* * nohz_stop_sched_tick can be called several times before * the nohz_restart_sched_tick is called. This happens when @@ -372,11 +374,6 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ts->tick_stopped = 1; } - ts->idle_sleeps++; - - /* Mark expires */ - ts->idle_expires = expires; - /* * If the expiration time == KTIME_MAX, then * in this case we simply stop the tick timer. 
@@ -407,6 +404,8 @@ out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); + + return ret; } static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) @@ -445,7 +444,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) static void __tick_nohz_idle_enter(struct tick_sched *ts) { - ktime_t now; + ktime_t now, expires; int cpu = smp_processor_id(); now = tick_nohz_start_idle(cpu, ts); @@ -454,7 +453,12 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) int was_stopped = ts->tick_stopped; ts->idle_calls++; - tick_nohz_stop_sched_tick(ts, now, cpu); + + expires = tick_nohz_stop_sched_tick(ts, now, cpu); + if (expires.tv64 > 0LL) { + ts->idle_sleeps++; + ts->idle_expires = expires; + } if (!was_stopped && ts->tick_stopped) ts->idle_jiffies = ts->last_jiffies; -- cgit v1.2.3 From 42e71e81f5bb5125ca7c194b5ccf1c93511ff8fb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:51 -0400 Subject: time: Whitespace cleanups per Ingo%27s requests Ingo noted a number of places where there is inconsistent use of whitespace. This patch tries to address the main culprits. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-3-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 269b1fe5f2ae..c2f12aa87fce 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -24,32 +24,31 @@ /* Structure holding internal timekeeping values. */ struct timekeeper { /* Current clocksource used for timekeeping. */ - struct clocksource *clock; + struct clocksource *clock; /* NTP adjusted clock multiplier */ - u32 mult; + u32 mult; /* The shift value of the current clocksource. */ - int shift; - + int shift; /* Number of clock cycles in one NTP interval. */ - cycle_t cycle_interval; + cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ - u64 xtime_interval; + u64 xtime_interval; /* shifted nano seconds left over when rounding cycle_interval */ - s64 xtime_remainder; + s64 xtime_remainder; /* Raw nano seconds accumulated per NTP interval. */ - u32 raw_interval; + u32 raw_interval; /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ - u64 xtime_nsec; + u64 xtime_nsec; /* Difference between accumulated time and NTP time in ntp * shifted nano seconds. */ - s64 ntp_error; + s64 ntp_error; /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ - int ntp_error_shift; + int ntp_error_shift; /* The current time */ - struct timespec xtime; + struct timespec xtime; /* * wall_to_monotonic is what we need to add to xtime (or xtime corrected * for sub jiffie times) to get to monotonic time. Monotonic is pegged @@ -64,20 +63,17 @@ struct timekeeper { * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. */ - struct timespec wall_to_monotonic; + struct timespec wall_to_monotonic; /* time spent in suspend */ - struct timespec total_sleep_time; + struct timespec total_sleep_time; /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. 
*/ - struct timespec raw_time; - + struct timespec raw_time; /* Offset clock monotonic -> clock realtime */ - ktime_t offs_real; - + ktime_t offs_real; /* Offset clock monotonic -> clock boottime */ - ktime_t offs_boot; - + ktime_t offs_boot; /* Seqlock for all timekeeper values */ - seqlock_t lock; + seqlock_t lock; }; static struct timekeeper timekeeper; @@ -547,6 +543,7 @@ u64 timekeeping_max_deferment(void) { unsigned long seq; u64 ret; + do { seq = read_seqbegin(&timekeeper.lock); -- cgit v1.2.3 From fee84c43e6afc42295ae8058cbbef9ea5633926c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:52 -0400 Subject: time: Explicitly use u32 instead of int for shift values Ingo noted that using a u32 instead of int for shift values would be better to make sure the compiler doesn't unnecessarily use complex signed arithmetic. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-4-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c2f12aa87fce..4fd83df0b14d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -28,7 +28,7 @@ struct timekeeper { /* NTP adjusted clock multiplier */ u32 mult; /* The shift value of the current clocksource. */ - int shift; + u32 shift; /* Number of clock cycles in one NTP interval. */ cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ @@ -45,7 +45,7 @@ struct timekeeper { s64 ntp_error; /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ - int ntp_error_shift; + u32 ntp_error_shift; /* The current time */ struct timespec xtime; @@ -960,7 +960,7 @@ static void timekeeping_adjust(s64 offset) * * Returns the unconsumed cycles. */ -static cycle_t logarithmic_accumulation(cycle_t offset, int shift) +static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) { u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; u64 raw_nsecs; -- cgit v1.2.3 From 1e75fa8be9fb61e1af46b5b3b176347a4c958ca1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:53 -0400 Subject: time: Condense timekeeper.xtime into xtime_sec The timekeeper struct has a xtime_nsec, which keeps the sub-nanosecond remainder. This ends up being somewhat duplicative of the timekeeper.xtime.tv_nsec value, and we have to do extra work to keep them apart, copying the full nsec portion out and back in over and over. This patch simplifies some of the logic by taking the timekeeper xtime value and splitting it into timekeeper.xtime_sec and reuses the timekeeper.xtime_nsec for the sub-second portion (stored in higher res shifted nanoseconds). This simplifies some of the accumulation logic. And will allow for more accurate timekeeping once the vsyscall code is updated to use the shifted nanosecond remainder. 
Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-5-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 181 ++++++++++++++++++++++++++++------------------ 1 file changed, 110 insertions(+), 71 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4fd83df0b14d..b98d9bd73e5e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -38,8 +38,11 @@ struct timekeeper { /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; - /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ + /* Current CLOCK_REALTIME time in seconds */ + u64 xtime_sec; + /* Clock shifted nano seconds */ u64 xtime_nsec; + /* Difference between accumulated time and NTP time in ntp * shifted nano seconds. */ s64 ntp_error; @@ -47,8 +50,6 @@ struct timekeeper { * ntp shifted nano seconds. */ u32 ntp_error_shift; - /* The current time */ - struct timespec xtime; /* * wall_to_monotonic is what we need to add to xtime (or xtime corrected * for sub jiffie times) to get to monotonic time. Monotonic is pegged @@ -84,11 +85,37 @@ static struct timekeeper timekeeper; */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); - /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; +static inline void tk_normalize_xtime(struct timekeeper *tk) +{ + while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { + tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; + tk->xtime_sec++; + } +} + +static struct timespec tk_xtime(struct timekeeper *tk) +{ + struct timespec ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + return ts; +} +static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) +{ + tk->xtime_sec = ts->tv_sec; + tk->xtime_nsec = ts->tv_nsec << tk->shift; +} + +static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) +{ + tk->xtime_sec += ts->tv_sec; + tk->xtime_nsec += ts->tv_nsec << tk->shift; +} /** * timekeeper_setup_internals - Set up internals to use clocksource clock. @@ -104,7 +131,9 @@ static void timekeeper_setup_internals(struct clocksource *clock) { cycle_t interval; u64 tmp, ntpinterval; + struct clocksource *old_clock; + old_clock = timekeeper.clock; timekeeper.clock = clock; clock->cycle_last = clock->read(clock); @@ -126,7 +155,14 @@ static void timekeeper_setup_internals(struct clocksource *clock) timekeeper.raw_interval = ((u64) interval * clock->mult) >> clock->shift; - timekeeper.xtime_nsec = 0; + /* if changing clocks, convert xtime_nsec shift units */ + if (old_clock) { + int shift_change = clock->shift - old_clock->shift; + if (shift_change < 0) + timekeeper.xtime_nsec >>= -shift_change; + else + timekeeper.xtime_nsec <<= shift_change; + } timekeeper.shift = clock->shift; timekeeper.ntp_error = 0; @@ -145,6 +181,7 @@ static inline s64 timekeeping_get_ns(void) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; + s64 nsec; /* read clocksource: */ clock = timekeeper.clock; @@ -153,9 +190,8 @@ static inline s64 timekeeping_get_ns(void) /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - /* return delta convert to nanoseconds using ntp adjusted mult. 
*/ - return clocksource_cyc2ns(cycle_delta, timekeeper.mult, - timekeeper.shift); + nsec = cycle_delta * timekeeper.mult + timekeeper.xtime_nsec; + return nsec >> timekeeper.shift; } static inline s64 timekeeping_get_ns_raw(void) @@ -185,12 +221,15 @@ static void update_rt_offset(void) /* must hold write on timekeeper.lock */ static void timekeeping_update(bool clearntp) { + struct timespec xt; + if (clearntp) { timekeeper.ntp_error = 0; ntp_clear(); } update_rt_offset(); - update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, + xt = tk_xtime(&timekeeper); + update_vsyscall(&xt, &timekeeper.wall_to_monotonic, timekeeper.clock, timekeeper.mult); } @@ -213,13 +252,12 @@ static void timekeeping_forward_now(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, - timekeeper.shift); + timekeeper.xtime_nsec += cycle_delta * timekeeper.mult; /* If arch requires, add in gettimeoffset() */ - nsec += arch_gettimeoffset(); + timekeeper.xtime_nsec += arch_gettimeoffset() << timekeeper.shift; - timespec_add_ns(&timekeeper.xtime, nsec); + tk_normalize_xtime(&timekeeper); nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); timespec_add_ns(&timekeeper.raw_time, nsec); @@ -234,15 +272,15 @@ static void timekeeping_forward_now(void) void getnstimeofday(struct timespec *ts) { unsigned long seq; - s64 nsecs; + s64 nsecs = 0; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; - nsecs = timekeeping_get_ns(); + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -262,11 +300,10 @@ ktime_t ktime_get(void) do { seq = read_seqbegin(&timekeeper.lock); - secs = timekeeper.xtime.tv_sec + + secs = timekeeper.xtime_sec + timekeeper.wall_to_monotonic.tv_sec; - nsecs = timekeeper.xtime.tv_nsec + + nsecs = timekeeping_get_ns() + timekeeper.wall_to_monotonic.tv_nsec; - nsecs += timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -291,22 +328,21 @@ void ktime_get_ts(struct timespec *ts) { struct timespec tomono; unsigned int seq; - s64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(); tomono = timekeeper.wall_to_monotonic; - nsecs = timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); + ts->tv_nsec += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, - ts->tv_nsec + tomono.tv_nsec + nsecs); + ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); @@ -334,7 +370,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) seq = read_seqbegin(&timekeeper.lock); *ts_raw = timekeeper.raw_time; - *ts_real = timekeeper.xtime; + ts_real->tv_sec = timekeeper.xtime_sec; + ts_real->tv_nsec = 0; nsecs_raw = timekeeping_get_ns_raw(); nsecs_real = timekeeping_get_ns(); @@ -377,7 +414,7 @@ EXPORT_SYMBOL(do_gettimeofday); */ int do_settimeofday(const struct timespec *tv) { - struct timespec ts_delta; + struct timespec ts_delta, xt; unsigned long flags; if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) @@ -387,12 +424,15 @@ int do_settimeofday(const struct timespec *tv) timekeeping_forward_now(); - ts_delta.tv_sec = tv->tv_sec - 
timekeeper.xtime.tv_sec; - ts_delta.tv_nsec = tv->tv_nsec - timekeeper.xtime.tv_nsec; + xt = tk_xtime(&timekeeper); + ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; + ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; + timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, ts_delta); - timekeeper.xtime = *tv; + tk_set_xtime(&timekeeper, tv); + timekeeping_update(true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -422,7 +462,8 @@ int timekeeping_inject_offset(struct timespec *ts) timekeeping_forward_now(); - timekeeper.xtime = timespec_add(timekeeper.xtime, *ts); + + tk_xtime_add(&timekeeper, ts); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *ts); @@ -606,14 +647,12 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - timekeeper.xtime.tv_sec = now.tv_sec; - timekeeper.xtime.tv_nsec = now.tv_nsec; + tk_set_xtime(&timekeeper, &now); timekeeper.raw_time.tv_sec = 0; timekeeper.raw_time.tv_nsec = 0; - if (boot.tv_sec == 0 && boot.tv_nsec == 0) { - boot.tv_sec = timekeeper.xtime.tv_sec; - boot.tv_nsec = timekeeper.xtime.tv_nsec; - } + if (boot.tv_sec == 0 && boot.tv_nsec == 0) + boot = tk_xtime(&timekeeper); + set_normalized_timespec(&timekeeper.wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); update_rt_offset(); @@ -646,7 +685,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) return; } - timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); + tk_xtime_add(&timekeeper, delta); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *delta); update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); @@ -742,7 +781,7 @@ static int timekeeping_suspend(void) * try to compensate so the difference in system time * and persistent_clock time stays close to constant. */ - delta = timespec_sub(timekeeper.xtime, timekeeping_suspend_time); + delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time); delta_delta = timespec_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* @@ -977,9 +1016,9 @@ static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) while (timekeeper.xtime_nsec >= nsecps) { int leap; timekeeper.xtime_nsec -= nsecps; - timekeeper.xtime.tv_sec++; - leap = second_overflow(timekeeper.xtime.tv_sec); - timekeeper.xtime.tv_sec += leap; + timekeeper.xtime_sec++; + leap = second_overflow(timekeeper.xtime_sec); + timekeeper.xtime_sec += leap; timekeeper.wall_to_monotonic.tv_sec -= leap; if (leap) clock_was_set_delayed(); @@ -1015,6 +1054,7 @@ static void update_wall_time(void) cycle_t offset; int shift = 0, maxshift; unsigned long flags; + s64 remainder; write_seqlock_irqsave(&timekeeper.lock, flags); @@ -1029,8 +1069,6 @@ static void update_wall_time(void) #else offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #endif - timekeeper.xtime_nsec = (s64)timekeeper.xtime.tv_nsec << - timekeeper.shift; /* * With NO_HZ we may have to accumulate many cycle_intervals @@ -1076,28 +1114,31 @@ static void update_wall_time(void) timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; } - /* - * Store full nanoseconds into xtime after rounding it up and - * add the remainder to the error difference. 
- */ - timekeeper.xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> - timekeeper.shift) + 1; - timekeeper.xtime_nsec -= (s64)timekeeper.xtime.tv_nsec << - timekeeper.shift; - timekeeper.ntp_error += timekeeper.xtime_nsec << - timekeeper.ntp_error_shift; + * Store only full nanoseconds into xtime_nsec after rounding + * it up and add the remainder to the error difference. + * XXX - This is necessary to avoid small 1ns inconsistnecies caused + * by truncating the remainder in vsyscalls. However, it causes + * additional work to be done in timekeeping_adjust(). Once + * the vsyscall implementations are converted to use xtime_nsec + * (shifted nanoseconds), this can be killed. + */ + remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1); + timekeeper.xtime_nsec -= remainder; + timekeeper.xtime_nsec += 1 << timekeeper.shift; + timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift; /* * Finally, make sure that after the rounding - * xtime.tv_nsec isn't larger than NSEC_PER_SEC + * xtime_nsec isn't larger than NSEC_PER_SEC */ - if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { + if (unlikely(timekeeper.xtime_nsec >= + ((u64)NSEC_PER_SEC << timekeeper.shift))) { int leap; - timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; - timekeeper.xtime.tv_sec++; - leap = second_overflow(timekeeper.xtime.tv_sec); - timekeeper.xtime.tv_sec += leap; + timekeeper.xtime_nsec -= (u64)NSEC_PER_SEC << timekeeper.shift; + timekeeper.xtime_sec++; + leap = second_overflow(timekeeper.xtime_sec); + timekeeper.xtime_sec += leap; timekeeper.wall_to_monotonic.tv_sec -= leap; if (leap) clock_was_set_delayed(); @@ -1148,21 +1189,20 @@ void get_monotonic_boottime(struct timespec *ts) { struct timespec tomono, sleep; unsigned int seq; - s64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(); tomono = timekeeper.wall_to_monotonic; sleep = timekeeper.total_sleep_time; - nsecs = timekeeping_get_ns(); } while (read_seqretry(&timekeeper.lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, - ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs); + ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); } EXPORT_SYMBOL_GPL(get_monotonic_boottime); @@ -1195,13 +1235,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - return timekeeper.xtime.tv_sec; + return timekeeper.xtime_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return timekeeper.xtime; + return tk_xtime(&timekeeper); } struct timespec current_kernel_time(void) @@ -1212,7 +1252,7 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&timekeeper.lock); - now = timekeeper.xtime; + now = tk_xtime(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); return now; @@ -1227,7 +1267,7 @@ struct timespec get_monotonic_coarse(void) do { seq = read_seqbegin(&timekeeper.lock); - now = timekeeper.xtime; + now = tk_xtime(&timekeeper); mono = timekeeper.wall_to_monotonic; } while (read_seqretry(&timekeeper.lock, seq)); @@ -1262,7 +1302,7 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, do { seq = read_seqbegin(&timekeeper.lock); - *xtim = timekeeper.xtime; + *xtim = tk_xtime(&timekeeper); *wtom = timekeeper.wall_to_monotonic; *sleep = timekeeper.total_sleep_time; } while (read_seqretry(&timekeeper.lock, seq)); @@ -1286,9 +1326,8 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) do { 
seq = read_seqbegin(&timekeeper.lock); - secs = timekeeper.xtime.tv_sec; - nsecs = timekeeper.xtime.tv_nsec; - nsecs += timekeeping_get_ns(); + secs = timekeeper.xtime_sec; + nsecs = timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); -- cgit v1.2.3 From 1f4f948706bcec1b51bf6492bf04057d2e21e273 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:54 -0400 Subject: time: Refactor accumulation of nsecs to secs We do the exact same logic moving nsecs to secs in the timekeeper in multiple places, so condense this into a single function. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-6-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 54 ++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b98d9bd73e5e..cb4a433bab97 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -990,6 +990,35 @@ static void timekeeping_adjust(s64 offset) } +/** + * accumulate_nsecs_to_secs - Accumulates nsecs into secs + * + * Helper function that accumulates a the nsecs greater then a second + * from the xtime_nsec field to the xtime_secs field. + * It also calls into the NTP code to handle leapsecond processing. + * + */ +static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) +{ + u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; + + while (tk->xtime_nsec >= nsecps) { + int leap; + + tk->xtime_nsec -= nsecps; + tk->xtime_sec++; + + /* Figure out if its a leap sec and apply if needed */ + leap = second_overflow(tk->xtime_sec); + tk->xtime_sec += leap; + tk->wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); + + } +} + + /** * logarithmic_accumulation - shifted accumulation of cycles * @@ -1001,7 +1030,6 @@ static void timekeeping_adjust(s64 offset) */ static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) { - u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; u64 raw_nsecs; /* If the offset is smaller than a shifted interval, do nothing */ @@ -1013,16 +1041,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; - while (timekeeper.xtime_nsec >= nsecps) { - int leap; - timekeeper.xtime_nsec -= nsecps; - timekeeper.xtime_sec++; - leap = second_overflow(timekeeper.xtime_sec); - timekeeper.xtime_sec += leap; - timekeeper.wall_to_monotonic.tv_sec -= leap; - if (leap) - clock_was_set_delayed(); - } + + accumulate_nsecs_to_secs(&timekeeper); /* Accumulate raw time */ raw_nsecs = timekeeper.raw_interval << shift; @@ -1132,17 +1152,7 @@ static void update_wall_time(void) * Finally, make sure that after the rounding * xtime_nsec isn't larger than NSEC_PER_SEC */ - if (unlikely(timekeeper.xtime_nsec >= - ((u64)NSEC_PER_SEC << timekeeper.shift))) { - int leap; - timekeeper.xtime_nsec -= (u64)NSEC_PER_SEC << timekeeper.shift; - timekeeper.xtime_sec++; - leap = second_overflow(timekeeper.xtime_sec); - timekeeper.xtime_sec += leap; - timekeeper.wall_to_monotonic.tv_sec -= leap; - if (leap) - clock_was_set_delayed(); - } + accumulate_nsecs_to_secs(&timekeeper); timekeeping_update(false); -- cgit v1.2.3 From f2a5a0854efc62abe7f69e9947842cb135837f9a Mon Sep 17 00:00:00 
2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:55 -0400 Subject: time: Move arch_gettimeoffset() usage into timekeeping_get_ns() Since we call arch_gettimeoffset() in all the accessor functions, move arch_gettimeoffset() calls into timekeeping_get_ns() and timekeeping_get_ns_raw() to simplify the code. This also makes the code easier to maintain as we don't have to worry about forgetting the arch_gettimeoffset() as has happened in the past. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-7-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cb4a433bab97..e43289df28c2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -191,13 +191,17 @@ static inline s64 timekeeping_get_ns(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; nsec = cycle_delta * timekeeper.mult + timekeeper.xtime_nsec; - return nsec >> timekeeper.shift; + nsec >>= timekeeper.shift; + + /* If arch requires, add in gettimeoffset() */ + return nsec + arch_gettimeoffset(); } static inline s64 timekeeping_get_ns_raw(void) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; + s64 nsec; /* read clocksource: */ clock = timekeeper.clock; @@ -206,8 +210,11 @@ static inline s64 timekeeping_get_ns_raw(void) /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - /* return delta convert to nanoseconds. */ - return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + /* convert delta to nanoseconds. 
*/ + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + + /* If arch requires, add in gettimeoffset() */ + return nsec + arch_gettimeoffset(); } static void update_rt_offset(void) @@ -282,9 +289,6 @@ void getnstimeofday(struct timespec *ts) ts->tv_sec = timekeeper.xtime_sec; ts->tv_nsec = timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); - } while (read_seqretry(&timekeeper.lock, seq)); timespec_add_ns(ts, nsecs); @@ -304,8 +308,6 @@ ktime_t ktime_get(void) timekeeper.wall_to_monotonic.tv_sec; nsecs = timekeeping_get_ns() + timekeeper.wall_to_monotonic.tv_nsec; - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); /* @@ -336,8 +338,6 @@ void ktime_get_ts(struct timespec *ts) ts->tv_sec = timekeeper.xtime_sec; ts->tv_nsec = timekeeping_get_ns(); tomono = timekeeper.wall_to_monotonic; - /* If arch requires, add in gettimeoffset() */ - ts->tv_nsec += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); @@ -365,8 +365,6 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) WARN_ON_ONCE(timekeeping_suspended); do { - u32 arch_offset; - seq = read_seqbegin(&timekeeper.lock); *ts_raw = timekeeper.raw_time; @@ -376,11 +374,6 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) nsecs_raw = timekeeping_get_ns_raw(); nsecs_real = timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - arch_offset = arch_gettimeoffset(); - nsecs_raw += arch_offset; - nsecs_real += arch_offset; - } while (read_seqretry(&timekeeper.lock, seq)); timespec_add_ns(ts_raw, nsecs_raw); @@ -1338,8 +1331,6 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) secs = timekeeper.xtime_sec; nsecs = timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); *offs_real = timekeeper.offs_real; *offs_boot = timekeeper.offs_boot; -- cgit v1.2.3
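The maintenance point of the change above can be shown with a standalone sketch (made-up names and values, ordinary userspace C, not the kernel implementation): once the per-architecture offset is folded into the one shared accessor, no reader can forget to apply it.

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in for arch_gettimeoffset(): a fixed per-arch fudge, in ns. */
	static int64_t arch_offset_ns(void) { return 250; }

	/* Stand-in for the clocksource delta, already converted to ns. */
	static int64_t clocksource_delta_ns(void) { return 1000; }

	/*
	 * Before: every accessor had to remember "+ arch_offset_ns()".
	 * After: the offset is applied once, inside the shared helper.
	 */
	static int64_t timekeeping_ns(void)
	{
		return clocksource_delta_ns() + arch_offset_ns();
	}

	int main(void)
	{
		/* Both "readers" pick up the arch offset without repeating it. */
		printf("gettimeofday path: %lld ns\n", (long long)timekeeping_ns());
		printf("ktime_get path:    %lld ns\n", (long long)timekeeping_ns());
		return 0;
	}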
From 2a8c0883c3cfffcc148ea606e2a4e7453cd75e73 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:56 -0400 Subject: time: Move xtime_nsec adjustment underflow handling into timekeeping_adjust() When we make adjustments speeding up the clock, it's possible for xtime_nsec to underflow. We already handle this properly, but we do so from update_wall_time() instead of the more logical timekeeping_adjust(), where the possible underflow actually occurs. Thus, move the correction logic into timekeeping_adjust(), which is the function that causes the issue. This makes update_wall_time() more readable. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-8-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e43289df28c2..aeeaab8cba6e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -980,6 +980,27 @@ static void timekeeping_adjust(s64 offset) timekeeper.xtime_nsec -= offset; timekeeper.ntp_error -= (interval - offset) << timekeeper.ntp_error_shift; + + /* + * It may be possible that when we entered this function, xtime_nsec + * was very small. Further, if we're slightly speeding the clocksource + * in the code above, it's possible the required corrective factor to + * xtime_nsec could cause it to underflow. + * + * Now, since we already accumulated the second, we cannot simply roll + * the accumulated second back, since the NTP subsystem has been + * notified via second_overflow. So instead we push xtime_nsec forward + * by the amount we underflowed, and add that amount into the error. + * + * We'll correct this error next time through this function, when + * xtime_nsec is not as small. + */ + if (unlikely((s64)timekeeper.xtime_nsec < 0)) { + s64 neg = -(s64)timekeeper.xtime_nsec; + timekeeper.xtime_nsec = 0; + timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; + } + } @@ -1105,27 +1126,6 @@ static void update_wall_time(void) /* correct the clock when NTP error is too big */ timekeeping_adjust(offset); - /* - * Since in the loop above, we accumulate any amount of time - * in xtime_nsec over a second into xtime.tv_sec, its possible for - * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in timekeeping_adjust(), - * its possible the required corrective factor to xtime_nsec could - * cause it to underflow. - * - * Now, we cannot simply roll the accumulated second back, since - * the NTP subsystem has been notified via second_overflow. So - * instead we push xtime_nsec forward by the amount we underflowed, - * and add that amount into the error. - * - * We'll correct this error next time through this function, when - * xtime_nsec is not as small. - */ - if (unlikely((s64)timekeeper.xtime_nsec < 0)) { - s64 neg = -(s64)timekeeper.xtime_nsec; - timekeeper.xtime_nsec = 0; - timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; - } /* * Store only full nanoseconds into xtime_nsec after rounding -- cgit v1.2.3
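The compensation that this patch relocates is easier to see in isolation. The following is a self-contained sketch of the same idea (invented names, userspace C, not the kernel code): if the shifted nanosecond count would go negative, clamp it to zero and remember the deficit in the error term so a later pass can take it back out.

	#include <stdint.h>
	#include <stdio.h>

	struct tk_model {
		uint64_t xtime_nsec;	/* shifted nanoseconds; unsigned on purpose */
		int64_t  ntp_error;	/* accumulated error, in shifted units here */
		unsigned int error_shift;
	};

	/* Apply a (possibly too large) downward correction to xtime_nsec. */
	static void apply_correction(struct tk_model *tk, uint64_t offset)
	{
		tk->xtime_nsec -= offset;

		/*
		 * If the subtraction wrapped, the signed view is negative: push
		 * xtime_nsec forward to zero and book the deficit as error, to
		 * be corrected on a later call instead of rolling back a second
		 * that NTP has already been told about.
		 */
		if ((int64_t)tk->xtime_nsec < 0) {
			int64_t neg = -(int64_t)tk->xtime_nsec;

			tk->xtime_nsec = 0;
			tk->ntp_error += neg << tk->error_shift;
		}
	}

	int main(void)
	{
		struct tk_model tk = { .xtime_nsec = 100, .error_shift = 2 };

		apply_correction(&tk, 150);	/* would underflow by 50 */
		printf("xtime_nsec=%llu ntp_error=%lld\n",
		       (unsigned long long)tk.xtime_nsec, (long long)tk.ntp_error);
		return 0;
	}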
From f726a697d06102e7a1fc0a87308cb30a84580205 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:57 -0400 Subject: time: Rework timekeeping functions to take timekeeper ptr as argument As part of cleaning up the timekeeping code, this patch converts a number of internal functions to take a timekeeper ptr as an argument, so that the internal functions don't access the global timekeeper structure directly. This allows for further optimizations to reduce lock hold time later. This patch has been updated to include more consistent usage of the timekeeper value, by making sure it is always passed as an argument to non top-level functions. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-9-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 208 +++++++++++++++++++++++----------------- 1 file changed, 103 insertions(+), 105 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index aeeaab8cba6e..5980e902978c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -127,14 +127,14 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) * * Unless you're the timekeeping code, you should not be using this! */ -static void timekeeper_setup_internals(struct clocksource *clock) +static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) { cycle_t interval; u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = timekeeper.clock; - timekeeper.clock = clock; + old_clock = tk->clock; + tk->clock = clock; clock->cycle_last = clock->read(clock); /* Do the ns -> cycle conversion first, using original mult */ @@ -147,64 +147,64 @@ static void timekeeper_setup_internals(struct clocksource *clock) tmp = 1; interval = (cycle_t) tmp; - timekeeper.cycle_interval = interval; + tk->cycle_interval = interval; /* Go back from cycles -> shifted ns */ - timekeeper.xtime_interval = (u64) interval * clock->mult; - timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; - timekeeper.raw_interval = + tk->xtime_interval = (u64) interval * clock->mult; + tk->xtime_remainder = ntpinterval - tk->xtime_interval; + tk->raw_interval = ((u64) interval * clock->mult) >> clock->shift; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - timekeeper.xtime_nsec >>= -shift_change; + tk->xtime_nsec >>= -shift_change; else - timekeeper.xtime_nsec <<= shift_change; + tk->xtime_nsec <<= shift_change; } - timekeeper.shift = clock->shift; + tk->shift = clock->shift; - timekeeper.ntp_error = 0; - timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + tk->ntp_error = 0; + tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; /* * The timekeeper keeps its own mult values for the currently * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ - timekeeper.mult = clock->mult; + tk->mult = clock->mult; } /* Timekeeper helper functions. 
*/ -static inline s64 timekeeping_get_ns(void) +static inline s64 timekeeping_get_ns(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - nsec = cycle_delta * timekeeper.mult + timekeeper.xtime_nsec; - nsec >>= timekeeper.shift; + nsec = cycle_delta * tk->mult + tk->xtime_nsec; + nsec >>= tk->shift; /* If arch requires, add in gettimeoffset() */ return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns_raw(void) +static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ @@ -217,27 +217,26 @@ static inline s64 timekeeping_get_ns_raw(void) return nsec + arch_gettimeoffset(); } -static void update_rt_offset(void) +static void update_rt_offset(struct timekeeper *tk) { - struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic; + struct timespec tmp, *wtm = &tk->wall_to_monotonic; set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); - timekeeper.offs_real = timespec_to_ktime(tmp); + tk->offs_real = timespec_to_ktime(tmp); } /* must hold write on timekeeper.lock */ -static void timekeeping_update(bool clearntp) +static void timekeeping_update(struct timekeeper *tk, bool clearntp) { struct timespec xt; if (clearntp) { - timekeeper.ntp_error = 0; + tk->ntp_error = 0; ntp_clear(); } - update_rt_offset(); - xt = tk_xtime(&timekeeper); - update_vsyscall(&xt, &timekeeper.wall_to_monotonic, - timekeeper.clock, timekeeper.mult); + update_rt_offset(tk); + xt = tk_xtime(tk); + update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); } @@ -248,26 +247,26 @@ static void timekeeping_update(bool clearntp) * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. 
*/ -static void timekeeping_forward_now(void) +static void timekeeping_forward_now(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; s64 nsec; - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - timekeeper.xtime_nsec += cycle_delta * timekeeper.mult; + tk->xtime_nsec += cycle_delta * tk->mult; /* If arch requires, add in gettimeoffset() */ - timekeeper.xtime_nsec += arch_gettimeoffset() << timekeeper.shift; + tk->xtime_nsec += arch_gettimeoffset() << tk->shift; - tk_normalize_xtime(&timekeeper); + tk_normalize_xtime(tk); nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); - timespec_add_ns(&timekeeper.raw_time, nsec); + timespec_add_ns(&tk->raw_time, nsec); } /** @@ -287,7 +286,7 @@ void getnstimeofday(struct timespec *ts) seq = read_seqbegin(&timekeeper.lock); ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(); + ts->tv_nsec = timekeeping_get_ns(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); @@ -306,7 +305,7 @@ ktime_t ktime_get(void) seq = read_seqbegin(&timekeeper.lock); secs = timekeeper.xtime_sec + timekeeper.wall_to_monotonic.tv_sec; - nsecs = timekeeping_get_ns() + + nsecs = timekeeping_get_ns(&timekeeper) + timekeeper.wall_to_monotonic.tv_nsec; } while (read_seqretry(&timekeeper.lock, seq)); @@ -336,7 +335,7 @@ void ktime_get_ts(struct timespec *ts) do { seq = read_seqbegin(&timekeeper.lock); ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(); + ts->tv_nsec = timekeeping_get_ns(&timekeeper); tomono = timekeeper.wall_to_monotonic; } while (read_seqretry(&timekeeper.lock, seq)); @@ -371,8 +370,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_sec = timekeeper.xtime_sec; ts_real->tv_nsec = 0; - nsecs_raw = timekeeping_get_ns_raw(); - nsecs_real = timekeeping_get_ns(); + nsecs_raw = timekeeping_get_ns_raw(&timekeeper); + nsecs_real = timekeeping_get_ns(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); @@ -415,7 +414,7 @@ int do_settimeofday(const struct timespec *tv) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); xt = tk_xtime(&timekeeper); ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; @@ -426,7 +425,7 @@ int do_settimeofday(const struct timespec *tv) tk_set_xtime(&timekeeper, tv); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -453,14 +452,14 @@ int timekeeping_inject_offset(struct timespec *ts) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); tk_xtime_add(&timekeeper, ts); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *ts); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -485,14 +484,14 @@ static int change_clocksource(void *data) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); if (!new->enable || new->enable(new) == 0) { old = timekeeper.clock; - timekeeper_setup_internals(new); + tk_setup_internals(&timekeeper, new); if (old->disable) old->disable(old); } - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -542,7 +541,7 @@ void getrawmonotonic(struct 
timespec *ts) do { seq = read_seqbegin(&timekeeper.lock); - nsecs = timekeeping_get_ns_raw(); + nsecs = timekeeping_get_ns_raw(&timekeeper); *ts = timekeeper.raw_time; } while (read_seqretry(&timekeeper.lock, seq)); @@ -638,7 +637,7 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - timekeeper_setup_internals(clock); + tk_setup_internals(&timekeeper, clock); tk_set_xtime(&timekeeper, &now); timekeeper.raw_time.tv_sec = 0; @@ -648,7 +647,7 @@ void __init timekeeping_init(void) set_normalized_timespec(&timekeeper.wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); - update_rt_offset(); + update_rt_offset(&timekeeper); timekeeper.total_sleep_time.tv_sec = 0; timekeeper.total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -670,7 +669,8 @@ static void update_sleep_time(struct timespec t) * Takes a timespec offset measuring a suspend interval and properly * adds the sleep offset to the timekeeping variables. */ -static void __timekeeping_inject_sleeptime(struct timespec *delta) +static void __timekeeping_inject_sleeptime(struct timekeeper *tk, + struct timespec *delta) { if (!timespec_valid(delta)) { printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " @@ -678,10 +678,9 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) return; } - tk_xtime_add(&timekeeper, delta); - timekeeper.wall_to_monotonic = - timespec_sub(timekeeper.wall_to_monotonic, *delta); - update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); + tk_xtime_add(tk, delta); + tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta); + update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); } @@ -707,11 +706,11 @@ void timekeeping_inject_sleeptime(struct timespec *delta) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); - __timekeeping_inject_sleeptime(delta); + __timekeeping_inject_sleeptime(&timekeeper, delta); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -740,7 +739,7 @@ static void timekeeping_resume(void) if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - __timekeeping_inject_sleeptime(&ts); + __timekeeping_inject_sleeptime(&timekeeper, &ts); } /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); @@ -765,7 +764,7 @@ static int timekeeping_suspend(void) read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); timekeeping_suspended = 1; /* @@ -813,7 +812,8 @@ device_initcall(timekeeping_init_ops); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. */ -static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, + s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -829,7 +829,7 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). 
*/ - error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -838,8 +838,8 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = ntp_tick_length() >> (timekeeper.ntp_error_shift + 1); - tick_error -= timekeeper.xtime_interval >> 1; + tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); + tick_error -= tk->xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -864,9 +864,9 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void timekeeping_adjust(s64 offset) +static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { - s64 error, interval = timekeeper.cycle_interval; + s64 error, interval = tk->cycle_interval; int adj; /* @@ -882,7 +882,7 @@ static void timekeeping_adjust(s64 offset) * * Note: It does not "save" on aggravation when reading the code. */ - error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); + error = tk->ntp_error >> (tk->ntp_error_shift - 1); if (error > interval) { /* * We now divide error by 4(via shift), which checks if @@ -904,7 +904,8 @@ static void timekeeping_adjust(s64 offset) if (likely(error <= interval)) adj = 1; else - adj = timekeeping_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(tk, error, &interval, + &offset); } else if (error < -interval) { /* See comment above, this is just switched for the negative */ error >>= 2; @@ -913,18 +914,17 @@ static void timekeeping_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = timekeeping_bigadjust(error, &interval, &offset); - } else /* No adjustment needed */ + adj = timekeeping_bigadjust(tk, error, &interval, + &offset); + } else return; - if (unlikely(timekeeper.clock->maxadj && - (timekeeper.mult + adj > - timekeeper.clock->mult + timekeeper.clock->maxadj))) { + if (unlikely(tk->clock->maxadj && + (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - timekeeper.clock->name, (long)timekeeper.mult + adj, - (long)timekeeper.clock->mult + - timekeeper.clock->maxadj); + tk->clock->name, (long)tk->mult + adj, + (long)tk->clock->mult + tk->clock->maxadj); } /* * So the following can be confusing. @@ -975,11 +975,10 @@ static void timekeeping_adjust(s64 offset) * * XXX - TODO: Doc ntp_error calculation. */ - timekeeper.mult += adj; - timekeeper.xtime_interval += interval; - timekeeper.xtime_nsec -= offset; - timekeeper.ntp_error -= (interval - offset) << - timekeeper.ntp_error_shift; + tk->mult += adj; + tk->xtime_interval += interval; + tk->xtime_nsec -= offset; + tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; /* * It may be possible that when we entered this function, xtime_nsec @@ -995,10 +994,10 @@ static void timekeeping_adjust(s64 offset) * We'll correct this error next time through this function, when * xtime_nsec is not as small. 
*/ - if (unlikely((s64)timekeeper.xtime_nsec < 0)) { - s64 neg = -(s64)timekeeper.xtime_nsec; - timekeeper.xtime_nsec = 0; - timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; + if (unlikely((s64)tk->xtime_nsec < 0)) { + s64 neg = -(s64)tk->xtime_nsec; + tk->xtime_nsec = 0; + tk->ntp_error += neg << tk->ntp_error_shift; } } @@ -1042,37 +1041,36 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) * * Returns the unconsumed cycles. */ -static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) +static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, + u32 shift) { u64 raw_nsecs; - /* If the offset is smaller than a shifted interval, do nothing */ - if (offset < timekeeper.cycle_interval<<shift) + /* If the offset is smaller than a shifted interval, do nothing */ + if (offset < tk->cycle_interval<<shift) return offset; /* Accumulate one shifted interval */ - offset -= timekeeper.cycle_interval << shift; - timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; + offset -= tk->cycle_interval << shift; + tk->clock->cycle_last += tk->cycle_interval << shift; - timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; - - accumulate_nsecs_to_secs(&timekeeper); + tk->xtime_nsec += tk->xtime_interval << shift; + accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ - raw_nsecs = timekeeper.raw_interval << shift; - raw_nsecs += timekeeper.raw_time.tv_nsec; + raw_nsecs = tk->raw_interval << shift; + raw_nsecs += tk->raw_time.tv_nsec; if (raw_nsecs >= NSEC_PER_SEC) { u64 raw_secs = raw_nsecs; raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); - timekeeper.raw_time.tv_sec += raw_secs; + tk->raw_time.tv_sec += raw_secs; } - timekeeper.raw_time.tv_nsec = raw_nsecs; + tk->raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ - timekeeper.ntp_error += ntp_tick_length() << shift; - timekeeper.ntp_error -= - (timekeeper.xtime_interval + timekeeper.xtime_remainder) << - (timekeeper.ntp_error_shift + shift); + tk->ntp_error += ntp_tick_length() << shift; + tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << + (tk->ntp_error_shift + shift); return offset; } @@ -1118,13 +1116,13 @@ static void update_wall_time(void) maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { - offset = logarithmic_accumulation(offset, shift); + offset = logarithmic_accumulation(&timekeeper, offset, shift); if(offset < timekeeper.cycle_interval<<shift) shift--; } /* correct the clock when NTP error is too big */ - timekeeping_adjust(offset); + timekeeping_adjust(&timekeeper, offset); /* * Store only full nanoseconds into xtime_nsec after rounding @@ ... @@ void get_monotonic_boottime(struct timespec *ts) do { seq = read_seqbegin(&timekeeper.lock); ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(); + ts->tv_nsec = timekeeping_get_ns(&timekeeper); tomono = timekeeper.wall_to_monotonic; sleep = timekeeper.total_sleep_time; @@ -1330,7 +1328,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) seq = read_seqbegin(&timekeeper.lock); secs = timekeeper.xtime_sec; - nsecs = timekeeping_get_ns(); + nsecs = timekeeping_get_ns(&timekeeper); *offs_real = timekeeper.offs_real; *offs_boot = timekeeper.offs_boot; -- cgit v1.2.3
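The conversion pattern used throughout this last patch can be summarised with a small standalone sketch (simplified types and invented names, ordinary userspace C, not the kernel code): helpers stop reading the file-scope timekeeper and instead operate on whatever instance they are handed, which is the property the changelog cites for reducing lock hold time later.

	#include <stdint.h>
	#include <stdio.h>

	struct tk_model {
		uint64_t cycle_last;
		uint64_t mult;
		uint64_t xtime_nsec;
		unsigned int shift;
	};

	/* File-scope instance, analogous to the global 'timekeeper'. */
	static struct tk_model timekeeper;

	/* After the rework: the helper works on the instance it is given... */
	static uint64_t get_ns(const struct tk_model *tk, uint64_t cycle_now)
	{
		uint64_t delta = cycle_now - tk->cycle_last;

		return (delta * tk->mult + tk->xtime_nsec) >> tk->shift;
	}

	int main(void)
	{
		struct tk_model copy;

		timekeeper = (struct tk_model){ .cycle_last = 1000, .mult = 4, .shift = 2 };

		/* ...so top-level code may pass the global or a private copy. */
		copy = timekeeper;
		printf("ns=%llu\n", (unsigned long long)get_ns(&copy, 1100));
		return 0;
	}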