diff options
Diffstat (limited to 'kernel/time/hrtimer.c')
-rw-r--r-- | kernel/time/hrtimer.c | 346 |
1 files changed, 317 insertions, 29 deletions
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 93ef7190bdea..2c6be169bdc7 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -48,11 +48,13 @@ #include <linux/sched/rt.h> #include <linux/sched/deadline.h> #include <linux/timer.h> +#include <linux/kthread.h> #include <linux/freezer.h> #include <asm/uaccess.h> #include <trace/events/timer.h> +#include <trace/events/hist.h> #include "tick-internal.h" @@ -576,8 +578,7 @@ static int hrtimer_reprogram(struct hrtimer *timer, * When the callback is running, we do not reprogram the clock event * device. The timer callback is either running on a different CPU or * the callback is executed in the hrtimer_interrupt context. The - * reprogramming is handled either by the softirq, which called the - * callback or at the end of the hrtimer_interrupt. + * reprogramming is handled at the end of the hrtimer_interrupt. */ if (hrtimer_callback_running(timer)) return 0; @@ -621,6 +622,9 @@ static int hrtimer_reprogram(struct hrtimer *timer, return res; } +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now); +static int hrtimer_rt_defer(struct hrtimer *timer); + /* * Initialize the high resolution related parts of cpu_base */ @@ -630,6 +634,21 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) base->hres_active = 0; } +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base, + int wakeup) +{ + if (!hrtimer_reprogram(timer, base)) + return 0; + if (!wakeup) + return -ETIME; +#ifdef CONFIG_PREEMPT_RT_BASE + if (!hrtimer_rt_defer(timer)) + return -ETIME; +#endif + return 1; +} + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; @@ -695,6 +714,44 @@ static void clock_was_set_work(struct work_struct *work) static DECLARE_WORK(hrtimer_work, clock_was_set_work); +#ifdef CONFIG_PREEMPT_RT_FULL +/* + * RT can not call schedule_work from real interrupt context. + * Need to make a thread to do the real work. + */ +static struct task_struct *clock_set_delay_thread; +static bool do_clock_set_delay; + +static int run_clock_set_delay(void *ignore) +{ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (do_clock_set_delay) { + do_clock_set_delay = false; + schedule_work(&hrtimer_work); + } + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +void clock_was_set_delayed(void) +{ + do_clock_set_delay = true; + /* Make visible before waking up process */ + smp_wmb(); + wake_up_process(clock_set_delay_thread); +} + +static __init int create_clock_set_delay_thread(void) +{ + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd"); + BUG_ON(!clock_set_delay_thread); + return 0; +} +early_initcall(create_clock_set_delay_thread); +#else /* PREEMPT_RT_FULL */ /* * Called from timekeeping and resume code to reprogramm the hrtimer * interrupt device on all cpus. @@ -703,6 +760,7 @@ void clock_was_set_delayed(void) { schedule_work(&hrtimer_work); } +#endif #else @@ -711,6 +769,13 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base, + int wakeup) +{ + return 0; +} + static inline int hrtimer_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { @@ -718,7 +783,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void retrigger_next_event(void *arg) { } - #endif /* CONFIG_HIGH_RES_TIMERS */ /* @@ -836,6 +900,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) } EXPORT_SYMBOL_GPL(hrtimer_forward); +#ifdef CONFIG_PREEMPT_RT_BASE +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) + +/** + * hrtimer_wait_for_timer - Wait for a running timer + * + * @timer: timer to wait for + * + * The function waits in case the timers callback function is + * currently executed on the waitqueue of the timer base. The + * waitqueue is woken up after the timer callback function has + * finished execution. + */ +void hrtimer_wait_for_timer(const struct hrtimer *timer) +{ + struct hrtimer_clock_base *base = timer->base; + + if (base && base->cpu_base && !timer->irqsafe) + wait_event(base->cpu_base->wait, + !(timer->state & HRTIMER_STATE_CALLBACK)); +} + +#else +# define wake_up_timer_waiters(b) do { } while (0) +#endif + /* * enqueue_hrtimer - internal function to (re)start a timer * @@ -879,6 +969,11 @@ static void __remove_hrtimer(struct hrtimer *timer, if (!(timer->state & HRTIMER_STATE_ENQUEUED)) goto out; + if (unlikely(!list_empty(&timer->cb_entry))) { + list_del_init(&timer->cb_entry); + goto out; + } + next_timer = timerqueue_getnext(&base->active); timerqueue_del(&base->active, &timer->node); if (&timer->node == next_timer) { @@ -966,7 +1061,16 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); timer_stats_hrtimer_set_start_info(timer); +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + { + ktime_t now = new_base->get_time(); + if (ktime_to_ns(tim) < ktime_to_ns(now)) + timer->praecox = now; + else + timer->praecox = ktime_set(0, 0); + } +#endif leftmost = enqueue_hrtimer(timer, new_base); if (!leftmost) { @@ -980,15 +1084,26 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * on dynticks target. */ wake_up_nohz_cpu(new_base->cpu_base->cpu); - } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) && - hrtimer_reprogram(timer, new_base)) { + } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases)) { + + ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup); + if (ret < 0) { + /* + * In case we failed to reprogram the timer (mostly + * because out current timer is already elapsed), + * remove it again and report a failure. This avoids + * stale base->first entries. + */ + debug_deactivate(timer); + __remove_hrtimer(timer, new_base, + timer->state & HRTIMER_STATE_CALLBACK, 0); + } else if (ret > 0) { /* * Only allow reprogramming if the new base is on this CPU. * (it might still be on another CPU if the timer was pending) * * XXX send_remote_softirq() ? */ - if (wakeup) { /* * We need to drop cpu_base->lock to avoid a * lock ordering issue vs. rq->lock. @@ -996,9 +1111,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, raw_spin_unlock(&new_base->cpu_base->lock); raise_softirq_irqoff(HRTIMER_SOFTIRQ); local_irq_restore(flags); - return ret; - } else { - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + return 0; } } @@ -1089,7 +1202,7 @@ int hrtimer_cancel(struct hrtimer *timer) if (ret >= 0) return ret; - cpu_relax(); + hrtimer_wait_for_timer(timer); } } EXPORT_SYMBOL_GPL(hrtimer_cancel); @@ -1153,6 +1266,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base = hrtimer_clockid_to_base(clock_id); timer->base = &cpu_base->clock_base[base]; + INIT_LIST_HEAD(&timer->cb_entry); timerqueue_init(&timer->node); #ifdef CONFIG_TIMER_STATS @@ -1236,6 +1350,126 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer); + +#ifdef CONFIG_PREEMPT_RT_BASE +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + /* + * Note, we clear the callback flag before we requeue the + * timer otherwise we trigger the callback_running() check + * in hrtimer_reprogram(). + */ + timer->state &= ~HRTIMER_STATE_CALLBACK; + + if (restart != HRTIMER_NORESTART) { + BUG_ON(hrtimer_active(timer)); + /* + * Enqueue the timer, if it's the leftmost timer then + * we need to reprogram it. + */ + if (!enqueue_hrtimer(timer, base)) + return; + +#ifndef CONFIG_HIGH_RES_TIMERS + } +#else + if (base->cpu_base->hres_active && + hrtimer_reprogram(timer, base)) + goto requeue; + + } else if (hrtimer_active(timer)) { + /* + * If the timer was rearmed on another CPU, reprogram + * the event device. + */ + if (&timer->node == base->active.next && + base->cpu_base->hres_active && + hrtimer_reprogram(timer, base)) + goto requeue; + } + return; + +requeue: + /* + * Timer is expired. Thus move it from tree to pending list + * again. + */ + __remove_hrtimer(timer, base, timer->state, 0); + list_add_tail(&timer->cb_entry, &base->expired); +#endif +} + +/* + * The changes in mainline which removed the callback modes from + * hrtimer are not yet working with -rt. The non wakeup_process() + * based callbacks which involve sleeping locks need to be treated + * seperately. + */ +static void hrtimer_rt_run_pending(void) +{ + enum hrtimer_restart (*fn)(struct hrtimer *); + struct hrtimer_cpu_base *cpu_base; + struct hrtimer_clock_base *base; + struct hrtimer *timer; + int index, restart; + + local_irq_disable(); + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); + + raw_spin_lock(&cpu_base->lock); + + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { + base = &cpu_base->clock_base[index]; + + while (!list_empty(&base->expired)) { + timer = list_first_entry(&base->expired, + struct hrtimer, cb_entry); + + /* + * Same as the above __run_hrtimer function + * just we run with interrupts enabled. + */ + debug_hrtimer_deactivate(timer); + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); + timer_stats_account_hrtimer(timer); + fn = timer->function; + + raw_spin_unlock_irq(&cpu_base->lock); + restart = fn(timer); + raw_spin_lock_irq(&cpu_base->lock); + + hrtimer_rt_reprogram(restart, timer, base); + } + } + + raw_spin_unlock_irq(&cpu_base->lock); + + wake_up_timer_waiters(cpu_base); +} + +static int hrtimer_rt_defer(struct hrtimer *timer) +{ + if (timer->irqsafe) + return 0; + + __remove_hrtimer(timer, timer->base, timer->state, 0); + list_add_tail(&timer->cb_entry, &timer->base->expired); + return 1; +} + +#else + +static inline void hrtimer_rt_run_pending(void) +{ + hrtimer_peek_ahead_timers(); +} + +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } + +#endif + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1246,7 +1480,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ktime_t expires_next, now, entry_time, delta; - int i, retries = 0; + int i, retries = 0, raise = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; @@ -1281,6 +1515,15 @@ retry: timer = container_of(node, struct hrtimer, node); + trace_hrtimer_interrupt(raw_smp_processor_id(), + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ? + timer->praecox : hrtimer_get_expires(timer), + basenow)), + current, + timer->function == hrtimer_wakeup ? + container_of(timer, struct hrtimer_sleeper, + timer)->task : NULL); + /* * The immediate goal for using the softexpires is * minimizing wakeups, not running timers at the @@ -1296,7 +1539,10 @@ retry: if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) break; - __run_hrtimer(timer, &basenow); + if (!hrtimer_rt_defer(timer)) + __run_hrtimer(timer, &basenow); + else + raise = 1; } } /* Reevaluate the clock bases for the next expiry */ @@ -1313,7 +1559,7 @@ retry: if (expires_next.tv64 == KTIME_MAX || !tick_program_event(expires_next, 0)) { cpu_base->hang_detected = 0; - return; + goto out; } /* @@ -1357,6 +1603,9 @@ retry: tick_program_event(expires_next, 1); printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); +out: + if (raise) + raise_softirq_irqoff(HRTIMER_SOFTIRQ); } /* @@ -1392,18 +1641,18 @@ void hrtimer_peek_ahead_timers(void) __hrtimer_peek_ahead_timers(); local_irq_restore(flags); } - -static void run_hrtimer_softirq(struct softirq_action *h) -{ - hrtimer_peek_ahead_timers(); -} - #else /* CONFIG_HIGH_RES_TIMERS */ static inline void __hrtimer_peek_ahead_timers(void) { } #endif /* !CONFIG_HIGH_RES_TIMERS */ + +static void run_hrtimer_softirq(struct softirq_action *h) +{ + hrtimer_rt_run_pending(); +} + /* * Called from timer softirq every jiffy, expire hrtimers: * @@ -1436,7 +1685,7 @@ void hrtimer_run_queues(void) struct timerqueue_node *node; struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base; - int index, gettime = 1; + int index, gettime = 1, raise = 0; if (hrtimer_hres_active()) return; @@ -1461,10 +1710,16 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - __run_hrtimer(timer, &base->softirq_time); + if (!hrtimer_rt_defer(timer)) + __run_hrtimer(timer, &base->softirq_time); + else + raise = 1; } raw_spin_unlock(&cpu_base->lock); } + + if (raise) + raise_softirq_irqoff(HRTIMER_SOFTIRQ); } /* @@ -1486,16 +1741,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; + sl->timer.irqsafe = 1; sl->task = task; } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, + unsigned long state) { hrtimer_init_sleeper(t, current); do { - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(state); hrtimer_start_expires(&t->timer, mode); if (!hrtimer_active(&t->timer)) t->task = NULL; @@ -1539,7 +1796,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) + /* cpu_chill() does not care about restart state. */ + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) goto out; rmtp = restart->nanosleep.rmtp; @@ -1556,8 +1814,10 @@ out: return ret; } -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, - const enum hrtimer_mode mode, const clockid_t clockid) +static long +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, + const enum hrtimer_mode mode, const clockid_t clockid, + unsigned long state) { struct restart_block *restart; struct hrtimer_sleeper t; @@ -1570,7 +1830,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, hrtimer_init_on_stack(&t.timer, clockid, mode); hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); - if (do_nanosleep(&t, mode)) + if (do_nanosleep(&t, mode, state)) goto out; /* Absolute timers do not update the rmtp value and restart: */ @@ -1597,6 +1857,12 @@ out: return ret; } +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, + const enum hrtimer_mode mode, const clockid_t clockid) +{ + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); +} + SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, struct timespec __user *, rmtp) { @@ -1611,6 +1877,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); } +#ifdef CONFIG_PREEMPT_RT_FULL +/* + * Sleep for 1 ms in hope whoever holds what we want will let it go. + */ +void cpu_chill(void) +{ + struct timespec tu = { + .tv_nsec = NSEC_PER_MSEC, + }; + unsigned int freeze_flag = current->flags & PF_NOFREEZE; + + current->flags |= PF_NOFREEZE; + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC, + TASK_UNINTERRUPTIBLE); + if (!freeze_flag) + current->flags &= ~PF_NOFREEZE; +} +EXPORT_SYMBOL(cpu_chill); +#endif + /* * Functions related to boot-time initialization: */ @@ -1622,10 +1908,14 @@ static void init_hrtimers_cpu(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; timerqueue_init_head(&cpu_base->clock_base[i].active); + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); } cpu_base->cpu = cpu; hrtimer_init_hres(cpu_base); +#ifdef CONFIG_PREEMPT_RT_BASE + init_waitqueue_head(&cpu_base->wait); +#endif } #ifdef CONFIG_HOTPLUG_CPU @@ -1731,9 +2021,7 @@ void __init hrtimers_init(void) hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); -#ifdef CONFIG_HIGH_RES_TIMERS open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); -#endif } /** |