diff options
author | Steven Rostedt (Red Hat) <rostedt@goodmis.org> | 2015-03-16 15:54:29 -0400 |
---|---|---|
committer | Anders Roxell <anders.roxell@linaro.org> | 2015-06-01 12:03:39 +0200 |
commit | ea60c8a4d0c5f0ad17fcf77032c8e86c18c41a2c (patch) | |
tree | 634f5e1fc3a8b4c6371d060ad0afb78bebcf4490 | |
parent | 53be47e5c1fa3b4edce74a21771c6008163e91ab (diff) |
Revert "timers: do not raise softirq unconditionally"
This reverts commit 891f510568343d93c5aa2f477b6bebe009b48f05.
An issue has arisen in which, if an rt_mutex (a spin_lock converted to a mutex
in PREEMPT_RT) is taken in hard interrupt context, it can cause
a false deadlock detection and trigger a BUG_ON() from the return
value of task_blocks_on_rt_mutex() in rt_spin_lock_slowlock().
The problem is this:
CPU0 CPU1
---- ----
spin_lock(A)
spin_lock(A)
[ blocks, but spins as owner on
CPU 0 is running ]
<interrupt>
spin_trylock(B)
[ succeeds ]
spin_lock(B)
<blocks>
Now the deadlock detection triggers and follows the locking:
Task X (on CPU0) blocked on spinlock B owned by task Y on
CPU1 (via the interrupt taking it with a try lock)
The owner of B (Y) is blocked on spin_lock A (still spinning)
A is owned by task X (self). DEADLOCK detected! BUG_ON triggered.
This was caused by the code that tries to avoid raising the softirq
unconditionally, in order to allow NO_HZ_FULL to work. Unfortunately,
reverting that patch causes NO_HZ_FULL to break again, but that is still
better than triggering a BUG_ON().
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Conflicts:
kernel/timer.c
-rw-r--r-- | include/linux/hrtimer.h | 3 | ||||
-rw-r--r-- | kernel/hrtimer.c | 31 | ||||
-rw-r--r-- | kernel/timer.c | 46 |
3 files changed, 28 insertions, 52 deletions
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bdbf77db0f4d..79a7a35e1a6e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -461,8 +461,9 @@ extern int schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode, int clock); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); -/* Called from the periodic timer tick */ +/* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); +extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f97b9f65c5fa..164201aba52f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1717,6 +1717,30 @@ static void run_hrtimer_softirq(struct softirq_action *h) } /* + * Called from timer softirq every jiffy, expire hrtimers: + * + * For HRT its the fall back code to run the softirq in the timer + * softirq context in case the hrtimer initialization failed or has + * not been done yet. + */ +void hrtimer_run_pending(void) +{ + if (hrtimer_hres_active()) + return; + + /* + * This _is_ ugly: We have to check in the softirq context, + * whether we can switch to highres and / or nohz mode. The + * clocksource switch happens in the timer interrupt with + * xtime_lock held. Notification from there only sets the + * check bit in the tick_oneshot code, otherwise we might + * deadlock vs. xtime_lock. + */ + if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) + hrtimer_switch_to_hres(); +} + +/* * Called from hardirq context every jiffy */ void hrtimer_run_queues(void) @@ -1729,13 +1753,6 @@ void hrtimer_run_queues(void) if (hrtimer_hres_active()) return; - /* - * Check whether we can switch to highres mode. 
- */ - if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()) - && hrtimer_switch_to_hres()) - return; - for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { base = &cpu_base->clock_base[index]; if (!timerqueue_getnext(&base->active)) diff --git a/kernel/timer.c b/kernel/timer.c index 2059f6b27595..34fd2dbba3e3 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1464,6 +1464,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __this_cpu_read(tvec_bases); + hrtimer_run_pending(); + #if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) irq_work_run(); #endif @@ -1477,52 +1479,8 @@ static void run_timer_softirq(struct softirq_action *h) */ void run_local_timers(void) { - struct tvec_base *base = __this_cpu_read(tvec_bases); - hrtimer_run_queues(); - /* - * We can access this lockless as we are in the timer - * interrupt. If there are no timers queued, nothing to do in - * the timer softirq. - */ -#ifdef CONFIG_PREEMPT_RT_FULL - -#ifndef CONFIG_SMP - /* - * The spin_do_trylock() later may fail as the lock may be hold before - * the interrupt arrived. The spin-lock debugging code will raise a - * warning if the try_lock fails on UP. Since this is only an - * optimization for the FULL_NO_HZ case (not to run the timer softirq on - * an nohz_full CPU) we don't really care and shedule the softirq. 
- */ raise_softirq(TIMER_SOFTIRQ); - return; -#endif - - /* On RT, irq work runs from softirq */ - if (irq_work_needs_cpu()) { - raise_softirq(TIMER_SOFTIRQ); - return; - } - - if (!spin_do_trylock(&base->lock)) { - raise_softirq(TIMER_SOFTIRQ); - return; - } -#endif - - if (!base->active_timers) - goto out; - - /* Check whether the next pending timer has expired */ - if (time_before_eq(base->next_timer, jiffies)) - raise_softirq(TIMER_SOFTIRQ); -out: -#ifdef CONFIG_PREEMPT_RT_FULL - rt_spin_unlock_after_trylock_in_irq(&base->lock); -#endif - /* The ; ensures that gcc won't complain in the !RT case */ - ; } #ifdef __ARCH_WANT_SYS_ALARM |