summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-02-28 10:13:16 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2010-02-28 10:13:16 -0800
commit642c4c75a765d7a3244ab39c8e6fb09be21eca5b (patch)
treece0be9b476f362835d3a3d6e4fd32801cd15c9fe /kernel
parentf91b22c35f6b0ae06ec5b67922eca1999c3b6e0a (diff)
parent71da81324c83ef65bb196c7f874ac1c6996d8287 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (44 commits) rcu: Fix accelerated GPs for last non-dynticked CPU rcu: Make non-RCU_PROVE_LOCKING rcu_read_lock_sched_held() understand boot rcu: Fix accelerated grace periods for last non-dynticked CPU rcu: Export rcu_scheduler_active rcu: Make rcu_read_lock_sched_held() take boot time into account rcu: Make lockdep_rcu_dereference() message less alarmist sched, cgroups: Fix module export rcu: Add RCU_CPU_STALL_VERBOSE to dump detailed per-task information rcu: Fix rcutorture mod_timer argument to delay one jiffy rcu: Fix deadlock in TREE_PREEMPT_RCU CPU stall detection rcu: Convert to raw_spinlocks rcu: Stop overflowing signed integers rcu: Use canonical URL for Mathieu's dissertation rcu: Accelerate grace period if last non-dynticked CPU rcu: Fix citation of Mathieu's dissertation rcu: Documentation update for CONFIG_PROVE_RCU security: Apply lockdep-based checking to rcu_dereference() uses idr: Apply lockdep-based diagnostics to rcu_dereference() uses radix-tree: Disable RCU lockdep checking in radix tree vfs: Abstract rcu_dereference_check for files-fdtable use ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c15
-rw-r--r--kernel/exit.c14
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/lockdep.c18
-rw-r--r--kernel/notifier.c6
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/rcupdate.c29
-rw-r--r--kernel/rcutorture.c94
-rw-r--r--kernel/rcutree.c268
-rw-r--r--kernel/rcutree.h61
-rw-r--r--kernel/rcutree_plugin.h229
-rw-r--r--kernel/rcutree_trace.c14
-rw-r--r--kernel/sched.c11
-rw-r--r--kernel/srcu.c52
14 files changed, 589 insertions, 225 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aa3bee56644..4fd90e12977 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -23,6 +23,7 @@
*/
#include <linux/cgroup.h>
+#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -166,6 +167,20 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
*/
static int need_forkexit_callback __read_mostly;
+#ifdef CONFIG_PROVE_LOCKING
+int cgroup_lock_is_held(void)
+{
+ return lockdep_is_held(&cgroup_mutex);
+}
+#else /* #ifdef CONFIG_PROVE_LOCKING */
+int cgroup_lock_is_held(void)
+{
+ return mutex_is_locked(&cgroup_mutex);
+}
+#endif /* #else #ifdef CONFIG_PROVE_LOCKING */
+
+EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
+
/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
diff --git a/kernel/exit.c b/kernel/exit.c
index 546774a31a6..45ed043b8bf 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,9 @@ static void __exit_signal(struct task_struct *tsk)
BUG_ON(!sig);
BUG_ON(!atomic_read(&sig->count));
- sighand = rcu_dereference(tsk->sighand);
+ sighand = rcu_dereference_check(tsk->sighand,
+ rcu_read_lock_held() ||
+ lockdep_is_held(&tasklist_lock));
spin_lock(&sighand->siglock);
posix_cpu_timers_exit(tsk);
@@ -170,8 +172,10 @@ void release_task(struct task_struct * p)
repeat:
tracehook_prepare_release_task(p);
/* don't need to get the RCU readlock here - the process is dead and
- * can't be modifying its own credentials */
+ * can't be modifying its own credentials. But shut RCU-lockdep up */
+ rcu_read_lock();
atomic_dec(&__task_cred(p)->user->processes);
+ rcu_read_unlock();
proc_flush_task(p);
@@ -473,9 +477,11 @@ static void close_files(struct files_struct * files)
/*
* It is safe to dereference the fd table without RCU or
* ->file_lock because this is the last reference to the
- * files structure.
+ * files structure. But use RCU to shut RCU-lockdep up.
*/
+ rcu_read_lock();
fdt = files_fdtable(files);
+ rcu_read_unlock();
for (;;) {
unsigned long set;
i = j * __NFDBITS;
@@ -521,10 +527,12 @@ void put_files_struct(struct files_struct *files)
* at the end of the RCU grace period. Otherwise,
* you can free files immediately.
*/
+ rcu_read_lock();
fdt = files_fdtable(files);
if (fdt != &files->fdtab)
kmem_cache_free(files_cachep, files);
free_fdtable(fdt);
+ rcu_read_unlock();
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index f88bd984df3..17bbf093356 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,6 +86,7 @@ int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+EXPORT_SYMBOL_GPL(tasklist_lock);
int nr_processes(void)
{
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c62ec14609b..0c30d0455de 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3809,3 +3809,21 @@ void lockdep_sys_exit(void)
lockdep_print_held_locks(curr);
}
}
+
+void lockdep_rcu_dereference(const char *file, const int line)
+{
+ struct task_struct *curr = current;
+
+ if (!debug_locks_off())
+ return;
+ printk("\n===================================================\n");
+ printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
+ printk( "---------------------------------------------------\n");
+ printk("%s:%d invoked rcu_dereference_check() without protection!\n",
+ file, line);
+ printk("\nother info that might help us debug this:\n\n");
+ lockdep_print_held_locks(curr);
+ printk("\nstack backtrace:\n");
+ dump_stack();
+}
+EXPORT_SYMBOL_GPL(lockdep_rcu_dereference);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index acd24e7643e..2488ba7eb56 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -78,10 +78,10 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
int ret = NOTIFY_DONE;
struct notifier_block *nb, *next_nb;
- nb = rcu_dereference(*nl);
+ nb = rcu_dereference_raw(*nl);
while (nb && nr_to_call) {
- next_nb = rcu_dereference(nb->next);
+ next_nb = rcu_dereference_raw(nb->next);
#ifdef CONFIG_DEBUG_NOTIFIERS
if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
@@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
* racy then it does not matter what the result of the test
* is, we re-check the list after having taken the lock anyway:
*/
- if (rcu_dereference(nh->head)) {
+ if (rcu_dereference_raw(nh->head)) {
down_read(&nh->rwsem);
ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
nr_calls);
diff --git a/kernel/pid.c b/kernel/pid.c
index 2e17c9c92cb..b08e697cd83 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
- first = rcu_dereference(pid->tasks[type].first);
+ first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 9b7fd472387..f1125c1a632 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,14 +44,43 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
+#include <linux/kernel_stat.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
+
+static struct lock_class_key rcu_bh_lock_key;
+struct lockdep_map rcu_bh_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
+EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
+
+static struct lock_class_key rcu_sched_lock_key;
+struct lockdep_map rcu_sched_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
+EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
#endif
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+
+/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process. Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system). After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections.
+ */
+void rcu_scheduler_starting(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+ WARN_ON(nr_context_switches() > 0);
+ rcu_scheduler_active = 1;
+}
+
/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9bb52177af0..258cdf0a91e 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -61,6 +61,9 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
static int stutter = 5; /* Start/stop testing interval (in sec) */
static int irqreader = 1; /* RCU readers from irq (timers). */
+static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
+static int fqs_holdoff = 0; /* Hold time within burst (us). */
+static int fqs_stutter = 3; /* Wait time between bursts (s). */
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0444);
@@ -79,6 +82,12 @@ module_param(stutter, int, 0444);
MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
module_param(irqreader, int, 0444);
MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
+module_param(fqs_duration, int, 0444);
+MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us)");
+module_param(fqs_holdoff, int, 0444);
+MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
+module_param(fqs_stutter, int, 0444);
+MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
@@ -99,6 +108,7 @@ static struct task_struct **reader_tasks;
static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
+static struct task_struct *fqs_task;
#define RCU_TORTURE_PIPE_LEN 10
@@ -263,6 +273,7 @@ struct rcu_torture_ops {
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*cb_barrier)(void);
+ void (*fqs)(void);
int (*stats)(char *page);
int irq_capable;
char *name;
@@ -347,6 +358,7 @@ static struct rcu_torture_ops rcu_ops = {
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = rcu_barrier,
+ .fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu"
@@ -388,6 +400,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = NULL,
+ .fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_sync"
@@ -403,6 +416,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_expedited,
.cb_barrier = NULL,
+ .fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_expedited"
@@ -465,6 +479,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
.deferred_free = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = rcu_barrier_bh,
+ .fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh"
@@ -480,6 +495,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = NULL,
+ .fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh_sync"
@@ -621,6 +637,7 @@ static struct rcu_torture_ops sched_ops = {
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
+ .fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "sched"
@@ -636,6 +653,7 @@ static struct rcu_torture_ops sched_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
+ .fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
.name = "sched_sync"
};
@@ -650,12 +668,45 @@ static struct rcu_torture_ops sched_expedited_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
+ .fqs = rcu_sched_force_quiescent_state,
.stats = rcu_expedited_torture_stats,
.irq_capable = 1,
.name = "sched_expedited"
};
/*
+ * RCU torture force-quiescent-state kthread. Repeatedly induces
+ * bursts of calls to force_quiescent_state(), increasing the probability
+ * of occurrence of some important types of race conditions.
+ */
+static int
+rcu_torture_fqs(void *arg)
+{
+ unsigned long fqs_resume_time;
+ int fqs_burst_remaining;
+
+ VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
+ do {
+ fqs_resume_time = jiffies + fqs_stutter * HZ;
+ while (jiffies - fqs_resume_time > LONG_MAX) {
+ schedule_timeout_interruptible(1);
+ }
+ fqs_burst_remaining = fqs_duration;
+ while (fqs_burst_remaining > 0) {
+ cur_ops->fqs();
+ udelay(fqs_holdoff);
+ fqs_burst_remaining -= fqs_holdoff;
+ }
+ rcu_stutter_wait("rcu_torture_fqs");
+ } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+ VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping");
+ rcutorture_shutdown_absorb("rcu_torture_fqs");
+ while (!kthread_should_stop())
+ schedule_timeout_uninterruptible(1);
+ return 0;
+}
+
+/*
* RCU torture writer kthread. Repeatedly substitutes a new structure
* for that pointed to by rcu_torture_current, freeing the old structure
* after a series of grace periods (the "pipeline").
@@ -745,7 +796,11 @@ static void rcu_torture_timer(unsigned long unused)
idx = cur_ops->readlock();
completed = cur_ops->completed();
- p = rcu_dereference(rcu_torture_current);
+ p = rcu_dereference_check(rcu_torture_current,
+ rcu_read_lock_held() ||
+ rcu_read_lock_bh_held() ||
+ rcu_read_lock_sched_held() ||
+ srcu_read_lock_held(&srcu_ctl));
if (p == NULL) {
/* Leave because rcu_torture_writer is not yet underway */
cur_ops->readunlock(idx);
@@ -798,11 +853,15 @@ rcu_torture_reader(void *arg)
do {
if (irqreader && cur_ops->irq_capable) {
if (!timer_pending(&t))
- mod_timer(&t, 1);
+ mod_timer(&t, jiffies + 1);
}
idx = cur_ops->readlock();
completed = cur_ops->completed();
- p = rcu_dereference(rcu_torture_current);
+ p = rcu_dereference_check(rcu_torture_current,
+ rcu_read_lock_held() ||
+ rcu_read_lock_bh_held() ||
+ rcu_read_lock_sched_held() ||
+ srcu_read_lock_held(&srcu_ctl));
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
cur_ops->readunlock(idx);
@@ -1030,10 +1089,11 @@ rcu_torture_print_module_parms(char *tag)
printk(KERN_ALERT "%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
- "shuffle_interval=%d stutter=%d irqreader=%d\n",
+ "shuffle_interval=%d stutter=%d irqreader=%d "
+ "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
- stutter, irqreader);
+ stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
}
static struct notifier_block rcutorture_nb = {
@@ -1109,6 +1169,12 @@ rcu_torture_cleanup(void)
}
stats_task = NULL;
+ if (fqs_task) {
+ VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task");
+ kthread_stop(fqs_task);
+ }
+ fqs_task = NULL;
+
/* Wait for all RCU callbacks to fire. */
if (cur_ops->cb_barrier != NULL)
@@ -1154,6 +1220,11 @@ rcu_torture_init(void)
mutex_unlock(&fullstop_mutex);
return -EINVAL;
}
+ if (cur_ops->fqs == NULL && fqs_duration != 0) {
+ printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
+ "fqs_duration, fqs disabled.\n");
+ fqs_duration = 0;
+ }
if (cur_ops->init)
cur_ops->init(); /* no "goto unwind" prior to this point!!! */
@@ -1282,6 +1353,19 @@ rcu_torture_init(void)
goto unwind;
}
}
+ if (fqs_duration < 0)
+ fqs_duration = 0;
+ if (fqs_duration) {
+ /* Create the stutter thread */
+ fqs_task = kthread_run(rcu_torture_fqs, NULL,
+ "rcu_torture_fqs");
+ if (IS_ERR(fqs_task)) {
+ firsterr = PTR_ERR(fqs_task);
+ VERBOSE_PRINTK_ERRSTRING("Failed to create fqs");
+ fqs_task = NULL;
+ goto unwind;
+ }
+ }
register_reboot_notifier(&rcutorture_nb);
mutex_unlock(&fullstop_mutex);
return 0;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 53ae9598f79..3ec8160fc75 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -46,7 +46,6 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
-#include <linux/kernel_stat.h>
#include "rcutree.h"
@@ -66,11 +65,11 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
.signaled = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
- .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
+ .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \
.orphan_cbs_list = NULL, \
.orphan_cbs_tail = &name.orphan_cbs_list, \
.orphan_qlen = 0, \
- .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
+ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \
.n_force_qs = 0, \
.n_force_qs_ngp = 0, \
}
@@ -81,9 +80,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
-static int rcu_scheduler_active __read_mostly;
-
-
/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
* permit this function to be invoked without holding the root rcu_node
@@ -157,6 +153,24 @@ long rcu_batches_completed_bh(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
/*
+ * Force a quiescent state for RCU BH.
+ */
+void rcu_bh_force_quiescent_state(void)
+{
+ force_quiescent_state(&rcu_bh_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
+
+/*
+ * Force a quiescent state for RCU-sched.
+ */
+void rcu_sched_force_quiescent_state(void)
+{
+ force_quiescent_state(&rcu_sched_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
+
+/*
* Does the CPU have callbacks ready to be invoked?
*/
static int
@@ -439,10 +453,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
/* Only let one CPU complain about others per time interval. */
- spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave(&rnp->lock, flags);
delta = jiffies - rsp->jiffies_stall;
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
@@ -452,13 +466,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
* due to CPU offlining.
*/
rcu_print_task_stall(rnp);
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
/* OK, time to rat on our buddy... */
printk(KERN_ERR "INFO: RCU detected CPU stalls:");
rcu_for_each_leaf_node(rsp, rnp) {
+ raw_spin_lock_irqsave(&rnp->lock, flags);
rcu_print_task_stall(rnp);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (rnp->qsmask == 0)
continue;
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
@@ -469,6 +485,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
smp_processor_id(), (long)(jiffies - rsp->gp_start));
trigger_all_cpu_backtrace();
+ /* If so configured, complain about tasks blocking the grace period. */
+
+ rcu_print_detail_task_stall(rsp);
+
force_quiescent_state(rsp, 0); /* Kick them all. */
}
@@ -481,11 +501,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
smp_processor_id(), jiffies - rsp->gp_start);
trigger_all_cpu_backtrace();
- spin_lock_irqsave(&rnp->lock, flags);
- if ((long)(jiffies - rsp->jiffies_stall) >= 0)
+ raw_spin_lock_irqsave(&rnp->lock, flags);
+ if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
rsp->jiffies_stall =
jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
set_need_resched(); /* kick ourselves to get things going. */
}
@@ -545,12 +565,12 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
local_irq_save(flags);
rnp = rdp->mynode;
if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
- !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
+ !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
__note_new_gpnum(rsp, rnp, rdp);
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@@ -609,12 +629,12 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
local_irq_save(flags);
rnp = rdp->mynode;
if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
- !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
+ !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
__rcu_process_gp_end(rsp, rnp, rdp);
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@@ -659,12 +679,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
struct rcu_data *rdp = rsp->rda[smp_processor_id()];
struct rcu_node *rnp = rcu_get_root(rsp);
- if (!cpu_needs_another_gp(rsp, rdp)) {
+ if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
+ if (cpu_needs_another_gp(rsp, rdp))
+ rsp->fqs_need_gp = 1;
if (rnp->completed == rsp->completed) {
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
* Propagate new ->completed value to rcu_node structures
@@ -672,9 +694,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
* of the next grace period to process their callbacks.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
- spin_lock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->completed = rsp->completed;
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
local_irq_restore(flags);
return;
@@ -695,15 +717,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rnp->completed = rsp->completed;
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
rcu_start_gp_per_cpu(rsp, rnp, rdp);
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
- spin_unlock(&rnp->lock); /* leave irqs disabled. */
+ raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
/* Exclude any concurrent CPU-hotplug operations. */
- spin_lock(&rsp->onofflock); /* irqs already disabled. */
+ raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
/*
* Set the quiescent-state-needed bits in all the rcu_node
@@ -723,21 +745,21 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
* irqs disabled.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
- spin_lock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
if (rnp == rdp->mynode)
rcu_start_gp_per_cpu(rsp, rnp, rdp);
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
rnp = rcu_get_root(rsp);
- spin_lock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
- spin_unlock_irqrestore(&rsp->onofflock, flags);
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
/*
@@ -776,14 +798,14 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
if (!(rnp->qsmask & mask)) {
/* Our bit has already been cleared, so done. */
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
rnp->qsmask &= ~mask;
if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
/* Other bits still set at this level, so done. */
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
mask = rnp->grpmask;
@@ -793,10 +815,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
break;
}
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
rnp_c = rnp;
rnp = rnp->parent;
- spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave(&rnp->lock, flags);
WARN_ON_ONCE(rnp_c->qsmask);
}
@@ -825,7 +847,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las
struct rcu_node *rnp;
rnp = rdp->mynode;
- spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave(&rnp->lock, flags);
if (lastcomp != rnp->completed) {
/*
@@ -837,12 +859,12 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las
* race occurred.
*/
rdp->passed_quiesc = 0; /* try again later! */
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
mask = rdp->grpmask;
if ((rnp->qsmask & mask) == 0) {
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
} else {
rdp->qs_pending = 0;
@@ -906,7 +928,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
if (rdp->nxtlist == NULL)
return; /* irqs disabled, so comparison is stable. */
- spin_lock(&rsp->onofflock); /* irqs already disabled. */
+ raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
*rsp->orphan_cbs_tail = rdp->nxtlist;
rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
rdp->nxtlist = NULL;
@@ -914,7 +936,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
rdp->nxttail[i] = &rdp->nxtlist;
rsp->orphan_qlen += rdp->qlen;
rdp->qlen = 0;
- spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+ raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
}
/*
@@ -925,10 +947,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
unsigned long flags;
struct rcu_data *rdp;
- spin_lock_irqsave(&rsp->onofflock, flags);
+ raw_spin_lock_irqsave(&rsp->onofflock, flags);
rdp = rsp->rda[smp_processor_id()];
if (rsp->orphan_cbs_list == NULL) {
- spin_unlock_irqrestore(&rsp->onofflock, flags);
+ raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
return;
}
*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
@@ -937,7 +959,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
rsp->orphan_cbs_list = NULL;
rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
rsp->orphan_qlen = 0;
- spin_unlock_irqrestore(&rsp->onofflock, flags);
+ raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
/*
@@ -953,23 +975,23 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp;
/* Exclude any attempts to start a new grace period. */
- spin_lock_irqsave(&rsp->onofflock, flags);
+ raw_spin_lock_irqsave(&rsp->onofflock, flags);
/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
mask = rdp->grpmask; /* rnp->grplo is constant. */
do {
- spin_lock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
if (rnp->qsmaskinit != 0) {
if (rnp != rdp->mynode)
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
break;
}
if (rnp == rdp->mynode)
need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
else
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
mask = rnp->grpmask;
rnp = rnp->parent;
} while (rnp != NULL);
@@ -980,12 +1002,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
* because invoking rcu_report_unblock_qs_rnp() with ->onofflock
* held leads to deadlock.
*/
- spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+ raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
rnp = rdp->mynode;
if (need_report & RCU_OFL_TASKS_NORM_GP)
rcu_report_unblock_qs_rnp(rnp, flags);
else
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
rcu_report_exp_rnp(rsp, rnp);
@@ -1144,11 +1166,9 @@ void rcu_check_callbacks(int cpu, int user)
/*
* Scan the leaf rcu_node structures, processing dyntick state for any that
* have not yet encountered a quiescent state, using the function specified.
- * Returns 1 if the current grace period ends while scanning (possibly
- * because we made it end).
+ * The caller must have suppressed start of new grace periods.
*/
-static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
- int (*f)(struct rcu_data *))
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
{
unsigned long bit;
int cpu;
@@ -1158,13 +1178,13 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
rcu_for_each_leaf_node(rsp, rnp) {
mask = 0;
- spin_lock_irqsave(&rnp->lock, flags);
- if (rnp->completed != lastcomp) {
- spin_unlock_irqrestore(&rnp->lock, flags);
- return 1;
+ raw_spin_lock_irqsave(&rnp->lock, flags);
+ if (!rcu_gp_in_progress(rsp)) {
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ return;
}
if (rnp->qsmask == 0) {
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
continue;
}
cpu = rnp->grplo;
@@ -1173,15 +1193,14 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
mask |= bit;
}
- if (mask != 0 && rnp->completed == lastcomp) {
+ if (mask != 0) {
/* rcu_report_qs_rnp() releases rnp->lock. */
rcu_report_qs_rnp(mask, rsp, rnp, flags);
continue;
}
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
- return 0;
}
/*
@@ -1191,32 +1210,26 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
{
unsigned long flags;
- long lastcomp;
struct rcu_node *rnp = rcu_get_root(rsp);
- u8 signaled;
- u8 forcenow;
if (!rcu_gp_in_progress(rsp))
return; /* No grace period in progress, nothing to force. */
- if (!spin_trylock_irqsave(&rsp->fqslock, flags)) {
+ if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
return; /* Someone else is already on the job. */
}
- if (relaxed &&
- (long)(rsp->jiffies_force_qs - jiffies) >= 0)
- goto unlock_ret; /* no emergency and done recently. */
+ if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
+ goto unlock_fqs_ret; /* no emergency and done recently. */
rsp->n_force_qs++;
- spin_lock(&rnp->lock);
- lastcomp = rsp->gpnum - 1;
- signaled = rsp->signaled;
+ raw_spin_lock(&rnp->lock); /* irqs already disabled */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
if(!rcu_gp_in_progress(rsp)) {
rsp->n_force_qs_ngp++;
- spin_unlock(&rnp->lock);
- goto unlock_ret; /* no GP in progress, time updated. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+ goto unlock_fqs_ret; /* no GP in progress, time updated. */
}
- spin_unlock(&rnp->lock);
- switch (signaled) {
+ rsp->fqs_active = 1;
+ switch (rsp->signaled) {
case RCU_GP_IDLE:
case RCU_GP_INIT:
@@ -1224,45 +1237,38 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
case RCU_SAVE_DYNTICK:
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
break; /* So gcc recognizes the dead code. */
/* Record dyntick-idle state. */
- if (rcu_process_dyntick(rsp, lastcomp,
- dyntick_save_progress_counter))
- goto unlock_ret;
- /* fall into next case. */
-
- case RCU_SAVE_COMPLETED:
-
- /* Update state, record completion counter. */
- forcenow = 0;
- spin_lock(&rnp->lock);
- if (lastcomp + 1 == rsp->gpnum &&
- lastcomp == rsp->completed &&
- rsp->signaled == signaled) {
+ force_qs_rnp(rsp, dyntick_save_progress_counter);
+ raw_spin_lock(&rnp->lock); /* irqs already disabled */
+ if (rcu_gp_in_progress(rsp))
rsp->signaled = RCU_FORCE_QS;
- rsp->completed_fqs = lastcomp;
- forcenow = signaled == RCU_SAVE_COMPLETED;
- }
- spin_unlock(&rnp->lock);
- if (!forcenow)
- break;
- /* fall into next case. */
+ break;
case RCU_FORCE_QS:
/* Check dyntick-idle state, send IPI to laggarts. */
- if (rcu_process_dyntick(rsp, rsp->completed_fqs,
- rcu_implicit_dynticks_qs))
- goto unlock_ret;
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+ force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
/* Leave state in case more forcing is required. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled */
break;
}
-unlock_ret:
- spin_unlock_irqrestore(&rsp->fqslock, flags);
+ rsp->fqs_active = 0;
+ if (rsp->fqs_need_gp) {
+ raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
+ rsp->fqs_need_gp = 0;
+ rcu_start_gp(rsp, flags); /* releases rnp->lock */
+ return;
+ }
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+unlock_fqs_ret:
+ raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
}
#else /* #ifdef CONFIG_SMP */
@@ -1290,7 +1296,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
* If an RCU GP has gone long enough, go check for dyntick
* idle CPUs and, if needed, send resched IPIs.
*/
- if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
+ if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
force_quiescent_state(rsp, 1);
/*
@@ -1304,7 +1310,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
/* Does this CPU require a not-yet-started grace period? */
if (cpu_needs_another_gp(rsp, rdp)) {
- spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
+ raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
rcu_start_gp(rsp, flags); /* releases above lock */
}
@@ -1335,6 +1341,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
* grace-period manipulations above.
*/
smp_mb(); /* See above block comment. */
+
+ /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
+ rcu_needs_cpu_flush();
}
static void
@@ -1369,7 +1378,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
unsigned long nestflag;
struct rcu_node *rnp_root = rcu_get_root(rsp);
- spin_lock_irqsave(&rnp_root->lock, nestflag);
+ raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
}
@@ -1387,7 +1396,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
force_quiescent_state(rsp, 0);
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->qlen_last_fqs_check = rdp->qlen;
- } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
+ } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
force_quiescent_state(rsp, 1);
local_irq_restore(flags);
}
@@ -1520,7 +1529,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
/* Has an RCU GP gone long enough to send resched IPIs &c? */
if (rcu_gp_in_progress(rsp) &&
- ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
+ ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) {
rdp->n_rp_need_fqs++;
return 1;
}
@@ -1545,10 +1554,9 @@ static int rcu_pending(int cpu)
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
- * 1 if so. This function is part of the RCU implementation; it is -not-
- * an exported member of the RCU API.
+ * 1 if so.
*/
-int rcu_needs_cpu(int cpu)
+static int rcu_needs_cpu_quick_check(int cpu)
{
/* RCU callbacks either ready or pending? */
return per_cpu(rcu_sched_data, cpu).nxtlist ||
@@ -1556,21 +1564,6 @@ int rcu_needs_cpu(int cpu)
rcu_preempt_needs_cpu(cpu);
}
-/*
- * This function is invoked towards the end of the scheduler's initialization
- * process. Before this is called, the idle task might contain
- * RCU read-side critical sections (during which time, this idle
- * task is booting the system). After this function is called, the
- * idle tasks are prohibited from containing RCU read-side critical
- * sections.
- */
-void rcu_scheduler_starting(void)
-{
- WARN_ON(num_online_cpus() != 1);
- WARN_ON(nr_context_switches() > 0);
- rcu_scheduler_active = 1;
-}
-
static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
@@ -1659,7 +1652,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave(&rnp->lock, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
@@ -1669,7 +1662,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
#endif /* #ifdef CONFIG_NO_HZ */
rdp->cpu = cpu;
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@@ -1687,7 +1680,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave(&rnp->lock, flags);
rdp->passed_quiesc = 0; /* We could be racing with new GP, */
rdp->qs_pending = 1; /* so set up to respond to current GP. */
rdp->beenonline = 1; /* We have now been online. */
@@ -1695,7 +1688,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
* A new grace period might start here. If so, we won't be part
@@ -1703,14 +1696,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
*/
/* Exclude any attempts to start a new GP on large systems. */
- spin_lock(&rsp->onofflock); /* irqs already disabled. */
+ raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
/* Add CPU to rcu_node bitmasks. */
rnp = rdp->mynode;
mask = rdp->grpmask;
do {
/* Exclude any attempts to start a new GP on small systems. */
- spin_lock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->qsmaskinit |= mask;
mask = rnp->grpmask;
if (rnp == rdp->mynode) {
@@ -1718,11 +1711,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
rdp->completed = rnp->completed;
rdp->passed_quiesc_completed = rnp->completed - 1;
}
- spin_unlock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
rnp = rnp->parent;
} while (rnp != NULL && !(rnp->qsmaskinit & mask));
- spin_unlock_irqrestore(&rsp->onofflock, flags);
+ raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
static void __cpuinit rcu_online_cpu(int cpu)
@@ -1806,11 +1799,17 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
*/
static void __init rcu_init_one(struct rcu_state *rsp)
{
+ static char *buf[] = { "rcu_node_level_0",
+ "rcu_node_level_1",
+ "rcu_node_level_2",
+ "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
int cpustride = 1;
int i;
int j;
struct rcu_node *rnp;
+ BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
+
/* Initialize the level-tracking arrays. */
for (i = 1; i < NUM_RCU_LVLS; i++)
@@ -1823,8 +1822,9 @@ static void __init rcu_init_one(struct rcu_state *rsp)
cpustride *= rsp->levelspread[i];
rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
- spin_lock_init(&rnp->lock);
- lockdep_set_class(&rnp->lock, &rcu_node_class[i]);
+ raw_spin_lock_init(&rnp->lock);
+ lockdep_set_class_and_name(&rnp->lock,
+ &rcu_node_class[i], buf[i]);
rnp->gpnum = 0;
rnp->qsmask = 0;
rnp->qsmaskinit = 0;
@@ -1876,7 +1876,7 @@ do { \
void __init rcu_init(void)
{
- int i;
+ int cpu;
rcu_bootup_announce();
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
@@ -1896,8 +1896,8 @@ void __init rcu_init(void)
* or the scheduler are operational.
*/
cpu_notifier(rcu_cpu_notify, 0);
- for_each_online_cpu(i)
- rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i);
+ for_each_online_cpu(cpu)
+ rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
}
#include "rcutree_plugin.h"
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index d2a0046f63b..1439eb504c2 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -90,12 +90,12 @@ struct rcu_dynticks {
* Definition for node within the RCU grace-period-detection hierarchy.
*/
struct rcu_node {
- spinlock_t lock; /* Root rcu_node's lock protects some */
+ raw_spinlock_t lock; /* Root rcu_node's lock protects some */
/* rcu_state fields as well as following. */
- long gpnum; /* Current grace period for this node. */
+ unsigned long gpnum; /* Current grace period for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
- long completed; /* Last grace period completed for this node. */
+ unsigned long completed; /* Last GP completed for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
unsigned long qsmask; /* CPUs or groups that need to switch in */
@@ -161,11 +161,11 @@ struct rcu_node {
/* Per-CPU data for read-copy update. */
struct rcu_data {
/* 1) quiescent-state and grace-period handling : */
- long completed; /* Track rsp->completed gp number */
+ unsigned long completed; /* Track rsp->completed gp number */
/* in order to detect GP end. */
- long gpnum; /* Highest gp number that this CPU */
+ unsigned long gpnum; /* Highest gp number that this CPU */
/* is aware of having started. */
- long passed_quiesc_completed;
+ unsigned long passed_quiesc_completed;
/* Value of completed at time of qs. */
bool passed_quiesc; /* User-mode/idle loop etc. */
bool qs_pending; /* Core waits for quiesc state. */
@@ -221,14 +221,14 @@ struct rcu_data {
unsigned long resched_ipi; /* Sent a resched IPI. */
/* 5) __rcu_pending() statistics. */
- long n_rcu_pending; /* rcu_pending() calls since boot. */
- long n_rp_qs_pending;
- long n_rp_cb_ready;
- long n_rp_cpu_needs_gp;
- long n_rp_gp_completed;
- long n_rp_gp_started;
- long n_rp_need_fqs;
- long n_rp_need_nothing;
+ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
+ unsigned long n_rp_qs_pending;
+ unsigned long n_rp_cb_ready;
+ unsigned long n_rp_cpu_needs_gp;
+ unsigned long n_rp_gp_completed;
+ unsigned long n_rp_gp_started;
+ unsigned long n_rp_need_fqs;
+ unsigned long n_rp_need_nothing;
int cpu;
};
@@ -237,12 +237,11 @@ struct rcu_data {
#define RCU_GP_IDLE 0 /* No grace period in progress. */
#define RCU_GP_INIT 1 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
-#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */
-#define RCU_FORCE_QS 4 /* Need to force quiescent state. */
+#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
#ifdef CONFIG_NO_HZ
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
#else /* #ifdef CONFIG_NO_HZ */
-#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED
+#define RCU_SIGNAL_INIT RCU_FORCE_QS
#endif /* #else #ifdef CONFIG_NO_HZ */
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
@@ -256,6 +255,9 @@ struct rcu_data {
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
+#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
+
/*
* RCU global state, including node hierarchy. This hierarchy is
* represented in "heap" form in a dense array. The root (first level)
@@ -277,12 +279,19 @@ struct rcu_state {
u8 signaled ____cacheline_internodealigned_in_smp;
/* Force QS state. */
- long gpnum; /* Current gp number. */
- long completed; /* # of last completed gp. */
+ u8 fqs_active; /* force_quiescent_state() */
+ /* is running. */
+ u8 fqs_need_gp; /* A CPU was prevented from */
+ /* starting a new grace */
+ /* period because */
+ /* force_quiescent_state() */
+ /* was running. */
+ unsigned long gpnum; /* Current gp number. */
+ unsigned long completed; /* # of last completed gp. */
/* End of fields guarded by root rcu_node's lock. */
- spinlock_t onofflock; /* exclude on/offline and */
+ raw_spinlock_t onofflock; /* exclude on/offline and */
/* starting new GP. Also */
/* protects the following */
/* orphan_cbs fields. */
@@ -292,10 +301,8 @@ struct rcu_state {
/* going offline. */
struct rcu_head **orphan_cbs_tail; /* And tail pointer. */
long orphan_qlen; /* Number of orphaned cbs. */
- spinlock_t fqslock; /* Only one task forcing */
+ raw_spinlock_t fqslock; /* Only one task forcing */
/* quiescent states. */
- long completed_fqs; /* Value of completed @ snap. */
- /* Protected by fqslock. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
@@ -319,8 +326,6 @@ struct rcu_state {
#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
/* GP were moved to root. */
-#ifdef RCU_TREE_NONCORE
-
/*
* RCU implementation internal declarations:
*/
@@ -335,7 +340,7 @@ extern struct rcu_state rcu_preempt_state;
DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-#else /* #ifdef RCU_TREE_NONCORE */
+#ifndef RCU_TREE_NONCORE
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
@@ -347,6 +352,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
unsigned long flags);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static void rcu_print_task_stall(struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
@@ -367,5 +373,6 @@ static int rcu_preempt_needs_cpu(int cpu);
static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
static void rcu_preempt_send_cbs_to_orphanage(void);
static void __init __rcu_init_preempt(void);
+static void rcu_needs_cpu_flush(void);
-#endif /* #else #ifdef RCU_TREE_NONCORE */
+#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 37fbccdf41d..464ad2cdee0 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -62,6 +62,15 @@ long rcu_batches_completed(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
+ * Force a quiescent state for preemptible RCU.
+ */
+void rcu_force_quiescent_state(void)
+{
+ force_quiescent_state(&rcu_preempt_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
* Record a preemptable-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
* not in a quiescent state. There might be any number of tasks blocked
@@ -102,7 +111,7 @@ static void rcu_preempt_note_context_switch(int cpu)
/* Possibly blocking in an RCU read-side critical section. */
rdp = rcu_preempt_state.rda[cpu];
rnp = rdp->mynode;
- spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave(&rnp->lock, flags);
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
t->rcu_blocked_node = rnp;
@@ -123,7 +132,7 @@ static void rcu_preempt_note_context_switch(int cpu)
WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@@ -180,7 +189,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
struct rcu_node *rnp_p;
if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
return; /* Still need more quiescent states! */
}
@@ -197,8 +206,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
/* Report up the rest of the hierarchy. */
mask = rnp->grpmask;
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
- spin_lock(&rnp_p->lock); /* irqs already disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
}
@@ -248,10 +257,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
*/
for (;;) {
rnp = t->rcu_blocked_node;
- spin_lock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
if (rnp == t->rcu_blocked_node)
break;
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
empty = !rcu_preempted_readers(rnp);
empty_exp = !rcu_preempted_readers_exp(rnp);
@@ -265,7 +274,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
*/
if (empty)
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
else
rcu_report_unblock_qs_rnp(rnp, flags);
@@ -295,29 +304,73 @@ void __rcu_read_unlock(void)
if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);
+#ifdef CONFIG_PROVE_LOCKING
+ WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period on the specified rcu_node structure.
+ */
+static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+ unsigned long flags;
+ struct list_head *lp;
+ int phase;
+ struct task_struct *t;
+
+ if (rcu_preempted_readers(rnp)) {
+ raw_spin_lock_irqsave(&rnp->lock, flags);
+ phase = rnp->gpnum & 0x1;
+ lp = &rnp->blocked_tasks[phase];
+ list_for_each_entry(t, lp, rcu_node_entry)
+ sched_show_task(t);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ }
+}
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period.
+ */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+ struct rcu_node *rnp = rcu_get_root(rsp);
+
+ rcu_print_detail_task_stall_rnp(rnp);
+ rcu_for_each_leaf_node(rsp, rnp)
+ rcu_print_detail_task_stall_rnp(rnp);
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
/*
* Scan the current list of tasks blocked within RCU read-side critical
* sections, printing out the tid of each.
*/
static void rcu_print_task_stall(struct rcu_node *rnp)
{
- unsigned long flags;
struct list_head *lp;
int phase;
struct task_struct *t;
if (rcu_preempted_readers(rnp)) {
- spin_lock_irqsave(&rnp->lock, flags);
phase = rnp->gpnum & 0x1;
lp = &rnp->blocked_tasks[phase];
list_for_each_entry(t, lp, rcu_node_entry)
printk(" P%d", t->pid);
- spin_unlock_irqrestore(&rnp->lock, flags);
}
}
@@ -388,11 +441,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
lp_root = &rnp_root->blocked_tasks[i];
while (!list_empty(lp)) {
tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
- spin_lock(&rnp_root->lock); /* irqs already disabled */
+ raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
list_del(&tp->rcu_node_entry);
tp->rcu_blocked_node = rnp_root;
list_add(&tp->rcu_node_entry, lp_root);
- spin_unlock(&rnp_root->lock); /* irqs remain disabled */
+ raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
}
}
return retval;
@@ -516,7 +569,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
unsigned long flags;
unsigned long mask;
- spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave(&rnp->lock, flags);
for (;;) {
if (!sync_rcu_preempt_exp_done(rnp))
break;
@@ -525,12 +578,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
break;
}
mask = rnp->grpmask;
- spin_unlock(&rnp->lock); /* irqs remain disabled */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
rnp = rnp->parent;
- spin_lock(&rnp->lock); /* irqs already disabled */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled */
rnp->expmask &= ~mask;
}
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@@ -545,11 +598,11 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
{
int must_wait;
- spin_lock(&rnp->lock); /* irqs already disabled */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled */
list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
must_wait = rcu_preempted_readers_exp(rnp);
- spin_unlock(&rnp->lock); /* irqs remain disabled */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
if (!must_wait)
rcu_report_exp_rnp(rsp, rnp);
}
@@ -594,13 +647,13 @@ void synchronize_rcu_expedited(void)
/* force all RCU readers onto blocked_tasks[]. */
synchronize_sched_expedited();
- spin_lock_irqsave(&rsp->onofflock, flags);
+ raw_spin_lock_irqsave(&rsp->onofflock, flags);
/* Initialize ->expmask for all non-leaf rcu_node structures. */
rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
- spin_lock(&rnp->lock); /* irqs already disabled. */
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->expmask = rnp->qsmaskinit;
- spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
/* Snapshot current state of ->blocked_tasks[] lists. */
@@ -609,7 +662,7 @@ void synchronize_rcu_expedited(void)
if (NUM_RCU_NODES > 1)
sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
- spin_unlock_irqrestore(&rsp->onofflock, flags);
+ raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
rnp = rcu_get_root(rsp);
@@ -713,6 +766,16 @@ long rcu_batches_completed(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
+ * Force a quiescent state for RCU, which, because there is no preemptible
+ * RCU, becomes the same as rcu-sched.
+ */
+void rcu_force_quiescent_state(void)
+{
+ rcu_sched_force_quiescent_state();
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
* Because preemptable RCU does not exist, we never have to check for
* CPUs being in quiescent states.
*/
@@ -734,7 +797,7 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
/* Because preemptible RCU does not exist, no quieting of tasks. */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
{
- spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -745,6 +808,14 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
* Because preemptable RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
*/
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
static void rcu_print_task_stall(struct rcu_node *rnp)
{
}
@@ -884,3 +955,113 @@ static void __init __rcu_init_preempt(void)
}
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we have preemptible RCU, just check whether this CPU needs
+ * any flavor of RCU. Do not chew up lots of CPU cycles with preemption
+ * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
+ */
+int rcu_needs_cpu(int cpu)
+{
+ return rcu_needs_cpu_quick_check(cpu);
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle
+ * entry is not configured, so we never do need to.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+}
+
+#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#define RCU_NEEDS_CPU_FLUSHES 5
+static DEFINE_PER_CPU(int, rcu_dyntick_drain);
+static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we are not supporting preemptible RCU, attempt to accelerate
+ * any current grace periods so that RCU no longer needs this CPU, but
+ * only if all other CPUs are already in dynticks-idle mode. This will
+ * allow the CPU cores to be powered down immediately, as opposed to after
+ * waiting many milliseconds for grace periods to elapse.
+ *
+ * Because it is not legal to invoke rcu_process_callbacks() with irqs
+ * disabled, we do one pass of force_quiescent_state(), then do a
+ * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
+ * The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ */
+int rcu_needs_cpu(int cpu)
+{
+ int c = 0;
+ int thatcpu;
+
+ /* Don't bother unless we are the last non-dyntick-idle CPU. */
+ for_each_cpu_not(thatcpu, nohz_cpu_mask)
+ if (thatcpu != cpu) {
+ per_cpu(rcu_dyntick_drain, cpu) = 0;
+ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
+ return rcu_needs_cpu_quick_check(cpu);
+ }
+
+ /* Check and update the rcu_dyntick_drain sequencing. */
+ if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+ /* First time through, initialize the counter. */
+ per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
+ } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+ /* We have hit the limit, so time to give up. */
+ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+ return rcu_needs_cpu_quick_check(cpu);
+ }
+
+ /* Do one step pushing remaining RCU callbacks through. */
+ if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+ rcu_sched_qs(cpu);
+ force_quiescent_state(&rcu_sched_state, 0);
+ c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
+ }
+ if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+ rcu_bh_qs(cpu);
+ force_quiescent_state(&rcu_bh_state, 0);
+ c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
+ }
+
+ /* If RCU callbacks are still pending, RCU still needs this CPU. */
+ if (c) {
+ raise_softirq(RCU_SOFTIRQ);
+ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+ }
+ return c;
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
+ return;
+ local_irq_save(flags);
+ (void)rcu_needs_cpu(cpu);
+ local_irq_restore(flags);
+}
+
+#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 9d2c88423b3..d45db2e35d2 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -50,7 +50,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
- seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d",
+ seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
rdp->completed, rdp->gpnum,
@@ -105,7 +105,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
- seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
+ seq_printf(m, "%d,%s,%lu,%lu,%d,%lu,%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
rdp->completed, rdp->gpnum,
@@ -155,13 +155,13 @@ static const struct file_operations rcudata_csv_fops = {
static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
{
- long gpnum;
+ unsigned long gpnum;
int level = 0;
int phase;
struct rcu_node *rnp;
gpnum = rsp->gpnum;
- seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
+ seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
rsp->completed, gpnum, rsp->signaled,
(long)(rsp->jiffies_force_qs - jiffies),
@@ -215,12 +215,12 @@ static const struct file_operations rcuhier_fops = {
static int show_rcugp(struct seq_file *m, void *unused)
{
#ifdef CONFIG_TREE_PREEMPT_RCU
- seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n",
+ seq_printf(m, "rcu_preempt: completed=%ld gpnum=%lu\n",
rcu_preempt_state.completed, rcu_preempt_state.gpnum);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
- seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n",
+ seq_printf(m, "rcu_sched: completed=%ld gpnum=%lu\n",
rcu_sched_state.completed, rcu_sched_state.gpnum);
- seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n",
+ seq_printf(m, "rcu_bh: completed=%ld gpnum=%lu\n",
rcu_bh_state.completed, rcu_bh_state.gpnum);
return 0;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a8fb30a91b..3218f521371 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -645,6 +645,11 @@ static inline int cpu_of(struct rq *rq)
#endif
}
+#define rcu_dereference_check_sched_domain(p) \
+ rcu_dereference_check((p), \
+ rcu_read_lock_sched_held() || \
+ lockdep_is_held(&sched_domains_mutex))
+
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
@@ -653,7 +658,7 @@ static inline int cpu_of(struct rq *rq)
* preempt-disabled sections.
*/
#define for_each_domain(cpu, __sd) \
- for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() (&__get_cpu_var(runqueues))
@@ -1531,7 +1536,7 @@ static unsigned long target_load(int cpu, int type)
static struct sched_group *group_of(int cpu)
{
- struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+ struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
if (!sd)
return NULL;
@@ -4888,7 +4893,7 @@ static void run_rebalance_domains(struct softirq_action *h)
static inline int on_null_domain(int cpu)
{
- return !rcu_dereference(cpu_rq(cpu)->sd);
+ return !rcu_dereference_sched(cpu_rq(cpu)->sd);
}
/*
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 818d7d9aa03..bde4295774c 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -34,6 +34,30 @@
#include <linux/smp.h>
#include <linux/srcu.h>
+static int init_srcu_struct_fields(struct srcu_struct *sp)
+{
+ sp->completed = 0;
+ mutex_init(&sp->mutex);
+ sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+ return sp->per_cpu_ref ? 0 : -ENOMEM;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /* Don't re-initialize a lock while it is held. */
+ debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+ lockdep_init_map(&sp->dep_map, name, key, 0);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+ return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
/**
* init_srcu_struct - initialize a sleep-RCU structure
* @sp: structure to initialize.
@@ -44,13 +68,12 @@
*/
int init_srcu_struct(struct srcu_struct *sp)
{
- sp->completed = 0;
- mutex_init(&sp->mutex);
- sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
- return (sp->per_cpu_ref ? 0 : -ENOMEM);
+ return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
/*
* srcu_readers_active_idx -- returns approximate number of readers
* active on the specified rank of per-CPU counters.
@@ -100,15 +123,12 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-/**
- * srcu_read_lock - register a new reader for an SRCU-protected structure.
- * @sp: srcu_struct in which to register the new reader.
- *
+/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Must be called from process context.
* Returns an index that must be passed to the matching srcu_read_unlock().
*/
-int srcu_read_lock(struct srcu_struct *sp)
+int __srcu_read_lock(struct srcu_struct *sp)
{
int idx;
@@ -120,31 +140,27 @@ int srcu_read_lock(struct srcu_struct *sp)
preempt_enable();
return idx;
}
-EXPORT_SYMBOL_GPL(srcu_read_lock);
+EXPORT_SYMBOL_GPL(__srcu_read_lock);
-/**
- * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
- * @sp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock().
- *
+/*
* Removes the count for the old reader from the appropriate per-CPU
* element of the srcu_struct. Note that this may well be a different
* CPU than that which was incremented by the corresponding srcu_read_lock().
* Must be called from process context.
*/
-void srcu_read_unlock(struct srcu_struct *sp, int idx)
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
preempt_disable();
srcu_barrier(); /* ensure compiler won't misorder critical section. */
per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
preempt_enable();
}
-EXPORT_SYMBOL_GPL(srcu_read_unlock);
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
*/
-void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
+static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
{
int idx;