cpuidle: Single/Global registration of idle states

This patch makes the cpuidle_states structure global (single copy)
instead of per-cpu. The statistics needed on per-cpu basis
by the governor are kept per-cpu. This simplifies the cpuidle
subsystem as state registration is done by single cpu only.
Having single copy of cpuidle_states saves memory. Rare case
of asymmetric C-states can be handled within the cpuidle driver
and architectures such as POWER do not have asymmetric C-states.

Having single/global registration of all the idle states,
dynamic C-state transitions on x86 are handled by
the boot cpu. Here, the boot cpu  would disable all the devices,
re-populate the states and later enable all the devices,
irrespective of the cpu that would receive the notification first.

Reference:
https://lkml.org/lkml/2011/4/25/83

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Signed-off-by: Trinabh Gupta <g.trinabh@gmail.com>
Tested-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 7127e92..7a57b11 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -61,6 +61,7 @@
 int cpuidle_idle_call(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+	struct cpuidle_driver *drv = cpuidle_get_driver();
 	struct cpuidle_state *target_state;
 	int next_state, entered_state;
 
@@ -84,18 +85,18 @@
 #endif
 
 	/* ask the governor for the next state */
-	next_state = cpuidle_curr_governor->select(dev);
+	next_state = cpuidle_curr_governor->select(drv, dev);
 	if (need_resched()) {
 		local_irq_enable();
 		return 0;
 	}
 
-	target_state = &dev->states[next_state];
+	target_state = &drv->states[next_state];
 
 	trace_power_start(POWER_CSTATE, next_state, dev->cpu);
 	trace_cpu_idle(next_state, dev->cpu);
 
-	entered_state = target_state->enter(dev, next_state);
+	entered_state = target_state->enter(dev, drv, next_state);
 
 	trace_power_end(dev->cpu);
 	trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
@@ -163,7 +164,8 @@
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
 #ifdef CONFIG_ARCH_HAS_CPU_RELAX
-static int poll_idle(struct cpuidle_device *dev, int index)
+static int poll_idle(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
 {
 	ktime_t	t1, t2;
 	s64 diff;
@@ -183,12 +185,9 @@
 	return index;
 }
 
-static void poll_idle_init(struct cpuidle_device *dev)
+static void poll_idle_init(struct cpuidle_driver *drv)
 {
-	struct cpuidle_state *state = &dev->states[0];
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[0];
-
-	cpuidle_set_statedata(state_usage, NULL);
+	struct cpuidle_state *state = &drv->states[0];
 
 	snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
 	snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
@@ -199,7 +198,7 @@
 	state->enter = poll_idle;
 }
 #else
-static void poll_idle_init(struct cpuidle_device *dev) {}
+static void poll_idle_init(struct cpuidle_driver *drv) {}
 #endif /* CONFIG_ARCH_HAS_CPU_RELAX */
 
 /**
@@ -226,13 +225,13 @@
 			return ret;
 	}
 
-	poll_idle_init(dev);
+	poll_idle_init(cpuidle_get_driver());
 
 	if ((ret = cpuidle_add_state_sysfs(dev)))
 		return ret;
 
 	if (cpuidle_curr_governor->enable &&
-	    (ret = cpuidle_curr_governor->enable(dev)))
+	    (ret = cpuidle_curr_governor->enable(cpuidle_get_driver(), dev)))
 		goto fail_sysfs;
 
 	for (i = 0; i < dev->state_count; i++) {
@@ -273,7 +272,7 @@
 	dev->enabled = 0;
 
 	if (cpuidle_curr_governor->disable)
-		cpuidle_curr_governor->disable(dev);
+		cpuidle_curr_governor->disable(cpuidle_get_driver(), dev);
 
 	cpuidle_remove_state_sysfs(dev);
 	enabled_devices--;
@@ -301,26 +300,6 @@
 
 	init_completion(&dev->kobj_unregister);
 
-	/*
-	 * cpuidle driver should set the dev->power_specified bit
-	 * before registering the device if the driver provides
-	 * power_usage numbers.
-	 *
-	 * For those devices whose ->power_specified is not set,
-	 * we fill in power_usage with decreasing values as the
-	 * cpuidle code has an implicit assumption that state Cn
-	 * uses less power than C(n-1).
-	 *
-	 * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
-	 * an power value of -1.  So we use -2, -3, etc, for other
-	 * c-states.
-	 */
-	if (!dev->power_specified) {
-		int i;
-		for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++)
-			dev->states[i].power_usage = -1 - i;
-	}
-
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
 	list_add(&dev->device_list, &cpuidle_detected_devices);
 	if ((ret = cpuidle_add_sysfs(sys_dev))) {
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 3f7e3ce..284d7af 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -17,6 +17,30 @@
 static struct cpuidle_driver *cpuidle_curr_driver;
 DEFINE_SPINLOCK(cpuidle_driver_lock);
 
+static void __cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+	int i;
+	/*
+	 * cpuidle driver should set the drv->power_specified bit
+	 * before registering if the driver provides
+	 * power_usage numbers.
+	 *
+	 * If power_specified is not set,
+	 * we fill in power_usage with decreasing values as the
+	 * cpuidle code has an implicit assumption that state Cn
+	 * uses less power than C(n-1).
+	 *
+	 * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
+	 * an power value of -1.  So we use -2, -3, etc, for other
+	 * c-states.
+	 */
+	if (!drv->power_specified) {
+		for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++)
+			drv->states[i].power_usage = -1 - i;
+	}
+}
+
+
 /**
  * cpuidle_register_driver - registers a driver
  * @drv: the driver
@@ -34,6 +58,7 @@
 		spin_unlock(&cpuidle_driver_lock);
 		return -EBUSY;
 	}
+	__cpuidle_register_driver(drv);
 	cpuidle_curr_driver = drv;
 	spin_unlock(&cpuidle_driver_lock);
 
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 6a686a7..ef6b9e4 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -60,9 +60,11 @@
 
 /**
  * ladder_select_state - selects the next state to enter
+ * @drv: cpuidle driver
  * @dev: the CPU
  */
-static int ladder_select_state(struct cpuidle_device *dev)
+static int ladder_select_state(struct cpuidle_driver *drv,
+				struct cpuidle_device *dev)
 {
 	struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
 	struct ladder_device_state *last_state;
@@ -77,15 +79,17 @@
 
 	last_state = &ldev->states[last_idx];
 
-	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
-		last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
+	if (drv->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID) {
+		last_residency = cpuidle_get_last_residency(dev) - \
+					 drv->states[last_idx].exit_latency;
+	}
 	else
 		last_residency = last_state->threshold.promotion_time + 1;
 
 	/* consider promotion */
-	if (last_idx < dev->state_count - 1 &&
+	if (last_idx < drv->state_count - 1 &&
 	    last_residency > last_state->threshold.promotion_time &&
-	    dev->states[last_idx + 1].exit_latency <= latency_req) {
+	    drv->states[last_idx + 1].exit_latency <= latency_req) {
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
@@ -96,11 +100,11 @@
 
 	/* consider demotion */
 	if (last_idx > CPUIDLE_DRIVER_STATE_START &&
-	    dev->states[last_idx].exit_latency > latency_req) {
+	    drv->states[last_idx].exit_latency > latency_req) {
 		int i;
 
 		for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) {
-			if (dev->states[i].exit_latency <= latency_req)
+			if (drv->states[i].exit_latency <= latency_req)
 				break;
 		}
 		ladder_do_selection(ldev, last_idx, i);
@@ -123,9 +127,11 @@
 
 /**
  * ladder_enable_device - setup for the governor
+ * @drv: cpuidle driver
  * @dev: the CPU
  */
-static int ladder_enable_device(struct cpuidle_device *dev)
+static int ladder_enable_device(struct cpuidle_driver *drv,
+				struct cpuidle_device *dev)
 {
 	int i;
 	struct ladder_device *ldev = &per_cpu(ladder_devices, dev->cpu);
@@ -134,8 +140,8 @@
 
 	ldev->last_state_idx = CPUIDLE_DRIVER_STATE_START;
 
-	for (i = 0; i < dev->state_count; i++) {
-		state = &dev->states[i];
+	for (i = 0; i < drv->state_count; i++) {
+		state = &drv->states[i];
 		lstate = &ldev->states[i];
 
 		lstate->stats.promotion_count = 0;
@@ -144,7 +150,7 @@
 		lstate->threshold.promotion_count = PROMOTION_COUNT;
 		lstate->threshold.demotion_count = DEMOTION_COUNT;
 
-		if (i < dev->state_count - 1)
+		if (i < drv->state_count - 1)
 			lstate->threshold.promotion_time = state->exit_latency;
 		if (i > 0)
 			lstate->threshold.demotion_time = state->exit_latency;
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index af724e8..bcbe881 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -182,7 +182,7 @@
 
 static DEFINE_PER_CPU(struct menu_device, menu_devices);
 
-static void menu_update(struct cpuidle_device *dev);
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
 
 /* This implements DIV_ROUND_CLOSEST but avoids 64 bit division */
 static u64 div_round64(u64 dividend, u32 divisor)
@@ -228,9 +228,10 @@
 
 /**
  * menu_select - selects the next idle state to enter
+ * @drv: cpuidle driver containing state data
  * @dev: the CPU
  */
-static int menu_select(struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
@@ -240,7 +241,7 @@
 	struct timespec t;
 
 	if (data->needs_update) {
-		menu_update(dev);
+		menu_update(drv, dev);
 		data->needs_update = 0;
 	}
 
@@ -285,8 +286,8 @@
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
 	 */
-	for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) {
-		struct cpuidle_state *s = &dev->states[i];
+	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
 
 		if (s->target_residency > data->predicted_us)
 			continue;
@@ -323,14 +324,15 @@
 
 /**
  * menu_update - attempts to guess what happened after entry
+ * @drv: cpuidle driver containing state data
  * @dev: the CPU
  */
-static void menu_update(struct cpuidle_device *dev)
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
 	int last_idx = data->last_state_idx;
 	unsigned int last_idle_us = cpuidle_get_last_residency(dev);
-	struct cpuidle_state *target = &dev->states[last_idx];
+	struct cpuidle_state *target = &drv->states[last_idx];
 	unsigned int measured_us;
 	u64 new_factor;
 
@@ -384,9 +386,11 @@
 
 /**
  * menu_enable_device - scans a CPU's states and does setup
+ * @drv: cpuidle driver
  * @dev: the CPU
  */
-static int menu_enable_device(struct cpuidle_device *dev)
+static int menu_enable_device(struct cpuidle_driver *drv,
+				struct cpuidle_device *dev)
 {
 	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
 
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 8a1ace1..1e756e1 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -322,13 +322,14 @@
 {
 	int i, ret = -ENOMEM;
 	struct cpuidle_state_kobj *kobj;
+	struct cpuidle_driver *drv = cpuidle_get_driver();
 
 	/* state statistics */
 	for (i = 0; i < device->state_count; i++) {
 		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
 		if (!kobj)
 			goto error_state;
-		kobj->state = &device->states[i];
+		kobj->state = &drv->states[i];
 		kobj->state_usage = &device->states_usage[i];
 		init_completion(&kobj->kobj_unregister);