Merge tag 'pm-4.11-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki:
"One of these is an intel_pstate regression fix and it is not a small
change, but it mostly removes code that shouldn't be there. That code
was acquired by mistake and has been a source of constant pain since
then, so the time has come to get rid of it finally. We have not seen
problems with this change in the lab, so fingers crossed.
The rest is more usual: one more intel_pstate commit removing useless
code, a cpufreq core fix to make it restore policy limits on CPU
online (which prevents the limits from being reset over system
suspend/resume), a schedutil cpufreq governor initialization fix to
make it actually work as advertised on all systems and an extra sanity
check in the cpuidle core to prevent crashes from happening if the
arch code messes things up.
Specifics:
- Make intel_pstate use one set of global P-state limits in the
active mode regardless of the scaling_governor settings for
individual CPUs instead of switching back and forth between two of
them in a way that is hard to control (Rafael Wysocki).
- Drop a useless function from intel_pstate to prevent it from
modifying the maximum supported frequency value unexpectedly which
may confuse the cpufreq core (Rafael Wysocki).
- Fix the cpufreq core to restore policy limits on CPU online so that
the limits are not reset over system suspend/resume, among other
things (Viresh Kumar).
- Fix the initialization of the schedutil cpufreq governor to make
the IO-wait boosting mechanism in it actually work on systems with
one CPU per cpufreq policy (Rafael Wysocki).
- Add a sanity check to the cpuidle core to prevent crashes from
happening if the architecture code initialization fails to set up
things as expected (Vaidyanathan Srinivasan)"
* tag 'pm-4.11-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
cpufreq: Restore policy min/max limits on CPU online
cpuidle: Validate cpu_dev in cpuidle_add_sysfs()
cpufreq: intel_pstate: Fix policy data management in passive mode
cpufreq: schedutil: Fix per-CPU structure initialization in sugov_start()
cpufreq: intel_pstate: One set of global limits in active mode
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b8ff617..5dbdd26 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1184,6 +1184,9 @@
for_each_cpu(j, policy->related_cpus)
per_cpu(cpufreq_cpu_data, j) = policy;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ } else {
+ policy->min = policy->user_policy.min;
+ policy->max = policy->user_policy.max;
}
if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 08e134f..283491f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -364,9 +364,7 @@
static bool acpi_ppc;
#endif
-static struct perf_limits performance_limits;
-static struct perf_limits powersave_limits;
-static struct perf_limits *limits;
+static struct perf_limits global;
static void intel_pstate_init_limits(struct perf_limits *limits)
{
@@ -377,14 +375,6 @@
limits->max_sysfs_pct = 100;
}
-static void intel_pstate_set_performance_limits(struct perf_limits *limits)
-{
- intel_pstate_init_limits(limits);
- limits->min_perf_pct = 100;
- limits->min_perf = int_ext_tofp(1);
- limits->min_sysfs_pct = 100;
-}
-
static DEFINE_MUTEX(intel_pstate_driver_lock);
static DEFINE_MUTEX(intel_pstate_limits_lock);
@@ -507,7 +497,7 @@
* correct max turbo frequency based on the turbo state.
* Also need to convert to MHz as _PSS freq is in MHz.
*/
- if (!limits->turbo_disabled)
+ if (!global.turbo_disabled)
cpu->acpi_perf_data.states[0].core_frequency =
policy->cpuinfo.max_freq / 1000;
cpu->valid_pss_table = true;
@@ -626,7 +616,7 @@
cpu = all_cpu_data[0];
rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
- limits->turbo_disabled =
+ global.turbo_disabled =
(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
@@ -851,7 +841,7 @@
static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
{
int min, hw_min, max, hw_max, cpu;
- struct perf_limits *perf_limits = limits;
+ struct perf_limits *perf_limits = &global;
u64 value, cap;
for_each_cpu(cpu, policy->cpus) {
@@ -863,19 +853,22 @@
rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
hw_min = HWP_LOWEST_PERF(cap);
- if (limits->no_turbo)
+ if (global.no_turbo)
hw_max = HWP_GUARANTEED_PERF(cap);
else
hw_max = HWP_HIGHEST_PERF(cap);
- min = fp_ext_toint(hw_max * perf_limits->min_perf);
+ max = fp_ext_toint(hw_max * perf_limits->max_perf);
+ if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+ min = max;
+ else
+ min = fp_ext_toint(hw_max * perf_limits->min_perf);
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
- max = fp_ext_toint(hw_max * perf_limits->max_perf);
value &= ~HWP_MAX_PERF(~0L);
value |= HWP_MAX_PERF(max);
@@ -968,20 +961,11 @@
}
static void intel_pstate_update_policies(void)
- __releases(&intel_pstate_limits_lock)
- __acquires(&intel_pstate_limits_lock)
{
- struct perf_limits *saved_limits = limits;
int cpu;
- mutex_unlock(&intel_pstate_limits_lock);
-
for_each_possible_cpu(cpu)
cpufreq_update_policy(cpu);
-
- mutex_lock(&intel_pstate_limits_lock);
-
- limits = saved_limits;
}
/************************** debugfs begin ************************/
@@ -1060,7 +1044,7 @@
static ssize_t show_##file_name \
(struct kobject *kobj, struct attribute *attr, char *buf) \
{ \
- return sprintf(buf, "%u\n", limits->object); \
+ return sprintf(buf, "%u\n", global.object); \
}
static ssize_t intel_pstate_show_status(char *buf);
@@ -1151,10 +1135,10 @@
}
update_turbo_state();
- if (limits->turbo_disabled)
- ret = sprintf(buf, "%u\n", limits->turbo_disabled);
+ if (global.turbo_disabled)
+ ret = sprintf(buf, "%u\n", global.turbo_disabled);
else
- ret = sprintf(buf, "%u\n", limits->no_turbo);
+ ret = sprintf(buf, "%u\n", global.no_turbo);
mutex_unlock(&intel_pstate_driver_lock);
@@ -1181,19 +1165,19 @@
mutex_lock(&intel_pstate_limits_lock);
update_turbo_state();
- if (limits->turbo_disabled) {
+ if (global.turbo_disabled) {
pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
mutex_unlock(&intel_pstate_limits_lock);
mutex_unlock(&intel_pstate_driver_lock);
return -EPERM;
}
- limits->no_turbo = clamp_t(int, input, 0, 1);
-
- intel_pstate_update_policies();
+ global.no_turbo = clamp_t(int, input, 0, 1);
mutex_unlock(&intel_pstate_limits_lock);
+ intel_pstate_update_policies();
+
mutex_unlock(&intel_pstate_driver_lock);
return count;
@@ -1218,19 +1202,16 @@
mutex_lock(&intel_pstate_limits_lock);
- limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
- limits->max_perf_pct = min(limits->max_policy_pct,
- limits->max_sysfs_pct);
- limits->max_perf_pct = max(limits->min_policy_pct,
- limits->max_perf_pct);
- limits->max_perf_pct = max(limits->min_perf_pct,
- limits->max_perf_pct);
- limits->max_perf = percent_ext_fp(limits->max_perf_pct);
-
- intel_pstate_update_policies();
+ global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
+ global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
+ global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
+ global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
+ global.max_perf = percent_ext_fp(global.max_perf_pct);
mutex_unlock(&intel_pstate_limits_lock);
+ intel_pstate_update_policies();
+
mutex_unlock(&intel_pstate_driver_lock);
return count;
@@ -1255,19 +1236,16 @@
mutex_lock(&intel_pstate_limits_lock);
- limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
- limits->min_perf_pct = max(limits->min_policy_pct,
- limits->min_sysfs_pct);
- limits->min_perf_pct = min(limits->max_policy_pct,
- limits->min_perf_pct);
- limits->min_perf_pct = min(limits->max_perf_pct,
- limits->min_perf_pct);
- limits->min_perf = percent_ext_fp(limits->min_perf_pct);
-
- intel_pstate_update_policies();
+ global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
+ global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
+ global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
+ global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
+ global.min_perf = percent_ext_fp(global.min_perf_pct);
mutex_unlock(&intel_pstate_limits_lock);
+ intel_pstate_update_policies();
+
mutex_unlock(&intel_pstate_driver_lock);
return count;
@@ -1387,7 +1365,7 @@
u32 vid;
val = (u64)pstate << 8;
- if (limits->no_turbo && !limits->turbo_disabled)
+ if (global.no_turbo && !global.turbo_disabled)
val |= (u64)1 << 32;
vid_fp = cpudata->vid.min + mul_fp(
@@ -1557,7 +1535,7 @@
u64 val;
val = (u64)pstate << 8;
- if (limits->no_turbo && !limits->turbo_disabled)
+ if (global.no_turbo && !global.turbo_disabled)
val |= (u64)1 << 32;
return val;
@@ -1683,9 +1661,9 @@
int max_perf = cpu->pstate.turbo_pstate;
int max_perf_adj;
int min_perf;
- struct perf_limits *perf_limits = limits;
+ struct perf_limits *perf_limits = &global;
- if (limits->no_turbo || limits->turbo_disabled)
+ if (global.no_turbo || global.turbo_disabled)
max_perf = cpu->pstate.max_pstate;
if (per_cpu_limits)
@@ -1820,7 +1798,7 @@
sample->busy_scaled = busy_frac * 100;
- target = limits->no_turbo || limits->turbo_disabled ?
+ target = global.no_turbo || global.turbo_disabled ?
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
target += target >> 2;
target = mul_fp(target, busy_frac);
@@ -2116,7 +2094,7 @@
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
- struct perf_limits *perf_limits = NULL;
+ struct perf_limits *perf_limits = &global;
if (!policy->cpuinfo.max_freq)
return -ENODEV;
@@ -2139,21 +2117,6 @@
mutex_lock(&intel_pstate_limits_lock);
- if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
- pr_debug("set performance\n");
- if (!perf_limits) {
- limits = &performance_limits;
- perf_limits = limits;
- }
- } else {
- pr_debug("set powersave\n");
- if (!perf_limits) {
- limits = &powersave_limits;
- perf_limits = limits;
- }
-
- }
-
intel_pstate_update_perf_limits(policy, perf_limits);
if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
@@ -2177,16 +2140,9 @@
static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
struct cpudata *cpu = all_cpu_data[policy->cpu];
- struct perf_limits *perf_limits;
-
- if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
- perf_limits = &performance_limits;
- else
- perf_limits = &powersave_limits;
update_turbo_state();
- policy->cpuinfo.max_freq = perf_limits->turbo_disabled ||
- perf_limits->no_turbo ?
+ policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ?
cpu->pstate.max_freq :
cpu->pstate.turbo_freq;
@@ -2201,9 +2157,9 @@
unsigned int max_freq, min_freq;
max_freq = policy->cpuinfo.max_freq *
- perf_limits->max_sysfs_pct / 100;
+ global.max_sysfs_pct / 100;
min_freq = policy->cpuinfo.max_freq *
- perf_limits->min_sysfs_pct / 100;
+ global.min_sysfs_pct / 100;
cpufreq_verify_within_limits(policy, min_freq, max_freq);
}
@@ -2255,7 +2211,7 @@
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
update_turbo_state();
- policy->cpuinfo.max_freq = limits->turbo_disabled ?
+ policy->cpuinfo.max_freq = global.turbo_disabled ?
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
policy->cpuinfo.max_freq *= cpu->pstate.scaling;
@@ -2275,7 +2231,7 @@
return ret;
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
+ if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
else
policy->policy = CPUFREQ_POLICY_POWERSAVE;
@@ -2301,7 +2257,7 @@
struct cpudata *cpu = all_cpu_data[policy->cpu];
update_turbo_state();
- policy->cpuinfo.max_freq = limits->turbo_disabled ?
+ policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
cpufreq_verify_within_cpu_limits(policy);
@@ -2309,26 +2265,6 @@
return 0;
}
-static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
- struct cpufreq_policy *policy,
- unsigned int target_freq)
-{
- unsigned int max_freq;
-
- update_turbo_state();
-
- max_freq = limits->no_turbo || limits->turbo_disabled ?
- cpu->pstate.max_freq : cpu->pstate.turbo_freq;
- policy->cpuinfo.max_freq = max_freq;
- if (policy->max > max_freq)
- policy->max = max_freq;
-
- if (target_freq > max_freq)
- target_freq = max_freq;
-
- return target_freq;
-}
-
static int intel_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -2337,8 +2273,10 @@
struct cpufreq_freqs freqs;
int target_pstate;
+ update_turbo_state();
+
freqs.old = policy->cur;
- freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+ freqs.new = target_freq;
cpufreq_freq_transition_begin(policy, &freqs);
switch (relation) {
@@ -2370,7 +2308,8 @@
struct cpudata *cpu = all_cpu_data[policy->cpu];
int target_pstate;
- target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+ update_turbo_state();
+
target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
intel_pstate_update_pstate(cpu, target_pstate);
@@ -2425,13 +2364,7 @@
{
int ret;
- intel_pstate_init_limits(&powersave_limits);
- intel_pstate_set_performance_limits(&performance_limits);
- if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) &&
- intel_pstate_driver == &intel_pstate)
- limits = &performance_limits;
- else
- limits = &powersave_limits;
+ intel_pstate_init_limits(&global);
ret = cpufreq_register_driver(intel_pstate_driver);
if (ret) {
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index c5adc8c..ae948b1 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -615,6 +615,18 @@
struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
int error;
+ /*
+ * Return if cpu_device is not setup for this CPU.
+ *
+ * This could happen if the arch did not set up cpu_device
+ * since this CPU is not in cpu_present mask and the
+ * driver did not send a correct CPU mask during registration.
+ * Without this check we would end up passing bogus
+ * value for &cpu_dev->kobj in kobject_init_and_add()
+ */
+ if (!cpu_dev)
+ return -ENODEV;
+
kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
if (!kdev)
return -ENOMEM;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index cd7cd48..54c5775 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -584,20 +584,14 @@
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+ memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->sg_policy = sg_policy;
- if (policy_is_shared(policy)) {
- sg_cpu->util = 0;
- sg_cpu->max = 0;
- sg_cpu->flags = SCHED_CPUFREQ_RT;
- sg_cpu->last_update = 0;
- sg_cpu->iowait_boost = 0;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
- cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
- sugov_update_shared);
- } else {
- cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
- sugov_update_single);
- }
+ sg_cpu->flags = SCHED_CPUFREQ_RT;
+ sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+ policy_is_shared(policy) ?
+ sugov_update_shared :
+ sugov_update_single);
}
return 0;
}