aboutsummaryrefslogtreecommitdiff
path: root/drivers/thermal
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/thermal')
-rw-r--r--drivers/thermal/Kconfig48
-rw-r--r--drivers/thermal/Makefile5
-rw-r--r--drivers/thermal/clock_cooling.c485
-rw-r--r--drivers/thermal/cpu_cooling.c917
-rw-r--r--drivers/thermal/db8500_cpufreq_cooling.c5
-rw-r--r--drivers/thermal/db8500_thermal.c2
-rw-r--r--drivers/thermal/fair_share.c39
-rw-r--r--drivers/thermal/imx_thermal.c8
-rw-r--r--drivers/thermal/int340x_thermal/int3403_thermal.c1
-rw-r--r--drivers/thermal/of-thermal.c192
-rw-r--r--drivers/thermal/power_allocator.c539
-rw-r--r--drivers/thermal/samsung/Kconfig2
-rw-r--r--drivers/thermal/samsung/exynos_thermal_common.c10
-rw-r--r--drivers/thermal/samsung/exynos_tmu.c5
-rw-r--r--drivers/thermal/tegra_soctherm.c476
-rw-r--r--drivers/thermal/thermal_core.c355
-rw-r--r--drivers/thermal/thermal_core.h29
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-thermal-common.c17
18 files changed, 2778 insertions, 357 deletions
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f554d25b4399..8b7d47f2f3aa 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -42,6 +42,17 @@ config THERMAL_OF
Say 'Y' here if you need to build thermal infrastructure
based on device tree.
+config THERMAL_WRITABLE_TRIPS
+ bool "Enable writable trip points"
+ help
+ This option allows the system integrator to choose whether
+ trip temperatures can be changed from userspace. The
+ writable trips need to be specified when setting up the
+ thermal zone but the choice here takes precedence.
+
+ Say 'Y' here if you would like to allow userspace tools to
+ change trip temperatures.
+
choice
prompt "Default Thermal governor"
default THERMAL_DEFAULT_GOV_STEP_WISE
@@ -71,6 +82,14 @@ config THERMAL_DEFAULT_GOV_USER_SPACE
Select this if you want to let the user space manage the
platform thermals.
+config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR
+ bool "power_allocator"
+ select THERMAL_GOV_POWER_ALLOCATOR
+ help
+ Select this if you want to control temperature based on
+ system and device power allocation. This governor can only
+ operate on cooling devices that implement the power API.
+
endchoice
config THERMAL_GOV_FAIR_SHARE
@@ -99,6 +118,13 @@ config THERMAL_GOV_USER_SPACE
help
Enable this to let the user space manage the platform thermals.
+config THERMAL_GOV_POWER_ALLOCATOR
+ bool "Power allocator thermal governor"
+ select THERMAL_POWER_ACTOR
+ help
+ Enable this to manage platform thermals by dynamically
+ allocating and limiting power to devices.
+
config CPU_THERMAL
bool "generic cpu cooling support"
depends on CPU_FREQ
@@ -112,6 +138,18 @@ config CPU_THERMAL
If you want this support, you should say Y here.
+config CLOCK_THERMAL
+ bool "Generic clock cooling support"
+ depends on COMMON_CLK
+ depends on PM_OPP
+ help
+ This entry implements the generic clock cooling mechanism through
+ frequency clipping. Typically used to cool off co-processors. The
+ device that is configured to use this cooling mechanism will be
+ controlled to reduce clock frequency whenever temperature is high.
+
+ If you want this support, you should say Y here.
+
config THERMAL_EMULATION
bool "Thermal emulation mode support"
help
@@ -185,6 +223,16 @@ config ARMADA_THERMAL
Enable this option if you want to have support for thermal management
controller present in Armada 370 and Armada XP SoC.
+config TEGRA_SOCTHERM
+ tristate "Tegra SOCTHERM thermal management"
+ depends on ARCH_TEGRA
+ help
+ Enable this option for integrated thermal management support on NVIDIA
+ Tegra124 systems-on-chip. The driver supports four thermal zones
+ (CPU, GPU, MEM, PLLX). Cooling devices can be bound to the thermal
+ zones to manage temperatures. This option is also required for the
+ emergency thermal reset (thermtrip) feature to function.
+
config DB8500_CPUFREQ_COOLING
tristate "DB8500 cpufreq cooling"
depends on ARCH_U8500
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 39c4fe87da2f..dae37c089bb4 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -14,10 +14,14 @@ thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o
thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG) += gov_bang_bang.o
thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o
thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o
+thermal_sys-$(CONFIG_THERMAL_GOV_POWER_ALLOCATOR) += power_allocator.o
# cpufreq cooling
thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o
+# clock cooling
+thermal_sys-$(CONFIG_CLOCK_THERMAL) += clock_cooling.o
+
# platform thermal drivers
obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o
obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o
@@ -34,3 +38,4 @@ obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o
obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/
obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/
obj-$(CONFIG_ST_THERMAL) += st/
+obj-$(CONFIG_TEGRA_SOCTHERM) += tegra_soctherm.o
diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c
new file mode 100644
index 000000000000..1b4ff0f4c716
--- /dev/null
+++ b/drivers/thermal/clock_cooling.c
@@ -0,0 +1,485 @@
+/*
+ * drivers/thermal/clock_cooling.c
+ *
+ * Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com>
+ *
+ * Copyright (C) 2013 Texas Instruments Inc.
+ * Contact: Eduardo Valentin <eduardo.valentin@ti.com>
+ *
+ * Highly based on cpu_cooling.c.
+ * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
+ * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/clk.h>
+#include <linux/cpufreq.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/mutex.h>
+#include <linux/pm_opp.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/clock_cooling.h>
+
+/**
+ * struct clock_cooling_device - data for cooling device with clock
+ * @id: unique integer value corresponding to each clock_cooling_device
+ * registered. It is allocated through clock_cooling_get_idr() and used
+ * to build the "thermal-clock-%d" cooling device name.
+ * @dev: struct device pointer to the device being used to cool off using
+ * clock frequencies.
+ * @cdev: thermal_cooling_device pointer to keep track of the
+ * registered cooling device.
+ * @clk_rate_change_nb: reference to notifier block used to receive clock
+ * rate changes. to_clock_cooling_device() maps it back to this struct.
+ * @freq_table: frequency table used to keep track of available frequencies.
+ * It is filled by dev_pm_opp_init_cpufreq_table() at registration.
+ * @clock_state: integer value representing the current state of clock
+ * cooling devices.
+ * @clock_val: integer value representing the absolute value of the clipped
+ * frequency. The clock notifier rejects rate changes above this value.
+ * @clk: struct clk reference used to enforce clock limits.
+ * @lock: mutex held by clock_cooling_apply() while @clock_state and
+ * @clock_val are updated and the clock rate is clipped.
+ *
+ * This structure is required for keeping information of each
+ * clock_cooling_device registered. In order to prevent corruption of this a
+ * mutex @lock is used.
+ */
+struct clock_cooling_device {
+ int id;
+ struct device *dev;
+ struct thermal_cooling_device *cdev;
+ struct notifier_block clk_rate_change_nb;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned long clock_state;
+ unsigned long clock_val;
+ struct clk *clk;
+ struct mutex lock; /* lock to protect the content of this struct */
+};
+/*
+ * Map a pointer to the embedded clk_rate_change_nb notifier block back to
+ * the struct clock_cooling_device that contains it.
+ */
+#define to_clock_cooling_device(x) \
+ container_of(x, struct clock_cooling_device, clk_rate_change_nb)
+/* ID allocator for clock cooling devices; guarded by cooling_clock_lock */
+static DEFINE_IDR(clock_idr);
+/* Serializes idr_alloc()/idr_remove() calls on clock_idr */
+static DEFINE_MUTEX(cooling_clock_lock);
+
+/**
+ * clock_cooling_get_idr - allocate a unique id for a clock cooling device.
+ * @id: location where the newly allocated id is stored on success.
+ *
+ * The id is taken from clock_idr using the idr API, with the allocation
+ * serialized by cooling_clock_lock. @id is left untouched on failure.
+ *
+ * Return: 0 on success, a negative error code from idr_alloc() on failure.
+ */
+static int clock_cooling_get_idr(int *id)
+{
+	int new_id;
+
+	mutex_lock(&cooling_clock_lock);
+	new_id = idr_alloc(&clock_idr, NULL, 0, 0, GFP_KERNEL);
+	mutex_unlock(&cooling_clock_lock);
+
+	/* a negative value is the error code itself */
+	if (unlikely(new_id < 0))
+		return new_id;
+
+	*id = new_id;
+	return 0;
+}
+
+/**
+ * release_idr - function to free the unique id.
+ * @id: int value representing the unique id, as previously handed out by
+ * clock_cooling_get_idr().
+ *
+ * Removes @id from clock_idr while holding cooling_clock_lock, the same
+ * mutex that serializes allocation in clock_cooling_get_idr().
+ */
+static void release_idr(int id)
+{
+ mutex_lock(&cooling_clock_lock);
+ idr_remove(&clock_idr, id);
+ mutex_unlock(&cooling_clock_lock);
+}
+
+/* Below code defines functions to be used for clock as cooling device */
+
+/* Query selector passed to clock_cooling_get_property() */
+enum clock_cooling_property {
+ GET_LEVEL, /* input is a frequency, output is its cooling level */
+ GET_FREQ, /* input is a cooling level, output is its frequency */
+ GET_MAXL, /* input is ignored, output is the highest cooling level */
+};
+
+/**
+ * clock_cooling_get_property - fetch a property of interest for a given
+ *	clock cooling device.
+ * @ccdev: clock cooling device reference
+ * @input: query parameter (a frequency for GET_LEVEL, a cooling level for
+ *	GET_FREQ, ignored for GET_MAXL)
+ * @output: query return
+ * @property: type of query (frequency, level, max level)
+ *
+ * This is the common function to
+ * 1. get maximum clock cooling states
+ * 2. translate frequency to cooling state
+ * 3. translate cooling state to frequency
+ * Note that the code may be not in good shape
+ * but it is written in this way in order to:
+ * a) reduce duplicate code as most of the code can be shared.
+ * b) make sure the logic is consistent when translating between
+ *    cooling states and frequencies.
+ *
+ * The frequency table is walked twice: once to count the distinct
+ * frequencies and detect the table order, and once to resolve the query.
+ * Consecutive duplicate frequencies are collapsed in both walks.
+ *
+ * Return: 0 on success, -EINVAL when invalid parameters are passed, when
+ * there is no frequency table or when no entry matches the query.
+ */
+static int clock_cooling_get_property(struct clock_cooling_device *ccdev,
+				      unsigned long input,
+				      unsigned long *output,
+				      enum clock_cooling_property property)
+{
+	int i;
+	unsigned long max_level = 0, level = 0;
+	unsigned int freq = CPUFREQ_ENTRY_INVALID;
+	int descend = -1;
+	struct cpufreq_frequency_table *pos, *table = ccdev->freq_table;
+
+	if (!output)
+		return -EINVAL;
+
+	if (!table)
+		return -EINVAL;
+
+	cpufreq_for_each_valid_entry(pos, table) {
+		/* ignore duplicate entry */
+		if (freq == pos->frequency)
+			continue;
+
+		/* get the frequency order */
+		if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
+			descend = freq > pos->frequency;
+
+		freq = pos->frequency;
+		max_level++;
+	}
+
+	/* No valid cpu frequency entry */
+	if (max_level == 0)
+		return -EINVAL;
+
+	/* max_level is an index, not a counter */
+	max_level--;
+
+	/* get max level */
+	if (property == GET_MAXL) {
+		*output = max_level;
+		return 0;
+	}
+
+	/*
+	 * With a single distinct frequency the order is never detected and
+	 * descend stays -1, which is treated as "descending" below.
+	 */
+	if (property == GET_FREQ)
+		level = descend ? input : (max_level - input);
+
+	/*
+	 * Restart the duplicate tracking for the second walk. Without this
+	 * reset freq still holds the last frequency seen by the first walk,
+	 * so a first entry equal to it (always the case for a table with a
+	 * single distinct frequency) would wrongly be skipped as a duplicate.
+	 */
+	freq = CPUFREQ_ENTRY_INVALID;
+
+	i = 0;
+	cpufreq_for_each_valid_entry(pos, table) {
+		/* ignore duplicate entry */
+		if (freq == pos->frequency)
+			continue;
+
+		/* now we have a valid frequency entry */
+		freq = pos->frequency;
+
+		if (property == GET_LEVEL && (unsigned int)input == freq) {
+			/* get level by frequency */
+			*output = descend ? i : (max_level - i);
+			return 0;
+		}
+		if (property == GET_FREQ && level == i) {
+			/* get frequency by level */
+			*output = freq;
+			return 0;
+		}
+		i++;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * clock_cooling_get_level - return the cooling level of given clock cooling.
+ * @cdev: reference to a thermal cooling device used as clock cooling device
+ * @freq: the frequency of interest
+ *
+ * Looks up @freq in the frequency table of the clock cooling device behind
+ * @cdev and reports the cooling level that corresponds to it.
+ *
+ * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
+ * otherwise.
+ */
+unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev,
+				      unsigned long freq)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+	unsigned long level;
+	int err;
+
+	err = clock_cooling_get_property(ccdev, freq, &level, GET_LEVEL);
+
+	/* level is only meaningful when the lookup succeeded */
+	return err ? THERMAL_CSTATE_INVALID : level;
+}
+EXPORT_SYMBOL_GPL(clock_cooling_get_level);
+
+/**
+ * clock_cooling_get_frequency - get the absolute value of frequency from level.
+ * @ccdev: clock cooling device reference
+ * @level: cooling level
+ *
+ * Translates a cooling level (the cooling state of the clock cooling device)
+ * into the frequency it stands for.
+ * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
+ *
+ * Return: 0 on error, the corresponding frequency otherwise.
+ */
+static unsigned long
+clock_cooling_get_frequency(struct clock_cooling_device *ccdev,
+			    unsigned long level)
+{
+	unsigned long rate;
+
+	/* any lookup failure is reported to the caller as frequency 0 */
+	if (clock_cooling_get_property(ccdev, level, &rate, GET_FREQ))
+		return 0;
+
+	return rate;
+}
+
+/**
+ * clock_cooling_apply - function to apply frequency clipping.
+ * @ccdev: clock_cooling_device pointer containing frequency clipping data.
+ * @cooling_state: value of the cooling state.
+ *
+ * Function used to make sure the clock layer is aware of current thermal
+ * limits. The limits are applied by updating the clock rate in case it is
+ * higher than the corresponding frequency based on the requested cooling_state.
+ *
+ * The whole sequence (state comparison, limit update, rate sampling and
+ * clipping) runs with @ccdev->lock held, so concurrent callers cannot act
+ * on a stale cooling state or a stale clock rate.
+ *
+ * Return: 0 on success, an error code otherwise (-EINVAL in case wrong
+ * cooling state).
+ */
+static int clock_cooling_apply(struct clock_cooling_device *ccdev,
+			       unsigned long cooling_state)
+{
+	unsigned long clip_freq, cur_freq;
+	int ret = 0;
+
+	mutex_lock(&ccdev->lock);
+
+	/* Check if the old cooling action is same as new cooling action */
+	if (ccdev->clock_state == cooling_state)
+		goto unlock;
+
+	clip_freq = clock_cooling_get_frequency(ccdev, cooling_state);
+	if (!clip_freq) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * Here we write the clipping. The new limit is recorded before the
+	 * rate is touched, and it is kept even if clk_set_rate() fails, so
+	 * the rate-change notifier keeps enforcing it.
+	 */
+	ccdev->clock_state = cooling_state;
+	ccdev->clock_val = clip_freq;
+
+	/* enforce clock level */
+	cur_freq = clk_get_rate(ccdev->clk);
+	if (cur_freq > clip_freq)
+		ret = clk_set_rate(ccdev->clk, clip_freq);
+
+unlock:
+	mutex_unlock(&ccdev->lock);
+
+	return ret;
+}
+
+/**
+ * clock_cooling_clock_notifier - notifier callback on clock rate changes.
+ * @nb: struct notifier_block * with callback info.
+ * @event: value showing clock event for which this function invoked.
+ * @data: callback-specific data
+ *
+ * Callback to hijack the notification on clock transition.
+ * Pre change events are intercepted and the transition is blocked when the
+ * new rate infringes the current thermal limit. Every other event is let
+ * through untouched.
+ *
+ * Return: NOTIFY_DONE (success) or NOTIFY_BAD (new_rate > thermal limit).
+ */
+static int clock_cooling_clock_notifier(struct notifier_block *nb,
+					unsigned long event, void *data)
+{
+	struct clock_cooling_device *ccdev = to_clock_cooling_device(nb);
+	struct clk_notifier_data *ndata = data;
+
+	/* POST_RATE_CHANGE, ABORT_RATE_CHANGE and unknown events pass */
+	if (event != PRE_RATE_CHANGE)
+		return NOTIFY_DONE;
+
+	/*
+	 * checks on current state
+	 * TODO: current method is not best we can find as it
+	 * allows possibly voltage transitions, in case DVFS
+	 * layer is also hijacking clock pre notifications.
+	 */
+	if (ndata->new_rate > ccdev->clock_val)
+		return NOTIFY_BAD;
+
+	return NOTIFY_DONE;
+}
+
+/* clock cooling device thermal callback functions are defined below */
+
+/**
+ * clock_cooling_get_max_state - callback function to get the max cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: fill this variable with the max cooling state.
+ *
+ * Thermal cooling device callback reporting the highest cooling state of
+ * the clock cooling device. @state is only written on success.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+static int clock_cooling_get_max_state(struct thermal_cooling_device *cdev,
+				       unsigned long *state)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+	unsigned long max_level = 0;
+	int err;
+
+	err = clock_cooling_get_property(ccdev, 0, &max_level, GET_MAXL);
+	if (err)
+		return err;
+
+	*state = max_level;
+
+	return 0;
+}
+
+/**
+ * clock_cooling_get_cur_state - function to get the current cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: fill this variable with the current cooling state.
+ *
+ * Thermal cooling device callback reporting the cooling state last recorded
+ * for the clock cooling device.
+ *
+ * Return: 0 (success)
+ */
+static int clock_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long *state)
+{
+	const struct clock_cooling_device *clock_device = cdev->devdata;
+
+	/* plain read of the cached state; the clock itself is not queried */
+	*state = clock_device->clock_state;
+
+	return 0;
+}
+
+/**
+ * clock_cooling_set_cur_state - function to set the current cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: set this variable to the current cooling state.
+ *
+ * Thermal cooling device callback that moves the clock cooling device to
+ * the requested cooling state by delegating to clock_cooling_apply().
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+static int clock_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long state)
+{
+	/* devdata is the struct clock_cooling_device given at registration */
+	return clock_cooling_apply(cdev->devdata, state);
+}
+
+/*
+ * Bind clock callbacks to thermal cooling device ops. This table is handed
+ * to thermal_cooling_device_register() by clock_cooling_register().
+ */
+static struct thermal_cooling_device_ops const clock_cooling_ops = {
+ .get_max_state = clock_cooling_get_max_state,
+ .get_cur_state = clock_cooling_get_cur_state,
+ .set_cur_state = clock_cooling_set_cur_state,
+};
+
+/**
+ * clock_cooling_register - function to create clock cooling device.
+ * @dev: struct device pointer to the device used as clock cooling device.
+ * @clock_name: string containing the clock used as cooling mechanism.
+ *
+ * This interface function registers the clock cooling device with the name
+ * "thermal-clock-%x". The cooling device is based on clock frequencies.
+ * The struct device is assumed to be capable of DVFS transitions.
+ * The OPP layer is used to fetch and fill the available frequencies for
+ * the referred device. The ordered frequency table is used to control
+ * the clock cooling device cooling states and to limit clock transitions
+ * based on the cooling state requested by the thermal framework.
+ *
+ * On failure everything set up so far (clock notifier, frequency table,
+ * cooling device, id) is torn down again. The device-managed allocations
+ * made with devm_kzalloc() and devm_clk_get() are left to the devres
+ * framework.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+clock_cooling_register(struct device *dev, const char *clock_name)
+{
+	struct thermal_cooling_device *cdev;
+	struct clock_cooling_device *ccdev;
+	char dev_name[THERMAL_NAME_LENGTH];
+	int ret;
+
+	ccdev = devm_kzalloc(dev, sizeof(*ccdev), GFP_KERNEL);
+	if (!ccdev)
+		return ERR_PTR(-ENOMEM);
+
+	/* clock_cooling_apply() takes this lock; zeroed memory is not enough */
+	mutex_init(&ccdev->lock);
+	ccdev->dev = dev;
+	ccdev->clk = devm_clk_get(dev, clock_name);
+	if (IS_ERR(ccdev->clk))
+		return ERR_CAST(ccdev->clk);
+
+	ret = clock_cooling_get_idr(&ccdev->id);
+	if (ret)
+		return ERR_PTR(ret);
+
+	snprintf(dev_name, sizeof(dev_name), "thermal-clock-%d", ccdev->id);
+
+	cdev = thermal_cooling_device_register(dev_name, ccdev,
+					       &clock_cooling_ops);
+	if (IS_ERR(cdev)) {
+		ret = PTR_ERR(cdev);
+		goto err_release_idr;
+	}
+	ccdev->cdev = cdev;
+	ccdev->clk_rate_change_nb.notifier_call = clock_cooling_clock_notifier;
+
+	/* Assuming someone has already filled the opp table for this device */
+	ret = dev_pm_opp_init_cpufreq_table(dev, &ccdev->freq_table);
+	if (ret)
+		goto err_unregister_cdev;
+
+	ccdev->clock_state = 0;
+	ccdev->clock_val = clock_cooling_get_frequency(ccdev, 0);
+
+	/* without the notifier the thermal limit could not be enforced */
+	ret = clk_notifier_register(ccdev->clk, &ccdev->clk_rate_change_nb);
+	if (ret)
+		goto err_free_table;
+
+	return cdev;
+
+err_free_table:
+	dev_pm_opp_free_cpufreq_table(dev, &ccdev->freq_table);
+err_unregister_cdev:
+	thermal_cooling_device_unregister(cdev);
+err_release_idr:
+	release_idr(ccdev->id);
+
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(clock_cooling_register);
+
+/**
+ * clock_cooling_unregister - function to remove clock cooling device.
+ * @cdev: thermal cooling device pointer. A NULL pointer is accepted and
+ * makes the call a no-op.
+ *
+ * This interface function unregisters the "thermal-clock-%x" cooling device.
+ * It drops the clock rate notifier, frees the frequency table, unregisters
+ * the thermal cooling device and finally releases the id.
+ */
+void clock_cooling_unregister(struct thermal_cooling_device *cdev)
+{
+ struct clock_cooling_device *ccdev;
+
+ if (!cdev)
+ return;
+
+ ccdev = cdev->devdata;
+
+ /* stop vetoing rate changes before the limit data goes away */
+ clk_notifier_unregister(ccdev->clk, &ccdev->clk_rate_change_nb);
+ dev_pm_opp_free_cpufreq_table(ccdev->dev, &ccdev->freq_table);
+
+ thermal_cooling_device_unregister(ccdev->cdev);
+ /* the id may be reused by a later registration from here on */
+ release_idr(ccdev->id);
+}
+EXPORT_SYMBOL_GPL(clock_cooling_unregister);
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index ad09e51ffae4..07a9629edf4b 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -4,6 +4,8 @@
* Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
* Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
*
+ * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
+ *
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,10 +26,40 @@
#include <linux/thermal.h>
#include <linux/cpufreq.h>
#include <linux/err.h>
+#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>
+#include <trace/events/thermal.h>
+
+/*
+ * Cooling state <-> CPUFreq frequency
+ *
+ * Cooling states are translated to frequencies throughout this driver and this
+ * is the relation between them.
+ *
+ * Highest cooling state corresponds to lowest possible frequency.
+ *
+ * i.e.
+ * level 0 --> 1st Max Freq
+ * level 1 --> 2nd Max Freq
+ * ...
+ */
+
+/**
+ * struct power_table - frequency to power conversion
+ * @frequency: frequency in KHz
+ * @power: power in mW
+ *
+ * This structure is built when the cooling device registers and helps
+ * in translating frequency to power and vice versa.
+ */
+struct power_table {
+ u32 frequency;
+ u32 power;
+};
+
/**
* struct cpufreq_cooling_device - data for cooling device with cpufreq
* @id: unique integer value corresponding to each cpufreq_cooling_device
@@ -38,25 +70,43 @@
* cooling devices.
* @cpufreq_val: integer value representing the absolute value of the clipped
* frequency.
+ * @max_level: maximum cooling level. One less than total number of valid
+ * cpufreq frequencies.
* @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
+ * @node: list_head to link all cpufreq_cooling_device together.
+ * @last_load: load measured by the latest call to cpufreq_get_actual_power()
+ * @time_in_idle: previous reading of the absolute time that this cpu was idle
+ * @time_in_idle_timestamp: wall time of the last invocation of
+ * get_cpu_idle_time_us()
+ * @dyn_power_table: array of struct power_table for frequency to power
+ * conversion, sorted in ascending order.
+ * @dyn_power_table_entries: number of entries in the @dyn_power_table array
+ * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
+ * @plat_get_static_power: callback to calculate the static power
*
- * This structure is required for keeping information of each
- * cpufreq_cooling_device registered. In order to prevent corruption of this a
- * mutex lock cooling_cpufreq_lock is used.
+ * This structure is required for keeping information of each registered
+ * cpufreq_cooling_device.
*/
struct cpufreq_cooling_device {
int id;
struct thermal_cooling_device *cool_dev;
unsigned int cpufreq_state;
unsigned int cpufreq_val;
+ unsigned int max_level;
+ unsigned int *freq_table; /* In descending order */
struct cpumask allowed_cpus;
struct list_head node;
+ u32 last_load;
+ u64 *time_in_idle;
+ u64 *time_in_idle_timestamp;
+ struct power_table *dyn_power_table;
+ int dyn_power_table_entries;
+ struct device *cpu_dev;
+ get_static_t plat_get_static_power;
};
static DEFINE_IDR(cpufreq_idr);
static DEFINE_MUTEX(cooling_cpufreq_lock);
-static unsigned int cpufreq_dev_count;
-
static LIST_HEAD(cpufreq_dev_list);
/**
@@ -98,120 +148,30 @@ static void release_idr(struct idr *idr, int id)
/* Below code defines functions to be used for cpufreq as cooling device */
/**
- * is_cpufreq_valid - function to check frequency transitioning capability.
- * @cpu: cpu for which check is needed.
+ * get_level: Find the level for a particular frequency
+ * @cpufreq_dev: cpufreq_dev for which the property is required
+ * @freq: Frequency
*
- * This function will check the current state of the system if
- * it is capable of changing the frequency for a given @cpu.
- *
- * Return: 0 if the system is not currently capable of changing
- * the frequency of given cpu. !0 in case the frequency is changeable.
- */
-static int is_cpufreq_valid(int cpu)
-{
- struct cpufreq_policy policy;
-
- return !cpufreq_get_policy(&policy, cpu);
-}
-
-enum cpufreq_cooling_property {
- GET_LEVEL,
- GET_FREQ,
- GET_MAXL,
-};
-
-/**
- * get_property - fetch a property of interest for a give cpu.
- * @cpu: cpu for which the property is required
- * @input: query parameter
- * @output: query return
- * @property: type of query (frequency, level, max level)
- *
- * This is the common function to
- * 1. get maximum cpu cooling states
- * 2. translate frequency to cooling state
- * 3. translate cooling state to frequency
- * Note that the code may be not in good shape
- * but it is written in this way in order to:
- * a) reduce duplicate code as most of the code can be shared.
- * b) make sure the logic is consistent when translating between
- * cooling states and frequencies.
- *
- * Return: 0 on success, -EINVAL when invalid parameters are passed.
+ * Return: level on success, THERMAL_CSTATE_INVALID on error.
*/
-static int get_property(unsigned int cpu, unsigned long input,
- unsigned int *output,
- enum cpufreq_cooling_property property)
+static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
+ unsigned int freq)
{
- int i;
- unsigned long max_level = 0, level = 0;
- unsigned int freq = CPUFREQ_ENTRY_INVALID;
- int descend = -1;
- struct cpufreq_frequency_table *pos, *table =
- cpufreq_frequency_get_table(cpu);
-
- if (!output)
- return -EINVAL;
-
- if (!table)
- return -EINVAL;
-
- cpufreq_for_each_valid_entry(pos, table) {
- /* ignore duplicate entry */
- if (freq == pos->frequency)
- continue;
-
- /* get the frequency order */
- if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
- descend = freq > pos->frequency;
-
- freq = pos->frequency;
- max_level++;
- }
-
- /* No valid cpu frequency entry */
- if (max_level == 0)
- return -EINVAL;
+ unsigned long level;
- /* max_level is an index, not a counter */
- max_level--;
-
- /* get max level */
- if (property == GET_MAXL) {
- *output = (unsigned int)max_level;
- return 0;
- }
-
- if (property == GET_FREQ)
- level = descend ? input : (max_level - input);
-
- i = 0;
- cpufreq_for_each_valid_entry(pos, table) {
- /* ignore duplicate entry */
- if (freq == pos->frequency)
- continue;
-
- /* now we have a valid frequency entry */
- freq = pos->frequency;
+ for (level = 0; level <= cpufreq_dev->max_level; level++) {
+ if (freq == cpufreq_dev->freq_table[level])
+ return level;
- if (property == GET_LEVEL && (unsigned int)input == freq) {
- /* get level by frequency */
- *output = descend ? i : (max_level - i);
- return 0;
- }
- if (property == GET_FREQ && level == i) {
- /* get frequency by level */
- *output = freq;
- return 0;
- }
- i++;
+ if (freq > cpufreq_dev->freq_table[level])
+ break;
}
- return -EINVAL;
+ return THERMAL_CSTATE_INVALID;
}
/**
- * cpufreq_cooling_get_level - for a give cpu, return the cooling level.
+ * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
* @cpu: cpu for which the level is required
* @freq: the frequency of interest
*
@@ -223,119 +183,268 @@ static int get_property(unsigned int cpu, unsigned long input,
*/
unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
{
- unsigned int val;
+ struct cpufreq_cooling_device *cpufreq_dev;
- if (get_property(cpu, (unsigned long)freq, &val, GET_LEVEL))
- return THERMAL_CSTATE_INVALID;
+ mutex_lock(&cooling_cpufreq_lock);
+ list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
+ if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
+ mutex_unlock(&cooling_cpufreq_lock);
+ return get_level(cpufreq_dev, freq);
+ }
+ }
+ mutex_unlock(&cooling_cpufreq_lock);
- return (unsigned long)val;
+ pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
+ return THERMAL_CSTATE_INVALID;
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
/**
- * get_cpu_frequency - get the absolute value of frequency from level.
- * @cpu: cpu for which frequency is fetched.
- * @level: cooling level
+ * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
+ * @nb: struct notifier_block * with callback info.
+ * @event: value showing cpufreq event for which this function invoked.
+ * @data: callback-specific data
*
- * This function matches cooling level with frequency. Based on a cooling level
- * of frequency, equals cooling state of cpu cooling device, it will return
- * the corresponding frequency.
- * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
+ * Callback to hijack the notification on cpufreq policy transition.
+ * Every time there is a change in policy, we will intercept and
+ * update the cpufreq policy with thermal constraints.
*
- * Return: 0 on error, the corresponding frequency otherwise.
+ * Return: NOTIFY_OK for a CPUFREQ_ADJUST event, NOTIFY_DONE otherwise.
*/
-static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level)
+static int cpufreq_thermal_notifier(struct notifier_block *nb,
+ unsigned long event, void *data)
{
- int ret = 0;
- unsigned int freq;
+ struct cpufreq_policy *policy = data;
+ unsigned long max_freq = 0;
+ struct cpufreq_cooling_device *cpufreq_dev;
- ret = get_property(cpu, level, &freq, GET_FREQ);
- if (ret)
- return 0;
+ switch (event) {
+
+ case CPUFREQ_ADJUST:
+ mutex_lock(&cooling_cpufreq_lock);
+ list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
+ if (!cpumask_test_cpu(policy->cpu,
+ &cpufreq_dev->allowed_cpus))
+ continue;
- return freq;
+ max_freq = cpufreq_dev->cpufreq_val;
+
+ if (policy->max != max_freq)
+ cpufreq_verify_within_limits(policy, 0,
+ max_freq);
+ }
+ mutex_unlock(&cooling_cpufreq_lock);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
}
/**
- * cpufreq_apply_cooling - function to apply frequency clipping.
- * @cpufreq_device: cpufreq_cooling_device pointer containing frequency
- * clipping data.
- * @cooling_state: value of the cooling state.
+ * build_dyn_power_table() - create a dynamic power to frequency table
+ * @cpufreq_device: the cpufreq cooling device in which to store the table
+ * @capacitance: dynamic power coefficient for these cpus
*
- * Function used to make sure the cpufreq layer is aware of current thermal
- * limits. The limits are applied by updating the cpufreq policy.
+ * Build a dynamic power to frequency table for this cpu and store it
+ * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
+ * cpu_freq_to_power() to convert between power and frequency
+ * efficiently. Power is stored in mW, frequency in KHz. The
+ * resulting table is in ascending order.
*
- * Return: 0 on success, an error code otherwise (-EINVAL in case wrong
- * cooling state).
+ * Return: 0 on success, -E* on error.
*/
-static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device,
- unsigned long cooling_state)
+static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
+ u32 capacitance)
{
- unsigned int cpuid, clip_freq;
- struct cpumask *mask = &cpufreq_device->allowed_cpus;
- unsigned int cpu = cpumask_any(mask);
+ struct power_table *power_table;
+ struct dev_pm_opp *opp;
+ struct device *dev = NULL;
+ int num_opps = 0, cpu, i, ret = 0;
+ unsigned long freq;
+
+ rcu_read_lock();
+
+ for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+ dev = get_cpu_device(cpu);
+ if (!dev) {
+ dev_warn(&cpufreq_device->cool_dev->device,
+ "No cpu device for cpu %d\n", cpu);
+ continue;
+ }
+ num_opps = dev_pm_opp_get_opp_count(dev);
+ if (num_opps > 0) {
+ break;
+ } else if (num_opps < 0) {
+ ret = num_opps;
+ goto unlock;
+ }
+ }
- /* Check if the old cooling action is same as new cooling action */
- if (cpufreq_device->cpufreq_state == cooling_state)
- return 0;
+ if (num_opps == 0) {
+ ret = -EINVAL;
+ goto unlock;
+ }
- clip_freq = get_cpu_frequency(cpu, cooling_state);
- if (!clip_freq)
- return -EINVAL;
+ power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
- cpufreq_device->cpufreq_state = cooling_state;
- cpufreq_device->cpufreq_val = clip_freq;
+ for (freq = 0, i = 0;
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
+ freq++, i++) {
+ u32 freq_mhz, voltage_mv;
+ u64 power;
+
+ freq_mhz = freq / 1000000;
+ voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
+
+ /*
+ * Do the multiplication with MHz and millivolt so as
+ * to not overflow.
+ */
+ power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
+ do_div(power, 1000000000);
+
+ /* frequency is stored in power_table in KHz */
+ power_table[i].frequency = freq / 1000;
- for_each_cpu(cpuid, mask) {
- if (is_cpufreq_valid(cpuid))
- cpufreq_update_policy(cpuid);
+ /* power is stored in mW */
+ power_table[i].power = power;
}
- return 0;
+ if (i == 0) {
+ ret = PTR_ERR(opp);
+ goto unlock;
+ }
+
+ cpufreq_device->cpu_dev = dev;
+ cpufreq_device->dyn_power_table = power_table;
+ cpufreq_device->dyn_power_table_entries = i;
+
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+
+static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
+ u32 freq)
+{
+ int i;
+ struct power_table *pt = cpufreq_device->dyn_power_table;
+
+ for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+ if (freq < pt[i].frequency)
+ break;
+
+ return pt[i - 1].power;
+}
+
+static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
+ u32 power)
+{
+ int i;
+ struct power_table *pt = cpufreq_device->dyn_power_table;
+
+ for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+ if (power < pt[i].power)
+ break;
+
+ return pt[i - 1].frequency;
}
/**
- * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
- * @nb: struct notifier_block * with callback info.
- * @event: value showing cpufreq event for which this function invoked.
- * @data: callback-specific data
+ * get_load() - get load for a cpu since last updated
+ * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
+ * @cpu: cpu number
*
- * Callback to hijack the notification on cpufreq policy transition.
- * Every time there is a change in policy, we will intercept and
- * update the cpufreq policy with thermal constraints.
- *
- * Return: 0 (success)
+ * Return: The average load of cpu @cpu in percentage since this
+ * function was last called.
*/
-static int cpufreq_thermal_notifier(struct notifier_block *nb,
- unsigned long event, void *data)
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
{
- struct cpufreq_policy *policy = data;
- unsigned long max_freq = 0;
- struct cpufreq_cooling_device *cpufreq_dev;
+ u32 load;
+ u64 now, now_idle, delta_time, delta_idle;
- if (event != CPUFREQ_ADJUST)
+ now_idle = get_cpu_idle_time(cpu, &now, 0);
+ delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
+ delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
+
+ if (delta_time <= delta_idle)
+ load = 0;
+ else
+ load = div64_u64(100 * (delta_time - delta_idle), delta_time);
+
+ cpufreq_device->time_in_idle[cpu] = now_idle;
+ cpufreq_device->time_in_idle_timestamp[cpu] = now;
+
+ return load;
+}
+
+/**
+ * get_static_power() - calculate the static power consumed by the cpus
+ * @cpufreq_device: &struct cpufreq_cooling_device for this cpu cdev
+ * @tz: thermal zone device in which we're operating
+ * @freq: frequency in KHz
+ * @power: pointer in which to store the calculated static power
+ *
+ * Calculate the static power consumed by the cpus described by
+ * @cpufreq_device running at frequency @freq. This function relies on a
+ * platform specific function that should have been provided when the
+ * actor was registered. If it wasn't, the static power is assumed to
+ * be negligible. The calculated static power is stored in @power.
+ *
+ * Return: 0 on success, -E* on failure.
+ */
+static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
+ struct thermal_zone_device *tz, unsigned long freq,
+ u32 *power)
+{
+ struct dev_pm_opp *opp;
+ unsigned long voltage;
+ struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
+ unsigned long freq_hz = freq * 1000;
+
+ if (!cpufreq_device->plat_get_static_power ||
+ !cpufreq_device->cpu_dev) {
+ *power = 0;
return 0;
+ }
- mutex_lock(&cooling_cpufreq_lock);
- list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
- if (!cpumask_test_cpu(policy->cpu,
- &cpufreq_dev->allowed_cpus))
- continue;
+ rcu_read_lock();
- if (!cpufreq_dev->cpufreq_val)
- cpufreq_dev->cpufreq_val = get_cpu_frequency(
- cpumask_any(&cpufreq_dev->allowed_cpus),
- cpufreq_dev->cpufreq_state);
+ opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
+ true);
+ voltage = dev_pm_opp_get_voltage(opp);
- max_freq = cpufreq_dev->cpufreq_val;
+ rcu_read_unlock();
- if (policy->max != max_freq)
- cpufreq_verify_within_limits(policy, 0, max_freq);
+ if (voltage == 0) {
+ dev_warn_ratelimited(cpufreq_device->cpu_dev,
+ "Failed to get voltage for frequency %lu: %ld\n",
+ freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+ return -EINVAL;
}
- mutex_unlock(&cooling_cpufreq_lock);
- return 0;
+ return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
+ voltage, power);
+}
+
+/**
+ * get_dynamic_power() - calculate the dynamic power
+ * @cpufreq_device: &struct cpufreq_cooling_device for this cdev
+ * @freq: current frequency in KHz
+ *
+ * Return: the dynamic power consumed by the cpus described by
+ * @cpufreq_device.
+ */
+static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
+ unsigned long freq)
+{
+ u32 raw_cpu_power;
+
+ raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
+ return (raw_cpu_power * cpufreq_device->last_load) / 100;
}
/* cpufreq cooling device callback functions are defined below */
@@ -354,19 +463,9 @@ static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
- struct cpumask *mask = &cpufreq_device->allowed_cpus;
- unsigned int cpu;
- unsigned int count = 0;
- int ret;
-
- cpu = cpumask_any(mask);
- ret = get_property(cpu, 0, &count, GET_MAXL);
-
- if (count > 0)
- *state = count;
-
- return ret;
+ *state = cpufreq_device->max_level;
+ return 0;
}
/**
@@ -403,12 +502,225 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
unsigned long state)
{
struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+ unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
+ unsigned int clip_freq;
+
+ /* Requested state must not exceed max_level */
+ if (WARN_ON(state > cpufreq_device->max_level))
+ return -EINVAL;
+
+ /* Check if the old cooling action is same as new cooling action */
+ if (cpufreq_device->cpufreq_state == state)
+ return 0;
+
+ clip_freq = cpufreq_device->freq_table[state];
+ cpufreq_device->cpufreq_state = state;
+ cpufreq_device->cpufreq_val = clip_freq;
+
+ cpufreq_update_policy(cpu);
+
+ return 0;
+}
+
+/**
+ * cpufreq_get_requested_power() - get the current power
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @power: pointer in which to store the resulting power
+ *
+ * Calculate the current power consumption of the cpus in milliwatts
+ * and store it in @power. This function should actually calculate
+ * the requested power, but it's hard to get the frequency that
+ * cpufreq would have assigned if there were no thermal limits.
+ * Instead, we calculate the current power on the assumption that the
+ * immediate future will look like the immediate past.
+ *
+ * We use the current frequency and the average load since this
+ * function was last called. In reality, there could have been
+ * multiple opps since this function was last called and that affects
+ * the load calculation. While it's not perfectly accurate, this
+ * simplification is good enough and works. REVISIT this, as more
+ * complex code may be needed if experiments show that it's not
+ * accurate enough.
+ *
+ * Return: 0 on success, -E* if getting the static power failed.
+ */
+static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ u32 *power)
+{
+ unsigned long freq;
+ int i = 0, cpu, ret;
+ u32 static_power, dynamic_power, total_load = 0;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+ u32 *load_cpu = NULL;
+
+ cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+ /*
+ * All the CPUs are offline, thus the requested power by
+ * the cdev is 0
+ */
+ if (cpu >= nr_cpu_ids) {
+ *power = 0;
+ return 0;
+ }
+
+ freq = cpufreq_quick_get(cpu);
+
+ if (trace_thermal_power_cpu_get_power_enabled()) {
+ u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
+
+ load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu),
+ GFP_KERNEL);
+ }
+
+ for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+ u32 load;
+
+ if (cpu_online(cpu))
+ load = get_load(cpufreq_device, cpu);
+ else
+ load = 0;
+
+ total_load += load;
+ if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
+ load_cpu[i] = load;
+
+ i++;
+ }
+
+ cpufreq_device->last_load = total_load;
+
+ dynamic_power = get_dynamic_power(cpufreq_device, freq);
+ ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+ if (ret) {
+ if (load_cpu)
+ devm_kfree(&cdev->device, load_cpu);
+ return ret;
+ }
+
+ if (load_cpu) {
+ trace_thermal_power_cpu_get_power(
+ &cpufreq_device->allowed_cpus,
+ freq, load_cpu, i, dynamic_power, static_power);
+
+ devm_kfree(&cdev->device, load_cpu);
+ }
+
+ *power = static_power + dynamic_power;
+ return 0;
+}
+
+/**
+ * cpufreq_state2power() - convert a cpu cdev state to power consumed
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @state: cooling device state to be converted
+ * @power: pointer in which to store the resulting power
+ *
+ * Convert cooling device state @state into power consumption in
+ * milliwatts assuming 100% load. Store the calculated power in
+ * @power.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device state could not
+ * be converted into a frequency or other -E* if there was an error
+ * when calculating the static power.
+ */
+static int cpufreq_state2power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ unsigned long state, u32 *power)
+{
+ unsigned int freq, num_cpus;
+ cpumask_t cpumask;
+ u32 static_power, dynamic_power;
+ int ret;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+ cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
+ num_cpus = cpumask_weight(&cpumask);
+
+ /* None of our cpus are online, so no power */
+ if (num_cpus == 0) {
+ *power = 0;
+ return 0;
+ }
+
+ freq = cpufreq_device->freq_table[state];
+ if (!freq)
+ return -EINVAL;
+
+ dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
+ ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+ if (ret)
+ return ret;
+
+ *power = static_power + dynamic_power;
+ return 0;
+}
+
+/**
+ * cpufreq_power2state() - convert power to a cooling device state
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @power: power in milliwatts to be converted
+ * @state: pointer in which to store the resulting state
+ *
+ * Calculate a cooling device state for the cpus described by @cdev
+ * that would allow them to consume at most @power mW and store it in
+ * @state. Note that this calculation depends on external factors
+ * such as the cpu load or the current static power. Calling this
+ * function with the same power as input can yield different cooling
+ * device states depending on those external factors.
+ *
+ * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
+ * the calculated frequency could not be converted to a valid state.
+ * The latter should not happen unless the frequencies available to
+ * cpufreq have changed since the initialization of the cpu cooling
+ * device.
+ */
+static int cpufreq_power2state(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz, u32 power,
+ unsigned long *state)
+{
+ unsigned int cpu, cur_freq, target_freq;
+ int ret;
+ s32 dyn_power;
+ u32 last_load, normalised_power, static_power;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+ cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+ /* None of our cpus are online */
+ if (cpu >= nr_cpu_ids)
+ return -ENODEV;
+
+ cur_freq = cpufreq_quick_get(cpu);
+ ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
+ if (ret)
+ return ret;
- return cpufreq_apply_cooling(cpufreq_device, state);
+ dyn_power = power - static_power;
+ dyn_power = dyn_power > 0 ? dyn_power : 0;
+ last_load = cpufreq_device->last_load ?: 1;
+ normalised_power = (dyn_power * 100) / last_load;
+ target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
+
+ *state = cpufreq_cooling_get_level(cpu, target_freq);
+ if (*state == THERMAL_CSTATE_INVALID) {
+ dev_warn_ratelimited(&cdev->device,
+ "Failed to convert %dKHz for cpu %d into a cdev state\n",
+ target_freq, cpu);
+ return -EINVAL;
+ }
+
+ trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
+ target_freq, *state, power);
+ return 0;
}
/* Bind cpufreq callbacks to thermal cooling device ops */
-static struct thermal_cooling_device_ops const cpufreq_cooling_ops = {
+static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
.get_max_state = cpufreq_get_max_state,
.get_cur_state = cpufreq_get_cur_state,
.set_cur_state = cpufreq_set_cur_state,
@@ -419,10 +731,28 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
.notifier_call = cpufreq_thermal_notifier,
};
+static unsigned int find_next_max(struct cpufreq_frequency_table *table,
+ unsigned int prev_max)
+{
+ struct cpufreq_frequency_table *pos;
+ unsigned int max = 0;
+
+ cpufreq_for_each_valid_entry(pos, table) {
+ if (pos->frequency > max && pos->frequency < prev_max)
+ max = pos->frequency;
+ }
+
+ return max;
+}
+
/**
* __cpufreq_cooling_register - helper function to create cpufreq cooling device
* @np: a valid struct device_node to the cooling device device tree node
* @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * Normally this should be same as cpufreq policy->related_cpus.
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
*
* This interface function registers the cpufreq cooling device with the name
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -434,40 +764,77 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
*/
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus)
+ const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
{
struct thermal_cooling_device *cool_dev;
- struct cpufreq_cooling_device *cpufreq_dev = NULL;
- unsigned int min = 0, max = 0;
+ struct cpufreq_cooling_device *cpufreq_dev;
char dev_name[THERMAL_NAME_LENGTH];
- int ret = 0, i;
- struct cpufreq_policy policy;
+ struct cpufreq_frequency_table *pos, *table;
+ unsigned int freq, i, num_cpus;
+ int ret;
- /* Verify that all the clip cpus have same freq_min, freq_max limit */
- for_each_cpu(i, clip_cpus) {
- /* continue if cpufreq policy not found and not return error */
- if (!cpufreq_get_policy(&policy, i))
- continue;
- if (min == 0 && max == 0) {
- min = policy.cpuinfo.min_freq;
- max = policy.cpuinfo.max_freq;
- } else {
- if (min != policy.cpuinfo.min_freq ||
- max != policy.cpuinfo.max_freq)
- return ERR_PTR(-EINVAL);
- }
+ table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
+ if (!table) {
+ pr_debug("%s: CPUFreq table not found\n", __func__);
+ return ERR_PTR(-EPROBE_DEFER);
}
- cpufreq_dev = kzalloc(sizeof(struct cpufreq_cooling_device),
- GFP_KERNEL);
+
+ cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
if (!cpufreq_dev)
return ERR_PTR(-ENOMEM);
+ num_cpus = cpumask_weight(clip_cpus);
+ cpufreq_dev->time_in_idle = kcalloc(num_cpus,
+ sizeof(*cpufreq_dev->time_in_idle),
+ GFP_KERNEL);
+ if (!cpufreq_dev->time_in_idle) {
+ cool_dev = ERR_PTR(-ENOMEM);
+ goto free_cdev;
+ }
+
+ cpufreq_dev->time_in_idle_timestamp =
+ kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
+ GFP_KERNEL);
+ if (!cpufreq_dev->time_in_idle_timestamp) {
+ cool_dev = ERR_PTR(-ENOMEM);
+ goto free_time_in_idle;
+ }
+
+ /* Find max levels */
+ cpufreq_for_each_valid_entry(pos, table)
+ cpufreq_dev->max_level++;
+
+ cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
+ cpufreq_dev->max_level, GFP_KERNEL);
+ if (!cpufreq_dev->freq_table) {
+ cool_dev = ERR_PTR(-ENOMEM);
+ goto free_time_in_idle_timestamp;
+ }
+
+ /* max_level is an index, not a counter */
+ cpufreq_dev->max_level--;
+
cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
+ if (capacitance) {
+ cpufreq_cooling_ops.get_requested_power =
+ cpufreq_get_requested_power;
+ cpufreq_cooling_ops.state2power = cpufreq_state2power;
+ cpufreq_cooling_ops.power2state = cpufreq_power2state;
+ cpufreq_dev->plat_get_static_power = plat_static_func;
+
+ ret = build_dyn_power_table(cpufreq_dev, capacitance);
+ if (ret) {
+ cool_dev = ERR_PTR(ret);
+ goto free_table;
+ }
+ }
+
ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
if (ret) {
- kfree(cpufreq_dev);
- return ERR_PTR(-EINVAL);
+ cool_dev = ERR_PTR(ret);
+ goto free_table;
}
snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
@@ -475,25 +842,48 @@ __cpufreq_cooling_register(struct device_node *np,
cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
&cpufreq_cooling_ops);
- if (IS_ERR(cool_dev)) {
- release_idr(&cpufreq_idr, cpufreq_dev->id);
- kfree(cpufreq_dev);
- return cool_dev;
+ if (IS_ERR(cool_dev))
+ goto remove_idr;
+
+ /* Fill freq-table in descending order of frequencies */
+ for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
+ freq = find_next_max(table, freq);
+ cpufreq_dev->freq_table[i] = freq;
+
+ /* Warn for duplicate entries */
+ if (!freq)
+ pr_warn("%s: table has duplicate entries\n", __func__);
+ else
+ pr_debug("%s: freq:%u KHz\n", __func__, freq);
}
+
+ cpufreq_dev->cpufreq_val = cpufreq_dev->freq_table[0];
cpufreq_dev->cool_dev = cool_dev;
- cpufreq_dev->cpufreq_state = 0;
+
mutex_lock(&cooling_cpufreq_lock);
/* Register the notifier for first cpufreq cooling device */
- if (cpufreq_dev_count == 0)
+ if (list_empty(&cpufreq_dev_list))
cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
CPUFREQ_POLICY_NOTIFIER);
- cpufreq_dev_count++;
list_add(&cpufreq_dev->node, &cpufreq_dev_list);
mutex_unlock(&cooling_cpufreq_lock);
return cool_dev;
+
+remove_idr:
+ release_idr(&cpufreq_idr, cpufreq_dev->id);
+free_table:
+ kfree(cpufreq_dev->freq_table);
+free_time_in_idle_timestamp:
+ kfree(cpufreq_dev->time_in_idle_timestamp);
+free_time_in_idle:
+ kfree(cpufreq_dev->time_in_idle);
+free_cdev:
+ kfree(cpufreq_dev);
+
+ return cool_dev;
}
/**
@@ -510,7 +900,7 @@ __cpufreq_cooling_register(struct device_node *np,
struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus)
{
- return __cpufreq_cooling_register(NULL, clip_cpus);
+ return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
@@ -534,11 +924,78 @@ of_cpufreq_cooling_register(struct device_node *np,
if (!np)
return ERR_PTR(-EINVAL);
- return __cpufreq_cooling_register(np, clip_cpus);
+ return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
/**
+ * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%d". This api can support multiple
+ * instances of cpufreq cooling devices. Using this function, the
+ * cooling device will implement the power extensions by using a
+ * simple cpu power model. The cpus must have registered their OPPs
+ * using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus. If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
+{
+ return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
+ plat_static_func);
+}
+EXPORT_SYMBOL(cpufreq_power_cooling_register);
+
+/**
+ * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @np: a valid struct device_node to the cooling device device tree node
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%d". This api can support multiple
+ * instances of cpufreq cooling devices. Using this API, the cpufreq
+ * cooling device will be linked to the device tree node provided.
+ * Using this function, the cooling device will implement the power
+ * extensions by using a simple cpu power model. The cpus must have
+ * registered their OPPs using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus. If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus,
+ u32 capacitance,
+ get_static_t plat_static_func)
+{
+ if (!np)
+ return ERR_PTR(-EINVAL);
+
+ return __cpufreq_cooling_register(np, clip_cpus, capacitance,
+ plat_static_func);
+}
+EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
+
+/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
*
@@ -554,16 +1011,18 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
cpufreq_dev = cdev->devdata;
mutex_lock(&cooling_cpufreq_lock);
list_del(&cpufreq_dev->node);
- cpufreq_dev_count--;
/* Unregister the notifier for the last cpufreq cooling device */
- if (cpufreq_dev_count == 0)
+ if (list_empty(&cpufreq_dev_list))
cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
CPUFREQ_POLICY_NOTIFIER);
mutex_unlock(&cooling_cpufreq_lock);
thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
release_idr(&cpufreq_idr, cpufreq_dev->id);
+ kfree(cpufreq_dev->time_in_idle_timestamp);
+ kfree(cpufreq_dev->time_in_idle);
+ kfree(cpufreq_dev->freq_table);
kfree(cpufreq_dev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
index 786d19263ab0..1ac7ec651c3f 100644
--- a/drivers/thermal/db8500_cpufreq_cooling.c
+++ b/drivers/thermal/db8500_cpufreq_cooling.c
@@ -18,7 +18,6 @@
*/
#include <linux/cpu_cooling.h>
-#include <linux/cpufreq.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/of.h>
@@ -30,10 +29,6 @@ static int db8500_cpufreq_cooling_probe(struct platform_device *pdev)
struct thermal_cooling_device *cdev;
struct cpumask mask_val;
- /* make sure cpufreq driver has been initialized */
- if (!cpufreq_frequency_get_table(0))
- return -EPROBE_DEFER;
-
cpumask_set_cpu(0, &mask_val);
cdev = cpufreq_cooling_register(&mask_val);
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
index 1e3b3bf9f993..e3ccc2218eb3 100644
--- a/drivers/thermal/db8500_thermal.c
+++ b/drivers/thermal/db8500_thermal.c
@@ -76,7 +76,7 @@ static int db8500_cdev_bind(struct thermal_zone_device *thermal,
upper = lower = i > max_state ? max_state : i;
ret = thermal_zone_bind_cooling_device(thermal, i, cdev,
- upper, lower);
+ upper, lower, THERMAL_WEIGHT_DEFAULT);
dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type,
i, ret, ret ? "fail" : "succeed");
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 6e0a3fbfae86..8c50b8d6afb7 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -59,13 +59,13 @@ static int get_trip_level(struct thermal_zone_device *tz)
}
static long get_target_state(struct thermal_zone_device *tz,
- struct thermal_cooling_device *cdev, int weight, int level)
+ struct thermal_cooling_device *cdev, int percentage, int level)
{
unsigned long max_state;
cdev->ops->get_max_state(cdev, &max_state);
- return (long)(weight * level * max_state) / (100 * tz->trips);
+ return (long)(percentage * level * max_state) / (100 * tz->trips);
}
/**
@@ -77,7 +77,7 @@ static long get_target_state(struct thermal_zone_device *tz,
*
* Parameters used for Throttling:
* P1. max_state: Maximum throttle state exposed by the cooling device.
- * P2. weight[i]/100:
+ * P2. percentage[i]/100:
* How 'effective' the 'i'th device is, in cooling the given zone.
* P3. cur_trip_level/max_no_of_trips:
* This describes the extent to which the devices should be throttled.
@@ -88,28 +88,33 @@ static long get_target_state(struct thermal_zone_device *tz,
*/
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
- const struct thermal_zone_params *tzp;
- struct thermal_cooling_device *cdev;
struct thermal_instance *instance;
- int i;
+ int total_weight = 0;
+ int total_instance = 0;
int cur_trip_level = get_trip_level(tz);
- if (!tz->tzp || !tz->tzp->tbp)
- return -EINVAL;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if (instance->trip != trip)
+ continue;
+
+ total_weight += instance->weight;
+ total_instance++;
+ }
- tzp = tz->tzp;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ int percentage;
+ struct thermal_cooling_device *cdev = instance->cdev;
- for (i = 0; i < tzp->num_tbps; i++) {
- if (!tzp->tbp[i].cdev)
+ if (instance->trip != trip)
continue;
- cdev = tzp->tbp[i].cdev;
- instance = get_thermal_instance(tz, cdev, trip);
- if (!instance)
- continue;
+ if (!total_weight)
+ percentage = 100 / total_instance;
+ else
+ percentage = (instance->weight * 100) / total_weight;
- instance->target = get_target_state(tz, cdev,
- tzp->tbp[i].weight, cur_trip_level);
+ instance->target = get_target_state(tz, cdev, percentage,
+ cur_trip_level);
instance->cdev->updated = false;
thermal_cdev_update(cdev);
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 5a1f1070b702..31ada06c7dc6 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -9,7 +9,6 @@
#include <linux/clk.h>
#include <linux/cpu_cooling.h>
-#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/init.h>
@@ -307,7 +306,8 @@ static int imx_bind(struct thermal_zone_device *tz,
ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev,
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
if (ret) {
dev_err(&tz->device,
"binding zone %s with cdev %s failed:%d\n",
@@ -459,10 +459,6 @@ static int imx_thermal_probe(struct platform_device *pdev)
int measure_freq;
int ret;
- if (!cpufreq_get_current_driver()) {
- dev_dbg(&pdev->dev, "no cpufreq driver!");
- return -EPROBE_DEFER;
- }
data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
index 6e9fb62eb817..76910d35eecb 100644
--- a/drivers/thermal/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c
@@ -471,7 +471,6 @@ static struct platform_driver int3403_driver = {
.remove = int3403_remove,
.driver = {
.name = "int3403 thermal",
- .owner = THIS_MODULE,
.acpi_match_table = int3403_device_ids,
},
};
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 62143ba31001..b295b2b6c191 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -30,27 +30,13 @@
#include <linux/err.h>
#include <linux/export.h>
#include <linux/string.h>
+#include <linux/thermal.h>
#include "thermal_core.h"
/*** Private data structures to represent thermal device tree data ***/
/**
- * struct __thermal_trip - representation of a point in temperature domain
- * @np: pointer to struct device_node that this trip point was created from
- * @temperature: temperature value in miliCelsius
- * @hysteresis: relative hysteresis in miliCelsius
- * @type: trip point type
- */
-
-struct __thermal_trip {
- struct device_node *np;
- unsigned long int temperature;
- unsigned long int hysteresis;
- enum thermal_trip_type type;
-};
-
-/**
* struct __thermal_bind_param - a match between trip and cooling device
* @cooling_device: a pointer to identify the referred cooling device
* @trip_id: the trip point index
@@ -72,23 +58,26 @@ struct __thermal_bind_params {
* @mode: current thermal zone device mode (enabled/disabled)
* @passive_delay: polling interval while passive cooling is activated
* @polling_delay: zone polling interval
+ * @slope: slope of the temperature adjustment curve
+ * @offset: offset of the temperature adjustment curve
* @ntrips: number of trip points
* @trips: an array of trip points (0..ntrips - 1)
* @num_tbps: number of thermal bind params
* @tbps: an array of thermal bind params (0..num_tbps - 1)
* @sensor_data: sensor private data used while reading temperature and trend
- * @get_temp: sensor callback to read temperature
- * @get_trend: sensor callback to read temperature trend
+ * @ops: set of callbacks to handle the thermal zone based on DT
*/
struct __thermal_zone {
enum thermal_device_mode mode;
int passive_delay;
int polling_delay;
+ int slope;
+ int offset;
/* trip data */
int ntrips;
- struct __thermal_trip *trips;
+ struct thermal_trip *trips;
/* cooling binding data */
int num_tbps;
@@ -96,8 +85,7 @@ struct __thermal_zone {
/* sensor interface */
void *sensor_data;
- int (*get_temp)(void *, long *);
- int (*get_trend)(void *, long *);
+ const struct thermal_zone_of_device_ops *ops;
};
/*** DT thermal zone device callbacks ***/
@@ -107,10 +95,96 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz,
{
struct __thermal_zone *data = tz->devdata;
- if (!data->get_temp)
+ if (!data->ops->get_temp)
return -EINVAL;
- return data->get_temp(data->sensor_data, temp);
+ return data->ops->get_temp(data->sensor_data, temp);
+}
+
+/**
+ * of_thermal_get_ntrips - report the number of trip points of a zone
+ * @tz: pointer to a thermal zone
+ *
+ * Globally visible accessor for the trip count kept in the private
+ * struct __thermal_zone that hangs off @tz->devdata.
+ *
+ * Return: number of available trip points, -ENODEV when the private
+ * data is missing or is an error pointer
+ */
+int of_thermal_get_ntrips(struct thermal_zone_device *tz)
+{
+	struct __thermal_zone *zone_data = tz->devdata;
+
+	if (zone_data && !IS_ERR(zone_data))
+		return zone_data->ntrips;
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(of_thermal_get_ntrips);
+
+/**
+ * of_thermal_is_trip_valid - check whether a trip index exists in a zone
+ *
+ * @tz: pointer to a thermal zone
+ * @trip: trip point index to evaluate
+ *
+ * A trip index is accepted when the zone has private data and the index
+ * lies in the range 0..ntrips - 1.
+ *
+ * Return: true if trip point is valid, false otherwise
+ */
+bool of_thermal_is_trip_valid(struct thermal_zone_device *tz, int trip)
+{
+	struct __thermal_zone *zone_data = tz->devdata;
+
+	return zone_data && trip >= 0 && trip < zone_data->ntrips;
+}
+EXPORT_SYMBOL_GPL(of_thermal_is_trip_valid);
+
+/**
+ * of_thermal_get_trip_points - expose the trip point table of a zone
+ *
+ * @tz: pointer to a thermal zone
+ *
+ * Hands out a read-only pointer to the trip array stored in the private
+ * struct __thermal_zone of @tz.
+ *
+ * Return: pointer to trip points table, NULL when the zone has no
+ * private data
+ */
+const struct thermal_trip *
+of_thermal_get_trip_points(struct thermal_zone_device *tz)
+{
+	struct __thermal_zone *zone_data = tz->devdata;
+
+	return zone_data ? zone_data->trips : NULL;
+}
+EXPORT_SYMBOL_GPL(of_thermal_get_trip_points);
+
+/**
+ * of_thermal_set_emul_temp - forward an emulated temperature to the sensor
+ *
+ * @tz: pointer to a thermal zone
+ * @temp: temperature to set
+ *
+ * Passes @temp to the set_emul_temp callback of the sensor ops registered
+ * for this zone, which is handy for debugging.
+ *
+ * Return: the callback's result, or -EINVAL when no sensor ops or no
+ * set_emul_temp callback is registered
+ */
+static int of_thermal_set_emul_temp(struct thermal_zone_device *tz,
+				    unsigned long temp)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (data->ops && data->ops->set_emul_temp)
+		return data->ops->set_emul_temp(data->sensor_data, temp);
+
+	return -EINVAL;
}
static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
@@ -120,10 +194,10 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
long dev_trend;
int r;
- if (!data->get_trend)
+ if (!data->ops->get_trend)
return -EINVAL;
- r = data->get_trend(data->sensor_data, &dev_trend);
+ r = data->ops->get_trend(data->sensor_data, &dev_trend);
if (r)
return r;
@@ -157,7 +231,8 @@ static int of_thermal_bind(struct thermal_zone_device *thermal,
ret = thermal_zone_bind_cooling_device(thermal,
tbp->trip_id, cdev,
tbp->max,
- tbp->min);
+ tbp->min,
+ tbp->usage);
if (ret)
return ret;
}
@@ -324,8 +399,7 @@ static struct thermal_zone_device_ops of_thermal_ops = {
static struct thermal_zone_device *
thermal_zone_of_add_sensor(struct device_node *zone,
struct device_node *sensor, void *data,
- int (*get_temp)(void *, long *),
- int (*get_trend)(void *, long *))
+ const struct thermal_zone_of_device_ops *ops)
{
struct thermal_zone_device *tzd;
struct __thermal_zone *tz;
@@ -336,13 +410,16 @@ thermal_zone_of_add_sensor(struct device_node *zone,
tz = tzd->devdata;
+ if (!ops)
+ return ERR_PTR(-EINVAL);
+
mutex_lock(&tzd->lock);
- tz->get_temp = get_temp;
- tz->get_trend = get_trend;
+ tz->ops = ops;
tz->sensor_data = data;
tzd->ops->get_temp = of_thermal_get_temp;
tzd->ops->get_trend = of_thermal_get_trend;
+ tzd->ops->set_emul_temp = of_thermal_set_emul_temp;
mutex_unlock(&tzd->lock);
return tzd;
@@ -356,8 +433,7 @@ thermal_zone_of_add_sensor(struct device_node *zone,
* than one sensors
* @data: a private pointer (owned by the caller) that will be passed
* back, when a temperature reading is needed.
- * @get_temp: a pointer to a function that reads the sensor temperature.
- * @get_trend: a pointer to a function that reads the sensor temperature trend.
+ * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp.
*
* This function will search the list of thermal zones described in device
* tree and look for the zone that refer to the sensor device pointed by
@@ -382,9 +458,8 @@ thermal_zone_of_add_sensor(struct device_node *zone,
* check the return value with help of IS_ERR() helper.
*/
struct thermal_zone_device *
-thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
- void *data, int (*get_temp)(void *, long *),
- int (*get_trend)(void *, long *))
+thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data,
+ const struct thermal_zone_of_device_ops *ops)
{
struct device_node *np, *child, *sensor_np;
struct thermal_zone_device *tzd = ERR_PTR(-ENODEV);
@@ -426,9 +501,10 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
if (sensor_specs.np == sensor_np && id == sensor_id) {
tzd = thermal_zone_of_add_sensor(child, sensor_np,
- data,
- get_temp,
- get_trend);
+ data, ops);
+ if (!IS_ERR(tzd))
+ tzd->ops->set_mode(tzd, THERMAL_DEVICE_ENABLED);
+
of_node_put(sensor_specs.np);
of_node_put(child);
goto exit;
@@ -475,9 +551,9 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
mutex_lock(&tzd->lock);
tzd->ops->get_temp = NULL;
tzd->ops->get_trend = NULL;
+ tzd->ops->set_emul_temp = NULL;
- tz->get_temp = NULL;
- tz->get_trend = NULL;
+ tz->ops = NULL;
tz->sensor_data = NULL;
mutex_unlock(&tzd->lock);
}
@@ -501,7 +577,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister);
*/
static int thermal_of_populate_bind_params(struct device_node *np,
struct __thermal_bind_params *__tbp,
- struct __thermal_trip *trips,
+ struct thermal_trip *trips,
int ntrips)
{
struct of_phandle_args cooling_spec;
@@ -510,7 +586,7 @@ static int thermal_of_populate_bind_params(struct device_node *np,
u32 prop;
/* Default weight. Usage is optional */
- __tbp->usage = 0;
+ __tbp->usage = THERMAL_WEIGHT_DEFAULT;
ret = of_property_read_u32(np, "contribution", &prop);
if (ret == 0)
__tbp->usage = prop;
@@ -604,7 +680,7 @@ static int thermal_of_get_trip_type(struct device_node *np,
* Return: 0 on success, proper error code otherwise
*/
static int thermal_of_populate_trip(struct device_node *np,
- struct __thermal_trip *trip)
+ struct thermal_trip *trip)
{
int prop;
int ret;
@@ -644,7 +720,7 @@ static int thermal_of_populate_trip(struct device_node *np,
* @np parameter and fills the read data into a __thermal_zone data structure
* and return this pointer.
*
- * TODO: Missing properties to parse: thermal-sensor-names and coefficients
+ * TODO: Missing properties to parse: thermal-sensor-names
*
* Return: On success returns a valid struct __thermal_zone,
* otherwise, it returns a corresponding ERR_PTR(). Caller must
@@ -656,7 +732,7 @@ thermal_of_build_thermal_zone(struct device_node *np)
struct device_node *child = NULL, *gchild;
struct __thermal_zone *tz;
int ret, i;
- u32 prop;
+ u32 prop, coef[2];
if (!np) {
pr_err("no thermal zone np\n");
@@ -681,6 +757,20 @@ thermal_of_build_thermal_zone(struct device_node *np)
}
tz->polling_delay = prop;
+ /*
+ * REVISIT: for now, the thermal framework supports only
+ * one sensor per thermal zone. Thus, we are considering
+ * only the first two values as slope and offset.
+ */
+ ret = of_property_read_u32_array(np, "coefficients", coef, 2);
+ if (ret == 0) {
+ tz->slope = coef[0];
+ tz->offset = coef[1];
+ } else {
+ tz->slope = 1;
+ tz->offset = 0;
+ }
+
/* trips */
child = of_get_child_by_name(np, "trips");
@@ -794,6 +884,8 @@ int __init of_parse_thermal_zones(void)
for_each_child_of_node(np, child) {
struct thermal_zone_device *zone;
struct thermal_zone_params *tzp;
+ int i, mask = 0;
+ u32 prop;
/* Check whether child is enabled or not */
if (!of_device_is_available(child))
@@ -820,8 +912,18 @@ int __init of_parse_thermal_zones(void)
/* No hwmon because there might be hwmon drivers registering */
tzp->no_hwmon = true;
+ if (!of_property_read_u32(child, "sustainable-power", &prop))
+ tzp->sustainable_power = prop;
+
+ for (i = 0; i < tz->ntrips; i++)
+ mask |= 1 << i;
+
+ /* these two are left for temperature drivers to use */
+ tzp->slope = tz->slope;
+ tzp->offset = tz->offset;
+
zone = thermal_zone_device_register(child->name, tz->ntrips,
- 0, tz,
+ mask, tz,
ops, tzp,
tz->passive_delay,
tz->polling_delay);
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
new file mode 100644
index 000000000000..4672250b329f
--- /dev/null
+++ b/drivers/thermal/power_allocator.c
@@ -0,0 +1,539 @@
+/*
+ * A power allocator to manage temperature
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "Power allocator: " fmt
+
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/thermal_power_allocator.h>
+
+#include "thermal_core.h"
+
+#define FRAC_BITS 10
+#define int_to_frac(x) ((x) << FRAC_BITS)
+#define frac_to_int(x) ((x) >> FRAC_BITS)
+
+/**
+ * mul_frac() - multiply two fixed-point numbers
+ * @x: first multiplicand
+ * @y: second multiplicand
+ *
+ * Both operands carry FRAC_BITS fractional bits, so the raw product is
+ * shifted back down by FRAC_BITS.
+ *
+ * Return: the fixed-point product of @x and @y.
+ */
+static inline s64 mul_frac(s64 x, s64 y)
+{
+	s64 product = x * y;
+
+	return product >> FRAC_BITS;
+}
+
+/**
+ * div_frac() - divide two fixed-point numbers
+ * @x: the dividend
+ * @y: the divisor
+ *
+ * The dividend is scaled up by FRAC_BITS before the division so that the
+ * quotient keeps FRAC_BITS fractional bits.
+ *
+ * Return: the fixed-point quotient of @x and @y.
+ */
+static inline s64 div_frac(s64 x, s64 y)
+{
+	s64 scaled_dividend = x << FRAC_BITS;
+
+	return div_s64(scaled_dividend, y);
+}
+
+/**
+ * struct power_allocator_params - per-zone state of the power allocator
+ * @err_integral: error accumulated so far by the PID controller's
+ *                integral term.
+ * @prev_err: error seen in the previous PID iteration; the derivative
+ *            term is computed from the difference to it.
+ * @trip_switch_on: index of the first passive trip point of the thermal
+ *                  zone. The governor switches on when this trip point
+ *                  is crossed.
+ * @trip_max_desired_temperature: index of the last passive trip point of
+ *                                the thermal zone. Its temperature is
+ *                                the one we are controlling for.
+ */
+struct power_allocator_params {
+	s64 err_integral;
+	s32 prev_err;
+	int trip_switch_on;
+	int trip_max_desired_temperature;
+};
+
+/**
+ * pid_controller() - PID controller
+ * @tz: thermal zone we are operating in
+ * @current_temp: the current temperature in millicelsius
+ * @control_temp: the target temperature in millicelsius
+ * @max_allocatable_power: maximum allocatable power for this thermal zone
+ *
+ * This PID controller increases the available power budget so that the
+ * temperature of the thermal zone gets as close as possible to
+ * @control_temp and limits the power if it exceeds it. k_po is the
+ * proportional term when we are overshooting, k_pu is the
+ * proportional term when we are undershooting. integral_cutoff is a
+ * threshold below which we stop accumulating the error. The
+ * accumulated error is only valid if the requested power will make
+ * the system warmer. If the system is mostly idle, there's no point
+ * in accumulating positive error.
+ *
+ * The controller state (accumulated error and previous error) lives in
+ * @tz->governor_data and is updated on every call.
+ *
+ * Return: The power budget for the next period, clamped to the range
+ * 0..@max_allocatable_power.
+ */
+static u32 pid_controller(struct thermal_zone_device *tz,
+			  unsigned long current_temp,
+			  unsigned long control_temp,
+			  u32 max_allocatable_power)
+{
+	s64 p, i, d, power_range;
+	s64 max_power_frac;
+	s32 err;
+	struct power_allocator_params *params = tz->governor_data;
+
+	/*
+	 * Widen before shifting: a u32 power shifted left by FRAC_BITS
+	 * does not fit in 32 bits for large budgets.
+	 */
+	max_power_frac = int_to_frac((s64)max_allocatable_power);
+
+	err = ((s32)control_temp - (s32)current_temp);
+	err = int_to_frac(err);
+
+	/* Calculate the proportional term */
+	p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err);
+
+	/*
+	 * Calculate the integral term
+	 *
+	 * if the error is less than cut off allow integration (but
+	 * the integral is limited to max power)
+	 */
+	i = mul_frac(tz->tzp->k_i, params->err_integral);
+
+	if (err < int_to_frac(tz->tzp->integral_cutoff)) {
+		s64 i_next = i + mul_frac(tz->tzp->k_i, err);
+
+		if (abs64(i_next) < max_power_frac) {
+			i = i_next;
+			params->err_integral += err;
+		}
+	}
+
+	/*
+	 * Calculate the derivative term
+	 *
+	 * We do err - prev_err, so with a positive k_d, a decreasing
+	 * error (i.e. driving closer to the line) results in less
+	 * power being applied (slowing down the controller).
+	 *
+	 * The difference is divided by the passive polling interval. A
+	 * zone without one gives us nothing to divide by, so the term
+	 * is dropped rather than dividing by zero.
+	 */
+	d = 0;
+	if (tz->passive_delay) {
+		d = mul_frac(tz->tzp->k_d, err - params->prev_err);
+		d = div_frac(d, tz->passive_delay);
+	}
+	params->prev_err = err;
+
+	power_range = p + i + d;
+
+	/* feed-forward the known sustainable dissipatable power */
+	power_range = tz->tzp->sustainable_power + frac_to_int(power_range);
+
+	power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power);
+
+	trace_thermal_power_allocator_pid(tz, frac_to_int(err),
+					  frac_to_int(params->err_integral),
+					  frac_to_int(p), frac_to_int(i),
+					  frac_to_int(d), power_range);
+
+	return power_range;
+}
+
+/**
+ * divvy_up_power() - divvy the allocated power between the actors
+ * @req_power: each actor's requested power
+ * @max_power: each actor's maximum available power
+ * @num_actors: size of the @req_power, @max_power and @granted_power's array
+ * @total_req_power: sum of @req_power
+ * @power_range: total allocated power
+ * @granted_power: output array: each actor's granted power
+ * @extra_actor_power: an appropriately sized array to be used in the
+ *                     function as temporary storage of the extra power given
+ *                     to the actors
+ *
+ * This function divides the total allocated power (@power_range)
+ * fairly between the actors. It first tries to give each actor a
+ * share of the @power_range according to how much power it requested
+ * compared to the rest of the actors. For example, if only one actor
+ * requests power, then it receives all the @power_range. If
+ * three actors each requests 1mW, each receives a third of the
+ * @power_range.
+ *
+ * If any actor received more than their maximum power, then that
+ * surplus is re-divvied among the actors based on how far they are
+ * from their respective maximums.
+ *
+ * Granted power for each actor is written to @granted_power, which
+ * should've been allocated by the calling function.
+ */
+static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors,
+			   u32 total_req_power, u32 power_range,
+			   u32 *granted_power, u32 *extra_actor_power)
+{
+	u32 extra_power, capped_extra_power;
+	int i;
+
+	/*
+	 * Prevent division by 0 if none of the actors request power.
+	 */
+	if (!total_req_power)
+		total_req_power = 1;
+
+	capped_extra_power = 0;
+	extra_power = 0;
+	for (i = 0; i < num_actors; i++) {
+		/*
+		 * Widen one operand first: the product of two u32 values
+		 * would otherwise be truncated to 32 bits before it is
+		 * stored in the u64.
+		 */
+		u64 req_range = (u64)req_power[i] * power_range;
+
+		granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range,
+							 total_req_power);
+
+		if (granted_power[i] > max_power[i]) {
+			extra_power += granted_power[i] - max_power[i];
+			granted_power[i] = max_power[i];
+		}
+
+		extra_actor_power[i] = max_power[i] - granted_power[i];
+		capped_extra_power += extra_actor_power[i];
+	}
+
+	if (!extra_power)
+		return;
+
+	/*
+	 * Re-divvy the reclaimed extra among actors based on
+	 * how far they are from the max
+	 */
+	extra_power = min(extra_power, capped_extra_power);
+	if (!capped_extra_power)
+		return;
+
+	for (i = 0; i < num_actors; i++) {
+		/* same widening as above; the quotient still truncates */
+		u64 extra_range = (u64)extra_actor_power[i] * extra_power;
+
+		granted_power[i] += div_u64(extra_range, capped_extra_power);
+	}
+}
+
+/**
+ * allocate_power() - run one power allocation round for a thermal zone
+ * @tz: thermal zone we are operating in
+ * @current_temp: the current temperature of the zone
+ * @control_temp: the temperature the governor is controlling for
+ *
+ * Collects the requested and maximum power of every power actor bound to
+ * the control trip point, asks the PID controller for the power budget,
+ * splits that budget between the actors and hands each one its share.
+ * The whole round runs with @tz->lock held.
+ *
+ * An actor whose requested or maximum power cannot be read keeps its
+ * slot in the arrays with both values set to 0, so that the slots stay
+ * aligned with the actors when the granted power is applied.
+ *
+ * Return: 0 on success, -ENODEV if no power actor is bound to the
+ * control trip point, -ENOMEM if the working arrays could not be
+ * allocated.
+ */
+static int allocate_power(struct thermal_zone_device *tz,
+			  unsigned long current_temp,
+			  unsigned long control_temp)
+{
+	struct thermal_instance *instance;
+	struct power_allocator_params *params = tz->governor_data;
+	u32 *req_power, *max_power, *granted_power, *extra_actor_power;
+	u32 total_req_power, max_allocatable_power;
+	u32 total_granted_power, power_range;
+	int i, num_actors, total_weight, ret = 0;
+	int trip_max_desired_temperature = params->trip_max_desired_temperature;
+
+	mutex_lock(&tz->lock);
+
+	num_actors = 0;
+	total_weight = 0;
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		if ((instance->trip == trip_max_desired_temperature) &&
+		    cdev_is_power_actor(instance->cdev)) {
+			num_actors++;
+			total_weight += instance->weight;
+		}
+	}
+
+	/* Nothing to divide the budget between */
+	if (!num_actors) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	/*
+	 * We need to allocate four arrays of the same size:
+	 * req_power, max_power, granted_power and extra_actor_power.
+	 * They are going to be needed until this function returns.
+	 * Allocate them all in one go to simplify the allocation and
+	 * deallocation logic.
+	 */
+	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power));
+	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power));
+	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power));
+	req_power = devm_kcalloc(&tz->device, num_actors * 4,
+				 sizeof(*req_power), GFP_KERNEL);
+	if (!req_power) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	max_power = &req_power[num_actors];
+	granted_power = &req_power[2 * num_actors];
+	extra_actor_power = &req_power[3 * num_actors];
+
+	i = 0;
+	total_req_power = 0;
+	max_allocatable_power = 0;
+
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		int weight;
+		struct thermal_cooling_device *cdev = instance->cdev;
+
+		if (instance->trip != trip_max_desired_temperature)
+			continue;
+
+		if (!cdev_is_power_actor(cdev))
+			continue;
+
+		/*
+		 * Every power actor owns slot i, even if it cannot be
+		 * queried: the loop that applies the granted power below
+		 * advances its index for every actor, so skipping a slot
+		 * here would hand actors the share of their neighbour.
+		 */
+		if (cdev->ops->get_requested_power(cdev, tz, &req_power[i]) ||
+		    power_actor_get_max_power(cdev, tz, &max_power[i])) {
+			req_power[i] = 0;
+			max_power[i] = 0;
+			i++;
+			continue;
+		}
+
+		if (!total_weight)
+			weight = 1 << FRAC_BITS;
+		else
+			weight = instance->weight;
+
+		/* widen so the weighted product is not cut to 32 bits */
+		req_power[i] = frac_to_int((u64)weight * req_power[i]);
+
+		total_req_power += req_power[i];
+		max_allocatable_power += max_power[i];
+
+		i++;
+	}
+
+	power_range = pid_controller(tz, current_temp, control_temp,
+				     max_allocatable_power);
+
+	divvy_up_power(req_power, max_power, num_actors, total_req_power,
+		       power_range, granted_power, extra_actor_power);
+
+	total_granted_power = 0;
+	i = 0;
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		if (instance->trip != trip_max_desired_temperature)
+			continue;
+
+		if (!cdev_is_power_actor(instance->cdev))
+			continue;
+
+		power_actor_set_power(instance->cdev, instance,
+				      granted_power[i]);
+		total_granted_power += granted_power[i];
+
+		i++;
+	}
+
+	trace_thermal_power_allocator(tz, req_power, total_req_power,
+				      granted_power, total_granted_power,
+				      num_actors, power_range,
+				      max_allocatable_power, current_temp,
+				      (s32)control_temp - (s32)current_temp);
+
+	devm_kfree(&tz->device, req_power);
+unlock:
+	mutex_unlock(&tz->lock);
+
+	return ret;
+}
+
+/*
+ * Locate the two trip points the governor works with: the first passive
+ * trip becomes @params->trip_switch_on and the last passive trip of the
+ * run that follows it becomes @params->trip_max_desired_temperature.
+ * Scanning stops at the first non-passive trip after the first passive
+ * one.
+ *
+ * Returns 0 when both were found, the error of get_trip_type() if that
+ * fails, or -EINVAL when there is no second passive trip.
+ */
+static int get_governor_trips(struct thermal_zone_device *tz,
+			      struct power_allocator_params *params)
+{
+	bool have_switch_on = false;
+	int control_trip = -1;
+	int idx;
+
+	for (idx = 0; idx < tz->trips; idx++) {
+		enum thermal_trip_type type;
+		int err = tz->ops->get_trip_type(tz, idx, &type);
+
+		if (err)
+			return err;
+
+		if (type != THERMAL_TRIP_PASSIVE) {
+			/* the run of passive trips is over */
+			if (have_switch_on)
+				break;
+			continue;
+		}
+
+		if (have_switch_on) {
+			control_trip = idx;
+		} else {
+			params->trip_switch_on = idx;
+			have_switch_on = true;
+		}
+	}
+
+	if (control_trip == -1)
+		return -EINVAL;
+
+	params->trip_max_desired_temperature = control_trip;
+
+	return 0;
+}
+
+/* Forget the PID history: previous error and accumulated error. */
+static void reset_pid_controller(struct power_allocator_params *params)
+{
+	params->prev_err = 0;
+	params->err_integral = 0;
+}
+
+/*
+ * Set the target of every power actor bound to the control trip point
+ * back to 0 and push that update to its cooling device.
+ */
+static void allow_maximum_power(struct thermal_zone_device *tz)
+{
+	struct power_allocator_params *params = tz->governor_data;
+	struct thermal_instance *inst;
+
+	list_for_each_entry(inst, &tz->thermal_instances, tz_node) {
+		if ((inst->trip == params->trip_max_desired_temperature) &&
+		    cdev_is_power_actor(inst->cdev)) {
+			inst->target = 0;
+			inst->cdev->updated = false;
+			thermal_cdev_update(inst->cdev);
+		}
+	}
+}
+
+/**
+ * power_allocator_bind() - bind the power_allocator governor to a thermal zone
+ * @tz: thermal zone to bind it to
+ *
+ * Check that the thermal zone is valid for this governor, that is, it
+ * provides a sustainable_power and a trip setup the governor can use
+ * (a switch-on trip and a control trip). If so, fill in defaults for
+ * the PID constants the zone did not provide, initialize the PID
+ * controller state and attach it to the thermal zone.
+ *
+ * Return: 0 on success, -EINVAL if sustainable_power is missing or the
+ * trips were invalid, -ENOMEM if we ran out of memory, or the error
+ * returned by the zone's get_trip_temp() callback.
+ */
+static int power_allocator_bind(struct thermal_zone_device *tz)
+{
+	int ret;
+	struct power_allocator_params *params;
+	unsigned long switch_on_temp, control_temp;
+	u32 temperature_threshold;
+
+	if (!tz->tzp || !tz->tzp->sustainable_power) {
+		dev_err(&tz->device,
+			"power_allocator: missing sustainable_power\n");
+		return -EINVAL;
+	}
+
+	params = devm_kzalloc(&tz->device, sizeof(*params), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	ret = get_governor_trips(tz, params);
+	if (ret) {
+		dev_err(&tz->device,
+			"thermal zone %s has wrong trip setup for power allocator\n",
+			tz->type);
+		goto free;
+	}
+
+	ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+				     &switch_on_temp);
+	if (ret)
+		goto free;
+
+	ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
+				     &control_temp);
+	if (ret)
+		goto free;
+
+	temperature_threshold = control_temp - switch_on_temp;
+
+	/*
+	 * The default proportional constants are derived from the
+	 * distance between the two trip temperatures. If both trips sit
+	 * at the same temperature there is nothing to derive them from,
+	 * so leave k_po and k_pu as the zone provided them instead of
+	 * dividing by zero.
+	 */
+	if (temperature_threshold) {
+		tz->tzp->k_po = tz->tzp->k_po ?:
+			int_to_frac(tz->tzp->sustainable_power) /
+			temperature_threshold;
+		tz->tzp->k_pu = tz->tzp->k_pu ?:
+			int_to_frac(2 * tz->tzp->sustainable_power) /
+			temperature_threshold;
+	}
+	tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000;
+	/*
+	 * The default for k_d and integral_cutoff is 0, so we can
+	 * leave them as they are.
+	 */
+
+	reset_pid_controller(params);
+
+	tz->governor_data = params;
+
+	return 0;
+
+free:
+	devm_kfree(&tz->device, params);
+	return ret;
+}
+
+/* Detach the governor from @tz and release its per-zone state. */
+static void power_allocator_unbind(struct thermal_zone_device *tz)
+{
+	struct power_allocator_params *params = tz->governor_data;
+
+	dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
+
+	devm_kfree(&tz->device, params);
+	tz->governor_data = NULL;
+}
+
+/*
+ * Governor throttle callback. Does nothing unless @trip is the control
+ * trip point. Below the switch-on temperature the PID state is reset and
+ * all power actors are released; at or above it a power allocation round
+ * is run against the control temperature.
+ */
+static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
+{
+	struct power_allocator_params *params = tz->governor_data;
+	unsigned long current_temp, switch_on_temp, control_temp;
+	int err;
+
+	/*
+	 * The core calls us once per trip point, but one round of
+	 * calculations per update is enough.
+	 */
+	if (trip != params->trip_max_desired_temperature)
+		return 0;
+
+	err = thermal_zone_get_temp(tz, &current_temp);
+	if (err) {
+		dev_warn(&tz->device, "Failed to get temperature: %d\n", err);
+		return err;
+	}
+
+	err = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+				     &switch_on_temp);
+	if (err) {
+		dev_warn(&tz->device,
+			 "Failed to get switch on temperature: %d\n", err);
+		return err;
+	}
+
+	if (current_temp >= switch_on_temp) {
+		tz->passive = 1;
+
+		err = tz->ops->get_trip_temp(tz,
+					     params->trip_max_desired_temperature,
+					     &control_temp);
+		if (err) {
+			dev_warn(&tz->device,
+				 "Failed to get the maximum desired temperature: %d\n",
+				 err);
+			return err;
+		}
+
+		return allocate_power(tz, current_temp, control_temp);
+	}
+
+	/* still below the switch-on trip: no throttling needed */
+	tz->passive = 0;
+	reset_pid_controller(params);
+	allow_maximum_power(tz);
+
+	return 0;
+}
+
+/* Governor descriptor handed to the thermal core on registration. */
+static struct thermal_governor thermal_gov_power_allocator = {
+	.name		= "power_allocator",
+	.throttle	= power_allocator_throttle,
+	.bind_to_tz	= power_allocator_bind,
+	.unbind_from_tz	= power_allocator_unbind,
+};
+
+/* Register the power_allocator governor with the thermal core. */
+int thermal_gov_power_allocator_register(void)
+{
+	struct thermal_governor *gov = &thermal_gov_power_allocator;
+
+	return thermal_register_governor(gov);
+}
+
+/* Remove the power_allocator governor from the thermal core. */
+void thermal_gov_power_allocator_unregister(void)
+{
+	struct thermal_governor *gov = &thermal_gov_power_allocator;
+
+	thermal_unregister_governor(gov);
+}
diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig
index f760389a204c..c43306ecc0ab 100644
--- a/drivers/thermal/samsung/Kconfig
+++ b/drivers/thermal/samsung/Kconfig
@@ -1,6 +1,6 @@
config EXYNOS_THERMAL
tristate "Exynos thermal management unit driver"
- depends on ARCH_HAS_BANDGAP && OF
+ depends on OF
help
If you say yes here you get support for the TMU (Thermal Management
Unit) driver for SAMSUNG EXYNOS series of SoCs. This driver initialises
diff --git a/drivers/thermal/samsung/exynos_thermal_common.c b/drivers/thermal/samsung/exynos_thermal_common.c
index b6be572704a4..7c97db1cf829 100644
--- a/drivers/thermal/samsung/exynos_thermal_common.c
+++ b/drivers/thermal/samsung/exynos_thermal_common.c
@@ -163,7 +163,7 @@ static int exynos_bind(struct thermal_zone_device *thermal,
case MONITOR_ZONE:
case WARN_ZONE:
if (thermal_zone_bind_cooling_device(thermal, i, cdev,
- level, 0)) {
+ level, 0, THERMAL_WEIGHT_DEFAULT)) {
dev_err(data->dev,
"error unbinding cdev inst=%d\n", i);
ret = -EINVAL;
@@ -371,9 +371,11 @@ int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
th_zone->cool_dev[th_zone->cool_dev_size] =
cpufreq_cooling_register(&mask_val);
if (IS_ERR(th_zone->cool_dev[th_zone->cool_dev_size])) {
- dev_err(sensor_conf->dev,
- "Failed to register cpufreq cooling device\n");
- ret = -EINVAL;
+ ret = PTR_ERR(th_zone->cool_dev[th_zone->cool_dev_size]);
+ if (ret != -EPROBE_DEFER)
+ dev_err(sensor_conf->dev,
+ "Failed to register cpufreq cooling device: %d\n",
+ ret);
goto err_unregister;
}
th_zone->cool_dev_size++;
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index 49c09243fd38..2afca9bf40d5 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -683,7 +683,10 @@ static int exynos_tmu_probe(struct platform_device *pdev)
/* Register the sensor with thermal management interface */
ret = exynos_register_thermal(sensor_conf);
if (ret) {
- dev_err(&pdev->dev, "Failed to register thermal interface\n");
+ if (ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "Failed to register thermal interface: %d\n",
+ ret);
goto err_clk;
}
data->reg_conf = sensor_conf;
diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c
new file mode 100644
index 000000000000..9197fc05c5cc
--- /dev/null
+++ b/drivers/thermal/tegra_soctherm.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Author:
+ * Mikko Perttunen <mperttunen@nvidia.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/thermal.h>
+
+#include <soc/tegra/fuse.h>
+
+/* Per-sensor registers; enable_tsensor() adds these offsets to tegra->regs + sensor->base. */
+#define SENSOR_CONFIG0 0
+#define SENSOR_CONFIG0_STOP BIT(0)
+#define SENSOR_CONFIG0_TALL_SHIFT 8
+#define SENSOR_CONFIG0_TCALC_OVER BIT(4)
+#define SENSOR_CONFIG0_OVER BIT(3)
+#define SENSOR_CONFIG0_CPTR_OVER BIT(2)
+
+#define SENSOR_CONFIG1 4
+#define SENSOR_CONFIG1_TSAMPLE_SHIFT 0
+#define SENSOR_CONFIG1_TIDDQ_EN_SHIFT 15
+#define SENSOR_CONFIG1_TEN_COUNT_SHIFT 24
+#define SENSOR_CONFIG1_TEMP_ENABLE BIT(31)
+
+#define SENSOR_CONFIG2 8
+#define SENSOR_CONFIG2_THERMA_SHIFT 16
+#define SENSOR_CONFIG2_THERMB_SHIFT 0
+
+/* Block-wide registers; the probe routine addresses them relative to tegra->regs. */
+#define SENSOR_PDIV 0x1c0
+#define SENSOR_PDIV_T124 0x8888
+#define SENSOR_HOTSPOT_OFF 0x1c4
+#define SENSOR_HOTSPOT_OFF_T124 0x00060600
+#define SENSOR_TEMP1 0x1c8
+#define SENSOR_TEMP2 0x1cc
+
+/*
+ * Layout of one 16-bit thermctl readback field, as decoded by
+ * translate_temp(): bits 15:8 carry whole degrees Celsius, bit 7 adds
+ * 0.5 C and bit 0 negates the final value (TTTTTTTT H______N).
+ */
+#define SENSOR_TEMP_MASK			0xffff
+#define READBACK_VALUE_MASK			0xff00
+#define READBACK_VALUE_SHIFT			8
+#define READBACK_ADD_HALF			BIT(7)
+#define READBACK_NEGATE				BIT(0)
+
+/* Fuse offsets handed to tegra_fuse_readl() by calculate_shared_calibration(). */
+#define FUSE_TSENSOR8_CALIB 0x180
+#define FUSE_SPARE_REALIGNMENT_REG_0 0x1fc
+
+/*
+ * Fields of a per-sensor calibration fuse (tegra_tsensor.calib_fuse_offset).
+ * Only the FT mask/shift pair is referenced in this file.
+ */
+#define FUSE_TSENSOR_CALIB_CP_TS_BASE_MASK 0x1fff
+#define FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK (0x1fff << 13)
+#define FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT 13
+
+/* Fields of the FUSE_TSENSOR8_CALIB word that yield base_cp and base_ft. */
+#define FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK 0x3ff
+#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK (0x7ff << 10)
+#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT 10
+
+/*
+ * NOTE(review): calculate_shared_calibration() applies the SHIFT_FT
+ * mask/shift to the FUSE_TSENSOR8_CALIB word, and the SHIFT_CP mask is not
+ * referenced in this file (sign_extend32(val, 5) is used instead) - confirm
+ * against the fuse map.
+ */
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_CP_MASK 0x3f
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK (0x1f << 21)
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT 21
+
+/* Nominal calibration points; calculate_shared_calibration() doubles them before adding the fuse adjustments. */
+#define NOMINAL_CALIB_FT_T124 105
+#define NOMINAL_CALIB_CP_T124 25
+
+/*
+ * Settings shared by a group of raw sensors. enable_tsensor() writes tall,
+ * tsample, tiddq_en and ten_count into SENSOR_CONFIG0/1;
+ * calculate_tsensor_calibration() uses pdiv * tsample_ate and
+ * tsample * pdiv_ate to scale the THERMA coefficient.
+ */
+struct tegra_tsensor_configuration {
+ u32 tall, tsample, tiddq_en, ten_count, pdiv, tsample_ate, pdiv_ate;
+};
+
+/*
+ * Description of one raw sensor: its shared configuration, the offset of its
+ * register block from the soctherm base (base) and the fuse offset read for
+ * its calibration data (calib_fuse_offset).
+ */
+struct tegra_tsensor {
+ const struct tegra_tsensor_configuration *config;
+ u32 base, calib_fuse_offset;
+ /* Correction values used to modify values read from calibration fuses */
+ s32 fuse_corr_alpha, fuse_corr_beta;
+};
+
+/*
+ * Per-zone data handed to tegra_thermctl_get_temp(): the register to read
+ * and the right shift that brings the zone's field down to bit 0.
+ */
+struct tegra_thermctl_zone {
+ void __iomem *reg;
+ unsigned int shift;
+};
+
+/* Configuration referenced by every entry of t124_tsensors[]. */
+static const struct tegra_tsensor_configuration t124_tsensor_config = {
+ .tall = 16300,
+ .tsample = 120,
+ .tiddq_en = 1,
+ .ten_count = 1,
+ .pdiv = 8,
+ .tsample_ate = 480,
+ .pdiv_ate = 8
+};
+
+/*
+ * Raw sensors enabled by the probe routine, one entry each: register block
+ * offset, calibration fuse offset and the two fuse correction values.
+ * calculate_tsensor_calibration() divides the corrected results by 1000000.
+ */
+static const struct tegra_tsensor t124_tsensors[] = {
+ {
+ .config = &t124_tsensor_config,
+ .base = 0xc0,
+ .calib_fuse_offset = 0x098,
+ .fuse_corr_alpha = 1135400,
+ .fuse_corr_beta = -6266900,
+ },
+ {
+ .config = &t124_tsensor_config,
+ .base = 0xe0,
+ .calib_fuse_offset = 0x084,
+ .fuse_corr_alpha = 1122220,
+ .fuse_corr_beta = -5700700,
+ },
+ {
+ .config = &t124_tsensor_config,
+ .base = 0x100,
+ .calib_fuse_offset = 0x088,
+ .fuse_corr_alpha = 1127000,
+ .fuse_corr_beta = -6768200,
+ },
+ {
+ .config = &t124_tsensor_config,
+ .base = 0x120,
+ .calib_fuse_offset = 0x12c,
+ .fuse_corr_alpha = 1110900,
+ .fuse_corr_beta = -6232000,
+ },
+ {
+ .config = &t124_tsensor_config,
+ .base = 0x140,
+ .calib_fuse_offset = 0x158,
+ .fuse_corr_alpha = 1122300,
+ .fuse_corr_beta = -5936400,
+ },
+ {
+ .config = &t124_tsensor_config,
+ .base = 0x160,
+ .calib_fuse_offset = 0x15c,
+ .fuse_corr_alpha = 1145700,
+ .fuse_corr_beta = -7124600,
+ },
+ {
+ .config = &t124_tsensor_config,
+ .base = 0x180,
+ .calib_fuse_offset = 0x154,
+ .fuse_corr_alpha = 1120100,
+ .fuse_corr_beta = -6000500,
+ },
+ {
+ .config = &t124_tsensor_config,
+ .base = 0x1a0,
+ .calib_fuse_offset = 0x160,
+ .fuse_corr_alpha = 1106500,
+ .fuse_corr_beta = -6729300,
+ },
+};
+
+/*
+ * Driver state allocated in the probe routine: the reset line, the two
+ * clocks, the mapped register window and the thermal zones registered for
+ * the thermctl sensors.
+ */
+struct tegra_soctherm {
+ struct reset_control *reset;
+ struct clk *clock_tsensor;
+ struct clk *clock_soctherm;
+ void __iomem *regs;
+
+ struct thermal_zone_device *thermctl_tzs[4];
+};
+
+/*
+ * Calibration data common to all sensors, filled in by
+ * calculate_shared_calibration() and consumed by
+ * calculate_tsensor_calibration().
+ */
+struct tsensor_shared_calibration {
+ u32 base_cp, base_ft;
+ u32 actual_temp_cp, actual_temp_ft;
+};
+
+/*
+ * Fill @r with the calibration data shared by all sensors.
+ *
+ * base_cp/base_ft and the FT adjustment come from FUSE_TSENSOR8_CALIB, the
+ * CP adjustment from FUSE_SPARE_REALIGNMENT_REG_0. The adjustments are
+ * sign-extended and added to twice the nominal calibration temperatures.
+ *
+ * Returns 0 on success or the error reported by tegra_fuse_readl(); base_cp
+ * and base_ft may already be written when the second fuse read fails.
+ */
+static int calculate_shared_calibration(struct tsensor_shared_calibration *r)
+{
+	u32 fuse, ft_field, ft_adjust, cp_adjust;
+	int ret;
+
+	ret = tegra_fuse_readl(FUSE_TSENSOR8_CALIB, &fuse);
+	if (ret)
+		return ret;
+
+	r->base_cp = fuse & FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK;
+	r->base_ft = (fuse & FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK) >>
+		     FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT;
+
+	/* 5-bit signed field: sign bit is bit 4 once shifted down. */
+	ft_field = (fuse & FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK) >>
+		   FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT;
+	ft_adjust = sign_extend32(ft_field, 4);
+
+	ret = tegra_fuse_readl(FUSE_SPARE_REALIGNMENT_REG_0, &fuse);
+	if (ret)
+		return ret;
+
+	/* 6-bit signed field in the low bits of the realignment fuse. */
+	cp_adjust = sign_extend32(fuse, 5);
+
+	r->actual_temp_cp = 2 * NOMINAL_CALIB_CP_T124 + cp_adjust;
+	r->actual_temp_ft = 2 * NOMINAL_CALIB_FT_T124 + ft_adjust;
+
+	return 0;
+}
+
+/*
+ * Divide @a by @b with 16 extra fractional bits of working precision:
+ * the numerator is shifted up, nudged by half a unit of the doubled scale
+ * and the quotient is shifted back down.
+ */
+static s64 div64_s64_precise(s64 a, s64 b)
+{
+	s64 scaled = a << 16;
+	s64 quotient = div64_s64(2 * scaled + 1, 2 * b);
+
+	return quotient >> 16;
+}
+
+/*
+ * Compute the SENSOR_CONFIG2 value (THERMA/THERMB coefficients) for @sensor.
+ *
+ * Reads the sensor's calibration fuse, combines it with the shared
+ * calibration in @shared and the sensor's fuse correction factors, and
+ * stores the packed result in @calib.
+ *
+ * Returns 0 on success or the error reported by tegra_fuse_readl().
+ */
+static int
+calculate_tsensor_calibration(const struct tegra_tsensor *sensor,
+ const struct tsensor_shared_calibration *shared,
+ u32 *calib)
+{
+ u32 val;
+ s32 actual_tsensor_ft, actual_tsensor_cp, delta_sens, delta_temp,
+ mult, div;
+ s16 therma, thermb;
+ s64 tmp;
+ int err;
+
+ err = tegra_fuse_readl(sensor->calib_fuse_offset, &val);
+ if (err)
+ return err;
+
+ /* Low 13 bits of the fuse are a signed CP offset; bits 25:13 a signed FT offset. */
+ actual_tsensor_cp = (shared->base_cp * 64) + sign_extend32(val, 12);
+ val = (val & FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK)
+ >> FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT;
+ actual_tsensor_ft = (shared->base_ft * 32) + sign_extend32(val, 12);
+
+ /*
+ * NOTE(review): delta_sens is derived purely from fuse data and is used
+ * as a divisor below without a zero check - confirm the fuses guarantee
+ * distinct CP and FT readings.
+ */
+ delta_sens = actual_tsensor_ft - actual_tsensor_cp;
+ delta_temp = shared->actual_temp_ft - shared->actual_temp_cp;
+
+ mult = sensor->config->pdiv * sensor->config->tsample_ate;
+ div = sensor->config->tsample * sensor->config->pdiv_ate;
+
+ /* Results are truncated to s16 on assignment before the correction step. */
+ therma = div64_s64_precise((s64) delta_temp * (1LL << 13) * mult,
+ (s64) delta_sens * div);
+
+ tmp = (s64)actual_tsensor_ft * shared->actual_temp_cp -
+ (s64)actual_tsensor_cp * shared->actual_temp_ft;
+ thermb = div64_s64_precise(tmp, (s64)delta_sens);
+
+ /* Apply the per-sensor correction; alpha and beta are scaled by 1000000. */
+ therma = div64_s64_precise((s64)therma * sensor->fuse_corr_alpha,
+ (s64)1000000LL);
+ thermb = div64_s64_precise((s64)thermb * sensor->fuse_corr_alpha +
+ sensor->fuse_corr_beta, (s64)1000000LL);
+
+ /* Pack both coefficients as 16-bit fields of SENSOR_CONFIG2. */
+ *calib = ((u16)therma << SENSOR_CONFIG2_THERMA_SHIFT) |
+ ((u16)thermb << SENSOR_CONFIG2_THERMB_SHIFT);
+
+ return 0;
+}
+
+/*
+ * Program one raw sensor: compute its calibration word, then write
+ * SENSOR_CONFIG0, SENSOR_CONFIG1 (with the enable bit set) and
+ * SENSOR_CONFIG2 in that order.
+ *
+ * Returns 0 on success or the error from calculate_tsensor_calibration();
+ * no register is written when the calibration cannot be computed.
+ */
+static int enable_tsensor(struct tegra_soctherm *tegra,
+ const struct tegra_tsensor *sensor,
+ const struct tsensor_shared_calibration *shared)
+{
+ void __iomem *base = tegra->regs + sensor->base;
+ unsigned int val;
+ u32 calib;
+ int err;
+
+ err = calculate_tsensor_calibration(sensor, shared, &calib);
+ if (err)
+ return err;
+
+ val = sensor->config->tall << SENSOR_CONFIG0_TALL_SHIFT;
+ writel(val, base + SENSOR_CONFIG0);
+
+ /* The tsample field is programmed as the configured value minus one. */
+ val = (sensor->config->tsample - 1) << SENSOR_CONFIG1_TSAMPLE_SHIFT;
+ val |= sensor->config->tiddq_en << SENSOR_CONFIG1_TIDDQ_EN_SHIFT;
+ val |= sensor->config->ten_count << SENSOR_CONFIG1_TEN_COUNT_SHIFT;
+ val |= SENSOR_CONFIG1_TEMP_ENABLE;
+ writel(val, base + SENSOR_CONFIG1);
+
+ writel(calib, base + SENSOR_CONFIG2);
+
+ return 0;
+}
+
+/*
+ * Convert a soctherm readback field to millicelsius.
+ *
+ * The 16-bit readback format is
+ *   TTTTTTTT H______N
+ * with the T bits holding whole degrees Celsius, H adding 0.5 Celsius
+ * and N negating the final value. The three parts are picked out with
+ * the READBACK_* masks.
+ */
+static long translate_temp(u16 val)
+{
+	long millicelsius =
+		((val & READBACK_VALUE_MASK) >> READBACK_VALUE_SHIFT) * 1000;
+
+	if (val & READBACK_ADD_HALF)
+		millicelsius += 500;
+
+	return (val & READBACK_NEGATE) ? -millicelsius : millicelsius;
+}
+
+/*
+ * .get_temp callback: @data is the zone's struct tegra_thermctl_zone.
+ * Reads the zone register, isolates the zone's 16-bit field and stores
+ * the translated temperature in @out_temp. Always returns 0.
+ */
+static int tegra_thermctl_get_temp(void *data, long *out_temp)
+{
+	struct tegra_thermctl_zone *zone = data;
+	u32 raw = readl(zone->reg);
+
+	*out_temp = translate_temp((raw >> zone->shift) & SENSOR_TEMP_MASK);
+
+	return 0;
+}
+
+/* Sensor callbacks passed to thermal_zone_of_sensor_register(); only get_temp is provided. */
+static const struct thermal_zone_of_device_ops tegra_of_thermal_ops = {
+ .get_temp = tegra_thermctl_get_temp,
+};
+
+/* Device tree compatibles handled by this driver. */
+static const struct of_device_id tegra_soctherm_of_match[] = {
+ { .compatible = "nvidia,tegra124-soctherm" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, tegra_soctherm_of_match);
+
+/* Location of one thermctl temperature field: register offset and bit shift. */
+struct thermctl_zone_desc {
+ unsigned int offset;
+ unsigned int shift;
+};
+
+/*
+ * Zone table indexed by the sensor id used at registration time: each of
+ * SENSOR_TEMP1/SENSOR_TEMP2 carries two 16-bit fields, at shift 16 and 0.
+ */
+static const struct thermctl_zone_desc t124_thermctl_temp_zones[] = {
+ { SENSOR_TEMP1, 16 },
+ { SENSOR_TEMP2, 16 },
+ { SENSOR_TEMP1, 0 },
+ { SENSOR_TEMP2, 0 }
+};
+
+/*
+ * Probe the soctherm block: map its registers, acquire the reset line and
+ * both clocks, bring the block out of reset, program every raw sensor with
+ * its fuse-derived calibration and register one thermal zone sensor per
+ * entry of t124_thermctl_temp_zones[].
+ *
+ * Returns 0 on success or a negative errno. On failure after the clocks
+ * were enabled, already registered zones are unregistered and the clocks
+ * are disabled again.
+ */
+static int tegra_soctherm_probe(struct platform_device *pdev)
+{
+	struct tegra_soctherm *tegra;
+	struct thermal_zone_device *tz;
+	struct tsensor_shared_calibration shared_calib;
+	struct resource *res;
+	unsigned int i;
+	int err;
+
+	const struct tegra_tsensor *tsensors = t124_tsensors;
+
+	tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL);
+	if (!tegra)
+		return -ENOMEM;
+
+	/*
+	 * tegra_soctherm_remove() retrieves the state with
+	 * platform_get_drvdata(); without this it would dereference NULL.
+	 */
+	platform_set_drvdata(pdev, tegra);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tegra->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(tegra->regs))
+		return PTR_ERR(tegra->regs);
+
+	tegra->reset = devm_reset_control_get(&pdev->dev, "soctherm");
+	if (IS_ERR(tegra->reset)) {
+		dev_err(&pdev->dev, "can't get soctherm reset\n");
+		return PTR_ERR(tegra->reset);
+	}
+
+	tegra->clock_tsensor = devm_clk_get(&pdev->dev, "tsensor");
+	if (IS_ERR(tegra->clock_tsensor)) {
+		dev_err(&pdev->dev, "can't get tsensor clock\n");
+		return PTR_ERR(tegra->clock_tsensor);
+	}
+
+	tegra->clock_soctherm = devm_clk_get(&pdev->dev, "soctherm");
+	if (IS_ERR(tegra->clock_soctherm)) {
+		dev_err(&pdev->dev, "can't get soctherm clock\n");
+		return PTR_ERR(tegra->clock_soctherm);
+	}
+
+	/* Hold the block in reset while its clocks are being enabled. */
+	reset_control_assert(tegra->reset);
+
+	err = clk_prepare_enable(tegra->clock_soctherm);
+	if (err)
+		return err;
+
+	err = clk_prepare_enable(tegra->clock_tsensor);
+	if (err) {
+		clk_disable_unprepare(tegra->clock_soctherm);
+		return err;
+	}
+
+	reset_control_deassert(tegra->reset);
+
+	/* Initialize raw sensors */
+
+	err = calculate_shared_calibration(&shared_calib);
+	if (err)
+		goto disable_clocks;
+
+	for (i = 0; i < ARRAY_SIZE(t124_tsensors); ++i) {
+		err = enable_tsensor(tegra, tsensors + i, &shared_calib);
+		if (err)
+			goto disable_clocks;
+	}
+
+	writel(SENSOR_PDIV_T124, tegra->regs + SENSOR_PDIV);
+	writel(SENSOR_HOTSPOT_OFF_T124, tegra->regs + SENSOR_HOTSPOT_OFF);
+
+	/* Initialize thermctl sensors */
+
+	for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) {
+		struct tegra_thermctl_zone *zone =
+			devm_kzalloc(&pdev->dev, sizeof(*zone), GFP_KERNEL);
+		if (!zone) {
+			err = -ENOMEM;
+			goto unregister_tzs;
+		}
+
+		zone->reg = tegra->regs + t124_thermctl_temp_zones[i].offset;
+		zone->shift = t124_thermctl_temp_zones[i].shift;
+
+		tz = thermal_zone_of_sensor_register(&pdev->dev, i, zone,
+						     &tegra_of_thermal_ops);
+		if (IS_ERR(tz)) {
+			err = PTR_ERR(tz);
+			dev_err(&pdev->dev, "failed to register sensor: %d\n",
+				err);
+			goto unregister_tzs;
+		}
+
+		tegra->thermctl_tzs[i] = tz;
+	}
+
+	return 0;
+
+unregister_tzs:
+	/* i is the index that failed; undo only the zones registered before it. */
+	while (i--)
+		thermal_zone_of_sensor_unregister(&pdev->dev,
+						  tegra->thermctl_tzs[i]);
+
+disable_clocks:
+	clk_disable_unprepare(tegra->clock_tsensor);
+	clk_disable_unprepare(tegra->clock_soctherm);
+
+	return err;
+}
+
+/*
+ * Tear the driver down: unregister every thermctl zone sensor, then disable
+ * the tsensor and soctherm clocks. The driver state is fetched with
+ * platform_get_drvdata(). Always returns 0.
+ */
+static int tegra_soctherm_remove(struct platform_device *pdev)
+{
+	struct tegra_soctherm *tegra = platform_get_drvdata(pdev);
+	struct thermal_zone_device **tz = tegra->thermctl_tzs;
+	struct thermal_zone_device **end =
+		tz + ARRAY_SIZE(tegra->thermctl_tzs);
+
+	for (; tz < end; tz++)
+		thermal_zone_of_sensor_unregister(&pdev->dev, *tz);
+
+	clk_disable_unprepare(tegra->clock_tsensor);
+	clk_disable_unprepare(tegra->clock_soctherm);
+
+	return 0;
+}
+
+/* Platform driver glue: matched through tegra_soctherm_of_match. */
+static struct platform_driver tegra_soctherm_driver = {
+ .probe = tegra_soctherm_probe,
+ .remove = tegra_soctherm_remove,
+ .driver = {
+ .name = "tegra-soctherm",
+ .of_match_table = tegra_soctherm_of_match,
+ },
+};
+module_platform_driver(tegra_soctherm_driver);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra SOCTHERM thermal management driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 488e9bfd996b..249b6123d65e 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -75,6 +75,58 @@ static struct thermal_governor *__find_governor(const char *name)
return NULL;
}
+/**
+ * bind_previous_governor() - re-bind the governor a thermal zone already had
+ * @tz:	a valid pointer to a struct thermal_zone_device
+ * @failed_gov_name:	name of the governor whose bind attempt failed
+ *
+ * Used after a new governor failed to bind. If the zone's current governor
+ * has a bind_to_tz() callback it is called again; should that fail too, the
+ * failure is logged and the zone is left without a governor.
+ */
+static void bind_previous_governor(struct thermal_zone_device *tz,
+				   const char *failed_gov_name)
+{
+	struct thermal_governor *prev = tz->governor;
+
+	/* Nothing to re-bind without a governor or without a bind hook. */
+	if (!prev || !prev->bind_to_tz)
+		return;
+
+	if (!prev->bind_to_tz(tz))
+		return;
+
+	dev_err(&tz->device,
+		"governor %s failed to bind and the previous one (%s) failed to bind again, thermal zone %s has no governor\n",
+		failed_gov_name, prev->name, tz->type);
+	tz->governor = NULL;
+}
+
+/**
+ * thermal_set_governor() - replace the governor of a thermal zone
+ * @tz:	a valid pointer to a struct thermal_zone_device
+ * @new_gov:	the governor to install, may be NULL
+ *
+ * Unbinds the zone's current governor (when it has an unbind_from_tz()
+ * callback), then binds @new_gov (when it has a bind_to_tz() callback) and
+ * records it in @tz. If binding @new_gov fails, the previous governor is
+ * bound again and @tz->governor is not switched to @new_gov.
+ *
+ * Return: 0 on success, the error from @new_gov's bind_to_tz() otherwise.
+ */
+static int thermal_set_governor(struct thermal_zone_device *tz,
+				struct thermal_governor *new_gov)
+{
+	struct thermal_governor *old_gov = tz->governor;
+	int err;
+
+	if (old_gov && old_gov->unbind_from_tz)
+		old_gov->unbind_from_tz(tz);
+
+	/* No governor or no bind hook: the switch cannot fail. */
+	if (!new_gov || !new_gov->bind_to_tz) {
+		tz->governor = new_gov;
+		return 0;
+	}
+
+	err = new_gov->bind_to_tz(tz);
+	if (err) {
+		bind_previous_governor(tz, new_gov->name);
+		return err;
+	}
+
+	tz->governor = new_gov;
+
+	return 0;
+}
+
int thermal_register_governor(struct thermal_governor *governor)
{
int err;
@@ -107,8 +159,15 @@ int thermal_register_governor(struct thermal_governor *governor)
name = pos->tzp->governor_name;
- if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH))
- pos->governor = governor;
+ if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) {
+ int ret;
+
+ ret = thermal_set_governor(pos, governor);
+ if (ret)
+ dev_err(&pos->device,
+ "Failed to set governor %s for thermal zone %s: %d\n",
+ governor->name, pos->type, ret);
+ }
}
mutex_unlock(&thermal_list_lock);
@@ -134,7 +193,7 @@ void thermal_unregister_governor(struct thermal_governor *governor)
list_for_each_entry(pos, &thermal_tz_list, node) {
if (!strncasecmp(pos->governor->name, governor->name,
THERMAL_NAME_LENGTH))
- pos->governor = NULL;
+ thermal_set_governor(pos, NULL);
}
mutex_unlock(&thermal_list_lock);
@@ -218,7 +277,8 @@ static void print_bind_err_msg(struct thermal_zone_device *tz,
static void __bind(struct thermal_zone_device *tz, int mask,
struct thermal_cooling_device *cdev,
- unsigned long *limits)
+ unsigned long *limits,
+ unsigned int weight)
{
int i, ret;
@@ -233,7 +293,8 @@ static void __bind(struct thermal_zone_device *tz, int mask,
upper = limits[i * 2 + 1];
}
ret = thermal_zone_bind_cooling_device(tz, i, cdev,
- upper, lower);
+ upper, lower,
+ weight);
if (ret)
print_bind_err_msg(tz, cdev, ret);
}
@@ -280,7 +341,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev)
continue;
tzp->tbp[i].cdev = cdev;
__bind(pos, tzp->tbp[i].trip_mask, cdev,
- tzp->tbp[i].binding_limits);
+ tzp->tbp[i].binding_limits,
+ tzp->tbp[i].weight);
}
}
@@ -319,7 +381,8 @@ static void bind_tz(struct thermal_zone_device *tz)
continue;
tzp->tbp[i].cdev = pos;
__bind(tz, tzp->tbp[i].trip_mask, pos,
- tzp->tbp[i].binding_limits);
+ tzp->tbp[i].binding_limits,
+ tzp->tbp[i].weight);
}
}
exit:
@@ -368,7 +431,7 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
tz->ops->get_trip_temp(tz, trip, &trip_temp);
/* If we have not crossed the trip_temp, we do not care. */
- if (tz->temperature < trip_temp)
+ if (trip_temp <= 0 || tz->temperature < trip_temp)
return;
trace_thermal_zone_trip(tz, trip, trip_type);
@@ -711,7 +774,8 @@ passive_store(struct device *dev, struct device_attribute *attr,
thermal_zone_bind_cooling_device(tz,
THERMAL_TRIPS_NONE, cdev,
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
}
mutex_unlock(&thermal_list_lock);
if (!tz->passive_delay)
@@ -757,15 +821,18 @@ policy_store(struct device *dev, struct device_attribute *attr,
snprintf(name, sizeof(name), "%s", buf);
mutex_lock(&thermal_governor_lock);
+ mutex_lock(&tz->lock);
gov = __find_governor(strim(name));
if (!gov)
goto exit;
- tz->governor = gov;
- ret = count;
+ ret = thermal_set_governor(tz, gov);
+ if (!ret)
+ ret = count;
exit:
+ mutex_unlock(&tz->lock);
mutex_unlock(&thermal_governor_lock);
return ret;
}
@@ -806,6 +873,158 @@ emul_temp_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
#endif/*CONFIG_THERMAL_EMULATION*/
+/*
+ * sysfs show handler for "sustainable_power": prints
+ * tz->tzp->sustainable_power, or fails with -EIO when the zone has no
+ * thermal zone parameters.
+ */
+static ssize_t
+sustainable_power_show(struct device *dev, struct device_attribute *devattr,
+		       char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	if (!tz->tzp)
+		return -EIO;
+
+	return sprintf(buf, "%u\n", tz->tzp->sustainable_power);
+}
+
+/*
+ * sysfs store handler for "sustainable_power": parses a base-10 u32 and
+ * writes it to tz->tzp->sustainable_power. Returns -EIO when the zone has
+ * no thermal zone parameters and -EINVAL when the input does not parse.
+ */
+static ssize_t
+sustainable_power_store(struct device *dev, struct device_attribute *devattr,
+			const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	u32 parsed;
+	int err;
+
+	if (!tz->tzp)
+		return -EIO;
+
+	err = kstrtou32(buf, 10, &parsed);
+	if (err)
+		return -EINVAL;
+
+	tz->tzp->sustainable_power = parsed;
+
+	return count;
+}
+static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show,
+		   sustainable_power_store);
+
+/*
+ * create_s32_tzp_attr(name) - generate sysfs handlers for a signed tzp field
+ *
+ * Expands to name##_show(), name##_store() and the matching DEVICE_ATTR for
+ * tz->tzp->name. The store handler parses a base-10 s32, so the show handler
+ * prints the value as signed ("%d"); printing it with "%u" would report
+ * negative settings as large positive numbers. Both handlers return -EIO
+ * when the zone has no thermal zone parameters; store returns -EINVAL on
+ * unparsable input.
+ */
+#define create_s32_tzp_attr(name)					\
+	static ssize_t							\
+	name##_show(struct device *dev, struct device_attribute *devattr, \
+		    char *buf)						\
+	{								\
+		struct thermal_zone_device *tz = to_thermal_zone(dev);	\
+									\
+		if (tz->tzp)						\
+			return sprintf(buf, "%d\n", tz->tzp->name);	\
+		else							\
+			return -EIO;					\
+	}								\
+									\
+	static ssize_t							\
+	name##_store(struct device *dev, struct device_attribute *devattr, \
+		     const char *buf, size_t count)			\
+	{								\
+		struct thermal_zone_device *tz = to_thermal_zone(dev);	\
+		s32 value;						\
+									\
+		if (!tz->tzp)						\
+			return -EIO;					\
+									\
+		if (kstrtos32(buf, 10, &value))				\
+			return -EINVAL;					\
+									\
+		tz->tzp->name = value;					\
+									\
+		return count;						\
+	}								\
+	static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, name##_show, name##_store)
+
+create_s32_tzp_attr(k_po);
+create_s32_tzp_attr(k_pu);
+create_s32_tzp_attr(k_i);
+create_s32_tzp_attr(k_d);
+create_s32_tzp_attr(integral_cutoff);
+create_s32_tzp_attr(slope);
+create_s32_tzp_attr(offset);
+#undef create_s32_tzp_attr
+
+/* Thermal zone parameter attributes created, in this order, by create_tzp_attrs(). */
+static struct device_attribute *dev_tzp_attrs[] = {
+ &dev_attr_sustainable_power,
+ &dev_attr_k_po,
+ &dev_attr_k_pu,
+ &dev_attr_k_i,
+ &dev_attr_k_d,
+ &dev_attr_integral_cutoff,
+ &dev_attr_slope,
+ &dev_attr_offset,
+};
+
+/*
+ * Create one sysfs file on @dev for every entry of dev_tzp_attrs[].
+ * Stops at the first failure and returns that error; files created before
+ * the failure are not removed here. Returns 0 when all files were created.
+ */
+static int create_tzp_attrs(struct device *dev)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(dev_tzp_attrs)) {
+		int err = device_create_file(dev, dev_tzp_attrs[idx]);
+
+		if (err)
+			return err;
+		idx++;
+	}
+
+	return 0;
+}
+
+/**
+ * power_actor_get_max_power() - get the maximum power that a cdev can consume
+ * @cdev:	pointer to &thermal_cooling_device
+ * @tz:	a valid thermal zone device pointer
+ * @max_power:	pointer in which to store the maximum power
+ *
+ * Ask the cooling device for the power, in milliwatts, that corresponds to
+ * cooling state 0 and store it in @max_power.
+ *
+ * Return: 0 on success, -EINVAL if @cdev doesn't support the
+ * power_actor API or -E* on other error.
+ */
+int power_actor_get_max_power(struct thermal_cooling_device *cdev,
+			      struct thermal_zone_device *tz, u32 *max_power)
+{
+	return cdev_is_power_actor(cdev) ?
+		cdev->ops->state2power(cdev, tz, 0, max_power) : -EINVAL;
+}
+
+/**
+ * power_actor_set_power() - limit the maximum power that a cooling device can consume
+ * @cdev:	pointer to &thermal_cooling_device
+ * @instance:	thermal instance to update
+ * @power:	the power in milliwatts
+ *
+ * Translate @power into a cooling state through the device's power2state()
+ * callback, record that state as the target of @instance and trigger an
+ * update of @cdev.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device does not
+ * implement the power actor API or -E* for other failures.
+ */
+int power_actor_set_power(struct thermal_cooling_device *cdev,
+			  struct thermal_instance *instance, u32 power)
+{
+	unsigned long target_state;
+	int err;
+
+	if (!cdev_is_power_actor(cdev))
+		return -EINVAL;
+
+	err = cdev->ops->power2state(cdev, instance->tz, power, &target_state);
+	if (!err) {
+		instance->target = target_state;
+		/* Mark the device stale so thermal_cdev_update() re-evaluates it. */
+		cdev->updated = false;
+		thermal_cdev_update(cdev);
+	}
+
+	return err;
+}
+
static DEVICE_ATTR(type, 0444, type_show, NULL);
static DEVICE_ATTR(temp, 0444, temp_show, NULL);
static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -897,6 +1116,50 @@ thermal_cooling_device_trip_point_show(struct device *dev,
return sprintf(buf, "%d\n", instance->trip);
}
+/* Attributes every cooling device exposes: its type, maximum state and current state. */
+static struct attribute *cooling_device_attrs[] = {
+ &dev_attr_cdev_type.attr,
+ &dev_attr_max_state.attr,
+ &dev_attr_cur_state.attr,
+ NULL,
+};
+
+static const struct attribute_group cooling_device_attr_group = {
+ .attrs = cooling_device_attrs,
+};
+
+/* NULL-terminated group list, assigned to cdev->device.groups at registration. */
+static const struct attribute_group *cooling_device_attr_groups[] = {
+ &cooling_device_attr_group,
+ NULL,
+};
+
+/*
+ * sysfs show handler for a "cdev%d_weight" attribute: prints the weight of
+ * the thermal instance that embeds @attr. The weight is an unsigned int, so
+ * it is printed with "%u".
+ */
+static ssize_t
+thermal_cooling_device_weight_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct thermal_instance *instance;
+
+	instance = container_of(attr, struct thermal_instance, weight_attr);
+
+	return sprintf(buf, "%u\n", instance->weight);
+}
+
+/*
+ * sysfs store handler for a "cdev%d_weight" attribute: parses an unsigned
+ * integer (base argument 0) and stores it as the weight of the thermal
+ * instance that embeds @attr. The weight is an unsigned int, so the input is
+ * parsed as unsigned and negative values are rejected rather than wrapped.
+ *
+ * Returns @count on success or the error from kstrtouint().
+ */
+static ssize_t
+thermal_cooling_device_weight_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct thermal_instance *instance;
+	unsigned int weight;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &weight);
+	if (ret)
+		return ret;
+
+	instance = container_of(attr, struct thermal_instance, weight_attr);
+	instance->weight = weight;
+
+	return count;
+}
/* Device management */
/**
@@ -911,6 +1174,9 @@ thermal_cooling_device_trip_point_show(struct device *dev,
* @lower: the Minimum cooling state can be used for this trip point.
* THERMAL_NO_LIMIT means no lower limit,
* and the cooling device can be in cooling state 0.
+ * @weight: The weight of the cooling device to be bound to the
+ * thermal zone. Use THERMAL_WEIGHT_DEFAULT for the
+ * default value
*
* This interface function bind a thermal cooling device to the certain trip
* point of a thermal zone device.
@@ -921,7 +1187,8 @@ thermal_cooling_device_trip_point_show(struct device *dev,
int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
int trip,
struct thermal_cooling_device *cdev,
- unsigned long upper, unsigned long lower)
+ unsigned long upper, unsigned long lower,
+ unsigned int weight)
{
struct thermal_instance *dev;
struct thermal_instance *pos;
@@ -964,6 +1231,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
dev->upper = upper;
dev->lower = lower;
dev->target = THERMAL_NO_TARGET;
+ dev->weight = weight;
result = get_idr(&tz->idr, &tz->lock, &dev->id);
if (result)
@@ -984,6 +1252,16 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (result)
goto remove_symbol_link;
+ sprintf(dev->weight_attr_name, "cdev%d_weight", dev->id);
+ sysfs_attr_init(&dev->weight_attr.attr);
+ dev->weight_attr.attr.name = dev->weight_attr_name;
+ dev->weight_attr.attr.mode = S_IWUSR | S_IRUGO;
+ dev->weight_attr.show = thermal_cooling_device_weight_show;
+ dev->weight_attr.store = thermal_cooling_device_weight_store;
+ result = device_create_file(&tz->device, &dev->weight_attr);
+ if (result)
+ goto remove_trip_file;
+
mutex_lock(&tz->lock);
mutex_lock(&cdev->lock);
list_for_each_entry(pos, &tz->thermal_instances, tz_node)
@@ -1001,6 +1279,8 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (!result)
return 0;
+ device_remove_file(&tz->device, &dev->weight_attr);
+remove_trip_file:
device_remove_file(&tz->device, &dev->attr);
remove_symbol_link:
sysfs_remove_link(&tz->device.kobj, dev->name);
@@ -1126,6 +1406,7 @@ __thermal_cooling_device_register(struct device_node *np,
cdev->ops = ops;
cdev->updated = false;
cdev->device.class = &thermal_class;
+ cdev->device.groups = cooling_device_attr_groups;
cdev->devdata = devdata;
dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
result = device_register(&cdev->device);
@@ -1135,21 +1416,6 @@ __thermal_cooling_device_register(struct device_node *np,
return ERR_PTR(result);
}
- /* sys I/F */
- if (type) {
- result = device_create_file(&cdev->device, &dev_attr_cdev_type);
- if (result)
- goto unregister;
- }
-
- result = device_create_file(&cdev->device, &dev_attr_max_state);
- if (result)
- goto unregister;
-
- result = device_create_file(&cdev->device, &dev_attr_cur_state);
- if (result)
- goto unregister;
-
/* Add 'this' new cdev to the global cdev list */
mutex_lock(&thermal_list_lock);
list_add(&cdev->node, &thermal_cdev_list);
@@ -1159,11 +1425,6 @@ __thermal_cooling_device_register(struct device_node *np,
bind_cdev(cdev);
return cdev;
-
-unregister:
- release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
- device_unregister(&cdev->device);
- return ERR_PTR(result);
}
/**
@@ -1374,7 +1635,8 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
tz->trip_temp_attrs[indx].name;
tz->trip_temp_attrs[indx].attr.attr.mode = S_IRUGO;
tz->trip_temp_attrs[indx].attr.show = trip_point_temp_show;
- if (mask & (1 << indx)) {
+ if (IS_ENABLED(CONFIG_THERMAL_WRITABLE_TRIPS) &&
+ mask & (1 << indx)) {
tz->trip_temp_attrs[indx].attr.attr.mode |= S_IWUSR;
tz->trip_temp_attrs[indx].attr.store =
trip_point_temp_store;
@@ -1451,7 +1713,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
struct thermal_zone_device *thermal_zone_device_register(const char *type,
int trips, int mask, void *devdata,
struct thermal_zone_device_ops *ops,
- const struct thermal_zone_params *tzp,
+ struct thermal_zone_params *tzp,
int passive_delay, int polling_delay)
{
struct thermal_zone_device *tz;
@@ -1459,6 +1721,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
int result;
int count;
int passive = 0;
+ struct thermal_governor *governor;
if (type && strlen(type) >= THERMAL_NAME_LENGTH)
return ERR_PTR(-EINVAL);
@@ -1545,13 +1808,24 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
if (result)
goto unregister;
+ /* Add thermal zone params */
+ result = create_tzp_attrs(&tz->device);
+ if (result)
+ goto unregister;
+
/* Update 'this' zone's governor information */
mutex_lock(&thermal_governor_lock);
if (tz->tzp)
- tz->governor = __find_governor(tz->tzp->governor_name);
+ governor = __find_governor(tz->tzp->governor_name);
else
- tz->governor = def_governor;
+ governor = def_governor;
+
+ result = thermal_set_governor(tz, governor);
+ if (result) {
+ mutex_unlock(&thermal_governor_lock);
+ goto unregister;
+ }
mutex_unlock(&thermal_governor_lock);
@@ -1640,7 +1914,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
device_remove_file(&tz->device, &dev_attr_mode);
device_remove_file(&tz->device, &dev_attr_policy);
remove_trip_attrs(tz);
- tz->governor = NULL;
+ thermal_set_governor(tz, NULL);
thermal_remove_hwmon_sysfs(tz);
release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
@@ -1800,7 +2074,11 @@ static int __init thermal_register_governors(void)
if (result)
return result;
- return thermal_gov_user_space_register();
+ result = thermal_gov_user_space_register();
+ if (result)
+ return result;
+
+ return thermal_gov_power_allocator_register();
}
static void thermal_unregister_governors(void)
@@ -1809,6 +2087,7 @@ static void thermal_unregister_governors(void)
thermal_gov_fair_share_unregister();
thermal_gov_bang_bang_unregister();
thermal_gov_user_space_unregister();
+ thermal_gov_power_allocator_unregister();
}
static int __init thermal_init(void)
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d15d243de27a..8a6624488cc5 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -46,8 +46,11 @@ struct thermal_instance {
unsigned long target; /* expected cooling state */
char attr_name[THERMAL_NAME_LENGTH];
struct device_attribute attr;
+ char weight_attr_name[THERMAL_NAME_LENGTH];
+ struct device_attribute weight_attr;
struct list_head tz_node; /* node in tz->thermal_instances */
struct list_head cdev_node; /* node in cdev->thermal_instances */
+ unsigned int weight; /* The weight of the cooling device */
};
int thermal_register_governor(struct thermal_governor *);
@@ -85,13 +88,39 @@ static inline int thermal_gov_user_space_register(void) { return 0; }
static inline void thermal_gov_user_space_unregister(void) {}
#endif /* CONFIG_THERMAL_GOV_USER_SPACE */
+/* Power allocator governor hooks; no-op stubs when the governor is not configured. */
+#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
+int thermal_gov_power_allocator_register(void);
+void thermal_gov_power_allocator_unregister(void);
+#else
+static inline int thermal_gov_power_allocator_register(void) { return 0; }
+static inline void thermal_gov_power_allocator_unregister(void) {}
+#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
+
/* device tree support */
#ifdef CONFIG_THERMAL_OF
int of_parse_thermal_zones(void);
void of_thermal_destroy_zones(void);
+int of_thermal_get_ntrips(struct thermal_zone_device *);
+bool of_thermal_is_trip_valid(struct thermal_zone_device *, int);
+const struct thermal_trip *
+of_thermal_get_trip_points(struct thermal_zone_device *);
#else
static inline int of_parse_thermal_zones(void) { return 0; }
static inline void of_thermal_destroy_zones(void) { }
+/* Without device tree support a zone reports zero trips, ... */
+static inline int
+of_thermal_get_ntrips(struct thermal_zone_device *tz) { return 0; }
+/* ... no trip index is valid, ... */
+static inline bool
+of_thermal_is_trip_valid(struct thermal_zone_device *tz, int trip)
+{ return false; }
+/* ... and there is no trip table to hand out. */
+static inline const struct thermal_trip *
+of_thermal_get_trip_points(struct thermal_zone_device *tz) { return NULL; }
#endif
#endif /* __THERMAL_CORE_H__ */
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 9eec26dc0448..68f53fcb8fb1 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -28,7 +28,6 @@
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/thermal.h>
-#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/cpu_cooling.h>
#include <linux/of.h>
@@ -147,7 +146,8 @@ static int ti_thermal_bind(struct thermal_zone_device *thermal,
return thermal_zone_bind_cooling_device(thermal, 0, cdev,
/* bind with min and max states defined by cpu_cooling */
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
}
/* Unbind callback functions for thermal zone */
@@ -286,6 +286,11 @@ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
}
+static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
+ .get_temp = __ti_thermal_get_temp,
+ .get_trend = __ti_thermal_get_trend,
+};
+
static struct thermal_zone_device_ops ti_thermal_ops = {
.get_temp = ti_thermal_get_temp,
.get_trend = ti_thermal_get_trend,
@@ -333,8 +338,7 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
/* in case this is specified by DT */
data->ti_thermal = thermal_zone_of_sensor_register(bgp->dev, id,
- data, __ti_thermal_get_temp,
- __ti_thermal_get_trend);
+ data, &ti_of_thermal_ops);
if (IS_ERR(data->ti_thermal)) {
/* Create thermal zone */
data->ti_thermal = thermal_zone_device_register(domain,
@@ -403,11 +407,6 @@ int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id)
if (!data)
return -EINVAL;
- if (!cpufreq_get_current_driver()) {
- dev_dbg(bgp->dev, "no cpufreq driver yet\n");
- return -EPROBE_DEFER;
- }
-
/* Register cooling device */
data->cool_dev = cpufreq_cooling_register(cpu_present_mask);
if (IS_ERR(data->cool_dev)) {