summaryrefslogtreecommitdiff
path: root/kernel/sched/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--kernel/sched/sched.h154
1 files changed, 152 insertions, 2 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 55d92a1ca070..e137445987d8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -506,10 +506,18 @@ struct dl_rq {
#else
struct dl_bw dl_bw;
#endif
+ /* This is the "average utilization" for this runqueue */
+ s64 avg_bw;
};
#ifdef CONFIG_SMP
+struct max_cpu_capacity {
+ raw_spinlock_t lock;
+ unsigned long val;
+ int cpu;
+};
+
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
@@ -528,6 +536,9 @@ struct root_domain {
/* Indicate more than one runnable task for any CPU */
bool overload;
+ /* Indicate one or more cpus over-utilized (tipping point) */
+ bool overutilized;
+
/*
* The bit corresponding to a CPU gets set here if such CPU has more
* than one runnable -deadline task (as it is below for RT tasks).
@@ -543,6 +554,9 @@ struct root_domain {
*/
cpumask_var_t rto_mask;
struct cpupri cpupri;
+
+ /* Maximum cpu capacity in the system. */
+ struct max_cpu_capacity max_cpu_capacity;
};
extern struct root_domain def_root_domain;
@@ -572,6 +586,7 @@ struct rq {
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
unsigned long last_load_update_tick;
+ unsigned int misfit_task;
#ifdef CONFIG_NO_HZ_COMMON
u64 nohz_stamp;
unsigned long nohz_flags;
@@ -687,6 +702,7 @@ struct rq {
#ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state;
+ int idle_state_idx;
#endif
};
@@ -836,6 +852,8 @@ DECLARE_PER_CPU(int, sd_llc_id);
DECLARE_PER_CPU(struct sched_domain *, sd_numa);
DECLARE_PER_CPU(struct sched_domain *, sd_busy);
DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+DECLARE_PER_CPU(struct sched_domain *, sd_ea);
+DECLARE_PER_CPU(struct sched_domain *, sd_scs);
struct sched_group_capacity {
atomic_t ref;
@@ -843,7 +861,8 @@ struct sched_group_capacity {
* CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
* for a single CPU.
*/
- unsigned int capacity;
+ unsigned long capacity;
+ unsigned long max_capacity; /* Max per-cpu capacity in group */
unsigned long next_update;
int imbalance; /* XXX unrelated to capacity but shared group state */
/*
@@ -860,6 +879,7 @@ struct sched_group {
unsigned int group_weight;
struct sched_group_capacity *sgc;
+ const struct sched_group_energy const *sge;
/*
* The CPUs this group covers.
@@ -1163,6 +1183,7 @@ static const u32 prio_to_wmult[40] = {
#endif
#define ENQUEUE_REPLENISH 0x08
#define ENQUEUE_RESTORE 0x10
+#define ENQUEUE_WAKEUP_NEW 0x20
#define DEQUEUE_SLEEP 0x01
#define DEQUEUE_SAVE 0x02
@@ -1276,6 +1297,17 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
WARN_ON(!rcu_read_lock_held());
return rq->idle_state;
}
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+ rq->idle_state_idx = idle_state_idx;
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+ WARN_ON(!rcu_read_lock_held());
+ return rq->idle_state_idx;
+}
#else
static inline void idle_set_state(struct rq *rq,
struct cpuidle_state *idle_state)
@@ -1286,6 +1318,15 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
return NULL;
}
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+ return -1;
+}
#endif
extern void sysrq_sched_debug_show(void);
@@ -1310,6 +1351,8 @@ unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
+
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
@@ -1415,10 +1458,117 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
}
#endif
+#ifdef CONFIG_SMP
+static inline unsigned long capacity_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity;
+}
+
+static inline unsigned long capacity_orig_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity_orig;
+}
+
+/*
+ * cpu_util returns the amount of capacity of a CPU that is used by CFS
+ * tasks. The unit of the return value must be the one of capacity so we can
+ * compare the utilization with the capacity of the CPU that is available for
+ * CFS task (ie cpu_capacity).
+ *
+ * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
+ * recent utilization of currently non-runnable tasks on a CPU. It represents
+ * the amount of utilization of a CPU in the range [0..capacity_orig] where
+ * capacity_orig is the cpu_capacity available at the highest frequency
+ * (arch_scale_freq_capacity()).
+ * The utilization of a CPU converges towards a sum equal to or less than the
+ * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
+ * the running time on this CPU scaled by capacity_curr.
+ *
+ * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
+ * higher than capacity_orig because of unfortunate rounding in
+ * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
+ * the average stabilizes with the new running time. We need to check that the
+ * utilization stays within the range of [0..capacity_orig] and cap it if
+ * necessary. Without utilization capping, a group could be seen as overloaded
+ * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
+ * available capacity. We allow utilization to overshoot capacity_curr (but not
+ * capacity_orig) as it useful for predicting the capacity required after task
+ * migrations (scheduler-driven DVFS).
+ */
+static inline unsigned long __cpu_util(int cpu, int delta)
+{
+ unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
+ unsigned long capacity = capacity_orig_of(cpu);
+
+ delta += util;
+ if (delta < 0)
+ return 0;
+
+ return (delta >= capacity) ? capacity : delta;
+}
+
+static inline unsigned long cpu_util(int cpu)
+{
+ return __cpu_util(cpu, 0);
+}
+
+#endif
+
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+#define capacity_max SCHED_CAPACITY_SCALE
+extern unsigned int capacity_margin;
+extern struct static_key __sched_freq;
+
+static inline bool sched_freq(void)
+{
+ return static_key_false(&__sched_freq);
+}
+
+DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
+void update_cpu_capacity_request(int cpu, bool request);
+
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{
+ if (per_cpu(cpu_sched_capacity_reqs, cpu).cfs != capacity) {
+ per_cpu(cpu_sched_capacity_reqs, cpu).cfs = capacity;
+ update_cpu_capacity_request(cpu, request);
+ }
+}
+
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{
+ if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) {
+ per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity;
+ update_cpu_capacity_request(cpu, request);
+ }
+}
+
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{
+ if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) {
+ per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity;
+ update_cpu_capacity_request(cpu, request);
+ }
+}
+#else
+static inline bool sched_freq(void) { return false; }
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{ }
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{ }
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+ unsigned long capacity)
+{ }
+#endif
+
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
- sched_avg_update(rq);
}
#else
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }