diff options
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 206 |
1 files changed, 197 insertions, 9 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dece705b7f8c..da005dbde600 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2412,7 +2412,7 @@ void wake_up_new_task(struct task_struct *p) #endif rq = __task_rq_lock(p); - activate_task(rq, p, 0); + activate_task(rq, p, ENQUEUE_WAKEUP_NEW); p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); @@ -2879,6 +2879,45 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } +#ifdef CONFIG_CPU_FREQ_GOV_SCHED +static unsigned long sum_capacity_reqs(unsigned long cfs_cap, + struct sched_capacity_reqs *scr) +{ + unsigned long total = cfs_cap + scr->rt; + + total = total * capacity_margin; + total /= SCHED_CAPACITY_SCALE; + total += scr->dl; + return total; +} + +static void sched_freq_tick(int cpu) +{ + struct sched_capacity_reqs *scr; + unsigned long capacity_orig, capacity_curr; + + if (!sched_freq()) + return; + + capacity_orig = capacity_orig_of(cpu); + capacity_curr = capacity_curr_of(cpu); + if (capacity_curr == capacity_orig) + return; + + /* + * To make free room for a task that is building up its "real" + * utilization and to harm its performance the least, request + * a jump to max OPP as soon as the margin of free capacity is + * impacted (specified by capacity_margin). + */ + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + if (capacity_curr < sum_capacity_reqs(cpu_util(cpu), scr)) + set_cfs_cpu_capacity(cpu, true, capacity_max); +} +#else +static inline void sched_freq_tick(int cpu) { } +#endif + /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -2896,6 +2935,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); update_cpu_load_active(rq); calc_global_load_tick(rq); + sched_freq_tick(cpu); raw_spin_unlock(&rq->lock); perf_event_task_tick(); @@ -5401,9 +5441,60 @@ set_table_entry(struct ctl_table *entry, } static struct ctl_table * +sd_alloc_ctl_energy_table(struct sched_group_energy *sge) +{ + struct ctl_table *table = sd_alloc_ctl_entry(5); + + if (table == NULL) + return NULL; + + set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states, + sizeof(int), 0644, proc_dointvec_minmax, false); + set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power, + sge->nr_idle_states*sizeof(struct idle_state), 0644, + proc_doulongvec_minmax, false); + set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states, + sizeof(int), 0644, proc_dointvec_minmax, false); + set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap, + sge->nr_cap_states*sizeof(struct capacity_state), 0644, + proc_doulongvec_minmax, false); + + return table; +} + +static struct ctl_table * +sd_alloc_ctl_group_table(struct sched_group *sg) +{ + struct ctl_table *table = sd_alloc_ctl_entry(2); + + if (table == NULL) + return NULL; + + table->procname = kstrdup("energy", GFP_KERNEL); + table->mode = 0555; + table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge); + + return table; +} + +static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { - struct ctl_table *table = sd_alloc_ctl_entry(14); + struct ctl_table *table; + unsigned int nr_entries = 14; + + int i = 0; + struct sched_group *sg = sd->groups; + + if (sg->sge) { + int nr_sgs = 0; + + do {} while (nr_sgs++, sg = sg->next, sg != sd->groups); + + nr_entries += nr_sgs; + } + + table = sd_alloc_ctl_entry(nr_entries); if (table == NULL) return NULL; @@ -5436,7 +5527,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) sizeof(long), 0644, proc_doulongvec_minmax, false); set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false); - /* &table[13] is terminator */ + sg = sd->groups; + if (sg->sge) { + char buf[32]; + struct ctl_table *entry = &table[13]; + + do { + snprintf(buf, 32, "group%d", i); + entry->procname = kstrdup(buf, GFP_KERNEL); + entry->mode = 0555; + entry->child = sd_alloc_ctl_group_table(sg); + } while (entry++, i++, sg = sg->next, sg != sd->groups); + } + /* &table[nr_entries-1] is terminator */ return table; } @@ -5742,7 +5845,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_CONT " %*pbl", cpumask_pr_args(sched_group_cpus(group))); if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { - printk(KERN_CONT " (cpu_capacity = %d)", + printk(KERN_CONT " (cpu_capacity = %lu)", group->sgc->capacity); } @@ -5803,7 +5906,8 @@ static int sd_degenerate(struct sched_domain *sd) SD_BALANCE_EXEC | SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | - SD_SHARE_POWERDOMAIN)) { + SD_SHARE_POWERDOMAIN | + SD_SHARE_CAP_STATES)) { if (sd->groups != sd->groups->next) return 0; } @@ -5835,7 +5939,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_PREFER_SIBLING | - SD_SHARE_POWERDOMAIN); + SD_SHARE_POWERDOMAIN | + SD_SHARE_CAP_STATES); if (nr_node_ids == 1) pflags &= ~SD_SERIALIZE; } @@ -5914,6 +6019,8 @@ static int init_rootdomain(struct root_domain *rd) if (cpupri_init(&rd->cpupri) != 0) goto free_rto_mask; + + init_max_cpu_capacity(&rd->max_cpu_capacity); return 0; free_rto_mask: @@ -6019,11 +6126,13 @@ DEFINE_PER_CPU(int, sd_llc_id); DEFINE_PER_CPU(struct sched_domain *, sd_numa); DEFINE_PER_CPU(struct sched_domain *, sd_busy); DEFINE_PER_CPU(struct sched_domain *, sd_asym); +DEFINE_PER_CPU(struct sched_domain *, sd_ea); +DEFINE_PER_CPU(struct sched_domain *, sd_scs); static void update_top_cache_domain(int cpu) { struct sched_domain *sd; - struct sched_domain *busy_sd = NULL; + struct sched_domain *busy_sd = NULL, *ea_sd = NULL; int id = cpu; int size = 1; @@ -6044,6 +6153,17 @@ static void update_top_cache_domain(int cpu) sd = highest_flag_domain(cpu, SD_ASYM_PACKING); rcu_assign_pointer(per_cpu(sd_asym, cpu), sd); + + for_each_domain(cpu, sd) { + if (sd->groups->sge) + ea_sd = sd; + else + break; + } + rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd); + + sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES); + rcu_assign_pointer(per_cpu(sd_scs, cpu), sd); } /* @@ -6220,6 +6340,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) * die on a /0 trap. */ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); + sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; /* * Make sure the first group of this domain contains the @@ -6349,6 +6470,66 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) } /* + * Check that the per-cpu provided sd energy data is consistent for all cpus + * within the mask. + */ +static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn, + const struct cpumask *cpumask) +{ + const struct sched_group_energy * const sge = fn(cpu); + struct cpumask mask; + int i; + + if (cpumask_weight(cpumask) <= 1) + return; + + cpumask_xor(&mask, cpumask, get_cpu_mask(cpu)); + + for_each_cpu(i, &mask) { + const struct sched_group_energy * const e = fn(i); + int y; + + BUG_ON(e->nr_idle_states != sge->nr_idle_states); + + for (y = 0; y < (e->nr_idle_states); y++) { + BUG_ON(e->idle_states[y].power != + sge->idle_states[y].power); + } + + BUG_ON(e->nr_cap_states != sge->nr_cap_states); + + for (y = 0; y < (e->nr_cap_states); y++) { + BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap); + BUG_ON(e->cap_states[y].power != + sge->cap_states[y].power); + } + } +} + +static void init_sched_energy(int cpu, struct sched_domain *sd, + sched_domain_energy_f fn) +{ + if (!(fn && fn(cpu))) + return; + + if (cpu != group_balance_cpu(sd->groups)) + return; + + if (sd->child && !sd->child->groups->sge) { + pr_err("BUG: EAS setup broken for CPU%d\n", cpu); +#ifdef CONFIG_SCHED_DEBUG + pr_err(" energy data on %s but not on %s domain\n", + sd->name, sd->child->name); +#endif + return; + } + + check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups)); + + sd->groups->sge = fn(cpu); +} + +/* * Initializers for schedule domains * Non-inlined to reduce accumulated stack pressure in build_sched_domains() */ @@ -6456,6 +6637,7 @@ static int sched_domains_curr_level; * SD_SHARE_PKG_RESOURCES - describes shared caches * SD_NUMA - describes NUMA topologies * SD_SHARE_POWERDOMAIN - describes shared power domain + * SD_SHARE_CAP_STATES - describes shared capacity states * * Odd one out: * SD_ASYM_PACKING - describes SMT quirks @@ -6465,7 +6647,8 @@ static int sched_domains_curr_level; SD_SHARE_PKG_RESOURCES | \ SD_NUMA | \ SD_ASYM_PACKING | \ - SD_SHARE_POWERDOMAIN) + SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES) static struct sched_domain * sd_init(struct sched_domain_topology_level *tl, int cpu) @@ -7015,6 +7198,7 @@ static int build_sched_domains(const struct cpumask *cpu_map, enum s_alloc alloc_state; struct sched_domain *sd; struct s_data d; + struct rq *rq = NULL; int i, ret = -ENOMEM; alloc_state = __visit_domain_allocation_hell(&d, cpu_map); @@ -7053,10 +7237,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, /* Calculate CPU capacity for physical packages and nodes */ for (i = nr_cpumask_bits-1; i >= 0; i--) { + struct sched_domain_topology_level *tl = sched_domain_topology; + if (!cpumask_test_cpu(i, cpu_map)) continue; - for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) { + init_sched_energy(i, sd, tl->energy); claim_allocations(i, sd); init_sched_groups_capacity(i, sd); } @@ -7065,6 +7252,7 @@ static int build_sched_domains(const struct cpumask *cpu_map, /* Attach the domains */ rcu_read_lock(); for_each_cpu(i, cpu_map) { + rq = cpu_rq(i); sd = *per_cpu_ptr(d.sd, i); cpu_attach_domain(sd, d.rd, i); } |