From 671fd9dabe5239ad218c7eb48b2b9edee50250e6 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:34 -0700 Subject: sched: Add support for unthrottling group entities At the start of each period we refresh the global bandwidth pool. At this time we must also unthrottle any cfs_rq entities who are now within bandwidth once more (as quota permits). Unthrottled entities have their corresponding cfs_rq->throttled flag cleared and their entities re-enqueued. Signed-off-by: Paul Turner Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.574628950@google.com Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 4 deletions(-) (limited to 'kernel/sched_fair.c') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 72c9d4ed5991..76411950ff3b 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1439,6 +1439,84 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) raw_spin_unlock(&cfs_b->lock); } +static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) +{ + struct rq *rq = rq_of(cfs_rq); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; + int enqueue = 1; + long task_delta; + + se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; + + cfs_rq->throttled = 0; + raw_spin_lock(&cfs_b->lock); + list_del_rcu(&cfs_rq->throttled_list); + raw_spin_unlock(&cfs_b->lock); + + if (!cfs_rq->load.weight) + return; + + task_delta = cfs_rq->h_nr_running; + for_each_sched_entity(se) { + if (se->on_rq) + enqueue = 0; + + cfs_rq = cfs_rq_of(se); + if (enqueue) + enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP); + cfs_rq->h_nr_running += task_delta; + + if (cfs_rq_throttled(cfs_rq)) + break; + } + + if (!se) + rq->nr_running += task_delta; + + /* determine whether we need to wake up potentially idle cpu */ + if (rq->curr == rq->idle && rq->cfs.nr_running) + resched_task(rq->curr); +} + +static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, + u64 remaining, u64 expires) +{ + struct cfs_rq *cfs_rq; + u64 runtime = remaining; + + rcu_read_lock(); + list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, + throttled_list) { + struct rq *rq = rq_of(cfs_rq); + + raw_spin_lock(&rq->lock); + if (!cfs_rq_throttled(cfs_rq)) + goto next; + + runtime = -cfs_rq->runtime_remaining + 1; + if (runtime > remaining) + runtime = remaining; + remaining -= runtime; + + cfs_rq->runtime_remaining += runtime; + cfs_rq->runtime_expires = expires; + + /* we check whether we're throttled above */ + if (cfs_rq->runtime_remaining > 0) + unthrottle_cfs_rq(cfs_rq); + +next: + raw_spin_unlock(&rq->lock); + + if (!remaining) + break; + } + rcu_read_unlock(); + + return remaining; +} + /* * Responsible for refilling a task_group's bandwidth and unthrottling its * cfs_rqs as appropriate. If there has been no activity within the last @@ -1447,23 +1525,64 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq) */ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) { - int idle = 1; + u64 runtime, runtime_expires; + int idle = 1, throttled; raw_spin_lock(&cfs_b->lock); /* no need to continue the timer with no bandwidth constraint */ if (cfs_b->quota == RUNTIME_INF) goto out_unlock; - idle = cfs_b->idle; + throttled = !list_empty(&cfs_b->throttled_cfs_rq); + /* idle depends on !throttled (for the case of a large deficit) */ + idle = cfs_b->idle && !throttled; + /* if we're going inactive then everything else can be deferred */ if (idle) goto out_unlock; __refill_cfs_bandwidth_runtime(cfs_b); + if (!throttled) { + /* mark as potentially idle for the upcoming period */ + cfs_b->idle = 1; + goto out_unlock; + } + + /* + * There are throttled entities so we must first use the new bandwidth + * to unthrottle them before making it generally available. This + * ensures that all existing debts will be paid before a new cfs_rq is + * allowed to run. + */ + runtime = cfs_b->runtime; + runtime_expires = cfs_b->runtime_expires; + cfs_b->runtime = 0; + + /* + * This check is repeated as we are holding onto the new bandwidth + * while we unthrottle. This can potentially race with an unthrottled + * group trying to acquire new bandwidth from the global pool. + */ + while (throttled && runtime > 0) { + raw_spin_unlock(&cfs_b->lock); + /* we can't nest cfs_b->lock while distributing bandwidth */ + runtime = distribute_cfs_runtime(cfs_b, runtime, + runtime_expires); + raw_spin_lock(&cfs_b->lock); + + throttled = !list_empty(&cfs_b->throttled_cfs_rq); + } - /* mark as potentially idle for the upcoming period */ - cfs_b->idle = 1; + /* return (any) remaining runtime */ + cfs_b->runtime = runtime; + /* + * While we are ensured activity in the period following an + * unthrottle, this also covers the case in which the new bandwidth is + * insufficient to cover the existing bandwidth deficit. (Forcing the + * timer to remain active while there are any throttled entities.) + */ + cfs_b->idle = 0; out_unlock: if (idle) cfs_b->timer_active = 0; -- cgit v1.2.3