diff options
author | Paul Turner <pjt@google.com> | 2011-07-21 09:43:41 -0700 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-08-14 12:03:54 +0200 |
commit | d8b4986d3dbc4fabc2054d63f1d31d6ed2fb1ca8 (patch) | |
tree | d6afd92e5425f64b337c916d12dc58ca101c334d /kernel/sched.c | |
parent | e8da1b18b32064c43881bceef0f051c2110c9ab9 (diff) | |
download | talos-obmc-linux-d8b4986d3dbc4fabc2054d63f1d31d6ed2fb1ca8.tar.gz talos-obmc-linux-d8b4986d3dbc4fabc2054d63f1d31d6ed2fb1ca8.zip |
sched: Return unused runtime on group dequeue
When a local cfs_rq blocks we return the majority of its remaining quota to the
global bandwidth pool for use by other runqueues.
We do this only when the quota is current and there is more than
min_cfs_rq_quota [1ms by default] of runtime remaining on the rq.
In the case where there are throttled runqueues and we have sufficient
bandwidth to meter out a slice, a second timer is kicked off to handle this
delivery, unthrottling where appropriate.
Using a 'worst case' antagonist which, on a fairly large machine, executes on
each cpu for 1ms before moving on to the next:
no quota generations:
197.47 ms /cgroup/a/cpuacct.usage
199.46 ms /cgroup/a/cpuacct.usage
205.46 ms /cgroup/a/cpuacct.usage
198.46 ms /cgroup/a/cpuacct.usage
208.39 ms /cgroup/a/cpuacct.usage
Since we are allowed to use "stale" quota our usage is effectively bounded by
the rate of input into the global pool and performance is relatively stable.
with quota generations [1s increments]:
119.58 ms /cgroup/a/cpuacct.usage
119.65 ms /cgroup/a/cpuacct.usage
119.64 ms /cgroup/a/cpuacct.usage
119.63 ms /cgroup/a/cpuacct.usage
119.60 ms /cgroup/a/cpuacct.usage
The large deficit here is due to quota generations (/intentionally/) preventing
us from now using previously stranded slack quota. The cost is that this quota
becomes unavailable.
with quota generations and quota return:
200.09 ms /cgroup/a/cpuacct.usage
200.09 ms /cgroup/a/cpuacct.usage
198.09 ms /cgroup/a/cpuacct.usage
200.09 ms /cgroup/a/cpuacct.usage
200.06 ms /cgroup/a/cpuacct.usage
By returning unused quota we're able to both stably consume our desired quota
and prevent unintentional overages due to the abuse of slack quota from
previous quota periods (especially on a large machine).
Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184758.306848658@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 15 |
1 files changed, 14 insertions, 1 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 35c91859f8a6..6baade0d7649 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -259,7 +259,7 @@ struct cfs_bandwidth {
 	u64 runtime_expires;
 
 	int idle, timer_active;
-	struct hrtimer period_timer;
+	struct hrtimer period_timer, slack_timer;
 	struct list_head throttled_cfs_rq;
 
 	/* statistics */
@@ -421,6 +421,16 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 
 static inline u64 default_cfs_period(void);
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
+
+static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
+{
+	struct cfs_bandwidth *cfs_b =
+		container_of(timer, struct cfs_bandwidth, slack_timer);
+	do_sched_cfs_slack_timer(cfs_b);
+
+	return HRTIMER_NORESTART;
+}
 
 static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 {
@@ -453,6 +463,8 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->period_timer.function = sched_cfs_period_timer;
+	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cfs_b->slack_timer.function = sched_cfs_slack_timer;
 }
 
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -488,6 +500,7 @@ static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
 	hrtimer_cancel(&cfs_b->period_timer);
+	hrtimer_cancel(&cfs_b->slack_timer);
 }
 #else
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}