From 518cd62341786aa4e3839810832af2fbc0de1ea4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Dec 2011 15:07:31 +0100 Subject: sched: Only queue remote wakeups when crossing cache boundaries Mike reported a 13% drop in netperf TCP_RR performance due to the new remote wakeup code. Suresh too noticed some performance issues with it. Reducing the IPIs to only cross cache domains solves the observed performance issues. Reported-by: Suresh Siddha Reported-by: Mike Galbraith Acked-by: Suresh Siddha Acked-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Chris Mason Cc: Dave Kleikamp Link: http://lkml.kernel.org/r/1323338531.17673.7.camel@twins Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cdf51a2adc26..dba878c73a08 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1511,6 +1511,11 @@ static int ttwu_activate_remote(struct task_struct *p, int wake_flags) } #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ + +static inline int ttwu_share_cache(int this_cpu, int that_cpu) +{ + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); +} #endif /* CONFIG_SMP */ static void ttwu_queue(struct task_struct *p, int cpu) @@ -1518,7 +1523,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) struct rq *rq = cpu_rq(cpu); #if defined(CONFIG_SMP) - if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { + if (sched_feat(TTWU_QUEUE) && !ttwu_share_cache(smp_processor_id(), cpu)) { sched_clock_cpu(cpu); /* sync clocks x-cpu */ ttwu_queue_remote(p, cpu); return; @@ -5743,6 +5748,31 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) destroy_sched_domain(sd, cpu); } +/* + * Keep a special pointer to the highest sched_domain that has + * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this + * allows us to avoid some pointer chasing select_idle_sibling(). + * + * Also keep a unique ID per domain (we use the first cpu number in + * the cpumask of the domain), this allows us to quickly tell if + * two cpus are in the same cache domain, see ttwu_share_cache(). + */ +DEFINE_PER_CPU(struct sched_domain *, sd_llc); +DEFINE_PER_CPU(int, sd_llc_id); + +static void update_top_cache_domain(int cpu) +{ + struct sched_domain *sd; + int id = cpu; + + sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); + if (sd) + id = cpumask_first(sched_domain_span(sd)); + + rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); + per_cpu(sd_llc_id, cpu) = id; +} + /* * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. @@ -5782,6 +5812,8 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) tmp = rq->sd; rcu_assign_pointer(rq->sd, sd); destroy_sched_domains(tmp, cpu); + + update_top_cache_domain(cpu); } /* cpus with isolated domains */ -- cgit v1.2.1