diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 94 | ||||
-rw-r--r-- | kernel/sched/fair.c | 275 |
2 files changed, 2 insertions, 367 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bd314d7cd9f8..24ca677b5457 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5929,8 +5929,6 @@ static const struct cpumask *cpu_cpu_mask(int cpu) return cpumask_of_node(cpu_to_node(cpu)); } -int sched_smt_power_savings = 0, sched_mc_power_savings = 0; - struct sd_data { struct sched_domain **__percpu sd; struct sched_group **__percpu sg; @@ -6322,7 +6320,6 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu) | 0*SD_WAKE_AFFINE | 0*SD_PREFER_LOCAL | 0*SD_SHARE_CPUPOWER - | 0*SD_POWERSAVINGS_BALANCE | 0*SD_SHARE_PKG_RESOURCES | 1*SD_SERIALIZE | 0*SD_PREFER_SIBLING @@ -6819,97 +6816,6 @@ match2: mutex_unlock(&sched_domains_mutex); } -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -static void reinit_sched_domains(void) -{ - get_online_cpus(); - - /* Destroy domains first to force the rebuild */ - partition_sched_domains(0, NULL, NULL); - - rebuild_sched_domains(); - put_online_cpus(); -} - -static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) -{ - unsigned int level = 0; - - if (sscanf(buf, "%u", &level) != 1) - return -EINVAL; - - /* - * level is always be positive so don't check for - * level < POWERSAVINGS_BALANCE_NONE which is 0 - * What happens on 0 or 1 byte write, - * need to check for count as well? - */ - - if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) - return -EINVAL; - - if (smt) - sched_smt_power_savings = level; - else - sched_mc_power_savings = level; - - reinit_sched_domains(); - - return count; -} - -#ifdef CONFIG_SCHED_MC -static ssize_t sched_mc_power_savings_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%u\n", sched_mc_power_savings); -} -static ssize_t sched_mc_power_savings_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - return sched_power_savings_store(buf, count, 0); -} -static DEVICE_ATTR(sched_mc_power_savings, 0644, - sched_mc_power_savings_show, - sched_mc_power_savings_store); -#endif - -#ifdef CONFIG_SCHED_SMT -static ssize_t sched_smt_power_savings_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%u\n", sched_smt_power_savings); -} -static ssize_t sched_smt_power_savings_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - return sched_power_savings_store(buf, count, 1); -} -static DEVICE_ATTR(sched_smt_power_savings, 0644, - sched_smt_power_savings_show, - sched_smt_power_savings_store); -#endif - -int __init sched_create_sysfs_power_savings_entries(struct device *dev) -{ - int err = 0; - -#ifdef CONFIG_SCHED_SMT - if (smt_capable()) - err = device_create_file(dev, &dev_attr_sched_smt_power_savings); -#endif -#ifdef CONFIG_SCHED_MC - if (!err && mc_capable()) - err = device_create_file(dev, &dev_attr_sched_mc_power_savings); -#endif - return err; -} -#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ - /* * Update cpusets according to cpu_active mask. If cpusets are * disabled, cpuset_update_active_cpus() becomes a simple wrapper diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0b42f4487329..940e6d17cf96 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2721,7 +2721,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) * If power savings logic is enabled for a domain, see if we * are not overloaded, if so, don't balance wider. */ - if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) { + if (tmp->flags & (SD_PREFER_LOCAL)) { unsigned long power = 0; unsigned long nr_running = 0; unsigned long capacity; @@ -2734,9 +2734,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE); - if (tmp->flags & SD_POWERSAVINGS_BALANCE) - nr_running /= 2; - if (nr_running < capacity) want_sd = 0; } @@ -3435,14 +3432,6 @@ struct sd_lb_stats { unsigned int busiest_group_weight; int group_imb; /* Is there imbalance in this sd */ -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - int power_savings_balance; /* Is powersave balance needed for this sd */ - struct sched_group *group_min; /* Least loaded group in sd */ - struct sched_group *group_leader; /* Group which relieves group_min */ - unsigned long min_load_per_task; /* load_per_task in group_min */ - unsigned long leader_nr_running; /* Nr running of group_leader */ - unsigned long min_nr_running; /* Nr running of group_min */ -#endif }; /* @@ -3486,147 +3475,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd, return load_idx; } - -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -/** - * init_sd_power_savings_stats - Initialize power savings statistics for - * the given sched_domain, during load balancing. - * - * @sd: Sched domain whose power-savings statistics are to be initialized. - * @sds: Variable containing the statistics for sd. - * @idle: Idle status of the CPU at which we're performing load-balancing. - */ -static inline void init_sd_power_savings_stats(struct sched_domain *sd, - struct sd_lb_stats *sds, enum cpu_idle_type idle) -{ - /* - * Busy processors will not participate in power savings - * balance. - */ - if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) - sds->power_savings_balance = 0; - else { - sds->power_savings_balance = 1; - sds->min_nr_running = ULONG_MAX; - sds->leader_nr_running = 0; - } -} - -/** - * update_sd_power_savings_stats - Update the power saving stats for a - * sched_domain while performing load balancing. - * - * @group: sched_group belonging to the sched_domain under consideration. - * @sds: Variable containing the statistics of the sched_domain - * @local_group: Does group contain the CPU for which we're performing - * load balancing ? - * @sgs: Variable containing the statistics of the group. - */ -static inline void update_sd_power_savings_stats(struct sched_group *group, - struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs) -{ - - if (!sds->power_savings_balance) - return; - - /* - * If the local group is idle or completely loaded - * no need to do power savings balance at this domain - */ - if (local_group && (sds->this_nr_running >= sgs->group_capacity || - !sds->this_nr_running)) - sds->power_savings_balance = 0; - - /* - * If a group is already running at full capacity or idle, - * don't include that group in power savings calculations - */ - if (!sds->power_savings_balance || - sgs->sum_nr_running >= sgs->group_capacity || - !sgs->sum_nr_running) - return; - - /* - * Calculate the group which has the least non-idle load. - * This is the group from where we need to pick up the load - * for saving power - */ - if ((sgs->sum_nr_running < sds->min_nr_running) || - (sgs->sum_nr_running == sds->min_nr_running && - group_first_cpu(group) > group_first_cpu(sds->group_min))) { - sds->group_min = group; - sds->min_nr_running = sgs->sum_nr_running; - sds->min_load_per_task = sgs->sum_weighted_load / - sgs->sum_nr_running; - } - - /* - * Calculate the group which is almost near its - * capacity but still has some space to pick up some load - * from other group and save more power - */ - if (sgs->sum_nr_running + 1 > sgs->group_capacity) - return; - - if (sgs->sum_nr_running > sds->leader_nr_running || - (sgs->sum_nr_running == sds->leader_nr_running && - group_first_cpu(group) < group_first_cpu(sds->group_leader))) { - sds->group_leader = group; - sds->leader_nr_running = sgs->sum_nr_running; - } -} - -/** - * check_power_save_busiest_group - see if there is potential for some power-savings balance - * @env: load balance environment - * @sds: Variable containing the statistics of the sched_domain - * under consideration. - * - * Description: - * Check if we have potential to perform some power-savings balance. - * If yes, set the busiest group to be the least loaded group in the - * sched_domain, so that it's CPUs can be put to idle. - * - * Returns 1 if there is potential to perform power-savings balance. - * Else returns 0. - */ -static inline -int check_power_save_busiest_group(struct lb_env *env, struct sd_lb_stats *sds) -{ - if (!sds->power_savings_balance) - return 0; - - if (sds->this != sds->group_leader || - sds->group_leader == sds->group_min) - return 0; - - env->imbalance = sds->min_load_per_task; - sds->busiest = sds->group_min; - - return 1; - -} -#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ -static inline void init_sd_power_savings_stats(struct sched_domain *sd, - struct sd_lb_stats *sds, enum cpu_idle_type idle) -{ - return; -} - -static inline void update_sd_power_savings_stats(struct sched_group *group, - struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs) -{ - return; -} - -static inline -int check_power_save_busiest_group(struct lb_env *env, struct sd_lb_stats *sds) -{ - return 0; -} -#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ - - unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) { return SCHED_POWER_SCALE; @@ -3932,7 +3780,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, if (child && child->flags & SD_PREFER_SIBLING) prefer_sibling = 1; - init_sd_power_savings_stats(env->sd, sds, env->idle); load_idx = get_sd_load_idx(env->sd, env->idle); do { @@ -3981,7 +3828,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, sds->group_imb = sgs.group_imb; } - update_sd_power_savings_stats(sg, sds, local_group, &sgs); sg = sg->next; } while (sg != env->sd->groups); } @@ -4276,12 +4122,6 @@ force_balance: return sds.busiest; out_balanced: - /* - * There is no obvious imbalance. But check if we can do some balancing - * to save power. - */ - if (check_power_save_busiest_group(env, &sds)) - return sds.busiest; ret: env->imbalance = 0; return NULL; @@ -4359,28 +4199,6 @@ static int need_active_balance(struct lb_env *env) */ if ((sd->flags & SD_ASYM_PACKING) && env->src_cpu > env->dst_cpu) return 1; - - /* - * The only task running in a non-idle cpu can be moved to this - * cpu in an attempt to completely freeup the other CPU - * package. - * - * The package power saving logic comes from - * find_busiest_group(). If there are no imbalance, then - * f_b_g() will return NULL. However when sched_mc={1,2} then - * f_b_g() will select a group from which a running task may be - * pulled to this cpu in order to make the other package idle. - * If there is no opportunity to make a package idle and if - * there are no imbalance, then f_b_g() will return NULL and no - * action will be taken in load_balance_newidle(). - * - * Under normal task pull operation due to imbalance, there - * will be more than one task in the source run queue and - * move_tasks() will succeed. ld_moved will be true and this - * active balance code will not be triggered. - */ - if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) - return 0; } return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); @@ -4700,104 +4518,15 @@ static struct { unsigned long next_balance; /* in jiffy units */ } nohz ____cacheline_aligned; -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -/** - * lowest_flag_domain - Return lowest sched_domain containing flag. - * @cpu: The cpu whose lowest level of sched domain is to - * be returned. - * @flag: The flag to check for the lowest sched_domain - * for the given cpu. - * - * Returns the lowest sched_domain of a cpu which contains the given flag. - */ -static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) -{ - struct sched_domain *sd; - - for_each_domain(cpu, sd) - if (sd->flags & flag) - break; - - return sd; -} - -/** - * for_each_flag_domain - Iterates over sched_domains containing the flag. - * @cpu: The cpu whose domains we're iterating over. - * @sd: variable holding the value of the power_savings_sd - * for cpu. - * @flag: The flag to filter the sched_domains to be iterated. - * - * Iterates over all the scheduler domains for a given cpu that has the 'flag' - * set, starting from the lowest sched_domain to the highest. - */ -#define for_each_flag_domain(cpu, sd, flag) \ - for (sd = lowest_flag_domain(cpu, flag); \ - (sd && (sd->flags & flag)); sd = sd->parent) - -/** - * find_new_ilb - Finds the optimum idle load balancer for nomination. - * @cpu: The cpu which is nominating a new idle_load_balancer. - * - * Returns: Returns the id of the idle load balancer if it exists, - * Else, returns >= nr_cpu_ids. - * - * This algorithm picks the idle load balancer such that it belongs to a - * semi-idle powersavings sched_domain. The idea is to try and avoid - * completely idle packages/cores just for the purpose of idle load balancing - * when there are other idle cpu's which are better suited for that job. - */ -static int find_new_ilb(int cpu) +static inline int find_new_ilb(int call_cpu) { int ilb = cpumask_first(nohz.idle_cpus_mask); - struct sched_group *ilbg; - struct sched_domain *sd; - /* - * Have idle load balancer selection from semi-idle packages only - * when power-aware load balancing is enabled - */ - if (!(sched_smt_power_savings || sched_mc_power_savings)) - goto out_done; - - /* - * Optimize for the case when we have no idle CPUs or only one - * idle CPU. Don't walk the sched_domain hierarchy in such cases - */ - if (cpumask_weight(nohz.idle_cpus_mask) < 2) - goto out_done; - - rcu_read_lock(); - for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { - ilbg = sd->groups; - - do { - if (ilbg->group_weight != - atomic_read(&ilbg->sgp->nr_busy_cpus)) { - ilb = cpumask_first_and(nohz.idle_cpus_mask, - sched_group_cpus(ilbg)); - goto unlock; - } - - ilbg = ilbg->next; - - } while (ilbg != sd->groups); - } -unlock: - rcu_read_unlock(); - -out_done: if (ilb < nr_cpu_ids && idle_cpu(ilb)) return ilb; return nr_cpu_ids; } -#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ -static inline int find_new_ilb(int call_cpu) -{ - return nr_cpu_ids; -} -#endif /* * Kick a CPU to do the nohz balancing, if it is time for it. We pick the |