Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched/balancing: Simplify the sg_status bitmask and use separate ->overloaded and ->overutilized flags

SG_OVERLOADED and SG_OVERUTILIZED flags plus the sg_status bitmask are an
unnecessary complication that only make the code harder to read and slower.

We only ever set them separately:

thule:~/tip> git grep SG_OVER kernel/sched/
kernel/sched/fair.c: set_rd_overutilized_status(rq->rd, SG_OVERUTILIZED);
kernel/sched/fair.c: *sg_status |= SG_OVERLOADED;
kernel/sched/fair.c: *sg_status |= SG_OVERUTILIZED;
kernel/sched/fair.c: *sg_status |= SG_OVERLOADED;
kernel/sched/fair.c: set_rd_overloaded(env->dst_rq->rd, sg_status & SG_OVERLOADED);
kernel/sched/fair.c: sg_status & SG_OVERUTILIZED);
kernel/sched/fair.c: } else if (sg_status & SG_OVERUTILIZED) {
kernel/sched/fair.c: set_rd_overutilized_status(env->dst_rq->rd, SG_OVERUTILIZED);
kernel/sched/sched.h:#define SG_OVERLOADED 0x1 /* More than one runnable task on a CPU. */
kernel/sched/sched.h:#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
kernel/sched/sched.h: set_rd_overloaded(rq->rd, SG_OVERLOADED);

And use them separately, which results in suboptimal code:

/* update overload indicator if we are at root domain */
set_rd_overloaded(env->dst_rq->rd, sg_status & SG_OVERLOADED);

/* Update over-utilization (tipping point, U >= 0) indicator */
set_rd_overutilized_status(env->dst_rq->rd,
                           sg_status & SG_OVERUTILIZED);

Introduce separate sg_overloaded and sg_overutilized flags in update_sd_lb_stats()
and its lower level functions, and change all of them to 'bool'.

Remove the now unused SG_OVERLOADED and SG_OVERUTILIZED flags.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/ZgVPhODZ8/nbsqbP@gmail.com

+24 -29
+18 -18
kernel/sched/fair.c
··· 6688 6688 /* 6689 6689 * overutilized value make sense only if EAS is enabled 6690 6690 */ 6691 - static inline int is_rd_overutilized(struct root_domain *rd) 6691 + static inline bool is_rd_overutilized(struct root_domain *rd) 6692 6692 { 6693 6693 return !sched_energy_enabled() || READ_ONCE(rd->overutilized); 6694 6694 } 6695 6695 6696 - static inline void set_rd_overutilized(struct root_domain *rd, 6697 - unsigned int status) 6696 + static inline void set_rd_overutilized(struct root_domain *rd, bool flag) 6698 6697 { 6699 6698 if (!sched_energy_enabled()) 6700 6699 return; 6701 6700 6702 - WRITE_ONCE(rd->overutilized, status); 6703 - trace_sched_overutilized_tp(rd, !!status); 6701 + WRITE_ONCE(rd->overutilized, flag); 6702 + trace_sched_overutilized_tp(rd, flag); 6704 6703 } 6705 6704 6706 6705 static inline void check_update_overutilized_status(struct rq *rq) ··· 6710 6711 */ 6711 6712 6712 6713 if (!is_rd_overutilized(rq->rd) && cpu_overutilized(rq->cpu)) 6713 - set_rd_overutilized(rq->rd, SG_OVERUTILIZED); 6714 + set_rd_overutilized(rq->rd, 1); 6714 6715 } 6715 6716 #else 6716 6717 static inline void check_update_overutilized_status(struct rq *rq) { } ··· 9933 9934 * @sds: Load-balancing data with statistics of the local group. 9934 9935 * @group: sched_group whose statistics are to be updated. 9935 9936 * @sgs: variable to hold the statistics for this group. 
9936 - * @sg_status: Holds flag indicating the status of the sched_group 9937 + * @sg_overloaded: sched_group is overloaded 9938 + * @sg_overutilized: sched_group is overutilized 9937 9939 */ 9938 9940 static inline void update_sg_lb_stats(struct lb_env *env, 9939 9941 struct sd_lb_stats *sds, 9940 9942 struct sched_group *group, 9941 9943 struct sg_lb_stats *sgs, 9942 - int *sg_status) 9944 + bool *sg_overloaded, 9945 + bool *sg_overutilized) 9943 9946 { 9944 9947 int i, nr_running, local_group; 9945 9948 ··· 9962 9961 sgs->sum_nr_running += nr_running; 9963 9962 9964 9963 if (nr_running > 1) 9965 - *sg_status |= SG_OVERLOADED; 9964 + *sg_overloaded = 1; 9966 9965 9967 9966 if (cpu_overutilized(i)) 9968 - *sg_status |= SG_OVERUTILIZED; 9967 + *sg_overutilized = 1; 9969 9968 9970 9969 #ifdef CONFIG_NUMA_BALANCING 9971 9970 sgs->nr_numa_running += rq->nr_numa_running; ··· 9987 9986 /* Check for a misfit task on the cpu */ 9988 9987 if (sgs->group_misfit_task_load < rq->misfit_task_load) { 9989 9988 sgs->group_misfit_task_load = rq->misfit_task_load; 9990 - *sg_status |= SG_OVERLOADED; 9989 + *sg_overloaded = 1; 9991 9990 } 9992 9991 } else if (env->idle && sched_reduced_capacity(rq, env->sd)) { 9993 9992 /* Check for a task running on a CPU with reduced capacity */ ··· 10613 10612 struct sg_lb_stats *local = &sds->local_stat; 10614 10613 struct sg_lb_stats tmp_sgs; 10615 10614 unsigned long sum_util = 0; 10616 - int sg_status = 0; 10615 + bool sg_overloaded = 0, sg_overutilized = 0; 10617 10616 10618 10617 do { 10619 10618 struct sg_lb_stats *sgs = &tmp_sgs; ··· 10629 10628 update_group_capacity(env->sd, env->dst_cpu); 10630 10629 } 10631 10630 10632 - update_sg_lb_stats(env, sds, sg, sgs, &sg_status); 10631 + update_sg_lb_stats(env, sds, sg, sgs, &sg_overloaded, &sg_overutilized); 10633 10632 10634 10633 if (!local_group && update_sd_pick_busiest(env, sds, sg, sgs)) { 10635 10634 sds->busiest = sg; ··· 10658 10657 10659 10658 if (!env->sd->parent) { 10660 10659 /* 
update overload indicator if we are at root domain */ 10661 - set_rd_overloaded(env->dst_rq->rd, sg_status & SG_OVERLOADED); 10660 + set_rd_overloaded(env->dst_rq->rd, sg_overloaded); 10662 10661 10663 10662 /* Update over-utilization (tipping point, U >= 0) indicator */ 10664 - set_rd_overutilized(env->dst_rq->rd, 10665 - sg_status & SG_OVERUTILIZED); 10666 - } else if (sg_status & SG_OVERUTILIZED) { 10667 - set_rd_overutilized(env->dst_rq->rd, SG_OVERUTILIZED); 10663 + set_rd_overutilized(env->dst_rq->rd, sg_overutilized); 10664 + } else if (sg_overutilized) { 10665 + set_rd_overutilized(env->dst_rq->rd, sg_overutilized); 10668 10666 } 10669 10667 10670 10668 update_idle_cpu_scan(env, sum_util);
+6 -11
kernel/sched/sched.h
··· 713 713 } highest_prio; 714 714 #endif 715 715 #ifdef CONFIG_SMP 716 - int overloaded; 716 + bool overloaded; 717 717 struct plist_head pushable_tasks; 718 718 719 719 #endif /* CONFIG_SMP */ ··· 757 757 u64 next; 758 758 } earliest_dl; 759 759 760 - int overloaded; 760 + bool overloaded; 761 761 762 762 /* 763 763 * Tasks on this rq that can be pushed away. They are kept in ··· 850 850 struct rcu_head rcu; 851 851 }; 852 852 853 - /* Scheduling group status flags */ 854 - #define SG_OVERLOADED 0x1 /* More than one runnable task on a CPU. */ 855 - #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ 856 - 857 853 /* 858 854 * We add the notion of a root-domain which will be used to define per-domain 859 855 * variables. Each exclusive cpuset essentially defines an island domain by ··· 870 874 * - More than one runnable task 871 875 * - Running task is misfit 872 876 */ 873 - int overloaded; 877 + bool overloaded; 874 878 875 879 /* Indicate one or more cpus over-utilized (tipping point) */ 876 - int overutilized; 880 + bool overutilized; 877 881 878 882 /* 879 883 * The bit corresponding to a CPU gets set here if such CPU has more ··· 2536 2540 } 2537 2541 2538 2542 #ifdef CONFIG_SMP 2539 - if (prev_nr < 2 && rq->nr_running >= 2) { 2540 - set_rd_overloaded(rq->rd, SG_OVERLOADED); 2541 - } 2543 + if (prev_nr < 2 && rq->nr_running >= 2) 2544 + set_rd_overloaded(rq->rd, 1); 2542 2545 #endif 2543 2546 2544 2547 sched_update_tick_dependency(rq);