Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'sched-urgent-2023-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
"Fix a performance regression on large SMT systems, an Intel SMT4
balancing bug, and a topology setup bug on (Intel) hybrid processors"

* tag 'sched-urgent-2023-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/sched: Restore the SD_ASYM_PACKING flag in the DIE domain
sched/fair: Fix SMT4 group_smt_balance handling
sched/fair: Optimize should_we_balance() for large SMT systems
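
Background on the first fix listed above: SD_ASYM_PACKING tells the load balancer to prefer CPUs with higher architectural priority, which on Intel hybrid processors means favoring P-cores over E-cores, so losing the flag on the DIE domain disabled that preference. The snippet below is a minimal userspace sketch of the priority comparison, not kernel code; the function name mirrors sched_asym_prefer() in kernel/sched/sched.h, and the CPU layout and priority values are invented for the example.

/*
 * Userspace sketch, not kernel code: with SD_ASYM_PACKING the balancer
 * prefers the CPU with the higher priority. On Intel hybrid parts the
 * ITMT code assigns P-cores a higher priority than E-cores, which is
 * why the DIE-level domain needs the flag on those systems.
 */
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical priorities: CPUs 0-1 are P-cores, CPUs 2-3 are E-cores. */
static const int asym_cpu_priority[] = { 68, 68, 42, 42 };

/* Mirrors the shape of sched_asym_prefer() in kernel/sched/sched.h. */
static bool sched_asym_prefer(int a, int b)
{
        return asym_cpu_priority[a] > asym_cpu_priority[b];
}

int main(void)
{
        /* Load on an E-core can be pulled toward an idle higher-priority P-core. */
        printf("prefer CPU0 (P-core) over CPU2 (E-core): %s\n",
               sched_asym_prefer(0, 2) ? "yes" : "no");
        return 0;
}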

2 files changed: +34 -5

arch/x86/kernel/smpboot.c (+9 -3)
···
 }


-#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
 static inline int x86_sched_itmt_flags(void)
 {
         return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
···
         return cpu_cluster_flags() | x86_sched_itmt_flags();
 }
 #endif
-#endif
+
+static int x86_die_flags(void)
+{
+        if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+                return x86_sched_itmt_flags();
+
+        return 0;
+}

 /*
  * Set if a package/die has multiple NUMA nodes inside.
···
          */
         if (!x86_has_numa_in_package) {
                 x86_topology[i++] = (struct sched_domain_topology_level){
-                        cpu_cpu_mask, SD_INIT_NAME(DIE)
+                        cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(DIE)
                 };
         }
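
A note on the last hunk: the x86_topology[] initializer is positional, so adding x86_die_flags as the second member wires it in as the flags callback for the DIE level. Below is a rough sketch of the relevant fields, paraphrased from include/linux/sched/topology.h around this release; the real definition has further members, and SD_INIT_NAME() fills in the debug name.

/* Paraphrased sketch; see include/linux/sched/topology.h for the real definition. */
struct cpumask;

typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);

struct sched_domain_topology_level {
        sched_domain_mask_f     mask;           /* which CPUs this level spans */
        sched_domain_flags_f    sd_flags;       /* SD_* flags, e.g. SD_ASYM_PACKING */
        /* ... additional members, plus the name set up by SD_INIT_NAME() ... */
};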
kernel/sched/fair.c (+25 -2)
···
 /* Working cpumask for: load_balance, load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
+static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask);

 #ifdef CONFIG_NO_HZ_COMMON
···
         imbalance /= ncores_local + ncores_busiest;

         /* Take advantage of resource in an empty sched group */
-        if (imbalance == 0 && local->sum_nr_running == 0 &&
+        if (imbalance <= 1 && local->sum_nr_running == 0 &&
             busiest->sum_nr_running > 1)
                 imbalance = 2;
···
                 break;

         case group_smt_balance:
+                /*
+                 * Check if we have spare CPUs on either SMT group to
+                 * choose has spare or fully busy handling.
+                 */
+                if (sgs->idle_cpus != 0 || busiest->idle_cpus != 0)
+                        goto has_spare;
+
+                fallthrough;
+
         case group_fully_busy:
                 /*
                  * Select the fully busy group with highest avg_load. In
···
                         else
                                 return true;
                 }
+has_spare:

                 /*
                  * Select not overloaded group with lowest number of idle cpus
···

 static int should_we_balance(struct lb_env *env)
 {
+        struct cpumask *swb_cpus = this_cpu_cpumask_var_ptr(should_we_balance_tmpmask);
         struct sched_group *sg = env->sd->groups;
         int cpu, idle_smt = -1;
···
                 return 1;
         }

+        cpumask_copy(swb_cpus, group_balance_mask(sg));
         /* Try to find first idle CPU */
-        for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
+        for_each_cpu_and(cpu, swb_cpus, env->cpus) {
                 if (!idle_cpu(cpu))
                         continue;
···
                 if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) {
                         if (idle_smt == -1)
                                 idle_smt = cpu;
+                        /*
+                         * If the core is not idle, and first SMT sibling which is
+                         * idle has been found, then its not needed to check other
+                         * SMT siblings for idleness:
+                         */
+#ifdef CONFIG_SCHED_SMT
+                        cpumask_andnot(swb_cpus, swb_cpus, cpu_smt_mask(cpu));
+#endif
                         continue;
                 }
···
         for_each_possible_cpu(i) {
                 zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
                 zalloc_cpumask_var_node(&per_cpu(select_rq_mask, i), GFP_KERNEL, cpu_to_node(i));
+                zalloc_cpumask_var_node(&per_cpu(should_we_balance_tmpmask, i),
+                                        GFP_KERNEL, cpu_to_node(i));

 #ifdef CONFIG_CFS_BANDWIDTH
                 INIT_CSD(&cpu_rq(i)->cfsb_csd, __cfsb_csd_unthrottle, cpu_rq(i));
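
To make the should_we_balance() change above easier to follow, here is a small userspace model of the pruning idea, not kernel code: once one SMT sibling of a non-idle core has been inspected, the rest of that core is dropped from the candidate set, so wide SMT systems stop re-testing siblings that cannot yield a fully idle core. The CPU layout, idle states, and every identifier in the sketch are invented for illustration; the kernel operates on per-CPU cpumasks exactly as shown in the diff.

/*
 * Toy model of the should_we_balance() scan (not kernel code): CPUs are
 * bits in an unsigned long, and smt_mask(cpu) returns the bits of the
 * whole core. After inspecting one sibling of a busy core, the whole
 * core is cleared from the candidate set, mirroring the
 * cpumask_andnot(swb_cpus, swb_cpus, cpu_smt_mask(cpu)) call in the patch.
 */
#include <stdbool.h>
#include <stdio.h>

#define NCPUS           8
#define SMT_WIDTH       4       /* hypothetical SMT4: CPUs 0-3 share a core, 4-7 share a core */

static bool cpu_idle[NCPUS] = { false, true,  true,  true,     /* core 0: CPU 0 busy, siblings idle */
                                true,  true,  true,  true };   /* core 1: fully idle */

/* Bitmask of all SMT siblings of @cpu, including @cpu itself. */
static unsigned long smt_mask(int cpu)
{
        int first = cpu - (cpu % SMT_WIDTH);

        return ((1UL << SMT_WIDTH) - 1) << first;
}

/* True if every sibling in @cpu's core is idle. */
static bool core_idle(int cpu)
{
        unsigned long mask = smt_mask(cpu);

        for (int i = 0; i < NCPUS; i++)
                if ((mask & (1UL << i)) && !cpu_idle[i])
                        return false;
        return true;
}

int main(void)
{
        unsigned long candidates = (1UL << NCPUS) - 1;  /* like the copied balance mask */
        int idle_smt = -1, tested = 0;

        for (int cpu = 0; cpu < NCPUS; cpu++) {
                if (!(candidates & (1UL << cpu)))
                        continue;               /* sibling of an already-rejected core: skipped */
                tested++;
                if (!cpu_idle[cpu])
                        continue;
                if (!core_idle(cpu)) {
                        if (idle_smt == -1)
                                idle_smt = cpu; /* remember a fallback idle sibling */
                        candidates &= ~smt_mask(cpu);   /* prune the rest of this core */
                        continue;
                }
                printf("fully idle core found at CPU %d after testing %d CPUs\n",
                       cpu, tested);
                return 0;
        }
        printf("no fully idle core; fallback idle SMT sibling: CPU %d\n", idle_smt);
        return 0;
}

In this made-up layout the scan reaches the fully idle core after testing 3 CPUs; without the pruning it would also have visited CPUs 2 and 3, and that redundant scanning is what grows with SMT width on large systems.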