Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched/topology: Extract "imb_numa_nr" calculation into a separate helper

Subsequent changes to assign "sd->shared" from "s_data" would necessitate
finding the topmost SD_SHARE_LLC domain to assign the shared object to.

This is very similar to the "imb_numa_nr" computation loop except that
"imb_numa_nr" cares about the first domain without the SD_SHARE_LLC flag
(immediate parent of sd_llc) whereas the "sd->shared" assignment would
require sd_llc itself.

Extract the "imb_numa_nr" calculation into a helper,
adjust_numa_imbalance(), and use the existing loop in
build_sched_domains() to find sd_llc.

While at it, guard the call behind IS_ENABLED(CONFIG_NUMA), since
"imb_numa_nr" only makes sense on NUMA-enabled configs with SD_NUMA
domains.

No functional changes intended.

Suggested-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/20260312044434.1974-3-kprateek.nayak@amd.com

authored by

K Prateek Nayak and committed by
Peter Zijlstra
5a7b576b 8e8e23de

+80 -53
+80 -53
kernel/sched/topology.c
··· 2550 2550 } 2551 2551 2552 2552 /* 2553 + * Calculate an allowed NUMA imbalance such that LLCs do not get 2554 + * imbalanced. 2555 + */ 2556 + static void adjust_numa_imbalance(struct sched_domain *sd_llc) 2557 + { 2558 + struct sched_domain *parent; 2559 + unsigned int imb_span = 1; 2560 + unsigned int imb = 0; 2561 + unsigned int nr_llcs; 2562 + 2563 + WARN_ON(!(sd_llc->flags & SD_SHARE_LLC)); 2564 + WARN_ON(!sd_llc->parent); 2565 + 2566 + /* 2567 + * For a single LLC per node, allow an 2568 + * imbalance up to 12.5% of the node. This is 2569 + * arbitrary cutoff based two factors -- SMT and 2570 + * memory channels. For SMT-2, the intent is to 2571 + * avoid premature sharing of HT resources but 2572 + * SMT-4 or SMT-8 *may* benefit from a different 2573 + * cutoff. For memory channels, this is a very 2574 + * rough estimate of how many channels may be 2575 + * active and is based on recent CPUs with 2576 + * many cores. 2577 + * 2578 + * For multiple LLCs, allow an imbalance 2579 + * until multiple tasks would share an LLC 2580 + * on one node while LLCs on another node 2581 + * remain idle. This assumes that there are 2582 + * enough logical CPUs per LLC to avoid SMT 2583 + * factors and that there is a correlation 2584 + * between LLCs and memory channels. 2585 + */ 2586 + nr_llcs = sd_llc->parent->span_weight / sd_llc->span_weight; 2587 + if (nr_llcs == 1) 2588 + imb = sd_llc->parent->span_weight >> 3; 2589 + else 2590 + imb = nr_llcs; 2591 + 2592 + imb = max(1U, imb); 2593 + sd_llc->parent->imb_numa_nr = imb; 2594 + 2595 + /* 2596 + * Set span based on the first NUMA domain. 2597 + * 2598 + * NUMA systems always add a NODE domain before 2599 + * iterating the NUMA domains. Since this is before 2600 + * degeneration, start from sd_llc's parent's 2601 + * parent which is the lowest an SD_NUMA domain can 2602 + * be relative to sd_llc. 
2603 + */ 2604 + parent = sd_llc->parent->parent; 2605 + while (parent && !(parent->flags & SD_NUMA)) 2606 + parent = parent->parent; 2607 + 2608 + imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight; 2609 + 2610 + /* Update the upper remainder of the topology */ 2611 + parent = sd_llc->parent; 2612 + while (parent) { 2613 + int factor = max(1U, (parent->span_weight / imb_span)); 2614 + 2615 + parent->imb_numa_nr = imb * factor; 2616 + parent = parent->parent; 2617 + } 2618 + } 2619 + 2620 + /* 2553 2621 * Build sched domains for a given set of CPUs and attach the sched domains 2554 2622 * to the individual CPUs 2555 2623 */ ··· 2674 2606 } 2675 2607 } 2676 2608 2677 - /* 2678 - * Calculate an allowed NUMA imbalance such that LLCs do not get 2679 - * imbalanced. 2680 - */ 2681 2609 for_each_cpu(i, cpu_map) { 2682 - unsigned int imb = 0; 2683 - unsigned int imb_span = 1; 2610 + sd = *per_cpu_ptr(d.sd, i); 2611 + if (!sd) 2612 + continue; 2684 2613 2685 - for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { 2686 - struct sched_domain *child = sd->child; 2614 + /* First, find the topmost SD_SHARE_LLC domain */ 2615 + while (sd->parent && (sd->parent->flags & SD_SHARE_LLC)) 2616 + sd = sd->parent; 2687 2617 2688 - if (!(sd->flags & SD_SHARE_LLC) && child && 2689 - (child->flags & SD_SHARE_LLC)) { 2690 - struct sched_domain __rcu *top_p; 2691 - unsigned int nr_llcs; 2692 - 2693 - /* 2694 - * For a single LLC per node, allow an 2695 - * imbalance up to 12.5% of the node. This is 2696 - * arbitrary cutoff based two factors -- SMT and 2697 - * memory channels. For SMT-2, the intent is to 2698 - * avoid premature sharing of HT resources but 2699 - * SMT-4 or SMT-8 *may* benefit from a different 2700 - * cutoff. For memory channels, this is a very 2701 - * rough estimate of how many channels may be 2702 - * active and is based on recent CPUs with 2703 - * many cores. 
2704 - * 2705 - * For multiple LLCs, allow an imbalance 2706 - * until multiple tasks would share an LLC 2707 - * on one node while LLCs on another node 2708 - * remain idle. This assumes that there are 2709 - * enough logical CPUs per LLC to avoid SMT 2710 - * factors and that there is a correlation 2711 - * between LLCs and memory channels. 2712 - */ 2713 - nr_llcs = sd->span_weight / child->span_weight; 2714 - if (nr_llcs == 1) 2715 - imb = sd->span_weight >> 3; 2716 - else 2717 - imb = nr_llcs; 2718 - imb = max(1U, imb); 2719 - sd->imb_numa_nr = imb; 2720 - 2721 - /* Set span based on the first NUMA domain. */ 2722 - top_p = sd->parent; 2723 - while (top_p && !(top_p->flags & SD_NUMA)) { 2724 - top_p = top_p->parent; 2725 - } 2726 - imb_span = top_p ? top_p->span_weight : sd->span_weight; 2727 - } else { 2728 - int factor = max(1U, (sd->span_weight / imb_span)); 2729 - 2730 - sd->imb_numa_nr = imb * factor; 2731 - } 2732 - } 2618 + /* 2619 + * In presence of higher domains, adjust the 2620 + * NUMA imbalance stats for the hierarchy. 2621 + */ 2622 + if (IS_ENABLED(CONFIG_NUMA) && (sd->flags & SD_SHARE_LLC) && sd->parent) 2623 + adjust_numa_imbalance(sd); 2733 2624 } 2734 2625 2735 2626 /* Calculate CPU capacity for physical packages and nodes */