Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Make bypass LB cpumasks per-scheduler

scx_bypass_lb_{donee,resched}_cpumask were file-scope statics shared by all
scheduler instances. With CONFIG_EXT_SUB_SCHED, multiple scheduler instances
each arm their own bypass_lb_timer, so concurrent bypass_lb_node() calls
read-modify-write the global cpumasks with no lock, corrupting donee/resched
decisions.

Move the cpumasks into struct scx_sched, allocate them alongside the timer
in scx_alloc_and_add_sched(), and free them in scx_sched_free_rcu_work().

Fixes: 95d1df610cdc ("sched_ext: Implement load balancer for bypass mode")
Cc: stable@vger.kernel.org # v6.19+
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>

+21 -14
+19 -14
kernel/sched/ext.c
··· 53 53 DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem); 54 54 static atomic_t scx_enable_state_var = ATOMIC_INIT(SCX_DISABLED); 55 55 static DEFINE_RAW_SPINLOCK(scx_bypass_lock); 56 - static cpumask_var_t scx_bypass_lb_donee_cpumask; 57 - static cpumask_var_t scx_bypass_lb_resched_cpumask; 58 56 static bool scx_init_task_enabled; 59 57 static bool scx_switching_all; 60 58 DEFINE_STATIC_KEY_FALSE(__scx_switched_all); ··· 4745 4747 irq_work_sync(&sch->disable_irq_work); 4746 4748 kthread_destroy_worker(sch->helper); 4747 4749 timer_shutdown_sync(&sch->bypass_lb_timer); 4750 + free_cpumask_var(sch->bypass_lb_donee_cpumask); 4751 + free_cpumask_var(sch->bypass_lb_resched_cpumask); 4748 4752 4749 4753 #ifdef CONFIG_EXT_SUB_SCHED 4750 4754 kfree(sch->cgrp_path); ··· 5123 5123 static void bypass_lb_node(struct scx_sched *sch, int node) 5124 5124 { 5125 5125 const struct cpumask *node_mask = cpumask_of_node(node); 5126 - struct cpumask *donee_mask = scx_bypass_lb_donee_cpumask; 5127 - struct cpumask *resched_mask = scx_bypass_lb_resched_cpumask; 5126 + struct cpumask *donee_mask = sch->bypass_lb_donee_cpumask; 5127 + struct cpumask *resched_mask = sch->bypass_lb_resched_cpumask; 5128 5128 u32 nr_tasks = 0, nr_cpus = 0, nr_balanced = 0; 5129 5129 u32 nr_target, nr_donor_target; 5130 5130 u32 before_min = U32_MAX, before_max = 0; ··· 6520 6520 init_irq_work(&sch->disable_irq_work, scx_disable_irq_workfn); 6521 6521 kthread_init_work(&sch->disable_work, scx_disable_workfn); 6522 6522 timer_setup(&sch->bypass_lb_timer, scx_bypass_lb_timerfn, 0); 6523 + 6524 + if (!alloc_cpumask_var(&sch->bypass_lb_donee_cpumask, GFP_KERNEL)) { 6525 + ret = -ENOMEM; 6526 + goto err_stop_helper; 6527 + } 6528 + if (!alloc_cpumask_var(&sch->bypass_lb_resched_cpumask, GFP_KERNEL)) { 6529 + ret = -ENOMEM; 6530 + goto err_free_lb_cpumask; 6531 + } 6523 6532 sch->ops = *ops; 6524 6533 rcu_assign_pointer(ops->priv, sch); 6525 6534 ··· 6538 6529 char *buf = kzalloc(PATH_MAX, GFP_KERNEL); 6539 6530 if 
(!buf) { 6540 6531 ret = -ENOMEM; 6541 - goto err_stop_helper; 6532 + goto err_free_lb_resched; 6542 6533 } 6543 6534 cgroup_path(cgrp, buf, PATH_MAX); 6544 6535 sch->cgrp_path = kstrdup(buf, GFP_KERNEL); 6545 6536 kfree(buf); 6546 6537 if (!sch->cgrp_path) { 6547 6538 ret = -ENOMEM; 6548 - goto err_stop_helper; 6539 + goto err_free_lb_resched; 6549 6540 } 6550 6541 6551 6542 sch->cgrp = cgrp; ··· 6580 6571 #endif /* CONFIG_EXT_SUB_SCHED */ 6581 6572 return sch; 6582 6573 6583 - #ifdef CONFIG_EXT_SUB_SCHED 6574 + err_free_lb_resched: 6575 + free_cpumask_var(sch->bypass_lb_resched_cpumask); 6576 + err_free_lb_cpumask: 6577 + free_cpumask_var(sch->bypass_lb_donee_cpumask); 6584 6578 err_stop_helper: 6585 6579 kthread_destroy_worker(sch->helper); 6586 - #endif 6587 6580 err_free_pcpu: 6588 6581 for_each_possible_cpu(cpu) { 6589 6582 if (cpu == bypass_fail_cpu) ··· 9770 9759 if (ret < 0) { 9771 9760 pr_err("sched_ext: Failed to add global attributes\n"); 9772 9761 return ret; 9773 - } 9774 - 9775 - if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL) || 9776 - !alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) { 9777 - pr_err("sched_ext: Failed to allocate cpumasks\n"); 9778 - return -ENOMEM; 9779 9762 } 9780 9763 9781 9764 return 0;
+2
kernel/sched/ext_internal.h
··· 1075 1075 struct irq_work disable_irq_work; 1076 1076 struct kthread_work disable_work; 1077 1077 struct timer_list bypass_lb_timer; 1078 + cpumask_var_t bypass_lb_donee_cpumask; 1079 + cpumask_var_t bypass_lb_resched_cpumask; 1078 1080 struct rcu_work rcu_work; 1079 1081 1080 1082 /* all ancestors including self */