Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Fix starvation of scx_enable() under fair-class saturation

During scx_enable(), the READY -> ENABLED task switching loop changes the
calling thread's sched_class from fair to ext. Since fair has higher
priority than ext, saturating fair-class workloads can indefinitely starve
the enable thread, hanging the system. This was introduced when the enable
path switched from preempt_disable() to scx_bypass(), which doesn't protect
against fair-class starvation. Note that the original preempt_disable()
protection wasn't complete either - in partial switch modes, the calling
thread could still be starved after preempt_enable(), as it may have been
switched to the ext class.

Fix it by offloading the enable body to a dedicated system-wide RT
(SCHED_FIFO) kthread which cannot be starved by either fair or ext class
tasks. scx_enable() lazily creates the kthread on first use and passes the
ops pointer through a struct scx_enable_cmd containing the kthread_work,
then synchronously waits for completion.

The workfn runs on a different kthread from sch->helper (which runs
disable_work), so it can safely flush disable_work on the error path
without deadlock.

Fixes: 8c2090c504e9 ("sched_ext: Initialize in bypass mode")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Tejun Heo <tj@kernel.org>

Tejun Heo b06ccbab 1336b579

+56 -10
+56 -10
kernel/sched/ext.c
··· 4975 4975 return 0; 4976 4976 } 4977 4977 4978 - static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) 4978 + /* 4979 + * scx_enable() is offloaded to a dedicated system-wide RT kthread to avoid 4980 + * starvation. During the READY -> ENABLED task switching loop, the calling 4981 + * thread's sched_class gets switched from fair to ext. As fair has higher 4982 + * priority than ext, the calling thread can be indefinitely starved under 4983 + * fair-class saturation, leading to a system hang. 4984 + */ 4985 + struct scx_enable_cmd { 4986 + struct kthread_work work; 4987 + struct sched_ext_ops *ops; 4988 + int ret; 4989 + }; 4990 + 4991 + static void scx_enable_workfn(struct kthread_work *work) 4979 4992 { 4993 + struct scx_enable_cmd *cmd = 4994 + container_of(work, struct scx_enable_cmd, work); 4995 + struct sched_ext_ops *ops = cmd->ops; 4980 4996 struct scx_sched *sch; 4981 4997 struct scx_task_iter sti; 4982 4998 struct task_struct *p; 4983 4999 unsigned long timeout; 4984 5000 int i, cpu, ret; 4985 - 4986 - if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), 4987 - cpu_possible_mask)) { 4988 - pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n"); 4989 - return -EINVAL; 4990 - } 4991 5001 4992 5002 mutex_lock(&scx_enable_mutex); 4993 5003 ··· 5215 5205 5216 5206 atomic_long_inc(&scx_enable_seq); 5217 5207 5218 - return 0; 5208 + cmd->ret = 0; 5209 + return; 5219 5210 5220 5211 err_free_ksyncs: 5221 5212 free_kick_syncs(); 5222 5213 err_unlock: 5223 5214 mutex_unlock(&scx_enable_mutex); 5224 - return ret; 5215 + cmd->ret = ret; 5216 + return; 5225 5217 5226 5218 err_disable_unlock_all: 5227 5219 scx_cgroup_unlock(); ··· 5242 5230 */ 5243 5231 scx_error(sch, "scx_enable() failed (%d)", ret); 5244 5232 kthread_flush_work(&sch->disable_work); 5245 - return 0; 5233 + cmd->ret = 0; 5234 + } 5235 + 5236 + static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) 5237 + { 5238 + static struct kthread_worker 
*helper; 5239 + static DEFINE_MUTEX(helper_mutex); 5240 + struct scx_enable_cmd cmd; 5241 + 5242 + if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), 5243 + cpu_possible_mask)) { 5244 + pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n"); 5245 + return -EINVAL; 5246 + } 5247 + 5248 + if (!READ_ONCE(helper)) { 5249 + mutex_lock(&helper_mutex); 5250 + if (!helper) { 5251 + helper = kthread_run_worker(0, "scx_enable_helper"); 5252 + if (IS_ERR_OR_NULL(helper)) { 5253 + helper = NULL; 5254 + mutex_unlock(&helper_mutex); 5255 + return -ENOMEM; 5256 + } 5257 + sched_set_fifo(helper->task); 5258 + } 5259 + mutex_unlock(&helper_mutex); 5260 + } 5261 + 5262 + kthread_init_work(&cmd.work, scx_enable_workfn); 5263 + cmd.ops = ops; 5264 + 5265 + kthread_queue_work(READ_ONCE(helper), &cmd.work); 5266 + kthread_flush_work(&cmd.work); 5267 + return cmd.ret; 5246 5268 } 5247 5269 5248 5270