Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Use task_can_run_on_remote_rq() test in dispatch_to_local_dsq()

When deciding whether a task can be migrated to a CPU,
dispatch_to_local_dsq() was open-coding p->cpus_ptr and scx_rq_online()
tests instead of using task_can_run_on_remote_rq(). This had two problems.

- It was missing the is_migration_disabled() check and thus could try to
migrate a task which shouldn't be migrated, leading to assertion and
scheduling failures.

- It was testing p->cpus_ptr directly instead of using task_allowed_on_cpu()
and thus failed to consider ISA compatibility.

Update dispatch_to_local_dsq() to use task_can_run_on_remote_rq():

- Move scx_ops_error() triggering into task_can_run_on_remote_rq().

- When migration isn't allowed, fall back to the global DSQ instead of the
source DSQ by returning DTL_INVALID. This is both simpler and an overall
better behavior.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: David Vernet <void@manifault.com>

+20 -20
+20 -20
kernel/sched/ext.c
··· 2203 2203 * - The BPF scheduler is bypassed while the rq is offline and we can always say 2204 2204 * no to the BPF scheduler initiated migrations while offline. 2205 2205 */ 2206 - static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq) 2206 + static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, 2207 + bool trigger_error) 2207 2208 { 2208 2209 int cpu = cpu_of(rq); 2209 2210 2210 - if (!task_allowed_on_cpu(p, cpu)) 2211 + /* 2212 + * We don't require the BPF scheduler to avoid dispatching to offline 2213 + * CPUs mostly for convenience but also because CPUs can go offline 2214 + * between scx_bpf_dispatch() calls and here. Trigger error iff the 2215 + * picked CPU is outside the allowed mask. 2216 + */ 2217 + if (!task_allowed_on_cpu(p, cpu)) { 2218 + if (trigger_error) 2219 + scx_ops_error("SCX_DSQ_LOCAL[_ON] verdict target cpu %d not allowed for %s[%d]", 2220 + cpu_of(rq), p->comm, p->pid); 2211 2221 return false; 2222 + } 2223 + 2212 2224 if (unlikely(is_migration_disabled(p))) 2213 2225 return false; 2226 + 2214 2227 if (!scx_rq_online(rq)) 2215 2228 return false; 2229 + 2216 2230 return true; 2217 2231 } 2218 2232 ··· 2254 2240 return move_task_to_local_dsq(p, 0, task_rq, rq); 2255 2241 } 2256 2242 #else /* CONFIG_SMP */ 2257 - static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq) { return false; } 2258 - static bool consume_remote_task(struct rq *rq, struct scx_dispatch_q *dsq, 2259 - struct task_struct *p, struct rq *task_rq) { return false; } 2243 + static inline bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, bool trigger_error) { return false; } 2244 + static inline bool consume_remote_task(struct rq *rq, struct scx_dispatch_q *dsq, struct task_struct *p, struct rq *task_rq) { return false; } 2260 2245 #endif /* CONFIG_SMP */ 2261 2246 2262 2247 static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq) ··· 2280 2267 return true; 2281 2268 } 2282 2269 
2283 - if (task_can_run_on_remote_rq(p, rq)) { 2270 + if (task_can_run_on_remote_rq(p, rq, false)) { 2284 2271 if (likely(consume_remote_task(rq, dsq, p, task_rq))) 2285 2272 return true; 2286 2273 goto retry; ··· 2343 2330 } 2344 2331 2345 2332 #ifdef CONFIG_SMP 2346 - if (cpumask_test_cpu(cpu_of(dst_rq), p->cpus_ptr)) { 2333 + if (likely(task_can_run_on_remote_rq(p, dst_rq, true))) { 2347 2334 bool dsp; 2348 2335 2349 2336 /* ··· 2367 2354 raw_spin_rq_unlock(rq); 2368 2355 raw_spin_rq_lock(src_rq); 2369 2356 } 2370 - 2371 - /* 2372 - * We don't require the BPF scheduler to avoid dispatching to 2373 - * offline CPUs mostly for convenience but also because CPUs can 2374 - * go offline between scx_bpf_dispatch() calls and here. If @p 2375 - * is destined to an offline CPU, queue it on its current CPU 2376 - * instead, which should always be safe. As this is an allowed 2377 - * behavior, don't trigger an ops error. 2378 - */ 2379 - if (!scx_rq_online(dst_rq)) 2380 - dst_rq = src_rq; 2381 2357 2382 2358 if (src_rq == dst_rq) { 2383 2359 /* ··· 2401 2399 } 2402 2400 #endif /* CONFIG_SMP */ 2403 2401 2404 - scx_ops_error("SCX_DSQ_LOCAL[_ON] verdict target cpu %d not allowed for %s[%d]", 2405 - cpu_of(dst_rq), p->comm, p->pid); 2406 2402 return DTL_INVALID; 2407 2403 } 2408 2404