Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Deny SCX kfuncs to non-SCX struct_ops programs

scx_kfunc_context_filter() currently allows non-SCX struct_ops programs
(e.g. tcp_congestion_ops) to call SCX unlocked kfuncs. This is wrong
for two reasons:

- It is semantically incorrect: a TCP congestion control program has no
business calling SCX kfuncs such as scx_bpf_kick_cpu().

- With CONFIG_EXT_SUB_SCHED=y, kfuncs like scx_bpf_kick_cpu() call
scx_prog_sched(aux), which invokes bpf_prog_get_assoc_struct_ops(aux)
and casts the result to struct sched_ext_ops * before reading ops->priv.
For a non-SCX struct_ops program the returned pointer is the kdata of
that struct_ops type, which is far smaller than sched_ext_ops, making
the read an out-of-bounds access (confirmed with KASAN).

Extend the filter to cover scx_kfunc_set_any and scx_kfunc_set_idle as
well, and deny all SCX kfuncs for any struct_ops program that is not the
SCX struct_ops. This addresses both issues: the semantic contract is
enforced at the verifier level, and the runtime out-of-bounds access
becomes unreachable.

Fixes: d1d3c1c6ae36 ("sched_ext: Add verifier-time kfunc context filter")
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

Authored by Cheng-Yang Chou and committed by Tejun Heo
2d2b026c 87019cb6

+20 -14
+18 -14
kernel/sched/ext.c
··· 9480 9480 static const struct btf_kfunc_id_set scx_kfunc_set_any = { 9481 9481 .owner = THIS_MODULE, 9482 9482 .set = &scx_kfunc_ids_any, 9483 + .filter = scx_kfunc_context_filter, 9483 9484 }; 9484 9485 9485 9486 /* ··· 9528 9527 }; 9529 9528 9530 9529 /* 9531 - * Verifier-time filter for context-sensitive SCX kfuncs. Registered via the 9532 - * .filter field on each per-group btf_kfunc_id_set. The BPF core invokes this 9533 - * for every kfunc call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or 9530 + * Verifier-time filter for SCX kfuncs. Registered via the .filter field on 9531 + * each per-group btf_kfunc_id_set. The BPF core invokes this for every kfunc 9532 + * call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or 9534 9533 * BPF_PROG_TYPE_SYSCALL), regardless of which set originally introduced the 9535 - * kfunc - so the filter must short-circuit on kfuncs it doesn't govern (e.g. 9536 - * scx_kfunc_ids_any) by falling through to "allow" when none of the 9537 - * context-sensitive sets contain the kfunc. 9534 + * kfunc - so the filter must short-circuit on kfuncs it doesn't govern by 9535 + * falling through to "allow" when none of the SCX sets contain the kfunc. 9538 9536 */ 9539 9537 int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id) 9540 9538 { ··· 9542 9542 bool in_enqueue = btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id); 9543 9543 bool in_dispatch = btf_id_set8_contains(&scx_kfunc_ids_dispatch, kfunc_id); 9544 9544 bool in_cpu_release = btf_id_set8_contains(&scx_kfunc_ids_cpu_release, kfunc_id); 9545 + bool in_idle = btf_id_set8_contains(&scx_kfunc_ids_idle, kfunc_id); 9546 + bool in_any = btf_id_set8_contains(&scx_kfunc_ids_any, kfunc_id); 9545 9547 u32 moff, flags; 9546 9548 9547 - /* Not a context-sensitive kfunc (e.g. from scx_kfunc_ids_any) - allow. */ 9548 - if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch || in_cpu_release)) 9549 + /* Not an SCX kfunc - allow. 
*/ 9550 + if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch || 9551 + in_cpu_release || in_idle || in_any)) 9549 9552 return 0; 9550 9553 9551 9554 /* SYSCALL progs (e.g. BPF test_run()) may call unlocked and select_cpu kfuncs. */ 9552 9555 if (prog->type == BPF_PROG_TYPE_SYSCALL) 9553 - return (in_unlocked || in_select_cpu) ? 0 : -EACCES; 9556 + return (in_unlocked || in_select_cpu || in_idle || in_any) ? 0 : -EACCES; 9554 9557 9555 9558 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) 9556 - return -EACCES; 9559 + return (in_any || in_idle) ? 0 : -EACCES; 9557 9560 9558 9561 /* 9559 9562 * add_subprog_and_kfunc() collects all kfunc calls, including dead code ··· 9569 9566 return 0; 9570 9567 9571 9568 /* 9572 - * Non-SCX struct_ops: only unlocked kfuncs are safe. The other 9573 - * context-sensitive kfuncs assume the rq lock is held by the SCX 9574 - * dispatch path, which doesn't apply to other struct_ops users. 9569 + * Non-SCX struct_ops: SCX kfuncs are not permitted. 9575 9570 */ 9576 9571 if (prog->aux->st_ops != &bpf_sched_ext_ops) 9577 - return in_unlocked ? 0 : -EACCES; 9572 + return -EACCES; 9578 9573 9579 9574 /* SCX struct_ops: check the per-op allow list. */ 9575 + if (in_any || in_idle) 9576 + return 0; 9577 + 9580 9578 moff = prog->aux->attach_st_ops_member_off; 9581 9579 flags = scx_kf_allow_flags[SCX_MOFF_IDX(moff)]; 9582 9580
+1
kernel/sched/ext_idle.c
··· 1467 1467 static const struct btf_kfunc_id_set scx_kfunc_set_idle = { 1468 1468 .owner = THIS_MODULE, 1469 1469 .set = &scx_kfunc_ids_idle, 1470 + .filter = scx_kfunc_context_filter, 1470 1471 }; 1471 1472 1472 1473 /*
+1
kernel/sched/ext_idle.h
··· 12 12 13 13 struct sched_ext_ops; 14 14 15 + extern struct btf_id_set8 scx_kfunc_ids_idle; 15 16 extern struct btf_id_set8 scx_kfunc_ids_select_cpu; 16 17 17 18 void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops);