Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Wrap kfunc args in struct to prepare for aux__prog

scx_bpf_dsq_insert_vtime() and scx_bpf_select_cpu_and() currently have 5
parameters. An upcoming change will add an aux__prog parameter, which will exceed
BPF's 5 argument limit.

Prepare by adding new kfuncs __scx_bpf_dsq_insert_vtime() and
__scx_bpf_select_cpu_and() that take args structs. The existing kfuncs are
kept as compatibility wrappers. BPF programs use inline wrappers that detect
kernel API version via bpf_core_type_exists() and use the new struct-based
kfuncs when available, falling back to compat kfuncs otherwise. This allows
BPF programs to work with both old and new kernels.

Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

Tejun Heo c0d630ba 3035addf

+182 -39
+70 -30
kernel/sched/ext.c
··· 5448 5448 scx_dsq_insert_commit(sch, p, dsq_id, enq_flags); 5449 5449 } 5450 5450 5451 - /** 5452 - * scx_bpf_dsq_insert_vtime - Insert a task into the vtime priority queue of a DSQ 5453 - * @p: task_struct to insert 5454 - * @dsq_id: DSQ to insert into 5455 - * @slice: duration @p can run for in nsecs, 0 to keep the current value 5456 - * @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ 5457 - * @enq_flags: SCX_ENQ_* 5458 - * 5459 - * Insert @p into the vtime priority queue of the DSQ identified by @dsq_id. 5460 - * Tasks queued into the priority queue are ordered by @vtime. All other aspects 5461 - * are identical to scx_bpf_dsq_insert(). 5462 - * 5463 - * @vtime ordering is according to time_before64() which considers wrapping. A 5464 - * numerically larger vtime may indicate an earlier position in the ordering and 5465 - * vice-versa. 5466 - * 5467 - * A DSQ can only be used as a FIFO or priority queue at any given time and this 5468 - * function must not be called on a DSQ which already has one or more FIFO tasks 5469 - * queued and vice-versa. Also, the built-in DSQs (SCX_DSQ_LOCAL and 5470 - * SCX_DSQ_GLOBAL) cannot be used as priority queues. 
5471 - */ 5472 - __bpf_kfunc void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, 5473 - u64 slice, u64 vtime, u64 enq_flags) 5451 + static void scx_dsq_insert_vtime(struct scx_sched *sch, struct task_struct *p, 5452 + u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) 5474 5453 { 5475 - struct scx_sched *sch; 5476 - 5477 - guard(rcu)(); 5478 - sch = rcu_dereference(scx_root); 5479 - if (unlikely(!sch)) 5480 - return; 5481 - 5482 5454 if (!scx_dsq_insert_preamble(sch, p, enq_flags)) 5483 5455 return; 5484 5456 ··· 5464 5492 scx_dsq_insert_commit(sch, p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ); 5465 5493 } 5466 5494 5495 + struct scx_bpf_dsq_insert_vtime_args { 5496 + /* @p can't be packed together as KF_RCU is not transitive */ 5497 + u64 dsq_id; 5498 + u64 slice; 5499 + u64 vtime; 5500 + u64 enq_flags; 5501 + }; 5502 + 5503 + /** 5504 + * __scx_bpf_dsq_insert_vtime - Arg-wrapped vtime DSQ insertion 5505 + * @p: task_struct to insert 5506 + * @args: struct containing the rest of the arguments 5507 + * @args->dsq_id: DSQ to insert into 5508 + * @args->slice: duration @p can run for in nsecs, 0 to keep the current value 5509 + * @args->vtime: @p's ordering inside the vtime-sorted queue of the target DSQ 5510 + * @args->enq_flags: SCX_ENQ_* 5511 + * 5512 + * Wrapper kfunc that takes arguments via struct to work around BPF's 5 argument 5513 + * limit. BPF programs should use scx_bpf_dsq_insert_vtime() which is provided 5514 + * as an inline wrapper in common.bpf.h. 5515 + * 5516 + * Insert @p into the vtime priority queue of the DSQ identified by 5517 + * @args->dsq_id. Tasks queued into the priority queue are ordered by 5518 + * @args->vtime. All other aspects are identical to scx_bpf_dsq_insert(). 5519 + * 5520 + * @args->vtime ordering is according to time_before64() which considers 5521 + * wrapping. A numerically larger vtime may indicate an earlier position in the 5522 + * ordering and vice-versa. 
5523 + * 5524 + * A DSQ can only be used as a FIFO or priority queue at any given time and this 5525 + * function must not be called on a DSQ which already has one or more FIFO tasks 5526 + * queued and vice-versa. Also, the built-in DSQs (SCX_DSQ_LOCAL and 5527 + * SCX_DSQ_GLOBAL) cannot be used as priority queues. 5528 + */ 5529 + __bpf_kfunc void 5530 + __scx_bpf_dsq_insert_vtime(struct task_struct *p, 5531 + struct scx_bpf_dsq_insert_vtime_args *args) 5532 + { 5533 + struct scx_sched *sch; 5534 + 5535 + guard(rcu)(); 5536 + 5537 + sch = rcu_dereference(scx_root); 5538 + if (unlikely(!sch)) 5539 + return; 5540 + 5541 + scx_dsq_insert_vtime(sch, p, args->dsq_id, args->slice, args->vtime, 5542 + args->enq_flags); 5543 + } 5544 + 5545 + /* 5546 + * COMPAT: Will be removed in v6.23. 5547 + */ 5548 + __bpf_kfunc void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, 5549 + u64 slice, u64 vtime, u64 enq_flags) 5550 + { 5551 + struct scx_sched *sch; 5552 + 5553 + guard(rcu)(); 5554 + 5555 + sch = rcu_dereference(scx_root); 5556 + if (unlikely(!sch)) 5557 + return; 5558 + 5559 + scx_dsq_insert_vtime(sch, p, dsq_id, slice, vtime, enq_flags); 5560 + } 5561 + 5467 5562 __bpf_kfunc_end_defs(); 5468 5563 5469 5564 BTF_KFUNCS_START(scx_kfunc_ids_enqueue_dispatch) 5470 5565 BTF_ID_FLAGS(func, scx_bpf_dsq_insert, KF_RCU) 5566 + BTF_ID_FLAGS(func, __scx_bpf_dsq_insert_vtime, KF_RCU) 5471 5567 BTF_ID_FLAGS(func, scx_bpf_dsq_insert_vtime, KF_RCU) 5472 5568 BTF_KFUNCS_END(scx_kfunc_ids_enqueue_dispatch) 5473 5569
+37 -6
kernel/sched/ext_idle.c
··· 995 995 return prev_cpu; 996 996 } 997 997 998 + struct scx_bpf_select_cpu_and_args { 999 + /* @p and @cpus_allowed can't be packed together as KF_RCU is not transitive */ 1000 + s32 prev_cpu; 1001 + u64 wake_flags; 1002 + u64 flags; 1003 + }; 1004 + 998 1005 /** 999 - * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p, 1000 - * prioritizing those in @cpus_allowed 1006 + * __scx_bpf_select_cpu_and - Arg-wrapped CPU selection with cpumask 1001 1007 * @p: task_struct to select a CPU for 1002 - * @prev_cpu: CPU @p was on previously 1003 - * @wake_flags: %SCX_WAKE_* flags 1004 1008 * @cpus_allowed: cpumask of allowed CPUs 1005 - * @flags: %SCX_PICK_IDLE* flags 1009 + * @args: struct containing the rest of the arguments 1010 + * @args->prev_cpu: CPU @p was on previously 1011 + * @args->wake_flags: %SCX_WAKE_* flags 1012 + * @args->flags: %SCX_PICK_IDLE* flags 1013 + * 1014 + * Wrapper kfunc that takes arguments via struct to work around BPF's 5 argument 1015 + * limit. BPF programs should use scx_bpf_select_cpu_and() which is provided 1016 + * as an inline wrapper in common.bpf.h. 1006 1017 * 1007 1018 * Can be called from ops.select_cpu(), ops.enqueue(), or from an unlocked 1008 1019 * context such as a BPF test_run() call, as long as built-in CPU selection 1009 1020 * is enabled: ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE 1010 1021 * is set. 1011 1022 * 1012 - * @p, @prev_cpu and @wake_flags match ops.select_cpu(). 1023 + * @p, @args->prev_cpu and @args->wake_flags match ops.select_cpu(). 1013 1024 * 1014 1025 * Returns the selected idle CPU, which will be automatically awakened upon 1015 1026 * returning from ops.select_cpu() and can be used for direct dispatch, or 1016 1027 * a negative value if no idle CPU is available. 
1028 + */ 1029 + __bpf_kfunc s32 1030 + __scx_bpf_select_cpu_and(struct task_struct *p, const struct cpumask *cpus_allowed, 1031 + struct scx_bpf_select_cpu_and_args *args) 1032 + { 1033 + struct scx_sched *sch; 1034 + 1035 + guard(rcu)(); 1036 + 1037 + sch = rcu_dereference(scx_root); 1038 + if (unlikely(!sch)) 1039 + return -ENODEV; 1040 + 1041 + return select_cpu_from_kfunc(sch, p, args->prev_cpu, args->wake_flags, 1042 + cpus_allowed, args->flags); 1043 + } 1044 + 1045 + /* 1046 + * COMPAT: Will be removed in v6.23. 1017 1047 */ 1018 1048 __bpf_kfunc s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags, 1019 1049 const struct cpumask *cpus_allowed, u64 flags) ··· 1413 1383 BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) 1414 1384 BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu_node, KF_RCU) 1415 1385 BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) 1386 + BTF_ID_FLAGS(func, __scx_bpf_select_cpu_and, KF_RCU) 1416 1387 BTF_ID_FLAGS(func, scx_bpf_select_cpu_and, KF_RCU) 1417 1388 BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU) 1418 1389 BTF_KFUNCS_END(scx_kfunc_ids_idle)
+3 -3
tools/sched_ext/include/scx/common.bpf.h
··· 60 60 61 61 s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym; 62 62 s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym; 63 - s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags, 64 - const struct cpumask *cpus_allowed, u64 flags) __ksym __weak; 63 + s32 __scx_bpf_select_cpu_and(struct task_struct *p, const struct cpumask *cpus_allowed, 64 + struct scx_bpf_select_cpu_and_args *args) __ksym __weak; 65 65 void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak; 66 - void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; 66 + void __scx_bpf_dsq_insert_vtime(struct task_struct *p, struct scx_bpf_dsq_insert_vtime_args *args) __ksym __weak; 67 67 u32 scx_bpf_dispatch_nr_slots(void) __ksym; 68 68 void scx_bpf_dispatch_cancel(void) __ksym; 69 69 bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak;
+72
tools/sched_ext/include/scx/compat.bpf.h
··· 144 144 } 145 145 146 146 /* 147 + * v6.19: To work around BPF maximum parameter limit, the following kfuncs are 148 + * replaced with variants that pack scalar arguments in a struct. Wrappers are 149 + * provided to maintain source compatibility. 150 + * 151 + * The kernel will carry the compat variants until v6.23 to maintain binary 152 + * compatibility. After v6.23 release, remove the compat handling and move the 153 + * wrappers to common.bpf.h. 154 + */ 155 + s32 scx_bpf_select_cpu_and___compat(struct task_struct *p, s32 prev_cpu, u64 wake_flags, 156 + const struct cpumask *cpus_allowed, u64 flags) __ksym __weak; 157 + void scx_bpf_dsq_insert_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; 158 + 159 + /** 160 + * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p 161 + * @p: task_struct to select a CPU for 162 + * @prev_cpu: CPU @p was on previously 163 + * @wake_flags: %SCX_WAKE_* flags 164 + * @cpus_allowed: cpumask of allowed CPUs 165 + * @flags: %SCX_PICK_IDLE* flags 166 + * 167 + * Inline wrapper that packs scalar arguments into a struct and calls 168 + * __scx_bpf_select_cpu_and(). See __scx_bpf_select_cpu_and() for details. 
169 + */ 170 + static inline s32 171 + scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags, 172 + const struct cpumask *cpus_allowed, u64 flags) 173 + { 174 + if (bpf_core_type_exists(struct scx_bpf_select_cpu_and_args)) { 175 + struct scx_bpf_select_cpu_and_args args = { 176 + .prev_cpu = prev_cpu, 177 + .wake_flags = wake_flags, 178 + .flags = flags, 179 + }; 180 + 181 + return __scx_bpf_select_cpu_and(p, cpus_allowed, &args); 182 + } else { 183 + return scx_bpf_select_cpu_and___compat(p, prev_cpu, wake_flags, 184 + cpus_allowed, flags); 185 + } 186 + } 187 + 188 + /** 189 + * scx_bpf_dsq_insert_vtime - Insert a task into the vtime priority queue of a DSQ 190 + * @p: task_struct to insert 191 + * @dsq_id: DSQ to insert into 192 + * @slice: duration @p can run for in nsecs, 0 to keep the current value 193 + * @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ 194 + * @enq_flags: SCX_ENQ_* 195 + * 196 + * Inline wrapper that packs scalar arguments into a struct and calls 197 + * __scx_bpf_dsq_insert_vtime(). See __scx_bpf_dsq_insert_vtime() for details. 198 + */ 199 + static inline void 200 + scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, 201 + u64 enq_flags) 202 + { 203 + if (bpf_core_type_exists(struct scx_bpf_dsq_insert_vtime_args)) { 204 + struct scx_bpf_dsq_insert_vtime_args args = { 205 + .dsq_id = dsq_id, 206 + .slice = slice, 207 + .vtime = vtime, 208 + .enq_flags = enq_flags, 209 + }; 210 + 211 + __scx_bpf_dsq_insert_vtime(p, &args); 212 + } else { 213 + scx_bpf_dsq_insert_vtime___compat(p, dsq_id, slice, vtime, 214 + enq_flags); 215 + } 216 + } 217 + 218 + /* 147 219 * Define sched_ext_ops. This may be expanded to define multiple variants for 148 220 * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). 149 221 */