Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Introduce scx_bpf_dsq_reenq() for remote local DSQ reenqueue

scx_bpf_reenqueue_local() can only trigger re-enqueue of the current CPU's
local DSQ. Introduce scx_bpf_dsq_reenq(), which takes a DSQ ID and can target
any local DSQ, including those of remote CPUs, via SCX_DSQ_LOCAL_ON | cpu.
Support for user DSQs will be added by future changes.

scx_bpf_reenqueue_local() is reimplemented as a simple wrapper around
scx_bpf_dsq_reenq(SCX_DSQ_LOCAL, 0) and may be deprecated in the future.

Add a compatibility shim to compat.bpf.h and update scx_qmap to test the new
functionality.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>

Tejun Heo 9c34c507 0c4df54a

+106 -49
+73 -45
kernel/sched/ext.c
··· 1080 1080 schedule_deferred(rq); 1081 1081 } 1082 1082 1083 + static void schedule_dsq_reenq(struct scx_sched *sch, struct scx_dispatch_q *dsq) 1084 + { 1085 + /* 1086 + * Allowing reenqueues doesn't make sense while bypassing. This also 1087 + * blocks from new reenqueues to be scheduled on dead scheds. 1088 + */ 1089 + if (unlikely(READ_ONCE(sch->bypass_depth))) 1090 + return; 1091 + 1092 + if (dsq->id == SCX_DSQ_LOCAL) { 1093 + struct rq *rq = container_of(dsq, struct rq, scx.local_dsq); 1094 + struct scx_sched_pcpu *sch_pcpu = per_cpu_ptr(sch->pcpu, cpu_of(rq)); 1095 + struct scx_deferred_reenq_local *drl = &sch_pcpu->deferred_reenq_local; 1096 + 1097 + scoped_guard (raw_spinlock_irqsave, &rq->scx.deferred_reenq_lock) { 1098 + if (list_empty(&drl->node)) 1099 + list_move_tail(&drl->node, &rq->scx.deferred_reenq_locals); 1100 + } 1101 + 1102 + schedule_deferred(rq); 1103 + } else { 1104 + scx_error(sch, "DSQ 0x%llx not allowed for reenq", dsq->id); 1105 + } 1106 + } 1107 + 1083 1108 /** 1084 1109 * touch_core_sched - Update timestamp used for core-sched task ordering 1085 1110 * @rq: rq to read clock from, must be locked ··· 7799 7774 * Iterate over all of the tasks currently enqueued on the local DSQ of the 7800 7775 * caller's CPU, and re-enqueue them in the BPF scheduler. Returns the number of 7801 7776 * processed tasks. Can only be called from ops.cpu_release(). 7802 - * 7803 - * COMPAT: Will be removed in v6.23 along with the ___v2 suffix on the void 7804 - * returning variant that can be called from anywhere. 
7805 7777 */ 7806 7778 __bpf_kfunc u32 scx_bpf_reenqueue_local(const struct bpf_prog_aux *aux) 7807 7779 { ··· 8228 8206 return rcu_dereference(dsq->first_task); 8229 8207 } 8230 8208 8209 + /** 8210 + * scx_bpf_dsq_reenq - Re-enqueue tasks on a DSQ 8211 + * @dsq_id: DSQ to re-enqueue 8212 + * @reenq_flags: %SCX_RENQ_* 8213 + * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs 8214 + * 8215 + * Iterate over all of the tasks currently enqueued on the DSQ identified by 8216 + * @dsq_id, and re-enqueue them in the BPF scheduler. The following DSQs are 8217 + * supported: 8218 + * 8219 + * - Local DSQs (%SCX_DSQ_LOCAL or %SCX_DSQ_LOCAL_ON | $cpu) 8220 + * 8221 + * Re-enqueues are performed asynchronously. Can be called from anywhere. 8222 + */ 8223 + __bpf_kfunc void scx_bpf_dsq_reenq(u64 dsq_id, u64 reenq_flags, 8224 + const struct bpf_prog_aux *aux) 8225 + { 8226 + struct scx_sched *sch; 8227 + struct scx_dispatch_q *dsq; 8228 + 8229 + guard(preempt)(); 8230 + 8231 + sch = scx_prog_sched(aux); 8232 + if (unlikely(!sch)) 8233 + return; 8234 + 8235 + dsq = find_dsq_for_dispatch(sch, this_rq(), dsq_id, smp_processor_id()); 8236 + schedule_dsq_reenq(sch, dsq); 8237 + } 8238 + 8239 + /** 8240 + * scx_bpf_reenqueue_local - Re-enqueue tasks on a local DSQ 8241 + * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs 8242 + * 8243 + * Iterate over all of the tasks currently enqueued on the local DSQ of the 8244 + * caller's CPU, and re-enqueue them in the BPF scheduler. Can be called from 8245 + * anywhere. 8246 + * 8247 + * This is now a special case of scx_bpf_dsq_reenq() and may be removed in the 8248 + * future. 
8249 + */ 8250 + __bpf_kfunc void scx_bpf_reenqueue_local___v2(const struct bpf_prog_aux *aux) 8251 + { 8252 + scx_bpf_dsq_reenq(SCX_DSQ_LOCAL, 0, aux); 8253 + } 8254 + 8231 8255 __bpf_kfunc_end_defs(); 8232 8256 8233 8257 static s32 __bstr_format(struct scx_sched *sch, u64 *data_buf, char *line_buf, ··· 8429 8361 */ 8430 8362 if (dd->cursor >= sizeof(buf->line) || buf->line[dd->cursor - 1] == '\n') 8431 8363 ops_dump_flush(); 8432 - } 8433 - 8434 - /** 8435 - * scx_bpf_reenqueue_local - Re-enqueue tasks on a local DSQ 8436 - * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs 8437 - * 8438 - * Iterate over all of the tasks currently enqueued on the local DSQ of the 8439 - * caller's CPU, and re-enqueue them in the BPF scheduler. Can be called from 8440 - * anywhere. 8441 - */ 8442 - __bpf_kfunc void scx_bpf_reenqueue_local___v2(const struct bpf_prog_aux *aux) 8443 - { 8444 - unsigned long flags; 8445 - struct scx_sched *sch; 8446 - struct rq *rq; 8447 - 8448 - raw_local_irq_save(flags); 8449 - 8450 - sch = scx_prog_sched(aux); 8451 - if (unlikely(!sch)) 8452 - goto out_irq_restore; 8453 - 8454 - /* 8455 - * Allowing reenqueue-locals doesn't make sense while bypassing. This 8456 - * also blocks from new reenqueues to be scheduled on dead scheds. 
8457 - */ 8458 - if (unlikely(sch->bypass_depth)) 8459 - goto out_irq_restore; 8460 - 8461 - rq = this_rq(); 8462 - scoped_guard (raw_spinlock, &rq->scx.deferred_reenq_lock) { 8463 - struct scx_sched_pcpu *pcpu = this_cpu_ptr(sch->pcpu); 8464 - 8465 - if (list_empty(&pcpu->deferred_reenq_local.node)) 8466 - list_move_tail(&pcpu->deferred_reenq_local.node, 8467 - &rq->scx.deferred_reenq_locals); 8468 - } 8469 - 8470 - schedule_deferred(rq); 8471 - out_irq_restore: 8472 - raw_local_irq_restore(flags); 8473 8364 } 8474 8365 8475 8366 /** ··· 8847 8820 BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) 8848 8821 BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) 8849 8822 BTF_ID_FLAGS(func, scx_bpf_dsq_peek, KF_IMPLICIT_ARGS | KF_RCU_PROTECTED | KF_RET_NULL) 8823 + BTF_ID_FLAGS(func, scx_bpf_dsq_reenq, KF_IMPLICIT_ARGS) 8824 + BTF_ID_FLAGS(func, scx_bpf_reenqueue_local___v2, KF_IMPLICIT_ARGS) 8850 8825 BTF_ID_FLAGS(func, bpf_iter_scx_dsq_new, KF_IMPLICIT_ARGS | KF_ITER_NEW | KF_RCU_PROTECTED) 8851 8826 BTF_ID_FLAGS(func, bpf_iter_scx_dsq_next, KF_ITER_NEXT | KF_RET_NULL) 8852 8827 BTF_ID_FLAGS(func, bpf_iter_scx_dsq_destroy, KF_ITER_DESTROY) 8853 8828 BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_IMPLICIT_ARGS) 8854 8829 BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_IMPLICIT_ARGS) 8855 8830 BTF_ID_FLAGS(func, scx_bpf_dump_bstr, KF_IMPLICIT_ARGS) 8856 - BTF_ID_FLAGS(func, scx_bpf_reenqueue_local___v2, KF_IMPLICIT_ARGS) 8857 8831 BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap, KF_IMPLICIT_ARGS) 8858 8832 BTF_ID_FLAGS(func, scx_bpf_cpuperf_cur, KF_IMPLICIT_ARGS) 8859 8833 BTF_ID_FLAGS(func, scx_bpf_cpuperf_set, KF_IMPLICIT_ARGS)
+21
tools/sched_ext/include/scx/compat.bpf.h
··· 376 376 } 377 377 378 378 /* 379 + * v6.20: New scx_bpf_dsq_reenq() that allows re-enqueues on more DSQs. This 380 + * will eventually deprecate scx_bpf_reenqueue_local(). 381 + */ 382 + void scx_bpf_dsq_reenq___compat(u64 dsq_id, u64 reenq_flags, const struct bpf_prog_aux *aux__prog) __ksym __weak; 383 + 384 + static inline bool __COMPAT_has_generic_reenq(void) 385 + { 386 + return bpf_ksym_exists(scx_bpf_dsq_reenq___compat); 387 + } 388 + 389 + static inline void scx_bpf_dsq_reenq(u64 dsq_id, u64 reenq_flags) 390 + { 391 + if (bpf_ksym_exists(scx_bpf_dsq_reenq___compat)) 392 + scx_bpf_dsq_reenq___compat(dsq_id, reenq_flags, NULL); 393 + else if (dsq_id == SCX_DSQ_LOCAL && reenq_flags == 0) 394 + scx_bpf_reenqueue_local(); 395 + else 396 + scx_bpf_error("kernel too old to reenqueue foreign local or user DSQs"); 397 + } 398 + 399 + /* 379 400 * Define sched_ext_ops. This may be expanded to define multiple variants for 380 401 * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). 381 402 */
+9 -2
tools/sched_ext/scx_qmap.bpf.c
··· 131 131 } cpu_ctx_stor SEC(".maps"); 132 132 133 133 /* Statistics */ 134 - u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq; 134 + u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_reenqueued_cpu0, nr_dequeued, nr_ddsp_from_enq; 135 135 u64 nr_core_sched_execed; 136 136 u64 nr_expedited_local, nr_expedited_remote, nr_expedited_lost, nr_expedited_from_timer; 137 137 u32 cpuperf_min, cpuperf_avg, cpuperf_max; ··· 206 206 void *ring; 207 207 s32 cpu; 208 208 209 - if (enq_flags & SCX_ENQ_REENQ) 209 + if (enq_flags & SCX_ENQ_REENQ) { 210 210 __sync_fetch_and_add(&nr_reenqueued, 1); 211 + if (scx_bpf_task_cpu(p) == 0) 212 + __sync_fetch_and_add(&nr_reenqueued_cpu0, 1); 213 + } 211 214 212 215 if (p->flags & PF_KTHREAD) { 213 216 if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth)) ··· 564 561 case 2: /* SCHED_RR */ 565 562 case 6: /* SCHED_DEADLINE */ 566 563 scx_bpf_reenqueue_local(); 564 + 565 + /* trigger re-enqueue on CPU0 just to exercise LOCAL_ON */ 566 + if (__COMPAT_has_generic_reenq()) 567 + scx_bpf_dsq_reenq(SCX_DSQ_LOCAL_ON | 0, 0); 567 568 } 568 569 569 570 return 0;
+3 -2
tools/sched_ext/scx_qmap.c
··· 137 137 long nr_enqueued = skel->bss->nr_enqueued; 138 138 long nr_dispatched = skel->bss->nr_dispatched; 139 139 140 - printf("stats : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n", 140 + printf("stats : enq=%lu dsp=%lu delta=%ld reenq/cpu0=%"PRIu64"/%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n", 141 141 nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched, 142 - skel->bss->nr_reenqueued, skel->bss->nr_dequeued, 142 + skel->bss->nr_reenqueued, skel->bss->nr_reenqueued_cpu0, 143 + skel->bss->nr_dequeued, 143 144 skel->bss->nr_core_sched_execed, 144 145 skel->bss->nr_ddsp_from_enq); 145 146 printf(" exp_local=%"PRIu64" exp_remote=%"PRIu64" exp_timer=%"PRIu64" exp_lost=%"PRIu64"\n",