Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'sched_ext-for-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext updates from Tejun Heo:

- scx_bpf_now() added so that the BPF scheduler can access the cached
   timestamp in struct rq to avoid reading the TSC multiple times within a
   locked scheduling operation (a usage sketch follows this list).

- Minor updates to the built-in idle CPU selection logic.

- tools/sched_ext updates and other misc changes.
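
A minimal, hypothetical usage sketch of the new kfunc (illustrative only, not
part of this pull: the sketch_running/sketch_stopping callbacks, the
task-storage map, and the runtime counter are made up; scx_bpf_now() and
BPF_STRUCT_OPS() come from the sched_ext tooling headers, and compat.bpf.h
falls back to bpf_ktime_get_ns() on kernels without the kfunc):

/* Illustrative sketch: timestamp a task in ops.running() and accumulate its
 * runtime in ops.stopping() using the cached rq clock. */
#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

struct {
        __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, u64);             /* timestamp of last ops.running() */
} started_at SEC(".maps");

u64 total_runtime_ns;                   /* hypothetical global counter */

void BPF_STRUCT_OPS(sketch_running, struct task_struct *p)
{
        u64 *tsp = bpf_task_storage_get(&started_at, p, 0,
                                        BPF_LOCAL_STORAGE_GET_F_CREATE);
        if (tsp)
                *tsp = scx_bpf_now();   /* cached rq clock, no extra TSC read */
}

void BPF_STRUCT_OPS(sketch_stopping, struct task_struct *p, bool runnable)
{
        u64 *tsp = bpf_task_storage_get(&started_at, p, 0, 0);

        /* both reads are on the task's CPU, where scx_bpf_now() is
         * guaranteed to be monotonically non-decreasing */
        if (tsp && *tsp)
                total_runtime_ns += scx_bpf_now() - *tsp;
}

/* Wire these up as .running and .stopping in the scheduler's SCX_OPS_DEFINE(). */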

* tag 'sched_ext-for-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
sched_ext: fix kernel-doc warnings
sched_ext: Use time helpers in BPF schedulers
sched_ext: Replace bpf_ktime_get_ns() to scx_bpf_now()
sched_ext: Add time helpers for BPF schedulers
sched_ext: Add scx_bpf_now() for BPF scheduler
sched_ext: Implement scx_bpf_now()
sched_ext: Relocate scx_enabled() related code
sched_ext: Add option -l in selftest runner to list all available tests
sched_ext: Include remaining task time slice in error state dump
sched_ext: update scx_bpf_dsq_insert() doc for SCX_DSQ_LOCAL_ON
sched_ext: idle: small CPU iteration refactoring
sched_ext: idle: introduce check_builtin_idle_enabled() helper
sched_ext: idle: clarify comments
sched_ext: idle: use assign_cpu() to update the idle cpumask
sched_ext: Use str_enabled_disabled() helper in update_selcpu_topology()
sched_ext: Use sizeof_field for key_len in dsq_hash_params
tools/sched_ext: Receive updates from SCX repo
sched_ext: Use the NUMA scheduling domain for NUMA optimizations

+690 -160
+3 -3
Documentation/scheduler/sched-ext.rst
···
   task was inserted directly from ``ops.select_cpu()``). ``ops.enqueue()``
   can make one of the following decisions:
 
-  * Immediately insert the task into either the global or local DSQ by
-    calling ``scx_bpf_dsq_insert()`` with ``SCX_DSQ_GLOBAL`` or
-    ``SCX_DSQ_LOCAL``, respectively.
+  * Immediately insert the task into either the global or a local DSQ by
+    calling ``scx_bpf_dsq_insert()`` with one of the following options:
+    ``SCX_DSQ_GLOBAL``, ``SCX_DSQ_LOCAL``, or ``SCX_DSQ_LOCAL_ON | cpu``.
 
   * Immediately insert the task into a custom DSQ by calling
     ``scx_bpf_dsq_insert()`` with a DSQ ID which is smaller than 2^63.
+5 -1
kernel/sched/core.c
···
  void update_rq_clock(struct rq *rq)
  {
          s64 delta;
+         u64 clock;
 
          lockdep_assert_rq_held(rq);
 
···
          SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
          rq->clock_update_flags |= RQCF_UPDATED;
  #endif
+         clock = sched_clock_cpu(cpu_of(rq));
+         scx_rq_clock_update(rq, clock);
 
-         delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+         delta = clock - rq->clock;
          if (delta < 0)
                  return;
          rq->clock += delta;
+ 
          update_rq_clock_task(rq, delta);
  }
+233 -102
kernel/sched/ext.c
··· 206 206 */ 207 207 struct sched_ext_ops { 208 208 /** 209 - * select_cpu - Pick the target CPU for a task which is being woken up 209 + * @select_cpu: Pick the target CPU for a task which is being woken up 210 210 * @p: task being woken up 211 211 * @prev_cpu: the cpu @p was on before sleeping 212 212 * @wake_flags: SCX_WAKE_* ··· 233 233 s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags); 234 234 235 235 /** 236 - * enqueue - Enqueue a task on the BPF scheduler 236 + * @enqueue: Enqueue a task on the BPF scheduler 237 237 * @p: task being enqueued 238 238 * @enq_flags: %SCX_ENQ_* 239 239 * ··· 248 248 void (*enqueue)(struct task_struct *p, u64 enq_flags); 249 249 250 250 /** 251 - * dequeue - Remove a task from the BPF scheduler 251 + * @dequeue: Remove a task from the BPF scheduler 252 252 * @p: task being dequeued 253 253 * @deq_flags: %SCX_DEQ_* 254 254 * ··· 264 264 void (*dequeue)(struct task_struct *p, u64 deq_flags); 265 265 266 266 /** 267 - * dispatch - Dispatch tasks from the BPF scheduler and/or user DSQs 267 + * @dispatch: Dispatch tasks from the BPF scheduler and/or user DSQs 268 268 * @cpu: CPU to dispatch tasks for 269 269 * @prev: previous task being switched out 270 270 * ··· 287 287 void (*dispatch)(s32 cpu, struct task_struct *prev); 288 288 289 289 /** 290 - * tick - Periodic tick 290 + * @tick: Periodic tick 291 291 * @p: task running currently 292 292 * 293 293 * This operation is called every 1/HZ seconds on CPUs which are ··· 297 297 void (*tick)(struct task_struct *p); 298 298 299 299 /** 300 - * runnable - A task is becoming runnable on its associated CPU 300 + * @runnable: A task is becoming runnable on its associated CPU 301 301 * @p: task becoming runnable 302 302 * @enq_flags: %SCX_ENQ_* 303 303 * ··· 324 324 void (*runnable)(struct task_struct *p, u64 enq_flags); 325 325 326 326 /** 327 - * running - A task is starting to run on its associated CPU 327 + * @running: A task is starting to run on its associated CPU 328 328 * @p: task starting to run 329 329 * 330 330 * See ->runnable() for explanation on the task state notifiers. ··· 332 332 void (*running)(struct task_struct *p); 333 333 334 334 /** 335 - * stopping - A task is stopping execution 335 + * @stopping: A task is stopping execution 336 336 * @p: task stopping to run 337 337 * @runnable: is task @p still runnable? 
338 338 * ··· 343 343 void (*stopping)(struct task_struct *p, bool runnable); 344 344 345 345 /** 346 - * quiescent - A task is becoming not runnable on its associated CPU 346 + * @quiescent: A task is becoming not runnable on its associated CPU 347 347 * @p: task becoming not runnable 348 348 * @deq_flags: %SCX_DEQ_* 349 349 * ··· 363 363 void (*quiescent)(struct task_struct *p, u64 deq_flags); 364 364 365 365 /** 366 - * yield - Yield CPU 366 + * @yield: Yield CPU 367 367 * @from: yielding task 368 368 * @to: optional yield target task 369 369 * ··· 378 378 bool (*yield)(struct task_struct *from, struct task_struct *to); 379 379 380 380 /** 381 - * core_sched_before - Task ordering for core-sched 381 + * @core_sched_before: Task ordering for core-sched 382 382 * @a: task A 383 383 * @b: task B 384 384 * ··· 396 396 bool (*core_sched_before)(struct task_struct *a, struct task_struct *b); 397 397 398 398 /** 399 - * set_weight - Set task weight 399 + * @set_weight: Set task weight 400 400 * @p: task to set weight for 401 401 * @weight: new weight [1..10000] 402 402 * ··· 405 405 void (*set_weight)(struct task_struct *p, u32 weight); 406 406 407 407 /** 408 - * set_cpumask - Set CPU affinity 408 + * @set_cpumask: Set CPU affinity 409 409 * @p: task to set CPU affinity for 410 410 * @cpumask: cpumask of cpus that @p can run on 411 411 * ··· 415 415 const struct cpumask *cpumask); 416 416 417 417 /** 418 - * update_idle - Update the idle state of a CPU 418 + * @update_idle: Update the idle state of a CPU 419 419 * @cpu: CPU to udpate the idle state for 420 420 * @idle: whether entering or exiting the idle state 421 421 * ··· 436 436 void (*update_idle)(s32 cpu, bool idle); 437 437 438 438 /** 439 - * cpu_acquire - A CPU is becoming available to the BPF scheduler 439 + * @cpu_acquire: A CPU is becoming available to the BPF scheduler 440 440 * @cpu: The CPU being acquired by the BPF scheduler. 441 441 * @args: Acquire arguments, see the struct definition. 442 442 * ··· 446 446 void (*cpu_acquire)(s32 cpu, struct scx_cpu_acquire_args *args); 447 447 448 448 /** 449 - * cpu_release - A CPU is taken away from the BPF scheduler 449 + * @cpu_release: A CPU is taken away from the BPF scheduler 450 450 * @cpu: The CPU being released by the BPF scheduler. 451 451 * @args: Release arguments, see the struct definition. 452 452 * ··· 458 458 void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args); 459 459 460 460 /** 461 - * init_task - Initialize a task to run in a BPF scheduler 461 + * @init_task: Initialize a task to run in a BPF scheduler 462 462 * @p: task to initialize for BPF scheduling 463 463 * @args: init arguments, see the struct definition 464 464 * ··· 473 473 s32 (*init_task)(struct task_struct *p, struct scx_init_task_args *args); 474 474 475 475 /** 476 - * exit_task - Exit a previously-running task from the system 476 + * @exit_task: Exit a previously-running task from the system 477 477 * @p: task to exit 478 + * @args: exit arguments, see the struct definition 478 479 * 479 480 * @p is exiting or the BPF scheduler is being unloaded. Perform any 480 481 * necessary cleanup for @p. ··· 483 482 void (*exit_task)(struct task_struct *p, struct scx_exit_task_args *args); 484 483 485 484 /** 486 - * enable - Enable BPF scheduling for a task 485 + * @enable: Enable BPF scheduling for a task 487 486 * @p: task to enable BPF scheduling for 488 487 * 489 488 * Enable @p for BPF scheduling. 
enable() is called on @p any time it ··· 492 491 void (*enable)(struct task_struct *p); 493 492 494 493 /** 495 - * disable - Disable BPF scheduling for a task 494 + * @disable: Disable BPF scheduling for a task 496 495 * @p: task to disable BPF scheduling for 497 496 * 498 497 * @p is exiting, leaving SCX or the BPF scheduler is being unloaded. ··· 502 501 void (*disable)(struct task_struct *p); 503 502 504 503 /** 505 - * dump - Dump BPF scheduler state on error 504 + * @dump: Dump BPF scheduler state on error 506 505 * @ctx: debug dump context 507 506 * 508 507 * Use scx_bpf_dump() to generate BPF scheduler specific debug dump. ··· 510 509 void (*dump)(struct scx_dump_ctx *ctx); 511 510 512 511 /** 513 - * dump_cpu - Dump BPF scheduler state for a CPU on error 512 + * @dump_cpu: Dump BPF scheduler state for a CPU on error 514 513 * @ctx: debug dump context 515 514 * @cpu: CPU to generate debug dump for 516 515 * @idle: @cpu is currently idle without any runnable tasks ··· 522 521 void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle); 523 522 524 523 /** 525 - * dump_task - Dump BPF scheduler state for a runnable task on error 524 + * @dump_task: Dump BPF scheduler state for a runnable task on error 526 525 * @ctx: debug dump context 527 526 * @p: runnable task to generate debug dump for 528 527 * ··· 533 532 534 533 #ifdef CONFIG_EXT_GROUP_SCHED 535 534 /** 536 - * cgroup_init - Initialize a cgroup 535 + * @cgroup_init: Initialize a cgroup 537 536 * @cgrp: cgroup being initialized 538 537 * @args: init arguments, see the struct definition 539 538 * ··· 548 547 struct scx_cgroup_init_args *args); 549 548 550 549 /** 551 - * cgroup_exit - Exit a cgroup 550 + * @cgroup_exit: Exit a cgroup 552 551 * @cgrp: cgroup being exited 553 552 * 554 553 * Either the BPF scheduler is being unloaded or @cgrp destroyed, exit ··· 557 556 void (*cgroup_exit)(struct cgroup *cgrp); 558 557 559 558 /** 560 - * cgroup_prep_move - Prepare a task to be moved to a different cgroup 559 + * @cgroup_prep_move: Prepare a task to be moved to a different cgroup 561 560 * @p: task being moved 562 561 * @from: cgroup @p is being moved from 563 562 * @to: cgroup @p is being moved to ··· 572 571 struct cgroup *from, struct cgroup *to); 573 572 574 573 /** 575 - * cgroup_move - Commit cgroup move 574 + * @cgroup_move: Commit cgroup move 576 575 * @p: task being moved 577 576 * @from: cgroup @p is being moved from 578 577 * @to: cgroup @p is being moved to ··· 583 582 struct cgroup *from, struct cgroup *to); 584 583 585 584 /** 586 - * cgroup_cancel_move - Cancel cgroup move 585 + * @cgroup_cancel_move: Cancel cgroup move 587 586 * @p: task whose cgroup move is being canceled 588 587 * @from: cgroup @p was being moved from 589 588 * @to: cgroup @p was being moved to ··· 595 594 struct cgroup *from, struct cgroup *to); 596 595 597 596 /** 598 - * cgroup_set_weight - A cgroup's weight is being changed 597 + * @cgroup_set_weight: A cgroup's weight is being changed 599 598 * @cgrp: cgroup whose weight is being updated 600 599 * @weight: new weight [1..10000] 601 600 * ··· 609 608 */ 610 609 611 610 /** 612 - * cpu_online - A CPU became online 611 + * @cpu_online: A CPU became online 613 612 * @cpu: CPU which just came up 614 613 * 615 614 * @cpu just came online. 
@cpu will not call ops.enqueue() or ··· 618 617 void (*cpu_online)(s32 cpu); 619 618 620 619 /** 621 - * cpu_offline - A CPU is going offline 620 + * @cpu_offline: A CPU is going offline 622 621 * @cpu: CPU which is going offline 623 622 * 624 623 * @cpu is going offline. @cpu will not call ops.enqueue() or ··· 631 630 */ 632 631 633 632 /** 634 - * init - Initialize the BPF scheduler 633 + * @init: Initialize the BPF scheduler 635 634 */ 636 635 s32 (*init)(void); 637 636 638 637 /** 639 - * exit - Clean up after the BPF scheduler 638 + * @exit: Clean up after the BPF scheduler 640 639 * @info: Exit info 641 640 * 642 641 * ops.exit() is also called on ops.init() failure, which is a bit ··· 646 645 void (*exit)(struct scx_exit_info *info); 647 646 648 647 /** 649 - * dispatch_max_batch - Max nr of tasks that dispatch() can dispatch 648 + * @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch 650 649 */ 651 650 u32 dispatch_max_batch; 652 651 653 652 /** 654 - * flags - %SCX_OPS_* flags 653 + * @flags: %SCX_OPS_* flags 655 654 */ 656 655 u64 flags; 657 656 658 657 /** 659 - * timeout_ms - The maximum amount of time, in milliseconds, that a 658 + * @timeout_ms: The maximum amount of time, in milliseconds, that a 660 659 * runnable task should be able to wait before being scheduled. The 661 660 * maximum timeout may not exceed the default timeout of 30 seconds. 662 661 * ··· 665 664 u32 timeout_ms; 666 665 667 666 /** 668 - * exit_dump_len - scx_exit_info.dump buffer length. If 0, the default 667 + * @exit_dump_len: scx_exit_info.dump buffer length. If 0, the default 669 668 * value of 32768 is used. 670 669 */ 671 670 u32 exit_dump_len; 672 671 673 672 /** 674 - * hotplug_seq - A sequence number that may be set by the scheduler to 673 + * @hotplug_seq: A sequence number that may be set by the scheduler to 675 674 * detect when a hotplug event has occurred during the loading process. 676 675 * If 0, no detection occurs. Otherwise, the scheduler will fail to 677 676 * load if the sequence number does not match @scx_hotplug_seq on the ··· 680 679 u64 hotplug_seq; 681 680 682 681 /** 683 - * name - BPF scheduler's name 682 + * @name: BPF scheduler's name 684 683 * 685 684 * Must be a non-zero valid BPF object name including only isalnum(), 686 685 * '_' and '.' chars. Shows up in kernel.sched_ext_ops sysctl while the ··· 961 960 static struct scx_dispatch_q **global_dsqs; 962 961 963 962 static const struct rhashtable_params dsq_hash_params = { 964 - .key_len = 8, 963 + .key_len = sizeof_field(struct scx_dispatch_q, id), 965 964 .key_offset = offsetof(struct scx_dispatch_q, id), 966 965 .head_offset = offsetof(struct scx_dispatch_q, hash_node), 967 966 }; ··· 1409 1408 /** 1410 1409 * scx_task_iter_next_locked - Next non-idle task with its rq locked 1411 1410 * @iter: iterator to walk 1412 - * @include_dead: Whether we should include dead tasks in the iteration 1413 1411 * 1414 1412 * Visit the non-idle task with its rq lock held. Allows callers to specify 1415 1413 * whether they would like to filter out dead tasks. See scx_task_iter_start() ··· 3136 3136 * scx_prio_less - Task ordering for core-sched 3137 3137 * @a: task A 3138 3138 * @b: task B 3139 + * @in_fi: in forced idle state 3139 3140 * 3140 3141 * Core-sched is implemented as an additional scheduling layer on top of the 3141 3142 * usual sched_class'es and needs to find out the expected task ordering. 
For ··· 3185 3184 * scx_pick_idle_cpu() can get caught in an infinite loop as 3186 3185 * @cpu is never cleared from idle_masks.smt. Ensure that @cpu 3187 3186 * is eventually cleared. 3187 + * 3188 + * NOTE: Use cpumask_intersects() and cpumask_test_cpu() to 3189 + * reduce memory writes, which may help alleviate cache 3190 + * coherence pressure. 3188 3191 */ 3189 3192 if (cpumask_intersects(smt, idle_masks.smt)) 3190 3193 cpumask_andnot(idle_masks.smt, idle_masks.smt, smt); ··· 3225 3220 } 3226 3221 3227 3222 /* 3223 + * Return the amount of CPUs in the same LLC domain of @cpu (or zero if the LLC 3224 + * domain is not defined). 3225 + */ 3226 + static unsigned int llc_weight(s32 cpu) 3227 + { 3228 + struct sched_domain *sd; 3229 + 3230 + sd = rcu_dereference(per_cpu(sd_llc, cpu)); 3231 + if (!sd) 3232 + return 0; 3233 + 3234 + return sd->span_weight; 3235 + } 3236 + 3237 + /* 3238 + * Return the cpumask representing the LLC domain of @cpu (or NULL if the LLC 3239 + * domain is not defined). 3240 + */ 3241 + static struct cpumask *llc_span(s32 cpu) 3242 + { 3243 + struct sched_domain *sd; 3244 + 3245 + sd = rcu_dereference(per_cpu(sd_llc, cpu)); 3246 + if (!sd) 3247 + return 0; 3248 + 3249 + return sched_domain_span(sd); 3250 + } 3251 + 3252 + /* 3253 + * Return the amount of CPUs in the same NUMA domain of @cpu (or zero if the 3254 + * NUMA domain is not defined). 3255 + */ 3256 + static unsigned int numa_weight(s32 cpu) 3257 + { 3258 + struct sched_domain *sd; 3259 + struct sched_group *sg; 3260 + 3261 + sd = rcu_dereference(per_cpu(sd_numa, cpu)); 3262 + if (!sd) 3263 + return 0; 3264 + sg = sd->groups; 3265 + if (!sg) 3266 + return 0; 3267 + 3268 + return sg->group_weight; 3269 + } 3270 + 3271 + /* 3272 + * Return the cpumask representing the NUMA domain of @cpu (or NULL if the NUMA 3273 + * domain is not defined). 3274 + */ 3275 + static struct cpumask *numa_span(s32 cpu) 3276 + { 3277 + struct sched_domain *sd; 3278 + struct sched_group *sg; 3279 + 3280 + sd = rcu_dereference(per_cpu(sd_numa, cpu)); 3281 + if (!sd) 3282 + return NULL; 3283 + sg = sd->groups; 3284 + if (!sg) 3285 + return NULL; 3286 + 3287 + return sched_group_span(sg); 3288 + } 3289 + 3290 + /* 3228 3291 * Return true if the LLC domains do not perfectly overlap with the NUMA 3229 3292 * domains, false otherwise. 3230 3293 */ ··· 3323 3250 * overlapping, which is incorrect (as NUMA 1 has two distinct LLC 3324 3251 * domains). 3325 3252 */ 3326 - for_each_online_cpu(cpu) { 3327 - const struct cpumask *numa_cpus; 3328 - struct sched_domain *sd; 3329 - 3330 - sd = rcu_dereference(per_cpu(sd_llc, cpu)); 3331 - if (!sd) 3253 + for_each_online_cpu(cpu) 3254 + if (llc_weight(cpu) != numa_weight(cpu)) 3332 3255 return true; 3333 - 3334 - numa_cpus = cpumask_of_node(cpu_to_node(cpu)); 3335 - if (sd->span_weight != cpumask_weight(numa_cpus)) 3336 - return true; 3337 - } 3338 3256 3339 3257 return false; 3340 3258 } ··· 3344 3280 static void update_selcpu_topology(void) 3345 3281 { 3346 3282 bool enable_llc = false, enable_numa = false; 3347 - struct sched_domain *sd; 3348 - const struct cpumask *cpus; 3283 + unsigned int nr_cpus; 3349 3284 s32 cpu = cpumask_first(cpu_online_mask); 3350 3285 3351 3286 /* ··· 3358 3295 * CPUs. 
3359 3296 */ 3360 3297 rcu_read_lock(); 3361 - sd = rcu_dereference(per_cpu(sd_llc, cpu)); 3362 - if (sd) { 3363 - if (sd->span_weight < num_online_cpus()) 3298 + nr_cpus = llc_weight(cpu); 3299 + if (nr_cpus > 0) { 3300 + if (nr_cpus < num_online_cpus()) 3364 3301 enable_llc = true; 3302 + pr_debug("sched_ext: LLC=%*pb weight=%u\n", 3303 + cpumask_pr_args(llc_span(cpu)), llc_weight(cpu)); 3365 3304 } 3366 3305 3367 3306 /* ··· 3375 3310 * enabling both NUMA and LLC optimizations is unnecessary, as checking 3376 3311 * for an idle CPU in the same domain twice is redundant. 3377 3312 */ 3378 - cpus = cpumask_of_node(cpu_to_node(cpu)); 3379 - if ((cpumask_weight(cpus) < num_online_cpus()) && llc_numa_mismatch()) 3380 - enable_numa = true; 3313 + nr_cpus = numa_weight(cpu); 3314 + if (nr_cpus > 0) { 3315 + if (nr_cpus < num_online_cpus() && llc_numa_mismatch()) 3316 + enable_numa = true; 3317 + pr_debug("sched_ext: NUMA=%*pb weight=%u\n", 3318 + cpumask_pr_args(numa_span(cpu)), numa_weight(cpu)); 3319 + } 3381 3320 rcu_read_unlock(); 3382 3321 3383 3322 pr_debug("sched_ext: LLC idle selection %s\n", 3384 - enable_llc ? "enabled" : "disabled"); 3323 + str_enabled_disabled(enable_llc)); 3385 3324 pr_debug("sched_ext: NUMA idle selection %s\n", 3386 - enable_numa ? "enabled" : "disabled"); 3325 + str_enabled_disabled(enable_numa)); 3387 3326 3388 3327 if (enable_llc) 3389 3328 static_branch_enable_cpuslocked(&scx_selcpu_topo_llc); ··· 3417 3348 * 4. Pick a CPU within the same NUMA node, if enabled: 3418 3349 * - choose a CPU from the same NUMA node to reduce memory access latency. 3419 3350 * 3351 + * 5. Pick any idle CPU usable by the task. 3352 + * 3420 3353 * Step 3 and 4 are performed only if the system has, respectively, multiple 3421 3354 * LLC domains / multiple NUMA nodes (see scx_selcpu_topo_llc and 3422 3355 * scx_selcpu_topo_numa). ··· 3434 3363 s32 cpu; 3435 3364 3436 3365 *found = false; 3437 - 3438 3366 3439 3367 /* 3440 3368 * This is necessary to protect llc_cpus. ··· 3453 3383 */ 3454 3384 if (p->nr_cpus_allowed >= num_possible_cpus()) { 3455 3385 if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa)) 3456 - numa_cpus = cpumask_of_node(cpu_to_node(prev_cpu)); 3386 + numa_cpus = numa_span(prev_cpu); 3457 3387 3458 - if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) { 3459 - struct sched_domain *sd; 3460 - 3461 - sd = rcu_dereference(per_cpu(sd_llc, prev_cpu)); 3462 - if (sd) 3463 - llc_cpus = sched_domain_span(sd); 3464 - } 3388 + if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) 3389 + llc_cpus = llc_span(prev_cpu); 3465 3390 } 3466 3391 3467 3392 /* ··· 3657 3592 3658 3593 static void update_builtin_idle(int cpu, bool idle) 3659 3594 { 3660 - if (idle) 3661 - cpumask_set_cpu(cpu, idle_masks.cpu); 3662 - else 3663 - cpumask_clear_cpu(cpu, idle_masks.cpu); 3595 + assign_cpu(cpu, idle_masks.cpu, idle); 3664 3596 3665 3597 #ifdef CONFIG_SCHED_SMT 3666 3598 if (sched_smt_active()) { ··· 3668 3606 * idle_masks.smt handling is racy but that's fine as 3669 3607 * it's only for optimization and self-correcting. 
3670 3608 */ 3671 - for_each_cpu(cpu, smt) { 3672 - if (!cpumask_test_cpu(cpu, idle_masks.cpu)) 3673 - return; 3674 - } 3609 + if (!cpumask_subset(smt, idle_masks.cpu)) 3610 + return; 3675 3611 cpumask_or(idle_masks.smt, idle_masks.smt, smt); 3676 3612 } else { 3677 3613 cpumask_andnot(idle_masks.smt, idle_masks.smt, smt); ··· 4748 4688 4749 4689 /** 4750 4690 * scx_softlockup - sched_ext softlockup handler 4691 + * @dur_s: number of seconds of CPU stuck due to soft lockup 4751 4692 * 4752 4693 * On some multi-socket setups (e.g. 2x Intel 8480c), the BPF scheduler can 4753 4694 * live-lock the system by making many CPUs target the same DSQ to the point ··· 4792 4731 4793 4732 /** 4794 4733 * scx_ops_bypass - [Un]bypass scx_ops and guarantee forward progress 4734 + * @bypass: true for bypass, false for unbypass 4795 4735 * 4796 4736 * Bypassing guarantees that all runnable tasks make forward progress without 4797 4737 * trusting the BPF scheduler. We can't grab any mutexes or rwsems as they might ··· 4961 4899 struct task_struct *p; 4962 4900 struct rhashtable_iter rht_iter; 4963 4901 struct scx_dispatch_q *dsq; 4964 - int i, kind; 4902 + int i, kind, cpu; 4965 4903 4966 4904 kind = atomic_read(&scx_exit_kind); 4967 4905 while (true) { ··· 5043 4981 } 5044 4982 scx_task_iter_stop(&sti); 5045 4983 percpu_up_write(&scx_fork_rwsem); 4984 + 4985 + /* 4986 + * Invalidate all the rq clocks to prevent getting outdated 4987 + * rq clocks from a previous scx scheduler. 4988 + */ 4989 + for_each_possible_cpu(cpu) { 4990 + struct rq *rq = cpu_rq(cpu); 4991 + scx_rq_clock_invalidate(rq); 4992 + } 5046 4993 5047 4994 /* no task is on scx, turn off all the switches and flush in-progress calls */ 5048 4995 static_branch_disable(&__scx_ops_enabled); ··· 5277 5206 scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, 5278 5207 p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK, 5279 5208 ops_state >> SCX_OPSS_QSEQ_SHIFT); 5280 - dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu", 5209 + dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu slice=%llu", 5281 5210 p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf, 5282 - p->scx.dsq_vtime); 5211 + p->scx.dsq_vtime, p->scx.slice); 5283 5212 dump_line(s, " cpus=%*pb", cpumask_pr_args(p->cpus_ptr)); 5284 5213 5285 5214 if (SCX_HAS_OP(dump_task)) { ··· 6354 6283 6355 6284 __bpf_kfunc_start_defs(); 6356 6285 6286 + static bool check_builtin_idle_enabled(void) 6287 + { 6288 + if (static_branch_likely(&scx_builtin_idle_enabled)) 6289 + return true; 6290 + 6291 + scx_ops_error("built-in idle tracking is disabled"); 6292 + return false; 6293 + } 6294 + 6357 6295 /** 6358 6296 * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu() 6359 6297 * @p: task_struct to select a CPU for ··· 6380 6300 __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, 6381 6301 u64 wake_flags, bool *is_idle) 6382 6302 { 6383 - if (!static_branch_likely(&scx_builtin_idle_enabled)) { 6384 - scx_ops_error("built-in idle tracking is disabled"); 6303 + if (!check_builtin_idle_enabled()) 6385 6304 goto prev_cpu; 6386 - } 6387 6305 6388 6306 if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) 6389 6307 goto prev_cpu; ··· 6465 6387 * ops.select_cpu(), and ops.dispatch(). 6466 6388 * 6467 6389 * When called from ops.select_cpu() or ops.enqueue(), it's for direct dispatch 6468 - * and @p must match the task being enqueued. Also, %SCX_DSQ_LOCAL_ON can't be 6469 - * used to target the local DSQ of a CPU other than the enqueueing one. 
Use 6470 - * ops.select_cpu() to be on the target CPU in the first place. 6390 + * and @p must match the task being enqueued. 6471 6391 * 6472 6392 * When called from ops.select_cpu(), @enq_flags and @dsp_id are stored, and @p 6473 6393 * will be directly inserted into the corresponding dispatch queue after ··· 7304 7228 } 7305 7229 7306 7230 /** 7307 - * scx_bpf_dump - Generate extra debug dump specific to the BPF scheduler 7231 + * scx_bpf_dump_bstr - Generate extra debug dump specific to the BPF scheduler 7308 7232 * @fmt: format string 7309 7233 * @data: format string parameters packaged using ___bpf_fill() macro 7310 7234 * @data__sz: @data len, must end in '__sz' for the verifier ··· 7396 7320 * scx_bpf_cpuperf_set - Set the relative performance target of a CPU 7397 7321 * @cpu: CPU of interest 7398 7322 * @perf: target performance level [0, %SCX_CPUPERF_ONE] 7399 - * @flags: %SCX_CPUPERF_* flags 7400 7323 * 7401 7324 * Set the target performance level of @cpu to @perf. @perf is in linear 7402 7325 * relative scale between 0 and %SCX_CPUPERF_ONE. This determines how the ··· 7472 7397 */ 7473 7398 __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void) 7474 7399 { 7475 - if (!static_branch_likely(&scx_builtin_idle_enabled)) { 7476 - scx_ops_error("built-in idle tracking is disabled"); 7400 + if (!check_builtin_idle_enabled()) 7477 7401 return cpu_none_mask; 7478 - } 7479 7402 7480 7403 #ifdef CONFIG_SMP 7481 7404 return idle_masks.cpu; ··· 7491 7418 */ 7492 7419 __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void) 7493 7420 { 7494 - if (!static_branch_likely(&scx_builtin_idle_enabled)) { 7495 - scx_ops_error("built-in idle tracking is disabled"); 7421 + if (!check_builtin_idle_enabled()) 7496 7422 return cpu_none_mask; 7497 - } 7498 7423 7499 7424 #ifdef CONFIG_SMP 7500 7425 if (sched_smt_active()) ··· 7507 7436 /** 7508 7437 * scx_bpf_put_idle_cpumask - Release a previously acquired referenced kptr to 7509 7438 * either the percpu, or SMT idle-tracking cpumask. 7439 + * @idle_mask: &cpumask to use 7510 7440 */ 7511 7441 __bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask) 7512 7442 { ··· 7531 7459 */ 7532 7460 __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) 7533 7461 { 7534 - if (!static_branch_likely(&scx_builtin_idle_enabled)) { 7535 - scx_ops_error("built-in idle tracking is disabled"); 7462 + if (!check_builtin_idle_enabled()) 7536 7463 return false; 7537 - } 7538 7464 7539 7465 if (ops_cpu_valid(cpu, NULL)) 7540 7466 return test_and_clear_cpu_idle(cpu); ··· 7562 7492 __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, 7563 7493 u64 flags) 7564 7494 { 7565 - if (!static_branch_likely(&scx_builtin_idle_enabled)) { 7566 - scx_ops_error("built-in idle tracking is disabled"); 7495 + if (!check_builtin_idle_enabled()) 7567 7496 return -EBUSY; 7568 - } 7569 7497 7570 7498 return scx_pick_idle_cpu(cpus_allowed, flags); 7571 7499 } ··· 7658 7590 } 7659 7591 #endif 7660 7592 7593 + /** 7594 + * scx_bpf_now - Returns a high-performance monotonically non-decreasing 7595 + * clock for the current CPU. The clock returned is in nanoseconds. 7596 + * 7597 + * It provides the following properties: 7598 + * 7599 + * 1) High performance: Many BPF schedulers call bpf_ktime_get_ns() frequently 7600 + * to account for execution time and track tasks' runtime properties. 
7601 + * Unfortunately, in some hardware platforms, bpf_ktime_get_ns() -- which 7602 + * eventually reads a hardware timestamp counter -- is neither performant nor 7603 + * scalable. scx_bpf_now() aims to provide a high-performance clock by 7604 + * using the rq clock in the scheduler core whenever possible. 7605 + * 7606 + * 2) High enough resolution for the BPF scheduler use cases: In most BPF 7607 + * scheduler use cases, the required clock resolution is lower than the most 7608 + * accurate hardware clock (e.g., rdtsc in x86). scx_bpf_now() basically 7609 + * uses the rq clock in the scheduler core whenever it is valid. It considers 7610 + * that the rq clock is valid from the time the rq clock is updated 7611 + * (update_rq_clock) until the rq is unlocked (rq_unpin_lock). 7612 + * 7613 + * 3) Monotonically non-decreasing clock for the same CPU: scx_bpf_now() 7614 + * guarantees the clock never goes backward when comparing them in the same 7615 + * CPU. On the other hand, when comparing clocks in different CPUs, there 7616 + * is no such guarantee -- the clock can go backward. It provides a 7617 + * monotonically *non-decreasing* clock so that it would provide the same 7618 + * clock values in two different scx_bpf_now() calls in the same CPU 7619 + * during the same period of when the rq clock is valid. 7620 + */ 7621 + __bpf_kfunc u64 scx_bpf_now(void) 7622 + { 7623 + struct rq *rq; 7624 + u64 clock; 7625 + 7626 + preempt_disable(); 7627 + 7628 + rq = this_rq(); 7629 + if (smp_load_acquire(&rq->scx.flags) & SCX_RQ_CLK_VALID) { 7630 + /* 7631 + * If the rq clock is valid, use the cached rq clock. 7632 + * 7633 + * Note that scx_bpf_now() is re-entrant between a process 7634 + * context and an interrupt context (e.g., timer interrupt). 7635 + * However, we don't need to consider the race between them 7636 + * because such race is not observable from a caller. 7637 + */ 7638 + clock = READ_ONCE(rq->scx.clock); 7639 + } else { 7640 + /* 7641 + * Otherwise, return a fresh rq clock. 7642 + * 7643 + * The rq clock is updated outside of the rq lock. 7644 + * In this case, keep the updated rq clock invalid so the next 7645 + * kfunc call outside the rq lock gets a fresh rq clock. 7646 + */ 7647 + clock = sched_clock_cpu(cpu_of(rq)); 7648 + } 7649 + 7650 + preempt_enable(); 7651 + 7652 + return clock; 7653 + } 7654 + 7661 7655 __bpf_kfunc_end_defs(); 7662 7656 7663 7657 BTF_KFUNCS_START(scx_kfunc_ids_any) ··· 7751 7621 #ifdef CONFIG_CGROUP_SCHED 7752 7622 BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE) 7753 7623 #endif 7624 + BTF_ID_FLAGS(func, scx_bpf_now) 7754 7625 BTF_KFUNCS_END(scx_kfunc_ids_any) 7755 7626 7756 7627 static const struct btf_kfunc_id_set scx_kfunc_set_any = {
+36 -15
kernel/sched/sched.h
··· 759 759 SCX_RQ_BAL_PENDING = 1 << 2, /* balance hasn't run yet */ 760 760 SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */ 761 761 SCX_RQ_BYPASSING = 1 << 4, 762 + SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */ 762 763 763 764 SCX_RQ_IN_WAKEUP = 1 << 16, 764 765 SCX_RQ_IN_BALANCE = 1 << 17, ··· 772 771 unsigned long ops_qseq; 773 772 u64 extra_enq_flags; /* see move_task_to_local_dsq() */ 774 773 u32 nr_running; 775 - u32 flags; 776 774 u32 cpuperf_target; /* [0, SCHED_CAPACITY_SCALE] */ 777 775 bool cpu_released; 776 + u32 flags; 777 + u64 clock; /* current per-rq clock -- see scx_bpf_now() */ 778 778 cpumask_var_t cpus_to_kick; 779 779 cpumask_var_t cpus_to_kick_if_idle; 780 780 cpumask_var_t cpus_to_preempt; ··· 1724 1722 1725 1723 extern struct balance_callback balance_push_callback; 1726 1724 1725 + #ifdef CONFIG_SCHED_CLASS_EXT 1726 + extern const struct sched_class ext_sched_class; 1727 + 1728 + DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */ 1729 + DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */ 1730 + 1731 + #define scx_enabled() static_branch_unlikely(&__scx_ops_enabled) 1732 + #define scx_switched_all() static_branch_unlikely(&__scx_switched_all) 1733 + 1734 + static inline void scx_rq_clock_update(struct rq *rq, u64 clock) 1735 + { 1736 + if (!scx_enabled()) 1737 + return; 1738 + WRITE_ONCE(rq->scx.clock, clock); 1739 + smp_store_release(&rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID); 1740 + } 1741 + 1742 + static inline void scx_rq_clock_invalidate(struct rq *rq) 1743 + { 1744 + if (!scx_enabled()) 1745 + return; 1746 + WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID); 1747 + } 1748 + 1749 + #else /* !CONFIG_SCHED_CLASS_EXT */ 1750 + #define scx_enabled() false 1751 + #define scx_switched_all() false 1752 + 1753 + static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {} 1754 + static inline void scx_rq_clock_invalidate(struct rq *rq) {} 1755 + #endif /* !CONFIG_SCHED_CLASS_EXT */ 1756 + 1727 1757 /* 1728 1758 * Lockdep annotation that avoids accidental unlocks; it's like a 1729 1759 * sticky/continuous lockdep_assert_held(). ··· 1785 1751 if (rq->clock_update_flags > RQCF_ACT_SKIP) 1786 1752 rf->clock_update_flags = RQCF_UPDATED; 1787 1753 #endif 1788 - 1754 + scx_rq_clock_invalidate(rq); 1789 1755 lockdep_unpin_lock(__rq_lockp(rq), rf->cookie); 1790 1756 } 1791 1757 ··· 2543 2509 extern const struct sched_class rt_sched_class; 2544 2510 extern const struct sched_class fair_sched_class; 2545 2511 extern const struct sched_class idle_sched_class; 2546 - 2547 - #ifdef CONFIG_SCHED_CLASS_EXT 2548 - extern const struct sched_class ext_sched_class; 2549 - 2550 - DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */ 2551 - DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */ 2552 - 2553 - #define scx_enabled() static_branch_unlikely(&__scx_ops_enabled) 2554 - #define scx_switched_all() static_branch_unlikely(&__scx_switched_all) 2555 - #else /* !CONFIG_SCHED_CLASS_EXT */ 2556 - #define scx_enabled() false 2557 - #define scx_switched_all() false 2558 - #endif /* !CONFIG_SCHED_CLASS_EXT */ 2559 2512 2560 2513 /* 2561 2514 * Iterate only active classes. SCX can take over all fair tasks or be
+176 -2
tools/sched_ext/include/scx/common.bpf.h
··· 9 9 10 10 #ifdef LSP 11 11 #define __bpf__ 12 - #include "../vmlinux/vmlinux.h" 12 + #include "../vmlinux.h" 13 13 #else 14 14 #include "vmlinux.h" 15 15 #endif ··· 23 23 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 24 24 #define PF_EXITING 0x00000004 25 25 #define CLOCK_MONOTONIC 1 26 + 27 + extern int LINUX_KERNEL_VERSION __kconfig; 28 + extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak; 29 + extern const char CONFIG_LOCALVERSION[64] __kconfig __weak; 26 30 27 31 /* 28 32 * Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can ··· 76 72 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; 77 73 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym; 78 74 struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak; 75 + u64 scx_bpf_now(void) __ksym __weak; 79 76 80 77 /* 81 78 * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from ··· 103 98 _Pragma("GCC diagnostic push") \ 104 99 _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 105 100 ___bpf_fill(___param, args); \ 106 - _Pragma("GCC diagnostic pop") \ 101 + _Pragma("GCC diagnostic pop") 107 102 108 103 /* 109 104 * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments ··· 139 134 scx_bpf_bstr_preamble(fmt, args) \ 140 135 scx_bpf_dump_bstr(___fmt, ___param, sizeof(___param)); \ 141 136 ___scx_bpf_bstr_format_checker(fmt, ##args); \ 137 + }) 138 + 139 + /* 140 + * scx_bpf_dump_header() is a wrapper around scx_bpf_dump that adds a header 141 + * of system information for debugging. 142 + */ 143 + #define scx_bpf_dump_header() \ 144 + ({ \ 145 + scx_bpf_dump("kernel: %d.%d.%d %s\ncc: %s\n", \ 146 + LINUX_KERNEL_VERSION >> 16, \ 147 + LINUX_KERNEL_VERSION >> 8 & 0xFF, \ 148 + LINUX_KERNEL_VERSION & 0xFF, \ 149 + CONFIG_LOCALVERSION, \ 150 + CONFIG_CC_VERSION_TEXT); \ 142 151 }) 143 152 144 153 #define BPF_STRUCT_OPS(name, args...) \ ··· 336 317 const struct cpumask *src2) __ksym; 337 318 u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; 338 319 320 + int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) __ksym; 321 + int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym; 322 + void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym; 323 + 324 + #define def_iter_struct(name) \ 325 + struct bpf_iter_##name { \ 326 + struct bpf_iter_bits it; \ 327 + const struct cpumask *bitmap; \ 328 + }; 329 + 330 + #define def_iter_new(name) \ 331 + static inline int bpf_iter_##name##_new( \ 332 + struct bpf_iter_##name *it, const u64 *unsafe_ptr__ign, u32 nr_words) \ 333 + { \ 334 + it->bitmap = scx_bpf_get_##name##_cpumask(); \ 335 + return bpf_iter_bits_new(&it->it, (const u64 *)it->bitmap, \ 336 + sizeof(struct cpumask) / 8); \ 337 + } 338 + 339 + #define def_iter_next(name) \ 340 + static inline int *bpf_iter_##name##_next(struct bpf_iter_##name *it) { \ 341 + return bpf_iter_bits_next(&it->it); \ 342 + } 343 + 344 + #define def_iter_destroy(name) \ 345 + static inline void bpf_iter_##name##_destroy(struct bpf_iter_##name *it) { \ 346 + scx_bpf_put_cpumask(it->bitmap); \ 347 + bpf_iter_bits_destroy(&it->it); \ 348 + } 349 + #define def_for_each_cpu(cpu, name) for_each_##name##_cpu(cpu) 350 + 351 + /// Provides iterator for possible and online cpus. 
352 + /// 353 + /// # Example 354 + /// 355 + /// ``` 356 + /// static inline void example_use() { 357 + /// int *cpu; 358 + /// 359 + /// for_each_possible_cpu(cpu){ 360 + /// bpf_printk("CPU %d is possible", *cpu); 361 + /// } 362 + /// 363 + /// for_each_online_cpu(cpu){ 364 + /// bpf_printk("CPU %d is online", *cpu); 365 + /// } 366 + /// } 367 + /// ``` 368 + def_iter_struct(possible); 369 + def_iter_new(possible); 370 + def_iter_next(possible); 371 + def_iter_destroy(possible); 372 + #define for_each_possible_cpu(cpu) bpf_for_each(possible, cpu, NULL, 0) 373 + 374 + def_iter_struct(online); 375 + def_iter_new(online); 376 + def_iter_next(online); 377 + def_iter_destroy(online); 378 + #define for_each_online_cpu(cpu) bpf_for_each(online, cpu, NULL, 0) 379 + 339 380 /* 340 381 * Access a cpumask in read-only mode (typically to check bits). 341 382 */ ··· 407 328 /* rcu */ 408 329 void bpf_rcu_read_lock(void) __ksym; 409 330 void bpf_rcu_read_unlock(void) __ksym; 331 + 332 + /* 333 + * Time helpers, most of which are from jiffies.h. 334 + */ 335 + 336 + /** 337 + * time_delta - Calculate the delta between new and old time stamp 338 + * @after: first comparable as u64 339 + * @before: second comparable as u64 340 + * 341 + * Return: the time difference, which is >= 0 342 + */ 343 + static inline s64 time_delta(u64 after, u64 before) 344 + { 345 + return (s64)(after - before) > 0 ? : 0; 346 + } 347 + 348 + /** 349 + * time_after - returns true if the time a is after time b. 350 + * @a: first comparable as u64 351 + * @b: second comparable as u64 352 + * 353 + * Do this with "<0" and ">=0" to only test the sign of the result. A 354 + * good compiler would generate better code (and a really good compiler 355 + * wouldn't care). Gcc is currently neither. 356 + * 357 + * Return: %true is time a is after time b, otherwise %false. 358 + */ 359 + static inline bool time_after(u64 a, u64 b) 360 + { 361 + return (s64)(b - a) < 0; 362 + } 363 + 364 + /** 365 + * time_before - returns true if the time a is before time b. 366 + * @a: first comparable as u64 367 + * @b: second comparable as u64 368 + * 369 + * Return: %true is time a is before time b, otherwise %false. 370 + */ 371 + static inline bool time_before(u64 a, u64 b) 372 + { 373 + return time_after(b, a); 374 + } 375 + 376 + /** 377 + * time_after_eq - returns true if the time a is after or the same as time b. 378 + * @a: first comparable as u64 379 + * @b: second comparable as u64 380 + * 381 + * Return: %true is time a is after or the same as time b, otherwise %false. 382 + */ 383 + static inline bool time_after_eq(u64 a, u64 b) 384 + { 385 + return (s64)(a - b) >= 0; 386 + } 387 + 388 + /** 389 + * time_before_eq - returns true if the time a is before or the same as time b. 390 + * @a: first comparable as u64 391 + * @b: second comparable as u64 392 + * 393 + * Return: %true is time a is before or the same as time b, otherwise %false. 394 + */ 395 + static inline bool time_before_eq(u64 a, u64 b) 396 + { 397 + return time_after_eq(b, a); 398 + } 399 + 400 + /** 401 + * time_in_range - Calculate whether a is in the range of [b, c]. 402 + * @a: time to test 403 + * @b: beginning of the range 404 + * @c: end of the range 405 + * 406 + * Return: %true is time a is in the range [b, c], otherwise %false. 407 + */ 408 + static inline bool time_in_range(u64 a, u64 b, u64 c) 409 + { 410 + return time_after_eq(a, b) && time_before_eq(a, c); 411 + } 412 + 413 + /** 414 + * time_in_range_open - Calculate whether a is in the range of [b, c). 
415 + * @a: time to test 416 + * @b: beginning of the range 417 + * @c: end of the range 418 + * 419 + * Return: %true is time a is in the range [b, c), otherwise %false. 420 + */ 421 + static inline bool time_in_range_open(u64 a, u64 b, u64 c) 422 + { 423 + return time_after_eq(a, b) && time_before(a, c); 424 + } 410 425 411 426 412 427 /* ··· 596 423 } 597 424 598 425 #include "compat.bpf.h" 426 + #include "enums.bpf.h" 599 427 600 428 #endif /* __SCX_COMMON_BPF_H */
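
The comparison helpers added above use the same signed-difference pattern as
the kernel's jiffies macros, so they stay correct when a u64 expression
underflows (e.g. vtime_now - SCX_SLICE_DFL shortly after load, as in the
scx_simple change further down). A standalone illustration with made-up
values, not part of the patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same signed-difference form as the time_before() helper added above. */
static bool time_before(uint64_t a, uint64_t b)
{
        return (int64_t)(a - b) < 0;
}

int main(void)
{
        uint64_t vtime_now = 100;                       /* small vtime shortly after load */
        uint64_t vtime_floor = vtime_now - 20000;       /* u64 subtraction wraps around */
        uint64_t vtime = 50;

        /* A naive compare sees the wrapped floor as far in the future and
         * would clamp vtime up to it (prints 1)... */
        printf("naive:       %d\n", vtime < vtime_floor);
        /* ...while the signed form sees the floor as already passed and
         * leaves vtime alone (prints 0). */
        printf("time_before: %d\n", time_before(vtime, vtime_floor));
        return 0;
}
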
+6
tools/sched_ext/include/scx/common.h
···
 
  #include "user_exit_info.h"
  #include "compat.h"
+ #include "enums.h"
+ 
+ /* not available when building kernel tools/sched_ext */
+ #if __has_include(<lib/sdt_task.h>)
+ #include <lib/sdt_task.h>
+ #endif
 
  #endif /* __SCHED_EXT_COMMON_H */
+5
tools/sched_ext/include/scx/compat.bpf.h
···
                  false;                                                  \
  })
 
+ #define scx_bpf_now()                                                   \
+         (bpf_ksym_exists(scx_bpf_now) ?                                 \
+          scx_bpf_now() :                                                \
+          bpf_ktime_get_ns())
+ 
  /*
   * Define sched_ext_ops. This may be expanded to define multiple variants for
   * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
+1
tools/sched_ext/include/scx/compat.h
···
          __skel = __scx_name##__open();                                  \
          SCX_BUG_ON(!__skel, "Could not open " #__scx_name);            \
          __skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \
+         SCX_ENUM_INIT(__skel);                                          \
          __skel;                                                         \
  })
+105
tools/sched_ext/include/scx/enums.autogen.bpf.h
··· 1 + /* 2 + * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would 3 + * like to access an enum that is currently missing, add it to the script 4 + * and run it from the root directory to update this file. 5 + */ 6 + 7 + const volatile u64 __SCX_OPS_NAME_LEN __weak; 8 + #define SCX_OPS_NAME_LEN __SCX_OPS_NAME_LEN 9 + 10 + const volatile u64 __SCX_SLICE_DFL __weak; 11 + #define SCX_SLICE_DFL __SCX_SLICE_DFL 12 + 13 + const volatile u64 __SCX_SLICE_INF __weak; 14 + #define SCX_SLICE_INF __SCX_SLICE_INF 15 + 16 + const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak; 17 + #define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN 18 + 19 + const volatile u64 __SCX_DSQ_FLAG_LOCAL_ON __weak; 20 + #define SCX_DSQ_FLAG_LOCAL_ON __SCX_DSQ_FLAG_LOCAL_ON 21 + 22 + const volatile u64 __SCX_DSQ_INVALID __weak; 23 + #define SCX_DSQ_INVALID __SCX_DSQ_INVALID 24 + 25 + const volatile u64 __SCX_DSQ_GLOBAL __weak; 26 + #define SCX_DSQ_GLOBAL __SCX_DSQ_GLOBAL 27 + 28 + const volatile u64 __SCX_DSQ_LOCAL __weak; 29 + #define SCX_DSQ_LOCAL __SCX_DSQ_LOCAL 30 + 31 + const volatile u64 __SCX_DSQ_LOCAL_ON __weak; 32 + #define SCX_DSQ_LOCAL_ON __SCX_DSQ_LOCAL_ON 33 + 34 + const volatile u64 __SCX_DSQ_LOCAL_CPU_MASK __weak; 35 + #define SCX_DSQ_LOCAL_CPU_MASK __SCX_DSQ_LOCAL_CPU_MASK 36 + 37 + const volatile u64 __SCX_TASK_QUEUED __weak; 38 + #define SCX_TASK_QUEUED __SCX_TASK_QUEUED 39 + 40 + const volatile u64 __SCX_TASK_RESET_RUNNABLE_AT __weak; 41 + #define SCX_TASK_RESET_RUNNABLE_AT __SCX_TASK_RESET_RUNNABLE_AT 42 + 43 + const volatile u64 __SCX_TASK_DEQD_FOR_SLEEP __weak; 44 + #define SCX_TASK_DEQD_FOR_SLEEP __SCX_TASK_DEQD_FOR_SLEEP 45 + 46 + const volatile u64 __SCX_TASK_STATE_SHIFT __weak; 47 + #define SCX_TASK_STATE_SHIFT __SCX_TASK_STATE_SHIFT 48 + 49 + const volatile u64 __SCX_TASK_STATE_BITS __weak; 50 + #define SCX_TASK_STATE_BITS __SCX_TASK_STATE_BITS 51 + 52 + const volatile u64 __SCX_TASK_STATE_MASK __weak; 53 + #define SCX_TASK_STATE_MASK __SCX_TASK_STATE_MASK 54 + 55 + const volatile u64 __SCX_TASK_CURSOR __weak; 56 + #define SCX_TASK_CURSOR __SCX_TASK_CURSOR 57 + 58 + const volatile u64 __SCX_TASK_NONE __weak; 59 + #define SCX_TASK_NONE __SCX_TASK_NONE 60 + 61 + const volatile u64 __SCX_TASK_INIT __weak; 62 + #define SCX_TASK_INIT __SCX_TASK_INIT 63 + 64 + const volatile u64 __SCX_TASK_READY __weak; 65 + #define SCX_TASK_READY __SCX_TASK_READY 66 + 67 + const volatile u64 __SCX_TASK_ENABLED __weak; 68 + #define SCX_TASK_ENABLED __SCX_TASK_ENABLED 69 + 70 + const volatile u64 __SCX_TASK_NR_STATES __weak; 71 + #define SCX_TASK_NR_STATES __SCX_TASK_NR_STATES 72 + 73 + const volatile u64 __SCX_TASK_DSQ_ON_PRIQ __weak; 74 + #define SCX_TASK_DSQ_ON_PRIQ __SCX_TASK_DSQ_ON_PRIQ 75 + 76 + const volatile u64 __SCX_KICK_IDLE __weak; 77 + #define SCX_KICK_IDLE __SCX_KICK_IDLE 78 + 79 + const volatile u64 __SCX_KICK_PREEMPT __weak; 80 + #define SCX_KICK_PREEMPT __SCX_KICK_PREEMPT 81 + 82 + const volatile u64 __SCX_KICK_WAIT __weak; 83 + #define SCX_KICK_WAIT __SCX_KICK_WAIT 84 + 85 + const volatile u64 __SCX_ENQ_WAKEUP __weak; 86 + #define SCX_ENQ_WAKEUP __SCX_ENQ_WAKEUP 87 + 88 + const volatile u64 __SCX_ENQ_HEAD __weak; 89 + #define SCX_ENQ_HEAD __SCX_ENQ_HEAD 90 + 91 + const volatile u64 __SCX_ENQ_PREEMPT __weak; 92 + #define SCX_ENQ_PREEMPT __SCX_ENQ_PREEMPT 93 + 94 + const volatile u64 __SCX_ENQ_REENQ __weak; 95 + #define SCX_ENQ_REENQ __SCX_ENQ_REENQ 96 + 97 + const volatile u64 __SCX_ENQ_LAST __weak; 98 + #define SCX_ENQ_LAST __SCX_ENQ_LAST 99 + 100 + const volatile u64 
__SCX_ENQ_CLEAR_OPSS __weak; 101 + #define SCX_ENQ_CLEAR_OPSS __SCX_ENQ_CLEAR_OPSS 102 + 103 + const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak; 104 + #define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ 105 +
+41
tools/sched_ext/include/scx/enums.autogen.h
··· 1 + /* 2 + * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would 3 + * like to access an enum that is currently missing, add it to the script 4 + * and run it from the root directory to update this file. 5 + */ 6 + 7 + #define SCX_ENUM_INIT(skel) do { \ 8 + SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN); \ 9 + SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL); \ 10 + SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF); \ 11 + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN); \ 12 + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON); \ 13 + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID); \ 14 + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_GLOBAL); \ 15 + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL); \ 16 + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_ON); \ 17 + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_CPU_MASK); \ 18 + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_QUEUED); \ 19 + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_RESET_RUNNABLE_AT); \ 20 + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_DEQD_FOR_SLEEP); \ 21 + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_SHIFT); \ 22 + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_BITS); \ 23 + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_MASK); \ 24 + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_CURSOR); \ 25 + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NONE); \ 26 + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_INIT); \ 27 + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_READY); \ 28 + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_ENABLED); \ 29 + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NR_STATES); \ 30 + SCX_ENUM_SET(skel, scx_ent_dsq_flags, SCX_TASK_DSQ_ON_PRIQ); \ 31 + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_IDLE); \ 32 + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_PREEMPT); \ 33 + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_WAIT); \ 34 + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_WAKEUP); \ 35 + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_HEAD); \ 36 + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_PREEMPT); \ 37 + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_REENQ); \ 38 + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST); \ 39 + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS); \ 40 + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ); \ 41 + } while (0)
+12
tools/sched_ext/include/scx/enums.bpf.h
···
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+  * Convenience macros for getting/setting struct scx_enums instances.
+  *
+  * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+  */
+ #ifndef __SCX_ENUMS_BPF_H
+ #define __SCX_ENUMS_BPF_H
+ 
+ #include "enums.autogen.bpf.h"
+ 
+ #endif /* __SCX_ENUMS_BPF_H */
+27
tools/sched_ext/include/scx/enums.h
···
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+  * Define struct scx_enums that stores the load-time values of enums
+  * used by the BPF program.
+  *
+  * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+  */
+ 
+ #ifndef __SCX_ENUMS_H
+ #define __SCX_ENUMS_H
+ 
+ static inline void __ENUM_set(u64 *val, char *type, char *name)
+ {
+         bool res;
+ 
+         res = __COMPAT_read_enum(type, name, val);
+         SCX_BUG_ON(!res, "enum not found(%s)", name);
+ }
+ 
+ #define SCX_ENUM_SET(skel, type, name) do {                     \
+         __ENUM_set(&skel->rodata->__##name, #type, #name);      \
+ } while (0)
+ 
+ 
+ #include "enums.autogen.h"
+ 
+ #endif /* __SCX_ENUMS_H */
+6 -3
tools/sched_ext/include/scx/user_exit_info.h
···
  #ifndef __USER_EXIT_INFO_H
  #define __USER_EXIT_INFO_H
 
+ #ifdef LSP
+ #define __bpf__
+ #include "../vmlinux.h"
+ #endif
+ 
  enum uei_sizes {
          UEI_REASON_LEN  = 128,
          UEI_MSG_LEN     = 1024,
···
 
  #ifdef __bpf__
 
- #ifdef LSP
- #include "../vmlinux/vmlinux.h"
- #else
+ #ifndef LSP
  #include "vmlinux.h"
  #endif
  #include <bpf/bpf_core_read.h>
+4 -9
tools/sched_ext/scx_central.bpf.c
···
 
  const volatile s32 central_cpu;
  const volatile u32 nr_cpu_ids = 1;      /* !0 for veristat, set during init */
- const volatile u64 slice_ns = SCX_SLICE_DFL;
+ const volatile u64 slice_ns;
 
  bool timer_pinned = true;
  u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
···
          __type(key, u32);
          __type(value, struct central_timer);
  } central_timer SEC(".maps");
- 
- static bool vtime_before(u64 a, u64 b)
- {
-         return (s64)(a - b) < 0;
- }
 
  s32 BPF_STRUCT_OPS(central_select_cpu, struct task_struct *p,
                     s32 prev_cpu, u64 wake_flags)
···
          s32 cpu = scx_bpf_task_cpu(p);
          u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
          if (started_at)
-                 *started_at = bpf_ktime_get_ns() ?: 1;  /* 0 indicates idle */
+                 *started_at = scx_bpf_now() ?: 1;       /* 0 indicates idle */
  }
 
  void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
···
 
  static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
  {
-         u64 now = bpf_ktime_get_ns();
+         u64 now = scx_bpf_now();
          u64 nr_to_kick = nr_queued;
          s32 i, curr_cpu;
 
···
                  /* kick iff the current one exhausted its slice */
                  started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
                  if (started_at && *started_at &&
-                     vtime_before(now, *started_at + slice_ns))
+                     time_before(now, *started_at + slice_ns))
                          continue;
 
                  /* and there's something pending */
+1
tools/sched_ext/scx_central.c
···
 
          skel->rodata->central_cpu = 0;
          skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
+         skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
          while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
                  switch (opt) {
+10 -15
tools/sched_ext/scx_flatcg.bpf.c
···
  char _license[] SEC("license") = "GPL";
 
  const volatile u32 nr_cpus = 32;        /* !0 for veristat, set during init */
- const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
+ const volatile u64 cgrp_slice_ns;
  const volatile bool fifo_sched;
 
  u64 cvtime_now;
···
  static u64 div_round_up(u64 dividend, u64 divisor)
  {
          return (dividend + divisor - 1) / divisor;
- }
- 
- static bool vtime_before(u64 a, u64 b)
- {
-         return (s64)(a - b) < 0;
  }
 
  static bool cgv_node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
···
           */
          max_budget = (cgrp_slice_ns * nr_cpus * cgc->hweight) /
                  (2 * FCG_HWEIGHT_ONE);
-         if (vtime_before(cvtime, cvtime_now - max_budget))
+         if (time_before(cvtime, cvtime_now - max_budget))
                  cvtime = cvtime_now - max_budget;
 
          cgv_node->cvtime = cvtime;
···
                   * Limit the amount of budget that an idling task can accumulate
                   * to one slice.
                   */
-                 if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
+                 if (time_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
                          tvtime = cgc->tvtime_now - SCX_SLICE_DFL;
 
                  scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL,
···
                   * from multiple CPUs and thus racy. Any error should be
                   * contained and temporary. Let's just live with it.
                   */
-                 if (vtime_before(cgc->tvtime_now, p->scx.dsq_vtime))
+                 if (time_before(cgc->tvtime_now, p->scx.dsq_vtime))
                          cgc->tvtime_now = p->scx.dsq_vtime;
          }
          bpf_cgroup_release(cgrp);
···
                  cgv_node = container_of(rb_node, struct cgv_node, rb_node);
                  cgid = cgv_node->cgid;
 
-                 if (vtime_before(cvtime_now, cgv_node->cvtime))
+                 if (time_before(cvtime_now, cgv_node->cvtime))
                          cvtime_now = cgv_node->cvtime;
 
                  /*
···
          struct fcg_cpu_ctx *cpuc;
          struct fcg_cgrp_ctx *cgc;
          struct cgroup *cgrp;
-         u64 now = bpf_ktime_get_ns();
+         u64 now = scx_bpf_now();
          bool picked_next = false;
 
          cpuc = find_cpu_ctx();
···
          if (!cpuc->cur_cgid)
                  goto pick_next_cgroup;
 
-         if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) {
+         if (time_before(now, cpuc->cur_at + cgrp_slice_ns)) {
                  if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) {
                          stat_inc(FCG_STAT_CNS_KEEP);
                          return;
···
                      struct cgroup *from, struct cgroup *to)
  {
          struct fcg_cgrp_ctx *from_cgc, *to_cgc;
-         s64 vtime_delta;
+         s64 delta;
 
          /* find_cgrp_ctx() triggers scx_ops_error() on lookup failures */
          if (!(from_cgc = find_cgrp_ctx(from)) || !(to_cgc = find_cgrp_ctx(to)))
                  return;
 
-         vtime_delta = p->scx.dsq_vtime - from_cgc->tvtime_now;
-         p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
+         delta = time_delta(p->scx.dsq_vtime, from_cgc->tvtime_now);
+         p->scx.dsq_vtime = to_cgc->tvtime_now + delta;
  }
 
  s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init)
+1
tools/sched_ext/scx_flatcg.c
··· 137 137 skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); 138 138 139 139 skel->rodata->nr_cpus = libbpf_num_possible_cpus(); 140 + skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); 140 141 141 142 while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) { 142 143 double v;
+1 -1
tools/sched_ext/scx_qmap.bpf.c
···
 
  char _license[] SEC("license") = "GPL";
 
- const volatile u64 slice_ns = SCX_SLICE_DFL;
+ const volatile u64 slice_ns;
  const volatile u32 stall_user_nth;
  const volatile u32 stall_kernel_nth;
  const volatile u32 dsp_inf_loop_after;
+2
tools/sched_ext/scx_qmap.c
···
 
          skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
 
+         skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+ 
          while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
                  switch (opt) {
                  case 's':
+2 -7
tools/sched_ext/scx_simple.bpf.c
···
          (*cnt_p)++;
  }
 
- static inline bool vtime_before(u64 a, u64 b)
- {
-         return (s64)(a - b) < 0;
- }
- 
  s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
  {
          bool is_idle = false;
···
                   * Limit the amount of budget that an idling task can accumulate
                   * to one slice.
                   */
-                 if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
+                 if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
                          vtime = vtime_now - SCX_SLICE_DFL;
 
                  scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
···
           * thus racy. Any error should be contained and temporary. Let's just
           * live with it.
           */
-         if (vtime_before(vtime_now, p->scx.dsq_vtime))
+         if (time_before(vtime_now, p->scx.dsq_vtime))
                  vtime_now = p->scx.dsq_vtime;
  }
+13 -2
tools/testing/selftests/sched_ext/runner.c
···
  "\n"
  "  -t TEST       Only run tests whose name includes this string\n"
  "  -s            Include print output for skipped tests\n"
+ "  -l            List all available tests\n"
  "  -q            Don't print the test descriptions during run\n"
  "  -h            Display this help and exit\n";
 
  static volatile int exit_req;
- static bool quiet, print_skipped;
+ static bool quiet, print_skipped, list;
 
  #define MAX_SCX_TESTS 2048
 
···
 
          libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
-         while ((opt = getopt(argc, argv, "qst:h")) != -1) {
+         while ((opt = getopt(argc, argv, "qslt:h")) != -1) {
                  switch (opt) {
                  case 'q':
                          quiet = true;
                          break;
                  case 's':
                          print_skipped = true;
+                         break;
+                 case 'l':
+                         list = true;
                          break;
                  case 't':
                          filter = optarg;
···
          for (i = 0; i < __scx_num_tests; i++) {
                  enum scx_test_status status;
                  struct scx_test *test = &__scx_tests[i];
+ 
+                 if (list) {
+                         printf("%s\n", test->name);
+                         if (i == (__scx_num_tests - 1))
+                                 return 0;
+                         continue;
+                 }
 
                  if (filter && should_skip_test(test, filter)) {
                          /*