Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched/core: Rework sched_class::wakeup_preempt() and rq_modified_*()

Change sched_class::wakeup_preempt() to also get called for
cross-class wakeups, specifically those where the woken task
is of a higher class than the previous highest class.

To do this, track the current highest class of the runqueue in
rq::next_class and have wakeup_preempt() raise it whenever a task of
a higher class is woken. Additionally, have schedule() reset the
value on pick.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251127154725.901391274@infradead.org

+54 -60
+23 -9
kernel/sched/core.c
··· 2090 2090 */ 2091 2091 uclamp_rq_inc(rq, p, flags); 2092 2092 2093 - rq->queue_mask |= p->sched_class->queue_mask; 2094 2093 p->sched_class->enqueue_task(rq, p, flags); 2095 2094 2096 2095 psi_enqueue(p, flags); ··· 2122 2123 * and mark the task ->sched_delayed. 2123 2124 */ 2124 2125 uclamp_rq_dec(rq, p); 2125 - rq->queue_mask |= p->sched_class->queue_mask; 2126 2126 return p->sched_class->dequeue_task(rq, p, flags); 2127 2127 } 2128 2128 ··· 2172 2174 { 2173 2175 struct task_struct *donor = rq->donor; 2174 2176 2175 - if (p->sched_class == donor->sched_class) 2176 - donor->sched_class->wakeup_preempt(rq, p, flags); 2177 - else if (sched_class_above(p->sched_class, donor->sched_class)) 2177 + if (p->sched_class == rq->next_class) { 2178 + rq->next_class->wakeup_preempt(rq, p, flags); 2179 + 2180 + } else if (sched_class_above(p->sched_class, rq->next_class)) { 2181 + rq->next_class->wakeup_preempt(rq, p, flags); 2178 2182 resched_curr(rq); 2183 + rq->next_class = p->sched_class; 2184 + } 2179 2185 2180 2186 /* 2181 2187 * A queue event has occurred, and we're going to schedule. 
In ··· 6806 6804 pick_again: 6807 6805 next = pick_next_task(rq, rq->donor, &rf); 6808 6806 rq_set_donor(rq, next); 6807 + rq->next_class = next->sched_class; 6809 6808 if (unlikely(task_is_blocked(next))) { 6810 6809 next = find_proxy_task(rq, next, &rf); 6811 6810 if (!next) ··· 8653 8650 rq->rt.rt_runtime = global_rt_runtime(); 8654 8651 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); 8655 8652 #endif 8653 + rq->next_class = &idle_sched_class; 8654 + 8656 8655 rq->sd = NULL; 8657 8656 rq->rd = NULL; 8658 8657 rq->cpu_capacity = SCHED_CAPACITY_SCALE; ··· 10780 10775 flags |= DEQUEUE_NOCLOCK; 10781 10776 } 10782 10777 10783 - if (flags & DEQUEUE_CLASS) { 10784 - if (p->sched_class->switching_from) 10785 - p->sched_class->switching_from(rq, p); 10786 - } 10778 + if ((flags & DEQUEUE_CLASS) && p->sched_class->switching_from) 10779 + p->sched_class->switching_from(rq, p); 10787 10780 10788 10781 *ctx = (struct sched_change_ctx){ 10789 10782 .p = p, ··· 10832 10829 if (ctx->flags & ENQUEUE_CLASS) { 10833 10830 if (p->sched_class->switched_to) 10834 10831 p->sched_class->switched_to(rq, p); 10832 + 10833 + /* 10834 + * If this was a class promotion; let the old class know it 10835 + * got preempted. Note that none of the switch*_from() methods 10836 + * know the new class and none of the switch*_to() methods 10837 + * know the old class. 10838 + */ 10839 + if (ctx->running && sched_class_above(p->sched_class, ctx->class)) { 10840 + rq->next_class->wakeup_preempt(rq, p, 0); 10841 + rq->next_class = p->sched_class; 10842 + } 10835 10843 10836 10844 /* 10837 10845 * If this was a degradation in class someone should have set
+9 -5
kernel/sched/deadline.c
··· 2499 2499 * Only called when both the current and waking task are -deadline 2500 2500 * tasks. 2501 2501 */ 2502 - static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, 2503 - int flags) 2502 + static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags) 2504 2503 { 2504 + /* 2505 + * Can only get preempted by stop-class, and those should be 2506 + * few and short lived, doesn't really make sense to push 2507 + * anything away for that. 2508 + */ 2509 + if (p->sched_class != &dl_sched_class) 2510 + return; 2511 + 2505 2512 if (dl_entity_preempt(&p->dl, &rq->donor->dl)) { 2506 2513 resched_curr(rq); 2507 2514 return; ··· 3353 3346 #endif 3354 3347 3355 3348 DEFINE_SCHED_CLASS(dl) = { 3356 - 3357 - .queue_mask = 8, 3358 - 3359 3349 .enqueue_task = enqueue_task_dl, 3360 3350 .dequeue_task = dequeue_task_dl, 3361 3351 .yield_task = yield_task_dl,
+4 -5
kernel/sched/ext.c
··· 2431 2431 /* see kick_cpus_irq_workfn() */ 2432 2432 smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); 2433 2433 2434 - rq_modified_clear(rq); 2434 + rq->next_class = &ext_sched_class; 2435 2435 2436 2436 rq_unpin_lock(rq, rf); 2437 2437 balance_one(rq, prev); ··· 2446 2446 * If @force_scx is true, always try to pick a SCHED_EXT task, 2447 2447 * regardless of any higher-priority sched classes activity. 2448 2448 */ 2449 - if (!force_scx && rq_modified_above(rq, &ext_sched_class)) 2449 + if (!force_scx && sched_class_above(rq->next_class, &ext_sched_class)) 2450 2450 return RETRY_TASK; 2451 2451 2452 2452 keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP; ··· 3075 3075 scx_disable_task(p); 3076 3076 } 3077 3077 3078 - static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {} 3078 + static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_flags) {} 3079 + 3079 3080 static void switched_to_scx(struct rq *rq, struct task_struct *p) {} 3080 3081 3081 3082 int scx_check_setscheduler(struct task_struct *p, int policy) ··· 3337 3336 * their current sched_class. Call them directly from sched core instead. 3338 3337 */ 3339 3338 DEFINE_SCHED_CLASS(ext) = { 3340 - .queue_mask = 1, 3341 - 3342 3339 .enqueue_task = enqueue_task_scx, 3343 3340 .dequeue_task = dequeue_task_scx, 3344 3341 .yield_task = yield_task_scx,
+10 -7
kernel/sched/fair.c
··· 8736 8736 /* 8737 8737 * Preempt the current task with a newly woken task if needed: 8738 8738 */ 8739 - static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags) 8739 + static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_flags) 8740 8740 { 8741 8741 enum preempt_wakeup_action preempt_action = PREEMPT_WAKEUP_PICK; 8742 8742 struct task_struct *donor = rq->donor; 8743 8743 struct sched_entity *se = &donor->se, *pse = &p->se; 8744 8744 struct cfs_rq *cfs_rq = task_cfs_rq(donor); 8745 8745 int cse_is_idle, pse_is_idle; 8746 + 8747 + /* 8748 + * XXX Getting preempted by higher class, try and find idle CPU? 8749 + */ 8750 + if (p->sched_class != &fair_sched_class) 8751 + return; 8746 8752 8747 8753 if (unlikely(se == pse)) 8748 8754 return; ··· 12917 12911 t0 = sched_clock_cpu(this_cpu); 12918 12912 __sched_balance_update_blocked_averages(this_rq); 12919 12913 12920 - rq_modified_clear(this_rq); 12914 + this_rq->next_class = &fair_sched_class; 12921 12915 raw_spin_rq_unlock(this_rq); 12922 12916 12923 12917 for_each_domain(this_cpu, sd) { ··· 12984 12978 pulled_task = 1; 12985 12979 12986 12980 /* If a higher prio class was modified, restart the pick */ 12987 - if (rq_modified_above(this_rq, &fair_sched_class)) 12981 + if (sched_class_above(this_rq->next_class, &fair_sched_class)) 12988 12982 pulled_task = -1; 12989 12983 12990 12984 out: ··· 13888 13882 * All the scheduling class methods: 13889 13883 */ 13890 13884 DEFINE_SCHED_CLASS(fair) = { 13891 - 13892 - .queue_mask = 2, 13893 - 13894 13885 .enqueue_task = enqueue_task_fair, 13895 13886 .dequeue_task = dequeue_task_fair, 13896 13887 .yield_task = yield_task_fair, 13897 13888 .yield_to_task = yield_to_task_fair, 13898 13889 13899 - .wakeup_preempt = check_preempt_wakeup_fair, 13890 + .wakeup_preempt = wakeup_preempt_fair, 13900 13891 13901 13892 .pick_task = pick_task_fair, 13902 13893 .pick_next_task = pick_next_task_fair,
-3
kernel/sched/idle.c
··· 536 536 * Simple, special scheduling class for the per-CPU idle tasks: 537 537 */ 538 538 DEFINE_SCHED_CLASS(idle) = { 539 - 540 - .queue_mask = 0, 541 - 542 539 /* no enqueue/yield_task for idle tasks */ 543 540 544 541 /* dequeue is not valid, we print a debug message there: */
+6 -3
kernel/sched/rt.c
··· 1615 1615 { 1616 1616 struct task_struct *donor = rq->donor; 1617 1617 1618 + /* 1619 + * XXX If we're preempted by DL, queue a push? 1620 + */ 1621 + if (p->sched_class != &rt_sched_class) 1622 + return; 1623 + 1618 1624 if (p->prio < donor->prio) { 1619 1625 resched_curr(rq); 1620 1626 return; ··· 2574 2568 #endif /* CONFIG_SCHED_CORE */ 2575 2569 2576 2570 DEFINE_SCHED_CLASS(rt) = { 2577 - 2578 - .queue_mask = 4, 2579 - 2580 2571 .enqueue_task = enqueue_task_rt, 2581 2572 .dequeue_task = dequeue_task_rt, 2582 2573 .yield_task = yield_task_rt,
+2 -25
kernel/sched/sched.h
··· 1118 1118 /* runqueue lock: */ 1119 1119 raw_spinlock_t __lock; 1120 1120 1121 - /* Per class runqueue modification mask; bits in class order. */ 1122 - unsigned int queue_mask; 1123 1121 unsigned int nr_running; 1124 1122 #ifdef CONFIG_NUMA_BALANCING 1125 1123 unsigned int nr_numa_running; ··· 1177 1179 struct sched_dl_entity *dl_server; 1178 1180 struct task_struct *idle; 1179 1181 struct task_struct *stop; 1182 + const struct sched_class *next_class; 1180 1183 unsigned long next_balance; 1181 1184 struct mm_struct *prev_mm; 1182 1185 ··· 2425 2426 #ifdef CONFIG_UCLAMP_TASK 2426 2427 int uclamp_enabled; 2427 2428 #endif 2428 - /* 2429 - * idle: 0 2430 - * ext: 1 2431 - * fair: 2 2432 - * rt: 4 2433 - * dl: 8 2434 - * stop: 16 2435 - */ 2436 - unsigned int queue_mask; 2437 2429 2438 2430 /* 2439 2431 * move_queued_task/activate_task/enqueue_task: rq->lock ··· 2582 2592 int (*task_is_throttled)(struct task_struct *p, int cpu); 2583 2593 #endif 2584 2594 }; 2585 - 2586 - /* 2587 - * Does not nest; only used around sched_class::pick_task() rq-lock-breaks. 2588 - */ 2589 - static inline void rq_modified_clear(struct rq *rq) 2590 - { 2591 - rq->queue_mask = 0; 2592 - } 2593 - 2594 - static inline bool rq_modified_above(struct rq *rq, const struct sched_class * class) 2595 - { 2596 - unsigned int mask = class->queue_mask; 2597 - return rq->queue_mask & ~((mask << 1) - 1); 2598 - } 2599 2595 2600 2596 static inline void put_prev_task(struct rq *rq, struct task_struct *prev) 2601 2597 { ··· 3875 3899 deactivate_task(src_rq, task, 0); 3876 3900 set_task_cpu(task, dst_rq->cpu); 3877 3901 activate_task(dst_rq, task, 0); 3902 + wakeup_preempt(dst_rq, task, 0); 3878 3903 } 3879 3904 3880 3905 static inline
-3
kernel/sched/stop_task.c
··· 97 97 * Simple, special scheduling class for the per-CPU stop tasks: 98 98 */ 99 99 DEFINE_SCHED_CLASS(stop) = { 100 - 101 - .queue_mask = 16, 102 - 103 100 .enqueue_task = enqueue_task_stop, 104 101 .dequeue_task = dequeue_task_stop, 105 102 .yield_task = yield_task_stop,