Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'sched-urgent-2026-01-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
"Misc deadline scheduler fixes, mainly for a new category of bugs that
were discovered and fixed recently:

- Fix a race condition in the DL server

- Fix a DL server bug which can result in incorrectly going idle when
there's work available

- Fix a DL server bug which triggers a WARN() due to broken
get_prio_dl() logic and subsequent misbehavior

- Fix double update_rq_clock() calls

- Fix setscheduler() assumption about static priorities

- Make sure balancing callbacks are always called

- Plus a handful of preparatory commits for the fixes"

* tag 'sched-urgent-2026-01-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/deadline: Use ENQUEUE_MOVE to allow priority change
sched: Deadline has dynamic priority
sched: Audit MOVE vs balance_callbacks
sched: Fold rq-pin swizzle into __balance_callbacks()
sched/deadline: Avoid double update_rq_clock()
sched/deadline: Ensure get_prio_dl() is up-to-date
sched/deadline: Fix server stopping with runnable tasks
sched: Provide idle_rq() helper
sched/deadline: Fix potential race in dl_add_task_root_domain()
sched/deadline: Remove unnecessary comment in dl_add_task_root_domain()

+59 -56
-1
include/linux/sched.h
···
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
-extern int available_idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
 extern void sched_set_fifo(struct task_struct *p);
+11 -7
kernel/sched/core.c
···
         return __splice_balance_callbacks(rq, true);
 }
 
-static void __balance_callbacks(struct rq *rq)
+void __balance_callbacks(struct rq *rq, struct rq_flags *rf)
 {
+        if (rf)
+                rq_unpin_lock(rq, rf);
         do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
+        if (rf)
+                rq_repin_lock(rq, rf);
 }
 
 void balance_callbacks(struct rq *rq, struct balance_callback *head)
···
          * prev into current:
          */
         spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);
-        __balance_callbacks(rq);
+        __balance_callbacks(rq, NULL);
         raw_spin_rq_unlock_irq(rq);
 }
···
                         proxy_tag_curr(rq, next);
 
                 rq_unpin_lock(rq, &rf);
-                __balance_callbacks(rq);
+                __balance_callbacks(rq, NULL);
                 raw_spin_rq_unlock_irq(rq);
         }
         trace_sched_exit_tp(is_switch);
···
         trace_sched_pi_setprio(p, pi_task);
         oldprio = p->prio;
 
-        if (oldprio == prio)
+        if (oldprio == prio && !dl_prio(prio))
                 queue_flag &= ~DEQUEUE_MOVE;
 
         prev_class = p->sched_class;
···
 out_unlock:
         /* Caller holds task_struct::pi_lock, IRQs are still disabled */
 
-        rq_unpin_lock(rq, &rf);
-        __balance_callbacks(rq);
-        rq_repin_lock(rq, &rf);
+        __balance_callbacks(rq, &rf);
         __task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_RT_MUTEXES */
···
 
         if (resched)
                 resched_curr(rq);
+
+        __balance_callbacks(rq, &rq_guard.rf);
 }
 
 static struct cgroup_subsys_state *
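
For context: the new rf argument lets callers that hold the rq lock with pinned rq_flags hand them to __balance_callbacks() instead of open-coding the unpin/repin sequence, as the rt_mutex_setprio() hunk above does. A minimal caller-side sketch, assuming a hypothetical example_unlock_path() that is not part of this series:

/* Hypothetical caller holding the rq lock with pinned rq_flags. */
static void example_unlock_path(struct rq *rq, struct task_struct *p,
                                struct rq_flags *rf)
{
        /*
         * Old pattern: rq_unpin_lock(rq, rf); __balance_callbacks(rq);
         * rq_repin_lock(rq, rf);
         * Passing rf lets the helper unpin/repin internally; passing NULL
         * keeps the previous behaviour for paths without pinned flags.
         */
        __balance_callbacks(rq, rf);
        __task_rq_unlock(rq, p, rf);
}
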
+19 -17
kernel/sched/deadline.c
···
         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
         struct rq *rq = rq_of_dl_rq(dl_rq);
 
-        update_rq_clock(rq);
-
         WARN_ON(is_dl_boosted(dl_se));
         WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
···
 
 static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
 {
-        bool idle = rq->curr == rq->idle;
+        bool idle = idle_rq(rq);
         s64 scaled_delta_exec;
 
         if (unlikely(delta_exec <= 0)) {
···
 * | 8 | B:zero_laxity-wait | | |
 * | | | <---+ |
 * | +--------------------------------+ |
- * | | ^ ^ 2 |
- * | | 7 | 2 +--------------------+
+ * | | ^ ^ 2 |
+ * | | 7 | 2, 1 +----------------+
 * | v |
 * | +-------------+ |
 * +-- | C:idle-wait | -+
···
 *   dl_defer_idle = 0
 *
 *
- * [1] A->B, A->D
+ * [1] A->B, A->D, C->B
 *   dl_server_start()
+ *     dl_defer_idle = 0;
+ *     if (dl_server_active)
+ *       return; // [B]
 *     dl_server_active = 1;
 *     enqueue_dl_entity()
 *       update_dl_entity(WAKEUP)
···
 *   "B:zero_laxity-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
 *   "B:zero_laxity-wait" -> "D:running" [label="3:dl_server_timer"]
 *   "C:idle-wait" -> "A:init" [label="8:dl_server_timer"]
+ *   "C:idle-wait" -> "B:zero_laxity-wait" [label="1:dl_server_start"]
 *   "C:idle-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
 *   "C:idle-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
 *   "D:running" -> "A:init" [label="4:pick_task_dl"]
···
 {
         struct rq *rq = dl_se->rq;
 
+        dl_se->dl_defer_idle = 0;
         if (!dl_server(dl_se) || dl_se->dl_server_active)
                 return;
···
         rq = cpu_rq(cpu);
 
         guard(rq_lock_irq)(rq);
+        update_rq_clock(rq);
 
         dl_se = &rq->fair_server;
···
                 update_dl_entity(dl_se);
         } else if (flags & ENQUEUE_REPLENISH) {
                 replenish_dl_entity(dl_se);
-        } else if ((flags & ENQUEUE_RESTORE) &&
+        } else if ((flags & ENQUEUE_MOVE) &&
                    !is_dl_boosted(dl_se) &&
                    dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) {
                 setup_new_dl_entity(dl_se);
···
         struct rq *rq;
         struct dl_bw *dl_b;
         unsigned int cpu;
-        struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
+        struct cpumask *msk;
 
         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
         if (!dl_task(p) || dl_entity_is_special(&p->dl)) {
···
                 return;
         }
 
-        /*
-         * Get an active rq, whose rq->rd traces the correct root
-         * domain.
-         * Ideally this would be under cpuset reader lock until rq->rd is
-         * fetched. However, sleepable locks cannot nest inside pi_lock, so we
-         * rely on the caller of dl_add_task_root_domain() holds 'cpuset_mutex'
-         * to guarantee the CPU stays in the cpuset.
-         */
+        msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
         dl_get_task_effective_cpus(p, msk);
         cpu = cpumask_first_and(cpu_active_mask, msk);
         BUG_ON(cpu >= nr_cpu_ids);
         rq = cpu_rq(cpu);
         dl_b = &rq->rd->dl_bw;
-        /* End of fetching rd */
 
         raw_spin_lock(&dl_b->lock);
         __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
···
 
 static u64 get_prio_dl(struct rq *rq, struct task_struct *p)
 {
+        /*
+         * Make sure to update current so we don't return a stale value.
+         */
+        if (task_current_donor(rq, p))
+                update_curr_dl(rq);
+
         return p->dl.deadline;
 }
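
A rough illustration of the ENQUEUE_RESTORE -> ENQUEUE_MOVE change in enqueue_dl_entity() above: a dequeue/enqueue cycle that may change the task's effective priority (its deadline) is expected to carry the MOVE flags, so that a deadline already in the past is refreshed via setup_new_dl_entity(). The flag names and the dequeue_task()/enqueue_task() calls are real; the wrapper below is a hypothetical sketch, not code from this series.

/* Hypothetical sketch of a priority-changing requeue under the rq lock. */
static void example_requeue_dl(struct rq *rq, struct task_struct *p)
{
        /* MOVE: the task's place (priority) in the runqueue may change. */
        dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK);

        /* ... deadline parameters may be updated here ... */

        /*
         * With ENQUEUE_MOVE set (rather than only ENQUEUE_RESTORE),
         * enqueue_dl_entity() may call setup_new_dl_entity() when the old
         * deadline already lies in the past.
         */
        enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_MOVE | ENQUEUE_NOCLOCK);
}
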
+1
kernel/sched/ext.c
···
 static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
 {
         if (iter->locked_task) {
+                __balance_callbacks(iter->rq, &iter->rf);
                 task_rq_unlock(iter->rq, iter->locked_task, &iter->rf);
                 iter->locked_task = NULL;
         }
+26 -1
kernel/sched/sched.h
···
 #define cpu_curr(cpu)        (cpu_rq(cpu)->curr)
 #define raw_rq()             raw_cpu_ptr(&runqueues)
 
+static inline bool idle_rq(struct rq *rq)
+{
+        return rq->curr == rq->idle && !rq->nr_running && !rq->ttwu_pending;
+}
+
+/**
+ * available_idle_cpu - is a given CPU idle for enqueuing work.
+ * @cpu: the CPU in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
+ */
+static inline bool available_idle_cpu(int cpu)
+{
+        if (!idle_rq(cpu_rq(cpu)))
+                return 0;
+
+        if (vcpu_is_preempted(cpu))
+                return 0;
+
+        return 1;
+}
+
 #ifdef CONFIG_SCHED_PROXY_EXEC
 static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
 {
···
  *        should preserve as much state as possible.
  *
  * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
- *        in the runqueue.
+ *        in the runqueue. IOW the priority is allowed to change. Callers
+ *        must expect to deal with balance callbacks.
  *
  * NOCLOCK - skip the update_rq_clock() (avoids double updates)
···
 extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
 
 extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
+
+extern void __balance_callbacks(struct rq *rq, struct rq_flags *rf);
 extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
 
 /*
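
Illustrative-only usage note: available_idle_cpu() keeps its existing semantics for callers; it is now simply a static inline built on the new idle_rq() helper. A hypothetical scan loop, assuming a made-up example_pick_idle_cpu():

/* Hypothetical helper: find a CPU in @mask that looks idle right now. */
static int example_pick_idle_cpu(const struct cpumask *mask)
{
        int cpu;

        for_each_cpu(cpu, mask) {
                /* idle_rq(cpu_rq(cpu)) && !vcpu_is_preempted(cpu) */
                if (available_idle_cpu(cpu))
                        return cpu;
        }

        return -1;
}
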
+2 -30
kernel/sched/syscalls.c
···
  */
 int idle_cpu(int cpu)
 {
-        struct rq *rq = cpu_rq(cpu);
-
-        if (rq->curr != rq->idle)
-                return 0;
-
-        if (rq->nr_running)
-                return 0;
-
-        if (rq->ttwu_pending)
-                return 0;
-
-        return 1;
-}
-
-/**
- * available_idle_cpu - is a given CPU idle for enqueuing work.
- * @cpu: the CPU in question.
- *
- * Return: 1 if the CPU is currently idle. 0 otherwise.
- */
-int available_idle_cpu(int cpu)
-{
-        if (!idle_cpu(cpu))
-                return 0;
-
-        if (vcpu_is_preempted(cpu))
-                return 0;
-
-        return 1;
+        return idle_rq(cpu_rq(cpu));
 }
 
 /**
···
                  * itself.
                  */
                 newprio = rt_effective_prio(p, newprio);
-                if (newprio == oldprio)
+                if (newprio == oldprio && !dl_prio(newprio))
                         queue_flags &= ~DEQUEUE_MOVE;
         }
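
To spell out the static-priority assumption fixed above: SCHED_DEADLINE tasks share the same numeric prio while their real priority is the dynamic deadline, so newprio == oldprio must not be taken to mean the task cannot move. dl_prio() is the real predicate; the helper below is only a hypothetical restatement of the new check.

/* Hypothetical restatement: may the DEQUEUE_MOVE flag be dropped? */
static bool example_can_skip_move(int oldprio, int newprio)
{
        /*
         * Deadline tasks have dynamic priority (their deadline); an equal
         * static prio says nothing about the effective priority.
         */
        if (dl_prio(newprio))
                return false;

        return newprio == oldprio;
}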