Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:

- Fix a performance regression when measuring the CPU time of a thread
(clock_gettime(CLOCK_THREAD_CPUTIME_ID,...)) due to the addition of
PSI IRQ time accounting in the hotpath

- Fix a task_struct leak caused by failing to decrement the refcount
  when the task is enqueued before the timer — whose expiry handler is
  supposed to drop that reference — fires

- Revert an attempt to expedite detaching of movable tasks, as finding
those could become very costly. Turns out the original issue wasn't
even hit by anyone

* tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched: Move psi_account_irqtime() out of update_rq_clock_task() hotpath
sched/deadline: Fix task_struct reference leak
Revert "sched/fair: Make sure to try to detach at least one movable task"

+39 -20
+5 -2
kernel/sched/core.c
··· 723 723 724 724 rq->prev_irq_time += irq_delta; 725 725 delta -= irq_delta; 726 - psi_account_irqtime(rq->curr, irq_delta); 727 726 delayacct_irq(rq->curr, irq_delta); 728 727 #endif 729 728 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ··· 5664 5665 { 5665 5666 int cpu = smp_processor_id(); 5666 5667 struct rq *rq = cpu_rq(cpu); 5667 - struct task_struct *curr = rq->curr; 5668 + struct task_struct *curr; 5668 5669 struct rq_flags rf; 5669 5670 unsigned long hw_pressure; 5670 5671 u64 resched_latency; ··· 5675 5676 sched_clock_tick(); 5676 5677 5677 5678 rq_lock(rq, &rf); 5679 + 5680 + curr = rq->curr; 5681 + psi_account_irqtime(rq, curr, NULL); 5678 5682 5679 5683 update_rq_clock(rq); 5680 5684 hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); ··· 6739 6737 ++*switch_count; 6740 6738 6741 6739 migrate_disable_switch(rq, prev); 6740 + psi_account_irqtime(rq, prev, next); 6742 6741 psi_sched_switch(prev, next, !task_on_rq_queued(prev)); 6743 6742 6744 6743 trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
+6 -1
kernel/sched/deadline.c
··· 1804 1804 * The replenish timer needs to be canceled. No 1805 1805 * problem if it fires concurrently: boosted threads 1806 1806 * are ignored in dl_task_timer(). 1807 + * 1808 + * If the timer callback was running (hrtimer_try_to_cancel == -1), 1809 + * it will eventually call put_task_struct(). 1807 1810 */ 1808 - hrtimer_try_to_cancel(&p->dl.dl_timer); 1811 + if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 && 1812 + !dl_server(&p->dl)) 1813 + put_task_struct(p); 1809 1814 p->dl.dl_throttled = 0; 1810 1815 } 1811 1816 } else if (!dl_prio(p->normal_prio)) {
+3 -9
kernel/sched/fair.c
··· 9149 9149 break; 9150 9150 9151 9151 env->loop++; 9152 - /* 9153 - * We've more or less seen every task there is, call it quits 9154 - * unless we haven't found any movable task yet. 9155 - */ 9156 - if (env->loop > env->loop_max && 9157 - !(env->flags & LBF_ALL_PINNED)) 9152 + /* We've more or less seen every task there is, call it quits */ 9153 + if (env->loop > env->loop_max) 9158 9154 break; 9159 9155 9160 9156 /* take a breather every nr_migrate tasks */ ··· 11389 11393 11390 11394 if (env.flags & LBF_NEED_BREAK) { 11391 11395 env.flags &= ~LBF_NEED_BREAK; 11392 - /* Stop if we tried all running tasks */ 11393 - if (env.loop < busiest->nr_running) 11394 - goto more_balance; 11396 + goto more_balance; 11395 11397 } 11396 11398 11397 11399 /*
+16 -5
kernel/sched/psi.c
··· 773 773 enum psi_states s; 774 774 u32 state_mask; 775 775 776 + lockdep_assert_rq_held(cpu_rq(cpu)); 776 777 groupc = per_cpu_ptr(group->pcpu, cpu); 777 778 778 779 /* ··· 992 991 } 993 992 994 993 #ifdef CONFIG_IRQ_TIME_ACCOUNTING 995 - void psi_account_irqtime(struct task_struct *task, u32 delta) 994 + void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev) 996 995 { 997 - int cpu = task_cpu(task); 996 + int cpu = task_cpu(curr); 998 997 struct psi_group *group; 999 998 struct psi_group_cpu *groupc; 1000 - u64 now; 999 + u64 now, irq; 1000 + s64 delta; 1001 1001 1002 1002 if (static_branch_likely(&psi_disabled)) 1003 1003 return; 1004 1004 1005 - if (!task->pid) 1005 + if (!curr->pid) 1006 + return; 1007 + 1008 + lockdep_assert_rq_held(rq); 1009 + group = task_psi_group(curr); 1010 + if (prev && task_psi_group(prev) == group) 1006 1011 return; 1007 1012 1008 1013 now = cpu_clock(cpu); 1014 + irq = irq_time_read(cpu); 1015 + delta = (s64)(irq - rq->psi_irq_time); 1016 + if (delta < 0) 1017 + return; 1018 + rq->psi_irq_time = irq; 1009 1019 1010 - group = task_psi_group(task); 1011 1020 do { 1012 1021 if (!group->enabled) 1013 1022 continue;
+1
kernel/sched/sched.h
··· 1126 1126 1127 1127 #ifdef CONFIG_IRQ_TIME_ACCOUNTING 1128 1128 u64 prev_irq_time; 1129 + u64 psi_irq_time; 1129 1130 #endif 1130 1131 #ifdef CONFIG_PARAVIRT 1131 1132 u64 prev_steal_time;
+8 -3
kernel/sched/stats.h
··· 110 110 void psi_task_change(struct task_struct *task, int clear, int set); 111 111 void psi_task_switch(struct task_struct *prev, struct task_struct *next, 112 112 bool sleep); 113 - void psi_account_irqtime(struct task_struct *task, u32 delta); 114 - 113 + #ifdef CONFIG_IRQ_TIME_ACCOUNTING 114 + void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev); 115 + #else 116 + static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr, 117 + struct task_struct *prev) {} 118 + #endif /*CONFIG_IRQ_TIME_ACCOUNTING */ 115 119 /* 116 120 * PSI tracks state that persists across sleeps, such as iowaits and 117 121 * memory stalls. As a result, it has to distinguish between sleeps, ··· 196 192 static inline void psi_sched_switch(struct task_struct *prev, 197 193 struct task_struct *next, 198 194 bool sleep) {} 199 - static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {} 195 + static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr, 196 + struct task_struct *prev) {} 200 197 #endif /* CONFIG_PSI */ 201 198 202 199 #ifdef CONFIG_SCHED_INFO