Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched: Optimize hrtimer handling

schedule() provides several mechanisms to update the hrtick timer:

1) When the next task is picked

2) When the balance callbacks are invoked before rq::lock is released

Each of them can result in a first expiring timer and cause the clock event
device to be reprogrammed, so a single pass through schedule() can reprogram
the hardware more than once.

Solve this by deferring the rearm to the end of schedule(), right before
rq::lock is released. A flag set on entry tells hrtick_start() to cache the
runtime constraint in rq::hrtick_delay without touching the timer itself.

Right before releasing rq::lock, evaluate the flags: rearm the hrtick timer
if a start was requested during schedule(), otherwise cancel a still queued
timer when the runqueue is idle.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163429.273068659@kernel.org

authored by

Thomas Gleixner and committed by
Peter Zijlstra
96d1610e c3a92213

+51 -10
+49 -10
kernel/sched/core.c
··· 872 872 * Use HR-timers to deliver accurate preemption points. 873 873 */ 874 874 875 + enum { 876 + HRTICK_SCHED_NONE = 0, 877 + HRTICK_SCHED_DEFER = BIT(1), 878 + HRTICK_SCHED_START = BIT(2), 879 + }; 880 + 875 881 static void hrtick_clear(struct rq *rq) 876 882 { 877 883 if (hrtimer_active(&rq->hrtick_timer)) ··· 938 932 * doesn't make sense and can cause timer DoS. 939 933 */ 940 934 delta = max_t(s64, delay, 10000LL); 935 + 936 + /* 937 + * If this is in the middle of schedule() only note the delay 938 + * and let hrtick_schedule_exit() deal with it. 939 + */ 940 + if (rq->hrtick_sched) { 941 + rq->hrtick_sched |= HRTICK_SCHED_START; 942 + rq->hrtick_delay = delta; 943 + return; 944 + } 945 + 941 946 rq->hrtick_time = ktime_add_ns(ktime_get(), delta); 942 947 943 948 if (rq == this_rq()) ··· 957 940 smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); 958 941 } 959 942 943 + static inline void hrtick_schedule_enter(struct rq *rq) 944 + { 945 + rq->hrtick_sched = HRTICK_SCHED_DEFER; 946 + } 947 + 948 + static inline void hrtick_schedule_exit(struct rq *rq) 949 + { 950 + if (rq->hrtick_sched & HRTICK_SCHED_START) { 951 + rq->hrtick_time = ktime_add_ns(ktime_get(), rq->hrtick_delay); 952 + __hrtick_restart(rq); 953 + } else if (idle_rq(rq)) { 954 + /* 955 + * No need for using hrtimer_is_active(). The timer is CPU local 956 + * and interrupts are disabled, so the callback cannot be 957 + * running and the queued state is valid. 
958 + */ 959 + if (hrtimer_is_queued(&rq->hrtick_timer)) 960 + hrtimer_cancel(&rq->hrtick_timer); 961 + } 962 + 963 + rq->hrtick_sched = HRTICK_SCHED_NONE; 964 + } 965 + 960 966 static void hrtick_rq_init(struct rq *rq) 961 967 { 962 968 INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); 969 + rq->hrtick_sched = HRTICK_SCHED_NONE; 963 970 hrtimer_setup(&rq->hrtick_timer, hrtick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); 964 971 } 965 972 #else /* !CONFIG_SCHED_HRTICK: */ 966 - static inline void hrtick_clear(struct rq *rq) 967 - { 968 - } 969 - 970 - static inline void hrtick_rq_init(struct rq *rq) 971 - { 972 - } 973 + static inline void hrtick_clear(struct rq *rq) { } 974 + static inline void hrtick_rq_init(struct rq *rq) { } 975 + static inline void hrtick_schedule_enter(struct rq *rq) { } 976 + static inline void hrtick_schedule_exit(struct rq *rq) { } 973 977 #endif /* !CONFIG_SCHED_HRTICK */ 974 978 975 979 /* ··· 5066 5028 */ 5067 5029 spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_); 5068 5030 __balance_callbacks(rq, NULL); 5031 + hrtick_schedule_exit(rq); 5069 5032 raw_spin_rq_unlock_irq(rq); 5070 5033 } 5071 5034 ··· 6820 6781 6821 6782 schedule_debug(prev, preempt); 6822 6783 6823 - if (sched_feat(HRTICK) || sched_feat(HRTICK_DL)) 6824 - hrtick_clear(rq); 6825 - 6826 6784 klp_sched_try_switch(prev); 6827 6785 6828 6786 local_irq_disable(); ··· 6845 6809 */ 6846 6810 rq_lock(rq, &rf); 6847 6811 smp_mb__after_spinlock(); 6812 + 6813 + hrtick_schedule_enter(rq); 6848 6814 6849 6815 /* Promote REQ to ACT */ 6850 6816 rq->clock_update_flags <<= 1; ··· 6949 6911 6950 6912 rq_unpin_lock(rq, &rf); 6951 6913 __balance_callbacks(rq, NULL); 6914 + hrtick_schedule_exit(rq); 6952 6915 raw_spin_rq_unlock_irq(rq); 6953 6916 } 6954 6917 trace_sched_exit_tp(is_switch);
+2
kernel/sched/sched.h
··· 1285 1285 call_single_data_t hrtick_csd; 1286 1286 struct hrtimer hrtick_timer; 1287 1287 ktime_t hrtick_time; 1288 + ktime_t hrtick_delay; 1289 + unsigned int hrtick_sched; 1288 1290 #endif 1289 1291 1290 1292 #ifdef CONFIG_SCHEDSTATS