Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

entry: Prepare for deferred hrtimer rearming

The hrtimer interrupt expires timers and at the end of the interrupt it
rearms the clockevent device for the next expiring timer.

That's obviously correct, but in the case that an expired timer sets
NEED_RESCHED, the return from interrupt ends up in schedule(). If HRTICK is
enabled then schedule() will modify the hrtick timer, which causes another
reprogramming of the hardware.

That can be avoided by deferring the rearming to the return from interrupt
path and if the return results in an immediate schedule() invocation then it
can be deferred until the end of schedule(), which avoids multiple rearms
and re-evaluation of the timer wheel.

As this is only relevant for interrupt to user return, split the work masks
up and hand them in as arguments from the relevant exit to user functions,
which allows the compiler to optimize the deferred handling out for the
syscall exit to user case.

Add the rearm checks to the appropriate places in the exit to user loop and
the interrupt return to kernel path, so that the rearming is always
guaranteed.

In the return to user space path this is handled in the same way as
TIF_RSEQ to avoid extra instructions in the fast path, which are truly
hurtful for device interrupt heavy workloads, as the extra instructions and
conditionals, while benign at first sight, accumulate quickly into measurable
regressions. The return from syscall path is completely unaffected due to
the above mentioned split, so syscall heavy workloads won't have any extra
burden.

For now this is just placing empty stubs at the right places which are all
optimized out by the compiler until the actual functionality is in place.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.066469985@kernel.org

+35 -10
+19 -6
include/linux/irq-entry-common.h
··· 3 3 #define __LINUX_IRQENTRYCOMMON_H 4 4 5 5 #include <linux/context_tracking.h> 6 + #include <linux/hrtimer_rearm.h> 6 7 #include <linux/kmsan.h> 7 8 #include <linux/rseq_entry.h> 8 9 #include <linux/static_call_types.h> ··· 33 32 _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ 34 33 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \ 35 34 ARCH_EXIT_TO_USER_MODE_WORK) 35 + 36 + #ifdef CONFIG_HRTIMER_REARM_DEFERRED 37 + # define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK) 38 + # define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK | _TIF_HRTIMER_REARM) 39 + #else 40 + # define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK) 41 + # define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK) 42 + #endif 36 43 37 44 /** 38 45 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs ··· 212 203 /** 213 204 * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required 214 205 * @regs: Pointer to pt_regs on entry stack 206 + * @work_mask: Which TIF bits need to be evaluated 215 207 * 216 208 * 1) check that interrupts are disabled 217 209 * 2) call tick_nohz_user_enter_prepare() ··· 222 212 * 223 213 * Don't invoke directly, use the syscall/irqentry_ prefixed variants below 224 214 */ 225 - static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs) 215 + static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs, 216 + const unsigned long work_mask) 226 217 { 227 218 unsigned long ti_work; 228 219 ··· 233 222 tick_nohz_user_enter_prepare(); 234 223 235 224 ti_work = read_thread_flags(); 236 - if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) 237 - ti_work = exit_to_user_mode_loop(regs, ti_work); 225 + if (unlikely(ti_work & work_mask)) { 226 + if (!hrtimer_rearm_deferred_user_irq(&ti_work, work_mask)) 227 + ti_work = exit_to_user_mode_loop(regs, ti_work); 228 + } 238 229 239 230 arch_exit_to_user_mode_prepare(regs, ti_work); 240 231 } ··· 252 239 /* Temporary 
workaround to keep ARM64 alive */ 253 240 static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs) 254 241 { 255 - __exit_to_user_mode_prepare(regs); 242 + __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK); 256 243 rseq_exit_to_user_mode_legacy(); 257 244 __exit_to_user_mode_validate(); 258 245 } ··· 266 253 */ 267 254 static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) 268 255 { 269 - __exit_to_user_mode_prepare(regs); 256 + __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_SYSCALL); 270 257 rseq_syscall_exit_to_user_mode(); 271 258 __exit_to_user_mode_validate(); 272 259 } ··· 280 267 */ 281 268 static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs) 282 269 { 283 - __exit_to_user_mode_prepare(regs); 270 + __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_IRQ); 284 271 rseq_irqentry_exit_to_user_mode(); 285 272 __exit_to_user_mode_validate(); 286 273 }
+13 -3
include/linux/rseq_entry.h
··· 40 40 #endif /* !CONFIG_RSEQ_STATS */ 41 41 42 42 #ifdef CONFIG_RSEQ 43 + #include <linux/hrtimer_rearm.h> 43 44 #include <linux/jump_label.h> 44 45 #include <linux/rseq.h> 45 46 #include <linux/sched/signal.h> ··· 111 110 t->rseq.slice.state.granted = false; 112 111 } 113 112 114 - static __always_inline bool rseq_grant_slice_extension(bool work_pending) 113 + static __always_inline bool __rseq_grant_slice_extension(bool work_pending) 115 114 { 116 115 struct task_struct *curr = current; 117 116 struct rseq_slice_ctrl usr_ctrl; ··· 216 215 return false; 217 216 } 218 217 218 + static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) 219 + { 220 + if (unlikely(__rseq_grant_slice_extension(ti_work & mask))) { 221 + hrtimer_rearm_deferred_tif(ti_work); 222 + return true; 223 + } 224 + return false; 225 + } 226 + 219 227 #else /* CONFIG_RSEQ_SLICE_EXTENSION */ 220 228 static inline bool rseq_slice_extension_enabled(void) { return false; } 221 229 static inline bool rseq_arm_slice_extension_timer(void) { return false; } 222 230 static inline void rseq_slice_clear_grant(struct task_struct *t) { } 223 - static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } 231 + static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } 224 232 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ 225 233 226 234 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); ··· 788 778 static inline void rseq_irqentry_exit_to_user_mode(void) { } 789 779 static inline void rseq_exit_to_user_mode_legacy(void) { } 790 780 static inline void rseq_debug_syscall_return(struct pt_regs *regs) { } 791 - static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } 781 + static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } 792 782 #endif /* !CONFIG_RSEQ */ 793 783 794 784 #endif 
/* _LINUX_RSEQ_ENTRY_H */
+3 -1
kernel/entry/common.c
··· 50 50 local_irq_enable_exit_to_user(ti_work); 51 51 52 52 if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) { 53 - if (!rseq_grant_slice_extension(ti_work & TIF_SLICE_EXT_DENY)) 53 + if (!rseq_grant_slice_extension(ti_work, TIF_SLICE_EXT_DENY)) 54 54 schedule(); 55 55 } 56 56 ··· 225 225 */ 226 226 if (state.exit_rcu) { 227 227 instrumentation_begin(); 228 + hrtimer_rearm_deferred(); 228 229 /* Tell the tracer that IRET will enable interrupts */ 229 230 trace_hardirqs_on_prepare(); 230 231 lockdep_hardirqs_on_prepare(); ··· 239 238 if (IS_ENABLED(CONFIG_PREEMPTION)) 240 239 irqentry_exit_cond_resched(); 241 240 241 + hrtimer_rearm_deferred(); 242 242 /* Covers both tracing and lockdep */ 243 243 trace_hardirqs_on(); 244 244 instrumentation_end();