···412412/*413413 * Program the next event, relative to now414414 */415415-static int lapic_next_event(unsigned long delta,416416- struct clock_event_device *evt)415415+static int lapic_next_event(unsigned long delta, struct clock_event_device *evt)417416{418417 apic_write(APIC_TMICT, delta);419418 return 0;420419}421420422422-static int lapic_next_deadline(unsigned long delta,423423- struct clock_event_device *evt)421421+static int lapic_next_deadline(unsigned long delta, struct clock_event_device *evt)424422{425425- u64 tsc;423423+ /*424424+ * There is no weak_wrmsr_fence() required here as all of this is purely425425+ * CPU local. Avoid the [ml]fence overhead.426426+ */427427+ u64 tsc = rdtsc();426428427427- /* This MSR is special and need a special fence: */428428- weak_wrmsr_fence();429429-430430- tsc = rdtsc();431431- wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));429429+ native_wrmsrq(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));432430 return 0;433431}434432···450452 * the timer _and_ zero the counter registers:451453 */452454 if (v & APIC_LVT_TIMER_TSCDEADLINE)453453- wrmsrq(MSR_IA32_TSC_DEADLINE, 0);455455+ native_wrmsrq(MSR_IA32_TSC_DEADLINE, 0);454456 else455457 apic_write(APIC_TMICT, 0);456458···547549548550 if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))549551 return false;552552+553553+ /* XEN_PV does not support it, but be paranoia about it */554554+ if (boot_cpu_has(X86_FEATURE_XENPV))555555+ goto clear;556556+550557 if (boot_cpu_has(X86_FEATURE_HYPERVISOR))551558 return true;552559···564561 if (boot_cpu_data.microcode >= rev)565562 return true;566563567567- setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);568564 pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "569565 "please update microcode to version: 0x%x (or later)\n", rev);566566+567567+clear:568568+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);570569 return false;571570}572571···591586592587 if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) 
{593588 levt->name = "lapic-deadline";594594- levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |595595- CLOCK_EVT_FEAT_DUMMY);589589+ levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_DUMMY);590590+ levt->features |= CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED;591591+ levt->cs_id = CSID_X86_TSC;596592 levt->set_next_event = lapic_next_deadline;597597- clockevents_config_and_register(levt,598598- tsc_khz * (1000 / TSC_DIVISOR),599599- 0xF, ~0UL);600600- } else593593+ clockevents_config_and_register(levt, tsc_khz * (1000 / TSC_DIVISOR), 0xF, ~0UL);594594+ } else {601595 clockevents_register_device(levt);596596+ }602597603598 apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true);604599}
···1313#define _LINUX_HRTIMER_H14141515#include <linux/hrtimer_defs.h>1616+#include <linux/hrtimer_rearm.h>1617#include <linux/hrtimer_types.h>1718#include <linux/init.h>1819#include <linux/list.h>···3231 * soft irq context3332 * HRTIMER_MODE_HARD - Timer callback function will be executed in3433 * hard irq context even on PREEMPT_RT.3434+ * HRTIMER_MODE_LAZY_REARM - Avoid reprogramming if the timer was the3535+ * first expiring timer and is moved into the3636+ * future. Special mode for the HRTICK timer to3737+ * avoid extensive reprogramming of the hardware,3838+ * which is expensive in virtual machines. Risks3939+ * a pointless expiry, but that's better than4040+ * reprogramming on every context switch,3541 */3642enum hrtimer_mode {3743 HRTIMER_MODE_ABS = 0x00,···4638 HRTIMER_MODE_PINNED = 0x02,4739 HRTIMER_MODE_SOFT = 0x04,4840 HRTIMER_MODE_HARD = 0x08,4141+ HRTIMER_MODE_LAZY_REARM = 0x10,49425043 HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,5144 HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,···6354 HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,6455 HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,6556};6666-6767-/*6868- * Values to track state of the timer6969- *7070- * Possible states:7171- *7272- * 0x00 inactive7373- * 0x01 enqueued into rbtree7474- *7575- * The callback state is not part of the timer->state because clearing it would7676- * mean touching the timer after the callback, this makes it impossible to free7777- * the timer from the callback function.7878- *7979- * Therefore we track the callback state in:8080- *8181- * timer->base->cpu_base->running == timer8282- *8383- * On SMP it is possible to have a "callback function running and enqueued"8484- * status. It happens for example when a posix timer expired and the callback8585- * queued a signal. 
Between dropping the lock which protects the posix timer8686- * and reacquiring the base lock of the hrtimer, another CPU can deliver the8787- * signal and rearm the timer.8888- *8989- * All state transitions are protected by cpu_base->lock.9090- */9191-#define HRTIMER_STATE_INACTIVE 0x009292-#define HRTIMER_STATE_ENQUEUED 0x0193579458/**9559 * struct hrtimer_sleeper - simple sleeper structure···128146 return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer));129147}130148131131-static inline int hrtimer_is_hres_active(struct hrtimer *timer)132132-{133133- return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?134134- timer->base->cpu_base->hres_active : 0;135135-}136136-137149#ifdef CONFIG_HIGH_RES_TIMERS150150+extern unsigned int hrtimer_resolution;138151struct clock_event_device;139152140153extern void hrtimer_interrupt(struct clock_event_device *dev);141154142142-extern unsigned int hrtimer_resolution;155155+extern struct static_key_false hrtimer_highres_enabled_key;143156144144-#else157157+static inline bool hrtimer_highres_enabled(void)158158+{159159+ return static_branch_likely(&hrtimer_highres_enabled_key);160160+}145161162162+#else /* CONFIG_HIGH_RES_TIMERS */146163#define hrtimer_resolution (unsigned int)LOW_RES_NSEC147147-148148-#endif164164+static inline bool hrtimer_highres_enabled(void) { return false; }165165+#endif /* !CONFIG_HIGH_RES_TIMERS */149166150167static inline ktime_t151168__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)···274293 */275294static inline bool hrtimer_is_queued(struct hrtimer *timer)276295{277277- /* The READ_ONCE pairs with the update functions of timer->state */278278- return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED);296296+ /* The READ_ONCE pairs with the update functions of timer->is_queued */297297+ return READ_ONCE(timer->is_queued);279298}280299281300/*
+41-38
include/linux/hrtimer_defs.h
···1919 * timer to a base on another cpu.2020 * @clockid: clock id for per_cpu support2121 * @seq: seqcount around __run_hrtimer2222+ * @expires_next: Absolute time of the next event in this clock base2223 * @running: pointer to the currently running hrtimer2324 * @active: red black tree root node for the active timers2425 * @offset: offset of this clock to the monotonic base2526 */2627struct hrtimer_clock_base {2727- struct hrtimer_cpu_base *cpu_base;2828- unsigned int index;2929- clockid_t clockid;3030- seqcount_raw_spinlock_t seq;3131- struct hrtimer *running;3232- struct timerqueue_head active;3333- ktime_t offset;2828+ struct hrtimer_cpu_base *cpu_base;2929+ unsigned int index;3030+ clockid_t clockid;3131+ seqcount_raw_spinlock_t seq;3232+ ktime_t expires_next;3333+ struct hrtimer *running;3434+ struct timerqueue_linked_head active;3535+ ktime_t offset;3436} __hrtimer_clock_base_align;35373638enum hrtimer_base_type {···49475048/**5149 * struct hrtimer_cpu_base - the per cpu clock bases5252- * @lock: lock protecting the base and associated clock bases5353- * and timers5454- * @cpu: cpu number5555- * @active_bases: Bitfield to mark bases with active timers5656- * @clock_was_set_seq: Sequence counter of clock was set events5757- * @hres_active: State of high resolution mode5858- * @in_hrtirq: hrtimer_interrupt() is currently executing5959- * @hang_detected: The last hrtimer interrupt detected a hang6060- * @softirq_activated: displays, if the softirq is raised - update of softirq6161- * related settings is not required then.6262- * @nr_events: Total number of hrtimer interrupt events6363- * @nr_retries: Total number of hrtimer interrupt retries6464- * @nr_hangs: Total number of hrtimer interrupt hangs6565- * @max_hang_time: Maximum time spent in hrtimer_interrupt6666- * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are6767- * expired6868- * @online: CPU is online from an hrtimers point of view6969- * @timer_waiters: A hrtimer_cancel() 
invocation waits for the timer7070- * callback to finish.7171- * @expires_next: absolute time of the next event, is required for remote7272- * hrtimer enqueue; it is the total first expiry time (hard7373- * and soft hrtimer are taken into account)7474- * @next_timer: Pointer to the first expiring timer7575- * @softirq_expires_next: Time to check, if soft queues needs also to be expired7676- * @softirq_next_timer: Pointer to the first expiring softirq based timer7777- * @clock_base: array of clock bases for this cpu5050+ * @lock: lock protecting the base and associated clock bases and timers5151+ * @cpu: cpu number5252+ * @active_bases: Bitfield to mark bases with active timers5353+ * @clock_was_set_seq: Sequence counter of clock was set events5454+ * @hres_active: State of high resolution mode5555+ * @deferred_rearm: A deferred rearm is pending5656+ * @deferred_needs_update: The deferred rearm must re-evaluate the first timer5757+ * @hang_detected: The last hrtimer interrupt detected a hang5858+ * @softirq_activated: displays, if the softirq is raised - update of softirq5959+ * related settings is not required then.6060+ * @nr_events: Total number of hrtimer interrupt events6161+ * @nr_retries: Total number of hrtimer interrupt retries6262+ * @nr_hangs: Total number of hrtimer interrupt hangs6363+ * @max_hang_time: Maximum time spent in hrtimer_interrupt6464+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are expired6565+ * @online: CPU is online from an hrtimers point of view6666+ * @timer_waiters: A hrtimer_cancel() waiters for the timer callback to finish.6767+ * @expires_next: Absolute time of the next event, is required for remote6868+ * hrtimer enqueue; it is the total first expiry time (hard6969+ * and soft hrtimer are taken into account)7070+ * @next_timer: Pointer to the first expiring timer7171+ * @softirq_expires_next: Time to check, if soft queues needs also to be expired7272+ * @softirq_next_timer: Pointer to the first expiring 
softirq based timer7373+ * @deferred_expires_next: Cached expires next value for deferred rearm7474+ * @clock_base: Array of clock bases for this cpu7875 *7976 * Note: next_timer is just an optimization for __remove_hrtimer().8077 * Do not dereference the pointer because it is not reliable on···8483 unsigned int cpu;8584 unsigned int active_bases;8685 unsigned int clock_was_set_seq;8787- unsigned int hres_active : 1,8888- in_hrtirq : 1,8989- hang_detected : 1,9090- softirq_activated : 1,9191- online : 1;8686+ bool hres_active;8787+ bool deferred_rearm;8888+ bool deferred_needs_update;8989+ bool hang_detected;9090+ bool softirq_activated;9191+ bool online;9292#ifdef CONFIG_HIGH_RES_TIMERS9393 unsigned int nr_events;9494 unsigned short nr_retries;···104102 struct hrtimer *next_timer;105103 ktime_t softirq_expires_next;106104 struct hrtimer *softirq_next_timer;105105+ ktime_t deferred_expires_next;107106 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];108107 call_single_data_t csd;109108} ____cacheline_aligned;
+83
include/linux/hrtimer_rearm.h
···11+// SPDX-License-Identifier: GPL-2.022+#ifndef _LINUX_HRTIMER_REARM_H33+#define _LINUX_HRTIMER_REARM_H44+55+#ifdef CONFIG_HRTIMER_REARM_DEFERRED66+#include <linux/thread_info.h>77+88+void __hrtimer_rearm_deferred(void);99+1010+/*1111+ * This is purely CPU local, so check the TIF bit first to avoid the overhead of1212+ * the atomic test_and_clear_bit() operation for the common case where the bit1313+ * is not set.1414+ */1515+static __always_inline bool hrtimer_test_and_clear_rearm_deferred_tif(unsigned long tif_work)1616+{1717+ lockdep_assert_irqs_disabled();1818+1919+ if (unlikely(tif_work & _TIF_HRTIMER_REARM)) {2020+ clear_thread_flag(TIF_HRTIMER_REARM);2121+ return true;2222+ }2323+ return false;2424+}2525+2626+#define TIF_REARM_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_HRTIMER_REARM)2727+2828+/* Invoked from the exit to user before invoking exit_to_user_mode_loop() */2929+static __always_inline bool3030+hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask)3131+{3232+ /* Help the compiler to optimize the function out for syscall returns */3333+ if (!(tif_mask & _TIF_HRTIMER_REARM))3434+ return false;3535+ /*3636+ * Rearm the timer if none of the resched flags is set before going into3737+ * the loop which re-enables interrupts.3838+ */3939+ if (unlikely((*tif_work & TIF_REARM_MASK) == _TIF_HRTIMER_REARM)) {4040+ clear_thread_flag(TIF_HRTIMER_REARM);4141+ __hrtimer_rearm_deferred();4242+ /* Don't go into the loop if HRTIMER_REARM was the only flag */4343+ *tif_work &= ~TIF_HRTIMER_REARM;4444+ return !*tif_work;4545+ }4646+ return false;4747+}4848+4949+/* Invoked from the time slice extension decision function */5050+static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work)5151+{5252+ if (hrtimer_test_and_clear_rearm_deferred_tif(tif_work))5353+ __hrtimer_rearm_deferred();5454+}5555+5656+/*5757+ * This is to be called on all irqentry_exit() paths that will enable5858+ * interrupts.5959+ 
*/6060+static __always_inline void hrtimer_rearm_deferred(void)6161+{6262+ hrtimer_rearm_deferred_tif(read_thread_flags());6363+}6464+6565+/*6666+ * Invoked from the scheduler on entry to __schedule() so it can defer6767+ * rearming after the load balancing callbacks which might change hrtick.6868+ */6969+static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void)7070+{7171+ return hrtimer_test_and_clear_rearm_deferred_tif(read_thread_flags());7272+}7373+7474+#else /* CONFIG_HRTIMER_REARM_DEFERRED */7575+static __always_inline void __hrtimer_rearm_deferred(void) { }7676+static __always_inline void hrtimer_rearm_deferred(void) { }7777+static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work) { }7878+static __always_inline bool7979+hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask) { return false; }8080+static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void) { return false; }8181+#endif /* !CONFIG_HRTIMER_REARM_DEFERRED */8282+8383+#endif
+11-8
include/linux/hrtimer_types.h
···17171818/**1919 * struct hrtimer - the basic hrtimer structure2020- * @node: timerqueue node, which also manages node.expires,2020+ * @node: Linked timerqueue node, which also manages node.expires,2121 * the absolute expiry time in the hrtimers internal2222 * representation. The time is related to the clock on2323 * which the timer is based. Is setup by adding···2828 * was armed.2929 * @function: timer expiry callback function3030 * @base: pointer to the timer base (per cpu and per clock)3131- * @state: state information (See bit values above)3131+ * @is_queued: Indicates whether a timer is enqueued or not3232 * @is_rel: Set if the timer was armed relative3333 * @is_soft: Set if hrtimer will be expired in soft interrupt context.3434 * @is_hard: Set if hrtimer will be expired in hard interrupt context3535 * even on RT.3636+ * @is_lazy: Set if the timer is frequently rearmed to avoid updates3737+ * of the clock event device3638 *3739 * The hrtimer structure must be initialized by hrtimer_setup()3840 */3941struct hrtimer {4040- struct timerqueue_node node;4242+ struct timerqueue_linked_node node;4343+ struct hrtimer_clock_base *base;4444+ bool is_queued;4545+ bool is_rel;4646+ bool is_soft;4747+ bool is_hard;4848+ bool is_lazy;4149 ktime_t _softexpires;4250 enum hrtimer_restart (*__private function)(struct hrtimer *);4343- struct hrtimer_clock_base *base;4444- u8 state;4545- u8 is_rel;4646- u8 is_soft;4747- u8 is_hard;4851};49525053#endif /* _LINUX_HRTIMER_TYPES_H */
+19-6
include/linux/irq-entry-common.h
···33#define __LINUX_IRQENTRYCOMMON_H4455#include <linux/context_tracking.h>66+#include <linux/hrtimer_rearm.h>67#include <linux/kmsan.h>78#include <linux/rseq_entry.h>89#include <linux/static_call_types.h>···3332 _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \3433 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \3534 ARCH_EXIT_TO_USER_MODE_WORK)3535+3636+#ifdef CONFIG_HRTIMER_REARM_DEFERRED3737+# define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK)3838+# define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK | _TIF_HRTIMER_REARM)3939+#else4040+# define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK)4141+# define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK)4242+#endif36433744/**3845 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs···212203/**213204 * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required214205 * @regs: Pointer to pt_regs on entry stack206206+ * @work_mask: Which TIF bits need to be evaluated215207 *216208 * 1) check that interrupts are disabled217209 * 2) call tick_nohz_user_enter_prepare()···222212 *223213 * Don't invoke directly, use the syscall/irqentry_ prefixed variants below224214 */225225-static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)215215+static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs,216216+ const unsigned long work_mask)226217{227218 unsigned long ti_work;228219···233222 tick_nohz_user_enter_prepare();234223235224 ti_work = read_thread_flags();236236- if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))237237- ti_work = exit_to_user_mode_loop(regs, ti_work);225225+ if (unlikely(ti_work & work_mask)) {226226+ if (!hrtimer_rearm_deferred_user_irq(&ti_work, work_mask))227227+ ti_work = exit_to_user_mode_loop(regs, ti_work);228228+ }238229239230 arch_exit_to_user_mode_prepare(regs, ti_work);240231}···252239/* Temporary workaround to keep ARM64 alive */253240static __always_inline void 
exit_to_user_mode_prepare_legacy(struct pt_regs *regs)254241{255255- __exit_to_user_mode_prepare(regs);242242+ __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK);256243 rseq_exit_to_user_mode_legacy();257244 __exit_to_user_mode_validate();258245}···266253 */267254static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)268255{269269- __exit_to_user_mode_prepare(regs);256256+ __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_SYSCALL);270257 rseq_syscall_exit_to_user_mode();271258 __exit_to_user_mode_validate();272259}···280267 */281268static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)282269{283283- __exit_to_user_mode_prepare(regs);270270+ __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_IRQ);284271 rseq_irqentry_exit_to_user_mode();285272 __exit_to_user_mode_validate();286273}
+72-9
include/linux/rbtree.h
···3535#define RB_CLEAR_NODE(node) \3636 ((node)->__rb_parent_color = (unsigned long)(node))37373838+#define RB_EMPTY_LINKED_NODE(lnode) RB_EMPTY_NODE(&(lnode)->node)3939+#define RB_CLEAR_LINKED_NODE(lnode) ({ \4040+ RB_CLEAR_NODE(&(lnode)->node); \4141+ (lnode)->prev = (lnode)->next = NULL; \4242+})38433944extern void rb_insert_color(struct rb_node *, struct rb_root *);4045extern void rb_erase(struct rb_node *, struct rb_root *);4141-4646+extern bool rb_erase_linked(struct rb_node_linked *, struct rb_root_linked *);42474348/* Find logical next and previous nodes in a tree */4449extern struct rb_node *rb_next(const struct rb_node *);···218213 return leftmost ? node : NULL;219214}220215221221-/**222222- * rb_add() - insert @node into @tree223223- * @node: node to insert224224- * @tree: tree to insert @node into225225- * @less: operator defining the (partial) node order226226- */227216static __always_inline void228228-rb_add(struct rb_node *node, struct rb_root *tree,229229- bool (*less)(struct rb_node *, const struct rb_node *))217217+__rb_add(struct rb_node *node, struct rb_root *tree,218218+ bool (*less)(struct rb_node *, const struct rb_node *),219219+ void (*linkop)(struct rb_node *, struct rb_node *, struct rb_node **))230220{231221 struct rb_node **link = &tree->rb_node;232222 struct rb_node *parent = NULL;···234234 link = &parent->rb_right;235235 }236236237237+ linkop(node, parent, link);237238 rb_link_node(node, parent, link);238239 rb_insert_color(node, tree);240240+}241241+242242+#define __node_2_linked_node(_n) \243243+ rb_entry((_n), struct rb_node_linked, node)244244+245245+static inline void246246+rb_link_linked_node(struct rb_node *node, struct rb_node *parent, struct rb_node **link)247247+{248248+ if (!parent)249249+ return;250250+251251+ struct rb_node_linked *nnew = __node_2_linked_node(node);252252+ struct rb_node_linked *npar = __node_2_linked_node(parent);253253+254254+ if (link == &parent->rb_left) {255255+ nnew->prev = npar->prev;256256+ 
nnew->next = npar;257257+ npar->prev = nnew;258258+ if (nnew->prev)259259+ nnew->prev->next = nnew;260260+ } else {261261+ nnew->next = npar->next;262262+ nnew->prev = npar;263263+ npar->next = nnew;264264+ if (nnew->next)265265+ nnew->next->prev = nnew;266266+ }267267+}268268+269269+/**270270+ * rb_add_linked() - insert @node into the leftmost linked tree @tree271271+ * @node: node to insert272272+ * @tree: linked tree to insert @node into273273+ * @less: operator defining the (partial) node order274274+ *275275+ * Returns @true when @node is the new leftmost, @false otherwise.276276+ */277277+static __always_inline bool278278+rb_add_linked(struct rb_node_linked *node, struct rb_root_linked *tree,279279+ bool (*less)(struct rb_node *, const struct rb_node *))280280+{281281+ __rb_add(&node->node, &tree->rb_root, less, rb_link_linked_node);282282+ if (!node->prev)283283+ tree->rb_leftmost = node;284284+ return !node->prev;285285+}286286+287287+/* Empty linkop function which is optimized away by the compiler */288288+static __always_inline void289289+rb_link_noop(struct rb_node *n, struct rb_node *p, struct rb_node **l) { }290290+291291+/**292292+ * rb_add() - insert @node into @tree293293+ * @node: node to insert294294+ * @tree: tree to insert @node into295295+ * @less: operator defining the (partial) node order296296+ */297297+static __always_inline void298298+rb_add(struct rb_node *node, struct rb_root *tree,299299+ bool (*less)(struct rb_node *, const struct rb_node *))300300+{301301+ __rb_add(node, tree, less, rb_link_noop);239302}240303241304/**
+16
include/linux/rbtree_types.h
···99} __attribute__((aligned(sizeof(long))));1010/* The alignment might seem pointless, but allegedly CRIS needs it */11111212+struct rb_node_linked {1313+ struct rb_node node;1414+ struct rb_node_linked *prev;1515+ struct rb_node_linked *next;1616+};1717+1218struct rb_root {1319 struct rb_node *rb_node;1420};···3428 struct rb_node *rb_leftmost;3529};36303131+/*3232+ * Leftmost tree with links. This would allow a trivial rb_rightmost update,3333+ * but that has been omitted due to the lack of users.3434+ */3535+struct rb_root_linked {3636+ struct rb_root rb_root;3737+ struct rb_node_linked *rb_leftmost;3838+};3939+3740#define RB_ROOT (struct rb_root) { NULL, }3841#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }4242+#define RB_ROOT_LINKED (struct rb_root_linked) { {NULL, }, NULL }39434044#endif
···7272 * @id: The timekeeper ID7373 * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW7474 * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds7575+ * @cs_id: The ID of the current clocksource7676+ * @cs_ns_to_cyc_mult: Multiplicator for nanoseconds to cycles conversion7777+ * @cs_ns_to_cyc_shift: Shift value for nanoseconds to cycles conversion7878+ * @cs_ns_to_cyc_maxns: Maximum nanoseconds to cycles conversion range7979 * @clock_was_set_seq: The sequence number of clock was set events7680 * @cs_was_changed_seq: The sequence number of clocksource change events7781 * @clock_valid: Indicator for valid clock···163159 u64 raw_sec;164160165161 /* Cacheline 3 and 4 (timekeeping internal variables): */162162+ enum clocksource_ids cs_id;163163+ u32 cs_ns_to_cyc_mult;164164+ u32 cs_ns_to_cyc_shift;165165+ u64 cs_ns_to_cyc_maxns;166166 unsigned int clock_was_set_seq;167167 u8 cs_was_changed_seq;168168 u8 clock_valid;
···218218 * hrtimer_start - called when the hrtimer is started219219 * @hrtimer: pointer to struct hrtimer220220 * @mode: the hrtimers mode221221+ * @was_armed: Was armed when hrtimer_start*() was invoked221222 */222223TRACE_EVENT(hrtimer_start,223224224224- TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode),225225+ TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode, bool was_armed),225226226226- TP_ARGS(hrtimer, mode),227227+ TP_ARGS(hrtimer, mode, was_armed),227228228229 TP_STRUCT__entry(229230 __field( void *, hrtimer )···232231 __field( s64, expires )233232 __field( s64, softexpires )234233 __field( enum hrtimer_mode, mode )234234+ __field( bool, was_armed )235235 ),236236237237 TP_fast_assign(···241239 __entry->expires = hrtimer_get_expires(hrtimer);242240 __entry->softexpires = hrtimer_get_softexpires(hrtimer);243241 __entry->mode = mode;242242+ __entry->was_armed = was_armed;244243 ),245244246245 TP_printk("hrtimer=%p function=%ps expires=%llu softexpires=%llu "247247- "mode=%s", __entry->hrtimer, __entry->function,246246+ "mode=%s was_armed=%d", __entry->hrtimer, __entry->function,248247 (unsigned long long) __entry->expires,249248 (unsigned long long) __entry->softexpires,250250- decode_hrtimer_mode(__entry->mode))249249+ decode_hrtimer_mode(__entry->mode), __entry->was_armed)251250);252251253252/**···322319 TP_PROTO(struct hrtimer *hrtimer),323320324321 TP_ARGS(hrtimer)322322+);323323+324324+/**325325+ * hrtimer_rearm - Invoked when the clockevent device is rearmed326326+ * @next_event: The next expiry time (CLOCK_MONOTONIC)327327+ */328328+TRACE_EVENT(hrtimer_rearm,329329+330330+ TP_PROTO(ktime_t next_event, bool deferred),331331+332332+ TP_ARGS(next_event, deferred),333333+334334+ TP_STRUCT__entry(335335+ __field( s64, next_event )336336+ __field( bool, deferred )337337+ ),338338+339339+ TP_fast_assign(340340+ __entry->next_event = next_event;341341+ __entry->deferred = deferred;342342+ ),343343+344344+ TP_printk("next_event=%llu 
deferred=%d",345345+ (unsigned long long) __entry->next_event, __entry->deferred)325346);326347327348/**
+3-1
kernel/entry/common.c
···5050 local_irq_enable_exit_to_user(ti_work);51515252 if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {5353- if (!rseq_grant_slice_extension(ti_work & TIF_SLICE_EXT_DENY))5353+ if (!rseq_grant_slice_extension(ti_work, TIF_SLICE_EXT_DENY))5454 schedule();5555 }5656···225225 */226226 if (state.exit_rcu) {227227 instrumentation_begin();228228+ hrtimer_rearm_deferred();228229 /* Tell the tracer that IRET will enable interrupts */229230 trace_hardirqs_on_prepare();230231 lockdep_hardirqs_on_prepare();···239238 if (IS_ENABLED(CONFIG_PREEMPTION))240239 irqentry_exit_cond_resched();241240241241+ hrtimer_rearm_deferred();242242 /* Covers both tracing and lockdep */243243 trace_hardirqs_on();244244 instrumentation_end();
+77-18
kernel/sched/core.c
···872872 * Use HR-timers to deliver accurate preemption points.873873 */874874875875-static void hrtick_clear(struct rq *rq)875875+enum {876876+ HRTICK_SCHED_NONE = 0,877877+ HRTICK_SCHED_DEFER = BIT(1),878878+ HRTICK_SCHED_START = BIT(2),879879+ HRTICK_SCHED_REARM_HRTIMER = BIT(3)880880+};881881+882882+static void __used hrtick_clear(struct rq *rq)876883{877884 if (hrtimer_active(&rq->hrtick_timer))878885 hrtimer_cancel(&rq->hrtick_timer);···904897 return HRTIMER_NORESTART;905898}906899907907-static void __hrtick_restart(struct rq *rq)900900+static inline bool hrtick_needs_rearm(struct hrtimer *timer, ktime_t expires)901901+{902902+ /*903903+ * Queued is false when the timer is not started or currently904904+ * running the callback. In both cases, restart. If queued check905905+ * whether the expiry time actually changes substantially.906906+ */907907+ return !hrtimer_is_queued(timer) ||908908+ abs(expires - hrtimer_get_expires(timer)) > 5000;909909+}910910+911911+static void hrtick_cond_restart(struct rq *rq)908912{909913 struct hrtimer *timer = &rq->hrtick_timer;910914 ktime_t time = rq->hrtick_time;911915912912- hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD);916916+ if (hrtick_needs_rearm(timer, time))917917+ hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD);913918}914919915920/*···933914 struct rq_flags rf;934915935916 rq_lock(rq, &rf);936936- __hrtick_restart(rq);917917+ hrtick_cond_restart(rq);937918 rq_unlock(rq, &rf);938919}939920···944925 */945926void hrtick_start(struct rq *rq, u64 delay)946927{947947- struct hrtimer *timer = &rq->hrtick_timer;948928 s64 delta;949929950930 /*···951933 * doesn't make sense and can cause timer DoS.952934 */953935 delta = max_t(s64, delay, 10000LL);954954- rq->hrtick_time = ktime_add_ns(hrtimer_cb_get_time(timer), delta);936936+937937+ /*938938+ * If this is in the middle of schedule() only note the delay939939+ * and let hrtick_schedule_exit() deal with it.940940+ */941941+ if (rq->hrtick_sched) {942942+ 
rq->hrtick_sched |= HRTICK_SCHED_START;943943+ rq->hrtick_delay = delta;944944+ return;945945+ }946946+947947+ rq->hrtick_time = ktime_add_ns(ktime_get(), delta);948948+ if (!hrtick_needs_rearm(&rq->hrtick_timer, rq->hrtick_time))949949+ return;955950956951 if (rq == this_rq())957957- __hrtick_restart(rq);952952+ hrtimer_start(&rq->hrtick_timer, rq->hrtick_time, HRTIMER_MODE_ABS_PINNED_HARD);958953 else959954 smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd);955955+}956956+957957+static inline void hrtick_schedule_enter(struct rq *rq)958958+{959959+ rq->hrtick_sched = HRTICK_SCHED_DEFER;960960+ if (hrtimer_test_and_clear_rearm_deferred())961961+ rq->hrtick_sched |= HRTICK_SCHED_REARM_HRTIMER;962962+}963963+964964+static inline void hrtick_schedule_exit(struct rq *rq)965965+{966966+ if (rq->hrtick_sched & HRTICK_SCHED_START) {967967+ rq->hrtick_time = ktime_add_ns(ktime_get(), rq->hrtick_delay);968968+ hrtick_cond_restart(rq);969969+ } else if (idle_rq(rq)) {970970+ /*971971+ * No need for using hrtimer_is_active(). 
The timer is CPU local972972+ * and interrupts are disabled, so the callback cannot be973973+ * running and the queued state is valid.974974+ */975975+ if (hrtimer_is_queued(&rq->hrtick_timer))976976+ hrtimer_cancel(&rq->hrtick_timer);977977+ }978978+979979+ if (rq->hrtick_sched & HRTICK_SCHED_REARM_HRTIMER)980980+ __hrtimer_rearm_deferred();981981+982982+ rq->hrtick_sched = HRTICK_SCHED_NONE;960983}961984962985static void hrtick_rq_init(struct rq *rq)963986{964987 INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq);965965- hrtimer_setup(&rq->hrtick_timer, hrtick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);988988+ rq->hrtick_sched = HRTICK_SCHED_NONE;989989+ hrtimer_setup(&rq->hrtick_timer, hrtick, CLOCK_MONOTONIC,990990+ HRTIMER_MODE_REL_HARD | HRTIMER_MODE_LAZY_REARM);966991}967992#else /* !CONFIG_SCHED_HRTICK: */968968-static inline void hrtick_clear(struct rq *rq)969969-{970970-}971971-972972-static inline void hrtick_rq_init(struct rq *rq)973973-{974974-}993993+static inline void hrtick_clear(struct rq *rq) { }994994+static inline void hrtick_rq_init(struct rq *rq) { }995995+static inline void hrtick_schedule_enter(struct rq *rq) { }996996+static inline void hrtick_schedule_exit(struct rq *rq) { }975997#endif /* !CONFIG_SCHED_HRTICK */976998977999/*···50875029 */50885030 spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);50895031 __balance_callbacks(rq, NULL);50325032+ hrtick_schedule_exit(rq);50905033 raw_spin_rq_unlock_irq(rq);50915034}50925035···6841678268426783 schedule_debug(prev, preempt);6843678468446844- if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))68456845- hrtick_clear(rq);68466846-68476785 klp_sched_try_switch(prev);6848678668496787 local_irq_disable();···68666810 */68676811 rq_lock(rq, &rf);68686812 smp_mb__after_spinlock();68136813+68146814+ hrtick_schedule_enter(rq);6869681568706816 /* Promote REQ to ACT */68716817 rq->clock_update_flags <<= 1;···6970691269716913 rq_unpin_lock(rq, &rf);69726914 __balance_callbacks(rq, NULL);69156915+ 
hrtick_schedule_exit(rq);69736916 raw_spin_rq_unlock_irq(rq);69746917 }69756918 trace_sched_exit_tp(is_switch);
+1-1
kernel/sched/deadline.c
···10971097 act = ns_to_ktime(dl_next_period(dl_se));10981098 }1099109911001100- now = hrtimer_cb_get_time(timer);11001100+ now = ktime_get();11011101 delta = ktime_to_ns(now) - rq_clock(rq);11021102 act = ktime_add_ns(act, delta);11031103
+32-23
kernel/sched/fair.c
···55305530 * validating it and just reschedule.55315531 */55325532 if (queued) {55335533- resched_curr_lazy(rq_of(cfs_rq));55335533+ resched_curr(rq_of(cfs_rq));55345534 return;55355535 }55365536#endif···67356735static void hrtick_start_fair(struct rq *rq, struct task_struct *p)67366736{67376737 struct sched_entity *se = &p->se;67386738+ unsigned long scale = 1024;67396739+ unsigned long util = 0;67406740+ u64 vdelta;67416741+ u64 delta;6738674267396743 WARN_ON_ONCE(task_rq(p) != rq);6740674467416741- if (rq->cfs.h_nr_queued > 1) {67426742- u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;67436743- u64 slice = se->slice;67446744- s64 delta = slice - ran;67456745+ if (rq->cfs.h_nr_queued <= 1)67466746+ return;6745674767466746- if (delta < 0) {67476747- if (task_current_donor(rq, p))67486748- resched_curr(rq);67496749- return;67506750- }67516751- hrtick_start(rq, delta);67486748+ /*67496749+ * Compute time until virtual deadline67506750+ */67516751+ vdelta = se->deadline - se->vruntime;67526752+ if ((s64)vdelta < 0) {67536753+ if (task_current_donor(rq, p))67546754+ resched_curr(rq);67556755+ return;67526756 }67576757+ delta = (se->load.weight * vdelta) / NICE_0_LOAD;67586758+67596759+ /*67606760+ * Correct for instantaneous load of other classes.67616761+ */67626762+ util += cpu_util_irq(rq);67636763+ if (util && util < 1024) {67646764+ scale *= 1024;67656765+ scale /= (1024 - util);67666766+ }67676767+67686768+ hrtick_start(rq, (scale * delta) / 1024);67536769}6754677067556771/*67566756- * called from enqueue/dequeue and updates the hrtick when the67576757- * current task is from our class and nr_running is low enough67586758- * to matter.67726772+ * Called on enqueue to start the hrtick when h_nr_queued becomes more than 1.67596773 */67606774static void hrtick_update(struct rq *rq)67616775{67626776 struct task_struct *donor = rq->donor;6763677767646778 if (!hrtick_enabled_fair(rq) || donor->sched_class != &fair_sched_class)67796779+ 
return;67806780+67816781+ if (hrtick_active(rq))67656782 return;6766678367676784 hrtick_start_fair(rq, donor);···71037086 WARN_ON_ONCE(!task_sleep);71047087 WARN_ON_ONCE(p->on_rq != 1);7105708871067106- /* Fix-up what dequeue_task_fair() skipped */71077107- hrtick_update(rq);71087108-71097089 /*71107090 * Fix-up what block_task() skipped.71117091 *···71367122 /*71377123 * Must not reference @p after dequeue_entities(DEQUEUE_DELAYED).71387124 */71397139-71407140- hrtick_update(rq);71417125 return true;71427126}71437127···1337713365 entity_tick(cfs_rq, se, queued);1337813366 }13379133671338013380- if (queued) {1338113381- if (!need_resched())1338213382- hrtick_start_fair(rq, curr);1336813368+ if (queued)1338313369 return;1338413384- }13385133701338613371 if (static_branch_unlikely(&sched_numa_balancing))1338713372 task_tick_numa(rq, curr);
+5
kernel/sched/features.h
···6363 */6464SCHED_FEAT(WAKEUP_PREEMPTION, true)65656666+#ifdef CONFIG_HRTIMER_REARM_DEFERRED6767+SCHED_FEAT(HRTICK, true)6868+SCHED_FEAT(HRTICK_DL, true)6969+#else6670SCHED_FEAT(HRTICK, false)6771SCHED_FEAT(HRTICK_DL, false)7272+#endif68736974/*7075 * Decrement CPU capacity based on time not spent running tasks
···663663{664664 __irq_enter_raw();665665666666+ /*667667+ * If this is a nested interrupt that hits the exit_to_user_mode_loop668668+ * where it has enabled interrupts but before it has hit schedule() we669669+ * could have hrtimers in an undefined state. Fix it up here.670670+ */671671+ hrtimer_rearm_deferred();672672+666673 if (tick_nohz_full_cpu(smp_processor_id()) ||667674 (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))668675 tick_irq_enter();···726719#endif727720 account_hardirq_exit(current);728721 preempt_count_sub(HARDIRQ_OFFSET);729729- if (!in_interrupt() && local_softirq_pending())722722+ if (!in_interrupt() && local_softirq_pending()) {723723+ /*724724+ * If we left hrtimers unarmed, make sure to arm them now,725725+ * before enabling interrupts to run SoftIRQ.726726+ */727727+ hrtimer_rearm_deferred();730728 invoke_softirq();729729+ }731730732731 if (IS_ENABLED(CONFIG_IRQ_FORCED_THREADING) && force_irqthreads() &&733732 local_timers_pending_force_th() && !(in_nmi() | in_hardirq()))
+16
kernel/time/Kconfig
···1717config ARCH_CLOCKSOURCE_INIT1818 bool19192020+config ARCH_WANTS_CLOCKSOURCE_READ_INLINE2121+ bool2222+2023# Timekeeping vsyscall support2124config GENERIC_TIME_VSYSCALL2225 bool···4744config GENERIC_CLOCKEVENTS_MIN_ADJUST4845 bool49464747+config GENERIC_CLOCKEVENTS_COUPLED4848+ bool4949+5050+config GENERIC_CLOCKEVENTS_COUPLED_INLINE5151+ select GENERIC_CLOCKEVENTS_COUPLED5252+ bool5353+5054# Generic update of CMOS clock5155config GENERIC_CMOS_UPDATE5256 bool5757+5858+# Deferred rearming of the hrtimer interrupt5959+config HRTIMER_REARM_DEFERRED6060+ def_bool y6161+ depends on GENERIC_ENTRY && HAVE_GENERIC_TIF_BITS6262+ depends on HIGH_RES_TIMERS && SCHED_HRTICK53635464# Select to handle posix CPU timers from task_work5565# and not from the timer interrupt context
+41-7
kernel/time/clockevents.c
···292292293293#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */294294295295+#ifdef CONFIG_GENERIC_CLOCKEVENTS_COUPLED296296+#ifdef CONFIG_GENERIC_CLOCKEVENTS_COUPLED_INLINE297297+#include <asm/clock_inlined.h>298298+#else299299+static __always_inline void300300+arch_inlined_clockevent_set_next_coupled(u64 u64 cycles, struct clock_event_device *dev) { }301301+#endif302302+303303+static inline bool clockevent_set_next_coupled(struct clock_event_device *dev, ktime_t expires)304304+{305305+ u64 cycles;306306+307307+ if (unlikely(!(dev->features & CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED)))308308+ return false;309309+310310+ if (unlikely(!ktime_expiry_to_cycles(dev->cs_id, expires, &cycles)))311311+ return false;312312+313313+ if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED_INLINE))314314+ arch_inlined_clockevent_set_next_coupled(cycles, dev);315315+ else316316+ dev->set_next_coupled(cycles, dev);317317+ return true;318318+}319319+320320+#else321321+static inline bool clockevent_set_next_coupled(struct clock_event_device *dev, ktime_t expires)322322+{323323+ return false;324324+}325325+#endif326326+295327/**296328 * clockevents_program_event - Reprogram the clock event device.297329 * @dev: device to program···332300 *333301 * Returns 0 on success, -ETIME when the event is in the past.334302 */335335-int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,336336- bool force)303303+int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force)337304{338338- unsigned long long clc;339305 int64_t delta;306306+ u64 cycles;340307 int rc;341308342309 if (WARN_ON_ONCE(expires < 0))···350319 WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n",351320 clockevent_get_state(dev));352321353353- /* Shortcut for clockevent devices that can deal with ktime. 
*/354354- if (dev->features & CLOCK_EVT_FEAT_KTIME)322322+ /* ktime_t based reprogramming for the broadcast hrtimer device */323323+ if (unlikely(dev->features & CLOCK_EVT_FEAT_HRTIMER))355324 return dev->set_next_ktime(expires, dev);325325+326326+ if (likely(clockevent_set_next_coupled(dev, expires)))327327+ return 0;356328357329 delta = ktime_to_ns(ktime_sub(expires, ktime_get()));358330 if (delta <= 0)···364330 delta = min(delta, (int64_t) dev->max_delta_ns);365331 delta = max(delta, (int64_t) dev->min_delta_ns);366332367367- clc = ((unsigned long long) delta * dev->mult) >> dev->shift;368368- rc = dev->set_next_event((unsigned long) clc, dev);333333+ cycles = ((u64)delta * dev->mult) >> dev->shift;334334+ rc = dev->set_next_event((unsigned long) cycles, dev);369335370336 return (rc && force) ? clockevents_program_min_delta(dev) : rc;371337}
···5050#include "tick-internal.h"51515252/*5353+ * Constants to set the queued state of the timer (INACTIVE, ENQUEUED)5454+ *5555+ * The callback state is kept separate in the CPU base because having it in5656+ * the timer would required touching the timer after the callback, which5757+ * makes it impossible to free the timer from the callback function.5858+ *5959+ * Therefore we track the callback state in:6060+ *6161+ * timer->base->cpu_base->running == timer6262+ *6363+ * On SMP it is possible to have a "callback function running and enqueued"6464+ * status. It happens for example when a posix timer expired and the callback6565+ * queued a signal. Between dropping the lock which protects the posix timer6666+ * and reacquiring the base lock of the hrtimer, another CPU can deliver the6767+ * signal and rearm the timer.6868+ *6969+ * All state transitions are protected by cpu_base->lock.7070+ */7171+#define HRTIMER_STATE_INACTIVE false7272+#define HRTIMER_STATE_ENQUEUED true7373+7474+/*5375 * The resolution of the clocks. The resolution value is returned in5476 * the clock_getres() system call to give application programmers an5577 * idea of the (in)accuracy of timers. 
Timer values are rounded up to···9977 * to reach a base using a clockid, hrtimer_clockid_to_base()10078 * is used to convert from clockid to the proper hrtimer_base_type.10179 */8080+8181+#define BASE_INIT(idx, cid) \8282+ [idx] = { .index = idx, .clockid = cid }8383+10284DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =10385{10486 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),105105- .clock_base =106106- {107107- {108108- .index = HRTIMER_BASE_MONOTONIC,109109- .clockid = CLOCK_MONOTONIC,110110- },111111- {112112- .index = HRTIMER_BASE_REALTIME,113113- .clockid = CLOCK_REALTIME,114114- },115115- {116116- .index = HRTIMER_BASE_BOOTTIME,117117- .clockid = CLOCK_BOOTTIME,118118- },119119- {120120- .index = HRTIMER_BASE_TAI,121121- .clockid = CLOCK_TAI,122122- },123123- {124124- .index = HRTIMER_BASE_MONOTONIC_SOFT,125125- .clockid = CLOCK_MONOTONIC,126126- },127127- {128128- .index = HRTIMER_BASE_REALTIME_SOFT,129129- .clockid = CLOCK_REALTIME,130130- },131131- {132132- .index = HRTIMER_BASE_BOOTTIME_SOFT,133133- .clockid = CLOCK_BOOTTIME,134134- },135135- {136136- .index = HRTIMER_BASE_TAI_SOFT,137137- .clockid = CLOCK_TAI,138138- },8787+ .clock_base = {8888+ BASE_INIT(HRTIMER_BASE_MONOTONIC, CLOCK_MONOTONIC),8989+ BASE_INIT(HRTIMER_BASE_REALTIME, CLOCK_REALTIME),9090+ BASE_INIT(HRTIMER_BASE_BOOTTIME, CLOCK_BOOTTIME),9191+ BASE_INIT(HRTIMER_BASE_TAI, CLOCK_TAI),9292+ BASE_INIT(HRTIMER_BASE_MONOTONIC_SOFT, CLOCK_MONOTONIC),9393+ BASE_INIT(HRTIMER_BASE_REALTIME_SOFT, CLOCK_REALTIME),9494+ BASE_INIT(HRTIMER_BASE_BOOTTIME_SOFT, CLOCK_BOOTTIME),9595+ BASE_INIT(HRTIMER_BASE_TAI_SOFT, CLOCK_TAI),13996 },14097 .csd = CSD_INIT(retrigger_next_event, NULL)14198};···127126 return likely(base->online);128127}129128129129+#ifdef CONFIG_HIGH_RES_TIMERS130130+DEFINE_STATIC_KEY_FALSE(hrtimer_highres_enabled_key);131131+132132+static void hrtimer_hres_workfn(struct work_struct *work)133133+{134134+ 
static_branch_enable(&hrtimer_highres_enabled_key);135135+}136136+137137+static DECLARE_WORK(hrtimer_hres_work, hrtimer_hres_workfn);138138+139139+static inline void hrtimer_schedule_hres_work(void)140140+{141141+ if (!hrtimer_highres_enabled())142142+ schedule_work(&hrtimer_hres_work);143143+}144144+#else145145+static inline void hrtimer_schedule_hres_work(void) { }146146+#endif147147+130148/*131149 * Functions and macros which are different for UP/SMP systems are kept in a132150 * single place133151 */134152#ifdef CONFIG_SMP135135-136153/*137154 * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()138155 * such that hrtimer_callback_running() can unconditionally dereference139156 * timer->base->cpu_base140157 */141158static struct hrtimer_cpu_base migration_cpu_base = {142142- .clock_base = { {143143- .cpu_base = &migration_cpu_base,144144- .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,145145- &migration_cpu_base.lock),146146- }, },159159+ .clock_base = {160160+ [0] = {161161+ .cpu_base = &migration_cpu_base,162162+ .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,163163+ &migration_cpu_base.lock),164164+ },165165+ },147166};148167149168#define migration_base migration_cpu_base.clock_base[0]···180159 * possible to set timer->base = &migration_base and drop the lock: the timer181160 * remains locked.182161 */183183-static184184-struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,185185- unsigned long *flags)162162+static struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,163163+ unsigned long *flags)186164 __acquires(&timer->base->lock)187165{188188- struct hrtimer_clock_base *base;189189-190166 for (;;) {191191- base = READ_ONCE(timer->base);167167+ struct hrtimer_clock_base *base = READ_ONCE(timer->base);168168+192169 if (likely(base != &migration_base)) {193170 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);194171 if (likely(base == timer->base))···239220 return expires >= 
new_base->cpu_base->expires_next;240221}241222242242-static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned)223223+static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, bool pinned)243224{244225 if (!hrtimer_base_is_online(base)) {245226 int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER));···267248 * the timer callback is currently running.268249 */269250static inline struct hrtimer_clock_base *270270-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,271271- int pinned)251251+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, bool pinned)272252{273253 struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base;274254 struct hrtimer_clock_base *new_base;···280262281263 if (base != new_base) {282264 /*283283- * We are trying to move timer to new_base.284284- * However we can't change timer's base while it is running,285285- * so we keep it on the same CPU. No hassle vs. reprogramming286286- * the event source in the high resolution case. The softirq287287- * code will take care of this when the timer function has288288- * completed. There is no conflict as we hold the lock until289289- * the timer is enqueued.265265+ * We are trying to move timer to new_base. However we can't266266+ * change timer's base while it is running, so we keep it on267267+ * the same CPU. No hassle vs. reprogramming the event source268268+ * in the high resolution case. The remote CPU will take care269269+ * of this when the timer function has completed. 
There is no270270+ * conflict as we hold the lock until the timer is enqueued.290271 */291272 if (unlikely(hrtimer_callback_running(timer)))292273 return base;···295278 raw_spin_unlock(&base->cpu_base->lock);296279 raw_spin_lock(&new_base->cpu_base->lock);297280298298- if (!hrtimer_suitable_target(timer, new_base, new_cpu_base,299299- this_cpu_base)) {281281+ if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) {300282 raw_spin_unlock(&new_base->cpu_base->lock);301283 raw_spin_lock(&base->cpu_base->lock);302284 new_cpu_base = this_cpu_base;···314298315299#else /* CONFIG_SMP */316300317317-static inline struct hrtimer_clock_base *318318-lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)301301+static inline struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,302302+ unsigned long *flags)319303 __acquires(&timer->base->cpu_base->lock)320304{321305 struct hrtimer_clock_base *base = timer->base;322306323307 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);324324-325308 return base;326309}327310···437422 }438423}439424425425+/* Stub timer callback for improperly used timers. 
*/426426+static enum hrtimer_restart stub_timer(struct hrtimer *unused)427427+{428428+ WARN_ON_ONCE(1);429429+ return HRTIMER_NORESTART;430430+}431431+432432+/*433433+ * hrtimer_fixup_assert_init is called when:434434+ * - an untracked/uninit-ed object is found435435+ */436436+static bool hrtimer_fixup_assert_init(void *addr, enum debug_obj_state state)437437+{438438+ struct hrtimer *timer = addr;439439+440440+ switch (state) {441441+ case ODEBUG_STATE_NOTAVAILABLE:442442+ hrtimer_setup(timer, stub_timer, CLOCK_MONOTONIC, 0);443443+ return true;444444+ default:445445+ return false;446446+ }447447+}448448+440449static const struct debug_obj_descr hrtimer_debug_descr = {441441- .name = "hrtimer",442442- .debug_hint = hrtimer_debug_hint,443443- .fixup_init = hrtimer_fixup_init,444444- .fixup_activate = hrtimer_fixup_activate,445445- .fixup_free = hrtimer_fixup_free,450450+ .name = "hrtimer",451451+ .debug_hint = hrtimer_debug_hint,452452+ .fixup_init = hrtimer_fixup_init,453453+ .fixup_activate = hrtimer_fixup_activate,454454+ .fixup_free = hrtimer_fixup_free,455455+ .fixup_assert_init = hrtimer_fixup_assert_init,446456};447457448458static inline void debug_hrtimer_init(struct hrtimer *timer)···480440 debug_object_init_on_stack(timer, &hrtimer_debug_descr);481441}482442483483-static inline void debug_hrtimer_activate(struct hrtimer *timer,484484- enum hrtimer_mode mode)443443+static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode)485444{486445 debug_object_activate(timer, &hrtimer_debug_descr);487446}···488449static inline void debug_hrtimer_deactivate(struct hrtimer *timer)489450{490451 debug_object_deactivate(timer, &hrtimer_debug_descr);452452+}453453+454454+static inline void debug_hrtimer_assert_init(struct hrtimer *timer)455455+{456456+ debug_object_assert_init(timer, &hrtimer_debug_descr);491457}492458493459void destroy_hrtimer_on_stack(struct hrtimer *timer)···505461506462static inline void debug_hrtimer_init(struct hrtimer 
*timer) { }507463static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { }508508-static inline void debug_hrtimer_activate(struct hrtimer *timer,509509- enum hrtimer_mode mode) { }464464+static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { }510465static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }466466+static inline void debug_hrtimer_assert_init(struct hrtimer *timer) { }511467#endif512468513469static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode)···523479 trace_hrtimer_setup(timer, clockid, mode);524480}525481526526-static inline void debug_activate(struct hrtimer *timer,527527- enum hrtimer_mode mode)482482+static inline void debug_activate(struct hrtimer *timer, enum hrtimer_mode mode, bool was_armed)528483{529484 debug_hrtimer_activate(timer, mode);530530- trace_hrtimer_start(timer, mode);485485+ trace_hrtimer_start(timer, mode, was_armed);531486}532487533533-static inline void debug_deactivate(struct hrtimer *timer)488488+#define for_each_active_base(base, cpu_base, active) \489489+ for (unsigned int idx = ffs(active); idx--; idx = ffs((active))) \490490+ for (bool done = false; !done; active &= ~(1U << idx)) \491491+ for (base = &cpu_base->clock_base[idx]; !done; done = true)492492+493493+#if defined(CONFIG_NO_HZ_COMMON)494494+/*495495+ * Same as hrtimer_bases_next_event() below, but skips the excluded timer and496496+ * does not update cpu_base->next_timer/expires.497497+ */498498+static ktime_t hrtimer_bases_next_event_without(struct hrtimer_cpu_base *cpu_base,499499+ const struct hrtimer *exclude,500500+ unsigned int active, ktime_t expires_next)534501{535535- debug_hrtimer_deactivate(timer);536536- trace_hrtimer_cancel(timer);537537-}502502+ struct hrtimer_clock_base *base;503503+ ktime_t expires;538504539539-static struct hrtimer_clock_base *540540-__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)505505+ 
lockdep_assert_held(&cpu_base->lock);506506+507507+ for_each_active_base(base, cpu_base, active) {508508+ expires = ktime_sub(base->expires_next, base->offset);509509+ if (expires >= expires_next)510510+ continue;511511+512512+ /*513513+ * If the excluded timer is the first on this base evaluate the514514+ * next timer.515515+ */516516+ struct timerqueue_linked_node *node = timerqueue_linked_first(&base->active);517517+518518+ if (unlikely(&exclude->node == node)) {519519+ node = timerqueue_linked_next(node);520520+ if (!node)521521+ continue;522522+ expires = ktime_sub(node->expires, base->offset);523523+ if (expires >= expires_next)524524+ continue;525525+ }526526+ expires_next = expires;527527+ }528528+ /* If base->offset changed, the result might be negative */529529+ return max(expires_next, 0);530530+}531531+#endif532532+533533+static __always_inline struct hrtimer *clock_base_next_timer(struct hrtimer_clock_base *base)541534{542542- unsigned int idx;535535+ struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active);543536544544- if (!*active)545545- return NULL;546546-547547- idx = __ffs(*active);548548- *active &= ~(1U << idx);549549-550550- return &cpu_base->clock_base[idx];537537+ return container_of(next, struct hrtimer, node);551538}552539553553-#define for_each_active_base(base, cpu_base, active) \554554- while ((base = __next_base((cpu_base), &(active))))555555-556556-static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,557557- const struct hrtimer *exclude,558558- unsigned int active,559559- ktime_t expires_next)540540+/* Find the base with the earliest expiry */541541+static void hrtimer_bases_first(struct hrtimer_cpu_base *cpu_base,unsigned int active,542542+ ktime_t *expires_next, struct hrtimer **next_timer)560543{561544 struct hrtimer_clock_base *base;562545 ktime_t expires;563546564547 for_each_active_base(base, cpu_base, active) {565565- struct timerqueue_node *next;566566- struct hrtimer 
*timer;567567-568568- next = timerqueue_getnext(&base->active);569569- timer = container_of(next, struct hrtimer, node);570570- if (timer == exclude) {571571- /* Get to the next timer in the queue. */572572- next = timerqueue_iterate_next(next);573573- if (!next)574574- continue;575575-576576- timer = container_of(next, struct hrtimer, node);577577- }578578- expires = ktime_sub(hrtimer_get_expires(timer), base->offset);579579- if (expires < expires_next) {580580- expires_next = expires;581581-582582- /* Skip cpu_base update if a timer is being excluded. */583583- if (exclude)584584- continue;585585-586586- if (timer->is_soft)587587- cpu_base->softirq_next_timer = timer;588588- else589589- cpu_base->next_timer = timer;548548+ expires = ktime_sub(base->expires_next, base->offset);549549+ if (expires < *expires_next) {550550+ *expires_next = expires;551551+ *next_timer = clock_base_next_timer(base);590552 }591553 }592592- /*593593- * clock_was_set() might have changed base->offset of any of594594- * the clock bases so the result might be negative. 
Fix it up595595- * to prevent a false positive in clockevents_program_event().596596- */597597- if (expires_next < 0)598598- expires_next = 0;599599- return expires_next;600554}601555602556/*···617575 * - HRTIMER_ACTIVE_SOFT, or618576 * - HRTIMER_ACTIVE_HARD.619577 */620620-static ktime_t621621-__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)578578+static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)622579{623623- unsigned int active;624580 struct hrtimer *next_timer = NULL;625581 ktime_t expires_next = KTIME_MAX;582582+ unsigned int active;583583+584584+ lockdep_assert_held(&cpu_base->lock);626585627586 if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {628587 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;629629- cpu_base->softirq_next_timer = NULL;630630- expires_next = __hrtimer_next_event_base(cpu_base, NULL,631631- active, KTIME_MAX);632632-633633- next_timer = cpu_base->softirq_next_timer;588588+ if (active)589589+ hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer);590590+ cpu_base->softirq_next_timer = next_timer;634591 }635592636593 if (active_mask & HRTIMER_ACTIVE_HARD) {637594 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;595595+ if (active)596596+ hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer);638597 cpu_base->next_timer = next_timer;639639- expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,640640- expires_next);641598 }642642-643643- return expires_next;599599+ return max(expires_next, 0);644600}645601646602static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)···678638 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;679639 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;680640681681- ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,682682- offs_real, offs_boot, offs_tai);641641+ ktime_t now = 
ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real,642642+ offs_boot, offs_tai);683643684644 base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;685645 base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;···689649}690650691651/*692692- * Is the high resolution mode active ?652652+ * Is the high resolution mode active in the CPU base. This cannot use the653653+ * static key as the CPUs are switched to high resolution mode654654+ * asynchronously.693655 */694656static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)695657{···699657 cpu_base->hres_active : 0;700658}701659702702-static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,703703- struct hrtimer *next_timer,660660+static inline void hrtimer_rearm_event(ktime_t expires_next, bool deferred)661661+{662662+ trace_hrtimer_rearm(expires_next, deferred);663663+ tick_program_event(expires_next, 1);664664+}665665+666666+static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer,704667 ktime_t expires_next)705668{706669 cpu_base->expires_next = expires_next;···730683 if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)731684 return;732685733733- tick_program_event(expires_next, 1);686686+ hrtimer_rearm_event(expires_next, false);734687}735688736736-/*737737- * Reprogram the event source with checking both queues for the738738- * next event739739- * Called with interrupts disabled and base->lock held740740- */741741-static void742742-hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)689689+/* Reprogram the event source with a evaluation of all clock bases */690690+static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, bool skip_equal)743691{744744- ktime_t expires_next;745745-746746- expires_next = hrtimer_update_next_event(cpu_base);692692+ ktime_t expires_next = hrtimer_update_next_event(cpu_base);747693748694 if (skip_equal && expires_next == 
cpu_base->expires_next)749695 return;···747707/* High resolution timer related functions */748708#ifdef CONFIG_HIGH_RES_TIMERS749709750750-/*751751- * High resolution timer enabled ?752752- */710710+/* High resolution timer enabled ? */753711static bool hrtimer_hres_enabled __read_mostly = true;754712unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;755713EXPORT_SYMBOL_GPL(hrtimer_resolution);756714757757-/*758758- * Enable / Disable high resolution mode759759- */715715+/* Enable / Disable high resolution mode */760716static int __init setup_hrtimer_hres(char *str)761717{762718 return (kstrtobool(str, &hrtimer_hres_enabled) == 0);763719}764764-765720__setup("highres=", setup_hrtimer_hres);766721767767-/*768768- * hrtimer_high_res_enabled - query, if the highres mode is enabled769769- */770770-static inline int hrtimer_is_hres_enabled(void)722722+/* hrtimer_high_res_enabled - query, if the highres mode is enabled */723723+static inline bool hrtimer_is_hres_enabled(void)771724{772725 return hrtimer_hres_enabled;773726}774727775775-/*776776- * Switch to high resolution mode777777- */728728+/* Switch to high resolution mode */778729static void hrtimer_switch_to_hres(void)779730{780731 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);781732782733 if (tick_init_highres()) {783783- pr_warn("Could not switch to high resolution mode on CPU %u\n",784784- base->cpu);734734+ pr_warn("Could not switch to high resolution mode on CPU %u\n", base->cpu);785735 return;786736 }787787- base->hres_active = 1;737737+ base->hres_active = true;788738 hrtimer_resolution = HIGH_RES_NSEC;789739790740 tick_setup_sched_timer(true);791741 /* "Retrigger" the interrupt to get things going */792742 retrigger_next_event(NULL);743743+ hrtimer_schedule_hres_work();793744}794745795746#else796747797797-static inline int hrtimer_is_hres_enabled(void) { return 0; }748748+static inline bool hrtimer_is_hres_enabled(void) { return 0; }798749static inline void 
hrtimer_switch_to_hres(void) { }799750800751#endif /* CONFIG_HIGH_RES_TIMERS */752752+801753/*802754 * Retrigger next event is called after clock was set with interrupts803755 * disabled through an SMP function call or directly from low level···824792 * In periodic low resolution mode, the next softirq expiration825793 * must also be updated.826794 */827827- raw_spin_lock(&base->lock);795795+ guard(raw_spinlock)(&base->lock);828796 hrtimer_update_base(base);829797 if (hrtimer_hres_active(base))830830- hrtimer_force_reprogram(base, 0);798798+ hrtimer_force_reprogram(base, /* skip_equal */ false);831799 else832800 hrtimer_update_next_event(base);833833- raw_spin_unlock(&base->lock);834801}835802836803/*···843812{844813 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);845814 struct hrtimer_clock_base *base = timer->base;846846- ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);815815+ ktime_t expires = hrtimer_get_expires(timer);847816848848- WARN_ON_ONCE(hrtimer_get_expires(timer) < 0);817817+ WARN_ON_ONCE(expires < 0);849818819819+ expires = ktime_sub(expires, base->offset);850820 /*851821 * CLOCK_REALTIME timer might be requested with an absolute852822 * expiry time which is less than base->offset. 
Set it to 0.···874842 timer_cpu_base->softirq_next_timer = timer;875843 timer_cpu_base->softirq_expires_next = expires;876844877877- if (!ktime_before(expires, timer_cpu_base->expires_next) ||878878- !reprogram)845845+ if (!ktime_before(expires, timer_cpu_base->expires_next) || !reprogram)879846 return;880847 }881848···888857 if (expires >= cpu_base->expires_next)889858 return;890859891891- /*892892- * If the hrtimer interrupt is running, then it will reevaluate the893893- * clock bases and reprogram the clock event device.894894- */895895- if (cpu_base->in_hrtirq)860860+ /* If a deferred rearm is pending skip reprogramming the device */861861+ if (cpu_base->deferred_rearm)896862 return;897863898864 cpu_base->next_timer = timer;···897869 __hrtimer_reprogram(cpu_base, timer, expires);898870}899871900900-static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,901901- unsigned int active)872872+static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int active)902873{903874 struct hrtimer_clock_base *base;904875 unsigned int seq;···923896 if (seq == cpu_base->clock_was_set_seq)924897 return false;925898926926- /*927927- * If the remote CPU is currently handling an hrtimer interrupt, it928928- * will reevaluate the first expiring timer of all clock bases929929- * before reprogramming. 
Nothing to do here.930930- */931931- if (cpu_base->in_hrtirq)899899+ /* If a deferred rearm is pending the remote CPU will take care of it */900900+ if (cpu_base->deferred_rearm) {901901+ cpu_base->deferred_needs_update = true;932902 return false;903903+ }933904934905 /*935906 * Walk the affected clock bases and check whether the first expiring···938913 active &= cpu_base->active_bases;939914940915 for_each_active_base(base, cpu_base, active) {941941- struct timerqueue_node *next;916916+ struct timerqueue_linked_node *next;942917943943- next = timerqueue_getnext(&base->active);918918+ next = timerqueue_linked_first(&base->active);944919 expires = ktime_sub(next->expires, base->offset);945920 if (expires < cpu_base->expires_next)946921 return true;···972947 */973948void clock_was_set(unsigned int bases)974949{975975- struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);976950 cpumask_var_t mask;977977- int cpu;978951979979- if (!hrtimer_hres_active(cpu_base) && !tick_nohz_is_active())952952+ if (!hrtimer_highres_enabled() && !tick_nohz_is_active())980953 goto out_timerfd;981954982955 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {···983960 }984961985962 /* Avoid interrupting CPUs if possible */986986- cpus_read_lock();987987- for_each_online_cpu(cpu) {988988- unsigned long flags;963963+ scoped_guard(cpus_read_lock) {964964+ int cpu;989965990990- cpu_base = &per_cpu(hrtimer_bases, cpu);991991- raw_spin_lock_irqsave(&cpu_base->lock, flags);966966+ for_each_online_cpu(cpu) {967967+ struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);992968993993- if (update_needs_ipi(cpu_base, bases))994994- cpumask_set_cpu(cpu, mask);995995-996996- raw_spin_unlock_irqrestore(&cpu_base->lock, flags);969969+ guard(raw_spinlock_irqsave)(&cpu_base->lock);970970+ if (update_needs_ipi(cpu_base, bases))971971+ cpumask_set_cpu(cpu, mask);972972+ }973973+ scoped_guard(preempt)974974+ smp_call_function_many(mask, retrigger_next_event, NULL, 1);997975 }998998-999999- 
preempt_disable();10001000- smp_call_function_many(mask, retrigger_next_event, NULL, 1);10011001- preempt_enable();10021002- cpus_read_unlock();1003976 free_cpumask_var(mask);10049771005978out_timerfd:···10301011 retrigger_next_event(NULL);10311012}1032101310331033-/*10341034- * Counterpart to lock_hrtimer_base above:10351035- */10361036-static inline10371037-void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)10141014+/* Counterpart to lock_hrtimer_base above */10151015+static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)10381016 __releases(&timer->base->cpu_base->lock)10391017{10401018 raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);···10481032 * .. note::10491033 * This only updates the timer expiry value and does not requeue the timer.10501034 *10511051- * There is also a variant of the function hrtimer_forward_now().10351035+ * There is also a variant of this function: hrtimer_forward_now().10521036 *10531037 * Context: Can be safely called from the callback function of @timer. If called10541038 * from other contexts @timer must neither be enqueued nor running the···10581042 */10591043u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)10601044{10611061- u64 orun = 1;10621045 ktime_t delta;10461046+ u64 orun = 1;1063104710641048 delta = ktime_sub(now, hrtimer_get_expires(timer));1065104910661050 if (delta < 0)10671051 return 0;1068105210691069- if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))10531053+ if (WARN_ON(timer->is_queued))10701054 return 0;1071105510721056 if (interval < hrtimer_resolution)···10951079 * enqueue_hrtimer - internal function to (re)start a timer10961080 *10971081 * The timer is inserted in expiry order. Insertion into the10981098- * red black tree is O(log(n)). 
Must hold the base lock.10821082+ * red black tree is O(log(n)).10991083 *11001084 * Returns true when the new timer is the leftmost timer in the tree.11011085 */11021086static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,11031103- enum hrtimer_mode mode)10871087+ enum hrtimer_mode mode, bool was_armed)11041088{11051105- debug_activate(timer, mode);10891089+ lockdep_assert_held(&base->cpu_base->lock);10901090+10911091+ debug_activate(timer, mode, was_armed);11061092 WARN_ON_ONCE(!base->cpu_base->online);1107109311081094 base->cpu_base->active_bases |= 1 << base->index;1109109511101096 /* Pairs with the lockless read in hrtimer_is_queued() */11111111- WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);10971097+ WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED);1112109811131113- return timerqueue_add(&base->active, &timer->node);10991099+ if (!timerqueue_linked_add(&base->active, &timer->node))11001100+ return false;11011101+11021102+ base->expires_next = hrtimer_get_expires(timer);11031103+ return true;11041104+}11051105+11061106+static inline void base_update_next_timer(struct hrtimer_clock_base *base)11071107+{11081108+ struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active);11091109+11101110+ base->expires_next = next ? next->expires : KTIME_MAX;11141111}1115111211161113/*11171114 * __remove_hrtimer - internal function to remove a timer11181118- *11191119- * Caller must hold the base lock.11201115 *11211116 * High resolution timer mode reprograms the clock event device when the11221117 * timer is the one which expires next. The caller can disable this by setting11231118 * reprogram to zero. This is useful, when the context does a reprogramming11241119 * anyway (e.g. 
timer interrupt)11251120 */11261126-static void __remove_hrtimer(struct hrtimer *timer,11271127- struct hrtimer_clock_base *base,11281128- u8 newstate, int reprogram)11211121+static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,11221122+ bool newstate, bool reprogram)11291123{11301124 struct hrtimer_cpu_base *cpu_base = base->cpu_base;11311131- u8 state = timer->state;11251125+ bool was_first;1132112611331133- /* Pairs with the lockless read in hrtimer_is_queued() */11341134- WRITE_ONCE(timer->state, newstate);11351135- if (!(state & HRTIMER_STATE_ENQUEUED))11271127+ lockdep_assert_held(&cpu_base->lock);11281128+11291129+ if (!timer->is_queued)11361130 return;1137113111381138- if (!timerqueue_del(&base->active, &timer->node))11321132+ /* Pairs with the lockless read in hrtimer_is_queued() */11331133+ WRITE_ONCE(timer->is_queued, newstate);11341134+11351135+ was_first = !timerqueue_linked_prev(&timer->node);11361136+11371137+ if (!timerqueue_linked_del(&base->active, &timer->node))11391138 cpu_base->active_bases &= ~(1 << base->index);1140113911401140+ /* Nothing to update if this was not the first timer in the base */11411141+ if (!was_first)11421142+ return;11431143+11441144+ base_update_next_timer(base);11451145+11411146 /*11421142- * Note: If reprogram is false we do not update11431143- * cpu_base->next_timer. This happens when we remove the first11441144- * timer on a remote cpu. No harm as we never dereference11451145- * cpu_base->next_timer. So the worst thing what can happen is11461146- * an superfluous call to hrtimer_force_reprogram() on the11471147- * remote cpu later on if the same timer gets enqueued again.11471147+ * If reprogram is false don't update cpu_base->next_timer and do not11481148+ * touch the clock event device.11491149+ *11501150+ * This happens when removing the first timer on a remote CPU, which11511151+ * will be handled by the remote CPU's interrupt. 
It also happens when11521152+ * a local timer is removed to be immediately restarted. That's handled11531153+ * at the call site.11481154 */11491149- if (reprogram && timer == cpu_base->next_timer)11501150- hrtimer_force_reprogram(cpu_base, 1);11551155+ if (!reprogram || timer != cpu_base->next_timer || timer->is_lazy)11561156+ return;11571157+11581158+ if (cpu_base->deferred_rearm)11591159+ cpu_base->deferred_needs_update = true;11601160+ else11611161+ hrtimer_force_reprogram(cpu_base, /* skip_equal */ true);11511162}1152116311531153-/*11541154- * remove hrtimer, called with base lock held11551155- */11561156-static inline int11571157-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,11581158- bool restart, bool keep_local)11641164+static inline bool remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,11651165+ bool newstate)11591166{11601160- u8 state = timer->state;11671167+ lockdep_assert_held(&base->cpu_base->lock);1161116811621162- if (state & HRTIMER_STATE_ENQUEUED) {11691169+ if (timer->is_queued) {11631170 bool reprogram;11711171+11721172+ debug_hrtimer_deactivate(timer);1164117311651174 /*11661175 * Remove the timer and force reprogramming when high···11951154 * reprogramming happens in the interrupt handler. This is a11961155 * rare case and less expensive than a smp call.11971156 */11981198- debug_deactivate(timer);11991157 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);1200115812011201- /*12021202- * If the timer is not restarted then reprogramming is12031203- * required if the timer is local. 
If it is local and about12041204- * to be restarted, avoid programming it twice (on removal12051205- * and a moment later when it's requeued).12061206- */12071207- if (!restart)12081208- state = HRTIMER_STATE_INACTIVE;12091209- else12101210- reprogram &= !keep_local;12111211-12121212- __remove_hrtimer(timer, base, state, reprogram);12131213- return 1;11591159+ __remove_hrtimer(timer, base, newstate, reprogram);11601160+ return true;12141161 }12151215- return 0;11621162+ return false;11631163+}11641164+11651165+/*11661166+ * Update in place has to retrieve the expiry times of the neighbour nodes11671167+ * if they exist. That is cache line neutral because the dequeue/enqueue11681168+ * operation is going to need the same cache lines. But there is a big win11691169+ * when the dequeue/enqueue can be avoided because the RB tree does not11701170+ * have to be rebalanced twice.11711171+ */11721172+static inline bool11731173+hrtimer_can_update_in_place(struct hrtimer *timer, struct hrtimer_clock_base *base, ktime_t expires)11741174+{11751175+ struct timerqueue_linked_node *next = timerqueue_linked_next(&timer->node);11761176+ struct timerqueue_linked_node *prev = timerqueue_linked_prev(&timer->node);11771177+11781178+ /* If the new expiry goes behind the next timer, requeue is required */11791179+ if (next && expires > next->expires)11801180+ return false;11811181+11821182+ /* If this is the first timer, update in place */11831183+ if (!prev)11841184+ return true;11851185+11861186+ /* Update in place when it does not go ahead of the previous one */11871187+ return expires >= prev->expires;11881188+}11891189+11901190+static inline bool11911191+remove_and_enqueue_same_base(struct hrtimer *timer, struct hrtimer_clock_base *base,11921192+ const enum hrtimer_mode mode, ktime_t expires, u64 delta_ns)11931193+{11941194+ bool was_first = false;11951195+11961196+ /* Remove it from the timer queue if active */11971197+ if (timer->is_queued) {11981198+ was_first = 
!timerqueue_linked_prev(&timer->node);11991199+12001200+ /* Try to update in place to avoid the de/enqueue dance */12011201+ if (hrtimer_can_update_in_place(timer, base, expires)) {12021202+ hrtimer_set_expires_range_ns(timer, expires, delta_ns);12031203+ trace_hrtimer_start(timer, mode, true);12041204+ if (was_first)12051205+ base->expires_next = expires;12061206+ return was_first;12071207+ }12081208+12091209+ debug_hrtimer_deactivate(timer);12101210+ timerqueue_linked_del(&base->active, &timer->node);12111211+ }12121212+12131213+ /* Set the new expiry time */12141214+ hrtimer_set_expires_range_ns(timer, expires, delta_ns);12151215+12161216+ debug_activate(timer, mode, timer->is_queued);12171217+ base->cpu_base->active_bases |= 1 << base->index;12181218+12191219+ /* Pairs with the lockless read in hrtimer_is_queued() */12201220+ WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED);12211221+12221222+ /* If it's the first expiring timer now or again, update base */12231223+ if (timerqueue_linked_add(&base->active, &timer->node)) {12241224+ base->expires_next = expires;12251225+ return true;12261226+ }12271227+12281228+ if (was_first)12291229+ base_update_next_timer(base);12301230+12311231+ return false;12161232}1217123312181234static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,···12881190 return tim;12891191}1290119212911291-static void12921292-hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)11931193+static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)12931194{12941294- ktime_t expires;11951195+ ktime_t expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);1295119612961197 /*12971297- * Find the next SOFT expiration.12981298- */12991299- expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);13001300-13011301- /*13021302- * reprogramming needs to be triggered, even if the next soft13031303- * hrtimer expires at the same time than the next 
hard11981198+ * Reprogramming needs to be triggered, even if the next soft11991199+ * hrtimer expires at the same time as the next hard13041200 * hrtimer. cpu_base->softirq_expires_next needs to be updated!13051201 */13061202 if (expires == KTIME_MAX)13071203 return;1308120413091205 /*13101310- * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()13111311- * cpu_base->*expires_next is only set by hrtimer_reprogram()12061206+ * cpu_base->next_timer is recomputed by __hrtimer_get_next_event()12071207+ * cpu_base->expires_next is only set by hrtimer_reprogram()13121208 */13131209 hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);13141210}1315121113161316-static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,13171317- u64 delta_ns, const enum hrtimer_mode mode,13181318- struct hrtimer_clock_base *base)12121212+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)12131213+static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned)12141214+{12151215+ if (static_branch_likely(&timers_migration_enabled)) {12161216+ /*12171217+ * If it is local and the first expiring timer keep it on the local12181218+ * CPU to optimize reprogramming of the clockevent device. 
Also12191219+ * avoid switch_hrtimer_base() overhead when local and pinned.12201220+ */12211221+ if (!is_local)12221222+ return false;12231223+ if (is_first || is_pinned)12241224+ return true;12251225+12261226+ /* Honour the NOHZ full restrictions */12271227+ if (!housekeeping_cpu(smp_processor_id(), HK_TYPE_KERNEL_NOISE))12281228+ return false;12291229+12301230+ /*12311231+ * If the tick is not stopped or need_resched() is set, then12321232+ * there is no point in moving the timer somewhere else.12331233+ */12341234+ return !tick_nohz_tick_stopped() || need_resched();12351235+ }12361236+ return is_local;12371237+}12381238+#else12391239+static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned)12401240+{12411241+ return is_local;12421242+}12431243+#endif12441244+12451245+static inline bool hrtimer_keep_base(struct hrtimer *timer, bool is_local, bool is_first,12461246+ bool is_pinned)12471247+{12481248+ /* If the timer is running the callback it has to stay on its CPU base. */12491249+ if (unlikely(timer->base->running == timer))12501250+ return true;12511251+12521252+ return hrtimer_prefer_local(is_local, is_first, is_pinned);12531253+}12541254+12551255+static bool __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns,12561256+ const enum hrtimer_mode mode, struct hrtimer_clock_base *base)13191257{13201258 struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases);13211321- struct hrtimer_clock_base *new_base;13221322- bool force_local, first;12591259+ bool is_pinned, first, was_first, keep_base = false;12601260+ struct hrtimer_cpu_base *cpu_base = base->cpu_base;12611261+12621262+ was_first = cpu_base->next_timer == timer;12631263+ is_pinned = !!(mode & HRTIMER_MODE_PINNED);1323126413241265 /*13251325- * If the timer is on the local cpu base and is the first expiring13261326- * timer then this might end up reprogramming the hardware twice13271327- * (on removal and on enqueue). 
To avoid that by prevent the13281328- * reprogram on removal, keep the timer local to the current CPU13291329- * and enforce reprogramming after it is queued no matter whether13301330- * it is the new first expiring timer again or not.12661266+ * Don't keep it local if this enqueue happens on a unplugged CPU12671267+ * after hrtimer_cpu_dying() has been invoked.13311268 */13321332- force_local = base->cpu_base == this_cpu_base;13331333- force_local &= base->cpu_base->next_timer == timer;12691269+ if (likely(this_cpu_base->online)) {12701270+ bool is_local = cpu_base == this_cpu_base;1334127113351335- /*13361336- * Don't force local queuing if this enqueue happens on a unplugged13371337- * CPU after hrtimer_cpu_dying() has been invoked.13381338- */13391339- force_local &= this_cpu_base->online;12721272+ keep_base = hrtimer_keep_base(timer, is_local, was_first, is_pinned);12731273+ }12741274+12751275+ /* Calculate absolute expiry time for relative timers */12761276+ if (mode & HRTIMER_MODE_REL)12771277+ tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid));12781278+ /* Compensate for low resolution granularity */12791279+ tim = hrtimer_update_lowres(timer, tim, mode);1340128013411281 /*13421282 * Remove an active timer from the queue. In case it is not queued···13861250 * reprogramming later if it was the first expiring timer. 
This13871251 * avoids programming the underlying clock event twice (once at13881252 * removal and once after enqueue).12531253+ *12541254+ * @keep_base is also true if the timer callback is running on a12551255+ * remote CPU and for local pinned timers.13891256 */13901390- remove_hrtimer(timer, base, true, force_local);13911391-13921392- if (mode & HRTIMER_MODE_REL)13931393- tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid));13941394-13951395- tim = hrtimer_update_lowres(timer, tim, mode);13961396-13971397- hrtimer_set_expires_range_ns(timer, tim, delta_ns);13981398-13991399- /* Switch the timer base, if necessary: */14001400- if (!force_local) {14011401- new_base = switch_hrtimer_base(timer, base,14021402- mode & HRTIMER_MODE_PINNED);12571257+ if (likely(keep_base)) {12581258+ first = remove_and_enqueue_same_base(timer, base, mode, tim, delta_ns);14031259 } else {14041404- new_base = base;12601260+ /* Keep the ENQUEUED state in case it is queued */12611261+ bool was_armed = remove_hrtimer(timer, base, HRTIMER_STATE_ENQUEUED);12621262+12631263+ hrtimer_set_expires_range_ns(timer, tim, delta_ns);12641264+12651265+ /* Switch the timer base, if necessary: */12661266+ base = switch_hrtimer_base(timer, base, is_pinned);12671267+ cpu_base = base->cpu_base;12681268+12691269+ first = enqueue_hrtimer(timer, base, mode, was_armed);14051270 }1406127114071407- first = enqueue_hrtimer(timer, new_base, mode);14081408- if (!force_local) {12721272+ /* If a deferred rearm is pending skip reprogramming the device */12731273+ if (cpu_base->deferred_rearm) {12741274+ cpu_base->deferred_needs_update = true;12751275+ return false;12761276+ }12771277+12781278+ if (!was_first || cpu_base != this_cpu_base) {14091279 /*14101410- * If the current CPU base is online, then the timer is14111411- * never queued on a remote CPU if it would be the first14121412- * expiring timer there.12801280+ * If the current CPU base is online, then the timer is never12811281+ * queued on a remote 
CPU if it would be the first expiring12821282+ * timer there unless the timer callback is currently executed12831283+ * on the remote CPU. In the latter case the remote CPU will12841284+ * re-evaluate the first expiring timer after completing the12851285+ * callbacks.14131286 */14141414- if (hrtimer_base_is_online(this_cpu_base))12871287+ if (likely(hrtimer_base_is_online(this_cpu_base)))14151288 return first;1416128914171290 /*···14281283 * already offline. If the timer is the first to expire,14291284 * kick the remote CPU to reprogram the clock event.14301285 */14311431- if (first) {14321432- struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base;14331433-14341434- smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd);14351435- }14361436- return 0;12861286+ if (first)12871287+ smp_call_function_single_async(cpu_base->cpu, &cpu_base->csd);12881288+ return false;14371289 }1438129014391291 /*14401440- * Timer was forced to stay on the current CPU to avoid14411441- * reprogramming on removal and enqueue. Force reprogram the14421442- * hardware by evaluating the new first expiring timer.12921292+ * Special case for the HRTICK timer. It is frequently rearmed and most12931293+ * of the time moves the expiry into the future. That's expensive in12941294+ * virtual machines and it's better to take the pointless already armed12951295+ * interrupt than reprogramming the hardware on every context switch.12961296+ *12971297+ * If the new expiry is before the armed time, then reprogramming is12981298+ * required.14431299 */14441444- hrtimer_force_reprogram(new_base->cpu_base, 1);14451445- return 0;13001300+ if (timer->is_lazy) {13011301+ if (cpu_base->expires_next <= hrtimer_get_expires(timer))13021302+ return false;13031303+ }13041304+13051305+ /*13061306+ * Timer was the first expiring timer and forced to stay on the13071307+ * current CPU to avoid reprogramming on removal and enqueue. 
Force13081308+ * reprogram the hardware by evaluating the new first expiring13091309+ * timer.13101310+ */13111311+ hrtimer_force_reprogram(cpu_base, /* skip_equal */ true);13121312+ return false;14461313}1447131414481315/**···14661309 * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);14671310 * softirq based mode is considered for debug purpose only!14681311 */14691469-void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,14701470- u64 delta_ns, const enum hrtimer_mode mode)13121312+void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns,13131313+ const enum hrtimer_mode mode)14711314{14721315 struct hrtimer_clock_base *base;14731316 unsigned long flags;13171317+13181318+ debug_hrtimer_assert_init(timer);1474131914751320 /*14761321 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft···1521136215221363 base = lock_hrtimer_base(timer, &flags);1523136415241524- if (!hrtimer_callback_running(timer))15251525- ret = remove_hrtimer(timer, base, false, false);13651365+ if (!hrtimer_callback_running(timer)) {13661366+ ret = remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE);13671367+ if (ret)13681368+ trace_hrtimer_cancel(timer);13691369+ }1526137015271371 unlock_hrtimer_base(timer, &flags);15281372···15591397 * the timer callback to finish. Drop expiry_lock and reacquire it. 
That15601398 * allows the waiter to acquire the lock and make progress.15611399 */15621562-static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,15631563- unsigned long flags)14001400+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, unsigned long flags)15641401{15651402 if (atomic_read(&cpu_base->timer_waiters)) {15661403 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);···16241463 spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);16251464}16261465#else16271627-static inline void16281628-hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }16291629-static inline void16301630-hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }16311631-static inline void16321632-hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }16331633-static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,16341634- unsigned long flags) { }14661466+static inline void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }14671467+static inline void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }14681468+static inline void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }14691469+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, unsigned long fl) { }16351470#endif1636147116371472/**···16831526{16841527 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);16851528 u64 expires = KTIME_MAX;16861686- unsigned long flags;1687152916881688- raw_spin_lock_irqsave(&cpu_base->lock, flags);16891689-15301530+ guard(raw_spinlock_irqsave)(&cpu_base->lock);16901531 if (!hrtimer_hres_active(cpu_base))16911532 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);16921692-16931693- raw_spin_unlock_irqrestore(&cpu_base->lock, flags);1694153316951534 return expires;16961535}···17021549{17031550 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);17041551 u64 expires = KTIME_MAX;17051705- unsigned long 
flags;15521552+ unsigned int active;1706155317071707- raw_spin_lock_irqsave(&cpu_base->lock, flags);15541554+ guard(raw_spinlock_irqsave)(&cpu_base->lock);15551555+ if (!hrtimer_hres_active(cpu_base))15561556+ return expires;1708155717091709- if (hrtimer_hres_active(cpu_base)) {17101710- unsigned int active;15581558+ active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;15591559+ if (active && !cpu_base->softirq_activated)15601560+ expires = hrtimer_bases_next_event_without(cpu_base, exclude, active, KTIME_MAX);1711156117121712- if (!cpu_base->softirq_activated) {17131713- active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;17141714- expires = __hrtimer_next_event_base(cpu_base, exclude,17151715- active, KTIME_MAX);17161716- }17171717- active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;17181718- expires = __hrtimer_next_event_base(cpu_base, exclude, active,17191719- expires);17201720- }17211721-17221722- raw_spin_unlock_irqrestore(&cpu_base->lock, flags);17231723-17241724- return expires;15621562+ active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;15631563+ if (!active)15641564+ return expires;15651565+ return hrtimer_bases_next_event_without(cpu_base, exclude, active, expires);17251566}17261567#endif17271568···17591612}17601613EXPORT_SYMBOL_GPL(hrtimer_cb_get_time);1761161417621762-static void __hrtimer_setup(struct hrtimer *timer,17631763- enum hrtimer_restart (*function)(struct hrtimer *),16151615+static void __hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*fn)(struct hrtimer *),17641616 clockid_t clock_id, enum hrtimer_mode mode)17651617{17661618 bool softtimer = !!(mode & HRTIMER_MODE_SOFT);···17911645 base += hrtimer_clockid_to_base(clock_id);17921646 timer->is_soft = softtimer;17931647 timer->is_hard = !!(mode & HRTIMER_MODE_HARD);16481648+ timer->is_lazy = !!(mode & HRTIMER_MODE_LAZY_REARM);17941649 timer->base = &cpu_base->clock_base[base];17951795- timerqueue_init(&timer->node);16501650+ 
timerqueue_linked_init(&timer->node);1796165117971797- if (WARN_ON_ONCE(!function))16521652+ if (WARN_ON_ONCE(!fn))17981653 ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout;17991654 else18001800- ACCESS_PRIVATE(timer, function) = function;16551655+ ACCESS_PRIVATE(timer, function) = fn;18011656}1802165718031658/**···18571710 base = READ_ONCE(timer->base);18581711 seq = raw_read_seqcount_begin(&base->seq);1859171218601860- if (timer->state != HRTIMER_STATE_INACTIVE ||18611861- base->running == timer)17131713+ if (timer->is_queued || base->running == timer)18621714 return true;1863171518641864- } while (read_seqcount_retry(&base->seq, seq) ||18651865- base != READ_ONCE(timer->base));17161716+ } while (read_seqcount_retry(&base->seq, seq) || base != READ_ONCE(timer->base));1866171718671718 return false;18681719}···18741729 * - callback: the timer is being ran18751730 * - post: the timer is inactive or (re)queued18761731 *18771877- * On the read side we ensure we observe timer->state and cpu_base->running17321732+ * On the read side we ensure we observe timer->is_queued and cpu_base->running18781733 * from the same section, if anything changed while we looked at it, we retry.18791734 * This includes timer->base changing because sequence numbers alone are18801735 * insufficient for that.···18831738 * a false negative if the read side got smeared over multiple consecutive18841739 * __run_hrtimer() invocations.18851740 */18861886-18871887-static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,18881888- struct hrtimer_clock_base *base,18891889- struct hrtimer *timer, ktime_t *now,18901890- unsigned long flags) __must_hold(&cpu_base->lock)17411741+static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base,17421742+ struct hrtimer *timer, ktime_t *now, unsigned long flags)17431743+ __must_hold(&cpu_base->lock)18911744{18921745 enum hrtimer_restart (*fn)(struct hrtimer *);18931746 bool expires_in_hardirq;···18971754 base->running = 
timer;1898175518991756 /*19001900- * Separate the ->running assignment from the ->state assignment.17571757+ * Separate the ->running assignment from the ->is_queued assignment.19011758 *19021759 * As with a regular write barrier, this ensures the read side in19031760 * hrtimer_active() cannot observe base->running == NULL &&19041904- * timer->state == INACTIVE.17611761+ * timer->is_queued == INACTIVE.19051762 */19061763 raw_write_seqcount_barrier(&base->seq);1907176419081908- __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);17651765+ __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, false);19091766 fn = ACCESS_PRIVATE(timer, function);1910176719111768 /*···19401797 * hrtimer_start_range_ns() can have popped in and enqueued the timer19411798 * for us already.19421799 */19431943- if (restart != HRTIMER_NORESTART &&19441944- !(timer->state & HRTIMER_STATE_ENQUEUED))19451945- enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);18001800+ if (restart == HRTIMER_RESTART && !timer->is_queued)18011801+ enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS, false);1946180219471803 /*19481948- * Separate the ->running assignment from the ->state assignment.18041804+ * Separate the ->running assignment from the ->is_queued assignment.19491805 *19501806 * As with a regular write barrier, this ensures the read side in19511807 * hrtimer_active() cannot observe base->running.timer == NULL &&19521952- * timer->state == INACTIVE.18081808+ * timer->is_queued == INACTIVE.19531809 */19541810 raw_write_seqcount_barrier(&base->seq);19551811···19561814 base->running = NULL;19571815}1958181618171817+static __always_inline struct hrtimer *clock_base_next_timer_safe(struct hrtimer_clock_base *base)18181818+{18191819+ struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active);18201820+18211821+ return next ? 
container_of(next, struct hrtimer, node) : NULL;18221822+}18231823+19591824static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,19601825 unsigned long flags, unsigned int active_mask)19611826{19621962- struct hrtimer_clock_base *base;19631827 unsigned int active = cpu_base->active_bases & active_mask;18281828+ struct hrtimer_clock_base *base;1964182919651830 for_each_active_base(base, cpu_base, active) {19661966- struct timerqueue_node *node;19671967- ktime_t basenow;18311831+ ktime_t basenow = ktime_add(now, base->offset);18321832+ struct hrtimer *timer;1968183319691969- basenow = ktime_add(now, base->offset);19701970-19711971- while ((node = timerqueue_getnext(&base->active))) {19721972- struct hrtimer *timer;19731973-19741974- timer = container_of(node, struct hrtimer, node);19751975-18341834+ while ((timer = clock_base_next_timer(base))) {19761835 /*19771836 * The immediate goal for using the softexpires is19781837 * minimizing wakeups, not running timers at the···20081865 now = hrtimer_update_base(cpu_base);20091866 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);2010186720112011- cpu_base->softirq_activated = 0;18681868+ cpu_base->softirq_activated = false;20121869 hrtimer_update_softirq_timer(cpu_base, true);2013187020141871 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);···20161873}2017187420181875#ifdef CONFIG_HIGH_RES_TIMERS18761876+18771877+/*18781878+ * Very similar to hrtimer_force_reprogram(), except it deals with18791879+ * deferred_rearm and hang_detected.18801880+ */18811881+static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next, bool deferred)18821882+{18831883+ cpu_base->expires_next = expires_next;18841884+ cpu_base->deferred_rearm = false;18851885+18861886+ if (unlikely(cpu_base->hang_detected)) {18871887+ /*18881888+ * Give the system a chance to do something else than looping18891889+ * on hrtimer interrupts.18901890+ */18911891+ expires_next = 
ktime_add_ns(ktime_get(),18921892+ min(100 * NSEC_PER_MSEC, cpu_base->max_hang_time));18931893+ }18941894+ hrtimer_rearm_event(expires_next, deferred);18951895+}18961896+18971897+#ifdef CONFIG_HRTIMER_REARM_DEFERRED18981898+void __hrtimer_rearm_deferred(void)18991899+{19001900+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);19011901+ ktime_t expires_next;19021902+19031903+ if (!cpu_base->deferred_rearm)19041904+ return;19051905+19061906+ guard(raw_spinlock)(&cpu_base->lock);19071907+ if (cpu_base->deferred_needs_update) {19081908+ hrtimer_update_base(cpu_base);19091909+ expires_next = hrtimer_update_next_event(cpu_base);19101910+ } else {19111911+ /* No timer added/removed. Use the cached value */19121912+ expires_next = cpu_base->deferred_expires_next;19131913+ }19141914+ hrtimer_rearm(cpu_base, expires_next, true);19151915+}19161916+19171917+static __always_inline void19181918+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next)19191919+{19201920+ /* hrtimer_interrupt() just re-evaluated the first expiring timer */19211921+ cpu_base->deferred_needs_update = false;19221922+ /* Cache the expiry time */19231923+ cpu_base->deferred_expires_next = expires_next;19241924+ set_thread_flag(TIF_HRTIMER_REARM);19251925+}19261926+#else /* CONFIG_HRTIMER_REARM_DEFERRED */19271927+static __always_inline void19281928+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next)19291929+{19301930+ hrtimer_rearm(cpu_base, expires_next, false);19311931+}19321932+#endif /* !CONFIG_HRTIMER_REARM_DEFERRED */2019193320201934/*20211935 * High resolution timer interrupt···20921892 raw_spin_lock_irqsave(&cpu_base->lock, flags);20931893 entry_time = now = hrtimer_update_base(cpu_base);20941894retry:20952095- cpu_base->in_hrtirq = 1;18951895+ cpu_base->deferred_rearm = true;20961896 /*20972097- * We set expires_next to KTIME_MAX here with cpu_base->lock20982098- * held to prevent that a timer is enqueued in our queue 
via20992099- * the migration code. This does not affect enqueueing of21002100- * timers which run their callback and need to be requeued on21012101- * this CPU.18971897+ * Set expires_next to KTIME_MAX, which prevents that remote CPUs queue18981898+ * timers while __hrtimer_run_queues() is expiring the clock bases.18991899+ * Timers which are re/enqueued on the local CPU are not affected by19001900+ * this.21021901 */21031902 cpu_base->expires_next = KTIME_MAX;2104190321051904 if (!ktime_before(now, cpu_base->softirq_expires_next)) {21061905 cpu_base->softirq_expires_next = KTIME_MAX;21072107- cpu_base->softirq_activated = 1;19061906+ cpu_base->softirq_activated = true;21081907 raise_timer_softirq(HRTIMER_SOFTIRQ);21091908 }2110190921111910 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);21122112-21132113- /* Reevaluate the clock bases for the [soft] next expiry */21142114- expires_next = hrtimer_update_next_event(cpu_base);21152115- /*21162116- * Store the new expiry value so the migration code can verify21172117- * against it.21182118- */21192119- cpu_base->expires_next = expires_next;21202120- cpu_base->in_hrtirq = 0;21212121- raw_spin_unlock_irqrestore(&cpu_base->lock, flags);21222122-21232123- /* Reprogramming necessary ? */21242124- if (!tick_program_event(expires_next, 0)) {21252125- cpu_base->hang_detected = 0;21262126- return;21272127- }2128191121291912 /*21301913 * The next timer was already expired due to:···21151932 * - long lasting callbacks21161933 * - being scheduled away when running in a VM21171934 *21182118- * We need to prevent that we loop forever in the hrtimer21192119- * interrupt routine. We give it 3 attempts to avoid21202120- * overreacting on some spurious event.21212121- *21222122- * Acquire base lock for updating the offsets and retrieving21232123- * the current time.19351935+ * We need to prevent that we loop forever in the hrtimer interrupt19361936+ * routine. 
We give it 3 attempts to avoid overreacting on some19371937+ * spurious event.21241938 */21252125- raw_spin_lock_irqsave(&cpu_base->lock, flags);21261939 now = hrtimer_update_base(cpu_base);21272127- cpu_base->nr_retries++;21282128- if (++retries < 3)21292129- goto retry;21302130- /*21312131- * Give the system a chance to do something else than looping21322132- * here. We stored the entry time, so we know exactly how long21332133- * we spent here. We schedule the next event this amount of21342134- * time away.21352135- */21362136- cpu_base->nr_hangs++;21372137- cpu_base->hang_detected = 1;21382138- raw_spin_unlock_irqrestore(&cpu_base->lock, flags);19401940+ expires_next = hrtimer_update_next_event(cpu_base);19411941+ cpu_base->hang_detected = false;19421942+ if (expires_next < now) {19431943+ if (++retries < 3)19441944+ goto retry;2139194521402140- delta = ktime_sub(now, entry_time);21412141- if ((unsigned int)delta > cpu_base->max_hang_time)21422142- cpu_base->max_hang_time = (unsigned int) delta;21432143- /*21442144- * Limit it to a sensible value as we enforce a longer21452145- * delay. 
Give the CPU at least 100ms to catch up.21462146- */21472147- if (delta > 100 * NSEC_PER_MSEC)21482148- expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);21492149- else21502150- expires_next = ktime_add(now, delta);21512151- tick_program_event(expires_next, 1);21522152- pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));19461946+ delta = ktime_sub(now, entry_time);19471947+ cpu_base->max_hang_time = max_t(unsigned int, cpu_base->max_hang_time, delta);19481948+ cpu_base->nr_hangs++;19491949+ cpu_base->hang_detected = true;19501950+ }19511951+19521952+ hrtimer_interrupt_rearm(cpu_base, expires_next);19531953+ raw_spin_unlock_irqrestore(&cpu_base->lock, flags);21531954}19551955+21541956#endif /* !CONFIG_HIGH_RES_TIMERS */2155195721561958/*···2167199921682000 if (!ktime_before(now, cpu_base->softirq_expires_next)) {21692001 cpu_base->softirq_expires_next = KTIME_MAX;21702170- cpu_base->softirq_activated = 1;20022002+ cpu_base->softirq_activated = true;21712003 raise_timer_softirq(HRTIMER_SOFTIRQ);21722004 }21732005···21802012 */21812013static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)21822014{21832183- struct hrtimer_sleeper *t =21842184- container_of(timer, struct hrtimer_sleeper, timer);20152015+ struct hrtimer_sleeper *t = container_of(timer, struct hrtimer_sleeper, timer);21852016 struct task_struct *task = t->task;2186201721872018 t->task = NULL;···21982031 * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers21992032 * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)22002033 */22012201-void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,22022202- enum hrtimer_mode mode)20342034+void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode)22032035{22042036 /*22052037 * Make the enqueue delivery mode check work on RT. 
If the sleeper···22142048}22152049EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);2216205022172217-static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl,22182218- clockid_t clock_id, enum hrtimer_mode mode)20512051+static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,20522052+ enum hrtimer_mode mode)22192053{22202054 /*22212055 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly···22512085 * @clock_id: the clock to be used22522086 * @mode: timer mode abs/rel22532087 */22542254-void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl,22552255- clockid_t clock_id, enum hrtimer_mode mode)20882088+void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id,20892089+ enum hrtimer_mode mode)22562090{22572091 debug_setup_on_stack(&sl->timer, clock_id, mode);22582092 __hrtimer_setup_sleeper(sl, clock_id, mode);···23252159 return ret;23262160}2327216123282328-long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,23292329- const clockid_t clockid)21622162+long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid)23302163{23312164 struct restart_block *restart;23322165 struct hrtimer_sleeper t;···23682203 current->restart_block.fn = do_no_restart_syscall;23692204 current->restart_block.nanosleep.type = rmtp ? 
TT_NATIVE : TT_NONE;23702205 current->restart_block.nanosleep.rmtp = rmtp;23712371- return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,23722372- CLOCK_MONOTONIC);22062206+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC);23732207}2374220823752209#endif···23762212#ifdef CONFIG_COMPAT_32BIT_TIME2377221323782214SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,23792379- struct old_timespec32 __user *, rmtp)22152215+ struct old_timespec32 __user *, rmtp)23802216{23812217 struct timespec64 tu;23822218···23892225 current->restart_block.fn = do_no_restart_syscall;23902226 current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;23912227 current->restart_block.nanosleep.compat_rmtp = rmtp;23922392- return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,23932393- CLOCK_MONOTONIC);22282228+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC);23942229}23952230#endif23962231···23992236int hrtimers_prepare_cpu(unsigned int cpu)24002237{24012238 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);24022402- int i;2403223924042404- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {22402240+ for (int i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {24052241 struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];2406224224072243 clock_b->cpu_base = cpu_base;24082244 seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);24092409- timerqueue_init_head(&clock_b->active);22452245+ timerqueue_linked_init_head(&clock_b->active);24102246 }2411224724122248 cpu_base->cpu = cpu;···2419225724202258 /* Clear out any left over state from a CPU down operation */24212259 cpu_base->active_bases = 0;24222422- cpu_base->hres_active = 0;24232423- cpu_base->hang_detected = 0;22602260+ cpu_base->hres_active = false;22612261+ cpu_base->hang_detected = false;24242262 cpu_base->next_timer = NULL;24252263 cpu_base->softirq_next_timer = NULL;24262264 
cpu_base->expires_next = KTIME_MAX;24272265 cpu_base->softirq_expires_next = KTIME_MAX;24282428- cpu_base->online = 1;22662266+ cpu_base->softirq_activated = false;22672267+ cpu_base->online = true;24292268 return 0;24302269}24312270···24352272static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,24362273 struct hrtimer_clock_base *new_base)24372274{22752275+ struct timerqueue_linked_node *node;24382276 struct hrtimer *timer;24392439- struct timerqueue_node *node;2440227724412441- while ((node = timerqueue_getnext(&old_base->active))) {22782278+ while ((node = timerqueue_linked_first(&old_base->active))) {24422279 timer = container_of(node, struct hrtimer, node);24432280 BUG_ON(hrtimer_callback_running(timer));24442444- debug_deactivate(timer);22812281+ debug_hrtimer_deactivate(timer);2445228224462283 /*24472284 * Mark it as ENQUEUED not INACTIVE otherwise the24482285 * timer could be seen as !active and just vanish away24492286 * under us on another CPU24502287 */24512451- __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);22882288+ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, false);24522289 timer->base = new_base;24532290 /*24542291 * Enqueue the timers on the new cpu. 
This does not···24582295 * sort out already expired timers and reprogram the24592296 * event device.24602297 */24612461- enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);22982298+ enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS, true);24622299 }24632300}2464230124652302int hrtimers_cpu_dying(unsigned int dying_cpu)24662303{24672467- int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));23042304+ int ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));24682305 struct hrtimer_cpu_base *old_base, *new_base;2469230624702307 old_base = this_cpu_ptr(&hrtimer_bases);···24772314 raw_spin_lock(&old_base->lock);24782315 raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);2479231624802480- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {24812481- migrate_hrtimer_list(&old_base->clock_base[i],24822482- &new_base->clock_base[i]);24832483- }23172317+ for (int i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)23182318+ migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]);2484231924852320 /* Tell the other CPU to retrigger the next event */24862321 smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);2487232224882323 raw_spin_unlock(&new_base->lock);24892489- old_base->online = 0;23242324+ old_base->online = false;24902325 raw_spin_unlock(&old_base->lock);2491232624922327 return 0;
···864864}865865EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);866866867867+/* Simplified variant of hrtimer_forward_now() */868868+static ktime_t tick_forward_now(ktime_t expires, ktime_t now)869869+{870870+ ktime_t delta = now - expires;871871+872872+ if (likely(delta < TICK_NSEC))873873+ return expires + TICK_NSEC;874874+875875+ expires += TICK_NSEC * ktime_divns(delta, TICK_NSEC);876876+ if (expires > now)877877+ return expires;878878+ return expires + TICK_NSEC;879879+}880880+867881static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)868882{869869- hrtimer_cancel(&ts->sched_timer);870870- hrtimer_set_expires(&ts->sched_timer, ts->last_tick);883883+ ktime_t expires = ts->last_tick;871884872872- /* Forward the time to expire in the future */873873- hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);885885+ if (now >= expires)886886+ expires = tick_forward_now(expires, now);874887875888 if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) {876876- hrtimer_start_expires(&ts->sched_timer,877877- HRTIMER_MODE_ABS_PINNED_HARD);889889+ hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED_HARD);878890 } else {879879- tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);891891+ hrtimer_set_expires(&ts->sched_timer, expires);892892+ tick_program_event(expires, 1);880893 }881894882895 /*
+174-21
kernel/time/timekeeping.c
···33 * Kernel timekeeping code and accessor functions. Based on code from44 * timer.c, moved in commit 8524070b7982.55 */66-#include <linux/timekeeper_internal.h>77-#include <linux/module.h>88-#include <linux/interrupt.h>99-#include <linux/kobject.h>1010-#include <linux/percpu.h>1111-#include <linux/init.h>1212-#include <linux/mm.h>1313-#include <linux/nmi.h>1414-#include <linux/sched.h>1515-#include <linux/sched/loadavg.h>1616-#include <linux/sched/clock.h>1717-#include <linux/syscore_ops.h>66+#include <linux/audit.h>187#include <linux/clocksource.h>88+#include <linux/compiler.h>199#include <linux/jiffies.h>1010+#include <linux/kobject.h>1111+#include <linux/module.h>1212+#include <linux/nmi.h>1313+#include <linux/pvclock_gtod.h>1414+#include <linux/random.h>1515+#include <linux/sched/clock.h>1616+#include <linux/sched/loadavg.h>1717+#include <linux/static_key.h>1818+#include <linux/stop_machine.h>1919+#include <linux/syscore_ops.h>2020+#include <linux/tick.h>2021#include <linux/time.h>2122#include <linux/timex.h>2222-#include <linux/tick.h>2323-#include <linux/stop_machine.h>2424-#include <linux/pvclock_gtod.h>2525-#include <linux/compiler.h>2626-#include <linux/audit.h>2727-#include <linux/random.h>2323+#include <linux/timekeeper_internal.h>28242925#include <vdso/auxclock.h>30263127#include "tick-internal.h"3232-#include "ntp_internal.h"3328#include "timekeeping_internal.h"2929+#include "ntp_internal.h"34303531#define TK_CLEAR_NTP (1 << 0)3632#define TK_CLOCK_WAS_SET (1 << 1)···271275 tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);272276}273277278278+#ifdef CONFIG_ARCH_WANTS_CLOCKSOURCE_READ_INLINE279279+#include <asm/clock_inlined.h>280280+281281+static DEFINE_STATIC_KEY_FALSE(clocksource_read_inlined);282282+274283/*275284 * tk_clock_read - atomic clocksource read() helper276285 *···289288 * a read of the fast-timekeeper tkrs (which is protected by its own locking290289 * and update logic).291290 */292292-static inline u64 tk_clock_read(const 
struct tk_read_base *tkr)291291+static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)292292+{293293+ struct clocksource *clock = READ_ONCE(tkr->clock);294294+295295+ if (static_branch_likely(&clocksource_read_inlined))296296+ return arch_inlined_clocksource_read(clock);297297+298298+ return clock->read(clock);299299+}300300+301301+static inline void clocksource_disable_inline_read(void)302302+{303303+ static_branch_disable(&clocksource_read_inlined);304304+}305305+306306+static inline void clocksource_enable_inline_read(void)307307+{308308+ static_branch_enable(&clocksource_read_inlined);309309+}310310+#else311311+static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr)293312{294313 struct clocksource *clock = READ_ONCE(tkr->clock);295314296315 return clock->read(clock);297316}317317+static inline void clocksource_disable_inline_read(void) { }318318+static inline void clocksource_enable_inline_read(void) { }319319+#endif298320299321/**300322 * tk_setup_internals - Set up internals to use clocksource clock.···391367 tk->tkr_raw.mult = clock->mult;392368 tk->ntp_err_mult = 0;393369 tk->skip_second_overflow = 0;370370+371371+ tk->cs_id = clock->id;372372+373373+ /* Coupled clockevent data */374374+ if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) &&375375+ clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT) {376376+ /*377377+ * Aim for a one hour maximum delta and use KHz to handle378378+ * clocksources with a frequency above 4GHz correctly as379379+ * the frequency argument of clocks_calc_mult_shift() is u32.380380+ */381381+ clocks_calc_mult_shift(&tk->cs_ns_to_cyc_mult, &tk->cs_ns_to_cyc_shift,382382+ NSEC_PER_MSEC, clock->freq_khz, 3600 * 1000);383383+ /*384384+ * Initialize the conversion limit as the previous clocksource385385+ * might have the same shift/mult pair so the quick check in386386+ * tk_update_ns_to_cyc() fails to update it after a clocksource387387+ * change leaving it effectively zero.388388+ */389389+ 
tk->cs_ns_to_cyc_maxns = div_u64(clock->mask, tk->cs_ns_to_cyc_mult);390390+ }394391}395392396393/* Timekeeper helper functions. */···420375 return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);421376}422377423423-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)378378+static __always_inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)424379{425380 /* Calculate the delta since the last update_wall_time() */426381 u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;···741696 tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);742697}743698699699+static inline void tk_update_ns_to_cyc(struct timekeeper *tks, struct timekeeper *tkc)700700+{701701+ struct tk_read_base *tkrs = &tks->tkr_mono;702702+ struct tk_read_base *tkrc = &tkc->tkr_mono;703703+ unsigned int shift;704704+705705+ if (!IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) ||706706+ !(tkrs->clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT))707707+ return;708708+709709+ if (tkrs->mult == tkrc->mult && tkrs->shift == tkrc->shift)710710+ return;711711+ /*712712+ * The conversion math is simple:713713+ *714714+ * CS::MULT (1 << NS_TO_CYC_SHIFT)715715+ * --------------- = ----------------------716716+ * (1 << CS:SHIFT) NS_TO_CYC_MULT717717+ *718718+ * Ergo:719719+ *720720+ * NS_TO_CYC_MULT = (1 << (CS::SHIFT + NS_TO_CYC_SHIFT)) / CS::MULT721721+ *722722+ * NS_TO_CYC_SHIFT has been set up in tk_setup_internals()723723+ */724724+ shift = tkrs->shift + tks->cs_ns_to_cyc_shift;725725+ tks->cs_ns_to_cyc_mult = (u32)div_u64(1ULL << shift, tkrs->mult);726726+ tks->cs_ns_to_cyc_maxns = div_u64(tkrs->clock->mask, tks->cs_ns_to_cyc_mult);727727+}728728+744729/*745730 * Restore the shadow timekeeper from the real timekeeper.746731 */···805730 tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;806731807732 if (tk->id == TIMEKEEPER_CORE) {733733+ tk_update_ns_to_cyc(tk, &tkd->timekeeper);808734 
update_vsyscall(tk);809735 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);810736···858782 delta -= incr;859783 }860784 tk_update_coarse_nsecs(tk);785785+}786786+787787+/*788788+ * ktime_expiry_to_cycles - Convert an expiry time to clocksource cycles789789+ * @id: Clocksource ID which is required for validity790790+ * @expires_ns: Absolute CLOCK_MONOTONIC expiry time (nsecs) to be converted791791+ * @cycles: Pointer to storage for corresponding absolute cycles value792792+ *793793+ * Convert a CLOCK_MONOTONIC based absolute expiry time to a cycles value794794+ * based on the correlated clocksource of the clockevent device by using795795+ * the base nanoseconds and cycles values of the last timekeeper update and796796+ * converting the delta between @expires_ns and base nanoseconds to cycles.797797+ *798798+ * This only works for clockevent devices which are using a less than or799799+ * equal comparator against the clocksource.800800+ *801801+ * Utilizing this avoids two clocksource reads for such devices, the802802+ * ktime_get() in clockevents_program_event() to calculate the delta expiry803803+ * value and the readout in the device::set_next_event() callback to804804+ * convert the delta back to an absolute comparator value.805805+ *806806+ * Returns: True if @id matches the current clocksource ID, false otherwise807807+ */808808+bool ktime_expiry_to_cycles(enum clocksource_ids id, ktime_t expires_ns, u64 *cycles)809809+{810810+ struct timekeeper *tk = &tk_core.timekeeper;811811+ struct tk_read_base *tkrm = &tk->tkr_mono;812812+ ktime_t base_ns, delta_ns, max_ns;813813+ u64 base_cycles, delta_cycles;814814+ unsigned int seq;815815+ u32 mult, shift;816816+817817+ /*818818+ * Racy check to avoid the seqcount overhead when ID does not match. If819819+ * the relevant clocksource is installed concurrently, then this will820820+ * just delay the switch over to this mechanism until the next event is821821+ * programmed. 
If the ID is not matching the clock events code will use822822+ * the regular relative set_next_event() callback as before.823823+ */824824+ if (data_race(tk->cs_id) != id)825825+ return false;826826+827827+ do {828828+ seq = read_seqcount_begin(&tk_core.seq);829829+830830+ if (tk->cs_id != id)831831+ return false;832832+833833+ base_cycles = tkrm->cycle_last;834834+ base_ns = tkrm->base + (tkrm->xtime_nsec >> tkrm->shift);835835+836836+ mult = tk->cs_ns_to_cyc_mult;837837+ shift = tk->cs_ns_to_cyc_shift;838838+ max_ns = tk->cs_ns_to_cyc_maxns;839839+840840+ } while (read_seqcount_retry(&tk_core.seq, seq));841841+842842+ /* Prevent negative deltas and multiplication overflows */843843+ delta_ns = min(expires_ns - base_ns, max_ns);844844+ delta_ns = max(delta_ns, 0);845845+846846+ /* Convert to cycles */847847+ delta_cycles = ((u64)delta_ns * mult) >> shift;848848+ *cycles = base_cycles + delta_cycles;849849+ return true;861850}862851863852/**···1772163117731632 if (tk->tkr_mono.clock == clock)17741633 return 0;16341634+16351635+ /* Disable inlined reads across the clocksource switch */16361636+ clocksource_disable_inline_read();16371637+17751638 stop_machine(change_clocksource, clock, NULL);16391639+16401640+ /*16411641+ * If the clocksource has been selected and supports inlined reads16421642+ * enable the branch.16431643+ */16441644+ if (tk->tkr_mono.clock == clock && clock->flags & CLOCK_SOURCE_CAN_INLINE_READ)16451645+ clocksource_enable_inline_read();16461646+17761647 tick_clock_notify();17771648 return tk->tkr_mono.clock == clock ? 0 : -1;17781649}