Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'timers-urgent-2024-04-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fixes from Ingo Molnar:
"Fix various timer bugs:

- Fix a timer migration bug that may result in missed events

- Fix timer migration group hierarchy event updates

- Fix a PowerPC64 build warning

- Fix a handful of DocBook annotation bugs"

* tag 'timers-urgent-2024-04-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
timers/migration: Return early on deactivation
timers/migration: Fix ignored event due to missing CPU update
vdso: Use CONFIG_PAGE_SHIFT in vdso/datapage.h
timers: Fix text inconsistencies and spelling
tick/sched: Fix struct tick_sched doc warnings
tick/sched: Fix various kernel-doc warnings
timers: Fix kernel-doc format and add Return values
time/timekeeping: Fix kernel-doc warnings and typos
time/timecounter: Fix inline documentation

+121 -36
+1 -2
arch/powerpc/include/asm/vdso/gettimeofday.h
···
 
 #ifndef __ASSEMBLY__
 
-#include <asm/page.h>
 #include <asm/vdso/timebase.h>
 #include <asm/barrier.h>
 #include <asm/unistd.h>
···
 static __always_inline
 const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
-	return (void *)vd + PAGE_SIZE;
+	return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
 }
 #endif
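The substitution works because PAGE_SIZE is conventionally defined as 1 shifted left by PAGE_SHIFT; open-coding the shift against CONFIG_PAGE_SHIFT spares vDSO code from including <asm/page.h>, which is not safe to pull in there. A minimal userspace sketch of the equivalence, assuming 4 KiB pages (CONFIG_PAGE_SHIFT = 12 is an assumption for illustration):

    #include <stdio.h>

    #define CONFIG_PAGE_SHIFT 12	/* assumption: 4 KiB pages */

    int main(void)
    {
    	/* (1U << CONFIG_PAGE_SHIFT) reproduces PAGE_SIZE without <asm/page.h> */
    	printf("page size: %u bytes\n", 1U << CONFIG_PAGE_SHIFT);	/* 4096 */
    	return 0;
    }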
+9 -2
include/linux/timecounter.h
···
  *
  * @read:		returns the current cycle value
  * @mask:		bitmask for two's complement
- *			subtraction of non 64 bit counters,
+ *			subtraction of non-64-bit counters,
  *			see CYCLECOUNTER_MASK() helper macro
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
···
 };
 
 /**
- * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
+ * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
  *	Contains the state needed by timecounter_read() to detect
  *	cycle counter wrap around. Initialize with
  *	timecounter_init(). Also used to convert cycle counts into the
···
  * @cycles:	Cycles
  * @mask:	bit mask for maintaining the 'frac' field
  * @frac:	pointer to storage for the fractional nanoseconds.
+ *
+ * Returns: cycle counter cycles converted to nanoseconds
  */
 static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
 				      u64 cycles, u64 mask, u64 *frac)
···
 
 /**
  * timecounter_adjtime - Shifts the time of the clock.
+ * @tc:		The &struct timecounter to adjust
  * @delta:	Desired change in nanoseconds.
  */
 static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
···
  *
  * In other words, keeps track of time since the same epoch as
  * the function which generated the initial time stamp.
+ *
+ * Returns: nanoseconds since the initial time stamp
  */
 extern u64 timecounter_read(struct timecounter *tc);
···
  *
  * This allows conversion of cycle counter values which were generated
  * in the past.
+ *
+ * Returns: cycle counter converted to nanoseconds since the initial time stamp
  */
 extern u64 timecounter_cyc2time(const struct timecounter *tc,
 				u64 cycle_tstamp);
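For context on the @mult/@shift fields documented above: the kernel converts cycles to nanoseconds with fixed-point arithmetic, ns = (cycles * mult) >> shift. A small userspace sketch of that arithmetic (the helper name and the 24 MHz counter are illustrative assumptions, not the kernel's code):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative fixed-point conversion: ns = (cycles * mult) >> shift */
    static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
    {
    	return (cycles * mult) >> shift;
    }

    int main(void)
    {
    	/* Assumption: a 24 MHz counter; pick mult so that
    	 * mult / 2^shift ~= 1e9 / 24e6 ~= 41.67 ns per cycle. */
    	uint32_t shift = 10;
    	uint32_t mult = (uint32_t)((1000000000ULL << shift) / 24000000);

    	/* 24e6 cycles should come out close to one second */
    	printf("%llu ns\n", (unsigned long long)cyc2ns(24000000, mult, shift));
    	return 0;
    }

The @mask and @frac machinery in the real helper additionally carries the truncated fractional nanoseconds across calls, so rounding error does not accumulate between reads.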
+42 -7
include/linux/timekeeping.h
···
 					const struct timezone *tz);
 
 /*
- * ktime_get() family: read the current time in a multitude of ways,
+ * ktime_get() family - read the current time in a multitude of ways.
  *
  * The default time reference is CLOCK_MONOTONIC, starting at
  * boot time but not counting the time spent in suspend.
  * For other references, use the functions with "real", "clocktai",
  * "boottime" and "raw" suffixes.
  *
- * To get the time in a different format, use the ones wit
+ * To get the time in a different format, use the ones with
  * "ns", "ts64" and "seconds" suffix.
  *
  * See Documentation/core-api/timekeeping.rst for more details.
···
 
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
+ *
+ * Returns: real (wall) time in ktime_t format
  */
 static inline ktime_t ktime_get_real(void)
 {
···
 }
 
 /**
- * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
+ * ktime_get_boottime - Get monotonic time since boot in ktime_t format
  *
  * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
  * time spent in suspend.
+ *
+ * Returns: monotonic time since boot in ktime_t format
  */
 static inline ktime_t ktime_get_boottime(void)
 {
···
 }
 
 /**
- * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
+ * ktime_get_clocktai - Get the TAI time of day in ktime_t format
+ *
+ * Returns: the TAI time of day in ktime_t format
  */
 static inline ktime_t ktime_get_clocktai(void)
 {
···
 
 /**
  * ktime_mono_to_real - Convert monotonic time to clock realtime
+ * @mono:	monotonic time to convert
+ *
+ * Returns: time converted to realtime clock
  */
 static inline ktime_t ktime_mono_to_real(ktime_t mono)
 {
 	return ktime_mono_to_any(mono, TK_OFFS_REAL);
 }
 
+/**
+ * ktime_get_ns - Get the current time in nanoseconds
+ *
+ * Returns: current time converted to nanoseconds
+ */
 static inline u64 ktime_get_ns(void)
 {
 	return ktime_to_ns(ktime_get());
 }
 
+/**
+ * ktime_get_real_ns - Get the current real/wall time in nanoseconds
+ *
+ * Returns: current real time converted to nanoseconds
+ */
 static inline u64 ktime_get_real_ns(void)
 {
 	return ktime_to_ns(ktime_get_real());
 }
 
+/**
+ * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
+ *
+ * Returns: current boottime converted to nanoseconds
+ */
 static inline u64 ktime_get_boottime_ns(void)
 {
 	return ktime_to_ns(ktime_get_boottime());
 }
 
+/**
+ * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
+ *
+ * Returns: current TAI time converted to nanoseconds
+ */
 static inline u64 ktime_get_clocktai_ns(void)
 {
 	return ktime_to_ns(ktime_get_clocktai());
 }
 
+/**
+ * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
+ *
+ * Returns: current raw monotonic time converted to nanoseconds
+ */
 static inline u64 ktime_get_raw_ns(void)
 {
 	return ktime_to_ns(ktime_get_raw());
···
 
 extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
 
-/*
- * struct ktime_timestanps - Simultaneous mono/boot/real timestamps
+/**
+ * struct ktime_timestamps - Simultaneous mono/boot/real timestamps
  * @mono:	Monotonic timestamp
  * @boot:	Boottime timestamp
  * @real:	Realtime timestamp
···
  * @cycles:	Clocksource counter value to produce the system times
  * @real:	Realtime system time
  * @raw:	Monotonic raw system time
- * @clock_was_set_seq:	The sequence number of clock was set events
+ * @cs_id:	Clocksource ID
+ * @clock_was_set_seq:	The sequence number of clock-was-set events
  * @cs_was_changed_seq:	The sequence number of clocksource change events
  */
 struct system_time_snapshot {
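A common pairing of the helpers that gained kernel-doc above is timing a section of code against the monotonic clock; a hedged in-kernel sketch (the wrapper function is hypothetical, not part of the patch):

    #include <linux/ktime.h>
    #include <linux/timekeeping.h>

    /* Hypothetical helper: time a callback with the monotonic clock */
    static u64 measure_ns(void (*work)(void))
    {
    	u64 start = ktime_get_ns();	/* CLOCK_MONOTONIC, nanoseconds */

    	work();

    	/* Monotonic time never jumps backwards, so this is settimeofday-safe */
    	return ktime_get_ns() - start;
    }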
+10 -2
include/linux/timer.h
···
 #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
 #endif
 
-/**
+/*
  * @TIMER_DEFERRABLE: A deferrable timer will work normally when the
  * system is busy, but will not cause a CPU to come out of idle just
  * to service it; instead, the timer will be serviced when the CPU
···
  * or not. Callers must ensure serialization wrt. other operations done
  * to this timer, eg. interrupt contexts, or other CPUs on SMP.
  *
- * return value: 1 if the timer is pending, 0 if not.
+ * Returns: 1 if the timer is pending, 0 if not.
  */
 static inline int timer_pending(const struct timer_list * timer)
 {
···
  * See timer_delete_sync() for detailed explanation.
  *
  * Do not use in new code. Use timer_delete_sync() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
  */
 static inline int del_timer_sync(struct timer_list *timer)
 {
···
  * See timer_delete() for detailed explanation.
  *
  * Do not use in new code. Use timer_delete() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
  */
 static inline int del_timer(struct timer_list *timer)
 {
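Both wrappers above now carry "Do not use in new code" notes; new code should call timer_delete_sync()/timer_delete() directly. A short sketch of a teardown path written against the preferred name (the surrounding function and message are illustrative):

    #include <linux/printk.h>
    #include <linux/timer.h>

    /* Illustrative teardown: timer_delete_sync() also waits for a
     * concurrently running timer handler to finish before returning. */
    static void example_shutdown(struct timer_list *t)
    {
    	if (timer_delete_sync(t))	/* 1: was pending, now removed */
    		pr_debug("timer was still pending at shutdown\n");
    }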
+1 -7
include/vdso/datapage.h
···
 #include <vdso/time32.h>
 #include <vdso/time64.h>
 
-#ifdef CONFIG_ARM64
-#include <asm/page-def.h>
-#else
-#include <asm/page.h>
-#endif
-
 #ifdef CONFIG_ARCH_HAS_VDSO_DATA
 #include <asm/vdso/data.h>
 #else
···
  */
 union vdso_data_store {
 	struct vdso_data	data[CS_BASES];
-	u8			page[PAGE_SIZE];
+	u8			page[1U << CONFIG_PAGE_SHIFT];
 };
 
 /*
+15 -3
kernel/time/tick-sched.c
···
 
 /**
  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
+ * @now: current ktime_t
  *
  * Called from interrupt entry when the CPU was idle
  *
···
  * This time is measured via accounting rather than sampling,
  * and is as accurate as ktime_get() is.
  *
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
  */
 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 {
···
  * This time is measured via accounting rather than sampling,
  * and is as accurate as ktime_get() is.
  *
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
  */
 u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 {
···
 
 /**
  * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ *
+ * Return: %true if the tick handler has run, otherwise %false
  */
 bool tick_nohz_idle_got_tick(void)
 {
···
  * stopped, it returns the next hrtimer.
  *
  * Called from power state control code with interrupts disabled
+ *
+ * Return: the next expiration time
  */
 ktime_t tick_nohz_get_next_hrtimer(void)
 {
···
  * The return value of this function and/or the value returned by it through the
  * @delta_next pointer can be negative which must be taken into account by its
  * callers.
+ *
+ * Return: the expected length of the current sleep
  */
 ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
···
 /**
  * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
  * for a particular CPU.
+ * @cpu: target CPU number
  *
  * Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for @cpu
  */
 unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
 {
···
  * tick_nohz_get_idle_calls - return the current idle calls counter value
  *
  * Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for the current CPU
  */
 unsigned long tick_nohz_get_idle_calls(void)
 {
···
 
 /**
  * tick_setup_sched_timer - setup the tick emulation timer
- * @mode: tick_nohz_mode to setup for
+ * @hrtimer: whether to use the hrtimer or not
  */
 void tick_setup_sched_timer(bool hrtimer)
 {
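All of these hunks converge on the structured "Return:" section that kernel-doc expects. For reference, the canonical shape of such a comment, shown on a made-up function:

    /**
     * example_helper - one-line summary of the function
     * @arg:	what the parameter means
     *
     * Free-form description of behaviour, context and locking rules.
     *
     * Return: %0 on success, negative errno on failure
     */
    int example_helper(int arg);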
+1 -1
kernel/time/tick-sched.h
···
  * @next_tick:		Next tick to be fired when in dynticks mode.
  * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
  * @idle_waketime:	Time when the idle was interrupted
+ * @idle_sleeptime_seq:	sequence counter for data consistency
  * @idle_entrytime:	Time when the idle call was entered
- * @nohz_mode:		Mode - one state of tick_nohz_mode
  * @last_jiffies:	Base jiffies snapshot when next event was last computed
  * @timer_expires_base:	Base time clock monotonic for @timer_expires
  * @timer_expires:	Anticipated timer expiration time (in case sched tick is stopped)
+11 -11
kernel/time/timer.c
···
 
 /*
  * The timer wheel has LVL_DEPTH array levels. Each level provides an array of
- * LVL_SIZE buckets. Each level is driven by its own clock and therefor each
+ * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
  * level has a different granularity.
  *
- * The level granularity is:		LVL_CLK_DIV ^ lvl
+ * The level granularity is:		LVL_CLK_DIV ^ level
  * The level clock frequency is:	HZ / (LVL_CLK_DIV ^ level)
  *
  * The array level of a newly armed timer depends on the relative expiry
  * time. The farther the expiry time is away the higher the array level and
- * therefor the granularity becomes.
+ * therefore the granularity becomes.
  *
  * Contrary to the original timer wheel implementation, which aims for 'exact'
  * expiry of the timers, this implementation removes the need for recascading
···
  * struct timer_base - Per CPU timer base (number of base depends on config)
  * @lock:		Lock protecting the timer_base
  * @running_timer:	When expiring timers, the lock is dropped. To make
- *			sure not to race agains deleting/modifying a
+ *			sure not to race against deleting/modifying a
  *			currently running timer, the pointer is set to the
  *			timer, which expires at the moment. If no timer is
  *			running, the pointer is NULL.
···
 }
 
 /*
- * fixup_init is called when:
+ * timer_fixup_init is called when:
  * - an active object is initialized
  */
 static bool timer_fixup_init(void *addr, enum debug_obj_state state)
···
 }
 
 /*
- * fixup_activate is called when:
+ * timer_fixup_activate is called when:
  * - an active object is activated
  * - an unknown non-static object is activated
  */
···
 }
 
 /*
- * fixup_free is called when:
+ * timer_fixup_free is called when:
  * - an active object is freed
  */
 static bool timer_fixup_free(void *addr, enum debug_obj_state state)
···
 }
 
 /*
- * fixup_assert_init is called when:
+ * timer_fixup_assert_init is called when:
  * - an untracked/uninit-ed object is found
  */
 static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
···
  * @key: lockdep class key of the fake lock used for tracking timer
  *	 sync lock dependencies
  *
- * init_timer_key() must be done to a timer prior calling *any* of the
+ * init_timer_key() must be done to a timer prior to calling *any* of the
  * other timer functions.
  */
 void init_timer_key(struct timer_list *timer,
···
  * If @shutdown is set then the lock has to be taken whether the
  * timer is pending or not to protect against a concurrent rearm
  * which might hit between the lockless pending check and the lock
- * aquisition. By taking the lock it is ensured that such a newly
+ * acquisition. By taking the lock it is ensured that such a newly
  * enqueued timer is dequeued and cannot end up with
  * timer->function == NULL in the expiry code.
  *
···
 
 /*
  * When timer base is not set idle, undo the effect of
- * tmigr_cpu_deactivate() to prevent inconsitent states - active
+ * tmigr_cpu_deactivate() to prevent inconsistent states - active
  * timer base but inactive timer migration hierarchy.
  *
  * When timer base was already marked idle, nothing will be
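The corrected granularity formula is easiest to see with numbers; a userspace sketch assuming HZ = 1000 and LVL_CLK_DIV = 8 (stated here as assumptions, though they match the in-tree defaults):

    #include <stdio.h>

    int main(void)
    {
    	/* Assumptions: HZ = 1000, LVL_CLK_DIV = 8 */
    	const unsigned long hz = 1000, lvl_clk_div = 8;
    	unsigned long gran = 1;	/* LVL_CLK_DIV ^ level, in jiffies */
    	int level;

    	for (level = 0; level < 4; level++, gran *= lvl_clk_div)
    		printf("level %d: granularity %4lu jiffies (%4lu ms)\n",
    		       level, gran, gran * 1000 / hz);
    	return 0;
    }

With those values, level 0 buckets are 1 ms apart, level 1 buckets 8 ms, level 2 64 ms, and so on: the farther out a timer is armed, the coarser (but cheaper) its placement, which is exactly the trade-off the comment describes.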
+31 -1
kernel/time/timer_migration.c
···
 
 	first_childevt = evt = data->evt;
 
+	/*
+	 * Walking the hierarchy is required in any case when a
+	 * remote expiry was done before. This ensures to not lose
+	 * already queued events in non active groups (see section
+	 * "Required event and timerqueue update after a remote
+	 * expiry" in the documentation at the top).
+	 *
+	 * The two call sites which are executed without a remote expiry
+	 * before, are not prevented from propagating changes through
+	 * the hierarchy by the return:
+	 *  - When entering this path by tmigr_new_timer(), @evt->ignore
+	 *    is never set.
+	 *  - tmigr_inactive_up() takes care of the propagation by
+	 *    itself and ignores the return value. But an immediate
+	 *    return is possible if there is a parent, sparing group
+	 *    locking at this level, because the upper walking call to
+	 *    the parent will take care about removing this event from
+	 *    within the group and update next_expiry accordingly.
+	 *
+	 * However if there is no parent, ie: the hierarchy has only a
+	 * single level so @group is the top level group, make sure the
+	 * first event information of the group is updated properly and
+	 * also handled properly, so skip this fast return path.
+	 */
+	if (evt->ignore && !remote && group->parent)
+		return true;
+
 	raw_spin_lock(&group->lock);
 
 	childstate.state = 0;
···
 	 * queue when the expiry time changed only or when it could be ignored.
 	 */
 	if (timerqueue_node_queued(&evt->nextevt)) {
-		if ((evt->nextevt.expires == nextexp) && !evt->ignore)
+		if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
+			/* Make sure not to miss a new CPU event with the same expiry */
+			evt->cpu = first_childevt->cpu;
 			goto check_toplvl;
+		}
 
 		if (!timerqueue_del(&group->events, &evt->nextevt))
 			WRITE_ONCE(group->next_expiry, KTIME_MAX);