Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

hrtimer: Optimize for local timers

The decision whether to keep timers on the local CPU or on the CPU they are
associated with is suboptimal and causes the expensive switch_hrtimer_base()
mechanism to be invoked more often than necessary. This is especially true for
pinned timers.

Rewrite the decision logic so that the current base is kept if:

1) The callback is running on the base

2) The timer is associated with the local CPU and is the first expiring
timer, as that makes it possible to avoid redundant reprogramming

3) The timer is associated with the local CPU and pinned

4) The timer is associated with the local CPU and timer migration is
disabled.

Only #2 was covered by the original code, but especially #3 makes a
difference for high frequency rearming timers like the scheduler hrtick
timer. If timer migration is disabled, then #4 avoids most of the base
switches.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163430.607935269@kernel.org

authored by

Thomas Gleixner and committed by
Peter Zijlstra
3288cd48 22f011be

+65 -36
+65 -36
kernel/time/hrtimer.c
··· 1147 1147 } 1148 1148 1149 1149 static inline bool remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1150 - bool restart, bool keep_local) 1150 + bool restart, bool keep_base) 1151 1151 { 1152 1152 bool queued_state = timer->is_queued; 1153 1153 ··· 1177 1177 if (!restart) 1178 1178 queued_state = HRTIMER_STATE_INACTIVE; 1179 1179 else 1180 - reprogram &= !keep_local; 1180 + reprogram &= !keep_base; 1181 1181 1182 1182 __remove_hrtimer(timer, base, queued_state, reprogram); 1183 1183 return true; ··· 1220 1220 hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); 1221 1221 } 1222 1222 1223 + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 1224 + static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned) 1225 + { 1226 + if (static_branch_likely(&timers_migration_enabled)) { 1227 + /* 1228 + * If it is local and the first expiring timer keep it on the local 1229 + * CPU to optimize reprogramming of the clockevent device. Also 1230 + * avoid switch_hrtimer_base() overhead when local and pinned. 1231 + */ 1232 + if (!is_local) 1233 + return false; 1234 + return is_first || is_pinned; 1235 + } 1236 + return is_local; 1237 + } 1238 + #else 1239 + static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned) 1240 + { 1241 + return is_local; 1242 + } 1243 + #endif 1244 + 1245 + static inline bool hrtimer_keep_base(struct hrtimer *timer, bool is_local, bool is_first, 1246 + bool is_pinned) 1247 + { 1248 + /* If the timer is running the callback it has to stay on its CPU base. 
*/ 1249 + if (unlikely(timer->base->running == timer)) 1250 + return true; 1251 + 1252 + return hrtimer_prefer_local(is_local, is_first, is_pinned); 1253 + } 1254 + 1223 1255 static bool __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, 1224 1256 const enum hrtimer_mode mode, struct hrtimer_clock_base *base) 1225 1257 { 1226 1258 struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); 1227 - struct hrtimer_clock_base *new_base; 1228 - bool force_local, first, was_armed; 1259 + bool is_pinned, first, was_first, was_armed, keep_base = false; 1260 + struct hrtimer_cpu_base *cpu_base = base->cpu_base; 1261 + 1262 + was_first = cpu_base->next_timer == timer; 1263 + is_pinned = !!(mode & HRTIMER_MODE_PINNED); 1229 1264 1230 1265 /* 1231 - * If the timer is on the local cpu base and is the first expiring 1232 - * timer then this might end up reprogramming the hardware twice 1233 - * (on removal and on enqueue). To avoid that prevent the reprogram 1234 - * on removal, keep the timer local to the current CPU and enforce 1235 - * reprogramming after it is queued no matter whether it is the new 1236 - * first expiring timer again or not. 1266 + * Don't keep it local if this enqueue happens on a unplugged CPU 1267 + * after hrtimer_cpu_dying() has been invoked. 1237 1268 */ 1238 - force_local = base->cpu_base == this_cpu_base; 1239 - force_local &= base->cpu_base->next_timer == timer; 1269 + if (likely(this_cpu_base->online)) { 1270 + bool is_local = cpu_base == this_cpu_base; 1240 1271 1241 - /* 1242 - * Don't force local queuing if this enqueue happens on a unplugged 1243 - * CPU after hrtimer_cpu_dying() has been invoked. 1244 - */ 1245 - force_local &= this_cpu_base->online; 1272 + keep_base = hrtimer_keep_base(timer, is_local, was_first, is_pinned); 1273 + } 1246 1274 1247 1275 /* 1248 1276 * Remove an active timer from the queue. In case it is not queued ··· 1282 1254 * reprogramming later if it was the first expiring timer. 
This 1283 1255 * avoids programming the underlying clock event twice (once at 1284 1256 * removal and once after enqueue). 1257 + * 1258 + * @keep_base is also true if the timer callback is running on a 1259 + * remote CPU and for local pinned timers. 1285 1260 */ 1286 - was_armed = remove_hrtimer(timer, base, true, force_local); 1261 + was_armed = remove_hrtimer(timer, base, true, keep_base); 1287 1262 1288 1263 if (mode & HRTIMER_MODE_REL) 1289 1264 tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid)); ··· 1296 1265 hrtimer_set_expires_range_ns(timer, tim, delta_ns); 1297 1266 1298 1267 /* Switch the timer base, if necessary: */ 1299 - if (!force_local) 1300 - new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); 1301 - else 1302 - new_base = base; 1268 + if (!keep_base) { 1269 + base = switch_hrtimer_base(timer, base, is_pinned); 1270 + cpu_base = base->cpu_base; 1271 + } 1303 1272 1304 - first = enqueue_hrtimer(timer, new_base, mode, was_armed); 1273 + first = enqueue_hrtimer(timer, base, mode, was_armed); 1305 1274 1306 1275 /* 1307 1276 * If the hrtimer interrupt is running, then it will reevaluate the 1308 1277 * clock bases and reprogram the clock event device. 1309 1278 */ 1310 - if (new_base->cpu_base->in_hrtirq) 1279 + if (cpu_base->in_hrtirq) 1311 1280 return false; 1312 1281 1313 - if (!force_local) { 1282 + if (!was_first || cpu_base != this_cpu_base) { 1314 1283 /* 1315 1284 * If the current CPU base is online, then the timer is never 1316 1285 * queued on a remote CPU if it would be the first expiring ··· 1319 1288 * re-evaluate the first expiring timer after completing the 1320 1289 * callbacks. 1321 1290 */ 1322 - if (hrtimer_base_is_online(this_cpu_base)) 1291 + if (likely(hrtimer_base_is_online(this_cpu_base))) 1323 1292 return first; 1324 1293 1325 1294 /* ··· 1327 1296 * already offline. If the timer is the first to expire, 1328 1297 * kick the remote CPU to reprogram the clock event. 
1329 1298 */ 1330 - if (first) { 1331 - struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; 1332 - 1333 - smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); 1334 - } 1299 + if (first) 1300 + smp_call_function_single_async(cpu_base->cpu, &cpu_base->csd); 1335 1301 return false; 1336 1302 } 1337 1303 ··· 1342 1314 * required. 1343 1315 */ 1344 1316 if (timer->is_lazy) { 1345 - if (new_base->cpu_base->expires_next <= hrtimer_get_expires(timer)) 1317 + if (cpu_base->expires_next <= hrtimer_get_expires(timer)) 1346 1318 return false; 1347 1319 } 1348 1320 1349 1321 /* 1350 - * Timer was forced to stay on the current CPU to avoid 1351 - * reprogramming on removal and enqueue. Force reprogram the 1352 - * hardware by evaluating the new first expiring timer. 1322 + * Timer was the first expiring timer and forced to stay on the 1323 + * current CPU to avoid reprogramming on removal and enqueue. Force 1324 + * reprogram the hardware by evaluating the new first expiring 1325 + * timer. 1353 1326 */ 1354 - hrtimer_force_reprogram(new_base->cpu_base, /* skip_equal */ true); 1327 + hrtimer_force_reprogram(cpu_base, /* skip_equal */ true); 1355 1328 return false; 1356 1329 } 1357 1330