Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

hrtimer: Use and report correct timerslack values for realtime tasks

The timerslack_ns setting is used to specify how much the hardware
timers should be delayed, to potentially dispatch multiple timers in a
single interrupt. This is a performance optimization. Timers of
realtime tasks (having a realtime scheduling policy) should not be
delayed.

This logic was inconsistently applied to the hrtimers, leading to delays
of realtime tasks which used timed waits for events (e.g. condition
variables). Due to the downstream override of the slack for rt tasks,
the procfs reported incorrect (non-zero) timerslack_ns values.

This is changed by setting the timer_slack_ns task attribute to 0 for
all tasks with an rt policy. As a result, downstream users do not need to
specially handle rt tasks (w.r.t. the slack), and the procfs entry
shows the correct value of "0". Setting non-zero slack values (either
via procfs or PR_SET_TIMERSLACK) on tasks with a rt policy is ignored,
as stated in "man 2 PR_SET_TIMERSLACK":

Timer slack is not applied to threads that are scheduled under a
real-time scheduling policy (see sched_setscheduler(2)).

The special handling of timerslack on rt tasks in downstream users
is removed as well.

Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20240814121032.368444-2-felix.moessbauer@siemens.com

authored by

Felix Moessbauer and committed by
Thomas Gleixner
ed4fb6d7 330dd6d9

+22 -26
+5 -4
fs/proc/base.c
··· 2569 2569 } 2570 2570 2571 2571 task_lock(p); 2572 - if (slack_ns == 0) 2573 - p->timer_slack_ns = p->default_timer_slack_ns; 2574 - else 2575 - p->timer_slack_ns = slack_ns; 2572 + if (task_is_realtime(p)) 2573 + slack_ns = 0; 2574 + else if (slack_ns == 0) 2575 + slack_ns = p->default_timer_slack_ns; 2576 + p->timer_slack_ns = slack_ns; 2576 2577 task_unlock(p); 2577 2578 2578 2579 out:
+4 -7
fs/select.c
··· 77 77 { 78 78 u64 ret; 79 79 struct timespec64 now; 80 + u64 slack = current->timer_slack_ns; 80 81 81 - /* 82 - * Realtime tasks get a slack of 0 for obvious reasons. 83 - */ 84 - 85 - if (rt_task(current)) 82 + if (slack == 0) 86 83 return 0; 87 84 88 85 ktime_get_ts64(&now); 89 86 now = timespec64_sub(*tv, now); 90 87 ret = __estimate_accuracy(&now); 91 - if (ret < current->timer_slack_ns) 92 - return current->timer_slack_ns; 88 + if (ret < slack) 89 + return slack; 93 90 return ret; 94 91 } 95 92
+8
kernel/sched/syscalls.c
··· 406 406 else if (fair_policy(policy)) 407 407 p->static_prio = NICE_TO_PRIO(attr->sched_nice); 408 408 409 + /* rt-policy tasks do not have a timerslack */ 410 + if (task_is_realtime(p)) { 411 + p->timer_slack_ns = 0; 412 + } else if (p->timer_slack_ns == 0) { 413 + /* when switching back to non-rt policy, restore timerslack */ 414 + p->timer_slack_ns = p->default_timer_slack_ns; 415 + } 416 + 409 417 /* 410 418 * __sched_setscheduler() ensures attr->sched_priority == 0 when 411 419 * !rt_policy. Always setting this ensures that things like
+2
kernel/sys.c
··· 2557 2557 error = current->timer_slack_ns; 2558 2558 break; 2559 2559 case PR_SET_TIMERSLACK: 2560 + if (task_is_realtime(current)) 2561 + break; 2560 2562 if (arg2 <= 0) 2561 2563 current->timer_slack_ns = 2562 2564 current->default_timer_slack_ns;
+3 -15
kernel/time/hrtimer.c
··· 2074 2074 struct restart_block *restart; 2075 2075 struct hrtimer_sleeper t; 2076 2076 int ret = 0; 2077 - u64 slack; 2078 - 2079 - slack = current->timer_slack_ns; 2080 - if (rt_task(current)) 2081 - slack = 0; 2082 2077 2083 2078 hrtimer_init_sleeper_on_stack(&t, clockid, mode); 2084 - hrtimer_set_expires_range_ns(&t.timer, rqtp, slack); 2079 + hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); 2085 2080 ret = do_nanosleep(&t, mode); 2086 2081 if (ret != -ERESTART_RESTARTBLOCK) 2087 2082 goto out; ··· 2246 2251 /** 2247 2252 * schedule_hrtimeout_range_clock - sleep until timeout 2248 2253 * @expires: timeout value (ktime_t) 2249 - * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks 2254 + * @delta: slack in expires timeout (ktime_t) 2250 2255 * @mode: timer mode 2251 2256 * @clock_id: timer clock to be used 2252 2257 */ ··· 2273 2278 return -EINTR; 2274 2279 } 2275 2280 2276 - /* 2277 - * Override any slack passed by the user if under 2278 - * rt contraints. 2279 - */ 2280 - if (rt_task(current)) 2281 - delta = 0; 2282 - 2283 2281 hrtimer_init_sleeper_on_stack(&t, clock_id, mode); 2284 2282 hrtimer_set_expires_range_ns(&t.timer, *expires, delta); 2285 2283 hrtimer_sleeper_start_expires(&t, mode); ··· 2292 2304 /** 2293 2305 * schedule_hrtimeout_range - sleep until timeout 2294 2306 * @expires: timeout value (ktime_t) 2295 - * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks 2307 + * @delta: slack in expires timeout (ktime_t) 2296 2308 * @mode: timer mode 2297 2309 * 2298 2310 * Make the current task sleep until the given expiry time has