Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

cpufreq, sched/util: Optimize operations with single CPU capacity lookup

The max CPU capacity is the same for all CPUs sharing a frequency domain.
There is a way to avoid heavy operations in a loop for each CPU by
leveraging this knowledge. Thus, simplify the looping code in
sugov_next_freq_shared() and drop heavy multiplications. Instead, use
a simple max() to get the highest utilization from these CPUs.

This is useful for platforms with many (4 or 6) little CPUs. We avoid
heavy 2*PD_CPU_NUM multiplications in that loop, which is called billions
of times, since it's not limited by the schedutil time delta filter in
sugov_should_update_freq(). When there is no need to change the frequency,
the code bails out without updating sg_policy::last_freq_update_time.
Then every visit made after a delta_ns longer than
sg_policy::freq_update_delay_ns goes through and triggers the next
frequency calculation. However, if the resulting next frequency is the
same as the current frequency, we again don't update
sg_policy::last_freq_update_time and the story repeats (within a very
short period, sometimes a few microseconds).

The max CPU capacity must be fetched every time we are called, due to
difficulties during the policy setup, where we are not able to get the
normalized CPU capacity at the right time.

The fetched CPU capacity value is then used in sugov_iowait_apply() to
calculate the right boost. This required a few changes in the local
functions and arguments. The capacity value should hopefully be fetched
once when needed and then passed over CPU registers to those functions.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20221208160256.859-2-lukasz.luba@arm.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Patrick Bellasi <patrick.bellasi@arm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>

authored by

Lukasz Luba and committed by
Ingo Molnar
948fb4c4 160fb0d8

+23 -20
+23 -20
kernel/sched/cpufreq_schedutil.c
··· 48 48 49 49 unsigned long util; 50 50 unsigned long bw_dl; 51 - unsigned long max; 52 51 53 52 /* The field below is for single-CPU policies only: */ 54 53 #ifdef CONFIG_NO_HZ_COMMON ··· 157 158 { 158 159 struct rq *rq = cpu_rq(sg_cpu->cpu); 159 160 160 - sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); 161 161 sg_cpu->bw_dl = cpu_bw_dl(rq); 162 162 sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), 163 163 FREQUENCY_UTIL, NULL); ··· 236 238 * sugov_iowait_apply() - Apply the IO boost to a CPU. 237 239 * @sg_cpu: the sugov data for the cpu to boost 238 240 * @time: the update time from the caller 241 + * @max_cap: the max CPU capacity 239 242 * 240 243 * A CPU running a task which woken up after an IO operation can have its 241 244 * utilization boosted to speed up the completion of those IO operations. ··· 250 251 * This mechanism is designed to boost high frequently IO waiting tasks, while 251 252 * being more conservative on tasks which does sporadic IO operations. 252 253 */ 253 - static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) 254 + static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, 255 + unsigned long max_cap) 254 256 { 255 257 unsigned long boost; 256 258 ··· 280 280 * sg_cpu->util is already in capacity scale; convert iowait_boost 281 281 * into the same scale so we can compare. 
282 282 */ 283 - boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; 283 + boost = (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT; 284 284 boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); 285 285 if (sg_cpu->util < boost) 286 286 sg_cpu->util = boost; ··· 310 310 } 311 311 312 312 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, 313 - u64 time, unsigned int flags) 313 + u64 time, unsigned long max_cap, 314 + unsigned int flags) 314 315 { 315 316 sugov_iowait_boost(sg_cpu, time, flags); 316 317 sg_cpu->last_update = time; ··· 322 321 return false; 323 322 324 323 sugov_get_util(sg_cpu); 325 - sugov_iowait_apply(sg_cpu, time); 324 + sugov_iowait_apply(sg_cpu, time, max_cap); 326 325 327 326 return true; 328 327 } ··· 333 332 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 334 333 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 335 334 unsigned int cached_freq = sg_policy->cached_raw_freq; 335 + unsigned long max_cap; 336 336 unsigned int next_f; 337 337 338 - if (!sugov_update_single_common(sg_cpu, time, flags)) 338 + max_cap = arch_scale_cpu_capacity(sg_cpu->cpu); 339 + 340 + if (!sugov_update_single_common(sg_cpu, time, max_cap, flags)) 339 341 return; 340 342 341 - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); 343 + next_f = get_next_freq(sg_policy, sg_cpu->util, max_cap); 342 344 /* 343 345 * Do not reduce the frequency if the CPU has not been idle 344 346 * recently, as the reduction is likely to be premature then. 
··· 378 374 { 379 375 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 380 376 unsigned long prev_util = sg_cpu->util; 377 + unsigned long max_cap; 381 378 382 379 /* 383 380 * Fall back to the "frequency" path if frequency invariance is not ··· 390 385 return; 391 386 } 392 387 393 - if (!sugov_update_single_common(sg_cpu, time, flags)) 388 + max_cap = arch_scale_cpu_capacity(sg_cpu->cpu); 389 + 390 + if (!sugov_update_single_common(sg_cpu, time, max_cap, flags)) 394 391 return; 395 392 396 393 /* ··· 406 399 sg_cpu->util = prev_util; 407 400 408 401 cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), 409 - map_util_perf(sg_cpu->util), sg_cpu->max); 402 + map_util_perf(sg_cpu->util), max_cap); 410 403 411 404 sg_cpu->sg_policy->last_freq_update_time = time; 412 405 } ··· 415 408 { 416 409 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 417 410 struct cpufreq_policy *policy = sg_policy->policy; 418 - unsigned long util = 0, max = 1; 411 + unsigned long util = 0, max_cap; 419 412 unsigned int j; 413 + 414 + max_cap = arch_scale_cpu_capacity(sg_cpu->cpu); 420 415 421 416 for_each_cpu(j, policy->cpus) { 422 417 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); 423 - unsigned long j_util, j_max; 424 418 425 419 sugov_get_util(j_sg_cpu); 426 - sugov_iowait_apply(j_sg_cpu, time); 427 - j_util = j_sg_cpu->util; 428 - j_max = j_sg_cpu->max; 420 + sugov_iowait_apply(j_sg_cpu, time, max_cap); 429 421 430 - if (j_util * max > j_max * util) { 431 - util = j_util; 432 - max = j_max; 433 - } 422 + util = max(j_sg_cpu->util, util); 434 423 } 435 424 436 - return get_next_freq(sg_policy, util, max); 425 + return get_next_freq(sg_policy, util, max_cap); 437 426 } 438 427 439 428 static void