Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched/core: Reorganize cgroup bandwidth control interface file writes

- Move input parameter validation from tg_set_cfs_bandwidth() to the new
outer function tg_set_bandwidth(). The outer function handles parameters
in usecs, validates them and calls tg_set_cfs_bandwidth() which converts
them into nsecs. This matches tg_bandwidth() on the read side.

- max/min_cfs_* consts are now used by tg_set_bandwidth(). Relocate, convert
into usecs and drop "cfs" from the names.

- Reimplement cpu_cfs_{period|quota|burst}_write_*() using tg_bandwidth()
and tg_set_bandwidth() and replace "cfs" in the names with "bw".

- Update cpu_max_write() to use tg_set_bandwidth(). cpu_period_quota_parse()
is updated to drop nsec conversion accordingly. This aligns the behavior
with cpu_period_quota_print().

- Drop now unused tg_set_cfs_{period|quota|burst}().

- While at it, for consistency, rename default_cfs_period() to
default_bw_period_us() and make it return usecs.

This is to prepare for adding bandwidth control support to sched_ext.
tg_set_bandwidth() will be used as the muxing point. No functional changes
intended.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250614012346.2358261-5-tj@kernel.org

authored by

Tejun Heo and committed by
Peter Zijlstra
5bc34be4 43e33f53

+106 -113
+99 -106
kernel/sched/core.c
··· 9309 9309 #ifdef CONFIG_CFS_BANDWIDTH 9310 9310 static DEFINE_MUTEX(cfs_constraints_mutex); 9311 9311 9312 - const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ 9313 - static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ 9314 - /* More than 203 days if BW_SHIFT equals 20. */ 9315 - static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; 9316 - 9317 9312 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); 9318 9313 9319 - static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, 9320 - u64 burst) 9314 + static int tg_set_cfs_bandwidth(struct task_group *tg, 9315 + u64 period_us, u64 quota_us, u64 burst_us) 9321 9316 { 9322 9317 int i, ret = 0, runtime_enabled, runtime_was_enabled; 9323 9318 struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; 9319 + u64 period, quota, burst; 9324 9320 9325 - if (tg == &root_task_group) 9326 - return -EINVAL; 9321 + period = (u64)period_us * NSEC_PER_USEC; 9327 9322 9328 - /* 9329 - * Ensure we have at some amount of bandwidth every period. This is 9330 - * to prevent reaching a state of large arrears when throttled via 9331 - * entity_tick() resulting in prolonged exit starvation. 9332 - */ 9333 - if (quota < min_cfs_quota_period || period < min_cfs_quota_period) 9334 - return -EINVAL; 9323 + if (quota_us == RUNTIME_INF) 9324 + quota = RUNTIME_INF; 9325 + else 9326 + quota = (u64)quota_us * NSEC_PER_USEC; 9335 9327 9336 - /* 9337 - * Likewise, bound things on the other side by preventing insane quota 9338 - * periods. This also allows us to normalize in computing quota 9339 - * feasibility. 9340 - */ 9341 - if (period > max_cfs_quota_period) 9342 - return -EINVAL; 9343 - 9344 - /* 9345 - * Bound quota to defend quota against overflow during bandwidth shift. 
9346 - */ 9347 - if (quota != RUNTIME_INF && quota > max_cfs_runtime) 9348 - return -EINVAL; 9349 - 9350 - if (quota != RUNTIME_INF && (burst > quota || 9351 - burst + quota > max_cfs_runtime)) 9352 - return -EINVAL; 9328 + burst = (u64)burst_us * NSEC_PER_USEC; 9353 9329 9354 9330 /* 9355 9331 * Prevent race between setting of cfs_rq->runtime_enabled and ··· 9411 9435 do_div(burst_us, NSEC_PER_USEC); 9412 9436 9413 9437 return burst_us; 9414 - } 9415 - 9416 - static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) 9417 - { 9418 - u64 quota, period, burst; 9419 - 9420 - if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC) 9421 - return -EINVAL; 9422 - 9423 - period = (u64)cfs_period_us * NSEC_PER_USEC; 9424 - quota = tg->cfs_bandwidth.quota; 9425 - burst = tg->cfs_bandwidth.burst; 9426 - 9427 - return tg_set_cfs_bandwidth(tg, period, quota, burst); 9428 - } 9429 - 9430 - static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) 9431 - { 9432 - u64 quota, period, burst; 9433 - 9434 - period = ktime_to_ns(tg->cfs_bandwidth.period); 9435 - burst = tg->cfs_bandwidth.burst; 9436 - if (cfs_quota_us < 0) 9437 - quota = RUNTIME_INF; 9438 - else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC) 9439 - quota = (u64)cfs_quota_us * NSEC_PER_USEC; 9440 - else 9441 - return -EINVAL; 9442 - 9443 - return tg_set_cfs_bandwidth(tg, period, quota, burst); 9444 - } 9445 - 9446 - static int tg_set_cfs_burst(struct task_group *tg, long cfs_burst_us) 9447 - { 9448 - u64 quota, period, burst; 9449 - 9450 - if ((u64)cfs_burst_us > U64_MAX / NSEC_PER_USEC) 9451 - return -EINVAL; 9452 - 9453 - burst = (u64)cfs_burst_us * NSEC_PER_USEC; 9454 - period = ktime_to_ns(tg->cfs_bandwidth.period); 9455 - quota = tg->cfs_bandwidth.quota; 9456 - 9457 - return tg_set_cfs_bandwidth(tg, period, quota, burst); 9458 9438 } 9459 9439 9460 9440 struct cfs_schedulable_data { ··· 9546 9614 return 0; 9547 9615 } 9548 9616 9617 + const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s 
*/ 9618 + static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */ 9619 + /* More than 203 days if BW_SHIFT equals 20. */ 9620 + static const u64 max_bw_runtime_us = MAX_BW; 9621 + 9549 9622 static void tg_bandwidth(struct task_group *tg, 9550 9623 u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p) 9551 9624 { ··· 9571 9634 return period_us; 9572 9635 } 9573 9636 9637 + static int tg_set_bandwidth(struct task_group *tg, 9638 + u64 period_us, u64 quota_us, u64 burst_us) 9639 + { 9640 + const u64 max_usec = U64_MAX / NSEC_PER_USEC; 9641 + 9642 + if (tg == &root_task_group) 9643 + return -EINVAL; 9644 + 9645 + /* Values should survive translation to nsec */ 9646 + if (period_us > max_usec || 9647 + (quota_us != RUNTIME_INF && quota_us > max_usec) || 9648 + burst_us > max_usec) 9649 + return -EINVAL; 9650 + 9651 + /* 9652 + * Ensure we have some amount of bandwidth every period. This is to 9653 + * prevent reaching a state of large arrears when throttled via 9654 + * entity_tick() resulting in prolonged exit starvation. 9655 + */ 9656 + if (quota_us < min_bw_quota_period_us || 9657 + period_us < min_bw_quota_period_us) 9658 + return -EINVAL; 9659 + 9660 + /* 9661 + * Likewise, bound things on the other side by preventing insane quota 9662 + * periods. This also allows us to normalize in computing quota 9663 + * feasibility. 9664 + */ 9665 + if (period_us > max_bw_quota_period_us) 9666 + return -EINVAL; 9667 + 9668 + /* 9669 + * Bound quota to defend quota against overflow during bandwidth shift. 
9670 + */ 9671 + if (quota_us != RUNTIME_INF && quota_us > max_bw_runtime_us) 9672 + return -EINVAL; 9673 + 9674 + if (quota_us != RUNTIME_INF && (burst_us > quota_us || 9675 + burst_us + quota_us > max_bw_runtime_us)) 9676 + return -EINVAL; 9677 + 9678 + return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us); 9679 + } 9680 + 9574 9681 static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css, 9575 9682 struct cftype *cft) 9576 9683 { ··· 9633 9652 return burst_us; 9634 9653 } 9635 9654 9636 - static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css, 9637 - struct cftype *cftype, u64 cfs_period_us) 9655 + static int cpu_period_write_u64(struct cgroup_subsys_state *css, 9656 + struct cftype *cftype, u64 period_us) 9638 9657 { 9639 - return tg_set_cfs_period(css_tg(css), cfs_period_us); 9658 + struct task_group *tg = css_tg(css); 9659 + u64 quota_us, burst_us; 9660 + 9661 + tg_bandwidth(tg, NULL, &quota_us, &burst_us); 9662 + return tg_set_bandwidth(tg, period_us, quota_us, burst_us); 9640 9663 } 9641 9664 9642 - static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css, 9643 - struct cftype *cftype, s64 cfs_quota_us) 9665 + static int cpu_quota_write_s64(struct cgroup_subsys_state *css, 9666 + struct cftype *cftype, s64 quota_us) 9644 9667 { 9645 - return tg_set_cfs_quota(css_tg(css), cfs_quota_us); 9668 + struct task_group *tg = css_tg(css); 9669 + u64 period_us, burst_us; 9670 + 9671 + if (quota_us < 0) 9672 + quota_us = RUNTIME_INF; 9673 + 9674 + tg_bandwidth(tg, &period_us, NULL, &burst_us); 9675 + return tg_set_bandwidth(tg, period_us, quota_us, burst_us); 9646 9676 } 9647 9677 9648 - static int cpu_cfs_burst_write_u64(struct cgroup_subsys_state *css, 9649 - struct cftype *cftype, u64 cfs_burst_us) 9678 + static int cpu_burst_write_u64(struct cgroup_subsys_state *css, 9679 + struct cftype *cftype, u64 burst_us) 9650 9680 { 9651 - return tg_set_cfs_burst(css_tg(css), cfs_burst_us); 9681 + struct task_group *tg = css_tg(css); 9682 
+ u64 period_us, quota_us; 9683 + 9684 + tg_bandwidth(tg, &period_us, &quota_us, NULL); 9685 + return tg_set_bandwidth(tg, period_us, quota_us, burst_us); 9652 9686 } 9653 9687 #endif /* CONFIG_CFS_BANDWIDTH */ 9654 9688 ··· 9729 9733 { 9730 9734 .name = "cfs_period_us", 9731 9735 .read_u64 = cpu_period_read_u64, 9732 - .write_u64 = cpu_cfs_period_write_u64, 9736 + .write_u64 = cpu_period_write_u64, 9733 9737 }, 9734 9738 { 9735 9739 .name = "cfs_quota_us", 9736 9740 .read_s64 = cpu_quota_read_s64, 9737 - .write_s64 = cpu_cfs_quota_write_s64, 9741 + .write_s64 = cpu_quota_write_s64, 9738 9742 }, 9739 9743 { 9740 9744 .name = "cfs_burst_us", 9741 9745 .read_u64 = cpu_burst_read_u64, 9742 - .write_u64 = cpu_cfs_burst_write_u64, 9746 + .write_u64 = cpu_burst_write_u64, 9743 9747 }, 9744 9748 { 9745 9749 .name = "stat", ··· 9936 9940 } 9937 9941 9938 9942 /* caller should put the current value in *@periodp before calling */ 9939 - static int __maybe_unused cpu_period_quota_parse(char *buf, 9940 - u64 *periodp, u64 *quotap) 9943 + static int __maybe_unused cpu_period_quota_parse(char *buf, u64 *period_us_p, 9944 + u64 *quota_us_p) 9941 9945 { 9942 9946 char tok[21]; /* U64_MAX */ 9943 9947 9944 - if (sscanf(buf, "%20s %llu", tok, periodp) < 1) 9948 + if (sscanf(buf, "%20s %llu", tok, period_us_p) < 1) 9945 9949 return -EINVAL; 9946 9950 9947 - *periodp *= NSEC_PER_USEC; 9948 - 9949 - if (sscanf(tok, "%llu", quotap)) 9950 - *quotap *= NSEC_PER_USEC; 9951 - else if (!strcmp(tok, "max")) 9952 - *quotap = RUNTIME_INF; 9953 - else 9954 - return -EINVAL; 9951 + if (sscanf(tok, "%llu", quota_us_p) < 1) { 9952 + if (!strcmp(tok, "max")) 9953 + *quota_us_p = RUNTIME_INF; 9954 + else 9955 + return -EINVAL; 9956 + } 9955 9957 9956 9958 return 0; 9957 9959 } ··· 9969 9975 char *buf, size_t nbytes, loff_t off) 9970 9976 { 9971 9977 struct task_group *tg = css_tg(of_css(of)); 9972 - u64 period = tg_get_cfs_period(tg); 9973 - u64 burst = tg->cfs_bandwidth.burst; 9974 - u64 quota; 9978 
+ u64 period_us, quota_us, burst_us; 9975 9979 int ret; 9976 9980 9977 - ret = cpu_period_quota_parse(buf, &period, &quota); 9981 + tg_bandwidth(tg, &period_us, NULL, &burst_us); 9982 + ret = cpu_period_quota_parse(buf, &period_us, &quota_us); 9978 9983 if (!ret) 9979 - ret = tg_set_cfs_bandwidth(tg, period, quota, burst); 9984 + ret = tg_set_bandwidth(tg, period_us, quota_us, burst_us); 9980 9985 return ret ?: nbytes; 9981 9986 } 9982 9987 #endif /* CONFIG_CFS_BANDWIDTH */ ··· 10012 10019 .name = "max.burst", 10013 10020 .flags = CFTYPE_NOT_ON_ROOT, 10014 10021 .read_u64 = cpu_burst_read_u64, 10015 - .write_u64 = cpu_cfs_burst_write_u64, 10022 + .write_u64 = cpu_burst_write_u64, 10016 10023 }, 10017 10024 #endif /* CONFIG_CFS_BANDWIDTH */ 10018 10025 #ifdef CONFIG_UCLAMP_TASK_GROUP
+2 -2
kernel/sched/fair.c
··· 6422 6422 * to fail. 6423 6423 */ 6424 6424 new = old * 2; 6425 - if (new < max_cfs_quota_period) { 6425 + if (new < max_bw_quota_period_us * NSEC_PER_USEC) { 6426 6426 cfs_b->period = ns_to_ktime(new); 6427 6427 cfs_b->quota *= 2; 6428 6428 cfs_b->burst *= 2; ··· 6456 6456 raw_spin_lock_init(&cfs_b->lock); 6457 6457 cfs_b->runtime = 0; 6458 6458 cfs_b->quota = RUNTIME_INF; 6459 - cfs_b->period = ns_to_ktime(default_cfs_period()); 6459 + cfs_b->period = us_to_ktime(default_bw_period_us()); 6460 6460 cfs_b->burst = 0; 6461 6461 cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF; 6462 6462
+5 -5
kernel/sched/sched.h
··· 403 403 extern struct list_head task_groups; 404 404 405 405 #ifdef CONFIG_CFS_BANDWIDTH 406 - extern const u64 max_cfs_quota_period; 406 + extern const u64 max_bw_quota_period_us; 407 407 408 408 /* 409 - * default period for cfs group bandwidth. 410 - * default: 0.1s, units: nanoseconds 409 + * default period for group bandwidth. 410 + * default: 0.1s, units: microseconds 411 411 */ 412 - static inline u64 default_cfs_period(void) 412 + static inline u64 default_bw_period_us(void) 413 413 { 414 - return 100000000ULL; 414 + return 100000ULL; 415 415 } 416 416 #endif /* CONFIG_CFS_BANDWIDTH */ 417 417