Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into for-6.12

Pull tip/sched/core to resolve the following four conflicts. While 2-4 are
simple context conflicts, 1 is a bit subtle and easy to resolve incorrectly.

1. 2c8d046d5d51 ("sched: Add normal_policy()")
vs.
faa42d29419d ("sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy")

The former converts direct test on p->policy to use the helper
normal_policy(). The latter moves the p->policy test to a different
location. Resolve by converting the test on p->policy in the new location to
use normal_policy().

2. a7a9fc549293 ("sched_ext: Add boilerplate for extensible scheduler class")
vs.
a110a81c52a9 ("sched/deadline: Deferrable dl server")

Both add calls to put_prev_task_idle() and set_next_task_idle(). Simple
context conflict. Resolve by taking changes from both.

3. a7a9fc549293 ("sched_ext: Add boilerplate for extensible scheduler class")
vs.
c245910049d0 ("sched/core: Add clearing of ->dl_server in put_prev_task_balance()")

The former changes for_each_class() iteration to use for_each_active_class().
The latter moves away the adjacent dl_server handling code. Simple context
conflict. Resolve by taking changes from both.

4. 60c27fb59f6c ("sched_ext: Implement sched_ext_ops.cpu_online/offline()")
vs.
31b164e2e4af ("sched/smt: Introduce sched_smt_present_inc/dec() helper")
2f027354122f ("sched/core: Introduce sched_set_rq_on/offline() helper")

The former adds scx_rq_deactivate() call. The latter two change code around
it. Simple context conflict. Resolve by taking changes from both.

Signed-off-by: Tejun Heo <tj@kernel.org>

+895 -282
+16 -1
include/linux/sched.h
··· 641 641 * 642 642 * @dl_overrun tells if the task asked to be informed about runtime 643 643 * overruns. 644 + * 645 + * @dl_server tells if this is a server entity. 646 + * 647 + * @dl_defer tells if this is a deferred or regular server. For 648 + * now only defer server exists. 649 + * 650 + * @dl_defer_armed tells if the deferrable server is waiting 651 + * for the replenishment timer to activate it. 652 + * 653 + * @dl_defer_running tells if the deferrable server is actually 654 + * running, skipping the defer phase. 644 655 */ 645 656 unsigned int dl_throttled : 1; 646 657 unsigned int dl_yielded : 1; 647 658 unsigned int dl_non_contending : 1; 648 659 unsigned int dl_overrun : 1; 649 660 unsigned int dl_server : 1; 661 + unsigned int dl_defer : 1; 662 + unsigned int dl_defer_armed : 1; 663 + unsigned int dl_defer_running : 1; 650 664 651 665 /* 652 666 * Bandwidth enforcement timer. Each -deadline task has its ··· 688 674 */ 689 675 struct rq *rq; 690 676 dl_server_has_tasks_f server_has_tasks; 691 - dl_server_pick_f server_pick; 677 + dl_server_pick_f server_pick_next; 678 + dl_server_pick_f server_pick_task; 692 679 693 680 #ifdef CONFIG_RT_MUTEXES 694 681 /*
+107 -39
kernel/sched/core.c
··· 163 163 if (p->sched_class == &stop_sched_class) /* trumps deadline */ 164 164 return -2; 165 165 166 + if (p->dl_server) 167 + return -1; /* deadline */ 168 + 166 169 if (rt_prio(p->prio)) /* includes deadline */ 167 170 return p->prio; /* [-1, 99] */ 168 171 ··· 198 195 if (-pb < -pa) 199 196 return false; 200 197 201 - if (pa == -1) /* dl_prio() doesn't work because of stop_class above */ 202 - return !dl_time_before(a->dl.deadline, b->dl.deadline); 198 + if (pa == -1) { /* dl_prio() doesn't work because of stop_class above */ 199 + const struct sched_dl_entity *a_dl, *b_dl; 200 + 201 + a_dl = &a->dl; 202 + /* 203 + * Since,'a' and 'b' can be CFS tasks served by DL server, 204 + * __task_prio() can return -1 (for DL) even for those. In that 205 + * case, get to the dl_server's DL entity. 206 + */ 207 + if (a->dl_server) 208 + a_dl = a->dl_server; 209 + 210 + b_dl = &b->dl; 211 + if (b->dl_server) 212 + b_dl = b->dl_server; 213 + 214 + return !dl_time_before(a_dl->deadline, b_dl->deadline); 215 + } 203 216 204 217 if (pa == MAX_RT_PRIO + MAX_NICE) /* fair */ 205 218 return cfs_prio_less(a, b, in_fi); ··· 1299 1280 * dequeued by migrating while the constrained task continues to run. 1300 1281 * E.g. going from 2->1 without going through pick_next_task(). 1301 1282 */ 1302 - if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) { 1283 + if (__need_bw_check(rq, rq->curr)) { 1303 1284 if (cfs_task_bw_constrained(rq->curr)) 1304 1285 return false; 1305 1286 } ··· 2274 2255 struct task_struct *p = current; 2275 2256 2276 2257 if (p->migration_disabled) { 2258 + #ifdef CONFIG_DEBUG_PREEMPT 2259 + /* 2260 + *Warn about overflow half-way through the range. 
2261 + */ 2262 + WARN_ON_ONCE((s16)p->migration_disabled < 0); 2263 + #endif 2277 2264 p->migration_disabled++; 2278 2265 return; 2279 2266 } ··· 2298 2273 .flags = SCA_MIGRATE_ENABLE, 2299 2274 }; 2300 2275 2276 + #ifdef CONFIG_DEBUG_PREEMPT 2277 + /* 2278 + * Check both overflow from migrate_disable() and superfluous 2279 + * migrate_enable(). 2280 + */ 2281 + if (WARN_ON_ONCE((s16)p->migration_disabled <= 0)) 2282 + return; 2283 + #endif 2284 + 2301 2285 if (p->migration_disabled > 1) { 2302 2286 p->migration_disabled--; 2303 2287 return; 2304 2288 } 2305 - 2306 - if (WARN_ON_ONCE(!p->migration_disabled)) 2307 - return; 2308 2289 2309 2290 /* 2310 2291 * Ensure stop_task runs either before or after this, and that ··· 4768 4737 update_rq_clock(rq); 4769 4738 post_init_entity_util_avg(p); 4770 4739 4771 - activate_task(rq, p, ENQUEUE_NOCLOCK); 4740 + activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL); 4772 4741 trace_sched_wakeup_new(p); 4773 4742 wakeup_preempt(rq, p, WF_FORK); 4774 4743 #ifdef CONFIG_SMP ··· 5886 5855 #endif 5887 5856 5888 5857 put_prev_task(rq, prev); 5858 + 5859 + /* 5860 + * We've updated @prev and no longer need the server link, clear it. 5861 + * Must be done before ->pick_next_task() because that can (re)set 5862 + * ->dl_server. 5863 + */ 5864 + if (prev->dl_server) 5865 + prev->dl_server = NULL; 5889 5866 } 5890 5867 5891 5868 /* ··· 5928 5889 } 5929 5890 5930 5891 /* 5892 + * This is a normal CFS pick, but the previous could be a DL pick. 5893 + * Clear it as previous is no longer picked. 5894 + */ 5895 + if (prev->dl_server) 5896 + prev->dl_server = NULL; 5897 + 5898 + /* 5931 5899 * This is the fast path; it cannot be a DL server pick; 5932 5900 * therefore even if @p == @prev, ->dl_server must be NULL. 5933 5901 */ ··· 5946 5900 5947 5901 restart: 5948 5902 put_prev_task_balance(rq, prev, rf); 5949 - 5950 - /* 5951 - * We've updated @prev and no longer need the server link, clear it. 
5952 - * Must be done before ->pick_next_task() because that can (re)set 5953 - * ->dl_server. 5954 - */ 5955 - if (prev->dl_server) 5956 - prev->dl_server = NULL; 5957 5903 5958 5904 for_each_active_class(class) { 5959 5905 p = class->pick_next_task(rq); ··· 7963 7925 } 7964 7926 } 7965 7927 7928 + static inline void sched_set_rq_online(struct rq *rq, int cpu) 7929 + { 7930 + struct rq_flags rf; 7931 + 7932 + rq_lock_irqsave(rq, &rf); 7933 + if (rq->rd) { 7934 + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7935 + set_rq_online(rq); 7936 + } 7937 + rq_unlock_irqrestore(rq, &rf); 7938 + } 7939 + 7940 + static inline void sched_set_rq_offline(struct rq *rq, int cpu) 7941 + { 7942 + struct rq_flags rf; 7943 + 7944 + rq_lock_irqsave(rq, &rf); 7945 + if (rq->rd) { 7946 + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7947 + set_rq_offline(rq); 7948 + } 7949 + rq_unlock_irqrestore(rq, &rf); 7950 + } 7951 + 7966 7952 /* 7967 7953 * used to mark begin/end of suspend/resume: 7968 7954 */ ··· 8037 7975 return 0; 8038 7976 } 8039 7977 7978 + static inline void sched_smt_present_inc(int cpu) 7979 + { 7980 + #ifdef CONFIG_SCHED_SMT 7981 + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) 7982 + static_branch_inc_cpuslocked(&sched_smt_present); 7983 + #endif 7984 + } 7985 + 7986 + static inline void sched_smt_present_dec(int cpu) 7987 + { 7988 + #ifdef CONFIG_SCHED_SMT 7989 + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) 7990 + static_branch_dec_cpuslocked(&sched_smt_present); 7991 + #endif 7992 + } 7993 + 8040 7994 int sched_cpu_activate(unsigned int cpu) 8041 7995 { 8042 7996 struct rq *rq = cpu_rq(cpu); 8043 - struct rq_flags rf; 8044 7997 8045 7998 /* 8046 7999 * Clear the balance_push callback and prepare to schedule ··· 8063 7986 */ 8064 7987 balance_push_set(cpu, false); 8065 7988 8066 - #ifdef CONFIG_SCHED_SMT 8067 7989 /* 8068 7990 * When going up, increment the number of cores with SMT present. 
8069 7991 */ 8070 - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) 8071 - static_branch_inc_cpuslocked(&sched_smt_present); 8072 - #endif 7992 + sched_smt_present_inc(cpu); 8073 7993 set_cpu_active(cpu, true); 8074 7994 8075 7995 if (sched_smp_initialized) { ··· 8086 8012 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the 8087 8013 * domains. 8088 8014 */ 8089 - rq_lock_irqsave(rq, &rf); 8090 - if (rq->rd) { 8091 - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 8092 - set_rq_online(rq); 8093 - } 8094 - rq_unlock_irqrestore(rq, &rf); 8015 + sched_set_rq_online(rq, cpu); 8095 8016 8096 8017 return 0; 8097 8018 } ··· 8094 8025 int sched_cpu_deactivate(unsigned int cpu) 8095 8026 { 8096 8027 struct rq *rq = cpu_rq(cpu); 8097 - struct rq_flags rf; 8098 8028 int ret; 8099 8029 8100 8030 /* ··· 8124 8056 */ 8125 8057 synchronize_rcu(); 8126 8058 8127 - rq_lock_irqsave(rq, &rf); 8128 - if (rq->rd) { 8129 - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 8130 - set_rq_offline(rq); 8131 - } 8132 - rq_unlock_irqrestore(rq, &rf); 8059 + sched_set_rq_offline(rq, cpu); 8133 8060 8134 8061 scx_rq_deactivate(rq); 8135 8062 8136 - #ifdef CONFIG_SCHED_SMT 8137 8063 /* 8138 8064 * When going down, decrement the number of cores with SMT present. 
8139 8065 */ 8140 - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) 8141 - static_branch_dec_cpuslocked(&sched_smt_present); 8066 + sched_smt_present_dec(cpu); 8142 8067 8068 + #ifdef CONFIG_SCHED_SMT 8143 8069 sched_core_cpu_deactivate(cpu); 8144 8070 #endif 8145 8071 ··· 8143 8081 sched_update_numa(cpu, false); 8144 8082 ret = cpuset_cpu_inactive(cpu); 8145 8083 if (ret) { 8084 + sched_smt_present_inc(cpu); 8085 + sched_set_rq_online(rq, cpu); 8146 8086 balance_push_set(cpu, false); 8147 8087 set_cpu_active(cpu, true); 8148 8088 sched_update_numa(cpu, true); ··· 8354 8290 #endif /* CONFIG_RT_GROUP_SCHED */ 8355 8291 } 8356 8292 8357 - init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime()); 8358 - 8359 8293 #ifdef CONFIG_SMP 8360 8294 init_defrootdomain(); 8361 8295 #endif ··· 8408 8346 init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); 8409 8347 #endif /* CONFIG_FAIR_GROUP_SCHED */ 8410 8348 8411 - rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; 8412 8349 #ifdef CONFIG_RT_GROUP_SCHED 8350 + /* 8351 + * This is required for init cpu because rt.c:__enable_runtime() 8352 + * starts working after scheduler_running, which is not the case 8353 + * yet. 8354 + */ 8355 + rq->rt.rt_runtime = global_rt_runtime(); 8413 8356 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); 8414 8357 #endif 8415 8358 #ifdef CONFIG_SMP ··· 8446 8379 #endif /* CONFIG_SMP */ 8447 8380 hrtick_rq_init(rq); 8448 8381 atomic_set(&rq->nr_iowait, 0); 8382 + fair_server_init(rq); 8449 8383 8450 8384 #ifdef CONFIG_SCHED_CORE 8451 8385 rq->core = rq;
+6
kernel/sched/cputime.c
··· 582 582 } 583 583 584 584 stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); 585 + /* 586 + * Because mul_u64_u64_div_u64() can approximate on some 587 + * achitectures; enforce the constraint that: a*b/(b+c) <= a. 588 + */ 589 + if (unlikely(stime > rtime)) 590 + stime = rtime; 585 591 586 592 update: 587 593 /*
+390 -59
kernel/sched/deadline.c
··· 320 320 __sub_running_bw(dl_se->dl_bw, dl_rq); 321 321 } 322 322 323 - static void dl_change_utilization(struct task_struct *p, u64 new_bw) 323 + static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw) 324 324 { 325 - struct rq *rq; 325 + if (dl_se->dl_non_contending) { 326 + sub_running_bw(dl_se, &rq->dl); 327 + dl_se->dl_non_contending = 0; 326 328 327 - WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV); 328 - 329 - if (task_on_rq_queued(p)) 330 - return; 331 - 332 - rq = task_rq(p); 333 - if (p->dl.dl_non_contending) { 334 - sub_running_bw(&p->dl, &rq->dl); 335 - p->dl.dl_non_contending = 0; 336 329 /* 337 330 * If the timer handler is currently running and the 338 331 * timer cannot be canceled, inactive_task_timer() ··· 333 340 * will not touch the rq's active utilization, 334 341 * so we are still safe. 335 342 */ 336 - if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) 337 - put_task_struct(p); 343 + if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) { 344 + if (!dl_server(dl_se)) 345 + put_task_struct(dl_task_of(dl_se)); 346 + } 338 347 } 339 - __sub_rq_bw(p->dl.dl_bw, &rq->dl); 348 + __sub_rq_bw(dl_se->dl_bw, &rq->dl); 340 349 __add_rq_bw(new_bw, &rq->dl); 350 + } 351 + 352 + static void dl_change_utilization(struct task_struct *p, u64 new_bw) 353 + { 354 + WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV); 355 + 356 + if (task_on_rq_queued(p)) 357 + return; 358 + 359 + dl_rq_change_utilization(task_rq(p), &p->dl, new_bw); 341 360 } 342 361 343 362 static void __dl_clear_params(struct sched_dl_entity *dl_se); ··· 776 771 /* for non-boosted task, pi_of(dl_se) == dl_se */ 777 772 dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; 778 773 dl_se->runtime = pi_of(dl_se)->dl_runtime; 774 + 775 + /* 776 + * If it is a deferred reservation, and the server 777 + * is not handling an starvation case, defer it. 
778 + */ 779 + if (dl_se->dl_defer & !dl_se->dl_defer_running) { 780 + dl_se->dl_throttled = 1; 781 + dl_se->dl_defer_armed = 1; 782 + } 779 783 } 780 784 781 785 /* ··· 823 809 replenish_dl_new_period(dl_se, rq); 824 810 } 825 811 812 + static int start_dl_timer(struct sched_dl_entity *dl_se); 813 + static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t); 814 + 826 815 /* 827 816 * Pure Earliest Deadline First (EDF) scheduling does not deal with the 828 817 * possibility of a entity lasting more than what it declared, and thus ··· 854 837 /* 855 838 * This could be the case for a !-dl task that is boosted. 856 839 * Just go with full inherited parameters. 840 + * 841 + * Or, it could be the case of a deferred reservation that 842 + * was not able to consume its runtime in background and 843 + * reached this point with current u > U. 844 + * 845 + * In both cases, set a new period. 857 846 */ 858 - if (dl_se->dl_deadline == 0) 859 - replenish_dl_new_period(dl_se, rq); 847 + if (dl_se->dl_deadline == 0 || 848 + (dl_se->dl_defer_armed && dl_entity_overflow(dl_se, rq_clock(rq)))) { 849 + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; 850 + dl_se->runtime = pi_of(dl_se)->dl_runtime; 851 + } 860 852 861 853 if (dl_se->dl_yielded && dl_se->runtime > 0) 862 854 dl_se->runtime = 0; ··· 899 873 dl_se->dl_yielded = 0; 900 874 if (dl_se->dl_throttled) 901 875 dl_se->dl_throttled = 0; 876 + 877 + /* 878 + * If this is the replenishment of a deferred reservation, 879 + * clear the flag and return. 880 + */ 881 + if (dl_se->dl_defer_armed) { 882 + dl_se->dl_defer_armed = 0; 883 + return; 884 + } 885 + 886 + /* 887 + * A this point, if the deferred server is not armed, and the deadline 888 + * is in the future, if it is not running already, throttle the server 889 + * and arm the defer timer. 
890 + */ 891 + if (dl_se->dl_defer && !dl_se->dl_defer_running && 892 + dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) { 893 + if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) { 894 + 895 + /* 896 + * Set dl_se->dl_defer_armed and dl_throttled variables to 897 + * inform the start_dl_timer() that this is a deferred 898 + * activation. 899 + */ 900 + dl_se->dl_defer_armed = 1; 901 + dl_se->dl_throttled = 1; 902 + if (!start_dl_timer(dl_se)) { 903 + /* 904 + * If for whatever reason (delays), a previous timer was 905 + * queued but not serviced, cancel it and clean the 906 + * deferrable server variables intended for start_dl_timer(). 907 + */ 908 + hrtimer_try_to_cancel(&dl_se->dl_timer); 909 + dl_se->dl_defer_armed = 0; 910 + dl_se->dl_throttled = 0; 911 + } 912 + } 913 + } 902 914 } 903 915 904 916 /* ··· 1087 1023 } 1088 1024 1089 1025 replenish_dl_new_period(dl_se, rq); 1026 + } else if (dl_server(dl_se) && dl_se->dl_defer) { 1027 + /* 1028 + * The server can still use its previous deadline, so check if 1029 + * it left the dl_defer_running state. 1030 + */ 1031 + if (!dl_se->dl_defer_running) { 1032 + dl_se->dl_defer_armed = 1; 1033 + dl_se->dl_throttled = 1; 1034 + } 1090 1035 } 1091 1036 } 1092 1037 ··· 1128 1055 * We want the timer to fire at the deadline, but considering 1129 1056 * that it is actually coming from rq->clock and not from 1130 1057 * hrtimer's time base reading. 1058 + * 1059 + * The deferred reservation will have its timer set to 1060 + * (deadline - runtime). At that point, the CBS rule will decide 1061 + * if the current deadline can be used, or if a replenishment is 1062 + * required to avoid add too much pressure on the system 1063 + * (current u > U). 
1131 1064 */ 1132 - act = ns_to_ktime(dl_next_period(dl_se)); 1065 + if (dl_se->dl_defer_armed) { 1066 + WARN_ON_ONCE(!dl_se->dl_throttled); 1067 + act = ns_to_ktime(dl_se->deadline - dl_se->runtime); 1068 + } else { 1069 + /* act = deadline - rel-deadline + period */ 1070 + act = ns_to_ktime(dl_next_period(dl_se)); 1071 + } 1072 + 1133 1073 now = hrtimer_cb_get_time(timer); 1134 1074 delta = ktime_to_ns(now) - rq_clock(rq); 1135 1075 act = ktime_add_ns(act, delta); ··· 1192 1106 #endif 1193 1107 } 1194 1108 1109 + /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */ 1110 + static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC; 1111 + 1112 + static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se) 1113 + { 1114 + struct rq *rq = rq_of_dl_se(dl_se); 1115 + u64 fw; 1116 + 1117 + scoped_guard (rq_lock, rq) { 1118 + struct rq_flags *rf = &scope.rf; 1119 + 1120 + if (!dl_se->dl_throttled || !dl_se->dl_runtime) 1121 + return HRTIMER_NORESTART; 1122 + 1123 + sched_clock_tick(); 1124 + update_rq_clock(rq); 1125 + 1126 + if (!dl_se->dl_runtime) 1127 + return HRTIMER_NORESTART; 1128 + 1129 + if (!dl_se->server_has_tasks(dl_se)) { 1130 + replenish_dl_entity(dl_se); 1131 + return HRTIMER_NORESTART; 1132 + } 1133 + 1134 + if (dl_se->dl_defer_armed) { 1135 + /* 1136 + * First check if the server could consume runtime in background. 1137 + * If so, it is possible to push the defer timer for this amount 1138 + * of time. The dl_server_min_res serves as a limit to avoid 1139 + * forwarding the timer for a too small amount of time. 
1140 + */ 1141 + if (dl_time_before(rq_clock(dl_se->rq), 1142 + (dl_se->deadline - dl_se->runtime - dl_server_min_res))) { 1143 + 1144 + /* reset the defer timer */ 1145 + fw = dl_se->deadline - rq_clock(dl_se->rq) - dl_se->runtime; 1146 + 1147 + hrtimer_forward_now(timer, ns_to_ktime(fw)); 1148 + return HRTIMER_RESTART; 1149 + } 1150 + 1151 + dl_se->dl_defer_running = 1; 1152 + } 1153 + 1154 + enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH); 1155 + 1156 + if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &dl_se->rq->curr->dl)) 1157 + resched_curr(rq); 1158 + 1159 + __push_dl_task(rq, rf); 1160 + } 1161 + 1162 + return HRTIMER_NORESTART; 1163 + } 1164 + 1195 1165 /* 1196 1166 * This is the bandwidth enforcement timer callback. If here, we know 1197 1167 * a task is not on its dl_rq, since the fact that the timer was running ··· 1270 1128 struct rq_flags rf; 1271 1129 struct rq *rq; 1272 1130 1273 - if (dl_server(dl_se)) { 1274 - struct rq *rq = rq_of_dl_se(dl_se); 1275 - struct rq_flags rf; 1276 - 1277 - rq_lock(rq, &rf); 1278 - if (dl_se->dl_throttled) { 1279 - sched_clock_tick(); 1280 - update_rq_clock(rq); 1281 - 1282 - if (dl_se->server_has_tasks(dl_se)) { 1283 - enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH); 1284 - resched_curr(rq); 1285 - __push_dl_task(rq, &rf); 1286 - } else { 1287 - replenish_dl_entity(dl_se); 1288 - } 1289 - 1290 - } 1291 - rq_unlock(rq, &rf); 1292 - 1293 - return HRTIMER_NORESTART; 1294 - } 1131 + if (dl_server(dl_se)) 1132 + return dl_server_timer(timer, dl_se); 1295 1133 1296 1134 p = dl_task_of(dl_se); 1297 1135 rq = task_rq_lock(p, &rf); ··· 1441 1319 return (delta * u_act) >> BW_SHIFT; 1442 1320 } 1443 1321 1444 - static inline void 1445 - update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, 1446 - int flags); 1447 - static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec) 1322 + s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec) 1448 
1323 { 1449 1324 s64 scaled_delta_exec; 1450 - 1451 - if (unlikely(delta_exec <= 0)) { 1452 - if (unlikely(dl_se->dl_yielded)) 1453 - goto throttle; 1454 - return; 1455 - } 1456 - 1457 - if (dl_entity_is_special(dl_se)) 1458 - return; 1459 1325 1460 1326 /* 1461 1327 * For tasks that participate in GRUB, we implement GRUB-PA: the ··· 1463 1353 scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu); 1464 1354 } 1465 1355 1356 + return scaled_delta_exec; 1357 + } 1358 + 1359 + static inline void 1360 + update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, 1361 + int flags); 1362 + static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec) 1363 + { 1364 + s64 scaled_delta_exec; 1365 + 1366 + if (unlikely(delta_exec <= 0)) { 1367 + if (unlikely(dl_se->dl_yielded)) 1368 + goto throttle; 1369 + return; 1370 + } 1371 + 1372 + if (dl_server(dl_se) && dl_se->dl_throttled && !dl_se->dl_defer) 1373 + return; 1374 + 1375 + if (dl_entity_is_special(dl_se)) 1376 + return; 1377 + 1378 + scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); 1379 + 1466 1380 dl_se->runtime -= scaled_delta_exec; 1381 + 1382 + /* 1383 + * The fair server can consume its runtime while throttled (not queued/ 1384 + * running as regular CFS). 1385 + * 1386 + * If the server consumes its entire runtime in this state. The server 1387 + * is not required for the current period. Thus, reset the server by 1388 + * starting a new period, pushing the activation. 1389 + */ 1390 + if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) { 1391 + /* 1392 + * If the server was previously activated - the starving condition 1393 + * took place, it this point it went away because the fair scheduler 1394 + * was able to get runtime in background. So return to the initial 1395 + * state. 
1396 + */ 1397 + dl_se->dl_defer_running = 0; 1398 + 1399 + hrtimer_try_to_cancel(&dl_se->dl_timer); 1400 + 1401 + replenish_dl_new_period(dl_se, dl_se->rq); 1402 + 1403 + /* 1404 + * Not being able to start the timer seems problematic. If it could not 1405 + * be started for whatever reason, we need to "unthrottle" the DL server 1406 + * and queue right away. Otherwise nothing might queue it. That's similar 1407 + * to what enqueue_dl_entity() does on start_dl_timer==0. For now, just warn. 1408 + */ 1409 + WARN_ON_ONCE(!start_dl_timer(dl_se)); 1410 + 1411 + return; 1412 + } 1467 1413 1468 1414 throttle: 1469 1415 if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) { ··· 1548 1382 } 1549 1383 1550 1384 /* 1385 + * The fair server (sole dl_server) does not account for real-time 1386 + * workload because it is running fair work. 1387 + */ 1388 + if (dl_se == &rq->fair_server) 1389 + return; 1390 + 1391 + #ifdef CONFIG_RT_GROUP_SCHED 1392 + /* 1551 1393 * Because -- for now -- we share the rt bandwidth, we need to 1552 1394 * account our runtime there too, otherwise actual rt tasks 1553 1395 * would be able to exceed the shared quota. ··· 1579 1405 rt_rq->rt_time += delta_exec; 1580 1406 raw_spin_unlock(&rt_rq->rt_runtime_lock); 1581 1407 } 1408 + #endif 1409 + } 1410 + 1411 + /* 1412 + * In the non-defer mode, the idle time is not accounted, as the 1413 + * server provides a guarantee. 1414 + * 1415 + * If the dl_server is in defer mode, the idle time is also considered 1416 + * as time available for the fair server, avoiding a penalty for the 1417 + * rt scheduler that did not consumed that time. 
1418 + */ 1419 + void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) 1420 + { 1421 + s64 delta_exec, scaled_delta_exec; 1422 + 1423 + if (!rq->fair_server.dl_defer) 1424 + return; 1425 + 1426 + /* no need to discount more */ 1427 + if (rq->fair_server.runtime < 0) 1428 + return; 1429 + 1430 + delta_exec = rq_clock_task(rq) - p->se.exec_start; 1431 + if (delta_exec < 0) 1432 + return; 1433 + 1434 + scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec); 1435 + 1436 + rq->fair_server.runtime -= scaled_delta_exec; 1437 + 1438 + if (rq->fair_server.runtime < 0) { 1439 + rq->fair_server.dl_defer_running = 0; 1440 + rq->fair_server.runtime = 0; 1441 + } 1442 + 1443 + p->se.exec_start = rq_clock_task(rq); 1582 1444 } 1583 1445 1584 1446 void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec) 1585 1447 { 1586 - update_curr_dl_se(dl_se->rq, dl_se, delta_exec); 1448 + /* 0 runtime = fair server disabled */ 1449 + if (dl_se->dl_runtime) 1450 + update_curr_dl_se(dl_se->rq, dl_se, delta_exec); 1587 1451 } 1588 1452 1589 1453 void dl_server_start(struct sched_dl_entity *dl_se) 1590 1454 { 1455 + struct rq *rq = dl_se->rq; 1456 + 1457 + /* 1458 + * XXX: the apply do not work fine at the init phase for the 1459 + * fair server because things are not yet set. We need to improve 1460 + * this before getting generic. 
1461 + */ 1591 1462 if (!dl_server(dl_se)) { 1463 + u64 runtime = 50 * NSEC_PER_MSEC; 1464 + u64 period = 1000 * NSEC_PER_MSEC; 1465 + 1466 + dl_server_apply_params(dl_se, runtime, period, 1); 1467 + 1592 1468 dl_se->dl_server = 1; 1469 + dl_se->dl_defer = 1; 1593 1470 setup_new_dl_entity(dl_se); 1594 1471 } 1472 + 1473 + if (!dl_se->dl_runtime) 1474 + return; 1475 + 1595 1476 enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); 1477 + if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) 1478 + resched_curr(dl_se->rq); 1596 1479 } 1597 1480 1598 1481 void dl_server_stop(struct sched_dl_entity *dl_se) 1599 1482 { 1483 + if (!dl_se->dl_runtime) 1484 + return; 1485 + 1600 1486 dequeue_dl_entity(dl_se, DEQUEUE_SLEEP); 1487 + hrtimer_try_to_cancel(&dl_se->dl_timer); 1488 + dl_se->dl_defer_armed = 0; 1489 + dl_se->dl_throttled = 0; 1601 1490 } 1602 1491 1603 1492 void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, 1604 1493 dl_server_has_tasks_f has_tasks, 1605 - dl_server_pick_f pick) 1494 + dl_server_pick_f pick_next, 1495 + dl_server_pick_f pick_task) 1606 1496 { 1607 1497 dl_se->rq = rq; 1608 1498 dl_se->server_has_tasks = has_tasks; 1609 - dl_se->server_pick = pick; 1499 + dl_se->server_pick_next = pick_next; 1500 + dl_se->server_pick_task = pick_task; 1501 + } 1502 + 1503 + void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq) 1504 + { 1505 + u64 new_bw = dl_se->dl_bw; 1506 + int cpu = cpu_of(rq); 1507 + struct dl_bw *dl_b; 1508 + 1509 + dl_b = dl_bw_of(cpu_of(rq)); 1510 + guard(raw_spinlock)(&dl_b->lock); 1511 + 1512 + if (!dl_bw_cpus(cpu)) 1513 + return; 1514 + 1515 + __dl_add(dl_b, new_bw, dl_bw_cpus(cpu)); 1516 + } 1517 + 1518 + int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init) 1519 + { 1520 + u64 old_bw = init ? 
0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime); 1521 + u64 new_bw = to_ratio(period, runtime); 1522 + struct rq *rq = dl_se->rq; 1523 + int cpu = cpu_of(rq); 1524 + struct dl_bw *dl_b; 1525 + unsigned long cap; 1526 + int retval = 0; 1527 + int cpus; 1528 + 1529 + dl_b = dl_bw_of(cpu); 1530 + guard(raw_spinlock)(&dl_b->lock); 1531 + 1532 + cpus = dl_bw_cpus(cpu); 1533 + cap = dl_bw_capacity(cpu); 1534 + 1535 + if (__dl_overflow(dl_b, cap, old_bw, new_bw)) 1536 + return -EBUSY; 1537 + 1538 + if (init) { 1539 + __add_rq_bw(new_bw, &rq->dl); 1540 + __dl_add(dl_b, new_bw, cpus); 1541 + } else { 1542 + __dl_sub(dl_b, dl_se->dl_bw, cpus); 1543 + __dl_add(dl_b, new_bw, cpus); 1544 + 1545 + dl_rq_change_utilization(rq, dl_se, new_bw); 1546 + } 1547 + 1548 + dl_se->dl_runtime = runtime; 1549 + dl_se->dl_deadline = period; 1550 + dl_se->dl_period = period; 1551 + 1552 + dl_se->runtime = 0; 1553 + dl_se->deadline = 0; 1554 + 1555 + dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); 1556 + dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime); 1557 + 1558 + return retval; 1610 1559 } 1611 1560 1612 1561 /* ··· 2032 1735 * be counted in the active utilization; hence, we need to call 2033 1736 * add_running_bw(). 2034 1737 */ 2035 - if (dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) { 1738 + if (!dl_se->dl_defer && dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) { 2036 1739 if (flags & ENQUEUE_WAKEUP) 2037 1740 task_contending(dl_se, flags); 2038 1741 ··· 2052 1755 } else if ((flags & ENQUEUE_RESTORE) && 2053 1756 dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) { 2054 1757 setup_new_dl_entity(dl_se); 1758 + } 1759 + 1760 + /* 1761 + * If the reservation is still throttled, e.g., it got replenished but is a 1762 + * deferred task and still got to wait, don't enqueue. 
1763 + */ 1764 + if (dl_se->dl_throttled && start_dl_timer(dl_se)) 1765 + return; 1766 + 1767 + /* 1768 + * We're about to enqueue, make sure we're not ->dl_throttled! 1769 + * In case the timer was not started, say because the defer time 1770 + * has passed, mark as not throttled and mark unarmed. 1771 + * Also cancel earlier timers, since letting those run is pointless. 1772 + */ 1773 + if (dl_se->dl_throttled) { 1774 + hrtimer_try_to_cancel(&dl_se->dl_timer); 1775 + dl_se->dl_defer_armed = 0; 1776 + dl_se->dl_throttled = 0; 2055 1777 } 2056 1778 2057 1779 __enqueue_dl_entity(dl_se); ··· 2402 2086 return __node_2_dle(left); 2403 2087 } 2404 2088 2405 - static struct task_struct *pick_task_dl(struct rq *rq) 2089 + /* 2090 + * __pick_next_task_dl - Helper to pick the next -deadline task to run. 2091 + * @rq: The runqueue to pick the next task from. 2092 + * @peek: If true, just peek at the next task. Only relevant for dlserver. 2093 + */ 2094 + static struct task_struct *__pick_next_task_dl(struct rq *rq, bool peek) 2406 2095 { 2407 2096 struct sched_dl_entity *dl_se; 2408 2097 struct dl_rq *dl_rq = &rq->dl; ··· 2421 2100 WARN_ON_ONCE(!dl_se); 2422 2101 2423 2102 if (dl_server(dl_se)) { 2424 - p = dl_se->server_pick(dl_se); 2103 + if (IS_ENABLED(CONFIG_SMP) && peek) 2104 + p = dl_se->server_pick_task(dl_se); 2105 + else 2106 + p = dl_se->server_pick_next(dl_se); 2425 2107 if (!p) { 2426 2108 WARN_ON_ONCE(1); 2427 2109 dl_se->dl_yielded = 1; ··· 2439 2115 return p; 2440 2116 } 2441 2117 2118 + #ifdef CONFIG_SMP 2119 + static struct task_struct *pick_task_dl(struct rq *rq) 2120 + { 2121 + return __pick_next_task_dl(rq, true); 2122 + } 2123 + #endif 2124 + 2442 2125 static struct task_struct *pick_next_task_dl(struct rq *rq) 2443 2126 { 2444 2127 struct task_struct *p; 2445 2128 2446 - p = pick_task_dl(rq); 2129 + p = __pick_next_task_dl(rq, false); 2447 2130 if (!p) 2448 2131 return p; 2449 2132
+162 -4
kernel/sched/debug.c
··· 333 333 .release = seq_release, 334 334 }; 335 335 336 + enum dl_param { 337 + DL_RUNTIME = 0, 338 + DL_PERIOD, 339 + }; 340 + 341 + static unsigned long fair_server_period_max = (1 << 22) * NSEC_PER_USEC; /* ~4 seconds */ 342 + static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC; /* 100 us */ 343 + 344 + static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf, 345 + size_t cnt, loff_t *ppos, enum dl_param param) 346 + { 347 + long cpu = (long) ((struct seq_file *) filp->private_data)->private; 348 + struct rq *rq = cpu_rq(cpu); 349 + u64 runtime, period; 350 + size_t err; 351 + int retval; 352 + u64 value; 353 + 354 + err = kstrtoull_from_user(ubuf, cnt, 10, &value); 355 + if (err) 356 + return err; 357 + 358 + scoped_guard (rq_lock_irqsave, rq) { 359 + runtime = rq->fair_server.dl_runtime; 360 + period = rq->fair_server.dl_period; 361 + 362 + switch (param) { 363 + case DL_RUNTIME: 364 + if (runtime == value) 365 + break; 366 + runtime = value; 367 + break; 368 + case DL_PERIOD: 369 + if (value == period) 370 + break; 371 + period = value; 372 + break; 373 + } 374 + 375 + if (runtime > period || 376 + period > fair_server_period_max || 377 + period < fair_server_period_min) { 378 + return -EINVAL; 379 + } 380 + 381 + if (rq->cfs.h_nr_running) { 382 + update_rq_clock(rq); 383 + dl_server_stop(&rq->fair_server); 384 + } 385 + 386 + retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0); 387 + if (retval) 388 + cnt = retval; 389 + 390 + if (!runtime) 391 + printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n", 392 + cpu_of(rq)); 393 + 394 + if (rq->cfs.h_nr_running) 395 + dl_server_start(&rq->fair_server); 396 + } 397 + 398 + *ppos += cnt; 399 + return cnt; 400 + } 401 + 402 + static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param) 403 + { 404 + unsigned long cpu = (unsigned long) m->private; 405 + struct rq *rq = cpu_rq(cpu); 406 + u64 
value; 407 + 408 + switch (param) { 409 + case DL_RUNTIME: 410 + value = rq->fair_server.dl_runtime; 411 + break; 412 + case DL_PERIOD: 413 + value = rq->fair_server.dl_period; 414 + break; 415 + } 416 + 417 + seq_printf(m, "%llu\n", value); 418 + return 0; 419 + 420 + } 421 + 422 + static ssize_t 423 + sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf, 424 + size_t cnt, loff_t *ppos) 425 + { 426 + return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME); 427 + } 428 + 429 + static int sched_fair_server_runtime_show(struct seq_file *m, void *v) 430 + { 431 + return sched_fair_server_show(m, v, DL_RUNTIME); 432 + } 433 + 434 + static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp) 435 + { 436 + return single_open(filp, sched_fair_server_runtime_show, inode->i_private); 437 + } 438 + 439 + static const struct file_operations fair_server_runtime_fops = { 440 + .open = sched_fair_server_runtime_open, 441 + .write = sched_fair_server_runtime_write, 442 + .read = seq_read, 443 + .llseek = seq_lseek, 444 + .release = single_release, 445 + }; 446 + 447 + static ssize_t 448 + sched_fair_server_period_write(struct file *filp, const char __user *ubuf, 449 + size_t cnt, loff_t *ppos) 450 + { 451 + return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD); 452 + } 453 + 454 + static int sched_fair_server_period_show(struct seq_file *m, void *v) 455 + { 456 + return sched_fair_server_show(m, v, DL_PERIOD); 457 + } 458 + 459 + static int sched_fair_server_period_open(struct inode *inode, struct file *filp) 460 + { 461 + return single_open(filp, sched_fair_server_period_show, inode->i_private); 462 + } 463 + 464 + static const struct file_operations fair_server_period_fops = { 465 + .open = sched_fair_server_period_open, 466 + .write = sched_fair_server_period_write, 467 + .read = seq_read, 468 + .llseek = seq_lseek, 469 + .release = single_release, 470 + }; 471 + 336 472 static struct dentry *debugfs_sched; 473 + 
474 + static void debugfs_fair_server_init(void) 475 + { 476 + struct dentry *d_fair; 477 + unsigned long cpu; 478 + 479 + d_fair = debugfs_create_dir("fair_server", debugfs_sched); 480 + if (!d_fair) 481 + return; 482 + 483 + for_each_possible_cpu(cpu) { 484 + struct dentry *d_cpu; 485 + char buf[32]; 486 + 487 + snprintf(buf, sizeof(buf), "cpu%lu", cpu); 488 + d_cpu = debugfs_create_dir(buf, d_fair); 489 + 490 + debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops); 491 + debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops); 492 + } 493 + } 337 494 338 495 static __init int sched_init_debug(void) 339 496 { ··· 530 373 #endif 531 374 532 375 debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); 376 + 377 + debugfs_fair_server_init(); 533 378 534 379 return 0; 535 380 } ··· 800 641 SEQ_printf(m, "\n"); 801 642 SEQ_printf(m, "cfs_rq[%d]:\n", cpu); 802 643 #endif 803 - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", 804 - SPLIT_NS(cfs_rq->exec_clock)); 805 644 806 645 raw_spin_rq_lock_irqsave(rq, flags); 807 646 root = __pick_root_entity(cfs_rq); ··· 826 669 SPLIT_NS(right_vruntime)); 827 670 spread = right_vruntime - left_vruntime; 828 671 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); 829 - SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", 830 - cfs_rq->nr_spread_over); 831 672 SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); 832 673 SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running); 833 674 SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", ··· 885 730 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) 886 731 887 732 PU(rt_nr_running); 733 + 734 + #ifdef CONFIG_RT_GROUP_SCHED 888 735 P(rt_throttled); 889 736 PN(rt_time); 890 737 PN(rt_runtime); 738 + #endif 891 739 892 740 #undef PN 893 741 #undef PU
+84 -32
kernel/sched/fair.c
··· 511 511 512 512 static int se_is_idle(struct sched_entity *se) 513 513 { 514 - return 0; 514 + return task_has_idle_policy(task_of(se)); 515 515 } 516 516 517 517 #endif /* CONFIG_FAIR_GROUP_SCHED */ ··· 1156 1156 static void update_curr(struct cfs_rq *cfs_rq) 1157 1157 { 1158 1158 struct sched_entity *curr = cfs_rq->curr; 1159 + struct rq *rq = rq_of(cfs_rq); 1159 1160 s64 delta_exec; 1160 1161 1161 1162 if (unlikely(!curr)) 1162 1163 return; 1163 1164 1164 - delta_exec = update_curr_se(rq_of(cfs_rq), curr); 1165 + delta_exec = update_curr_se(rq, curr); 1165 1166 if (unlikely(delta_exec <= 0)) 1166 1167 return; 1167 1168 ··· 1170 1169 update_deadline(cfs_rq, curr); 1171 1170 update_min_vruntime(cfs_rq); 1172 1171 1173 - if (entity_is_task(curr)) 1174 - update_curr_task(task_of(curr), delta_exec); 1172 + if (entity_is_task(curr)) { 1173 + struct task_struct *p = task_of(curr); 1174 + 1175 + update_curr_task(p, delta_exec); 1176 + 1177 + /* 1178 + * Any fair task that runs outside of fair_server should 1179 + * account against fair_server such that it can account for 1180 + * this time and possibly avoid running this period. 
1181 + */ 1182 + if (p->dl_server != &rq->fair_server) 1183 + dl_server_update(&rq->fair_server, delta_exec); 1184 + } 1175 1185 1176 1186 account_cfs_rq_runtime(cfs_rq, delta_exec); 1177 1187 } ··· 5778 5766 struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); 5779 5767 struct sched_entity *se; 5780 5768 long task_delta, idle_task_delta, dequeue = 1; 5769 + long rq_h_nr_running = rq->cfs.h_nr_running; 5781 5770 5782 5771 raw_spin_lock(&cfs_b->lock); 5783 5772 /* This will start the period timer if necessary */ ··· 5850 5837 /* At this point se is NULL and we are at root level*/ 5851 5838 sub_nr_running(rq, task_delta); 5852 5839 5840 + /* Stop the fair server if throttling resulted in no runnable tasks */ 5841 + if (rq_h_nr_running && !rq->cfs.h_nr_running) 5842 + dl_server_stop(&rq->fair_server); 5853 5843 done: 5854 5844 /* 5855 5845 * Note: distribution will already see us throttled via the ··· 5871 5855 struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); 5872 5856 struct sched_entity *se; 5873 5857 long task_delta, idle_task_delta; 5858 + long rq_h_nr_running = rq->cfs.h_nr_running; 5874 5859 5875 5860 se = cfs_rq->tg->se[cpu_of(rq)]; 5876 5861 ··· 5940 5923 if (cfs_rq_throttled(qcfs_rq)) 5941 5924 goto unthrottle_throttle; 5942 5925 } 5926 + 5927 + /* Start the fair server if un-throttling resulted in new runnable tasks */ 5928 + if (!rq_h_nr_running && rq->cfs.h_nr_running) 5929 + dl_server_start(&rq->fair_server); 5943 5930 5944 5931 /* At this point se is NULL and we are at root level*/ 5945 5932 add_nr_running(rq, task_delta); ··· 6577 6556 { 6578 6557 int cpu = cpu_of(rq); 6579 6558 6580 - if (!sched_feat(HZ_BW) || !cfs_bandwidth_used()) 6559 + if (!cfs_bandwidth_used()) 6581 6560 return; 6582 6561 6583 6562 if (!tick_nohz_full_cpu(cpu)) ··· 6772 6751 struct sched_entity *se = &p->se; 6773 6752 int idle_h_nr_running = task_has_idle_policy(p); 6774 6753 int task_new = !(flags & ENQUEUE_WAKEUP); 6754 + int rq_h_nr_running = 
rq->cfs.h_nr_running; 6775 6755 6776 6756 /* 6777 6757 * The code below (indirectly) updates schedutil which looks at ··· 6827 6805 goto enqueue_throttle; 6828 6806 } 6829 6807 6808 + if (!rq_h_nr_running && rq->cfs.h_nr_running) { 6809 + /* Account for idle runtime */ 6810 + if (!rq->nr_running) 6811 + dl_server_update_idle_time(rq, rq->curr); 6812 + dl_server_start(&rq->fair_server); 6813 + } 6814 + 6830 6815 /* At this point se is NULL and we are at root level*/ 6831 6816 add_nr_running(rq, 1); 6832 6817 ··· 6874 6845 int task_sleep = flags & DEQUEUE_SLEEP; 6875 6846 int idle_h_nr_running = task_has_idle_policy(p); 6876 6847 bool was_sched_idle = sched_idle_rq(rq); 6848 + int rq_h_nr_running = rq->cfs.h_nr_running; 6877 6849 6878 6850 util_est_dequeue(&rq->cfs, p); 6879 6851 ··· 6928 6898 6929 6899 /* At this point se is NULL and we are at root level*/ 6930 6900 sub_nr_running(rq, 1); 6901 + 6902 + if (rq_h_nr_running && !rq->cfs.h_nr_running) 6903 + dl_server_stop(&rq->fair_server); 6931 6904 6932 6905 /* balance early to pull high priority tasks */ 6933 6906 if (unlikely(!was_sched_idle && sched_idle_rq(rq))) ··· 8415 8382 if (test_tsk_need_resched(curr)) 8416 8383 return; 8417 8384 8418 - /* Idle tasks are by definition preempted by non-idle tasks. */ 8419 - if (unlikely(task_has_idle_policy(curr)) && 8420 - likely(!task_has_idle_policy(p))) 8421 - goto preempt; 8422 - 8423 - /* 8424 - * Batch and idle tasks do not preempt non-idle tasks (their preemption 8425 - * is driven by the tick): 8426 - */ 8427 - if (unlikely(!normal_policy(p->policy)) || !sched_feat(WAKEUP_PREEMPTION)) 8385 + if (!sched_feat(WAKEUP_PREEMPTION)) 8428 8386 return; 8429 8387 8430 8388 find_matching_se(&se, &pse); ··· 8425 8401 pse_is_idle = se_is_idle(pse); 8426 8402 8427 8403 /* 8428 - * Preempt an idle group in favor of a non-idle group (and don't preempt 8404 + * Preempt an idle entity in favor of a non-idle entity (and don't preempt 8429 8405 * in the inverse case). 
8430 8406 */ 8431 8407 if (cse_is_idle && !pse_is_idle) ··· 8433 8409 if (cse_is_idle != pse_is_idle) 8434 8410 return; 8435 8411 8412 + /* 8413 + * BATCH and IDLE tasks do not preempt others. 8414 + */ 8415 + if (unlikely(!normal_policy(p->policy))) 8416 + return; 8417 + 8436 8418 cfs_rq = cfs_rq_of(se); 8437 8419 update_curr(cfs_rq); 8438 - 8439 8420 /* 8440 8421 * XXX pick_eevdf(cfs_rq) != se ? 8441 8422 */ ··· 8481 8452 se = pick_next_entity(cfs_rq); 8482 8453 cfs_rq = group_cfs_rq(se); 8483 8454 } while (cfs_rq); 8455 + 8456 + /* 8457 + * This can be called from directly from CFS's ->pick_task() or indirectly 8458 + * from DL's ->pick_task when fair server is enabled. In the indirect case, 8459 + * DL will set ->dl_server just after this function is called, so its Ok to 8460 + * clear. In the direct case, we are picking directly so we must clear it. 8461 + */ 8462 + task_of(se)->dl_server = NULL; 8484 8463 8485 8464 return task_of(se); 8486 8465 } ··· 8642 8605 static struct task_struct *__pick_next_task_fair(struct rq *rq) 8643 8606 { 8644 8607 return pick_next_task_fair(rq, NULL, NULL); 8608 + } 8609 + 8610 + static bool fair_server_has_tasks(struct sched_dl_entity *dl_se) 8611 + { 8612 + return !!dl_se->rq->cfs.nr_running; 8613 + } 8614 + 8615 + static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se) 8616 + { 8617 + #ifdef CONFIG_SMP 8618 + return pick_task_fair(dl_se->rq); 8619 + #else 8620 + return NULL; 8621 + #endif 8622 + } 8623 + 8624 + static struct task_struct *fair_server_pick_next(struct sched_dl_entity *dl_se) 8625 + { 8626 + return pick_next_task_fair(dl_se->rq, NULL, NULL); 8627 + } 8628 + 8629 + void fair_server_init(struct rq *rq) 8630 + { 8631 + struct sched_dl_entity *dl_se = &rq->fair_server; 8632 + 8633 + init_dl_entity(dl_se); 8634 + 8635 + dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_next, 8636 + fair_server_pick_task); 8637 + 8645 8638 } 8646 8639 8647 8640 /* ··· 12760 12693 */ 12761 12694 
static void task_fork_fair(struct task_struct *p) 12762 12695 { 12763 - struct sched_entity *se = &p->se, *curr; 12764 - struct cfs_rq *cfs_rq; 12765 - struct rq *rq = this_rq(); 12766 - struct rq_flags rf; 12767 - 12768 - rq_lock(rq, &rf); 12769 - update_rq_clock(rq); 12770 - 12771 12696 set_task_max_allowed_capacity(p); 12772 - 12773 - cfs_rq = task_cfs_rq(current); 12774 - curr = cfs_rq->curr; 12775 - if (curr) 12776 - update_curr(cfs_rq); 12777 - place_entity(cfs_rq, se, ENQUEUE_INITIAL); 12778 - rq_unlock(rq, &rf); 12779 12697 } 12780 12698 12781 12699 /*
-2
kernel/sched/features.h
··· 85 85 SCHED_FEAT(UTIL_EST, true) 86 86 87 87 SCHED_FEAT(LATENCY_WARN, false) 88 - 89 - SCHED_FEAT(HZ_BW, true)
+2
kernel/sched/idle.c
··· 452 452 453 453 static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) 454 454 { 455 + dl_server_update_idle_time(rq, prev); 455 456 scx_update_idle(rq, false); 456 457 } 457 458 ··· 461 460 update_idle_core(rq); 462 461 scx_update_idle(rq, true); 463 462 schedstat_inc(rq->sched_goidle); 463 + next->se.exec_start = rq_clock_task(rq); 464 464 } 465 465 466 466 #ifdef CONFIG_SMP
+107 -135
kernel/sched/rt.c
··· 8 8 /* More than 4 hours if BW_SHIFT equals 20. */ 9 9 static const u64 max_rt_runtime = MAX_BW; 10 10 11 - static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); 12 - 13 - struct rt_bandwidth def_rt_bandwidth; 14 - 15 11 /* 16 12 * period over which we measure -rt task CPU usage in us. 17 13 * default: 1s ··· 61 65 } 62 66 late_initcall(sched_rt_sysctl_init); 63 67 #endif 68 + 69 + void init_rt_rq(struct rt_rq *rt_rq) 70 + { 71 + struct rt_prio_array *array; 72 + int i; 73 + 74 + array = &rt_rq->active; 75 + for (i = 0; i < MAX_RT_PRIO; i++) { 76 + INIT_LIST_HEAD(array->queue + i); 77 + __clear_bit(i, array->bitmap); 78 + } 79 + /* delimiter for bitsearch: */ 80 + __set_bit(MAX_RT_PRIO, array->bitmap); 81 + 82 + #if defined CONFIG_SMP 83 + rt_rq->highest_prio.curr = MAX_RT_PRIO-1; 84 + rt_rq->highest_prio.next = MAX_RT_PRIO-1; 85 + rt_rq->overloaded = 0; 86 + plist_head_init(&rt_rq->pushable_tasks); 87 + #endif /* CONFIG_SMP */ 88 + /* We start is dequeued state, because no RT tasks are queued */ 89 + rt_rq->rt_queued = 0; 90 + 91 + #ifdef CONFIG_RT_GROUP_SCHED 92 + rt_rq->rt_time = 0; 93 + rt_rq->rt_throttled = 0; 94 + rt_rq->rt_runtime = 0; 95 + raw_spin_lock_init(&rt_rq->rt_runtime_lock); 96 + #endif 97 + } 98 + 99 + #ifdef CONFIG_RT_GROUP_SCHED 100 + 101 + static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); 64 102 65 103 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) 66 104 { ··· 160 130 do_start_rt_bandwidth(rt_b); 161 131 } 162 132 163 - void init_rt_rq(struct rt_rq *rt_rq) 164 - { 165 - struct rt_prio_array *array; 166 - int i; 167 - 168 - array = &rt_rq->active; 169 - for (i = 0; i < MAX_RT_PRIO; i++) { 170 - INIT_LIST_HEAD(array->queue + i); 171 - __clear_bit(i, array->bitmap); 172 - } 173 - /* delimiter for bit-search: */ 174 - __set_bit(MAX_RT_PRIO, array->bitmap); 175 - 176 - #if defined CONFIG_SMP 177 - rt_rq->highest_prio.curr = MAX_RT_PRIO-1; 178 - rt_rq->highest_prio.next 
= MAX_RT_PRIO-1; 179 - rt_rq->overloaded = 0; 180 - plist_head_init(&rt_rq->pushable_tasks); 181 - #endif /* CONFIG_SMP */ 182 - /* We start is dequeued state, because no RT tasks are queued */ 183 - rt_rq->rt_queued = 0; 184 - 185 - rt_rq->rt_time = 0; 186 - rt_rq->rt_throttled = 0; 187 - rt_rq->rt_runtime = 0; 188 - raw_spin_lock_init(&rt_rq->rt_runtime_lock); 189 - } 190 - 191 - #ifdef CONFIG_RT_GROUP_SCHED 192 133 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) 193 134 { 194 135 hrtimer_cancel(&rt_b->rt_period_timer); ··· 196 195 { 197 196 if (tg->rt_se) 198 197 destroy_rt_bandwidth(&tg->rt_bandwidth); 199 - 200 198 } 201 199 202 200 void free_rt_sched_group(struct task_group *tg) ··· 253 253 if (!tg->rt_se) 254 254 goto err; 255 255 256 - init_rt_bandwidth(&tg->rt_bandwidth, 257 - ktime_to_ns(def_rt_bandwidth.rt_period), 0); 256 + init_rt_bandwidth(&tg->rt_bandwidth, ktime_to_ns(global_rt_period()), 0); 258 257 259 258 for_each_possible_cpu(i) { 260 259 rt_rq = kzalloc_node(sizeof(struct rt_rq), ··· 603 604 return &rt_rq->tg->rt_bandwidth; 604 605 } 605 606 606 - #else /* !CONFIG_RT_GROUP_SCHED */ 607 - 608 - static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 609 - { 610 - return rt_rq->rt_runtime; 611 - } 612 - 613 - static inline u64 sched_rt_period(struct rt_rq *rt_rq) 614 - { 615 - return ktime_to_ns(def_rt_bandwidth.rt_period); 616 - } 617 - 618 - typedef struct rt_rq *rt_rq_iter_t; 619 - 620 - #define for_each_rt_rq(rt_rq, iter, rq) \ 621 - for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) 622 - 623 - #define for_each_sched_rt_entity(rt_se) \ 624 - for (; rt_se; rt_se = NULL) 625 - 626 - static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) 627 - { 628 - return NULL; 629 - } 630 - 631 - static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 632 - { 633 - struct rq *rq = rq_of_rt_rq(rt_rq); 634 - 635 - if (!rt_rq->rt_nr_running) 636 - return; 637 - 638 - enqueue_top_rt_rq(rt_rq); 639 - resched_curr(rq); 640 - } 
641 - 642 - static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 643 - { 644 - dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); 645 - } 646 - 647 - static inline int rt_rq_throttled(struct rt_rq *rt_rq) 648 - { 649 - return rt_rq->rt_throttled; 650 - } 651 - 652 - static inline const struct cpumask *sched_rt_period_mask(void) 653 - { 654 - return cpu_online_mask; 655 - } 656 - 657 - static inline 658 - struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 659 - { 660 - return &cpu_rq(cpu)->rt; 661 - } 662 - 663 - static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) 664 - { 665 - return &def_rt_bandwidth; 666 - } 667 - 668 - #endif /* CONFIG_RT_GROUP_SCHED */ 669 - 670 607 bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) 671 608 { 672 609 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); ··· 794 859 const struct cpumask *span; 795 860 796 861 span = sched_rt_period_mask(); 797 - #ifdef CONFIG_RT_GROUP_SCHED 862 + 798 863 /* 799 864 * FIXME: isolated CPUs should really leave the root task group, 800 865 * whether they are isolcpus or were isolated via cpusets, lest ··· 806 871 */ 807 872 if (rt_b == &root_task_group.rt_bandwidth) 808 873 span = cpu_online_mask; 809 - #endif 874 + 810 875 for_each_cpu(i, span) { 811 876 int enqueue = 0; 812 877 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); ··· 873 938 return idle; 874 939 } 875 940 876 - static inline int rt_se_prio(struct sched_rt_entity *rt_se) 877 - { 878 - #ifdef CONFIG_RT_GROUP_SCHED 879 - struct rt_rq *rt_rq = group_rt_rq(rt_se); 880 - 881 - if (rt_rq) 882 - return rt_rq->highest_prio.curr; 883 - #endif 884 - 885 - return rt_task_of(rt_se)->prio; 886 - } 887 - 888 941 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) 889 942 { 890 943 u64 runtime = sched_rt_runtime(rt_rq); ··· 916 993 return 0; 917 994 } 918 995 996 + #else /* !CONFIG_RT_GROUP_SCHED */ 997 + 998 + typedef struct rt_rq *rt_rq_iter_t; 999 + 1000 + #define for_each_rt_rq(rt_rq, 
iter, rq) \ 1001 + for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) 1002 + 1003 + #define for_each_sched_rt_entity(rt_se) \ 1004 + for (; rt_se; rt_se = NULL) 1005 + 1006 + static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) 1007 + { 1008 + return NULL; 1009 + } 1010 + 1011 + static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 1012 + { 1013 + struct rq *rq = rq_of_rt_rq(rt_rq); 1014 + 1015 + if (!rt_rq->rt_nr_running) 1016 + return; 1017 + 1018 + enqueue_top_rt_rq(rt_rq); 1019 + resched_curr(rq); 1020 + } 1021 + 1022 + static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 1023 + { 1024 + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); 1025 + } 1026 + 1027 + static inline int rt_rq_throttled(struct rt_rq *rt_rq) 1028 + { 1029 + return false; 1030 + } 1031 + 1032 + static inline const struct cpumask *sched_rt_period_mask(void) 1033 + { 1034 + return cpu_online_mask; 1035 + } 1036 + 1037 + static inline 1038 + struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 1039 + { 1040 + return &cpu_rq(cpu)->rt; 1041 + } 1042 + 1043 + #ifdef CONFIG_SMP 1044 + static void __enable_runtime(struct rq *rq) { } 1045 + static void __disable_runtime(struct rq *rq) { } 1046 + #endif 1047 + 1048 + #endif /* CONFIG_RT_GROUP_SCHED */ 1049 + 1050 + static inline int rt_se_prio(struct sched_rt_entity *rt_se) 1051 + { 1052 + #ifdef CONFIG_RT_GROUP_SCHED 1053 + struct rt_rq *rt_rq = group_rt_rq(rt_se); 1054 + 1055 + if (rt_rq) 1056 + return rt_rq->highest_prio.curr; 1057 + #endif 1058 + 1059 + return rt_task_of(rt_se)->prio; 1060 + } 1061 + 919 1062 /* 920 1063 * Update the current task's runtime statistics. Skip current tasks that 921 1064 * are not in our scheduling class. 
··· 989 1000 static void update_curr_rt(struct rq *rq) 990 1001 { 991 1002 struct task_struct *curr = rq->curr; 992 - struct sched_rt_entity *rt_se = &curr->rt; 993 1003 s64 delta_exec; 994 1004 995 1005 if (curr->sched_class != &rt_sched_class) ··· 997 1009 delta_exec = update_curr_common(rq); 998 1010 if (unlikely(delta_exec <= 0)) 999 1011 return; 1012 + 1013 + #ifdef CONFIG_RT_GROUP_SCHED 1014 + struct sched_rt_entity *rt_se = &curr->rt; 1000 1015 1001 1016 if (!rt_bandwidth_enabled()) 1002 1017 return; ··· 1019 1028 do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq)); 1020 1029 } 1021 1030 } 1031 + #endif 1022 1032 } 1023 1033 1024 1034 static void ··· 1176 1184 static void 1177 1185 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1178 1186 { 1179 - start_rt_bandwidth(&def_rt_bandwidth); 1180 1187 } 1181 1188 1182 1189 static inline ··· 2903 2912 #ifdef CONFIG_SYSCTL 2904 2913 static int sched_rt_global_constraints(void) 2905 2914 { 2906 - unsigned long flags; 2907 - int i; 2908 - 2909 - raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); 2910 - for_each_possible_cpu(i) { 2911 - struct rt_rq *rt_rq = &cpu_rq(i)->rt; 2912 - 2913 - raw_spin_lock(&rt_rq->rt_runtime_lock); 2914 - rt_rq->rt_runtime = global_rt_runtime(); 2915 - raw_spin_unlock(&rt_rq->rt_runtime_lock); 2916 - } 2917 - raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); 2918 - 2919 2915 return 0; 2920 2916 } 2921 2917 #endif /* CONFIG_SYSCTL */ ··· 2922 2944 2923 2945 static void sched_rt_do_global(void) 2924 2946 { 2925 - unsigned long flags; 2926 - 2927 - raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); 2928 - def_rt_bandwidth.rt_runtime = global_rt_runtime(); 2929 - def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period()); 2930 - raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); 2931 2947 } 2932 2948 2933 2949 static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer,
+13 -10
kernel/sched/sched.h
··· 362 362 extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); 363 363 extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); 364 364 extern int dl_bw_check_overflow(int cpu); 365 - 365 + extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec); 366 366 /* 367 367 * SCHED_DEADLINE supports servers (nested scheduling) with the following 368 368 * interface: ··· 388 388 extern void dl_server_stop(struct sched_dl_entity *dl_se); 389 389 extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, 390 390 dl_server_has_tasks_f has_tasks, 391 - dl_server_pick_f pick); 391 + dl_server_pick_f pick_next, 392 + dl_server_pick_f pick_task); 393 + 394 + extern void dl_server_update_idle_time(struct rq *rq, 395 + struct task_struct *p); 396 + extern void fair_server_init(struct rq *rq); 397 + extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq); 398 + extern int dl_server_apply_params(struct sched_dl_entity *dl_se, 399 + u64 runtime, u64 period, bool init); 392 400 393 401 #ifdef CONFIG_CGROUP_SCHED 394 402 ··· 639 631 s64 avg_vruntime; 640 632 u64 avg_load; 641 633 642 - u64 exec_clock; 643 634 u64 min_vruntime; 644 635 #ifdef CONFIG_SCHED_CORE 645 636 unsigned int forceidle_seq; ··· 657 650 */ 658 651 struct sched_entity *curr; 659 652 struct sched_entity *next; 660 - 661 - #ifdef CONFIG_SCHED_DEBUG 662 - unsigned int nr_spread_over; 663 - #endif 664 653 665 654 #ifdef CONFIG_SMP 666 655 /* ··· 797 794 #endif /* CONFIG_SMP */ 798 795 int rt_queued; 799 796 797 + #ifdef CONFIG_RT_GROUP_SCHED 800 798 int rt_throttled; 801 799 u64 rt_time; 802 800 u64 rt_runtime; 803 801 /* Nests inside the rq lock: */ 804 802 raw_spinlock_t rt_runtime_lock; 805 803 806 - #ifdef CONFIG_RT_GROUP_SCHED 807 804 unsigned int rt_nr_boosted; 808 805 809 806 struct rq *rq; ··· 1113 1110 struct scx_rq scx; 1114 1111 #endif 1115 1112 1113 + struct 
sched_dl_entity fair_server; 1114 + 1116 1115 #ifdef CONFIG_FAIR_GROUP_SCHED 1117 1116 /* list of leaf cfs_rq on this CPU: */ 1118 1117 struct list_head leaf_cfs_rq_list; ··· 1229 1224 /* latency stats */ 1230 1225 struct sched_info rq_sched_info; 1231 1226 unsigned long long rq_cpu_time; 1232 - /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ 1233 1227 1234 1228 /* sys_sched_yield() stats */ 1235 1229 unsigned int yld_count; ··· 2623 2619 extern void resched_curr(struct rq *rq); 2624 2620 extern void resched_cpu(int cpu); 2625 2621 2626 - extern struct rt_bandwidth def_rt_bandwidth; 2627 2622 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); 2628 2623 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); 2629 2624
+8
kernel/sched/topology.c
··· 516 516 if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) 517 517 set_rq_online(rq); 518 518 519 + /* 520 + * Because the rq is not a task, dl_add_task_root_domain() did not 521 + * move the fair server bw to the rd if it already started. 522 + * Add it now. 523 + */ 524 + if (rq->fair_server.dl_server) 525 + __dl_server_attach_root(&rq->fair_server, rq); 526 + 519 527 rq_unlock_irqrestore(rq, &rf); 520 528 521 529 if (old_rd)