Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

sched/rt: Remove default bandwidth control

Now that fair_server exists, we no longer need RT bandwidth control
unless CONFIG_RT_GROUP_SCHED is enabled.

Enable fair_server with parameters equivalent to RT throttling.
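
For reference, "parameters equivalent to RT throttling" means the 5% of CPU time that the
default sysctls (sched_rt_runtime_us = 950000, sched_rt_period_us = 1000000) used to leave
for non-RT tasks: 1000000 - 950000 = 50000 us, i.e. 50 ms out of every 1 s period. The fair
server is started with exactly that reservation; a minimal sketch mirroring the
kernel/sched/deadline.c hunk below (the enclosing function is not named in the hunk, so the
placement here is approximate):

	u64 runtime = 50 * NSEC_PER_MSEC;	/* 1000 ms - 950 ms: the old fair-task leftover */
	u64 period = 1000 * NSEC_PER_MSEC;	/* matches the default sched_rt_period_us */

	/* Reserve the same 5% for fair tasks via the deadline server. */
	dl_server_apply_params(dl_se, runtime, period, 1);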

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: "Vineeth Pillai (Google)" <vineeth@bitbyteword.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/r/14d562db55df5c3c780d91940743acb166895ef7.1716811044.git.bristot@kernel.org

+120 -142
+6 -3
kernel/sched/core.c
···
 #endif /* CONFIG_RT_GROUP_SCHED */
 	}
 
-	init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
-
 #ifdef CONFIG_SMP
 	init_defrootdomain();
 #endif
···
 		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
+		/*
+		 * This is required for init cpu because rt.c:__enable_runtime()
+		 * starts working after scheduler_running, which is not the case
+		 * yet.
+		 */
+		rq->rt.rt_runtime = global_rt_runtime();
 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
 #ifdef CONFIG_SMP
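
The comment added above refers to rt.c:__enable_runtime(), which is a no-op until
scheduler_running is set, so during sched_init() it cannot yet hand the bandwidth to the
boot CPU's rt_rq; hence the explicit rq->rt.rt_runtime = global_rt_runtime() seed. A rough
sketch of that function, paraphrased from memory rather than taken from this patch:

	static void __enable_runtime(struct rq *rq)
	{
		rt_rq_iter_t iter;
		struct rt_rq *rt_rq;

		if (unlikely(!scheduler_running))
			return;		/* too early: does nothing while sched_init() runs */

		/* Reset each runqueue's bandwidth from its group's pool. */
		for_each_rt_rq(rt_rq, iter, rq) {
			struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

			raw_spin_lock(&rt_b->rt_runtime_lock);
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_runtime = rt_b->rt_runtime;
			rt_rq->rt_time = 0;
			rt_rq->rt_throttled = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
			raw_spin_unlock(&rt_b->rt_runtime_lock);
		}
	}
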
+3 -2
kernel/sched/deadline.c
···
 	if (dl_se == &rq->fair_server)
 		return;
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	/*
 	 * Because -- for now -- we share the rt bandwidth, we need to
 	 * account our runtime there too, otherwise actual rt tasks
···
 		rt_rq->rt_time += delta_exec;
 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 	}
+#endif
 }
 
 /*
···
 	 * this before getting generic.
 	 */
 	if (!dl_server(dl_se)) {
-		/* Disabled */
-		u64 runtime = 0;
+		u64 runtime = 50 * NSEC_PER_MSEC;
 		u64 period = 1000 * NSEC_PER_MSEC;
 
 		dl_server_apply_params(dl_se, runtime, period, 1);
+3
kernel/sched/debug.c
···
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 
 	PU(rt_nr_running);
+
+#ifdef CONFIG_RT_GROUP_SCHED
 	P(rt_throttled);
 	PN(rt_time);
 	PN(rt_runtime);
+#endif
 
 #undef PN
 #undef PU
+107 -135
kernel/sched/rt.c
···
 /* More than 4 hours if BW_SHIFT equals 20. */
 static const u64 max_rt_runtime = MAX_BW;
 
-static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
-
-struct rt_bandwidth def_rt_bandwidth;
-
 /*
  * period over which we measure -rt task CPU usage in us.
  * default: 1s
···
 }
 late_initcall(sched_rt_sysctl_init);
 #endif
+
+void init_rt_rq(struct rt_rq *rt_rq)
+{
+	struct rt_prio_array *array;
+	int i;
+
+	array = &rt_rq->active;
+	for (i = 0; i < MAX_RT_PRIO; i++) {
+		INIT_LIST_HEAD(array->queue + i);
+		__clear_bit(i, array->bitmap);
+	}
+	/* delimiter for bitsearch: */
+	__set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP
+	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
+	rt_rq->highest_prio.next = MAX_RT_PRIO-1;
+	rt_rq->overloaded = 0;
+	plist_head_init(&rt_rq->pushable_tasks);
+#endif /* CONFIG_SMP */
+	/* We start is dequeued state, because no RT tasks are queued */
+	rt_rq->rt_queued = 0;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	rt_rq->rt_time = 0;
+	rt_rq->rt_throttled = 0;
+	rt_rq->rt_runtime = 0;
+	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+#endif
+}
+
+#ifdef CONFIG_RT_GROUP_SCHED
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 {
···
 	do_start_rt_bandwidth(rt_b);
 }
 
-void init_rt_rq(struct rt_rq *rt_rq)
-{
-	struct rt_prio_array *array;
-	int i;
-
-	array = &rt_rq->active;
-	for (i = 0; i < MAX_RT_PRIO; i++) {
-		INIT_LIST_HEAD(array->queue + i);
-		__clear_bit(i, array->bitmap);
-	}
-	/* delimiter for bit-search: */
-	__set_bit(MAX_RT_PRIO, array->bitmap);
-
-#if defined CONFIG_SMP
-	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
-	rt_rq->highest_prio.next = MAX_RT_PRIO-1;
-	rt_rq->overloaded = 0;
-	plist_head_init(&rt_rq->pushable_tasks);
-#endif /* CONFIG_SMP */
-	/* We start is dequeued state, because no RT tasks are queued */
-	rt_rq->rt_queued = 0;
-
-	rt_rq->rt_time = 0;
-	rt_rq->rt_throttled = 0;
-	rt_rq->rt_runtime = 0;
-	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
-}
-
-#ifdef CONFIG_RT_GROUP_SCHED
 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	hrtimer_cancel(&rt_b->rt_period_timer);
···
 {
 	if (tg->rt_se)
 		destroy_rt_bandwidth(&tg->rt_bandwidth);
-
 }
 
 void free_rt_sched_group(struct task_group *tg)
···
 	if (!tg->rt_se)
 		goto err;
 
-	init_rt_bandwidth(&tg->rt_bandwidth,
-			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
+	init_rt_bandwidth(&tg->rt_bandwidth, ktime_to_ns(global_rt_period()), 0);
 
 	for_each_possible_cpu(i) {
 		rt_rq = kzalloc_node(sizeof(struct rt_rq),
···
 	return &rt_rq->tg->rt_bandwidth;
 }
 
-#else /* !CONFIG_RT_GROUP_SCHED */
-
-static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
-{
-	return rt_rq->rt_runtime;
-}
-
-static inline u64 sched_rt_period(struct rt_rq *rt_rq)
-{
-	return ktime_to_ns(def_rt_bandwidth.rt_period);
-}
-
-typedef struct rt_rq *rt_rq_iter_t;
-
-#define for_each_rt_rq(rt_rq, iter, rq) \
-	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
-
-#define for_each_sched_rt_entity(rt_se) \
-	for (; rt_se; rt_se = NULL)
-
-static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
-{
-	return NULL;
-}
-
-static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
-{
-	struct rq *rq = rq_of_rt_rq(rt_rq);
-
-	if (!rt_rq->rt_nr_running)
-		return;
-
-	enqueue_top_rt_rq(rt_rq);
-	resched_curr(rq);
-}
-
-static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
-{
-	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
-}
-
-static inline int rt_rq_throttled(struct rt_rq *rt_rq)
-{
-	return rt_rq->rt_throttled;
-}
-
-static inline const struct cpumask *sched_rt_period_mask(void)
-{
-	return cpu_online_mask;
-}
-
-static inline
-struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
-{
-	return &cpu_rq(cpu)->rt;
-}
-
-static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
-{
-	return &def_rt_bandwidth;
-}
-
-#endif /* CONFIG_RT_GROUP_SCHED */
-
 bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
···
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
-#ifdef CONFIG_RT_GROUP_SCHED
+
 	/*
 	 * FIXME: isolated CPUs should really leave the root task group,
 	 * whether they are isolcpus or were isolated via cpusets, lest
···
 	 */
 	if (rt_b == &root_task_group.rt_bandwidth)
 		span = cpu_online_mask;
-#endif
+
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
···
 	return idle;
 }
 
-static inline int rt_se_prio(struct sched_rt_entity *rt_se)
-{
-#ifdef CONFIG_RT_GROUP_SCHED
-	struct rt_rq *rt_rq = group_rt_rq(rt_se);
-
-	if (rt_rq)
-		return rt_rq->highest_prio.curr;
-#endif
-
-	return rt_task_of(rt_se)->prio;
-}
-
 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
 	u64 runtime = sched_rt_runtime(rt_rq);
···
 	return 0;
 }
 
+#else /* !CONFIG_RT_GROUP_SCHED */
+
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
+#define for_each_sched_rt_entity(rt_se) \
+	for (; rt_se; rt_se = NULL)
+
+static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+{
+	return NULL;
+}
+
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+{
+	struct rq *rq = rq_of_rt_rq(rt_rq);
+
+	if (!rt_rq->rt_nr_running)
+		return;
+
+	enqueue_top_rt_rq(rt_rq);
+	resched_curr(rq);
+}
+
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
+{
+	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
+}
+
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return false;
+}
+
+static inline const struct cpumask *sched_rt_period_mask(void)
+{
+	return cpu_online_mask;
+}
+
+static inline
+struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
+{
+	return &cpu_rq(cpu)->rt;
+}
+
+#ifdef CONFIG_SMP
+static void __enable_runtime(struct rq *rq) { }
+static void __disable_runtime(struct rq *rq) { }
+#endif
+
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+static inline int rt_se_prio(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+	if (rt_rq)
+		return rt_rq->highest_prio.curr;
+#endif
+
+	return rt_task_of(rt_se)->prio;
+}
+
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
···
 static void update_curr_rt(struct rq *rq)
 {
 	struct task_struct *curr = rq->curr;
-	struct sched_rt_entity *rt_se = &curr->rt;
 	s64 delta_exec;
 
 	if (curr->sched_class != &rt_sched_class)
···
 	delta_exec = update_curr_common(rq);
 	if (unlikely(delta_exec <= 0))
 		return;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity *rt_se = &curr->rt;
 
 	if (!rt_bandwidth_enabled())
 		return;
···
 			do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
 		}
 	}
+#endif
 }
 
 static void
···
 static void
 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
-	start_rt_bandwidth(&def_rt_bandwidth);
 }
 
 static inline
···
 #ifdef CONFIG_SYSCTL
 static int sched_rt_global_constraints(void)
 {
-	unsigned long flags;
-	int i;
-
-	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-	for_each_possible_cpu(i) {
-		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
-
-		raw_spin_lock(&rt_rq->rt_runtime_lock);
-		rt_rq->rt_runtime = global_rt_runtime();
-		raw_spin_unlock(&rt_rq->rt_runtime_lock);
-	}
-	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
-
 	return 0;
 }
 #endif /* CONFIG_SYSCTL */
···
 
 static void sched_rt_do_global(void)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-	def_rt_bandwidth.rt_runtime = global_rt_runtime();
-	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
-	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 }
 
 static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer,
+1 -2
kernel/sched/sched.h
···
 #endif /* CONFIG_SMP */
 	int			rt_queued;
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	int			rt_throttled;
 	u64			rt_time;
 	u64			rt_runtime;
 	/* Nests inside the rq lock: */
 	raw_spinlock_t		rt_runtime_lock;
 
-#ifdef CONFIG_RT_GROUP_SCHED
 	unsigned int		rt_nr_boosted;
 
 	struct rq		*rq;
···
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
 
-extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);