Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched/deadline: Rebuild root domain accounting after every update

Rebuilding of root domain accounting information (total_bw) is
currently broken in some cases, e.g. suspend/resume on aarch64. The
problem is that the way we keep track of domain changes and try to add
bandwidth back is convoluted and fragile.

Fix it by simplifying things: make sure bandwidth accounting is cleared
and completely restored after root domain changes (once the root domains
are stable again).

To be sure we always call dl_rebuild_rd_accounting() while holding
cpuset_mutex, we also add a cpuset_reset_sched_domains() wrapper.

Fixes: 53916d5fd3c0 ("sched/deadline: Check bandwidth overflow earlier for hotplug")
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Co-developed-by: Waiman Long <llong@redhat.com>
Signed-off-by: Waiman Long <llong@redhat.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/Z9MRfeJKJUOyUSto@jlelli-thinkpadt14gen4.remote.csb

authored by

Juri Lelli and committed by
Peter Zijlstra
2ff899e3 45007c6f

+38 -15
+6
include/linux/cpuset.h
··· 128 128 extern void rebuild_sched_domains(void); 129 129 130 130 extern void cpuset_print_current_mems_allowed(void); 131 + extern void cpuset_reset_sched_domains(void); 131 132 132 133 /* 133 134 * read_mems_allowed_begin is required when making decisions involving ··· 261 260 } 262 261 263 262 static inline void rebuild_sched_domains(void) 263 + { 264 + partition_sched_domains(1, NULL, NULL); 265 + } 266 + 267 + static inline void cpuset_reset_sched_domains(void) 264 268 { 265 269 partition_sched_domains(1, NULL, NULL); 266 270 }
+1
include/linux/sched/deadline.h
··· 34 34 struct root_domain; 35 35 extern void dl_add_task_root_domain(struct task_struct *p); 36 36 extern void dl_clear_root_domain(struct root_domain *rd); 37 + extern void dl_clear_root_domain_cpu(int cpu); 37 38 38 39 #endif /* CONFIG_SMP */ 39 40
+2
include/linux/sched/topology.h
··· 166 166 return to_cpumask(sd->span); 167 167 } 168 168 169 + extern void dl_rebuild_rd_accounting(void); 170 + 169 171 extern void partition_sched_domains_locked(int ndoms_new, 170 172 cpumask_var_t doms_new[], 171 173 struct sched_domain_attr *dattr_new);
+16 -7
kernel/cgroup/cpuset.c
··· 954 954 css_task_iter_end(&it); 955 955 } 956 956 957 - static void dl_rebuild_rd_accounting(void) 957 + void dl_rebuild_rd_accounting(void) 958 958 { 959 959 struct cpuset *cs = NULL; 960 960 struct cgroup_subsys_state *pos_css; 961 + int cpu; 962 + u64 cookie = ++dl_cookie; 961 963 962 964 lockdep_assert_held(&cpuset_mutex); 963 965 lockdep_assert_cpus_held(); ··· 967 965 968 966 rcu_read_lock(); 969 967 970 - /* 971 - * Clear default root domain DL accounting, it will be computed again 972 - * if a task belongs to it. 973 - */ 974 - dl_clear_root_domain(&def_root_domain); 968 + for_each_possible_cpu(cpu) { 969 + if (dl_bw_visited(cpu, cookie)) 970 + continue; 971 + 972 + dl_clear_root_domain_cpu(cpu); 973 + } 975 974 976 975 cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { 977 976 ··· 999 996 { 1000 997 sched_domains_mutex_lock(); 1001 998 partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); 1002 - dl_rebuild_rd_accounting(); 1003 999 sched_domains_mutex_unlock(); 1004 1000 } 1005 1001 ··· 1083 1081 cpus_read_lock(); 1084 1082 rebuild_sched_domains_cpuslocked(); 1085 1083 cpus_read_unlock(); 1084 + } 1085 + 1086 + void cpuset_reset_sched_domains(void) 1087 + { 1088 + mutex_lock(&cpuset_mutex); 1089 + partition_sched_domains(1, NULL, NULL); 1090 + mutex_unlock(&cpuset_mutex); 1086 1091 } 1087 1092 1088 1093 /**
+2 -2
kernel/sched/core.c
··· 8229 8229 * operation in the resume sequence, just build a single sched 8230 8230 * domain, ignoring cpusets. 8231 8231 */ 8232 - partition_sched_domains(1, NULL, NULL); 8232 + cpuset_reset_sched_domains(); 8233 8233 if (--num_cpus_frozen) 8234 8234 return; 8235 8235 /* ··· 8248 8248 cpuset_update_active_cpus(); 8249 8249 } else { 8250 8250 num_cpus_frozen++; 8251 - partition_sched_domains(1, NULL, NULL); 8251 + cpuset_reset_sched_domains(); 8252 8252 } 8253 8253 } 8254 8254
+10 -6
kernel/sched/deadline.c
··· 166 166 } 167 167 } 168 168 169 - static inline bool dl_bw_visited(int cpu, u64 cookie) 169 + bool dl_bw_visited(int cpu, u64 cookie) 170 170 { 171 171 struct root_domain *rd = cpu_rq(cpu)->rd; 172 172 ··· 207 207 return SCHED_CAPACITY_SCALE; 208 208 } 209 209 210 - static inline bool dl_bw_visited(int cpu, u64 cookie) 210 + bool dl_bw_visited(int cpu, u64 cookie) 211 211 { 212 212 return false; 213 213 } ··· 2981 2981 rd->dl_bw.total_bw = 0; 2982 2982 2983 2983 /* 2984 - * dl_server bandwidth is only restored when CPUs are attached to root 2985 - * domains (after domains are created or CPUs moved back to the 2986 - * default root doamin). 2984 + * dl_servers are not tasks. Since dl_add_task_root_domain ignores 2985 + * them, we need to account for them here explicitly. 2987 2986 */ 2988 2987 for_each_cpu(i, rd->span) { 2989 2988 struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server; 2990 2989 2991 2990 if (dl_server(dl_se) && cpu_active(i)) 2992 - rd->dl_bw.total_bw += dl_se->dl_bw; 2991 + __dl_add(&rd->dl_bw, dl_se->dl_bw, dl_bw_cpus(i)); 2993 2992 } 2993 + } 2994 + 2995 + void dl_clear_root_domain_cpu(int cpu) 2996 + { 2997 + dl_clear_root_domain(cpu_rq(cpu)->rd); 2994 2998 } 2995 2999 2996 3000 #endif /* CONFIG_SMP */
+1
kernel/sched/topology.c
··· 2791 2791 ndoms_cur = ndoms_new; 2792 2792 2793 2793 update_sched_domain_debugfs(); 2794 + dl_rebuild_rd_accounting(); 2794 2795 } 2795 2796 2796 2797 /*