Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Use cgroup_lock/unlock() to synchronize against cgroup operations

SCX hooks into CPU cgroup controller operations and read-locks
scx_cgroup_rwsem to exclude them while enabling and disabling schedulers.
While this works, it's unnecessarily complicated given that
cgroup_[un]lock() are available and thus the cgroup operations can be locked
out that way.

Drop scx_cgroup_rwsem locking from the tg on/offline and cgroup [can_]attach
operations. Instead, grab cgroup_lock() from scx_cgroup_lock(). Drop
scx_cgroup_finish_attach() which is no longer necessary. Drop the now
unnecessary rcu locking and css ref bumping in scx_cgroup_init() and
scx_cgroup_exit().

As the scx_group_set_weight/bandwidth() paths aren't protected by
cgroup_lock(), rename scx_cgroup_rwsem to scx_cgroup_ops_rwsem and retain
the locking there.

This is overall simpler and will also allow enable/disable paths to
synchronize against cgroup changes independent of the CPU controller.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Andrea Righi <arighi@nvidia.com>

Tejun Heo a5bd6ba3 bcb7c230

+14 -56
-2
kernel/sched/core.c
··· 9362 9362 9363 9363 cgroup_taskset_for_each(task, css, tset) 9364 9364 sched_move_task(task, false); 9365 - 9366 - scx_cgroup_finish_attach(); 9367 9365 } 9368 9366 9369 9367 static void cpu_cgroup_cancel_attach(struct cgroup_taskset *tset)
+14 -52
kernel/sched/ext.c
··· 3055 3055 3056 3056 #ifdef CONFIG_EXT_GROUP_SCHED 3057 3057 3058 - DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); 3058 + DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_ops_rwsem); 3059 3059 static bool scx_cgroup_enabled; 3060 3060 3061 3061 void scx_tg_init(struct task_group *tg) ··· 3071 3071 int ret = 0; 3072 3072 3073 3073 WARN_ON_ONCE(tg->scx.flags & (SCX_TG_ONLINE | SCX_TG_INITED)); 3074 - 3075 - percpu_down_read(&scx_cgroup_rwsem); 3076 3074 3077 3075 if (scx_cgroup_enabled) { 3078 3076 if (SCX_HAS_OP(sch, cgroup_init)) { ··· 3091 3093 tg->scx.flags |= SCX_TG_ONLINE; 3092 3094 } 3093 3095 3094 - percpu_up_read(&scx_cgroup_rwsem); 3095 3096 return ret; 3096 3097 } 3097 3098 ··· 3100 3103 3101 3104 WARN_ON_ONCE(!(tg->scx.flags & SCX_TG_ONLINE)); 3102 3105 3103 - percpu_down_read(&scx_cgroup_rwsem); 3104 - 3105 3106 if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_exit) && 3106 3107 (tg->scx.flags & SCX_TG_INITED)) 3107 3108 SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL, 3108 3109 tg->css.cgroup); 3109 3110 tg->scx.flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED); 3110 - 3111 - percpu_up_read(&scx_cgroup_rwsem); 3112 3111 } 3113 3112 3114 3113 int scx_cgroup_can_attach(struct cgroup_taskset *tset) ··· 3113 3120 struct cgroup_subsys_state *css; 3114 3121 struct task_struct *p; 3115 3122 int ret; 3116 - 3117 - /* released in scx_finish/cancel_attach() */ 3118 - percpu_down_read(&scx_cgroup_rwsem); 3119 3123 3120 3124 if (!scx_cgroup_enabled) 3121 3125 return 0; ··· 3153 3163 p->scx.cgrp_moving_from = NULL; 3154 3164 } 3155 3165 3156 - percpu_up_read(&scx_cgroup_rwsem); 3157 3166 return ops_sanitize_err(sch, "cgroup_prep_move", ret); 3158 3167 } 3159 3168 ··· 3175 3186 p->scx.cgrp_moving_from = NULL; 3176 3187 } 3177 3188 3178 - void scx_cgroup_finish_attach(void) 3179 - { 3180 - percpu_up_read(&scx_cgroup_rwsem); 3181 - } 3182 - 3183 3189 void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) 3184 3190 { 3185 3191 struct scx_sched *sch = scx_root; ··· 3182 3198 
struct task_struct *p; 3183 3199 3184 3200 if (!scx_cgroup_enabled) 3185 - goto out_unlock; 3201 + return; 3186 3202 3187 3203 cgroup_taskset_for_each(p, css, tset) { 3188 3204 if (SCX_HAS_OP(sch, cgroup_cancel_move) && ··· 3191 3207 p, p->scx.cgrp_moving_from, css->cgroup); 3192 3208 p->scx.cgrp_moving_from = NULL; 3193 3209 } 3194 - out_unlock: 3195 - percpu_up_read(&scx_cgroup_rwsem); 3196 3210 } 3197 3211 3198 3212 void scx_group_set_weight(struct task_group *tg, unsigned long weight) 3199 3213 { 3200 3214 struct scx_sched *sch = scx_root; 3201 3215 3202 - percpu_down_read(&scx_cgroup_rwsem); 3216 + percpu_down_read(&scx_cgroup_ops_rwsem); 3203 3217 3204 3218 if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) && 3205 3219 tg->scx.weight != weight) ··· 3206 3224 3207 3225 tg->scx.weight = weight; 3208 3226 3209 - percpu_up_read(&scx_cgroup_rwsem); 3227 + percpu_up_read(&scx_cgroup_ops_rwsem); 3210 3228 } 3211 3229 3212 3230 void scx_group_set_idle(struct task_group *tg, bool idle) ··· 3219 3237 { 3220 3238 struct scx_sched *sch = scx_root; 3221 3239 3222 - percpu_down_read(&scx_cgroup_rwsem); 3240 + percpu_down_read(&scx_cgroup_ops_rwsem); 3223 3241 3224 3242 if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) && 3225 3243 (tg->scx.bw_period_us != period_us || ··· 3232 3250 tg->scx.bw_quota_us = quota_us; 3233 3251 tg->scx.bw_burst_us = burst_us; 3234 3252 3235 - percpu_up_read(&scx_cgroup_rwsem); 3253 + percpu_up_read(&scx_cgroup_ops_rwsem); 3236 3254 } 3237 3255 3238 3256 static void scx_cgroup_lock(void) 3239 3257 { 3240 - percpu_down_write(&scx_cgroup_rwsem); 3258 + percpu_down_write(&scx_cgroup_ops_rwsem); 3259 + cgroup_lock(); 3241 3260 } 3242 3261 3243 3262 static void scx_cgroup_unlock(void) 3244 3263 { 3245 - percpu_up_write(&scx_cgroup_rwsem); 3264 + cgroup_unlock(); 3265 + percpu_up_write(&scx_cgroup_ops_rwsem); 3246 3266 } 3247 3267 3248 3268 #else /* CONFIG_EXT_GROUP_SCHED */ 3249 3269 3250 - static inline void 
scx_cgroup_lock(void) {} 3251 - static inline void scx_cgroup_unlock(void) {} 3270 + static void scx_cgroup_lock(void) {} 3271 + static void scx_cgroup_unlock(void) {} 3252 3272 3253 3273 #endif /* CONFIG_EXT_GROUP_SCHED */ 3254 3274 ··· 3366 3382 { 3367 3383 struct cgroup_subsys_state *css; 3368 3384 3369 - percpu_rwsem_assert_held(&scx_cgroup_rwsem); 3370 - 3371 3385 scx_cgroup_enabled = false; 3372 3386 3373 3387 /* 3374 - * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk 3388 + * scx_tg_on/offline() are excluded through cgroup_lock(). If we walk 3375 3389 * cgroups and exit all the inited ones, all online cgroups are exited. 3376 3390 */ 3377 - rcu_read_lock(); 3378 3391 css_for_each_descendant_post(css, &root_task_group.css) { 3379 3392 struct task_group *tg = css_tg(css); 3380 3393 ··· 3382 3401 if (!sch->ops.cgroup_exit) 3383 3402 continue; 3384 3403 3385 - if (WARN_ON_ONCE(!css_tryget(css))) 3386 - continue; 3387 - rcu_read_unlock(); 3388 - 3389 3404 SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL, 3390 3405 css->cgroup); 3391 - 3392 - rcu_read_lock(); 3393 - css_put(css); 3394 3406 } 3395 - rcu_read_unlock(); 3396 3407 } 3397 3408 3398 3409 static int scx_cgroup_init(struct scx_sched *sch) ··· 3392 3419 struct cgroup_subsys_state *css; 3393 3420 int ret; 3394 3421 3395 - percpu_rwsem_assert_held(&scx_cgroup_rwsem); 3396 - 3397 3422 /* 3398 - * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk 3423 + * scx_tg_on/offline() are excluded through cgroup_lock(). If we walk 3399 3424 * cgroups and init, all online cgroups are initialized. 
3400 3425 */ 3401 - rcu_read_lock(); 3402 3426 css_for_each_descendant_pre(css, &root_task_group.css) { 3403 3427 struct task_group *tg = css_tg(css); 3404 3428 struct scx_cgroup_init_args args = { ··· 3414 3444 continue; 3415 3445 } 3416 3446 3417 - if (WARN_ON_ONCE(!css_tryget(css))) 3418 - continue; 3419 - rcu_read_unlock(); 3420 - 3421 3447 ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init, NULL, 3422 3448 css->cgroup, &args); 3423 3449 if (ret) { ··· 3422 3456 return ret; 3423 3457 } 3424 3458 tg->scx.flags |= SCX_TG_INITED; 3425 - 3426 - rcu_read_lock(); 3427 - css_put(css); 3428 3459 } 3429 - rcu_read_unlock(); 3430 3460 3431 3461 WARN_ON_ONCE(scx_cgroup_enabled); 3432 3462 scx_cgroup_enabled = true;
-2
kernel/sched/ext.h
··· 77 77 void scx_tg_offline(struct task_group *tg); 78 78 int scx_cgroup_can_attach(struct cgroup_taskset *tset); 79 79 void scx_cgroup_move_task(struct task_struct *p); 80 - void scx_cgroup_finish_attach(void); 81 80 void scx_cgroup_cancel_attach(struct cgroup_taskset *tset); 82 81 void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); 83 82 void scx_group_set_idle(struct task_group *tg, bool idle); ··· 87 88 static inline void scx_tg_offline(struct task_group *tg) {} 88 89 static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } 89 90 static inline void scx_cgroup_move_task(struct task_struct *p) {} 90 - static inline void scx_cgroup_finish_attach(void) {} 91 91 static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {} 92 92 static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {} 93 93 static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}