Merge tag 'sched_ext-for-6.15-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

+124 -69

2 changed files

expand all

kernel

sched

ext.c

ext_idle.c

+123 -68

kernel/sched/ext.c

··· 1118 1118 current->scx.kf_mask &= ~mask; 1119 1119 } 1120 1120 1121 - #define SCX_CALL_OP(mask, op, args...) \ 1121 + /* 1122 + * Track the rq currently locked. 1123 + * 1124 + * This allows kfuncs to safely operate on rq from any scx ops callback, 1125 + * knowing which rq is already locked. 1126 + */ 1127 + static DEFINE_PER_CPU(struct rq *, locked_rq); 1128 + 1129 + static inline void update_locked_rq(struct rq *rq) 1130 + { 1131 + /* 1132 + * Check whether @rq is actually locked. This can help expose bugs 1133 + * or incorrect assumptions about the context in which a kfunc or 1134 + * callback is executed. 1135 + */ 1136 + if (rq) 1137 + lockdep_assert_rq_held(rq); 1138 + __this_cpu_write(locked_rq, rq); 1139 + } 1140 + 1141 + /* 1142 + * Return the rq currently locked from an scx callback, or NULL if no rq is 1143 + * locked. 1144 + */ 1145 + static inline struct rq *scx_locked_rq(void) 1146 + { 1147 + return __this_cpu_read(locked_rq); 1148 + } 1149 + 1150 + #define SCX_CALL_OP(mask, op, rq, args...) \ 1122 1151 do { \ 1152 + update_locked_rq(rq); \ 1123 1153 if (mask) { \ 1124 1154 scx_kf_allow(mask); \ 1125 1155 scx_ops.op(args); \ ··· 1157 1127 } else { \ 1158 1128 scx_ops.op(args); \ 1159 1129 } \ 1130 + update_locked_rq(NULL); \ 1160 1131 } while (0) 1161 1132 1162 - #define SCX_CALL_OP_RET(mask, op, args...) \ 1133 + #define SCX_CALL_OP_RET(mask, op, rq, args...) \ 1163 1134 ({ \ 1164 1135 __typeof__(scx_ops.op(args)) __ret; \ 1136 + \ 1137 + update_locked_rq(rq); \ 1165 1138 if (mask) { \ 1166 1139 scx_kf_allow(mask); \ 1167 1140 __ret = scx_ops.op(args); \ ··· 1172 1139 } else { \ 1173 1140 __ret = scx_ops.op(args); \ 1174 1141 } \ 1142 + update_locked_rq(NULL); \ 1175 1143 __ret; \ 1176 1144 }) 1177 1145 ··· 1187 1153 * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on 1188 1154 * the specific task. 1189 1155 */ 1190 - #define SCX_CALL_OP_TASK(mask, op, task, args...) \ 1156 + #define SCX_CALL_OP_TASK(mask, op, rq, task, args...) \ 1191 1157 do { \ 1192 1158 BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ 1193 1159 current->scx.kf_tasks[0] = task; \ 1194 - SCX_CALL_OP(mask, op, task, ##args); \ 1160 + SCX_CALL_OP(mask, op, rq, task, ##args); \ 1195 1161 current->scx.kf_tasks[0] = NULL; \ 1196 1162 } while (0) 1197 1163 1198 - #define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \ 1164 + #define SCX_CALL_OP_TASK_RET(mask, op, rq, task, args...) \ 1199 1165 ({ \ 1200 1166 __typeof__(scx_ops.op(task, ##args)) __ret; \ 1201 1167 BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ 1202 1168 current->scx.kf_tasks[0] = task; \ 1203 - __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \ 1169 + __ret = SCX_CALL_OP_RET(mask, op, rq, task, ##args); \ 1204 1170 current->scx.kf_tasks[0] = NULL; \ 1205 1171 __ret; \ 1206 1172 }) 1207 1173 1208 - #define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \ 1174 + #define SCX_CALL_OP_2TASKS_RET(mask, op, rq, task0, task1, args...) \ 1209 1175 ({ \ 1210 1176 __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \ 1211 1177 BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ 1212 1178 current->scx.kf_tasks[0] = task0; \ 1213 1179 current->scx.kf_tasks[1] = task1; \ 1214 - __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \ 1180 + __ret = SCX_CALL_OP_RET(mask, op, rq, task0, task1, ##args); \ 1215 1181 current->scx.kf_tasks[0] = NULL; \ 1216 1182 current->scx.kf_tasks[1] = NULL; \ 1217 1183 __ret; \ ··· 2206 2172 WARN_ON_ONCE(*ddsp_taskp); 2207 2173 *ddsp_taskp = p; 2208 2174 2209 - SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags); 2175 + SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, rq, p, enq_flags); 2210 2176 2211 2177 *ddsp_taskp = NULL; 2212 2178 if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID) ··· 2303 2269 add_nr_running(rq, 1); 2304 2270 2305 2271 if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p)) 2306 - SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags); 2272 + SCX_CALL_OP_TASK(SCX_KF_REST, runnable, rq, p, enq_flags); 2307 2273 2308 2274 if (enq_flags & SCX_ENQ_WAKEUP) 2309 2275 touch_core_sched(rq, p); ··· 2317 2283 __scx_add_event(SCX_EV_SELECT_CPU_FALLBACK, 1); 2318 2284 } 2319 2285 2320 - static void ops_dequeue(struct task_struct *p, u64 deq_flags) 2286 + static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags) 2321 2287 { 2322 2288 unsigned long opss; 2323 2289 ··· 2338 2304 BUG(); 2339 2305 case SCX_OPSS_QUEUED: 2340 2306 if (SCX_HAS_OP(dequeue)) 2341 - SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags); 2307 + SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, rq, p, deq_flags); 2342 2308 2343 2309 if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss, 2344 2310 SCX_OPSS_NONE)) ··· 2371 2337 return true; 2372 2338 } 2373 2339 2374 - ops_dequeue(p, deq_flags); 2340 + ops_dequeue(rq, p, deq_flags); 2375 2341 2376 2342 /* 2377 2343 * A currently running task which is going off @rq first gets dequeued ··· 2387 2353 */ 2388 2354 if (SCX_HAS_OP(stopping) && task_current(rq, p)) { 2389 2355 update_curr_scx(rq); 2390 - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false); 2356 + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, false); 2391 2357 } 2392 2358 2393 2359 if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p)) 2394 - SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags); 2360 + SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, rq, p, deq_flags); 2395 2361 2396 2362 if (deq_flags & SCX_DEQ_SLEEP) 2397 2363 p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP; ··· 2411 2377 struct task_struct *p = rq->curr; 2412 2378 2413 2379 if (SCX_HAS_OP(yield)) 2414 - SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL); 2380 + SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, p, NULL); 2415 2381 else 2416 2382 p->scx.slice = 0; 2417 2383 } ··· 2421 2387 struct task_struct *from = rq->curr; 2422 2388 2423 2389 if (SCX_HAS_OP(yield)) 2424 - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to); 2390 + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, from, to); 2425 2391 else 2426 2392 return false; 2427 2393 } ··· 2979 2945 * emitted in switch_class(). 2980 2946 */ 2981 2947 if (SCX_HAS_OP(cpu_acquire)) 2982 - SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL); 2948 + SCX_CALL_OP(SCX_KF_REST, cpu_acquire, rq, cpu_of(rq), NULL); 2983 2949 rq->scx.cpu_released = false; 2984 2950 } 2985 2951 ··· 3024 2990 do { 3025 2991 dspc->nr_tasks = 0; 3026 2992 3027 - SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq), 2993 + SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, rq, cpu_of(rq), 3028 2994 prev_on_scx ? prev : NULL); 3029 2995 3030 2996 flush_dispatch_buf(rq); ··· 3138 3104 * Core-sched might decide to execute @p before it is 3139 3105 * dispatched. Call ops_dequeue() to notify the BPF scheduler. 3140 3106 */ 3141 - ops_dequeue(p, SCX_DEQ_CORE_SCHED_EXEC); 3107 + ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC); 3142 3108 dispatch_dequeue(rq, p); 3143 3109 } 3144 3110 ··· 3146 3112 3147 3113 /* see dequeue_task_scx() on why we skip when !QUEUED */ 3148 3114 if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED)) 3149 - SCX_CALL_OP_TASK(SCX_KF_REST, running, p); 3115 + SCX_CALL_OP_TASK(SCX_KF_REST, running, rq, p); 3150 3116 3151 3117 clr_task_runnable(p, true); 3152 3118 ··· 3227 3193 .task = next, 3228 3194 }; 3229 3195 3230 - SCX_CALL_OP(SCX_KF_CPU_RELEASE, 3231 - cpu_release, cpu_of(rq), &args); 3196 + SCX_CALL_OP(SCX_KF_CPU_RELEASE, cpu_release, rq, cpu_of(rq), &args); 3232 3197 } 3233 3198 rq->scx.cpu_released = true; 3234 3199 } ··· 3240 3207 3241 3208 /* see dequeue_task_scx() on why we skip when !QUEUED */ 3242 3209 if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED)) 3243 - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true); 3210 + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, true); 3244 3211 3245 3212 if (p->scx.flags & SCX_TASK_QUEUED) { 3246 3213 set_task_runnable(rq, p); ··· 3381 3348 * verifier. 3382 3349 */ 3383 3350 if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a))) 3384 - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, 3351 + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, NULL, 3385 3352 (struct task_struct *)a, 3386 3353 (struct task_struct *)b); 3387 3354 else ··· 3418 3385 *ddsp_taskp = p; 3419 3386 3420 3387 cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU, 3421 - select_cpu, p, prev_cpu, wake_flags); 3388 + select_cpu, NULL, p, prev_cpu, wake_flags); 3422 3389 p->scx.selected_cpu = cpu; 3423 3390 *ddsp_taskp = NULL; 3424 3391 if (ops_cpu_valid(cpu, "from ops.select_cpu()")) ··· 3463 3430 * designation pointless. Cast it away when calling the operation. 3464 3431 */ 3465 3432 if (SCX_HAS_OP(set_cpumask)) 3466 - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, 3467 - (struct cpumask *)p->cpus_ptr); 3433 + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, NULL, 3434 + p, (struct cpumask *)p->cpus_ptr); 3468 3435 } 3469 3436 3470 3437 static void handle_hotplug(struct rq *rq, bool online) ··· 3477 3444 scx_idle_update_selcpu_topology(&scx_ops); 3478 3445 3479 3446 if (online && SCX_HAS_OP(cpu_online)) 3480 - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu); 3447 + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, NULL, cpu); 3481 3448 else if (!online && SCX_HAS_OP(cpu_offline)) 3482 - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu); 3449 + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, NULL, cpu); 3483 3450 else 3484 3451 scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG, 3485 3452 "cpu %d going %s, exiting scheduler", cpu, ··· 3583 3550 curr->scx.slice = 0; 3584 3551 touch_core_sched(rq, curr); 3585 3552 } else if (SCX_HAS_OP(tick)) { 3586 - SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr); 3553 + SCX_CALL_OP_TASK(SCX_KF_REST, tick, rq, curr); 3587 3554 } 3588 3555 3589 3556 if (!curr->scx.slice) ··· 3660 3627 .fork = fork, 3661 3628 }; 3662 3629 3663 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args); 3630 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, NULL, p, &args); 3664 3631 if (unlikely(ret)) { 3665 3632 ret = ops_sanitize_err("init_task", ret); 3666 3633 return ret; ··· 3701 3668 3702 3669 static void scx_ops_enable_task(struct task_struct *p) 3703 3670 { 3671 + struct rq *rq = task_rq(p); 3704 3672 u32 weight; 3705 3673 3706 - lockdep_assert_rq_held(task_rq(p)); 3674 + lockdep_assert_rq_held(rq); 3707 3675 3708 3676 /* 3709 3677 * Set the weight before calling ops.enable() so that the scheduler ··· 3718 3684 p->scx.weight = sched_weight_to_cgroup(weight); 3719 3685 3720 3686 if (SCX_HAS_OP(enable)) 3721 - SCX_CALL_OP_TASK(SCX_KF_REST, enable, p); 3687 + SCX_CALL_OP_TASK(SCX_KF_REST, enable, rq, p); 3722 3688 scx_set_task_state(p, SCX_TASK_ENABLED); 3723 3689 3724 3690 if (SCX_HAS_OP(set_weight)) 3725 - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); 3691 + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); 3726 3692 } 3727 3693 3728 3694 static void scx_ops_disable_task(struct task_struct *p) 3729 3695 { 3730 - lockdep_assert_rq_held(task_rq(p)); 3696 + struct rq *rq = task_rq(p); 3697 + 3698 + lockdep_assert_rq_held(rq); 3731 3699 WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); 3732 3700 3733 3701 if (SCX_HAS_OP(disable)) 3734 - SCX_CALL_OP_TASK(SCX_KF_REST, disable, p); 3702 + SCX_CALL_OP_TASK(SCX_KF_REST, disable, rq, p); 3735 3703 scx_set_task_state(p, SCX_TASK_READY); 3736 3704 } 3737 3705 ··· 3762 3726 } 3763 3727 3764 3728 if (SCX_HAS_OP(exit_task)) 3765 - SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args); 3729 + SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, task_rq(p), p, &args); 3766 3730 scx_set_task_state(p, SCX_TASK_NONE); 3767 3731 } 3768 3732 ··· 3871 3835 3872 3836 p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight)); 3873 3837 if (SCX_HAS_OP(set_weight)) 3874 - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); 3838 + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); 3875 3839 } 3876 3840 3877 3841 static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio) ··· 3887 3851 * different scheduler class. Keep the BPF scheduler up-to-date. 3888 3852 */ 3889 3853 if (SCX_HAS_OP(set_cpumask)) 3890 - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, 3891 - (struct cpumask *)p->cpus_ptr); 3854 + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, rq, 3855 + p, (struct cpumask *)p->cpus_ptr); 3892 3856 } 3893 3857 3894 3858 static void switched_from_scx(struct rq *rq, struct task_struct *p) ··· 3949 3913 struct scx_cgroup_init_args args = 3950 3914 { .weight = tg->scx_weight }; 3951 3915 3952 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, 3916 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, 3953 3917 tg->css.cgroup, &args); 3954 3918 if (ret) 3955 3919 ret = ops_sanitize_err("cgroup_init", ret); ··· 3971 3935 percpu_down_read(&scx_cgroup_rwsem); 3972 3936 3973 3937 if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & SCX_TG_INITED)) 3974 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup); 3938 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, tg->css.cgroup); 3975 3939 tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED); 3976 3940 3977 3941 percpu_up_read(&scx_cgroup_rwsem); ··· 4004 3968 continue; 4005 3969 4006 3970 if (SCX_HAS_OP(cgroup_prep_move)) { 4007 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, 3971 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, NULL, 4008 3972 p, from, css->cgroup); 4009 3973 if (ret) 4010 3974 goto err; ··· 4018 3982 err: 4019 3983 cgroup_taskset_for_each(p, css, tset) { 4020 3984 if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) 4021 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, 4022 - p->scx.cgrp_moving_from, css->cgroup); 3985 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, 3986 + p, p->scx.cgrp_moving_from, css->cgroup); 4023 3987 p->scx.cgrp_moving_from = NULL; 4024 3988 } 4025 3989 ··· 4037 4001 * cgrp_moving_from set. 4038 4002 */ 4039 4003 if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from)) 4040 - SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p, 4041 - p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); 4004 + SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, NULL, 4005 + p, p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); 4042 4006 p->scx.cgrp_moving_from = NULL; 4043 4007 } 4044 4008 ··· 4057 4021 4058 4022 cgroup_taskset_for_each(p, css, tset) { 4059 4023 if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) 4060 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, 4061 - p->scx.cgrp_moving_from, css->cgroup); 4024 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, 4025 + p, p->scx.cgrp_moving_from, css->cgroup); 4062 4026 p->scx.cgrp_moving_from = NULL; 4063 4027 } 4064 4028 out_unlock: ··· 4071 4035 4072 4036 if (scx_cgroup_enabled && tg->scx_weight != weight) { 4073 4037 if (SCX_HAS_OP(cgroup_set_weight)) 4074 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, 4038 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, NULL, 4075 4039 tg_cgrp(tg), weight); 4076 4040 tg->scx_weight = weight; 4077 4041 } ··· 4260 4224 continue; 4261 4225 rcu_read_unlock(); 4262 4226 4263 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup); 4227 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, css->cgroup); 4264 4228 4265 4229 rcu_read_lock(); 4266 4230 css_put(css); ··· 4297 4261 continue; 4298 4262 rcu_read_unlock(); 4299 4263 4300 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, 4264 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, 4301 4265 css->cgroup, &args); 4302 4266 if (ret) { 4303 4267 css_put(css); ··· 4794 4758 } 4795 4759 4796 4760 if (scx_ops.exit) 4797 - SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei); 4761 + SCX_CALL_OP(SCX_KF_UNLOCKED, exit, NULL, ei); 4798 4762 4799 4763 cancel_delayed_work_sync(&scx_watchdog_work); 4800 4764 ··· 5001 4965 5002 4966 if (SCX_HAS_OP(dump_task)) { 5003 4967 ops_dump_init(s, " "); 5004 - SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p); 4968 + SCX_CALL_OP(SCX_KF_REST, dump_task, NULL, dctx, p); 5005 4969 ops_dump_exit(); 5006 4970 } 5007 4971 ··· 5048 5012 5049 5013 if (SCX_HAS_OP(dump)) { 5050 5014 ops_dump_init(&s, ""); 5051 - SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx); 5015 + SCX_CALL_OP(SCX_KF_UNLOCKED, dump, NULL, &dctx); 5052 5016 ops_dump_exit(); 5053 5017 } 5054 5018 ··· 5105 5069 used = seq_buf_used(&ns); 5106 5070 if (SCX_HAS_OP(dump_cpu)) { 5107 5071 ops_dump_init(&ns, " "); 5108 - SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle); 5072 + SCX_CALL_OP(SCX_KF_REST, dump_cpu, NULL, &dctx, cpu, idle); 5109 5073 ops_dump_exit(); 5110 5074 } 5111 5075 ··· 5364 5328 scx_idle_enable(ops); 5365 5329 5366 5330 if (scx_ops.init) { 5367 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init); 5331 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init, NULL); 5368 5332 if (ret) { 5369 5333 ret = ops_sanitize_err("init", ret); 5370 5334 cpus_read_unlock(); ··· 6827 6791 BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) != 6828 6792 __alignof__(struct bpf_iter_scx_dsq)); 6829 6793 6794 + /* 6795 + * next() and destroy() will be called regardless of the return value. 6796 + * Always clear $kit->dsq. 6797 + */ 6798 + kit->dsq = NULL; 6799 + 6830 6800 if (flags & ~__SCX_DSQ_ITER_USER_FLAGS) 6831 6801 return -EINVAL; 6832 6802 ··· 7119 7077 } 7120 7078 7121 7079 if (ops_cpu_valid(cpu, NULL)) { 7122 - struct rq *rq = cpu_rq(cpu); 7080 + struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq(); 7081 + struct rq_flags rf; 7082 + 7083 + /* 7084 + * When called with an rq lock held, restrict the operation 7085 + * to the corresponding CPU to prevent ABBA deadlocks. 7086 + */ 7087 + if (locked_rq && rq != locked_rq) { 7088 + scx_ops_error("Invalid target CPU %d", cpu); 7089 + return; 7090 + } 7091 + 7092 + /* 7093 + * If no rq lock is held, allow to operate on any CPU by 7094 + * acquiring the corresponding rq lock. 7095 + */ 7096 + if (!locked_rq) { 7097 + rq_lock_irqsave(rq, &rf); 7098 + update_rq_clock(rq); 7099 + } 7123 7100 7124 7101 rq->scx.cpuperf_target = perf; 7102 + cpufreq_update_util(rq, 0); 7125 7103 7126 - rcu_read_lock_sched_notrace(); 7127 - cpufreq_update_util(cpu_rq(cpu), 0); 7128 - rcu_read_unlock_sched_notrace(); 7104 + if (!locked_rq) 7105 + rq_unlock_irqrestore(rq, &rf); 7129 7106 } 7130 7107 } 7131 7108 ··· 7375 7314 BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE) 7376 7315 BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE) 7377 7316 BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) 7378 - BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE) 7379 - BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE) 7380 - BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE) 7381 - BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) 7382 - BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) 7383 - BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) 7384 7317 BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) 7385 7318 BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) 7386 7319 BTF_ID_FLAGS(func, scx_bpf_cpu_rq)

+1 -1

kernel/sched/ext_idle.c

··· 674 674 * managed by put_prev_task_idle()/set_next_task_idle(). 675 675 */ 676 676 if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq)) 677 - SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle); 677 + SCX_CALL_OP(SCX_KF_REST, update_idle, rq, cpu_of(rq), idle); 678 678 679 679 /* 680 680 * Update the idle masks:

Configure Feed

Configure Feed