Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'sched_ext-for-6.15-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:
"A little bit invasive for rc6 but they're important fixes, pass tests
fine and won't break anything outside sched_ext:

- scx_bpf_cpuperf_set() calls internal functions that require the rq
to be locked. It assumed that the BPF caller always has the rq
locked, but that's not always true. Fix it by tracking which rq is
currently locked by the CPU and grabbing the rq lock if necessary

- bpf_iter_scx_dsq_new() was leaving the DSQ iterator in an
uninitialized state after an error. However, next() and destroy()
can be called on an iterator which failed initialization, so the
iterator always needs to be initialized even after an init error.
Fix by always initializing the iterator

- Remove duplicate BTF_ID_FLAGS() entries"

* tag 'sched_ext-for-6.15-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
sched_ext: bpf_iter_scx_dsq_new() should always initialize iterator
sched_ext: Fix rq lock state in hotplug ops
sched_ext: Remove duplicate BTF_ID_FLAGS definitions
sched_ext: Fix missing rq lock in scx_bpf_cpuperf_set()
sched_ext: Track currently locked rq

+124 -69
+123 -68
kernel/sched/ext.c
··· 1118 1118 current->scx.kf_mask &= ~mask; 1119 1119 } 1120 1120 1121 - #define SCX_CALL_OP(mask, op, args...) \ 1121 + /* 1122 + * Track the rq currently locked. 1123 + * 1124 + * This allows kfuncs to safely operate on rq from any scx ops callback, 1125 + * knowing which rq is already locked. 1126 + */ 1127 + static DEFINE_PER_CPU(struct rq *, locked_rq); 1128 + 1129 + static inline void update_locked_rq(struct rq *rq) 1130 + { 1131 + /* 1132 + * Check whether @rq is actually locked. This can help expose bugs 1133 + * or incorrect assumptions about the context in which a kfunc or 1134 + * callback is executed. 1135 + */ 1136 + if (rq) 1137 + lockdep_assert_rq_held(rq); 1138 + __this_cpu_write(locked_rq, rq); 1139 + } 1140 + 1141 + /* 1142 + * Return the rq currently locked from an scx callback, or NULL if no rq is 1143 + * locked. 1144 + */ 1145 + static inline struct rq *scx_locked_rq(void) 1146 + { 1147 + return __this_cpu_read(locked_rq); 1148 + } 1149 + 1150 + #define SCX_CALL_OP(mask, op, rq, args...) \ 1122 1151 do { \ 1152 + update_locked_rq(rq); \ 1123 1153 if (mask) { \ 1124 1154 scx_kf_allow(mask); \ 1125 1155 scx_ops.op(args); \ ··· 1157 1127 } else { \ 1158 1128 scx_ops.op(args); \ 1159 1129 } \ 1130 + update_locked_rq(NULL); \ 1160 1131 } while (0) 1161 1132 1162 - #define SCX_CALL_OP_RET(mask, op, args...) \ 1133 + #define SCX_CALL_OP_RET(mask, op, rq, args...) \ 1163 1134 ({ \ 1164 1135 __typeof__(scx_ops.op(args)) __ret; \ 1136 + \ 1137 + update_locked_rq(rq); \ 1165 1138 if (mask) { \ 1166 1139 scx_kf_allow(mask); \ 1167 1140 __ret = scx_ops.op(args); \ ··· 1172 1139 } else { \ 1173 1140 __ret = scx_ops.op(args); \ 1174 1141 } \ 1142 + update_locked_rq(NULL); \ 1175 1143 __ret; \ 1176 1144 }) 1177 1145 ··· 1187 1153 * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on 1188 1154 * the specific task. 1189 1155 */ 1190 - #define SCX_CALL_OP_TASK(mask, op, task, args...) 
\ 1156 + #define SCX_CALL_OP_TASK(mask, op, rq, task, args...) \ 1191 1157 do { \ 1192 1158 BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ 1193 1159 current->scx.kf_tasks[0] = task; \ 1194 - SCX_CALL_OP(mask, op, task, ##args); \ 1160 + SCX_CALL_OP(mask, op, rq, task, ##args); \ 1195 1161 current->scx.kf_tasks[0] = NULL; \ 1196 1162 } while (0) 1197 1163 1198 - #define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \ 1164 + #define SCX_CALL_OP_TASK_RET(mask, op, rq, task, args...) \ 1199 1165 ({ \ 1200 1166 __typeof__(scx_ops.op(task, ##args)) __ret; \ 1201 1167 BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ 1202 1168 current->scx.kf_tasks[0] = task; \ 1203 - __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \ 1169 + __ret = SCX_CALL_OP_RET(mask, op, rq, task, ##args); \ 1204 1170 current->scx.kf_tasks[0] = NULL; \ 1205 1171 __ret; \ 1206 1172 }) 1207 1173 1208 - #define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \ 1174 + #define SCX_CALL_OP_2TASKS_RET(mask, op, rq, task0, task1, args...) 
\ 1209 1175 ({ \ 1210 1176 __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \ 1211 1177 BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ 1212 1178 current->scx.kf_tasks[0] = task0; \ 1213 1179 current->scx.kf_tasks[1] = task1; \ 1214 - __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \ 1180 + __ret = SCX_CALL_OP_RET(mask, op, rq, task0, task1, ##args); \ 1215 1181 current->scx.kf_tasks[0] = NULL; \ 1216 1182 current->scx.kf_tasks[1] = NULL; \ 1217 1183 __ret; \ ··· 2206 2172 WARN_ON_ONCE(*ddsp_taskp); 2207 2173 *ddsp_taskp = p; 2208 2174 2209 - SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags); 2175 + SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, rq, p, enq_flags); 2210 2176 2211 2177 *ddsp_taskp = NULL; 2212 2178 if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID) ··· 2303 2269 add_nr_running(rq, 1); 2304 2270 2305 2271 if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p)) 2306 - SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags); 2272 + SCX_CALL_OP_TASK(SCX_KF_REST, runnable, rq, p, enq_flags); 2307 2273 2308 2274 if (enq_flags & SCX_ENQ_WAKEUP) 2309 2275 touch_core_sched(rq, p); ··· 2317 2283 __scx_add_event(SCX_EV_SELECT_CPU_FALLBACK, 1); 2318 2284 } 2319 2285 2320 - static void ops_dequeue(struct task_struct *p, u64 deq_flags) 2286 + static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags) 2321 2287 { 2322 2288 unsigned long opss; 2323 2289 ··· 2338 2304 BUG(); 2339 2305 case SCX_OPSS_QUEUED: 2340 2306 if (SCX_HAS_OP(dequeue)) 2341 - SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags); 2307 + SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, rq, p, deq_flags); 2342 2308 2343 2309 if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss, 2344 2310 SCX_OPSS_NONE)) ··· 2371 2337 return true; 2372 2338 } 2373 2339 2374 - ops_dequeue(p, deq_flags); 2340 + ops_dequeue(rq, p, deq_flags); 2375 2341 2376 2342 /* 2377 2343 * A currently running task which is going off @rq first gets dequeued ··· 2387 2353 */ 2388 2354 if (SCX_HAS_OP(stopping) && 
task_current(rq, p)) { 2389 2355 update_curr_scx(rq); 2390 - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false); 2356 + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, false); 2391 2357 } 2392 2358 2393 2359 if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p)) 2394 - SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags); 2360 + SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, rq, p, deq_flags); 2395 2361 2396 2362 if (deq_flags & SCX_DEQ_SLEEP) 2397 2363 p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP; ··· 2411 2377 struct task_struct *p = rq->curr; 2412 2378 2413 2379 if (SCX_HAS_OP(yield)) 2414 - SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL); 2380 + SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, p, NULL); 2415 2381 else 2416 2382 p->scx.slice = 0; 2417 2383 } ··· 2421 2387 struct task_struct *from = rq->curr; 2422 2388 2423 2389 if (SCX_HAS_OP(yield)) 2424 - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to); 2390 + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, from, to); 2425 2391 else 2426 2392 return false; 2427 2393 } ··· 2979 2945 * emitted in switch_class(). 2980 2946 */ 2981 2947 if (SCX_HAS_OP(cpu_acquire)) 2982 - SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL); 2948 + SCX_CALL_OP(SCX_KF_REST, cpu_acquire, rq, cpu_of(rq), NULL); 2983 2949 rq->scx.cpu_released = false; 2984 2950 } 2985 2951 ··· 3024 2990 do { 3025 2991 dspc->nr_tasks = 0; 3026 2992 3027 - SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq), 2993 + SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, rq, cpu_of(rq), 3028 2994 prev_on_scx ? prev : NULL); 3029 2995 3030 2996 flush_dispatch_buf(rq); ··· 3138 3104 * Core-sched might decide to execute @p before it is 3139 3105 * dispatched. Call ops_dequeue() to notify the BPF scheduler. 
3140 3106 */ 3141 - ops_dequeue(p, SCX_DEQ_CORE_SCHED_EXEC); 3107 + ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC); 3142 3108 dispatch_dequeue(rq, p); 3143 3109 } 3144 3110 ··· 3146 3112 3147 3113 /* see dequeue_task_scx() on why we skip when !QUEUED */ 3148 3114 if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED)) 3149 - SCX_CALL_OP_TASK(SCX_KF_REST, running, p); 3115 + SCX_CALL_OP_TASK(SCX_KF_REST, running, rq, p); 3150 3116 3151 3117 clr_task_runnable(p, true); 3152 3118 ··· 3227 3193 .task = next, 3228 3194 }; 3229 3195 3230 - SCX_CALL_OP(SCX_KF_CPU_RELEASE, 3231 - cpu_release, cpu_of(rq), &args); 3196 + SCX_CALL_OP(SCX_KF_CPU_RELEASE, cpu_release, rq, cpu_of(rq), &args); 3232 3197 } 3233 3198 rq->scx.cpu_released = true; 3234 3199 } ··· 3240 3207 3241 3208 /* see dequeue_task_scx() on why we skip when !QUEUED */ 3242 3209 if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED)) 3243 - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true); 3210 + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, true); 3244 3211 3245 3212 if (p->scx.flags & SCX_TASK_QUEUED) { 3246 3213 set_task_runnable(rq, p); ··· 3381 3348 * verifier. 3382 3349 */ 3383 3350 if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a))) 3384 - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, 3351 + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, NULL, 3385 3352 (struct task_struct *)a, 3386 3353 (struct task_struct *)b); 3387 3354 else ··· 3418 3385 *ddsp_taskp = p; 3419 3386 3420 3387 cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU, 3421 - select_cpu, p, prev_cpu, wake_flags); 3388 + select_cpu, NULL, p, prev_cpu, wake_flags); 3422 3389 p->scx.selected_cpu = cpu; 3423 3390 *ddsp_taskp = NULL; 3424 3391 if (ops_cpu_valid(cpu, "from ops.select_cpu()")) ··· 3463 3430 * designation pointless. Cast it away when calling the operation. 
3464 3431 */ 3465 3432 if (SCX_HAS_OP(set_cpumask)) 3466 - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, 3467 - (struct cpumask *)p->cpus_ptr); 3433 + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, NULL, 3434 + p, (struct cpumask *)p->cpus_ptr); 3468 3435 } 3469 3436 3470 3437 static void handle_hotplug(struct rq *rq, bool online) ··· 3477 3444 scx_idle_update_selcpu_topology(&scx_ops); 3478 3445 3479 3446 if (online && SCX_HAS_OP(cpu_online)) 3480 - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu); 3447 + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, NULL, cpu); 3481 3448 else if (!online && SCX_HAS_OP(cpu_offline)) 3482 - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu); 3449 + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, NULL, cpu); 3483 3450 else 3484 3451 scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG, 3485 3452 "cpu %d going %s, exiting scheduler", cpu, ··· 3583 3550 curr->scx.slice = 0; 3584 3551 touch_core_sched(rq, curr); 3585 3552 } else if (SCX_HAS_OP(tick)) { 3586 - SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr); 3553 + SCX_CALL_OP_TASK(SCX_KF_REST, tick, rq, curr); 3587 3554 } 3588 3555 3589 3556 if (!curr->scx.slice) ··· 3660 3627 .fork = fork, 3661 3628 }; 3662 3629 3663 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args); 3630 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, NULL, p, &args); 3664 3631 if (unlikely(ret)) { 3665 3632 ret = ops_sanitize_err("init_task", ret); 3666 3633 return ret; ··· 3701 3668 3702 3669 static void scx_ops_enable_task(struct task_struct *p) 3703 3670 { 3671 + struct rq *rq = task_rq(p); 3704 3672 u32 weight; 3705 3673 3706 - lockdep_assert_rq_held(task_rq(p)); 3674 + lockdep_assert_rq_held(rq); 3707 3675 3708 3676 /* 3709 3677 * Set the weight before calling ops.enable() so that the scheduler ··· 3718 3684 p->scx.weight = sched_weight_to_cgroup(weight); 3719 3685 3720 3686 if (SCX_HAS_OP(enable)) 3721 - SCX_CALL_OP_TASK(SCX_KF_REST, enable, p); 3687 + SCX_CALL_OP_TASK(SCX_KF_REST, enable, rq, p); 3722 3688 
scx_set_task_state(p, SCX_TASK_ENABLED); 3723 3689 3724 3690 if (SCX_HAS_OP(set_weight)) 3725 - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); 3691 + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); 3726 3692 } 3727 3693 3728 3694 static void scx_ops_disable_task(struct task_struct *p) 3729 3695 { 3730 - lockdep_assert_rq_held(task_rq(p)); 3696 + struct rq *rq = task_rq(p); 3697 + 3698 + lockdep_assert_rq_held(rq); 3731 3699 WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); 3732 3700 3733 3701 if (SCX_HAS_OP(disable)) 3734 - SCX_CALL_OP_TASK(SCX_KF_REST, disable, p); 3702 + SCX_CALL_OP_TASK(SCX_KF_REST, disable, rq, p); 3735 3703 scx_set_task_state(p, SCX_TASK_READY); 3736 3704 } 3737 3705 ··· 3762 3726 } 3763 3727 3764 3728 if (SCX_HAS_OP(exit_task)) 3765 - SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args); 3729 + SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, task_rq(p), p, &args); 3766 3730 scx_set_task_state(p, SCX_TASK_NONE); 3767 3731 } 3768 3732 ··· 3871 3835 3872 3836 p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight)); 3873 3837 if (SCX_HAS_OP(set_weight)) 3874 - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); 3838 + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); 3875 3839 } 3876 3840 3877 3841 static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio) ··· 3887 3851 * different scheduler class. Keep the BPF scheduler up-to-date. 
3888 3852 */ 3889 3853 if (SCX_HAS_OP(set_cpumask)) 3890 - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, 3891 - (struct cpumask *)p->cpus_ptr); 3854 + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, rq, 3855 + p, (struct cpumask *)p->cpus_ptr); 3892 3856 } 3893 3857 3894 3858 static void switched_from_scx(struct rq *rq, struct task_struct *p) ··· 3949 3913 struct scx_cgroup_init_args args = 3950 3914 { .weight = tg->scx_weight }; 3951 3915 3952 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, 3916 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, 3953 3917 tg->css.cgroup, &args); 3954 3918 if (ret) 3955 3919 ret = ops_sanitize_err("cgroup_init", ret); ··· 3971 3935 percpu_down_read(&scx_cgroup_rwsem); 3972 3936 3973 3937 if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & SCX_TG_INITED)) 3974 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup); 3938 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, tg->css.cgroup); 3975 3939 tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED); 3976 3940 3977 3941 percpu_up_read(&scx_cgroup_rwsem); ··· 4004 3968 continue; 4005 3969 4006 3970 if (SCX_HAS_OP(cgroup_prep_move)) { 4007 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, 3971 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, NULL, 4008 3972 p, from, css->cgroup); 4009 3973 if (ret) 4010 3974 goto err; ··· 4018 3982 err: 4019 3983 cgroup_taskset_for_each(p, css, tset) { 4020 3984 if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) 4021 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, 4022 - p->scx.cgrp_moving_from, css->cgroup); 3985 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, 3986 + p, p->scx.cgrp_moving_from, css->cgroup); 4023 3987 p->scx.cgrp_moving_from = NULL; 4024 3988 } 4025 3989 ··· 4037 4001 * cgrp_moving_from set. 
4038 4002 */ 4039 4003 if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from)) 4040 - SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p, 4041 - p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); 4004 + SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, NULL, 4005 + p, p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); 4042 4006 p->scx.cgrp_moving_from = NULL; 4043 4007 } 4044 4008 ··· 4057 4021 4058 4022 cgroup_taskset_for_each(p, css, tset) { 4059 4023 if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) 4060 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, 4061 - p->scx.cgrp_moving_from, css->cgroup); 4024 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, 4025 + p, p->scx.cgrp_moving_from, css->cgroup); 4062 4026 p->scx.cgrp_moving_from = NULL; 4063 4027 } 4064 4028 out_unlock: ··· 4071 4035 4072 4036 if (scx_cgroup_enabled && tg->scx_weight != weight) { 4073 4037 if (SCX_HAS_OP(cgroup_set_weight)) 4074 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, 4038 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, NULL, 4075 4039 tg_cgrp(tg), weight); 4076 4040 tg->scx_weight = weight; 4077 4041 } ··· 4260 4224 continue; 4261 4225 rcu_read_unlock(); 4262 4226 4263 - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup); 4227 + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, css->cgroup); 4264 4228 4265 4229 rcu_read_lock(); 4266 4230 css_put(css); ··· 4297 4261 continue; 4298 4262 rcu_read_unlock(); 4299 4263 4300 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, 4264 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, 4301 4265 css->cgroup, &args); 4302 4266 if (ret) { 4303 4267 css_put(css); ··· 4794 4758 } 4795 4759 4796 4760 if (scx_ops.exit) 4797 - SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei); 4761 + SCX_CALL_OP(SCX_KF_UNLOCKED, exit, NULL, ei); 4798 4762 4799 4763 cancel_delayed_work_sync(&scx_watchdog_work); 4800 4764 ··· 5001 4965 5002 4966 if (SCX_HAS_OP(dump_task)) { 5003 4967 ops_dump_init(s, " "); 5004 - 
SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p); 4968 + SCX_CALL_OP(SCX_KF_REST, dump_task, NULL, dctx, p); 5005 4969 ops_dump_exit(); 5006 4970 } 5007 4971 ··· 5048 5012 5049 5013 if (SCX_HAS_OP(dump)) { 5050 5014 ops_dump_init(&s, ""); 5051 - SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx); 5015 + SCX_CALL_OP(SCX_KF_UNLOCKED, dump, NULL, &dctx); 5052 5016 ops_dump_exit(); 5053 5017 } 5054 5018 ··· 5105 5069 used = seq_buf_used(&ns); 5106 5070 if (SCX_HAS_OP(dump_cpu)) { 5107 5071 ops_dump_init(&ns, " "); 5108 - SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle); 5072 + SCX_CALL_OP(SCX_KF_REST, dump_cpu, NULL, &dctx, cpu, idle); 5109 5073 ops_dump_exit(); 5110 5074 } 5111 5075 ··· 5364 5328 scx_idle_enable(ops); 5365 5329 5366 5330 if (scx_ops.init) { 5367 - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init); 5331 + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init, NULL); 5368 5332 if (ret) { 5369 5333 ret = ops_sanitize_err("init", ret); 5370 5334 cpus_read_unlock(); ··· 6827 6791 BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) != 6828 6792 __alignof__(struct bpf_iter_scx_dsq)); 6829 6793 6794 + /* 6795 + * next() and destroy() will be called regardless of the return value. 6796 + * Always clear $kit->dsq. 6797 + */ 6798 + kit->dsq = NULL; 6799 + 6830 6800 if (flags & ~__SCX_DSQ_ITER_USER_FLAGS) 6831 6801 return -EINVAL; 6832 6802 ··· 7119 7077 } 7120 7078 7121 7079 if (ops_cpu_valid(cpu, NULL)) { 7122 - struct rq *rq = cpu_rq(cpu); 7080 + struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq(); 7081 + struct rq_flags rf; 7082 + 7083 + /* 7084 + * When called with an rq lock held, restrict the operation 7085 + * to the corresponding CPU to prevent ABBA deadlocks. 7086 + */ 7087 + if (locked_rq && rq != locked_rq) { 7088 + scx_ops_error("Invalid target CPU %d", cpu); 7089 + return; 7090 + } 7091 + 7092 + /* 7093 + * If no rq lock is held, allow to operate on any CPU by 7094 + * acquiring the corresponding rq lock. 
7095 + */ 7096 + if (!locked_rq) { 7097 + rq_lock_irqsave(rq, &rf); 7098 + update_rq_clock(rq); 7099 + } 7123 7100 7124 7101 rq->scx.cpuperf_target = perf; 7102 + cpufreq_update_util(rq, 0); 7125 7103 7126 - rcu_read_lock_sched_notrace(); 7127 - cpufreq_update_util(cpu_rq(cpu), 0); 7128 - rcu_read_unlock_sched_notrace(); 7104 + if (!locked_rq) 7105 + rq_unlock_irqrestore(rq, &rf); 7129 7106 } 7130 7107 } 7131 7108 ··· 7375 7314 BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE) 7376 7315 BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE) 7377 7316 BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) 7378 - BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE) 7379 - BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE) 7380 - BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE) 7381 - BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) 7382 - BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) 7383 - BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) 7384 7317 BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) 7385 7318 BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) 7386 7319 BTF_ID_FLAGS(func, scx_bpf_cpu_rq)
+1 -1
kernel/sched/ext_idle.c
··· 674 674 * managed by put_prev_task_idle()/set_next_task_idle(). 675 675 */ 676 676 if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq)) 677 - SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle); 677 + SCX_CALL_OP(SCX_KF_REST, update_idle, rq, cpu_of(rq), idle); 678 678 679 679 /* 680 680 * Update the idle masks: