Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'sched_ext-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext updates from Tejun Heo:

- Add support for cgroup "cpu.max" interface (a brief usage sketch follows this list)

- Code organization cleanup so that ext_idle.c doesn't depend on the
source-file-inclusion build method of sched/

- Drop UP paths in accordance with sched core changes

- Documentation and other misc changes
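
The cpu.max support in the first item is driven through the regular cgroup2 interface. Below is a minimal userspace sketch; it is not part of this commit, the cgroup name "workload" is only an example, and with a sched_ext scheduler loaded the written values are forwarded to ops.cgroup_set_bandwidth() while enforcement remains up to the BPF scheduler. It grants the group 2.5 CPUs, the same figure used in the ops.cgroup_set_bandwidth() documentation further down.

    /* Hypothetical example: cap cgroup "workload" at 2.5 CPUs via cgroup2 cpu.max. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            const char *path = "/sys/fs/cgroup/workload/cpu.max"; /* assumed cgroup path */
            const char *val = "250000 100000\n";   /* quota_us period_us -> 2.5 CPUs */
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, val, strlen(val)) != (ssize_t)strlen(val)) {
                    perror(path);
                    return 1;
            }
            close(fd);
            return 0;
    }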

* tag 'sched_ext-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
sched_ext: Fix scx_bpf_reenqueue_local() reference
sched_ext: Drop kfuncs marked for removal in 6.15
sched_ext, rcu: Eject BPF scheduler on RCU CPU stall panic
kernel/sched/ext.c: fix typo "occured" -> "occurred" in comments
sched_ext: Add support for cgroup bandwidth control interface
sched_ext, sched/core: Factor out struct scx_task_group
sched_ext: Return NULL in llc_span
sched_ext: Always use SMP versions in kernel/sched/ext_idle.h
sched_ext: Always use SMP versions in kernel/sched/ext_idle.c
sched_ext: Always use SMP versions in kernel/sched/ext.h
sched_ext: Always use SMP versions in kernel/sched/ext.c
sched_ext: Documentation: Clarify time slice handling in task lifecycle
sched_ext: Make scx_locked_rq() inline
sched_ext: Make scx_rq_bypassing() inline
sched_ext: idle: Make local functions static in ext_idle.c
sched_ext: idle: Remove unnecessary ifdef in scx_bpf_cpu_node()

12 files changed: +241 -198
Documentation/scheduler/sched-ext.rst (+8 -3)

···
     ops.runnable();         /* Task becomes ready to run */

     while (task is runnable) {
-        if (task is not in a DSQ) {
+        if (task is not in a DSQ && task->scx.slice == 0) {
             ops.enqueue();  /* Task can be added to a DSQ */

-            /* A CPU becomes available */
+            /* Any usable CPU becomes available */

             ops.dispatch(); /* Task is moved to a local DSQ */
         }
         ops.running();      /* Task starts running on its assigned CPU */
-        ops.tick();         /* Called every 1/HZ seconds */
+        while (task->scx.slice > 0 && task is runnable)
+            ops.tick();     /* Called every 1/HZ seconds */
         ops.stopping();     /* Task stops running (time slice expires or wait) */
+
+        /* Task's CPU becomes available */
+
+        ops.dispatch();     /* task->scx.slice can be refilled */
     }

     ops.quiescent();        /* Task releases its assigned CPU (wait) */
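The new final step in the loop, ops.dispatch() refilling task->scx.slice once the task's CPU frees up, is the pattern the in-tree example schedulers use when nothing else is queued. A minimal sketch of such an ops.dispatch() callback follows; it is not part of this commit, and SHARED_DSQ stands for a DSQ the scheduler is assumed to have created in ops.init():

    void BPF_STRUCT_OPS(example_dispatch, s32 cpu, struct task_struct *prev)
    {
            /* First try to pull a queued task into this CPU's local DSQ. */
            if (scx_bpf_dsq_move_to_local(SHARED_DSQ))
                    return;

            /*
             * Nothing else to run: if @prev is still runnable, refill its
             * slice so it keeps the CPU, matching the lifecycle step above.
             */
            if (prev && (prev->scx.flags & SCX_TASK_QUEUED))
                    prev->scx.slice = SCX_SLICE_DFL;
    }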
include/linux/sched/ext.h (+18 -5)

···

        /*
         * Runtime budget in nsecs. This is usually set through
-        * scx_bpf_dispatch() but can also be modified directly by the BPF
+        * scx_bpf_dsq_insert() but can also be modified directly by the BPF
         * scheduler. Automatically decreased by SCX as the task executes. On
         * depletion, a scheduling event is triggered.
         *
···

        /*
         * Used to order tasks when dispatching to the vtime-ordered priority
-        * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime()
-        * but can also be modified directly by the BPF scheduler. Modifying it
-        * while a task is queued on a dsq may mangle the ordering and is not
-        * recommended.
+        * queue of a dsq. This is usually set through
+        * scx_bpf_dsq_insert_vtime() but can also be modified directly by the
+        * BPF scheduler. Modifying it while a task is queued on a dsq may
+        * mangle the ordering and is not recommended.
         */
        u64 dsq_vtime;

···
void sched_ext_free(struct task_struct *p);
void print_scx_info(const char *log_lvl, struct task_struct *p);
void scx_softlockup(u32 dur_s);
+bool scx_rcu_cpu_stall(void);

#else   /* !CONFIG_SCHED_CLASS_EXT */

static inline void sched_ext_free(struct task_struct *p) {}
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
static inline void scx_softlockup(u32 dur_s) {}
+static inline bool scx_rcu_cpu_stall(void) { return false; }

#endif  /* CONFIG_SCHED_CLASS_EXT */
+
+struct scx_task_group {
+#ifdef CONFIG_EXT_GROUP_SCHED
+        u32             flags;          /* SCX_TG_* */
+        u32             weight;
+        u64             bw_period_us;
+        u64             bw_quota_us;
+        u64             bw_burst_us;
+#endif
+};
+
#endif  /* _LINUX_SCHED_EXT_H */
init/Kconfig (+5)

···
config GROUP_SCHED_WEIGHT
        def_bool n

+config GROUP_SCHED_BANDWIDTH
+        def_bool n
+
config FAIR_GROUP_SCHED
        bool "Group scheduling for SCHED_OTHER"
        depends on CGROUP_SCHED
···
config CFS_BANDWIDTH
        bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
        depends on FAIR_GROUP_SCHED
+        select GROUP_SCHED_BANDWIDTH
        default n
        help
          This option allows users to define CPU bandwidth rates (limits) for
···
        bool
        depends on SCHED_CLASS_EXT && CGROUP_SCHED
        select GROUP_SCHED_WEIGHT
+        select GROUP_SCHED_BANDWIDTH
        default y

endif #CGROUP_SCHED
kernel/rcu/tree_stall.h (+7)

···
{
        static int cpu_stall;

+        /*
+         * Attempt to kick out the BPF scheduler if it's installed and defer
+         * the panic to give the system a chance to recover.
+         */
+        if (scx_rcu_cpu_stall())
+                return;
+
        if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
                return;
kernel/sched/core.c (+24 -5)

···

        return 0;
}
+#endif /* CONFIG_CFS_BANDWIDTH */

+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */
static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
/* More than 203 days if BW_SHIFT equals 20. */
···
static void tg_bandwidth(struct task_group *tg,
                         u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p)
{
+#ifdef CONFIG_CFS_BANDWIDTH
        if (period_us_p)
                *period_us_p = tg_get_cfs_period(tg);
        if (quota_us_p)
                *quota_us_p = tg_get_cfs_quota(tg);
        if (burst_us_p)
                *burst_us_p = tg_get_cfs_burst(tg);
+#else /* !CONFIG_CFS_BANDWIDTH */
+        if (period_us_p)
+                *period_us_p = tg->scx.bw_period_us;
+        if (quota_us_p)
+                *quota_us_p = tg->scx.bw_quota_us;
+        if (burst_us_p)
+                *burst_us_p = tg->scx.bw_burst_us;
+#endif /* CONFIG_CFS_BANDWIDTH */
}

static u64 cpu_period_read_u64(struct cgroup_subsys_state *css,
···
                             u64 period_us, u64 quota_us, u64 burst_us)
{
        const u64 max_usec = U64_MAX / NSEC_PER_USEC;
+        int ret = 0;

        if (tg == &root_task_group)
                return -EINVAL;
···
                     burst_us + quota_us > max_bw_runtime_us))
                return -EINVAL;

-        return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+#ifdef CONFIG_CFS_BANDWIDTH
+        ret = tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+#endif /* CONFIG_CFS_BANDWIDTH */
+        if (!ret)
+                scx_group_set_bandwidth(tg, period_us, quota_us, burst_us);
+        return ret;
}

static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css,
···
        tg_bandwidth(tg, &period_us, &quota_us, NULL);
        return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
}
-#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */

#ifdef CONFIG_RT_GROUP_SCHED
static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
···
                .write_s64 = cpu_idle_write_s64,
        },
#endif
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
        {
                .name = "cfs_period_us",
                .read_u64 = cpu_period_read_u64,
···
                .read_u64 = cpu_burst_read_u64,
                .write_u64 = cpu_burst_write_u64,
        },
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
        {
                .name = "stat",
                .seq_show = cpu_cfs_stat_show,
···
        return 0;
}

-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
static int cpu_max_show(struct seq_file *sf, void *v)
{
        struct task_group *tg = css_tg(seq_css(sf));
···
                .write_s64 = cpu_idle_write_s64,
        },
#endif
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
        {
                .name = "max",
                .flags = CFTYPE_NOT_ON_ROOT,
kernel/sched/ext.c (+120 -130)

···
struct scx_cgroup_init_args {
        /* the weight of the cgroup [1..10000] */
        u32                     weight;
+
+        /* bandwidth control parameters from cpu.max and cpu.max.burst */
+        u64                     bw_period_us;
+        u64                     bw_quota_us;
+        u64                     bw_burst_us;
};

enum scx_cpu_preempt_reason {
···
         * @cgrp: cgroup whose weight is being updated
         * @weight: new weight [1..10000]
         *
-         * Update @tg's weight to @weight.
+         * Update @cgrp's weight to @weight.
         */
        void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
+
+        /**
+         * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed
+         * @cgrp: cgroup whose bandwidth is being updated
+         * @period_us: bandwidth control period
+         * @quota_us: bandwidth control quota
+         * @burst_us: bandwidth control burst
+         *
+         * Update @cgrp's bandwidth control parameters. This is from the cpu.max
+         * cgroup interface.
+         *
+         * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled
+         * to. For example, if @period_us is 1_000_000 and @quota_us is
+         * 2_500_000. @cgrp is entitled to 2.5 CPUs. @burst_us can be
+         * interpreted in the same fashion and specifies how much @cgrp can
+         * burst temporarily. The specific control mechanism and thus the
+         * interpretation of @period_us and burstiness is upto to the BPF
+         * scheduler.
+         */
+        void (*cgroup_set_bandwidth)(struct cgroup *cgrp,
+                                     u64 period_us, u64 quota_us, u64 burst_us);
+
#endif  /* CONFIG_EXT_GROUP_SCHED */

        /*
···
        /*
         * The task being enqueued was previously enqueued on the current CPU's
         * %SCX_DSQ_LOCAL, but was removed from it in a call to the
-         * bpf_scx_reenqueue_local() kfunc. If bpf_scx_reenqueue_local() was
+         * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
         * invoked in a ->cpu_release() callback, and the task is again
         * dispatched back to %SCX_LOCAL_DSQ by this current ->enqueue(), the
         * task will not be scheduled on the CPU until at least the next invocation
···
 * This allows kfuncs to safely operate on rq from any scx ops callback,
 * knowing which rq is already locked.
 */
-static DEFINE_PER_CPU(struct rq *, locked_rq);
+DEFINE_PER_CPU(struct rq *, scx_locked_rq_state);

static inline void update_locked_rq(struct rq *rq)
{
···
         */
        if (rq)
                lockdep_assert_rq_held(rq);
-        __this_cpu_write(locked_rq, rq);
-}
-
-/*
- * Return the rq currently locked from an scx callback, or NULL if no rq is
- * locked.
- */
-static inline struct rq *scx_locked_rq(void)
-{
-        return __this_cpu_read(locked_rq);
+        __this_cpu_write(scx_locked_rq_state, rq);
}

#define SCX_CALL_OP(sch, mask, op, rq, args...) \
···
 * scx_add_event - Increase an event counter for 'name' by 'cnt'
 * @sch: scx_sched to account events for
 * @name: an event name defined in struct scx_event_stats
- * @cnt: the number of the event occured
+ * @cnt: the number of the event occurred
 *
 * This can be used when preemption is not disabled.
 */
···
 * __scx_add_event - Increase an event counter for 'name' by 'cnt'
 * @sch: scx_sched to account events for
 * @name: an event name defined in struct scx_event_stats
- * @cnt: the number of the event occured
+ * @cnt: the number of the event occurred
 *
 * This should be used only when preemption is disabled.
 */
···
        int from_v = from;

        return atomic_try_cmpxchg(&scx_enable_state_var, &from_v, to);
-}
-
-static bool scx_rq_bypassing(struct rq *rq)
-{
-        return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
}

/**
···
        process_ddsp_deferred_locals(rq);
}

-#ifdef CONFIG_SMP
static void deferred_bal_cb_workfn(struct rq *rq)
{
        run_deferred(rq);
}
-#endif

static void deferred_irq_workfn(struct irq_work *irq_work)
{
···
{
        lockdep_assert_rq_held(rq);

-#ifdef CONFIG_SMP
        /*
         * If in the middle of waking up a task, task_woken_scx() will be called
         * afterwards which will then run the deferred actions, no need to
···
                                    deferred_bal_cb_workfn);
                return;
        }
-#endif
+
        /*
         * No scheduler hooks available. Queue an irq work. They are executed on
         * IRQ re-enable which may take a bit longer than the scheduler hooks.
···
        p->scx.dsq = dst_dsq;
}

-#ifdef CONFIG_SMP
/**
 * move_remote_task_to_local_dsq - Move a task from a foreign rq to a local DSQ
 * @p: task to move
···
                return false;
        }
}
-#else  /* CONFIG_SMP */
-static inline void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, struct rq *src_rq, struct rq *dst_rq) { WARN_ON_ONCE(1); }
-static inline bool task_can_run_on_remote_rq(struct scx_sched *sch, struct task_struct *p, struct rq *rq, bool enforce) { return false; }
-static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p, struct scx_dispatch_q *dsq, struct rq *task_rq) { return false; }
-#endif /* CONFIG_SMP */

/**
 * move_task_between_dsqs() - Move a task from one DSQ to another
···
{
        struct rq *src_rq = task_rq(p);
        struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
-#ifdef CONFIG_SMP
        struct rq *locked_rq = rq;
-#endif

        /*
         * We're synchronized against dequeue through DISPATCHING. As @p can't
···
                return;
        }

-#ifdef CONFIG_SMP
        if (src_rq != dst_rq &&
            unlikely(!task_can_run_on_remote_rq(sch, p, dst_rq, true))) {
                dispatch_enqueue(sch, find_global_dsq(p), p,
···
                raw_spin_rq_unlock(locked_rq);
                raw_spin_rq_lock(rq);
        }
-#else  /* CONFIG_SMP */
-        BUG();  /* control can not reach here on UP */
-#endif /* CONFIG_SMP */
}

/**
···
static enum scx_cpu_preempt_reason
preempt_reason_from_class(const struct sched_class *class)
{
-#ifdef CONFIG_SMP
        if (class == &stop_sched_class)
                return SCX_CPU_PREEMPT_STOP;
-#endif
        if (class == &dl_sched_class)
                return SCX_CPU_PREEMPT_DL;
        if (class == &rt_sched_class)
···
        struct scx_sched *sch = scx_root;
        const struct sched_class *next_class = next->sched_class;

-#ifdef CONFIG_SMP
        /*
         * Pairs with the smp_load_acquire() issued by a CPU in
         * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
         * resched.
         */
        smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
-#endif
        if (!(sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT))
                return;

···
}
#endif  /* CONFIG_SCHED_CORE */

-#ifdef CONFIG_SMP
-
static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags)
{
        struct scx_sched *sch = scx_root;
···
        rq->scx.flags &= ~SCX_RQ_ONLINE;
}

-#endif /* CONFIG_SMP */

static bool check_rq_for_timeouts(struct rq *rq)
{
···

void scx_tg_init(struct task_group *tg)
{
-        tg->scx_weight = CGROUP_WEIGHT_DFL;
+        tg->scx.weight = CGROUP_WEIGHT_DFL;
+        tg->scx.bw_period_us = default_bw_period_us();
+        tg->scx.bw_quota_us = RUNTIME_INF;
}

int scx_tg_online(struct task_group *tg)
···
        struct scx_sched *sch = scx_root;
        int ret = 0;

-        WARN_ON_ONCE(tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED));
+        WARN_ON_ONCE(tg->scx.flags & (SCX_TG_ONLINE | SCX_TG_INITED));

        percpu_down_read(&scx_cgroup_rwsem);

        if (scx_cgroup_enabled) {
                if (SCX_HAS_OP(sch, cgroup_init)) {
-                        struct scx_cgroup_init_args args =
-                                { .weight = tg->scx_weight };
+                        struct scx_cgroup_init_args args =
+                                { .weight = tg->scx.weight,
+                                  .bw_period_us = tg->scx.bw_period_us,
+                                  .bw_quota_us = tg->scx.bw_quota_us,
+                                  .bw_burst_us = tg->scx.bw_burst_us };

                        ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init,
                                              NULL, tg->css.cgroup, &args);
···
                                ret = ops_sanitize_err(sch, "cgroup_init", ret);
                }
                if (ret == 0)
-                        tg->scx_flags |= SCX_TG_ONLINE | SCX_TG_INITED;
+                        tg->scx.flags |= SCX_TG_ONLINE | SCX_TG_INITED;
        } else {
-                tg->scx_flags |= SCX_TG_ONLINE;
+                tg->scx.flags |= SCX_TG_ONLINE;
        }

        percpu_up_read(&scx_cgroup_rwsem);
···
{
        struct scx_sched *sch = scx_root;

-        WARN_ON_ONCE(!(tg->scx_flags & SCX_TG_ONLINE));
+        WARN_ON_ONCE(!(tg->scx.flags & SCX_TG_ONLINE));

        percpu_down_read(&scx_cgroup_rwsem);

        if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_exit) &&
-            (tg->scx_flags & SCX_TG_INITED))
+            (tg->scx.flags & SCX_TG_INITED))
                SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL,
                            tg->css.cgroup);
-        tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);
+        tg->scx.flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);

        percpu_up_read(&scx_cgroup_rwsem);
}
···
        percpu_down_read(&scx_cgroup_rwsem);

        if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
-            tg->scx_weight != weight)
+            tg->scx.weight != weight)
                SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
                            tg_cgrp(tg), weight);

-        tg->scx_weight = weight;
+        tg->scx.weight = weight;

        percpu_up_read(&scx_cgroup_rwsem);
}
···
void scx_group_set_idle(struct task_group *tg, bool idle)
{
        /* TODO: Implement ops->cgroup_set_idle() */
+}
+
+void scx_group_set_bandwidth(struct task_group *tg,
+                             u64 period_us, u64 quota_us, u64 burst_us)
+{
+        struct scx_sched *sch = scx_root;
+
+        percpu_down_read(&scx_cgroup_rwsem);
+
+        if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) &&
+            (tg->scx.bw_period_us != period_us ||
+             tg->scx.bw_quota_us != quota_us ||
+             tg->scx.bw_burst_us != burst_us))
+                SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_bandwidth, NULL,
+                            tg_cgrp(tg), period_us, quota_us, burst_us);
+
+        tg->scx.bw_period_us = period_us;
+        tg->scx.bw_quota_us = quota_us;
+        tg->scx.bw_burst_us = burst_us;
+
+        percpu_up_read(&scx_cgroup_rwsem);
}

static void scx_cgroup_lock(void)
···
        .put_prev_task          = put_prev_task_scx,
        .set_next_task          = set_next_task_scx,

-#ifdef CONFIG_SMP
        .select_task_rq         = select_task_rq_scx,
        .task_woken             = task_woken_scx,
        .set_cpus_allowed       = set_cpus_allowed_scx,

        .rq_online              = rq_online_scx,
        .rq_offline             = rq_offline_scx,
-#endif

        .task_tick              = task_tick_scx,

···
        css_for_each_descendant_post(css, &root_task_group.css) {
                struct task_group *tg = css_tg(css);

-                if (!(tg->scx_flags & SCX_TG_INITED))
+                if (!(tg->scx.flags & SCX_TG_INITED))
                        continue;
-                tg->scx_flags &= ~SCX_TG_INITED;
+                tg->scx.flags &= ~SCX_TG_INITED;

                if (!sch->ops.cgroup_exit)
                        continue;
···
        rcu_read_lock();
        css_for_each_descendant_pre(css, &root_task_group.css) {
                struct task_group *tg = css_tg(css);
-                struct scx_cgroup_init_args args = { .weight = tg->scx_weight };
+                struct scx_cgroup_init_args args = {
+                        .weight = tg->scx.weight,
+                        .bw_period_us = tg->scx.bw_period_us,
+                        .bw_quota_us = tg->scx.bw_quota_us,
+                        .bw_burst_us = tg->scx.bw_burst_us,
+                };

-                if ((tg->scx_flags &
+                if ((tg->scx.flags &
                     (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE)
                        continue;

                if (!sch->ops.cgroup_init) {
-                        tg->scx_flags |= SCX_TG_INITED;
+                        tg->scx.flags |= SCX_TG_INITED;
                        continue;
                }

···
                        scx_error(sch, "ops.cgroup_init() failed (%d)", ret);
                        return ret;
                }
-                tg->scx_flags |= SCX_TG_INITED;
+                tg->scx.flags |= SCX_TG_INITED;

                rcu_read_lock();
                css_put(css);
···
        return !scx_enabled() ||
               (scx_root->ops.flags & SCX_OPS_ALLOW_QUEUED_WAKEUP) ||
               p->sched_class != &ext_sched_class;
+}
+
+/**
+ * scx_rcu_cpu_stall - sched_ext RCU CPU stall handler
+ *
+ * While there are various reasons why RCU CPU stalls can occur on a system
+ * that may not be caused by the current BPF scheduler, try kicking out the
+ * current scheduler in an attempt to recover the system to a good state before
+ * issuing panics.
+ */
+bool scx_rcu_cpu_stall(void)
+{
+        struct scx_sched *sch;
+
+        rcu_read_lock();
+
+        sch = rcu_dereference(scx_root);
+        if (unlikely(!sch)) {
+                rcu_read_unlock();
+                return false;
+        }
+
+        switch (scx_enable_state()) {
+        case SCX_ENABLING:
+        case SCX_ENABLED:
+                break;
+        default:
+                rcu_read_unlock();
+                return false;
+        }
+
+        scx_error(sch, "RCU CPU stall detected!");
+        rcu_read_unlock();
+
+        return true;
}

/**
···
static void sched_ext_ops__cgroup_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
static void sched_ext_ops__cgroup_cancel_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
static void sched_ext_ops__cgroup_set_weight(struct cgroup *cgrp, u32 weight) {}
+static void sched_ext_ops__cgroup_set_bandwidth(struct cgroup *cgrp, u64 period_us, u64 quota_us, u64 burst_us) {}
#endif
static void sched_ext_ops__cpu_online(s32 cpu) {}
static void sched_ext_ops__cpu_offline(s32 cpu) {}
···
        .cgroup_move            = sched_ext_ops__cgroup_move,
        .cgroup_cancel_move     = sched_ext_ops__cgroup_cancel_move,
        .cgroup_set_weight      = sched_ext_ops__cgroup_set_weight,
+        .cgroup_set_bandwidth   = sched_ext_ops__cgroup_set_bandwidth,
#endif
        .cpu_online             = sched_ext_ops__cpu_online,
        .cpu_offline            = sched_ext_ops__cpu_offline,
···
 * When called from ops.dispatch(), there are no restrictions on @p or @dsq_id
 * and this function can be called upto ops.dispatch_max_batch times to insert
 * multiple tasks. scx_bpf_dispatch_nr_slots() returns the number of the
- * remaining slots. scx_bpf_consume() flushes the batch and resets the counter.
+ * remaining slots. scx_bpf_dsq_move_to_local() flushes the batch and resets the
+ * counter.
 *
 * This function doesn't have any locking restrictions and may be called under
 * BPF locks (in the future when BPF introduces more flexible locking).
···
                p->scx.slice = p->scx.slice ?: 1;

        scx_dsq_insert_commit(p, dsq_id, enq_flags);
-}
-
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice,
-                                  u64 enq_flags)
-{
-        printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()");
-        scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags);
}

/**
···
        scx_dsq_insert_commit(p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
}

-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id,
-                                        u64 slice, u64 vtime, u64 enq_flags)
-{
-        printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_vtime() renamed to scx_bpf_dsq_insert_vtime()");
-        scx_bpf_dsq_insert_vtime(p, dsq_id, slice, vtime, enq_flags);
-}
-
__bpf_kfunc_end_defs();

BTF_KFUNCS_START(scx_kfunc_ids_enqueue_dispatch)
BTF_ID_FLAGS(func, scx_bpf_dsq_insert, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_dsq_insert_vtime, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime, KF_RCU)
BTF_KFUNCS_END(scx_kfunc_ids_enqueue_dispatch)

static const struct btf_kfunc_id_set scx_kfunc_set_enqueue_dispatch = {
···
        }
}

-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc bool scx_bpf_consume(u64 dsq_id)
-{
-        printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_consume() renamed to scx_bpf_dsq_move_to_local()");
-        return scx_bpf_dsq_move_to_local(dsq_id);
-}
-
/**
 * scx_bpf_dsq_move_set_slice - Override slice when moving between DSQs
 * @it__iter: DSQ iterator in progress
···

        kit->slice = slice;
        kit->cursor.flags |= __SCX_DSQ_ITER_HAS_SLICE;
-}
-
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_slice(
-                        struct bpf_iter_scx_dsq *it__iter, u64 slice)
-{
-        printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq_set_slice() renamed to scx_bpf_dsq_move_set_slice()");
-        scx_bpf_dsq_move_set_slice(it__iter, slice);
}

/**
···

        kit->vtime = vtime;
        kit->cursor.flags |= __SCX_DSQ_ITER_HAS_VTIME;
-}
-
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_vtime(
-                        struct bpf_iter_scx_dsq *it__iter, u64 vtime)
-{
-        printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq_set_vtime() renamed to scx_bpf_dsq_move_set_vtime()");
-        scx_bpf_dsq_move_set_vtime(it__iter, vtime);
}

/**
···
                                      p, dsq_id, enq_flags);
}

-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter,
-                                           struct task_struct *p, u64 dsq_id,
-                                           u64 enq_flags)
-{
-        printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq() renamed to scx_bpf_dsq_move()");
-        return scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags);
-}
-
/**
 * scx_bpf_dsq_move_vtime - Move a task from DSQ iteration to a PRIQ DSQ
 * @it__iter: DSQ iterator in progress
···
                                      p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
}

-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter,
-                                                 struct task_struct *p, u64 dsq_id,
-                                                 u64 enq_flags)
-{
-        printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq_vtime() renamed to scx_bpf_dsq_move_vtime()");
-        return scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags);
-}
-
__bpf_kfunc_end_defs();

BTF_KFUNCS_START(scx_kfunc_ids_dispatch)
BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots)
BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel)
BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local)
-BTF_ID_FLAGS(func, scx_bpf_consume)
BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice)
BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime)
BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
BTF_KFUNCS_END(scx_kfunc_ids_dispatch)

static const struct btf_kfunc_id_set scx_kfunc_set_dispatch = {
···
BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime)
BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
BTF_KFUNCS_END(scx_kfunc_ids_unlocked)

static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
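The ops.cgroup_set_bandwidth() documentation above leaves the control mechanism to the BPF scheduler, and the scx_qmap change below only logs the values. As a hedged sketch of one possible direction (not part of this commit; the map and struct names are illustrative), a scheduler could record the parameters per cgroup ID and consult them from its enqueue/dispatch paths:

    /* Illustrative only: remember cpu.max parameters keyed by cgroup ID. */
    struct cgrp_bw {
            u64 period_us;
            u64 quota_us;   /* quota_us / period_us == CPUs the cgroup is entitled to */
    };

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(max_entries, 1024);
            __type(key, u64);               /* cgrp->kn->id */
            __type(value, struct cgrp_bw);
    } cgrp_bw_map SEC(".maps");

    void BPF_STRUCT_OPS(example_cgroup_set_bandwidth, struct cgroup *cgrp,
                        u64 period_us, u64 quota_us, u64 burst_us)
    {
            u64 id = cgrp->kn->id;
            struct cgrp_bw bw = {
                    .period_us = period_us,
                    .quota_us = quota_us,
            };

            bpf_map_update_elem(&cgrp_bw_map, &id, &bw, BPF_ANY);
    }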
kernel/sched/ext.h (+19 -1)

···
        return !current->scx.kf_mask;
}

+static inline bool scx_rq_bypassing(struct rq *rq)
+{
+        return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
+}
+
DECLARE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);
+
+DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
+
+/*
+ * Return the rq currently locked from an scx callback, or NULL if no rq is
+ * locked.
+ */
+static inline struct rq *scx_locked_rq(void)
+{
+        return __this_cpu_read(scx_locked_rq_state);
+}

void scx_tick(struct rq *rq);
void init_scx_entity(struct sched_ext_entity *scx);
···

#endif  /* CONFIG_SCHED_CLASS_EXT */

-#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
+#ifdef CONFIG_SCHED_CLASS_EXT
void __scx_update_idle(struct rq *rq, bool idle, bool do_notify);

static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
···
void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
void scx_group_set_idle(struct task_group *tg, bool idle);
+void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us);
#else   /* CONFIG_EXT_GROUP_SCHED */
static inline void scx_tg_init(struct task_group *tg) {}
static inline int scx_tg_online(struct task_group *tg) { return 0; }
···
static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
+static inline void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) {}
#endif  /* CONFIG_EXT_GROUP_SCHED */
#endif  /* CONFIG_CGROUP_SCHED */
kernel/sched/ext_idle.c (+9 -36)

···
/* Enable/disable per-node idle cpumasks */
static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_per_node);

-#ifdef CONFIG_SMP
/* Enable/disable LLC aware optimizations */
static DEFINE_STATIC_KEY_FALSE(scx_selcpu_topo_llc);

···
        return cpu_to_node(cpu);
}

-bool scx_idle_test_and_clear_cpu(int cpu)
+static bool scx_idle_test_and_clear_cpu(int cpu)
{
        int node = scx_cpu_node_if_enabled(cpu);
        struct cpumask *idle_cpus = idle_cpumask(node)->cpu;
···
/*
 * Find an idle CPU in the system, starting from @node.
 */
-s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags)
+static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags)
{
        s32 cpu;

···

        sd = rcu_dereference(per_cpu(sd_llc, cpu));
        if (!sd)
-                return 0;
+                return NULL;

        return sched_domain_span(sd);
}
···
                cpumask_and(idle_cpumask(node)->smt, cpu_online_mask, node_mask);
        }
}
-#endif /* CONFIG_SMP */

void scx_idle_enable(struct sched_ext_ops *ops)
{
···
        else
                static_branch_disable_cpuslocked(&scx_builtin_idle_per_node);

-#ifdef CONFIG_SMP
        reset_idle_masks(ops);
-#endif
}

void scx_idle_disable(void)
···
                return false;
}

-s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
-                          const struct cpumask *allowed, u64 flags)
+static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+                                 const struct cpumask *allowed, u64 flags)
{
        struct rq *rq;
        struct rq_flags rf;
···
        if (!rq)
                lockdep_assert_held(&p->pi_lock);

-#ifdef CONFIG_SMP
        /*
         * This may also be called from ops.enqueue(), so we need to handle
         * per-CPU tasks as well. For these tasks, we can skip all idle CPU
···
                cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags,
                                         allowed ?: p->cpus_ptr, flags);
        }
-#else
-        cpu = -EBUSY;
-#endif
+
        if (scx_kf_allowed_if_unlocked())
                task_rq_unlock(rq, p, &rf);

···
 */
__bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
{
-#ifdef CONFIG_NUMA
        if (!kf_cpu_valid(cpu, NULL))
                return NUMA_NO_NODE;

        return cpu_to_node(cpu);
-#else
-        return 0;
-#endif
}

/**
···
        if (node < 0)
                return cpu_none_mask;

-#ifdef CONFIG_SMP
        return idle_cpumask(node)->cpu;
-#else
-        return cpu_none_mask;
-#endif
}

/**
···
        if (!check_builtin_idle_enabled())
                return cpu_none_mask;

-#ifdef CONFIG_SMP
        return idle_cpumask(NUMA_NO_NODE)->cpu;
-#else
-        return cpu_none_mask;
-#endif
}

/**
···
        if (node < 0)
                return cpu_none_mask;

-#ifdef CONFIG_SMP
        if (sched_smt_active())
                return idle_cpumask(node)->smt;
        else
                return idle_cpumask(node)->cpu;
-#else
-        return cpu_none_mask;
-#endif
}

/**
···
        if (!check_builtin_idle_enabled())
                return cpu_none_mask;

-#ifdef CONFIG_SMP
        if (sched_smt_active())
                return idle_cpumask(NUMA_NO_NODE)->smt;
        else
                return idle_cpumask(NUMA_NO_NODE)->cpu;
-#else
-        return cpu_none_mask;
-#endif
}

/**
···
        if (!check_builtin_idle_enabled())
                return false;

-        if (kf_cpu_valid(cpu, NULL))
-                return scx_idle_test_and_clear_cpu(cpu);
-        else
+        if (!kf_cpu_valid(cpu, NULL))
                return false;
+
+        return scx_idle_test_and_clear_cpu(cpu);
}

/**
kernel/sched/ext_idle.h (-12)

···

struct sched_ext_ops;

-#ifdef CONFIG_SMP
void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops);
void scx_idle_init_masks(void);
-bool scx_idle_test_and_clear_cpu(int cpu);
-s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags);
-#else /* !CONFIG_SMP */
-static inline void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops) {}
-static inline void scx_idle_init_masks(void) {}
-static inline bool scx_idle_test_and_clear_cpu(int cpu) { return false; }
-static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags)
-{
-        return -EBUSY;
-}
-#endif /* CONFIG_SMP */

s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
                       const struct cpumask *cpus_allowed, u64 flags);
kernel/sched/sched.h (+3 -6)

···

extern struct list_head task_groups;

-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
extern const u64 max_bw_quota_period_us;

/*
···
{
        return 100000ULL;
}
-#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */

struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
···
        struct rt_bandwidth     rt_bandwidth;
#endif

-#ifdef CONFIG_EXT_GROUP_SCHED
-        u32                     scx_flags;      /* SCX_TG_* */
-        u32                     scx_weight;
-#endif
+        struct scx_task_group   scx;

        struct rcu_head         rcu;
        struct list_head        list;
tools/sched_ext/scx_qmap.bpf.c (+23)

···
                      taskc->force_local, taskc->core_sched_seq);
}

+s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args)
+{
+        bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu",
+                   cgrp->kn->id, args->weight, args->bw_period_us,
+                   args->bw_quota_us, args->bw_burst_us);
+        return 0;
+}
+
+void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
+{
+        bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight);
+}
+
+void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp,
+                    u64 period_us, u64 quota_us, u64 burst_us)
+{
+        bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", cgrp->kn->id,
+                   period_us, quota_us, burst_us);
+}
+
/*
 * Print out the online and possible CPU map using bpf_printk() as a
 * demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
···
        .dump                   = (void *)qmap_dump,
        .dump_cpu               = (void *)qmap_dump_cpu,
        .dump_task              = (void *)qmap_dump_task,
+        .cgroup_init            = (void *)qmap_cgroup_init,
+        .cgroup_set_weight      = (void *)qmap_cgroup_set_weight,
+        .cgroup_set_bandwidth   = (void *)qmap_cgroup_set_bandwidth,
        .cpu_online             = (void *)qmap_cpu_online,
        .cpu_offline            = (void *)qmap_cpu_offline,
        .init                   = (void *)qmap_init,
tools/testing/selftests/sched_ext/maximal.bpf.c (+5)

···
void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
{}

+void BPF_STRUCT_OPS(maximal_cgroup_set_bandwidth, struct cgroup *cgrp,
+                    u64 period_us, u64 quota_us, u64 burst_us)
+{}
+
s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init)
{
        return scx_bpf_create_dsq(DSQ_ID, -1);
···
        .cgroup_move            = (void *) maximal_cgroup_move,
        .cgroup_cancel_move     = (void *) maximal_cgroup_cancel_move,
        .cgroup_set_weight      = (void *) maximal_cgroup_set_weight,
+        .cgroup_set_bandwidth   = (void *) maximal_cgroup_set_bandwidth,
        .init                   = (void *) maximal_init,
        .exit                   = (void *) maximal_exit,
        .name                   = "maximal",