Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Use shorter slice in bypass mode

There have been reported cases of bypass mode not making forward progress fast
enough. The 20ms default slice is unnecessarily long for bypass mode, where the
primary goal is ensuring that all tasks can make forward progress.

Introduce SCX_SLICE_BYPASS, set to 5ms, and make the scheduler automatically
switch to it when entering bypass mode and switch back to SCX_SLICE_DFL when
leaving it. Also make the bypass slice value tunable through the
slice_bypass_us module parameter (adjustable between 100us and 100ms) to make
it easier to test whether slice durations are a factor in problem cases.

v3: Use READ_ONCE/WRITE_ONCE for scx_slice_dfl access (Dan).

v2: Removed slice_dfl_us module parameter. Fixed typos (Andrea).

Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

Tejun Heo bfd3749d 5a629ecb

+42 -3
+11
include/linux/sched/ext.h
··· 17 17 enum scx_public_consts { 18 18 SCX_OPS_NAME_LEN = 128, 19 19 20 + /* 21 + * %SCX_SLICE_DFL is used to refill slices when the BPF scheduler misses 22 + * to set the slice for a task that is selected for execution. 23 + * %SCX_EV_REFILL_SLICE_DFL counts the number of times the default slice 24 + * refill has been triggered. 25 + * 26 + * %SCX_SLICE_BYPASS is used as the slice for all tasks in the bypass 27 + * mode. As making forward progress for all tasks is the main goal of 28 + * the bypass mode, a shorter slice is used. 29 + */ 20 30 SCX_SLICE_DFL = 20 * 1000000, /* 20ms */ 31 + SCX_SLICE_BYPASS = 5 * 1000000, /* 5ms */ 21 32 SCX_SLICE_INF = U64_MAX, /* infinite, implies nohz */ 22 33 }; 23 34
+31 -3
kernel/sched/ext.c
··· 143 143 /* /sys/kernel/sched_ext interface */ 144 144 static struct kset *scx_kset; 145 145 146 + /* 147 + * Parameters that can be adjusted through /sys/module/sched_ext/parameters. 148 + * There usually is no reason to modify these as normal scheduler operation 149 + * shouldn't be affected by them. The knobs are primarily for debugging. 150 + */ 151 + static u64 scx_slice_dfl = SCX_SLICE_DFL; 152 + static unsigned int scx_slice_bypass_us = SCX_SLICE_BYPASS / NSEC_PER_USEC; 153 + 154 + static int set_slice_us(const char *val, const struct kernel_param *kp) 155 + { 156 + return param_set_uint_minmax(val, kp, 100, 100 * USEC_PER_MSEC); 157 + } 158 + 159 + static const struct kernel_param_ops slice_us_param_ops = { 160 + .set = set_slice_us, 161 + .get = param_get_uint, 162 + }; 163 + 164 + #undef MODULE_PARAM_PREFIX 165 + #define MODULE_PARAM_PREFIX "sched_ext." 166 + 167 + module_param_cb(slice_bypass_us, &slice_us_param_ops, &scx_slice_bypass_us, 0600); 168 + MODULE_PARM_DESC(slice_bypass_us, "bypass slice in microseconds, applied on [un]load (100us to 100ms)"); 169 + 170 + #undef MODULE_PARAM_PREFIX 171 + 146 172 #define CREATE_TRACE_POINTS 147 173 #include <trace/events/sched_ext.h> 148 174 ··· 945 919 946 920 static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p) 947 921 { 948 - p->scx.slice = SCX_SLICE_DFL; 922 + p->scx.slice = READ_ONCE(scx_slice_dfl); 949 923 __scx_add_event(sch, SCX_EV_REFILL_SLICE_DFL, 1); 950 924 } 951 925 ··· 2922 2896 INIT_LIST_HEAD(&scx->runnable_node); 2923 2897 scx->runnable_at = jiffies; 2924 2898 scx->ddsp_dsq_id = SCX_DSQ_INVALID; 2925 - scx->slice = SCX_SLICE_DFL; 2899 + scx->slice = READ_ONCE(scx_slice_dfl); 2926 2900 } 2927 2901 2928 2902 void scx_pre_fork(struct task_struct *p) ··· 3800 3774 WARN_ON_ONCE(scx_bypass_depth <= 0); 3801 3775 if (scx_bypass_depth != 1) 3802 3776 goto unlock; 3777 + WRITE_ONCE(scx_slice_dfl, scx_slice_bypass_us * NSEC_PER_USEC); 3803 3778 bypass_timestamp = 
ktime_get_ns(); 3804 3779 if (sch) 3805 3780 scx_add_event(sch, SCX_EV_BYPASS_ACTIVATE, 1); ··· 3809 3782 WARN_ON_ONCE(scx_bypass_depth < 0); 3810 3783 if (scx_bypass_depth != 0) 3811 3784 goto unlock; 3785 + WRITE_ONCE(scx_slice_dfl, SCX_SLICE_DFL); 3812 3786 if (sch) 3813 3787 scx_add_event(sch, SCX_EV_BYPASS_DURATION, 3814 3788 ktime_get_ns() - bypass_timestamp); ··· 4808 4780 queue_flags |= DEQUEUE_CLASS; 4809 4781 4810 4782 scoped_guard (sched_change, p, queue_flags) { 4811 - p->scx.slice = SCX_SLICE_DFL; 4783 + p->scx.slice = READ_ONCE(scx_slice_dfl); 4812 4784 p->sched_class = new_class; 4813 4785 } 4814 4786 }