Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Make scx_has_op a bitmap

scx_has_op is used to encode which ops are implemented by the BPF scheduler
into an array of static_keys. While this saves a bit of branching overhead,
that is unlikely to be noticeable compared to the overall cost. As the
global static_keys can't work with the planned hierarchical multiple
scheduler support, replace the static_key array with a bitmap.

In repeated hackbench runs before and after the static_key removal on an AMD
Ryzen 3900X, I couldn't detect any measurable performance difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Changwoo Min <changwoo@igalia.com>
Acked-by: Andrea Righi <arighi@nvidia.com>

+10 -12
+10 -12
kernel/sched/ext.c
··· 928 928 static struct sched_ext_ops scx_ops; 929 929 static bool scx_warned_zero_slice; 930 930 931 - static struct static_key_false scx_has_op[SCX_OPI_END] = 932 - { [0 ... SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT }; 931 + static DECLARE_BITMAP(scx_has_op, SCX_OPI_END); 933 932 934 933 static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE); 935 934 static struct scx_exit_info *scx_exit_info; ··· 1054 1055 #define scx_error(fmt, args...) \ 1055 1056 __scx_error(SCX_EXIT_ERROR, fmt, ##args) 1056 1057 1057 - #define SCX_HAS_OP(op) static_branch_likely(&scx_has_op[SCX_OP_IDX(op)]) 1058 + #define SCX_HAS_OP(op) test_bit(SCX_OP_IDX(op), scx_has_op) 1058 1059 1059 1060 static long jiffies_delta_msecs(unsigned long at, unsigned long now) 1060 1061 { ··· 1773 1774 lockdep_assert_rq_held(rq); 1774 1775 1775 1776 #ifdef CONFIG_SCHED_CORE 1776 - if (SCX_HAS_OP(core_sched_before)) 1777 + if (unlikely(SCX_HAS_OP(core_sched_before))) 1777 1778 touch_core_sched(rq, p); 1778 1779 #endif 1779 1780 } ··· 2155 2156 goto local; 2156 2157 } 2157 2158 2158 - if (!SCX_HAS_OP(enqueue)) 2159 + if (unlikely(!SCX_HAS_OP(enqueue))) 2159 2160 goto global; 2160 2161 2161 2162 /* DSQ bypass didn't trigger, enqueue on the BPF scheduler */ ··· 2971 2972 if (consume_global_dsq(rq)) 2972 2973 goto has_tasks; 2973 2974 2974 - if (!SCX_HAS_OP(dispatch) || scx_rq_bypassing(rq) || !scx_rq_online(rq)) 2975 + if (unlikely(!SCX_HAS_OP(dispatch)) || scx_rq_bypassing(rq) || !scx_rq_online(rq)) 2975 2976 goto no_tasks; 2976 2977 2977 2978 dspc->rq = rq; ··· 3372 3373 return prev_cpu; 3373 3374 3374 3375 rq_bypass = scx_rq_bypassing(task_rq(p)); 3375 - if (SCX_HAS_OP(select_cpu) && !rq_bypass) { 3376 + if (likely(SCX_HAS_OP(select_cpu)) && !rq_bypass) { 3376 3377 s32 cpu; 3377 3378 struct task_struct **ddsp_taskp; 3378 3379 ··· 4637 4638 struct task_struct *p; 4638 4639 struct rhashtable_iter rht_iter; 4639 4640 struct scx_dispatch_q *dsq; 4640 - int i, kind, cpu; 4641 + int kind, cpu; 4641 4642 4642 
4643 kind = atomic_read(&scx_exit_kind); 4643 4644 while (true) { ··· 4730 4731 4731 4732 /* no task is on scx, turn off all the switches and flush in-progress calls */ 4732 4733 static_branch_disable(&__scx_enabled); 4733 - for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++) 4734 - static_branch_disable(&scx_has_op[i]); 4734 + bitmap_zero(scx_has_op, SCX_OPI_END); 4735 4735 scx_idle_disable(); 4736 4736 synchronize_rcu(); 4737 4737 ··· 5326 5328 5327 5329 for (i = SCX_OPI_CPU_HOTPLUG_BEGIN; i < SCX_OPI_CPU_HOTPLUG_END; i++) 5328 5330 if (((void (**)(void))ops)[i]) 5329 - static_branch_enable_cpuslocked(&scx_has_op[i]); 5331 + set_bit(i, scx_has_op); 5330 5332 5331 5333 check_hotplug_seq(ops); 5332 5334 scx_idle_update_selcpu_topology(ops); ··· 5367 5369 5368 5370 for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++) 5369 5371 if (((void (**)(void))ops)[i]) 5370 - static_branch_enable(&scx_has_op[i]); 5372 + set_bit(i, scx_has_op); 5371 5373 5372 5374 if (scx_ops.cpu_acquire || scx_ops.cpu_release) 5373 5375 scx_ops.flags |= SCX_OPS_HAS_CPU_PREEMPT;