Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Add per-CPU data to DSQs

Add a per-CPU data structure to dispatch queues. Each DSQ now has a percpu
scx_dsq_pcpu which contains a back-pointer to the DSQ. This will be used by
future changes to implement per-CPU reenqueue tracking for user DSQs.

init_dsq() now allocates the percpu data and can fail, so it returns an
error code. All callers are updated to handle failures. exit_dsq() is added
to free the percpu data and is called from all DSQ cleanup paths.

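In scx_bpf_create_dsq(), init_dsq() is now called before rcu_read_lock()
because alloc_percpu() allocates with GFP_KERNEL and may sleep, which is not
allowed inside an RCU read-side critical section. The DSQ is therefore
initialized with a NULL sched, and dsq->sched is set afterwards once the
scheduler has been looked up under RCU.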

v2: Fix err_free_pcpu to call exit_dsq() only on the bypass DSQs that were
successfully initialized (Andrea Righi).

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>

Tejun Heo 30b05153 ffa7ae07

+77 -15
+5
include/linux/sched/ext.h
··· 62 62 SCX_DSQ_LOCAL_CPU_MASK = 0xffffffffLLU, 63 63 }; 64 64 65 + struct scx_dsq_pcpu { 66 + struct scx_dispatch_q *dsq; 67 + }; 68 + 65 69 /* 66 70 * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered 67 71 * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to ··· 83 79 struct rhash_head hash_node; 84 80 struct llist_node free_node; 85 81 struct scx_sched *sched; 82 + struct scx_dsq_pcpu __percpu *pcpu; 86 83 struct rcu_head rcu; 87 84 }; 88 85
+72 -15
kernel/sched/ext.c
··· 4020 4020 #endif 4021 4021 }; 4022 4022 4023 - static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id, 4024 - struct scx_sched *sch) 4023 + static s32 init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id, 4024 + struct scx_sched *sch) 4025 4025 { 4026 + s32 cpu; 4027 + 4026 4028 memset(dsq, 0, sizeof(*dsq)); 4027 4029 4028 4030 raw_spin_lock_init(&dsq->lock); 4029 4031 INIT_LIST_HEAD(&dsq->list); 4030 4032 dsq->id = dsq_id; 4031 4033 dsq->sched = sch; 4034 + 4035 + dsq->pcpu = alloc_percpu(struct scx_dsq_pcpu); 4036 + if (!dsq->pcpu) 4037 + return -ENOMEM; 4038 + 4039 + for_each_possible_cpu(cpu) { 4040 + struct scx_dsq_pcpu *pcpu = per_cpu_ptr(dsq->pcpu, cpu); 4041 + 4042 + pcpu->dsq = dsq; 4043 + } 4044 + 4045 + return 0; 4046 + } 4047 + 4048 + static void exit_dsq(struct scx_dispatch_q *dsq) 4049 + { 4050 + free_percpu(dsq->pcpu); 4051 + } 4052 + 4053 + static void free_dsq_rcufn(struct rcu_head *rcu) 4054 + { 4055 + struct scx_dispatch_q *dsq = container_of(rcu, struct scx_dispatch_q, rcu); 4056 + 4057 + exit_dsq(dsq); 4058 + kfree(dsq); 4032 4059 } 4033 4060 4034 4061 static void free_dsq_irq_workfn(struct irq_work *irq_work) ··· 4064 4037 struct scx_dispatch_q *dsq, *tmp_dsq; 4065 4038 4066 4039 llist_for_each_entry_safe(dsq, tmp_dsq, to_free, free_node) 4067 - kfree_rcu(dsq, rcu); 4040 + call_rcu(&dsq->rcu, free_dsq_rcufn); 4068 4041 } 4069 4042 4070 4043 static DEFINE_IRQ_WORK(free_dsq_irq_work, free_dsq_irq_workfn); ··· 4261 4234 cgroup_put(sch_cgroup(sch)); 4262 4235 #endif /* CONFIG_EXT_SUB_SCHED */ 4263 4236 4264 - /* 4265 - * $sch would have entered bypass mode before the RCU grace period. As 4266 - * that blocks new deferrals, all deferred_reenq_local_node's must be 4267 - * off-list by now. 4268 - */ 4269 4237 for_each_possible_cpu(cpu) { 4270 4238 struct scx_sched_pcpu *pcpu = per_cpu_ptr(sch->pcpu, cpu); 4271 4239 4240 + /* 4241 + * $sch would have entered bypass mode before the RCU grace 4242 + * period. 
As that blocks new deferrals, all 4243 + * deferred_reenq_local_node's must be off-list by now. 4244 + */ 4272 4245 WARN_ON_ONCE(!list_empty(&pcpu->deferred_reenq_local.node)); 4246 + 4247 + exit_dsq(bypass_dsq(sch, cpu)); 4273 4248 } 4274 4249 4275 4250 free_percpu(sch->pcpu); ··· 5816 5787 5817 5788 static void free_pnode(struct scx_sched_pnode *pnode) 5818 5789 { 5790 + if (!pnode) 5791 + return; 5792 + exit_dsq(&pnode->global_dsq); 5819 5793 kfree(pnode); 5820 5794 } 5821 5795 ··· 5830 5798 if (!pnode) 5831 5799 return NULL; 5832 5800 5833 - init_dsq(&pnode->global_dsq, SCX_DSQ_GLOBAL, sch); 5801 + if (init_dsq(&pnode->global_dsq, SCX_DSQ_GLOBAL, sch)) { 5802 + kfree(pnode); 5803 + return NULL; 5804 + } 5834 5805 5835 5806 return pnode; 5836 5807 } ··· 5844 5809 { 5845 5810 struct scx_sched *sch; 5846 5811 s32 level = parent ? parent->level + 1 : 0; 5847 - s32 node, cpu, ret; 5812 + s32 node, cpu, ret, bypass_fail_cpu = nr_cpu_ids; 5848 5813 5849 5814 sch = kzalloc_flex(*sch, ancestors, level); 5850 5815 if (!sch) ··· 5883 5848 goto err_free_pnode; 5884 5849 } 5885 5850 5886 - for_each_possible_cpu(cpu) 5887 - init_dsq(bypass_dsq(sch, cpu), SCX_DSQ_BYPASS, sch); 5851 + for_each_possible_cpu(cpu) { 5852 + ret = init_dsq(bypass_dsq(sch, cpu), SCX_DSQ_BYPASS, sch); 5853 + if (ret) { 5854 + bypass_fail_cpu = cpu; 5855 + goto err_free_pcpu; 5856 + } 5857 + } 5888 5858 5889 5859 for_each_possible_cpu(cpu) { 5890 5860 struct scx_sched_pcpu *pcpu = per_cpu_ptr(sch->pcpu, cpu); ··· 5971 5931 err_stop_helper: 5972 5932 kthread_destroy_worker(sch->helper); 5973 5933 err_free_pcpu: 5934 + for_each_possible_cpu(cpu) { 5935 + if (cpu == bypass_fail_cpu) 5936 + break; 5937 + exit_dsq(bypass_dsq(sch, cpu)); 5938 + } 5974 5939 free_percpu(sch->pcpu); 5975 5940 err_free_pnode: 5976 5941 for_each_node_state(node, N_POSSIBLE) ··· 7218 7173 int n = cpu_to_node(cpu); 7219 7174 7220 7175 /* local_dsq's sch will be set during scx_root_enable() */ 7221 - init_dsq(&rq->scx.local_dsq, 
SCX_DSQ_LOCAL, NULL); 7176 + BUG_ON(init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL, NULL)); 7222 7177 7223 7178 INIT_LIST_HEAD(&rq->scx.runnable_list); 7224 7179 INIT_LIST_HEAD(&rq->scx.ddsp_deferred_locals); ··· 7917 7872 if (!dsq) 7918 7873 return -ENOMEM; 7919 7874 7875 + /* 7876 + * init_dsq() must be called in GFP_KERNEL context. Init it with NULL 7877 + * @sch and update afterwards. 7878 + */ 7879 + ret = init_dsq(dsq, dsq_id, NULL); 7880 + if (ret) { 7881 + kfree(dsq); 7882 + return ret; 7883 + } 7884 + 7920 7885 rcu_read_lock(); 7921 7886 7922 7887 sch = scx_prog_sched(aux); 7923 7888 if (sch) { 7924 - init_dsq(dsq, dsq_id, sch); 7889 + dsq->sched = sch; 7925 7890 ret = rhashtable_lookup_insert_fast(&sch->dsq_hash, &dsq->hash_node, 7926 7891 dsq_hash_params); 7927 7892 } else { ··· 7939 7884 } 7940 7885 7941 7886 rcu_read_unlock(); 7942 - if (ret) 7887 + if (ret) { 7888 + exit_dsq(dsq); 7943 7889 kfree(dsq); 7890 + } 7944 7891 return ret; 7945 7892 } 7946 7893