Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

selftests/sched_ext: Add cyclic SCX_KICK_WAIT stress test

Add a test that creates a 3-CPU kick_wait cycle (A->B->C->A). A BPF
scheduler kicks the next CPU in the ring with SCX_KICK_WAIT on every
enqueue while userspace workers generate continuous scheduling churn via
sched_yield(). Without the preceding fix, this hangs the machine within seconds.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Tested-by: Christian Loehle <christian.loehle@arm.com>

Tejun Heo 090d34f0 415cb193

+263
+1
tools/testing/selftests/sched_ext/Makefile
··· 188 188 rt_stall \ 189 189 test_example \ 190 190 total_bw \ 191 + cyclic_kick_wait \ 191 192 192 193 testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets))) 193 194
+68
tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Stress concurrent SCX_KICK_WAIT calls to reproduce wait-cycle deadlock. 4 + * 5 + * Three CPUs are designated from userspace. Every enqueue from one of the 6 + * three CPUs kicks the next CPU in the ring with SCX_KICK_WAIT, creating a 7 + * persistent A -> B -> C -> A wait cycle pressure. 8 + */ 9 + #include <scx/common.bpf.h> 10 + 11 + char _license[] SEC("license") = "GPL"; 12 + 13 + const volatile s32 test_cpu_a; 14 + const volatile s32 test_cpu_b; 15 + const volatile s32 test_cpu_c; 16 + 17 + u64 nr_enqueues; 18 + u64 nr_wait_kicks; 19 + 20 + UEI_DEFINE(uei); 21 + 22 + static s32 target_cpu(s32 cpu) 23 + { 24 + if (cpu == test_cpu_a) 25 + return test_cpu_b; 26 + if (cpu == test_cpu_b) 27 + return test_cpu_c; 28 + if (cpu == test_cpu_c) 29 + return test_cpu_a; 30 + return -1; 31 + } 32 + 33 + void BPF_STRUCT_OPS(cyclic_kick_wait_enqueue, struct task_struct *p, 34 + u64 enq_flags) 35 + { 36 + s32 this_cpu = bpf_get_smp_processor_id(); 37 + s32 tgt; 38 + 39 + __sync_fetch_and_add(&nr_enqueues, 1); 40 + 41 + if (p->flags & PF_KTHREAD) { 42 + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF, 43 + enq_flags | SCX_ENQ_PREEMPT); 44 + return; 45 + } 46 + 47 + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); 48 + 49 + tgt = target_cpu(this_cpu); 50 + if (tgt < 0 || tgt == this_cpu) 51 + return; 52 + 53 + __sync_fetch_and_add(&nr_wait_kicks, 1); 54 + scx_bpf_kick_cpu(tgt, SCX_KICK_WAIT); 55 + } 56 + 57 + void BPF_STRUCT_OPS(cyclic_kick_wait_exit, struct scx_exit_info *ei) 58 + { 59 + UEI_RECORD(uei, ei); 60 + } 61 + 62 + SEC(".struct_ops.link") 63 + struct sched_ext_ops cyclic_kick_wait_ops = { 64 + .enqueue = cyclic_kick_wait_enqueue, 65 + .exit = cyclic_kick_wait_exit, 66 + .name = "cyclic_kick_wait", 67 + .timeout_ms = 1000U, 68 + };
+194
tools/testing/selftests/sched_ext/cyclic_kick_wait.c
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Test SCX_KICK_WAIT forward progress under cyclic wait pressure. 4 + * 5 + * SCX_KICK_WAIT busy-waits until the target CPU enters the scheduling path. 6 + * If multiple CPUs form a wait cycle (A waits for B, B waits for C, C waits 7 + * for A), all CPUs deadlock unless the implementation breaks the cycle. 8 + * 9 + * This test creates that scenario: three CPUs are arranged in a ring. The BPF 10 + * scheduler's ops.enqueue() kicks the next CPU in the ring with SCX_KICK_WAIT 11 + * on every enqueue. Userspace pins 4 worker threads per CPU that loop calling 12 + * sched_yield(), generating a steady stream of enqueues and thus sustained 13 + * A->B->C->A kick_wait cycle pressure. The test passes if the system remains 14 + * responsive for 5 seconds without the scheduler being killed by the watchdog. 15 + */ 16 + #define _GNU_SOURCE 17 + 18 + #include <bpf/bpf.h> 19 + #include <errno.h> 20 + #include <pthread.h> 21 + #include <sched.h> 22 + #include <scx/common.h> 23 + #include <stdint.h> 24 + #include <string.h> 25 + #include <time.h> 26 + #include <unistd.h> 27 + 28 + #include "scx_test.h" 29 + #include "cyclic_kick_wait.bpf.skel.h" 30 + 31 + #define WORKERS_PER_CPU 4 32 + #define NR_TEST_CPUS 3 33 + #define NR_WORKERS (NR_TEST_CPUS * WORKERS_PER_CPU) 34 + 35 + struct worker_ctx { 36 + pthread_t tid; 37 + int cpu; 38 + volatile bool stop; 39 + volatile __u64 iters; 40 + bool started; 41 + }; 42 + 43 + static void *worker_fn(void *arg) 44 + { 45 + struct worker_ctx *worker = arg; 46 + cpu_set_t mask; 47 + 48 + CPU_ZERO(&mask); 49 + CPU_SET(worker->cpu, &mask); 50 + 51 + if (sched_setaffinity(0, sizeof(mask), &mask)) 52 + return (void *)(uintptr_t)errno; 53 + 54 + while (!worker->stop) { 55 + sched_yield(); 56 + worker->iters++; 57 + } 58 + 59 + return NULL; 60 + } 61 + 62 + static int join_worker(struct worker_ctx *worker) 63 + { 64 + void *ret; 65 + struct timespec ts; 66 + int err; 67 + 68 + if (!worker->started) 69 + return 0; 70 + 71 + if (clock_gettime(CLOCK_REALTIME, &ts)) 72 + return -errno; 73 + 74 + ts.tv_sec += 2; 75 + err = pthread_timedjoin_np(worker->tid, &ret, &ts); 76 + if (err == ETIMEDOUT) 77 + pthread_detach(worker->tid); 78 + if (err) 79 + return -err; 80 + 81 + if ((uintptr_t)ret) 82 + return -(int)(uintptr_t)ret; 83 + 84 + return 0; 85 + } 86 + 87 + static enum scx_test_status setup(void **ctx) 88 + { 89 + struct cyclic_kick_wait *skel; 90 + 91 + skel = cyclic_kick_wait__open(); 92 + SCX_FAIL_IF(!skel, "Failed to open skel"); 93 + SCX_ENUM_INIT(skel); 94 + 95 + *ctx = skel; 96 + return SCX_TEST_PASS; 97 + } 98 + 99 + static enum scx_test_status run(void *ctx) 100 + { 101 + struct cyclic_kick_wait *skel = ctx; 102 + struct worker_ctx workers[NR_WORKERS] = {}; 103 + struct bpf_link *link = NULL; 104 + enum scx_test_status status = SCX_TEST_PASS; 105 + int test_cpus[NR_TEST_CPUS]; 106 + int nr_cpus = 0; 107 + cpu_set_t mask; 108 + int ret, i; 109 + 110 + if (sched_getaffinity(0, sizeof(mask), &mask)) { 111 + SCX_ERR("Failed to get affinity (%d)", errno); 112 + return SCX_TEST_FAIL; 113 + } 114 + 115 + for (i = 0; i < CPU_SETSIZE; i++) { 116 + if (CPU_ISSET(i, &mask)) 117 + test_cpus[nr_cpus++] = i; 118 + if (nr_cpus == NR_TEST_CPUS) 119 + break; 120 + } 121 + 122 + if (nr_cpus < NR_TEST_CPUS) 123 + return SCX_TEST_SKIP; 124 + 125 + skel->rodata->test_cpu_a = test_cpus[0]; 126 + skel->rodata->test_cpu_b = test_cpus[1]; 127 + skel->rodata->test_cpu_c = test_cpus[2]; 128 + 129 + if (cyclic_kick_wait__load(skel)) { 130 + SCX_ERR("Failed to load skel"); 131 + return SCX_TEST_FAIL; 132 + } 133 + 134 + link = bpf_map__attach_struct_ops(skel->maps.cyclic_kick_wait_ops); 135 + if (!link) { 136 + SCX_ERR("Failed to attach scheduler"); 137 + return SCX_TEST_FAIL; 138 + } 139 + 140 + for (i = 0; i < NR_WORKERS; i++) 141 + workers[i].cpu = test_cpus[i / WORKERS_PER_CPU]; 142 + 143 + for (i = 0; i < NR_WORKERS; i++) { 144 + ret = pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]); 145 + if (ret) { 146 + SCX_ERR("Failed to create worker thread %d (%d)", i, ret); 147 + status = SCX_TEST_FAIL; 148 + goto out; 149 + } 150 + workers[i].started = true; 151 + } 152 + 153 + sleep(5); 154 + 155 + if (skel->data->uei.kind != EXIT_KIND(SCX_EXIT_NONE)) { 156 + SCX_ERR("Scheduler exited unexpectedly (kind=%llu code=%lld)", 157 + (unsigned long long)skel->data->uei.kind, 158 + (long long)skel->data->uei.exit_code); 159 + status = SCX_TEST_FAIL; 160 + } 161 + 162 + out: 163 + for (i = 0; i < NR_WORKERS; i++) 164 + workers[i].stop = true; 165 + 166 + for (i = 0; i < NR_WORKERS; i++) { 167 + ret = join_worker(&workers[i]); 168 + if (ret && status == SCX_TEST_PASS) { 169 + SCX_ERR("Failed to join worker thread %d (%d)", i, ret); 170 + status = SCX_TEST_FAIL; 171 + } 172 + } 173 + 174 + if (link) 175 + bpf_link__destroy(link); 176 + 177 + return status; 178 + } 179 + 180 + static void cleanup(void *ctx) 181 + { 182 + struct cyclic_kick_wait *skel = ctx; 183 + 184 + cyclic_kick_wait__destroy(skel); 185 + } 186 + 187 + struct scx_test cyclic_kick_wait = { 188 + .name = "cyclic_kick_wait", 189 + .description = "Verify SCX_KICK_WAIT forward progress under a 3-CPU wait cycle", 190 + .setup = setup, 191 + .run = run, 192 + .cleanup = cleanup, 193 + }; 194 + REGISTER_SCX_TEST(&cyclic_kick_wait)