Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

- Various bug fixes for the example schedulers and selftests

* tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
tools/sched_ext: fix getopt not re-parsed on restart
tools/sched_ext: scx_userland: fix data races on shared counters
tools/sched_ext: scx_pair: fix stride == 0 crash on single-CPU systems
tools/sched_ext: scx_central: fix CPU_SET and skeleton leak on early exit
tools/sched_ext: scx_userland: fix stale data on restart
tools/sched_ext: scx_flatcg: fix potential stack overflow from VLA in fcg_read_stats
selftests/sched_ext: Fix rt_stall flaky failure
tools/sched_ext: scx_userland: fix restart and stats thread lifecycle bugs
tools/sched_ext: scx_central: fix sched_setaffinity() call with the set size
tools/sched_ext: scx_flatcg: zero-initialize stats counter array

+96 -18
+7 -3
tools/sched_ext/scx_central.c
···
	__u64 seq = 0, ecode;
	__s32 opt;
	cpu_set_t *cpuset;
+	size_t cpuset_size;

	libbpf_set_print(libbpf_print_fn);
	signal(SIGINT, sigint_handler);
	signal(SIGTERM, sigint_handler);
restart:
+	optind = 1;
	skel = SCX_OPS_OPEN(central_ops, scx_central);

	skel->rodata->central_cpu = 0;
···
			u32 central_cpu = strtoul(optarg, NULL, 0);
			if (central_cpu >= skel->rodata->nr_cpu_ids) {
				fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
+				scx_central__destroy(skel);
				return -1;
			}
			skel->rodata->central_cpu = (s32)central_cpu;
···
	 */
	cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
	SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
-	CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
-	CPU_SET(skel->rodata->central_cpu, cpuset);
-	SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
+	cpuset_size = CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids);
+	CPU_ZERO_S(cpuset_size, cpuset);
+	CPU_SET_S(skel->rodata->central_cpu, cpuset_size, cpuset);
+	SCX_BUG_ON(sched_setaffinity(0, cpuset_size, cpuset),
		   "Failed to affinitize to central CPU %d (max %d)",
		   skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
	CPU_FREE(cpuset);
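
The affinity hunk above makes all three calls use the size returned by CPU_ALLOC_SIZE(): CPU_SET() becomes CPU_SET_S() and sched_setaffinity() no longer passes sizeof(*cpuset), which only covers the static cpu_set_t size. A minimal standalone sketch of that pattern, assuming glibc's sched.h macros, with the CPU count and target CPU as placeholder values rather than anything taken from the scheduler:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	int nr_cpu_ids = 4;				/* placeholder CPU count */
	int target_cpu = 0;				/* placeholder CPU to pin to */
	cpu_set_t *cpuset = CPU_ALLOC(nr_cpu_ids);
	size_t cpuset_size = CPU_ALLOC_SIZE(nr_cpu_ids);

	if (!cpuset)
		return 1;

	/* The _S macros take the allocated size; plain CPU_SET()/CPU_ZERO()
	 * assume a full sizeof(cpu_set_t) and may not match the allocation. */
	CPU_ZERO_S(cpuset_size, cpuset);
	CPU_SET_S(target_cpu, cpuset_size, cpuset);

	/* Pass the same size to sched_setaffinity(), not sizeof(*cpuset). */
	if (sched_setaffinity(0, cpuset_size, cpuset))
		perror("sched_setaffinity");

	CPU_FREE(cpuset);
	return 0;
}
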
+1
tools/sched_ext/scx_cpu0.c
···
	signal(SIGINT, sigint_handler);
	signal(SIGTERM, sigint_handler);
restart:
+	optind = 1;
	skel = SCX_OPS_OPEN(cpu0_ops, scx_cpu0);

	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
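
The single added line here (and the matching ones in the other schedulers below) resets getopt(3)'s global cursor so option parsing actually runs again when the scheduler jumps back to restart:. A small sketch of re-parsing the same argv twice, assuming glibc's getopt behaviour:

#include <stdio.h>
#include <unistd.h>

static void parse(int argc, char **argv)
{
	int opt;

	/* getopt() keeps its position in the global optind; without resetting
	 * it to 1, a second pass over the same argv parses nothing. */
	optind = 1;
	while ((opt = getopt(argc, argv, "fvh")) != -1)
		printf("option: -%c\n", opt);
}

int main(int argc, char **argv)
{
	parse(argc, argv);	/* first parse */
	parse(argc, argv);	/* "restart": only works because optind was reset */
	return 0;
}
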
+10 -3
tools/sched_ext/scx_flatcg.c
···

static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats)
{
-	__u64 cnts[FCG_NR_STATS][skel->rodata->nr_cpus];
+	__u64 *cnts;
	__u32 idx;
+
+	cnts = calloc(skel->rodata->nr_cpus, sizeof(__u64));
+	if (!cnts)
+		return;

	memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS);

···
		int ret, cpu;

		ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
-					  &idx, cnts[idx]);
+					  &idx, cnts);
		if (ret < 0)
			continue;
		for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++)
-			stats[idx] += cnts[idx][cpu];
+			stats[idx] += cnts[cpu];
	}
+
+	free(cnts);
}

int main(int argc, char **argv)
···
	signal(SIGINT, sigint_handler);
	signal(SIGTERM, sigint_handler);
restart:
+	optind = 1;
	skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);

	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
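
fcg_read_stats() previously used a [FCG_NR_STATS][nr_cpus] variable-length array on the stack, which can overflow for large CPU counts; the fix reads into one heap-allocated row instead. For a per-CPU BPF array map, a user-space bpf_map_lookup_elem() on the map fd fills one value per possible CPU for the given key, so a single nr_cpus-sized buffer that is summed and reused per key is enough. A rough, self-contained sketch of that read pattern; the map name, key count, and build line are illustrative, and creating the map needs BPF privileges:

/* Build (illustrative): gcc -o percpu_read percpu_read.c -lbpf */
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/types.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_KEYS 4	/* placeholder number of stat slots */

int main(void)
{
	int nr_cpus = libbpf_num_possible_cpus();
	int fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "stats",
				sizeof(__u32), sizeof(__u64), NR_KEYS, NULL);
	__u64 *cnts, total;
	__u32 idx;
	int cpu;

	if (nr_cpus <= 0 || fd < 0)
		return 1;

	/* One heap-allocated row of nr_cpus values, reused for every key,
	 * instead of a [NR_KEYS][nr_cpus] VLA on the stack. */
	cnts = calloc(nr_cpus, sizeof(*cnts));
	if (!cnts)
		return 1;

	for (idx = 0; idx < NR_KEYS; idx++) {
		/* For a per-CPU map the kernel copies one value per CPU. */
		if (bpf_map_lookup_elem(fd, &idx, cnts) < 0)
			continue;
		total = 0;
		for (cpu = 0; cpu < nr_cpus; cpu++)
			total += cnts[cpu];
		printf("key %u: %llu\n", idx, (unsigned long long)total);
	}

	free(cnts);
	return 0;
}
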
+7 -1
tools/sched_ext/scx_pair.c
···
	signal(SIGINT, sigint_handler);
	signal(SIGTERM, sigint_handler);
restart:
+	optind = 1;
	skel = SCX_OPS_OPEN(pair_ops, scx_pair);

	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
-	assert(skel->rodata->nr_cpu_ids > 0);
	skel->rodata->pair_batch_dur_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");

	/* pair up the earlier half to the latter by default, override with -s */
···
		}
	}

+	/* Stride must be positive to pair distinct CPUs. */
+	if (stride <= 0) {
+		fprintf(stderr, "Invalid stride %d, must be positive\n", stride);
+		scx_pair__destroy(skel);
+		return -1;
+	}
	bpf_map__set_max_entries(skel->maps.pair_ctx, skel->rodata->nr_cpu_ids / 2);

	/* Resize arrays so their element count is equal to cpu count. */
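
The new check rejects a non-positive stride before the pairing maps are sized. By default the scheduler pairs the first half of the CPUs with the second half, so on a single-CPU system the derived stride comes out to 0 and a CPU would be paired with itself. A tiny illustration of that arithmetic; the modular pairing formula below is an assumption made for illustration, not the exact expression scx_pair uses:

#include <stdio.h>

int main(void)
{
	int nr_cpu_ids = 1;		/* single-CPU system */
	int stride = nr_cpu_ids / 2;	/* default "pair the halves" stride -> 0 */

	if (stride <= 0) {
		fprintf(stderr, "Invalid stride %d, must be positive\n", stride);
		return -1;
	}

	/* With a positive stride every CPU gets a distinct partner. */
	for (int cpu = 0; cpu < nr_cpu_ids; cpu++)
		printf("cpu %d <-> cpu %d\n", cpu, (cpu + stride) % nr_cpu_ids);
	return 0;
}
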
+1
tools/sched_ext/scx_sdt.c
···
	signal(SIGINT, sigint_handler);
	signal(SIGTERM, sigint_handler);
restart:
+	optind = 1;
	skel = SCX_OPS_OPEN(sdt_ops, scx_sdt);

	while ((opt = getopt(argc, argv, "fvh")) != -1) {
+1
tools/sched_ext/scx_simple.c
···
	signal(SIGINT, sigint_handler);
	signal(SIGTERM, sigint_handler);
restart:
+	optind = 1;
	skel = SCX_OPS_OPEN(simple_ops, scx_simple);

	while ((opt = getopt(argc, argv, "fvh")) != -1) {
+20 -11
tools/sched_ext/scx_userland.c
···
static volatile int exit_req;
static int enqueued_fd, dispatched_fd;

+static pthread_t stats_printer;
static struct scx_userland *skel;
static struct bpf_link *ops_link;

···

	err = bpf_map_update_elem(dispatched_fd, NULL, &pid, 0);
	if (err) {
-		nr_vruntime_failed++;
+		__atomic_add_fetch(&nr_vruntime_failed, 1, __ATOMIC_RELAXED);
	} else {
-		nr_vruntime_dispatches++;
+		__atomic_add_fetch(&nr_vruntime_dispatches, 1, __ATOMIC_RELAXED);
	}

	return err;
···
		return ENOENT;

	update_enqueued(curr, bpf_task);
-	nr_vruntime_enqueues++;
-	nr_curr_enqueued++;
+	__atomic_add_fetch(&nr_vruntime_enqueues, 1, __ATOMIC_RELAXED);
+	__atomic_add_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED);

	/*
	 * Enqueue the task in a vruntime-sorted list. A more optimal data
···
			LIST_INSERT_HEAD(&vruntime_head, task, entries);
			break;
		}
-		nr_curr_enqueued--;
+		__atomic_sub_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED);
	}
-	skel->bss->nr_scheduled = nr_curr_enqueued;
+	skel->bss->nr_scheduled = __atomic_load_n(&nr_curr_enqueued, __ATOMIC_RELAXED);
}

static void *run_stats_printer(void *arg)
···
		printf("|-----------------------|\n");
		printf("| VRUNTIME / USER |\n");
		printf("|-----------------------|\n");
-		printf("| enq: %10llu |\n", nr_vruntime_enqueues);
-		printf("| disp: %10llu |\n", nr_vruntime_dispatches);
-		printf("| failed: %10llu |\n", nr_vruntime_failed);
+		printf("| enq: %10llu |\n", __atomic_load_n(&nr_vruntime_enqueues, __ATOMIC_RELAXED));
+		printf("| disp: %10llu |\n", __atomic_load_n(&nr_vruntime_dispatches, __ATOMIC_RELAXED));
+		printf("| failed: %10llu |\n", __atomic_load_n(&nr_vruntime_failed, __ATOMIC_RELAXED));
		printf("o-----------------------o\n");
		printf("\n\n");
		fflush(stdout);
···

static int spawn_stats_thread(void)
{
-	pthread_t stats_printer;
-
	return pthread_create(&stats_printer, NULL, run_stats_printer, NULL);
}

···

static void bootstrap(char *comm)
{
+	exit_req = 0;
+	min_vruntime = 0.0;
+	__atomic_store_n(&nr_vruntime_enqueues, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&nr_vruntime_dispatches, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&nr_vruntime_failed, 0, __ATOMIC_RELAXED);
+	__atomic_store_n(&nr_curr_enqueued, 0, __ATOMIC_RELAXED);
+	memset(tasks, 0, pid_max * sizeof(*tasks));
+	LIST_INIT(&vruntime_head);
+
	skel = SCX_OPS_OPEN(userland_ops, scx_userland);

	skel->rodata->num_possible_cpus = libbpf_num_possible_cpus();
···

	exit_req = 1;
	bpf_link__destroy(ops_link);
+	pthread_join(stats_printer, NULL);
	ecode = UEI_REPORT(skel, uei);
	scx_userland__destroy(skel);
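
The scheduler loop and the stats-printer thread share these counters, so the fix replaces plain ++/-- and reads with the GCC/Clang __atomic builtins, makes the thread handle a global so main() can pthread_join() it before tearing the skeleton down, and re-zeroes all per-run state in bootstrap() so a restart does not report stale numbers. A minimal sketch of the same shared-counter pattern, independent of the scheduler:

/* Build: gcc -pthread -o atomic_counter atomic_counter.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static unsigned long long counter;
static int done;

static void *worker(void *arg)
{
	for (int i = 0; i < 1000000; i++)
		/* Atomic read-modify-write; a plain counter++ would be a data race. */
		__atomic_add_fetch(&counter, 1, __ATOMIC_RELAXED);
	__atomic_store_n(&done, 1, __ATOMIC_RELEASE);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	if (pthread_create(&tid, NULL, worker, NULL))
		return 1;

	/* Readers also go through the builtins so each access is atomic. */
	while (!__atomic_load_n(&done, __ATOMIC_ACQUIRE)) {
		printf("in flight: %llu\n", __atomic_load_n(&counter, __ATOMIC_RELAXED));
		usleep(1000);
	}

	/* Joining before exit keeps the handle valid and the output complete. */
	pthread_join(tid, NULL);
	printf("final: %llu\n", __atomic_load_n(&counter, __ATOMIC_RELAXED));
	return 0;
}
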
+49
tools/testing/selftests/sched_ext/rt_stall.c
···
#define CORE_ID 0	/* CPU to pin tasks to */
#define RUN_TIME 5	/* How long to run the test in seconds */

+/* Signal the parent that setup is complete by writing to a pipe */
+static void signal_ready(int fd)
+{
+	char c = 1;
+
+	if (write(fd, &c, 1) != 1) {
+		perror("write to ready pipe");
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+
+/* Wait for a child to signal readiness via a pipe */
+static void wait_ready(int fd)
+{
+	char c;
+
+	if (read(fd, &c, 1) != 1) {
+		perror("read from ready pipe");
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+
/* Simple busy-wait function for test tasks */
static void process_func(void)
{
···

	float ext_runtime, rt_runtime, actual_ratio;
	int ext_pid, rt_pid;
+	int ext_ready[2], rt_ready[2];

	ksft_print_header();
	ksft_set_plan(1);

+	if (pipe(ext_ready) || pipe(rt_ready)) {
+		perror("pipe");
+		ksft_exit_fail();
+	}
+
	/* Create and set up a EXT task */
	ext_pid = fork();
	if (ext_pid == 0) {
+		close(ext_ready[0]);
+		close(rt_ready[0]);
+		close(rt_ready[1]);
		set_affinity(CORE_ID);
+		signal_ready(ext_ready[1]);
		process_func();
		exit(0);
	} else if (ext_pid < 0) {
···
	/* Create an RT task */
	rt_pid = fork();
	if (rt_pid == 0) {
+		close(ext_ready[0]);
+		close(ext_ready[1]);
+		close(rt_ready[0]);
		set_affinity(CORE_ID);
		set_sched(SCHED_FIFO, 50);
+		signal_ready(rt_ready[1]);
		process_func();
		exit(0);
	} else if (rt_pid < 0) {
		perror("fork for RT task");
		ksft_exit_fail();
	}
+
+	/*
+	 * Wait for both children to complete their setup (affinity and
+	 * scheduling policy) before starting the measurement window.
+	 * This prevents flaky failures caused by the RT child's setup
+	 * time eating into the measurement period.
+	 */
+	close(ext_ready[1]);
+	close(rt_ready[1]);
+	wait_ready(ext_ready[0]);
+	wait_ready(rt_ready[0]);

	/* Let the processes run for the specified time */
	sleep(RUN_TIME);
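
The selftest fix is the classic pipe handshake: each child writes one byte once its setup is done, and the parent blocks on the read ends before starting the timed window. A stripped-down sketch of the handshake with a single child; the setup step is just a placeholder:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int ready[2];
	char c = 1;
	pid_t pid;

	if (pipe(ready))
		return 1;

	pid = fork();
	if (pid == 0) {
		close(ready[0]);			/* child keeps only the write end */
		/* ... child-side setup (affinity, scheduling policy, ...) ... */
		if (write(ready[1], &c, 1) != 1)	/* tell the parent we are ready */
			exit(1);
		close(ready[1]);
		pause();				/* stands in for the real workload */
		exit(0);
	} else if (pid < 0) {
		return 1;
	}

	close(ready[1]);				/* parent keeps only the read end */
	if (read(ready[0], &c, 1) != 1)			/* block until the child is set up */
		return 1;
	close(ready[0]);

	/* The measurement window would start here. */
	kill(pid, SIGTERM);
	waitpid(pid, NULL, 0);
	return 0;
}
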