Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'general-enhancements-to-rqspinlock-stress-test'

Kumar Kartikeya Dwivedi says:

====================
General enhancements to rqspinlock stress test

Three enhancements; details are in the commit messages.

First, the CPU requirements are 2 for AA, 3 for ABBA, and 4 for ABBCCA,
hence relax the check during module initialization. Second, add a
per-CPU histogram to capture lock acquisition times to record which
buckets these acquisitions fall into for the normal task context and NMI
context. When every acquisition on a CPU completes within 10ms, only a
summary line is printed for that CPU; otherwise the full per-bucket
breakdown is displayed for each context. Finally, make the delay of
the NMI and task contexts configurable, set to 10 and 20 ms respectively
by default.
====================

Link: https://patch.msgid.link/20251125020749.2421610-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+117 -3
+117 -3
tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
··· 5 5 #include <linux/delay.h> 6 6 #include <linux/module.h> 7 7 #include <linux/prandom.h> 8 + #include <linux/ktime.h> 8 9 #include <asm/rqspinlock.h> 9 10 #include <linux/perf_event.h> 10 11 #include <linux/kthread.h> ··· 25 24 static rqspinlock_t lock_b; 26 25 static rqspinlock_t lock_c; 27 26 27 + #define RQSL_SLOW_THRESHOLD_MS 10 28 + static const unsigned int rqsl_hist_ms[] = { 29 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 30 + 12, 14, 16, 18, 20, 25, 30, 40, 50, 75, 31 + 100, 150, 200, 250, 1000, 32 + }; 33 + #define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms) 34 + 35 + struct rqsl_cpu_hist { 36 + atomic64_t normal[RQSL_NR_HIST_BUCKETS]; 37 + atomic64_t nmi[RQSL_NR_HIST_BUCKETS]; 38 + }; 39 + 40 + static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists); 41 + 28 42 enum rqsl_mode { 29 43 RQSL_MODE_AA = 0, 30 44 RQSL_MODE_ABBA, ··· 50 34 module_param(test_mode, int, 0644); 51 35 MODULE_PARM_DESC(test_mode, 52 36 "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA"); 37 + 38 + static int normal_delay = 20; 39 + module_param(normal_delay, int, 0644); 40 + MODULE_PARM_DESC(normal_delay, 41 + "rqspinlock critical section length for normal context (20ms default)"); 42 + 43 + static int nmi_delay = 10; 44 + module_param(nmi_delay, int, 0644); 45 + MODULE_PARM_DESC(nmi_delay, 46 + "rqspinlock critical section length for NMI context (10ms default)"); 53 47 54 48 static struct perf_event **rqsl_evts; 55 49 static int rqsl_nevts; ··· 105 79 } 106 80 } 107 81 82 + static u32 rqsl_hist_bucket_idx(u32 delta_ms) 83 + { 84 + int i; 85 + 86 + for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) { 87 + if (delta_ms <= rqsl_hist_ms[i]) 88 + return i; 89 + } 90 + 91 + return RQSL_NR_HIST_BUCKETS - 1; 92 + } 93 + 94 + static void rqsl_record_lock_time(u64 delta_ns, bool is_nmi) 95 + { 96 + struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists); 97 + u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC); 98 + u32 bucket = rqsl_hist_bucket_idx(delta_ms); 99 + atomic64_t *buckets = 
is_nmi ? hist->nmi : hist->normal; 100 + 101 + atomic64_inc(&buckets[bucket]); 102 + } 103 + 108 104 static int rqspinlock_worker_fn(void *arg) 109 105 { 110 106 int cpu = smp_processor_id(); 111 107 unsigned long flags; 108 + u64 start_ns; 112 109 int ret; 113 110 114 111 if (cpu) { ··· 145 96 msleep(1000); 146 97 continue; 147 98 } 99 + start_ns = ktime_get_mono_fast_ns(); 148 100 ret = raw_res_spin_lock_irqsave(worker_lock, flags); 149 - mdelay(20); 101 + rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, false); 102 + mdelay(normal_delay); 150 103 if (!ret) 151 104 raw_res_spin_unlock_irqrestore(worker_lock, flags); 152 105 cpu_relax(); ··· 181 130 struct rqsl_lock_pair locks; 182 131 int cpu = smp_processor_id(); 183 132 unsigned long flags; 133 + u64 start_ns; 184 134 int ret; 185 135 186 136 if (!cpu || READ_ONCE(pause)) 187 137 return; 188 138 189 139 locks = rqsl_get_lock_pair(cpu); 140 + start_ns = ktime_get_mono_fast_ns(); 190 141 ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags); 142 + rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, true); 191 143 192 - mdelay(10); 144 + mdelay(nmi_delay); 193 145 194 146 if (!ret) 195 147 raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags); ··· 236 182 237 183 pr_err("Mode = %s\n", rqsl_mode_names[test_mode]); 238 184 239 - if (ncpus < 3) 185 + if (ncpus < test_mode + 2) 240 186 return -ENOTSUPP; 241 187 242 188 raw_res_spin_lock_init(&lock_a); ··· 289 235 290 236 module_init(bpf_test_rqspinlock_init); 291 237 238 + static void rqsl_print_histograms(void) 239 + { 240 + int cpu, i; 241 + 242 + pr_err("rqspinlock acquisition latency histogram (ms):\n"); 243 + 244 + for_each_online_cpu(cpu) { 245 + struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu); 246 + u64 norm_counts[RQSL_NR_HIST_BUCKETS]; 247 + u64 nmi_counts[RQSL_NR_HIST_BUCKETS]; 248 + u64 total_counts[RQSL_NR_HIST_BUCKETS]; 249 + u64 norm_total = 0, nmi_total = 0, total = 0; 250 + bool has_slow = false; 251 + 252 + for (i = 0; 
i < RQSL_NR_HIST_BUCKETS; i++) { 253 + norm_counts[i] = atomic64_read(&hist->normal[i]); 254 + nmi_counts[i] = atomic64_read(&hist->nmi[i]); 255 + total_counts[i] = norm_counts[i] + nmi_counts[i]; 256 + norm_total += norm_counts[i]; 257 + nmi_total += nmi_counts[i]; 258 + total += total_counts[i]; 259 + if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS && 260 + total_counts[i]) 261 + has_slow = true; 262 + } 263 + 264 + if (!total) 265 + continue; 266 + 267 + if (!has_slow) { 268 + pr_err(" cpu%d: total %llu (normal %llu, nmi %llu), all within 0-%ums\n", 269 + cpu, total, norm_total, nmi_total, RQSL_SLOW_THRESHOLD_MS); 270 + continue; 271 + } 272 + 273 + pr_err(" cpu%d: total %llu (normal %llu, nmi %llu)\n", 274 + cpu, total, norm_total, nmi_total); 275 + for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) { 276 + unsigned int start_ms; 277 + 278 + if (!total_counts[i]) 279 + continue; 280 + 281 + start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1; 282 + if (i == RQSL_NR_HIST_BUCKETS - 1) { 283 + pr_err(" >= %ums: total %llu (normal %llu, nmi %llu)\n", 284 + start_ms, total_counts[i], 285 + norm_counts[i], nmi_counts[i]); 286 + } else { 287 + pr_err(" %u-%ums: total %llu (normal %llu, nmi %llu)\n", 288 + start_ms, rqsl_hist_ms[i], 289 + total_counts[i], 290 + norm_counts[i], nmi_counts[i]); 291 + } 292 + } 293 + } 294 + } 295 + 292 296 static void bpf_test_rqspinlock_exit(void) 293 297 { 298 + WRITE_ONCE(pause, 1); 294 299 free_rqsl_threads(); 295 300 free_rqsl_evts(); 301 + rqsl_print_histograms(); 296 302 } 297 303 298 304 module_exit(bpf_test_rqspinlock_exit);