Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

selftests/bpf/benchs: Add overwrite mode benchmark for BPF ring buffer

Add a --rb-overwrite option to benchmark the BPF ring buffer in overwrite mode.
Since libbpf does not yet support consuming from an overwrite-mode ring buffer,
also add a --rb-bench-producer option to benchmark the producer directly,
without a consumer.

Benchmarks on an x86_64 and an arm64 CPU are shown below for reference.

- AMD EPYC 9654 (x86_64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1 32.180 ± 0.033M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2 9.617 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3 8.810 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4 9.272 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8 9.173 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12 3.086 ± 0.032M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16 2.945 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20 2.519 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24 2.545 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28 2.363 ± 0.024M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32 2.357 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36 2.267 ± 0.011M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40 2.284 ± 0.020M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44 2.215 ± 0.025M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48 2.193 ± 0.023M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 52 2.208 ± 0.024M/s (drops 0.000 ± 0.000M/s)

- HiSilicon Kunpeng 920 (arm64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1 14.478 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2 21.787 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3 6.045 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4 5.352 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8 4.850 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12 3.542 ± 0.016M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16 3.509 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20 3.171 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24 3.154 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28 2.974 ± 0.015M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32 3.167 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36 2.903 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40 2.866 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44 2.914 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48 2.806 ± 0.012M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 52 2.840 ± 0.012M/s (drops 0.000 ± 0.000M/s)

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20251018035738.4039621-4-xukuohai@huaweicloud.com

authored by

Xu Kuohai and committed by
Andrii Nakryiko
f9db3a38 8f7a86ec

+74 -6
+59 -6
tools/testing/selftests/bpf/benchs/bench_ringbufs.c
··· 19 19 int ringbuf_sz; /* per-ringbuf, in bytes */ 20 20 bool ringbuf_use_output; /* use slower output API */ 21 21 int perfbuf_sz; /* per-CPU size, in pages */ 22 + bool overwrite; 23 + bool bench_producer; 22 24 } args = { 23 25 .back2back = false, 24 26 .batch_cnt = 500, ··· 29 27 .ringbuf_sz = 512 * 1024, 30 28 .ringbuf_use_output = false, 31 29 .perfbuf_sz = 128, 30 + .overwrite = false, 31 + .bench_producer = false, 32 32 }; 33 33 34 34 enum { ··· 39 35 ARG_RB_BATCH_CNT = 2002, 40 36 ARG_RB_SAMPLED = 2003, 41 37 ARG_RB_SAMPLE_RATE = 2004, 38 + ARG_RB_OVERWRITE = 2005, 39 + ARG_RB_BENCH_PRODUCER = 2006, 42 40 }; 43 41 44 42 static const struct argp_option opts[] = { ··· 49 43 { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"}, 50 44 { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"}, 51 45 { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"}, 46 + { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"}, 47 + { "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"}, 52 48 {}, 53 49 }; 54 50 ··· 80 72 argp_usage(state); 81 73 } 82 74 break; 75 + case ARG_RB_OVERWRITE: 76 + args.overwrite = true; 77 + break; 78 + case ARG_RB_BENCH_PRODUCER: 79 + args.bench_producer = true; 80 + break; 83 81 default: 84 82 return ARGP_ERR_UNKNOWN; 85 83 } ··· 109 95 110 96 static void bufs_validate(void) 111 97 { 112 - if (env.consumer_cnt != 1) { 113 - fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n"); 98 + if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) { 99 + fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n"); 100 + exit(1); 101 + } 102 + 103 + if (args.overwrite && !args.bench_producer) { 104 + fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n"); 105 + exit(1); 106 + } 107 + 108 + if (args.bench_producer && env.consumer_cnt != 0) { 109 + fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n"); 
110 + exit(1); 111 + } 112 + 113 + if (args.bench_producer && args.back2back) { 114 + fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n"); 115 + exit(1); 116 + } 117 + 118 + if (args.bench_producer && args.sampled) { 119 + fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n"); 120 + exit(1); 121 + } 122 + 123 + if (!args.bench_producer && env.consumer_cnt != 1) { 124 + fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n"); 114 125 exit(1); 115 126 } 116 127 ··· 167 128 { 168 129 struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; 169 130 170 - res->hits = atomic_swap(&buf_hits.value, 0); 131 + if (args.bench_producer) 132 + res->hits = atomic_swap(&ctx->skel->bss->hits, 0); 133 + else 134 + res->hits = atomic_swap(&buf_hits.value, 0); 171 135 res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); 172 136 } 173 137 174 138 static struct ringbuf_bench *ringbuf_setup_skeleton(void) 175 139 { 140 + __u32 flags; 141 + struct bpf_map *ringbuf; 176 142 struct ringbuf_bench *skel; 177 143 178 144 setup_libbpf(); ··· 190 146 191 147 skel->rodata->batch_cnt = args.batch_cnt; 192 148 skel->rodata->use_output = args.ringbuf_use_output ? 
1 : 0; 149 + skel->rodata->bench_producer = args.bench_producer; 193 150 194 151 if (args.sampled) 195 152 /* record data + header take 16 bytes */ 196 153 skel->rodata->wakeup_data_size = args.sample_rate * 16; 197 154 198 - bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz); 155 + ringbuf = skel->maps.ringbuf; 156 + if (args.overwrite) { 157 + flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE; 158 + bpf_map__set_map_flags(ringbuf, flags); 159 + } 160 + 161 + bpf_map__set_max_entries(ringbuf, args.ringbuf_sz); 199 162 200 163 if (ringbuf_bench__load(skel)) { 201 164 fprintf(stderr, "failed to load skeleton\n"); ··· 222 171 { 223 172 struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; 224 173 struct bpf_link *link; 174 + int map_fd; 225 175 226 176 ctx->skel = ringbuf_setup_skeleton(); 227 - ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf), 228 - buf_process_sample, NULL, NULL); 177 + 178 + map_fd = bpf_map__fd(ctx->skel->maps.ringbuf); 179 + ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL); 229 180 if (!ctx->ringbuf) { 230 181 fprintf(stderr, "failed to create ringbuf\n"); 231 182 exit(1);
+4
tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
··· 49 49 summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" 50 50 done 51 51 52 + header "Ringbuf, multi-producer contention in overwrite mode, no consumer" 53 + for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do 54 + summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)" 55 + done
+11
tools/testing/selftests/bpf/progs/ringbuf_bench.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2020 Facebook 3 3 4 + #include <stdbool.h> 4 5 #include <linux/bpf.h> 5 6 #include <stdint.h> 6 7 #include <bpf/bpf_helpers.h> ··· 15 14 16 15 const volatile int batch_cnt = 0; 17 16 const volatile long use_output = 0; 17 + const volatile bool bench_producer = false; 18 18 19 19 long sample_val = 42; 20 20 long dropped __attribute__((aligned(128))) = 0; 21 + long hits __attribute__((aligned(128))) = 0; 21 22 22 23 const volatile long wakeup_data_size = 0; 23 24 24 25 static __always_inline long get_flags() 25 26 { 26 27 long sz; 28 + 29 + if (bench_producer) 30 + return BPF_RB_NO_WAKEUP; 27 31 28 32 if (!wakeup_data_size) 29 33 return 0; ··· 53 47 *sample = sample_val; 54 48 flags = get_flags(); 55 49 bpf_ringbuf_submit(sample, flags); 50 + if (bench_producer) 51 + __sync_add_and_fetch(&hits, 1); 56 52 } 57 53 } 58 54 } else { ··· 63 55 if (bpf_ringbuf_output(&ringbuf, &sample_val, 64 56 sizeof(sample_val), flags)) 65 57 __sync_add_and_fetch(&dropped, 1); 58 + else if (bench_producer) 59 + __sync_add_and_fetch(&hits, 1); 60 + 66 61 } 67 62 } 68 63 return 0;