Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2025 Cloudflare */
3
4/*
5 * All of these benchmarks operate on tries with keys in the range
6 * [0, args.nr_entries), i.e. there are no gaps or partially filled
7 * branches of the trie for any key < args.nr_entries.
8 *
9 * This gives an idea of worst-case behaviour.
10 */
11
12#include <argp.h>
13#include <linux/time64.h>
14#include <linux/if_ether.h>
15#include "lpm_trie_bench.skel.h"
16#include "lpm_trie_map.skel.h"
17#include "bench.h"
18#include "testing_helpers.h"
19#include "progs/lpm_trie.h"
20
/* Global benchmark context: holds the loaded lpm_trie_bench skeleton. */
static struct ctx {
	struct lpm_trie_bench *bench;
} ctx;
24
/* Command-line arguments controlling the benchmark workload. */
static struct {
	__u32 nr_entries;	/* number of unique entries in the trie */
	__u32 prefixlen;	/* prefix length applied to every key */
	bool random;		/* access random keys rather than sequential */
} args = {
	.nr_entries = 0,	/* 0 means "not set"; validated later */
	.prefixlen = 32,
	.random = false,
};
34
/* Argp option keys; values chosen outside the printable-character
 * range so they never collide with short options.
 */
enum {
	ARG_NR_ENTRIES = 9000,
	ARG_PREFIX_LEN,
	ARG_RANDOM,
};
40
/* Long-option table consumed by the bench harness's argp parser. */
static const struct argp_option opts[] = {
	{ "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
	  "Number of unique entries in the LPM trie" },
	{ "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
	  "Number of prefix bits to use in the LPM trie" },
	{ "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
	{},
};
49
50static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
51{
52 long ret;
53
54 switch (key) {
55 case ARG_NR_ENTRIES:
56 ret = strtol(arg, NULL, 10);
57 if (ret < 1 || ret > UINT_MAX) {
58 fprintf(stderr, "Invalid nr_entries count.");
59 argp_usage(state);
60 }
61 args.nr_entries = ret;
62 break;
63 case ARG_PREFIX_LEN:
64 ret = strtol(arg, NULL, 10);
65 if (ret < 1 || ret > UINT_MAX) {
66 fprintf(stderr, "Invalid prefix_len value.");
67 argp_usage(state);
68 }
69 args.prefixlen = ret;
70 break;
71 case ARG_RANDOM:
72 args.random = true;
73 break;
74 default:
75 return ARGP_ERR_UNKNOWN;
76 }
77 return 0;
78}
79
/* Argp parser exported to the bench harness for these benchmarks. */
const struct argp bench_lpm_trie_map_argp = {
	.options = opts,
	.parser = lpm_parse_arg,
};
84
85static void validate_common(void)
86{
87 if (env.consumer_cnt != 0) {
88 fprintf(stderr, "benchmark doesn't support consumer\n");
89 exit(1);
90 }
91
92 if (args.nr_entries == 0) {
93 fprintf(stderr, "Missing --nr_entries parameter\n");
94 exit(1);
95 }
96
97 if ((1UL << args.prefixlen) < args.nr_entries) {
98 fprintf(stderr, "prefix_len value too small for nr_entries\n");
99 exit(1);
100 }
101}
102
103static void lpm_insert_validate(void)
104{
105 validate_common();
106
107 if (env.producer_cnt != 1) {
108 fprintf(stderr, "lpm-trie-insert requires a single producer\n");
109 exit(1);
110 }
111
112 if (args.random) {
113 fprintf(stderr, "lpm-trie-insert does not support --random\n");
114 exit(1);
115 }
116}
117
118static void lpm_delete_validate(void)
119{
120 validate_common();
121
122 if (env.producer_cnt != 1) {
123 fprintf(stderr, "lpm-trie-delete requires a single producer\n");
124 exit(1);
125 }
126
127 if (args.random) {
128 fprintf(stderr, "lpm-trie-delete does not support --random\n");
129 exit(1);
130 }
131}
132
133static void lpm_free_validate(void)
134{
135 validate_common();
136
137 if (env.producer_cnt != 1) {
138 fprintf(stderr, "lpm-trie-free requires a single producer\n");
139 exit(1);
140 }
141
142 if (args.random) {
143 fprintf(stderr, "lpm-trie-free does not support --random\n");
144 exit(1);
145 }
146}
147
/* Parallel key/value arrays for batch map ops; allocated in attach_prog(). */
static struct trie_key *keys;
static __u32 *vals;
150
151static void fill_map(int map_fd)
152{
153 int err;
154
155 DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
156 .elem_flags = 0,
157 .flags = 0,
158 );
159
160 err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
161 if (err) {
162 fprintf(stderr, "failed to batch update keys to map: %d\n",
163 -err);
164 exit(1);
165 }
166}
167
168static void empty_map(int map_fd)
169{
170 int err;
171
172 DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
173 .elem_flags = 0,
174 .flags = 0,
175 );
176
177 err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
178 if (err) {
179 fprintf(stderr, "failed to batch delete keys for map: %d\n",
180 -err);
181 exit(1);
182 }
183}
184
185static void attach_prog(void)
186{
187 int i;
188
189 ctx.bench = lpm_trie_bench__open_and_load();
190 if (!ctx.bench) {
191 fprintf(stderr, "failed to open skeleton\n");
192 exit(1);
193 }
194
195 ctx.bench->bss->nr_entries = args.nr_entries;
196 ctx.bench->bss->prefixlen = args.prefixlen;
197 ctx.bench->bss->random = args.random;
198
199 if (lpm_trie_bench__attach(ctx.bench)) {
200 fprintf(stderr, "failed to attach skeleton\n");
201 exit(1);
202 }
203
204 keys = calloc(args.nr_entries, sizeof(*keys));
205 vals = calloc(args.nr_entries, sizeof(*vals));
206
207 for (i = 0; i < args.nr_entries; i++) {
208 struct trie_key *k = &keys[i];
209 __u32 *v = &vals[i];
210
211 k->prefixlen = args.prefixlen;
212 k->data = i;
213 *v = 1;
214 }
215}
216
217static void attach_prog_and_fill_map(void)
218{
219 int fd;
220
221 attach_prog();
222
223 fd = bpf_map__fd(ctx.bench->maps.trie_map);
224 fill_map(fd);
225}
226
/* Setup: attach only; selects the no-op op for the BPF program. */
static void lpm_noop_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_NOOP;
}

/* Setup: attach only; selects the baseline op. */
static void lpm_baseline_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_BASELINE;
}

/* Setup: pre-fills the trie so lookups hit existing entries. */
static void lpm_lookup_setup(void)
{
	attach_prog_and_fill_map();
	ctx.bench->bss->op = LPM_OP_LOOKUP;
}

/* Setup: trie starts empty; the BPF program performs the inserts. */
static void lpm_insert_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_INSERT;
}

/* Setup: pre-fills the trie so updates hit existing entries. */
static void lpm_update_setup(void)
{
	attach_prog_and_fill_map();
	ctx.bench->bss->op = LPM_OP_UPDATE;
}

/* Setup: pre-fills the trie so deletes hit existing entries. */
static void lpm_delete_setup(void)
{
	attach_prog_and_fill_map();
	ctx.bench->bss->op = LPM_OP_DELETE;
}

/* Setup: attach only; the free producer builds its own maps. */
static void lpm_free_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_FREE;
}
268
/* Snapshot and reset the counters published by the BPF program. */
static void lpm_measure(struct bench_res *res)
{
	res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
	res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
}
274
275static void bench_reinit_map(void)
276{
277 int fd = bpf_map__fd(ctx.bench->maps.trie_map);
278
279 switch (ctx.bench->bss->op) {
280 case LPM_OP_INSERT:
281 /* trie_map needs to be emptied */
282 empty_map(fd);
283 break;
284 case LPM_OP_DELETE:
285 /* trie_map needs to be refilled */
286 fill_map(fd);
287 break;
288 default:
289 fprintf(stderr, "Unexpected REINIT return code for op %d\n",
290 ctx.bench->bss->op);
291 exit(1);
292 }
293}
294
295/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
296static void *lpm_producer(void *unused __always_unused)
297{
298 int err;
299 char in[ETH_HLEN]; /* unused */
300
301 LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
302 .data_size_in = sizeof(in), .repeat = 1, );
303
304 while (true) {
305 int fd = bpf_program__fd(ctx.bench->progs.run_bench);
306 err = bpf_prog_test_run_opts(fd, &opts);
307 if (err) {
308 fprintf(stderr, "failed to run BPF prog: %d\n", err);
309 exit(1);
310 }
311
312 /* Check for kernel error code */
313 if ((int)opts.retval < 0) {
314 fprintf(stderr, "BPF prog returned error: %d\n",
315 opts.retval);
316 exit(1);
317 }
318
319 switch (opts.retval) {
320 case LPM_BENCH_SUCCESS:
321 break;
322 case LPM_BENCH_REINIT_MAP:
323 bench_reinit_map();
324 break;
325 default:
326 fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
327 opts.retval, ctx.bench->bss->op);
328 exit(1);
329 }
330 }
331
332 return NULL;
333}
334
335static void *lpm_free_producer(void *unused __always_unused)
336{
337 while (true) {
338 struct lpm_trie_map *skel;
339
340 skel = lpm_trie_map__open_and_load();
341 if (!skel) {
342 fprintf(stderr, "failed to open skeleton\n");
343 exit(1);
344 }
345
346 fill_map(bpf_map__fd(skel->maps.trie_free_map));
347 lpm_trie_map__destroy(skel);
348 }
349
350 return NULL;
351}
352
353/*
354 * The standard bench op_report_*() functions assume measurements are
355 * taken over a 1-second interval but operations that modify the map
356 * (INSERT, DELETE, and FREE) cannot run indefinitely without
357 * "resetting" the map to the initial state. Depending on the size of
358 * the map, this likely needs to happen before the 1-second timer fires.
359 *
360 * Calculate the fraction of a second over which the op measurement was
361 * taken (to ignore any time spent doing the reset) and report the
362 * throughput results per second.
363 */
/* Print one iteration's throughput, normalized to a full second. */
static void frac_second_report_progress(int iter, struct bench_res *res,
					long delta_ns, double rate_divisor,
					char rate)
{
	/* Fraction of a second the measurement actually covered. */
	double interval_sec = res->duration_ns / (double)NSEC_PER_SEC;
	double hits_per_sec = res->hits / rate_divisor / interval_sec;
	double hits_per_prod = hits_per_sec / env.producer_cnt;

	printf("Iter %3d (%7.3lfus): ", iter,
	       (delta_ns - NSEC_PER_SEC) / 1000.0);
	printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
	       hits_per_prod, rate);
}
379
/*
 * Print mean throughput (with stddev over iterations) and mean per-op
 * latency, each normalized by the measured fraction of a second.
 */
static void frac_second_report_final(struct bench_res res[], int res_cnt,
				     double lat_divisor, double rate_divisor,
				     char rate, const char *unit)
{
	double hits_mean = 0.0, hits_stddev = 0.0;
	double latency = 0.0;
	int i;

	for (i = 0; i < res_cnt; i++) {
		double val = res[i].hits / rate_divisor /
			     (res[i].duration_ns / (double)NSEC_PER_SEC);

		hits_mean += val / (0.0 + res_cnt);
		/*
		 * Divide in floating point so the per-op latency is not
		 * truncated by integer division, and skip intervals
		 * that recorded no hits to avoid dividing by zero.
		 */
		if (res[i].hits)
			latency += (double)res[i].duration_ns / res[i].hits /
				   (0.0 + res_cnt);
	}

	if (res_cnt > 1) {
		/* Sample standard deviation (Bessel-corrected). */
		for (i = 0; i < res_cnt; i++) {
			double val =
				res[i].hits / rate_divisor /
				(res[i].duration_ns / (double)NSEC_PER_SEC);
			hits_stddev += (hits_mean - val) * (hits_mean - val) /
				       (res_cnt - 1.0);
		}

		hits_stddev = sqrt(hits_stddev);
	}
	printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
	       hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
	       rate);
	printf("latency %8.3lf %s/op\n",
	       latency / lat_divisor / env.producer_cnt, unit);
}
412
/* Insert progress: rate in millions of ops per second. */
static void insert_ops_report_progress(int iter, struct bench_res *res,
				       long delta_ns)
{
	frac_second_report_progress(iter, res, delta_ns, 1000000.0, 'M');
}
421
/* Delete progress: rate in millions of ops per second. */
static void delete_ops_report_progress(int iter, struct bench_res *res,
				       long delta_ns)
{
	frac_second_report_progress(iter, res, delta_ns, 1000000.0, 'M');
}
430
/* Free progress: rate in thousands of ops per second. */
static void free_ops_report_progress(int iter, struct bench_res *res,
				     long delta_ns)
{
	frac_second_report_progress(iter, res, delta_ns, 1000.0, 'K');
}
439
440static void insert_ops_report_final(struct bench_res res[], int res_cnt)
441{
442 double lat_divisor = 1.0;
443 double rate_divisor = 1000000.0;
444 const char *unit = "ns";
445 char rate = 'M';
446
447 frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
448 unit);
449}
450
451static void delete_ops_report_final(struct bench_res res[], int res_cnt)
452{
453 double lat_divisor = 1.0;
454 double rate_divisor = 1000000.0;
455 const char *unit = "ns";
456 char rate = 'M';
457
458 frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
459 unit);
460}
461
462static void free_ops_report_final(struct bench_res res[], int res_cnt)
463{
464 double lat_divisor = 1000000.0;
465 double rate_divisor = 1000.0;
466 const char *unit = "ms";
467 char rate = 'K';
468
469 frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
470 unit);
471}
472
473/* noop bench measures harness-overhead */
const struct bench bench_lpm_trie_noop = {
	.name = "lpm-trie-noop",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_noop_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* baseline overhead for lookup and update */
const struct bench bench_lpm_trie_baseline = {
	.name = "lpm-trie-baseline",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_baseline_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* measure cost of doing a lookup on existing entries in a full trie */
const struct bench bench_lpm_trie_lookup = {
	.name = "lpm-trie-lookup",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_lookup_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* measure cost of inserting new entries into an empty trie */
const struct bench bench_lpm_trie_insert = {
	.name = "lpm-trie-insert",
	.argp = &bench_lpm_trie_map_argp,
	.validate = lpm_insert_validate,
	.setup = lpm_insert_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	/* fraction-of-second reporting: run stops to reset the map */
	.report_progress = insert_ops_report_progress,
	.report_final = insert_ops_report_final,
};

/* measure cost of updating existing entries in a full trie */
const struct bench bench_lpm_trie_update = {
	.name = "lpm-trie-update",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_update_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* measure cost of deleting existing entries from a full trie */
const struct bench bench_lpm_trie_delete = {
	.name = "lpm-trie-delete",
	.argp = &bench_lpm_trie_map_argp,
	.validate = lpm_delete_validate,
	.setup = lpm_delete_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	/* fraction-of-second reporting: run stops to refill the map */
	.report_progress = delete_ops_report_progress,
	.report_final = delete_ops_report_final,
};

/* measure cost of freeing a full trie */
const struct bench bench_lpm_trie_free = {
	.name = "lpm-trie-free",
	.argp = &bench_lpm_trie_map_argp,
	.validate = lpm_free_validate,
	.setup = lpm_free_setup,
	/* dedicated producer: creates/fills/destroys maps in a loop */
	.producer_thread = lpm_free_producer,
	.measure = lpm_measure,
	.report_progress = free_ops_report_progress,
	.report_final = free_ops_report_final,
};