Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'libbpf-make-optimized-uprobes-backward-compatible'

Jiri Olsa says:

====================
libbpf: Make optimized uprobes backward compatible

hi,
we can currently optimize uprobes on top of nop5 instructions,
so an application can define USDT_NOP as nop5 and use the USDT macro
to define optimized usdt probes.

This works fine on new kernels, but can incur a performance penalty
on older kernels that do not have support for optimizing and
emulating the nop5 instruction.

This patchset adds support to work around the performance penalty
on older kernels that do not support uprobe optimization, please
see the detailed description in patch 2.

v1: https://lore.kernel.org/bpf/20251117083551.517393-1-jolsa@kernel.org/
v2: https://lore.kernel.org/bpf/20260210133649.524292-1-jolsa@kernel.org/
v3: https://lore.kernel.org/bpf/20260211084858.750950-1-jolsa@kernel.org/T/#t
v4: https://lore.kernel.org/bpf/20260220104220.634154-1-jolsa@kernel.org/

v5 changes:
- keep nop_combo on the stack and leave buf uninitialized
in the has_nop_combo function [David]

v4 changes:
- rebased on latest bpf-next/master
- use pread for nop combo read [Andrii]
- renamed usdt trigger benchmarks [Andrii]
- added more ip address checks to tests [Andrii]

v3 changes:
- fix __x86_64 define and other typos [CI]
- add missing '?' to usdt trigger program [CI]

v2 changes:
- after more investigation we realized there are some versions of
bpftrace and stap that do not work with the solution suggested in
version 1, so we decided to switch to the following solution:

- change the USDT macro [1] to emit the nop,nop5 instruction combo by
default
- libbpf detects the nop,nop5 instruction combo for a USDT probe;
if it is present and the uprobe syscall is detected, libbpf installs
the usdt probe on top of the nop5 instruction to get it optimized

- added usdt trigger benchmarks [Andrii]
- several small fixes on uprobe syscall detection, tests and other places [Andrii]
- true usdt.h source [1] updated [Andrii]
- compile usdt_* objects unconditionally [Andrii]

thanks,
jirka

[1] https://github.com/libbpf/usdt
---
====================

Link: https://patch.msgid.link/20260224103915.1369690-1-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+289 -7
+24
tools/lib/bpf/features.c
··· 568 568 return 1; 569 569 } 570 570 571 + #ifdef __x86_64__ 572 + 573 + #ifndef __NR_uprobe 574 + #define __NR_uprobe 336 575 + #endif 576 + 577 + static int probe_uprobe_syscall(int token_fd) 578 + { 579 + /* 580 + * If kernel supports uprobe() syscall, it will return -ENXIO when called 581 + * from the outside of a kernel-generated uprobe trampoline. 582 + */ 583 + return syscall(__NR_uprobe) < 0 && errno == ENXIO; 584 + } 585 + #else 586 + static int probe_uprobe_syscall(int token_fd) 587 + { 588 + return 0; 589 + } 590 + #endif 591 + 571 592 typedef int (*feature_probe_fn)(int /* token_fd */); 572 593 573 594 static struct kern_feature_cache feature_cache; ··· 666 645 }, 667 646 [FEAT_LDIMM64_FULL_RANGE_OFF] = { 668 647 "full range LDIMM64 support", probe_ldimm64_full_range_off, 648 + }, 649 + [FEAT_UPROBE_SYSCALL] = { 650 + "kernel supports uprobe syscall", probe_uprobe_syscall, 669 651 }, 670 652 }; 671 653
+2
tools/lib/bpf/libbpf_internal.h
··· 394 394 FEAT_BTF_QMARK_DATASEC, 395 395 /* Kernel supports LDIMM64 imm offsets past 512 MiB. */ 396 396 FEAT_LDIMM64_FULL_RANGE_OFF, 397 + /* Kernel supports uprobe syscall */ 398 + FEAT_UPROBE_SYSCALL, 397 399 __FEAT_CNT, 398 400 }; 399 401
+43 -4
tools/lib/bpf/usdt.c
··· 262 262 bool has_bpf_cookie; 263 263 bool has_sema_refcnt; 264 264 bool has_uprobe_multi; 265 + bool has_uprobe_syscall; 265 266 }; 266 267 267 268 struct usdt_manager *usdt_manager_new(struct bpf_object *obj) ··· 302 301 * usdt probes. 303 302 */ 304 303 man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); 304 + 305 + /* 306 + * Detect kernel support for uprobe() syscall, it's presence means we can 307 + * take advantage of faster nop5 uprobe handling. 308 + * Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe") 309 + */ 310 + man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL); 305 311 return man; 306 312 } 307 313 ··· 593 585 594 586 static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); 595 587 596 - static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, 597 - const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, 598 - struct usdt_target **out_targets, size_t *out_target_cnt) 588 + #if defined(__x86_64__) 589 + static bool has_nop_combo(int fd, long off) 590 + { 591 + unsigned char nop_combo[6] = { 592 + 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */ 593 + }; 594 + unsigned char buf[6]; 595 + 596 + if (pread(fd, buf, 6, off) != 6) 597 + return false; 598 + return memcmp(buf, nop_combo, 6) == 0; 599 + } 600 + #else 601 + static bool has_nop_combo(int fd, long off) 602 + { 603 + return false; 604 + } 605 + #endif 606 + 607 + static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd, const char *path, 608 + pid_t pid, const char *usdt_provider, const char *usdt_name, 609 + __u64 usdt_cookie, struct usdt_target **out_targets, 610 + size_t *out_target_cnt) 599 611 { 600 612 size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; 601 613 struct elf_seg *segs = NULL, *vma_segs = NULL; 602 614 struct usdt_target *targets = NULL, *target; 615 + Elf *elf = 
elf_fd->elf; 603 616 long base_addr = 0; 604 617 Elf_Scn *notes_scn, *base_scn; 605 618 GElf_Shdr base_shdr, notes_shdr; ··· 812 783 813 784 target = &targets[target_cnt]; 814 785 memset(target, 0, sizeof(*target)); 786 + 787 + /* 788 + * We have uprobe syscall and usdt with nop,nop5 instructions combo, 789 + * so we can place the uprobe directly on nop5 (+1) and get this probe 790 + * optimized. 791 + */ 792 + if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) { 793 + usdt_abs_ip++; 794 + usdt_rel_ip++; 795 + } 815 796 816 797 target->abs_ip = usdt_abs_ip; 817 798 target->rel_ip = usdt_rel_ip; ··· 1037 998 /* discover USDT in given binary, optionally limiting 1038 999 * activations to a given PID, if pid > 0 1039 1000 */ 1040 - err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name, 1001 + err = collect_usdt_targets(man, &elf_fd, path, pid, usdt_provider, usdt_name, 1041 1002 usdt_cookie, &targets, &target_cnt); 1042 1003 if (err <= 0) { 1043 1004 err = (err == 0) ? -ENOENT : err;
+2
tools/testing/selftests/bpf/.gitignore
··· 47 47 *.BTF 48 48 *.BTF_ids 49 49 *.BTF.base 50 + usdt_1 51 + usdt_2
+4 -1
tools/testing/selftests/bpf/Makefile
··· 754 754 $(VERIFY_SIG_HDR) \ 755 755 flow_dissector_load.h \ 756 756 ip_check_defrag_frags.h \ 757 - bpftool_helpers.c 757 + bpftool_helpers.c \ 758 + usdt_1.c usdt_2.c 758 759 TRUNNER_LIB_SOURCES := find_bit.c 759 760 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ 760 761 $(OUTPUT)/liburandom_read.so \ ··· 879 878 $(OUTPUT)/bench_bpf_crypto.o \ 880 879 $(OUTPUT)/bench_sockmap.o \ 881 880 $(OUTPUT)/bench_lpm_trie_map.o \ 881 + $(OUTPUT)/usdt_1.o \ 882 + $(OUTPUT)/usdt_2.o \ 882 883 # 883 884 $(call msg,BINARY,,$@) 884 885 $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+4
tools/testing/selftests/bpf/bench.c
··· 541 541 extern const struct bench bench_trig_uretprobe_nop5; 542 542 extern const struct bench bench_trig_uprobe_multi_nop5; 543 543 extern const struct bench bench_trig_uretprobe_multi_nop5; 544 + extern const struct bench bench_trig_usdt_nop; 545 + extern const struct bench bench_trig_usdt_nop5; 544 546 #endif 545 547 546 548 extern const struct bench bench_rb_libbpf; ··· 619 617 &bench_trig_uretprobe_nop5, 620 618 &bench_trig_uprobe_multi_nop5, 621 619 &bench_trig_uretprobe_multi_nop5, 620 + &bench_trig_usdt_nop, 621 + &bench_trig_usdt_nop5, 622 622 #endif 623 623 /* ringbuf/perfbuf benchmarks */ 624 624 &bench_rb_libbpf,
+60
tools/testing/selftests/bpf/benchs/bench_trigger.c
··· 407 407 uprobe_target_nop5(); 408 408 return NULL; 409 409 } 410 + 411 + void usdt_1(void); 412 + void usdt_2(void); 413 + 414 + static void *uprobe_producer_usdt_nop(void *input) 415 + { 416 + while (true) 417 + usdt_1(); 418 + return NULL; 419 + } 420 + 421 + static void *uprobe_producer_usdt_nop5(void *input) 422 + { 423 + while (true) 424 + usdt_2(); 425 + return NULL; 426 + } 410 427 #endif 411 428 412 429 static void usetup(bool use_retprobe, bool use_multi, void *target_addr) ··· 561 544 { 562 545 usetup(true, true /* use_multi */, &uprobe_target_nop5); 563 546 } 547 + 548 + static void usdt_setup(const char *name) 549 + { 550 + struct bpf_link *link; 551 + int err; 552 + 553 + setup_libbpf(); 554 + 555 + ctx.skel = trigger_bench__open(); 556 + if (!ctx.skel) { 557 + fprintf(stderr, "failed to open skeleton\n"); 558 + exit(1); 559 + } 560 + 561 + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_usdt, true); 562 + 563 + err = trigger_bench__load(ctx.skel); 564 + if (err) { 565 + fprintf(stderr, "failed to load skeleton\n"); 566 + exit(1); 567 + } 568 + 569 + link = bpf_program__attach_usdt(ctx.skel->progs.bench_trigger_usdt, 570 + 0 /*self*/, "/proc/self/exe", 571 + "optimized_attach", name, NULL); 572 + if (libbpf_get_error(link)) { 573 + fprintf(stderr, "failed to attach optimized_attach:%s usdt probe\n", name); 574 + exit(1); 575 + } 576 + ctx.skel->links.bench_trigger_usdt = link; 577 + } 578 + 579 + static void usdt_nop_setup(void) 580 + { 581 + usdt_setup("usdt_1"); 582 + } 583 + 584 + static void usdt_nop5_setup(void) 585 + { 586 + usdt_setup("usdt_2"); 587 + } 564 588 #endif 565 589 566 590 const struct bench bench_trig_syscall_count = { ··· 669 611 BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5"); 670 612 BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5"); 671 613 BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5"); 614 + BENCH_TRIG_USERMODE(usdt_nop, usdt_nop, "usdt-nop"); 615 + 
BENCH_TRIG_USERMODE(usdt_nop5, usdt_nop5, "usdt-nop5"); 672 616 #endif
+1 -1
tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
··· 2 2 3 3 set -eufo pipefail 4 4 5 - for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} 5 + for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} usdt-nop usdt-nop5 6 6 do 7 7 summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) 8 8 printf "%-15s: %s\n" $i "$summary"
+92
tools/testing/selftests/bpf/prog_tests/usdt.c
··· 247 247 #undef TRIGGER 248 248 } 249 249 250 + #ifdef __x86_64__ 251 + extern void usdt_1(void); 252 + extern void usdt_2(void); 253 + 254 + static unsigned char nop1[1] = { 0x90 }; 255 + static unsigned char nop1_nop5_combo[6] = { 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 }; 256 + 257 + static void *find_instr(void *fn, unsigned char *instr, size_t cnt) 258 + { 259 + int i; 260 + 261 + for (i = 0; i < 10; i++) { 262 + if (!memcmp(instr, fn + i, cnt)) 263 + return fn + i; 264 + } 265 + return NULL; 266 + } 267 + 268 + static void subtest_optimized_attach(void) 269 + { 270 + struct test_usdt *skel; 271 + __u8 *addr_1, *addr_2; 272 + 273 + /* usdt_1 USDT probe has single nop instruction */ 274 + addr_1 = find_instr(usdt_1, nop1_nop5_combo, 6); 275 + if (!ASSERT_NULL(addr_1, "usdt_1_find_nop1_nop5_combo")) 276 + return; 277 + 278 + addr_1 = find_instr(usdt_1, nop1, 1); 279 + if (!ASSERT_OK_PTR(addr_1, "usdt_1_find_nop1")) 280 + return; 281 + 282 + /* usdt_2 USDT probe has nop,nop5 instructions combo */ 283 + addr_2 = find_instr(usdt_2, nop1_nop5_combo, 6); 284 + if (!ASSERT_OK_PTR(addr_2, "usdt_2_find_nop1_nop5_combo")) 285 + return; 286 + 287 + skel = test_usdt__open_and_load(); 288 + if (!ASSERT_OK_PTR(skel, "test_usdt__open_and_load")) 289 + return; 290 + 291 + skel->bss->expected_ip = (unsigned long) addr_1; 292 + 293 + /* 294 + * Attach program on top of usdt_1 which is single nop probe, 295 + * so the probe won't get optimized. 
296 + */ 297 + skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed, 298 + 0 /*self*/, "/proc/self/exe", 299 + "optimized_attach", "usdt_1", NULL); 300 + if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt")) 301 + goto cleanup; 302 + 303 + usdt_1(); 304 + usdt_1(); 305 + 306 + /* int3 is on addr_1 address */ 307 + ASSERT_EQ(*addr_1, 0xcc, "int3"); 308 + ASSERT_EQ(skel->bss->executed, 2, "executed"); 309 + 310 + bpf_link__destroy(skel->links.usdt_executed); 311 + 312 + /* we expect the nop5 ip */ 313 + skel->bss->expected_ip = (unsigned long) addr_2 + 1; 314 + 315 + /* 316 + * Attach program on top of usdt_2 which is probe defined on top 317 + * of nop1,nop5 combo, so the probe gets optimized on top of nop5. 318 + */ 319 + skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed, 320 + 0 /*self*/, "/proc/self/exe", 321 + "optimized_attach", "usdt_2", NULL); 322 + if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt")) 323 + goto cleanup; 324 + 325 + usdt_2(); 326 + usdt_2(); 327 + 328 + /* nop stays on addr_2 address */ 329 + ASSERT_EQ(*addr_2, 0x90, "nop"); 330 + 331 + /* call is on addr_2 + 1 address */ 332 + ASSERT_EQ(*(addr_2 + 1), 0xe8, "call"); 333 + ASSERT_EQ(skel->bss->executed, 4, "executed"); 334 + 335 + cleanup: 336 + test_usdt__destroy(skel); 337 + } 338 + #endif 339 + 250 340 unsigned short test_usdt_100_semaphore SEC(".probes"); 251 341 unsigned short test_usdt_300_semaphore SEC(".probes"); 252 342 unsigned short test_usdt_400_semaphore SEC(".probes"); ··· 606 516 #ifdef __x86_64__ 607 517 if (test__start_subtest("basic_optimized")) 608 518 subtest_basic_usdt(true); 519 + if (test__start_subtest("optimized_attach")) 520 + subtest_optimized_attach(); 609 521 #endif 610 522 if (test__start_subtest("multispec")) 611 523 subtest_multispec_usdt();
+12
tools/testing/selftests/bpf/progs/test_usdt.c
··· 138 138 return 0; 139 139 } 140 140 141 + #ifdef __TARGET_ARCH_x86 142 + int executed; 143 + unsigned long expected_ip; 144 + 145 + SEC("usdt") 146 + int usdt_executed(struct pt_regs *ctx) 147 + { 148 + if (expected_ip == ctx->ip) 149 + executed++; 150 + return 0; 151 + } 152 + #endif 141 153 char _license[] SEC("license") = "GPL";
+9 -1
tools/testing/selftests/bpf/progs/trigger_bench.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2020 Facebook 3 - #include <linux/bpf.h> 3 + #include "vmlinux.h" 4 4 #include <asm/unistd.h> 5 5 #include <bpf/bpf_helpers.h> 6 6 #include <bpf/bpf_tracing.h> 7 7 #include "bpf_misc.h" 8 + #include "bpf/usdt.bpf.h" 8 9 9 10 char _license[] SEC("license") = "GPL"; 10 11 ··· 179 178 int bench_trigger_rawtp(void *ctx) 180 179 { 181 180 handle(ctx); 181 + return 0; 182 + } 183 + 184 + SEC("?usdt") 185 + int bench_trigger_usdt(void *ctx) 186 + { 187 + inc_counter(); 182 188 return 0; 183 189 }
+2
tools/testing/selftests/bpf/usdt.h
··· 312 312 #ifndef USDT_NOP 313 313 #if defined(__ia64__) || defined(__s390__) || defined(__s390x__) 314 314 #define USDT_NOP nop 0 315 + #elif defined(__x86_64__) 316 + #define USDT_NOP .byte 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x0 /* nop, nop5 */ 315 317 #else 316 318 #define USDT_NOP nop 317 319 #endif
+18
tools/testing/selftests/bpf/usdt_1.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #if defined(__x86_64__) 4 + 5 + /* 6 + * Include usdt.h with defined USDT_NOP macro to use single 7 + * nop instruction. 8 + */ 9 + #define USDT_NOP .byte 0x90 10 + #include "usdt.h" 11 + 12 + __attribute__((aligned(16))) 13 + void usdt_1(void) 14 + { 15 + USDT(optimized_attach, usdt_1); 16 + } 17 + 18 + #endif
+16
tools/testing/selftests/bpf/usdt_2.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #if defined(__x86_64__) 4 + 5 + /* 6 + * Include usdt.h with default nop,nop5 instructions combo. 7 + */ 8 + #include "usdt.h" 9 + 10 + __attribute__((aligned(16))) 11 + void usdt_2(void) 12 + { 13 + USDT(optimized_attach, usdt_2); 14 + } 15 + 16 + #endif