perf trace: Migrate BPF augmentation to use a skeleton

Previously a BPF event of augmented_raw_syscalls.c could be used to
enable augmentation of syscalls by perf trace. As BPF events are no
longer supported, switch to using a BPF skeleton which, when attached,
explicitly opens the sys_enter and sys_exit tracepoints.

The dump map (and the --map-dump option that selected it) is removed, as
debugging wasn't supported by the augmentation and bpf_printk can be used
when necessary.

Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the
rename/migration to a BPF skeleton captures that this was the source.

Committer notes:

Some minor stylistic changes to help visualize the diff.

Use libbpf_strerror when failing to load the augmented raw syscalls BPF skeleton.

Use bpf_object__for_each_program(prog, trace.skel->obj) to disable auto
attachment for all programs but the sys_enter and sys_exit tracepoints, to
avoid having to add extra lines as we add support for more
pointer-receiving syscalls.

Committer testing:

# perf trace -e open* --max-events=10
0.000 ( 0.022 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 11
208.833 ( ): gnome-terminal/3223 openat(dfd: CWD, filename: "/proc/51250/cmdline") ...
249.993 ( 0.024 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 11
250.118 ( 0.030 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.pressure", flags: RDONLY|CLOEXEC) = 11
250.205 ( 0.016 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.current", flags: RDONLY|CLOEXEC) = 11
250.244 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.min", flags: RDONLY|CLOEXEC) = 11
250.282 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.low", flags: RDONLY|CLOEXEC) = 11
250.320 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.swap.current", flags: RDONLY|CLOEXEC) = 11
250.355 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.stat", flags: RDONLY|CLOEXEC) = 11
250.717 ( 0.016 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/memory.pressure", flags: RDONLY|CLOEXEC) = 11
#
# perf trace -e *nanosleep* --max-events=10
? ( ): SCTP timer/28304 ... [continued]: clock_nanosleep()) = 0
0.007 (10.058 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0
10.069 ( ): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) ...
10.069 (10.056 ms): SCTP timer/28304 ... [continued]: clock_nanosleep()) = 0
17.059 ( ): podman/3572 nanosleep(rqtp: 0x7fc4f4d75be0) ...
17.059 (10.061 ms): podman/3572 ... [continued]: nanosleep()) = 0
20.131 (10.059 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0
30.195 (10.038 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0
40.238 (10.057 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0
50.301 ( ): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) ...
#

# perf trace -e perf_event* -- perf stat -e instructions,cycles,cache-misses sleep 0.1
0.000 ( 0.011 ms): perf/51331 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x1 (PERF_COUNT_HW_INSTRUCTIONS), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 51332 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
0.013 ( 0.003 ms): perf/51331 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0 (PERF_COUNT_HW_CPU_CYCLES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 51332 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
0.017 ( 0.002 ms): perf/51331 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x3 (PERF_COUNT_HW_CACHE_MISSES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 51332 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5

Performance counter stats for 'sleep 0.1':

1,495,051 instructions # 1.11 insn per cycle
1,347,641 cycles
35,424 cache-misses

0.100935279 seconds time elapsed

0.000924000 seconds user
0.000000000 seconds sys

#

# perf trace -e connect* ssh localhost
0.000 ( 0.012 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.118 ( 0.004 ms): ssh/51346 connect(fd: 6, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.399 ( 0.007 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.426 ( 0.003 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.754 ( 0.009 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: INET, port: 22, addr: 127.0.0.1 }, addrlen: 16) = 0
0.771 ( 0.010 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: INET6, port: 22, addr: ::1 }, addrlen: 28) = 0
0.798 ( 0.053 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: INET6, port: 22, addr: ::1 }, addrlen: 28) = 0
0.870 ( 0.004 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.904 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.930 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.957 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
0.981 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
1.006 ( 0.004 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
1.036 ( 0.005 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused)
65.077 ( 0.022 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/run/.heim_org.h5l.kcm-socket }, addrlen: 110) = 0
66.608 ( 0.014 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/run/.heim_org.h5l.kcm-socket }, addrlen: 110) = 0
root@localhost's password:
#

# perf trace -e sendto* ping -c 2 localhost
PING localhost(localhost (::1)) 56 data bytes
64 bytes from localhost (::1): icmp_seq=1 ttl=64 time=0.024 ms
0.000 ( 0.011 ms): ping/51357 sendto(fd: 5, buff: 0x7ffcca35e620, len: 20, addr: { .family: NETLINK }, addr_len: 0xc) = 20
0.135 ( 0.026 ms): ping/51357 sendto(fd: 4, buff: 0x5601398f7b20, len: 64, addr: { .family: INET6, port: 58, addr: ::1 }, addr_len: 0x1c) = 64
1014.929 ( 0.050 ms): ping/51357 sendto(fd: 4, buff: 0x5601398f7b20, len: 64, flags: CONFIRM, addr: { .family: INET6, port: 58, addr: ::1 }, addr_len: 0x1c) = 64
64 bytes from localhost (::1): icmp_seq=2 ttl=64 time=0.046 ms

--- localhost ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1015ms
rtt min/avg/max/mdev = 0.024/0.035/0.046/0.011 ms
#

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Carsten Haitzler <carsten.haitzler@arm.com>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Fangrui Song <maskray@google.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: Tom Rix <trix@redhat.com>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Wang ShaoBo <bobo.shaobowang@huawei.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Cc: bpf@vger.kernel.org
Cc: llvm@lists.linux.dev
Link: https://lore.kernel.org/r/20230810184853.2860737-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by

Arnaldo Carvalho de Melo 2 years ago 5e6da6be 3d6dfae8

+110 -77

3 changed files

expand all

tools

perf

Makefile.perf

builtin-trace.c

util

bpf_skel

augmented_raw_syscalls.bpf.c

tools/perf/Makefile.perf

··· 1038 1038 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h 1039 1039 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h 1040 1040 SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h 1041 + SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h 1041 1042 1042 1043 $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): 1043 1044 $(Q)$(MKDIR) -p $@

+95 -64

tools/perf/builtin-trace.c

··· 19 19 #ifdef HAVE_LIBBPF_SUPPORT 20 20 #include <bpf/bpf.h> 21 21 #include <bpf/libbpf.h> 22 + #ifdef HAVE_BPF_SKEL 23 + #include "bpf_skel/augmented_raw_syscalls.skel.h" 24 + #endif 22 25 #endif 23 26 #include "util/bpf_map.h" 24 27 #include "util/rlimit.h" ··· 130 127 struct syscalltbl *sctbl; 131 128 struct { 132 129 struct syscall *table; 133 - struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY 134 - struct bpf_map *sys_enter, 135 - *sys_exit; 136 - } prog_array; 137 130 struct { 138 131 struct evsel *sys_enter, 139 - *sys_exit, 140 - *augmented; 132 + *sys_exit, 133 + *bpf_output; 141 134 } events; 142 - struct bpf_program *unaugmented_prog; 143 135 } syscalls; 144 - struct { 145 - struct bpf_map *map; 146 - } dump; 136 + #ifdef HAVE_BPF_SKEL 137 + struct augmented_raw_syscalls_bpf *skel; 138 + #endif 147 139 struct record_opts opts; 148 140 struct evlist *evlist; 149 141 struct machine *host; 150 142 struct thread *current; 151 - struct bpf_object *bpf_obj; 152 143 struct cgroup *cgroup; 153 144 u64 base_time; 154 145 FILE *output; ··· 412 415 if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") && 413 416 evsel__init_tp_uint_field(evsel, &sc->id, "nr")) 414 417 return -ENOENT; 418 + 415 419 return 0; 416 420 } 417 421 ··· 2843 2845 if (thread) 2844 2846 trace__fprintf_comm_tid(trace, thread, trace->output); 2845 2847 2846 - if (evsel == trace->syscalls.events.augmented) { 2848 + if (evsel == trace->syscalls.events.bpf_output) { 2847 2849 int id = perf_evsel__sc_tp_uint(evsel, id, sample); 2848 2850 struct syscall *sc = trace__syscall_info(trace, evsel, id); 2849 2851 ··· 3276 3278 goto out; 3277 3279 } 3278 3280 3279 - #ifdef HAVE_LIBBPF_SUPPORT 3280 - static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name) 3281 - { 3282 - if (trace->bpf_obj == NULL) 3283 - return NULL; 3284 - 3285 - return bpf_object__find_map_by_name(trace->bpf_obj, name); 3286 - } 3287 - 3281 + #ifdef HAVE_BPF_SKEL 3288 3282 static struct 
bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) 3289 3283 { 3290 3284 struct bpf_program *pos, *prog = NULL; 3291 3285 const char *sec_name; 3292 3286 3293 - if (trace->bpf_obj == NULL) 3287 + if (trace->skel->obj == NULL) 3294 3288 return NULL; 3295 3289 3296 - bpf_object__for_each_program(pos, trace->bpf_obj) { 3290 + bpf_object__for_each_program(pos, trace->skel->obj) { 3297 3291 sec_name = bpf_program__section_name(pos); 3298 3292 if (sec_name && !strcmp(sec_name, name)) { 3299 3293 prog = pos; ··· 3303 3313 3304 3314 if (prog_name == NULL) { 3305 3315 char default_prog_name[256]; 3306 - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name); 3316 + scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->name); 3307 3317 prog = trace__find_bpf_program_by_title(trace, default_prog_name); 3308 3318 if (prog != NULL) 3309 3319 goto out_found; 3310 3320 if (sc->fmt && sc->fmt->alias) { 3311 - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias); 3321 + scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->fmt->alias); 3312 3322 prog = trace__find_bpf_program_by_title(trace, default_prog_name); 3313 3323 if (prog != NULL) 3314 3324 goto out_found; ··· 3326 3336 pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n", 3327 3337 prog_name, type, sc->name); 3328 3338 out_unaugmented: 3329 - return trace->syscalls.unaugmented_prog; 3339 + return trace->skel->progs.syscall_unaugmented; 3330 3340 } 3331 3341 3332 3342 static void trace__init_syscall_bpf_progs(struct trace *trace, int id) ··· 3343 3353 static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id) 3344 3354 { 3345 3355 struct syscall *sc = trace__syscall_info(trace, NULL, id); 3346 - return sc ? 
bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog); 3356 + return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); 3347 3357 } 3348 3358 3349 3359 static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) 3350 3360 { 3351 3361 struct syscall *sc = trace__syscall_info(trace, NULL, id); 3352 - return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog); 3362 + return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); 3353 3363 } 3354 3364 3355 3365 static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) ··· 3374 3384 bool is_candidate = false; 3375 3385 3376 3386 if (pair == NULL || pair == sc || 3377 - pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog) 3387 + pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented) 3378 3388 continue; 3379 3389 3380 3390 for (field = sc->args, candidate_field = pair->args; ··· 3427 3437 */ 3428 3438 if (pair_prog == NULL) { 3429 3439 pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? 
pair->fmt->bpf_prog_name.sys_enter : NULL, "enter"); 3430 - if (pair_prog == trace->syscalls.unaugmented_prog) 3440 + if (pair_prog == trace->skel->progs.syscall_unaugmented) 3431 3441 goto next_candidate; 3432 3442 } 3433 3443 ··· 3442 3452 3443 3453 static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) 3444 3454 { 3445 - int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter), 3446 - map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit); 3455 + int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); 3456 + int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); 3447 3457 int err = 0, key; 3448 3458 3449 3459 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { ··· 3505 3515 * For now we're just reusing the sys_enter prog, and if it 3506 3516 * already has an augmenter, we don't need to find one. 3507 3517 */ 3508 - if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog) 3518 + if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented) 3509 3519 continue; 3510 3520 3511 3521 /* ··· 3528 3538 break; 3529 3539 } 3530 3540 3531 - 3532 3541 return err; 3533 3542 } 3534 - 3535 - #else // HAVE_LIBBPF_SUPPORT 3536 - static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused, 3537 - const char *name __maybe_unused) 3538 - { 3539 - return NULL; 3540 - } 3541 - 3542 - static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused) 3543 - { 3544 - return 0; 3545 - } 3546 - #endif // HAVE_LIBBPF_SUPPORT 3543 + #endif // HAVE_BPF_SKEL 3547 3544 3548 3545 static int trace__set_ev_qualifier_filter(struct trace *trace) 3549 3546 { ··· 3894 3917 err = evlist__open(evlist); 3895 3918 if (err < 0) 3896 3919 goto out_error_open; 3920 + #ifdef HAVE_BPF_SKEL 3921 + { 3922 + struct perf_cpu cpu; 3897 3923 3924 + /* 3925 + * Set up the __augmented_syscalls__ BPF map to hold for each 3926 + * CPU the bpf-output event's file descriptor. 
3927 + */ 3928 + perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) { 3929 + bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__, 3930 + &cpu.cpu, sizeof(int), 3931 + xyarray__entry(trace->syscalls.events.bpf_output->core.fd, 3932 + cpu.cpu, 0), 3933 + sizeof(__u32), BPF_ANY); 3934 + } 3935 + } 3936 + #endif 3898 3937 err = trace__set_filter_pids(trace); 3899 3938 if (err < 0) 3900 3939 goto out_error_mem; 3901 3940 3902 - if (trace->syscalls.prog_array.sys_enter) 3941 + #ifdef HAVE_BPF_SKEL 3942 + if (trace->skel->progs.sys_enter) 3903 3943 trace__init_syscalls_bpf_prog_array_maps(trace); 3944 + #endif 3904 3945 3905 3946 if (trace->ev_qualifier_ids.nr > 0) { 3906 3947 err = trace__set_ev_qualifier_filter(trace); ··· 3950 3955 err = evlist__apply_filters(evlist, &evsel); 3951 3956 if (err < 0) 3952 3957 goto out_error_apply_filters; 3953 - 3954 - if (trace->dump.map) 3955 - bpf_map__fprintf(trace->dump.map, trace->output); 3956 3958 3957 3959 err = evlist__mmap(evlist, trace->opts.mmap_pages); 3958 3960 if (err < 0) ··· 4647 4655 zfree(&trace->perfconfig_events); 4648 4656 } 4649 4657 4658 + #ifdef HAVE_BPF_SKEL 4659 + static int bpf__setup_bpf_output(struct evlist *evlist) 4660 + { 4661 + int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/"); 4662 + 4663 + if (err) 4664 + pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n"); 4665 + 4666 + return err; 4667 + } 4668 + #endif 4669 + 4650 4670 int cmd_trace(int argc, const char **argv) 4651 4671 { 4652 4672 const char *trace_usage[] = { ··· 4690 4686 .max_stack = UINT_MAX, 4691 4687 .max_events = ULONG_MAX, 4692 4688 }; 4693 - const char *map_dump_str = NULL; 4694 4689 const char *output_name = NULL; 4695 4690 const struct option trace_options[] = { 4696 4691 OPT_CALLBACK('e', "event", &trace, "event", ··· 4723 4720 OPT_CALLBACK(0, "duration", &trace, "float", 4724 4721 "show only events with duration > N.M ms", 
4725 4722 trace__set_duration), 4726 - #ifdef HAVE_LIBBPF_SUPPORT 4727 - OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"), 4728 - #endif 4729 4723 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 4730 4724 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 4731 4725 OPT_BOOLEAN('T', "time", &trace.full_time, ··· 4849 4849 "cgroup monitoring only available in system-wide mode"); 4850 4850 } 4851 4851 4852 - err = -1; 4852 + #ifdef HAVE_BPF_SKEL 4853 + trace.skel = augmented_raw_syscalls_bpf__open(); 4854 + if (!trace.skel) { 4855 + pr_debug("Failed to open augmented syscalls BPF skeleton"); 4856 + } else { 4857 + /* 4858 + * Disable attaching the BPF programs except for sys_enter and 4859 + * sys_exit that tail call into this as necessary. 4860 + */ 4861 + struct bpf_program *prog; 4853 4862 4854 - if (map_dump_str) { 4855 - trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str); 4856 - if (trace.dump.map == NULL) { 4857 - pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str); 4858 - goto out; 4863 + bpf_object__for_each_program(prog, trace.skel->obj) { 4864 + if (prog != trace.skel->progs.sys_enter && prog != trace.skel->progs.sys_exit) 4865 + bpf_program__set_autoattach(prog, /*autoattach=*/false); 4866 + } 4867 + 4868 + err = augmented_raw_syscalls_bpf__load(trace.skel); 4869 + 4870 + if (err < 0) { 4871 + libbpf_strerror(err, bf, sizeof(bf)); 4872 + pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", bf); 4873 + } else { 4874 + augmented_raw_syscalls_bpf__attach(trace.skel); 4875 + trace__add_syscall_newtp(&trace); 4859 4876 } 4860 4877 } 4878 + 4879 + err = bpf__setup_bpf_output(trace.evlist); 4880 + if (err) { 4881 + libbpf_strerror(err, bf, sizeof(bf)); 4882 + pr_err("ERROR: Setup BPF output event failed: %s\n", bf); 4883 + goto out; 4884 + } 4885 + trace.syscalls.events.bpf_output = evlist__last(trace.evlist); 4886 + 
assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__")); 4887 + #endif 4888 + err = -1; 4861 4889 4862 4890 if (trace.trace_pgfaults) { 4863 4891 trace.opts.sample_address = true; ··· 4937 4909 * buffers that are being copied from kernel to userspace, think 'read' 4938 4910 * syscall. 4939 4911 */ 4940 - if (trace.syscalls.events.augmented) { 4912 + if (trace.syscalls.events.bpf_output) { 4941 4913 evlist__for_each_entry(trace.evlist, evsel) { 4942 4914 bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0; 4943 4915 ··· 4946 4918 goto init_augmented_syscall_tp; 4947 4919 } 4948 4920 4949 - if (trace.syscalls.events.augmented->priv == NULL && 4921 + if (trace.syscalls.events.bpf_output->priv == NULL && 4950 4922 strstr(evsel__name(evsel), "syscalls:sys_enter")) { 4951 - struct evsel *augmented = trace.syscalls.events.augmented; 4923 + struct evsel *augmented = trace.syscalls.events.bpf_output; 4952 4924 if (evsel__init_augmented_syscall_tp(augmented, evsel) || 4953 4925 evsel__init_augmented_syscall_tp_args(augmented)) 4954 4926 goto out; ··· 5053 5025 fclose(trace.output); 5054 5026 out: 5055 5027 trace__exit(&trace); 5028 + #ifdef HAVE_BPF_SKEL 5029 + augmented_raw_syscalls_bpf__destroy(trace.skel); 5030 + #endif 5056 5031 return err; 5057 5032 }

+14 -13

tools/perf/examples/bpf/augmented_raw_syscalls.c tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c

··· 18 18 #include <bpf/bpf_helpers.h> 19 19 #include <linux/limits.h> 20 20 21 + #define MAX_CPUS 4096 22 + 21 23 // FIXME: These should come from system headers 22 24 typedef char bool; 23 25 typedef int pid_t; ··· 36 34 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 37 35 __type(key, int); 38 36 __type(value, __u32); 39 - __uint(max_entries, __NR_CPUS__); 37 + __uint(max_entries, MAX_CPUS); 40 38 } __augmented_syscalls__ SEC(".maps"); 41 39 42 40 /* ··· 172 170 return augmented_len; 173 171 } 174 172 175 - SEC("!raw_syscalls:unaugmented") 173 + SEC("tp/raw_syscalls/sys_enter") 176 174 int syscall_unaugmented(struct syscall_enter_args *args) 177 175 { 178 176 return 1; ··· 184 182 * on from there, reading the first syscall arg as a string, i.e. open's 185 183 * filename. 186 184 */ 187 - SEC("!syscalls:sys_enter_connect") 185 + SEC("tp/syscalls/sys_enter_connect") 188 186 int sys_enter_connect(struct syscall_enter_args *args) 189 187 { 190 188 struct augmented_args_payload *augmented_args = augmented_args_payload(); ··· 203 201 return augmented__output(args, augmented_args, len + socklen); 204 202 } 205 203 206 - SEC("!syscalls:sys_enter_sendto") 204 + SEC("tp/syscalls/sys_enter_sendto") 207 205 int sys_enter_sendto(struct syscall_enter_args *args) 208 206 { 209 207 struct augmented_args_payload *augmented_args = augmented_args_payload(); ··· 222 220 return augmented__output(args, augmented_args, len + socklen); 223 221 } 224 222 225 - SEC("!syscalls:sys_enter_open") 223 + SEC("tp/syscalls/sys_enter_open") 226 224 int sys_enter_open(struct syscall_enter_args *args) 227 225 { 228 226 struct augmented_args_payload *augmented_args = augmented_args_payload(); ··· 237 235 return augmented__output(args, augmented_args, len); 238 236 } 239 237 240 - SEC("!syscalls:sys_enter_openat") 238 + SEC("tp/syscalls/sys_enter_openat") 241 239 int sys_enter_openat(struct syscall_enter_args *args) 242 240 { 243 241 struct augmented_args_payload *augmented_args = 
augmented_args_payload(); ··· 252 250 return augmented__output(args, augmented_args, len); 253 251 } 254 252 255 - SEC("!syscalls:sys_enter_rename") 253 + SEC("tp/syscalls/sys_enter_rename") 256 254 int sys_enter_rename(struct syscall_enter_args *args) 257 255 { 258 256 struct augmented_args_payload *augmented_args = augmented_args_payload(); ··· 269 267 return augmented__output(args, augmented_args, len); 270 268 } 271 269 272 - SEC("!syscalls:sys_enter_renameat") 270 + SEC("tp/syscalls/sys_enter_renameat") 273 271 int sys_enter_renameat(struct syscall_enter_args *args) 274 272 { 275 273 struct augmented_args_payload *augmented_args = augmented_args_payload(); ··· 297 295 __u32 size; 298 296 }; 299 297 300 - SEC("!syscalls:sys_enter_perf_event_open") 298 + SEC("tp/syscalls/sys_enter_perf_event_open") 301 299 int sys_enter_perf_event_open(struct syscall_enter_args *args) 302 300 { 303 301 struct augmented_args_payload *augmented_args = augmented_args_payload(); ··· 329 327 return 1; /* Failure: don't filter */ 330 328 } 331 329 332 - SEC("!syscalls:sys_enter_clock_nanosleep") 330 + SEC("tp/syscalls/sys_enter_clock_nanosleep") 333 331 int sys_enter_clock_nanosleep(struct syscall_enter_args *args) 334 332 { 335 333 struct augmented_args_payload *augmented_args = augmented_args_payload(); ··· 360 358 return bpf_map_lookup_elem(pids, &pid) != NULL; 361 359 } 362 360 363 - SEC("raw_syscalls:sys_enter") 361 + SEC("tp/raw_syscalls/sys_enter") 364 362 int sys_enter(struct syscall_enter_args *args) 365 363 { 366 364 struct augmented_args_payload *augmented_args; ··· 373 371 * We'll add to this as we add augmented syscalls right after that 374 372 * initial, non-augmented raw_syscalls:sys_enter payload. 
375 373 */ 376 - unsigned int len = sizeof(augmented_args->args); 377 374 378 375 if (pid_filter__has(&pids_filtered, getpid())) 379 376 return 0; ··· 394 393 return 0; 395 394 } 396 395 397 - SEC("raw_syscalls:sys_exit") 396 + SEC("tp/raw_syscalls/sys_exit") 398 397 int sys_exit(struct syscall_exit_args *args) 399 398 { 400 399 struct syscall_exit_args exit_args;

Configure Feed

Configure Feed