Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf trace: Skip internal syscall arguments

Recent changes in the linux-next kernel will add new fields to syscall
events that hold the contents of userspace memory, like below.

# cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format
name: sys_enter_write
ID: 758
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;

field:int __syscall_nr; offset:8; size:4; signed:1;
field:unsigned int fd; offset:16; size:8; signed:0;
field:const char * buf; offset:24; size:8; signed:0;
field:size_t count; offset:32; size:8; signed:0;
field:__data_loc char[] __buf_val; offset:40; size:4; signed:0;

print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)),
((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1),
((unsigned long)(REC->count))

We have a different way to handle those arguments, so this change
confuses perf trace and makes some tests fail. Fix it by skipping
the new fields that have the "__data_loc char[]" type.

Maybe we can switch to this instead of the BPF augmentation later.

Reviewed-by: Howard Chu <howardchu95@gmail.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
Tested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Howard Chu <howardchu95@gmail.com>
Reported-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

+21
+21
tools/perf/builtin-trace.c
··· 2069 2069 return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name); 2070 2070 } 2071 2071 2072 + /* 2073 + * v6.19 kernel added new fields to read userspace memory for event tracing. 2074 + * But it's not used by perf and confuses the syscall parameters. 2075 + */ 2076 + static bool is_internal_field(struct tep_format_field *field) 2077 + { 2078 + return !strcmp(field->type, "__data_loc char[]"); 2079 + } 2080 + 2072 2081 static struct tep_format_field * 2073 2082 syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field, 2074 2083 bool *use_btf) ··· 2086 2077 int len; 2087 2078 2088 2079 for (; field; field = field->next, ++arg) { 2080 + /* assume it's the last argument */ 2081 + if (is_internal_field(field)) 2082 + continue; 2083 + 2089 2084 last_field = field; 2090 2085 2091 2086 if (arg->scnprintf) ··· 2158 2145 { 2159 2146 char tp_name[128]; 2160 2147 const char *name; 2148 + struct tep_format_field *field; 2161 2149 int err; 2162 2150 2163 2151 if (sc->nonexistent) ··· 2213 2199 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { 2214 2200 sc->args = sc->args->next; 2215 2201 --sc->nr_args; 2202 + } 2203 + 2204 + field = sc->args; 2205 + while (field) { 2206 + if (is_internal_field(field)) 2207 + --sc->nr_args; 2208 + field = field->next; 2216 2209 } 2217 2210 2218 2211 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");