Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tracing: Add "fields" option to show raw trace event fields

The hex, raw and bin formats come from the latency tracer of the old
PREEMPT_RT patch set. At the time, they gave real alternatives to reading
the ASCII buffer, but they have started to bit rot and they do not give a
good representation of the tracing data.

Add a "fields" option that reads the trace event fields and parses the
data according to how the fields are defined:

With "fields" = 0 (default)

echo 1 > events/sched/sched_switch/enable
cat trace
<idle>-0 [003] d..2. 540.078653: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/3:1 next_pid=83 next_prio=120
kworker/3:1-83 [003] d..2. 540.078860: sched_switch: prev_comm=kworker/3:1 prev_pid=83 prev_prio=120 prev_state=I ==> next_comm=swapper/3 next_pid=0 next_prio=120
<idle>-0 [003] d..2. 540.206423: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=sshd next_pid=807 next_prio=120
sshd-807 [003] d..2. 540.206531: sched_switch: prev_comm=sshd prev_pid=807 prev_prio=120 prev_state=S ==> next_comm=swapper/3 next_pid=0 next_prio=120
<idle>-0 [001] d..2. 540.206597: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/u16:4 next_pid=58 next_prio=120
kworker/u16:4-58 [001] d..2. 540.206617: sched_switch: prev_comm=kworker/u16:4 prev_pid=58 prev_prio=120 prev_state=I ==> next_comm=bash next_pid=830 next_prio=120
bash-830 [001] d..2. 540.206678: sched_switch: prev_comm=bash prev_pid=830 prev_prio=120 prev_state=R ==> next_comm=kworker/u16:4 next_pid=58 next_prio=120
kworker/u16:4-58 [001] d..2. 540.206696: sched_switch: prev_comm=kworker/u16:4 prev_pid=58 prev_prio=120 prev_state=I ==> next_comm=bash next_pid=830 next_prio=120
bash-830 [001] d..2. 540.206713: sched_switch: prev_comm=bash prev_pid=830 prev_prio=120 prev_state=R ==> next_comm=kworker/u16:4 next_pid=58 next_prio=120

echo 1 > options/fields
<...>-998 [002] d..2. 538.643732: sched_switch: next_prio=0x78 (120) next_pid=0x0 (0) next_comm=swapper/2 prev_state=0x20 (32) prev_prio=0x78 (120) prev_pid=0x3e6 (998) prev_comm=trace-cmd
<idle>-0 [001] d..2. 538.643806: sched_switch: next_prio=0x78 (120) next_pid=0x33e (830) next_comm=bash prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x0 (0) prev_comm=swapper/1
bash-830 [001] d..2. 538.644106: sched_switch: next_prio=0x78 (120) next_pid=0x3a (58) next_comm=kworker/u16:4 prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x33e (830) prev_comm=bash
kworker/u16:4-58 [001] d..2. 538.644130: sched_switch: next_prio=0x78 (120) next_pid=0x33e (830) next_comm=bash prev_state=0x80 (128) prev_prio=0x78 (120) prev_pid=0x3a (58) prev_comm=kworker/u16:4
bash-830 [001] d..2. 538.644180: sched_switch: next_prio=0x78 (120) next_pid=0x3a (58) next_comm=kworker/u16:4 prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x33e (830) prev_comm=bash
kworker/u16:4-58 [001] d..2. 538.644185: sched_switch: next_prio=0x78 (120) next_pid=0x33e (830) next_comm=bash prev_state=0x80 (128) prev_prio=0x78 (120) prev_pid=0x3a (58) prev_comm=kworker/u16:4
bash-830 [001] d..2. 538.644204: sched_switch: next_prio=0x78 (120) next_pid=0x0 (0) next_comm=swapper/1 prev_state=0x1 (1) prev_prio=0x78 (120) prev_pid=0x33e (830) prev_comm=bash
<idle>-0 [003] d..2. 538.644211: sched_switch: next_prio=0x78 (120) next_pid=0x327 (807) next_comm=sshd prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x0 (0) prev_comm=swapper/3
sshd-807 [003] d..2. 538.644340: sched_switch: next_prio=0x78 (120) next_pid=0x0 (0) next_comm=swapper/3 prev_state=0x1 (1) prev_prio=0x78 (120) prev_pid=0x327 (807) prev_comm=sshd

It prints the event data safely, without using the event's trace print formatting.

Link: https://lore.kernel.org/linux-trace-kernel/20230328145156.497651be@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

+183 -2
+6
Documentation/trace/ftrace.rst
··· 1027 1027 nohex 1028 1028 nobin 1029 1029 noblock 1030 + nofields 1030 1031 trace_printk 1031 1032 annotate 1032 1033 nouserstacktrace ··· 1110 1109 1111 1110 block 1112 1111 When set, reading trace_pipe will not block when polled. 1112 + 1113 + fields 1114 + Print the fields as described by their types. This is a better 1115 + option than using hex, bin or raw, as it gives a better parsing 1116 + of the content of the event. 1113 1117 1114 1118 trace_printk 1115 1119 Can disable trace_printk() from writing into the buffer.
+5 -2
kernel/trace/trace.c
··· 3726 3726 #define STATIC_FMT_BUF_SIZE 128 3727 3727 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 3728 3728 3729 - static char *trace_iter_expand_format(struct trace_iterator *iter) 3729 + char *trace_iter_expand_format(struct trace_iterator *iter) 3730 3730 { 3731 3731 char *tmp; 3732 3732 ··· 4446 4446 if (trace_seq_has_overflowed(s)) 4447 4447 return TRACE_TYPE_PARTIAL_LINE; 4448 4448 4449 - if (event) 4449 + if (event) { 4450 + if (tr->trace_flags & TRACE_ITER_FIELDS) 4451 + return print_event_fields(iter, event); 4450 4452 return event->funcs->trace(iter, sym_flags, event); 4453 + } 4451 4454 4452 4455 trace_seq_printf(s, "Unknown type %d\n", entry->type); 4453 4456
+2
kernel/trace/trace.h
··· 619 619 const char *trace_event_format(struct trace_iterator *iter, const char *fmt); 620 620 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, 621 621 va_list ap) __printf(2, 0); 622 + char *trace_iter_expand_format(struct trace_iterator *iter); 622 623 623 624 int trace_empty(struct trace_iterator *iter); 624 625 ··· 1200 1199 C(HEX, "hex"), \ 1201 1200 C(BIN, "bin"), \ 1202 1201 C(BLOCK, "block"), \ 1202 + C(FIELDS, "fields"), \ 1203 1203 C(PRINTK, "trace_printk"), \ 1204 1204 C(ANNOTATE, "annotate"), \ 1205 1205 C(USERSTACKTRACE, "userstacktrace"), \
+168
kernel/trace/trace_output.c
··· 808 808 * Standard events 809 809 */ 810 810 811 + static void print_array(struct trace_iterator *iter, void *pos, 812 + struct ftrace_event_field *field) 813 + { 814 + int offset; 815 + int len; 816 + int i; 817 + 818 + offset = *(int *)pos & 0xffff; 819 + len = *(int *)pos >> 16; 820 + 821 + if (field) 822 + offset += field->offset; 823 + 824 + if (offset + len >= iter->ent_size) { 825 + trace_seq_puts(&iter->seq, "<OVERFLOW>"); 826 + return; 827 + } 828 + 829 + for (i = 0; i < len; i++, pos++) { 830 + if (i) 831 + trace_seq_putc(&iter->seq, ','); 832 + trace_seq_printf(&iter->seq, "%02x", *(unsigned char *)pos); 833 + } 834 + } 835 + 836 + static void print_fields(struct trace_iterator *iter, struct trace_event_call *call, 837 + struct list_head *head) 838 + { 839 + struct ftrace_event_field *field; 840 + int offset; 841 + int len; 842 + int ret; 843 + void *pos; 844 + 845 + list_for_each_entry(field, head, link) { 846 + trace_seq_printf(&iter->seq, " %s=", field->name); 847 + if (field->offset + field->size > iter->ent_size) { 848 + trace_seq_puts(&iter->seq, "<OVERFLOW>"); 849 + continue; 850 + } 851 + pos = (void *)iter->ent + field->offset; 852 + 853 + switch (field->filter_type) { 854 + case FILTER_COMM: 855 + case FILTER_STATIC_STRING: 856 + trace_seq_printf(&iter->seq, "%.*s", field->size, (char *)pos); 857 + break; 858 + case FILTER_RDYN_STRING: 859 + case FILTER_DYN_STRING: 860 + offset = *(int *)pos & 0xffff; 861 + len = *(int *)pos >> 16; 862 + 863 + if (field->filter_type == FILTER_RDYN_STRING) 864 + offset += field->offset; 865 + 866 + if (offset + len >= iter->ent_size) { 867 + trace_seq_puts(&iter->seq, "<OVERFLOW>"); 868 + break; 869 + } 870 + pos = (void *)iter->ent + offset; 871 + trace_seq_printf(&iter->seq, "%.*s", len, (char *)pos); 872 + break; 873 + case FILTER_PTR_STRING: 874 + if (!iter->fmt_size) 875 + trace_iter_expand_format(iter); 876 + pos = *(void **)pos; 877 + ret = strncpy_from_kernel_nofault(iter->fmt, pos, 878 + 
iter->fmt_size); 879 + if (ret < 0) 880 + trace_seq_printf(&iter->seq, "(0x%px)", pos); 881 + else 882 + trace_seq_printf(&iter->seq, "(0x%px:%s)", 883 + pos, iter->fmt); 884 + break; 885 + case FILTER_TRACE_FN: 886 + pos = *(void **)pos; 887 + trace_seq_printf(&iter->seq, "%pS", pos); 888 + break; 889 + case FILTER_CPU: 890 + case FILTER_OTHER: 891 + switch (field->size) { 892 + case 1: 893 + if (isprint(*(char *)pos)) { 894 + trace_seq_printf(&iter->seq, "'%c'", 895 + *(unsigned char *)pos); 896 + } 897 + trace_seq_printf(&iter->seq, "(%d)", 898 + *(unsigned char *)pos); 899 + break; 900 + case 2: 901 + trace_seq_printf(&iter->seq, "0x%x (%d)", 902 + *(unsigned short *)pos, 903 + *(unsigned short *)pos); 904 + break; 905 + case 4: 906 + /* dynamic array info is 4 bytes */ 907 + if (strstr(field->type, "__data_loc")) { 908 + print_array(iter, pos, NULL); 909 + break; 910 + } 911 + 912 + if (strstr(field->type, "__rel_loc")) { 913 + print_array(iter, pos, field); 914 + break; 915 + } 916 + 917 + trace_seq_printf(&iter->seq, "0x%x (%d)", 918 + *(unsigned int *)pos, 919 + *(unsigned int *)pos); 920 + break; 921 + case 8: 922 + trace_seq_printf(&iter->seq, "0x%llx (%lld)", 923 + *(unsigned long long *)pos, 924 + *(unsigned long long *)pos); 925 + break; 926 + default: 927 + trace_seq_puts(&iter->seq, "<INVALID-SIZE>"); 928 + break; 929 + } 930 + break; 931 + default: 932 + trace_seq_puts(&iter->seq, "<INVALID-TYPE>"); 933 + } 934 + } 935 + trace_seq_putc(&iter->seq, '\n'); 936 + } 937 + 938 + enum print_line_t print_event_fields(struct trace_iterator *iter, 939 + struct trace_event *event) 940 + { 941 + struct trace_event_call *call; 942 + struct list_head *head; 943 + 944 + /* ftrace defined events have separate call structures */ 945 + if (event->type <= __TRACE_LAST_TYPE) { 946 + bool found = false; 947 + 948 + down_read(&trace_event_sem); 949 + list_for_each_entry(call, &ftrace_events, list) { 950 + if (call->event.type == event->type) { 951 + found = true; 952 + 
break; 953 + } 954 + /* No need to search all events */ 955 + if (call->event.type > __TRACE_LAST_TYPE) 956 + break; 957 + } 958 + up_read(&trace_event_sem); 959 + if (!found) { 960 + trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type); 961 + goto out; 962 + } 963 + } else { 964 + call = container_of(event, struct trace_event_call, event); 965 + } 966 + head = trace_get_fields(call); 967 + 968 + trace_seq_printf(&iter->seq, "%s:", trace_event_name(call)); 969 + 970 + if (head && !list_empty(head)) 971 + print_fields(iter, call, head); 972 + else 973 + trace_seq_puts(&iter->seq, "No fields found\n"); 974 + 975 + out: 976 + return trace_handle_return(&iter->seq); 977 + } 978 + 811 979 enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, 812 980 struct trace_event *event) 813 981 {
+2
kernel/trace/trace_output.h
··· 19 19 extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset); 20 20 extern int trace_print_context(struct trace_iterator *iter); 21 21 extern int trace_print_lat_context(struct trace_iterator *iter); 22 + extern enum print_line_t print_event_fields(struct trace_iterator *iter, 23 + struct trace_event *event); 22 24 23 25 extern void trace_event_read_lock(void); 24 26 extern void trace_event_read_unlock(void);