Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf ftrace: Fix latency stats with BPF

When BPF collects the stats for the latency in usec, it first divides
the time by 1000. But that means it would have 0 if the delta is small
and won't update the total time properly.

Let's keep the stats in nsec always and adjust to usec before printing.

Before:

$ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1
# DURATION | COUNT | GRAPH |
0 - 1 us | 765 | ############################################# |
1 - 2 us | 10 | |
2 - 4 us | 2 | |
4 - 8 us | 5 | |

# statistics (in usec)
total time: 0 <<<--- (here)
avg time: 0
max time: 6
min time: 0
count: 782

After:

$ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1
# DURATION | COUNT | GRAPH |
0 - 1 us | 880 | ############################################ |
1 - 2 us | 13 | |
2 - 4 us | 8 | |
4 - 8 us | 3 | |

# statistics (in usec)
total time: 268 <<<--- (here)
avg time: 0
max time: 6
min time: 0
count: 904

Tested-by: Athira Rajeev <atrajeev@linux.ibm.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Link: https://lore.kernel.org/r/20250227191223.1288473-1-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

+15 -13
+7 -1
tools/perf/util/bpf_ftrace.c
··· 128 128 return 0; 129 129 } 130 130 131 - int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, 131 + int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, 132 132 int buckets[], struct stats *stats) 133 133 { 134 134 int i, fd, err; ··· 158 158 stats->n = skel->bss->count; 159 159 stats->max = skel->bss->max; 160 160 stats->min = skel->bss->min; 161 + 162 + if (!ftrace->use_nsec) { 163 + stats->mean /= 1000; 164 + stats->max /= 1000; 165 + stats->min /= 1000; 166 + } 161 167 } 162 168 163 169 free(hist);
+8 -12
tools/perf/util/bpf_skel/func_latency.bpf.c
··· 102 102 start = bpf_map_lookup_elem(&functime, &tid); 103 103 if (start) { 104 104 __s64 delta = bpf_ktime_get_ns() - *start; 105 + __u64 val = delta; 105 106 __u32 key = 0; 106 107 __u64 *hist; 107 108 ··· 112 111 return 0; 113 112 114 113 if (bucket_range != 0) { 115 - delta /= cmp_base; 114 + val = delta / cmp_base; 116 115 117 116 if (min_latency > 0) { 118 - if (delta > min_latency) 119 - delta -= min_latency; 117 + if (val > min_latency) 118 + val -= min_latency; 120 119 else 121 120 goto do_lookup; 122 121 } 123 122 124 123 // Less than 1 unit (ms or ns), or, in the future, 125 124 // than the min latency desired. 126 - if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units ) 127 - // clang 12 doesn't like s64 / u32 division 128 - key = (__u64)delta / bucket_range + 1; 125 + if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units ) 126 + key = val / bucket_range + 1; 129 127 if (key >= bucket_num || 130 - delta >= max_latency - min_latency) 128 + val >= max_latency - min_latency) 131 129 key = bucket_num - 1; 132 130 } 133 131 134 - delta += min_latency; 135 132 goto do_lookup; 136 133 } 137 134 // calculate index using delta ··· 145 146 146 147 *hist += 1; 147 148 148 - if (bucket_range == 0) 149 - delta /= cmp_base; 150 - 151 - __sync_fetch_and_add(&total, delta); 149 + __sync_fetch_and_add(&total, delta); // always in nsec 152 150 __sync_fetch_and_add(&count, 1); 153 151 154 152 if (delta > max)