Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ftrace: Do not disable interrupts in profiler

The function profiler disables interrupts before processing. This has
been there since the profiler was introduced back in 2009, when there were
recursion issues to deal with. The function tracer is much more robust
today and has its own internal recursion protection. There's no reason to
disable interrupts in the function profiler.

Instead, just disable preemption and use the guard() infrastructure while
at it.

Before this change:

~# echo 1 > /sys/kernel/tracing/function_profile_enabled
~# perf stat -r 10 ./hackbench 10
Time: 3.099
Time: 2.556
Time: 2.500
Time: 2.705
Time: 2.985
Time: 2.959
Time: 2.859
Time: 2.621
Time: 2.742
Time: 2.631

Performance counter stats for '/work/c/hackbench 10' (10 runs):

23,156.77 msec task-clock # 6.951 CPUs utilized ( +- 2.36% )
18,306 context-switches # 790.525 /sec ( +- 5.95% )
495 cpu-migrations # 21.376 /sec ( +- 8.61% )
11,522 page-faults # 497.565 /sec ( +- 1.80% )
47,967,124,606 cycles # 2.071 GHz ( +- 0.41% )
80,009,078,371 instructions # 1.67 insn per cycle ( +- 0.34% )
16,389,249,798 branches # 707.752 M/sec ( +- 0.36% )
139,943,109 branch-misses # 0.85% of all branches ( +- 0.61% )

3.332 +- 0.101 seconds time elapsed ( +- 3.04% )

After this change:

~# echo 1 > /sys/kernel/tracing/function_profile_enabled
~# perf stat -r 10 ./hackbench 10
Time: 1.869
Time: 1.428
Time: 1.575
Time: 1.569
Time: 1.685
Time: 1.511
Time: 1.611
Time: 1.672
Time: 1.724
Time: 1.715

Performance counter stats for '/work/c/hackbench 10' (10 runs):

13,578.21 msec task-clock # 6.931 CPUs utilized ( +- 2.23% )
12,736 context-switches # 937.973 /sec ( +- 3.86% )
341 cpu-migrations # 25.114 /sec ( +- 5.27% )
11,378 page-faults # 837.960 /sec ( +- 1.74% )
27,638,039,036 cycles # 2.035 GHz ( +- 0.27% )
45,107,762,498 instructions # 1.63 insn per cycle ( +- 0.23% )
8,623,868,018 branches # 635.125 M/sec ( +- 0.27% )
125,738,443 branch-misses # 1.46% of all branches ( +- 0.32% )

1.9590 +- 0.0484 seconds time elapsed ( +- 2.47% )

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: https://lore.kernel.org/20241223184941.373853944@goodmis.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

+7 -13
+7 -13
kernel/trace/ftrace.c
@@ -789,27 +789,24 @@
 {
 	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
-	unsigned long flags;
 
 	if (!ftrace_profile_enabled)
 		return;
 
-	local_irq_save(flags);
+	guard(preempt_notrace)();
 
 	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
-		goto out;
+		return;
 
 	rec = ftrace_find_profiled_func(stat, ip);
 	if (!rec) {
 		rec = ftrace_profile_alloc(stat, ip);
 		if (!rec)
-			goto out;
+			return;
 	}
 
 	rec->counter++;
- out:
-	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -853,19 +856,19 @@
 	unsigned long long calltime;
 	unsigned long long rettime = trace_clock_local();
 	struct ftrace_profile *rec;
-	unsigned long flags;
 	int size;
 
-	local_irq_save(flags);
+	guard(preempt_notrace)();
+
 	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
-		goto out;
+		return;
 
 	profile_data = fgraph_retrieve_data(gops->idx, &size);
 
 	/* If the calltime was zero'd ignore it */
 	if (!profile_data || !profile_data->calltime)
-		goto out;
+		return;
 
 	calltime = rettime - profile_data->calltime;
 
@@ -893,9 +896,6 @@
 		rec->time += calltime;
 		rec->time_squared += calltime * calltime;
 	}
-
- out:
-	local_irq_restore(flags);
 }
 
 static struct fgraph_ops fprofiler_ops = {