Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fgraph: Remove unnecessary disabling of interrupts and recursion

The function graph tracer disables interrupts, and also prevents recursion
from NMIs, while it records a graph tracer entry. There's no reason to do
this today. That disabling goes back to 2008, when the function graph
tracer was first introduced and recursion protection wasn't yet part of
the code.

Today, there's no reason to disable interrupts or to prevent the code from
recursing, because the infrastructure can easily handle both.

Before this change:

~# echo function_graph > /sys/kernel/tracing/current_tracer
~# perf stat -r 10 ./hackbench 10
Time: 4.240
Time: 4.236
Time: 4.106
Time: 4.014
Time: 4.314
Time: 3.830
Time: 4.063
Time: 4.323
Time: 3.763
Time: 3.727

Performance counter stats for '/work/c/hackbench 10' (10 runs):

33,937.20 msec task-clock # 7.008 CPUs utilized ( +- 1.85% )
18,220 context-switches # 536.874 /sec ( +- 6.41% )
624 cpu-migrations # 18.387 /sec ( +- 9.07% )
11,319 page-faults # 333.528 /sec ( +- 1.97% )
76,657,643,617 cycles # 2.259 GHz ( +- 0.40% )
141,403,302,768 instructions # 1.84 insn per cycle ( +- 0.37% )
25,518,463,888 branches # 751.932 M/sec ( +- 0.35% )
156,151,050 branch-misses # 0.61% of all branches ( +- 0.63% )

4.8423 +- 0.0892 seconds time elapsed ( +- 1.84% )

After this change:

~# echo function_graph > /sys/kernel/tracing/current_tracer
~# perf stat -r 10 ./hackbench 10
Time: 3.340
Time: 3.192
Time: 3.129
Time: 2.579
Time: 2.589
Time: 2.798
Time: 2.791
Time: 2.955
Time: 3.044
Time: 3.065

Performance counter stats for './hackbench 10' (10 runs):

24,416.30 msec task-clock # 6.996 CPUs utilized ( +- 2.74% )
16,764 context-switches # 686.590 /sec ( +- 5.85% )
469 cpu-migrations # 19.208 /sec ( +- 6.14% )
11,519 page-faults # 471.775 /sec ( +- 1.92% )
53,895,628,450 cycles # 2.207 GHz ( +- 0.52% )
105,552,664,638 instructions # 1.96 insn per cycle ( +- 0.47% )
17,808,672,667 branches # 729.376 M/sec ( +- 0.48% )
133,075,435 branch-misses # 0.75% of all branches ( +- 0.59% )

3.490 +- 0.112 seconds time elapsed ( +- 3.22% )

Also removed unneeded "unlikely()" around the retaddr code.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: https://lore.kernel.org/20241223184941.204074053@goodmis.org
Fixes: 9cd2992f2d6c8 ("fgraph: Have set_graph_notrace only affect function_graph tracer") # Performance only
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

+15 -22
+15 -22
kernel/trace/trace_functions_graph.c
···
181 181      struct trace_array *tr = gops->private;
182 182      struct trace_array_cpu *data;
183 183      struct fgraph_times *ftimes;
184 -        unsigned long flags;
185 184      unsigned int trace_ctx;
186 185      long disabled;
187 -        int ret;
186 +        int ret = 0;
188 187      int cpu;
189 188
190 189      if (*task_var & TRACE_GRAPH_NOTRACE)
···
234 235      if (tracing_thresh)
235 236          return 1;
236 237
237 -        local_irq_save(flags);
238 +        preempt_disable_notrace();
238 239      cpu = raw_smp_processor_id();
239 240      data = per_cpu_ptr(tr->array_buffer.data, cpu);
240 -        disabled = atomic_inc_return(&data->disabled);
241 -        if (likely(disabled == 1)) {
242 -            trace_ctx = tracing_gen_ctx_flags(flags);
243 -            if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
244 -                tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) {
241 +        disabled = atomic_read(&data->disabled);
242 +        if (likely(!disabled)) {
243 +            trace_ctx = tracing_gen_ctx();
244 +            if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
245 +                tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
245 246              unsigned long retaddr = ftrace_graph_top_ret_addr(current);
246 -
247 247              ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
248 -            } else
248 +            } else {
249 249              ret = __trace_graph_entry(tr, trace, trace_ctx);
250 -        } else {
251 -            ret = 0;
250 +            }
252 251      }
253 -
254 -        atomic_dec(&data->disabled);
255 -        local_irq_restore(flags);
252 +        preempt_enable_notrace();
256 253
257 254      return ret;
258 255  }
···
315 320      struct trace_array *tr = gops->private;
316 321      struct trace_array_cpu *data;
317 322      struct fgraph_times *ftimes;
318 -        unsigned long flags;
319 323      unsigned int trace_ctx;
320 324      long disabled;
321 325      int size;
···
335 341
336 342      trace->calltime = ftimes->calltime;
337 343
338 -        local_irq_save(flags);
344 +        preempt_disable_notrace();
339 345      cpu = raw_smp_processor_id();
340 346      data = per_cpu_ptr(tr->array_buffer.data, cpu);
341 -        disabled = atomic_inc_return(&data->disabled);
342 -        if (likely(disabled == 1)) {
343 -            trace_ctx = tracing_gen_ctx_flags(flags);
347 +        disabled = atomic_read(&data->disabled);
348 +        if (likely(!disabled)) {
349 +            trace_ctx = tracing_gen_ctx();
344 350          __trace_graph_return(tr, trace, trace_ctx);
345 351      }
346 -        atomic_dec(&data->disabled);
347 -        local_irq_restore(flags);
352 +        preempt_enable_notrace();
348 353  }
349 354
350 355  static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,