Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf callchain: Allow symbols to be optional when resolving a callchain

In uses like 'perf inject' it is not necessary to gather the symbol for
each call chain location; the map for the sample IP is wanted so that
build IDs and the like can be injected. Make gathering the symbol in the
callchain_cursor optional.

For a 'perf inject -B' command this lowers the peak RSS from 54.1MB to
29.6MB by avoiding loading symbols.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anne Macedo <retpolanne@posteo.net>
Cc: Casey Chen <cachen@purestorage.com>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sun Haiyong <sunhaiyong@loongson.cn>
Link: https://lore.kernel.org/r/20240909203740.143492-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
02b27050 64eed019

+85 -52
+1 -1
tools/perf/builtin-inject.c
··· 942 942 } 943 943 944 944 sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH, 945 - mark_dso_hit_callback, &args); 945 + /*symbols=*/false, mark_dso_hit_callback, &args); 946 946 947 947 thread__put(thread); 948 948 repipe:
+4 -4
tools/perf/util/callchain.c
··· 1800 1800 1801 1801 int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel, 1802 1802 struct perf_sample *sample, int max_stack, 1803 - callchain_iter_fn cb, void *data) 1803 + bool symbols, callchain_iter_fn cb, void *data) 1804 1804 { 1805 1805 struct callchain_cursor *cursor = get_tls_callchain_cursor(); 1806 1806 int ret; ··· 1809 1809 return -ENOMEM; 1810 1810 1811 1811 /* Fill in the callchain. */ 1812 - ret = thread__resolve_callchain(thread, cursor, evsel, sample, 1813 - /*parent=*/NULL, /*root_al=*/NULL, 1814 - max_stack); 1812 + ret = __thread__resolve_callchain(thread, cursor, evsel, sample, 1813 + /*parent=*/NULL, /*root_al=*/NULL, 1814 + max_stack, symbols); 1815 1815 if (ret) 1816 1816 return ret; 1817 1817
+1 -1
tools/perf/util/callchain.h
··· 315 315 316 316 int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel, 317 317 struct perf_sample *sample, int max_stack, 318 - callchain_iter_fn cb, void *data); 318 + bool symbols, callchain_iter_fn cb, void *data); 319 319 320 320 #endif /* __PERF_CALLCHAIN_H */
+53 -39
tools/perf/util/machine.c
··· 2060 2060 bool branch, 2061 2061 struct branch_flags *flags, 2062 2062 struct iterations *iter, 2063 - u64 branch_from) 2063 + u64 branch_from, 2064 + bool symbols) 2064 2065 { 2065 2066 struct map_symbol ms = {}; 2066 2067 struct addr_location al; ··· 2100 2099 } 2101 2100 goto out; 2102 2101 } 2103 - thread__find_symbol(thread, *cpumode, ip, &al); 2102 + if (symbols) 2103 + thread__find_symbol(thread, *cpumode, ip, &al); 2104 2104 } 2105 2105 2106 2106 if (al.sym != NULL) { ··· 2230 2228 struct symbol **parent, 2231 2229 struct addr_location *root_al, 2232 2230 u64 branch_from, 2233 - bool callee, int end) 2231 + bool callee, int end, 2232 + bool symbols) 2234 2233 { 2235 2234 struct ip_callchain *chain = sample->callchain; 2236 2235 u8 cpumode = PERF_RECORD_MISC_USER; ··· 2241 2238 for (i = 0; i < end + 1; i++) { 2242 2239 err = add_callchain_ip(thread, cursor, parent, 2243 2240 root_al, &cpumode, chain->ips[i], 2244 - false, NULL, NULL, branch_from); 2241 + false, NULL, NULL, branch_from, 2242 + symbols); 2245 2243 if (err) 2246 2244 return err; 2247 2245 } ··· 2252 2248 for (i = end; i >= 0; i--) { 2253 2249 err = add_callchain_ip(thread, cursor, parent, 2254 2250 root_al, &cpumode, chain->ips[i], 2255 - false, NULL, NULL, branch_from); 2251 + false, NULL, NULL, branch_from, 2252 + symbols); 2256 2253 if (err) 2257 2254 return err; 2258 2255 } ··· 2296 2291 struct symbol **parent, 2297 2292 struct addr_location *root_al, 2298 2293 u64 *branch_from, 2299 - bool callee) 2294 + bool callee, 2295 + bool symbols) 2300 2296 { 2301 2297 struct branch_stack *lbr_stack = sample->branch_stack; 2302 2298 struct branch_entry *entries = perf_sample__branch_entries(sample); ··· 2330 2324 err = add_callchain_ip(thread, cursor, parent, 2331 2325 root_al, &cpumode, ip, 2332 2326 true, flags, NULL, 2333 - *branch_from); 2327 + *branch_from, symbols); 2334 2328 if (err) 2335 2329 return err; 2336 2330 ··· 2355 2349 err = add_callchain_ip(thread, cursor, parent, 2356 2350 
root_al, &cpumode, ip, 2357 2351 true, flags, NULL, 2358 - *branch_from); 2352 + *branch_from, symbols); 2359 2353 if (err) 2360 2354 return err; 2361 2355 save_lbr_cursor_node(thread, cursor, i); ··· 2370 2364 err = add_callchain_ip(thread, cursor, parent, 2371 2365 root_al, &cpumode, ip, 2372 2366 true, flags, NULL, 2373 - *branch_from); 2367 + *branch_from, symbols); 2374 2368 if (err) 2375 2369 return err; 2376 2370 save_lbr_cursor_node(thread, cursor, i); ··· 2384 2378 err = add_callchain_ip(thread, cursor, parent, 2385 2379 root_al, &cpumode, ip, 2386 2380 true, flags, NULL, 2387 - *branch_from); 2381 + *branch_from, symbols); 2388 2382 if (err) 2389 2383 return err; 2390 2384 } ··· 2551 2545 struct symbol **parent, 2552 2546 struct addr_location *root_al, 2553 2547 int max_stack, 2554 - unsigned int max_lbr) 2548 + unsigned int max_lbr, 2549 + bool symbols) 2555 2550 { 2556 2551 bool callee = (callchain_param.order == ORDER_CALLEE); 2557 2552 struct ip_callchain *chain = sample->callchain; ··· 2594 2587 /* Add kernel ip */ 2595 2588 err = lbr_callchain_add_kernel_ip(thread, cursor, sample, 2596 2589 parent, root_al, branch_from, 2597 - true, i); 2590 + true, i, symbols); 2598 2591 if (err) 2599 2592 goto error; 2600 2593 2601 2594 err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, 2602 - root_al, &branch_from, true); 2595 + root_al, &branch_from, true, symbols); 2603 2596 if (err) 2604 2597 goto error; 2605 2598 ··· 2616 2609 goto error; 2617 2610 } 2618 2611 err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, 2619 - root_al, &branch_from, false); 2612 + root_al, &branch_from, false, symbols); 2620 2613 if (err) 2621 2614 goto error; 2622 2615 2623 2616 /* Add kernel ip */ 2624 2617 err = lbr_callchain_add_kernel_ip(thread, cursor, sample, 2625 2618 parent, root_al, branch_from, 2626 - false, i); 2619 + false, i, symbols); 2627 2620 if (err) 2628 2621 goto error; 2629 2622 } ··· 2637 2630 struct callchain_cursor *cursor, 2638 2631 
struct symbol **parent, 2639 2632 struct addr_location *root_al, 2640 - u8 *cpumode, int ent) 2633 + u8 *cpumode, int ent, bool symbols) 2641 2634 { 2642 2635 int err = 0; 2643 2636 ··· 2647 2640 if (ip >= PERF_CONTEXT_MAX) { 2648 2641 err = add_callchain_ip(thread, cursor, parent, 2649 2642 root_al, cpumode, ip, 2650 - false, NULL, NULL, 0); 2643 + false, NULL, NULL, 0, symbols); 2651 2644 break; 2652 2645 } 2653 2646 } ··· 2669 2662 struct perf_sample *sample, 2670 2663 struct symbol **parent, 2671 2664 struct addr_location *root_al, 2672 - int max_stack) 2665 + int max_stack, 2666 + bool symbols) 2673 2667 { 2674 2668 struct branch_stack *branch = sample->branch_stack; 2675 2669 struct branch_entry *entries = perf_sample__branch_entries(sample); ··· 2690 2682 2691 2683 err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, 2692 2684 root_al, max_stack, 2693 - !env ? 0 : env->max_branches); 2685 + !env ? 0 : env->max_branches, 2686 + symbols); 2694 2687 if (err) 2695 2688 return (err < 0) ? err : 0; 2696 2689 } ··· 2756 2747 root_al, 2757 2748 NULL, be[i].to, 2758 2749 true, &be[i].flags, 2759 - NULL, be[i].from); 2750 + NULL, be[i].from, symbols); 2760 2751 2761 - if (!err) 2752 + if (!err) { 2762 2753 err = add_callchain_ip(thread, cursor, parent, root_al, 2763 2754 NULL, be[i].from, 2764 2755 true, &be[i].flags, 2765 - &iter[i], 0); 2756 + &iter[i], 0, symbols); 2757 + } 2766 2758 if (err == -EINVAL) 2767 2759 break; 2768 2760 if (err) ··· 2779 2769 check_calls: 2780 2770 if (chain && callchain_param.order != ORDER_CALLEE) { 2781 2771 err = find_prev_cpumode(chain, thread, cursor, parent, root_al, 2782 - &cpumode, chain->nr - first_call); 2772 + &cpumode, chain->nr - first_call, symbols); 2783 2773 if (err) 2784 2774 return (err < 0) ? 
err : 0; 2785 2775 } ··· 2801 2791 ++nr_entries; 2802 2792 else if (callchain_param.order != ORDER_CALLEE) { 2803 2793 err = find_prev_cpumode(chain, thread, cursor, parent, 2804 - root_al, &cpumode, j); 2794 + root_al, &cpumode, j, symbols); 2805 2795 if (err) 2806 2796 return (err < 0) ? err : 0; 2807 2797 continue; ··· 2828 2818 if (leaf_frame_caller && leaf_frame_caller != ip) { 2829 2819 2830 2820 err = add_callchain_ip(thread, cursor, parent, 2831 - root_al, &cpumode, leaf_frame_caller, 2832 - false, NULL, NULL, 0); 2821 + root_al, &cpumode, leaf_frame_caller, 2822 + false, NULL, NULL, 0, symbols); 2833 2823 if (err) 2834 2824 return (err < 0) ? err : 0; 2835 2825 } ··· 2837 2827 2838 2828 err = add_callchain_ip(thread, cursor, parent, 2839 2829 root_al, &cpumode, ip, 2840 - false, NULL, NULL, 0); 2830 + false, NULL, NULL, 0, symbols); 2841 2831 2842 2832 if (err) 2843 2833 return (err < 0) ? err : 0; ··· 2917 2907 struct callchain_cursor *cursor, 2918 2908 struct evsel *evsel, 2919 2909 struct perf_sample *sample, 2920 - int max_stack) 2910 + int max_stack, bool symbols) 2921 2911 { 2922 2912 /* Can we do dwarf post unwind? 
*/ 2923 2913 if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) && ··· 2929 2919 (!sample->user_stack.size)) 2930 2920 return 0; 2931 2921 2922 + if (!symbols) 2923 + pr_debug("Not resolving symbols with an unwinder isn't currently supported\n"); 2924 + 2932 2925 return unwind__get_entries(unwind_entry, cursor, 2933 2926 thread, sample, max_stack, false); 2934 2927 } 2935 2928 2936 - int thread__resolve_callchain(struct thread *thread, 2937 - struct callchain_cursor *cursor, 2938 - struct evsel *evsel, 2939 - struct perf_sample *sample, 2940 - struct symbol **parent, 2941 - struct addr_location *root_al, 2942 - int max_stack) 2929 + int __thread__resolve_callchain(struct thread *thread, 2930 + struct callchain_cursor *cursor, 2931 + struct evsel *evsel, 2932 + struct perf_sample *sample, 2933 + struct symbol **parent, 2934 + struct addr_location *root_al, 2935 + int max_stack, 2936 + bool symbols) 2943 2937 { 2944 2938 int ret = 0; 2945 2939 ··· 2956 2942 ret = thread__resolve_callchain_sample(thread, cursor, 2957 2943 evsel, sample, 2958 2944 parent, root_al, 2959 - max_stack); 2945 + max_stack, symbols); 2960 2946 if (ret) 2961 2947 return ret; 2962 2948 ret = thread__resolve_callchain_unwind(thread, cursor, 2963 2949 evsel, sample, 2964 - max_stack); 2950 + max_stack, symbols); 2965 2951 } else { 2966 2952 ret = thread__resolve_callchain_unwind(thread, cursor, 2967 2953 evsel, sample, 2968 - max_stack); 2954 + max_stack, symbols); 2969 2955 if (ret) 2970 2956 return ret; 2971 2957 ret = thread__resolve_callchain_sample(thread, cursor, 2972 2958 evsel, sample, 2973 2959 parent, root_al, 2974 - max_stack); 2960 + max_stack, symbols); 2975 2961 } 2976 2962 2977 2963 return ret;
+26 -7
tools/perf/util/machine.h
··· 178 178 179 179 struct callchain_cursor; 180 180 181 - int thread__resolve_callchain(struct thread *thread, 182 - struct callchain_cursor *cursor, 183 - struct evsel *evsel, 184 - struct perf_sample *sample, 185 - struct symbol **parent, 186 - struct addr_location *root_al, 187 - int max_stack); 181 + int __thread__resolve_callchain(struct thread *thread, 182 + struct callchain_cursor *cursor, 183 + struct evsel *evsel, 184 + struct perf_sample *sample, 185 + struct symbol **parent, 186 + struct addr_location *root_al, 187 + int max_stack, 188 + bool symbols); 189 + 190 + static inline int thread__resolve_callchain(struct thread *thread, 191 + struct callchain_cursor *cursor, 192 + struct evsel *evsel, 193 + struct perf_sample *sample, 194 + struct symbol **parent, 195 + struct addr_location *root_al, 196 + int max_stack) 197 + { 198 + return __thread__resolve_callchain(thread, 199 + cursor, 200 + evsel, 201 + sample, 202 + parent, 203 + root_al, 204 + max_stack, 205 + /*symbols=*/true); 206 + } 188 207 189 208 /* 190 209 * Default guest kernel is defined by parameter --guestkallsyms