Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf/core: Add new branch sample type for HW index of raw branch records

The low level index is the index in the underlying hardware buffer of
the most recently captured taken branch which is always saved in
branch_entries[0]. It is very useful for reconstructing the call stack.
For example, in Intel LBR call stack mode, the depth of the reconstructed
LBR call stack is limited to the number of LBR registers. With the low level
index information, the perf tool may stitch the stacks of two samples. The
reconstructed LBR call stack can then exceed the HW limitation.

Add a new branch sample type to retrieve the low level index of raw branch
records. The low level index is between -1 (unknown) and max depth, which
can be retrieved from /sys/devices/cpu/caps/branches.

The low level index information is dumped into the
PERF_SAMPLE_BRANCH_STACK output only when the new branch sample type is set.
The perf tool should check attr.branch_sample_type and apply the
corresponding format for PERF_SAMPLE_BRANCH_STACK samples.
Otherwise, some use cases may break. For example, users may parse a
perf.data file that includes the new branch sample type with an old version
of the perf tool (without the check). Those users would probably get
incorrect information without any warning.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20200127165355.27495-2-kan.liang@linux.intel.com

authored by

Kan Liang and committed by
Ingo Molnar
bbfd5e4f 6c1c07b3

+33 -1
+1
arch/powerpc/perf/core-book3s.c
··· 518 518 } 519 519 } 520 520 cpuhw->bhrb_stack.nr = u_index; 521 + cpuhw->bhrb_stack.hw_idx = -1ULL; 521 522 return; 522 523 } 523 524
+3
arch/x86/events/intel/lbr.c
··· 585 585 cpuc->lbr_entries[i].reserved = 0; 586 586 } 587 587 cpuc->lbr_stack.nr = i; 588 + cpuc->lbr_stack.hw_idx = -1ULL; 588 589 } 589 590 590 591 /* ··· 681 680 out++; 682 681 } 683 682 cpuc->lbr_stack.nr = out; 683 + cpuc->lbr_stack.hw_idx = -1ULL; 684 684 } 685 685 686 686 void intel_pmu_lbr_read(void) ··· 1122 1120 int i; 1123 1121 1124 1122 cpuc->lbr_stack.nr = x86_pmu.lbr_nr; 1123 + cpuc->lbr_stack.hw_idx = -1ULL; 1125 1124 for (i = 0; i < x86_pmu.lbr_nr; i++) { 1126 1125 u64 info = lbr->lbr[i].info; 1127 1126 struct perf_branch_entry *e = &cpuc->lbr_entries[i];
+12
include/linux/perf_event.h
··· 93 93 /* 94 94 * branch stack layout: 95 95 * nr: number of taken branches stored in entries[] 96 + * hw_idx: The low level index of raw branch records 97 + * for the most recent branch. 98 + * -1ULL means invalid/unknown. 96 99 * 97 100 * Note that nr can vary from sample to sample 98 101 * branches (to, from) are stored from most recent 99 102 * to least recent, i.e., entries[0] contains the most 100 103 * recent branch. 104 + * The entries[] is an abstraction of raw branch records, 105 + * which may not be stored in age order in HW, e.g. Intel LBR. 106 + * The hw_idx is to expose the low level index of raw 107 + * branch record for the most recent branch aka entries[0]. 108 + * The hw_idx index is between -1 (unknown) and max depth, 109 + * which can be retrieved in /sys/devices/cpu/caps/branches. 110 + * For the architectures whose raw branch records are 111 + * already stored in age order, the hw_idx should be 0. 101 112 */ 102 113 struct perf_branch_stack { 103 114 __u64 nr; 115 + __u64 hw_idx; 104 116 struct perf_branch_entry entries[0]; 105 117 }; 106 118
+7 -1
include/uapi/linux/perf_event.h
··· 181 181 182 182 PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ 183 183 184 + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ 185 + 184 186 PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ 185 187 }; 186 188 ··· 209 207 210 208 PERF_SAMPLE_BRANCH_TYPE_SAVE = 211 209 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, 210 + 211 + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, 212 212 213 213 PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, 214 214 }; ··· 857 853 * char data[size];}&& PERF_SAMPLE_RAW 858 854 * 859 855 * { u64 nr; 860 - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK 856 + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX 857 + * { u64 from, to, flags } lbr[nr]; 858 + * } && PERF_SAMPLE_BRANCH_STACK 861 859 * 862 860 * { u64 abi; # enum perf_sample_regs_abi 863 861 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+10
kernel/events/core.c
··· 6555 6555 perf_output_read_one(handle, event, enabled, running); 6556 6556 } 6557 6557 6558 + static inline bool perf_sample_save_hw_index(struct perf_event *event) 6559 + { 6560 + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; 6561 + } 6562 + 6558 6563 void perf_output_sample(struct perf_output_handle *handle, 6559 6564 struct perf_event_header *header, 6560 6565 struct perf_sample_data *data, ··· 6648 6643 * sizeof(struct perf_branch_entry); 6649 6644 6650 6645 perf_output_put(handle, data->br_stack->nr); 6646 + if (perf_sample_save_hw_index(event)) 6647 + perf_output_put(handle, data->br_stack->hw_idx); 6651 6648 perf_output_copy(handle, data->br_stack->entries, size); 6652 6649 } else { 6653 6650 /* ··· 6843 6836 if (sample_type & PERF_SAMPLE_BRANCH_STACK) { 6844 6837 int size = sizeof(u64); /* nr */ 6845 6838 if (data->br_stack) { 6839 + if (perf_sample_save_hw_index(event)) 6840 + size += sizeof(u64); 6841 + 6846 6842 size += data->br_stack->nr 6847 6843 * sizeof(struct perf_branch_entry); 6848 6844 }