Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-tools-fixes-for-v5.18-2022-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

- Fix Intel PT (Processor Trace) timeless decoding with perf.data
directory.

- ARM SPE (Statistical Profiling Extension) address fixes, for
synthesized events and for SPE events with physical addresses. Add a
simple 'perf test' entry to make sure this doesn't regress.

- Remove the arch-specific processing of kallsyms data used to fix up symbol
end addresses, fixing excessive memory consumption in the annotation code.

* tag 'perf-tools-fixes-for-v5.18-2022-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf symbol: Remove arch__symbols__fixup_end()
perf symbol: Update symbols__fixup_end()
perf symbol: Pass is_kallsyms to symbols__fixup_end()
perf test: Add perf_event_attr test for Arm SPE
perf arm-spe: Fix SPE events with phys addresses
perf arm-spe: Fix addresses of synthesized SPE events
perf intel-pt: Fix timeless decoding with perf.data directory

+56 -79
+10
tools/perf/arch/arm64/util/arm-spe.c
··· 148 148 bool privileged = perf_event_paranoid_check(-1); 149 149 struct evsel *tracking_evsel; 150 150 int err; 151 + u64 bit; 151 152 152 153 sper->evlist = evlist; 153 154 ··· 245 244 * on the opening of the event or the SPE data produced. 246 245 */ 247 246 evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); 247 + 248 + /* 249 + * The PHYS_ADDR flag does not affect the driver behaviour, it is used to 250 + * inform that the resulting output's SPE samples contain physical addresses 251 + * where applicable. 252 + */ 253 + bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable"); 254 + if (arm_spe_evsel->core.attr.config & bit) 255 + evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR); 248 256 249 257 /* Add dummy event to keep tracking */ 250 258 err = parse_events(evlist, "dummy:u", NULL);
-21
tools/perf/arch/arm64/util/machine.c
··· 8 8 #include "callchain.h" 9 9 #include "record.h" 10 10 11 - /* On arm64, kernel text segment starts at high memory address, 12 - * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory 13 - * address, like 0xffff 0000 00ax xxxx. When only small amount of 14 - * memory is used by modules, gap between end of module's text segment 15 - * and start of kernel text segment may reach 2G. 16 - * Therefore do not fill this gap and do not assign it to the kernel dso map. 17 - */ 18 - 19 - #define SYMBOL_LIMIT (1 << 12) /* 4K */ 20 - 21 - void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) 22 - { 23 - if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) || 24 - (strchr(p->name, '[') == NULL && strchr(c->name, '['))) 25 - /* Limit range of last symbol in module and kernel */ 26 - p->end += SYMBOL_LIMIT; 27 - else 28 - p->end = c->start; 29 - pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); 30 - } 31 - 32 11 void arch__add_leaf_frame_record_opts(struct record_opts *opts) 33 12 { 34 13 opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
-1
tools/perf/arch/powerpc/util/Build
··· 1 1 perf-y += header.o 2 - perf-y += machine.o 3 2 perf-y += kvm-stat.o 4 3 perf-y += perf_regs.o 5 4 perf-y += mem-events.o
-25
tools/perf/arch/powerpc/util/machine.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - 3 - #include <inttypes.h> 4 - #include <stdio.h> 5 - #include <string.h> 6 - #include <internal/lib.h> // page_size 7 - #include "debug.h" 8 - #include "symbol.h" 9 - 10 - /* On powerpc kernel text segment start at memory addresses, 0xc000000000000000 11 - * whereas the modules are located at very high memory addresses, 12 - * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment 13 - * and beginning of first module's text segment is very high. 14 - * Therefore do not fill this gap and do not assign it to the kernel dso map. 15 - */ 16 - 17 - void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) 18 - { 19 - if (strchr(p->name, '[') == NULL && strchr(c->name, '[')) 20 - /* Limit the range of last kernel symbol */ 21 - p->end += page_size; 22 - else 23 - p->end = c->start; 24 - pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); 25 - }
-16
tools/perf/arch/s390/util/machine.c
··· 35 35 36 36 return 0; 37 37 } 38 - 39 - /* On s390 kernel text segment start is located at very low memory addresses, 40 - * for example 0x10000. Modules are located at very high memory addresses, 41 - * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment 42 - * and beginning of first module's text segment is very big. 43 - * Therefore do not fill this gap and do not assign it to the kernel dso map. 44 - */ 45 - void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) 46 - { 47 - if (strchr(p->name, '[') == NULL && strchr(c->name, '[')) 48 - /* Last kernel symbol mapped to end of page */ 49 - p->end = roundup(p->end, page_size); 50 - else 51 - p->end = c->start; 52 - pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); 53 - }
+1
tools/perf/tests/attr/README
··· 60 60 perf record -R kill (test-record-raw) 61 61 perf record -c 2 -e arm_spe_0// -- kill (test-record-spe-period) 62 62 perf record -e arm_spe_0/period=3/ -- kill (test-record-spe-period-term) 63 + perf record -e arm_spe_0/pa_enable=1/ -- kill (test-record-spe-physical-address) 63 64 perf stat -e cycles kill (test-stat-basic) 64 65 perf stat kill (test-stat-default) 65 66 perf stat -d kill (test-stat-detailed-1)
+12
tools/perf/tests/attr/test-record-spe-physical-address
··· 1 + [config] 2 + command = record 3 + args = --no-bpf-event -e arm_spe_0/pa_enable=1/ -- kill >/dev/null 2>&1 4 + ret = 1 5 + arch = aarch64 6 + 7 + [event-10:base-record-spe] 8 + # 622727 is the decimal of IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC|PHYS_ADDR 9 + sample_type=622727 10 + 11 + # dummy event 12 + [event-1:base-record-spe]
+3 -2
tools/perf/util/arm-spe.c
··· 1033 1033 memset(&attr, 0, sizeof(struct perf_event_attr)); 1034 1034 attr.size = sizeof(struct perf_event_attr); 1035 1035 attr.type = PERF_TYPE_HARDWARE; 1036 - attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 1036 + attr.sample_type = evsel->core.attr.sample_type & 1037 + (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR); 1037 1038 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1038 1039 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | 1039 - PERF_SAMPLE_WEIGHT; 1040 + PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; 1040 1041 if (spe->timeless_decoding) 1041 1042 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1042 1043 else
+1 -1
tools/perf/util/session.c
··· 2576 2576 if (perf_data__is_pipe(session->data)) 2577 2577 return __perf_session__process_pipe_events(session); 2578 2578 2579 - if (perf_data__is_dir(session->data)) 2579 + if (perf_data__is_dir(session->data) && session->data->dir.nr) 2580 2580 return __perf_session__process_dir_events(session); 2581 2581 2582 2582 return __perf_session__process_events(session);
+1 -1
tools/perf/util/symbol-elf.c
··· 1290 1290 * For misannotated, zeroed, ASM function sizes. 1291 1291 */ 1292 1292 if (nr > 0) { 1293 - symbols__fixup_end(&dso->symbols); 1293 + symbols__fixup_end(&dso->symbols, false); 1294 1294 symbols__fixup_duplicate(&dso->symbols); 1295 1295 if (kmap) { 1296 1296 /*
+27 -10
tools/perf/util/symbol.c
··· 101 101 return tail - str; 102 102 } 103 103 104 - void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c) 105 - { 106 - p->end = c->start; 107 - } 108 - 109 104 const char * __weak arch__normalize_symbol_name(const char *name) 110 105 { 111 106 return name; ··· 212 217 } 213 218 } 214 219 215 - void symbols__fixup_end(struct rb_root_cached *symbols) 220 + /* Update zero-sized symbols using the address of the next symbol */ 221 + void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) 216 222 { 217 223 struct rb_node *nd, *prevnd = rb_first_cached(symbols); 218 224 struct symbol *curr, *prev; ··· 227 231 prev = curr; 228 232 curr = rb_entry(nd, struct symbol, rb_node); 229 233 230 - if (prev->end == prev->start || prev->end != curr->start) 231 - arch__symbols__fixup_end(prev, curr); 234 + /* 235 + * On some architecture kernel text segment start is located at 236 + * some low memory address, while modules are located at high 237 + * memory addresses (or vice versa). The gap between end of 238 + * kernel text segment and beginning of first module's text 239 + * segment is very big. Therefore do not fill this gap and do 240 + * not assign it to the kernel dso map (kallsyms). 
241 + * 242 + * In kallsyms, it determines module symbols using '[' character 243 + * like in: 244 + * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi] 245 + */ 246 + if (prev->end == prev->start) { 247 + /* Last kernel/module symbol mapped to end of page */ 248 + if (is_kallsyms && (!strchr(prev->name, '[') != 249 + !strchr(curr->name, '['))) 250 + prev->end = roundup(prev->end + 4096, 4096); 251 + else 252 + prev->end = curr->start; 253 + 254 + pr_debug4("%s sym:%s end:%#" PRIx64 "\n", 255 + __func__, prev->name, prev->end); 256 + } 232 257 } 233 258 234 259 /* Last entry */ ··· 1484 1467 if (kallsyms__delta(kmap, filename, &delta)) 1485 1468 return -1; 1486 1469 1487 - symbols__fixup_end(&dso->symbols); 1470 + symbols__fixup_end(&dso->symbols, true); 1488 1471 symbols__fixup_duplicate(&dso->symbols); 1489 1472 1490 1473 if (dso->kernel == DSO_SPACE__KERNEL_GUEST) ··· 1676 1659 #undef bfd_asymbol_section 1677 1660 #endif 1678 1661 1679 - symbols__fixup_end(&dso->symbols); 1662 + symbols__fixup_end(&dso->symbols, false); 1680 1663 symbols__fixup_duplicate(&dso->symbols); 1681 1664 dso->adjust_symbols = 1; 1682 1665
+1 -2
tools/perf/util/symbol.h
··· 203 203 bool kernel); 204 204 void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); 205 205 void symbols__fixup_duplicate(struct rb_root_cached *symbols); 206 - void symbols__fixup_end(struct rb_root_cached *symbols); 206 + void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); 207 207 void maps__fixup_end(struct maps *maps); 208 208 209 209 typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); ··· 241 241 #define SYMBOL_A 0 242 242 #define SYMBOL_B 1 243 243 244 - void arch__symbols__fixup_end(struct symbol *p, struct symbol *c); 245 244 int arch__compare_symbol_names(const char *namea, const char *nameb); 246 245 int arch__compare_symbol_names_n(const char *namea, const char *nameb, 247 246 unsigned int n);