Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
"A pile of perf related fixes:

Kernel:
- Fix SLOTS PEBS event constraints for Icelake CPUs

- Add the missing mask bit to allow counting hardware generated
prefetches on L3 for Icelake CPUs

- Make the test for hypervisor platforms more accurate (as far as
possible)

- Correctly handle PMUs which override event->cpu

- Yet another missing fallthrough annotation

Tools:
perf.data:
- Fix loading of compressed data split across adjacent records
- Fix buffer size setting for processing CPU topology perf.data
header.

perf stat:
- Fix segfault for event group in repeat mode
- Always print the "stalled cycles per insn" line separately; it was
being appended to the "instructions" line.

perf script:
- Fix --max-blocks man page description.
- Improve man page description of metrics.
- Fix off by one in brstackinsn IPC computation.

perf probe:
- Avoid calling the freeing routine multiple times for the same pointer.

perf build:
- Do not use -Wshadow on gcc < 4.8, where its overly strict warnings
are treated as errors and break the build"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel: Mark expected switch fall-throughs
perf/core: Fix creating kernel counters for PMUs that override event->cpu
perf/x86: Apply more accurate check on hypervisor platform
perf/x86/intel: Fix invalid Bit 13 for Icelake MSR_OFFCORE_RSP_x register
perf/x86/intel: Fix SLOTS PEBS event constraint
perf build: Do not use -Wshadow on gcc < 4.8
perf probe: Avoid calling freeing routine multiple times for same pointer
perf probe: Set pev->nargs to zero after freeing pev->args entries
perf session: Fix loading of compressed data split across adjacent records
perf stat: Always separate stalled cycles per insn
perf stat: Fix segfault for event group in repeat mode
perf tools: Fix proper buffer size for feature processing
perf script: Fix off by one in brstackinsn IPC computation
perf script: Improve man page description of metrics
perf script: Fix --max-blocks man page description

+59 -25
+3 -4
arch/x86/events/intel/core.c
··· 20 20 #include <asm/intel-family.h> 21 21 #include <asm/apic.h> 22 22 #include <asm/cpu_device_id.h> 23 - #include <asm/hypervisor.h> 24 23 25 24 #include "../perf_event.h" 26 25 ··· 262 263 }; 263 264 264 265 static struct extra_reg intel_icl_extra_regs[] __read_mostly = { 265 - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0), 266 - INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1), 266 + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0), 267 + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1), 267 268 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 268 269 INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), 269 270 EVENT_EXTRA_END ··· 4052 4053 * Disable the check for real HW, so we don't 4053 4054 * mess with potentionaly enabled registers: 4054 4055 */ 4055 - if (hypervisor_is_type(X86_HYPER_NATIVE)) 4056 + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) 4056 4057 return true; 4057 4058 4058 4059 /*
+1 -1
arch/x86/events/intel/ds.c
··· 851 851 852 852 struct event_constraint intel_icl_pebs_event_constraints[] = { 853 853 INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ 854 - INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */ 854 + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ 855 855 856 856 INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 857 857 INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
+1 -1
kernel/events/core.c
··· 11274 11274 goto err_unlock; 11275 11275 } 11276 11276 11277 - perf_install_in_context(ctx, event, cpu); 11277 + perf_install_in_context(ctx, event, event->cpu); 11278 11278 perf_unpin_context(ctx); 11279 11279 mutex_unlock(&ctx->mutex); 11280 11280
+4 -4
tools/perf/Documentation/perf-script.txt
··· 228 228 229 229 With the metric option perf script can compute metrics for 230 230 sampling periods, similar to perf stat. This requires 231 - specifying a group with multiple metrics with the :S option 231 + specifying a group with multiple events defining metrics with the :S option 232 232 for perf record. perf will sample on the first event, and 233 - compute metrics for all the events in the group. Please note 233 + print computed metrics for all the events in the group. Please note 234 234 that the metric computed is averaged over the whole sampling 235 - period, not just for the sample point. 235 + period (since the last sample), not just for the sample point. 236 236 237 237 For sample events it's possible to display misc field with -F +misc option, 238 238 following letters are displayed for each bit: ··· 384 384 perf script --time 0%-10%,30%-40% 385 385 386 386 --max-blocks:: 387 - Set the maximum number of program blocks to print with brstackasm for 387 + Set the maximum number of program blocks to print with brstackinsn for 388 388 each sample. 389 389 390 390 --reltime::
+10
tools/perf/builtin-probe.c
··· 698 698 699 699 ret = perf_add_probe_events(params.events, params.nevents); 700 700 if (ret < 0) { 701 + 702 + /* 703 + * When perf_add_probe_events() fails it calls 704 + * cleanup_perf_probe_events(pevs, npevs), i.e. 705 + * cleanup_perf_probe_events(params.events, params.nevents), which 706 + * will call clear_perf_probe_event(), so set nevents to zero 707 + * to avoid cleanup_params() to call clear_perf_probe_event() again 708 + * on the same pevs. 709 + */ 710 + params.nevents = 0; 701 711 pr_err_with_code(" Error: Failed to add events.", ret); 702 712 return ret; 703 713 }
+1 -1
tools/perf/builtin-script.c
··· 1059 1059 1060 1060 printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); 1061 1061 if (ip == end) { 1062 - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp, 1062 + printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp, 1063 1063 &total_cycles); 1064 1064 if (PRINT_FIELD(SRCCODE)) 1065 1065 printed += print_srccode(thread, x.cpumode, ip);
+8 -1
tools/perf/builtin-stat.c
··· 607 607 * group leaders. 608 608 */ 609 609 read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); 610 - perf_evlist__close(evsel_list); 610 + 611 + /* 612 + * We need to keep evsel_list alive, because it's processed 613 + * later the evsel_list will be closed after. 614 + */ 615 + if (!STAT_RECORD) 616 + perf_evlist__close(evsel_list); 611 617 612 618 return WEXITSTATUS(status); 613 619 } ··· 2003 1997 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2004 1998 } 2005 1999 2000 + perf_evlist__close(evsel_list); 2006 2001 perf_session__delete(perf_stat.session); 2007 2002 } 2008 2003
+2
tools/perf/util/evsel.c
··· 1291 1291 xyarray__delete(evsel->sample_id); 1292 1292 evsel->sample_id = NULL; 1293 1293 zfree(&evsel->id); 1294 + evsel->ids = 0; 1294 1295 } 1295 1296 1296 1297 static void perf_evsel__free_config_terms(struct perf_evsel *evsel) ··· 2078 2077 2079 2078 perf_evsel__close_fd(evsel); 2080 2079 perf_evsel__free_fd(evsel); 2080 + perf_evsel__free_id(evsel); 2081 2081 } 2082 2082 2083 2083 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+1 -1
tools/perf/util/header.c
··· 3747 3747 return 0; 3748 3748 3749 3749 ff.buf = (void *)fe->data; 3750 - ff.size = event->header.size - sizeof(event->header); 3750 + ff.size = event->header.size - sizeof(*fe); 3751 3751 ff.ph = &session->header; 3752 3752 3753 3753 if (feat_ops[feat].process(&ff, NULL))
+1
tools/perf/util/probe-event.c
··· 2230 2230 field = next; 2231 2231 } 2232 2232 } 2233 + pev->nargs = 0; 2233 2234 zfree(&pev->args); 2234 2235 } 2235 2236
+14 -8
tools/perf/util/session.c
··· 36 36 void *src; 37 37 size_t decomp_size, src_size; 38 38 u64 decomp_last_rem = 0; 39 - size_t decomp_len = session->header.env.comp_mmap_len; 39 + size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; 40 40 struct decomp *decomp, *decomp_last = session->decomp_last; 41 41 42 - decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE, 42 + if (decomp_last) { 43 + decomp_last_rem = decomp_last->size - decomp_last->head; 44 + decomp_len += decomp_last_rem; 45 + } 46 + 47 + mmap_len = sizeof(struct decomp) + decomp_len; 48 + decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE, 43 49 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 44 50 if (decomp == MAP_FAILED) { 45 51 pr_err("Couldn't allocate memory for decompression\n"); ··· 53 47 } 54 48 55 49 decomp->file_pos = file_offset; 50 + decomp->mmap_len = mmap_len; 56 51 decomp->head = 0; 57 52 58 - if (decomp_last) { 59 - decomp_last_rem = decomp_last->size - decomp_last->head; 53 + if (decomp_last_rem) { 60 54 memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem); 61 55 decomp->size = decomp_last_rem; 62 56 } ··· 67 61 decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size, 68 62 &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); 69 63 if (!decomp_size) { 70 - munmap(decomp, sizeof(struct decomp) + decomp_len); 64 + munmap(decomp, mmap_len); 71 65 pr_err("Couldn't decompress data\n"); 72 66 return -1; 73 67 } ··· 261 255 static void perf_session__release_decomp_events(struct perf_session *session) 262 256 { 263 257 struct decomp *next, *decomp; 264 - size_t decomp_len; 258 + size_t mmap_len; 265 259 next = session->decomp; 266 - decomp_len = session->header.env.comp_mmap_len; 267 260 do { 268 261 decomp = next; 269 262 if (decomp == NULL) 270 263 break; 271 264 next = decomp->next; 272 - munmap(decomp, decomp_len + sizeof(struct decomp)); 265 + mmap_len = decomp->mmap_len; 266 + munmap(decomp, mmap_len); 273 267 } while (1); 274 268 } 275 
269
+1
tools/perf/util/session.h
··· 46 46 struct decomp { 47 47 struct decomp *next; 48 48 u64 file_pos; 49 + size_t mmap_len; 49 50 u64 head; 50 51 size_t size; 51 52 char data[];
+2 -1
tools/perf/util/stat-shadow.c
··· 819 819 "stalled cycles per insn", 820 820 ratio); 821 821 } else if (have_frontend_stalled) { 822 - print_metric(config, ctxp, NULL, NULL, 822 + out->new_line(config, ctxp); 823 + print_metric(config, ctxp, NULL, "%7.2f ", 823 824 "stalled cycles per insn", 0); 824 825 } 825 826 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+2 -2
tools/perf/util/zstd.c
··· 99 99 while (input.pos < input.size) { 100 100 ret = ZSTD_decompressStream(data->dstream, &output, &input); 101 101 if (ZSTD_isError(ret)) { 102 - pr_err("failed to decompress (B): %ld -> %ld : %s\n", 103 - src_size, output.size, ZSTD_getErrorName(ret)); 102 + pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n", 103 + src_size, output.size, dst_size, ZSTD_getErrorName(ret)); 104 104 break; 105 105 } 106 106 output.dst = dst + output.pos;
+8 -1
tools/scripts/Makefile.include
··· 32 32 EXTRA_WARNINGS += -Wold-style-definition 33 33 EXTRA_WARNINGS += -Wpacked 34 34 EXTRA_WARNINGS += -Wredundant-decls 35 - EXTRA_WARNINGS += -Wshadow 36 35 EXTRA_WARNINGS += -Wstrict-prototypes 37 36 EXTRA_WARNINGS += -Wswitch-default 38 37 EXTRA_WARNINGS += -Wswitch-enum ··· 68 69 # will do for now and keep the above -Wstrict-aliasing=3 in place 69 70 # in newer systems. 70 71 # Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h 72 + # 73 + # See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html, 74 + # that takes into account Linus's comments (search for Wshadow) for the reasoning about 75 + # -Wshadow not being interesting before gcc 4.8. 76 + 71 77 ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3 72 78 EXTRA_WARNINGS += -fno-strict-aliasing 79 + EXTRA_WARNINGS += -Wno-shadow 80 + else 81 + EXTRA_WARNINGS += -Wshadow 73 82 endif 74 83 75 84 ifneq ($(findstring $(MAKEFLAGS), w),w)