Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
"I'd like to apologize for this very late pull request: I was dithering
through the week whether to send the fixes, and then yesterday Jiri's
crash fix for a regression introduced in this cycle clearly marked
perf/urgent as 'must merge now'.

Most of the commits are tooling fixes, plus there are three kernel fixes
via four commits:

- race fix in the Intel PEBS code

- fix an AUX bug and roll back a previous attempt

- fix AMD family 17h generic HW cache-event perf counters

The largest diffstat contribution comes from the AMD fix - a new event
table is introduced, which is a fairly low-risk change but has a large
line count"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel: Fix race in intel_pmu_disable_event()
perf/x86/intel/pt: Remove software double buffering PMU capability
perf/ring_buffer: Fix AUX software double buffering
perf tools: Remove needless asm/unistd.h include fixing build in some places
tools arch uapi: Copy missing unistd.h headers for arc, hexagon and riscv
tools build: Add -ldl to the disassembler-four-args feature test
perf cs-etm: Always allocate memory for cs_etm_queue::prev_packet
perf cs-etm: Don't check cs_etm_queue::prev_packet validity
perf report: Report OOM in status line in the GTK UI
perf bench numa: Add define for RUSAGE_THREAD if not present
tools lib traceevent: Change tag string for error
perf annotate: Fix build on 32 bit for BPF annotation
tools uapi x86: Sync vmx.h with the kernel
perf bpf: Return value with unlocking in perf_env__find_btf()
MAINTAINERS: Include vendor specific files under arch/*/events/*
perf/x86/amd: Update generic hardware cache events for Family 17h

+272 -32
+1
MAINTAINERS
··· 12176 12176 F: arch/*/include/asm/perf_event.h 12177 12177 F: arch/*/kernel/perf_callchain.c 12178 12178 F: arch/*/events/* 12179 + F: arch/*/events/*/* 12179 12180 F: tools/perf/ 12180 12181 12181 12182 PERSONALITY HANDLING
+108 -3
arch/x86/events/amd/core.c
··· 116 116 }, 117 117 }; 118 118 119 + static __initconst const u64 amd_hw_cache_event_ids_f17h 120 + [PERF_COUNT_HW_CACHE_MAX] 121 + [PERF_COUNT_HW_CACHE_OP_MAX] 122 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 123 + [C(L1D)] = { 124 + [C(OP_READ)] = { 125 + [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */ 126 + [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */ 127 + }, 128 + [C(OP_WRITE)] = { 129 + [C(RESULT_ACCESS)] = 0, 130 + [C(RESULT_MISS)] = 0, 131 + }, 132 + [C(OP_PREFETCH)] = { 133 + [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */ 134 + [C(RESULT_MISS)] = 0, 135 + }, 136 + }, 137 + [C(L1I)] = { 138 + [C(OP_READ)] = { 139 + [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */ 140 + [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */ 141 + }, 142 + [C(OP_WRITE)] = { 143 + [C(RESULT_ACCESS)] = -1, 144 + [C(RESULT_MISS)] = -1, 145 + }, 146 + [C(OP_PREFETCH)] = { 147 + [C(RESULT_ACCESS)] = 0, 148 + [C(RESULT_MISS)] = 0, 149 + }, 150 + }, 151 + [C(LL)] = { 152 + [C(OP_READ)] = { 153 + [C(RESULT_ACCESS)] = 0, 154 + [C(RESULT_MISS)] = 0, 155 + }, 156 + [C(OP_WRITE)] = { 157 + [C(RESULT_ACCESS)] = 0, 158 + [C(RESULT_MISS)] = 0, 159 + }, 160 + [C(OP_PREFETCH)] = { 161 + [C(RESULT_ACCESS)] = 0, 162 + [C(RESULT_MISS)] = 0, 163 + }, 164 + }, 165 + [C(DTLB)] = { 166 + [C(OP_READ)] = { 167 + [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */ 168 + [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */ 169 + }, 170 + [C(OP_WRITE)] = { 171 + [C(RESULT_ACCESS)] = 0, 172 + [C(RESULT_MISS)] = 0, 173 + }, 174 + [C(OP_PREFETCH)] = { 175 + [C(RESULT_ACCESS)] = 0, 176 + [C(RESULT_MISS)] = 0, 177 + }, 178 + }, 179 + [C(ITLB)] = { 180 + [C(OP_READ)] = { 181 + [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */ 182 + [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */ 183 + }, 184 + [C(OP_WRITE)] = { 185 + [C(RESULT_ACCESS)] = -1, 186 + [C(RESULT_MISS)] = -1, 187 + }, 188 + [C(OP_PREFETCH)] = { 189 + [C(RESULT_ACCESS)] = -1, 
190 + [C(RESULT_MISS)] = -1, 191 + }, 192 + }, 193 + [C(BPU)] = { 194 + [C(OP_READ)] = { 195 + [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */ 196 + [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */ 197 + }, 198 + [C(OP_WRITE)] = { 199 + [C(RESULT_ACCESS)] = -1, 200 + [C(RESULT_MISS)] = -1, 201 + }, 202 + [C(OP_PREFETCH)] = { 203 + [C(RESULT_ACCESS)] = -1, 204 + [C(RESULT_MISS)] = -1, 205 + }, 206 + }, 207 + [C(NODE)] = { 208 + [C(OP_READ)] = { 209 + [C(RESULT_ACCESS)] = 0, 210 + [C(RESULT_MISS)] = 0, 211 + }, 212 + [C(OP_WRITE)] = { 213 + [C(RESULT_ACCESS)] = -1, 214 + [C(RESULT_MISS)] = -1, 215 + }, 216 + [C(OP_PREFETCH)] = { 217 + [C(RESULT_ACCESS)] = -1, 218 + [C(RESULT_MISS)] = -1, 219 + }, 220 + }, 221 + }; 222 + 119 223 /* 120 224 * AMD Performance Monitor K7 and later, up to and including Family 16h: 121 225 */ ··· 969 865 x86_pmu.amd_nb_constraints = 0; 970 866 } 971 867 972 - /* Events are common for all AMDs */ 973 - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 974 - sizeof(hw_cache_event_ids)); 868 + if (boot_cpu_data.x86 >= 0x17) 869 + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids)); 870 + else 871 + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 975 872 976 873 return 0; 977 874 }
+7 -3
arch/x86/events/intel/core.c
··· 2091 2091 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); 2092 2092 cpuc->intel_cp_status &= ~(1ull << hwc->idx); 2093 2093 2094 - if (unlikely(event->attr.precise_ip)) 2095 - intel_pmu_pebs_disable(event); 2096 - 2097 2094 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 2098 2095 intel_pmu_disable_fixed(hwc); 2099 2096 return; 2100 2097 } 2101 2098 2102 2099 x86_pmu_disable_event(event); 2100 + 2101 + /* 2102 + * Needs to be called after x86_pmu_disable_event, 2103 + * so we don't trigger the event without PEBS bit set. 2104 + */ 2105 + if (unlikely(event->attr.precise_ip)) 2106 + intel_pmu_pebs_disable(event); 2103 2107 } 2104 2108 2105 2109 static void intel_pmu_del_event(struct perf_event *event)
+1 -2
arch/x86/events/intel/pt.c
··· 1525 1525 } 1526 1526 1527 1527 if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) 1528 - pt_pmu.pmu.capabilities = 1529 - PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF; 1528 + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; 1530 1529 1531 1530 pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; 1532 1531 pt_pmu.pmu.attr_groups = pt_attr_groups;
-1
include/linux/perf_event.h
··· 240 240 #define PERF_PMU_CAP_NO_INTERRUPT 0x01 241 241 #define PERF_PMU_CAP_NO_NMI 0x02 242 242 #define PERF_PMU_CAP_AUX_NO_SG 0x04 243 - #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 244 243 #define PERF_PMU_CAP_EXCLUSIVE 0x10 245 244 #define PERF_PMU_CAP_ITRACE 0x20 246 245 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
+1 -2
kernel/events/ring_buffer.c
··· 610 610 * PMU requests more than one contiguous chunks of memory 611 611 * for SW double buffering 612 612 */ 613 - if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) && 614 - !overwrite) { 613 + if (!overwrite) { 615 614 if (!max_order) 616 615 return -EINVAL; 617 616
+51
tools/arch/arc/include/uapi/asm/unistd.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + /* 3 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + */ 9 + 10 + /******** no-legacy-syscalls-ABI *******/ 11 + 12 + /* 13 + * Non-typical guard macro to enable inclusion twice in ARCH sys.c 14 + * That is how the Generic syscall wrapper generator works 15 + */ 16 + #if !defined(_UAPI_ASM_ARC_UNISTD_H) || defined(__SYSCALL) 17 + #define _UAPI_ASM_ARC_UNISTD_H 18 + 19 + #define __ARCH_WANT_RENAMEAT 20 + #define __ARCH_WANT_STAT64 21 + #define __ARCH_WANT_SET_GET_RLIMIT 22 + #define __ARCH_WANT_SYS_EXECVE 23 + #define __ARCH_WANT_SYS_CLONE 24 + #define __ARCH_WANT_SYS_VFORK 25 + #define __ARCH_WANT_SYS_FORK 26 + #define __ARCH_WANT_TIME32_SYSCALLS 27 + 28 + #define sys_mmap2 sys_mmap_pgoff 29 + 30 + #include <asm-generic/unistd.h> 31 + 32 + #define NR_syscalls __NR_syscalls 33 + 34 + /* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ 35 + #define __NR_sysfs (__NR_arch_specific_syscall + 3) 36 + 37 + /* ARC specific syscall */ 38 + #define __NR_cacheflush (__NR_arch_specific_syscall + 0) 39 + #define __NR_arc_settls (__NR_arch_specific_syscall + 1) 40 + #define __NR_arc_gettls (__NR_arch_specific_syscall + 2) 41 + #define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4) 42 + 43 + __SYSCALL(__NR_cacheflush, sys_cacheflush) 44 + __SYSCALL(__NR_arc_settls, sys_arc_settls) 45 + __SYSCALL(__NR_arc_gettls, sys_arc_gettls) 46 + __SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg) 47 + __SYSCALL(__NR_sysfs, sys_sysfs) 48 + 49 + #undef __SYSCALL 50 + 51 + #endif
+40
tools/arch/hexagon/include/uapi/asm/unistd.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + /* 3 + * Syscall support for Hexagon 4 + * 5 + * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 and 9 + * only version 2 as published by the Free Software Foundation. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write to the Free Software 18 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 + * 02110-1301, USA. 20 + */ 21 + 22 + /* 23 + * The kernel pulls this unistd.h in three different ways: 24 + * 1. the "normal" way which gets all the __NR defines 25 + * 2. with __SYSCALL defined to produce function declarations 26 + * 3. with __SYSCALL defined to produce syscall table initialization 27 + * See also: syscalltab.c 28 + */ 29 + 30 + #define sys_mmap2 sys_mmap_pgoff 31 + #define __ARCH_WANT_RENAMEAT 32 + #define __ARCH_WANT_STAT64 33 + #define __ARCH_WANT_SET_GET_RLIMIT 34 + #define __ARCH_WANT_SYS_EXECVE 35 + #define __ARCH_WANT_SYS_CLONE 36 + #define __ARCH_WANT_SYS_VFORK 37 + #define __ARCH_WANT_SYS_FORK 38 + #define __ARCH_WANT_TIME32_SYSCALLS 39 + 40 + #include <asm-generic/unistd.h>
+42
tools/arch/riscv/include/uapi/asm/unistd.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + /* 3 + * Copyright (C) 2018 David Abdurachmanov <david.abdurachmanov@gmail.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #ifdef __LP64__ 19 + #define __ARCH_WANT_NEW_STAT 20 + #define __ARCH_WANT_SET_GET_RLIMIT 21 + #endif /* __LP64__ */ 22 + 23 + #include <asm-generic/unistd.h> 24 + 25 + /* 26 + * Allows the instruction cache to be flushed from userspace. Despite RISC-V 27 + * having a direct 'fence.i' instruction available to userspace (which we 28 + * can't trap!), that's not actually viable when running on Linux because the 29 + * kernel might schedule a process on another hart. There is no way for 30 + * userspace to handle this without invoking the kernel (as it doesn't know the 31 + * thread->hart mappings), so we've defined a RISC-V specific system call to 32 + * flush the instruction cache. 33 + * 34 + * __NR_riscv_flush_icache is defined to flush the instruction cache over an 35 + * address range, with the flush applying to either all threads or just the 36 + * caller. We don't currently do anything with the address range, that's just 37 + * in there for forwards compatibility. 38 + */ 39 + #ifndef __NR_riscv_flush_icache 40 + #define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) 41 + #endif 42 + __SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+1
tools/arch/x86/include/uapi/asm/vmx.h
··· 146 146 147 147 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 148 148 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 149 + #define VMX_ABORT_VMCS_CORRUPTED 3 149 150 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 150 151 151 152 #endif /* _UAPIVMX_H */
+1 -1
tools/lib/traceevent/parse-utils.c
··· 14 14 void __vwarning(const char *fmt, va_list ap) 15 15 { 16 16 if (errno) 17 - perror("trace-cmd"); 17 + perror("libtraceevent"); 18 18 errno = 0; 19 19 20 20 fprintf(stderr, " ");
+1 -1
tools/perf/Makefile.config
··· 227 227 228 228 FEATURE_CHECK_LDFLAGS-libaio = -lrt 229 229 230 - FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes 230 + FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl 231 231 232 232 CFLAGS += -fno-omit-frame-pointer 233 233 CFLAGS += -ggdb3
+4
tools/perf/bench/numa.c
··· 39 39 #include <numa.h> 40 40 #include <numaif.h> 41 41 42 + #ifndef RUSAGE_THREAD 43 + # define RUSAGE_THREAD 1 44 + #endif 45 + 42 46 /* 43 47 * Regular printout to the terminal, supressed if -q is specified: 44 48 */
+4 -4
tools/perf/util/annotate.c
··· 1714 1714 if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) 1715 1715 return -1; 1716 1716 1717 - pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__, 1718 - sym->name, sym->start, sym->end - sym->start); 1717 + pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, 1718 + sym->name, sym->start, sym->end - sym->start); 1719 1719 1720 1720 memset(tpath, 0, sizeof(tpath)); 1721 1721 perf_exe(tpath, sizeof(tpath)); ··· 1740 1740 info_linear = info_node->info_linear; 1741 1741 sub_id = dso->bpf_prog.sub_id; 1742 1742 1743 - info.buffer = (void *)(info_linear->info.jited_prog_insns); 1743 + info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns); 1744 1744 info.buffer_length = info_linear->info.jited_prog_len; 1745 1745 1746 1746 if (info_linear->info.nr_line_info) ··· 1776 1776 const char *srcline; 1777 1777 u64 addr; 1778 1778 1779 - addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id]; 1779 + addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id]; 1780 1780 count = disassemble(pc, &info); 1781 1781 1782 1782 if (prog_linfo)
-1
tools/perf/util/cloexec.c
··· 7 7 #include "asm/bug.h" 8 8 #include "debug.h" 9 9 #include <unistd.h> 10 - #include <asm/unistd.h> 11 10 #include <sys/syscall.h> 12 11 13 12 static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
+4 -10
tools/perf/util/cs-etm.c
··· 422 422 if (!etmq->packet) 423 423 goto out_free; 424 424 425 - if (etm->synth_opts.last_branch || etm->sample_branches) { 426 - etmq->prev_packet = zalloc(szp); 427 - if (!etmq->prev_packet) 428 - goto out_free; 429 - } 425 + etmq->prev_packet = zalloc(szp); 426 + if (!etmq->prev_packet) 427 + goto out_free; 430 428 431 429 if (etm->synth_opts.last_branch) { 432 430 size_t sz = sizeof(struct branch_stack); ··· 979 981 * PREV_PACKET is a branch. 980 982 */ 981 983 if (etm->synth_opts.last_branch && 982 - etmq->prev_packet && 983 984 etmq->prev_packet->sample_type == CS_ETM_RANGE && 984 985 etmq->prev_packet->last_instr_taken_branch) 985 986 cs_etm__update_last_branch_rb(etmq); ··· 1011 1014 etmq->period_instructions = instrs_over; 1012 1015 } 1013 1016 1014 - if (etm->sample_branches && etmq->prev_packet) { 1017 + if (etm->sample_branches) { 1015 1018 bool generate_sample = false; 1016 1019 1017 1020 /* Generate sample for tracing on packet */ ··· 1067 1070 int err = 0; 1068 1071 struct cs_etm_auxtrace *etm = etmq->etm; 1069 1072 struct cs_etm_packet *tmp; 1070 - 1071 - if (!etmq->prev_packet) 1072 - return 0; 1073 1073 1074 1074 /* Handle start tracing packet */ 1075 1075 if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
+1 -1
tools/perf/util/env.c
··· 115 115 } 116 116 node = NULL; 117 117 118 - up_read(&env->bpf_progs.lock); 119 118 out: 119 + up_read(&env->bpf_progs.lock); 120 120 return node; 121 121 } 122 122
+5 -3
tools/perf/util/session.c
··· 1928 1928 1929 1929 size = event->header.size; 1930 1930 1931 + skip = -EINVAL; 1932 + 1931 1933 if (size < sizeof(struct perf_event_header) || 1932 1934 (skip = rd->process(session, event, file_pos)) < 0) { 1933 - pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", 1935 + pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n", 1934 1936 file_offset + head, event->header.size, 1935 - event->header.type); 1936 - err = -EINVAL; 1937 + event->header.type, strerror(-skip)); 1938 + err = skip; 1937 1939 goto out; 1938 1940 } 1939 1941