Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-tools-fixes-for-v5.17-2022-02-06' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

- Fix display of grouped aliased events in 'perf stat'.

- Add missing branch_sample_type to perf_event_attr__fprintf().

- Apply correct label to user/kernel symbols in branch mode.

- Fix 'perf ftrace' system_wide tracing: the flag has to be set before
creating the maps.

- Return error if procfs isn't mounted for PID namespaces when
synthesizing records for pre-existing processes.

- Set error stream of objdump process for 'perf annotate' TUI, to avoid
garbling the screen.

- Add missing arm64 support to perf_mmap__read_self(); the kernel part
was merged in 5.17.

- Check for a NULL pointer before dereferencing it when writing debug
info about a sample.

- Update UAPI copies for asound, perf_event, prctl and kvm headers.

- Fix a typo in bpf_counter_cgroup.c.

* tag 'perf-tools-fixes-for-v5.17-2022-02-06' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf ftrace: system_wide collection is not effective by default
libperf: Add arm64 support to perf_mmap__read_self()
tools include UAPI: Sync sound/asound.h copy with the kernel sources
perf stat: Fix display of grouped aliased events
perf tools: Apply correct label to user/kernel symbols in branch mode
perf bpf: Fix a typo in bpf_counter_cgroup.c
perf synthetic-events: Return error if procfs isn't mounted for PID namespaces
perf session: Check for NULL pointer before dereference
perf annotate: Set error stream of objdump process for TUI
perf tools: Add missing branch_sample_type to perf_event_attr__fprintf()
tools headers UAPI: Sync linux/kvm.h with the kernel sources
tools headers UAPI: Sync linux/prctl.h with the kernel sources
perf beauty: Make the prctl arg regexp more strict to cope with PR_SET_VMA
tools headers cpufeatures: Sync with the kernel sources
tools headers UAPI: Sync linux/perf_event.h with the kernel sources
tools include UAPI: Sync sound/asound.h copy with the kernel sources

+186 -45
+2
tools/arch/x86/include/asm/cpufeatures.h
··· 299 299 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ 300 300 #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ 301 301 #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ 302 + #define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ 302 303 #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ 304 + #define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ 303 305 304 306 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ 305 307 #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
+3 -3
tools/include/uapi/linux/kvm.h
··· 1624 1624 #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) 1625 1625 #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) 1626 1626 1627 - /* Available with KVM_CAP_XSAVE2 */ 1628 - #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) 1629 - 1630 1627 struct kvm_s390_pv_sec_parm { 1631 1628 __u64 origin; 1632 1629 __u64 length; ··· 2044 2047 }; 2045 2048 2046 2049 #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) 2050 + 2051 + /* Available with KVM_CAP_XSAVE2 */ 2052 + #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) 2047 2053 2048 2054 #endif /* __LINUX_KVM_H */
+3 -3
tools/include/uapi/linux/perf_event.h
··· 1332 1332 1333 1333 /* hop level */ 1334 1334 #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ 1335 - #define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ 1336 - #define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ 1337 - #define PERF_MEM_HOPS_3 0x04 /* remote board */ 1335 + #define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ 1336 + #define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ 1337 + #define PERF_MEM_HOPS_3 0x04 /* remote board */ 1338 1338 /* 5-7 available */ 1339 1339 #define PERF_MEM_HOPS_SHIFT 43 1340 1340
+3
tools/include/uapi/linux/prctl.h
··· 272 272 # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 273 273 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 274 274 275 + #define PR_SET_VMA 0x53564d41 276 + # define PR_SET_VMA_ANON_NAME 0 277 + 275 278 #endif /* _LINUX_PRCTL_H */
+9 -2
tools/include/uapi/sound/asound.h
··· 56 56 * * 57 57 ****************************************************************************/ 58 58 59 + #define AES_IEC958_STATUS_SIZE 24 60 + 59 61 struct snd_aes_iec958 { 60 - unsigned char status[24]; /* AES/IEC958 channel status bits */ 62 + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ 61 63 unsigned char subcode[147]; /* AES/IEC958 subcode bits */ 62 64 unsigned char pad; /* nothing */ 63 65 unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ ··· 204 202 #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ 205 203 #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ 206 204 #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ 205 + /* 206 + * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the 207 + * available bit count in most significant bit. It's for the case of so-called 'left-justified' or 208 + * `right-padding` sample which has less width than 32 bit. 209 + */ 207 210 #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) 208 211 #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) 209 212 #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) ··· 307 300 #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ 308 301 #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ 309 302 #define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ 310 - 303 + #define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */ 311 304 #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ 312 305 #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ 313 306
+98
tools/lib/perf/mmap.c
··· 13 13 #include <internal/lib.h> 14 14 #include <linux/kernel.h> 15 15 #include <linux/math64.h> 16 + #include <linux/stringify.h> 16 17 #include "internal.h" 17 18 18 19 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, ··· 295 294 296 295 return low | ((u64)high) << 32; 297 296 } 297 + #elif defined(__aarch64__) 298 + #define read_sysreg(r) ({ \ 299 + u64 __val; \ 300 + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ 301 + __val; \ 302 + }) 303 + 304 + static u64 read_pmccntr(void) 305 + { 306 + return read_sysreg(pmccntr_el0); 307 + } 308 + 309 + #define PMEVCNTR_READ(idx) \ 310 + static u64 read_pmevcntr_##idx(void) { \ 311 + return read_sysreg(pmevcntr##idx##_el0); \ 312 + } 313 + 314 + PMEVCNTR_READ(0); 315 + PMEVCNTR_READ(1); 316 + PMEVCNTR_READ(2); 317 + PMEVCNTR_READ(3); 318 + PMEVCNTR_READ(4); 319 + PMEVCNTR_READ(5); 320 + PMEVCNTR_READ(6); 321 + PMEVCNTR_READ(7); 322 + PMEVCNTR_READ(8); 323 + PMEVCNTR_READ(9); 324 + PMEVCNTR_READ(10); 325 + PMEVCNTR_READ(11); 326 + PMEVCNTR_READ(12); 327 + PMEVCNTR_READ(13); 328 + PMEVCNTR_READ(14); 329 + PMEVCNTR_READ(15); 330 + PMEVCNTR_READ(16); 331 + PMEVCNTR_READ(17); 332 + PMEVCNTR_READ(18); 333 + PMEVCNTR_READ(19); 334 + PMEVCNTR_READ(20); 335 + PMEVCNTR_READ(21); 336 + PMEVCNTR_READ(22); 337 + PMEVCNTR_READ(23); 338 + PMEVCNTR_READ(24); 339 + PMEVCNTR_READ(25); 340 + PMEVCNTR_READ(26); 341 + PMEVCNTR_READ(27); 342 + PMEVCNTR_READ(28); 343 + PMEVCNTR_READ(29); 344 + PMEVCNTR_READ(30); 345 + 346 + /* 347 + * Read a value direct from PMEVCNTR<idx> 348 + */ 349 + static u64 read_perf_counter(unsigned int counter) 350 + { 351 + static u64 (* const read_f[])(void) = { 352 + read_pmevcntr_0, 353 + read_pmevcntr_1, 354 + read_pmevcntr_2, 355 + read_pmevcntr_3, 356 + read_pmevcntr_4, 357 + read_pmevcntr_5, 358 + read_pmevcntr_6, 359 + read_pmevcntr_7, 360 + read_pmevcntr_8, 361 + read_pmevcntr_9, 362 + read_pmevcntr_10, 363 + read_pmevcntr_11, 364 + read_pmevcntr_13, 365 + read_pmevcntr_12, 
366 + read_pmevcntr_14, 367 + read_pmevcntr_15, 368 + read_pmevcntr_16, 369 + read_pmevcntr_17, 370 + read_pmevcntr_18, 371 + read_pmevcntr_19, 372 + read_pmevcntr_20, 373 + read_pmevcntr_21, 374 + read_pmevcntr_22, 375 + read_pmevcntr_23, 376 + read_pmevcntr_24, 377 + read_pmevcntr_25, 378 + read_pmevcntr_26, 379 + read_pmevcntr_27, 380 + read_pmevcntr_28, 381 + read_pmevcntr_29, 382 + read_pmevcntr_30, 383 + read_pmccntr 384 + }; 385 + 386 + if (counter < ARRAY_SIZE(read_f)) 387 + return (read_f[counter])(); 388 + 389 + return 0; 390 + } 391 + 392 + static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } 393 + 298 394 #else 299 395 static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } 300 396 static u64 read_timestamp(void) { return 0; }
+4 -1
tools/lib/perf/tests/test-evsel.c
··· 130 130 struct perf_event_attr attr = { 131 131 .type = PERF_TYPE_HARDWARE, 132 132 .config = event, 133 + #ifdef __aarch64__ 134 + .config1 = 0x2, /* Request user access */ 135 + #endif 133 136 }; 134 137 int err, i; 135 138 ··· 153 150 pc = perf_evsel__mmap_base(evsel, 0, 0); 154 151 __T("failed to get mmapped address", pc); 155 152 156 - #if defined(__i386__) || defined(__x86_64__) 153 + #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) 157 154 __T("userspace counter access not supported", pc->cap_user_rdpmc); 158 155 __T("userspace counter access not enabled", pc->index); 159 156 __T("userspace counter width not set", pc->pmc_width >= 32);
+24 -21
tools/perf/builtin-ftrace.c
··· 1115 1115 int cmd_ftrace(int argc, const char **argv) 1116 1116 { 1117 1117 int ret; 1118 + int (*cmd_func)(struct perf_ftrace *) = NULL; 1118 1119 struct perf_ftrace ftrace = { 1119 1120 .tracer = DEFAULT_TRACER, 1120 1121 .target = { .uid = UINT_MAX, }, ··· 1222 1221 goto out_delete_filters; 1223 1222 } 1224 1223 1224 + switch (subcmd) { 1225 + case PERF_FTRACE_TRACE: 1226 + if (!argc && target__none(&ftrace.target)) 1227 + ftrace.target.system_wide = true; 1228 + cmd_func = __cmd_ftrace; 1229 + break; 1230 + case PERF_FTRACE_LATENCY: 1231 + if (list_empty(&ftrace.filters)) { 1232 + pr_err("Should provide a function to measure\n"); 1233 + parse_options_usage(ftrace_usage, options, "T", 1); 1234 + ret = -EINVAL; 1235 + goto out_delete_filters; 1236 + } 1237 + cmd_func = __cmd_latency; 1238 + break; 1239 + case PERF_FTRACE_NONE: 1240 + default: 1241 + pr_err("Invalid subcommand\n"); 1242 + ret = -EINVAL; 1243 + goto out_delete_filters; 1244 + } 1245 + 1225 1246 ret = target__validate(&ftrace.target); 1226 1247 if (ret) { 1227 1248 char errbuf[512]; ··· 1271 1248 goto out_delete_evlist; 1272 1249 } 1273 1250 1274 - switch (subcmd) { 1275 - case PERF_FTRACE_TRACE: 1276 - if (!argc && target__none(&ftrace.target)) 1277 - ftrace.target.system_wide = true; 1278 - ret = __cmd_ftrace(&ftrace); 1279 - break; 1280 - case PERF_FTRACE_LATENCY: 1281 - if (list_empty(&ftrace.filters)) { 1282 - pr_err("Should provide a function to measure\n"); 1283 - parse_options_usage(ftrace_usage, options, "T", 1); 1284 - ret = -EINVAL; 1285 - goto out_delete_evlist; 1286 - } 1287 - ret = __cmd_latency(&ftrace); 1288 - break; 1289 - case PERF_FTRACE_NONE: 1290 - default: 1291 - pr_err("Invalid subcommand\n"); 1292 - ret = -EINVAL; 1293 - break; 1294 - } 1251 + ret = cmd_func(&ftrace); 1295 1252 1296 1253 out_delete_evlist: 1297 1254 evlist__delete(ftrace.evlist);
+1 -1
tools/perf/trace/beauty/prctl_option.sh
··· 4 4 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ 5 5 6 6 printf "static const char *prctl_options[] = {\n" 7 - regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*' 7 + regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' 8 8 egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ 9 9 sed -r "s/$regex/\2 \1/g" | \ 10 10 sort -n | xargs printf "\t[%s] = \"%s\",\n"
+1
tools/perf/util/annotate.c
··· 2036 2036 memset(&objdump_process, 0, sizeof(objdump_process)); 2037 2037 objdump_process.argv = objdump_argv; 2038 2038 objdump_process.out = -1; 2039 + objdump_process.err = -1; 2039 2040 if (start_command(&objdump_process)) { 2040 2041 pr_err("Failure starting to run %s\n", command); 2041 2042 err = -1;
+1 -1
tools/perf/util/bpf_counter_cgroup.c
··· 266 266 idx = evsel->core.idx; 267 267 err = bpf_map_lookup_elem(reading_map_fd, &idx, values); 268 268 if (err) { 269 - pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n", 269 + pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n", 270 270 idx, evsel__name(evsel), evsel->cgrp->name); 271 271 goto out; 272 272 }
+2
tools/perf/util/machine.c
··· 2073 2073 2074 2074 ams->addr = ip; 2075 2075 ams->al_addr = al.addr; 2076 + ams->al_level = al.level; 2076 2077 ams->ms.maps = al.maps; 2077 2078 ams->ms.sym = al.sym; 2078 2079 ams->ms.map = al.map; ··· 2093 2092 2094 2093 ams->addr = addr; 2095 2094 ams->al_addr = al.addr; 2095 + ams->al_level = al.level; 2096 2096 ams->ms.maps = al.maps; 2097 2097 ams->ms.sym = al.sym; 2098 2098 ams->ms.map = al.map;
+1
tools/perf/util/map_symbol.h
··· 18 18 struct map_symbol ms; 19 19 u64 addr; 20 20 u64 al_addr; 21 + char al_level; 21 22 u64 phys_addr; 22 23 u64 data_page_size; 23 24 };
+1 -1
tools/perf/util/perf_event_attr_fprintf.c
··· 52 52 bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), 53 53 bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), 54 54 bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), 55 - bit_name(HW_INDEX), 55 + bit_name(TYPE_SAVE), bit_name(HW_INDEX), 56 56 { .name = NULL, } 57 57 }; 58 58 #undef bit_name
+2 -1
tools/perf/util/session.c
··· 1503 1503 ++evlist->stats.nr_unknown_id; 1504 1504 return 0; 1505 1505 } 1506 - dump_sample(evsel, event, sample, perf_env__arch(machine->env)); 1507 1506 if (machine == NULL) { 1508 1507 ++evlist->stats.nr_unprocessable_samples; 1508 + dump_sample(evsel, event, sample, perf_env__arch(NULL)); 1509 1509 return 0; 1510 1510 } 1511 + dump_sample(evsel, event, sample, perf_env__arch(machine->env)); 1511 1512 return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); 1512 1513 case PERF_RECORD_MMAP: 1513 1514 return tool->mmap(tool, event, sample, machine);
+2 -2
tools/perf/util/sort.c
··· 915 915 struct addr_map_symbol *from = &he->branch_info->from; 916 916 917 917 return _hist_entry__sym_snprintf(&from->ms, from->al_addr, 918 - he->level, bf, size, width); 918 + from->al_level, bf, size, width); 919 919 } 920 920 921 921 return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); ··· 928 928 struct addr_map_symbol *to = &he->branch_info->to; 929 929 930 930 return _hist_entry__sym_snprintf(&to->ms, to->al_addr, 931 - he->level, bf, size, width); 931 + to->al_level, bf, size, width); 932 932 } 933 933 934 934 return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+10 -9
tools/perf/util/stat-display.c
··· 585 585 586 586 alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); 587 587 list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { 588 - if (strcmp(evsel__name(alias), evsel__name(counter)) || 589 - alias->scale != counter->scale || 590 - alias->cgrp != counter->cgrp || 591 - strcmp(alias->unit, counter->unit) || 592 - evsel__is_clock(alias) != evsel__is_clock(counter) || 593 - !strcmp(alias->pmu_name, counter->pmu_name)) 594 - break; 595 - alias->merged_stat = true; 596 - cb(config, alias, data, false); 588 + /* Merge events with the same name, etc. but on different PMUs. */ 589 + if (!strcmp(evsel__name(alias), evsel__name(counter)) && 590 + alias->scale == counter->scale && 591 + alias->cgrp == counter->cgrp && 592 + !strcmp(alias->unit, counter->unit) && 593 + evsel__is_clock(alias) == evsel__is_clock(counter) && 594 + strcmp(alias->pmu_name, counter->pmu_name)) { 595 + alias->merged_stat = true; 596 + cb(config, alias, data, false); 597 + } 597 598 } 598 599 } 599 600
+19
tools/perf/util/synthetic-events.c
··· 1784 1784 perf_event__handler_t process, bool needs_mmap, 1785 1785 bool data_mmap, unsigned int nr_threads_synthesize) 1786 1786 { 1787 + /* 1788 + * When perf runs in non-root PID namespace, and the namespace's proc FS 1789 + * is not mounted, nsinfo__is_in_root_namespace() returns false. 1790 + * In this case, the proc FS is coming for the parent namespace, thus 1791 + * perf tool will wrongly gather process info from its parent PID 1792 + * namespace. 1793 + * 1794 + * To avoid the confusion that the perf tool runs in a child PID 1795 + * namespace but it synthesizes thread info from its parent PID 1796 + * namespace, returns failure with warning. 1797 + */ 1798 + if (!nsinfo__is_in_root_namespace()) { 1799 + pr_err("Perf runs in non-root PID namespace but it tries to "); 1800 + pr_err("gather process info from its parent PID namespace.\n"); 1801 + pr_err("Please mount the proc file system properly, e.g. "); 1802 + pr_err("add the option '--mount-proc' for unshare command.\n"); 1803 + return -EPERM; 1804 + } 1805 + 1787 1806 if (target__has_task(target)) 1788 1807 return perf_event__synthesize_thread_map(tool, threads, process, machine, 1789 1808 needs_mmap, data_mmap);