Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-tools-fixes-for-v5.18-2022-04-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

- 'perf record --per-thread' mode doesn't have the CPU mask set up, so
it can't use it to figure out the number of mmaps, fix it.

- Fix segfault accessing sample_id xyarray out of bounds, noticed while
using Intel PT where we have a dummy event to capture text poke perf
metadata events and we mix up the set of CPUs specified by the user
with the all CPUs map needed for text poke.

- Fix 'perf bench numa' to check if CPU used to bind task is online.

- Fix 'perf bench numa' usage of affinity for machines with more than
1000 CPUs.

- Fix misleading add event PMU debug message, noticed while using the
'intel_pt' PMU.

- Fix the error check on the return value of hashmap__new() in 'perf stat';
it must use IS_ERR().

* tag 'perf-tools-fixes-for-v5.18-2022-04-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf bench: Fix numa bench to fix usage of affinity for machines with #CPUs > 1K
perf bench: Fix numa testcase to check if CPU used to bind task is online
perf record: Fix per-thread option
perf tools: Fix segfault accessing sample_id xyarray
perf stat: Fix error check return value of hashmap__new(), must use IS_ERR()
perf tools: Fix misleading add event PMU debug message

+176 -45
+1 -2
tools/lib/perf/evlist.c
··· 577 577 { 578 578 struct perf_evsel *evsel; 579 579 const struct perf_cpu_map *cpus = evlist->user_requested_cpus; 580 - const struct perf_thread_map *threads = evlist->threads; 581 580 582 581 if (!ops || !ops->get || !ops->mmap) 583 582 return -EINVAL; ··· 588 589 perf_evlist__for_each_entry(evlist, evsel) { 589 590 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 590 591 evsel->sample_id == NULL && 591 - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) 592 + perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0) 592 593 return -ENOMEM; 593 594 } 594 595
+101 -35
tools/perf/bench/numa.c
··· 34 34 #include <linux/numa.h> 35 35 #include <linux/zalloc.h> 36 36 37 + #include "../util/header.h" 37 38 #include <numa.h> 38 39 #include <numaif.h> 39 40 ··· 55 54 56 55 struct thread_data { 57 56 int curr_cpu; 58 - cpu_set_t bind_cpumask; 57 + cpu_set_t *bind_cpumask; 59 58 int bind_node; 60 59 u8 *process_data; 61 60 int process_nr; ··· 267 266 return ret; 268 267 } 269 268 270 - static cpu_set_t bind_to_cpu(int target_cpu) 269 + static cpu_set_t *bind_to_cpu(int target_cpu) 271 270 { 272 - cpu_set_t orig_mask, mask; 273 - int ret; 271 + int nrcpus = numa_num_possible_cpus(); 272 + cpu_set_t *orig_mask, *mask; 273 + size_t size; 274 274 275 - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); 276 - BUG_ON(ret); 275 + orig_mask = CPU_ALLOC(nrcpus); 276 + BUG_ON(!orig_mask); 277 + size = CPU_ALLOC_SIZE(nrcpus); 278 + CPU_ZERO_S(size, orig_mask); 277 279 278 - CPU_ZERO(&mask); 280 + if (sched_getaffinity(0, size, orig_mask)) 281 + goto err_out; 282 + 283 + mask = CPU_ALLOC(nrcpus); 284 + if (!mask) 285 + goto err_out; 286 + 287 + CPU_ZERO_S(size, mask); 279 288 280 289 if (target_cpu == -1) { 281 290 int cpu; 282 291 283 292 for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 284 - CPU_SET(cpu, &mask); 293 + CPU_SET_S(cpu, size, mask); 285 294 } else { 286 - BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus); 287 - CPU_SET(target_cpu, &mask); 295 + if (target_cpu < 0 || target_cpu >= g->p.nr_cpus) 296 + goto err; 297 + 298 + CPU_SET_S(target_cpu, size, mask); 288 299 } 289 300 290 - ret = sched_setaffinity(0, sizeof(mask), &mask); 291 - BUG_ON(ret); 301 + if (sched_setaffinity(0, size, mask)) 302 + goto err; 292 303 293 304 return orig_mask; 305 + 306 + err: 307 + CPU_FREE(mask); 308 + err_out: 309 + CPU_FREE(orig_mask); 310 + 311 + /* BUG_ON due to failure in allocation of orig_mask/mask */ 312 + BUG_ON(-1); 294 313 } 295 314 296 - static cpu_set_t bind_to_node(int target_node) 315 + static cpu_set_t *bind_to_node(int target_node) 297 316 { 298 - cpu_set_t 
orig_mask, mask; 317 + int nrcpus = numa_num_possible_cpus(); 318 + size_t size; 319 + cpu_set_t *orig_mask, *mask; 299 320 int cpu; 300 - int ret; 301 321 302 - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); 303 - BUG_ON(ret); 322 + orig_mask = CPU_ALLOC(nrcpus); 323 + BUG_ON(!orig_mask); 324 + size = CPU_ALLOC_SIZE(nrcpus); 325 + CPU_ZERO_S(size, orig_mask); 304 326 305 - CPU_ZERO(&mask); 327 + if (sched_getaffinity(0, size, orig_mask)) 328 + goto err_out; 329 + 330 + mask = CPU_ALLOC(nrcpus); 331 + if (!mask) 332 + goto err_out; 333 + 334 + CPU_ZERO_S(size, mask); 306 335 307 336 if (target_node == NUMA_NO_NODE) { 308 337 for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 309 - CPU_SET(cpu, &mask); 338 + CPU_SET_S(cpu, size, mask); 310 339 } else { 311 340 struct bitmask *cpumask = numa_allocate_cpumask(); 312 341 313 - BUG_ON(!cpumask); 342 + if (!cpumask) 343 + goto err; 344 + 314 345 if (!numa_node_to_cpus(target_node, cpumask)) { 315 346 for (cpu = 0; cpu < (int)cpumask->size; cpu++) { 316 347 if (numa_bitmask_isbitset(cpumask, cpu)) 317 - CPU_SET(cpu, &mask); 348 + CPU_SET_S(cpu, size, mask); 318 349 } 319 350 } 320 351 numa_free_cpumask(cpumask); 321 352 } 322 353 323 - ret = sched_setaffinity(0, sizeof(mask), &mask); 324 - BUG_ON(ret); 354 + if (sched_setaffinity(0, size, mask)) 355 + goto err; 325 356 326 357 return orig_mask; 358 + 359 + err: 360 + CPU_FREE(mask); 361 + err_out: 362 + CPU_FREE(orig_mask); 363 + 364 + /* BUG_ON due to failure in allocation of orig_mask/mask */ 365 + BUG_ON(-1); 327 366 } 328 367 329 - static void bind_to_cpumask(cpu_set_t mask) 368 + static void bind_to_cpumask(cpu_set_t *mask) 330 369 { 331 370 int ret; 371 + size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus()); 332 372 333 - ret = sched_setaffinity(0, sizeof(mask), &mask); 334 - BUG_ON(ret); 373 + ret = sched_setaffinity(0, size, mask); 374 + if (ret) { 375 + CPU_FREE(mask); 376 + BUG_ON(ret); 377 + } 335 378 } 336 379 337 380 static void mempol_restore(void) ··· 
421 376 static u8 *alloc_data(ssize_t bytes0, int map_flags, 422 377 int init_zero, int init_cpu0, int thp, int init_random) 423 378 { 424 - cpu_set_t orig_mask; 379 + cpu_set_t *orig_mask = NULL; 425 380 ssize_t bytes; 426 381 u8 *buf; 427 382 int ret; ··· 479 434 /* Restore affinity: */ 480 435 if (init_cpu0) { 481 436 bind_to_cpumask(orig_mask); 437 + CPU_FREE(orig_mask); 482 438 mempol_restore(); 483 439 } 484 440 ··· 631 585 return -1; 632 586 } 633 587 588 + if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) { 589 + printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n"); 590 + return -1; 591 + } 592 + 634 593 BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); 635 594 BUG_ON(bind_cpu_0 > bind_cpu_1); 636 595 637 596 for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { 597 + size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus); 638 598 int i; 639 599 640 600 for (i = 0; i < mul; i++) { ··· 660 608 tprintf("%2d", bind_cpu); 661 609 } 662 610 663 - CPU_ZERO(&td->bind_cpumask); 611 + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); 612 + BUG_ON(!td->bind_cpumask); 613 + CPU_ZERO_S(size, td->bind_cpumask); 664 614 for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { 665 - BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); 666 - CPU_SET(cpu, &td->bind_cpumask); 615 + if (cpu < 0 || cpu >= g->p.nr_cpus) { 616 + CPU_FREE(td->bind_cpumask); 617 + BUG_ON(-1); 618 + } 619 + CPU_SET_S(cpu, size, td->bind_cpumask); 667 620 } 668 621 t++; 669 622 } ··· 808 751 809 752 return parse_node_list(arg); 810 753 } 811 - 812 - #define BIT(x) (1ul << x) 813 754 814 755 static inline uint32_t lfsr_32(uint32_t lfsr) 815 756 { ··· 1296 1241 * by migrating to CPU#0: 1297 1242 */ 1298 1243 if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) { 1299 - cpu_set_t orig_mask; 1244 + cpu_set_t *orig_mask; 1300 1245 int target_cpu; 1301 1246 int this_cpu; 1302 1247 ··· 1320 1265 printf(" (injecting perturbalance, moved to 
CPU#%d)\n", target_cpu); 1321 1266 1322 1267 bind_to_cpumask(orig_mask); 1268 + CPU_FREE(orig_mask); 1323 1269 } 1324 1270 1325 1271 if (details >= 3) { ··· 1454 1398 1455 1399 for (t = 0; t < g->p.nr_tasks; t++) { 1456 1400 struct thread_data *td = g->threads + t; 1401 + size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus); 1457 1402 int cpu; 1458 1403 1459 1404 /* Allow all nodes by default: */ 1460 1405 td->bind_node = NUMA_NO_NODE; 1461 1406 1462 1407 /* Allow all CPUs by default: */ 1463 - CPU_ZERO(&td->bind_cpumask); 1408 + td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus); 1409 + BUG_ON(!td->bind_cpumask); 1410 + CPU_ZERO_S(cpuset_size, td->bind_cpumask); 1464 1411 for (cpu = 0; cpu < g->p.nr_cpus; cpu++) 1465 - CPU_SET(cpu, &td->bind_cpumask); 1412 + CPU_SET_S(cpu, cpuset_size, td->bind_cpumask); 1466 1413 } 1467 1414 } 1468 1415 1469 1416 static void deinit_thread_data(void) 1470 1417 { 1471 1418 ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; 1419 + int t; 1420 + 1421 + /* Free the bind_cpumask allocated for thread_data */ 1422 + for (t = 0; t < g->p.nr_tasks; t++) { 1423 + struct thread_data *td = g->threads + t; 1424 + CPU_FREE(td->bind_cpumask); 1425 + } 1472 1426 1473 1427 free_data(g->threads, size); 1474 1428 }
+17 -5
tools/perf/builtin-record.c
··· 989 989 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 990 990 struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; 991 991 992 - thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 993 - thread_data->mask->maps.nbits); 992 + if (cpu_map__is_dummy(cpus)) 993 + thread_data->nr_mmaps = nr_mmaps; 994 + else 995 + thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 996 + thread_data->mask->maps.nbits); 994 997 if (mmap) { 995 998 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 996 999 if (!thread_data->maps) ··· 1010 1007 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1011 1008 1012 1009 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1013 - if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { 1010 + if (cpu_map__is_dummy(cpus) || 1011 + test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { 1014 1012 if (thread_data->maps) { 1015 1013 thread_data->maps[tm] = &mmap[m]; 1016 1014 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1017 - thread_data, cpus->map[m].cpu, tm, m); 1015 + thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1018 1016 } 1019 1017 if (thread_data->overwrite_maps) { 1020 1018 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1021 1019 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1022 - thread_data, cpus->map[m].cpu, tm, m); 1020 + thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1023 1021 } 1024 1022 tm++; 1025 1023 } ··· 3333 3329 { 3334 3330 int c; 3335 3331 3332 + if (cpu_map__is_dummy(cpus)) 3333 + return; 3334 + 3336 3335 for (c = 0; c < cpus->nr; c++) 3337 3336 set_bit(cpus->map[c].cpu, mask->bits); 3338 3337 } ··· 3686 3679 3687 3680 if (!record__threads_enabled(rec)) 3688 3681 return record__init_thread_default_masks(rec, cpus); 3682 + 3683 + if (cpu_map__is_dummy(cpus)) { 3684 + pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 
3685 + return -EINVAL; 3686 + } 3689 3687 3690 3688 switch (rec->opts.threads_spec) { 3691 3689 case THREAD_SPEC__CPU:
+51
tools/perf/util/header.c
··· 983 983 return do_write(ff, &data->dir.version, sizeof(data->dir.version)); 984 984 } 985 985 986 + /* 987 + * Check whether a CPU is online 988 + * 989 + * Returns: 990 + * 1 -> if CPU is online 991 + * 0 -> if CPU is offline 992 + * -1 -> error case 993 + */ 994 + int is_cpu_online(unsigned int cpu) 995 + { 996 + char *str; 997 + size_t strlen; 998 + char buf[256]; 999 + int status = -1; 1000 + struct stat statbuf; 1001 + 1002 + snprintf(buf, sizeof(buf), 1003 + "/sys/devices/system/cpu/cpu%d", cpu); 1004 + if (stat(buf, &statbuf) != 0) 1005 + return 0; 1006 + 1007 + /* 1008 + * Check if /sys/devices/system/cpu/cpux/online file 1009 + * exists. Some cases cpu0 won't have online file since 1010 + * it is not expected to be turned off generally. 1011 + * In kernels without CONFIG_HOTPLUG_CPU, this 1012 + * file won't exist 1013 + */ 1014 + snprintf(buf, sizeof(buf), 1015 + "/sys/devices/system/cpu/cpu%d/online", cpu); 1016 + if (stat(buf, &statbuf) != 0) 1017 + return 1; 1018 + 1019 + /* 1020 + * Read online file using sysfs__read_str. 1021 + * If read or open fails, return -1. 1022 + * If read succeeds, return value from file 1023 + * which gets stored in "str" 1024 + */ 1025 + snprintf(buf, sizeof(buf), 1026 + "devices/system/cpu/cpu%d/online", cpu); 1027 + 1028 + if (sysfs__read_str(buf, &str, &strlen) < 0) 1029 + return status; 1030 + 1031 + status = atoi(str); 1032 + 1033 + free(str); 1034 + return status; 1035 + } 1036 + 986 1037 #ifdef HAVE_LIBBPF_SUPPORT 987 1038 static int write_bpf_prog_info(struct feat_fd *ff, 988 1039 struct evlist *evlist __maybe_unused)
+1
tools/perf/util/header.h
··· 158 158 int write_padded(struct feat_fd *fd, const void *bf, 159 159 size_t count, size_t count_aligned); 160 160 161 + int is_cpu_online(unsigned int cpu); 161 162 /* 162 163 * arch specific callback 163 164 */
+3 -2
tools/perf/util/parse-events.c
··· 1523 1523 bool use_uncore_alias; 1524 1524 LIST_HEAD(config_terms); 1525 1525 1526 - if (verbose > 1) { 1526 + pmu = parse_state->fake_pmu ?: perf_pmu__find(name); 1527 + 1528 + if (verbose > 1 && !(pmu && pmu->selectable)) { 1527 1529 fprintf(stderr, "Attempting to add event pmu '%s' with '", 1528 1530 name); 1529 1531 if (head_config) { ··· 1538 1536 fprintf(stderr, "' that may result in non-fatal errors\n"); 1539 1537 } 1540 1538 1541 - pmu = parse_state->fake_pmu ?: perf_pmu__find(name); 1542 1539 if (!pmu) { 1543 1540 char *err_str; 1544 1541
+2 -1
tools/perf/util/stat.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <errno.h> 3 + #include <linux/err.h> 3 4 #include <inttypes.h> 4 5 #include <math.h> 5 6 #include <string.h> ··· 312 311 313 312 if (!mask) { 314 313 mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL); 315 - if (!mask) 314 + if (IS_ERR(mask)) 316 315 return -ENOMEM; 317 316 318 317 counter->per_pkg_mask = mask;