Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf evlist: Reduce affinity use and move into iterator, fix no affinity

The evlist__for_each_cpu iterator will call sched_setaffinity when
moving between CPUs to avoid IPIs.

If only 1 IPI is saved then this may be unprofitable as the delay to get
scheduled may be considerable.

This may be particularly true if reading an event group in `perf stat`
in interval mode.

Move the affinity handling completely into the iterator so that a single
evlist__use_affinity can determine whether CPU affinities will be used.

For `perf record` the change is minimal as the dummy event and the real
event will always make the use of affinities the thing to do.

In `perf stat`, tool events are ignored and affinities are only used if >1
event occurs on the same CPU.

Determining if affinities are useful is done by evlist__use_affinity
which tests per-event whether the event's PMU benefits from affinity use
- it is assumed that only perf event PMUs do.

Fix a bug where, when there are no affinities, the CPU map iterator
may reference a CPU not present in the initial evsel. Fix by making the
iterator and non-iterator code common.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andres Freund <andres@anarazel.de>
Cc: Dapeng Mi <dapeng1.mi@linux.intel.com>
Cc: Dr. David Alan Gilbert <linux@treblig.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
d4843615 47172912

+174 -131
+44 -64
tools/perf/builtin-stat.c
··· 369 369 static int read_counters_with_affinity(void) 370 370 { 371 371 struct evlist_cpu_iterator evlist_cpu_itr; 372 - struct affinity saved_affinity, *affinity; 373 372 374 373 if (all_counters_use_bpf) 375 374 return 0; 376 375 377 - if (!target__has_cpu(&target) || target__has_per_thread(&target)) 378 - affinity = NULL; 379 - else if (affinity__setup(&saved_affinity) < 0) 380 - return -1; 381 - else 382 - affinity = &saved_affinity; 383 - 384 - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 376 + evlist__for_each_cpu(evlist_cpu_itr, evsel_list) { 385 377 struct evsel *counter = evlist_cpu_itr.evsel; 386 378 387 379 if (evsel__is_bpf(counter)) ··· 385 393 if (!counter->err) 386 394 counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx); 387 395 } 388 - if (affinity) 389 - affinity__cleanup(&saved_affinity); 390 396 391 397 return 0; 392 398 } ··· 783 793 const bool forks = (argc > 0); 784 794 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; 785 795 struct evlist_cpu_iterator evlist_cpu_itr; 786 - struct affinity saved_affinity, *affinity = NULL; 787 796 int err, open_err = 0; 788 797 bool second_pass = false, has_supported_counters; 789 798 ··· 792 803 return -1; 793 804 } 794 805 child_pid = evsel_list->workload.pid; 795 - } 796 - 797 - if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) { 798 - if (affinity__setup(&saved_affinity) < 0) { 799 - err = -1; 800 - goto err_out; 801 - } 802 - affinity = &saved_affinity; 803 806 } 804 807 805 808 evlist__for_each_entry(evsel_list, counter) { ··· 806 825 807 826 evlist__reset_aggr_stats(evsel_list); 808 827 809 - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 810 - counter = evlist_cpu_itr.evsel; 828 + /* 829 + * bperf calls evsel__open_per_cpu() in bperf__load(), so 830 + * no need to call it again here. 
831 + */ 832 + if (!target.use_bpf) { 833 + evlist__for_each_cpu(evlist_cpu_itr, evsel_list) { 834 + counter = evlist_cpu_itr.evsel; 811 835 812 - /* 813 - * bperf calls evsel__open_per_cpu() in bperf__load(), so 814 - * no need to call it again here. 815 - */ 816 - if (target.use_bpf) 817 - break; 836 + if (counter->reset_group || !counter->supported) 837 + continue; 838 + if (evsel__is_bperf(counter)) 839 + continue; 818 840 819 - if (counter->reset_group || !counter->supported) 820 - continue; 821 - if (evsel__is_bperf(counter)) 822 - continue; 841 + while (true) { 842 + if (create_perf_stat_counter(counter, &stat_config, 843 + evlist_cpu_itr.cpu_map_idx) == 0) 844 + break; 823 845 824 - while (true) { 825 - if (create_perf_stat_counter(counter, &stat_config, 826 - evlist_cpu_itr.cpu_map_idx) == 0) 827 - break; 846 + open_err = errno; 847 + /* 848 + * Weak group failed. We cannot just undo this 849 + * here because earlier CPUs might be in group 850 + * mode, and the kernel doesn't support mixing 851 + * group and non group reads. Defer it to later. 852 + * Don't close here because we're in the wrong 853 + * affinity. 854 + */ 855 + if ((open_err == EINVAL || open_err == EBADF) && 856 + evsel__leader(counter) != counter && 857 + counter->weak_group) { 858 + evlist__reset_weak_group(evsel_list, counter, false); 859 + assert(counter->reset_group); 860 + counter->supported = true; 861 + second_pass = true; 862 + break; 863 + } 828 864 829 - open_err = errno; 830 - /* 831 - * Weak group failed. We cannot just undo this here 832 - * because earlier CPUs might be in group mode, and the kernel 833 - * doesn't support mixing group and non group reads. Defer 834 - * it to later. 835 - * Don't close here because we're in the wrong affinity. 
836 - */ 837 - if ((open_err == EINVAL || open_err == EBADF) && 838 - evsel__leader(counter) != counter && 839 - counter->weak_group) { 840 - evlist__reset_weak_group(evsel_list, counter, false); 841 - assert(counter->reset_group); 842 - counter->supported = true; 843 - second_pass = true; 844 - break; 865 + if (stat_handle_error(counter, open_err) != COUNTER_RETRY) 866 + break; 845 867 } 846 - 847 - if (stat_handle_error(counter, open_err) != COUNTER_RETRY) 848 - break; 849 868 } 850 869 } 851 - 852 870 if (second_pass) { 853 871 /* 854 872 * Now redo all the weak group after closing them, ··· 855 875 */ 856 876 857 877 /* First close errored or weak retry */ 858 - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 878 + evlist__for_each_cpu(evlist_cpu_itr, evsel_list) { 859 879 counter = evlist_cpu_itr.evsel; 860 880 861 881 if (!counter->reset_group && counter->supported) ··· 864 884 perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx); 865 885 } 866 886 /* Now reopen weak */ 867 - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 887 + evlist__for_each_cpu(evlist_cpu_itr, evsel_list) { 868 888 counter = evlist_cpu_itr.evsel; 869 889 870 890 if (!counter->reset_group) ··· 873 893 while (true) { 874 894 pr_debug2("reopening weak %s\n", evsel__name(counter)); 875 895 if (create_perf_stat_counter(counter, &stat_config, 876 - evlist_cpu_itr.cpu_map_idx) == 0) 896 + evlist_cpu_itr.cpu_map_idx) == 0) { 897 + evlist_cpu_iterator__exit(&evlist_cpu_itr); 877 898 break; 878 - 899 + } 879 900 open_err = errno; 880 - if (stat_handle_error(counter, open_err) != COUNTER_RETRY) 901 + if (stat_handle_error(counter, open_err) != COUNTER_RETRY) { 902 + evlist_cpu_iterator__exit(&evlist_cpu_itr); 881 903 break; 904 + } 882 905 } 883 906 } 884 907 } 885 - affinity__cleanup(affinity); 886 - affinity = NULL; 887 908 888 909 has_supported_counters = false; 889 910 evlist__for_each_entry(evsel_list, counter) { ··· 1046 1065 if (forks) 1047 1066 
evlist__cancel_workload(evsel_list); 1048 1067 1049 - affinity__cleanup(affinity); 1050 1068 return err; 1051 1069 } 1052 1070
+98 -60
tools/perf/util/evlist.c
··· 359 359 } 360 360 #endif 361 361 362 - struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity) 362 + /* 363 + * Should sched_setaffinity be used with evlist__for_each_cpu? Determine if 364 + * migrating the thread will avoid possibly numerous IPIs. 365 + */ 366 + static bool evlist__use_affinity(struct evlist *evlist) 363 367 { 364 - struct evlist_cpu_iterator itr = { 368 + struct evsel *pos; 369 + struct perf_cpu_map *used_cpus = NULL; 370 + bool ret = false; 371 + 372 + /* 373 + * With perf record core.user_requested_cpus is usually NULL. 374 + * Use the old method to handle this for now. 375 + */ 376 + if (!evlist->core.user_requested_cpus || 377 + cpu_map__is_dummy(evlist->core.user_requested_cpus)) 378 + return false; 379 + 380 + evlist__for_each_entry(evlist, pos) { 381 + struct perf_cpu_map *intersect; 382 + 383 + if (!perf_pmu__benefits_from_affinity(pos->pmu)) 384 + continue; 385 + 386 + if (evsel__is_dummy_event(pos)) { 387 + /* 388 + * The dummy event is opened on all CPUs so assume >1 389 + * event with shared CPUs. 390 + */ 391 + ret = true; 392 + break; 393 + } 394 + if (evsel__is_retire_lat(pos)) { 395 + /* 396 + * Retirement latency events are similar to tool ones in 397 + * their implementation, and so don't require affinity. 398 + */ 399 + continue; 400 + } 401 + if (perf_cpu_map__is_empty(used_cpus)) { 402 + /* First benefitting event, we want >1 on a common CPU. */ 403 + used_cpus = perf_cpu_map__get(pos->core.cpus); 404 + continue; 405 + } 406 + if ((pos->core.attr.read_format & PERF_FORMAT_GROUP) && 407 + evsel__leader(pos) != pos) { 408 + /* Skip members of the same sample group. */ 409 + continue; 410 + } 411 + intersect = perf_cpu_map__intersect(used_cpus, pos->core.cpus); 412 + if (!perf_cpu_map__is_empty(intersect)) { 413 + /* >1 event with shared CPUs. 
*/ 414 + perf_cpu_map__put(intersect); 415 + ret = true; 416 + break; 417 + } 418 + perf_cpu_map__put(intersect); 419 + perf_cpu_map__merge(&used_cpus, pos->core.cpus); 420 + } 421 + perf_cpu_map__put(used_cpus); 422 + return ret; 423 + } 424 + 425 + void evlist_cpu_iterator__init(struct evlist_cpu_iterator *itr, struct evlist *evlist) 426 + { 427 + *itr = (struct evlist_cpu_iterator){ 365 428 .container = evlist, 366 429 .evsel = NULL, 367 430 .cpu_map_idx = 0, 368 431 .evlist_cpu_map_idx = 0, 369 432 .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus), 370 433 .cpu = (struct perf_cpu){ .cpu = -1}, 371 - .affinity = affinity, 434 + .affinity = NULL, 372 435 }; 373 436 374 437 if (evlist__empty(evlist)) { 375 438 /* Ensure the empty list doesn't iterate. */ 376 - itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr; 377 - } else { 378 - itr.evsel = evlist__first(evlist); 379 - if (itr.affinity) { 380 - itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); 381 - affinity__set(itr.affinity, itr.cpu.cpu); 382 - itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); 383 - /* 384 - * If this CPU isn't in the evsel's cpu map then advance 385 - * through the list. 386 - */ 387 - if (itr.cpu_map_idx == -1) 388 - evlist_cpu_iterator__next(&itr); 389 - } 439 + itr->evlist_cpu_map_idx = itr->evlist_cpu_map_nr; 440 + return; 390 441 } 391 - return itr; 442 + 443 + if (evlist__use_affinity(evlist)) { 444 + if (affinity__setup(&itr->saved_affinity) == 0) 445 + itr->affinity = &itr->saved_affinity; 446 + } 447 + itr->evsel = evlist__first(evlist); 448 + itr->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); 449 + if (itr->affinity) 450 + affinity__set(itr->affinity, itr->cpu.cpu); 451 + itr->cpu_map_idx = perf_cpu_map__idx(itr->evsel->core.cpus, itr->cpu); 452 + /* 453 + * If this CPU isn't in the evsel's cpu map then advance 454 + * through the list. 
455 + */ 456 + if (itr->cpu_map_idx == -1) 457 + evlist_cpu_iterator__next(itr); 458 + } 459 + 460 + void evlist_cpu_iterator__exit(struct evlist_cpu_iterator *itr) 461 + { 462 + if (!itr->affinity) 463 + return; 464 + 465 + affinity__cleanup(itr->affinity); 466 + itr->affinity = NULL; 392 467 } 393 468 394 469 void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr) ··· 493 418 */ 494 419 if (evlist_cpu_itr->cpu_map_idx == -1) 495 420 evlist_cpu_iterator__next(evlist_cpu_itr); 421 + } else { 422 + evlist_cpu_iterator__exit(evlist_cpu_itr); 496 423 } 497 - } 498 - 499 - bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr) 500 - { 501 - return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr; 502 424 } 503 425 504 426 static int evsel__strcmp(struct evsel *pos, char *evsel_name) ··· 525 453 { 526 454 struct evsel *pos; 527 455 struct evlist_cpu_iterator evlist_cpu_itr; 528 - struct affinity saved_affinity, *affinity = NULL; 529 456 bool has_imm = false; 530 - 531 - // See explanation in evlist__close() 532 - if (!cpu_map__is_dummy(evlist->core.user_requested_cpus)) { 533 - if (affinity__setup(&saved_affinity) < 0) 534 - return; 535 - affinity = &saved_affinity; 536 - } 537 457 538 458 /* Disable 'immediate' events last */ 539 459 for (int imm = 0; imm <= 1; imm++) { 540 - evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { 460 + evlist__for_each_cpu(evlist_cpu_itr, evlist) { 541 461 pos = evlist_cpu_itr.evsel; 542 462 if (evsel__strcmp(pos, evsel_name)) 543 463 continue; ··· 547 483 break; 548 484 } 549 485 550 - affinity__cleanup(affinity); 551 486 evlist__for_each_entry(evlist, pos) { 552 487 if (evsel__strcmp(pos, evsel_name)) 553 488 continue; ··· 586 523 { 587 524 struct evsel *pos; 588 525 struct evlist_cpu_iterator evlist_cpu_itr; 589 - struct affinity saved_affinity, *affinity = NULL; 590 526 591 - // See explanation in evlist__close() 592 - if 
(!cpu_map__is_dummy(evlist->core.user_requested_cpus)) { 593 - if (affinity__setup(&saved_affinity) < 0) 594 - return; 595 - affinity = &saved_affinity; 596 - } 597 - 598 - evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { 527 + evlist__for_each_cpu(evlist_cpu_itr, evlist) { 599 528 pos = evlist_cpu_itr.evsel; 600 529 if (evsel__strcmp(pos, evsel_name)) 601 530 continue; ··· 597 542 continue; 598 543 evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx); 599 544 } 600 - affinity__cleanup(affinity); 601 545 evlist__for_each_entry(evlist, pos) { 602 546 if (evsel__strcmp(pos, evsel_name)) 603 547 continue; ··· 1393 1339 { 1394 1340 struct evsel *evsel; 1395 1341 struct evlist_cpu_iterator evlist_cpu_itr; 1396 - struct affinity affinity; 1397 1342 1398 - /* 1399 - * With perf record core.user_requested_cpus is usually NULL. 1400 - * Use the old method to handle this for now. 1401 - */ 1402 - if (!evlist->core.user_requested_cpus || 1403 - cpu_map__is_dummy(evlist->core.user_requested_cpus)) { 1404 - evlist__for_each_entry_reverse(evlist, evsel) 1405 - evsel__close(evsel); 1406 - return; 1407 - } 1408 - 1409 - if (affinity__setup(&affinity) < 0) 1410 - return; 1411 - 1412 - evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { 1343 + evlist__for_each_cpu(evlist_cpu_itr, evlist) { 1413 1344 if (evlist_cpu_itr.cpu_map_idx == 0 && evsel__is_retire_lat(evlist_cpu_itr.evsel)) 1414 1345 evsel__tpebs_close(evlist_cpu_itr.evsel); 1415 1346 perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core, 1416 1347 evlist_cpu_itr.cpu_map_idx); 1417 1348 } 1418 1349 1419 - affinity__cleanup(&affinity); 1420 1350 evlist__for_each_entry_reverse(evlist, evsel) { 1421 1351 perf_evsel__free_fd(&evsel->core); 1422 1352 perf_evsel__free_id(&evsel->core);
+19 -7
tools/perf/util/evlist.h
··· 10 10 #include <internal/evlist.h> 11 11 #include <internal/evsel.h> 12 12 #include <perf/evlist.h> 13 + #include "affinity.h" 13 14 #include "events_stats.h" 14 15 #include "evsel.h" 15 16 #include "rblist.h" ··· 364 363 struct perf_cpu cpu; 365 364 /** If present, used to set the affinity when switching between CPUs. */ 366 365 struct affinity *affinity; 366 + /** Maybe be used to hold affinity state prior to iterating. */ 367 + struct affinity saved_affinity; 367 368 }; 368 369 369 370 /** ··· 373 370 * affinity, iterate over all CPUs and then the evlist 374 371 * for each evsel on that CPU. When switching between 375 372 * CPUs the affinity is set to the CPU to avoid IPIs 376 - * during syscalls. 373 + * during syscalls. The affinity is set up and removed 374 + * automatically, if the loop is broken a call to 375 + * evlist_cpu_iterator__exit is necessary. 377 376 * @evlist_cpu_itr: the iterator instance. 378 377 * @evlist: evlist instance to iterate. 379 - * @affinity: NULL or used to set the affinity to the current CPU. 380 378 */ 381 - #define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \ 382 - for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \ 379 + #define evlist__for_each_cpu(evlist_cpu_itr, evlist) \ 380 + for (evlist_cpu_iterator__init(&(evlist_cpu_itr), evlist); \ 383 381 !evlist_cpu_iterator__end(&evlist_cpu_itr); \ 384 382 evlist_cpu_iterator__next(&evlist_cpu_itr)) 385 383 386 - /** Returns an iterator set to the first CPU/evsel of evlist. */ 387 - struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity); 384 + /** Setup an iterator set to the first CPU/evsel of evlist. */ 385 + void evlist_cpu_iterator__init(struct evlist_cpu_iterator *itr, struct evlist *evlist); 386 + /** 387 + * Cleans up the iterator, automatically done by evlist_cpu_iterator__next when 388 + * the end of the list is reached. Multiple calls are safe. 
389 + */ 390 + void evlist_cpu_iterator__exit(struct evlist_cpu_iterator *itr); 388 391 /** Move to next element in iterator, updating CPU, evsel and the affinity. */ 389 392 void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr); 390 393 /** Returns true when iterator is at the end of the CPUs and evlist. */ 391 - bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr); 394 + static inline bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr) 395 + { 396 + return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr; 397 + } 392 398 393 399 struct evsel *evlist__get_tracking_event(struct evlist *evlist); 394 400 void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
+12
tools/perf/util/pmu.c
··· 2375 2375 return false; 2376 2376 } 2377 2377 2378 + bool perf_pmu__benefits_from_affinity(struct perf_pmu *pmu) 2379 + { 2380 + if (!pmu) 2381 + return true; /* Assume is core. */ 2382 + 2383 + /* 2384 + * All perf event PMUs should benefit from accessing the perf event 2385 + * contexts on the local CPU. 2386 + */ 2387 + return pmu->type <= PERF_PMU_TYPE_PE_END; 2388 + } 2389 + 2378 2390 FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name) 2379 2391 { 2380 2392 char path[PATH_MAX];
+1
tools/perf/util/pmu.h
··· 303 303 * perf_sw_context in the kernel? 304 304 */ 305 305 bool perf_pmu__is_software(const struct perf_pmu *pmu); 306 + bool perf_pmu__benefits_from_affinity(struct perf_pmu *pmu); 306 307 307 308 FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name); 308 309 FILE *perf_pmu__open_file_at(const struct perf_pmu *pmu, int dirfd, const char *name);