Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf jevents: Group events by PMU

Prior to this change, a cpuid would map to a list of events where the PMU
would be encoded alongside the event information. This change breaks
apart each group of events so that there is a group per PMU. A new table
is added with the PMU's name and the list of events; the original table
now holds an array of these per-PMU tables.

These changes make it easier to get per-PMU information about events,
rather than the current approach of scanning all events. The perf binary
size with BPF skeletons on x86 is reduced by about 1%. An event whose
JSON does not specify a unit now always has its PMU expanded to "cpu".

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Gaosheng Cui <cuigaosheng1@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jing Zhang <renyu.zj@linux.alibaba.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230824041330.266337-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
2e255b4f 4000519e

+154 -57
+136 -45
tools/perf/pmu-events/jevents.py
··· 42 42 # Order specific JsonEvent attributes will be visited. 43 43 _json_event_attributes = [ 44 44 # cmp_sevent related attributes. 45 - 'name', 'pmu', 'topic', 'desc', 45 + 'name', 'topic', 'desc', 46 46 # Seems useful, put it early. 47 47 'event', 48 48 # Short things in alphabetical order. ··· 53 53 54 54 # Attributes that are in pmu_metric rather than pmu_event. 55 55 _json_metric_attributes = [ 56 - 'pmu', 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 56 + 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 57 57 'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group', 58 58 'default_metricgroup_name', 'aggr_mode', 'event_grouping' 59 59 ] ··· 252 252 def unit_to_pmu(unit: str) -> Optional[str]: 253 253 """Convert a JSON Unit to Linux PMU name.""" 254 254 if not unit: 255 - return None 255 + return 'cpu' 256 256 # Comment brought over from jevents.c: 257 257 # it's not realistic to keep adding these, we need something more scalable ... 258 258 table = { ··· 343 343 self.desc += extra_desc 344 344 if self.long_desc and extra_desc: 345 345 self.long_desc += extra_desc 346 - if self.pmu: 347 - if self.desc and not self.desc.endswith('. '): 348 - self.desc += '. ' 349 - self.desc = (self.desc if self.desc else '') + ('Unit: ' + self.pmu + ' ') 346 + if self.pmu and self.pmu != 'cpu': 347 + if not self.desc: 348 + self.desc = 'Unit: ' + self.pmu 349 + else: 350 + if not self.desc.endswith('. '): 351 + self.desc += '. 
' 352 + self.desc += 'Unit: ' + self.pmu 350 353 if arch_std: 351 354 if arch_std.lower() in _arch_std_events: 352 355 event = _arch_std_events[arch_std.lower()].event ··· 440 437 def print_pending_events() -> None: 441 438 """Optionally close events table.""" 442 439 443 - def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]: 440 + def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]: 444 441 def fix_none(s: Optional[str]) -> str: 445 442 if s is None: 446 443 return '' 447 444 return s 448 445 449 - return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu), 446 + return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic), 450 447 fix_none(j.metric_name)) 451 448 452 449 global _pending_events ··· 461 458 global event_tables 462 459 _event_tables.append(_pending_events_tblname) 463 460 464 - _args.output_file.write( 465 - f'static const struct compact_pmu_event {_pending_events_tblname}[] = {{\n') 466 - 461 + first = True 462 + last_pmu = None 463 + pmus = set() 467 464 for event in sorted(_pending_events, key=event_cmp_key): 465 + if event.pmu != last_pmu: 466 + if not first: 467 + _args.output_file.write('};\n') 468 + pmu_name = event.pmu.replace(',', '_') 469 + _args.output_file.write( 470 + f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n') 471 + first = False 472 + last_pmu = event.pmu 473 + pmus.add((event.pmu, pmu_name)) 474 + 468 475 _args.output_file.write(event.to_c_string(metric=False)) 469 476 _pending_events = [] 470 477 478 + _args.output_file.write(f""" 479 + }}; 480 + 481 + const struct pmu_table_entry {_pending_events_tblname}[] = {{ 482 + """) 483 + for (pmu, tbl_pmu) in sorted(pmus): 484 + pmu_name = f"{pmu}\\000" 485 + _args.output_file.write(f"""{{ 486 + .entries = {_pending_events_tblname}_{tbl_pmu}, 487 + .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}), 488 + .pmu_name = {{ {_bcs.offsets[pmu_name]} /* 
{pmu_name} */ }}, 489 + }}, 490 + """) 471 491 _args.output_file.write('};\n\n') 472 492 473 493 def print_pending_metrics() -> None: ··· 516 490 global metric_tables 517 491 _metric_tables.append(_pending_metrics_tblname) 518 492 519 - _args.output_file.write( 520 - f'static const struct compact_pmu_event {_pending_metrics_tblname}[] = {{\n') 521 - 493 + first = True 494 + last_pmu = None 495 + pmus = set() 522 496 for metric in sorted(_pending_metrics, key=metric_cmp_key): 497 + if metric.pmu != last_pmu: 498 + if not first: 499 + _args.output_file.write('};\n') 500 + pmu_name = metric.pmu.replace(',', '_') 501 + _args.output_file.write( 502 + f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n') 503 + first = False 504 + last_pmu = metric.pmu 505 + pmus.add((metric.pmu, pmu_name)) 506 + 523 507 _args.output_file.write(metric.to_c_string(metric=True)) 524 508 _pending_metrics = [] 525 509 510 + _args.output_file.write(f""" 511 + }}; 512 + 513 + const struct pmu_table_entry {_pending_metrics_tblname}[] = {{ 514 + """) 515 + for (pmu, tbl_pmu) in sorted(pmus): 516 + pmu_name = f"{pmu}\\000" 517 + _args.output_file.write(f"""{{ 518 + .entries = {_pending_metrics_tblname}_{tbl_pmu}, 519 + .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}), 520 + .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }}, 521 + }}, 522 + """) 526 523 _args.output_file.write('};\n\n') 527 524 528 525 def get_topic(topic: str) -> str: ··· 581 532 582 533 topic = get_topic(item.name) 583 534 for event in read_json_events(item.path, topic): 535 + pmu_name = f"{event.pmu}\\000" 536 + _bcs.add(pmu_name) 584 537 if event.name: 585 538 _bcs.add(event.build_c_string(metric=False)) 586 539 if event.metric_name: ··· 628 577 _args.output_file.write(""" 629 578 /* Struct used to make the PMU event table implementation opaque to callers. 
*/ 630 579 struct pmu_events_table { 631 - const struct compact_pmu_event *entries; 632 - size_t length; 580 + const struct pmu_table_entry *pmus; 581 + uint32_t num_pmus; 633 582 }; 634 583 635 584 /* Struct used to make the PMU metric table implementation opaque to callers. */ 636 585 struct pmu_metrics_table { 637 - const struct compact_pmu_event *entries; 638 - size_t length; 586 + const struct pmu_table_entry *pmus; 587 + uint32_t num_pmus; 639 588 }; 640 589 641 590 /* ··· 665 614 \t.arch = "testarch", 666 615 \t.cpuid = "testcpu", 667 616 \t.event_table = { 668 - \t\t.entries = pmu_events__test_soc_cpu, 669 - \t\t.length = ARRAY_SIZE(pmu_events__test_soc_cpu), 617 + \t\t.pmus = pmu_events__test_soc_cpu, 618 + \t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu), 670 619 \t}, 671 620 \t.metric_table = { 672 - \t\t.entries = pmu_metrics__test_soc_cpu, 673 - \t\t.length = ARRAY_SIZE(pmu_metrics__test_soc_cpu), 621 + \t\t.pmus = pmu_metrics__test_soc_cpu, 622 + \t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu), 674 623 \t} 675 624 }, 676 625 """) ··· 700 649 \t.arch = "{arch}", 701 650 \t.cpuid = "{cpuid}", 702 651 \t.event_table = {{ 703 - \t\t.entries = {event_tblname}, 704 - \t\t.length = {event_size} 652 + \t\t.pmus = {event_tblname}, 653 + \t\t.num_pmus = {event_size} 705 654 \t}}, 706 655 \t.metric_table = {{ 707 - \t\t.entries = {metric_tblname}, 708 - \t\t.length = {metric_size} 656 + \t\t.pmus = {metric_tblname}, 657 + \t\t.num_pmus = {metric_size} 709 658 \t}} 710 659 }}, 711 660 """) ··· 736 685 for tblname in _sys_event_tables: 737 686 _args.output_file.write(f"""\t{{ 738 687 \t\t.event_table = {{ 739 - \t\t\t.entries = {tblname}, 740 - \t\t\t.length = ARRAY_SIZE({tblname}) 688 + \t\t\t.pmus = {tblname}, 689 + \t\t\t.num_pmus = ARRAY_SIZE({tblname}) 741 690 \t\t}},""") 742 691 metric_tblname = _sys_event_table_to_metric_table_mapping[tblname] 743 692 if metric_tblname in _sys_metric_tables: 744 693 _args.output_file.write(f""" 745 694 
\t\t.metric_table = {{ 746 - \t\t\t.entries = {metric_tblname}, 747 - \t\t\t.length = ARRAY_SIZE({metric_tblname}) 695 + \t\t\t.pmus = {metric_tblname}, 696 + \t\t\t.num_pmus = ARRAY_SIZE({metric_tblname}) 748 697 \t\t}},""") 749 698 printed_metric_tables.append(metric_tblname) 750 699 _args.output_file.write(f""" ··· 804 753 _args.output_file.write('\twhile (*p++);') 805 754 _args.output_file.write("""} 806 755 756 + static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table, 757 + const struct pmu_table_entry *pmu, 758 + pmu_event_iter_fn fn, 759 + void *data) 760 + { 761 + int ret; 762 + struct pmu_event pe = { 763 + .pmu = &big_c_string[pmu->pmu_name.offset], 764 + }; 765 + 766 + for (uint32_t i = 0; i < pmu->num_entries; i++) { 767 + decompress_event(pmu->entries[i].offset, &pe); 768 + if (!pe.name) 769 + continue; 770 + ret = fn(&pe, table, data); 771 + if (ret) 772 + return ret; 773 + } 774 + return 0; 775 + } 776 + 807 777 int pmu_events_table__for_each_event(const struct pmu_events_table *table, 808 778 pmu_event_iter_fn fn, 809 779 void *data) 810 780 { 811 - for (size_t i = 0; i < table->length; i++) { 812 - struct pmu_event pe; 813 - int ret; 781 + for (size_t i = 0; i < table->num_pmus; i++) { 782 + int ret = pmu_events_table__for_each_event_pmu(table, &table->pmus[i], 783 + fn, data); 814 784 815 - decompress_event(table->entries[i].offset, &pe); 816 - if (!pe.name) 785 + if (ret) 786 + return ret; 787 + } 788 + return 0; 789 + } 790 + 791 + static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table, 792 + const struct pmu_table_entry *pmu, 793 + pmu_metric_iter_fn fn, 794 + void *data) 795 + { 796 + int ret; 797 + struct pmu_metric pm = { 798 + .pmu = &big_c_string[pmu->pmu_name.offset], 799 + }; 800 + 801 + for (uint32_t i = 0; i < pmu->num_entries; i++) { 802 + decompress_metric(pmu->entries[i].offset, &pm); 803 + if (!pm.metric_expr) 817 804 continue; 818 - ret = fn(&pe, table, data); 805 + 
ret = fn(&pm, table, data); 819 806 if (ret) 820 807 return ret; 821 808 } ··· 864 775 pmu_metric_iter_fn fn, 865 776 void *data) 866 777 { 867 - for (size_t i = 0; i < table->length; i++) { 868 - struct pmu_metric pm; 869 - int ret; 778 + for (size_t i = 0; i < table->num_pmus; i++) { 779 + int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i], 780 + fn, data); 870 781 871 - decompress_metric(table->entries[i].offset, &pm); 872 - if (!pm.metric_expr) 873 - continue; 874 - ret = fn(&pm, table, data); 875 782 if (ret) 876 783 return ret; 877 784 } ··· 1095 1010 #include <stddef.h> 1096 1011 1097 1012 struct compact_pmu_event { 1098 - int offset; 1013 + int offset; 1014 + }; 1015 + 1016 + struct pmu_table_entry { 1017 + const struct compact_pmu_event *entries; 1018 + uint32_t num_entries; 1019 + struct compact_pmu_event pmu_name; 1099 1020 }; 1100 1021 1101 1022 """)
+18 -12
tools/perf/tests/pmu-events.c
··· 44 44 45 45 static const struct perf_pmu_test_event bp_l1_btb_correct = { 46 46 .event = { 47 + .pmu = "cpu", 47 48 .name = "bp_l1_btb_correct", 48 49 .event = "event=0x8a", 49 50 .desc = "L1 BTB Correction", ··· 56 55 57 56 static const struct perf_pmu_test_event bp_l2_btb_correct = { 58 57 .event = { 58 + .pmu = "cpu", 59 59 .name = "bp_l2_btb_correct", 60 60 .event = "event=0x8b", 61 61 .desc = "L2 BTB Correction", ··· 68 66 69 67 static const struct perf_pmu_test_event segment_reg_loads_any = { 70 68 .event = { 69 + .pmu = "cpu", 71 70 .name = "segment_reg_loads.any", 72 71 .event = "event=0x6,period=200000,umask=0x80", 73 72 .desc = "Number of segment register loads", ··· 80 77 81 78 static const struct perf_pmu_test_event dispatch_blocked_any = { 82 79 .event = { 80 + .pmu = "cpu", 83 81 .name = "dispatch_blocked.any", 84 82 .event = "event=0x9,period=200000,umask=0x20", 85 83 .desc = "Memory cluster signals to block micro-op dispatch for any reason", ··· 92 88 93 89 static const struct perf_pmu_test_event eist_trans = { 94 90 .event = { 91 + .pmu = "cpu", 95 92 .name = "eist_trans", 96 93 .event = "event=0x3a,period=200000,umask=0x0", 97 94 .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", ··· 104 99 105 100 static const struct perf_pmu_test_event l3_cache_rd = { 106 101 .event = { 102 + .pmu = "cpu", 107 103 .name = "l3_cache_rd", 108 104 .event = "event=0x40", 109 105 .desc = "L3 cache access, read", ··· 129 123 .event = { 130 124 .name = "uncore_hisi_ddrc.flux_wcmd", 131 125 .event = "event=0x2", 132 - .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ", 126 + .desc = "DDRC write commands. 
Unit: hisi_sccl,ddrc", 133 127 .topic = "uncore", 134 128 .long_desc = "DDRC write commands", 135 129 .pmu = "hisi_sccl,ddrc", ··· 143 137 .event = { 144 138 .name = "unc_cbo_xsnp_response.miss_eviction", 145 139 .event = "event=0x22,umask=0x81", 146 - .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core. Unit: uncore_cbox ", 140 + .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core. Unit: uncore_cbox", 147 141 .topic = "uncore", 148 142 .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", 149 143 .pmu = "uncore_cbox", ··· 157 151 .event = { 158 152 .name = "event-hyphen", 159 153 .event = "event=0xe0,umask=0x00", 160 - .desc = "UNC_CBO_HYPHEN. Unit: uncore_cbox ", 154 + .desc = "UNC_CBO_HYPHEN. Unit: uncore_cbox", 161 155 .topic = "uncore", 162 156 .long_desc = "UNC_CBO_HYPHEN", 163 157 .pmu = "uncore_cbox", ··· 171 165 .event = { 172 166 .name = "event-two-hyph", 173 167 .event = "event=0xc0,umask=0x00", 174 - .desc = "UNC_CBO_TWO_HYPH. Unit: uncore_cbox ", 168 + .desc = "UNC_CBO_TWO_HYPH. Unit: uncore_cbox", 175 169 .topic = "uncore", 176 170 .long_desc = "UNC_CBO_TWO_HYPH", 177 171 .pmu = "uncore_cbox", ··· 185 179 .event = { 186 180 .name = "uncore_hisi_l3c.rd_hit_cpipe", 187 181 .event = "event=0x7", 188 - .desc = "Total read hits. Unit: hisi_sccl,l3c ", 182 + .desc = "Total read hits. Unit: hisi_sccl,l3c", 189 183 .topic = "uncore", 190 184 .long_desc = "Total read hits", 191 185 .pmu = "hisi_sccl,l3c", ··· 199 193 .event = { 200 194 .name = "uncore_imc_free_running.cache_miss", 201 195 .event = "event=0x12", 202 - .desc = "Total cache misses. Unit: uncore_imc_free_running ", 196 + .desc = "Total cache misses. 
Unit: uncore_imc_free_running", 203 197 .topic = "uncore", 204 198 .long_desc = "Total cache misses", 205 199 .pmu = "uncore_imc_free_running", ··· 213 207 .event = { 214 208 .name = "uncore_imc.cache_hits", 215 209 .event = "event=0x34", 216 - .desc = "Total cache hits. Unit: uncore_imc ", 210 + .desc = "Total cache hits. Unit: uncore_imc", 217 211 .topic = "uncore", 218 212 .long_desc = "Total cache hits", 219 213 .pmu = "uncore_imc", ··· 238 232 .event = { 239 233 .name = "sys_ddr_pmu.write_cycles", 240 234 .event = "event=0x2b", 241 - .desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ", 235 + .desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu", 242 236 .topic = "uncore", 243 237 .pmu = "uncore_sys_ddr_pmu", 244 238 .compat = "v8", 245 239 }, 246 240 .alias_str = "event=0x2b", 247 - .alias_long_desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ", 241 + .alias_long_desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu", 248 242 .matching_pmu = "uncore_sys_ddr_pmu", 249 243 }; 250 244 ··· 252 246 .event = { 253 247 .name = "sys_ccn_pmu.read_cycles", 254 248 .event = "config=0x2c", 255 - .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ", 249 + .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu", 256 250 .topic = "uncore", 257 251 .pmu = "uncore_sys_ccn_pmu", 258 252 .compat = "0x01", 259 253 }, 260 254 .alias_str = "config=0x2c", 261 - .alias_long_desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ", 255 + .alias_long_desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu", 262 256 .matching_pmu = "uncore_sys_ccn_pmu", 263 257 }; 264 258 ··· 409 403 struct perf_pmu_test_event const **test_event_table; 410 404 bool found = false; 411 405 412 - if (pe->pmu) 406 + if (strcmp(pe->pmu, "cpu")) 413 407 test_event_table = &uncore_events[0]; 414 408 else 415 409 test_event_table = &core_events[0];