Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf/x86/intel: Add core PMU support for DMR

This patch enables core PMU features for Diamond Rapids (Panther Cove
microarchitecture), including Panther Cove specific counter and PEBS
constraints, a new cache events ID table, and the model-specific OMR
events extra registers table.

For detailed information about the counter constraints, please refer to
section 16.3 "COUNTER RESTRICTIONS" in the Intel Architecture Instruction
Set Extensions (ISE) documentation.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260114011750.350569-4-dapeng1.mi@linux.intel.com

Authored by Dapeng Mi and committed by Peter Zijlstra
d345b6bb d2bdcde9

+207 -1
+178 -1
arch/x86/events/intel/core.c
··· 435 435 EVENT_EXTRA_END 436 436 }; 437 437 438 + /* Event constraint table for the Panther Cove (PNC) core PMU: fixed-counter events first, then the top-down metric events, then per-event entries. NOTE(review): the second macro argument looks like a counter index for the FIXED/METRIC entries and a counter bitmask for the others -- confirm against the macro definitions. */ static struct event_constraint intel_pnc_event_constraints[] = { 439 + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 440 + FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ 441 + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 442 + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 443 + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ 444 + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ 445 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), 446 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), 447 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), 448 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), 449 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), 450 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), 451 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), 452 + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), 453 + 454 + INTEL_EVENT_CONSTRAINT(0x20, 0xf), 455 + INTEL_EVENT_CONSTRAINT(0x79, 0xf), 456 + 457 + INTEL_UEVENT_CONSTRAINT(0x0275, 0xf), 458 + INTEL_UEVENT_CONSTRAINT(0x0176, 0xf), 459 + INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), 460 + INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), 461 + INTEL_UEVENT_CONSTRAINT(0x01cd, 0xfc), 462 + INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), 463 + 464 + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), 465 + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), 466 + INTEL_EVENT_CONSTRAINT(0xd4, 0xf), 467 + INTEL_EVENT_CONSTRAINT(0xd6, 0xf), 468 + INTEL_EVENT_CONSTRAINT(0xdf, 0xf), 469 + INTEL_EVENT_CONSTRAINT(0xce, 0x1), 470 + 471 + INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), 472 + INTEL_UEVENT_CONSTRAINT(0x0847, 0xf), 473 + INTEL_UEVENT_CONSTRAINT(0x0446, 0xf), 474 + INTEL_UEVENT_CONSTRAINT(0x0846, 0xf), 475 + INTEL_UEVENT_CONSTRAINT(0x0148, 0xf), 476 + 477 + EVENT_CONSTRAINT_END 478 + }; 479 + 480 + /* Extra-register table for PNC events: the four OMR MSRs (MSR_OMR_0..3), the PEBS load-latency register, and the MSR_PEBS_FRONTEND entries. */ static struct extra_reg intel_pnc_extra_regs[] __read_mostly = { 481 + /* must define OMR_X first, see 
intel_alt_er() */ 482 + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OMR_0, 0x40ffffff0000ffffull, OMR_0), 483 + INTEL_UEVENT_EXTRA_REG(0x022a, MSR_OMR_1, 0x40ffffff0000ffffull, OMR_1), 484 + INTEL_UEVENT_EXTRA_REG(0x042a, MSR_OMR_2, 0x40ffffff0000ffffull, OMR_2), 485 + INTEL_UEVENT_EXTRA_REG(0x082a, MSR_OMR_3, 0x40ffffff0000ffffull, OMR_3), 486 + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 487 + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), 488 + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), 489 + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE), 490 + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), 491 + EVENT_EXTRA_END 492 + }; 493 + 438 494 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); 439 495 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); 440 496 EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); ··· 702 646 [ C(OP_READ) ] = { 703 647 [ C(RESULT_ACCESS) ] = 0x10c000001, 704 648 [ C(RESULT_MISS) ] = 0x3fb3000001, 649 + }, 650 + }, 651 + }; 652 + 653 + /* PNC generic cache event table, indexed by [cache type][operation][result]. NOTE(review): the -1 entries presumably mark unsupported combinations -- confirm against the consumer of this table. */ static __initconst const u64 pnc_hw_cache_event_ids 654 + [PERF_COUNT_HW_CACHE_MAX] 655 + [PERF_COUNT_HW_CACHE_OP_MAX] 656 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = 657 + { 658 + [ C(L1D ) ] = { 659 + [ C(OP_READ) ] = { 660 + [ C(RESULT_ACCESS) ] = 0x81d0, 661 + [ C(RESULT_MISS) ] = 0xe124, 662 + }, 663 + [ C(OP_WRITE) ] = { 664 + [ C(RESULT_ACCESS) ] = 0x82d0, 665 + }, 666 + }, 667 + [ C(L1I ) ] = { 668 + [ C(OP_READ) ] = { 669 + [ C(RESULT_MISS) ] = 0xe424, 670 + }, 671 + [ C(OP_WRITE) ] = { 672 + [ C(RESULT_ACCESS) ] = -1, 673 + [ C(RESULT_MISS) ] = -1, 674 + }, 675 + }, 676 + [ C(LL ) ] = { 677 + [ C(OP_READ) ] = { 678 + [ C(RESULT_ACCESS) ] = 0x12a, 679 + [ C(RESULT_MISS) ] = 0x12a, 680 + }, 681 + [ C(OP_WRITE) ] = { 682 + [ C(RESULT_ACCESS) ] = 0x12a, 683 + [ C(RESULT_MISS) ] = 0x12a, 684 + }, 685 + }, 686 + [ C(DTLB) ] = { 687 + [ C(OP_READ) ] = { 688 + [ C(RESULT_ACCESS) ] = 0x81d0, 689 + [ C(RESULT_MISS) 
] = 0xe12, 690 + }, 691 + [ C(OP_WRITE) ] = { 692 + [ C(RESULT_ACCESS) ] = 0x82d0, 693 + [ C(RESULT_MISS) ] = 0xe13, 694 + }, 695 + }, 696 + [ C(ITLB) ] = { 697 + [ C(OP_READ) ] = { 698 + [ C(RESULT_ACCESS) ] = -1, 699 + [ C(RESULT_MISS) ] = 0xe11, 700 + }, 701 + [ C(OP_WRITE) ] = { 702 + [ C(RESULT_ACCESS) ] = -1, 703 + [ C(RESULT_MISS) ] = -1, 704 + }, 705 + [ C(OP_PREFETCH) ] = { 706 + [ C(RESULT_ACCESS) ] = -1, 707 + [ C(RESULT_MISS) ] = -1, 708 + }, 709 + }, 710 + [ C(BPU ) ] = { 711 + [ C(OP_READ) ] = { 712 + [ C(RESULT_ACCESS) ] = 0x4c4, 713 + [ C(RESULT_MISS) ] = 0x4c5, 714 + }, 715 + [ C(OP_WRITE) ] = { 716 + [ C(RESULT_ACCESS) ] = -1, 717 + [ C(RESULT_MISS) ] = -1, 718 + }, 719 + [ C(OP_PREFETCH) ] = { 720 + [ C(RESULT_ACCESS) ] = -1, 721 + [ C(RESULT_MISS) ] = -1, 722 + }, 723 + }, 724 + [ C(NODE) ] = { 725 + [ C(OP_READ) ] = { 726 + [ C(RESULT_ACCESS) ] = -1, 727 + [ C(RESULT_MISS) ] = -1, 728 + }, 729 + }, 730 + }; 731 + 732 + /* Companion table to pnc_hw_cache_event_ids with the same [cache type][operation][result] indexing; only the LL entries are populated. NOTE(review): presumably these are the extra-register values paired with the 0x12a LL events -- confirm. */ static __initconst const u64 pnc_hw_cache_extra_regs 733 + [PERF_COUNT_HW_CACHE_MAX] 734 + [PERF_COUNT_HW_CACHE_OP_MAX] 735 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = 736 + { 737 + [ C(LL ) ] = { 738 + [ C(OP_READ) ] = { 739 + [ C(RESULT_ACCESS) ] = 0x4000000000000001, 740 + [ C(RESULT_MISS) ] = 0xFFFFF000000001, 741 + }, 742 + [ C(OP_WRITE) ] = { 743 + [ C(RESULT_ACCESS) ] = 0x4000000000000002, 744 + [ C(RESULT_MISS) ] = 0xFFFFF000000002, 705 745 }, 706 746 }, 707 747 }; ··· 7388 7236 hybrid(pmu, extra_regs) = intel_lnc_extra_regs; 7389 7237 } 7390 7238 7239 + /* Set up the PNC core PMU: run the GLC initialisation first, then clear PMU_FL_HAS_RSP_1 and set PMU_FL_HAS_OMR, copy in the PNC cache event tables, and install the PNC event constraints, PEBS constraints and extra-register table. */ static __always_inline void intel_pmu_init_pnc(struct pmu *pmu) 7240 + { 7241 + intel_pmu_init_glc(pmu); 7242 + x86_pmu.flags &= ~PMU_FL_HAS_RSP_1; 7243 + x86_pmu.flags |= PMU_FL_HAS_OMR; 7244 + memcpy(hybrid_var(pmu, hw_cache_event_ids), 7245 + pnc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 7246 + memcpy(hybrid_var(pmu, hw_cache_extra_regs), 7247 + pnc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 7248 + hybrid(pmu, event_constraints) = intel_pnc_event_constraints; 
7249 + hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints; 7250 + hybrid(pmu, extra_regs) = intel_pnc_extra_regs; 7251 + } 7252 + 7391 7253 static __always_inline void intel_pmu_init_skt(struct pmu *pmu) 7392 7254 { 7393 7255 intel_pmu_init_grt(pmu); ··· 8063 7897 x86_pmu.extra_regs = intel_rwc_extra_regs; 8064 7898 pr_cont("Granite Rapids events, "); 8065 7899 name = "granite_rapids"; 7900 + goto glc_common; 7901 + 7902 + case INTEL_DIAMONDRAPIDS_X: 7903 + intel_pmu_init_pnc(NULL); 7904 + x86_pmu.pebs_latency_data = pnc_latency_data; 7905 + 7906 + pr_cont("Panthercove events, "); 7907 + name = "panthercove"; 7908 + goto glc_base; 8066 7909 8067 7910 glc_common: 8068 7911 intel_pmu_init_glc(NULL); 7912 + intel_pmu_pebs_data_source_skl(true); 7913 + 7914 + glc_base: 8069 7915 x86_pmu.pebs_ept = 1; 8070 7916 x86_pmu.hw_config = hsw_hw_config; 8071 7917 x86_pmu.get_event_constraints = glc_get_event_constraints; ··· 8087 7909 mem_attr = glc_events_attrs; 8088 7910 td_attr = glc_td_events_attrs; 8089 7911 tsx_attr = glc_tsx_events_attrs; 8090 - intel_pmu_pebs_data_source_skl(true); 8091 7912 break; 8092 7913 8093 7914 case INTEL_ALDERLAKE:
+27
arch/x86/events/intel/ds.c
··· 1425 1425 EVENT_CONSTRAINT_END 1426 1426 }; 1427 1427 1428 + /* PEBS event constraint table for Panther Cove (PNC); installed as hybrid(pmu, pebs_constraints) by intel_pmu_init_pnc() in intel/core.c. NOTE(review): the second macro argument looks like a counter bitmask -- confirm against the macro definitions. */ struct event_constraint intel_pnc_pebs_event_constraints[] = { 1429 + INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ 1430 + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), 1431 + 1432 + INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0xfc), 1433 + INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3), 1434 + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ 1435 + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ 1436 + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ 1437 + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ 1438 + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ 1439 + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ 1440 + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ 1441 + 1442 + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), 1443 + 1444 + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), 1445 + INTEL_FLAGS_EVENT_CONSTRAINT(0xd6, 0xf), 1446 + 1447 + /* 1448 + * Everything else is handled by PMU_FL_PEBS_ALL, because we 1449 + * need the full constraints from the main table. 1450 + */ 1451 + 1452 + EVENT_CONSTRAINT_END 1453 + }; 1454 + 1428 1455 struct event_constraint *intel_pebs_constraints(struct perf_event *event) 1429 1456 { 1430 1457 struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
+2
arch/x86/events/perf_event.h
··· 1698 1698 1699 1699 extern struct event_constraint intel_lnc_pebs_event_constraints[]; 1700 1700 1701 + /* Panther Cove (PNC) PEBS constraint table; defined in arch/x86/events/intel/ds.c. */ extern struct event_constraint intel_pnc_pebs_event_constraints[]; 1702 + 1701 1703 struct event_constraint *intel_pebs_constraints(struct perf_event *event); 1702 1704 1703 1705 void intel_pmu_pebs_add(struct perf_event *event);