Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf/x86/intel: Add support for PEBS memory auxiliary info field in NVL

Similar to DMR (Panther Cove uarch), both P-core (Coyote Cove uarch) and
E-core (Arctic Wolf uarch) of NVL adopt the new PEBS memory auxiliary
info layout.

The Coyote Cove microarchitecture shares the same PMU capabilities as
Panther Cove, including the memory auxiliary info layout. The Arctic Wolf
microarchitecture uses a layout similar to Panther Cove's; the only
difference is its specific data source encoding for L2 hit cases (up to
the L2 cache level). The OMR encoding remains the same as in Panther Cove.

For detailed information on the memory auxiliary info encoding, please
refer to section 16.2 "PEBS LOAD LATENCY AND STORE LATENCY FACILITY" in
the latest ISE documentation.

Define the Arctic Wolf specific data source encoding and add support for
the PEBS memory auxiliary info field on NVL.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260114011750.350569-5-dapeng1.mi@linux.intel.com

authored by

Dapeng Mi and committed by
Peter Zijlstra
7cd264d1 d345b6bb

+85
+83
arch/x86/events/intel/ds.c
··· 96 96 unsigned int pnc_fb_full:1; 97 97 unsigned int ld_reserved8:16; 98 98 }; 99 + struct { 100 + unsigned int arw_dse:8; 101 + unsigned int arw_l2_miss:1; 102 + unsigned int arw_xq_promotion:1; 103 + unsigned int arw_reissue:1; 104 + unsigned int arw_stlb_miss:1; 105 + unsigned int arw_locked:1; 106 + unsigned int arw_data_blk:1; 107 + unsigned int arw_addr_blk:1; 108 + unsigned int arw_fb_full:1; 109 + unsigned int ld_reserved9:16; 110 + }; 99 111 }; 100 112 101 113 ··· 284 272 0, /* 0x0d: Reserved */ 285 273 0, /* 0x0e: Reserved */ 286 274 OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */ 275 + }; 276 + 277 + /* Version for Arctic Wolf and later */ 278 + 279 + /* L2 hit */ 280 + #define ARW_PEBS_DATA_SOURCE_MAX 16 281 + static u64 arw_pebs_l2_hit_data_source[ARW_PEBS_DATA_SOURCE_MAX] = { 282 + P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: non-cache access */ 283 + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */ 284 + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: WCB Hit */ 285 + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 Hit Clean */ 286 + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT), /* 0x04: L2 Hit Snoop HIT */ 287 + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM), /* 0x05: L2 Hit Snoop Hit Modified */ 288 + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x06: uncached */ 289 + 0, /* 0x07: Reserved */ 290 + 0, /* 0x08: Reserved */ 291 + 0, /* 0x09: Reserved */ 292 + 0, /* 0x0a: Reserved */ 293 + 0, /* 0x0b: Reserved */ 294 + 0, /* 0x0c: Reserved */ 295 + 0, /* 0x0d: Reserved */ 296 + 0, /* 0x0e: Reserved */ 297 + 0, /* 0x0f: Reserved */ 287 298 }; 288 299 289 300 /* L2 miss */ ··· 493 458 dse.mtl_fwd_blk); 494 459 } 495 460 461 + static u64 arw_latency_data(struct perf_event *event, u64 status) 462 + { 463 + union intel_x86_pebs_dse dse; 464 + union perf_mem_data_src src; 465 + u64 val; 466 + 467 + dse.val = status; 468 + 469 + if (!dse.arw_l2_miss) 470 + val 
= arw_pebs_l2_hit_data_source[dse.arw_dse & 0xf]; 471 + else 472 + val = parse_omr_data_source(dse.arw_dse); 473 + 474 + if (!val) 475 + val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA); 476 + 477 + if (dse.arw_stlb_miss) 478 + val |= P(TLB, MISS) | P(TLB, L2); 479 + else 480 + val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); 481 + 482 + if (dse.arw_locked) 483 + val |= P(LOCK, LOCKED); 484 + 485 + if (dse.arw_data_blk) 486 + val |= P(BLK, DATA); 487 + if (dse.arw_addr_blk) 488 + val |= P(BLK, ADDR); 489 + if (!dse.arw_data_blk && !dse.arw_addr_blk) 490 + val |= P(BLK, NA); 491 + 492 + src.val = val; 493 + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) 494 + src.mem_op = P(OP, STORE); 495 + 496 + return src.val; 497 + } 498 + 496 499 static u64 lnc_latency_data(struct perf_event *event, u64 status) 497 500 { 498 501 union intel_x86_pebs_dse dse; ··· 622 549 src.mem_op = P(OP, STORE); 623 550 624 551 return src.val; 552 + } 553 + 554 + u64 nvl_latency_data(struct perf_event *event, u64 status) 555 + { 556 + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); 557 + 558 + if (pmu->pmu_type == hybrid_small) 559 + return arw_latency_data(event, status); 560 + 561 + return pnc_latency_data(event, status); 625 562 } 626 563 627 564 static u64 load_latency_data(struct perf_event *event, u64 status)
+2
arch/x86/events/perf_event.h
··· 1666 1666 1667 1667 u64 pnc_latency_data(struct perf_event *event, u64 status); 1668 1668 1669 + u64 nvl_latency_data(struct perf_event *event, u64 status); 1670 + 1669 1671 extern struct event_constraint intel_core2_pebs_event_constraints[]; 1670 1672 1671 1673 extern struct event_constraint intel_atom_pebs_event_constraints[];