Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-tools-fixes-for-v6.8-1-2024-02-01' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools

Pull perf tools fixes from Arnaldo Carvalho de Melo:
"Vendor events:

- Intel Alderlake/Sapphire Rapids metric fixes: the CPU type
("cpu_atom", "cpu_core") needs to be used as a prefix for an event
to be considered in a metric formula. The problem was detected via
one of the 'perf test' entries.

'perf test' fixes:

- Fix the creation of event selector lists in 'perf test' entries by
initializing the sample ID flag. 'perf record' already does this
initialization, so the fix affects only the tests; the common case
isn't affected

- Make 'perf list' respect debug settings (-v) to fix its 'perf test'
entry

- Fix 'perf script' test when python support isn't enabled

- Special case 'perf script' tests on s390, where only DWARF call
graphs are supported and only on software events

- Make 'perf daemon' signal test less racy

Compiler warnings/errors:

- Remove needless malloc(0) call in 'perf top' that triggers
-Walloc-size

- Fix calloc() argument order to address error introduced in gcc-14

Build:

- Require shellcheck v0.6.0 as the minimum version, so that the build
doesn't fail with older versions

Sync kernel header copies:

- stat.h to pick STATX_MNT_ID_UNIQUE

- msr-index.h to pick IA32_MKTME_KEYID_PARTITIONING

- drm.h to pick DRM_IOCTL_MODE_CLOSEFB

- unistd.h to pick {list,stat}mount,
lsm_{[gs]et_self_attr,list_modules} syscall numbers

- x86 cpufeatures to pick TDX, Zen, APIC MSR fence changes

- x86's mem{cpy,set}_64.S used in 'perf bench'

- Also, without tooling effects: asm-generic/unaligned.h, mount.h,
fcntl.h, kvm headers"

* tag 'perf-tools-fixes-for-v6.8-1-2024-02-01' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (21 commits)
perf tools headers: update the asm-generic/unaligned.h copy with the kernel sources
tools include UAPI: Sync linux/mount.h copy with the kernel sources
perf evlist: Fix evlist__new_default() for > 1 core PMU
tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
tools headers x86 cpufeatures: Sync with the kernel sources to pick TDX, Zen, APIC MSR fence changes
tools headers UAPI: Sync unistd.h to pick {list,stat}mount, lsm_{[gs]et_self_attr,list_modules} syscall numbers
perf vendor events intel: Alderlake/sapphirerapids metric fixes
tools headers UAPI: Sync kvm headers with the kernel sources
perf tools: Fix calloc() arguments to address error introduced in gcc-14
perf top: Remove needless malloc(0) call that triggers -Walloc-size
perf build: Make minimal shellcheck version to v0.6.0
tools headers UAPI: Update tools's copy of drm.h headers to pick DRM_IOCTL_MODE_CLOSEFB
perf test shell daemon: Make signal test less racy
perf test shell script: Fix test for python being disabled
perf test: Workaround debug output in list test
perf list: Add output file option
perf list: Switch error message to pr_err() to respect debug settings (-v)
perf test: Fix 'perf script' tests on s390
tools headers UAPI: Sync linux/fcntl.h with the kernel sources
tools arch x86: Sync the msr-index.h copy with the kernel sources to pick IA32_MKTME_KEYID_PARTITIONING
...

+588 -383
+7 -1
tools/arch/x86/include/asm/cpufeatures.h
··· 198 198 #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 199 199 #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 200 200 #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 201 + #define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ 201 202 #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 202 203 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 203 204 #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ ··· 309 308 #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ 310 309 #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ 311 310 #define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ 312 - 313 311 #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ 314 312 #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ 315 313 #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ 314 + #define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ 315 + #define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ 316 + #define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ 317 + #define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ 318 + #define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ 316 319 317 320 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ 318 321 #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ ··· 500 495 #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ 501 496 #define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address 
Predictions */ 502 497 #define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ 498 + #define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ 503 499 504 500 /* BUG word 2 */ 505 501 #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
+8
tools/arch/x86/include/asm/msr-index.h
··· 237 237 #define LBR_INFO_CYCLES 0xffff 238 238 #define LBR_INFO_BR_TYPE_OFFSET 56 239 239 #define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) 240 + #define LBR_INFO_BR_CNTR_OFFSET 32 241 + #define LBR_INFO_BR_CNTR_NUM 4 242 + #define LBR_INFO_BR_CNTR_BITS 2 243 + #define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0) 244 + #define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0) 240 245 241 246 #define MSR_ARCH_LBR_CTL 0x000014ce 242 247 #define ARCH_LBR_CTL_LBREN BIT(0) ··· 540 535 /* Auto-reload via MSR instead of DS area */ 541 536 #define MSR_RELOAD_PMC0 0x000014c1 542 537 #define MSR_RELOAD_FIXED_CTR0 0x00001309 538 + 539 + /* KeyID partitioning between MKTME and TDX */ 540 + #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 543 541 544 542 /* 545 543 * AMD64 MSRs. Not complete. See the architecture manual for a more
+3
tools/arch/x86/include/uapi/asm/kvm.h
··· 562 562 /* x86-specific KVM_EXIT_HYPERCALL flags. */ 563 563 #define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) 564 564 565 + #define KVM_X86_DEFAULT_VM 0 566 + #define KVM_X86_SW_PROTECTED_VM 1 567 + 565 568 #endif /* _ASM_X86_KVM_H */
+2 -2
tools/arch/x86/lib/memcpy_64.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* Copyright 2002 Andi Kleen */ 3 3 4 + #include <linux/export.h> 4 5 #include <linux/linkage.h> 5 6 #include <asm/errno.h> 6 7 #include <asm/cpufeatures.h> 7 8 #include <asm/alternative.h> 8 - #include <asm/export.h> 9 9 10 10 .section .noinstr.text, "ax" 11 11 ··· 39 39 SYM_FUNC_END(__memcpy) 40 40 EXPORT_SYMBOL(__memcpy) 41 41 42 - SYM_FUNC_ALIAS(memcpy, __memcpy) 42 + SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) 43 43 EXPORT_SYMBOL(memcpy) 44 44 45 45 SYM_FUNC_START_LOCAL(memcpy_orig)
+2 -2
tools/arch/x86/lib/memset_64.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 /* Copyright 2002 Andi Kleen, SuSE Labs */ 3 3 4 + #include <linux/export.h> 4 5 #include <linux/linkage.h> 5 6 #include <asm/cpufeatures.h> 6 7 #include <asm/alternative.h> 7 - #include <asm/export.h> 8 8 9 9 .section .noinstr.text, "ax" 10 10 ··· 40 40 SYM_FUNC_END(__memset) 41 41 EXPORT_SYMBOL(__memset) 42 42 43 - SYM_FUNC_ALIAS(memset, __memset) 43 + SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) 44 44 EXPORT_SYMBOL(memset) 45 45 46 46 SYM_FUNC_START_LOCAL(memset_orig)
+12 -12
tools/include/asm-generic/unaligned.h
··· 105 105 106 106 static inline void __put_unaligned_be24(const u32 val, u8 *p) 107 107 { 108 - *p++ = val >> 16; 109 - *p++ = val >> 8; 110 - *p++ = val; 108 + *p++ = (val >> 16) & 0xff; 109 + *p++ = (val >> 8) & 0xff; 110 + *p++ = val & 0xff; 111 111 } 112 112 113 113 static inline void put_unaligned_be24(const u32 val, void *p) ··· 117 117 118 118 static inline void __put_unaligned_le24(const u32 val, u8 *p) 119 119 { 120 - *p++ = val; 121 - *p++ = val >> 8; 122 - *p++ = val >> 16; 120 + *p++ = val & 0xff; 121 + *p++ = (val >> 8) & 0xff; 122 + *p++ = (val >> 16) & 0xff; 123 123 } 124 124 125 125 static inline void put_unaligned_le24(const u32 val, void *p) ··· 129 129 130 130 static inline void __put_unaligned_be48(const u64 val, u8 *p) 131 131 { 132 - *p++ = val >> 40; 133 - *p++ = val >> 32; 134 - *p++ = val >> 24; 135 - *p++ = val >> 16; 136 - *p++ = val >> 8; 137 - *p++ = val; 132 + *p++ = (val >> 40) & 0xff; 133 + *p++ = (val >> 32) & 0xff; 134 + *p++ = (val >> 24) & 0xff; 135 + *p++ = (val >> 16) & 0xff; 136 + *p++ = (val >> 8) & 0xff; 137 + *p++ = val & 0xff; 138 138 } 139 139 140 140 static inline void put_unaligned_be48(const u64 val, void *p)
+14 -1
tools/include/uapi/asm-generic/unistd.h
··· 829 829 #define __NR_futex_requeue 456 830 830 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) 831 831 832 + #define __NR_statmount 457 833 + __SYSCALL(__NR_statmount, sys_statmount) 834 + 835 + #define __NR_listmount 458 836 + __SYSCALL(__NR_listmount, sys_listmount) 837 + 838 + #define __NR_lsm_get_self_attr 459 839 + __SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) 840 + #define __NR_lsm_set_self_attr 460 841 + __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) 842 + #define __NR_lsm_list_modules 461 843 + __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) 844 + 832 845 #undef __NR_syscalls 833 - #define __NR_syscalls 457 846 + #define __NR_syscalls 462 834 847 835 848 /* 836 849 * 32 bit systems traditionally used different
+71 -1
tools/include/uapi/drm/drm.h
··· 713 713 /** 714 714 * DRM_CAP_ASYNC_PAGE_FLIP 715 715 * 716 - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. 716 + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy 717 + * page-flips. 717 718 */ 718 719 #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 719 720 /** ··· 774 773 * :ref:`drm_sync_objects`. 775 774 */ 776 775 #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 776 + /** 777 + * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 778 + * 779 + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic 780 + * commits. 781 + */ 782 + #define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 777 783 778 784 /* DRM_IOCTL_GET_CAP ioctl argument type */ 779 785 struct drm_get_cap { ··· 850 842 */ 851 843 #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 852 844 845 + /** 846 + * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 847 + * 848 + * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and 849 + * virtualbox) have additional restrictions for cursor planes (thus 850 + * making cursor planes on those drivers not truly universal,) e.g. 851 + * they need cursor planes to act like one would expect from a mouse 852 + * cursor and have correctly set hotspot properties. 853 + * If this client cap is not set the DRM core will hide cursor plane on 854 + * those virtualized drivers because not setting it implies that the 855 + * client is not capable of dealing with those extra restictions. 856 + * Clients which do set cursor hotspot and treat the cursor plane 857 + * like a mouse cursor should set this property. 858 + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. 859 + * 860 + * Setting this property on drivers which do not special case 861 + * cursor planes (i.e. non-virtualized drivers) will return 862 + * EOPNOTSUPP, which can be used by userspace to gauge 863 + * requirements of the hardware/drivers they're running on. 864 + * 865 + * This capability is always supported for atomic-capable virtualized 866 + * drivers starting from kernel version 6.6. 
867 + */ 868 + #define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 869 + 853 870 /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ 854 871 struct drm_set_client_cap { 855 872 __u64 capability; ··· 926 893 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) 927 894 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) 928 895 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ 896 + #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ 929 897 struct drm_syncobj_wait { 930 898 __u64 handles; 931 899 /* absolute timeout */ ··· 935 901 __u32 flags; 936 902 __u32 first_signaled; /* only valid when not waiting all */ 937 903 __u32 pad; 904 + /** 905 + * @deadline_nsec - fence deadline hint 906 + * 907 + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing 908 + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is 909 + * set. 910 + */ 911 + __u64 deadline_nsec; 938 912 }; 939 913 940 914 struct drm_syncobj_timeline_wait { ··· 955 913 __u32 flags; 956 914 __u32 first_signaled; /* only valid when not waiting all */ 957 915 __u32 pad; 916 + /** 917 + * @deadline_nsec - fence deadline hint 918 + * 919 + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing 920 + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is 921 + * set. 922 + */ 923 + __u64 deadline_nsec; 958 924 }; 959 925 960 926 /** ··· 1267 1217 #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) 1268 1218 1269 1219 #define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) 1220 + 1221 + /** 1222 + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. 1223 + * 1224 + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL 1225 + * argument is a framebuffer object ID. 1226 + * 1227 + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable 1228 + * planes and CRTCs. 
As long as the framebuffer is used by a plane, it's kept 1229 + * alive. When the plane no longer uses the framebuffer (because the 1230 + * framebuffer is replaced with another one, or the plane is disabled), the 1231 + * framebuffer is cleaned up. 1232 + * 1233 + * This is useful to implement flicker-free transitions between two processes. 1234 + * 1235 + * Depending on the threat model, user-space may want to ensure that the 1236 + * framebuffer doesn't expose any sensitive user information: closed 1237 + * framebuffers attached to a plane can be read back by the next DRM master. 1238 + */ 1239 + #define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) 1270 1240 1271 1241 /* 1272 1242 * Device specific ioctls should only be in their respective headers
+6 -6
tools/include/uapi/drm/i915_drm.h
··· 693 693 #define I915_PARAM_HAS_EXEC_FENCE 44 694 694 695 695 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture 696 - * user specified bufffers for post-mortem debugging of GPU hangs. See 696 + * user-specified buffers for post-mortem debugging of GPU hangs. See 697 697 * EXEC_OBJECT_CAPTURE. 698 698 */ 699 699 #define I915_PARAM_HAS_EXEC_CAPTURE 45 ··· 1606 1606 * is accurate. 1607 1607 * 1608 1608 * The returned dword is split into two fields to indicate both 1609 - * the engine classess on which the object is being read, and the 1609 + * the engine classes on which the object is being read, and the 1610 1610 * engine class on which it is currently being written (if any). 1611 1611 * 1612 1612 * The low word (bits 0:15) indicate if the object is being written ··· 1815 1815 __u32 handle; 1816 1816 1817 1817 /* Advice: either the buffer will be needed again in the near future, 1818 - * or wont be and could be discarded under memory pressure. 1818 + * or won't be and could be discarded under memory pressure. 1819 1819 */ 1820 1820 __u32 madv; 1821 1821 ··· 3246 3246 * // enough to hold our array of engines. The kernel will fill out the 3247 3247 * // item.length for us, which is the number of bytes we need. 3248 3248 * // 3249 - * // Alternatively a large buffer can be allocated straight away enabling 3249 + * // Alternatively a large buffer can be allocated straightaway enabling 3250 3250 * // querying in one pass, in which case item.length should contain the 3251 3251 * // length of the provided buffer. 3252 3252 * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); ··· 3256 3256 * // Now that we allocated the required number of bytes, we call the ioctl 3257 3257 * // again, this time with the data_ptr pointing to our newly allocated 3258 3258 * // blob, which the kernel can then populate with info on all engines. 
3259 - * item.data_ptr = (uintptr_t)&info, 3259 + * item.data_ptr = (uintptr_t)&info; 3260 3260 * 3261 3261 * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); 3262 3262 * if (err) ... ··· 3286 3286 /** 3287 3287 * struct drm_i915_engine_info 3288 3288 * 3289 - * Describes one engine and it's capabilities as known to the driver. 3289 + * Describes one engine and its capabilities as known to the driver. 3290 3290 */ 3291 3291 struct drm_i915_engine_info { 3292 3292 /** @engine: Engine class and instance. */
+3
tools/include/uapi/linux/fcntl.h
··· 116 116 #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to 117 117 compare object identity and may not 118 118 be usable to open_by_handle_at(2) */ 119 + #if defined(__KERNEL__) 120 + #define AT_GETATTR_NOSEC 0x80000000 121 + #endif 119 122 120 123 #endif /* _UAPI_LINUX_FCNTL_H */
+50 -90
tools/include/uapi/linux/kvm.h
··· 16 16 17 17 #define KVM_API_VERSION 12 18 18 19 - /* *** Deprecated interfaces *** */ 20 - 21 - #define KVM_TRC_SHIFT 16 22 - 23 - #define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) 24 - #define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) 25 - 26 - #define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) 27 - #define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) 28 - #define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) 29 - 30 - #define KVM_TRC_HEAD_SIZE 12 31 - #define KVM_TRC_CYCLE_SIZE 8 32 - #define KVM_TRC_EXTRA_MAX 7 33 - 34 - #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) 35 - #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) 36 - #define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) 37 - #define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) 38 - #define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) 39 - #define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) 40 - #define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) 41 - #define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) 42 - #define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) 43 - #define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) 44 - #define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) 45 - #define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) 46 - #define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) 47 - #define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) 48 - #define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) 49 - #define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) 50 - #define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) 51 - #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) 52 - #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) 53 - #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) 54 - #define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) 55 - #define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) 56 - #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) 57 - #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) 58 - 59 - struct kvm_user_trace_setup { 60 - __u32 buf_size; 61 - __u32 buf_nr; 62 - }; 63 - 64 - #define __KVM_DEPRECATED_MAIN_W_0x06 \ 65 - _IOW(KVMIO, 0x06, struct 
kvm_user_trace_setup) 66 - #define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) 67 - #define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) 68 - 69 - #define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) 70 - 71 - struct kvm_breakpoint { 72 - __u32 enabled; 73 - __u32 padding; 74 - __u64 address; 75 - }; 76 - 77 - struct kvm_debug_guest { 78 - __u32 enabled; 79 - __u32 pad; 80 - struct kvm_breakpoint breakpoints[4]; 81 - __u32 singlestep; 82 - }; 83 - 84 - #define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) 85 - 86 - /* *** End of deprecated interfaces *** */ 87 - 88 - 89 19 /* for KVM_SET_USER_MEMORY_REGION */ 90 20 struct kvm_userspace_memory_region { 91 21 __u32 slot; ··· 25 95 __u64 userspace_addr; /* start of the userspace allocated memory */ 26 96 }; 27 97 98 + /* for KVM_SET_USER_MEMORY_REGION2 */ 99 + struct kvm_userspace_memory_region2 { 100 + __u32 slot; 101 + __u32 flags; 102 + __u64 guest_phys_addr; 103 + __u64 memory_size; 104 + __u64 userspace_addr; 105 + __u64 guest_memfd_offset; 106 + __u32 guest_memfd; 107 + __u32 pad1; 108 + __u64 pad2[14]; 109 + }; 110 + 28 111 /* 29 112 * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for 30 113 * userspace, other bits are reserved for kvm internal use which are defined ··· 45 102 */ 46 103 #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) 47 104 #define KVM_MEM_READONLY (1UL << 1) 105 + #define KVM_MEM_GUEST_MEMFD (1UL << 2) 48 106 49 107 /* for KVM_IRQ_LINE */ 50 108 struct kvm_irq_level { ··· 209 265 #define KVM_EXIT_RISCV_CSR 36 210 266 #define KVM_EXIT_NOTIFY 37 211 267 #define KVM_EXIT_LOONGARCH_IOCSR 38 268 + #define KVM_EXIT_MEMORY_FAULT 39 212 269 213 270 /* For KVM_EXIT_INTERNAL_ERROR */ 214 271 /* Emulate instruction failed. 
*/ ··· 463 518 #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) 464 519 __u32 flags; 465 520 } notify; 521 + /* KVM_EXIT_MEMORY_FAULT */ 522 + struct { 523 + #define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) 524 + __u64 flags; 525 + __u64 gpa; 526 + __u64 size; 527 + } memory_fault; 466 528 /* Fix the size of the union. */ 467 529 char padding[256]; 468 530 }; ··· 897 945 */ 898 946 #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ 899 947 #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) 900 - #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 901 - #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 902 - #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 903 948 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) 904 949 #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) 905 950 ··· 1150 1201 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 1151 1202 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 1152 1203 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 1204 + #define KVM_CAP_USER_MEMORY2 231 1205 + #define KVM_CAP_MEMORY_FAULT_INFO 232 1206 + #define KVM_CAP_MEMORY_ATTRIBUTES 233 1207 + #define KVM_CAP_GUEST_MEMFD 234 1208 + #define KVM_CAP_VM_TYPES 235 1153 1209 1154 1210 #ifdef KVM_CAP_IRQ_ROUTING 1155 1211 ··· 1245 1291 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) 1246 1292 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) 1247 1293 #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) 1294 + #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) 1248 1295 1249 1296 struct kvm_xen_hvm_config { 1250 1297 __u32 flags; ··· 1438 1483 struct kvm_userspace_memory_region) 1439 1484 #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) 1440 1485 #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) 1486 + #define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ 1487 + struct kvm_userspace_memory_region2) 1441 1488 1442 1489 /* enable ucontrol for s390 */ 1443 1490 struct kvm_s390_ucas_mapping 
{ ··· 1464 1507 _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) 1465 1508 #define KVM_UNREGISTER_COALESCED_MMIO \ 1466 1509 _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) 1467 - #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ 1468 - struct kvm_assigned_pci_dev) 1469 1510 #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) 1470 - /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ 1471 - #define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 1472 - #define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) 1473 1511 #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) 1474 - #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ 1475 - struct kvm_assigned_pci_dev) 1476 - #define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ 1477 - struct kvm_assigned_msix_nr) 1478 - #define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ 1479 - struct kvm_assigned_msix_entry) 1480 - #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) 1481 1512 #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) 1482 1513 #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) 1483 1514 #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) ··· 1482 1537 * KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ 1483 1538 #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) 1484 1539 #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) 1485 - /* Available with KVM_CAP_PCI_2_3 */ 1486 - #define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ 1487 - struct kvm_assigned_pci_dev) 1488 1540 /* Available with KVM_CAP_SIGNAL_MSI */ 1489 1541 #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) 1490 1542 /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ ··· 1534 1592 #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) 1535 1593 #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) 1536 1594 #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) 1537 - /* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ 1538 - #define KVM_DEBUG_GUEST 
__KVM_DEPRECATED_VCPU_W_0x87 1539 1595 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) 1540 1596 #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) 1541 1597 #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) ··· 2206 2266 2207 2267 /* flags for kvm_s390_zpci_op->u.reg_aen.flags */ 2208 2268 #define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) 2269 + 2270 + /* Available with KVM_CAP_MEMORY_ATTRIBUTES */ 2271 + #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) 2272 + 2273 + struct kvm_memory_attributes { 2274 + __u64 address; 2275 + __u64 size; 2276 + __u64 attributes; 2277 + __u64 flags; 2278 + }; 2279 + 2280 + #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) 2281 + 2282 + #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) 2283 + 2284 + struct kvm_create_guest_memfd { 2285 + __u64 size; 2286 + __u64 flags; 2287 + __u64 reserved[6]; 2288 + }; 2209 2289 2210 2290 #endif /* __LINUX_KVM_H */
+70
tools/include/uapi/linux/mount.h
··· 138 138 /* List of all mount_attr versions. */ 139 139 #define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ 140 140 141 + 142 + /* 143 + * Structure for getting mount/superblock/filesystem info with statmount(2). 144 + * 145 + * The interface is similar to statx(2): individual fields or groups can be 146 + * selected with the @mask argument of statmount(). Kernel will set the @mask 147 + * field according to the supported fields. 148 + * 149 + * If string fields are selected, then the caller needs to pass a buffer that 150 + * has space after the fixed part of the structure. Nul terminated strings are 151 + * copied there and offsets relative to @str are stored in the relevant fields. 152 + * If the buffer is too small, then EOVERFLOW is returned. The actually used 153 + * size is returned in @size. 154 + */ 155 + struct statmount { 156 + __u32 size; /* Total size, including strings */ 157 + __u32 __spare1; 158 + __u64 mask; /* What results were written */ 159 + __u32 sb_dev_major; /* Device ID */ 160 + __u32 sb_dev_minor; 161 + __u64 sb_magic; /* ..._SUPER_MAGIC */ 162 + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ 163 + __u32 fs_type; /* [str] Filesystem type */ 164 + __u64 mnt_id; /* Unique ID of mount */ 165 + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ 166 + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ 167 + __u32 mnt_parent_id_old; 168 + __u64 mnt_attr; /* MOUNT_ATTR_... 
*/ 169 + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ 170 + __u64 mnt_peer_group; /* ID of shared peer group */ 171 + __u64 mnt_master; /* Mount receives propagation from this ID */ 172 + __u64 propagate_from; /* Propagation from in current namespace */ 173 + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ 174 + __u32 mnt_point; /* [str] Mountpoint relative to current root */ 175 + __u64 __spare2[50]; 176 + char str[]; /* Variable size part containing strings */ 177 + }; 178 + 179 + /* 180 + * Structure for passing mount ID and miscellaneous parameters to statmount(2) 181 + * and listmount(2). 182 + * 183 + * For statmount(2) @param represents the request mask. 184 + * For listmount(2) @param represents the last listed mount id (or zero). 185 + */ 186 + struct mnt_id_req { 187 + __u32 size; 188 + __u32 spare; 189 + __u64 mnt_id; 190 + __u64 param; 191 + }; 192 + 193 + /* List of all mnt_id_req versions. */ 194 + #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ 195 + 196 + /* 197 + * @mask bits for statmount(2) 198 + */ 199 + #define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ 200 + #define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ 201 + #define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ 202 + #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ 203 + #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ 204 + #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ 205 + 206 + /* 207 + * Special @mnt_id values that can be passed to listmount 208 + */ 209 + #define LSMT_ROOT 0xffffffffffffffff /* root mount */ 210 + 141 211 #endif /* _UAPI_LINUX_MOUNT_H */
+1
tools/include/uapi/linux/stat.h
··· 154 154 #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ 155 155 #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ 156 156 #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ 157 + #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ 157 158 158 159 #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ 159 160
+4
tools/perf/Documentation/perf-list.txt
··· 47 47 --json:: 48 48 Output in JSON format. 49 49 50 + -o:: 51 + --output=:: 52 + Output file name. By default output is written to stdout. 53 + 50 54 [[EVENT_MODIFIERS]] 51 55 EVENT MODIFIERS 52 56 ---------------
+10
tools/perf/Makefile.perf
··· 236 236 SHELLCHECK := $(shell which shellcheck 2> /dev/null) 237 237 endif 238 238 239 + # shellcheck is using in tools/perf/tests/Build with option -a/--check-sourced ( 240 + # introduced in v0.4.7) and -S/--severity (introduced in v0.6.0). So make the 241 + # minimal shellcheck version as v0.6.0. 242 + ifneq ($(SHELLCHECK),) 243 + ifeq ($(shell expr $(shell $(SHELLCHECK) --version | grep version: | \ 244 + sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 060), 1) 245 + SHELLCHECK := 246 + endif 247 + endif 248 + 239 249 export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK 240 250 export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK 241 251
+129 -82
tools/perf/builtin-list.c
··· 30 30 * functions. 31 31 */ 32 32 struct print_state { 33 + /** @fp: File to write output to. */ 34 + FILE *fp; 33 35 /** 34 36 * @pmu_glob: Optionally restrict PMU and metric matching to PMU or 35 37 * debugfs subsystem name. ··· 68 66 { 69 67 struct print_state *print_state = ps; 70 68 71 - if (!print_state->name_only && pager_in_use()) 72 - printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); 69 + if (!print_state->name_only && pager_in_use()) { 70 + fprintf(print_state->fp, 71 + "\nList of pre-defined events (to be used in -e or -M):\n\n"); 72 + } 73 73 } 74 74 75 75 static void default_print_end(void *print_state __maybe_unused) {} 76 76 77 - static void wordwrap(const char *s, int start, int max, int corr) 77 + static void wordwrap(FILE *fp, const char *s, int start, int max, int corr) 78 78 { 79 79 int column = start; 80 80 int n; ··· 86 82 int wlen = strcspn(s, " \t\n"); 87 83 88 84 if ((column + wlen >= max && column > start) || saw_newline) { 89 - printf("\n%*s", start, ""); 85 + fprintf(fp, "\n%*s", start, ""); 90 86 column = start + corr; 91 87 } 92 - n = printf("%s%.*s", column > start ? " " : "", wlen, s); 88 + n = fprintf(fp, "%s%.*s", column > start ? 
" " : "", wlen, s); 93 89 if (n <= 0) 94 90 break; 95 91 saw_newline = s[wlen] == '\n'; ··· 108 104 { 109 105 struct print_state *print_state = ps; 110 106 int pos; 107 + FILE *fp = print_state->fp; 111 108 112 109 if (deprecated && !print_state->deprecated) 113 110 return; ··· 124 119 125 120 if (print_state->name_only) { 126 121 if (event_alias && strlen(event_alias)) 127 - printf("%s ", event_alias); 122 + fprintf(fp, "%s ", event_alias); 128 123 else 129 - printf("%s ", event_name); 124 + fprintf(fp, "%s ", event_name); 130 125 return; 131 126 } 132 127 133 128 if (strcmp(print_state->last_topic, topic ?: "")) { 134 129 if (topic) 135 - printf("\n%s:\n", topic); 130 + fprintf(fp, "\n%s:\n", topic); 136 131 zfree(&print_state->last_topic); 137 132 print_state->last_topic = strdup(topic ?: ""); 138 133 } 139 134 140 135 if (event_alias && strlen(event_alias)) 141 - pos = printf(" %s OR %s", event_name, event_alias); 136 + pos = fprintf(fp, " %s OR %s", event_name, event_alias); 142 137 else 143 - pos = printf(" %s", event_name); 138 + pos = fprintf(fp, " %s", event_name); 144 139 145 140 if (!topic && event_type_desc) { 146 141 for (; pos < 53; pos++) 147 - putchar(' '); 148 - printf("[%s]\n", event_type_desc); 142 + fputc(' ', fp); 143 + fprintf(fp, "[%s]\n", event_type_desc); 149 144 } else 150 - putchar('\n'); 145 + fputc('\n', fp); 151 146 152 147 if (desc && print_state->desc) { 153 148 char *desc_with_unit = NULL; ··· 160 155 ? "%s. Unit: %s" : "%s Unit: %s", 161 156 desc, pmu_name); 162 157 } 163 - printf("%*s", 8, "["); 164 - wordwrap(desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0); 165 - printf("]\n"); 158 + fprintf(fp, "%*s", 8, "["); 159 + wordwrap(fp, desc_len > 0 ? 
desc_with_unit : desc, 8, pager_get_columns(), 0); 160 + fprintf(fp, "]\n"); 166 161 free(desc_with_unit); 167 162 } 168 163 long_desc = long_desc ?: desc; 169 164 if (long_desc && print_state->long_desc) { 170 - printf("%*s", 8, "["); 171 - wordwrap(long_desc, 8, pager_get_columns(), 0); 172 - printf("]\n"); 165 + fprintf(fp, "%*s", 8, "["); 166 + wordwrap(fp, long_desc, 8, pager_get_columns(), 0); 167 + fprintf(fp, "]\n"); 173 168 } 174 169 175 170 if (print_state->detailed && encoding_desc) { 176 - printf("%*s", 8, ""); 177 - wordwrap(encoding_desc, 8, pager_get_columns(), 0); 178 - putchar('\n'); 171 + fprintf(fp, "%*s", 8, ""); 172 + wordwrap(fp, encoding_desc, 8, pager_get_columns(), 0); 173 + fputc('\n', fp); 179 174 } 180 175 } 181 176 ··· 189 184 const char *unit __maybe_unused) 190 185 { 191 186 struct print_state *print_state = ps; 187 + FILE *fp = print_state->fp; 192 188 193 189 if (print_state->event_glob && 194 190 (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) && ··· 198 192 199 193 if (!print_state->name_only && !print_state->last_metricgroups) { 200 194 if (print_state->metricgroups) { 201 - printf("\nMetric Groups:\n"); 195 + fprintf(fp, "\nMetric Groups:\n"); 202 196 if (!print_state->metrics) 203 - putchar('\n'); 197 + fputc('\n', fp); 204 198 } else { 205 - printf("\nMetrics:\n\n"); 199 + fprintf(fp, "\nMetrics:\n\n"); 206 200 } 207 201 } 208 202 if (!print_state->last_metricgroups || 209 203 strcmp(print_state->last_metricgroups, group ?: "")) { 210 204 if (group && print_state->metricgroups) { 211 205 if (print_state->name_only) 212 - printf("%s ", group); 206 + fprintf(fp, "%s ", group); 213 207 else if (print_state->metrics) { 214 208 const char *gdesc = describe_metricgroup(group); 215 209 216 210 if (gdesc) 217 - printf("\n%s: [%s]\n", group, gdesc); 211 + fprintf(fp, "\n%s: [%s]\n", group, gdesc); 218 212 else 219 - printf("\n%s:\n", group); 213 + fprintf(fp, "\n%s:\n", group); 220 214 } else 221 - 
printf("%s\n", group); 215 + fprintf(fp, "%s\n", group); 222 216 } 223 217 zfree(&print_state->last_metricgroups); 224 218 print_state->last_metricgroups = strdup(group ?: ""); ··· 229 223 if (print_state->name_only) { 230 224 if (print_state->metrics && 231 225 !strlist__has_entry(print_state->visited_metrics, name)) { 232 - printf("%s ", name); 226 + fprintf(fp, "%s ", name); 233 227 strlist__add(print_state->visited_metrics, name); 234 228 } 235 229 return; 236 230 } 237 - printf(" %s\n", name); 231 + fprintf(fp, " %s\n", name); 238 232 239 233 if (desc && print_state->desc) { 240 - printf("%*s", 8, "["); 241 - wordwrap(desc, 8, pager_get_columns(), 0); 242 - printf("]\n"); 234 + fprintf(fp, "%*s", 8, "["); 235 + wordwrap(fp, desc, 8, pager_get_columns(), 0); 236 + fprintf(fp, "]\n"); 243 237 } 244 238 if (long_desc && print_state->long_desc) { 245 - printf("%*s", 8, "["); 246 - wordwrap(long_desc, 8, pager_get_columns(), 0); 247 - printf("]\n"); 239 + fprintf(fp, "%*s", 8, "["); 240 + wordwrap(fp, long_desc, 8, pager_get_columns(), 0); 241 + fprintf(fp, "]\n"); 248 242 } 249 243 if (expr && print_state->detailed) { 250 - printf("%*s", 8, "["); 251 - wordwrap(expr, 8, pager_get_columns(), 0); 252 - printf("]\n"); 244 + fprintf(fp, "%*s", 8, "["); 245 + wordwrap(fp, expr, 8, pager_get_columns(), 0); 246 + fprintf(fp, "]\n"); 253 247 } 254 248 if (threshold && print_state->detailed) { 255 - printf("%*s", 8, "["); 256 - wordwrap(threshold, 8, pager_get_columns(), 0); 257 - printf("]\n"); 249 + fprintf(fp, "%*s", 8, "["); 250 + wordwrap(fp, threshold, 8, pager_get_columns(), 0); 251 + fprintf(fp, "]\n"); 258 252 } 259 253 } 260 254 261 255 struct json_print_state { 256 + /** @fp: File to write output to. */ 257 + FILE *fp; 262 258 /** Should a separator be printed prior to the next item? 
*/ 263 259 bool need_sep; 264 260 }; 265 261 266 - static void json_print_start(void *print_state __maybe_unused) 262 + static void json_print_start(void *ps) 267 263 { 268 - printf("[\n"); 264 + struct json_print_state *print_state = ps; 265 + FILE *fp = print_state->fp; 266 + 267 + fprintf(fp, "[\n"); 269 268 } 270 269 271 270 static void json_print_end(void *ps) 272 271 { 273 272 struct json_print_state *print_state = ps; 273 + FILE *fp = print_state->fp; 274 274 275 - printf("%s]\n", print_state->need_sep ? "\n" : ""); 275 + fprintf(fp, "%s]\n", print_state->need_sep ? "\n" : ""); 276 276 } 277 277 278 - static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...) 278 + static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, ...) 279 279 { 280 280 va_list args; 281 281 ··· 330 318 } 331 319 } 332 320 va_end(args); 333 - fputs(buf->buf, stdout); 321 + fputs(buf->buf, fp); 334 322 } 335 323 336 324 static void json_print_event(void *ps, const char *pmu_name, const char *topic, ··· 342 330 { 343 331 struct json_print_state *print_state = ps; 344 332 bool need_sep = false; 333 + FILE *fp = print_state->fp; 345 334 struct strbuf buf; 346 335 347 336 strbuf_init(&buf, 0); 348 - printf("%s{\n", print_state->need_sep ? ",\n" : ""); 337 + fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : ""); 349 338 print_state->need_sep = true; 350 339 if (pmu_name) { 351 - fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name); 340 + fix_escape_fprintf(fp, &buf, "\t\"Unit\": \"%S\"", pmu_name); 352 341 need_sep = true; 353 342 } 354 343 if (topic) { 355 - fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic); 344 + fix_escape_fprintf(fp, &buf, "%s\t\"Topic\": \"%S\"", 345 + need_sep ? ",\n" : "", 346 + topic); 356 347 need_sep = true; 357 348 } 358 349 if (event_name) { 359 - fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? 
",\n" : "", 360 - event_name); 350 + fix_escape_fprintf(fp, &buf, "%s\t\"EventName\": \"%S\"", 351 + need_sep ? ",\n" : "", 352 + event_name); 361 353 need_sep = true; 362 354 } 363 355 if (event_alias && strlen(event_alias)) { 364 - fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "", 365 - event_alias); 356 + fix_escape_fprintf(fp, &buf, "%s\t\"EventAlias\": \"%S\"", 357 + need_sep ? ",\n" : "", 358 + event_alias); 366 359 need_sep = true; 367 360 } 368 361 if (scale_unit && strlen(scale_unit)) { 369 - fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", 370 - scale_unit); 362 + fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"", 363 + need_sep ? ",\n" : "", 364 + scale_unit); 371 365 need_sep = true; 372 366 } 373 367 if (event_type_desc) { 374 - fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "", 375 - event_type_desc); 368 + fix_escape_fprintf(fp, &buf, "%s\t\"EventType\": \"%S\"", 369 + need_sep ? ",\n" : "", 370 + event_type_desc); 376 371 need_sep = true; 377 372 } 378 373 if (deprecated) { 379 - fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "", 380 - deprecated ? "1" : "0"); 374 + fix_escape_fprintf(fp, &buf, "%s\t\"Deprecated\": \"%S\"", 375 + need_sep ? ",\n" : "", 376 + deprecated ? "1" : "0"); 381 377 need_sep = true; 382 378 } 383 379 if (desc) { 384 - fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", 385 - desc); 380 + fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"", 381 + need_sep ? ",\n" : "", 382 + desc); 386 383 need_sep = true; 387 384 } 388 385 if (long_desc) { 389 - fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", 390 - long_desc); 386 + fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"", 387 + need_sep ? 
",\n" : "", 388 + long_desc); 391 389 need_sep = true; 392 390 } 393 391 if (encoding_desc) { 394 - fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "", 395 - encoding_desc); 392 + fix_escape_fprintf(fp, &buf, "%s\t\"Encoding\": \"%S\"", 393 + need_sep ? ",\n" : "", 394 + encoding_desc); 396 395 need_sep = true; 397 396 } 398 - printf("%s}", need_sep ? "\n" : ""); 397 + fprintf(fp, "%s}", need_sep ? "\n" : ""); 399 398 strbuf_release(&buf); 400 399 } 401 400 ··· 417 394 { 418 395 struct json_print_state *print_state = ps; 419 396 bool need_sep = false; 397 + FILE *fp = print_state->fp; 420 398 struct strbuf buf; 421 399 422 400 strbuf_init(&buf, 0); 423 - printf("%s{\n", print_state->need_sep ? ",\n" : ""); 401 + fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : ""); 424 402 print_state->need_sep = true; 425 403 if (group) { 426 - fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group); 404 + fix_escape_fprintf(fp, &buf, "\t\"MetricGroup\": \"%S\"", group); 427 405 need_sep = true; 428 406 } 429 407 if (name) { 430 - fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name); 408 + fix_escape_fprintf(fp, &buf, "%s\t\"MetricName\": \"%S\"", 409 + need_sep ? ",\n" : "", 410 + name); 431 411 need_sep = true; 432 412 } 433 413 if (expr) { 434 - fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr); 414 + fix_escape_fprintf(fp, &buf, "%s\t\"MetricExpr\": \"%S\"", 415 + need_sep ? ",\n" : "", 416 + expr); 435 417 need_sep = true; 436 418 } 437 419 if (threshold) { 438 - fix_escape_printf(&buf, "%s\t\"MetricThreshold\": \"%S\"", need_sep ? ",\n" : "", 439 - threshold); 420 + fix_escape_fprintf(fp, &buf, "%s\t\"MetricThreshold\": \"%S\"", 421 + need_sep ? ",\n" : "", 422 + threshold); 440 423 need_sep = true; 441 424 } 442 425 if (unit) { 443 - fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? 
",\n" : "", unit); 426 + fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"", 427 + need_sep ? ",\n" : "", 428 + unit); 444 429 need_sep = true; 445 430 } 446 431 if (desc) { 447 - fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", 448 - desc); 432 + fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"", 433 + need_sep ? ",\n" : "", 434 + desc); 449 435 need_sep = true; 450 436 } 451 437 if (long_desc) { 452 - fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", 453 - long_desc); 438 + fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"", 439 + need_sep ? ",\n" : "", 440 + long_desc); 454 441 need_sep = true; 455 442 } 456 - printf("%s}", need_sep ? "\n" : ""); 443 + fprintf(fp, "%s}", need_sep ? "\n" : ""); 457 444 strbuf_release(&buf); 458 445 } 459 446 ··· 482 449 int cmd_list(int argc, const char **argv) 483 450 { 484 451 int i, ret = 0; 485 - struct print_state default_ps = {}; 486 - struct print_state json_ps = {}; 452 + struct print_state default_ps = { 453 + .fp = stdout, 454 + }; 455 + struct print_state json_ps = { 456 + .fp = stdout, 457 + }; 487 458 void *ps = &default_ps; 488 459 struct print_callbacks print_cb = { 489 460 .print_start = default_print_start, ··· 498 461 }; 499 462 const char *cputype = NULL; 500 463 const char *unit_name = NULL; 464 + const char *output_path = NULL; 501 465 bool json = false; 502 466 struct option list_options[] = { 503 467 OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"), ··· 509 471 "Print longer event descriptions."), 510 472 OPT_BOOLEAN(0, "details", &default_ps.detailed, 511 473 "Print information on the perf event names and expressions used internally by events."), 474 + OPT_STRING('o', "output", &output_path, "file", "output file name"), 512 475 OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated, 513 476 "Print deprecated events."), 514 477 OPT_STRING(0, "cputype", &cputype, "cpu type", ··· 535 496 536 
497 argc = parse_options(argc, argv, list_options, list_usage, 537 498 PARSE_OPT_STOP_AT_NON_OPTION); 499 + 500 + if (output_path) { 501 + default_ps.fp = fopen(output_path, "w"); 502 + json_ps.fp = default_ps.fp; 503 + } 538 504 539 505 setup_pager(); 540 506 ··· 662 618 free(default_ps.last_topic); 663 619 free(default_ps.last_metricgroups); 664 620 strlist__delete(default_ps.visited_metrics); 621 + if (output_path) 622 + fclose(default_ps.fp); 623 + 665 624 return ret; 666 625 }
+2 -2
tools/perf/builtin-record.c
··· 4080 4080 } 4081 4081 4082 4082 if (rec->switch_output.num_files) { 4083 - rec->switch_output.filenames = calloc(sizeof(char *), 4084 - rec->switch_output.num_files); 4083 + rec->switch_output.filenames = calloc(rec->switch_output.num_files, 4084 + sizeof(char *)); 4085 4085 if (!rec->switch_output.filenames) { 4086 4086 err = -EINVAL; 4087 4087 goto out_opts;
+1 -1
tools/perf/builtin-top.c
··· 357 357 358 358 static void prompt_integer(int *target, const char *msg) 359 359 { 360 - char *buf = malloc(0), *p; 360 + char *buf = NULL, *p; 361 361 size_t dummy = 0; 362 362 int tmp; 363 363
+118 -136
tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
··· 114 114 }, 115 115 { 116 116 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", 117 - "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots", 117 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots", 118 118 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 119 119 "MetricName": "tma_alloc_restriction", 120 120 "MetricThreshold": "tma_alloc_restriction > 0.1", ··· 124 124 { 125 125 "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", 126 126 "DefaultMetricgroupName": "TopdownL1", 127 - "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots", 127 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots", 128 128 "MetricGroup": "Default;TopdownL1;tma_L1_group", 129 129 "MetricName": "tma_backend_bound", 130 130 "MetricThreshold": "tma_backend_bound > 0.1", ··· 169 169 }, 170 170 { 171 171 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", 172 - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots", 172 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots", 173 173 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 174 174 "MetricName": "tma_branch_detect", 175 175 "MetricThreshold": "tma_branch_detect > 0.05", ··· 179 179 }, 180 180 { 181 181 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", 182 - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots", 182 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots", 183 183 "MetricGroup": 
"TopdownL2;tma_L2_group;tma_bad_speculation_group", 184 184 "MetricName": "tma_branch_mispredicts", 185 185 "MetricThreshold": "tma_branch_mispredicts > 0.05", ··· 189 189 }, 190 190 { 191 191 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", 192 - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots", 192 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots", 193 193 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 194 194 "MetricName": "tma_branch_resteer", 195 195 "MetricThreshold": "tma_branch_resteer > 0.05", ··· 198 198 }, 199 199 { 200 200 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", 201 - "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots", 201 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots", 202 202 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 203 203 "MetricName": "tma_cisc", 204 204 "MetricThreshold": "tma_cisc > 0.05", ··· 217 217 }, 218 218 { 219 219 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", 220 - "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots", 220 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots", 221 221 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 222 222 "MetricName": "tma_decode", 223 223 "MetricThreshold": "tma_decode > 0.05", ··· 235 235 }, 236 236 { 237 237 "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", 238 - "MetricConstraint": "NO_GROUP_EVENTS", 239 238 "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - 
cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", 240 239 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 241 240 "MetricName": "tma_dram_bound", ··· 244 245 }, 245 246 { 246 247 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", 247 - "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots", 248 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots", 248 249 "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", 249 250 "MetricName": "tma_fast_nuke", 250 251 "MetricThreshold": "tma_fast_nuke > 0.05", ··· 253 254 }, 254 255 { 255 256 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", 256 - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots", 257 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots", 257 258 "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", 258 259 "MetricName": "tma_fetch_bandwidth", 259 260 "MetricThreshold": "tma_fetch_bandwidth > 0.1", ··· 263 264 }, 264 265 { 265 266 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", 266 - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots", 267 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots", 267 268 "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", 268 269 "MetricName": "tma_fetch_latency", 269 270 "MetricThreshold": "tma_fetch_latency > 0.15", ··· 282 283 }, 283 284 { 284 285 
"BriefDescription": "Counts the number of floating point divide operations per uop.", 285 - "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots", 286 + "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots", 286 287 "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", 287 288 "MetricName": "tma_fpdiv_uops", 288 289 "MetricThreshold": "tma_fpdiv_uops > 0.2", ··· 292 293 { 293 294 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", 294 295 "DefaultMetricgroupName": "TopdownL1", 295 - "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots", 296 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots", 296 297 "MetricGroup": "Default;TopdownL1;tma_L1_group", 297 298 "MetricName": "tma_frontend_bound", 298 299 "MetricThreshold": "tma_frontend_bound > 0.2", ··· 302 303 }, 303 304 { 304 305 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", 305 - "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots", 306 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots", 306 307 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 307 308 "MetricName": "tma_icache_misses", 308 309 "MetricThreshold": "tma_icache_misses > 0.05", ··· 329 330 }, 330 331 { 331 332 "BriefDescription": "Instructions Per Cycle", 332 - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks", 333 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks", 333 334 "MetricName": "tma_info_core_ipc", 334 335 "Unit": "cpu_atom" 335 336 }, ··· 341 342 }, 342 343 { 343 344 "BriefDescription": "Uops Per Instruction", 344 - "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", 345 + "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY", 345 346 "MetricName": "tma_info_core_upi", 346 347 "Unit": "cpu_atom" 347 348 }, ··· 365 366 }, 366 367 { 367 368 "BriefDescription": "Ratio of all branches 
which mispredict", 368 - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", 369 + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES", 369 370 "MetricName": "tma_info_inst_mix_branch_mispredict_ratio", 370 371 "Unit": "cpu_atom" 371 372 }, 372 373 { 373 374 "BriefDescription": "Ratio between Mispredicted branches and unknown branches", 374 - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", 375 + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY", 375 376 "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio", 376 377 "Unit": "cpu_atom" 377 378 }, ··· 389 390 }, 390 391 { 391 392 "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", 392 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", 393 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", 393 394 "MetricName": "tma_info_inst_mix_ipbranch", 394 395 "Unit": "cpu_atom" 395 396 }, 396 397 { 397 398 "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", 398 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", 399 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL", 399 400 "MetricName": "tma_info_inst_mix_ipcall", 400 401 "Unit": "cpu_atom" 401 402 }, 402 403 { 403 404 "BriefDescription": "Instructions per Far Branch", 404 - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", 405 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", 405 406 "MetricName": "tma_info_inst_mix_ipfarbranch", 406 407 "Unit": "cpu_atom" 407 408 }, 408 409 { 409 410 "BriefDescription": "Instructions per Load", 410 - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", 411 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS", 411 412 "MetricName": "tma_info_inst_mix_ipload", 412 413 "Unit": "cpu_atom" 
413 414 }, 414 415 { 415 416 "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", 416 - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", 417 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", 417 418 "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken", 418 419 "Unit": "cpu_atom" 419 420 }, 420 421 { 421 422 "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", 422 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", 423 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", 423 424 "MetricName": "tma_info_inst_mix_ipmisp_cond_taken", 424 425 "Unit": "cpu_atom" 425 426 }, 426 427 { 427 428 "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", 428 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", 429 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT", 429 430 "MetricName": "tma_info_inst_mix_ipmisp_indirect", 430 431 "Unit": "cpu_atom" 431 432 }, 432 433 { 433 434 "BriefDescription": "Instructions per retired return Branch Misprediction", 434 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", 435 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN", 435 436 "MetricName": "tma_info_inst_mix_ipmisp_ret", 436 437 "Unit": "cpu_atom" 437 438 }, 438 439 { 439 440 "BriefDescription": "Instructions per retired Branch Misprediction", 440 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", 441 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", 441 442 "MetricName": "tma_info_inst_mix_ipmispredict", 442 443 "Unit": "cpu_atom" 443 444 }, 444 445 { 445 446 "BriefDescription": "Instructions per Store", 446 - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", 447 
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES", 447 448 "MetricName": "tma_info_inst_mix_ipstore", 448 449 "Unit": "cpu_atom" 449 450 }, ··· 479 480 }, 480 481 { 481 482 "BriefDescription": "Cycle cost per DRAM hit", 482 - "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", 483 + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", 483 484 "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit", 484 485 "Unit": "cpu_atom" 485 486 }, 486 487 { 487 488 "BriefDescription": "Cycle cost per L2 hit", 488 - "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", 489 + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT", 489 490 "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit", 490 491 "Unit": "cpu_atom" 491 492 }, 492 493 { 493 494 "BriefDescription": "Cycle cost per LLC hit", 494 - "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", 495 + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT", 495 496 "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit", 496 497 "Unit": "cpu_atom" 497 498 }, ··· 503 504 }, 504 505 { 505 506 "BriefDescription": "Average CPU Utilization", 506 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", 507 + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC", 507 508 "MetricName": "tma_info_system_cpu_utilization", 508 509 "Unit": "cpu_atom" 509 510 }, ··· 523 524 }, 524 525 { 525 526 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", 526 - "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots", 527 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots", 527 528 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 528 529 "MetricName": "tma_itlb_misses", 529 530 
"MetricThreshold": "tma_itlb_misses > 0.05", ··· 532 533 }, 533 534 { 534 535 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", 535 - "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks", 536 + "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks", 536 537 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 537 538 "MetricName": "tma_l1_bound", 538 539 "MetricThreshold": "tma_l1_bound > 0.1", ··· 541 542 }, 542 543 { 543 544 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", 544 - "MetricConstraint": "NO_GROUP_EVENTS", 545 545 "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", 546 546 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 547 547 "MetricName": "tma_l2_bound", ··· 550 552 }, 551 553 { 552 554 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", 553 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 554 555 "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", 555 556 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 556 557 "MetricName": "tma_l3_bound", ··· 568 571 }, 569 572 { 570 573 "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", 571 - "MetricExpr": 
"TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots", 574 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots", 572 575 "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", 573 576 "MetricName": "tma_machine_clears", 574 577 "MetricThreshold": "tma_machine_clears > 0.05", ··· 578 581 }, 579 582 { 580 583 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", 581 - "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots", 584 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots", 582 585 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 583 586 "MetricName": "tma_mem_scheduler", 584 587 "MetricThreshold": "tma_mem_scheduler > 0.1", ··· 587 590 }, 588 591 { 589 592 "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.", 590 - "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", 593 + "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", 591 594 "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", 592 595 "MetricName": "tma_memory_bound", 593 596 "MetricThreshold": "tma_memory_bound > 0.2", ··· 606 609 }, 607 610 { 608 611 "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", 609 - "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots", 612 + "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots", 610 613 "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", 611 614 "MetricName": "tma_ms_uops", 612 615 "MetricThreshold": "tma_ms_uops > 0.05", ··· 617 620 }, 618 621 { 619 622 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due 
to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", 620 - "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots", 623 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots", 621 624 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 622 625 "MetricName": "tma_non_mem_scheduler", 623 626 "MetricThreshold": "tma_non_mem_scheduler > 0.1", ··· 626 629 }, 627 630 { 628 631 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", 629 - "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots", 632 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots", 630 633 "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", 631 634 "MetricName": "tma_nuke", 632 635 "MetricThreshold": "tma_nuke > 0.05", ··· 635 638 }, 636 639 { 637 640 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", 638 - "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots", 641 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots", 639 642 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 640 643 "MetricName": "tma_other_fb", 641 644 "MetricThreshold": "tma_other_fb > 0.05", ··· 644 647 }, 645 648 { 646 649 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", 647 - "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks", 650 + "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks", 648 651 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 649 652 "MetricName": "tma_other_l1", 650 653 "MetricThreshold": "tma_other_l1 > 0.05", ··· 680 683 }, 681 684 { 682 685 
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", 683 - "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots", 686 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots", 684 687 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 685 688 "MetricName": "tma_predecode", 686 689 "MetricThreshold": "tma_predecode > 0.05", ··· 689 692 }, 690 693 { 691 694 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", 692 - "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots", 695 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots", 693 696 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 694 697 "MetricName": "tma_register", 695 698 "MetricThreshold": "tma_register > 0.1", ··· 698 701 }, 699 702 { 700 703 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", 701 - "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots", 704 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots", 702 705 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 703 706 "MetricName": "tma_reorder_buffer", 704 707 "MetricThreshold": "tma_reorder_buffer > 0.1", ··· 719 722 { 720 723 "BriefDescription": "Counts the number of issue slots that result in retirement slots.", 721 724 "DefaultMetricgroupName": "TopdownL1", 722 - "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots", 725 + "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots", 723 726 "MetricGroup": "Default;TopdownL1;tma_L1_group", 724 727 "MetricName": "tma_retiring", 725 728 "MetricThreshold": "tma_retiring > 0.75", ··· 738 741 }, 739 742 { 740 743 "BriefDescription": "Counts the number of issue 
slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", 741 - "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots", 744 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots", 742 745 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 743 746 "MetricName": "tma_serialization", 744 747 "MetricThreshold": "tma_serialization > 0.1", ··· 765 768 }, 766 769 { 767 770 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", 768 - "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks", 771 + "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks", 769 772 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 770 773 "MetricName": "tma_stlb_hit", 771 774 "MetricThreshold": "tma_stlb_hit > 0.05", ··· 774 777 }, 775 778 { 776 779 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", 777 - "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks", 780 + "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks", 778 781 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 779 782 "MetricName": "tma_stlb_miss", 780 783 "MetricThreshold": "tma_stlb_miss > 0.05", ··· 792 795 }, 793 796 { 794 797 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", 795 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 796 - "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", 798 + "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks", 797 799 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 798 800 "MetricName": "tma_store_fwd_blk", 799 801 "MetricThreshold": 
"tma_store_fwd_blk > 0.05", ··· 871 875 }, 872 876 { 873 877 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers", 874 - "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches", 878 + "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches", 875 879 "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group", 876 880 "MetricName": "tma_branch_resteers", 877 881 "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 901 905 }, 902 906 { 903 907 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", 904 - "MetricConstraint": "NO_GROUP_EVENTS", 905 908 "MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", 906 909 "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 907 910 "MetricName": "tma_contested_accesses", ··· 922 927 }, 923 928 { 924 929 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", 925 - "MetricConstraint": "NO_GROUP_EVENTS", 926 930 "MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + 
cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", 927 931 "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 928 932 "MetricName": "tma_data_sharing", ··· 942 948 }, 943 949 { 944 950 "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active", 945 - "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks", 951 + "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks", 946 952 "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", 947 953 "MetricName": "tma_divider", 948 954 "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)", ··· 952 958 }, 953 959 { 954 960 "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", 955 - "MetricConstraint": "NO_GROUP_EVENTS", 956 961 "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks", 957 962 "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 958 963 "MetricName": "tma_dram_bound", ··· 972 979 }, 973 980 { 974 981 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines", 975 - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks", 982 + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks", 976 983 "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", 977 984 "MetricName": "tma_dsb_switches", 978 985 "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 1012 1019 }, 1013 1020 { 1014 1021 "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed", 1015 - 
"MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks", 1022 + "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks", 1016 1023 "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group", 1017 1024 "MetricName": "tma_fb_full", 1018 1025 "MetricThreshold": "tma_fb_full > 0.3", ··· 1147 1154 }, 1148 1155 { 1149 1156 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses", 1150 - "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks", 1157 + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks", 1151 1158 "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group", 1152 1159 "MetricName": "tma_icache_misses", 1153 1160 "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 1157 1164 }, 1158 1165 { 1159 1166 "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", 1160 - "MetricConstraint": "NO_GROUP_EVENTS", 1161 1167 "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", 1162 1168 "MetricGroup": "Bad;BrMispredicts;tma_issueBM", 1163 1169 "MetricName": "tma_info_bad_spec_branch_misprediction_cost", ··· 1165 1173 }, 1166 1174 { 1167 1175 "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).", 1168 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN", 1176 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_NTAKEN", 1169 1177 "MetricGroup": "Bad;BrMispredicts", 1170 1178 "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken", 1171 1179 "MetricThreshold": 
"tma_info_bad_spec_ipmisp_cond_ntaken < 200", ··· 1173 1181 }, 1174 1182 { 1175 1183 "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).", 1176 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", 1184 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", 1177 1185 "MetricGroup": "Bad;BrMispredicts", 1178 1186 "MetricName": "tma_info_bad_spec_ipmisp_cond_taken", 1179 1187 "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200", ··· 1189 1197 }, 1190 1198 { 1191 1199 "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).", 1192 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET", 1200 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RET", 1193 1201 "MetricGroup": "Bad;BrMispredicts", 1194 1202 "MetricName": "tma_info_bad_spec_ipmisp_ret", 1195 1203 "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500", ··· 1197 1205 }, 1198 1206 { 1199 1207 "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)", 1200 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", 1208 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", 1201 1209 "MetricGroup": "Bad;BadSpec;BrMispredicts", 1202 1210 "MetricName": "tma_info_bad_spec_ipmispredict", 1203 1211 "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200", ··· 1205 1213 }, 1206 1214 { 1207 1215 "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", 1208 - "MetricConstraint": "NO_GROUP_EVENTS", 1209 1216 "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", 1210 1217 "MetricGroup": "Cor;SMT", 1211 1218 "MetricName": "tma_info_botlnk_l0_core_bound_likely", 
··· 1213 1222 }, 1214 1223 { 1215 1224 "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", 1216 - "MetricConstraint": "NO_GROUP_EVENTS", 1217 1225 "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", 1218 1226 "MetricGroup": "DSBmiss;Fed;tma_issueFB", 1219 1227 "MetricName": "tma_info_botlnk_l2_dsb_misses", ··· 1222 1232 }, 1223 1233 { 1224 1234 "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", 1225 - "MetricConstraint": "NO_GROUP_EVENTS", 1226 1235 "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 1227 1236 "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", 1228 1237 "MetricName": "tma_info_botlnk_l2_ic_misses", ··· 1231 1242 }, 1232 1243 { 1233 1244 "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", 1234 - "MetricConstraint": "NO_GROUP_EVENTS", 1235 1245 "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", 1236 1246 "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", 1237 1247 "MetricName": "tma_info_bottleneck_big_code", ··· 1249 1261 }, 1250 1262 { 1251 1263 "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", 1252 - "MetricConstraint": "NO_GROUP_EVENTS", 1253 1264 "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses 
+ tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", 1254 1265 "MetricGroup": "Fed;FetchBW;Frontend", 1255 1266 "MetricName": "tma_info_bottleneck_instruction_fetch_bw", ··· 1257 1270 }, 1258 1271 { 1259 1272 "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", 1260 - "MetricConstraint": "NO_GROUP_EVENTS", 1261 1273 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", 1262 1274 "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", 1263 1275 "MetricName": "tma_info_bottleneck_memory_bandwidth", ··· 1266 1280 }, 1267 1281 { 1268 1282 "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", 1269 - "MetricConstraint": "NO_GROUP_EVENTS", 1270 1283 "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", 1271 1284 "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", 1272 1285 "MetricName": "tma_info_bottleneck_memory_data_tlbs", ··· 1275 1290 }, 1276 1291 { 1277 1292 "BriefDescription": "Total 
pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", 1278 - "MetricConstraint": "NO_GROUP_EVENTS", 1279 1293 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", 1280 1294 "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", 1281 1295 "MetricName": "tma_info_bottleneck_memory_latency", ··· 1284 1300 }, 1285 1301 { 1286 1302 "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", 1287 - "MetricConstraint": "NO_GROUP_EVENTS", 1288 1303 "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 1289 1304 "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", 1290 1305 "MetricName": "tma_info_bottleneck_mispredictions", ··· 1300 1317 }, 1301 1318 { 1302 1319 "BriefDescription": "Fraction of branches that are non-taken conditionals", 1303 - "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES", 1320 + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", 1304 1321 "MetricGroup": "Bad;Branches;CodeGen;PGO", 1305 1322 "MetricName": "tma_info_branches_cond_nt", 1306 1323 "Unit": "cpu_core" 1307 1324 }, 1308 1325 { 1309 1326 "BriefDescription": "Fraction of branches that are taken conditionals", 1310 - "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES", 1327 + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", 1311 
1328 "MetricGroup": "Bad;Branches;CodeGen;PGO", 1312 1329 "MetricName": "tma_info_branches_cond_tk", 1313 1330 "Unit": "cpu_core" ··· 1335 1352 }, 1336 1353 { 1337 1354 "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", 1338 - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks", 1355 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks", 1339 1356 "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group", 1340 1357 "MetricName": "tma_info_core_coreipc", 1341 1358 "Unit": "cpu_core" ··· 1357 1374 }, 1358 1375 { 1359 1376 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core", 1360 - "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", 1377 + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", 1361 1378 "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", 1362 1379 "MetricName": "tma_info_core_ilp", 1363 1380 "Unit": "cpu_core" 1364 1381 }, 1365 1382 { 1366 1383 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", 1367 - "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@", 1384 + "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@", 1368 1385 "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB", 1369 1386 "MetricName": "tma_info_frontend_dsb_coverage", 1370 1387 "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35", ··· 1373 1390 }, 1374 1391 { 1375 1392 "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.", 1376 - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", 1393 + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / 
cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", 1377 1394 "MetricGroup": "DSBmiss", 1378 1395 "MetricName": "tma_info_frontend_dsb_switch_cost", 1379 1396 "Unit": "cpu_core" 1380 1397 }, 1381 1398 { 1382 1399 "BriefDescription": "Average number of Uops issued by front-end when it issued something", 1383 - "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", 1400 + "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", 1384 1401 "MetricGroup": "Fed;FetchBW", 1385 1402 "MetricName": "tma_info_frontend_fetch_upc", 1386 1403 "Unit": "cpu_core" 1387 1404 }, 1388 1405 { 1389 1406 "BriefDescription": "Average Latency for L1 instruction cache misses", 1390 - "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", 1407 + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", 1391 1408 "MetricGroup": "Fed;FetchLat;IcMiss", 1392 1409 "MetricName": "tma_info_frontend_icache_miss_latency", 1393 1410 "Unit": "cpu_core" 1394 1411 }, 1395 1412 { 1396 1413 "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)", 1397 - "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", 1414 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FRONTEND_RETIRED.ANY_DSB_MISS", 1398 1415 "MetricGroup": "DSBmiss;Fed", 1399 1416 "MetricName": "tma_info_frontend_ipdsb_miss_ret", 1400 1417 "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50", ··· 1423 1440 }, 1424 1441 { 1425 1442 "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", 1426 - "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@", 1443 + "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@", 1427 1444 "MetricGroup": "Fed;LSD", 1428 1445 "MetricName": "tma_info_frontend_lsd_coverage", 1429 1446 "Unit": "cpu_core" 1430 1447 }, 1431 1448 { 1432 1449 "BriefDescription": "Branch instructions per 
taken branch.", 1433 - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", 1450 + "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN", 1434 1451 "MetricGroup": "Branches;Fed;PGO", 1435 1452 "MetricName": "tma_info_inst_mix_bptkbranch", 1436 1453 "Unit": "cpu_core" ··· 1445 1462 }, 1446 1463 { 1447 1464 "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", 1448 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", 1465 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", 1449 1466 "MetricGroup": "Flops;InsType", 1450 1467 "MetricName": "tma_info_inst_mix_iparith", 1451 1468 "MetricThreshold": "tma_info_inst_mix_iparith < 10", ··· 1454 1471 }, 1455 1472 { 1456 1473 "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", 1457 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", 1474 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", 1458 1475 "MetricGroup": "Flops;FpVector;InsType", 1459 1476 "MetricName": "tma_info_inst_mix_iparith_avx128", 1460 1477 "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10", ··· 1463 1480 }, 1464 1481 { 1465 1482 "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", 1466 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1483 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ 
/ (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1467 1484 "MetricGroup": "Flops;FpVector;InsType", 1468 1485 "MetricName": "tma_info_inst_mix_iparith_avx256", 1469 1486 "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10", ··· 1472 1489 }, 1473 1490 { 1474 1491 "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", 1475 - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", 1492 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", 1476 1493 "MetricGroup": "Flops;FpScalar;InsType", 1477 1494 "MetricName": "tma_info_inst_mix_iparith_scalar_dp", 1478 1495 "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10", ··· 1481 1498 }, 1482 1499 { 1483 1500 "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", 1484 - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", 1501 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", 1485 1502 "MetricGroup": "Flops;FpScalar;InsType", 1486 1503 "MetricName": "tma_info_inst_mix_iparith_scalar_sp", 1487 1504 "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10", ··· 1490 1507 }, 1491 1508 { 1492 1509 "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", 1493 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", 1510 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", 1494 1511 "MetricGroup": "Branches;Fed;InsType", 1495 1512 "MetricName": "tma_info_inst_mix_ipbranch", 1496 1513 "MetricThreshold": "tma_info_inst_mix_ipbranch < 8", ··· 1498 1515 }, 1499 1516 { 1500 1517 "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", 1501 - "MetricExpr": "INST_RETIRED.ANY / 
BR_INST_RETIRED.NEAR_CALL", 1518 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_CALL", 1502 1519 "MetricGroup": "Branches;Fed;PGO", 1503 1520 "MetricName": "tma_info_inst_mix_ipcall", 1504 1521 "MetricThreshold": "tma_info_inst_mix_ipcall < 200", ··· 1506 1523 }, 1507 1524 { 1508 1525 "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", 1509 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1526 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1510 1527 "MetricGroup": "Flops;InsType", 1511 1528 "MetricName": "tma_info_inst_mix_ipflop", 1512 1529 "MetricThreshold": "tma_info_inst_mix_ipflop < 10", ··· 1514 1531 }, 1515 1532 { 1516 1533 "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", 1517 - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", 1534 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_LOADS", 1518 1535 "MetricGroup": "InsType", 1519 1536 "MetricName": "tma_info_inst_mix_ipload", 1520 1537 "MetricThreshold": "tma_info_inst_mix_ipload < 3", ··· 1522 1539 }, 1523 1540 { 1524 1541 "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", 1525 - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", 1542 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / 
MEM_INST_RETIRED.ALL_STORES", 1526 1543 "MetricGroup": "InsType", 1527 1544 "MetricName": "tma_info_inst_mix_ipstore", 1528 1545 "MetricThreshold": "tma_info_inst_mix_ipstore < 8", ··· 1530 1547 }, 1531 1548 { 1532 1549 "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)", 1533 - "MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", 1550 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", 1534 1551 "MetricGroup": "Prefetches", 1535 1552 "MetricName": "tma_info_inst_mix_ipswpf", 1536 1553 "MetricThreshold": "tma_info_inst_mix_ipswpf < 100", ··· 1538 1555 }, 1539 1556 { 1540 1557 "BriefDescription": "Instruction per taken branch", 1541 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", 1558 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_TAKEN", 1542 1559 "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB", 1543 1560 "MetricName": "tma_info_inst_mix_iptb", 1544 1561 "MetricThreshold": "tma_info_inst_mix_iptb < 13", ··· 1638 1655 }, 1639 1656 { 1640 1657 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)", 1641 - "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY", 1658 + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY", 1642 1659 "MetricGroup": "Mem;MemoryBound;MemoryLat", 1643 1660 "MetricName": "tma_info_memory_load_miss_real_latency", 1644 1661 "Unit": "cpu_core" 1645 1662 }, 1646 1663 { 1647 1664 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss", 1648 - "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", 1665 + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES", 1649 1666 "MetricGroup": "Mem;MemoryBW;MemoryBound", 1650 1667 
"MetricName": "tma_info_memory_mlp", 1651 1668 "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", ··· 1653 1670 }, 1654 1671 { 1655 1672 "BriefDescription": "Average Parallel L2 cache miss data reads", 1656 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", 1673 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", 1657 1674 "MetricGroup": "Memory_BW;Offcore", 1658 1675 "MetricName": "tma_info_memory_oro_data_l2_mlp", 1659 1676 "Unit": "cpu_core" 1660 1677 }, 1661 1678 { 1662 1679 "BriefDescription": "Average Latency for L2 cache miss demand Loads", 1663 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD", 1680 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD", 1664 1681 "MetricGroup": "Memory_Lat;Offcore", 1665 1682 "MetricName": "tma_info_memory_oro_load_l2_miss_latency", 1666 1683 "Unit": "cpu_core" 1667 1684 }, 1668 1685 { 1669 1686 "BriefDescription": "Average Parallel L2 cache miss demand Loads", 1670 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", 1687 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", 1671 1688 "MetricGroup": "Memory_BW;Offcore", 1672 1689 "MetricName": "tma_info_memory_oro_load_l2_mlp", 1673 1690 "Unit": "cpu_core" 1674 1691 }, 1675 1692 { 1676 1693 "BriefDescription": "Average Latency for L3 cache miss demand Loads", 1677 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", 1694 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / 
OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", 1678 1695 "MetricGroup": "Memory_Lat;Offcore", 1679 1696 "MetricName": "tma_info_memory_oro_load_l3_miss_latency", 1680 1697 "Unit": "cpu_core" ··· 1738 1755 }, 1739 1756 { 1740 1757 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread", 1741 - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", 1758 + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", 1742 1759 "MetricGroup": "Cor;Pipeline;PortsUtil;SMT", 1743 1760 "MetricName": "tma_info_pipeline_execute", 1744 1761 "Unit": "cpu_core" 1745 1762 }, 1746 1763 { 1747 1764 "BriefDescription": "Instructions per a microcode Assist invocation", 1748 - "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", 1765 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", 1749 1766 "MetricGroup": "Pipeline;Ret;Retire", 1750 1767 "MetricName": "tma_info_pipeline_ipassist", 1751 1768 "MetricThreshold": "tma_info_pipeline_ipassist < 100e3", ··· 1761 1778 }, 1762 1779 { 1763 1780 "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", 1764 - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", 1781 + "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", 1765 1782 "MetricGroup": "Pipeline;Ret", 1766 1783 "MetricName": "tma_info_pipeline_strings_cycles", 1767 1784 "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1", ··· 1776 1793 }, 1777 1794 { 1778 1795 "BriefDescription": "Average CPU Utilization", 1779 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", 1796 + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC", 1780 1797 "MetricGroup": "HPC;Summary", 1781 1798 "MetricName": "tma_info_system_cpu_utilization", 1782 1799 "Unit": "cpu_core" ··· 1799 1816 }, 1800 1817 { 1801 1818 
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", 1802 - "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", 1819 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", 1803 1820 "MetricGroup": "Branches;OS", 1804 1821 "MetricName": "tma_info_system_ipfarbranch", 1805 1822 "MetricThreshold": "tma_info_system_ipfarbranch < 1e6", ··· 1830 1847 }, 1831 1848 { 1832 1849 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", 1850 + "MetricConstraint": "NO_GROUP_EVENTS", 1833 1851 "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD", 1834 1852 "MetricGroup": "Mem;MemoryLat;SoC", 1835 1853 "MetricName": "tma_info_system_mem_read_latency", ··· 1839 1855 }, 1840 1856 { 1841 1857 "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)", 1858 + "MetricConstraint": "NO_GROUP_EVENTS", 1842 1859 "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL", 1843 1860 "MetricGroup": "Mem;SoC", 1844 1861 "MetricName": "tma_info_system_mem_request_latency", ··· 1882 1897 }, 1883 1898 { 1884 1899 "BriefDescription": "The ratio of Executed- by Issued-Uops", 1885 - "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", 1900 + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / UOPS_ISSUED.ANY", 1886 1901 "MetricGroup": "Cor;Pipeline", 1887 1902 "MetricName": "tma_info_thread_execute_per_issue", 1888 1903 "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. 
Ratio < 1 suggest high rate of \"execute\" at rename stage.", ··· 1890 1905 }, 1891 1906 { 1892 1907 "BriefDescription": "Instructions Per Cycle (per Logical Processor)", 1893 - "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks", 1908 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks", 1894 1909 "MetricGroup": "Ret;Summary", 1895 1910 "MetricName": "tma_info_thread_ipc", 1896 1911 "Unit": "cpu_core" ··· 1957 1972 }, 1958 1973 { 1959 1974 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses", 1960 - "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks", 1975 + "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks", 1961 1976 "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group", 1962 1977 "MetricName": "tma_itlb_misses", 1963 1978 "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 1977 1992 }, 1978 1993 { 1979 1994 "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", 1980 - "MetricConstraint": "NO_GROUP_EVENTS", 1981 1995 "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks", 1982 1996 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1983 1997 "MetricName": "tma_l2_bound", ··· 1987 2003 }, 1988 2004 { 1989 2005 "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", 1990 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1991 2006 "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks", 1992 2007 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1993 2008 "MetricName": "tma_l3_bound", ··· 2007 2024 }, 2008 
2025 { 2009 2026 "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)", 2010 - "MetricExpr": "DECODE.LCP / tma_info_thread_clks", 2027 + "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks", 2011 2028 "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", 2012 2029 "MetricName": "tma_lcp", 2013 2030 "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 2028 2045 }, 2029 2046 { 2030 2047 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations", 2031 - "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)", 2048 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)", 2032 2049 "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", 2033 2050 "MetricName": "tma_load_op_utilization", 2034 2051 "MetricThreshold": "tma_load_op_utilization > 0.6", ··· 2047 2064 }, 2048 2065 { 2049 2066 "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk", 2050 - "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks", 2067 + "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks", 2051 2068 "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group", 2052 2069 "MetricName": "tma_load_stlb_miss", 2053 2070 "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", ··· 2056 2073 }, 2057 2074 { 2058 2075 "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", 2059 - "MetricConstraint": "NO_GROUP_EVENTS", 2060 2076 "MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + 
cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks", 2061 2077 "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", 2062 2078 "MetricName": "tma_lock_latency", ··· 2118 2136 }, 2119 2137 { 2120 2138 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", 2139 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2121 2140 "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks", 2122 2141 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 2123 2142 "MetricName": "tma_memory_fence", ··· 2128 2145 }, 2129 2146 { 2130 2147 "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", 2131 - "MetricConstraint": "NO_GROUP_EVENTS", 2132 2148 "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)", 2133 2149 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 2134 2150 "MetricName": "tma_memory_operations", ··· 2137 2155 }, 2138 2156 { 2139 2157 "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit", 2140 - "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots", 2158 + "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots", 2141 2159 "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS", 2142 2160 "MetricName": "tma_microcode_sequencer", 2143 2161 "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1", ··· 2207 2225 }, 2208 2226 { 2209 2227 "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - 
remaining means not covered by other sibling nodes", 2210 - "MetricConstraint": "NO_GROUP_EVENTS", 2211 2228 "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", 2212 2229 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 2213 2230 "MetricName": "tma_other_light_ops", ··· 2227 2246 }, 2228 2247 { 2229 2248 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)", 2230 - "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks", 2249 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks", 2231 2250 "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", 2232 2251 "MetricName": "tma_port_0", 2233 2252 "MetricThreshold": "tma_port_0 > 0.6", ··· 2237 2256 }, 2238 2257 { 2239 2258 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)", 2240 - "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks", 2259 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks", 2241 2260 "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", 2242 2261 "MetricName": "tma_port_1", 2243 2262 "MetricThreshold": "tma_port_1 > 0.6", ··· 2247 2266 }, 2248 2267 { 2249 2268 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)", 2250 - "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks", 2269 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks", 2251 2270 "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", 2252 2271 "MetricName": "tma_port_6", 2253 2272 "MetricThreshold": "tma_port_6 > 0.6", ··· 2277 2296 }, 2278 2297 { 
2279 2298 "BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 2280 - "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks", 2299 + "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks", 2281 2300 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group", 2282 2301 "MetricName": "tma_ports_utilized_1", 2283 2302 "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", ··· 2287 2306 }, 2288 2307 { 2289 2308 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 2290 - "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", 2309 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2310 + "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks", 2291 2311 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", 2292 2312 "MetricName": "tma_ports_utilized_2", 2293 2313 "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", ··· 2298 2316 }, 2299 2317 { 2300 2318 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 2301 - "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", 2319 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2320 + "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks", 2302 2321 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", 2303 2322 "MetricName": "tma_ports_utilized_3m", 2304 2323 
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", ··· 2321 2338 }, 2322 2339 { 2323 2340 "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations", 2324 - "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks", 2341 + "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks", 2325 2342 "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group", 2326 2343 "MetricName": "tma_serializing_operation", 2327 2344 "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))", ··· 2331 2348 }, 2332 2349 { 2333 2350 "BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.", 2334 - "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)", 2351 + "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)", 2335 2352 "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group", 2336 2353 "MetricName": "tma_shuffles", 2337 2354 "MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)", ··· 2340 2357 }, 2341 2358 { 2342 2359 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", 2343 - "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", 2360 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2361 + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks", 2344 2362 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 2345 2363 "MetricName": "tma_slow_pause", 2346 2364 "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & 
(tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))", ··· 2361 2377 }, 2362 2378 { 2363 2379 "BriefDescription": "This metric represents rate of split store accesses", 2364 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2365 - "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", 2380 + "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ / tma_info_core_core_clks", 2366 2381 "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", 2367 2382 "MetricName": "tma_split_stores", 2368 2383 "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", ··· 2381 2398 }, 2382 2399 { 2383 2400 "BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write", 2384 - "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks", 2401 + "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks", 2385 2402 "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 2386 2403 "MetricName": "tma_store_bound", 2387 2404 "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", ··· 2391 2408 }, 2392 2409 { 2393 2410 "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", 2394 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2395 2411 "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks", 2396 2412 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 2397 2413 "MetricName": "tma_store_fwd_blk", ··· 2430 2448 }, 2431 2449 { 2432 2450 "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk", 2433 - "MetricExpr": 
"DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks", 2451 + "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks", 2434 2452 "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group", 2435 2453 "MetricName": "tma_store_stlb_miss", 2436 2454 "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", ··· 2449 2467 }, 2450 2468 { 2451 2469 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears", 2452 - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks", 2470 + "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks", 2453 2471 "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group", 2454 2472 "MetricName": "tma_unknown_branches", 2455 2473 "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-4
tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
··· 195 195 }, 196 196 { 197 197 "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", 198 - "MetricConstraint": "NO_GROUP_EVENTS", 199 198 "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", 200 199 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 201 200 "MetricName": "tma_dram_bound", ··· 456 457 }, 457 458 { 458 459 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", 459 - "MetricConstraint": "NO_GROUP_EVENTS", 460 460 "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", 461 461 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 462 462 "MetricName": "tma_l2_bound", ··· 464 466 }, 465 467 { 466 468 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", 467 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 468 469 "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", 469 470 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 470 471 "MetricName": "tma_l3_bound", ··· 680 683 }, 681 684 { 682 685 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", 683 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 684 686 "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", 685 687 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 686 688 
"MetricName": "tma_store_fwd_blk",
+5 -20
tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
··· 400 400 }, 401 401 { 402 402 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", 403 - "MetricConstraint": "NO_GROUP_EVENTS", 404 403 "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", 405 404 "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 406 405 "MetricName": "tma_contested_accesses", ··· 420 421 }, 421 422 { 422 423 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", 423 - "MetricConstraint": "NO_GROUP_EVENTS", 424 424 "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", 425 425 "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 426 426 "MetricName": "tma_data_sharing", ··· 447 449 }, 448 450 { 449 451 "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", 450 - "MetricConstraint": "NO_GROUP_EVENTS", 451 452 "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)", 452 453 "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 453 454 "MetricName": 
"tma_dram_bound", ··· 653 656 }, 654 657 { 655 658 "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", 656 - "MetricConstraint": "NO_GROUP_EVENTS", 657 659 "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", 658 660 "MetricGroup": "Bad;BrMispredicts;tma_issueBM", 659 661 "MetricName": "tma_info_bad_spec_branch_misprediction_cost", ··· 695 699 }, 696 700 { 697 701 "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", 698 - "MetricConstraint": "NO_GROUP_EVENTS", 699 702 "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", 700 703 "MetricGroup": "Cor;SMT", 701 704 "MetricName": "tma_info_botlnk_l0_core_bound_likely", ··· 702 707 }, 703 708 { 704 709 "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", 705 - "MetricConstraint": "NO_GROUP_EVENTS", 706 710 "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))", 707 711 "MetricGroup": "DSBmiss;Fed;tma_issueFB", 708 712 "MetricName": "tma_info_botlnk_l2_dsb_misses", ··· 710 716 }, 711 717 { 712 718 "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", 713 - "MetricConstraint": "NO_GROUP_EVENTS", 714 719 "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 715 720 "MetricGroup": 
"Fed;FetchLat;IcMiss;tma_issueFL", 716 721 "MetricName": "tma_info_botlnk_l2_ic_misses", ··· 718 725 }, 719 726 { 720 727 "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", 721 - "MetricConstraint": "NO_GROUP_EVENTS", 722 728 "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", 723 729 "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", 724 730 "MetricName": "tma_info_bottleneck_big_code", ··· 734 742 }, 735 743 { 736 744 "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", 737 - "MetricConstraint": "NO_GROUP_EVENTS", 738 745 "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", 739 746 "MetricGroup": "Fed;FetchBW;Frontend", 740 747 "MetricName": "tma_info_bottleneck_instruction_fetch_bw", ··· 741 750 }, 742 751 { 743 752 "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", 744 - "MetricConstraint": "NO_GROUP_EVENTS", 745 753 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + 
tma_store_fwd_blk))", 746 754 "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", 747 755 "MetricName": "tma_info_bottleneck_memory_bandwidth", ··· 749 759 }, 750 760 { 751 761 "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", 752 - "MetricConstraint": "NO_GROUP_EVENTS", 753 762 "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", 754 763 "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", 755 764 "MetricName": "tma_info_bottleneck_memory_data_tlbs", ··· 757 768 }, 758 769 { 759 770 "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", 760 - "MetricConstraint": "NO_GROUP_EVENTS", 761 771 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))", 762 772 "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", 763 773 "MetricName": "tma_info_bottleneck_memory_latency", ··· 765 777 }, 766 778 { 767 779 "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", 768 - 
"MetricConstraint": "NO_GROUP_EVENTS", 769 780 "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 770 781 "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", 771 782 "MetricName": "tma_info_bottleneck_mispredictions", ··· 1288 1301 }, 1289 1302 { 1290 1303 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", 1304 + "MetricConstraint": "NO_GROUP_EVENTS", 1291 1305 "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)", 1292 1306 "MetricGroup": "Mem;MemoryLat;SoC", 1293 1307 "MetricName": "tma_info_system_mem_read_latency", ··· 1443 1455 }, 1444 1456 { 1445 1457 "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", 1446 - "MetricConstraint": "NO_GROUP_EVENTS", 1447 1458 "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks", 1448 1459 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1449 1460 "MetricName": "tma_l2_bound", ··· 1452 1465 }, 1453 1466 { 1454 1467 "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", 1455 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1456 1468 "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", 1457 1469 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1458 1470 "MetricName": "tma_l3_bound", ··· 1524 1538 }, 1525 1539 { 1526 1540 "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", 1527 - "MetricConstraint": "NO_GROUP_EVENTS", 1528 1541 "MetricExpr": 
"(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks", 1529 1542 "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", 1530 1543 "MetricName": "tma_lock_latency", ··· 1581 1596 }, 1582 1597 { 1583 1598 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", 1599 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1584 1600 "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks", 1585 1601 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 1586 1602 "MetricName": "tma_memory_fence", ··· 1590 1604 }, 1591 1605 { 1592 1606 "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", 1593 - "MetricConstraint": "NO_GROUP_EVENTS", 1594 1607 "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)", 1595 1608 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 1596 1609 "MetricName": "tma_memory_operations", ··· 1661 1676 }, 1662 1677 { 1663 1678 "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", 1664 - "MetricConstraint": "NO_GROUP_EVENTS", 1665 1679 "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", 1666 1680 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 1667 1681 "MetricName": "tma_other_light_ops", ··· 1742 1758 }, 1743 1759 { 1744 1760 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all 
execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 1761 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1745 1762 "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", 1746 1763 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", 1747 1764 "MetricName": "tma_ports_utilized_2", ··· 1752 1767 }, 1753 1768 { 1754 1769 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 1770 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1755 1771 "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", 1756 1772 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", 1757 1773 "MetricName": "tma_ports_utilized_3m", ··· 1808 1822 }, 1809 1823 { 1810 1824 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", 1825 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1811 1826 "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", 1812 1827 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 1813 1828 "MetricName": "tma_slow_pause", ··· 1827 1840 }, 1828 1841 { 1829 1842 "BriefDescription": "This metric represents rate of split store accesses", 1830 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1831 1843 "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", 1832 1844 "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", 1833 1845 "MetricName": "tma_split_stores", ··· 1854 1868 }, 1855 1869 { 1856 1870 "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", 1857 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1858 1871 "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / 
tma_info_thread_clks", 1859 1872 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 1860 1873 "MetricName": "tma_store_fwd_blk",
+24 -10
tools/perf/tests/shell/daemon.sh
··· 414 414 # start daemon 415 415 daemon_start ${config} test 416 416 417 - # send 2 signals 418 - perf daemon signal --config ${config} --session test 419 - perf daemon signal --config ${config} 420 - 421 - # stop daemon 422 - daemon_exit ${config} 423 - 424 - # count is 2 perf.data for signals and 1 for perf record finished 425 - count=`ls ${base}/session-test/*perf.data* | wc -l` 426 - if [ ${count} -ne 3 ]; then 417 + # send 2 signals then exit. Do this in a loop watching the number of 418 + # files to avoid races. If the loop retries more than 600 times then 419 + # give up. 420 + local retries=0 421 + local signals=0 422 + local success=0 423 + while [ ${retries} -lt 600 ] && [ ${success} -eq 0 ]; do 424 + local files 425 + files=`ls ${base}/session-test/*perf.data* 2> /dev/null | wc -l` 426 + if [ ${signals} -eq 0 ]; then 427 + perf daemon signal --config ${config} --session test 428 + signals=1 429 + elif [ ${signals} -eq 1 ] && [ $files -ge 1 ]; then 430 + perf daemon signal --config ${config} 431 + signals=2 432 + elif [ ${signals} -eq 2 ] && [ $files -ge 2 ]; then 433 + daemon_exit ${config} 434 + signals=3 435 + elif [ ${signals} -eq 3 ] && [ $files -ge 3 ]; then 436 + success=1 437 + fi 438 + retries=$((${retries} +1)) 439 + done 440 + if [ ${success} -eq 0 ]; then 427 441 error=1 428 442 echo "FAILED: perf data no generated" 429 443 fi
+18 -3
tools/perf/tests/shell/list.sh
··· 3 3 # SPDX-License-Identifier: GPL-2.0 4 4 5 5 set -e 6 - err=0 7 6 8 7 shelldir=$(dirname "$0") 9 8 # shellcheck source=lib/setup_python.sh 10 9 . "${shelldir}"/lib/setup_python.sh 11 10 11 + list_output=$(mktemp /tmp/__perf_test.list_output.json.XXXXX) 12 + 13 + cleanup() { 14 + rm -f "${list_output}" 15 + 16 + trap - EXIT TERM INT 17 + } 18 + 19 + trap_cleanup() { 20 + cleanup 21 + exit 1 22 + } 23 + trap trap_cleanup EXIT TERM INT 24 + 12 25 test_list_json() { 13 26 echo "Json output test" 14 - perf list -j | $PYTHON -m json.tool 27 + perf list -j -o "${list_output}" 28 + $PYTHON -m json.tool "${list_output}" 15 29 echo "Json output test [Success]" 16 30 } 17 31 18 32 test_list_json 19 - exit $err 33 + cleanup 34 + exit 0
+9 -3
tools/perf/tests/shell/script.sh
··· 36 36 echo "DB test" 37 37 38 38 # Check if python script is supported 39 - libpython=$(perf version --build-options | grep python | grep -cv OFF) 40 - if [ "${libpython}" != "1" ] ; then 39 + if perf version --build-options | grep python | grep -q OFF ; then 41 40 echo "SKIP: python scripting is not supported" 42 41 err=2 43 42 return ··· 53 54 def call_path_table(*args): 54 55 print(f'call_path_table({args}') 55 56 _end_of_file_ 56 - perf record -g -o "${perfdatafile}" true 57 + case $(uname -m) 58 + in s390x) 59 + cmd_flags="--call-graph dwarf -e cpu-clock";; 60 + *) 61 + cmd_flags="-g";; 62 + esac 63 + 64 + perf record $cmd_flags -o "${perfdatafile}" true 57 65 perf script -i "${perfdatafile}" -s "${db_test}" 58 66 echo "DB test [Success]" 59 67 }
+1
tools/perf/trace/beauty/statx.c
··· 67 67 P_FLAG(BTIME); 68 68 P_FLAG(MNT_ID); 69 69 P_FLAG(DIOALIGN); 70 + P_FLAG(MNT_ID_UNIQUE); 70 71 71 72 #undef P_FLAG 72 73
+8 -1
tools/perf/util/evlist.c
··· 103 103 err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); 104 104 if (err) { 105 105 evlist__delete(evlist); 106 - evlist = NULL; 106 + return NULL; 107 + } 108 + 109 + if (evlist->core.nr_entries > 1) { 110 + struct evsel *evsel; 111 + 112 + evlist__for_each_entry(evlist, evsel) 113 + evsel__set_sample_id(evsel, /*can_sample_identifier=*/false); 107 114 } 108 115 109 116 return evlist;
+2 -2
tools/perf/util/hist.c
··· 491 491 } 492 492 493 493 if (symbol_conf.res_sample) { 494 - he->res_samples = calloc(sizeof(struct res_sample), 495 - symbol_conf.res_sample); 494 + he->res_samples = calloc(symbol_conf.res_sample, 495 + sizeof(struct res_sample)); 496 496 if (!he->res_samples) 497 497 goto err_srcline; 498 498 }
+4
tools/perf/util/include/linux/linkage.h
··· 115 115 SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) 116 116 #endif 117 117 118 + #ifndef SYM_FUNC_ALIAS_MEMFUNC 119 + #define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS 120 + #endif 121 + 118 122 // In the kernel sources (include/linux/cfi_types.h), this has a different 119 123 // definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang 120 124 // definition:
+1 -1
tools/perf/util/metricgroup.c
··· 286 286 *out_metric_events = NULL; 287 287 ids_size = hashmap__size(ids); 288 288 289 - metric_events = calloc(sizeof(void *), ids_size + 1); 289 + metric_events = calloc(ids_size + 1, sizeof(void *)); 290 290 if (!metric_events) 291 291 return -ENOMEM; 292 292
+1 -1
tools/perf/util/print-events.c
··· 66 66 67 67 put_tracing_file(events_path); 68 68 if (events_fd < 0) { 69 - printf("Error: failed to open tracing events directory\n"); 69 + pr_err("Error: failed to open tracing events directory\n"); 70 70 return; 71 71 } 72 72
+2 -2
tools/perf/util/synthetic-events.c
··· 1055 1055 if (thread_nr > n) 1056 1056 thread_nr = n; 1057 1057 1058 - synthesize_threads = calloc(sizeof(pthread_t), thread_nr); 1058 + synthesize_threads = calloc(thread_nr, sizeof(pthread_t)); 1059 1059 if (synthesize_threads == NULL) 1060 1060 goto free_dirent; 1061 1061 1062 - args = calloc(sizeof(*args), thread_nr); 1062 + args = calloc(thread_nr, sizeof(*args)); 1063 1063 if (args == NULL) 1064 1064 goto free_threads; 1065 1065