Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:

- Add missing fields and remove some duplicate fields when printing a
perf_event_attr.

- Fix hybrid config terms list corruption.

- Update kernel header copies, some of which resulted in new kernel features
being automagically added to 'perf trace' syscall/tracepoint argument
id->string translators.

- Add a file generated during the documentation build to .gitignore.

- Add an option to build without libbfd, as some distros, like Debian,
consider its ABI unstable.

- Add support to print a textual representation of IBS raw sample data
in 'perf report'.

- Fix bpf 'perf test' sample mismatch reporting.

- Fix passing arguments to stackcollapse report in a 'perf script'
python script.

- Allow build-id with trailing zeros.

- Look for ImageBase in PE file to compute .text offset.

* tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (25 commits)
tools headers UAPI: Update tools's copy of drm.h headers
tools headers UAPI: Sync drm/i915_drm.h with the kernel sources
tools headers UAPI: Sync linux/fs.h with the kernel sources
tools headers UAPI: Sync linux/in.h copy with the kernel sources
perf tools: Add an option to build without libbfd
perf tools: Allow build-id with trailing zeros
perf tools: Fix hybrid config terms list corruption
perf tools: Factor out copy_config_terms() and free_config_terms()
perf tools: Fix perf_event_attr__fprintf() missing/dupl. fields
perf tools: Ignore Documentation dependency file
perf bpf: Provide a weak btf__load_from_kernel_by_id() for older libbpf versions
tools include UAPI: Update linux/mount.h copy
perf beauty: Cover more flags in the move_mount syscall argument beautifier
tools headers UAPI: Sync linux/prctl.h with the kernel sources
tools include UAPI: Sync sound/asound.h copy with the kernel sources
tools headers UAPI: Sync linux/kvm.h with the kernel sources
tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources
perf report: Add support to print a textual representation of IBS raw sample data
perf report: Add tools/arch/x86/include/asm/amd-ibs.h
perf env: Add perf_env__cpuid, perf_env__{nr_}pmu_mappings
...

+1146 -174
+132
tools/arch/x86/include/asm/amd-ibs.h
···
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+  * From PPR Vol 1 for AMD Family 19h Model 01h B1
+  * 55898 Rev 0.35 - Feb 5, 2021
+  */
+
+ #include "msr-index.h"
+
+ /*
+  * IBS Hardware MSRs
+  */
+
+ /* MSR 0xc0011030: IBS Fetch Control */
+ union ibs_fetch_ctl {
+     __u64 val;
+     struct {
+         __u64 fetch_maxcnt:16,   /* 0-15: instruction fetch max. count */
+               fetch_cnt:16,      /* 16-31: instruction fetch count */
+               fetch_lat:16,      /* 32-47: instruction fetch latency */
+               fetch_en:1,        /* 48: instruction fetch enable */
+               fetch_val:1,       /* 49: instruction fetch valid */
+               fetch_comp:1,      /* 50: instruction fetch complete */
+               ic_miss:1,         /* 51: i-cache miss */
+               phy_addr_valid:1,  /* 52: physical address valid */
+               l1tlb_pgsz:2,      /* 53-54: i-cache L1TLB page size
+                                   * (needs IbsPhyAddrValid) */
+               l1tlb_miss:1,      /* 55: i-cache fetch missed in L1TLB */
+               l2tlb_miss:1,      /* 56: i-cache fetch missed in L2TLB */
+               rand_en:1,         /* 57: random tagging enable */
+               fetch_l2_miss:1,   /* 58: L2 miss for sampled fetch
+                                   * (needs IbsFetchComp) */
+               reserved:5;        /* 59-63: reserved */
+     };
+ };
+
+ /* MSR 0xc0011033: IBS Execution Control */
+ union ibs_op_ctl {
+     __u64 val;
+     struct {
+         __u64 opmaxcnt:16,       /* 0-15: periodic op max. count */
+               reserved0:1,       /* 16: reserved */
+               op_en:1,           /* 17: op sampling enable */
+               op_val:1,          /* 18: op sample valid */
+               cnt_ctl:1,         /* 19: periodic op counter control */
+               opmaxcnt_ext:7,    /* 20-26: upper 7 bits of periodic op maximum count */
+               reserved1:5,       /* 27-31: reserved */
+               opcurcnt:27,       /* 32-58: periodic op counter current count */
+               reserved2:5;       /* 59-63: reserved */
+     };
+ };
+
+ /* MSR 0xc0011035: IBS Op Data 2 */
+ union ibs_op_data {
+     __u64 val;
+     struct {
+         __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
+               tag_to_ret_ctr:16,  /* 15-31: op tag to retire count */
+               reserved1:2,        /* 32-33: reserved */
+               op_return:1,        /* 34: return op */
+               op_brn_taken:1,     /* 35: taken branch op */
+               op_brn_misp:1,      /* 36: mispredicted branch op */
+               op_brn_ret:1,       /* 37: branch op retired */
+               op_rip_invalid:1,   /* 38: RIP is invalid */
+               op_brn_fuse:1,      /* 39: fused branch op */
+               op_microcode:1,     /* 40: microcode op */
+               reserved2:23;       /* 41-63: reserved */
+     };
+ };
+
+ /* MSR 0xc0011036: IBS Op Data 2 */
+ union ibs_op_data2 {
+     __u64 val;
+     struct {
+         __u64 data_src:3,        /* 0-2: data source */
+               reserved0:1,       /* 3: reserved */
+               rmt_node:1,        /* 4: destination node */
+               cache_hit_st:1,    /* 5: cache hit state */
+               reserved1:57;      /* 5-63: reserved */
+     };
+ };
+
+ /* MSR 0xc0011037: IBS Op Data 3 */
+ union ibs_op_data3 {
+     __u64 val;
+     struct {
+         __u64 ld_op:1,                    /* 0: load op */
+               st_op:1,                    /* 1: store op */
+               dc_l1tlb_miss:1,            /* 2: data cache L1TLB miss */
+               dc_l2tlb_miss:1,            /* 3: data cache L2TLB hit in 2M page */
+               dc_l1tlb_hit_2m:1,          /* 4: data cache L1TLB hit in 2M page */
+               dc_l1tlb_hit_1g:1,          /* 5: data cache L1TLB hit in 1G page */
+               dc_l2tlb_hit_2m:1,          /* 6: data cache L2TLB hit in 2M page */
+               dc_miss:1,                  /* 7: data cache miss */
+               dc_mis_acc:1,               /* 8: misaligned access */
+               reserved:4,                 /* 9-12: reserved */
+               dc_wc_mem_acc:1,            /* 13: write combining memory access */
+               dc_uc_mem_acc:1,            /* 14: uncacheable memory access */
+               dc_locked_op:1,             /* 15: locked operation */
+               dc_miss_no_mab_alloc:1,     /* 16: DC miss with no MAB allocated */
+               dc_lin_addr_valid:1,        /* 17: data cache linear address valid */
+               dc_phy_addr_valid:1,        /* 18: data cache physical address valid */
+               dc_l2_tlb_hit_1g:1,         /* 19: data cache L2 hit in 1GB page */
+               l2_miss:1,                  /* 20: L2 cache miss */
+               sw_pf:1,                    /* 21: software prefetch */
+               op_mem_width:4,             /* 22-25: load/store size in bytes */
+               op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */
+               dc_miss_lat:16,             /* 32-47: data cache miss latency */
+               tlb_refill_lat:16;          /* 48-63: L1 TLB refill latency */
+     };
+ };
+
+ /* MSR 0xc001103c: IBS Fetch Control Extended */
+ union ic_ibs_extd_ctl {
+     __u64 val;
+     struct {
+         __u64 itlb_refill_lat:16,  /* 0-15: ITLB Refill latency for sampled fetch */
+               reserved:48;         /* 16-63: reserved */
+     };
+ };
+
+ /*
+  * IBS driver related
+  */
+
+ struct perf_ibs_data {
+     u32 size;
+     union {
+         u32 data[0];  /* data buffer starts here */
+         u32 caps;
+     };
+     u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+ };
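
With the header above in place, decoding a raw IBS register is a matter of assigning the 64-bit MSR value to .val and reading the named bitfields. A minimal sketch, assuming the header's msr-index.h dependency is on the include path (the raw value would normally come from a perf raw sample payload):

#include <stdio.h>
#include <linux/types.h>
#include "amd-ibs.h"

/* Decode a raw IBS_FETCH_CTL value, e.g. one captured in a perf raw sample. */
static void print_fetch_ctl(__u64 raw)
{
	union ibs_fetch_ctl fetch = { .val = raw };

	/* Latency and miss bits are only meaningful for enabled, valid samples. */
	if (fetch.fetch_en && fetch.fetch_val)
		printf("fetch latency %u cycles, L1 TLB miss %u, L2 TLB miss %u\n",
		       (unsigned int)fetch.fetch_lat,
		       (unsigned int)fetch.l1tlb_miss,
		       (unsigned int)fetch.l2tlb_miss);
}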
+1
tools/arch/x86/include/uapi/asm/kvm.h
···
  #define KVM_GUESTDBG_USE_HW_BP		0x00020000
  #define KVM_GUESTDBG_INJECT_DB		0x00040000
  #define KVM_GUESTDBG_INJECT_BP		0x00080000
+ #define KVM_GUESTDBG_BLOCKIRQ		0x00100000

  /* for KVM_SET_GUEST_DEBUG */
  struct kvm_guest_debug_arch {
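
KVM_GUESTDBG_BLOCKIRQ is set in the control word handed to the KVM_SET_GUEST_DEBUG ioctl; per its name it keeps interrupt injection out of the way while guest debugging is active. A hedged sketch (vcpu_fd is assumed to be a vCPU file descriptor obtained via KVM_CREATE_VCPU):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Enable single-step guest debug and ask KVM to hold off interrupt injection. */
static int enable_debug_blockirq(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_BLOCKIRQ;
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}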
+8 -6
tools/include/uapi/asm-generic/unistd.h
···
  #define __NR_remap_file_pages 234
  __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
  #define __NR_mbind 235
- __SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+ __SYSCALL(__NR_mbind, sys_mbind)
  #define __NR_get_mempolicy 236
- __SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+ __SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
  #define __NR_set_mempolicy 237
- __SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+ __SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
  #define __NR_migrate_pages 238
- __SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+ __SYSCALL(__NR_migrate_pages, sys_migrate_pages)
  #define __NR_move_pages 239
- __SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+ __SYSCALL(__NR_move_pages, sys_move_pages)
  #endif

  #define __NR_rt_tgsigqueueinfo 240
···
  #define __NR_memfd_secret 447
  __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
  #endif
+ #define __NR_process_mrelease 448
+ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)

  #undef __NR_syscalls
- #define __NR_syscalls 448
+ #define __NR_syscalls 449

  /*
   * 32 bit systems traditionally used different
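
The newly numbered process_mrelease() takes a pidfd and a flags word that must currently be zero. A hedged sketch of calling it through syscall(2), for libcs that do not yet provide a wrapper (the fallback define mirrors the table above and may differ on other architectures):

#define _GNU_SOURCE
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_process_mrelease
#define __NR_process_mrelease 448
#endif

/* Reap the address space of an already-killed process identified by pidfd. */
static int process_mrelease(int pidfd, unsigned int flags)
{
	return (int)syscall(__NR_process_mrelease, pidfd, flags);
}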
+12 -2
tools/include/uapi/drm/drm.h
···
  /**
   * DRM_CAP_VBLANK_HIGH_CRTC
   *
-  * If set to 1, the kernel supports specifying a CRTC index in the high bits of
-  * &drm_wait_vblank_request.type.
+  * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>`
+  * in the high bits of &drm_wait_vblank_request.type.
   *
   * Starting kernel version 2.6.39, this capability is always set to 1.
   */
···
  #define DRM_IOCTL_MODE_GETPROPBLOB	DRM_IOWR(0xAC, struct drm_mode_get_blob)
  #define DRM_IOCTL_MODE_GETFB		DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
  #define DRM_IOCTL_MODE_ADDFB		DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+ /**
+  * DRM_IOCTL_MODE_RMFB - Remove a framebuffer.
+  *
+  * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+  * argument is a framebuffer object ID.
+  *
+  * Warning: removing a framebuffer currently in-use on an enabled plane will
+  * disable that plane. The CRTC the plane is linked to may also be disabled
+  * (depending on driver capabilities).
+  */
  #define DRM_IOCTL_MODE_RMFB		DRM_IOWR(0xAF, unsigned int)
  #define DRM_IOCTL_MODE_PAGE_FLIP	DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
  #define DRM_IOCTL_MODE_DIRTYFB		DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
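
As the new kerneldoc comment spells out, DRM_IOCTL_MODE_RMFB takes a framebuffer object ID rather than a struct. A minimal sketch, assuming drm_fd and fb_id come from earlier ADDFB/ADDFB2 calls:

#include <sys/ioctl.h>
#include <drm/drm.h>

/* Remove a framebuffer; if it is still scanned out on an enabled plane,
 * that plane (and possibly its CRTC) will be disabled by the kernel. */
static int remove_framebuffer(int drm_fd, unsigned int fb_id)
{
	return ioctl(drm_fd, DRM_IOCTL_MODE_RMFB, &fb_id);
}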
+414 -78
tools/include/uapi/drm/i915_drm.h
··· 572 572 #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) 573 573 #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) 574 574 #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) 575 + /* 576 + * Indicates the 2k user priority levels are statically mapped into 3 buckets as 577 + * follows: 578 + * 579 + * -1k to -1 Low priority 580 + * 0 Normal priority 581 + * 1 to 1k Highest priority 582 + */ 583 + #define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) 575 584 576 585 #define I915_PARAM_HUC_STATUS 42 577 586 ··· 682 673 * I915_EXEC_USE_EXTENSIONS. 683 674 */ 684 675 #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 676 + 677 + /* Query if the kernel supports the I915_USERPTR_PROBE flag. */ 678 + #define I915_PARAM_HAS_USERPTR_PROBE 56 685 679 686 680 /* Must be kept compact -- no holes and well documented */ 687 681 ··· 861 849 __u64 offset; 862 850 }; 863 851 852 + /** 853 + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. 854 + * 855 + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, 856 + * and is used to retrieve the fake offset to mmap an object specified by &handle. 857 + * 858 + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. 859 + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave 860 + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. 861 + */ 864 862 struct drm_i915_gem_mmap_offset { 865 - /** Handle for the object being mapped. */ 863 + /** @handle: Handle for the object being mapped. */ 866 864 __u32 handle; 865 + /** @pad: Must be zero */ 867 866 __u32 pad; 868 867 /** 869 - * Fake offset to use for subsequent mmap call 868 + * @offset: The fake offset to use for subsequent mmap call 870 869 * 871 870 * This is a fixed-size type for 32/64 compatibility. 872 871 */ 873 872 __u64 offset; 874 873 875 874 /** 876 - * Flags for extended behaviour. 875 + * @flags: Flags for extended behaviour. 877 876 * 878 - * It is mandatory that one of the MMAP_OFFSET types 879 - * (GTT, WC, WB, UC, etc) should be included. 877 + * It is mandatory that one of the `MMAP_OFFSET` types 878 + * should be included: 879 + * 880 + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) 881 + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. 882 + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. 883 + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. 884 + * 885 + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid 886 + * type. On devices without local memory, this caching mode is invalid. 887 + * 888 + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will 889 + * be used, depending on the object placement on creation. WB will be used 890 + * when the object can only exist in system memory, WC otherwise. 880 891 */ 881 892 __u64 flags; 882 - #define I915_MMAP_OFFSET_GTT 0 883 - #define I915_MMAP_OFFSET_WC 1 884 - #define I915_MMAP_OFFSET_WB 2 885 - #define I915_MMAP_OFFSET_UC 3 886 893 887 - /* 888 - * Zero-terminated chain of extensions. 894 + #define I915_MMAP_OFFSET_GTT 0 895 + #define I915_MMAP_OFFSET_WC 1 896 + #define I915_MMAP_OFFSET_WB 2 897 + #define I915_MMAP_OFFSET_UC 3 898 + #define I915_MMAP_OFFSET_FIXED 4 899 + 900 + /** 901 + * @extensions: Zero-terminated chain of extensions. 889 902 * 890 903 * No current extensions defined; mbz. 
891 904 */ 892 905 __u64 extensions; 893 906 }; 894 907 908 + /** 909 + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in 910 + * preparation for accessing the pages via some CPU domain. 911 + * 912 + * Specifying a new write or read domain will flush the object out of the 913 + * previous domain(if required), before then updating the objects domain 914 + * tracking with the new domain. 915 + * 916 + * Note this might involve waiting for the object first if it is still active on 917 + * the GPU. 918 + * 919 + * Supported values for @read_domains and @write_domain: 920 + * 921 + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain 922 + * - I915_GEM_DOMAIN_CPU: CPU cache domain 923 + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain 924 + * 925 + * All other domains are rejected. 926 + * 927 + * Note that for discrete, starting from DG1, this is no longer supported, and 928 + * is instead rejected. On such platforms the CPU domain is effectively static, 929 + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, 930 + * which can't be set explicitly and instead depends on the object placements, 931 + * as per the below. 932 + * 933 + * Implicit caching rules, starting from DG1: 934 + * 935 + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) 936 + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and 937 + * mapped as write-combined only. 938 + * 939 + * - Everything else is always allocated and mapped as write-back, with the 940 + * guarantee that everything is also coherent with the GPU. 941 + * 942 + * Note that this is likely to change in the future again, where we might need 943 + * more flexibility on future devices, so making this all explicit as part of a 944 + * new &drm_i915_gem_create_ext extension is probable. 945 + */ 895 946 struct drm_i915_gem_set_domain { 896 - /** Handle for the object */ 947 + /** @handle: Handle for the object. */ 897 948 __u32 handle; 898 949 899 - /** New read domains */ 950 + /** @read_domains: New read domains. */ 900 951 __u32 read_domains; 901 952 902 - /** New write domain */ 953 + /** 954 + * @write_domain: New write domain. 955 + * 956 + * Note that having something in the write domain implies it's in the 957 + * read domain, and only that read domain. 958 + */ 903 959 __u32 write_domain; 904 960 }; 905 961 ··· 1428 1348 * reading from the object simultaneously. 1429 1349 * 1430 1350 * The value of each engine class is the same as specified in the 1431 - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. 1351 + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. 1432 1352 * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. 1433 - * reported as active itself. Some hardware may have parallel 1434 - * execution engines, e.g. multiple media engines, which are 1435 - * mapped to the same class identifier and so are not separately 1436 - * reported for busyness. 1353 + * Some hardware may have parallel execution engines, e.g. multiple 1354 + * media engines, which are mapped to the same class identifier and so 1355 + * are not separately reported for busyness. 1437 1356 * 1438 1357 * Caveat emptor: 1439 1358 * Only the boolean result of this query is reliable; that is whether ··· 1443 1364 }; 1444 1365 1445 1366 /** 1446 - * I915_CACHING_NONE 1367 + * struct drm_i915_gem_caching - Set or get the caching for given object 1368 + * handle. 1447 1369 * 1448 - * GPU access is not coherent with cpu caches. 
Default for machines without an 1449 - * LLC. 1450 - */ 1451 - #define I915_CACHING_NONE 0 1452 - /** 1453 - * I915_CACHING_CACHED 1370 + * Allow userspace to control the GTT caching bits for a given object when the 1371 + * object is later mapped through the ppGTT(or GGTT on older platforms lacking 1372 + * ppGTT support, or if the object is used for scanout). Note that this might 1373 + * require unbinding the object from the GTT first, if its current caching value 1374 + * doesn't match. 1454 1375 * 1455 - * GPU access is coherent with cpu caches and furthermore the data is cached in 1456 - * last-level caches shared between cpu cores and the gpu GT. Default on 1457 - * machines with HAS_LLC. 1458 - */ 1459 - #define I915_CACHING_CACHED 1 1460 - /** 1461 - * I915_CACHING_DISPLAY 1376 + * Note that this all changes on discrete platforms, starting from DG1, the 1377 + * set/get caching is no longer supported, and is now rejected. Instead the CPU 1378 + * caching attributes(WB vs WC) will become an immutable creation time property 1379 + * for the object, along with the GTT caching level. For now we don't expose any 1380 + * new uAPI for this, instead on DG1 this is all implicit, although this largely 1381 + * shouldn't matter since DG1 is coherent by default(without any way of 1382 + * controlling it). 1462 1383 * 1463 - * Special GPU caching mode which is coherent with the scanout engines. 1464 - * Transparently falls back to I915_CACHING_NONE on platforms where no special 1465 - * cache mode (like write-through or gfdt flushing) is available. The kernel 1466 - * automatically sets this mode when using a buffer as a scanout target. 1467 - * Userspace can manually set this mode to avoid a costly stall and clflush in 1468 - * the hotpath of drawing the first frame. 1384 + * Implicit caching rules, starting from DG1: 1385 + * 1386 + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) 1387 + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and 1388 + * mapped as write-combined only. 1389 + * 1390 + * - Everything else is always allocated and mapped as write-back, with the 1391 + * guarantee that everything is also coherent with the GPU. 1392 + * 1393 + * Note that this is likely to change in the future again, where we might need 1394 + * more flexibility on future devices, so making this all explicit as part of a 1395 + * new &drm_i915_gem_create_ext extension is probable. 1396 + * 1397 + * Side note: Part of the reason for this is that changing the at-allocation-time CPU 1398 + * caching attributes for the pages might be required(and is expensive) if we 1399 + * need to then CPU map the pages later with different caching attributes. This 1400 + * inconsistent caching behaviour, while supported on x86, is not universally 1401 + * supported on other architectures. So for simplicity we opt for setting 1402 + * everything at creation time, whilst also making it immutable, on discrete 1403 + * platforms. 1469 1404 */ 1470 - #define I915_CACHING_DISPLAY 2 1471 - 1472 1405 struct drm_i915_gem_caching { 1473 1406 /** 1474 - * Handle of the buffer to set/get the caching level of. */ 1407 + * @handle: Handle of the buffer to set/get the caching level. 1408 + */ 1475 1409 __u32 handle; 1476 1410 1477 1411 /** 1478 - * Cacheing level to apply or return value 1412 + * @caching: The GTT caching level to apply or possible return value. 1479 1413 * 1480 - * bits0-15 are for generic caching control (i.e. the above defined 1481 - * values). 
bits16-31 are reserved for platform-specific variations 1482 - * (e.g. l3$ caching on gen7). */ 1414 + * The supported @caching values: 1415 + * 1416 + * I915_CACHING_NONE: 1417 + * 1418 + * GPU access is not coherent with CPU caches. Default for machines 1419 + * without an LLC. This means manual flushing might be needed, if we 1420 + * want GPU access to be coherent. 1421 + * 1422 + * I915_CACHING_CACHED: 1423 + * 1424 + * GPU access is coherent with CPU caches and furthermore the data is 1425 + * cached in last-level caches shared between CPU cores and the GPU GT. 1426 + * 1427 + * I915_CACHING_DISPLAY: 1428 + * 1429 + * Special GPU caching mode which is coherent with the scanout engines. 1430 + * Transparently falls back to I915_CACHING_NONE on platforms where no 1431 + * special cache mode (like write-through or gfdt flushing) is 1432 + * available. The kernel automatically sets this mode when using a 1433 + * buffer as a scanout target. Userspace can manually set this mode to 1434 + * avoid a costly stall and clflush in the hotpath of drawing the first 1435 + * frame. 1436 + */ 1437 + #define I915_CACHING_NONE 0 1438 + #define I915_CACHING_CACHED 1 1439 + #define I915_CACHING_DISPLAY 2 1483 1440 __u32 caching; 1484 1441 }; 1485 1442 ··· 1754 1639 __u32 size; 1755 1640 __u64 param; 1756 1641 #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 1642 + /* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance 1643 + * someone somewhere has attempted to use it, never re-use this context 1644 + * param number. 1645 + */ 1757 1646 #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 1758 1647 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 1759 1648 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 ··· 1842 1723 */ 1843 1724 #define I915_CONTEXT_PARAM_PERSISTENCE 0xb 1844 1725 1845 - /* 1846 - * I915_CONTEXT_PARAM_RINGSIZE: 1847 - * 1848 - * Sets the size of the CS ringbuffer to use for logical ring contexts. This 1849 - * applies a limit of how many batches can be queued to HW before the caller 1850 - * is blocked due to lack of space for more commands. 1851 - * 1852 - * Only reliably possible to be set prior to first use, i.e. during 1853 - * construction. At any later point, the current execution must be flushed as 1854 - * the ring can only be changed while the context is idle. Note, the ringsize 1855 - * can be specified as a constructor property, see 1856 - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. 1857 - * 1858 - * Only applies to the current set of engine and lost when those engines 1859 - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). 1860 - * 1861 - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. 1862 - * Default is 16 KiB. 1726 + /* This API has been removed. On the off chance someone somewhere has 1727 + * attempted to use it, never re-use this context param number. 1863 1728 */ 1864 1729 #define I915_CONTEXT_PARAM_RINGSIZE 0xc 1865 1730 /* Must be kept compact -- no holes and well documented */ ··· 1909 1806 */ 1910 1807 __u32 rsvd; 1911 1808 }; 1809 + 1810 + /** 1811 + * DOC: Virtual Engine uAPI 1812 + * 1813 + * Virtual engine is a concept where userspace is able to configure a set of 1814 + * physical engines, submit a batch buffer, and let the driver execute it on any 1815 + * engine from the set as it sees fit. 1816 + * 1817 + * This is primarily useful on parts which have multiple instances of a same 1818 + * class engine, like for example GT3+ Skylake parts with their two VCS engines. 
1819 + * 1820 + * For instance userspace can enumerate all engines of a certain class using the 1821 + * previously described `Engine Discovery uAPI`_. After that userspace can 1822 + * create a GEM context with a placeholder slot for the virtual engine (using 1823 + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class 1824 + * and instance respectively) and finally using the 1825 + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in 1826 + * the same reserved slot. 1827 + * 1828 + * Example of creating a virtual engine and submitting a batch buffer to it: 1829 + * 1830 + * .. code-block:: C 1831 + * 1832 + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { 1833 + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, 1834 + * .engine_index = 0, // Place this virtual engine into engine map slot 0 1835 + * .num_siblings = 2, 1836 + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, 1837 + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, 1838 + * }; 1839 + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { 1840 + * .engines = { { I915_ENGINE_CLASS_INVALID, 1841 + * I915_ENGINE_CLASS_INVALID_NONE } }, 1842 + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension 1843 + * }; 1844 + * struct drm_i915_gem_context_create_ext_setparam p_engines = { 1845 + * .base = { 1846 + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, 1847 + * }, 1848 + * .param = { 1849 + * .param = I915_CONTEXT_PARAM_ENGINES, 1850 + * .value = to_user_pointer(&engines), 1851 + * .size = sizeof(engines), 1852 + * }, 1853 + * }; 1854 + * struct drm_i915_gem_context_create_ext create = { 1855 + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, 1856 + * .extensions = to_user_pointer(&p_engines); 1857 + * }; 1858 + * 1859 + * ctx_id = gem_context_create_ext(drm_fd, &create); 1860 + * 1861 + * // Now we have created a GEM context with its engine map containing a 1862 + * // single virtual engine. Submissions to this slot can go either to 1863 + * // vcs0 or vcs1, depending on the load balancing algorithm used inside 1864 + * // the driver. The load balancing is dynamic from one batch buffer to 1865 + * // another and transparent to userspace. 1866 + * 1867 + * ... 1868 + * execbuf.rsvd1 = ctx_id; 1869 + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine 1870 + * gem_execbuf(drm_fd, &execbuf); 1871 + */ 1912 1872 1913 1873 /* 1914 1874 * i915_context_engines_load_balance: ··· 2049 1883 struct i915_engine_class_instance engines[N__]; \ 2050 1884 } __attribute__((packed)) name__ 2051 1885 1886 + /** 1887 + * DOC: Context Engine Map uAPI 1888 + * 1889 + * Context engine map is a new way of addressing engines when submitting batch- 1890 + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` 1891 + * inside the flags field of `struct drm_i915_gem_execbuffer2`. 1892 + * 1893 + * To use it created GEM contexts need to be configured with a list of engines 1894 + * the user is intending to submit to. This is accomplished using the 1895 + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct 1896 + * i915_context_param_engines`. 1897 + * 1898 + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the 1899 + * configured map. 1900 + * 1901 + * Example of creating such context and submitting against it: 1902 + * 1903 + * .. 
code-block:: C 1904 + * 1905 + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { 1906 + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, 1907 + * { I915_ENGINE_CLASS_COPY, 0 } } 1908 + * }; 1909 + * struct drm_i915_gem_context_create_ext_setparam p_engines = { 1910 + * .base = { 1911 + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, 1912 + * }, 1913 + * .param = { 1914 + * .param = I915_CONTEXT_PARAM_ENGINES, 1915 + * .value = to_user_pointer(&engines), 1916 + * .size = sizeof(engines), 1917 + * }, 1918 + * }; 1919 + * struct drm_i915_gem_context_create_ext create = { 1920 + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, 1921 + * .extensions = to_user_pointer(&p_engines); 1922 + * }; 1923 + * 1924 + * ctx_id = gem_context_create_ext(drm_fd, &create); 1925 + * 1926 + * // We have now created a GEM context with two engines in the map: 1927 + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines 1928 + * // will not be accessible from this context. 1929 + * 1930 + * ... 1931 + * execbuf.rsvd1 = ctx_id; 1932 + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context 1933 + * gem_execbuf(drm_fd, &execbuf); 1934 + * 1935 + * ... 1936 + * execbuf.rsvd1 = ctx_id; 1937 + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context 1938 + * gem_execbuf(drm_fd, &execbuf); 1939 + */ 1940 + 2052 1941 struct i915_context_param_engines { 2053 1942 __u64 extensions; /* linked chain of extension blocks, 0 terminates */ 2054 1943 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ ··· 2122 1901 struct drm_i915_gem_context_param param; 2123 1902 }; 2124 1903 2125 - struct drm_i915_gem_context_create_ext_clone { 1904 + /* This API has been removed. On the off chance someone somewhere has 1905 + * attempted to use it, never re-use this extension number. 1906 + */ 2126 1907 #define I915_CONTEXT_CREATE_EXT_CLONE 1 2127 - struct i915_user_extension base; 2128 - __u32 clone_id; 2129 - __u32 flags; 2130 - #define I915_CONTEXT_CLONE_ENGINES (1u << 0) 2131 - #define I915_CONTEXT_CLONE_FLAGS (1u << 1) 2132 - #define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) 2133 - #define I915_CONTEXT_CLONE_SSEU (1u << 3) 2134 - #define I915_CONTEXT_CLONE_TIMELINE (1u << 4) 2135 - #define I915_CONTEXT_CLONE_VM (1u << 5) 2136 - #define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) 2137 - __u64 rsvd; 2138 - }; 2139 1908 2140 1909 struct drm_i915_gem_context_destroy { 2141 1910 __u32 ctx_id; ··· 2197 1986 __u32 pad; 2198 1987 }; 2199 1988 1989 + /** 1990 + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. 1991 + * 1992 + * Userptr objects have several restrictions on what ioctls can be used with the 1993 + * object handle. 1994 + */ 2200 1995 struct drm_i915_gem_userptr { 1996 + /** 1997 + * @user_ptr: The pointer to the allocated memory. 1998 + * 1999 + * Needs to be aligned to PAGE_SIZE. 2000 + */ 2201 2001 __u64 user_ptr; 2002 + 2003 + /** 2004 + * @user_size: 2005 + * 2006 + * The size in bytes for the allocated memory. This will also become the 2007 + * object size. 2008 + * 2009 + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, 2010 + * or larger. 2011 + */ 2202 2012 __u64 user_size; 2013 + 2014 + /** 2015 + * @flags: 2016 + * 2017 + * Supported flags: 2018 + * 2019 + * I915_USERPTR_READ_ONLY: 2020 + * 2021 + * Mark the object as readonly, this also means GPU access can only be 2022 + * readonly. This is only supported on HW which supports readonly access 2023 + * through the GTT. 
If the HW can't support readonly access, an error is 2024 + * returned. 2025 + * 2026 + * I915_USERPTR_PROBE: 2027 + * 2028 + * Probe the provided @user_ptr range and validate that the @user_ptr is 2029 + * indeed pointing to normal memory and that the range is also valid. 2030 + * For example if some garbage address is given to the kernel, then this 2031 + * should complain. 2032 + * 2033 + * Returns -EFAULT if the probe failed. 2034 + * 2035 + * Note that this doesn't populate the backing pages, and also doesn't 2036 + * guarantee that the object will remain valid when the object is 2037 + * eventually used. 2038 + * 2039 + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE 2040 + * returns a non-zero value. 2041 + * 2042 + * I915_USERPTR_UNSYNCHRONIZED: 2043 + * 2044 + * NOT USED. Setting this flag will result in an error. 2045 + */ 2203 2046 __u32 flags; 2204 2047 #define I915_USERPTR_READ_ONLY 0x1 2048 + #define I915_USERPTR_PROBE 0x2 2205 2049 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 2206 2050 /** 2207 - * Returned handle for the object. 2051 + * @handle: Returned handle for the object. 2208 2052 * 2209 2053 * Object handles are nonzero. 2210 2054 */ ··· 2641 2375 2642 2376 __u8 data[]; 2643 2377 }; 2378 + 2379 + /** 2380 + * DOC: Engine Discovery uAPI 2381 + * 2382 + * Engine discovery uAPI is a way of enumerating physical engines present in a 2383 + * GPU associated with an open i915 DRM file descriptor. This supersedes the old 2384 + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like 2385 + * `I915_PARAM_HAS_BLT`. 2386 + * 2387 + * The need for this interface came starting with Icelake and newer GPUs, which 2388 + * started to establish a pattern of having multiple engines of a same class, 2389 + * where not all instances were always completely functionally equivalent. 2390 + * 2391 + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the 2392 + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. 2393 + * 2394 + * Example for getting the list of engines: 2395 + * 2396 + * .. code-block:: C 2397 + * 2398 + * struct drm_i915_query_engine_info *info; 2399 + * struct drm_i915_query_item item = { 2400 + * .query_id = DRM_I915_QUERY_ENGINE_INFO; 2401 + * }; 2402 + * struct drm_i915_query query = { 2403 + * .num_items = 1, 2404 + * .items_ptr = (uintptr_t)&item, 2405 + * }; 2406 + * int err, i; 2407 + * 2408 + * // First query the size of the blob we need, this needs to be large 2409 + * // enough to hold our array of engines. The kernel will fill out the 2410 + * // item.length for us, which is the number of bytes we need. 2411 + * // 2412 + * // Alternatively a large buffer can be allocated straight away enabling 2413 + * // querying in one pass, in which case item.length should contain the 2414 + * // length of the provided buffer. 2415 + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); 2416 + * if (err) ... 2417 + * 2418 + * info = calloc(1, item.length); 2419 + * // Now that we allocated the required number of bytes, we call the ioctl 2420 + * // again, this time with the data_ptr pointing to our newly allocated 2421 + * // blob, which the kernel can then populate with info on all engines. 2422 + * item.data_ptr = (uintptr_t)&info, 2423 + * 2424 + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); 2425 + * if (err) ... 
2426 + * 2427 + * // We can now access each engine in the array 2428 + * for (i = 0; i < info->num_engines; i++) { 2429 + * struct drm_i915_engine_info einfo = info->engines[i]; 2430 + * u16 class = einfo.engine.class; 2431 + * u16 instance = einfo.engine.instance; 2432 + * .... 2433 + * } 2434 + * 2435 + * free(info); 2436 + * 2437 + * Each of the enumerated engines, apart from being defined by its class and 2438 + * instance (see `struct i915_engine_class_instance`), also can have flags and 2439 + * capabilities defined as documented in i915_drm.h. 2440 + * 2441 + * For instance video engines which support HEVC encoding will have the 2442 + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. 2443 + * 2444 + * Engine discovery only fully comes to its own when combined with the new way 2445 + * of addressing engines when submitting batch buffers using contexts with 2446 + * engine maps configured. 2447 + */ 2644 2448 2645 2449 /** 2646 2450 * struct drm_i915_engine_info
+1
tools/include/uapi/linux/fs.h
···
  #define BLKSECDISCARD _IO(0x12,125)
  #define BLKROTATIONAL _IO(0x12,126)
  #define BLKZEROOUT _IO(0x12,127)
+ #define BLKGETDISKSEQ _IOR(0x12,128,__u64)
  /*
   * A jump here: 130-136 are reserved for zoned block devices
   * (see uapi/linux/blkzoned.h)
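
BLKGETDISKSEQ follows the usual _IOR pattern and returns the block device's disk sequence number in a __u64. A hedged example (the device path is illustrative):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/types.h>

int main(void)
{
	__u64 seq = 0;
	int fd = open("/dev/sda", O_RDONLY);	/* example device */

	if (fd >= 0 && ioctl(fd, BLKGETDISKSEQ, &seq) == 0)
		printf("disk sequence number: %llu\n", (unsigned long long)seq);
	if (fd >= 0)
		close(fd);
	return 0;
}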
+32 -10
tools/include/uapi/linux/in.h
···
  };

  struct ip_msfilter {
- 	__be32		imsf_multiaddr;
- 	__be32		imsf_interface;
- 	__u32		imsf_fmode;
- 	__u32		imsf_numsrc;
- 	__be32		imsf_slist[1];
+ 	union {
+ 		struct {
+ 			__be32		imsf_multiaddr_aux;
+ 			__be32		imsf_interface_aux;
+ 			__u32		imsf_fmode_aux;
+ 			__u32		imsf_numsrc_aux;
+ 			__be32		imsf_slist[1];
+ 		};
+ 		struct {
+ 			__be32		imsf_multiaddr;
+ 			__be32		imsf_interface;
+ 			__u32		imsf_fmode;
+ 			__u32		imsf_numsrc;
+ 			__be32		imsf_slist_flex[];
+ 		};
+ 	};
  };

  #define IP_MSFILTER_SIZE(numsrc) \
···
  };

  struct group_filter {
- 	__u32				 gf_interface;	/* interface index */
- 	struct __kernel_sockaddr_storage gf_group;	/* multicast address */
- 	__u32				 gf_fmode;	/* filter mode */
- 	__u32				 gf_numsrc;	/* number of sources */
- 	struct __kernel_sockaddr_storage gf_slist[1];	/* interface index */
+ 	union {
+ 		struct {
+ 			__u32				 gf_interface_aux; /* interface index */
+ 			struct __kernel_sockaddr_storage gf_group_aux;	   /* multicast address */
+ 			__u32				 gf_fmode_aux;	   /* filter mode */
+ 			__u32				 gf_numsrc_aux;	   /* number of sources */
+ 			struct __kernel_sockaddr_storage gf_slist[1];	   /* interface index */
+ 		};
+ 		struct {
+ 			__u32				 gf_interface;	   /* interface index */
+ 			struct __kernel_sockaddr_storage gf_group;	   /* multicast address */
+ 			__u32				 gf_fmode;	   /* filter mode */
+ 			__u32				 gf_numsrc;	   /* number of sources */
+ 			struct __kernel_sockaddr_storage gf_slist_flex[];  /* interface index */
+ 		};
+ 	};
  };

  #define GROUP_FILTER_SIZE(numsrc) \
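
The unions keep the old fixed-size members (now carrying an _aux suffix) at unchanged offsets while exposing proper flexible-array members, so existing sizing macros such as IP_MSFILTER_SIZE() keep working. A hedged sketch of building a filter for numsrc sources with the new imsf_slist_flex member:

#include <stdlib.h>
#include <linux/in.h>	/* struct ip_msfilter, IP_MSFILTER_SIZE (the updated copy is assumed) */

/* Allocate an ip_msfilter sized for numsrc sources; imsf_slist_flex replaces
 * the old one-element imsf_slist[] without changing the structure's layout. */
static struct ip_msfilter *alloc_msfilter(unsigned int numsrc, const __be32 *sources)
{
	struct ip_msfilter *msf = calloc(1, IP_MSFILTER_SIZE(numsrc));
	unsigned int i;

	if (!msf)
		return NULL;
	msf->imsf_numsrc = numsrc;
	for (i = 0; i < numsrc; i++)
		msf->imsf_slist_flex[i] = sources[i];	/* addresses in network byte order */
	return msf;
}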
+7 -4
tools/include/uapi/linux/kvm.h
···
  #define KVM_STATS_TYPE_CUMULATIVE	(0x0 << KVM_STATS_TYPE_SHIFT)
  #define KVM_STATS_TYPE_INSTANT	(0x1 << KVM_STATS_TYPE_SHIFT)
  #define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
- #define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_PEAK
+ #define KVM_STATS_TYPE_LINEAR_HIST	(0x3 << KVM_STATS_TYPE_SHIFT)
+ #define KVM_STATS_TYPE_LOG_HIST	(0x4 << KVM_STATS_TYPE_SHIFT)
+ #define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_LOG_HIST

  #define KVM_STATS_UNIT_SHIFT		4
  #define KVM_STATS_UNIT_MASK		(0xF << KVM_STATS_UNIT_SHIFT)
···
   * @size: The number of data items for this stats.
   *        Every data item is of type __u64.
   * @offset: The offset of the stats to the start of stat structure in
-  *          struture kvm or kvm_vcpu.
-  * @unused: Unused field for future usage. Always 0 for now.
+  *          structure kvm or kvm_vcpu.
+  * @bucket_size: A parameter value used for histogram stats. It is only used
+  *		for linear histogram stats, specifying the size of the bucket;
   * @name: The name string for the stats. Its size is indicated by the
   *        &kvm_stats_header->name_size.
   */
···
  	__s16 exponent;
  	__u16 size;
  	__u32 offset;
- 	__u32 unused;
+ 	__u32 bucket_size;
  	char name[];
  };
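
The two new stat types make bucket_size meaningful: for a KVM_STATS_TYPE_LINEAR_HIST descriptor it gives the width of each bucket, and it stays zero for the other types. A hedged sketch of how userspace might map a sample to a bucket index; the exact bucketing convention is defined by the KVM API documentation, so treat this as an assumption:

#include <linux/kvm.h>

/* Map a value onto one of 'size' buckets of width 'bucket_size' for a linear
 * histogram stat; the last bucket is assumed to absorb everything beyond the
 * covered range. */
static unsigned int linear_hist_bucket(__u64 value, __u32 bucket_size, __u16 size)
{
	__u64 idx = bucket_size ? value / bucket_size : 0;

	return idx < size ? (unsigned int)idx : (unsigned int)(size - 1);
}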
+2 -1
tools/include/uapi/linux/mount.h
···
  #define MOVE_MOUNT_T_SYMLINKS		0x00000010 /* Follow symlinks on to path */
  #define MOVE_MOUNT_T_AUTOMOUNTS	0x00000020 /* Follow automounts on to path */
  #define MOVE_MOUNT_T_EMPTY_PATH	0x00000040 /* Empty to path permitted */
- #define MOVE_MOUNT__MASK		0x00000077
+ #define MOVE_MOUNT_SET_GROUP		0x00000100 /* Set sharing group instead */
+ #define MOVE_MOUNT__MASK		0x00000177

  /*
   * fsopen() flags.
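
MOVE_MOUNT_SET_GROUP goes in the flags argument of move_mount(); instead of moving the mount it makes the destination share the propagation group of the source. A hedged sketch using syscall(2), since a libc wrapper may not be available (both paths are assumed to be mount points):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mount.h>

/* Make 'to_path' share the mount (peer) group of 'from_path'. */
static int set_mount_group(const char *from_path, const char *to_path)
{
	return (int)syscall(__NR_move_mount, AT_FDCWD, from_path, AT_FDCWD, to_path,
			    MOVE_MOUNT_SET_GROUP);
}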
+7 -5
tools/include/uapi/linux/prctl.h
···
  /* Speculation control variants */
  # define PR_SPEC_STORE_BYPASS		0
  # define PR_SPEC_INDIRECT_BRANCH	1
+ # define PR_SPEC_L1D_FLUSH		2
  /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
  # define PR_SPEC_NOT_AFFECTED		0
  # define PR_SPEC_PRCTL			(1UL << 0)
···
  #define PR_GET_TAGGED_ADDR_CTRL	56
  # define PR_TAGGED_ADDR_ENABLE	(1UL << 0)
  /* MTE tag check fault modes */
- # define PR_MTE_TCF_SHIFT		1
- # define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
- # define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
- # define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
- # define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
+ # define PR_MTE_TCF_NONE		0
+ # define PR_MTE_TCF_SYNC		(1UL << 1)
+ # define PR_MTE_TCF_ASYNC		(1UL << 2)
+ # define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
  /* MTE tag inclusion mask */
  # define PR_MTE_TAG_SHIFT		3
  # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
+ /* Unused; kept only for source compatibility */
+ # define PR_MTE_TCF_SHIFT		1

  /* Control reclaim behavior when allocating memory */
  #define PR_SET_IO_FLUSHER		57
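
The MTE change turns PR_MTE_TCF_SYNC and PR_MTE_TCF_ASYNC into independent bits, so a task may request both and let the kernel pick whichever mode each CPU prefers. A hedged arm64 sketch of the corresponding prctl() call (the tag inclusion mask value is illustrative):

#include <sys/prctl.h>
#include <linux/prctl.h>

/* Enable tagged addresses and request both synchronous and asynchronous MTE
 * tag-check faults; the kernel chooses the mode on a per-CPU basis. */
static int enable_mte_checks(void)
{
	unsigned long ctrl = PR_TAGGED_ADDR_ENABLE |
			     PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC |
			     (0xfffeUL << PR_MTE_TAG_SHIFT);	/* allow tags 1..15 (example) */

	return prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0);
}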
+1
tools/include/uapi/sound/asound.h
···
  #define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME     0x02000000  /* report absolute hardware link audio time, not reset on startup */
  #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME    0x04000000  /* report estimated link audio time */
  #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000  /* report synchronized audio/system time */
+ #define SNDRV_PCM_INFO_EXPLICIT_SYNC	0x10000000	/* needs explicit sync of pointers and data */

  #define SNDRV_PCM_INFO_DRAIN_TRIGGER	0x40000000		/* internal kernel flag - trigger in drain */
  #define SNDRV_PCM_INFO_FIFO_IN_FRAMES	0x80000000	/* internal kernel flag - FIFO size is in frames */
+1
tools/perf/.gitignore
···
  feature/
  fixdep
  libtraceevent-dynamic-list
+ Documentation/doc.dep
+27 -24
tools/perf/Makefile.config
···
  endif
  endif

- ifeq ($(feature-libbfd), 1)
-   EXTLIBS += -lbfd -lopcodes
- else
-   # we are on a system that requires -liberty and (maybe) -lz
-   # to link against -lbfd; test each case individually here
-
-   # call all detections now so we get correct
-   # status in VF output
-   $(call feature_check,libbfd-liberty)
-   $(call feature_check,libbfd-liberty-z)
-
-   ifeq ($(feature-libbfd-liberty), 1)
-     EXTLIBS += -lbfd -lopcodes -liberty
-     FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
-   else
-     ifeq ($(feature-libbfd-liberty-z), 1)
-       EXTLIBS += -lbfd -lopcodes -liberty -lz
-       FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
-     endif
-   endif
-   $(call feature_check,disassembler-four-args)
- endif
-
- ifeq ($(feature-libbfd-buildid), 1)
-   CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
- else
-   msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
+ ifndef NO_LIBBFD
+   ifeq ($(feature-libbfd), 1)
+     EXTLIBS += -lbfd -lopcodes
+   else
+     # we are on a system that requires -liberty and (maybe) -lz
+     # to link against -lbfd; test each case individually here
+
+     # call all detections now so we get correct
+     # status in VF output
+     $(call feature_check,libbfd-liberty)
+     $(call feature_check,libbfd-liberty-z)
+
+     ifeq ($(feature-libbfd-liberty), 1)
+       EXTLIBS += -lbfd -lopcodes -liberty
+       FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
+     else
+       ifeq ($(feature-libbfd-liberty-z), 1)
+         EXTLIBS += -lbfd -lopcodes -liberty -lz
+         FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
+       endif
+     endif
+     $(call feature_check,disassembler-four-args)
+   endif
+
+   ifeq ($(feature-libbfd-buildid), 1)
+     CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
+   else
+     msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
+   endif
  endif

  ifdef NO_DEMANGLE
+2
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
···
  444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
  445	n64	landlock_add_rule		sys_landlock_add_rule
  446	n64	landlock_restrict_self		sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448	n64	process_mrelease		sys_process_mrelease
+7 -5
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
···
  256	64	sys_debug_setcontext		sys_ni_syscall
  256	spu	sys_debug_setcontext		sys_ni_syscall
  # 257 reserved for vserver
- 258	nospu	migrate_pages			sys_migrate_pages		compat_sys_migrate_pages
- 259	nospu	mbind				sys_mbind			compat_sys_mbind
- 260	nospu	get_mempolicy			sys_get_mempolicy		compat_sys_get_mempolicy
- 261	nospu	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
+ 258	nospu	migrate_pages			sys_migrate_pages
+ 259	nospu	mbind				sys_mbind
+ 260	nospu	get_mempolicy			sys_get_mempolicy
+ 261	nospu	set_mempolicy			sys_set_mempolicy
  262	nospu	mq_open				sys_mq_open			compat_sys_mq_open
  263	nospu	mq_unlink			sys_mq_unlink
  264	32	mq_timedsend			sys_mq_timedsend_time32
···
  298	common	faccessat			sys_faccessat
  299	common	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
  300	common	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
- 301	common	move_pages			sys_move_pages			compat_sys_move_pages
+ 301	common	move_pages			sys_move_pages
  302	common	getcpu				sys_getcpu
  303	nospu	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
  304	32	utimensat			sys_utimensat_time32
···
  444	common	landlock_create_ruleset		sys_landlock_create_ruleset
  445	common	landlock_add_rule		sys_landlock_add_rule
  446	common	landlock_restrict_self		sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448	common	process_mrelease		sys_process_mrelease
+8 -6
tools/perf/arch/s390/entry/syscalls/syscall.tbl
···
  131	common	quotactl		sys_quotactl			sys_quotactl
  132	common	getpgid			sys_getpgid			sys_getpgid
  133	common	fchdir			sys_fchdir			sys_fchdir
- 134	common	bdflush			-				-
+ 134	common	bdflush			sys_ni_syscall			sys_ni_syscall
  135	common	sysfs			sys_sysfs			sys_sysfs
  136	common	personality		sys_s390_personality		sys_s390_personality
  137	common	afs_syscall		-				-
···
  265	common	statfs64		sys_statfs64			compat_sys_statfs64
  266	common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
  267	common	remap_file_pages	sys_remap_file_pages		sys_remap_file_pages
- 268	common	mbind			sys_mbind			compat_sys_mbind
- 269	common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
- 270	common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+ 268	common	mbind			sys_mbind			sys_mbind
+ 269	common	get_mempolicy		sys_get_mempolicy		sys_get_mempolicy
+ 270	common	set_mempolicy		sys_set_mempolicy		sys_set_mempolicy
  271	common	mq_open			sys_mq_open			compat_sys_mq_open
  272	common	mq_unlink		sys_mq_unlink			sys_mq_unlink
  273	common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
···
  284	common	inotify_init		sys_inotify_init		sys_inotify_init
  285	common	inotify_add_watch	sys_inotify_add_watch		sys_inotify_add_watch
  286	common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
- 287	common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
+ 287	common	migrate_pages		sys_migrate_pages		sys_migrate_pages
  288	common	openat			sys_openat			compat_sys_openat
  289	common	mkdirat			sys_mkdirat			sys_mkdirat
  290	common	mknodat			sys_mknodat			sys_mknodat
···
  307	common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
  308	common	tee			sys_tee				sys_tee
  309	common	vmsplice		sys_vmsplice			sys_vmsplice
- 310	common	move_pages		sys_move_pages			compat_sys_move_pages
+ 310	common	move_pages		sys_move_pages			sys_move_pages
  311	common	getcpu			sys_getcpu			sys_getcpu
  312	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
  313	common	utimes			sys_utimes			sys_utimes_time32
···
  444	common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
  445	common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
  446	common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448	common	process_mrelease	sys_process_mrelease		sys_process_mrelease
+2 -1
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
···
  445	common	landlock_add_rule	sys_landlock_add_rule
  446	common	landlock_restrict_self	sys_landlock_restrict_self
  447	common	memfd_secret		sys_memfd_secret
+ 448	common	process_mrelease	sys_process_mrelease

  #
  # Due to a historical design error, certain syscalls are numbered differently
···
  530	x32	set_robust_list		compat_sys_set_robust_list
  531	x32	get_robust_list		compat_sys_get_robust_list
  532	x32	vmsplice		sys_vmsplice
- 533	x32	move_pages		compat_sys_move_pages
+ 533	x32	move_pages		sys_move_pages
  534	x32	preadv			compat_sys_preadv64
  535	x32	pwritev			compat_sys_pwritev64
  536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
+1
tools/perf/check-headers.sh
···
  # diff with extra ignore lines
  check arch/x86/lib/memcpy_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
  check arch/x86/lib/memset_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
+ check arch/x86/include/asm/amd-ibs.h  '-I "^#include [<\"]\(asm/\)*msr-index.h"'
  check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
  check include/uapi/linux/mman.h       '-I "^#include <\(uapi/\)*asm/mman.h>"'
  check include/linux/build_bug.h       '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
+1 -1
tools/perf/scripts/python/bin/stackcollapse-report
···
  #!/bin/sh
  # description: produce callgraphs in short form for scripting use
- perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
+ perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py "$@"
+1 -1
tools/perf/tests/bpf.c
···
  	}

  	if (count != expect * evlist->core.nr_entries) {
- 		pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
+ 		pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count);
  		goto out_delete_evlist;
  	}

+8 -1
tools/perf/trace/beauty/include/linux/socket.h
···
   *		reuses AF_INET address family
   */
  #define AF_XDP		44	/* XDP sockets			*/
+ #define AF_MCTP	45	/* Management component
+ 				 * transport protocol
+ 				 */

- #define AF_MAX		45	/* For now.. */
+ #define AF_MAX		46	/* For now.. */

  /* Protocol families, same as address families. */
  #define PF_UNSPEC	AF_UNSPEC
···
  #define PF_QIPCRTR	AF_QIPCRTR
  #define PF_SMC		AF_SMC
  #define PF_XDP		AF_XDP
+ #define PF_MCTP	AF_MCTP
  #define PF_MAX		AF_MAX

  /* Maximum queue length specifiable by listen. */
···
  			struct sockaddr __user *upeer_sockaddr,
  			int __user *upeer_addrlen, int flags,
  			unsigned long nofile);
+ extern struct file *do_accept(struct file *file, unsigned file_flags,
+ 			struct sockaddr __user *upeer_sockaddr,
+ 			int __user *upeer_addrlen, int flags);
  extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
  			int __user *upeer_addrlen, int flags);
  extern int __sys_socket(int family, int type, int protocol);
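
With AF_MCTP/PF_MCTP in the family table, the 'perf trace' beautifiers can name the new family; on the kernel side an MCTP endpoint is simply a datagram socket in that family. A hedged sketch (assumes a kernel built with MCTP support; addressing via struct sockaddr_mctp lives in linux/mctp.h, which this diff does not touch):

#include <stdio.h>
#include <sys/socket.h>

#ifndef AF_MCTP
#define AF_MCTP 45	/* value from the header copy above */
#endif

int main(void)
{
	/* MCTP endpoints are datagram sockets; older kernels fail with EAFNOSUPPORT. */
	int fd = socket(AF_MCTP, SOCK_DGRAM, 0);

	if (fd < 0)
		perror("socket(AF_MCTP)");
	return 0;
}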
+1 -1
tools/perf/trace/beauty/move_mount_flags.sh
···
  linux_mount=${linux_header_dir}/mount.h

  printf "static const char *move_mount_flags[] = {\n"
- regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+ regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
  egrep $regex ${linux_mount} | \
  	sed -r "s/$regex/\2 \1/g"	| \
  	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+1
tools/perf/util/Build
···
  perf-y += session.o
  perf-y += sample-raw.o
  perf-y += s390-sample-raw.o
+ perf-y += amd-sample-raw.o
  perf-$(CONFIG_TRACE) += syscalltbl.o
  perf-y += ordered-events.o
  perf-y += namespaces.o
+289
tools/perf/util/amd-sample-raw.c
··· 1 + // SPDX-License-Identifier: GPL-2.0
2 + /*
3 + * AMD specific. Provide textual annotation for IBS raw sample data.
4 + */
5 +
6 + #include <unistd.h>
7 + #include <stdio.h>
8 + #include <string.h>
9 + #include <inttypes.h>
10 +
11 + #include <linux/string.h>
12 + #include "../../arch/x86/include/asm/amd-ibs.h"
13 +
14 + #include "debug.h"
15 + #include "session.h"
16 + #include "evlist.h"
17 + #include "sample-raw.h"
18 + #include "pmu-events/pmu-events.h"
19 +
20 + static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
21 +
22 + static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
23 + {
24 + const char * const ic_miss_strs[] = {
25 + " IcMiss 0",
26 + " IcMiss 1",
27 + };
28 + const char * const l1tlb_pgsz_strs[] = {
29 + " L1TlbPgSz 4KB",
30 + " L1TlbPgSz 2MB",
31 + " L1TlbPgSz 1GB",
32 + " L1TlbPgSz RESERVED"
33 + };
34 + const char * const l1tlb_pgsz_strs_erratum1347[] = {
35 + " L1TlbPgSz 4KB",
36 + " L1TlbPgSz 16KB",
37 + " L1TlbPgSz 2MB",
38 + " L1TlbPgSz 1GB"
39 + };
40 + const char *ic_miss_str = NULL;
41 + const char *l1tlb_pgsz_str = NULL;
42 +
43 + if (cpu_family == 0x19 && cpu_model < 0x10) {
44 + /*
45 + * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
46 + * Erratum #1347 workaround is to use table provided in erratum
47 + */
48 + if (reg.phy_addr_valid)
49 + l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
50 + } else {
51 + if (reg.phy_addr_valid)
52 + l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
53 + ic_miss_str = ic_miss_strs[reg.ic_miss];
54 + }
55 +
56 + printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
57 + "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n",
58 + reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
59 + reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
60 + reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
61 + reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "");
62 + }
63 +
64 + static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
65 + {
66 + printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
67 + }
68 +
69 + static void pr_ibs_op_ctl(union ibs_op_ctl reg)
70 + {
71 + printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n",
72 + reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val,
73 + reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
74 + }
75 +
76 + static void pr_ibs_op_data(union ibs_op_data reg)
77 + {
78 + printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
79 + " RipInvalid %d BrnFuse %d Microcode %d\n",
80 + reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
81 + reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
82 + reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
83 + reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
84 + reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
85 + }
86 +
87 + static void pr_ibs_op_data2(union ibs_op_data2 reg)
88 + {
89 + static const char * const data_src_str[] = {
90 + "",
91 + " DataSrc 1=(reserved)",
92 + " DataSrc 2=Local node cache",
93 + " DataSrc 3=DRAM",
94 + " DataSrc 4=Remote node cache",
95 + " DataSrc 5=(reserved)",
96 + " DataSrc 6=(reserved)",
97 + " DataSrc 7=Other"
98 + };
99 +
100 + printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
101 + reg.data_src == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
102 + : "CacheHitSt 0=M-state ") : "",
103 + reg.rmt_node, data_src_str[reg.data_src]);
104 + }
105 +
106 + static void pr_ibs_op_data3(union ibs_op_data3 reg)
107 + {
108 + char l2_miss_str[sizeof(" L2Miss _")] = "";
109 + char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
110 + char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
111 +
112 + /*
113 + * Erratum #1293
114 + * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
115 + */
116 + if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
117 + snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
118 + snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
119 + " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
120 + }
121 +
122 + if (reg.op_mem_width)
123 + snprintf(op_mem_width_str, sizeof(op_mem_width_str),
124 + " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
125 +
126 + printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
127 + "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
128 + "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
129 + "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
130 + reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
131 + reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
132 + reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
133 + reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
134 + reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
135 + op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
136 + }
137 +
138 + /*
139 + * IBS Op/Execution MSRs always saved, in order, are:
140 + * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
141 + * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
142 + */
143 + static void amd_dump_ibs_op(struct perf_sample *sample)
144 + {
145 + struct perf_ibs_data *data = sample->raw_data;
146 + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
147 + __u64 *rip = (__u64 *)op_ctl + 1;
148 + union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
149 + union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
150 +
151 + pr_ibs_op_ctl(*op_ctl);
152 + if (!op_data->op_rip_invalid)
153 + printf("IbsOpRip:\t%016llx\n", *rip);
154 + pr_ibs_op_data(*op_data);
155 + /*
156 + * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
157 + */
158 + if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
159 + (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
160 + pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
161 + pr_ibs_op_data3(*op_data3);
162 + if (op_data3->dc_lin_addr_valid)
163 + printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
164 + if (op_data3->dc_phy_addr_valid)
165 + printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
166 + if (op_data->op_brn_ret && *(rip + 6))
167 + printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
168 + }
169 +
170 + /*
171 + * IBS Fetch MSRs always saved, in order, are:
172 + * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
173 + */
174 + static void amd_dump_ibs_fetch(struct perf_sample *sample)
175 + {
176 + struct perf_ibs_data *data = sample->raw_data;
177 + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
178 + __u64 *addr = (__u64 *)fetch_ctl + 1;
179 + union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
180 +
181 + pr_ibs_fetch_ctl(*fetch_ctl);
182 + printf("IbsFetchLinAd:\t%016llx\n", *addr++);
183 + if (fetch_ctl->phy_addr_valid)
184 + printf("IbsFetchPhysAd:\t%016llx\n", *addr);
185 + pr_ic_ibs_extd_ctl(*extd_ctl);
186 + }
187 +
188 + /*
189 + * Test for enable and valid bits in captured control MSRs.
190 + */
191 + static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
192 + {
193 + struct perf_ibs_data *data = sample->raw_data;
194 + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
195 +
196 + if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
197 + return true;
198 +
199 + return false;
200 + }
201 +
202 + static bool is_valid_ibs_op_sample(struct perf_sample *sample)
203 + {
204 + struct perf_ibs_data *data = sample->raw_data;
205 + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
206 +
207 + if (op_ctl->op_en && op_ctl->op_val)
208 + return true;
209 +
210 + return false;
211 + }
212 +
213 + /* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
214 + * and if the event was triggered by IBS, display its raw data with decoded text.
215 + * The function is only invoked when the dump flag -D is set.
216 + */
217 + void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
218 + struct perf_sample *sample)
219 + {
220 + struct evsel *evsel;
221 +
222 + if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
223 + return;
224 +
225 + evsel = evlist__event2evsel(evlist, event);
226 + if (!evsel)
227 + return;
228 +
229 + if (evsel->core.attr.type == ibs_fetch_type) {
230 + if (!is_valid_ibs_fetch_sample(sample)) {
231 + pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
232 + return;
233 + }
234 + amd_dump_ibs_fetch(sample);
235 + } else if (evsel->core.attr.type == ibs_op_type) {
236 + if (!is_valid_ibs_op_sample(sample)) {
237 + pr_debug("Invalid raw IBS Op MSR data encountered\n");
238 + return;
239 + }
240 + amd_dump_ibs_op(sample);
241 + }
242 + }
243 +
244 + static void parse_cpuid(struct perf_env *env)
245 + {
246 + const char *cpuid;
247 + int ret;
248 +
249 + cpuid = perf_env__cpuid(env);
250 + /*
251 + * cpuid = "AuthenticAMD,family,model,stepping"
252 + */
253 + ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
254 + if (ret != 2)
255 + pr_debug("problem parsing cpuid\n");
256 + }
257 +
258 + /*
259 + * Find and assign the type number used for ibs_op or ibs_fetch samples.
260 + * Device names can be large - we are only interested in the first 9 characters,
261 + * to match "ibs_fetch".
262 + */
263 + bool evlist__has_amd_ibs(struct evlist *evlist)
264 + {
265 + struct perf_env *env = evlist->env;
266 + int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
267 + const char *pmu_mapping = perf_env__pmu_mappings(env);
268 + char name[sizeof("ibs_fetch")];
269 + u32 type;
270 +
271 + while (nr_pmu_mappings--) {
272 + ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
273 + if (ret == 2) {
274 + if (strstarts(name, "ibs_op"))
275 + ibs_op_type = type;
276 + else if (strstarts(name, "ibs_fetch"))
277 + ibs_fetch_type = type;
278 + }
279 + pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
280 + }
281 +
282 + if (ibs_fetch_type || ibs_op_type) {
283 + if (!cpu_family)
284 + parse_cpuid(env);
285 + return true;
286 + }
287 +
288 + return false;
289 + }
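The decoder above never masks or shifts by hand: each pr_*() helper copies the raw 8-byte MSR image into one of the bitfield unions from the amd-ibs.h copy and lets the compiler slice out the fields. A minimal standalone sketch of that idea, using a trimmed-down, hypothetical mirror of ibs_op_ctl (not the real header) and the same arithmetic pr_ibs_op_ctl() applies to MaxCnt:

#include <stdio.h>
#include <stdint.h>

/* Trimmed-down, illustrative mirror of ibs_op_ctl; not the real header. */
union demo_ibs_op_ctl {
	uint64_t val;
	struct {
		uint64_t opmaxcnt:16,	/* 0-15: max count, stored divided by 16 */
			 reserved0:1,	/* 16 */
			 op_en:1,	/* 17: op sampling enable */
			 op_val:1,	/* 18: op sample valid */
			 cnt_ctl:1,	/* 19: count uops (1) or cycles (0) */
			 opmaxcnt_ext:7,/* 20-26: upper bits of max count */
			 reserved1:5,	/* 27-31 */
			 opcurcnt:27,	/* 32-58: current count */
			 reserved2:5;	/* 59-63 */
	};
};

int main(void)
{
	union demo_ibs_op_ctl reg = { .val = 0x00000000000e0fffULL };
	unsigned int max_cnt = ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4;

	/* Same field arithmetic that pr_ibs_op_ctl() uses for MaxCnt */
	printf("MaxCnt %u En %u Val %u CntCtl %u=%s\n",
	       max_cnt, (unsigned int)reg.op_en, (unsigned int)reg.op_val,
	       (unsigned int)reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles");
	return 0;
}

Fed the made-up value 0xe0fff, this prints "MaxCnt 65520 En 1 Val 1 CntCtl 1=uOps", mirroring one line of the ibs_op_ctl output shown by the real decoder.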
+8
tools/perf/util/bpf-event.c
··· #include "record.h"
#include "util/synthetic-events.h"

+ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+ {
+ struct btf *btf;
+ int err = btf__get_from_id(id, &btf);
+
+ return err ? ERR_PTR(err) : btf;
+ }
+
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))

static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
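Linking perf against a libbpf release that predates btf__load_from_kernel_by_id() would otherwise fail, so the hunk above supplies a __weak wrapper built on the older btf__get_from_id(); when a newer libbpf is present, its strong definition silently wins at link time. A self-contained sketch of the weak-symbol mechanism itself (new_api() is a made-up name, not a perf or libbpf symbol):

#include <stdio.h>

/*
 * Weak default: used only if no other object in the link provides a
 * strong definition of the same symbol.
 */
int __attribute__((weak)) new_api(void)
{
	return -1;	/* compatibility fallback path */
}

int main(void)
{
	/* With no strong new_api() linked in, the weak fallback runs. */
	printf("new_api() -> %d\n", new_api());
	return 0;
}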
+10
tools/perf/util/dso.c
···
bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
{
+ if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) {
+ /*
+ * For backward compatibility, allow a build-id to have
+ * trailing zeros.
+ */
+ return !memcmp(dso->bid.data, bid->data, bid->size) &&
+ !memchr_inv(&dso->bid.data[bid->size], 0,
+ dso->bid.size - bid->size);
+ }
+
return dso->bid.size == bid->size &&
memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
}
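The relaxed comparison accepts a stored build-id that was padded out to the full BUILD_ID_SIZE, as long as every padding byte is zero. A userspace sketch of the same check, assuming a 20-byte BUILD_ID_SIZE and with a plain loop standing in for the kernel-style memchr_inv() helper:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define BUILD_ID_SIZE 20	/* assumed SHA-1-sized id, for this sketch only */

/* Plain-loop stand-in for memchr_inv(): true if all bytes are zero. */
static bool all_zero(const unsigned char *p, size_t len)
{
	for (size_t i = 0; i < len; i++)
		if (p[i])
			return false;
	return true;
}

static bool bid_equal(const unsigned char *stored, size_t stored_sz,
		      const unsigned char *wanted, size_t wanted_sz)
{
	if (stored_sz > wanted_sz && stored_sz == BUILD_ID_SIZE)
		return !memcmp(stored, wanted, wanted_sz) &&
		       all_zero(stored + wanted_sz, stored_sz - wanted_sz);

	return stored_sz == wanted_sz && !memcmp(stored, wanted, stored_sz);
}

int main(void)
{
	unsigned char padded[BUILD_ID_SIZE] = { 0xde, 0xad, 0xbe, 0xef };
	unsigned char short_id[4] = { 0xde, 0xad, 0xbe, 0xef };

	printf("%s\n", bid_equal(padded, sizeof(padded),
				 short_id, sizeof(short_id)) ? "equal" : "different");
	return 0;
}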
+78
tools/perf/util/env.c
··· #include <sys/utsname.h>
#include <stdlib.h>
#include <string.h>
+ #include "strbuf.h"

struct perf_env perf_env;

··· return 0;
}

+ int perf_env__read_pmu_mappings(struct perf_env *env)
+ {
+ struct perf_pmu *pmu = NULL;
+ u32 pmu_num = 0;
+ struct strbuf sb;
+
+ while ((pmu = perf_pmu__scan(pmu))) {
+ if (!pmu->name)
+ continue;
+ pmu_num++;
+ }
+ if (!pmu_num) {
+ pr_debug("pmu mappings not available\n");
+ return -ENOENT;
+ }
+ env->nr_pmu_mappings = pmu_num;
+
+ if (strbuf_init(&sb, 128 * pmu_num) < 0)
+ return -ENOMEM;
+
+ while ((pmu = perf_pmu__scan(pmu))) {
+ if (!pmu->name)
+ continue;
+ if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
+ goto error;
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto error;
+ }
+
+ env->pmu_mappings = strbuf_detach(&sb, NULL);
+
+ return 0;
+
+ error:
+ strbuf_release(&sb);
+ return -1;
+ }
+
int perf_env__read_cpuid(struct perf_env *env)
{
char cpuid[128];
··· return normalize_arch(arch_name);
}

+ const char *perf_env__cpuid(struct perf_env *env)
+ {
+ int status;
+
+ if (!env || !env->cpuid) { /* Assume local operation */
+ status = perf_env__read_cpuid(env);
+ if (status)
+ return NULL;
+ }
+
+ return env->cpuid;
+ }
+
+ int perf_env__nr_pmu_mappings(struct perf_env *env)
+ {
+ int status;
+
+ if (!env || !env->nr_pmu_mappings) { /* Assume local operation */
+ status = perf_env__read_pmu_mappings(env);
+ if (status)
+ return 0;
+ }
+
+ return env->nr_pmu_mappings;
+ }
+
+ const char *perf_env__pmu_mappings(struct perf_env *env)
+ {
+ int status;
+
+ if (!env || !env->pmu_mappings) { /* Assume local operation */
+ status = perf_env__read_pmu_mappings(env);
+ if (status)
+ return NULL;
+ }
+
+ return env->pmu_mappings;
+ }

int perf_env__numa_node(struct perf_env *env, int cpu)
{
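perf_env__read_pmu_mappings() packs the mappings as nr_pmu_mappings entries of the form "<type>:<name>", each followed by a '\0', which is exactly the layout evlist__has_amd_ibs() walks earlier in this series. A small sketch of a consumer of that layout (the buffer contents and PMU type numbers below are invented for illustration):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Invented buffer in the "<type>:<name>\0" layout described above. */
	const char mappings[] = "8:ibs_fetch\0" "9:ibs_op\0" "4:cpu";
	const char *p = mappings;
	int nr = 3;		/* would come from nr_pmu_mappings */
	unsigned int type;
	char name[32];

	while (nr--) {
		if (sscanf(p, "%u:%31s", &type, name) == 2)
			printf("pmu type %u is '%s'\n", type, name);
		p += strlen(p) + 1;	/* hop over the entry and its '\0' */
	}
	return 0;
}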
+5
tools/perf/util/env.h
··· int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);

int perf_env__read_cpuid(struct perf_env *env);
+ int perf_env__read_pmu_mappings(struct perf_env *env);
+ int perf_env__nr_pmu_mappings(struct perf_env *env);
+ const char *perf_env__pmu_mappings(struct perf_env *env);
+
int perf_env__read_cpu_topology_map(struct perf_env *env);

void cpu_cache_level__free(struct cpu_cache_level *cache);

const char *perf_env__arch(struct perf_env *env);
+ const char *perf_env__cpuid(struct perf_env *env);
const char *perf_env__raw_arch(struct perf_env *env);
int perf_env__nr_cpus_avail(struct perf_env *env);
+15 -5
tools/perf/util/evsel.c
··· goto out;
}

- static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+ int copy_config_terms(struct list_head *dst, struct list_head *src)
{
struct evsel_config_term *pos, *tmp;

- list_for_each_entry(pos, &src->config_terms, list) {
+ list_for_each_entry(pos, src, list) {
tmp = malloc(sizeof(*tmp));
if (tmp == NULL)
return -ENOMEM;
··· return -ENOMEM;
}
}
- list_add_tail(&tmp->list, &dst->config_terms);
+ list_add_tail(&tmp->list, dst);
}
return 0;
+ }
+
+ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+ {
+ return copy_config_terms(&dst->config_terms, &src->config_terms);
}

/**
··· return err;
}

- static void evsel__free_config_terms(struct evsel *evsel)
+ void free_config_terms(struct list_head *config_terms)
{
struct evsel_config_term *term, *h;

- list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
+ list_for_each_entry_safe(term, h, config_terms, list) {
list_del_init(&term->list);
if (term->free_str)
zfree(&term->val.str);
free(term);
}
+ }
+
+ static void evsel__free_config_terms(struct evsel *evsel)
+ {
+ free_config_terms(&evsel->config_terms);
}

void evsel__exit(struct evsel *evsel)
+3
tools/perf/util/evsel.h
··· struct evsel *evsel__clone(struct evsel *orig);
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);

+ int copy_config_terms(struct list_head *dst, struct list_head *src);
+ void free_config_terms(struct list_head *config_terms);
+
/*
 * Returns pointer with encoded error via <linux/err.h> interface.
 */
+15 -3
tools/perf/util/parse-events-hybrid.c
··· int ret;

perf_pmu__for_each_hybrid_pmu(pmu) {
+ LIST_HEAD(terms);
+
if (pmu_cmp(parse_state, pmu))
continue;

+ copy_config_terms(&terms, config_terms);
ret = create_event_hybrid(PERF_TYPE_HARDWARE,
&parse_state->idx, list, attr, name,
- config_terms, pmu);
+ &terms, pmu);
+ free_config_terms(&terms);
if (ret)
return ret;
}
··· int ret;

perf_pmu__for_each_hybrid_pmu(pmu) {
+ LIST_HEAD(terms);
+
if (pmu_cmp(parse_state, pmu))
continue;

+ copy_config_terms(&terms, config_terms);
ret = create_raw_event_hybrid(&parse_state->idx, list, attr,
- name, config_terms, pmu);
+ name, &terms, pmu);
+ free_config_terms(&terms);
if (ret)
return ret;
}
···
*hybrid = true;
perf_pmu__for_each_hybrid_pmu(pmu) {
+ LIST_HEAD(terms);
+
if (pmu_cmp(parse_state, pmu))
continue;

+ copy_config_terms(&terms, config_terms);
ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list,
- attr, name, config_terms, pmu);
+ attr, name, &terms, pmu);
+ free_config_terms(&terms);
if (ret)
return ret;
}
+13 -14
tools/perf/util/parse-events.c
··· evsel->name = strdup(name);

if (config_terms)
- list_splice(config_terms, &evsel->config_terms);
+ list_splice_init(config_terms, &evsel->config_terms);

if (list)
list_add_tail(&evsel->core.node, list);
··· config_name ? : name, &config_terms,
&hybrid, parse_state);
if (hybrid)
- return ret;
+ goto out_free_terms;

- return add_event(list, idx, &attr, config_name ? : name, &config_terms);
+ ret = add_event(list, idx, &attr, config_name ? : name, &config_terms);
+ out_free_terms:
+ free_config_terms(&config_terms);
+ return ret;
}

static void tracepoint_error(struct parse_events_error *e, int err,
··· get_config_name(head_config),
&config_terms, &hybrid);
if (hybrid)
- return ret;
+ goto out_free_terms;

- return add_event(list, &parse_state->idx, &attr,
- get_config_name(head_config), &config_terms);
+ ret = add_event(list, &parse_state->idx, &attr,
+ get_config_name(head_config), &config_terms);
+ out_free_terms:
+ free_config_terms(&config_terms);
+ return ret;
}

int parse_events_add_tool(struct parse_events_state *parse_state,
··· }

if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
- struct evsel_config_term *pos, *tmp;
-
- list_for_each_entry_safe(pos, tmp, &config_terms, list) {
- list_del_init(&pos->list);
- if (pos->free_str)
- zfree(&pos->val.str);
- free(pos);
- }
+ free_config_terms(&config_terms);
return -EINVAL;
}
+4 -1
tools/perf/util/perf_event_attr_fprintf.c
··· PRINT_ATTRf(cgroup, p_unsigned);
PRINT_ATTRf(text_poke, p_unsigned);
PRINT_ATTRf(build_id, p_unsigned);
+ PRINT_ATTRf(inherit_thread, p_unsigned);
+ PRINT_ATTRf(remove_on_exec, p_unsigned);
+ PRINT_ATTRf(sigtrap, p_unsigned);

PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
··· PRINT_ATTRf(aux_watermark, p_unsigned);
PRINT_ATTRf(sample_max_stack, p_unsigned);
PRINT_ATTRf(aux_sample_size, p_unsigned);
- PRINT_ATTRf(text_poke, p_unsigned);
+ PRINT_ATTRf(sig_data, p_unsigned);

return ret;
}
+8
tools/perf/util/sample-raw.c
··· /* SPDX-License-Identifier: GPL-2.0 */

#include <string.h>
+ #include <linux/string.h>
#include "evlist.h"
#include "env.h"
+ #include "header.h"
#include "sample-raw.h"

/*
··· void evlist__init_trace_event_sample_raw(struct evlist *evlist)
{
const char *arch_pf = perf_env__arch(evlist->env);
+ const char *cpuid = perf_env__cpuid(evlist->env);

if (arch_pf && !strcmp("s390", arch_pf))
evlist->trace_event_sample_raw = evlist__s390_sample_raw;
+ else if (arch_pf && !strcmp("x86", arch_pf) &&
+ cpuid && strstarts(cpuid, "AuthenticAMD") &&
+ evlist__has_amd_ibs(evlist)) {
+ evlist->trace_event_sample_raw = evlist__amd_sample_raw;
+ }
}
+5 -1
tools/perf/util/sample-raw.h
··· union perf_event;
struct perf_sample;

- void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+ void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
+ struct perf_sample *sample);
+ bool evlist__has_amd_ibs(struct evlist *evlist);
+ void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
+ struct perf_sample *sample);
void evlist__init_trace_event_sample_raw(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
+16 -4
tools/perf/util/symbol.c
··· if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
goto out_close;

- section = bfd_get_section_by_name(abfd, ".text");
- if (section)
- dso->text_offset = section->vma - section->filepos;
-
symbols_size = bfd_get_symtab_upper_bound(abfd);
if (symbols_size == 0) {
bfd_close(abfd);
··· symbols_count = bfd_canonicalize_symtab(abfd, symbols);
if (symbols_count < 0)
goto out_free;
+
+ section = bfd_get_section_by_name(abfd, ".text");
+ if (section) {
+ for (i = 0; i < symbols_count; ++i) {
+ if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
+ !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
+ break;
+ }
+ if (i < symbols_count) {
+ /* PE symbols can only have 4 bytes, so use .text high bits */
+ dso->text_offset = section->vma - (u32)section->vma;
+ dso->text_offset += (u32)bfd_asymbol_value(symbols[i]);
+ } else {
+ dso->text_offset = section->vma - section->filepos;
+ }
+ }

qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
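When the PE file carries an __ImageBase (or __image_base__) symbol, the new code rebuilds the load base from the truncated 32-bit symbol value plus the high half of the .text vma, since bfd exposes only 4-byte values for PE symbols; otherwise it falls back to the old vma-minus-filepos offset. A toy calculation with made-up numbers showing the same arithmetic:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t text_vma   = 0x0000000140001000ULL;	/* invented .text vma */
	uint32_t image_base = 0x40000000;	/* invented, truncated __ImageBase value */
	uint64_t text_offset;

	/* keep the high 32 bits of the vma, splice in the 32-bit ImageBase */
	text_offset = (text_vma - (uint32_t)text_vma) + image_base;
	printf("text_offset = 0x%llx\n", (unsigned long long)text_offset);
	return 0;
}

With these invented inputs it prints 0x140000000, i.e. the 64-bit image base recovered from a 32-bit symbol value.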