Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

+1

arch/powerpc/perf/core-book3s.c

··· 518 518 } 519 519 } 520 520 cpuhw->bhrb_stack.nr = u_index; 521 + cpuhw->bhrb_stack.hw_idx = -1ULL; 521 522 return; 522 523 } 523 524

-1

arch/x86/Kconfig

··· 1875 1875 1876 1876 config X86_UMIP 1877 1877 def_bool y 1878 - depends on CPU_SUP_INTEL || CPU_SUP_AMD 1879 1878 prompt "User Mode Instruction Prevention" if EXPERT 1880 1879 ---help--- 1881 1880 User Mode Instruction Prevention (UMIP) is a security feature in

+1 -1

arch/x86/crypto/aesni-intel_glue.c

··· 1064 1064 static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)]; 1065 1065 1066 1066 static const struct x86_cpu_id aesni_cpu_id[] = { 1067 - X86_FEATURE_MATCH(X86_FEATURE_AES), 1067 + X86_MATCH_FEATURE(X86_FEATURE_AES, NULL), 1068 1068 {} 1069 1069 }; 1070 1070 MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);

+1 -1

arch/x86/crypto/crc32-pclmul_glue.c

··· 170 170 }; 171 171 172 172 static const struct x86_cpu_id crc32pclmul_cpu_id[] = { 173 - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), 173 + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), 174 174 {} 175 175 }; 176 176 MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);

+1 -1

arch/x86/crypto/crc32c-intel_glue.c

··· 221 221 }; 222 222 223 223 static const struct x86_cpu_id crc32c_cpu_id[] = { 224 - X86_FEATURE_MATCH(X86_FEATURE_XMM4_2), 224 + X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL), 225 225 {} 226 226 }; 227 227 MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);

+1 -1

arch/x86/crypto/crct10dif-pclmul_glue.c

··· 114 114 }; 115 115 116 116 static const struct x86_cpu_id crct10dif_cpu_id[] = { 117 - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), 117 + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), 118 118 {} 119 119 }; 120 120 MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);

+1 -1

arch/x86/crypto/ghash-clmulni-intel_glue.c

··· 313 313 }; 314 314 315 315 static const struct x86_cpu_id pcmul_cpu_id[] = { 316 - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */ 316 + X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */ 317 317 {} 318 318 }; 319 319 MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);

+1 -1

arch/x86/events/amd/power.c

··· 259 259 } 260 260 261 261 static const struct x86_cpu_id cpu_match[] = { 262 - { .vendor = X86_VENDOR_AMD, .family = 0x15 }, 262 + X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL), 263 263 {}, 264 264 }; 265 265

+31 -13

arch/x86/events/amd/uncore.c

··· 180 180 hwc->idx = -1; 181 181 } 182 182 183 + /* 184 + * Convert logical CPU number to L3 PMC Config ThreadMask format 185 + */ 186 + static u64 l3_thread_slice_mask(int cpu) 187 + { 188 + u64 thread_mask, core = topology_core_id(cpu); 189 + unsigned int shift, thread = 0; 190 + 191 + if (topology_smt_supported() && !topology_is_primary_thread(cpu)) 192 + thread = 1; 193 + 194 + if (boot_cpu_data.x86 <= 0x18) { 195 + shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread; 196 + thread_mask = BIT_ULL(shift); 197 + 198 + return AMD64_L3_SLICE_MASK | thread_mask; 199 + } 200 + 201 + core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK; 202 + shift = AMD64_L3_THREAD_SHIFT + thread; 203 + thread_mask = BIT_ULL(shift); 204 + 205 + return AMD64_L3_EN_ALL_SLICES | core | thread_mask; 206 + } 207 + 183 208 static int amd_uncore_event_init(struct perf_event *event) 184 209 { 185 210 struct amd_uncore *uncore; ··· 228 203 return -EINVAL; 229 204 230 205 /* 231 - * SliceMask and ThreadMask need to be set for certain L3 events in 232 - * Family 17h. For other events, the two fields do not affect the count. 206 + * SliceMask and ThreadMask need to be set for certain L3 events. 207 + * For other events, the two fields do not affect the count. 
233 208 */ 234 - if (l3_mask && is_llc_event(event)) { 235 - int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4); 236 - 237 - if (smp_num_siblings > 1) 238 - thread += cpu_data(event->cpu).apicid & 1; 239 - 240 - hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) & 241 - AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK; 242 - } 209 + if (l3_mask && is_llc_event(event)) 210 + hwc->config |= l3_thread_slice_mask(event->cpu); 243 211 244 212 uncore = event_to_amd_uncore(event); 245 213 if (!uncore) ··· 538 520 if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) 539 521 return -ENODEV; 540 522 541 - if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) { 523 + if (boot_cpu_data.x86 >= 0x17) { 542 524 /* 543 - * For F17h or F18h, the Northbridge counters are 525 + * For F17h and above, the Northbridge counters are 544 526 * repurposed as Data Fabric counters. Also, L3 545 527 * counters are supported too. The PMUs are exported 546 528 * based on family as either L2 or L3 and NB or DF.

+22 -3

arch/x86/events/intel/core.c

··· 1945 1945 * intel_bts events don't coexist with intel PMU's BTS events because of 1946 1946 * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them 1947 1947 * disabled around intel PMU's event batching etc, only inside the PMI handler. 1948 + * 1949 + * Avoid PEBS_ENABLE MSR access in PMIs. 1950 + * The GLOBAL_CTRL has been disabled. All the counters do not count anymore. 1951 + * It doesn't matter if the PEBS is enabled or not. 1952 + * Usually, the PEBS status are not changed in PMIs. It's unnecessary to 1953 + * access PEBS_ENABLE MSR in disable_all()/enable_all(). 1954 + * However, there are some cases which may change PEBS status, e.g. PMI 1955 + * throttle. The PEBS_ENABLE should be updated where the status changes. 1948 1956 */ 1949 1957 static void __intel_pmu_disable_all(void) 1950 1958 { ··· 1962 1954 1963 1955 if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) 1964 1956 intel_pmu_disable_bts(); 1965 - 1966 - intel_pmu_pebs_disable_all(); 1967 1957 } 1968 1958 1969 1959 static void intel_pmu_disable_all(void) 1970 1960 { 1971 1961 __intel_pmu_disable_all(); 1962 + intel_pmu_pebs_disable_all(); 1972 1963 intel_pmu_lbr_disable_all(); 1973 1964 } 1974 1965 ··· 1975 1968 { 1976 1969 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1977 1970 1978 - intel_pmu_pebs_enable_all(); 1979 1971 intel_pmu_lbr_enable_all(pmi); 1980 1972 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 1981 1973 x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); ··· 1992 1986 1993 1987 static void intel_pmu_enable_all(int added) 1994 1988 { 1989 + intel_pmu_pebs_enable_all(); 1995 1990 __intel_pmu_enable_all(added, false); 1996 1991 } 1997 1992 ··· 2381 2374 * PEBS overflow sets bit 62 in the global status register 2382 2375 */ 2383 2376 if (__test_and_clear_bit(62, (unsigned long *)&status)) { 2377 + u64 pebs_enabled = cpuc->pebs_enabled; 2378 + 2384 2379 handled++; 2385 2380 x86_pmu.drain_pebs(regs); 2386 2381 status &= x86_pmu.intel_ctrl | 
GLOBAL_STATUS_TRACE_TOPAPMI; 2382 + 2383 + /* 2384 + * PMI throttle may be triggered, which stops the PEBS event. 2385 + * Although cpuc->pebs_enabled is updated accordingly, the 2386 + * MSR_IA32_PEBS_ENABLE is not updated. Because the 2387 + * cpuc->enabled has been forced to 0 in PMI. 2388 + * Update the MSR if pebs_enabled is changed. 2389 + */ 2390 + if (pebs_enabled != cpuc->pebs_enabled) 2391 + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 2387 2392 } 2388 2393 2389 2394 /*

+40 -43

arch/x86/events/intel/cstate.c

··· 594 594 }; 595 595 596 596 597 - #define X86_CSTATES_MODEL(model, states) \ 598 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } 599 - 600 597 static const struct x86_cpu_id intel_cstates_match[] __initconst = { 601 - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), 602 - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), 603 - X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), 598 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates), 599 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_cstates), 600 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhm_cstates), 604 601 605 - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), 606 - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), 607 - X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), 602 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_cstates), 603 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_cstates), 604 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhm_cstates), 608 605 609 - X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), 610 - X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), 606 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_cstates), 607 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snb_cstates), 611 608 612 - X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), 613 - X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), 609 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &snb_cstates), 610 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &snb_cstates), 614 611 615 - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL, snb_cstates), 616 - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), 617 - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_G, snb_cstates), 612 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &snb_cstates), 613 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &snb_cstates), 614 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &snb_cstates), 618 615 619 - X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_L, hswult_cstates), 616 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hswult_cstates), 620 617 621 - 
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), 622 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_D, slm_cstates), 623 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), 618 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &slm_cstates), 619 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &slm_cstates), 620 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &slm_cstates), 624 621 625 - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL, snb_cstates), 626 - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_D, snb_cstates), 627 - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_G, snb_cstates), 628 - X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), 622 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &snb_cstates), 623 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &snb_cstates), 624 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &snb_cstates), 625 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &snb_cstates), 629 626 630 - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_L, snb_cstates), 631 - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE, snb_cstates), 632 - X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), 627 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &snb_cstates), 628 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &snb_cstates), 629 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &snb_cstates), 633 630 634 - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates), 635 - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates), 636 - X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates), 637 - X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates), 631 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &hswult_cstates), 632 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &hswult_cstates), 633 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &hswult_cstates), 634 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &hswult_cstates), 638 635 639 - X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates), 636 + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnl_cstates), 640 637 641 - X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), 642 - 
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), 638 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_cstates), 639 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_cstates), 643 640 644 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), 645 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates), 646 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), 647 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates), 648 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates), 641 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &glm_cstates), 642 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &glm_cstates), 643 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates), 644 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates), 645 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates), 649 646 650 - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates), 651 - X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates), 652 - X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates), 653 - X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates), 647 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), 648 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), 649 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), 650 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), 654 651 { }, 655 652 }; 656 653 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

+9

arch/x86/events/intel/lbr.c

··· 585 585 cpuc->lbr_entries[i].reserved = 0; 586 586 } 587 587 cpuc->lbr_stack.nr = i; 588 + cpuc->lbr_stack.hw_idx = tos; 588 589 } 589 590 590 591 /* ··· 681 680 out++; 682 681 } 683 682 cpuc->lbr_stack.nr = out; 683 + cpuc->lbr_stack.hw_idx = tos; 684 684 } 685 685 686 686 void intel_pmu_lbr_read(void) ··· 1122 1120 int i; 1123 1121 1124 1122 cpuc->lbr_stack.nr = x86_pmu.lbr_nr; 1123 + 1124 + /* Cannot get TOS for large PEBS */ 1125 + if (cpuc->n_pebs == cpuc->n_large_pebs) 1126 + cpuc->lbr_stack.hw_idx = -1ULL; 1127 + else 1128 + cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); 1129 + 1125 1130 for (i = 0; i < x86_pmu.lbr_nr; i++) { 1126 1131 u64 info = lbr->lbr[i].info; 1127 1132 struct perf_branch_entry *e = &cpuc->lbr_entries[i];

+27 -31

arch/x86/events/intel/rapl.c

··· 668 668 return 0; 669 669 } 670 670 671 - #define X86_RAPL_MODEL_MATCH(model, init) \ 672 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } 673 - 674 671 static struct rapl_model model_snb = { 675 672 .events = BIT(PERF_RAPL_PP0) | 676 673 BIT(PERF_RAPL_PKG) | ··· 713 716 }; 714 717 715 718 static const struct x86_cpu_id rapl_model_match[] __initconst = { 716 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb), 717 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), 718 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), 719 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), 720 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL, model_hsw), 721 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), 722 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_L, model_hsw), 723 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_G, model_hsw), 724 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL, model_hsw), 725 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, model_hsw), 726 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx), 727 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, model_hsx), 728 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), 729 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), 730 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, model_skl), 731 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE, model_skl), 732 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), 733 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, model_skl), 734 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE, model_skl), 735 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_L, model_skl), 736 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), 737 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_D, model_hsw), 738 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), 739 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl), 740 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl), 741 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L, 
model_skl), 742 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE, model_skl), 719 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb), 720 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep), 721 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb), 722 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep), 723 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw), 724 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx), 725 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw), 726 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw), 727 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw), 728 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw), 729 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx), 730 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx), 731 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl), 732 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl), 733 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl), 734 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl), 735 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx), 736 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl), 737 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl), 738 + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl), 739 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw), 740 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw), 741 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw), 742 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl), 743 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl), 744 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), 745 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), 743 746 {}, 744 747 }; 745 - 746 748 MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); 747 749 748 750 static int __init rapl_pmu_init(void)

+41 -34

arch/x86/events/intel/uncore.c

··· 1392 1392 return ret; 1393 1393 } 1394 1394 1395 - 1396 - #define X86_UNCORE_MODEL_MATCH(model, init) \ 1397 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } 1398 - 1399 1395 struct intel_uncore_init_fun { 1400 1396 void (*cpu_init)(void); 1401 1397 int (*pci_init)(void); ··· 1466 1470 .pci_init = skl_uncore_pci_init, 1467 1471 }; 1468 1472 1473 + static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { 1474 + .cpu_init = icl_uncore_cpu_init, 1475 + .mmio_init = tgl_uncore_mmio_init, 1476 + }; 1477 + 1478 + static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { 1479 + .cpu_init = icl_uncore_cpu_init, 1480 + .mmio_init = tgl_l_uncore_mmio_init, 1481 + }; 1482 + 1469 1483 static const struct intel_uncore_init_fun snr_uncore_init __initconst = { 1470 1484 .cpu_init = snr_uncore_cpu_init, 1471 1485 .pci_init = snr_uncore_pci_init, ··· 1483 1477 }; 1484 1478 1485 1479 static const struct x86_cpu_id intel_uncore_match[] __initconst = { 1486 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), 1487 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), 1488 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), 1489 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), 1490 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), 1491 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), 1492 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL, hsw_uncore_init), 1493 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_L, hsw_uncore_init), 1494 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_G, hsw_uncore_init), 1495 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL, bdw_uncore_init), 1496 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, bdw_uncore_init), 1497 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), 1498 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), 1499 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, 
nhmex_uncore_init), 1500 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), 1501 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), 1502 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), 1503 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, bdx_uncore_init), 1504 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), 1505 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init), 1506 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE, skl_uncore_init), 1507 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, skl_uncore_init), 1508 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), 1509 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, skl_uncore_init), 1510 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE, skl_uncore_init), 1511 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init), 1512 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), 1513 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init), 1514 - X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init), 1480 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init), 1481 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init), 1482 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init), 1483 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init), 1484 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init), 1485 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init), 1486 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init), 1487 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init), 1488 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init), 1489 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init), 1490 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init), 1491 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init), 1492 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init), 1493 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, 
&nhmex_uncore_init), 1494 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init), 1495 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init), 1496 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init), 1497 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init), 1498 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init), 1499 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init), 1500 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init), 1501 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init), 1502 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init), 1503 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init), 1504 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init), 1505 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init), 1506 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init), 1507 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init), 1508 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init), 1509 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init), 1510 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), 1515 1511 {}, 1516 1512 }; 1517 - 1518 1513 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); 1519 1514 1520 1515 static int __init intel_uncore_init(void)

+6 -1

arch/x86/events/intel/uncore.h

··· 154 154 unsigned int box_offset; 155 155 unsigned int num_counters; 156 156 unsigned int bits; 157 + unsigned *box_offsets; 157 158 }; 158 159 159 160 struct pci2phy_map { ··· 311 310 312 311 return pmu->type->freerunning[type].counter_base + 313 312 pmu->type->freerunning[type].counter_offset * idx + 314 - pmu->type->freerunning[type].box_offset * pmu->pmu_idx; 313 + (pmu->type->freerunning[type].box_offsets ? 314 + pmu->type->freerunning[type].box_offsets[pmu->pmu_idx] : 315 + pmu->type->freerunning[type].box_offset * pmu->pmu_idx); 315 316 } 316 317 317 318 static inline ··· 530 527 void nhm_uncore_cpu_init(void); 531 528 void skl_uncore_cpu_init(void); 532 529 void icl_uncore_cpu_init(void); 530 + void tgl_uncore_mmio_init(void); 531 + void tgl_l_uncore_mmio_init(void); 533 532 int snb_pci2phy_map_init(int devid); 534 533 535 534 /* uncore_snbep.c */

+159

arch/x86/events/intel/uncore_snb.c

··· 44 44 #define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 45 45 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 46 46 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 47 + #define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02 48 + #define PCI_DEVICE_ID_INTEL_TGL_U2_IMC 0x9a04 49 + #define PCI_DEVICE_ID_INTEL_TGL_U3_IMC 0x9a12 50 + #define PCI_DEVICE_ID_INTEL_TGL_U4_IMC 0x9a14 51 + #define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36 47 52 48 53 49 54 /* SNB event control */ ··· 1007 1002 } 1008 1003 1009 1004 /* end of Nehalem uncore support */ 1005 + 1006 + /* Tiger Lake MMIO uncore support */ 1007 + 1008 + static const struct pci_device_id tgl_uncore_pci_ids[] = { 1009 + { /* IMC */ 1010 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC), 1011 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 1012 + }, 1013 + { /* IMC */ 1014 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC), 1015 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 1016 + }, 1017 + { /* IMC */ 1018 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC), 1019 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 1020 + }, 1021 + { /* IMC */ 1022 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC), 1023 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 1024 + }, 1025 + { /* IMC */ 1026 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC), 1027 + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 1028 + }, 1029 + { /* end: all zeroes */ } 1030 + }; 1031 + 1032 + enum perf_tgl_uncore_imc_freerunning_types { 1033 + TGL_MMIO_UNCORE_IMC_DATA_TOTAL, 1034 + TGL_MMIO_UNCORE_IMC_DATA_READ, 1035 + TGL_MMIO_UNCORE_IMC_DATA_WRITE, 1036 + TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX 1037 + }; 1038 + 1039 + static struct freerunning_counters tgl_l_uncore_imc_freerunning[] = { 1040 + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x5040, 0x0, 0x0, 1, 64 }, 1041 + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0x5058, 0x0, 0x0, 1, 64 }, 1042 + 
[TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0x50A0, 0x0, 0x0, 1, 64 }, 1043 + }; 1044 + 1045 + static struct freerunning_counters tgl_uncore_imc_freerunning[] = { 1046 + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0xd840, 0x0, 0x0, 1, 64 }, 1047 + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0xd858, 0x0, 0x0, 1, 64 }, 1048 + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xd8A0, 0x0, 0x0, 1, 64 }, 1049 + }; 1050 + 1051 + static struct uncore_event_desc tgl_uncore_imc_events[] = { 1052 + INTEL_UNCORE_EVENT_DESC(data_total, "event=0xff,umask=0x10"), 1053 + INTEL_UNCORE_EVENT_DESC(data_total.scale, "6.103515625e-5"), 1054 + INTEL_UNCORE_EVENT_DESC(data_total.unit, "MiB"), 1055 + 1056 + INTEL_UNCORE_EVENT_DESC(data_read, "event=0xff,umask=0x20"), 1057 + INTEL_UNCORE_EVENT_DESC(data_read.scale, "6.103515625e-5"), 1058 + INTEL_UNCORE_EVENT_DESC(data_read.unit, "MiB"), 1059 + 1060 + INTEL_UNCORE_EVENT_DESC(data_write, "event=0xff,umask=0x30"), 1061 + INTEL_UNCORE_EVENT_DESC(data_write.scale, "6.103515625e-5"), 1062 + INTEL_UNCORE_EVENT_DESC(data_write.unit, "MiB"), 1063 + 1064 + { /* end: all zeroes */ } 1065 + }; 1066 + 1067 + static struct pci_dev *tgl_uncore_get_mc_dev(void) 1068 + { 1069 + const struct pci_device_id *ids = tgl_uncore_pci_ids; 1070 + struct pci_dev *mc_dev = NULL; 1071 + 1072 + while (ids && ids->vendor) { 1073 + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, ids->device, NULL); 1074 + if (mc_dev) 1075 + return mc_dev; 1076 + ids++; 1077 + } 1078 + 1079 + return mc_dev; 1080 + } 1081 + 1082 + #define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 1083 + 1084 + static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) 1085 + { 1086 + struct pci_dev *pdev = tgl_uncore_get_mc_dev(); 1087 + struct intel_uncore_pmu *pmu = box->pmu; 1088 + resource_size_t addr; 1089 + u32 mch_bar; 1090 + 1091 + if (!pdev) { 1092 + pr_warn("perf uncore: Cannot find matched IMC device.\n"); 1093 + return; 1094 + } 1095 + 1096 + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); 
1097 + /* MCHBAR is disabled */ 1098 + if (!(mch_bar & BIT(0))) { 1099 + pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n"); 1100 + return; 1101 + } 1102 + mch_bar &= ~BIT(0); 1103 + addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); 1104 + 1105 + #ifdef CONFIG_PHYS_ADDR_T_64BIT 1106 + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); 1107 + addr |= ((resource_size_t)mch_bar << 32); 1108 + #endif 1109 + 1110 + box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); 1111 + } 1112 + 1113 + static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { 1114 + .init_box = tgl_uncore_imc_freerunning_init_box, 1115 + .exit_box = uncore_mmio_exit_box, 1116 + .read_counter = uncore_mmio_read_counter, 1117 + .hw_config = uncore_freerunning_hw_config, 1118 + }; 1119 + 1120 + static struct attribute *tgl_uncore_imc_formats_attr[] = { 1121 + &format_attr_event.attr, 1122 + &format_attr_umask.attr, 1123 + NULL 1124 + }; 1125 + 1126 + static const struct attribute_group tgl_uncore_imc_format_group = { 1127 + .name = "format", 1128 + .attrs = tgl_uncore_imc_formats_attr, 1129 + }; 1130 + 1131 + static struct intel_uncore_type tgl_uncore_imc_free_running = { 1132 + .name = "imc_free_running", 1133 + .num_counters = 3, 1134 + .num_boxes = 2, 1135 + .num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, 1136 + .freerunning = tgl_uncore_imc_freerunning, 1137 + .ops = &tgl_uncore_imc_freerunning_ops, 1138 + .event_descs = tgl_uncore_imc_events, 1139 + .format_group = &tgl_uncore_imc_format_group, 1140 + }; 1141 + 1142 + static struct intel_uncore_type *tgl_mmio_uncores[] = { 1143 + &tgl_uncore_imc_free_running, 1144 + NULL 1145 + }; 1146 + 1147 + void tgl_l_uncore_mmio_init(void) 1148 + { 1149 + tgl_uncore_imc_free_running.freerunning = tgl_l_uncore_imc_freerunning; 1150 + uncore_mmio_uncores = tgl_mmio_uncores; 1151 + } 1152 + 1153 + void tgl_uncore_mmio_init(void) 1154 + { 
1155 + uncore_mmio_uncores = tgl_mmio_uncores; 1156 + } 1157 + 1158 + /* end of Tiger Lake MMIO uncore support */

+9 -3

arch/x86/events/intel/uncore_snbep.c

··· 4380 4380 return mc_dev; 4381 4381 } 4382 4382 4383 - static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) 4383 + static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, 4384 + unsigned int box_ctl, int mem_offset) 4384 4385 { 4385 4386 struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); 4386 - unsigned int box_ctl = uncore_mmio_box_ctl(box); 4387 4387 resource_size_t addr; 4388 4388 u32 pci_dword; 4389 4389 ··· 4393 4393 pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); 4394 4394 addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; 4395 4395 4396 - pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword); 4396 + pci_read_config_dword(pdev, mem_offset, &pci_dword); 4397 4397 addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; 4398 4398 4399 4399 addr += box_ctl; ··· 4403 4403 return; 4404 4404 4405 4405 writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); 4406 + } 4407 + 4408 + static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) 4409 + { 4410 + __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), 4411 + SNR_IMC_MMIO_MEM0_OFFSET); 4406 4412 } 4407 4413 4408 4414 static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box)

-1

arch/x86/include/asm/amd_nb.h

··· 12 12 u8 dev_limit; 13 13 }; 14 14 15 - extern const struct pci_device_id amd_nb_misc_ids[]; 16 15 extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; 17 16 18 17 extern bool early_is_amd_nb(u32 value);

+131 -1

arch/x86/include/asm/cpu_device_id.h

··· 5 5 /* 6 6 * Declare drivers belonging to specific x86 CPUs 7 7 * Similar in spirit to pci_device_id and related PCI functions 8 + * 9 + * The wildcard initializers are in mod_devicetable.h because 10 + * file2alias needs them. Sigh. 8 11 */ 9 - 10 12 #include <linux/mod_devicetable.h> 13 + /* Get the INTEL_FAM* model defines */ 14 + #include <asm/intel-family.h> 15 + /* And the X86_VENDOR_* ones */ 16 + #include <asm/processor.h> 17 + 18 + /* Centaur FAM6 models */ 19 + #define X86_CENTAUR_FAM6_C7_A 0xa 20 + #define X86_CENTAUR_FAM6_C7_D 0xd 21 + #define X86_CENTAUR_FAM6_NANO 0xf 22 + 23 + /** 24 + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching 25 + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY 26 + * The name is expanded to X86_VENDOR_@_vendor 27 + * @_family: The family number or X86_FAMILY_ANY 28 + * @_model: The model number, model constant or X86_MODEL_ANY 29 + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY 30 + * @_data: Driver specific data or NULL. The internal storage 31 + * format is unsigned long. The supplied value, pointer 32 + * etc. is casted to unsigned long internally. 33 + * 34 + * Use only if you need all selectors. Otherwise use one of the shorter 35 + * macros of the X86_MATCH_* family. If there is no matching shorthand 36 + * macro, consider adding one. If you really need to wrap one of the macros 37 + * into another macro at the usage site for good reasons, then please 38 + * start this local macro with X86_MATCH to allow easy grepping. 39 + */ 40 + #define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \ 41 + _feature, _data) { \ 42 + .vendor = X86_VENDOR_##_vendor, \ 43 + .family = _family, \ 44 + .model = _model, \ 45 + .feature = _feature, \ 46 + .driver_data = (unsigned long) _data \ 47 + } 48 + 49 + /** 50 + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature 51 + * @vendor: The vendor name, e.g. 
INTEL, AMD, HYGON, ..., ANY 52 + * The name is expanded to X86_VENDOR_@vendor 53 + * @family: The family number or X86_FAMILY_ANY 54 + * @feature: A X86_FEATURE bit 55 + * @data: Driver specific data or NULL. The internal storage 56 + * format is unsigned long. The supplied value, pointer 57 + * etc. is casted to unsigned long internally. 58 + * 59 + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are 60 + * set to wildcards. 61 + */ 62 + #define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ 63 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ 64 + X86_MODEL_ANY, feature, data) 65 + 66 + /** 67 + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature 68 + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY 69 + * The name is expanded to X86_VENDOR_@vendor 70 + * @feature: A X86_FEATURE bit 71 + * @data: Driver specific data or NULL. The internal storage 72 + * format is unsigned long. The supplied value, pointer 73 + * etc. is casted to unsigned long internally. 74 + * 75 + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are 76 + * set to wildcards. 77 + */ 78 + #define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ 79 + X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) 80 + 81 + /** 82 + * X86_MATCH_FEATURE - Macro for matching a CPU feature 83 + * @feature: A X86_FEATURE bit 84 + * @data: Driver specific data or NULL. The internal storage 85 + * format is unsigned long. The supplied value, pointer 86 + * etc. is casted to unsigned long internally. 87 + * 88 + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are 89 + * set to wildcards. 90 + */ 91 + #define X86_MATCH_FEATURE(feature, data) \ 92 + X86_MATCH_VENDOR_FEATURE(ANY, feature, data) 93 + 94 + /** 95 + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model 96 + * @vendor: The vendor name, e.g. 
INTEL, AMD, HYGON, ..., ANY 97 + * The name is expanded to X86_VENDOR_@vendor 98 + * @family: The family number or X86_FAMILY_ANY 99 + * @model: The model number, model constant or X86_MODEL_ANY 100 + * @data: Driver specific data or NULL. The internal storage 101 + * format is unsigned long. The supplied value, pointer 102 + * etc. is casted to unsigned long internally. 103 + * 104 + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are 105 + * set to wildcards. 106 + */ 107 + #define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ 108 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ 109 + X86_FEATURE_ANY, data) 110 + 111 + /** 112 + * X86_MATCH_VENDOR_FAM - Match vendor and family 113 + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY 114 + * The name is expanded to X86_VENDOR_@vendor 115 + * @family: The family number or X86_FAMILY_ANY 116 + * @data: Driver specific data or NULL. The internal storage 117 + * format is unsigned long. The supplied value, pointer 118 + * etc. is casted to unsigned long internally. 119 + * 120 + * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are 121 + * set to wildcards. 122 + */ 123 + #define X86_MATCH_VENDOR_FAM(vendor, family, data) \ 124 + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) 125 + 126 + /** 127 + * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model 128 + * @model: The model name without the INTEL_FAM6_ prefix or ANY 129 + * The model name is expanded to INTEL_FAM6_@model internally 130 + * @data: Driver specific data or NULL. The internal storage 131 + * format is unsigned long. The supplied value, pointer 132 + * etc. is casted to unsigned long internally. 133 + * 134 + * The vendor is set to INTEL, the family to 6 and all other missing 135 + * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards. 136 + * 137 + * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
138 + */ 139 + #define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ 140 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) 11 141 12 142 /* 13 143 * Match specific microcode revisions.

+1 -1

arch/x86/include/asm/cpufeatures.h

··· 217 217 #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ 218 218 #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ 219 219 #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ 220 - #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ 220 + #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */ 221 221 #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ 222 222 #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ 223 223 #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */

+5 -12

arch/x86/include/asm/intel-family.h

··· 35 35 * The #define line may optionally include a comment including platform names. 36 36 */ 37 37 38 + /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ 39 + #define INTEL_FAM6_ANY X86_MODEL_ANY 40 + 38 41 #define INTEL_FAM6_CORE_YONAH 0x0E 39 42 40 43 #define INTEL_FAM6_CORE2_MEROM 0x0F ··· 121 118 #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ 122 119 #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ 123 120 124 - /* Useful macros */ 125 - #define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \ 126 - { \ 127 - .vendor = X86_VENDOR_INTEL, \ 128 - .family = _family, \ 129 - .model = _model, \ 130 - .feature = X86_FEATURE_ANY, \ 131 - .driver_data = (kernel_ulong_t)&_driver_data \ 132 - } 133 - 134 - #define INTEL_CPU_FAM6(_model, _driver_data) \ 135 - INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data) 121 + /* Family 5 */ 122 + #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ 136 123 137 124 #endif /* _ASM_X86_INTEL_FAMILY_H */

+1

arch/x86/include/asm/kprobes.h

··· 36 36 37 37 /* optinsn template addresses */ 38 38 extern __visible kprobe_opcode_t optprobe_template_entry[]; 39 + extern __visible kprobe_opcode_t optprobe_template_clac[]; 39 40 extern __visible kprobe_opcode_t optprobe_template_val[]; 40 41 extern __visible kprobe_opcode_t optprobe_template_call[]; 41 42 extern __visible kprobe_opcode_t optprobe_template_end[];

+13 -2

arch/x86/include/asm/perf_event.h

··· 50 50 51 51 #define AMD64_L3_SLICE_SHIFT 48 52 52 #define AMD64_L3_SLICE_MASK \ 53 - ((0xFULL) << AMD64_L3_SLICE_SHIFT) 53 + (0xFULL << AMD64_L3_SLICE_SHIFT) 54 + #define AMD64_L3_SLICEID_MASK \ 55 + (0x7ULL << AMD64_L3_SLICE_SHIFT) 54 56 55 57 #define AMD64_L3_THREAD_SHIFT 56 56 58 #define AMD64_L3_THREAD_MASK \ 57 - ((0xFFULL) << AMD64_L3_THREAD_SHIFT) 59 + (0xFFULL << AMD64_L3_THREAD_SHIFT) 60 + #define AMD64_L3_F19H_THREAD_MASK \ 61 + (0x3ULL << AMD64_L3_THREAD_SHIFT) 62 + 63 + #define AMD64_L3_EN_ALL_CORES BIT_ULL(47) 64 + #define AMD64_L3_EN_ALL_SLICES BIT_ULL(46) 65 + 66 + #define AMD64_L3_COREID_SHIFT 42 67 + #define AMD64_L3_COREID_MASK \ 68 + (0x7ULL << AMD64_L3_COREID_SHIFT) 58 69 59 70 #define X86_RAW_EVENT_MASK \ 60 71 (ARCH_PERFMON_EVENTSEL_EVENT | \

+1 -3

arch/x86/kernel/amd_nb.c

··· 36 36 {} 37 37 }; 38 38 39 - 40 39 #define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704 41 40 42 - const struct pci_device_id amd_nb_misc_ids[] = { 41 + static const struct pci_device_id amd_nb_misc_ids[] = { 43 42 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 44 43 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 45 44 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, ··· 55 56 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, 56 57 {} 57 58 }; 58 - EXPORT_SYMBOL_GPL(amd_nb_misc_ids); 59 59 60 60 static const struct pci_device_id amd_nb_link_ids[] = { 61 61 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },

+13 -19

arch/x86/kernel/apic/apic.c

··· 546 546 }; 547 547 static DEFINE_PER_CPU(struct clock_event_device, lapic_events); 548 548 549 - #define DEADLINE_MODEL_MATCH_FUNC(model, func) \ 550 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func } 551 - 552 - #define DEADLINE_MODEL_MATCH_REV(model, rev) \ 553 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } 554 - 555 549 static u32 hsx_deadline_rev(void) 556 550 { 557 551 switch (boot_cpu_data.x86_stepping) { ··· 582 588 } 583 589 584 590 static const struct x86_cpu_id deadline_match[] = { 585 - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), 586 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), 587 - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev), 588 - DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), 591 + X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev), 592 + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020), 593 + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev), 594 + X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev), 589 595 590 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22), 591 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20), 592 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17), 596 + X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22), 597 + X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20), 598 + X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G, 0x17), 593 599 594 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25), 595 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17), 600 + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL, 0x25), 601 + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G, 0x17), 596 602 597 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2), 598 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2), 603 + X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L, 0xb2), 604 + X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE, 0xb2), 599 605 600 - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52), 601 - 
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52), 606 + X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L, 0x52), 607 + X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE, 0x52), 602 608 603 609 {}, 604 610 };

+2 -1

arch/x86/kernel/cpu/amd.c

··· 955 955 case 0x12: init_amd_ln(c); break; 956 956 case 0x15: init_amd_bd(c); break; 957 957 case 0x16: init_amd_jg(c); break; 958 - case 0x17: init_amd_zn(c); break; 958 + case 0x17: fallthrough; 959 + case 0x19: init_amd_zn(c); break; 959 960 } 960 961 961 962 /*

+2 -2

arch/x86/kernel/cpu/common.c

··· 1008 1008 #define NO_ITLB_MULTIHIT BIT(7) 1009 1009 #define NO_SPECTRE_V2 BIT(8) 1010 1010 1011 - #define VULNWL(_vendor, _family, _model, _whitelist) \ 1012 - { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } 1011 + #define VULNWL(vendor, family, model, whitelist) \ 1012 + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) 1013 1013 1014 1014 #define VULNWL_INTEL(model, whitelist) \ 1015 1015 VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)

+9 -4

arch/x86/kernel/cpu/match.c

··· 16 16 * respective wildcard entries. 17 17 * 18 18 * A typical table entry would be to match a specific CPU 19 - * { X86_VENDOR_INTEL, 6, 0x12 } 20 - * or to match a specific CPU feature 21 - * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } 19 + * 20 + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL, 21 + * X86_FEATURE_ANY, NULL); 22 22 * 23 23 * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, 24 - * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) 24 + * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor) 25 + * 26 + * asm/cpu_device_id.h contains a set of useful macros which are shortcuts 27 + * for various common selections. The above can be shortened to: 28 + * 29 + * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL); 25 30 * 26 31 * Arrays used to match for this should also be declared using 27 32 * MODULE_DEVICE_TABLE(x86cpu, ...)

+25

arch/x86/kernel/kprobes/opt.c

··· 71 71 return (unsigned long)buf; 72 72 } 73 73 74 + static void synthesize_clac(kprobe_opcode_t *addr) 75 + { 76 + /* 77 + * Can't be static_cpu_has() due to how objtool treats this feature bit. 78 + * This isn't a fast path anyway. 79 + */ 80 + if (!boot_cpu_has(X86_FEATURE_SMAP)) 81 + return; 82 + 83 + /* Replace the NOP3 with CLAC */ 84 + addr[0] = 0x0f; 85 + addr[1] = 0x01; 86 + addr[2] = 0xca; 87 + } 88 + 74 89 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 75 90 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 76 91 { ··· 107 92 /* We don't bother saving the ss register */ 108 93 " pushq %rsp\n" 109 94 " pushfq\n" 95 + ".global optprobe_template_clac\n" 96 + "optprobe_template_clac:\n" 97 + ASM_NOP3 110 98 SAVE_REGS_STRING 111 99 " movq %rsp, %rsi\n" 112 100 ".global optprobe_template_val\n" ··· 129 111 #else /* CONFIG_X86_32 */ 130 112 " pushl %esp\n" 131 113 " pushfl\n" 114 + ".global optprobe_template_clac\n" 115 + "optprobe_template_clac:\n" 116 + ASM_NOP3 132 117 SAVE_REGS_STRING 133 118 " movl %esp, %edx\n" 134 119 ".global optprobe_template_val\n" ··· 155 134 void optprobe_template_func(void); 156 135 STACK_FRAME_NON_STANDARD(optprobe_template_func); 157 136 137 + #define TMPL_CLAC_IDX \ 138 + ((long)optprobe_template_clac - (long)optprobe_template_entry) 158 139 #define TMPL_MOVE_IDX \ 159 140 ((long)optprobe_template_val - (long)optprobe_template_entry) 160 141 #define TMPL_CALL_IDX \ ··· 411 388 goto err; 412 389 op->optinsn.size = ret; 413 390 len = TMPL_END_IDX + op->optinsn.size; 391 + 392 + synthesize_clac(buf + TMPL_CLAC_IDX); 414 393 415 394 /* Set probe information */ 416 395 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

+1 -1

arch/x86/kernel/smpboot.c

··· 466 466 */ 467 467 468 468 static const struct x86_cpu_id snc_cpu[] = { 469 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X }, 469 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), 470 470 {} 471 471 }; 472 472

+7 -7

arch/x86/kernel/tsc_msr.c

··· 63 63 }; 64 64 65 65 static const struct x86_cpu_id tsc_msr_cpu_ids[] = { 66 - INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw), 67 - INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv), 68 - INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt), 69 - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng), 70 - INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht), 71 - INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann), 72 - INTEL_CPU_FAM6(ATOM_AIRMONT_NP, freq_desc_lgm), 66 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, &freq_desc_pnw), 67 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_TABLET,&freq_desc_clv), 68 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &freq_desc_byt), 69 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &freq_desc_tng), 70 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &freq_desc_cht), 71 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &freq_desc_ann), 72 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_NP, &freq_desc_lgm), 73 73 {} 74 74 }; 75 75

+2 -1

arch/x86/kvm/svm.c

··· 48 48 #include <asm/kvm_para.h> 49 49 #include <asm/irq_remapping.h> 50 50 #include <asm/spec-ctrl.h> 51 + #include <asm/cpu_device_id.h> 51 52 52 53 #include <asm/virtext.h> 53 54 #include "trace.h" ··· 60 59 61 60 #ifdef MODULE 62 61 static const struct x86_cpu_id svm_cpu_id[] = { 63 - X86_FEATURE_MATCH(X86_FEATURE_SVM), 62 + X86_MATCH_FEATURE(X86_FEATURE_SVM, NULL), 64 63 {} 65 64 }; 66 65 MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

+2 -1

arch/x86/kvm/vmx/vmx.c

··· 31 31 #include <asm/apic.h> 32 32 #include <asm/asm.h> 33 33 #include <asm/cpu.h> 34 + #include <asm/cpu_device_id.h> 34 35 #include <asm/debugreg.h> 35 36 #include <asm/desc.h> 36 37 #include <asm/fpu/internal.h> ··· 67 66 68 67 #ifdef MODULE 69 68 static const struct x86_cpu_id vmx_cpu_id[] = { 70 - X86_FEATURE_MATCH(X86_FEATURE_VMX), 69 + X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL), 71 70 {} 72 71 }; 73 72 MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);

+6 -7

arch/x86/platform/atom/punit_atom_debug.c

··· 117 117 debugfs_remove_recursive(punit_dbg_file); 118 118 } 119 119 120 - #define ICPU(model, drv_data) \ 121 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT,\ 122 - (kernel_ulong_t)&drv_data } 120 + #define X86_MATCH(model, data) \ 121 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ 122 + X86_FEATURE_MWAIT, data) 123 123 124 124 static const struct x86_cpu_id intel_punit_cpu_ids[] = { 125 - ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt), 126 - ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng), 127 - ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), 125 + X86_MATCH(ATOM_SILVERMONT, &punit_device_byt), 126 + X86_MATCH(ATOM_SILVERMONT_MID, &punit_device_tng), 127 + X86_MATCH(ATOM_AIRMONT, &punit_device_cht), 128 128 {} 129 129 }; 130 - 131 130 MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids); 132 131 133 132 static int __init punit_atom_debug_init(void)

+2 -5

arch/x86/platform/efi/quirks.c

··· 663 663 return 1; 664 664 } 665 665 666 - #define ICPU(family, model, quirk_handler) \ 667 - { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \ 668 - (unsigned long)&quirk_handler } 669 - 670 666 static const struct x86_cpu_id efi_capsule_quirk_ids[] = { 671 - ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */ 667 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, 668 + &qrk_capsule_setup_info), 672 669 { } 673 670 }; 674 671

+1 -4

arch/x86/platform/intel-mid/device_libs/platform_bt.c

··· 60 60 .setup = tng_bt_sfi_setup, 61 61 }; 62 62 63 - #define ICPU(model, ddata) \ 64 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata } 65 - 66 63 static const struct x86_cpu_id bt_sfi_cpu_ids[] = { 67 - ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data), 64 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &tng_bt_sfi_data), 68 65 {} 69 66 }; 70 67

+1 -1

arch/x86/platform/intel-quark/imr.c

··· 569 569 } 570 570 571 571 static const struct x86_cpu_id imr_ids[] __initconst = { 572 - { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ 572 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), 573 573 {} 574 574 }; 575 575

+1 -1

arch/x86/platform/intel-quark/imr_selftest.c

··· 105 105 } 106 106 107 107 static const struct x86_cpu_id imr_ids[] __initconst = { 108 - { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ 108 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), 109 109 {} 110 110 }; 111 111

+2 -14

arch/x86/power/cpu.c

··· 475 475 } 476 476 477 477 static const struct x86_cpu_id msr_save_cpu_table[] = { 478 - { 479 - .vendor = X86_VENDOR_AMD, 480 - .family = 0x15, 481 - .model = X86_MODEL_ANY, 482 - .feature = X86_FEATURE_ANY, 483 - .driver_data = (kernel_ulong_t)msr_save_cpuid_features, 484 - }, 485 - { 486 - .vendor = X86_VENDOR_AMD, 487 - .family = 0x16, 488 - .model = X86_MODEL_ANY, 489 - .feature = X86_FEATURE_ANY, 490 - .driver_data = (kernel_ulong_t)msr_save_cpuid_features, 491 - }, 478 + X86_MATCH_VENDOR_FAM(AMD, 0x15, &msr_save_cpuid_features), 479 + X86_MATCH_VENDOR_FAM(AMD, 0x16, &msr_save_cpuid_features), 492 480 {} 493 481 }; 494 482

+2 -4

drivers/acpi/acpi_lpss.c

··· 306 306 .setup = lpss_deassert_reset, 307 307 }; 308 308 309 - #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } 310 - 311 309 static const struct x86_cpu_id lpss_cpu_ids[] = { 312 - ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */ 313 - ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ 310 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL), 311 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL), 314 312 {} 315 313 }; 316 314

+10 -10

drivers/acpi/x86/utils.c

··· 37 37 const char *uid; 38 38 }; 39 39 40 - #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } 40 + #define X86_MATCH(model) X86_MATCH_INTEL_FAM6_MODEL(model, NULL) 41 41 42 42 #define ENTRY(hid, uid, cpu_models, dmi...) { \ 43 43 { { hid, }, {} }, \ ··· 51 51 * Bay / Cherry Trail PWM directly poked by GPU driver in win10, 52 52 * but Linux uses a separate PWM driver, harmless if not used. 53 53 */ 54 - ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT), {}), 55 - ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}), 54 + ENTRY("80860F09", "1", X86_MATCH(ATOM_SILVERMONT), {}), 55 + ENTRY("80862288", "1", X86_MATCH(ATOM_AIRMONT), {}), 56 56 57 57 /* Lenovo Yoga Book uses PWM2 for keyboard backlight control */ 58 - ENTRY("80862289", "2", ICPU(INTEL_FAM6_ATOM_AIRMONT), { 58 + ENTRY("80862289", "2", X86_MATCH(ATOM_AIRMONT), { 59 59 DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), 60 60 }), 61 61 /* 62 62 * The INT0002 device is necessary to clear wakeup interrupt sources 63 63 * on Cherry Trail devices, without it we get nobody cared IRQ msgs. 64 64 */ 65 - ENTRY("INT0002", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}), 65 + ENTRY("INT0002", "1", X86_MATCH(ATOM_AIRMONT), {}), 66 66 /* 67 67 * On the Dell Venue 11 Pro 7130 and 7139, the DSDT hides 68 68 * the touchscreen ACPI device until a certain time 69 69 * after _SB.PCI0.GFX0.LCD.LCD1._ON gets called has passed 70 70 * *and* _STA has been called at least 3 times since. 
71 71 */ 72 - ENTRY("SYNA7500", "1", ICPU(INTEL_FAM6_HASWELL_L), { 72 + ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), { 73 73 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), 74 74 DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"), 75 75 }), 76 - ENTRY("SYNA7500", "1", ICPU(INTEL_FAM6_HASWELL_L), { 76 + ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), { 77 77 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), 78 78 DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7139"), 79 79 }), ··· 89 89 * was copy-pasted from the GPD win, so it has a disabled KIOX000A 90 90 * node which we should not enable, thus we also check the BIOS date. 91 91 */ 92 - ENTRY("KIOX000A", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), { 92 + ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { 93 93 DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), 94 94 DMI_MATCH(DMI_BOARD_NAME, "Default string"), 95 95 DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), 96 96 DMI_MATCH(DMI_BIOS_DATE, "02/21/2017") 97 97 }), 98 - ENTRY("KIOX000A", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), { 98 + ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { 99 99 DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), 100 100 DMI_MATCH(DMI_BOARD_NAME, "Default string"), 101 101 DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), 102 102 DMI_MATCH(DMI_BIOS_DATE, "03/20/2017") 103 103 }), 104 - ENTRY("KIOX000A", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), { 104 + ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { 105 105 DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), 106 106 DMI_MATCH(DMI_BOARD_NAME, "Default string"), 107 107 DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),

+1 -1

drivers/char/agp/amd64-agp.c

··· 775 775 } 776 776 777 777 /* First check that we have at least one AMD64 NB */ 778 - if (!pci_dev_present(amd_nb_misc_ids)) { 778 + if (!amd_nb_num()) { 779 779 pci_unregister_driver(&agp_amd64_pci_driver); 780 780 return -ENODEV; 781 781 }

+3 -4

drivers/char/hw_random/via-rng.c

··· 209 209 out: 210 210 return err; 211 211 } 212 + module_init(mod_init); 212 213 213 214 static void __exit mod_exit(void) 214 215 { 215 216 hwrng_unregister(&via_rng); 216 217 } 217 - 218 - module_init(mod_init); 219 218 module_exit(mod_exit); 220 219 221 220 static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = { 222 - X86_FEATURE_MATCH(X86_FEATURE_XSTORE), 221 + X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL), 223 222 {} 224 223 }; 224 + MODULE_DEVICE_TABLE(x86cpu, via_rng_cpu_id); 225 225 226 226 MODULE_DESCRIPTION("H/W RNG driver for VIA CPU with PadLock"); 227 227 MODULE_LICENSE("GPL"); 228 - MODULE_DEVICE_TABLE(x86cpu, via_rng_cpu_id);

+3 -2

drivers/cpufreq/acpi-cpufreq.c

··· 30 30 #include <asm/msr.h> 31 31 #include <asm/processor.h> 32 32 #include <asm/cpufeature.h> 33 + #include <asm/cpu_device_id.h> 33 34 34 35 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); 35 36 MODULE_DESCRIPTION("ACPI Processor P-States Driver"); ··· 992 991 module_exit(acpi_cpufreq_exit); 993 992 994 993 static const struct x86_cpu_id acpi_cpufreq_ids[] = { 995 - X86_FEATURE_MATCH(X86_FEATURE_ACPI), 996 - X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE), 994 + X86_MATCH_FEATURE(X86_FEATURE_ACPI, NULL), 995 + X86_MATCH_FEATURE(X86_FEATURE_HW_PSTATE, NULL), 997 996 {} 998 997 }; 999 998 MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

+2 -1

drivers/cpufreq/amd_freq_sensitivity.c

··· 18 18 19 19 #include <asm/msr.h> 20 20 #include <asm/cpufeature.h> 21 + #include <asm/cpu_device_id.h> 21 22 22 23 #include "cpufreq_ondemand.h" 23 24 ··· 145 144 module_exit(amd_freq_sensitivity_exit); 146 145 147 146 static const struct x86_cpu_id amd_freq_sensitivity_ids[] = { 148 - X86_FEATURE_MATCH(X86_FEATURE_PROC_FEEDBACK), 147 + X86_MATCH_FEATURE(X86_FEATURE_PROC_FEEDBACK, NULL), 149 148 {} 150 149 }; 151 150 MODULE_DEVICE_TABLE(x86cpu, amd_freq_sensitivity_ids);

+1 -1

drivers/cpufreq/e_powersaver.c

··· 385 385 /* This driver will work only on Centaur C7 processors with 386 386 * Enhanced SpeedStep/PowerSaver registers */ 387 387 static const struct x86_cpu_id eps_cpu_id[] = { 388 - { X86_VENDOR_CENTAUR, 6, X86_MODEL_ANY, X86_FEATURE_EST }, 388 + X86_MATCH_VENDOR_FAM_FEATURE(CENTAUR, 6, X86_FEATURE_EST, NULL), 389 389 {} 390 390 }; 391 391 MODULE_DEVICE_TABLE(x86cpu, eps_cpu_id);

+1 -1

drivers/cpufreq/elanfreq.c

··· 198 198 }; 199 199 200 200 static const struct x86_cpu_id elan_id[] = { 201 - { X86_VENDOR_AMD, 4, 10, }, 201 + X86_MATCH_VENDOR_FAM_MODEL(AMD, 4, 10, NULL), 202 202 {} 203 203 }; 204 204 MODULE_DEVICE_TABLE(x86cpu, elan_id);

+36 -35

drivers/cpufreq/intel_pstate.c

··· 1908 1908 .get_val = core_get_val, 1909 1909 }; 1910 1910 1911 - #define ICPU(model, policy) \ 1912 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ 1913 - (unsigned long)&policy } 1911 + #define X86_MATCH(model, policy) \ 1912 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ 1913 + X86_FEATURE_APERFMPERF, &policy) 1914 1914 1915 1915 static const struct x86_cpu_id intel_pstate_cpu_ids[] = { 1916 - ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs), 1917 - ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs), 1918 - ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_funcs), 1919 - ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs), 1920 - ICPU(INTEL_FAM6_HASWELL, core_funcs), 1921 - ICPU(INTEL_FAM6_BROADWELL, core_funcs), 1922 - ICPU(INTEL_FAM6_IVYBRIDGE_X, core_funcs), 1923 - ICPU(INTEL_FAM6_HASWELL_X, core_funcs), 1924 - ICPU(INTEL_FAM6_HASWELL_L, core_funcs), 1925 - ICPU(INTEL_FAM6_HASWELL_G, core_funcs), 1926 - ICPU(INTEL_FAM6_BROADWELL_G, core_funcs), 1927 - ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_funcs), 1928 - ICPU(INTEL_FAM6_SKYLAKE_L, core_funcs), 1929 - ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), 1930 - ICPU(INTEL_FAM6_SKYLAKE, core_funcs), 1931 - ICPU(INTEL_FAM6_BROADWELL_D, core_funcs), 1932 - ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), 1933 - ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), 1934 - ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs), 1935 - ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, core_funcs), 1936 - ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), 1916 + X86_MATCH(SANDYBRIDGE, core_funcs), 1917 + X86_MATCH(SANDYBRIDGE_X, core_funcs), 1918 + X86_MATCH(ATOM_SILVERMONT, silvermont_funcs), 1919 + X86_MATCH(IVYBRIDGE, core_funcs), 1920 + X86_MATCH(HASWELL, core_funcs), 1921 + X86_MATCH(BROADWELL, core_funcs), 1922 + X86_MATCH(IVYBRIDGE_X, core_funcs), 1923 + X86_MATCH(HASWELL_X, core_funcs), 1924 + X86_MATCH(HASWELL_L, core_funcs), 1925 + X86_MATCH(HASWELL_G, core_funcs), 1926 + X86_MATCH(BROADWELL_G, core_funcs), 1927 + X86_MATCH(ATOM_AIRMONT, airmont_funcs), 1928 + 
X86_MATCH(SKYLAKE_L, core_funcs), 1929 + X86_MATCH(BROADWELL_X, core_funcs), 1930 + X86_MATCH(SKYLAKE, core_funcs), 1931 + X86_MATCH(BROADWELL_D, core_funcs), 1932 + X86_MATCH(XEON_PHI_KNL, knl_funcs), 1933 + X86_MATCH(XEON_PHI_KNM, knl_funcs), 1934 + X86_MATCH(ATOM_GOLDMONT, core_funcs), 1935 + X86_MATCH(ATOM_GOLDMONT_PLUS, core_funcs), 1936 + X86_MATCH(SKYLAKE_X, core_funcs), 1937 1937 {} 1938 1938 }; 1939 1939 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); 1940 1940 1941 1941 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { 1942 - ICPU(INTEL_FAM6_BROADWELL_D, core_funcs), 1943 - ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), 1944 - ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), 1942 + X86_MATCH(BROADWELL_D, core_funcs), 1943 + X86_MATCH(BROADWELL_X, core_funcs), 1944 + X86_MATCH(SKYLAKE_X, core_funcs), 1945 1945 {} 1946 1946 }; 1947 1947 1948 1948 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { 1949 - ICPU(INTEL_FAM6_KABYLAKE, core_funcs), 1949 + X86_MATCH(KABYLAKE, core_funcs), 1950 1950 {} 1951 1951 }; 1952 1952 1953 1953 static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { 1954 - ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), 1955 - ICPU(INTEL_FAM6_SKYLAKE, core_funcs), 1954 + X86_MATCH(SKYLAKE_X, core_funcs), 1955 + X86_MATCH(SKYLAKE, core_funcs), 1956 1956 {} 1957 1957 }; 1958 1958 ··· 2725 2725 2726 2726 #define INTEL_PSTATE_HWP_BROADWELL 0x01 2727 2727 2728 - #define ICPU_HWP(model, hwp_mode) \ 2729 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode } 2728 + #define X86_MATCH_HWP(model, hwp_mode) \ 2729 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ 2730 + X86_FEATURE_HWP, hwp_mode) 2730 2731 2731 2732 static const struct x86_cpu_id hwp_support_ids[] __initconst = { 2732 - ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), 2733 - ICPU_HWP(INTEL_FAM6_BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL), 2734 - ICPU_HWP(X86_MODEL_ANY, 0), 2733 + X86_MATCH_HWP(BROADWELL_X, 
INTEL_PSTATE_HWP_BROADWELL), 2734 + X86_MATCH_HWP(BROADWELL_D, INTEL_PSTATE_HWP_BROADWELL), 2735 + X86_MATCH_HWP(ANY, 0), 2735 2736 {} 2736 2737 }; 2737 2738

+1 -1

drivers/cpufreq/longhaul.c

··· 910 910 }; 911 911 912 912 static const struct x86_cpu_id longhaul_id[] = { 913 - { X86_VENDOR_CENTAUR, 6 }, 913 + X86_MATCH_VENDOR_FAM(CENTAUR, 6, NULL), 914 914 {} 915 915 }; 916 916 MODULE_DEVICE_TABLE(x86cpu, longhaul_id);

+1 -2

drivers/cpufreq/longrun.c

··· 281 281 }; 282 282 283 283 static const struct x86_cpu_id longrun_ids[] = { 284 - { X86_VENDOR_TRANSMETA, X86_FAMILY_ANY, X86_MODEL_ANY, 285 - X86_FEATURE_LONGRUN }, 284 + X86_MATCH_VENDOR_FEATURE(TRANSMETA, X86_FEATURE_LONGRUN, NULL), 286 285 {} 287 286 }; 288 287 MODULE_DEVICE_TABLE(x86cpu, longrun_ids);

+1 -1

drivers/cpufreq/p4-clockmod.c

··· 231 231 }; 232 232 233 233 static const struct x86_cpu_id cpufreq_p4_id[] = { 234 - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_ACC }, 234 + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_ACC, NULL), 235 235 {} 236 236 }; 237 237

+2 -2

drivers/cpufreq/powernow-k6.c

··· 258 258 }; 259 259 260 260 static const struct x86_cpu_id powernow_k6_ids[] = { 261 - { X86_VENDOR_AMD, 5, 12 }, 262 - { X86_VENDOR_AMD, 5, 13 }, 261 + X86_MATCH_VENDOR_FAM_MODEL(AMD, 5, 12, NULL), 262 + X86_MATCH_VENDOR_FAM_MODEL(AMD, 5, 13, NULL), 263 263 {} 264 264 }; 265 265 MODULE_DEVICE_TABLE(x86cpu, powernow_k6_ids);

+1 -1

drivers/cpufreq/powernow-k7.c

··· 109 109 } 110 110 111 111 static const struct x86_cpu_id powernow_k7_cpuids[] = { 112 - { X86_VENDOR_AMD, 6, }, 112 + X86_MATCH_VENDOR_FAM(AMD, 6, NULL), 113 113 {} 114 114 }; 115 115 MODULE_DEVICE_TABLE(x86cpu, powernow_k7_cpuids);

+1 -1

drivers/cpufreq/powernow-k8.c

··· 452 452 453 453 static const struct x86_cpu_id powernow_k8_ids[] = { 454 454 /* IO based frequency switching */ 455 - { X86_VENDOR_AMD, 0xf }, 455 + X86_MATCH_VENDOR_FAM(AMD, 0xf, NULL), 456 456 {} 457 457 }; 458 458 MODULE_DEVICE_TABLE(x86cpu, powernow_k8_ids);

+1 -1

drivers/cpufreq/sc520_freq.c

··· 95 95 }; 96 96 97 97 static const struct x86_cpu_id sc520_ids[] = { 98 - { X86_VENDOR_AMD, 4, 9 }, 98 + X86_MATCH_VENDOR_FAM_MODEL(AMD, 4, 9, NULL), 99 99 {} 100 100 }; 101 101 MODULE_DEVICE_TABLE(x86cpu, sc520_ids);

+4 -10

drivers/cpufreq/speedstep-centrino.c

··· 520 520 * or ASCII model IDs. 521 521 */ 522 522 static const struct x86_cpu_id centrino_ids[] = { 523 - { X86_VENDOR_INTEL, 6, 9, X86_FEATURE_EST }, 524 - { X86_VENDOR_INTEL, 6, 13, X86_FEATURE_EST }, 525 - { X86_VENDOR_INTEL, 6, 13, X86_FEATURE_EST }, 526 - { X86_VENDOR_INTEL, 6, 13, X86_FEATURE_EST }, 527 - { X86_VENDOR_INTEL, 15, 3, X86_FEATURE_EST }, 528 - { X86_VENDOR_INTEL, 15, 4, X86_FEATURE_EST }, 523 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, 9, X86_FEATURE_EST, NULL), 524 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, 13, X86_FEATURE_EST, NULL), 525 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 15, 3, X86_FEATURE_EST, NULL), 526 + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 15, 4, X86_FEATURE_EST, NULL), 529 527 {} 530 528 }; 531 - #if 0 532 - /* Autoload or not? Do not for now. */ 533 - MODULE_DEVICE_TABLE(x86cpu, centrino_ids); 534 - #endif 535 529 536 530 /** 537 531 * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver

+3 -7

drivers/cpufreq/speedstep-ich.c

··· 319 319 }; 320 320 321 321 static const struct x86_cpu_id ss_smi_ids[] = { 322 - { X86_VENDOR_INTEL, 6, 0xb, }, 323 - { X86_VENDOR_INTEL, 6, 0x8, }, 324 - { X86_VENDOR_INTEL, 15, 2 }, 322 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0x8, 0), 323 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0xb, 0), 324 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 15, 0x2, 0), 325 325 {} 326 326 }; 327 - #if 0 328 - /* Autoload or not? Do not for now. */ 329 - MODULE_DEVICE_TABLE(x86cpu, ss_smi_ids); 330 - #endif 331 327 332 328 /** 333 329 * speedstep_init - initializes the SpeedStep CPUFreq driver

+3 -7

drivers/cpufreq/speedstep-smi.c

··· 299 299 }; 300 300 301 301 static const struct x86_cpu_id ss_smi_ids[] = { 302 - { X86_VENDOR_INTEL, 6, 0xb, }, 303 - { X86_VENDOR_INTEL, 6, 0x8, }, 304 - { X86_VENDOR_INTEL, 15, 2 }, 302 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0x8, 0), 303 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, 0xb, 0), 304 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 15, 0x2, 0), 305 305 {} 306 306 }; 307 - #if 0 308 - /* Not auto loaded currently */ 309 - MODULE_DEVICE_TABLE(x86cpu, ss_smi_ids); 310 - #endif 311 307 312 308 /** 313 309 * speedstep_init - initializes the SpeedStep CPUFreq driver

+1 -1

drivers/crypto/padlock-aes.c

··· 474 474 }; 475 475 476 476 static const struct x86_cpu_id padlock_cpu_id[] = { 477 - X86_FEATURE_MATCH(X86_FEATURE_XCRYPT), 477 + X86_MATCH_FEATURE(X86_FEATURE_XCRYPT, NULL), 478 478 {} 479 479 }; 480 480 MODULE_DEVICE_TABLE(x86cpu, padlock_cpu_id);

+1 -1

drivers/crypto/padlock-sha.c

··· 490 490 }; 491 491 492 492 static const struct x86_cpu_id padlock_sha_ids[] = { 493 - X86_FEATURE_MATCH(X86_FEATURE_PHE), 493 + X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL), 494 494 {} 495 495 }; 496 496 MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);

+7 -7

drivers/edac/amd64_edac.c

··· 3626 3626 } 3627 3627 3628 3628 static const struct x86_cpu_id amd64_cpuids[] = { 3629 - { X86_VENDOR_AMD, 0xF, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, 3630 - { X86_VENDOR_AMD, 0x10, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, 3631 - { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, 3632 - { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, 3633 - { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, 3634 - { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, 3635 - { X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, 3629 + X86_MATCH_VENDOR_FAM(AMD, 0x0F, NULL), 3630 + X86_MATCH_VENDOR_FAM(AMD, 0x10, NULL), 3631 + X86_MATCH_VENDOR_FAM(AMD, 0x15, NULL), 3632 + X86_MATCH_VENDOR_FAM(AMD, 0x16, NULL), 3633 + X86_MATCH_VENDOR_FAM(AMD, 0x17, NULL), 3634 + X86_MATCH_VENDOR_FAM(HYGON, 0x18, NULL), 3635 + X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), 3636 3636 { } 3637 3637 }; 3638 3638 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);

+4 -4

drivers/edac/i10nm_base.c

··· 123 123 } 124 124 125 125 static const struct x86_cpu_id i10nm_cpuids[] = { 126 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_D, 0, 0 }, 127 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ICELAKE_X, 0, 0 }, 128 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ICELAKE_D, 0, 0 }, 129 - { } 126 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, NULL), 127 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), 128 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), 129 + {} 130 130 }; 131 131 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); 132 132

+2 -2

drivers/edac/pnd2_edac.c

··· 1537 1537 }; 1538 1538 1539 1539 static const struct x86_cpu_id pnd2_cpuids[] = { 1540 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops }, 1541 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_D, 0, (kernel_ulong_t)&dnv_ops }, 1540 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &apl_ops), 1541 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &dnv_ops), 1542 1542 { } 1543 1543 }; 1544 1544 MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids);

+7 -7

drivers/edac/sb_edac.c

··· 3420 3420 } 3421 3421 3422 3422 static const struct x86_cpu_id sbridge_cpuids[] = { 3423 - INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table), 3424 - INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table), 3425 - INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table), 3426 - INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table), 3427 - INTEL_CPU_FAM6(BROADWELL_D, pci_dev_descr_broadwell_table), 3428 - INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table), 3429 - INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table), 3423 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &pci_dev_descr_sbridge_table), 3424 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &pci_dev_descr_ibridge_table), 3425 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &pci_dev_descr_haswell_table), 3426 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &pci_dev_descr_broadwell_table), 3427 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &pci_dev_descr_broadwell_table), 3428 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &pci_dev_descr_knl_table), 3429 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &pci_dev_descr_knl_table), 3430 3430 { } 3431 3431 }; 3432 3432 MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);

+1 -1

drivers/edac/skx_base.c

··· 158 158 } 159 159 160 160 static const struct x86_cpu_id skx_cpuids[] = { 161 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X, 0, 0 }, 161 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), 162 162 { } 163 163 }; 164 164 MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);

+1 -1

drivers/extcon/extcon-axp288.c

··· 107 107 }; 108 108 109 109 static const struct x86_cpu_id cherry_trail_cpu_ids[] = { 110 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT, X86_FEATURE_ANY }, 110 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL), 111 111 {} 112 112 }; 113 113

+1 -1

drivers/hwmon/coretemp.c

··· 709 709 return 0; 710 710 } 711 711 static const struct x86_cpu_id __initconst coretemp_ids[] = { 712 - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, 712 + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_DTHERM, NULL), 713 713 {} 714 714 }; 715 715 MODULE_DEVICE_TABLE(x86cpu, coretemp_ids);

+4 -4

drivers/hwmon/via-cputemp.c

··· 270 270 } 271 271 272 272 static const struct x86_cpu_id __initconst cputemp_ids[] = { 273 - { X86_VENDOR_CENTAUR, 6, 0xa, }, /* C7 A */ 274 - { X86_VENDOR_CENTAUR, 6, 0xd, }, /* C7 D */ 275 - { X86_VENDOR_CENTAUR, 6, 0xf, }, /* Nano */ 276 - { X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, }, 273 + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_A, NULL), 274 + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_C7_D, NULL), 275 + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 6, X86_CENTAUR_FAM6_NANO, NULL), 276 + X86_MATCH_VENDOR_FAM_MODEL(CENTAUR, 7, X86_MODEL_ANY, NULL), 277 277 {} 278 278 }; 279 279 MODULE_DEVICE_TABLE(x86cpu, cputemp_ids);

+38 -41

drivers/idle/intel_idle.c

··· 1079 1079 }; 1080 1080 1081 1081 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1082 - INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nhx), 1083 - INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem), 1084 - INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem), 1085 - INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem), 1086 - INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nhx), 1087 - INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nhx), 1088 - INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom), 1089 - INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft), 1090 - INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nhx), 1091 - INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb), 1092 - INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snx), 1093 - INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom), 1094 - INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt), 1095 - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier), 1096 - INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht), 1097 - INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb), 1098 - INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt), 1099 - INTEL_CPU_FAM6(HASWELL, idle_cpu_hsw), 1100 - INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsx), 1101 - INTEL_CPU_FAM6(HASWELL_L, idle_cpu_hsw), 1102 - INTEL_CPU_FAM6(HASWELL_G, idle_cpu_hsw), 1103 - INTEL_CPU_FAM6(ATOM_SILVERMONT_D, idle_cpu_avn), 1104 - INTEL_CPU_FAM6(BROADWELL, idle_cpu_bdw), 1105 - INTEL_CPU_FAM6(BROADWELL_G, idle_cpu_bdw), 1106 - INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdx), 1107 - INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdx), 1108 - INTEL_CPU_FAM6(SKYLAKE_L, idle_cpu_skl), 1109 - INTEL_CPU_FAM6(SKYLAKE, idle_cpu_skl), 1110 - INTEL_CPU_FAM6(KABYLAKE_L, idle_cpu_skl), 1111 - INTEL_CPU_FAM6(KABYLAKE, idle_cpu_skl), 1112 - INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx), 1113 - INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl), 1114 - INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl), 1115 - INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt), 1116 - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt), 1117 - INTEL_CPU_FAM6(ATOM_GOLDMONT_D, idle_cpu_dnv), 1118 - INTEL_CPU_FAM6(ATOM_TREMONT_D, idle_cpu_dnv), 1082 + 
X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1083 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1084 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1085 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1086 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1087 + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1088 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1089 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1090 + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1091 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1092 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1093 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1094 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1095 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1096 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1097 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1098 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1099 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1100 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1101 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1102 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1103 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1104 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1105 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1106 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1107 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1108 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1109 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1110 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1111 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1112 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1113 + 
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1114 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1115 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1116 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1117 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1118 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), 1119 1119 {} 1120 1120 }; 1121 1121 1122 - #define INTEL_CPU_FAM6_MWAIT \ 1123 - { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 } 1124 - 1125 1122 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1126 - INTEL_CPU_FAM6_MWAIT, 1123 + X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1127 1124 {} 1128 1125 }; 1129 1126

+2 -2

drivers/mmc/host/sdhci-acpi.c

··· 242 242 static bool sdhci_acpi_byt(void) 243 243 { 244 244 static const struct x86_cpu_id byt[] = { 245 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, 245 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL), 246 246 {} 247 247 }; 248 248 ··· 252 252 static bool sdhci_acpi_cht(void) 253 253 { 254 254 static const struct x86_cpu_id cht[] = { 255 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, 255 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL), 256 256 {} 257 257 }; 258 258

+2 -4

drivers/pci/pci-mid.c

··· 55 55 .need_resume = mid_pci_need_resume, 56 56 }; 57 57 58 - #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } 59 - 60 58 /* 61 59 * This table should be in sync with the one in 62 60 * arch/x86/platform/intel-mid/pwr.c. 63 61 */ 64 62 static const struct x86_cpu_id lpss_cpu_ids[] = { 65 - ICPU(INTEL_FAM6_ATOM_SALTWELL_MID), 66 - ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID), 63 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, NULL), 64 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, NULL), 67 65 {} 68 66 }; 69 67

+6 -8

drivers/platform/x86/intel-uncore-frequency.c

··· 358 358 .notifier_call = uncore_pm_notify, 359 359 }; 360 360 361 - #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } 362 - 363 361 static const struct x86_cpu_id intel_uncore_cpu_ids[] = { 364 - ICPU(INTEL_FAM6_BROADWELL_G), 365 - ICPU(INTEL_FAM6_BROADWELL_X), 366 - ICPU(INTEL_FAM6_BROADWELL_D), 367 - ICPU(INTEL_FAM6_SKYLAKE_X), 368 - ICPU(INTEL_FAM6_ICELAKE_X), 369 - ICPU(INTEL_FAM6_ICELAKE_D), 362 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, NULL), 363 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, NULL), 364 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, NULL), 365 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), 366 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), 367 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), 370 368 {} 371 369 }; 372 370

+2 -2

drivers/platform/x86/intel_int0002_vgpio.c

··· 148 148 }; 149 149 150 150 static const struct x86_cpu_id int0002_cpu_ids[] = { 151 - INTEL_CPU_FAM6(ATOM_SILVERMONT, int0002_byt_irqchip), /* Valleyview, Bay Trail */ 152 - INTEL_CPU_FAM6(ATOM_AIRMONT, int0002_cht_irqchip), /* Braswell, Cherry Trail */ 151 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &int0002_byt_irqchip), 152 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &int0002_cht_irqchip), 153 153 {} 154 154 }; 155 155

+2 -2

drivers/platform/x86/intel_mid_powerbtn.c

··· 113 113 }; 114 114 115 115 static const struct x86_cpu_id mid_pb_cpu_ids[] = { 116 - INTEL_CPU_FAM6(ATOM_SALTWELL_MID, mfld_ddata), 117 - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, mrfld_ddata), 116 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, &mfld_ddata), 117 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &mrfld_ddata), 118 118 {} 119 119 }; 120 120

+12 -12

drivers/platform/x86/intel_pmc_core.c

··· 871 871 #endif /* CONFIG_DEBUG_FS */ 872 872 873 873 static const struct x86_cpu_id intel_pmc_core_ids[] = { 874 - INTEL_CPU_FAM6(SKYLAKE_L, spt_reg_map), 875 - INTEL_CPU_FAM6(SKYLAKE, spt_reg_map), 876 - INTEL_CPU_FAM6(KABYLAKE_L, spt_reg_map), 877 - INTEL_CPU_FAM6(KABYLAKE, spt_reg_map), 878 - INTEL_CPU_FAM6(CANNONLAKE_L, cnp_reg_map), 879 - INTEL_CPU_FAM6(ICELAKE_L, icl_reg_map), 880 - INTEL_CPU_FAM6(ICELAKE_NNPI, icl_reg_map), 881 - INTEL_CPU_FAM6(COMETLAKE, cnp_reg_map), 882 - INTEL_CPU_FAM6(COMETLAKE_L, cnp_reg_map), 883 - INTEL_CPU_FAM6(TIGERLAKE_L, tgl_reg_map), 884 - INTEL_CPU_FAM6(TIGERLAKE, tgl_reg_map), 885 - INTEL_CPU_FAM6(ATOM_TREMONT, tgl_reg_map), 874 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &spt_reg_map), 875 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &spt_reg_map), 876 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &spt_reg_map), 877 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &spt_reg_map), 878 + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnp_reg_map), 879 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_reg_map), 880 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_reg_map), 881 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &cnp_reg_map), 882 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &cnp_reg_map), 883 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_reg_map), 884 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_reg_map), 885 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &tgl_reg_map), 886 886 {} 887 887 }; 888 888

+8 -8

drivers/platform/x86/intel_pmc_core_pltdrv.c

··· 38 38 * other list may grow, but this list should not. 39 39 */ 40 40 static const struct x86_cpu_id intel_pmc_core_platform_ids[] = { 41 - INTEL_CPU_FAM6(SKYLAKE_L, pmc_core_device), 42 - INTEL_CPU_FAM6(SKYLAKE, pmc_core_device), 43 - INTEL_CPU_FAM6(KABYLAKE_L, pmc_core_device), 44 - INTEL_CPU_FAM6(KABYLAKE, pmc_core_device), 45 - INTEL_CPU_FAM6(CANNONLAKE_L, pmc_core_device), 46 - INTEL_CPU_FAM6(ICELAKE_L, pmc_core_device), 47 - INTEL_CPU_FAM6(COMETLAKE, pmc_core_device), 48 - INTEL_CPU_FAM6(COMETLAKE_L, pmc_core_device), 41 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &pmc_core_device), 42 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &pmc_core_device), 43 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &pmc_core_device), 44 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &pmc_core_device), 45 + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &pmc_core_device), 46 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &pmc_core_device), 47 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &pmc_core_device), 48 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &pmc_core_device), 49 49 {} 50 50 }; 51 51 MODULE_DEVICE_TABLE(x86cpu, intel_pmc_core_platform_ids);

+1 -3

drivers/platform/x86/intel_speed_select_if/isst_if_mbox_msr.c

··· 160 160 .notifier_call = isst_pm_notify, 161 161 }; 162 162 163 - #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } 164 - 165 163 static const struct x86_cpu_id isst_if_cpu_ids[] = { 166 - ICPU(INTEL_FAM6_SKYLAKE_X), 164 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), 167 165 {} 168 166 }; 169 167 MODULE_DEVICE_TABLE(x86cpu, isst_if_cpu_ids);

+2 -3

drivers/platform/x86/intel_telemetry_debugfs.c

··· 308 308 }; 309 309 310 310 static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = { 311 - INTEL_CPU_FAM6(ATOM_GOLDMONT, telem_apl_debugfs_conf), 312 - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf), 311 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &telem_apl_debugfs_conf), 312 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &telem_apl_debugfs_conf), 313 313 {} 314 314 }; 315 - 316 315 MODULE_DEVICE_TABLE(x86cpu, telemetry_debugfs_cpu_ids); 317 316 318 317 static int telemetry_debugfs_check_evts(void)

+2 -5

drivers/platform/x86/intel_telemetry_pltdrv.c

··· 67 67 #define TELEM_CLEAR_VERBOSITY_BITS(x) ((x) &= ~(BIT(27) | BIT(28))) 68 68 #define TELEM_SET_VERBOSITY_BITS(x, y) ((x) |= ((y) << 27)) 69 69 70 - #define TELEM_CPU(model, data) \ 71 - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&data } 72 - 73 70 enum telemetry_action { 74 71 TELEM_UPDATE = 0, 75 72 TELEM_ADD, ··· 180 183 }; 181 184 182 185 static const struct x86_cpu_id telemetry_cpu_ids[] = { 183 - TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config), 184 - TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_glk_config), 186 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &telem_apl_config), 187 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &telem_glk_config), 185 188 {} 186 189 }; 187 190

+2 -4

drivers/platform/x86/intel_turbo_max_3.c

··· 113 113 return 0; 114 114 } 115 115 116 - #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } 117 - 118 116 static const struct x86_cpu_id itmt_legacy_cpu_ids[] = { 119 - ICPU(INTEL_FAM6_BROADWELL_X), 120 - ICPU(INTEL_FAM6_SKYLAKE_X), 117 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, NULL), 118 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), 121 119 {} 122 120 }; 123 121

+37 -38

drivers/powercap/intel_rapl_common.c

··· 951 951 }; 952 952 953 953 static const struct x86_cpu_id rapl_ids[] __initconst = { 954 - INTEL_CPU_FAM6(SANDYBRIDGE, rapl_defaults_core), 955 - INTEL_CPU_FAM6(SANDYBRIDGE_X, rapl_defaults_core), 954 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core), 955 + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core), 956 956 957 - INTEL_CPU_FAM6(IVYBRIDGE, rapl_defaults_core), 958 - INTEL_CPU_FAM6(IVYBRIDGE_X, rapl_defaults_core), 957 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core), 958 + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core), 959 959 960 - INTEL_CPU_FAM6(HASWELL, rapl_defaults_core), 961 - INTEL_CPU_FAM6(HASWELL_L, rapl_defaults_core), 962 - INTEL_CPU_FAM6(HASWELL_G, rapl_defaults_core), 963 - INTEL_CPU_FAM6(HASWELL_X, rapl_defaults_hsw_server), 960 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core), 961 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core), 962 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core), 963 + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server), 964 964 965 - INTEL_CPU_FAM6(BROADWELL, rapl_defaults_core), 966 - INTEL_CPU_FAM6(BROADWELL_G, rapl_defaults_core), 967 - INTEL_CPU_FAM6(BROADWELL_D, rapl_defaults_core), 968 - INTEL_CPU_FAM6(BROADWELL_X, rapl_defaults_hsw_server), 965 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core), 966 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core), 967 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core), 968 + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server), 969 969 970 - INTEL_CPU_FAM6(SKYLAKE, rapl_defaults_core), 971 - INTEL_CPU_FAM6(SKYLAKE_L, rapl_defaults_core), 972 - INTEL_CPU_FAM6(SKYLAKE_X, rapl_defaults_hsw_server), 973 - INTEL_CPU_FAM6(KABYLAKE_L, rapl_defaults_core), 974 - INTEL_CPU_FAM6(KABYLAKE, rapl_defaults_core), 975 - INTEL_CPU_FAM6(CANNONLAKE_L, rapl_defaults_core), 976 - INTEL_CPU_FAM6(ICELAKE_L, rapl_defaults_core), 977 - 
INTEL_CPU_FAM6(ICELAKE, rapl_defaults_core), 978 - INTEL_CPU_FAM6(ICELAKE_NNPI, rapl_defaults_core), 979 - INTEL_CPU_FAM6(ICELAKE_X, rapl_defaults_hsw_server), 980 - INTEL_CPU_FAM6(ICELAKE_D, rapl_defaults_hsw_server), 981 - INTEL_CPU_FAM6(COMETLAKE_L, rapl_defaults_core), 982 - INTEL_CPU_FAM6(COMETLAKE, rapl_defaults_core), 983 - INTEL_CPU_FAM6(TIGERLAKE_L, rapl_defaults_core), 970 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core), 971 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core), 972 + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &rapl_defaults_hsw_server), 973 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core), 974 + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core), 975 + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core), 976 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core), 977 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core), 978 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core), 979 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server), 980 + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server), 981 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), 982 + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), 983 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), 984 984 985 - INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt), 986 - INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), 987 - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, rapl_defaults_tng), 988 - INTEL_CPU_FAM6(ATOM_AIRMONT_MID, rapl_defaults_ann), 989 - INTEL_CPU_FAM6(ATOM_GOLDMONT, rapl_defaults_core), 990 - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, rapl_defaults_core), 991 - INTEL_CPU_FAM6(ATOM_GOLDMONT_D, rapl_defaults_core), 992 - INTEL_CPU_FAM6(ATOM_TREMONT_D, rapl_defaults_core), 993 - INTEL_CPU_FAM6(ATOM_TREMONT_L, rapl_defaults_core), 985 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), 986 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, 
&rapl_defaults_cht), 987 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng), 988 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann), 989 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core), 990 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core), 991 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core), 992 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core), 993 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core), 994 994 995 - INTEL_CPU_FAM6(XEON_PHI_KNL, rapl_defaults_hsw_server), 996 - INTEL_CPU_FAM6(XEON_PHI_KNM, rapl_defaults_hsw_server), 995 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server), 996 + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server), 997 997 {} 998 998 }; 999 - 1000 999 MODULE_DEVICE_TABLE(x86cpu, rapl_ids); 1001 1000 1002 1001 /* Read once for all raw primitive data for domains */

+1 -1

drivers/thermal/intel/intel_powerclamp.c

··· 651 651 }; 652 652 653 653 static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = { 654 - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT }, 654 + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL), 655 655 {} 656 656 }; 657 657 MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

+1 -4

drivers/thermal/intel/intel_quark_dts_thermal.c

··· 64 64 #include <asm/cpu_device_id.h> 65 65 #include <asm/iosf_mbi.h> 66 66 67 - #define X86_FAMILY_QUARK 0x5 68 - #define X86_MODEL_QUARK_X1000 0x9 69 - 70 67 /* DTS reset is programmed via QRK_MBI_UNIT_SOC */ 71 68 #define QRK_DTS_REG_OFFSET_RESET 0x34 72 69 #define QRK_DTS_RESET_BIT BIT(0) ··· 430 433 } 431 434 432 435 static const struct x86_cpu_id qrk_thermal_ids[] __initconst = { 433 - { X86_VENDOR_INTEL, X86_FAMILY_QUARK, X86_MODEL_QUARK_X1000 }, 436 + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), 434 437 {} 435 438 }; 436 439 MODULE_DEVICE_TABLE(x86cpu, qrk_thermal_ids);

+1 -2

drivers/thermal/intel/intel_soc_dts_thermal.c

··· 36 36 } 37 37 38 38 static const struct x86_cpu_id soc_thermal_ids[] = { 39 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, 0, 40 - BYT_SOC_DTS_APIC_IRQ}, 39 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, BYT_SOC_DTS_APIC_IRQ), 41 40 {} 42 41 }; 43 42 MODULE_DEVICE_TABLE(x86cpu, soc_thermal_ids);

+1 -1

drivers/thermal/intel/x86_pkg_temp_thermal.c

··· 478 478 } 479 479 480 480 static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { 481 - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS }, 481 + X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL), 482 482 {} 483 483 }; 484 484 MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);

+134

include/linux/min_heap.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_MIN_HEAP_H 3 + #define _LINUX_MIN_HEAP_H 4 + 5 + #include <linux/bug.h> 6 + #include <linux/string.h> 7 + #include <linux/types.h> 8 + 9 + /** 10 + * struct min_heap - Data structure to hold a min-heap. 11 + * @data: Start of array holding the heap elements. 12 + * @nr: Number of elements currently in the heap. 13 + * @size: Maximum number of elements that can be held in current storage. 14 + */ 15 + struct min_heap { 16 + void *data; 17 + int nr; 18 + int size; 19 + }; 20 + 21 + /** 22 + * struct min_heap_callbacks - Data/functions to customise the min_heap. 23 + * @elem_size: The size of each element in bytes. 24 + * @less: Partial order function for this heap. 25 + * @swp: Swap elements function. 26 + */ 27 + struct min_heap_callbacks { 28 + int elem_size; 29 + bool (*less)(const void *lhs, const void *rhs); 30 + void (*swp)(void *lhs, void *rhs); 31 + }; 32 + 33 + /* Sift the element at pos down the heap. */ 34 + static __always_inline 35 + void min_heapify(struct min_heap *heap, int pos, 36 + const struct min_heap_callbacks *func) 37 + { 38 + void *left, *right, *parent, *smallest; 39 + void *data = heap->data; 40 + 41 + for (;;) { 42 + if (pos * 2 + 1 >= heap->nr) 43 + break; 44 + 45 + left = data + ((pos * 2 + 1) * func->elem_size); 46 + parent = data + (pos * func->elem_size); 47 + smallest = parent; 48 + if (func->less(left, smallest)) 49 + smallest = left; 50 + 51 + if (pos * 2 + 2 < heap->nr) { 52 + right = data + ((pos * 2 + 2) * func->elem_size); 53 + if (func->less(right, smallest)) 54 + smallest = right; 55 + } 56 + if (smallest == parent) 57 + break; 58 + func->swp(smallest, parent); 59 + if (smallest == left) 60 + pos = (pos * 2) + 1; 61 + else 62 + pos = (pos * 2) + 2; 63 + } 64 + } 65 + 66 + /* Floyd's approach to heapification that is O(nr). 
*/ 67 + static __always_inline 68 + void min_heapify_all(struct min_heap *heap, 69 + const struct min_heap_callbacks *func) 70 + { 71 + int i; 72 + 73 + for (i = heap->nr / 2; i >= 0; i--) 74 + min_heapify(heap, i, func); 75 + } 76 + 77 + /* Remove minimum element from the heap, O(log2(nr)). */ 78 + static __always_inline 79 + void min_heap_pop(struct min_heap *heap, 80 + const struct min_heap_callbacks *func) 81 + { 82 + void *data = heap->data; 83 + 84 + if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) 85 + return; 86 + 87 + /* Place last element at the root (position 0) and then sift down. */ 88 + heap->nr--; 89 + memcpy(data, data + (heap->nr * func->elem_size), func->elem_size); 90 + min_heapify(heap, 0, func); 91 + } 92 + 93 + /* 94 + * Remove the minimum element and then push the given element. The 95 + * implementation performs 1 sift (O(log2(nr))) and is therefore more 96 + * efficient than a pop followed by a push that does 2. 97 + */ 98 + static __always_inline 99 + void min_heap_pop_push(struct min_heap *heap, 100 + const void *element, 101 + const struct min_heap_callbacks *func) 102 + { 103 + memcpy(heap->data, element, func->elem_size); 104 + min_heapify(heap, 0, func); 105 + } 106 + 107 + /* Push an element on to the heap, O(log2(nr)). */ 108 + static __always_inline 109 + void min_heap_push(struct min_heap *heap, const void *element, 110 + const struct min_heap_callbacks *func) 111 + { 112 + void *data = heap->data; 113 + void *child, *parent; 114 + int pos; 115 + 116 + if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) 117 + return; 118 + 119 + /* Place at the end of data. */ 120 + pos = heap->nr; 121 + memcpy(data + (pos * func->elem_size), element, func->elem_size); 122 + heap->nr++; 123 + 124 + /* Sift child at pos up. 
*/ 125 + for (; pos > 0; pos = (pos - 1) / 2) { 126 + child = data + (pos * func->elem_size); 127 + parent = data + ((pos - 1) / 2) * func->elem_size; 128 + if (func->less(parent, child)) 129 + break; 130 + func->swp(parent, child); 131 + } 132 + } 133 + 134 + #endif /* _LINUX_MIN_HEAP_H */

+1 -3

include/linux/mod_devicetable.h

··· 667 667 kernel_ulong_t driver_data; 668 668 }; 669 669 670 - #define X86_FEATURE_MATCH(x) \ 671 - { X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, x } 672 - 670 + /* Wild cards for x86_cpu_id::vendor, family, model and feature */ 673 671 #define X86_VENDOR_ANY 0xffff 674 672 #define X86_FAMILY_ANY 0 675 673 #define X86_MODEL_ANY 0

+19

include/linux/perf_event.h

··· 93 93 /* 94 94 * branch stack layout: 95 95 * nr: number of taken branches stored in entries[] 96 + * hw_idx: The low level index of raw branch records 97 + * for the most recent branch. 98 + * -1ULL means invalid/unknown. 96 99 * 97 100 * Note that nr can vary from sample to sample 98 101 * branches (to, from) are stored from most recent 99 102 * to least recent, i.e., entries[0] contains the most 100 103 * recent branch. 104 + * The entries[] is an abstraction of raw branch records, 105 + * which may not be stored in age order in HW, e.g. Intel LBR. 106 + * The hw_idx is to expose the low level index of raw 107 + * branch record for the most recent branch aka entries[0]. 108 + * The hw_idx index is between -1 (unknown) and max depth, 109 + * which can be retrieved in /sys/devices/cpu/caps/branches. 110 + * For the architectures whose raw branch records are 111 + * already stored in age order, the hw_idx should be 0. 101 112 */ 102 113 struct perf_branch_stack { 103 114 __u64 nr; 115 + __u64 hw_idx; 104 116 struct perf_branch_entry entries[0]; 105 117 }; 106 118 ··· 862 850 int sched_cb_usage; 863 851 864 852 int online; 853 + /* 854 + * Per-CPU storage for iterators used in visit_groups_merge. The default 855 + * storage is of size 2 to hold the CPU and any CPU event iterators. 856 + */ 857 + int heap_size; 858 + struct perf_event **heap; 859 + struct perf_event *heap_default[2]; 865 860 }; 866 861 867 862 struct perf_output_handle {

+7 -1

include/uapi/linux/perf_event.h

··· 181 181 182 182 PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ 183 183 184 + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ 185 + 184 186 PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ 185 187 }; 186 188 ··· 209 207 210 208 PERF_SAMPLE_BRANCH_TYPE_SAVE = 211 209 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, 210 + 211 + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, 212 212 213 213 PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, 214 214 }; ··· 857 853 * char data[size];}&& PERF_SAMPLE_RAW 858 854 * 859 855 * { u64 nr; 860 - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK 856 + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX 857 + * { u64 from, to, flags } lbr[nr]; 858 + * } && PERF_SAMPLE_BRANCH_STACK 861 859 * 862 860 * { u64 abi; # enum perf_sample_regs_abi 863 861 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER

+263 -98

kernel/events/core.c

··· 49 49 #include <linux/sched/mm.h> 50 50 #include <linux/proc_ns.h> 51 51 #include <linux/mount.h> 52 + #include <linux/min_heap.h> 52 53 53 54 #include "internal.h" 54 55 ··· 892 891 rcu_read_unlock(); 893 892 } 894 893 894 + static int perf_cgroup_ensure_storage(struct perf_event *event, 895 + struct cgroup_subsys_state *css) 896 + { 897 + struct perf_cpu_context *cpuctx; 898 + struct perf_event **storage; 899 + int cpu, heap_size, ret = 0; 900 + 901 + /* 902 + * Allow storage to have sufficient space for an iterator for each 903 + * possibly nested cgroup plus an iterator for events with no cgroup. 904 + */ 905 + for (heap_size = 1; css; css = css->parent) 906 + heap_size++; 907 + 908 + for_each_possible_cpu(cpu) { 909 + cpuctx = per_cpu_ptr(event->pmu->pmu_cpu_context, cpu); 910 + if (heap_size <= cpuctx->heap_size) 911 + continue; 912 + 913 + storage = kmalloc_node(heap_size * sizeof(struct perf_event *), 914 + GFP_KERNEL, cpu_to_node(cpu)); 915 + if (!storage) { 916 + ret = -ENOMEM; 917 + break; 918 + } 919 + 920 + raw_spin_lock_irq(&cpuctx->ctx.lock); 921 + if (cpuctx->heap_size < heap_size) { 922 + swap(cpuctx->heap, storage); 923 + if (storage == cpuctx->heap_default) 924 + storage = NULL; 925 + cpuctx->heap_size = heap_size; 926 + } 927 + raw_spin_unlock_irq(&cpuctx->ctx.lock); 928 + 929 + kfree(storage); 930 + } 931 + 932 + return ret; 933 + } 934 + 895 935 static inline int perf_cgroup_connect(int fd, struct perf_event *event, 896 936 struct perf_event_attr *attr, 897 937 struct perf_event *group_leader) ··· 951 909 ret = PTR_ERR(css); 952 910 goto out; 953 911 } 912 + 913 + ret = perf_cgroup_ensure_storage(event, css); 914 + if (ret) 915 + goto out; 954 916 955 917 cgrp = container_of(css, struct perf_cgroup, css); 956 918 event->cgrp = cgrp; ··· 1577 1531 if (left->cpu > right->cpu) 1578 1532 return false; 1579 1533 1534 + #ifdef CONFIG_CGROUP_PERF 1535 + if (left->cgrp != right->cgrp) { 1536 + if (!left->cgrp || !left->cgrp->css.cgroup) { 1537 + /* 
1538 + * Left has no cgroup but right does, no cgroups come 1539 + * first. 1540 + */ 1541 + return true; 1542 + } 1543 + if (!right->cgrp || !right->cgrp->css.cgroup) { 1544 + /* 1545 + * Right has no cgroup but left does, no cgroups come 1546 + * first. 1547 + */ 1548 + return false; 1549 + } 1550 + /* Two dissimilar cgroups, order by id. */ 1551 + if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id) 1552 + return true; 1553 + 1554 + return false; 1555 + } 1556 + #endif 1557 + 1580 1558 if (left->group_index < right->group_index) 1581 1559 return true; 1582 1560 if (left->group_index > right->group_index) ··· 1680 1610 } 1681 1611 1682 1612 /* 1683 - * Get the leftmost event in the @cpu subtree. 1613 + * Get the leftmost event in the cpu/cgroup subtree. 1684 1614 */ 1685 1615 static struct perf_event * 1686 - perf_event_groups_first(struct perf_event_groups *groups, int cpu) 1616 + perf_event_groups_first(struct perf_event_groups *groups, int cpu, 1617 + struct cgroup *cgrp) 1687 1618 { 1688 1619 struct perf_event *node_event = NULL, *match = NULL; 1689 1620 struct rb_node *node = groups->tree.rb_node; 1621 + #ifdef CONFIG_CGROUP_PERF 1622 + u64 node_cgrp_id, cgrp_id = 0; 1623 + 1624 + if (cgrp) 1625 + cgrp_id = cgrp->kn->id; 1626 + #endif 1690 1627 1691 1628 while (node) { 1692 1629 node_event = container_of(node, struct perf_event, group_node); 1693 1630 1694 1631 if (cpu < node_event->cpu) { 1695 1632 node = node->rb_left; 1696 - } else if (cpu > node_event->cpu) { 1697 - node = node->rb_right; 1698 - } else { 1699 - match = node_event; 1700 - node = node->rb_left; 1633 + continue; 1701 1634 } 1635 + if (cpu > node_event->cpu) { 1636 + node = node->rb_right; 1637 + continue; 1638 + } 1639 + #ifdef CONFIG_CGROUP_PERF 1640 + node_cgrp_id = 0; 1641 + if (node_event->cgrp && node_event->cgrp->css.cgroup) 1642 + node_cgrp_id = node_event->cgrp->css.cgroup->kn->id; 1643 + 1644 + if (cgrp_id < node_cgrp_id) { 1645 + node = node->rb_left; 1646 + 
continue; 1647 + } 1648 + if (cgrp_id > node_cgrp_id) { 1649 + node = node->rb_right; 1650 + continue; 1651 + } 1652 + #endif 1653 + match = node_event; 1654 + node = node->rb_left; 1702 1655 } 1703 1656 1704 1657 return match; ··· 1734 1641 perf_event_groups_next(struct perf_event *event) 1735 1642 { 1736 1643 struct perf_event *next; 1644 + #ifdef CONFIG_CGROUP_PERF 1645 + u64 curr_cgrp_id = 0; 1646 + u64 next_cgrp_id = 0; 1647 + #endif 1737 1648 1738 1649 next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node); 1739 - if (next && next->cpu == event->cpu) 1740 - return next; 1650 + if (next == NULL || next->cpu != event->cpu) 1651 + return NULL; 1741 1652 1742 - return NULL; 1653 + #ifdef CONFIG_CGROUP_PERF 1654 + if (event->cgrp && event->cgrp->css.cgroup) 1655 + curr_cgrp_id = event->cgrp->css.cgroup->kn->id; 1656 + 1657 + if (next->cgrp && next->cgrp->css.cgroup) 1658 + next_cgrp_id = next->cgrp->css.cgroup->kn->id; 1659 + 1660 + if (curr_cgrp_id != next_cgrp_id) 1661 + return NULL; 1662 + #endif 1663 + return next; 1743 1664 } 1744 1665 1745 1666 /* ··· 2093 1986 return 1; 2094 1987 } 2095 1988 1989 + static inline struct list_head *get_event_list(struct perf_event *event) 1990 + { 1991 + struct perf_event_context *ctx = event->ctx; 1992 + return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active; 1993 + } 1994 + 2096 1995 static void perf_group_detach(struct perf_event *event) 2097 1996 { 2098 1997 struct perf_event *sibling, *tmp; ··· 2141 2028 if (!RB_EMPTY_NODE(&event->group_node)) { 2142 2029 add_event_to_groups(sibling, event->ctx); 2143 2030 2144 - if (sibling->state == PERF_EVENT_STATE_ACTIVE) { 2145 - struct list_head *list = sibling->attr.pinned ? 
2146 - &ctx->pinned_active : &ctx->flexible_active; 2147 - 2148 - list_add_tail(&sibling->active_list, list); 2149 - } 2031 + if (sibling->state == PERF_EVENT_STATE_ACTIVE) 2032 + list_add_tail(&sibling->active_list, get_event_list(sibling)); 2150 2033 } 2151 2034 2152 2035 WARN_ON_ONCE(sibling->ctx != event->ctx); ··· 2291 2182 2292 2183 if (!ctx->nr_events && ctx->is_active) { 2293 2184 ctx->is_active = 0; 2185 + ctx->rotate_necessary = 0; 2294 2186 if (ctx->task) { 2295 2187 WARN_ON_ONCE(cpuctx->task_ctx != ctx); 2296 2188 cpuctx->task_ctx = NULL; ··· 2459 2349 struct perf_event_context *ctx) 2460 2350 { 2461 2351 int ret = 0; 2352 + 2353 + WARN_ON_ONCE(event->ctx != ctx); 2462 2354 2463 2355 lockdep_assert_held(&ctx->lock); 2464 2356 ··· 3189 3077 if (!ctx->nr_active || !(is_active & EVENT_ALL)) 3190 3078 return; 3191 3079 3192 - /* 3193 - * If we had been multiplexing, no rotations are necessary, now no events 3194 - * are active. 3195 - */ 3196 - ctx->rotate_necessary = 0; 3197 - 3198 3080 perf_pmu_disable(ctx->pmu); 3199 3081 if (is_active & EVENT_PINNED) { 3200 3082 list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list) ··· 3198 3092 if (is_active & EVENT_FLEXIBLE) { 3199 3093 list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list) 3200 3094 group_sched_out(event, cpuctx, ctx); 3095 + 3096 + /* 3097 + * Since we cleared EVENT_FLEXIBLE, also clear 3098 + * rotate_necessary, it will be reset by 3099 + * ctx_flexible_sched_in() when needed. 
3100 + */ 3101 + ctx->rotate_necessary = 0; 3201 3102 } 3202 3103 perf_pmu_enable(ctx->pmu); 3203 3104 } ··· 3501 3388 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); 3502 3389 } 3503 3390 3504 - static int visit_groups_merge(struct perf_event_groups *groups, int cpu, 3505 - int (*func)(struct perf_event *, void *), void *data) 3391 + static bool perf_less_group_idx(const void *l, const void *r) 3506 3392 { 3507 - struct perf_event **evt, *evt1, *evt2; 3393 + const struct perf_event *le = l, *re = r; 3394 + 3395 + return le->group_index < re->group_index; 3396 + } 3397 + 3398 + static void swap_ptr(void *l, void *r) 3399 + { 3400 + void **lp = l, **rp = r; 3401 + 3402 + swap(*lp, *rp); 3403 + } 3404 + 3405 + static const struct min_heap_callbacks perf_min_heap = { 3406 + .elem_size = sizeof(struct perf_event *), 3407 + .less = perf_less_group_idx, 3408 + .swp = swap_ptr, 3409 + }; 3410 + 3411 + static void __heap_add(struct min_heap *heap, struct perf_event *event) 3412 + { 3413 + struct perf_event **itrs = heap->data; 3414 + 3415 + if (event) { 3416 + itrs[heap->nr] = event; 3417 + heap->nr++; 3418 + } 3419 + } 3420 + 3421 + static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, 3422 + struct perf_event_groups *groups, int cpu, 3423 + int (*func)(struct perf_event *, void *), 3424 + void *data) 3425 + { 3426 + #ifdef CONFIG_CGROUP_PERF 3427 + struct cgroup_subsys_state *css = NULL; 3428 + #endif 3429 + /* Space for per CPU and/or any CPU event iterators. 
*/ 3430 + struct perf_event *itrs[2]; 3431 + struct min_heap event_heap; 3432 + struct perf_event **evt; 3508 3433 int ret; 3509 3434 3510 - evt1 = perf_event_groups_first(groups, -1); 3511 - evt2 = perf_event_groups_first(groups, cpu); 3435 + if (cpuctx) { 3436 + event_heap = (struct min_heap){ 3437 + .data = cpuctx->heap, 3438 + .nr = 0, 3439 + .size = cpuctx->heap_size, 3440 + }; 3512 3441 3513 - while (evt1 || evt2) { 3514 - if (evt1 && evt2) { 3515 - if (evt1->group_index < evt2->group_index) 3516 - evt = &evt1; 3517 - else 3518 - evt = &evt2; 3519 - } else if (evt1) { 3520 - evt = &evt1; 3521 - } else { 3522 - evt = &evt2; 3523 - } 3442 + lockdep_assert_held(&cpuctx->ctx.lock); 3524 3443 3444 + #ifdef CONFIG_CGROUP_PERF 3445 + if (cpuctx->cgrp) 3446 + css = &cpuctx->cgrp->css; 3447 + #endif 3448 + } else { 3449 + event_heap = (struct min_heap){ 3450 + .data = itrs, 3451 + .nr = 0, 3452 + .size = ARRAY_SIZE(itrs), 3453 + }; 3454 + /* Events not within a CPU context may be on any CPU. 
*/ 3455 + __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL)); 3456 + } 3457 + evt = event_heap.data; 3458 + 3459 + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL)); 3460 + 3461 + #ifdef CONFIG_CGROUP_PERF 3462 + for (; css; css = css->parent) 3463 + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup)); 3464 + #endif 3465 + 3466 + min_heapify_all(&event_heap, &perf_min_heap); 3467 + 3468 + while (event_heap.nr) { 3525 3469 ret = func(*evt, data); 3526 3470 if (ret) 3527 3471 return ret; 3528 3472 3529 3473 *evt = perf_event_groups_next(*evt); 3474 + if (*evt) 3475 + min_heapify(&event_heap, 0, &perf_min_heap); 3476 + else 3477 + min_heap_pop(&event_heap, &perf_min_heap); 3530 3478 } 3531 3479 3532 3480 return 0; 3533 3481 } 3534 3482 3535 - struct sched_in_data { 3536 - struct perf_event_context *ctx; 3537 - struct perf_cpu_context *cpuctx; 3538 - int can_add_hw; 3539 - }; 3540 - 3541 - static int pinned_sched_in(struct perf_event *event, void *data) 3483 + static int merge_sched_in(struct perf_event *event, void *data) 3542 3484 { 3543 - struct sched_in_data *sid = data; 3485 + struct perf_event_context *ctx = event->ctx; 3486 + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 3487 + int *can_add_hw = data; 3544 3488 3545 3489 if (event->state <= PERF_EVENT_STATE_OFF) 3546 3490 return 0; ··· 3605 3435 if (!event_filter_match(event)) 3606 3436 return 0; 3607 3437 3608 - if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { 3609 - if (!group_sched_in(event, sid->cpuctx, sid->ctx)) 3610 - list_add_tail(&event->active_list, &sid->ctx->pinned_active); 3438 + if (group_can_go_on(event, cpuctx, *can_add_hw)) { 3439 + if (!group_sched_in(event, cpuctx, ctx)) 3440 + list_add_tail(&event->active_list, get_event_list(event)); 3611 3441 } 3612 3442 3613 - /* 3614 - * If this pinned group hasn't been scheduled, 3615 - * put it in error state. 
3616 - */ 3617 - if (event->state == PERF_EVENT_STATE_INACTIVE) 3618 - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); 3443 + if (event->state == PERF_EVENT_STATE_INACTIVE) { 3444 + if (event->attr.pinned) 3445 + perf_event_set_state(event, PERF_EVENT_STATE_ERROR); 3619 3446 3620 - return 0; 3621 - } 3622 - 3623 - static int flexible_sched_in(struct perf_event *event, void *data) 3624 - { 3625 - struct sched_in_data *sid = data; 3626 - 3627 - if (event->state <= PERF_EVENT_STATE_OFF) 3628 - return 0; 3629 - 3630 - if (!event_filter_match(event)) 3631 - return 0; 3632 - 3633 - if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { 3634 - int ret = group_sched_in(event, sid->cpuctx, sid->ctx); 3635 - if (ret) { 3636 - sid->can_add_hw = 0; 3637 - sid->ctx->rotate_necessary = 1; 3638 - return 0; 3639 - } 3640 - list_add_tail(&event->active_list, &sid->ctx->flexible_active); 3447 + *can_add_hw = 0; 3448 + ctx->rotate_necessary = 1; 3641 3449 } 3642 3450 3643 3451 return 0; ··· 3625 3477 ctx_pinned_sched_in(struct perf_event_context *ctx, 3626 3478 struct perf_cpu_context *cpuctx) 3627 3479 { 3628 - struct sched_in_data sid = { 3629 - .ctx = ctx, 3630 - .cpuctx = cpuctx, 3631 - .can_add_hw = 1, 3632 - }; 3480 + int can_add_hw = 1; 3633 3481 3634 - visit_groups_merge(&ctx->pinned_groups, 3482 + if (ctx != &cpuctx->ctx) 3483 + cpuctx = NULL; 3484 + 3485 + visit_groups_merge(cpuctx, &ctx->pinned_groups, 3635 3486 smp_processor_id(), 3636 - pinned_sched_in, &sid); 3487 + merge_sched_in, &can_add_hw); 3637 3488 } 3638 3489 3639 3490 static void 3640 3491 ctx_flexible_sched_in(struct perf_event_context *ctx, 3641 3492 struct perf_cpu_context *cpuctx) 3642 3493 { 3643 - struct sched_in_data sid = { 3644 - .ctx = ctx, 3645 - .cpuctx = cpuctx, 3646 - .can_add_hw = 1, 3647 - }; 3494 + int can_add_hw = 1; 3648 3495 3649 - visit_groups_merge(&ctx->flexible_groups, 3496 + if (ctx != &cpuctx->ctx) 3497 + cpuctx = NULL; 3498 + 3499 + visit_groups_merge(cpuctx, 
&ctx->flexible_groups, 3650 3500 smp_processor_id(), 3651 - flexible_sched_in, &sid); 3501 + merge_sched_in, &can_add_hw); 3652 3502 } 3653 3503 3654 3504 static void ··· 3986 3840 event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree), 3987 3841 typeof(*event), group_node); 3988 3842 } 3843 + 3844 + /* 3845 + * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in() 3846 + * finds there are unschedulable events, it will set it again. 3847 + */ 3848 + ctx->rotate_necessary = 0; 3989 3849 3990 3850 return event; 3991 3851 } ··· 6707 6555 perf_output_read_one(handle, event, enabled, running); 6708 6556 } 6709 6557 6558 + static inline bool perf_sample_save_hw_index(struct perf_event *event) 6559 + { 6560 + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; 6561 + } 6562 + 6710 6563 void perf_output_sample(struct perf_output_handle *handle, 6711 6564 struct perf_event_header *header, 6712 6565 struct perf_sample_data *data, ··· 6800 6643 * sizeof(struct perf_branch_entry); 6801 6644 6802 6645 perf_output_put(handle, data->br_stack->nr); 6646 + if (perf_sample_save_hw_index(event)) 6647 + perf_output_put(handle, data->br_stack->hw_idx); 6803 6648 perf_output_copy(handle, data->br_stack->entries, size); 6804 6649 } else { 6805 6650 /* ··· 6995 6836 if (sample_type & PERF_SAMPLE_BRANCH_STACK) { 6996 6837 int size = sizeof(u64); /* nr */ 6997 6838 if (data->br_stack) { 6839 + if (perf_sample_save_hw_index(event)) 6840 + size += sizeof(u64); 6841 + 6998 6842 size += data->br_stack->nr 6999 6843 * sizeof(struct perf_branch_entry); 7000 6844 } ··· 10511 10349 cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask); 10512 10350 10513 10351 __perf_mux_hrtimer_init(cpuctx, cpu); 10352 + 10353 + cpuctx->heap_size = ARRAY_SIZE(cpuctx->heap_default); 10354 + cpuctx->heap = cpuctx->heap_default; 10514 10355 } 10515 10356 10516 10357 got_cpu_context: ··· 10959 10794 if (!has_branch_stack(event)) 10960 10795 event->attr.branch_sample_type = 0; 10961 
10796 10962 - if (cgroup_fd != -1) { 10963 - err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); 10964 - if (err) 10965 - goto err_ns; 10966 - } 10967 - 10968 10797 pmu = perf_init_event(event); 10969 10798 if (IS_ERR(pmu)) { 10970 10799 err = PTR_ERR(pmu); ··· 10978 10819 !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) { 10979 10820 err = -EOPNOTSUPP; 10980 10821 goto err_pmu; 10822 + } 10823 + 10824 + if (cgroup_fd != -1) { 10825 + err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); 10826 + if (err) 10827 + goto err_pmu; 10981 10828 } 10982 10829 10983 10830 err = exclusive_event_init(event); ··· 11046 10881 exclusive_event_destroy(event); 11047 10882 11048 10883 err_pmu: 10884 + if (is_cgroup_event(event)) 10885 + perf_detach_cgroup(event); 11049 10886 if (event->destroy) 11050 10887 event->destroy(event); 11051 10888 module_put(pmu->module); 11052 10889 err_ns: 11053 - if (is_cgroup_event(event)) 11054 - perf_detach_cgroup(event); 11055 10890 if (event->ns) 11056 10891 put_pid_ns(event->ns); 11057 10892 if (event->hw.target)

+10

lib/Kconfig.debug

··· 1786 1786 1787 1787 If unsure, say N. 1788 1788 1789 + config TEST_MIN_HEAP 1790 + tristate "Min heap test" 1791 + depends on DEBUG_KERNEL || m 1792 + help 1793 + Enable this to turn on min heap function tests. This test is 1794 + executed only once during system boot (so affects only boot time), 1795 + or at module load time. 1796 + 1797 + If unsure, say N. 1798 + 1789 1799 config TEST_SORT 1790 1800 tristate "Array-based sort test" 1791 1801 depends on DEBUG_KERNEL || m

+1

lib/Makefile

··· 67 67 UBSAN_SANITIZE_test_ubsan.o := y 68 68 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o 69 69 obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o 70 + obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o 70 71 obj-$(CONFIG_TEST_LKM) += test_module.o 71 72 obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o 72 73 obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o

+194

lib/test_min_heap.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #define pr_fmt(fmt) "min_heap_test: " fmt 3 + 4 + /* 5 + * Test cases for the min max heap. 6 + */ 7 + 8 + #include <linux/log2.h> 9 + #include <linux/min_heap.h> 10 + #include <linux/module.h> 11 + #include <linux/printk.h> 12 + #include <linux/random.h> 13 + 14 + static __init bool less_than(const void *lhs, const void *rhs) 15 + { 16 + return *(int *)lhs < *(int *)rhs; 17 + } 18 + 19 + static __init bool greater_than(const void *lhs, const void *rhs) 20 + { 21 + return *(int *)lhs > *(int *)rhs; 22 + } 23 + 24 + static __init void swap_ints(void *lhs, void *rhs) 25 + { 26 + int temp = *(int *)lhs; 27 + 28 + *(int *)lhs = *(int *)rhs; 29 + *(int *)rhs = temp; 30 + } 31 + 32 + static __init int pop_verify_heap(bool min_heap, 33 + struct min_heap *heap, 34 + const struct min_heap_callbacks *funcs) 35 + { 36 + int *values = heap->data; 37 + int err = 0; 38 + int last; 39 + 40 + last = values[0]; 41 + min_heap_pop(heap, funcs); 42 + while (heap->nr > 0) { 43 + if (min_heap) { 44 + if (last > values[0]) { 45 + pr_err("error: expected %d <= %d\n", last, 46 + values[0]); 47 + err++; 48 + } 49 + } else { 50 + if (last < values[0]) { 51 + pr_err("error: expected %d >= %d\n", last, 52 + values[0]); 53 + err++; 54 + } 55 + } 56 + last = values[0]; 57 + min_heap_pop(heap, funcs); 58 + } 59 + return err; 60 + } 61 + 62 + static __init int test_heapify_all(bool min_heap) 63 + { 64 + int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, 65 + -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; 66 + struct min_heap heap = { 67 + .data = values, 68 + .nr = ARRAY_SIZE(values), 69 + .size = ARRAY_SIZE(values), 70 + }; 71 + struct min_heap_callbacks funcs = { 72 + .elem_size = sizeof(int), 73 + .less = min_heap ? less_than : greater_than, 74 + .swp = swap_ints, 75 + }; 76 + int i, err; 77 + 78 + /* Test with known set of values. 
*/ 79 + min_heapify_all(&heap, &funcs); 80 + err = pop_verify_heap(min_heap, &heap, &funcs); 81 + 82 + 83 + /* Test with randomly generated values. */ 84 + heap.nr = ARRAY_SIZE(values); 85 + for (i = 0; i < heap.nr; i++) 86 + values[i] = get_random_int(); 87 + 88 + min_heapify_all(&heap, &funcs); 89 + err += pop_verify_heap(min_heap, &heap, &funcs); 90 + 91 + return err; 92 + } 93 + 94 + static __init int test_heap_push(bool min_heap) 95 + { 96 + const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, 97 + -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; 98 + int values[ARRAY_SIZE(data)]; 99 + struct min_heap heap = { 100 + .data = values, 101 + .nr = 0, 102 + .size = ARRAY_SIZE(values), 103 + }; 104 + struct min_heap_callbacks funcs = { 105 + .elem_size = sizeof(int), 106 + .less = min_heap ? less_than : greater_than, 107 + .swp = swap_ints, 108 + }; 109 + int i, temp, err; 110 + 111 + /* Test with known set of values copied from data. */ 112 + for (i = 0; i < ARRAY_SIZE(data); i++) 113 + min_heap_push(&heap, &data[i], &funcs); 114 + 115 + err = pop_verify_heap(min_heap, &heap, &funcs); 116 + 117 + /* Test with randomly generated values. */ 118 + while (heap.nr < heap.size) { 119 + temp = get_random_int(); 120 + min_heap_push(&heap, &temp, &funcs); 121 + } 122 + err += pop_verify_heap(min_heap, &heap, &funcs); 123 + 124 + return err; 125 + } 126 + 127 + static __init int test_heap_pop_push(bool min_heap) 128 + { 129 + const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, 130 + -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; 131 + int values[ARRAY_SIZE(data)]; 132 + struct min_heap heap = { 133 + .data = values, 134 + .nr = 0, 135 + .size = ARRAY_SIZE(values), 136 + }; 137 + struct min_heap_callbacks funcs = { 138 + .elem_size = sizeof(int), 139 + .less = min_heap ? less_than : greater_than, 140 + .swp = swap_ints, 141 + }; 142 + int i, temp, err; 143 + 144 + /* Fill values with data to pop and replace. */ 145 + temp = min_heap ? 
0x80000000 : 0x7FFFFFFF; 146 + for (i = 0; i < ARRAY_SIZE(data); i++) 147 + min_heap_push(&heap, &temp, &funcs); 148 + 149 + /* Test with known set of values copied from data. */ 150 + for (i = 0; i < ARRAY_SIZE(data); i++) 151 + min_heap_pop_push(&heap, &data[i], &funcs); 152 + 153 + err = pop_verify_heap(min_heap, &heap, &funcs); 154 + 155 + heap.nr = 0; 156 + for (i = 0; i < ARRAY_SIZE(data); i++) 157 + min_heap_push(&heap, &temp, &funcs); 158 + 159 + /* Test with randomly generated values. */ 160 + for (i = 0; i < ARRAY_SIZE(data); i++) { 161 + temp = get_random_int(); 162 + min_heap_pop_push(&heap, &temp, &funcs); 163 + } 164 + err += pop_verify_heap(min_heap, &heap, &funcs); 165 + 166 + return err; 167 + } 168 + 169 + static int __init test_min_heap_init(void) 170 + { 171 + int err = 0; 172 + 173 + err += test_heapify_all(true); 174 + err += test_heapify_all(false); 175 + err += test_heap_push(true); 176 + err += test_heap_push(false); 177 + err += test_heap_pop_push(true); 178 + err += test_heap_pop_push(false); 179 + if (err) { 180 + pr_err("test failed with %d errors\n", err); 181 + return -EINVAL; 182 + } 183 + pr_info("test passed\n"); 184 + return 0; 185 + } 186 + module_init(test_min_heap_init); 187 + 188 + static void __exit test_min_heap_exit(void) 189 + { 190 + /* do nothing */ 191 + } 192 + module_exit(test_min_heap_exit); 193 + 194 + MODULE_LICENSE("GPL");

+6 -8

sound/soc/intel/common/soc-intel-quirks.h

··· 15 15 #include <asm/intel-family.h> 16 16 #include <asm/iosf_mbi.h> 17 17 18 - #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } 19 - 20 18 #define SOC_INTEL_IS_CPU(soc, type) \ 21 19 static inline bool soc_intel_is_##soc(void) \ 22 20 { \ 23 21 static const struct x86_cpu_id soc##_cpu_ids[] = { \ 24 - ICPU(type), \ 22 + X86_MATCH_INTEL_FAM6_MODEL(type, NULL), \ 25 23 {} \ 26 24 }; \ 27 25 const struct x86_cpu_id *id; \ ··· 30 32 return false; \ 31 33 } 32 34 33 - SOC_INTEL_IS_CPU(byt, INTEL_FAM6_ATOM_SILVERMONT); 34 - SOC_INTEL_IS_CPU(cht, INTEL_FAM6_ATOM_AIRMONT); 35 - SOC_INTEL_IS_CPU(apl, INTEL_FAM6_ATOM_GOLDMONT); 36 - SOC_INTEL_IS_CPU(glk, INTEL_FAM6_ATOM_GOLDMONT_PLUS); 37 - SOC_INTEL_IS_CPU(cml, INTEL_FAM6_KABYLAKE_L); 35 + SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT); 36 + SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT); 37 + SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT); 38 + SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS); 39 + SOC_INTEL_IS_CPU(cml, KABYLAKE_L); 38 40 39 41 static inline bool soc_intel_is_byt_cr(struct platform_device *pdev) 40 42 {

+7 -1

tools/include/uapi/linux/perf_event.h

··· 181 181 182 182 PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ 183 183 184 + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ 185 + 184 186 PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ 185 187 }; 186 188 ··· 209 207 210 208 PERF_SAMPLE_BRANCH_TYPE_SAVE = 211 209 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, 210 + 211 + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, 212 212 213 213 PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, 214 214 }; ··· 857 853 * char data[size];}&& PERF_SAMPLE_RAW 858 854 * 859 855 * { u64 nr; 860 - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK 856 + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX 857 + * { u64 from, to, flags } lbr[nr]; 858 + * } && PERF_SAMPLE_BRANCH_STACK 861 859 * 862 860 * { u64 abi; # enum perf_sample_regs_abi 863 861 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER

+1

tools/lib/api/fs/Build

··· 1 1 libapi-y += fs.o 2 2 libapi-y += tracing_path.o 3 + libapi-y += cgroup.o

+67

tools/lib/api/fs/cgroup.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/stringify.h> 3 + #include <sys/types.h> 4 + #include <sys/stat.h> 5 + #include <fcntl.h> 6 + #include <stdio.h> 7 + #include <stdlib.h> 8 + #include <string.h> 9 + #include "fs.h" 10 + 11 + int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) 12 + { 13 + FILE *fp; 14 + char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; 15 + char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; 16 + char *token, *saved_ptr = NULL; 17 + 18 + fp = fopen("/proc/mounts", "r"); 19 + if (!fp) 20 + return -1; 21 + 22 + /* 23 + * in order to handle split hierarchy, we need to scan /proc/mounts 24 + * and inspect every cgroupfs mount point to find one that has 25 + * perf_event subsystem 26 + */ 27 + path_v1[0] = '\0'; 28 + path_v2[0] = '\0'; 29 + 30 + while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" 31 + __stringify(PATH_MAX)"s %*d %*d\n", 32 + mountpoint, type, tokens) == 3) { 33 + 34 + if (!path_v1[0] && !strcmp(type, "cgroup")) { 35 + 36 + token = strtok_r(tokens, ",", &saved_ptr); 37 + 38 + while (token != NULL) { 39 + if (subsys && !strcmp(token, subsys)) { 40 + strcpy(path_v1, mountpoint); 41 + break; 42 + } 43 + token = strtok_r(NULL, ",", &saved_ptr); 44 + } 45 + } 46 + 47 + if (!path_v2[0] && !strcmp(type, "cgroup2")) 48 + strcpy(path_v2, mountpoint); 49 + 50 + if (path_v1[0] && path_v2[0]) 51 + break; 52 + } 53 + fclose(fp); 54 + 55 + if (path_v1[0]) 56 + path = path_v1; 57 + else if (path_v2[0]) 58 + path = path_v2; 59 + else 60 + return -1; 61 + 62 + if (strlen(path) < maxlen) { 63 + strcpy(buf, path); 64 + return 0; 65 + } 66 + return -1; 67 + }

+2

tools/lib/api/fs/fs.h

··· 28 28 #undef FS 29 29 30 30 31 + int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys); 32 + 31 33 int filename__read_int(const char *filename, int *value); 32 34 int filename__read_ull(const char *filename, unsigned long long *value); 33 35 int filename__read_xll(const char *filename, unsigned long long *value);

+83

tools/lib/perf/Documentation/examples/counting.c

··· 1 + #include <linux/perf_event.h> 2 + #include <perf/evlist.h> 3 + #include <perf/evsel.h> 4 + #include <perf/cpumap.h> 5 + #include <perf/threadmap.h> 6 + #include <perf/mmap.h> 7 + #include <perf/core.h> 8 + #include <perf/event.h> 9 + #include <stdio.h> 10 + #include <unistd.h> 11 + 12 + static int libperf_print(enum libperf_print_level level, 13 + const char *fmt, va_list ap) 14 + { 15 + return vfprintf(stderr, fmt, ap); 16 + } 17 + 18 + int main(int argc, char **argv) 19 + { 20 + int count = 100000, err = 0; 21 + struct perf_evlist *evlist; 22 + struct perf_evsel *evsel; 23 + struct perf_thread_map *threads; 24 + struct perf_counts_values counts; 25 + 26 + struct perf_event_attr attr1 = { 27 + .type = PERF_TYPE_SOFTWARE, 28 + .config = PERF_COUNT_SW_CPU_CLOCK, 29 + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, 30 + .disabled = 1, 31 + }; 32 + struct perf_event_attr attr2 = { 33 + .type = PERF_TYPE_SOFTWARE, 34 + .config = PERF_COUNT_SW_TASK_CLOCK, 35 + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, 36 + .disabled = 1, 37 + }; 38 + 39 + libperf_init(libperf_print); 40 + threads = perf_thread_map__new_dummy(); 41 + if (!threads) { 42 + fprintf(stderr, "failed to create threads\n"); 43 + return -1; 44 + } 45 + perf_thread_map__set_pid(threads, 0, 0); 46 + evlist = perf_evlist__new(); 47 + if (!evlist) { 48 + fprintf(stderr, "failed to create evlist\n"); 49 + goto out_threads; 50 + } 51 + evsel = perf_evsel__new(&attr1); 52 + if (!evsel) { 53 + fprintf(stderr, "failed to create evsel1\n"); 54 + goto out_evlist; 55 + } 56 + perf_evlist__add(evlist, evsel); 57 + evsel = perf_evsel__new(&attr2); 58 + if (!evsel) { 59 + fprintf(stderr, "failed to create evsel2\n"); 60 + goto out_evlist; 61 + } 62 + perf_evlist__add(evlist, evsel); 63 + perf_evlist__set_maps(evlist, NULL, threads); 64 + err = perf_evlist__open(evlist); 65 + if (err) { 66 + fprintf(stderr, "failed to open evsel\n"); 67 + goto out_evlist; 
68 + } 69 + perf_evlist__enable(evlist); 70 + while (count--); 71 + perf_evlist__disable(evlist); 72 + perf_evlist__for_each_evsel(evlist, evsel) { 73 + perf_evsel__read(evsel, 0, 0, &counts); 74 + fprintf(stdout, "count %llu, enabled %llu, run %llu\n", 75 + counts.val, counts.ena, counts.run); 76 + } 77 + perf_evlist__close(evlist); 78 + out_evlist: 79 + perf_evlist__delete(evlist); 80 + out_threads: 81 + perf_thread_map__put(threads); 82 + return err; 83 + }

+1 -1

tools/lib/traceevent/event-parse.c

··· 5541 5541 if (p10 > 1 && p10 < time) 5542 5542 trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10); 5543 5543 else 5544 - trace_seq_printf(s, "%12llu\n", time); 5544 + trace_seq_printf(s, "%12llu", time); 5545 5545 } 5546 5546 5547 5547 struct print_event_type {

+4 -1

tools/perf/Documentation/Makefile

··· 295 295 $(OUTPUT)%.xml : %.txt 296 296 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 297 297 $(ASCIIDOC) -b docbook -d manpage \ 298 - $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 298 + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \ 299 + -aperf_date=$(shell git log -1 --pretty="format:%cd" \ 300 + --date=short $<) \ 301 + -o $@+ $< && \ 299 302 mv $@+ $@ 300 303 301 304 XSLT = docbook.xsl

+1 -991

tools/perf/Documentation/intel-pt.txt

··· 1 - Intel Processor Trace 2 - ===================== 3 - 4 - Overview 5 - ======== 6 - 7 - Intel Processor Trace (Intel PT) is an extension of Intel Architecture that 8 - collects information about software execution such as control flow, execution 9 - modes and timings and formats it into highly compressed binary packets. 10 - Technical details are documented in the Intel 64 and IA-32 Architectures 11 - Software Developer Manuals, Chapter 36 Intel Processor Trace. 12 - 13 - Intel PT is first supported in Intel Core M and 5th generation Intel Core 14 - processors that are based on the Intel micro-architecture code name Broadwell. 15 - 16 - Trace data is collected by 'perf record' and stored within the perf.data file. 17 - See below for options to 'perf record'. 18 - 19 - Trace data must be 'decoded' which involves walking the object code and matching 20 - the trace data packets. For example a TNT packet only tells whether a 21 - conditional branch was taken or not taken, so to make use of that packet the 22 - decoder must know precisely which instruction was being executed. 23 - 24 - Decoding is done on-the-fly. The decoder outputs samples in the same format as 25 - samples output by perf hardware events, for example as though the "instructions" 26 - or "branches" events had been recorded. Presently 3 tools support this: 27 - 'perf script', 'perf report' and 'perf inject'. See below for more information 28 - on using those tools. 29 - 30 - The main distinguishing feature of Intel PT is that the decoder can determine 31 - the exact flow of software execution. Intel PT can be used to understand why 32 - and how did software get to a certain point, or behave a certain way. The 33 - software does not have to be recompiled, so Intel PT works with debug or release 34 - builds, however the executed images are needed - which makes use in JIT-compiled 35 - environments, or with self-modified code, a challenge. 
Also symbols need to be 36 - provided to make sense of addresses. 37 - 38 - A limitation of Intel PT is that it produces huge amounts of trace data 39 - (hundreds of megabytes per second per core) which takes a long time to decode, 40 - for example two or three orders of magnitude longer than it took to collect. 41 - Another limitation is the performance impact of tracing, something that will 42 - vary depending on the use-case and architecture. 43 - 44 - 45 - Quickstart 46 - ========== 47 - 48 - It is important to start small. That is because it is easy to capture vastly 49 - more data than can possibly be processed. 50 - 51 - The simplest thing to do with Intel PT is userspace profiling of small programs. 52 - Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: 53 - 54 - perf record -e intel_pt//u ls 55 - 56 - And profiled with 'perf report' e.g. 57 - 58 - perf report 59 - 60 - To also trace kernel space presents a problem, namely kernel self-modifying 61 - code. A fairly good kernel image is available in /proc/kcore but to get an 62 - accurate image a copy of /proc/kcore needs to be made under the same conditions 63 - as the data capture. A script perf-with-kcore can do that, but beware that the 64 - script makes use of 'sudo' to copy /proc/kcore. If you have perf installed 65 - locally from the source tree you can do: 66 - 67 - ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls 68 - 69 - which will create a directory named 'pt_ls' and put the perf.data file and 70 - copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use 71 - 'perf report' becomes: 72 - 73 - ~/libexec/perf-core/perf-with-kcore report pt_ls 74 - 75 - Because samples are synthesized after-the-fact, the sampling period can be 76 - selected for reporting. e.g. sample every microsecond 77 - 78 - ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge 79 - 80 - See the sections below for more information about the --itrace option. 
81 - 82 - Beware the smaller the period, the more samples that are produced, and the 83 - longer it takes to process them. 84 - 85 - Also note that the coarseness of Intel PT timing information will start to 86 - distort the statistical value of the sampling as the sampling period becomes 87 - smaller. 88 - 89 - To represent software control flow, "branches" samples are produced. By default 90 - a branch sample is synthesized for every single branch. To get an idea what 91 - data is available you can use the 'perf script' tool with all itrace sampling 92 - options, which will list all the samples. 93 - 94 - perf record -e intel_pt//u ls 95 - perf script --itrace=ibxwpe 96 - 97 - An interesting field that is not printed by default is 'flags' which can be 98 - displayed as follows: 99 - 100 - perf script --itrace=ibxwpe -F+flags 101 - 102 - The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, 103 - system, asynchronous, interrupt, transaction abort, trace begin, trace end, and 104 - in transaction, respectively. 105 - 106 - Another interesting field that is not printed by default is 'ipc' which can be 107 - displayed as follows: 108 - 109 - perf script --itrace=be -F+ipc 110 - 111 - There are two ways that instructions-per-cycle (IPC) can be calculated depending 112 - on the recording. 113 - 114 - If the 'cyc' config term (see config terms section below) was used, then IPC is 115 - calculated using the cycle count from CYC packets, otherwise MTC packets are 116 - used - refer to the 'mtc' config term. When MTC is used, however, the values 117 - are less accurate because the timing is less accurate. 118 - 119 - Because Intel PT does not update the cycle count on every branch or instruction, 120 - the values will often be zero. When there are values, they will be the number 121 - of instructions and number of cycles since the last update, and thus represent 122 - the average IPC since the last IPC for that event type. 
Note IPC for "branches" 123 - events is calculated separately from IPC for "instructions" events. 124 - 125 - Also note that the IPC instruction count may or may not include the current 126 - instruction. If the cycle count is associated with an asynchronous branch 127 - (e.g. page fault or interrupt), then the instruction count does not include the 128 - current instruction, otherwise it does. That is consistent with whether or not 129 - that instruction has retired when the cycle count is updated. 130 - 131 - Another note, in the case of "branches" events, non-taken branches are not 132 - presently sampled, so IPC values for them do not appear e.g. a CYC packet with a 133 - TNT packet that starts with a non-taken branch. To see every possible IPC 134 - value, "instructions" events can be used e.g. --itrace=i0ns 135 - 136 - While it is possible to create scripts to analyze the data, an alternative 137 - approach is available to export the data to a sqlite or postgresql database. 138 - Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, 139 - and to script exported-sql-viewer.py for an example of using the database. 140 - 141 - There is also script intel-pt-events.py which provides an example of how to 142 - unpack the raw data for power events and PTWRITE. 143 - 144 - As mentioned above, it is easy to capture too much data. One way to limit the 145 - data captured is to use 'snapshot' mode which is explained further below. 146 - Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. 147 - 148 - Another problem that will be experienced is decoder errors. They can be caused 149 - by inability to access the executed image, self-modified or JIT-ed code, or the 150 - inability to match side-band information (such as context switches and mmaps) 151 - which results in the decoder not knowing what code was executed. 
152 - 153 - There is also the problem of perf not being able to copy the data fast enough, 154 - resulting in data lost because the buffer was full. See 'Buffer handling' below 155 - for more details. 156 - 157 - 158 - perf record 159 - =========== 160 - 161 - new event 162 - --------- 163 - 164 - The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are 165 - selected by providing the PMU name followed by the "config" separated by slashes. 166 - An enhancement has been made to allow default "config" e.g. the option 167 - 168 - -e intel_pt// 169 - 170 - will use a default config value. Currently that is the same as 171 - 172 - -e intel_pt/tsc,noretcomp=0/ 173 - 174 - which is the same as 175 - 176 - -e intel_pt/tsc=1,noretcomp=0/ 177 - 178 - Note there are now new config terms - see section 'config terms' further below. 179 - 180 - The config terms are listed in /sys/devices/intel_pt/format. They are bit 181 - fields within the config member of the struct perf_event_attr which is 182 - passed to the kernel by the perf_event_open system call. They correspond to bit 183 - fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: 184 - 185 - $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* 186 - /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 187 - /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 188 - /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 189 - /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 190 - /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 191 - /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 192 - /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 193 - 194 - Note that the default config must be overridden for each term i.e. 
195 - 196 - -e intel_pt/noretcomp=0/ 197 - 198 - is the same as: 199 - 200 - -e intel_pt/tsc=1,noretcomp=0/ 201 - 202 - So, to disable TSC packets use: 203 - 204 - -e intel_pt/tsc=0/ 205 - 206 - It is also possible to specify the config value explicitly: 207 - 208 - -e intel_pt/config=0x400/ 209 - 210 - Note that, as with all events, the event is suffixed with event modifiers: 211 - 212 - u userspace 213 - k kernel 214 - h hypervisor 215 - G guest 216 - H host 217 - p precise ip 218 - 219 - 'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. 220 - 'p' is also not relevant to Intel PT. So only options 'u' and 'k' are 221 - meaningful for Intel PT. 222 - 223 - perf_event_attr is displayed if the -vv option is used e.g. 224 - 225 - ------------------------------------------------------------ 226 - perf_event_attr: 227 - type 6 228 - size 112 229 - config 0x400 230 - { sample_period, sample_freq } 1 231 - sample_type IP|TID|TIME|CPU|IDENTIFIER 232 - read_format ID 233 - disabled 1 234 - inherit 1 235 - exclude_kernel 1 236 - exclude_hv 1 237 - enable_on_exec 1 238 - sample_id_all 1 239 - ------------------------------------------------------------ 240 - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 241 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 242 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 243 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 244 - ------------------------------------------------------------ 245 - 246 - 247 - config terms 248 - ------------ 249 - 250 - The June 2015 version of Intel 64 and IA-32 Architectures Software Developer 251 - Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. 252 - Some of the features are reflected in new config terms. All the config terms are 253 - described below. 254 - 255 - tsc Always supported. Produces TSC timestamp packets to provide 256 - timing information.
In some cases it is possible to decode 257 - without timing information, for example a per-thread context 258 - that does not overlap executable memory maps. 259 - 260 - The default config selects tsc (i.e. tsc=1). 261 - 262 - noretcomp Always supported. Disables "return compression" so a TIP packet 263 - is produced when a function returns. Causes more packets to be 264 - produced but might make decoding more reliable. 265 - 266 - The default config does not select noretcomp (i.e. noretcomp=0). 267 - 268 - psb_period Allows the frequency of PSB packets to be specified. 269 - 270 - The PSB packet is a synchronization packet that provides a 271 - starting point for decoding or recovery from errors. 272 - 273 - Support for psb_period is indicated by: 274 - 275 - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc 276 - 277 - which contains "1" if the feature is supported and "0" 278 - otherwise. 279 - 280 - Valid values are given by: 281 - 282 - /sys/bus/event_source/devices/intel_pt/caps/psb_periods 283 - 284 - which contains a hexadecimal value, the bits of which represent 285 - valid values e.g. bit 2 set means value 2 is valid. 286 - 287 - The psb_period value is converted to the approximate number of 288 - trace bytes between PSB packets as: 289 - 290 - 2 ^ (value + 11) 291 - 292 - e.g. value 3 means 16KiB bytes between PSBs 293 - 294 - If an invalid value is entered, the error message 295 - will give a list of valid values e.g. 296 - 297 - $ perf record -e intel_pt/psb_period=15/u uname 298 - Invalid psb_period for intel_pt. Valid values are: 0-5 299 - 300 - If MTC packets are selected, the default config selects a value 301 - of 3 (i.e. psb_period=3) or the nearest lower value that is 302 - supported (0 is always supported). Otherwise the default is 0. 303 - 304 - If decoding is expected to be reliable and the buffer is large 305 - then a large PSB period can be used. 
306 - 307 - Because a TSC packet is produced with PSB, the PSB period can 308 - also affect the granularity of timing information in the absence 309 - of MTC or CYC. 310 - 311 - mtc Produces MTC timing packets. 312 - 313 - MTC packets provide finer grain timestamp information than TSC 314 - packets. MTC packets record time using the hardware crystal 315 - clock (CTC) which is related to TSC packets using a TMA packet. 316 - 317 - Support for this feature is indicated by: 318 - 319 - /sys/bus/event_source/devices/intel_pt/caps/mtc 320 - 321 - which contains "1" if the feature is supported and 322 - "0" otherwise. 323 - 324 - The frequency of MTC packets can also be specified - see 325 - mtc_period below. 326 - 327 - mtc_period Specifies how frequently MTC packets are produced - see mtc 328 - above for how to determine if MTC packets are supported. 329 - 330 - Valid values are given by: 331 - 332 - /sys/bus/event_source/devices/intel_pt/caps/mtc_periods 333 - 334 - which contains a hexadecimal value, the bits of which represent 335 - valid values e.g. bit 2 set means value 2 is valid. 336 - 337 - The mtc_period value is converted to the MTC frequency as: 338 - 339 - CTC-frequency / (2 ^ value) 340 - 341 - e.g. value 3 means one eighth of CTC-frequency 342 - 343 - Where CTC is the hardware crystal clock, the frequency of which 344 - can be related to TSC via values provided in cpuid leaf 0x15. 345 - 346 - If an invalid value is entered, the error message 347 - will give a list of valid values e.g. 348 - 349 - $ perf record -e intel_pt/mtc_period=15/u uname 350 - Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 351 - 352 - The default value is 3 or the nearest lower value 353 - that is supported (0 is always supported). 354 - 355 - cyc Produces CYC timing packets. 356 - 357 - CYC packets provide even finer grain timestamp information than 358 - MTC and TSC packets. A CYC packet contains the number of CPU 359 - cycles since the last CYC packet.
Unlike MTC and TSC packets, 360 - CYC packets are only sent when another packet is also sent. 361 - 362 - Support for this feature is indicated by: 363 - 364 - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc 365 - 366 - which contains "1" if the feature is supported and 367 - "0" otherwise. 368 - 369 - The number of CYC packets produced can be reduced by specifying 370 - a threshold - see cyc_thresh below. 371 - 372 - cyc_thresh Specifies how frequently CYC packets are produced - see cyc 373 - above for how to determine if CYC packets are supported. 374 - 375 - Valid cyc_thresh values are given by: 376 - 377 - /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds 378 - 379 - which contains a hexadecimal value, the bits of which represent 380 - valid values e.g. bit 2 set means value 2 is valid. 381 - 382 - The cyc_thresh value represents the minimum number of CPU cycles 383 - that must have passed before a CYC packet can be sent. The 384 - number of CPU cycles is: 385 - 386 - 2 ^ (value - 1) 387 - 388 - e.g. value 4 means 8 CPU cycles must pass before a CYC packet 389 - can be sent. Note a CYC packet is still only sent when another 390 - packet is sent, not at, e.g. every 8 CPU cycles. 391 - 392 - If an invalid value is entered, the error message 393 - will give a list of valid values e.g. 394 - 395 - $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname 396 - Invalid cyc_thresh for intel_pt. Valid values are: 0-12 397 - 398 - CYC packets are not requested by default. 399 - 400 - pt Specifies pass-through which enables the 'branch' config term. 401 - 402 - The default config selects 'pt' if it is available, so a user will 403 - never need to specify this term. 404 - 405 - branch Enable branch tracing. Branch tracing is enabled by default so to 406 - disable branch tracing use 'branch=0'. 407 - 408 - The default config selects 'branch' if it is available. 409 - 410 - ptw Enable PTWRITE packets which are produced when a ptwrite instruction 411 - is executed. 
412 - 413 - Support for this feature is indicated by: 414 - 415 - /sys/bus/event_source/devices/intel_pt/caps/ptwrite 416 - 417 - which contains "1" if the feature is supported and 418 - "0" otherwise. 419 - 420 - fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet 421 - provides the address of the ptwrite instruction. In the absence of 422 - fup_on_ptw, the decoder will use the address of the previous branch 423 - if branch tracing is enabled, otherwise the address will be zero. 424 - Note that fup_on_ptw will work even when branch tracing is disabled. 425 - 426 - pwr_evt Enable power events. The power events provide information about 427 - changes to the CPU C-state. 428 - 429 - Support for this feature is indicated by: 430 - 431 - /sys/bus/event_source/devices/intel_pt/caps/power_event_trace 432 - 433 - which contains "1" if the feature is supported and 434 - "0" otherwise. 435 - 436 - 437 - AUX area sampling option 438 - ------------------------ 439 - 440 - To select Intel PT "sampling" the AUX area sampling option can be used: 441 - 442 - --aux-sample 443 - 444 - Optionally it can be followed by the sample size in bytes e.g. 445 - 446 - --aux-sample=8192 447 - 448 - In addition, the Intel PT event to sample must be defined e.g. 449 - 450 - -e intel_pt//u 451 - 452 - Samples on other events will be created containing Intel PT data e.g. the 453 - following will create Intel PT samples on the branch-misses event, note the 454 - events must be grouped using {}: 455 - 456 - perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' 457 - 458 - An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to 459 - events. In this case, the grouping is implied e.g. 
460 - 461 - perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u 462 - 463 - is the same as: 464 - 465 - perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' 466 - 467 - but allows for also using an address filter e.g.: 468 - 469 - perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls 470 - 471 - It is important to select a sample size that is big enough to contain at least 472 - one PSB packet. If not a warning will be displayed: 473 - 474 - Intel PT sample size (%zu) may be too small for PSB period (%zu) 475 - 476 - The calculation used for that is: if sample_size <= psb_period + 256 display the 477 - warning. When sampling is used, psb_period defaults to 0 (2KiB). 478 - 479 - The default sample size is 4KiB. 480 - 481 - The sample size is passed in aux_sample_size in struct perf_event_attr. The 482 - sample size is limited by the maximum event size which is 64KiB. It is 483 - difficult to know how big the event might be without the trace sample attached, 484 - but the tool validates that the sample size is not greater than 60KiB. 485 - 486 - 487 - new snapshot option 488 - ------------------- 489 - 490 - The difference between full trace and snapshot from the kernel's perspective is 491 - that in full trace we don't overwrite trace data that the user hasn't collected 492 - yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let 493 - the trace run and overwrite older data in the buffer so that whenever something 494 - interesting happens, we can stop it and grab a snapshot of what was going on 495 - around that interesting moment. 496 - 497 - To select snapshot mode a new option has been added: 498 - 499 - -S 500 - 501 - Optionally it can be followed by the snapshot size e.g. 502 - 503 - -S0x100000 504 - 505 - The default snapshot size is the auxtrace mmap size. 
If neither auxtrace mmap size 506 - nor snapshot size is specified, then the default is 4MiB for privileged users 507 - (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. 508 - If an unprivileged user does not specify mmap pages, the mmap pages will be 509 - reduced as described in the 'new auxtrace mmap size option' section below. 510 - 511 - The snapshot size is displayed if the option -vv is used e.g. 512 - 513 - Intel PT snapshot size: %zu 514 - 515 - 516 - new auxtrace mmap size option 517 - --------------------------- 518 - 519 - Intel PT buffer size is specified by an addition to the -m option e.g. 520 - 521 - -m,16 522 - 523 - selects a buffer size of 16 pages i.e. 64KiB. 524 - 525 - Note that the existing functionality of -m is unchanged. The auxtrace mmap size 526 - is specified by the optional addition of a comma and the value. 527 - 528 - The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users 529 - (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. 530 - If an unprivileged user does not specify mmap pages, the mmap pages will be 531 - reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the 532 - user is likely to get an error as they exceed their mlock limit (Max locked 533 - memory as shown in /proc/self/limits). Note that perf does not count the first 534 - 512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu 535 - against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus 536 - their mlock limit (which defaults to 64KiB but is not multiplied by the number 537 - of cpus). 538 - 539 - In full-trace mode, powers of two are allowed for buffer size, with a minimum 540 - size of 2 pages. In snapshot mode or sampling mode, it is the same but the 541 - minimum size is 1 page. 542 - 543 - The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. 
544 - 545 - mmap length 528384 546 - auxtrace mmap length 4198400 547 - 548 - 549 - Intel PT modes of operation 550 - --------------------------- 551 - 552 - Intel PT can be used in 3 modes: 553 - full-trace mode 554 - sample mode 555 - snapshot mode 556 - 557 - Full-trace mode traces continuously e.g. 558 - 559 - perf record -e intel_pt//u uname 560 - 561 - Sample mode attaches an Intel PT sample to other events e.g. 562 - 563 - perf record --aux-sample -e intel_pt//u -e branch-misses:u 564 - 565 - Snapshot mode captures the available data when a signal is sent e.g. 566 - 567 - perf record -v -e intel_pt//u -S ./loopy 1000000000 & 568 - [1] 11435 569 - kill -USR2 11435 570 - Recording AUX area tracing snapshot 571 - 572 - Note that the signal sent is SIGUSR2. 573 - Note that "Recording AUX area tracing snapshot" is displayed because the -v 574 - option is used. 575 - 576 - The 3 modes cannot be used together. 577 - 578 - 579 - Buffer handling 580 - --------------- 581 - 582 - There may be buffer limitations (i.e. single ToPa entry) which means that actual 583 - buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to 584 - provide other sizes, and in particular an arbitrarily large size, multiple 585 - buffers are logically concatenated. However an interrupt must be used to switch 586 - between buffers. That has two potential problems: 587 - a) the interrupt may not be handled in time so that the current buffer 588 - becomes full and some trace data is lost. 589 - b) the interrupts may slow the system and affect the performance 590 - results. 591 - 592 - If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event 593 - which the tools report as an error. 594 - 595 - In full-trace mode, the driver waits for data to be copied out before allowing 596 - the (logical) buffer to wrap-around. If data is not copied out quickly enough, 597 - again 'truncated' is set in the PERF_RECORD_AUX event.
If the driver has to 598 - wait, the intel_pt event gets disabled. Because it is difficult to know when 599 - that happens, perf tools always re-enable the intel_pt event after copying out 600 - data. 601 - 602 - 603 - Intel PT and build ids 604 - ---------------------- 605 - 606 - By default "perf record" post-processes the event stream to find all build ids 607 - for executables for all addresses sampled. Deliberately, Intel PT is not 608 - decoded for that purpose (it would take too long). Instead the build ids for 609 - all executables encountered (due to mmap, comm or task events) are included 610 - in the perf.data file. 611 - 612 - To see buildids included in the perf.data file use the command: 613 - 614 - perf buildid-list 615 - 616 - If the perf.data file contains Intel PT data, that is the same as: 617 - 618 - perf buildid-list --with-hits 619 - 620 - 621 - Snapshot mode and event disabling 622 - --------------------------------- 623 - 624 - In order to make a snapshot, the intel_pt event is disabled using an IOCTL, 625 - namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the 626 - collection of side-band information. In order to prevent that, a dummy 627 - software event has been introduced that permits tracking events (like mmaps) to 628 - continue to be recorded while intel_pt is disabled. That is important to ensure 629 - there is complete side-band information to allow the decoding of subsequent 630 - snapshots. 631 - 632 - A test has been created for that. To find the test: 633 - 634 - perf test list 635 - ... 
636 - 23: Test using a dummy software event to keep tracking 637 - 638 - To run the test: 639 - 640 - perf test 23 641 - 23: Test using a dummy software event to keep tracking : Ok 642 - 643 - 644 - perf record modes (nothing new here) 645 - ------------------------------------ 646 - 647 - perf record essentially operates in one of three modes: 648 - per thread 649 - per cpu 650 - workload only 651 - 652 - "per thread" mode is selected by -t or by --per-thread (with -p or -u or just a 653 - workload). 654 - "per cpu" is selected by -C or -a. 655 - "workload only" mode is selected by not using the other options but providing a 656 - command to run (i.e. the workload). 657 - 658 - In per-thread mode an exact list of threads is traced. There is no inheritance. 659 - Each thread has its own event buffer. 660 - 661 - In per-cpu mode all processes (or processes from the selected cgroup i.e. -G 662 - option, or processes selected with -p or -u) are traced. Each cpu has its own 663 - buffer. Inheritance is allowed. 664 - 665 - In workload-only mode, the workload is traced but with per-cpu buffers. 666 - Inheritance is allowed. Note that you can now trace a workload in per-thread 667 - mode by using the --per-thread option. 668 - 669 - 670 - Privileged vs non-privileged users 671 - ---------------------------------- 672 - 673 - Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users 674 - have memory limits imposed upon them. That affects what buffer sizes they can 675 - have as outlined above. 676 - 677 - The v4.2 kernel introduced support for a context switch metadata event, 678 - PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes 679 - are scheduled out and in, just not by whom, which is left for the 680 - PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, 681 - which in turn requires CAP_SYS_ADMIN. 
682 - 683 - Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context 684 - switches") commit, that introduces these metadata events for further info. 685 - 686 - When working with kernels < v4.2, the following considerations must be taken, 687 - as the sched:sched_switch tracepoints will be used to receive such information: 688 - 689 - Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are 690 - not permitted to use tracepoints which means there is insufficient side-band 691 - information to decode Intel PT in per-cpu mode, and potentially workload-only 692 - mode too if the workload creates new processes. 693 - 694 - Note also, that to use tracepoints, read-access to debugfs is required. So if 695 - debugfs is not mounted or the user does not have read-access, it will again not 696 - be possible to decode Intel PT in per-cpu mode. 697 - 698 - 699 - sched_switch tracepoint 700 - ----------------------- 701 - 702 - The sched_switch tracepoint is used to provide side-band data for Intel PT 703 - decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't 704 - available. 705 - 706 - The sched_switch events are automatically added. e.g. 
the second event shown 707 - below: 708 - 709 - $ perf record -vv -e intel_pt//u uname 710 - ------------------------------------------------------------ 711 - perf_event_attr: 712 - type 6 713 - size 112 714 - config 0x400 715 - { sample_period, sample_freq } 1 716 - sample_type IP|TID|TIME|CPU|IDENTIFIER 717 - read_format ID 718 - disabled 1 719 - inherit 1 720 - exclude_kernel 1 721 - exclude_hv 1 722 - enable_on_exec 1 723 - sample_id_all 1 724 - ------------------------------------------------------------ 725 - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 726 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 727 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 728 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 729 - ------------------------------------------------------------ 730 - perf_event_attr: 731 - type 2 732 - size 112 733 - config 0x108 734 - { sample_period, sample_freq } 1 735 - sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER 736 - read_format ID 737 - inherit 1 738 - sample_id_all 1 739 - exclude_guest 1 740 - ------------------------------------------------------------ 741 - sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 742 - sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 743 - sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 744 - sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 745 - ------------------------------------------------------------ 746 - perf_event_attr: 747 - type 1 748 - size 112 749 - config 0x9 750 - { sample_period, sample_freq } 1 751 - sample_type IP|TID|TIME|IDENTIFIER 752 - read_format ID 753 - disabled 1 754 - inherit 1 755 - exclude_kernel 1 756 - exclude_hv 1 757 - mmap 1 758 - comm 1 759 - enable_on_exec 1 760 - task 1 761 - sample_id_all 1 762 - mmap2 1 763 - comm_exec 1 764 - ------------------------------------------------------------ 765 - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 766 - sys_perf_event_open: pid 31104 
cpu 1 group_fd -1 flags 0x8 767 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 768 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 769 - mmap size 528384B 770 - AUX area mmap length 4194304 771 - perf event ring buffer mmapped per cpu 772 - Synthesizing auxtrace information 773 - Linux 774 - [ perf record: Woken up 1 times to write data ] 775 - [ perf record: Captured and wrote 0.042 MB perf.data ] 776 - 777 - Note, the sched_switch event is only added if the user is permitted to use it 778 - and only in per-cpu mode. 779 - 780 - Note also, the sched_switch event is only added if TSC packets are requested. 781 - That is because, in the absence of timing information, the sched_switch events 782 - cannot be matched against the Intel PT trace. 783 - 784 - 785 - perf script 786 - =========== 787 - 788 - By default, perf script will decode trace data found in the perf.data file. 789 - This can be further controlled by new option --itrace. 790 - 791 - 792 - New --itrace option 793 - ------------------- 794 - 795 - Having no option is the same as 796 - 797 - --itrace 798 - 799 - which, in turn, is the same as 800 - 801 - --itrace=cepwx 802 - 803 - The letters are: 804 - 805 - i synthesize "instructions" events 806 - b synthesize "branches" events 807 - x synthesize "transactions" events 808 - w synthesize "ptwrite" events 809 - p synthesize "power" events 810 - c synthesize branches events (calls only) 811 - r synthesize branches events (returns only) 812 - e synthesize tracing error events 813 - d create a debug log 814 - g synthesize a call chain (use with i or x) 815 - l synthesize last branch entries (use with i or x) 816 - s skip initial number of events 817 - 818 - "Instructions" events look like they were recorded by "perf record -e 819 - instructions". 820 - 821 - "Branches" events look like they were recorded by "perf record -e branches". "c" 822 - and "r" can be combined to get calls and returns. 
823 - 824 - "Transactions" events correspond to the start or end of transactions. The 825 - 'flags' field can be used in perf script to determine whether the event is a 826 - transaction start, commit or abort. 827 - 828 - Note that "instructions", "branches" and "transactions" events depend on code 829 - flow packets which can be disabled by using the config term "branch=0". Refer 830 - to the config terms section above. 831 - 832 - "ptwrite" events record the payload of the ptwrite instruction and whether 833 - "fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are 834 - recorded only if the "ptw" config term was used. Refer to the config terms 835 - section above. perf script "synth" field displays "ptwrite" information like 836 - this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was 837 - used. 838 - 839 - "Power" events correspond to power event packets and CBR (core-to-bus ratio) 840 - packets. While CBR packets are always recorded when tracing is enabled, power 841 - event packets are recorded only if the "pwr_evt" config term was used. Refer to 842 - the config terms section above. The power events record information about 843 - C-state changes, whereas CBR is indicative of CPU frequency.
perf script 844 - "event,synth" fields display information like this: 845 - cbr: cbr: 22 freq: 2189 MHz (200%) 846 - mwait: hints: 0x60 extensions: 0x1 847 - pwre: hw: 0 cstate: 2 sub-cstate: 0 848 - exstop: ip: 1 849 - pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 850 - Where: 851 - "cbr" includes the frequency and the percentage of maximum non-turbo 852 - "mwait" shows mwait hints and extensions 853 - "pwre" shows C-state transitions (to a C-state deeper than C0) and 854 - whether initiated by hardware 855 - "exstop" indicates execution stopped and whether the IP was recorded 856 - exactly, 857 - "pwrx" indicates return to C0 858 - For more details refer to the Intel 64 and IA-32 Architectures Software 859 - Developer Manuals. 860 - 861 - Error events show where the decoder lost the trace. Error events 862 - are quite important. Users must know if what they are seeing is a complete 863 - picture or not. 864 - 865 - The "d" option will cause the creation of a file "intel_pt.log" containing all 866 - decoded packets and instructions. Note that this option slows down the decoder 867 - and that the resulting file may be very large. 868 - 869 - In addition, the period of the "instructions" event can be specified. e.g. 870 - 871 - --itrace=i10us 872 - 873 - sets the period to 10us i.e. one instruction sample is synthesized for each 10 874 - microseconds of trace. Alternatives to "us" are "ms" (milliseconds), 875 - "ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). 876 - 877 - "ms", "us" and "ns" are converted to TSC ticks. 878 - 879 - The timing information included with Intel PT does not give the time of every 880 - instruction. Consequently, for the purpose of sampling, the decoder estimates 881 - the time since the last timing packet based on 1 tick per instruction. The time 882 - on the sample is *not* adjusted and reflects the last known value of TSC. 883 - 884 - For Intel PT, the default period is 100us. 
885 - 886 - Setting it to a zero period means "as often as possible". 887 - 888 - In the case of Intel PT that is the same as a period of 1 and a unit of 889 - 'instructions' (i.e. --itrace=i1i). 890 - 891 - Also the call chain size (default 16, max. 1024) for instructions or 892 - transactions events can be specified. e.g. 893 - 894 - --itrace=ig32 895 - --itrace=xg32 896 - 897 - Also the number of last branch entries (default 64, max. 1024) for instructions or 898 - transactions events can be specified. e.g. 899 - 900 - --itrace=il10 901 - --itrace=xl10 902 - 903 - Note that last branch entries are cleared for each sample, so there is no overlap 904 - from one sample to the next. 905 - 906 - To disable trace decoding entirely, use the option --no-itrace. 907 - 908 - It is also possible to skip events generated (instructions, branches, transactions) 909 - at the beginning. This is useful to ignore initialization code. 910 - 911 - --itrace=i0nss1000000 912 - 913 - skips the first million instructions. 914 - 915 - dump option 916 - ----------- 917 - 918 - perf script has an option (-D) to "dump" the events i.e. display the binary 919 - data. 920 - 921 - When -D is used, Intel PT packets are displayed. The packet decoder does not 922 - pay attention to PSB packets, but just decodes the bytes - so the packets seen 923 - by the actual decoder may not be identical in places where the data is corrupt. 924 - One example of that would be when the buffer-switching interrupt has been too 925 - slow, and the buffer has been filled completely. In that case, the last packet 926 - in the buffer might be truncated and immediately followed by a PSB as the trace 927 - continues in the next buffer. 928 - 929 - To disable the display of Intel PT packets, combine the -D option with 930 - --no-itrace. 931 - 932 - 933 - perf report 934 - =========== 935 - 936 - By default, perf report will decode trace data found in the perf.data file. 
937 - This can be further controlled by new option --itrace exactly the same as 938 - perf script, with the exception that the default is --itrace=igxe. 939 - 940 - 941 - perf inject 942 - =========== 943 - 944 - perf inject also accepts the --itrace option in which case tracing data is 945 - removed and replaced with the synthesized events. e.g. 946 - 947 - perf inject --itrace -i perf.data -o perf.data.new 948 - 949 - Below is an example of using Intel PT with autofdo. It requires autofdo 950 - (https://github.com/google/autofdo) and gcc version 5. The bubble 951 - sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) 952 - amended to take the number of elements as a parameter. 953 - 954 - $ gcc-5 -O3 sort.c -o sort_optimized 955 - $ ./sort_optimized 30000 956 - Bubble sorting array of 30000 elements 957 - 2254 ms 958 - 959 - $ cat ~/.perfconfig 960 - [intel-pt] 961 - mispred-all = on 962 - 963 - $ perf record -e intel_pt//u ./sort 3000 964 - Bubble sorting array of 3000 elements 965 - 58 ms 966 - [ perf record: Woken up 2 times to write data ] 967 - [ perf record: Captured and wrote 3.939 MB perf.data ] 968 - $ perf inject -i perf.data -o inj --itrace=i100usle --strip 969 - $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 970 - $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo 971 - $ ./sort_autofdo 30000 972 - Bubble sorting array of 30000 elements 973 - 2155 ms 974 - 975 - Note there is currently no advantage to using Intel PT instead of LBR, but 976 - that may change in the future if greater use is made of the data. 977 - 978 - 979 - PEBS via Intel PT 980 - ================= 981 - 982 - Some hardware has the feature to redirect PEBS records to the Intel PT trace. 983 - Recording is selected by using the aux-output config term e.g. 
984 - 985 - perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname 986 - 987 - Note that currently, software only supports redirecting at most one PEBS event. 988 - 989 - To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. 990 - 991 - perf script --itrace=oe 1 + Documentation for support for Intel Processor Trace within perf tools' has moved to file perf-intel-pt.txt

+2 -1

tools/perf/Documentation/perf-inject.txt

··· 66 66 67 67 SEE ALSO 68 68 -------- 69 - linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] 69 + linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1], 70 + linkperf:perf-intel-pt[1]

+1007

tools/perf/Documentation/perf-intel-pt.txt

··· 1 + perf-intel-pt(1) 2 + ================ 3 + 4 + NAME 5 + ---- 6 + perf-intel-pt - Support for Intel Processor Trace within perf tools 7 + 8 + SYNOPSIS 9 + -------- 10 + [verse] 11 + 'perf record' -e intel_pt// 12 + 13 + DESCRIPTION 14 + ----------- 15 + 16 + Intel Processor Trace (Intel PT) is an extension of Intel Architecture that 17 + collects information about software execution such as control flow, execution 18 + modes and timings and formats it into highly compressed binary packets. 19 + Technical details are documented in the Intel 64 and IA-32 Architectures 20 + Software Developer Manuals, Chapter 36 Intel Processor Trace. 21 + 22 + Intel PT is first supported in Intel Core M and 5th generation Intel Core 23 + processors that are based on the Intel micro-architecture code name Broadwell. 24 + 25 + Trace data is collected by 'perf record' and stored within the perf.data file. 26 + See below for options to 'perf record'. 27 + 28 + Trace data must be 'decoded' which involves walking the object code and matching 29 + the trace data packets. For example a TNT packet only tells whether a 30 + conditional branch was taken or not taken, so to make use of that packet the 31 + decoder must know precisely which instruction was being executed. 32 + 33 + Decoding is done on-the-fly. The decoder outputs samples in the same format as 34 + samples output by perf hardware events, for example as though the "instructions" 35 + or "branches" events had been recorded. Presently 3 tools support this: 36 + 'perf script', 'perf report' and 'perf inject'. See below for more information 37 + on using those tools. 38 + 39 + The main distinguishing feature of Intel PT is that the decoder can determine 40 + the exact flow of software execution. Intel PT can be used to understand why 41 + and how did software get to a certain point, or behave a certain way. 
The 42 + software does not have to be recompiled, so Intel PT works with debug or release 43 + builds, however the executed images are needed - which makes use in JIT-compiled 44 + environments, or with self-modified code, a challenge. Also symbols need to be 45 + provided to make sense of addresses. 46 + 47 + A limitation of Intel PT is that it produces huge amounts of trace data 48 + (hundreds of megabytes per second per core) which takes a long time to decode, 49 + for example two or three orders of magnitude longer than it took to collect. 50 + Another limitation is the performance impact of tracing, something that will 51 + vary depending on the use-case and architecture. 52 + 53 + 54 + Quickstart 55 + ---------- 56 + 57 + It is important to start small. That is because it is easy to capture vastly 58 + more data than can possibly be processed. 59 + 60 + The simplest thing to do with Intel PT is userspace profiling of small programs. 61 + Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: 62 + 63 + perf record -e intel_pt//u ls 64 + 65 + And profiled with 'perf report' e.g. 66 + 67 + perf report 68 + 69 + To also trace kernel space presents a problem, namely kernel self-modifying 70 + code. A fairly good kernel image is available in /proc/kcore but to get an 71 + accurate image a copy of /proc/kcore needs to be made under the same conditions 72 + as the data capture. A script perf-with-kcore can do that, but beware that the 73 + script makes use of 'sudo' to copy /proc/kcore. If you have perf installed 74 + locally from the source tree you can do: 75 + 76 + ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls 77 + 78 + which will create a directory named 'pt_ls' and put the perf.data file and 79 + copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. 
Then to use 80 + 'perf report' becomes: 81 + 82 + ~/libexec/perf-core/perf-with-kcore report pt_ls 83 + 84 + Because samples are synthesized after-the-fact, the sampling period can be 85 + selected for reporting. e.g. sample every microsecond 86 + 87 + ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge 88 + 89 + See the sections below for more information about the --itrace option. 90 + 91 + Beware the smaller the period, the more samples that are produced, and the 92 + longer it takes to process them. 93 + 94 + Also note that the coarseness of Intel PT timing information will start to 95 + distort the statistical value of the sampling as the sampling period becomes 96 + smaller. 97 + 98 + To represent software control flow, "branches" samples are produced. By default 99 + a branch sample is synthesized for every single branch. To get an idea what 100 + data is available you can use the 'perf script' tool with all itrace sampling 101 + options, which will list all the samples. 102 + 103 + perf record -e intel_pt//u ls 104 + perf script --itrace=ibxwpe 105 + 106 + An interesting field that is not printed by default is 'flags' which can be 107 + displayed as follows: 108 + 109 + perf script --itrace=ibxwpe -F+flags 110 + 111 + The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, 112 + system, asynchronous, interrupt, transaction abort, trace begin, trace end, and 113 + in transaction, respectively. 114 + 115 + Another interesting field that is not printed by default is 'ipc' which can be 116 + displayed as follows: 117 + 118 + perf script --itrace=be -F+ipc 119 + 120 + There are two ways that instructions-per-cycle (IPC) can be calculated depending 121 + on the recording. 122 + 123 + If the 'cyc' config term (see config terms section below) was used, then IPC is 124 + calculated using the cycle count from CYC packets, otherwise MTC packets are 125 + used - refer to the 'mtc' config term. 
When MTC is used, however, the values 126 + are less accurate because the timing is less accurate. 127 + 128 + Because Intel PT does not update the cycle count on every branch or instruction, 129 + the values will often be zero. When there are values, they will be the number 130 + of instructions and number of cycles since the last update, and thus represent 131 + the average IPC since the last IPC for that event type. Note IPC for "branches" 132 + events is calculated separately from IPC for "instructions" events. 133 + 134 + Also note that the IPC instruction count may or may not include the current 135 + instruction. If the cycle count is associated with an asynchronous branch 136 + (e.g. page fault or interrupt), then the instruction count does not include the 137 + current instruction, otherwise it does. That is consistent with whether or not 138 + that instruction has retired when the cycle count is updated. 139 + 140 + Another note, in the case of "branches" events, non-taken branches are not 141 + presently sampled, so IPC values for them do not appear e.g. a CYC packet with a 142 + TNT packet that starts with a non-taken branch. To see every possible IPC 143 + value, "instructions" events can be used e.g. --itrace=i0ns 144 + 145 + While it is possible to create scripts to analyze the data, an alternative 146 + approach is available to export the data to a sqlite or postgresql database. 147 + Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, 148 + and to script exported-sql-viewer.py for an example of using the database. 149 + 150 + There is also script intel-pt-events.py which provides an example of how to 151 + unpack the raw data for power events and PTWRITE. 152 + 153 + As mentioned above, it is easy to capture too much data. One way to limit the 154 + data captured is to use 'snapshot' mode which is explained further below. 155 + Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. 
156 + 157 + Another problem that will be experienced is decoder errors. They can be caused 158 + by inability to access the executed image, self-modified or JIT-ed code, or the 159 + inability to match side-band information (such as context switches and mmaps) 160 + which results in the decoder not knowing what code was executed. 161 + 162 + There is also the problem of perf not being able to copy the data fast enough, 163 + resulting in data lost because the buffer was full. See 'Buffer handling' below 164 + for more details. 165 + 166 + 167 + perf record 168 + ----------- 169 + 170 + new event 171 + ~~~~~~~~~ 172 + 173 + The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are 174 + selected by providing the PMU name followed by the "config" separated by slashes. 175 + An enhancement has been made to allow default "config" e.g. the option 176 + 177 + -e intel_pt// 178 + 179 + will use a default config value. Currently that is the same as 180 + 181 + -e intel_pt/tsc,noretcomp=0/ 182 + 183 + which is the same as 184 + 185 + -e intel_pt/tsc=1,noretcomp=0/ 186 + 187 + Note there are now new config terms - see section 'config terms' further below. 188 + 189 + The config terms are listed in /sys/devices/intel_pt/format. They are bit 190 + fields within the config member of the struct perf_event_attr which is 191 + passed to the kernel by the perf_event_open system call. They correspond to bit 192 + fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: 193 + 194 + $ grep -H . 
/sys/bus/event_source/devices/intel_pt/format/* 195 + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 196 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 197 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 198 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 199 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 200 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 201 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 202 + 203 + Note that the default config must be overridden for each term i.e. 204 + 205 + -e intel_pt/noretcomp=0/ 206 + 207 + is the same as: 208 + 209 + -e intel_pt/tsc=1,noretcomp=0/ 210 + 211 + So, to disable TSC packets use: 212 + 213 + -e intel_pt/tsc=0/ 214 + 215 + It is also possible to specify the config value explicitly: 216 + 217 + -e intel_pt/config=0x400/ 218 + 219 + Note that, as with all events, the event is suffixed with event modifiers: 220 + 221 + u userspace 222 + k kernel 223 + h hypervisor 224 + G guest 225 + H host 226 + p precise ip 227 + 228 + 'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. 229 + 'p' is also not relevant to Intel PT. So only options 'u' and 'k' are 230 + meaningful for Intel PT. 231 + 232 + perf_event_attr is displayed if the -vv option is used e.g. 
233 + 234 + ------------------------------------------------------------ 235 + perf_event_attr: 236 + type 6 237 + size 112 238 + config 0x400 239 + { sample_period, sample_freq } 1 240 + sample_type IP|TID|TIME|CPU|IDENTIFIER 241 + read_format ID 242 + disabled 1 243 + inherit 1 244 + exclude_kernel 1 245 + exclude_hv 1 246 + enable_on_exec 1 247 + sample_id_all 1 248 + ------------------------------------------------------------ 249 + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 250 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 251 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 252 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 253 + ------------------------------------------------------------ 254 + 255 + 256 + config terms 257 + ~~~~~~~~~~~~ 258 + 259 + The June 2015 version of Intel 64 and IA-32 Architectures Software Developer 260 + Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. 261 + Some of the features are reflected in new config terms. All the config terms are 262 + described below. 263 + 264 + tsc Always supported. Produces TSC timestamp packets to provide 265 + timing information. In some cases it is possible to decode 266 + without timing information, for example a per-thread context 267 + that does not overlap executable memory maps. 268 + 269 + The default config selects tsc (i.e. tsc=1). 270 + 271 + noretcomp Always supported. Disables "return compression" so a TIP packet 272 + is produced when a function returns. Causes more packets to be 273 + produced but might make decoding more reliable. 274 + 275 + The default config does not select noretcomp (i.e. noretcomp=0). 276 + 277 + psb_period Allows the frequency of PSB packets to be specified. 278 + 279 + The PSB packet is a synchronization packet that provides a 280 + starting point for decoding or recovery from errors. 
281 + 282 + Support for psb_period is indicated by: 283 + 284 + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc 285 + 286 + which contains "1" if the feature is supported and "0" 287 + otherwise. 288 + 289 + Valid values are given by: 290 + 291 + /sys/bus/event_source/devices/intel_pt/caps/psb_periods 292 + 293 + which contains a hexadecimal value, the bits of which represent 294 + valid values e.g. bit 2 set means value 2 is valid. 295 + 296 + The psb_period value is converted to the approximate number of 297 + trace bytes between PSB packets as: 298 + 299 + 2 ^ (value + 11) 300 + 301 + e.g. value 3 means 16KiB between PSBs 302 + 303 + If an invalid value is entered, the error message 304 + will give a list of valid values e.g. 305 + 306 + $ perf record -e intel_pt/psb_period=15/u uname 307 + Invalid psb_period for intel_pt. Valid values are: 0-5 308 + 309 + If MTC packets are selected, the default config selects a value 310 + of 3 (i.e. psb_period=3) or the nearest lower value that is 311 + supported (0 is always supported). Otherwise the default is 0. 312 + 313 + If decoding is expected to be reliable and the buffer is large 314 + then a large PSB period can be used. 315 + 316 + Because a TSC packet is produced with PSB, the PSB period can 317 + also affect the granularity of timing information in the absence 318 + of MTC or CYC. 319 + 320 + mtc Produces MTC timing packets. 321 + 322 + MTC packets provide finer grain timestamp information than TSC 323 + packets. MTC packets record time using the hardware crystal 324 + clock (CTC) which is related to TSC packets using a TMA packet. 325 + 326 + Support for this feature is indicated by: 327 + 328 + /sys/bus/event_source/devices/intel_pt/caps/mtc 329 + 330 + which contains "1" if the feature is supported and 331 + "0" otherwise. 332 + 333 + The frequency of MTC packets can also be specified - see 334 + mtc_period below. 
335 + 336 + mtc_period Specifies how frequently MTC packets are produced - see mtc 337 + above for how to determine if MTC packets are supported. 338 + 339 + Valid values are given by: 340 + 341 + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods 342 + 343 + which contains a hexadecimal value, the bits of which represent 344 + valid values e.g. bit 2 set means value 2 is valid. 345 + 346 + The mtc_period value is converted to the MTC frequency as: 347 + 348 + CTC-frequency / (2 ^ value) 349 + 350 + e.g. value 3 means one eighth of CTC-frequency 351 + 352 + Where CTC is the hardware crystal clock, the frequency of which 353 + can be related to TSC via values provided in cpuid leaf 0x15. 354 + 355 + If an invalid value is entered, the error message 356 + will give a list of valid values e.g. 357 + 358 + $ perf record -e intel_pt/mtc_period=15/u uname 359 + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 360 + 361 + The default value is 3 or the nearest lower value 362 + that is supported (0 is always supported). 363 + 364 + cyc Produces CYC timing packets. 365 + 366 + CYC packets provide even finer grain timestamp information than 367 + MTC and TSC packets. A CYC packet contains the number of CPU 368 + cycles since the last CYC packet. Unlike MTC and TSC packets, 369 + CYC packets are only sent when another packet is also sent. 370 + 371 + Support for this feature is indicated by: 372 + 373 + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc 374 + 375 + which contains "1" if the feature is supported and 376 + "0" otherwise. 377 + 378 + The number of CYC packets produced can be reduced by specifying 379 + a threshold - see cyc_thresh below. 380 + 381 + cyc_thresh Specifies how frequently CYC packets are produced - see cyc 382 + above for how to determine if CYC packets are supported. 
383 + 384 + Valid cyc_thresh values are given by: 385 + 386 + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds 387 + 388 + which contains a hexadecimal value, the bits of which represent 389 + valid values e.g. bit 2 set means value 2 is valid. 390 + 391 + The cyc_thresh value represents the minimum number of CPU cycles 392 + that must have passed before a CYC packet can be sent. The 393 + number of CPU cycles is: 394 + 395 + 2 ^ (value - 1) 396 + 397 + e.g. value 4 means 8 CPU cycles must pass before a CYC packet 398 + can be sent. Note a CYC packet is still only sent when another 399 + packet is sent, not at, e.g. every 8 CPU cycles. 400 + 401 + If an invalid value is entered, the error message 402 + will give a list of valid values e.g. 403 + 404 + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname 405 + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 406 + 407 + CYC packets are not requested by default. 408 + 409 + pt Specifies pass-through which enables the 'branch' config term. 410 + 411 + The default config selects 'pt' if it is available, so a user will 412 + never need to specify this term. 413 + 414 + branch Enable branch tracing. Branch tracing is enabled by default so to 415 + disable branch tracing use 'branch=0'. 416 + 417 + The default config selects 'branch' if it is available. 418 + 419 + ptw Enable PTWRITE packets which are produced when a ptwrite instruction 420 + is executed. 421 + 422 + Support for this feature is indicated by: 423 + 424 + /sys/bus/event_source/devices/intel_pt/caps/ptwrite 425 + 426 + which contains "1" if the feature is supported and 427 + "0" otherwise. 428 + 429 + fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet 430 + provides the address of the ptwrite instruction. In the absence of 431 + fup_on_ptw, the decoder will use the address of the previous branch 432 + if branch tracing is enabled, otherwise the address will be zero. 
433 + Note that fup_on_ptw will work even when branch tracing is disabled. 434 + 435 + pwr_evt Enable power events. The power events provide information about 436 + changes to the CPU C-state. 437 + 438 + Support for this feature is indicated by: 439 + 440 + /sys/bus/event_source/devices/intel_pt/caps/power_event_trace 441 + 442 + which contains "1" if the feature is supported and 443 + "0" otherwise. 444 + 445 + 446 + AUX area sampling option 447 + ~~~~~~~~~~~~~~~~~~~~~~~~ 448 + 449 + To select Intel PT "sampling" the AUX area sampling option can be used: 450 + 451 + --aux-sample 452 + 453 + Optionally it can be followed by the sample size in bytes e.g. 454 + 455 + --aux-sample=8192 456 + 457 + In addition, the Intel PT event to sample must be defined e.g. 458 + 459 + -e intel_pt//u 460 + 461 + Samples on other events will be created containing Intel PT data e.g. the 462 + following will create Intel PT samples on the branch-misses event, note the 463 + events must be grouped using {}: 464 + 465 + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' 466 + 467 + An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to 468 + events. In this case, the grouping is implied e.g. 469 + 470 + perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u 471 + 472 + is the same as: 473 + 474 + perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' 475 + 476 + but allows for also using an address filter e.g.: 477 + 478 + perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls 479 + 480 + It is important to select a sample size that is big enough to contain at least 481 + one PSB packet. If not a warning will be displayed: 482 + 483 + Intel PT sample size (%zu) may be too small for PSB period (%zu) 484 + 485 + The calculation used for that is: if sample_size <= psb_period + 256 display the 486 + warning. When sampling is used, psb_period defaults to 0 (2KiB). 
487 + 488 + The default sample size is 4KiB. 489 + 490 + The sample size is passed in aux_sample_size in struct perf_event_attr. The 491 + sample size is limited by the maximum event size which is 64KiB. It is 492 + difficult to know how big the event might be without the trace sample attached, 493 + but the tool validates that the sample size is not greater than 60KiB. 494 + 495 + 496 + new snapshot option 497 + ~~~~~~~~~~~~~~~~~~~ 498 + 499 + The difference between full trace and snapshot from the kernel's perspective is 500 + that in full trace we don't overwrite trace data that the user hasn't collected 501 + yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let 502 + the trace run and overwrite older data in the buffer so that whenever something 503 + interesting happens, we can stop it and grab a snapshot of what was going on 504 + around that interesting moment. 505 + 506 + To select snapshot mode a new option has been added: 507 + 508 + -S 509 + 510 + Optionally it can be followed by the snapshot size e.g. 511 + 512 + -S0x100000 513 + 514 + The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size 515 + nor snapshot size is specified, then the default is 4MiB for privileged users 516 + (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. 517 + If an unprivileged user does not specify mmap pages, the mmap pages will be 518 + reduced as described in the 'new auxtrace mmap size option' section below. 519 + 520 + The snapshot size is displayed if the option -vv is used e.g. 521 + 522 + Intel PT snapshot size: %zu 523 + 524 + 525 + new auxtrace mmap size option 526 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 527 + 528 + Intel PT buffer size is specified by an addition to the -m option e.g. 529 + 530 + -m,16 531 + 532 + selects a buffer size of 16 pages i.e. 64KiB. 533 + 534 + Note that the existing functionality of -m is unchanged. 
The auxtrace mmap size 535 + is specified by the optional addition of a comma and the value. 536 + 537 + The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users 538 + (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. 539 + If an unprivileged user does not specify mmap pages, the mmap pages will be 540 + reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the 541 + user is likely to get an error as they exceed their mlock limit (Max locked 542 + memory as shown in /proc/self/limits). Note that perf does not count the first 543 + 512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu 544 + against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus 545 + their mlock limit (which defaults to 64KiB but is not multiplied by the number 546 + of cpus). 547 + 548 + In full-trace mode, powers of two are allowed for buffer size, with a minimum 549 + size of 2 pages. In snapshot mode or sampling mode, it is the same but the 550 + minimum size is 1 page. 551 + 552 + The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. 553 + 554 + mmap length 528384 555 + auxtrace mmap length 4198400 556 + 557 + 558 + Intel PT modes of operation 559 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 560 + 561 + Intel PT can be used in 3 modes: 562 + full-trace mode 563 + sample mode 564 + snapshot mode 565 + 566 + Full-trace mode traces continuously e.g. 567 + 568 + perf record -e intel_pt//u uname 569 + 570 + Sample mode attaches an Intel PT sample to other events e.g. 571 + 572 + perf record --aux-sample -e intel_pt//u -e branch-misses:u 573 + 574 + Snapshot mode captures the available data when a signal is sent e.g. 575 + 576 + perf record -v -e intel_pt//u -S ./loopy 1000000000 & 577 + [1] 11435 578 + kill -USR2 11435 579 + Recording AUX area tracing snapshot 580 + 581 + Note that the signal sent is SIGUSR2. 
582 + Note that "Recording AUX area tracing snapshot" is displayed because the -v 583 + option is used. 584 + 585 + The 3 modes cannot be used together. 586 + 587 + 588 + Buffer handling 589 + ~~~~~~~~~~~~~~~ 590 + 591 + There may be buffer limitations (i.e. single ToPa entry) which means that actual 592 + buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to 593 + provide other sizes, and in particular an arbitrarily large size, multiple 594 + buffers are logically concatenated. However an interrupt must be used to switch 595 + between buffers. That has two potential problems: 596 + a) the interrupt may not be handled in time so that the current buffer 597 + becomes full and some trace data is lost. 598 + b) the interrupts may slow the system and affect the performance 599 + results. 600 + 601 + If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event 602 + which the tools report as an error. 603 + 604 + In full-trace mode, the driver waits for data to be copied out before allowing 605 + the (logical) buffer to wrap-around. If data is not copied out quickly enough, 606 + again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to 607 + wait, the intel_pt event gets disabled. Because it is difficult to know when 608 + that happens, perf tools always re-enable the intel_pt event after copying out 609 + data. 610 + 611 + 612 + Intel PT and build ids 613 + ~~~~~~~~~~~~~~~~~~~~~~ 614 + 615 + By default "perf record" post-processes the event stream to find all build ids 616 + for executables for all addresses sampled. Deliberately, Intel PT is not 617 + decoded for that purpose (it would take too long). Instead the build ids for 618 + all executables encountered (due to mmap, comm or task events) are included 619 + in the perf.data file. 
620 + 621 + To see buildids included in the perf.data file use the command: 622 + 623 + perf buildid-list 624 + 625 + If the perf.data file contains Intel PT data, that is the same as: 626 + 627 + perf buildid-list --with-hits 628 + 629 + 630 + Snapshot mode and event disabling 631 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 632 + 633 + In order to make a snapshot, the intel_pt event is disabled using an IOCTL, 634 + namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the 635 + collection of side-band information. In order to prevent that, a dummy 636 + software event has been introduced that permits tracking events (like mmaps) to 637 + continue to be recorded while intel_pt is disabled. That is important to ensure 638 + there is complete side-band information to allow the decoding of subsequent 639 + snapshots. 640 + 641 + A test has been created for that. To find the test: 642 + 643 + perf test list 644 + ... 645 + 23: Test using a dummy software event to keep tracking 646 + 647 + To run the test: 648 + 649 + perf test 23 650 + 23: Test using a dummy software event to keep tracking : Ok 651 + 652 + 653 + perf record modes (nothing new here) 654 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 655 + 656 + perf record essentially operates in one of three modes: 657 + per thread 658 + per cpu 659 + workload only 660 + 661 + "per thread" mode is selected by -t or by --per-thread (with -p or -u or just a 662 + workload). 663 + "per cpu" is selected by -C or -a. 664 + "workload only" mode is selected by not using the other options but providing a 665 + command to run (i.e. the workload). 666 + 667 + In per-thread mode an exact list of threads is traced. There is no inheritance. 668 + Each thread has its own event buffer. 669 + 670 + In per-cpu mode all processes (or processes from the selected cgroup i.e. -G 671 + option, or processes selected with -p or -u) are traced. Each cpu has its own 672 + buffer. Inheritance is allowed. 
673 + 674 + In workload-only mode, the workload is traced but with per-cpu buffers. 675 + Inheritance is allowed. Note that you can now trace a workload in per-thread 676 + mode by using the --per-thread option. 677 + 678 + 679 + Privileged vs non-privileged users 680 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 681 + 682 + Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users 683 + have memory limits imposed upon them. That affects what buffer sizes they can 684 + have as outlined above. 685 + 686 + The v4.2 kernel introduced support for a context switch metadata event, 687 + PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes 688 + are scheduled out and in, just not by whom, which is left for the 689 + PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, 690 + which in turn requires CAP_SYS_ADMIN. 691 + 692 + Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context 693 + switches") commit, that introduces these metadata events for further info. 694 + 695 + When working with kernels < v4.2, the following considerations must be taken, 696 + as the sched:sched_switch tracepoints will be used to receive such information: 697 + 698 + Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are 699 + not permitted to use tracepoints which means there is insufficient side-band 700 + information to decode Intel PT in per-cpu mode, and potentially workload-only 701 + mode too if the workload creates new processes. 702 + 703 + Note also, that to use tracepoints, read-access to debugfs is required. So if 704 + debugfs is not mounted or the user does not have read-access, it will again not 705 + be possible to decode Intel PT in per-cpu mode. 
706 + 707 + 708 + sched_switch tracepoint 709 + ~~~~~~~~~~~~~~~~~~~~~~~ 710 + 711 + The sched_switch tracepoint is used to provide side-band data for Intel PT 712 + decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't 713 + available. 714 + 715 + The sched_switch events are automatically added. e.g. the second event shown 716 + below: 717 + 718 + $ perf record -vv -e intel_pt//u uname 719 + ------------------------------------------------------------ 720 + perf_event_attr: 721 + type 6 722 + size 112 723 + config 0x400 724 + { sample_period, sample_freq } 1 725 + sample_type IP|TID|TIME|CPU|IDENTIFIER 726 + read_format ID 727 + disabled 1 728 + inherit 1 729 + exclude_kernel 1 730 + exclude_hv 1 731 + enable_on_exec 1 732 + sample_id_all 1 733 + ------------------------------------------------------------ 734 + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 735 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 736 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 737 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 738 + ------------------------------------------------------------ 739 + perf_event_attr: 740 + type 2 741 + size 112 742 + config 0x108 743 + { sample_period, sample_freq } 1 744 + sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER 745 + read_format ID 746 + inherit 1 747 + sample_id_all 1 748 + exclude_guest 1 749 + ------------------------------------------------------------ 750 + sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 751 + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 752 + sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 753 + sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 754 + ------------------------------------------------------------ 755 + perf_event_attr: 756 + type 1 757 + size 112 758 + config 0x9 759 + { sample_period, sample_freq } 1 760 + sample_type IP|TID|TIME|IDENTIFIER 761 + read_format ID 762 + disabled 1 763 + inherit 1 764 + 
exclude_kernel 1 765 + exclude_hv 1 766 + mmap 1 767 + comm 1 768 + enable_on_exec 1 769 + task 1 770 + sample_id_all 1 771 + mmap2 1 772 + comm_exec 1 773 + ------------------------------------------------------------ 774 + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 775 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 776 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 777 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 778 + mmap size 528384B 779 + AUX area mmap length 4194304 780 + perf event ring buffer mmapped per cpu 781 + Synthesizing auxtrace information 782 + Linux 783 + [ perf record: Woken up 1 times to write data ] 784 + [ perf record: Captured and wrote 0.042 MB perf.data ] 785 + 786 + Note, the sched_switch event is only added if the user is permitted to use it 787 + and only in per-cpu mode. 788 + 789 + Note also, the sched_switch event is only added if TSC packets are requested. 790 + That is because, in the absence of timing information, the sched_switch events 791 + cannot be matched against the Intel PT trace. 792 + 793 + 794 + perf script 795 + ----------- 796 + 797 + By default, perf script will decode trace data found in the perf.data file. 798 + This can be further controlled by new option --itrace. 
799 + 800 + 801 + New --itrace option 802 + ~~~~~~~~~~~~~~~~~~~ 803 + 804 + Having no option is the same as 805 + 806 + --itrace 807 + 808 + which, in turn, is the same as 809 + 810 + --itrace=cepwx 811 + 812 + The letters are: 813 + 814 + i synthesize "instructions" events 815 + b synthesize "branches" events 816 + x synthesize "transactions" events 817 + w synthesize "ptwrite" events 818 + p synthesize "power" events 819 + c synthesize branches events (calls only) 820 + r synthesize branches events (returns only) 821 + e synthesize tracing error events 822 + d create a debug log 823 + g synthesize a call chain (use with i or x) 824 + l synthesize last branch entries (use with i or x) 825 + s skip initial number of events 826 + 827 + "Instructions" events look like they were recorded by "perf record -e 828 + instructions". 829 + 830 + "Branches" events look like they were recorded by "perf record -e branches". "c" 831 + and "r" can be combined to get calls and returns. 832 + 833 + "Transactions" events correspond to the start or end of transactions. The 834 + 'flags' field can be used in perf script to determine whether the event is a 835 + transaction start, commit or abort. 836 + 837 + Note that "instructions", "branches" and "transactions" events depend on code 838 + flow packets which can be disabled by using the config term "branch=0". Refer 839 + to the config terms section above. 840 + 841 + "ptwrite" events record the payload of the ptwrite instruction and whether 842 + "fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are 843 + recorded only if the "ptw" config term was used. Refer to the config terms 844 + section above. perf script "synth" field displays "ptwrite" information like 845 + this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was 846 + used. 847 + 848 + "Power" events correspond to power event packets and CBR (core-to-bus ratio) 849 + packets. 
While CBR packets are always recorded when tracing is enabled, power 850 + event packets are recorded only if the "pwr_evt" config term was used. Refer to 851 + the config terms section above. The power events record information about 852 + C-state changes, whereas CBR is indicative of CPU frequency. perf script 853 + "event,synth" fields display information like this: 854 + cbr: cbr: 22 freq: 2189 MHz (200%) 855 + mwait: hints: 0x60 extensions: 0x1 856 + pwre: hw: 0 cstate: 2 sub-cstate: 0 857 + exstop: ip: 1 858 + pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 859 + Where: 860 + "cbr" includes the frequency and the percentage of maximum non-turbo 861 + "mwait" shows mwait hints and extensions 862 + "pwre" shows C-state transitions (to a C-state deeper than C0) and 863 + whether initiated by hardware 864 + "exstop" indicates execution stopped and whether the IP was recorded 865 + exactly, 866 + "pwrx" indicates return to C0 867 + For more details refer to the Intel 64 and IA-32 Architectures Software 868 + Developer Manuals. 869 + 870 + Error events show where the decoder lost the trace. Error events 871 + are quite important. Users must know if what they are seeing is a complete 872 + picture or not. 873 + 874 + The "d" option will cause the creation of a file "intel_pt.log" containing all 875 + decoded packets and instructions. Note that this option slows down the decoder 876 + and that the resulting file may be very large. 877 + 878 + In addition, the period of the "instructions" event can be specified. e.g. 879 + 880 + --itrace=i10us 881 + 882 + sets the period to 10us i.e. one instruction sample is synthesized for each 10 883 + microseconds of trace. Alternatives to "us" are "ms" (milliseconds), 884 + "ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). 885 + 886 + "ms", "us" and "ns" are converted to TSC ticks. 887 + 888 + The timing information included with Intel PT does not give the time of every 889 + instruction. 
Consequently, for the purpose of sampling, the decoder estimates 890 + the time since the last timing packet based on 1 tick per instruction. The time 891 + on the sample is *not* adjusted and reflects the last known value of TSC. 892 + 893 + For Intel PT, the default period is 100us. 894 + 895 + Setting it to a zero period means "as often as possible". 896 + 897 + In the case of Intel PT that is the same as a period of 1 and a unit of 898 + 'instructions' (i.e. --itrace=i1i). 899 + 900 + Also the call chain size (default 16, max. 1024) for instructions or 901 + transactions events can be specified. e.g. 902 + 903 + --itrace=ig32 904 + --itrace=xg32 905 + 906 + Also the number of last branch entries (default 64, max. 1024) for instructions or 907 + transactions events can be specified. e.g. 908 + 909 + --itrace=il10 910 + --itrace=xl10 911 + 912 + Note that last branch entries are cleared for each sample, so there is no overlap 913 + from one sample to the next. 914 + 915 + To disable trace decoding entirely, use the option --no-itrace. 916 + 917 + It is also possible to skip events generated (instructions, branches, transactions) 918 + at the beginning. This is useful to ignore initialization code. 919 + 920 + --itrace=i0nss1000000 921 + 922 + skips the first million instructions. 923 + 924 + dump option 925 + ~~~~~~~~~~~ 926 + 927 + perf script has an option (-D) to "dump" the events i.e. display the binary 928 + data. 929 + 930 + When -D is used, Intel PT packets are displayed. The packet decoder does not 931 + pay attention to PSB packets, but just decodes the bytes - so the packets seen 932 + by the actual decoder may not be identical in places where the data is corrupt. 933 + One example of that would be when the buffer-switching interrupt has been too 934 + slow, and the buffer has been filled completely. In that case, the last packet 935 + in the buffer might be truncated and immediately followed by a PSB as the trace 936 + continues in the next buffer. 
937 + 938 + To disable the display of Intel PT packets, combine the -D option with 939 + --no-itrace. 940 + 941 + 942 + perf report 943 + ----------- 944 + 945 + By default, perf report will decode trace data found in the perf.data file. 946 + This can be further controlled by new option --itrace exactly the same as 947 + perf script, with the exception that the default is --itrace=igxe. 948 + 949 + 950 + perf inject 951 + ----------- 952 + 953 + perf inject also accepts the --itrace option in which case tracing data is 954 + removed and replaced with the synthesized events. e.g. 955 + 956 + perf inject --itrace -i perf.data -o perf.data.new 957 + 958 + Below is an example of using Intel PT with autofdo. It requires autofdo 959 + (https://github.com/google/autofdo) and gcc version 5. The bubble 960 + sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) 961 + amended to take the number of elements as a parameter. 962 + 963 + $ gcc-5 -O3 sort.c -o sort_optimized 964 + $ ./sort_optimized 30000 965 + Bubble sorting array of 30000 elements 966 + 2254 ms 967 + 968 + $ cat ~/.perfconfig 969 + [intel-pt] 970 + mispred-all = on 971 + 972 + $ perf record -e intel_pt//u ./sort 3000 973 + Bubble sorting array of 3000 elements 974 + 58 ms 975 + [ perf record: Woken up 2 times to write data ] 976 + [ perf record: Captured and wrote 3.939 MB perf.data ] 977 + $ perf inject -i perf.data -o inj --itrace=i100usle --strip 978 + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 979 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo 980 + $ ./sort_autofdo 30000 981 + Bubble sorting array of 30000 elements 982 + 2155 ms 983 + 984 + Note there is currently no advantage to using Intel PT instead of LBR, but 985 + that may change in the future if greater use is made of the data. 
986 + 987 + 988 + PEBS via Intel PT 989 + ----------------- 990 + 991 + Some hardware has the feature to redirect PEBS records to the Intel PT trace. 992 + Recording is selected by using the aux-output config term e.g. 993 + 994 + perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname 995 + 996 + Note that currently, software only supports redirecting at most one PEBS event. 997 + 998 + To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. 999 + 1000 + perf script --itrace=oe 1001 + 1002 + 1003 + SEE ALSO 1004 + -------- 1005 + 1006 + linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1], 1007 + linkperf:perf-inject[1]

+1 -1

tools/perf/Documentation/perf-record.txt

··· 589 589 590 590 SEE ALSO 591 591 -------- 592 - linkperf:perf-stat[1], linkperf:perf-list[1] 592 + linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]

+2 -1

tools/perf/Documentation/perf-report.txt

··· 546 546 547 547 SEE ALSO 548 548 -------- 549 - linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] 549 + linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1], 550 + linkperf:perf-intel-pt[1]

+1 -1

tools/perf/Documentation/perf-script.txt

··· 429 429 SEE ALSO 430 430 -------- 431 431 linkperf:perf-record[1], linkperf:perf-script-perl[1], 432 - linkperf:perf-script-python[1] 432 + linkperf:perf-script-python[1], linkperf:perf-intel-pt[1]

+9

tools/perf/Documentation/perf-stat.txt

··· 334 334 --all-user:: 335 335 Configure all used events to run in user space. 336 336 337 + --percore-show-thread:: 338 + The event modifier "percore" sums up the event counts 339 + for all hardware threads in a core and shows the counts per core. 340 + 341 + This option with event modifier "percore" enabled also sums up the event 342 + counts for all hardware threads in a core but shows the summed counts per 343 + hardware thread. This is essentially a replacement for the any bit and 344 + is convenient for post processing. 345 + 337 346 EXAMPLES 338 347 -------- 339 348

+2 -19

tools/perf/builtin-diff.c

··· 572 572 bh->valid = true; 573 573 } 574 574 575 - static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) 576 - { 577 - struct block_info *bi_a = a->block_info; 578 - struct block_info *bi_b = b->block_info; 579 - int cmp; 580 - 581 - if (!bi_a->sym || !bi_b->sym) 582 - return -1; 583 - 584 - cmp = strcmp(bi_a->sym->name, bi_b->sym->name); 585 - 586 - if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) 587 - return 0; 588 - 589 - return -1; 590 - } 591 - 592 575 static struct hist_entry *get_block_pair(struct hist_entry *he, 593 576 struct hists *hists_pair) 594 577 { 595 578 struct rb_root_cached *root = hists_pair->entries_in; 596 579 struct rb_node *next = rb_first_cached(root); 597 - int cmp; 580 + int64_t cmp; 598 581 599 582 while (next != NULL) { 600 583 struct hist_entry *he_pair = rb_entry(next, struct hist_entry, ··· 585 602 586 603 next = rb_next(&he_pair->rb_node_in); 587 604 588 - cmp = block_pair_cmp(he_pair, he); 605 + cmp = __block_info__cmp(he_pair, he); 589 606 if (!cmp) 590 607 return he_pair; 591 608 }

+22 -8

tools/perf/builtin-report.c

··· 104 104 bool symbol_ipc; 105 105 bool total_cycles_mode; 106 106 struct block_report *block_reports; 107 + int nr_block_reports; 107 108 }; 108 109 109 110 static int report__config(const char *var, const char *value, void *cb) ··· 186 185 { 187 186 struct hist_entry *he = iter->he; 188 187 struct report *rep = arg; 189 - struct branch_info *bi; 188 + struct branch_info *bi = he->branch_info; 190 189 struct perf_sample *sample = iter->sample; 191 190 struct evsel *evsel = iter->evsel; 192 191 int err; 193 192 193 + branch_type_count(&rep->brtype_stat, &bi->flags, 194 + bi->from.addr, bi->to.addr); 195 + 194 196 if (!ui__has_annotation() && !rep->symbol_ipc) 195 197 return 0; 196 198 197 - bi = he->branch_info; 198 199 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); 199 200 if (err) 200 201 goto out; 201 202 202 203 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); 203 - 204 - branch_type_count(&rep->brtype_stat, &bi->flags, 205 - bi->from.addr, bi->to.addr); 206 204 207 205 out: 208 206 return err; ··· 966 966 report__output_resort(rep); 967 967 968 968 if (rep->total_cycles_mode) { 969 + int block_hpps[6] = { 970 + PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT, 971 + PERF_HPP_REPORT__BLOCK_LBR_CYCLES, 972 + PERF_HPP_REPORT__BLOCK_CYCLES_PCT, 973 + PERF_HPP_REPORT__BLOCK_AVG_CYCLES, 974 + PERF_HPP_REPORT__BLOCK_RANGE, 975 + PERF_HPP_REPORT__BLOCK_DSO, 976 + }; 977 + 969 978 rep->block_reports = block_info__create_report(session->evlist, 970 - rep->total_cycles); 979 + rep->total_cycles, 980 + block_hpps, 6, 981 + &rep->nr_block_reports); 971 982 if (!rep->block_reports) 972 983 return -1; 973 984 } ··· 1562 1551 zfree(&report.ptime_range); 1563 1552 } 1564 1553 1565 - if (report.block_reports) 1566 - zfree(&report.block_reports); 1554 + if (report.block_reports) { 1555 + block_info__free_report(report.block_reports, 1556 + report.nr_block_reports); 1557 + report.block_reports = NULL; 1558 + } 1567 1559 1568 1560 
zstd_fini(&(session->zstd_data)); 1569 1561 perf_session__delete(session);

+37 -33

tools/perf/builtin-script.c

··· 735 735 struct perf_event_attr *attr, FILE *fp) 736 736 { 737 737 struct branch_stack *br = sample->branch_stack; 738 + struct branch_entry *entries = perf_sample__branch_entries(sample); 738 739 struct addr_location alf, alt; 739 740 u64 i, from, to; 740 741 int printed = 0; ··· 744 743 return 0; 745 744 746 745 for (i = 0; i < br->nr; i++) { 747 - from = br->entries[i].from; 748 - to = br->entries[i].to; 746 + from = entries[i].from; 747 + to = entries[i].to; 749 748 750 749 if (PRINT_FIELD(DSO)) { 751 750 memset(&alf, 0, sizeof(alf)); ··· 769 768 } 770 769 771 770 printed += fprintf(fp, "/%c/%c/%c/%d ", 772 - mispred_str( br->entries + i), 773 - br->entries[i].flags.in_tx? 'X' : '-', 774 - br->entries[i].flags.abort? 'A' : '-', 775 - br->entries[i].flags.cycles); 771 + mispred_str(entries + i), 772 + entries[i].flags.in_tx ? 'X' : '-', 773 + entries[i].flags.abort ? 'A' : '-', 774 + entries[i].flags.cycles); 776 775 } 777 776 778 777 return printed; ··· 783 782 struct perf_event_attr *attr, FILE *fp) 784 783 { 785 784 struct branch_stack *br = sample->branch_stack; 785 + struct branch_entry *entries = perf_sample__branch_entries(sample); 786 786 struct addr_location alf, alt; 787 787 u64 i, from, to; 788 788 int printed = 0; ··· 795 793 796 794 memset(&alf, 0, sizeof(alf)); 797 795 memset(&alt, 0, sizeof(alt)); 798 - from = br->entries[i].from; 799 - to = br->entries[i].to; 796 + from = entries[i].from; 797 + to = entries[i].to; 800 798 801 799 thread__find_symbol_fb(thread, sample->cpumode, from, &alf); 802 800 thread__find_symbol_fb(thread, sample->cpumode, to, &alt); ··· 815 813 printed += fprintf(fp, ")"); 816 814 } 817 815 printed += fprintf(fp, "/%c/%c/%c/%d ", 818 - mispred_str( br->entries + i), 819 - br->entries[i].flags.in_tx? 'X' : '-', 820 - br->entries[i].flags.abort? 'A' : '-', 821 - br->entries[i].flags.cycles); 816 + mispred_str(entries + i), 817 + entries[i].flags.in_tx ? 'X' : '-', 818 + entries[i].flags.abort ? 
'A' : '-', 819 + entries[i].flags.cycles); 822 820 } 823 821 824 822 return printed; ··· 829 827 struct perf_event_attr *attr, FILE *fp) 830 828 { 831 829 struct branch_stack *br = sample->branch_stack; 830 + struct branch_entry *entries = perf_sample__branch_entries(sample); 832 831 struct addr_location alf, alt; 833 832 u64 i, from, to; 834 833 int printed = 0; ··· 841 838 842 839 memset(&alf, 0, sizeof(alf)); 843 840 memset(&alt, 0, sizeof(alt)); 844 - from = br->entries[i].from; 845 - to = br->entries[i].to; 841 + from = entries[i].from; 842 + to = entries[i].to; 846 843 847 844 if (thread__find_map_fb(thread, sample->cpumode, from, &alf) && 848 845 !alf.map->dso->adjust_symbols) ··· 865 862 printed += fprintf(fp, ")"); 866 863 } 867 864 printed += fprintf(fp, "/%c/%c/%c/%d ", 868 - mispred_str(br->entries + i), 869 - br->entries[i].flags.in_tx ? 'X' : '-', 870 - br->entries[i].flags.abort ? 'A' : '-', 871 - br->entries[i].flags.cycles); 865 + mispred_str(entries + i), 866 + entries[i].flags.in_tx ? 'X' : '-', 867 + entries[i].flags.abort ? 'A' : '-', 868 + entries[i].flags.cycles); 872 869 } 873 870 874 871 return printed; ··· 1056 1053 struct machine *machine, FILE *fp) 1057 1054 { 1058 1055 struct branch_stack *br = sample->branch_stack; 1056 + struct branch_entry *entries = perf_sample__branch_entries(sample); 1059 1057 u64 start, end; 1060 1058 int i, insn, len, nr, ilen, printed = 0; 1061 1059 struct perf_insn x; ··· 1077 1073 printed += fprintf(fp, "%c", '\n'); 1078 1074 1079 1075 /* Handle first from jump, of which we don't know the entry. 
*/ 1080 - len = grab_bb(buffer, br->entries[nr-1].from, 1081 - br->entries[nr-1].from, 1076 + len = grab_bb(buffer, entries[nr-1].from, 1077 + entries[nr-1].from, 1082 1078 machine, thread, &x.is64bit, &x.cpumode, false); 1083 1079 if (len > 0) { 1084 - printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, 1080 + printed += ip__fprintf_sym(entries[nr - 1].from, thread, 1085 1081 x.cpumode, x.cpu, &lastsym, attr, fp); 1086 - printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], 1082 + printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], 1087 1083 &x, buffer, len, 0, fp, &total_cycles); 1088 1084 if (PRINT_FIELD(SRCCODE)) 1089 - printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from); 1085 + printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); 1090 1086 } 1091 1087 1092 1088 /* Print all blocks */ 1093 1089 for (i = nr - 2; i >= 0; i--) { 1094 - if (br->entries[i].from || br->entries[i].to) 1090 + if (entries[i].from || entries[i].to) 1095 1091 pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, 1096 - br->entries[i].from, 1097 - br->entries[i].to); 1098 - start = br->entries[i + 1].to; 1099 - end = br->entries[i].from; 1092 + entries[i].from, 1093 + entries[i].to); 1094 + start = entries[i + 1].to; 1095 + end = entries[i].from; 1100 1096 1101 1097 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); 1102 1098 /* Patch up missing kernel transfers due to ring filters */ 1103 1099 if (len == -ENXIO && i > 0) { 1104 - end = br->entries[--i].from; 1100 + end = entries[--i].from; 1105 1101 pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); 1106 1102 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); 1107 1103 } ··· 1114 1110 1115 1111 printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); 1116 1112 if (ip == end) { 1117 - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, 
fp, 1113 + printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, 1118 1114 &total_cycles); 1119 1115 if (PRINT_FIELD(SRCCODE)) 1120 1116 printed += print_srccode(thread, x.cpumode, ip); ··· 1138 1134 * Hit the branch? In this case we are already done, and the target 1139 1135 * has not been executed yet. 1140 1136 */ 1141 - if (br->entries[0].from == sample->ip) 1137 + if (entries[0].from == sample->ip) 1142 1138 goto out; 1143 - if (br->entries[0].flags.abort) 1139 + if (entries[0].flags.abort) 1144 1140 goto out; 1145 1141 1146 1142 /* ··· 1151 1147 * between final branch and sample. When this happens just 1152 1148 * continue walking after the last TO until we hit a branch. 1153 1149 */ 1154 - start = br->entries[0].to; 1150 + start = entries[0].to; 1155 1151 end = sample->ip; 1156 1152 if (end < start) { 1157 1153 /* Missing jump. Scan 128 bytes for the next branch */

+4

tools/perf/builtin-stat.c

··· 929 929 OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user, 930 930 "Configure all used events to run in user space.", 931 931 PARSE_OPT_EXCLUSIVE), 932 + OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread, 933 + "Use with 'percore' event qualifier to show the event " 934 + "counts of one hardware thread by sum up total hardware " 935 + "threads of same physical core"), 932 936 OPT_END() 933 937 }; 934 938

+4 -4

tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json

··· 4 4 "EventCode": "80", 5 5 "EventName": "ECC_FUNCTION_COUNT", 6 6 "BriefDescription": "ECC Function Count", 7 - "PublicDescription": "Long ECC function Count" 7 + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU." 8 8 }, 9 9 { 10 10 "Unit": "CPU-M-CF", 11 11 "EventCode": "81", 12 12 "EventName": "ECC_CYCLES_COUNT", 13 13 "BriefDescription": "ECC Cycles Count", 14 - "PublicDescription": "Long ECC Function cycles count" 14 + "PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU." 15 15 }, 16 16 { 17 17 "Unit": "CPU-M-CF", 18 18 "EventCode": "82", 19 19 "EventName": "ECC_BLOCKED_FUNCTION_COUNT", 20 20 "BriefDescription": "Ecc Blocked Function Count", 21 - "PublicDescription": "Long ECC blocked function count" 21 + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU." 22 22 }, 23 23 { 24 24 "Unit": "CPU-M-CF", 25 25 "EventCode": "83", 26 26 "EventName": "ECC_BLOCKED_CYCLES_COUNT", 27 27 "BriefDescription": "ECC Blocked Cycles Count", 28 - "PublicDescription": "Long ECC blocked cycles count" 28 + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU." 29 29 }, 30 30 ]

+29 -1

tools/perf/pmu-events/arch/s390/cf_z15/extended.json

··· 25 25 "EventCode": "131", 26 26 "EventName": "DTLB2_HPAGE_WRITES", 27 27 "BriefDescription": "DTLB2 One-Megabyte Page Writes", 28 - "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done" 28 + "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page" 29 29 }, 30 30 { 31 31 "Unit": "CPU-M-CF", ··· 355 355 "EventName": "TX_C_TABORT_SPECIAL", 356 356 "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", 357 357 "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" 358 + }, 359 + { 360 + "Unit": "CPU-M-CF", 361 + "EventCode": "247", 362 + "EventName": "DFLT_ACCESS", 363 + "BriefDescription": "Cycles CPU spent obtaining access to Deflate unit", 364 + "PublicDescription": "Cycles CPU spent obtaining access to Deflate unit" 365 + }, 366 + { 367 + "Unit": "CPU-M-CF", 368 + "EventCode": "252", 369 + "EventName": "DFLT_CYCLES", 370 + "BriefDescription": "Cycles CPU is using Deflate unit", 371 + "PublicDescription": "Cycles CPU is using Deflate unit" 372 + }, 373 + { 374 + "Unit": "CPU-M-CF", 375 + "EventCode": "264", 376 + "EventName": "DFLT_CC", 377 + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed", 378 + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed" 379 + }, 380 + { 381 + "Unit": "CPU-M-CF", 382 + "EventCode": "265", 383 + "EventName": "DFLT_CCERROR", 384 + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2", 385 + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed 
that ended in Condition Codes 0, 1 or 2" 358 386 }, 359 387 { 360 388 "Unit": "CPU-M-CF",

+2 -1

tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json

··· 215 215 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", 216 216 "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", 217 217 "MetricGroup": "TLB", 218 - "MetricName": "Page_Walks_Utilization" 218 + "MetricName": "Page_Walks_Utilization", 219 + "MetricConstraint": "NO_NMI_WATCHDOG" 219 220 }, 220 221 { 221 222 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",

+2 -1

tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json

··· 215 215 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", 216 216 "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", 217 217 "MetricGroup": "TLB", 218 - "MetricName": "Page_Walks_Utilization" 218 + "MetricName": "Page_Walks_Utilization", 219 + "MetricConstraint": "NO_NMI_WATCHDOG" 219 220 }, 220 221 { 221 222 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",

+2 -1

tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json

··· 215 215 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", 216 216 "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", 217 217 "MetricGroup": "TLB", 218 - "MetricName": "Page_Walks_Utilization" 218 + "MetricName": "Page_Walks_Utilization", 219 + "MetricConstraint": "NO_NMI_WATCHDOG" 219 220 }, 220 221 { 221 222 "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",

+13 -6

tools/perf/pmu-events/jevents.c

··· 323 323 char *pmu, char *unit, char *perpkg, 324 324 char *metric_expr, 325 325 char *metric_name, char *metric_group, 326 - char *deprecated) 326 + char *deprecated, char *metric_constraint) 327 327 { 328 328 struct perf_entry_data *pd = data; 329 329 FILE *outfp = pd->outfp; ··· 357 357 fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); 358 358 if (deprecated) 359 359 fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated); 360 + if (metric_constraint) 361 + fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint); 360 362 fprintf(outfp, "},\n"); 361 363 362 364 return 0; ··· 377 375 char *metric_name; 378 376 char *metric_group; 379 377 char *deprecated; 378 + char *metric_constraint; 380 379 }; 381 380 382 381 #define ADD_EVENT_FIELD(field) do { if (field) { \ ··· 425 422 char *desc, char *long_desc, char *pmu, 426 423 char *unit, char *perpkg, char *metric_expr, 427 424 char *metric_name, char *metric_group, 428 - char *deprecated) 425 + char *deprecated, char *metric_constraint) 429 426 { 430 427 struct event_struct *es; 431 428 ··· 489 486 char **name, char **long_desc, char **pmu, char **filter, 490 487 char **perpkg, char **unit, char **metric_expr, char **metric_name, 491 488 char **metric_group, unsigned long long eventcode, 492 - char **deprecated) 489 + char **deprecated, char **metric_constraint) 493 490 { 494 491 /* try to find matching event from arch standard values */ 495 492 struct event_struct *es; ··· 518 515 char *pmu, char *unit, char *perpkg, 519 516 char *metric_expr, 520 517 char *metric_name, char *metric_group, 521 - char *deprecated), 518 + char *deprecated, char *metric_constraint), 522 519 void *data) 523 520 { 524 521 int err; ··· 548 545 char *metric_name = NULL; 549 546 char *metric_group = NULL; 550 547 char *deprecated = NULL; 548 + char *metric_constraint = NULL; 551 549 char *arch_std = NULL; 552 550 unsigned long long eventcode = 0; 553 551 struct msrmap *msr = NULL; ··· 633 629 addfield(map, 
&metric_name, "", "", val); 634 630 } else if (json_streq(map, field, "MetricGroup")) { 635 631 addfield(map, &metric_group, "", "", val); 632 + } else if (json_streq(map, field, "MetricConstraint")) { 633 + addfield(map, &metric_constraint, "", "", val); 636 634 } else if (json_streq(map, field, "MetricExpr")) { 637 635 addfield(map, &metric_expr, "", "", val); 638 636 for (s = metric_expr; *s; s++) ··· 676 670 &long_desc, &pmu, &filter, &perpkg, 677 671 &unit, &metric_expr, &metric_name, 678 672 &metric_group, eventcode, 679 - &deprecated); 673 + &deprecated, &metric_constraint); 680 674 if (err) 681 675 goto free_strings; 682 676 } 683 677 err = func(data, name, real_event(name, event), desc, long_desc, 684 678 pmu, unit, perpkg, metric_expr, metric_name, 685 - metric_group, deprecated); 679 + metric_group, deprecated, metric_constraint); 686 680 free_strings: 687 681 free(event); 688 682 free(desc); ··· 697 691 free(metric_expr); 698 692 free(metric_name); 699 693 free(metric_group); 694 + free(metric_constraint); 700 695 free(arch_std); 701 696 702 697 if (err)

+1 -1

tools/perf/pmu-events/jevents.h

··· 8 8 char *pmu, 9 9 char *unit, char *perpkg, char *metric_expr, 10 10 char *metric_name, char *metric_group, 11 - char *deprecated), 11 + char *deprecated, char *metric_constraint), 12 12 void *data); 13 13 char *get_cpu_str(void); 14 14

+1

tools/perf/pmu-events/pmu-events.h

··· 18 18 const char *metric_name; 19 19 const char *metric_group; 20 20 const char *deprecated; 21 + const char *metric_constraint; 21 22 }; 22 23 23 24 /*

+3 -3

tools/perf/scripts/perl/check-perf-trace.pl

··· 28 28 sub irq::softirq_entry 29 29 { 30 30 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 31 - $common_pid, $common_comm, 31 + $common_pid, $common_comm, $common_callchain, 32 32 $vec) = @_; 33 33 34 34 print_header($event_name, $common_cpu, $common_secs, $common_nsecs, ··· 43 43 sub kmem::kmalloc 44 44 { 45 45 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 46 - $common_pid, $common_comm, 46 + $common_pid, $common_comm, $common_callchain, 47 47 $call_site, $ptr, $bytes_req, $bytes_alloc, 48 48 $gfp_flags) = @_; 49 49 ··· 92 92 sub trace_unhandled 93 93 { 94 94 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 95 - $common_pid, $common_comm) = @_; 95 + $common_pid, $common_comm, $common_callchain) = @_; 96 96 97 97 $unhandled{$event_name}++; 98 98 }

+1 -1

tools/perf/scripts/perl/failed-syscalls.pl

··· 18 18 sub raw_syscalls::sys_exit 19 19 { 20 20 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 21 - $common_pid, $common_comm, 21 + $common_pid, $common_comm, $common_callchain, 22 22 $id, $ret) = @_; 23 23 24 24 if ($ret < 0) {

+3 -3

tools/perf/scripts/perl/rw-by-file.pl

··· 28 28 sub syscalls::sys_enter_read 29 29 { 30 30 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 31 - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; 31 + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; 32 32 33 33 if ($common_comm eq $for_comm) { 34 34 $reads{$fd}{bytes_requested} += $count; ··· 39 39 sub syscalls::sys_enter_write 40 40 { 41 41 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 42 - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; 42 + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; 43 43 44 44 if ($common_comm eq $for_comm) { 45 45 $writes{$fd}{bytes_written} += $count; ··· 98 98 sub trace_unhandled 99 99 { 100 100 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 101 - $common_pid, $common_comm) = @_; 101 + $common_pid, $common_comm, $common_callchain) = @_; 102 102 103 103 $unhandled{$event_name}++; 104 104 }

+5 -5

tools/perf/scripts/perl/rw-by-pid.pl

··· 24 24 sub syscalls::sys_exit_read 25 25 { 26 26 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 27 - $common_pid, $common_comm, 27 + $common_pid, $common_comm, $common_callchain, 28 28 $nr, $ret) = @_; 29 29 30 30 if ($ret > 0) { ··· 40 40 sub syscalls::sys_enter_read 41 41 { 42 42 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 43 - $common_pid, $common_comm, 43 + $common_pid, $common_comm, $common_callchain, 44 44 $nr, $fd, $buf, $count) = @_; 45 45 46 46 $reads{$common_pid}{bytes_requested} += $count; ··· 51 51 sub syscalls::sys_exit_write 52 52 { 53 53 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 54 - $common_pid, $common_comm, 54 + $common_pid, $common_comm, $common_callchain, 55 55 $nr, $ret) = @_; 56 56 57 57 if ($ret <= 0) { ··· 62 62 sub syscalls::sys_enter_write 63 63 { 64 64 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 65 - $common_pid, $common_comm, 65 + $common_pid, $common_comm, $common_callchain, 66 66 $nr, $fd, $buf, $count) = @_; 67 67 68 68 $writes{$common_pid}{bytes_written} += $count; ··· 178 178 sub trace_unhandled 179 179 { 180 180 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 181 - $common_pid, $common_comm) = @_; 181 + $common_pid, $common_comm, $common_callchain) = @_; 182 182 183 183 $unhandled{$event_name}++; 184 184 }

+5 -5

tools/perf/scripts/perl/rwtop.pl

··· 35 35 sub syscalls::sys_exit_read 36 36 { 37 37 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 38 - $common_pid, $common_comm, 38 + $common_pid, $common_comm, $common_callchain, 39 39 $nr, $ret) = @_; 40 40 41 41 print_check(); ··· 53 53 sub syscalls::sys_enter_read 54 54 { 55 55 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 56 - $common_pid, $common_comm, 56 + $common_pid, $common_comm, $common_callchain, 57 57 $nr, $fd, $buf, $count) = @_; 58 58 59 59 print_check(); ··· 66 66 sub syscalls::sys_exit_write 67 67 { 68 68 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 69 - $common_pid, $common_comm, 69 + $common_pid, $common_comm, $common_callchain, 70 70 $nr, $ret) = @_; 71 71 72 72 print_check(); ··· 79 79 sub syscalls::sys_enter_write 80 80 { 81 81 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 82 - $common_pid, $common_comm, 82 + $common_pid, $common_comm, $common_callchain, 83 83 $nr, $fd, $buf, $count) = @_; 84 84 85 85 print_check(); ··· 197 197 sub trace_unhandled 198 198 { 199 199 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 200 - $common_pid, $common_comm) = @_; 200 + $common_pid, $common_comm, $common_callchain) = @_; 201 201 202 202 $unhandled{$event_name}++; 203 203 }

+3 -3

tools/perf/scripts/perl/wakeup-latency.pl

··· 28 28 sub sched::sched_switch 29 29 { 30 30 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 31 - $common_pid, $common_comm, 31 + $common_pid, $common_comm, $common_callchain, 32 32 $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid, 33 33 $next_prio) = @_; 34 34 ··· 51 51 sub sched::sched_wakeup 52 52 { 53 53 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 54 - $common_pid, $common_comm, 54 + $common_pid, $common_comm, $common_callchain, 55 55 $comm, $pid, $prio, $success, $target_cpu) = @_; 56 56 57 57 $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs); ··· 101 101 sub trace_unhandled 102 102 { 103 103 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, 104 - $common_pid, $common_comm) = @_; 104 + $common_pid, $common_comm, $common_callchain) = @_; 105 105 106 106 $unhandled{$event_name}++; 107 107 }

+4 -1

tools/perf/tests/builtin-test.c

··· 543 543 return -1; 544 544 545 545 dir = opendir(st.dir); 546 - if (!dir) 546 + if (!dir) { 547 + pr_err("failed to open shell test directory: %s\n", 548 + st.dir); 547 549 return -1; 550 + } 548 551 549 552 for_each_shell_test(dir, st.dir, ent) { 550 553 int curr = i++;

+5 -5

tools/perf/tests/expr.c

··· 10 10 { 11 11 double val; 12 12 13 - if (expr__parse(&val, ctx, &e)) 13 + if (expr__parse(&val, ctx, e)) 14 14 TEST_ASSERT_VAL("parse test failed", 0); 15 15 TEST_ASSERT_VAL("unexpected value", val == val2); 16 16 return 0; ··· 44 44 return ret; 45 45 46 46 p = "FOO/0"; 47 - ret = expr__parse(&val, &ctx, &p); 48 - TEST_ASSERT_VAL("division by zero", ret == 1); 47 + ret = expr__parse(&val, &ctx, p); 48 + TEST_ASSERT_VAL("division by zero", ret == -1); 49 49 50 50 p = "BAR/"; 51 - ret = expr__parse(&val, &ctx, &p); 52 - TEST_ASSERT_VAL("missing operand", ret == 1); 51 + ret = expr__parse(&val, &ctx, p); 52 + TEST_ASSERT_VAL("missing operand", ret == -1); 53 53 54 54 TEST_ASSERT_VAL("find other", 55 55 expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);

+6 -1

tools/perf/tests/sample-parsing.c

··· 99 99 100 100 if (type & PERF_SAMPLE_BRANCH_STACK) { 101 101 COMP(branch_stack->nr); 102 + COMP(branch_stack->hw_idx); 102 103 for (i = 0; i < s1->branch_stack->nr; i++) 103 104 MCOMP(branch_stack->entries[i]); 104 105 } ··· 187 186 u64 data[64]; 188 187 } branch_stack = { 189 188 /* 1 branch_entry */ 190 - .data = {1, 211, 212, 213}, 189 + .data = {1, -1ULL, 211, 212, 213}, 191 190 }; 192 191 u64 regs[64]; 193 192 const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; ··· 209 208 .transaction = 112, 210 209 .raw_data = (void *)raw_data, 211 210 .callchain = &callchain.callchain, 211 + .no_hw_idx = false, 212 212 .branch_stack = &branch_stack.branch_stack, 213 213 .user_regs = { 214 214 .abi = PERF_SAMPLE_REGS_ABI_64, ··· 245 243 246 244 if (sample_type & PERF_SAMPLE_REGS_INTR) 247 245 evsel.core.attr.sample_regs_intr = sample_regs; 246 + 247 + if (sample_type & PERF_SAMPLE_BRANCH_STACK) 248 + evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 248 249 249 250 for (i = 0; i < sizeof(regs); i++) 250 251 *(i + (u8 *)regs) = i & 0xfe;

+10 -1

tools/perf/util/Build

··· 121 121 perf-y += vsprintf.o 122 122 perf-y += units.o 123 123 perf-y += time-utils.o 124 + perf-y += expr-flex.o 124 125 perf-y += expr-bison.o 126 + perf-y += expr.o 125 127 perf-y += branch.o 126 128 perf-y += mem2node.o 127 129 ··· 191 189 $(call rule_mkdir) 192 190 $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ 193 191 192 + $(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c 193 + $(call rule_mkdir) 194 + $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l 195 + 194 196 $(OUTPUT)util/expr-bison.c: util/expr.y 195 197 $(call rule_mkdir) 196 - $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ 198 + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_ 197 199 198 200 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c 199 201 $(call rule_mkdir) ··· 209 203 210 204 CFLAGS_parse-events-flex.o += -w 211 205 CFLAGS_pmu-flex.o += -w 206 + CFLAGS_expr-flex.o += -w 212 207 CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w 213 208 CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w 214 209 CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w 215 210 216 211 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c 217 212 $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c 213 + $(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c 218 214 219 215 CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 220 216 CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" ··· 224 216 CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 225 217 CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 
226 218 CFLAGS_parse-events.o += -Wno-redundant-decls 219 + CFLAGS_expr.o += -Wno-redundant-decls 227 220 CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE 228 221 229 222 $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE

-2

tools/perf/util/annotate.c

··· 2611 2611 2612 2612 if (++al->jump_sources > notes->max_jump_sources) 2613 2613 notes->max_jump_sources = al->jump_sources; 2614 - 2615 - ++notes->nr_jumps; 2616 2614 } 2617 2615 } 2618 2616

-1

tools/perf/util/annotate.h

··· 279 279 struct annotation_options *options; 280 280 struct annotation_line **offsets; 281 281 int nr_events; 282 - int nr_jumps; 283 282 int max_jump_sources; 284 283 int nr_entries; 285 284 int nr_asm_entries;

+63 -43

tools/perf/util/block-info.c

··· 65 65 return bi; 66 66 } 67 67 68 - int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, 69 - struct hist_entry *left, struct hist_entry *right) 68 + int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right) 70 69 { 71 70 struct block_info *bi_l = left->block_info; 72 71 struct block_info *bi_r = right->block_info; ··· 73 74 74 75 if (!bi_l->sym || !bi_r->sym) { 75 76 if (!bi_l->sym && !bi_r->sym) 76 - return 0; 77 + return -1; 77 78 else if (!bi_l->sym) 78 79 return -1; 79 80 else 80 81 return 1; 81 82 } 82 83 83 - if (bi_l->sym == bi_r->sym) { 84 - if (bi_l->start == bi_r->start) { 85 - if (bi_l->end == bi_r->end) 86 - return 0; 87 - else 88 - return (int64_t)(bi_r->end - bi_l->end); 89 - } else 90 - return (int64_t)(bi_r->start - bi_l->start); 91 - } else { 92 - cmp = strcmp(bi_l->sym->name, bi_r->sym->name); 84 + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); 85 + if (cmp) 93 86 return cmp; 94 - } 95 87 96 - if (bi_l->sym->start != bi_r->sym->start) 97 - return (int64_t)(bi_r->sym->start - bi_l->sym->start); 88 + if (bi_l->start != bi_r->start) 89 + return (int64_t)(bi_r->start - bi_l->start); 98 90 99 - return (int64_t)(bi_r->sym->end - bi_l->sym->end); 91 + return (int64_t)(bi_r->end - bi_l->end); 92 + } 93 + 94 + int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, 95 + struct hist_entry *left, struct hist_entry *right) 96 + { 97 + return __block_info__cmp(left, right); 100 98 } 101 99 102 100 static void init_block_info(struct block_info *bi, struct symbol *sym, ··· 181 185 return block_fmt->width; 182 186 } 183 187 188 + static int color_pct(struct perf_hpp *hpp, int width, double pct) 189 + { 190 + #ifdef HAVE_SLANG_SUPPORT 191 + if (use_browser) { 192 + return __hpp__slsmg_color_printf(hpp, "%*.2f%%", 193 + width - 1, pct); 194 + } 195 + #endif 196 + return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct); 197 + } 198 + 184 199 static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, 185 200 
struct perf_hpp *hpp, 186 201 struct hist_entry *he) ··· 199 192 struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt); 200 193 struct block_info *bi = he->block_info; 201 194 double ratio = 0.0; 202 - char buf[16]; 203 195 204 196 if (block_fmt->total_cycles) 205 197 ratio = (double)bi->cycles / (double)block_fmt->total_cycles; 206 198 207 - sprintf(buf, "%.2f%%", 100.0 * ratio); 208 - 209 - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); 199 + return color_pct(hpp, block_fmt->width, 100.0 * ratio); 210 200 } 211 201 212 202 static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt, ··· 256 252 struct block_info *bi = he->block_info; 257 253 double ratio = 0.0; 258 254 u64 avg; 259 - char buf[16]; 260 255 261 256 if (block_fmt->block_cycles && bi->num_aggr) { 262 257 avg = bi->cycles_aggr / bi->num_aggr; 263 258 ratio = (double)avg / (double)block_fmt->block_cycles; 264 259 } 265 260 266 - sprintf(buf, "%.2f%%", 100.0 * ratio); 267 - 268 - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); 261 + return color_pct(hpp, block_fmt->width, 100.0 * ratio); 269 262 } 270 263 271 264 static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt, ··· 350 349 351 350 switch (idx) { 352 351 case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT: 353 - fmt->entry = block_total_cycles_pct_entry; 352 + fmt->color = block_total_cycles_pct_entry; 354 353 fmt->cmp = block_info__cmp; 355 354 fmt->sort = block_total_cycles_pct_sort; 356 355 break; ··· 358 357 fmt->entry = block_cycles_lbr_entry; 359 358 break; 360 359 case PERF_HPP_REPORT__BLOCK_CYCLES_PCT: 361 - fmt->entry = block_cycles_pct_entry; 360 + fmt->color = block_cycles_pct_entry; 362 361 break; 363 362 case PERF_HPP_REPORT__BLOCK_AVG_CYCLES: 364 363 fmt->entry = block_avg_cycles_entry; ··· 378 377 } 379 378 380 379 static void register_block_columns(struct perf_hpp_list *hpp_list, 381 - struct block_fmt *block_fmts) 380 + struct block_fmt *block_fmts, 381 + int 
*block_hpps, int nr_hpps) 382 382 { 383 - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) 384 - hpp_register(&block_fmts[i], i, hpp_list); 383 + for (int i = 0; i < nr_hpps; i++) 384 + hpp_register(&block_fmts[i], block_hpps[i], hpp_list); 385 385 } 386 386 387 - static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts) 387 + static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts, 388 + int *block_hpps, int nr_hpps) 388 389 { 389 390 __hists__init(&bh->block_hists, &bh->block_list); 390 391 perf_hpp_list__init(&bh->block_list); 391 392 bh->block_list.nr_header_lines = 1; 392 393 393 - register_block_columns(&bh->block_list, block_fmts); 394 + register_block_columns(&bh->block_list, block_fmts, 395 + block_hpps, nr_hpps); 394 396 395 - perf_hpp_list__register_sort_field(&bh->block_list, 396 - &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt); 397 + /* Sort by the first fmt */ 398 + perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt); 397 399 } 398 400 399 - static void process_block_report(struct hists *hists, 400 - struct block_report *block_report, 401 - u64 total_cycles) 401 + static int process_block_report(struct hists *hists, 402 + struct block_report *block_report, 403 + u64 total_cycles, int *block_hpps, 404 + int nr_hpps) 402 405 { 403 406 struct rb_node *next = rb_first_cached(&hists->entries); 404 407 struct block_hist *bh = &block_report->hist; 405 408 struct hist_entry *he; 406 409 407 - init_block_hist(bh, block_report->fmts); 410 + if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX) 411 + return -1; 412 + 413 + block_report->nr_fmts = nr_hpps; 414 + init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps); 408 415 409 416 while (next) { 410 417 he = rb_entry(next, struct hist_entry, rb_node); ··· 421 412 next = rb_next(&he->rb_node); 422 413 } 423 414 424 - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) { 415 + for (int i = 0; i < nr_hpps; i++) { 425 416 
block_report->fmts[i].total_cycles = total_cycles; 426 417 block_report->fmts[i].block_cycles = block_report->cycles; 427 418 } 428 419 429 420 hists__output_resort(&bh->block_hists, NULL); 421 + return 0; 430 422 } 431 423 432 424 struct block_report *block_info__create_report(struct evlist *evlist, 433 - u64 total_cycles) 425 + u64 total_cycles, 426 + int *block_hpps, int nr_hpps, 427 + int *nr_reps) 434 428 { 435 429 struct block_report *block_reports; 436 430 int nr_hists = evlist->core.nr_entries, i = 0; ··· 446 434 evlist__for_each_entry(evlist, pos) { 447 435 struct hists *hists = evsel__hists(pos); 448 436 449 - process_block_report(hists, &block_reports[i], total_cycles); 437 + process_block_report(hists, &block_reports[i], total_cycles, 438 + block_hpps, nr_hpps); 450 439 i++; 451 440 } 452 441 442 + *nr_reps = nr_hists; 453 443 return block_reports; 444 + } 445 + 446 + void block_info__free_report(struct block_report *reps, int nr_reps) 447 + { 448 + for (int i = 0; i < nr_reps; i++) 449 + hists__delete_entries(&reps[i].hist.block_hists); 450 + 451 + free(reps); 454 452 } 455 453 456 454 int report__browse_block_hists(struct block_hist *bh, float min_percent, ··· 474 452 symbol_conf.report_individual_block = true; 475 453 hists__fprintf(&bh->block_hists, true, 0, 0, min_percent, 476 454 stdout, true); 477 - hists__delete_entries(&bh->block_hists); 478 455 return 0; 479 456 case 1: 480 457 symbol_conf.report_individual_block = true; 481 458 ret = block_hists_tui_browse(bh, evsel, min_percent, 482 459 env, annotation_opts); 483 - hists__delete_entries(&bh->block_hists); 484 460 return ret; 485 461 default: 486 462 return -1;

+8 -1

tools/perf/util/block-info.h

··· 45 45 struct block_hist hist; 46 46 u64 cycles; 47 47 struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX]; 48 + int nr_fmts; 48 49 }; 49 50 50 51 struct block_hist; ··· 62 61 63 62 #define block_info__zput(bi) __block_info__zput(&bi) 64 63 64 + int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right); 65 + 65 66 int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, 66 67 struct hist_entry *left, struct hist_entry *right); 67 68 ··· 71 68 u64 *block_cycles_aggr, u64 total_cycles); 72 69 73 70 struct block_report *block_info__create_report(struct evlist *evlist, 74 - u64 total_cycles); 71 + u64 total_cycles, 72 + int *block_hpps, int nr_hpps, 73 + int *nr_reps); 74 + 75 + void block_info__free_report(struct block_report *reps, int nr_reps); 75 76 76 77 int report__browse_block_hists(struct block_hist *bh, float min_percent, 77 78 struct evsel *evsel, struct perf_env *env,

+22

tools/perf/util/branch.h

··· 12 12 #include <linux/stddef.h> 13 13 #include <linux/perf_event.h> 14 14 #include <linux/types.h> 15 + #include "event.h" 15 16 16 17 struct branch_flags { 17 18 u64 mispred:1; ··· 40 39 41 40 struct branch_stack { 42 41 u64 nr; 42 + u64 hw_idx; 43 43 struct branch_entry entries[0]; 44 44 }; 45 + 46 + /* 47 + * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied. 48 + * Otherwise, the output format of a sample with branch stack is 49 + * struct branch_stack { 50 + * u64 nr; 51 + * struct branch_entry entries[0]; 52 + * } 53 + * Check whether the hw_idx is available, 54 + * and return the corresponding pointer of entries[0]. 55 + */ 56 + static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample) 57 + { 58 + u64 *entry = (u64 *)sample->branch_stack; 59 + 60 + entry++; 61 + if (sample->no_hw_idx) 62 + return (struct branch_entry *)entry; 63 + return (struct branch_entry *)(++entry); 64 + } 45 65 46 66 struct branch_type_stat { 47 67 bool branch_to;

+2 -61

tools/perf/util/cgroup.c

··· 3 3 #include "evsel.h" 4 4 #include "cgroup.h" 5 5 #include "evlist.h" 6 - #include <linux/stringify.h> 7 6 #include <linux/zalloc.h> 8 7 #include <sys/types.h> 9 8 #include <sys/stat.h> 10 9 #include <fcntl.h> 11 10 #include <stdlib.h> 12 11 #include <string.h> 12 + #include <api/fs/fs.h> 13 13 14 14 int nr_cgroups; 15 - 16 - static int 17 - cgroupfs_find_mountpoint(char *buf, size_t maxlen) 18 - { 19 - FILE *fp; 20 - char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; 21 - char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; 22 - char *token, *saved_ptr = NULL; 23 - 24 - fp = fopen("/proc/mounts", "r"); 25 - if (!fp) 26 - return -1; 27 - 28 - /* 29 - * in order to handle split hierarchy, we need to scan /proc/mounts 30 - * and inspect every cgroupfs mount point to find one that has 31 - * perf_event subsystem 32 - */ 33 - path_v1[0] = '\0'; 34 - path_v2[0] = '\0'; 35 - 36 - while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" 37 - __stringify(PATH_MAX)"s %*d %*d\n", 38 - mountpoint, type, tokens) == 3) { 39 - 40 - if (!path_v1[0] && !strcmp(type, "cgroup")) { 41 - 42 - token = strtok_r(tokens, ",", &saved_ptr); 43 - 44 - while (token != NULL) { 45 - if (!strcmp(token, "perf_event")) { 46 - strcpy(path_v1, mountpoint); 47 - break; 48 - } 49 - token = strtok_r(NULL, ",", &saved_ptr); 50 - } 51 - } 52 - 53 - if (!path_v2[0] && !strcmp(type, "cgroup2")) 54 - strcpy(path_v2, mountpoint); 55 - 56 - if (path_v1[0] && path_v2[0]) 57 - break; 58 - } 59 - fclose(fp); 60 - 61 - if (path_v1[0]) 62 - path = path_v1; 63 - else if (path_v2[0]) 64 - path = path_v2; 65 - else 66 - return -1; 67 - 68 - if (strlen(path) < maxlen) { 69 - strcpy(buf, path); 70 - return 0; 71 - } 72 - return -1; 73 - } 74 15 75 16 static int open_cgroup(const char *name) 76 17 { ··· 20 79 int fd; 21 80 22 81 23 - if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) 82 + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event")) 24 83 return -1; 25 
84 26 85 scnprintf(path, PATH_MAX, "%s/%s", mnt, name);

+113 -46

tools/perf/util/cs-etm.c

··· 363 363 return NULL; 364 364 } 365 365 366 + static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, 367 + struct cs_etm_traceid_queue *tidq) 368 + { 369 + struct cs_etm_packet *tmp; 370 + 371 + if (etm->sample_branches || etm->synth_opts.last_branch || 372 + etm->sample_instructions) { 373 + /* 374 + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 375 + * the next incoming packet. 376 + */ 377 + tmp = tidq->packet; 378 + tidq->packet = tidq->prev_packet; 379 + tidq->prev_packet = tmp; 380 + } 381 + } 382 + 366 383 static void cs_etm__packet_dump(const char *pkt_string) 367 384 { 368 385 const char *color = PERF_COLOR_BLUE; ··· 962 945 if (packet->isa == CS_ETM_ISA_T32) { 963 946 u64 addr = packet->start_addr; 964 947 965 - while (offset > 0) { 948 + while (offset) { 966 949 addr += cs_etm__t32_instr_size(etmq, 967 950 trace_chan_id, addr); 968 951 offset--; ··· 1151 1134 1152 1135 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); 1153 1136 1154 - if (etm->synth_opts.last_branch) { 1155 - cs_etm__copy_last_branch_rb(etmq, tidq); 1137 + if (etm->synth_opts.last_branch) 1156 1138 sample.branch_stack = tidq->last_branch; 1157 - } 1158 1139 1159 1140 if (etm->synth_opts.inject) { 1160 1141 ret = cs_etm__inject_event(event, &sample, ··· 1167 1152 pr_err( 1168 1153 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1169 1154 ret); 1170 - 1171 - if (etm->synth_opts.last_branch) 1172 - cs_etm__reset_last_branch_rb(tidq); 1173 1155 1174 1156 return ret; 1175 1157 } ··· 1184 1172 union perf_event *event = tidq->event_buf; 1185 1173 struct dummy_branch_stack { 1186 1174 u64 nr; 1175 + u64 hw_idx; 1187 1176 struct branch_entry entries; 1188 1177 } dummy_bs; 1189 1178 u64 ip; ··· 1215 1202 if (etm->synth_opts.last_branch) { 1216 1203 dummy_bs = (struct dummy_branch_stack){ 1217 1204 .nr = 1, 1205 + .hw_idx = -1ULL, 1218 1206 .entries = { 1219 1207 .from = sample.ip, 1220 1208 .to = sample.addr, ··· 1354 1340 struct 
cs_etm_traceid_queue *tidq) 1355 1341 { 1356 1342 struct cs_etm_auxtrace *etm = etmq->etm; 1357 - struct cs_etm_packet *tmp; 1358 1343 int ret; 1359 1344 u8 trace_chan_id = tidq->trace_chan_id; 1360 - u64 instrs_executed = tidq->packet->instr_count; 1345 + u64 instrs_prev; 1361 1346 1362 - tidq->period_instructions += instrs_executed; 1347 + /* Get instructions remainder from previous packet */ 1348 + instrs_prev = tidq->period_instructions; 1349 + 1350 + tidq->period_instructions += tidq->packet->instr_count; 1363 1351 1364 1352 /* 1365 1353 * Record a branch when the last instruction in ··· 1379 1363 * TODO: allow period to be defined in cycles and clock time 1380 1364 */ 1381 1365 1382 - /* Get number of instructions executed after the sample point */ 1383 - u64 instrs_over = tidq->period_instructions - 1384 - etm->instructions_sample_period; 1366 + /* 1367 + * Below diagram demonstrates the instruction samples 1368 + * generation flows: 1369 + * 1370 + * Instrs Instrs Instrs Instrs 1371 + * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) 1372 + * | | | | 1373 + * V V V V 1374 + * -------------------------------------------------- 1375 + * ^ ^ 1376 + * | | 1377 + * Period Period 1378 + * instructions(Pi) instructions(Pi') 1379 + * 1380 + * | | 1381 + * \---------------- -----------------/ 1382 + * V 1383 + * tidq->packet->instr_count 1384 + * 1385 + * Instrs Sample(n...) are the synthesised samples occurring 1386 + * every etm->instructions_sample_period instructions - as 1387 + * defined on the perf command line. Sample(n) is being the 1388 + * last sample before the current etm packet, n+1 to n+3 1389 + * samples are generated from the current etm packet. 1390 + * 1391 + * tidq->packet->instr_count represents the number of 1392 + * instructions in the current etm packet. 1393 + * 1394 + * Period instructions (Pi) contains the number of 1395 + * instructions executed after the sample point(n) from the 1396 + * previous etm packet.
This will always be less than 1397 + * etm->instructions_sample_period. 1398 + * 1399 + * When generate new samples, it combines with two parts 1400 + * instructions, one is the tail of the old packet and another 1401 + * is the head of the new coming packet, to generate 1402 + * sample(n+1); sample(n+2) and sample(n+3) consume the 1403 + * instructions with sample period. After sample(n+3), the rest 1404 + * instructions will be used by later packet and it is assigned 1405 + * to tidq->period_instructions for next round calculation. 1406 + */ 1385 1407 1386 1408 /* 1387 - * Calculate the address of the sampled instruction (-1 as 1388 - * sample is reported as though instruction has just been 1389 - * executed, but PC has not advanced to next instruction) 1409 + * Get the initial offset into the current packet instructions; 1410 + * entry conditions ensure that instrs_prev is less than 1411 + * etm->instructions_sample_period. 1390 1412 */ 1391 - u64 offset = (instrs_executed - instrs_over - 1); 1392 - u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, 1393 - tidq->packet, offset); 1413 + u64 offset = etm->instructions_sample_period - instrs_prev; 1414 + u64 addr; 1394 1415 1395 - ret = cs_etm__synth_instruction_sample( 1396 - etmq, tidq, addr, etm->instructions_sample_period); 1397 - if (ret) 1398 - return ret; 1416 + /* Prepare last branches for instruction sample */ 1417 + if (etm->synth_opts.last_branch) 1418 + cs_etm__copy_last_branch_rb(etmq, tidq); 1399 1419 1400 - /* Carry remaining instructions into next sample period */ 1401 - tidq->period_instructions = instrs_over; 1420 + while (tidq->period_instructions >= 1421 + etm->instructions_sample_period) { 1422 + /* 1423 + * Calculate the address of the sampled instruction (-1 1424 + * as sample is reported as though instruction has just 1425 + * been executed, but PC has not advanced to next 1426 + * instruction) 1427 + */ 1428 + addr = cs_etm__instr_addr(etmq, trace_chan_id, 1429 + tidq->packet, offset - 1); 
1430 + ret = cs_etm__synth_instruction_sample( 1431 + etmq, tidq, addr, 1432 + etm->instructions_sample_period); 1433 + if (ret) 1434 + return ret; 1435 + 1436 + offset += etm->instructions_sample_period; 1437 + tidq->period_instructions -= 1438 + etm->instructions_sample_period; 1439 + } 1402 1440 } 1403 1441 1404 1442 if (etm->sample_branches) { ··· 1474 1404 } 1475 1405 } 1476 1406 1477 - if (etm->sample_branches || etm->synth_opts.last_branch) { 1478 - /* 1479 - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1480 - * the next incoming packet. 1481 - */ 1482 - tmp = tidq->packet; 1483 - tidq->packet = tidq->prev_packet; 1484 - tidq->prev_packet = tmp; 1485 - } 1407 + cs_etm__packet_swap(etm, tidq); 1486 1408 1487 1409 return 0; 1488 1410 } ··· 1503 1441 { 1504 1442 int err = 0; 1505 1443 struct cs_etm_auxtrace *etm = etmq->etm; 1506 - struct cs_etm_packet *tmp; 1507 1444 1508 1445 /* Handle start tracing packet */ 1509 1446 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) ··· 1510 1449 1511 1450 if (etmq->etm->synth_opts.last_branch && 1512 1451 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1452 + u64 addr; 1453 + 1454 + /* Prepare last branches for instruction sample */ 1455 + cs_etm__copy_last_branch_rb(etmq, tidq); 1456 + 1513 1457 /* 1514 1458 * Generate a last branch event for the branches left in the 1515 1459 * circular buffer at the end of the trace. ··· 1522 1456 * Use the address of the end of the last reported execution 1523 1457 * range 1524 1458 */ 1525 - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1459 + addr = cs_etm__last_executed_instr(tidq->prev_packet); 1526 1460 1527 1461 err = cs_etm__synth_instruction_sample( 1528 1462 etmq, tidq, addr, ··· 1542 1476 } 1543 1477 1544 1478 swap_packet: 1545 - if (etm->sample_branches || etm->synth_opts.last_branch) { 1546 - /* 1547 - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1548 - * the next incoming packet. 
1549 - */ 1550 - tmp = tidq->packet; 1551 - tidq->packet = tidq->prev_packet; 1552 - tidq->prev_packet = tmp; 1553 - } 1479 + cs_etm__packet_swap(etm, tidq); 1480 + 1481 + /* Reset last branches after flush the trace */ 1482 + if (etm->synth_opts.last_branch) 1483 + cs_etm__reset_last_branch_rb(tidq); 1554 1484 1555 1485 return err; 1556 1486 } ··· 1567 1505 */ 1568 1506 if (etmq->etm->synth_opts.last_branch && 1569 1507 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1508 + u64 addr; 1509 + 1510 + /* Prepare last branches for instruction sample */ 1511 + cs_etm__copy_last_branch_rb(etmq, tidq); 1512 + 1570 1513 /* 1571 1514 * Use the address of the end of the last reported execution 1572 1515 * range. 1573 1516 */ 1574 - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1517 + addr = cs_etm__last_executed_instr(tidq->prev_packet); 1575 1518 1576 1519 err = cs_etm__synth_instruction_sample( 1577 1520 etmq, tidq, addr,

+1

tools/perf/util/event.h

··· 139 139 u16 insn_len; 140 140 u8 cpumode; 141 141 u16 misc; 142 + bool no_hw_idx; /* No hw_idx collected in branch_stack */ 142 143 char insn[MAX_INSN]; 143 144 void *raw_data; 144 145 struct ip_callchain *callchain;

+17 -3

tools/perf/util/evsel.c

··· 712 712 attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | 713 713 PERF_SAMPLE_BRANCH_CALL_STACK | 714 714 PERF_SAMPLE_BRANCH_NO_CYCLES | 715 - PERF_SAMPLE_BRANCH_NO_FLAGS; 715 + PERF_SAMPLE_BRANCH_NO_FLAGS | 716 + PERF_SAMPLE_BRANCH_HW_INDEX; 716 717 } 717 718 } else 718 719 pr_warning("Cannot use LBR callstack with branch stack. " ··· 764 763 if (param->record_mode == CALLCHAIN_LBR) { 765 764 perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); 766 765 attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | 767 - PERF_SAMPLE_BRANCH_CALL_STACK); 766 + PERF_SAMPLE_BRANCH_CALL_STACK | 767 + PERF_SAMPLE_BRANCH_HW_INDEX); 768 768 } 769 769 if (param->record_mode == CALLCHAIN_DWARF) { 770 770 perf_evsel__reset_sample_bit(evsel, REGS_USER); ··· 1675 1673 evsel->core.attr.ksymbol = 0; 1676 1674 if (perf_missing_features.bpf) 1677 1675 evsel->core.attr.bpf_event = 0; 1676 + if (perf_missing_features.branch_hw_idx) 1677 + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX; 1678 1678 retry_sample_id: 1679 1679 if (perf_missing_features.sample_id_all) 1680 1680 evsel->core.attr.sample_id_all = 0; ··· 1788 1784 * Must probe features in the order they were added to the 1789 1785 * perf_event_attr interface. 
1790 1786 */ 1791 - if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { 1787 + if (!perf_missing_features.branch_hw_idx && 1788 + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { 1789 + perf_missing_features.branch_hw_idx = true; 1790 + pr_debug2("switching off branch HW index support\n"); 1791 + goto fallback_missing_features; 1792 + } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { 1792 1793 perf_missing_features.aux_output = true; 1793 1794 pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); 1794 1795 goto out_close; ··· 2178 2169 2179 2170 if (data->branch_stack->nr > max_branch_nr) 2180 2171 return -EFAULT; 2172 + 2181 2173 sz = data->branch_stack->nr * sizeof(struct branch_entry); 2174 + if (perf_evsel__has_branch_hw_idx(evsel)) 2175 + sz += sizeof(u64); 2176 + else 2177 + data->no_hw_idx = true; 2182 2178 OVERFLOW_CHECK(array, sz, max_size); 2183 2179 array = (void *)array + sz; 2184 2180 }

+6

tools/perf/util/evsel.h

··· 119 119 bool ksymbol; 120 120 bool bpf; 121 121 bool aux_output; 122 + bool branch_hw_idx; 122 123 }; 123 124 124 125 extern struct perf_missing_features perf_missing_features; ··· 388 387 static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) 389 388 { 390 389 return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; 390 + } 391 + 392 + static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) 393 + { 394 + return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; 391 395 } 392 396 393 397 static inline bool evsel__has_callchain(const struct evsel *evsel)

+112

tools/perf/util/expr.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <stdbool.h> 3 + #include <assert.h> 4 + #include "expr.h" 5 + #include "expr-bison.h" 6 + #define YY_EXTRA_TYPE int 7 + #include "expr-flex.h" 8 + 9 + #ifdef PARSER_DEBUG 10 + extern int expr_debug; 11 + #endif 12 + 13 + /* Caller must make sure id is allocated */ 14 + void expr__add_id(struct parse_ctx *ctx, const char *name, double val) 15 + { 16 + int idx; 17 + 18 + assert(ctx->num_ids < MAX_PARSE_ID); 19 + idx = ctx->num_ids++; 20 + ctx->ids[idx].name = name; 21 + ctx->ids[idx].val = val; 22 + } 23 + 24 + void expr__ctx_init(struct parse_ctx *ctx) 25 + { 26 + ctx->num_ids = 0; 27 + } 28 + 29 + static int 30 + __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, 31 + int start) 32 + { 33 + YY_BUFFER_STATE buffer; 34 + void *scanner; 35 + int ret; 36 + 37 + ret = expr_lex_init_extra(start, &scanner); 38 + if (ret) 39 + return ret; 40 + 41 + buffer = expr__scan_string(expr, scanner); 42 + 43 + #ifdef PARSER_DEBUG 44 + expr_debug = 1; 45 + #endif 46 + 47 + ret = expr_parse(val, ctx, scanner); 48 + 49 + expr__flush_buffer(buffer, scanner); 50 + expr__delete_buffer(buffer, scanner); 51 + expr_lex_destroy(scanner); 52 + return ret; 53 + } 54 + 55 + int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) 56 + { 57 + return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? 
-1 : 0; 58 + } 59 + 60 + static bool 61 + already_seen(const char *val, const char *one, const char **other, 62 + int num_other) 63 + { 64 + int i; 65 + 66 + if (one && !strcasecmp(one, val)) 67 + return true; 68 + for (i = 0; i < num_other; i++) 69 + if (!strcasecmp(other[i], val)) 70 + return true; 71 + return false; 72 + } 73 + 74 + int expr__find_other(const char *expr, const char *one, const char ***other, 75 + int *num_other) 76 + { 77 + int err, i = 0, j = 0; 78 + struct parse_ctx ctx; 79 + 80 + expr__ctx_init(&ctx); 81 + err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); 82 + if (err) 83 + return -1; 84 + 85 + *other = malloc((ctx.num_ids + 1) * sizeof(char *)); 86 + if (!*other) 87 + return -ENOMEM; 88 + 89 + for (i = 0, j = 0; i < ctx.num_ids; i++) { 90 + const char *str = ctx.ids[i].name; 91 + 92 + if (already_seen(str, one, *other, j)) 93 + continue; 94 + 95 + str = strdup(str); 96 + if (!str) 97 + goto out; 98 + (*other)[j++] = str; 99 + } 100 + (*other)[j] = NULL; 101 + 102 + out: 103 + if (i != ctx.num_ids) { 104 + while (--j) 105 + free((char *) (*other)[i]); 106 + free(*other); 107 + err = -1; 108 + } 109 + 110 + *num_other = j; 111 + return err; 112 + }

+3 -5

tools/perf/util/expr.h

··· 2 2 #ifndef PARSE_CTX_H 3 3 #define PARSE_CTX_H 1 4 4 5 - #define EXPR_MAX_OTHER 15 5 + #define EXPR_MAX_OTHER 20 6 6 #define MAX_PARSE_ID EXPR_MAX_OTHER 7 7 8 8 struct parse_id { ··· 17 17 18 18 void expr__ctx_init(struct parse_ctx *ctx); 19 19 void expr__add_id(struct parse_ctx *ctx, const char *id, double val); 20 - #ifndef IN_EXPR_Y 21 - int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); 22 - #endif 23 - int expr__find_other(const char *p, const char *one, const char ***other, 20 + int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); 21 + int expr__find_other(const char *expr, const char *one, const char ***other, 24 22 int *num_other); 25 23 26 24 #endif

+114

tools/perf/util/expr.l

··· 1 + %option prefix="expr_" 2 + %option reentrant 3 + %option bison-bridge 4 + 5 + %{ 6 + #include <linux/compiler.h> 7 + #include "expr.h" 8 + #include "expr-bison.h" 9 + 10 + char *expr_get_text(yyscan_t yyscanner); 11 + YYSTYPE *expr_get_lval(yyscan_t yyscanner); 12 + 13 + static int __value(YYSTYPE *yylval, char *str, int base, int token) 14 + { 15 + u64 num; 16 + 17 + errno = 0; 18 + num = strtoull(str, NULL, base); 19 + if (errno) 20 + return EXPR_ERROR; 21 + 22 + yylval->num = num; 23 + return token; 24 + } 25 + 26 + static int value(yyscan_t scanner, int base) 27 + { 28 + YYSTYPE *yylval = expr_get_lval(scanner); 29 + char *text = expr_get_text(scanner); 30 + 31 + return __value(yylval, text, base, NUMBER); 32 + } 33 + 34 + /* 35 + * Allow @ instead of / to be able to specify pmu/event/ without 36 + * conflicts with normal division. 37 + */ 38 + static char *normalize(char *str) 39 + { 40 + char *ret = str; 41 + char *dst = str; 42 + 43 + while (*str) { 44 + if (*str == '@') 45 + *dst++ = '/'; 46 + else if (*str == '\\') 47 + *dst++ = *++str; 48 + else 49 + *dst++ = *str; 50 + str++; 51 + } 52 + 53 + *dst = 0x0; 54 + return ret; 55 + } 56 + 57 + static int str(yyscan_t scanner, int token) 58 + { 59 + YYSTYPE *yylval = expr_get_lval(scanner); 60 + char *text = expr_get_text(scanner); 61 + 62 + yylval->str = normalize(strdup(text)); 63 + if (!yylval->str) 64 + return EXPR_ERROR; 65 + 66 + yylval->str = normalize(yylval->str); 67 + return token; 68 + } 69 + %} 70 + 71 + number [0-9]+ 72 + 73 + sch [-,=] 74 + spec \\{sch} 75 + sym [0-9a-zA-Z_\.:@]+ 76 + symbol {spec}*{sym}*{spec}*{sym}* 77 + 78 + %% 79 + { 80 + int start_token; 81 + 82 + start_token = expr_get_extra(yyscanner); 83 + 84 + if (start_token) { 85 + expr_set_extra(NULL, yyscanner); 86 + return start_token; 87 + } 88 + } 89 + 90 + max { return MAX; } 91 + min { return MIN; } 92 + if { return IF; } 93 + else { return ELSE; } 94 + #smt_on { return SMT_ON; } 95 + {number} { return value(yyscanner, 
10); } 96 + {symbol} { return str(yyscanner, ID); } 97 + "|" { return '|'; } 98 + "^" { return '^'; } 99 + "&" { return '&'; } 100 + "-" { return '-'; } 101 + "+" { return '+'; } 102 + "*" { return '*'; } 103 + "/" { return '/'; } 104 + "%" { return '%'; } 105 + "(" { return '('; } 106 + ")" { return ')'; } 107 + "," { return ','; } 108 + . { } 109 + %% 110 + 111 + int expr_wrap(void *scanner __maybe_unused) 112 + { 113 + return 1; 114 + }

+31 -154

tools/perf/util/expr.y

··· 1 1 /* Simple expression parser */ 2 2 %{ 3 + #define YYDEBUG 1 4 + #include <stdio.h> 3 5 #include "util.h" 4 6 #include "util/debug.h" 5 7 #include <stdlib.h> // strtod() 6 8 #define IN_EXPR_Y 1 7 9 #include "expr.h" 8 10 #include "smt.h" 9 - #include <assert.h> 10 11 #include <string.h> 11 12 12 - #define MAXIDLEN 256 13 13 %} 14 14 15 15 %define api.pure full 16 16 17 17 %parse-param { double *final_val } 18 18 %parse-param { struct parse_ctx *ctx } 19 - %parse-param { const char **pp } 20 - %lex-param { const char **pp } 19 + %parse-param {void *scanner} 20 + %lex-param {void* scanner} 21 21 22 22 %union { 23 - double num; 24 - char id[MAXIDLEN+1]; 23 + double num; 24 + char *str; 25 25 } 26 26 27 + %token EXPR_PARSE EXPR_OTHER EXPR_ERROR 27 28 %token <num> NUMBER 28 - %token <id> ID 29 + %token <str> ID 29 30 %token MIN MAX IF ELSE SMT_ON 30 31 %left MIN MAX IF 31 32 %left '|' ··· 38 37 %type <num> expr if_expr 39 38 40 39 %{ 41 - static int expr__lex(YYSTYPE *res, const char **pp); 42 - 43 - static void expr__error(double *final_val __maybe_unused, 40 + static void expr_error(double *final_val __maybe_unused, 44 41 struct parse_ctx *ctx __maybe_unused, 45 - const char **pp __maybe_unused, 42 + void *scanner, 46 43 const char *s) 47 44 { 48 45 pr_debug("%s\n", s); ··· 61 62 62 63 %} 63 64 %% 65 + 66 + start: 67 + EXPR_PARSE all_expr 68 + | 69 + EXPR_OTHER all_other 70 + 71 + all_other: all_other other 72 + | 73 + 74 + other: ID 75 + { 76 + if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) { 77 + pr_err("failed: way too many variables"); 78 + YYABORT; 79 + } 80 + 81 + ctx->ids[ctx->num_ids++].name = $1; 82 + } 83 + | 84 + MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' 85 + 64 86 65 87 all_expr: if_expr { *final_val = $1; } 66 88 ; ··· 113 93 ; 114 94 115 95 %% 116 - 117 - static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) 118 - { 119 - char *dst = res->id; 120 - const char *s = p; 121 - 122 - if 
(*p == '#') 123 - *dst++ = *p++; 124 - 125 - while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') { 126 - if (p - s >= MAXIDLEN) 127 - return -1; 128 - /* 129 - * Allow @ instead of / to be able to specify pmu/event/ without 130 - * conflicts with normal division. 131 - */ 132 - if (*p == '@') 133 - *dst++ = '/'; 134 - else if (*p == '\\') 135 - *dst++ = *++p; 136 - else 137 - *dst++ = *p; 138 - p++; 139 - } 140 - *dst = 0; 141 - *pp = p; 142 - dst = res->id; 143 - switch (dst[0]) { 144 - case 'm': 145 - if (!strcmp(dst, "min")) 146 - return MIN; 147 - if (!strcmp(dst, "max")) 148 - return MAX; 149 - break; 150 - case 'i': 151 - if (!strcmp(dst, "if")) 152 - return IF; 153 - break; 154 - case 'e': 155 - if (!strcmp(dst, "else")) 156 - return ELSE; 157 - break; 158 - case '#': 159 - if (!strcasecmp(dst, "#smt_on")) 160 - return SMT_ON; 161 - break; 162 - } 163 - return ID; 164 - } 165 - 166 - static int expr__lex(YYSTYPE *res, const char **pp) 167 - { 168 - int tok; 169 - const char *s; 170 - const char *p = *pp; 171 - 172 - while (isspace(*p)) 173 - p++; 174 - s = p; 175 - switch (*p++) { 176 - case '#': 177 - case 'a' ... 'z': 178 - case 'A' ... 'Z': 179 - return expr__symbol(res, p - 1, pp); 180 - case '0' ... 
'9': case '.': 181 - res->num = strtod(s, (char **)&p); 182 - tok = NUMBER; 183 - break; 184 - default: 185 - tok = *s; 186 - break; 187 - } 188 - *pp = p; 189 - return tok; 190 - } 191 - 192 - /* Caller must make sure id is allocated */ 193 - void expr__add_id(struct parse_ctx *ctx, const char *name, double val) 194 - { 195 - int idx; 196 - assert(ctx->num_ids < MAX_PARSE_ID); 197 - idx = ctx->num_ids++; 198 - ctx->ids[idx].name = name; 199 - ctx->ids[idx].val = val; 200 - } 201 - 202 - void expr__ctx_init(struct parse_ctx *ctx) 203 - { 204 - ctx->num_ids = 0; 205 - } 206 - 207 - static bool already_seen(const char *val, const char *one, const char **other, 208 - int num_other) 209 - { 210 - int i; 211 - 212 - if (one && !strcasecmp(one, val)) 213 - return true; 214 - for (i = 0; i < num_other; i++) 215 - if (!strcasecmp(other[i], val)) 216 - return true; 217 - return false; 218 - } 219 - 220 - int expr__find_other(const char *p, const char *one, const char ***other, 221 - int *num_otherp) 222 - { 223 - const char *orig = p; 224 - int err = -1; 225 - int num_other; 226 - 227 - *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); 228 - if (!*other) 229 - return -1; 230 - 231 - num_other = 0; 232 - for (;;) { 233 - YYSTYPE val; 234 - int tok = expr__lex(&val, &p); 235 - if (tok == 0) { 236 - err = 0; 237 - break; 238 - } 239 - if (tok == ID && !already_seen(val.id, one, *other, num_other)) { 240 - if (num_other >= EXPR_MAX_OTHER - 1) { 241 - pr_debug("Too many extra events in %s\n", orig); 242 - break; 243 - } 244 - (*other)[num_other] = strdup(val.id); 245 - if (!(*other)[num_other]) 246 - return -1; 247 - num_other++; 248 - } 249 - } 250 - (*other)[num_other] = NULL; 251 - *num_otherp = num_other; 252 - if (err) { 253 - *num_otherp = 0; 254 - free(*other); 255 - *other = NULL; 256 - } 257 - return err; 258 - }

+37

tools/perf/util/header.c

··· 1590 1590 free(events); 1591 1591 } 1592 1592 1593 + static bool perf_attr_check(struct perf_event_attr *attr) 1594 + { 1595 + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) { 1596 + pr_warning("Reserved bits are set unexpectedly. " 1597 + "Please update perf tool.\n"); 1598 + return false; 1599 + } 1600 + 1601 + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) { 1602 + pr_warning("Unknown sample type (0x%llx) is detected. " 1603 + "Please update perf tool.\n", 1604 + attr->sample_type); 1605 + return false; 1606 + } 1607 + 1608 + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) { 1609 + pr_warning("Unknown read format (0x%llx) is detected. " 1610 + "Please update perf tool.\n", 1611 + attr->read_format); 1612 + return false; 1613 + } 1614 + 1615 + if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) && 1616 + (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) { 1617 + pr_warning("Unknown branch sample type (0x%llx) is detected. " 1618 + "Please update perf tool.\n", 1619 + attr->branch_sample_type); 1620 + 1621 + return false; 1622 + } 1623 + 1624 + return true; 1625 + } 1626 + 1593 1627 static struct evsel *read_event_desc(struct feat_fd *ff) 1594 1628 { 1595 1629 struct evsel *evsel, *events = NULL; ··· 1667 1633 perf_event__attr_swap(buf); 1668 1634 1669 1635 memcpy(&evsel->core.attr, buf, msz); 1636 + 1637 + if (!perf_attr_check(&evsel->core.attr)) 1638 + goto error; 1670 1639 1671 1640 if (do_read_u32(ff, &nr)) 1672 1641 goto error;

+2 -1

tools/perf/util/hist.c

··· 2584 2584 u64 *total_cycles) 2585 2585 { 2586 2586 struct branch_info *bi; 2587 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2587 2588 2588 2589 /* If we have branch cycles always annotate them. */ 2589 - if (bs && bs->nr && bs->entries[0].flags.cycles) { 2590 + if (bs && bs->nr && entries[0].flags.cycles) { 2590 2591 int i; 2591 2592 2592 2593 bi = sample__resolve_bstack(sample, al);

+2

tools/perf/util/intel-pt.c

··· 1295 1295 struct perf_sample sample = { .ip = 0, }; 1296 1296 struct dummy_branch_stack { 1297 1297 u64 nr; 1298 + u64 hw_idx; 1298 1299 struct branch_entry entries; 1299 1300 } dummy_bs; 1300 1301 ··· 1317 1316 if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { 1318 1317 dummy_bs = (struct dummy_branch_stack){ 1319 1318 .nr = 1, 1319 + .hw_idx = -1ULL, 1320 1320 .entries = { 1321 1321 .from = sample.ip, 1322 1322 .to = sample.addr,

+2

tools/perf/util/llvm-utils.c

··· 265 265 return -ENOMEM; 266 266 return 0; 267 267 } 268 + pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n", 269 + __func__, autoconf_path); 268 270 free(autoconf_path); 269 271 return -ENOENT; 270 272 }

+18 -17

tools/perf/util/machine.c

··· 2081 2081 { 2082 2082 unsigned int i; 2083 2083 const struct branch_stack *bs = sample->branch_stack; 2084 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2084 2085 struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); 2085 2086 2086 2087 if (!bi) 2087 2088 return NULL; 2088 2089 2089 2090 for (i = 0; i < bs->nr; i++) { 2090 - ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); 2091 - ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); 2092 - bi[i].flags = bs->entries[i].flags; 2091 + ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); 2092 + ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); 2093 + bi[i].flags = entries[i].flags; 2093 2094 } 2094 2095 return bi; 2095 2096 } ··· 2186 2185 /* LBR only affects the user callchain */ 2187 2186 if (i != chain_nr) { 2188 2187 struct branch_stack *lbr_stack = sample->branch_stack; 2188 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2189 2189 int lbr_nr = lbr_stack->nr, j, k; 2190 2190 bool branch; 2191 2191 struct branch_flags *flags; ··· 2212 2210 ip = chain->ips[j]; 2213 2211 else if (j > i + 1) { 2214 2212 k = j - i - 2; 2215 - ip = lbr_stack->entries[k].from; 2213 + ip = entries[k].from; 2216 2214 branch = true; 2217 - flags = &lbr_stack->entries[k].flags; 2215 + flags = &entries[k].flags; 2218 2216 } else { 2219 - ip = lbr_stack->entries[0].to; 2217 + ip = entries[0].to; 2220 2218 branch = true; 2221 - flags = &lbr_stack->entries[0].flags; 2222 - branch_from = 2223 - lbr_stack->entries[0].from; 2219 + flags = &entries[0].flags; 2220 + branch_from = entries[0].from; 2224 2221 } 2225 2222 } else { 2226 2223 if (j < lbr_nr) { 2227 2224 k = lbr_nr - j - 1; 2228 - ip = lbr_stack->entries[k].from; 2225 + ip = entries[k].from; 2229 2226 branch = true; 2230 - flags = &lbr_stack->entries[k].flags; 2227 + flags = &entries[k].flags; 2231 2228 } 2232 2229 else if (j > lbr_nr) 2233 2230 ip = chain->ips[i + 1 - (j - lbr_nr)]; 2234 2231 
else { 2235 - ip = lbr_stack->entries[0].to; 2232 + ip = entries[0].to; 2236 2233 branch = true; 2237 - flags = &lbr_stack->entries[0].flags; 2238 - branch_from = 2239 - lbr_stack->entries[0].from; 2234 + flags = &entries[0].flags; 2235 + branch_from = entries[0].from; 2240 2236 } 2241 2237 } 2242 2238 ··· 2281 2281 int max_stack) 2282 2282 { 2283 2283 struct branch_stack *branch = sample->branch_stack; 2284 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2284 2285 struct ip_callchain *chain = sample->callchain; 2285 2286 int chain_nr = 0; 2286 2287 u8 cpumode = PERF_RECORD_MISC_USER; ··· 2329 2328 2330 2329 for (i = 0; i < nr; i++) { 2331 2330 if (callchain_param.order == ORDER_CALLEE) { 2332 - be[i] = branch->entries[i]; 2331 + be[i] = entries[i]; 2333 2332 2334 2333 if (chain == NULL) 2335 2334 continue; ··· 2348 2347 be[i].from >= chain->ips[first_call] - 8) 2349 2348 first_call++; 2350 2349 } else 2351 - be[i] = branch->entries[branch->nr - i - 1]; 2350 + be[i] = entries[branch->nr - i - 1]; 2352 2351 } 2353 2352 2354 2353 memset(iter, 0, sizeof(struct iterations) * nr);

+4 -4

tools/perf/util/map.c

··· 44 44 45 45 static inline int is_android_lib(const char *filename) 46 46 { 47 - return !strncmp(filename, "/data/app-lib", 13) || 48 - !strncmp(filename, "/system/lib", 11); 47 + return strstarts(filename, "/data/app-lib/") || 48 + strstarts(filename, "/system/lib/"); 49 49 } 50 50 51 51 static inline bool replace_android_lib(const char *filename, char *newfilename) ··· 65 65 66 66 app_abi_length = strlen(app_abi); 67 67 68 - if (!strncmp(filename, "/data/app-lib", 13)) { 68 + if (strstarts(filename, "/data/app-lib/")) { 69 69 char *apk_path; 70 70 71 71 if (!app_abi_length) ··· 89 89 return true; 90 90 } 91 91 92 - if (!strncmp(filename, "/system/lib/", 12)) { 92 + if (strstarts(filename, "/system/lib/")) { 93 93 char *ndk, *app; 94 94 const char *arch; 95 95 size_t ndk_length;

+85 -24

tools/perf/util/metricgroup.c

··· 22 22 #include <linux/string.h> 23 23 #include <linux/zalloc.h> 24 24 #include <subcmd/parse-options.h> 25 + #include <api/fs/fs.h> 26 + #include "util.h" 25 27 26 28 struct metric_event *metricgroup__lookup(struct rblist *metric_events, 27 29 struct evsel *evsel, ··· 401 399 strlist__delete(metriclist); 402 400 } 403 401 402 + static void metricgroup__add_metric_weak_group(struct strbuf *events, 403 + const char **ids, 404 + int idnum) 405 + { 406 + bool no_group = false; 407 + int i; 408 + 409 + for (i = 0; i < idnum; i++) { 410 + pr_debug("found event %s\n", ids[i]); 411 + /* 412 + * Duration time maps to a software event and can make 413 + * groups not count. Always use it outside a 414 + * group. 415 + */ 416 + if (!strcmp(ids[i], "duration_time")) { 417 + if (i > 0) 418 + strbuf_addf(events, "}:W,"); 419 + strbuf_addf(events, "duration_time"); 420 + no_group = true; 421 + continue; 422 + } 423 + strbuf_addf(events, "%s%s", 424 + i == 0 || no_group ? "{" : ",", 425 + ids[i]); 426 + no_group = false; 427 + } 428 + if (!no_group) 429 + strbuf_addf(events, "}:W"); 430 + } 431 + 432 + static void metricgroup__add_metric_non_group(struct strbuf *events, 433 + const char **ids, 434 + int idnum) 435 + { 436 + int i; 437 + 438 + for (i = 0; i < idnum; i++) 439 + strbuf_addf(events, ",%s", ids[i]); 440 + } 441 + 442 + static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) 443 + { 444 + static bool violate_nmi_constraint; 445 + 446 + if (!foot) { 447 + pr_warning("Splitting metric group %s into standalone metrics.\n", name); 448 + violate_nmi_constraint = true; 449 + return; 450 + } 451 + 452 + if (!violate_nmi_constraint) 453 + return; 454 + 455 + pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" 456 + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 457 + " perf stat ...\n" 458 + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); 459 + } 460 + 461 + static bool metricgroup__has_constraint(struct pmu_event 
*pe) 462 + { 463 + if (!pe->metric_constraint) 464 + return false; 465 + 466 + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && 467 + sysctl__nmi_watchdog_enabled()) { 468 + metricgroup___watchdog_constraint_hint(pe->metric_name, false); 469 + return true; 470 + } 471 + 472 + return false; 473 + } 474 + 404 475 static int metricgroup__add_metric(const char *metric, struct strbuf *events, 405 476 struct list_head *group_list) 406 477 { 407 478 struct pmu_events_map *map = perf_pmu__find_map(NULL); 408 479 struct pmu_event *pe; 409 - int ret = -EINVAL; 410 - int i, j; 480 + int i, ret = -EINVAL; 411 481 412 482 if (!map) 413 483 return 0; ··· 496 422 const char **ids; 497 423 int idnum; 498 424 struct egroup *eg; 499 - bool no_group = false; 500 425 501 426 pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); 502 427 ··· 504 431 continue; 505 432 if (events->len > 0) 506 433 strbuf_addf(events, ","); 507 - for (j = 0; j < idnum; j++) { 508 - pr_debug("found event %s\n", ids[j]); 509 - /* 510 - * Duration time maps to a software event and can make 511 - * groups not count. Always use it outside a 512 - * group. 513 - */ 514 - if (!strcmp(ids[j], "duration_time")) { 515 - if (j > 0) 516 - strbuf_addf(events, "}:W,"); 517 - strbuf_addf(events, "duration_time"); 518 - no_group = true; 519 - continue; 520 - } 521 - strbuf_addf(events, "%s%s", 522 - j == 0 || no_group ? "{" : ",", 523 - ids[j]); 524 - no_group = false; 525 - } 526 - if (!no_group) 527 - strbuf_addf(events, "}:W"); 434 + 435 + if (metricgroup__has_constraint(pe)) 436 + metricgroup__add_metric_non_group(events, ids, idnum); 437 + else 438 + metricgroup__add_metric_weak_group(events, ids, idnum); 528 439 529 440 eg = malloc(sizeof(struct egroup)); 530 441 if (!eg) { ··· 550 493 } 551 494 } 552 495 free(nlist); 496 + 497 + if (!ret) 498 + metricgroup___watchdog_constraint_hint(NULL, true); 499 + 553 500 return ret; 554 501 } 555 502

+15 -6

tools/perf/util/mmap.c

··· 98 98 { 99 99 void *data; 100 100 size_t mmap_len; 101 - unsigned long node_mask; 101 + unsigned long *node_mask; 102 + unsigned long node_index; 103 + int err = 0; 102 104 103 105 if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { 104 106 data = map->aio.data[idx]; 105 107 mmap_len = mmap__mmap_len(map); 106 - node_mask = 1UL << cpu__get_node(cpu); 107 - if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { 108 - pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", 109 - data, data + mmap_len, cpu__get_node(cpu)); 108 + node_index = cpu__get_node(cpu); 109 + node_mask = bitmap_alloc(node_index + 1); 110 + if (!node_mask) { 111 + pr_err("Failed to allocate node mask for mbind: error %m\n"); 110 112 return -1; 111 113 } 114 + set_bit(node_index, node_mask); 115 + if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) { 116 + pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n", 117 + data, data + mmap_len, node_index); 118 + err = -1; 119 + } 120 + bitmap_free(node_mask); 112 121 } 113 122 114 - return 0; 123 + return err; 115 124 } 116 125 #else /* !HAVE_LIBNUMA_SUPPORT */ 117 126 static int perf_mmap__aio_alloc(struct mmap *map, int idx)

+1

tools/perf/util/perf_event_attr_fprintf.c

··· 50 50 bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), 51 51 bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), 52 52 bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), 53 + bit_name(HW_INDEX), 53 54 { .name = NULL, } 54 55 }; 55 56 #undef bit_name

+16 -14

tools/perf/util/scripting-engines/trace-event-python.c

··· 464 464 struct thread *thread) 465 465 { 466 466 struct branch_stack *br = sample->branch_stack; 467 + struct branch_entry *entries = perf_sample__branch_entries(sample); 467 468 PyObject *pylist; 468 469 u64 i; 469 470 ··· 485 484 Py_FatalError("couldn't create Python dictionary"); 486 485 487 486 pydict_set_item_string_decref(pyelem, "from", 488 - PyLong_FromUnsignedLongLong(br->entries[i].from)); 487 + PyLong_FromUnsignedLongLong(entries[i].from)); 489 488 pydict_set_item_string_decref(pyelem, "to", 490 - PyLong_FromUnsignedLongLong(br->entries[i].to)); 489 + PyLong_FromUnsignedLongLong(entries[i].to)); 491 490 pydict_set_item_string_decref(pyelem, "mispred", 492 - PyBool_FromLong(br->entries[i].flags.mispred)); 491 + PyBool_FromLong(entries[i].flags.mispred)); 493 492 pydict_set_item_string_decref(pyelem, "predicted", 494 - PyBool_FromLong(br->entries[i].flags.predicted)); 493 + PyBool_FromLong(entries[i].flags.predicted)); 495 494 pydict_set_item_string_decref(pyelem, "in_tx", 496 - PyBool_FromLong(br->entries[i].flags.in_tx)); 495 + PyBool_FromLong(entries[i].flags.in_tx)); 497 496 pydict_set_item_string_decref(pyelem, "abort", 498 - PyBool_FromLong(br->entries[i].flags.abort)); 497 + PyBool_FromLong(entries[i].flags.abort)); 499 498 pydict_set_item_string_decref(pyelem, "cycles", 500 - PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); 499 + PyLong_FromUnsignedLongLong(entries[i].flags.cycles)); 501 500 502 501 thread__find_map_fb(thread, sample->cpumode, 503 - br->entries[i].from, &al); 502 + entries[i].from, &al); 504 503 dsoname = get_dsoname(al.map); 505 504 pydict_set_item_string_decref(pyelem, "from_dsoname", 506 505 _PyUnicode_FromString(dsoname)); 507 506 508 507 thread__find_map_fb(thread, sample->cpumode, 509 - br->entries[i].to, &al); 508 + entries[i].to, &al); 510 509 dsoname = get_dsoname(al.map); 511 510 pydict_set_item_string_decref(pyelem, "to_dsoname", 512 511 _PyUnicode_FromString(dsoname)); ··· 562 561 struct thread *thread) 
563 562 { 564 563 struct branch_stack *br = sample->branch_stack; 564 + struct branch_entry *entries = perf_sample__branch_entries(sample); 565 565 PyObject *pylist; 566 566 u64 i; 567 567 char bf[512]; ··· 583 581 Py_FatalError("couldn't create Python dictionary"); 584 582 585 583 thread__find_symbol_fb(thread, sample->cpumode, 586 - br->entries[i].from, &al); 584 + entries[i].from, &al); 587 585 get_symoff(al.sym, &al, true, bf, sizeof(bf)); 588 586 pydict_set_item_string_decref(pyelem, "from", 589 587 _PyUnicode_FromString(bf)); 590 588 591 589 thread__find_symbol_fb(thread, sample->cpumode, 592 - br->entries[i].to, &al); 590 + entries[i].to, &al); 593 591 get_symoff(al.sym, &al, true, bf, sizeof(bf)); 594 592 pydict_set_item_string_decref(pyelem, "to", 595 593 _PyUnicode_FromString(bf)); 596 594 597 - get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); 595 + get_br_mspred(&entries[i].flags, bf, sizeof(bf)); 598 596 pydict_set_item_string_decref(pyelem, "pred", 599 597 _PyUnicode_FromString(bf)); 600 598 601 - if (br->entries[i].flags.in_tx) { 599 + if (entries[i].flags.in_tx) { 602 600 pydict_set_item_string_decref(pyelem, "in_tx", 603 601 _PyUnicode_FromString("X")); 604 602 } else { ··· 606 604 _PyUnicode_FromString("-")); 607 605 } 608 606 609 - if (br->entries[i].flags.abort) { 607 + if (entries[i].flags.abort) { 610 608 pydict_set_item_string_decref(pyelem, "abort", 611 609 _PyUnicode_FromString("A")); 612 610 } else {

+5 -3

tools/perf/util/session.c

··· 1007 1007 { 1008 1008 struct ip_callchain *callchain = sample->callchain; 1009 1009 struct branch_stack *lbr_stack = sample->branch_stack; 1010 + struct branch_entry *entries = perf_sample__branch_entries(sample); 1010 1011 u64 kernel_callchain_nr = callchain->nr; 1011 1012 unsigned int i; 1012 1013 ··· 1044 1043 i, callchain->ips[i]); 1045 1044 1046 1045 printf("..... %2d: %016" PRIx64 "\n", 1047 - (int)(kernel_callchain_nr), lbr_stack->entries[0].to); 1046 + (int)(kernel_callchain_nr), entries[0].to); 1048 1047 for (i = 0; i < lbr_stack->nr; i++) 1049 1048 printf("..... %2d: %016" PRIx64 "\n", 1050 - (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); 1049 + (int)(i + kernel_callchain_nr + 1), entries[i].from); 1051 1050 } 1052 1051 } 1053 1052 ··· 1069 1068 1070 1069 static void branch_stack__printf(struct perf_sample *sample, bool callstack) 1071 1070 { 1071 + struct branch_entry *entries = perf_sample__branch_entries(sample); 1072 1072 uint64_t i; 1073 1073 1074 1074 printf("%s: nr:%" PRIu64 "\n", ··· 1077 1075 sample->branch_stack->nr); 1078 1076 1079 1077 for (i = 0; i < sample->branch_stack->nr; i++) { 1080 - struct branch_entry *e = &sample->branch_stack->entries[i]; 1078 + struct branch_entry *e = &entries[i]; 1081 1079 1082 1080 if (!callstack) { 1083 1081 printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",

+30 -9

tools/perf/util/stat-display.c

··· 16 16 #include <linux/ctype.h> 17 17 #include "cgroup.h" 18 18 #include <api/fs/fs.h> 19 + #include "util.h" 19 20 20 21 #define CNTR_NOT_SUPPORTED "<not supported>" 21 22 #define CNTR_NOT_COUNTED "<not counted>" ··· 111 110 config->csv_sep); 112 111 break; 113 112 case AGGR_NONE: 114 - if (evsel->percore) { 113 + if (evsel->percore && !config->percore_show_thread) { 115 114 fprintf(config->output, "S%d-D%d-C%*d%s", 116 115 cpu_map__id_to_socket(id), 117 116 cpu_map__id_to_die(id), ··· 629 628 static void print_counter_aggrdata(struct perf_stat_config *config, 630 629 struct evsel *counter, int s, 631 630 char *prefix, bool metric_only, 632 - bool *first) 631 + bool *first, int cpu) 633 632 { 634 633 struct aggr_data ad; 635 634 FILE *output = config->output; ··· 655 654 fprintf(output, "%s", prefix); 656 655 657 656 uval = val * counter->scale; 658 - printout(config, id, nr, counter, uval, prefix, 657 + printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix, 659 658 run, ena, 1.0, &rt_stat); 660 659 if (!metric_only) 661 660 fputc('\n', output); ··· 688 687 evlist__for_each_entry(evlist, counter) { 689 688 print_counter_aggrdata(config, counter, s, 690 689 prefix, metric_only, 691 - &first); 690 + &first, -1); 692 691 } 693 692 if (metric_only) 694 693 fputc('\n', output); ··· 1098 1097 { 1099 1098 double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; 1100 1099 FILE *output = config->output; 1101 - int n; 1102 1100 1103 1101 if (!config->null_run) 1104 1102 fprintf(output, "\n"); ··· 1131 1131 } 1132 1132 fprintf(output, "\n\n"); 1133 1133 1134 - if (config->print_free_counters_hint && 1135 - sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && 1136 - n > 0) 1134 + if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled()) 1137 1135 fprintf(output, 1138 1136 "Some events weren't counted. 
Try disabling the NMI watchdog:\n" 1139 1137 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" ··· 1142 1144 fprintf(output, 1143 1145 "The events in group usually have to be from " 1144 1146 "the same PMU. Try reorganizing the group.\n"); 1147 + } 1148 + 1149 + static void print_percore_thread(struct perf_stat_config *config, 1150 + struct evsel *counter, char *prefix) 1151 + { 1152 + int s, s2, id; 1153 + bool first = true; 1154 + 1155 + for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { 1156 + s2 = config->aggr_get_id(config, evsel__cpus(counter), i); 1157 + for (s = 0; s < config->aggr_map->nr; s++) { 1158 + id = config->aggr_map->map[s]; 1159 + if (s2 == id) 1160 + break; 1161 + } 1162 + 1163 + print_counter_aggrdata(config, counter, s, 1164 + prefix, false, 1165 + &first, i); 1166 + } 1145 1167 } 1146 1168 1147 1169 static void print_percore(struct perf_stat_config *config, ··· 1175 1157 if (!(config->aggr_map || config->aggr_get_id)) 1176 1158 return; 1177 1159 1160 + if (config->percore_show_thread) 1161 + return print_percore_thread(config, counter, prefix); 1162 + 1178 1163 for (s = 0; s < config->aggr_map->nr; s++) { 1179 1164 if (prefix && metric_only) 1180 1165 fprintf(output, "%s", prefix); 1181 1166 1182 1167 print_counter_aggrdata(config, counter, s, 1183 1168 prefix, metric_only, 1184 - &first); 1169 + &first, -1); 1185 1170 } 1186 1171 1187 1172 if (metric_only)

+1 -3

tools/perf/util/stat-shadow.c

··· 777 777 } 778 778 779 779 if (!metric_events[i]) { 780 - const char *p = metric_expr; 781 - 782 - if (expr__parse(&ratio, &pctx, &p) == 0) { 780 + if (expr__parse(&ratio, &pctx, metric_expr) == 0) { 783 781 char *unit; 784 782 char metric_bf[64]; 785 783

+1

tools/perf/util/stat.h

··· 109 109 bool walltime_run_table; 110 110 bool all_kernel; 111 111 bool all_user; 112 + bool percore_show_thread; 112 113 FILE *output; 113 114 unsigned int interval; 114 115 unsigned int timeout;

+5 -2

tools/perf/util/synthetic-events.c

··· 345 345 continue; 346 346 347 347 event->mmap2.ino = (u64)ino; 348 + event->mmap2.ino_generation = 0; 348 349 349 350 /* 350 351 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c ··· 1184 1183 1185 1184 if (type & PERF_SAMPLE_BRANCH_STACK) { 1186 1185 sz = sample->branch_stack->nr * sizeof(struct branch_entry); 1187 - sz += sizeof(u64); 1186 + /* nr, hw_idx */ 1187 + sz += 2 * sizeof(u64); 1188 1188 result += sz; 1189 1189 } 1190 1190 ··· 1346 1344 1347 1345 if (type & PERF_SAMPLE_BRANCH_STACK) { 1348 1346 sz = sample->branch_stack->nr * sizeof(struct branch_entry); 1349 - sz += sizeof(u64); 1347 + /* nr, hw_idx */ 1348 + sz += 2 * sizeof(u64); 1350 1349 memcpy(array, sample->branch_stack, sz); 1351 1350 array = (void *)array + sz; 1352 1351 }

+18

tools/perf/util/util.c

··· 55 55 return sysctl_perf_event_max_stack; 56 56 } 57 57 58 + bool sysctl__nmi_watchdog_enabled(void) 59 + { 60 + static bool cached; 61 + static bool nmi_watchdog; 62 + int value; 63 + 64 + if (cached) 65 + return nmi_watchdog; 66 + 67 + if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0) 68 + return false; 69 + 70 + nmi_watchdog = (value > 0) ? true : false; 71 + cached = true; 72 + 73 + return nmi_watchdog; 74 + } 75 + 58 76 bool test_attr__enabled; 59 77 60 78 bool perf_host = true;

+2

tools/perf/util/util.h

··· 29 29 30 30 int sysctl__max_stack(void); 31 31 32 + bool sysctl__nmi_watchdog_enabled(void); 33 + 32 34 int fetch_kernel_version(unsigned int *puint, 33 35 char *str, size_t str_sz); 34 36 #define KVER_VERSION(x) (((x) >> 16) & 0xff)

Configure Feed

Configure Feed