Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
"Core updates:

- Try to allocate task_ctx_data quickly, to optimize an O(N^2) algorithm
on large systems with O(100k) threads (Namhyung Kim)

AMD PMU driver IBS support updates and fixes, by Ravi Bangoria:
- Fix interrupt accounting for discarded samples
- Fix a Zen5-specific quirk
- Fix PhyAddrVal handling
- Fix NMI-safety with perf_allow_kernel()
- Fix a race between event add and NMIs

Intel PMU driver updates:
- Only check GP counters for PEBS constraints validation (Dapeng Mi)

MSR driver:
- Turn SMI_COUNT and PPERF on by default, instead of maintaining a long
list of CPU models to enable them on (Kan Liang)

... and misc cleanups and fixes by Aldo Conte, Anshuman Khandual,
Namhyung Kim, Ravi Bangoria and Yen-Hsiang Hsu"

* tag 'perf-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/events: Replace READ_ONCE() with standard pgtable accessors
perf/x86/msr: Make SMI and PPERF on by default
perf/x86/intel/p4: Fix unused variable warning in p4_pmu_init()
perf/x86/intel: Only check GP counters for PEBS constraints validation
perf/x86/amd/ibs: Fix comment typo in ibs_op_data
perf/amd/ibs: Advertise remote socket capability
perf/amd/ibs: Enable streaming store filter
perf/amd/ibs: Enable RIP bit63 hardware filtering
perf/amd/ibs: Enable fetch latency filtering
perf/amd/ibs: Support IBS_{FETCH|OP}_CTL2[Dis] to eliminate RMW race
perf/amd/ibs: Add new MSRs and CPUID bits definitions
perf/amd/ibs: Define macro for ldlat mask and shift
perf/amd/ibs: Avoid race between event add and NMI
perf/amd/ibs: Avoid calling perf_allow_kernel() from the IBS NMI handler
perf/amd/ibs: Preserve PhyAddrVal bit when clearing PhyAddr MSR
perf/amd/ibs: Limit ldlat->l3missonly dependency to Zen5
perf/amd/ibs: Account interrupt for discarded samples
perf/core: Simplify __detach_global_ctx_data()
perf/core: Try to allocate task_ctx_data quickly
perf/core: Pass GFP flags to attach_task_ctx_data()

+331 -152
+246 -18
arch/x86/events/amd/ibs.c
··· 32 32 /* attr.config2 */ 33 33 #define IBS_SW_FILTER_MASK 1 34 34 35 + /* attr.config1 */ 36 + #define IBS_OP_CONFIG1_LDLAT_MASK (0xFFFULL << 0) 37 + #define IBS_OP_CONFIG1_STRMST_MASK (1ULL << 12) 38 + #define IBS_OP_CONFIG1_STRMST_SHIFT (12) 39 + 40 + #define IBS_FETCH_CONFIG1_FETCHLAT_MASK (0x7FFULL << 0) 41 + 35 42 /* 36 43 * IBS states: 37 44 * ··· 90 83 struct perf_ibs { 91 84 struct pmu pmu; 92 85 unsigned int msr; 86 + unsigned int msr2; 93 87 u64 config_mask; 94 88 u64 cnt_mask; 95 89 u64 enable_mask; 90 + u64 disable_mask; 96 91 u64 valid_mask; 97 92 u16 min_period; 98 93 u64 max_period; ··· 283 274 { 284 275 return perf_ibs == &perf_ibs_op && 285 276 (ibs_caps & IBS_CAPS_OPLDLAT) && 286 - (event->attr.config1 & 0xFFF); 277 + (event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK); 278 + } 279 + 280 + static bool perf_ibs_fetch_lat_event(struct perf_ibs *perf_ibs, 281 + struct perf_event *event) 282 + { 283 + return perf_ibs == &perf_ibs_fetch && 284 + (ibs_caps & IBS_CAPS_FETCHLAT) && 285 + (event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK); 286 + } 287 + 288 + static bool perf_ibs_strmst_event(struct perf_ibs *perf_ibs, 289 + struct perf_event *event) 290 + { 291 + return perf_ibs == &perf_ibs_op && 292 + (ibs_caps & IBS_CAPS_STRMST_RMTSOCKET) && 293 + (event->attr.config1 & IBS_OP_CONFIG1_STRMST_MASK); 287 294 } 288 295 289 296 static int perf_ibs_init(struct perf_event *event) ··· 314 289 return -ENOENT; 315 290 316 291 config = event->attr.config; 292 + hwc->extra_reg.config = 0; 293 + hwc->extra_reg.reg = 0; 317 294 318 295 if (event->pmu != &perf_ibs->pmu) 319 296 return -ENOENT; ··· 331 304 event->attr.exclude_idle) 332 305 return -EINVAL; 333 306 334 - if (!(event->attr.config2 & IBS_SW_FILTER_MASK) && 335 - (event->attr.exclude_kernel || event->attr.exclude_user || 336 - event->attr.exclude_hv)) 337 - return -EINVAL; 338 - 339 307 ret = validate_group(event); 340 308 if (ret) 341 309 return ret; 310 + 311 + if (perf_allow_kernel()) 312 + 
hwc->flags |= PERF_X86_EVENT_UNPRIVILEGED; 313 + 314 + if (ibs_caps & IBS_CAPS_DIS) { 315 + hwc->extra_reg.config &= ~perf_ibs->disable_mask; 316 + hwc->extra_reg.reg = perf_ibs->msr2; 317 + } 318 + 319 + if (ibs_caps & IBS_CAPS_BIT63_FILTER) { 320 + if (perf_ibs == &perf_ibs_fetch) { 321 + if (event->attr.exclude_kernel) { 322 + hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_1; 323 + hwc->extra_reg.reg = perf_ibs->msr2; 324 + } 325 + if (event->attr.exclude_user) { 326 + hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_0; 327 + hwc->extra_reg.reg = perf_ibs->msr2; 328 + } 329 + } else { 330 + if (event->attr.exclude_kernel) { 331 + hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_1; 332 + hwc->extra_reg.reg = perf_ibs->msr2; 333 + } 334 + if (event->attr.exclude_user) { 335 + hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_0; 336 + hwc->extra_reg.reg = perf_ibs->msr2; 337 + } 338 + } 339 + } else if (!(event->attr.config2 & IBS_SW_FILTER_MASK) && 340 + (event->attr.exclude_kernel || event->attr.exclude_user || 341 + event->attr.exclude_hv)) { 342 + return -EINVAL; 343 + } 342 344 343 345 if (hwc->sample_period) { 344 346 if (config & perf_ibs->cnt_mask) ··· 405 349 } 406 350 407 351 if (perf_ibs_ldlat_event(perf_ibs, event)) { 408 - u64 ldlat = event->attr.config1 & 0xFFF; 352 + u64 ldlat = event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK; 409 353 410 354 if (ldlat < 128 || ldlat > 2048) 411 355 return -EINVAL; 412 356 ldlat >>= 7; 413 357 414 - config |= (ldlat - 1) << 59; 415 - config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN; 358 + config |= (ldlat - 1) << IBS_OP_LDLAT_THRSH_SHIFT; 359 + 360 + config |= IBS_OP_LDLAT_EN; 361 + if (cpu_feature_enabled(X86_FEATURE_ZEN5)) 362 + config |= IBS_OP_L3MISSONLY; 363 + } 364 + 365 + if (perf_ibs_fetch_lat_event(perf_ibs, event)) { 366 + u64 fetchlat = event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK; 367 + 368 + if (fetchlat < 128 || fetchlat > 1920) 369 + return -EINVAL; 370 + fetchlat >>= 7; 371 + 372 + 
hwc->extra_reg.reg = perf_ibs->msr2; 373 + hwc->extra_reg.config |= fetchlat << IBS_FETCH_2_FETCHLAT_FILTER_SHIFT; 374 + } 375 + 376 + if (perf_ibs_strmst_event(perf_ibs, event)) { 377 + u64 strmst = event->attr.config1 & IBS_OP_CONFIG1_STRMST_MASK; 378 + 379 + strmst >>= IBS_OP_CONFIG1_STRMST_SHIFT; 380 + 381 + hwc->extra_reg.reg = perf_ibs->msr2; 382 + hwc->extra_reg.config |= strmst << IBS_OP_2_STRM_ST_FILTER_SHIFT; 416 383 } 417 384 418 385 /* ··· 518 439 wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask); 519 440 520 441 wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask); 442 + 443 + if (hwc->extra_reg.reg) 444 + wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config); 521 445 } 522 446 523 447 /* ··· 533 451 static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, 534 452 struct hw_perf_event *hwc, u64 config) 535 453 { 454 + if (ibs_caps & IBS_CAPS_DIS) { 455 + wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask); 456 + return; 457 + } 458 + 536 459 config &= ~perf_ibs->cnt_mask; 537 460 if (boot_cpu_data.x86 == 0x10) 538 461 wrmsrq(hwc->config_base, config); ··· 573 486 period &= ~IBS_OP_MAX_CNT_EXT_MASK; 574 487 } 575 488 config |= period >> 4; 489 + 490 + /* 491 + * Reset the IBS_{FETCH|OP}_CTL MSR before updating pcpu->state. 492 + * Doing so prevents a race condition in which an NMI due to other 493 + * source might accidentally activate the event before we enable 494 + * it ourselves. 
495 + */ 496 + perf_ibs_disable_event(perf_ibs, hwc, 0); 576 497 577 498 /* 578 499 * Set STARTED before enabling the hardware, such that a subsequent NMI ··· 726 631 PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); 727 632 PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1"); 728 633 PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1"); 634 + PMU_EVENT_ATTR_STRING(fetchlat, ibs_fetch_lat_format, "config1:0-10"); 635 + PMU_EVENT_ATTR_STRING(fetchlat, ibs_fetch_lat_cap, "1"); 636 + PMU_EVENT_ATTR_STRING(strmst, ibs_op_strmst_format, "config1:12"); 637 + PMU_EVENT_ATTR_STRING(strmst, ibs_op_strmst_cap, "1"); 638 + PMU_EVENT_ATTR_STRING(rmtsocket, ibs_op_rmtsocket_cap, "1"); 729 639 730 640 static umode_t 731 641 zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) 732 642 { 733 643 return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; 644 + } 645 + 646 + static umode_t 647 + ibs_fetch_lat_is_visible(struct kobject *kobj, struct attribute *attr, int i) 648 + { 649 + return ibs_caps & IBS_CAPS_FETCHLAT ? attr->mode : 0; 650 + } 651 + 652 + static umode_t 653 + ibs_op_strmst_is_visible(struct kobject *kobj, struct attribute *attr, int i) 654 + { 655 + return ibs_caps & IBS_CAPS_STRMST_RMTSOCKET ? attr->mode : 0; 656 + } 657 + 658 + static umode_t 659 + ibs_op_rmtsocket_is_visible(struct kobject *kobj, struct attribute *attr, int i) 660 + { 661 + return ibs_caps & IBS_CAPS_STRMST_RMTSOCKET ? 
attr->mode : 0; 734 662 } 735 663 736 664 static umode_t ··· 784 666 NULL, 785 667 }; 786 668 669 + static struct attribute *ibs_fetch_lat_format_attrs[] = { 670 + &ibs_fetch_lat_format.attr.attr, 671 + NULL, 672 + }; 673 + 674 + static struct attribute *ibs_fetch_lat_cap_attrs[] = { 675 + &ibs_fetch_lat_cap.attr.attr, 676 + NULL, 677 + }; 678 + 787 679 static struct attribute *ibs_op_ldlat_cap_attrs[] = { 788 680 &ibs_op_ldlat_cap.attr.attr, 789 681 NULL, ··· 801 673 802 674 static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = { 803 675 &ibs_op_dtlb_pgsize_cap.attr.attr, 676 + NULL, 677 + }; 678 + 679 + static struct attribute *ibs_op_strmst_cap_attrs[] = { 680 + &ibs_op_strmst_cap.attr.attr, 681 + NULL, 682 + }; 683 + 684 + static struct attribute *ibs_op_rmtsocket_cap_attrs[] = { 685 + &ibs_op_rmtsocket_cap.attr.attr, 804 686 NULL, 805 687 }; 806 688 ··· 831 693 .is_visible = zen4_ibs_extensions_is_visible, 832 694 }; 833 695 696 + static struct attribute_group group_ibs_fetch_lat_cap = { 697 + .name = "caps", 698 + .attrs = ibs_fetch_lat_cap_attrs, 699 + .is_visible = ibs_fetch_lat_is_visible, 700 + }; 701 + 702 + static struct attribute_group group_ibs_fetch_lat_format = { 703 + .name = "format", 704 + .attrs = ibs_fetch_lat_format_attrs, 705 + .is_visible = ibs_fetch_lat_is_visible, 706 + }; 707 + 834 708 static struct attribute_group group_ibs_op_ldlat_cap = { 835 709 .name = "caps", 836 710 .attrs = ibs_op_ldlat_cap_attrs, ··· 855 705 .is_visible = ibs_op_dtlb_pgsize_is_visible, 856 706 }; 857 707 708 + static struct attribute_group group_ibs_op_strmst_cap = { 709 + .name = "caps", 710 + .attrs = ibs_op_strmst_cap_attrs, 711 + .is_visible = ibs_op_strmst_is_visible, 712 + }; 713 + 714 + static struct attribute_group group_ibs_op_rmtsocket_cap = { 715 + .name = "caps", 716 + .attrs = ibs_op_rmtsocket_cap_attrs, 717 + .is_visible = ibs_op_rmtsocket_is_visible, 718 + }; 719 + 858 720 static const struct attribute_group *fetch_attr_groups[] = { 859 721 
&group_fetch_formats, 860 722 &empty_caps_group, ··· 876 714 static const struct attribute_group *fetch_attr_update[] = { 877 715 &group_fetch_l3missonly, 878 716 &group_zen4_ibs_extensions, 717 + &group_ibs_fetch_lat_cap, 718 + &group_ibs_fetch_lat_format, 879 719 NULL, 880 720 }; 881 721 ··· 912 748 NULL, 913 749 }; 914 750 751 + static struct attribute *ibs_op_strmst_format_attrs[] = { 752 + &ibs_op_strmst_format.attr.attr, 753 + NULL, 754 + }; 755 + 915 756 static struct attribute_group group_cnt_ctl = { 916 757 .name = "format", 917 758 .attrs = cnt_ctl_attrs, ··· 941 772 .is_visible = ibs_op_ldlat_is_visible, 942 773 }; 943 774 775 + static struct attribute_group group_ibs_op_strmst_format = { 776 + .name = "format", 777 + .attrs = ibs_op_strmst_format_attrs, 778 + .is_visible = ibs_op_strmst_is_visible, 779 + }; 780 + 944 781 static const struct attribute_group *op_attr_update[] = { 945 782 &group_cnt_ctl, 946 783 &group_op_l3missonly, ··· 954 779 &group_ibs_op_ldlat_cap, 955 780 &group_ibs_op_ldlat_format, 956 781 &group_ibs_op_dtlb_pgsize_cap, 782 + &group_ibs_op_strmst_cap, 783 + &group_ibs_op_strmst_format, 784 + &group_ibs_op_rmtsocket_cap, 957 785 NULL, 958 786 }; 959 787 ··· 973 795 .check_period = perf_ibs_check_period, 974 796 }, 975 797 .msr = MSR_AMD64_IBSFETCHCTL, 798 + .msr2 = MSR_AMD64_IBSFETCHCTL2, 976 799 .config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN, 977 800 .cnt_mask = IBS_FETCH_MAX_CNT, 978 801 .enable_mask = IBS_FETCH_ENABLE, ··· 999 820 .check_period = perf_ibs_check_period, 1000 821 }, 1001 822 .msr = MSR_AMD64_IBSOPCTL, 823 + .msr2 = MSR_AMD64_IBSOPCTL2, 1002 824 .config_mask = IBS_OP_MAX_CNT, 1003 825 .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | 1004 826 IBS_OP_CUR_CNT_RAND, ··· 1335 1155 { 1336 1156 if (event->attr.sample_type & PERF_SAMPLE_RAW || 1337 1157 perf_ibs_is_mem_sample_type(perf_ibs, event) || 1338 - perf_ibs_ldlat_event(perf_ibs, event)) 1158 + perf_ibs_ldlat_event(perf_ibs, event) || 1159 + 
perf_ibs_fetch_lat_event(perf_ibs, event)) 1339 1160 return perf_ibs->offset_max; 1340 1161 else if (check_rip) 1341 1162 return 3; ··· 1371 1190 op_data.op_brn_ret && kernel_ip(br_target)); 1372 1191 } 1373 1192 1374 - static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event, 1193 + static bool perf_ibs_discard_sample(struct perf_ibs *perf_ibs, struct perf_event *event, 1375 1194 struct pt_regs *regs, struct perf_ibs_data *ibs_data, 1376 1195 int br_target_idx) 1377 1196 { ··· 1395 1214 struct perf_ibs_data *ibs_data) 1396 1215 { 1397 1216 if (perf_ibs == &perf_ibs_op) { 1398 - ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18); 1399 1217 ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0; 1400 1218 return; 1401 1219 } 1402 1220 1403 - ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52); 1404 1221 ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0; 1405 1222 } 1406 1223 ··· 1472 1293 * within [128, 2048] range. 
1473 1294 */ 1474 1295 if (!op_data3.ld_op || !op_data3.dc_miss || 1475 - op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF)) 1296 + op_data3.dc_miss_lat <= (event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK)) { 1297 + throttle = perf_event_account_interrupt(event); 1476 1298 goto out; 1299 + } 1300 + } 1301 + 1302 + if (perf_ibs_fetch_lat_event(perf_ibs, event)) { 1303 + union ibs_fetch_ctl fetch_ctl; 1304 + 1305 + fetch_ctl.val = ibs_data.regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)]; 1306 + if (fetch_ctl.fetch_lat < (event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK)) { 1307 + throttle = perf_event_account_interrupt(event); 1308 + goto out; 1309 + } 1477 1310 } 1478 1311 1479 1312 /* ··· 1517 1326 regs.flags &= ~PERF_EFLAGS_EXACT; 1518 1327 } else { 1519 1328 /* Workaround for erratum #1197 */ 1520 - if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) 1329 + if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) { 1330 + throttle = perf_event_account_interrupt(event); 1521 1331 goto out; 1332 + } 1522 1333 1523 1334 set_linear_ip(&regs, ibs_data.regs[1]); 1524 1335 regs.flags |= PERF_EFLAGS_EXACT; 1525 1336 } 1526 1337 1527 - if ((event->attr.config2 & IBS_SW_FILTER_MASK) && 1528 - perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) { 1338 + if (((ibs_caps & IBS_CAPS_BIT63_FILTER) || 1339 + (event->attr.config2 & IBS_SW_FILTER_MASK)) && 1340 + perf_ibs_discard_sample(perf_ibs, event, &regs, &ibs_data, br_target_idx)) { 1529 1341 throttle = perf_event_account_interrupt(event); 1530 1342 goto out; 1531 1343 } ··· 1538 1344 * unprivileged users. 
1539 1345 */ 1540 1346 if ((event->attr.sample_type & PERF_SAMPLE_RAW) && 1541 - perf_allow_kernel()) { 1347 + (hwc->flags & PERF_X86_EVENT_UNPRIVILEGED)) { 1542 1348 perf_ibs_phyaddr_clear(perf_ibs, &ibs_data); 1543 1349 } 1544 1350 ··· 1569 1375 1570 1376 out: 1571 1377 if (!throttle) { 1378 + if (ibs_caps & IBS_CAPS_DIS) 1379 + wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask); 1380 + 1572 1381 if (perf_ibs == &perf_ibs_op) { 1573 1382 if (ibs_caps & IBS_CAPS_OPCNTEXT) { 1574 1383 new_config = period & IBS_OP_MAX_CNT_EXT_MASK; ··· 1643 1446 if (ibs_caps & IBS_CAPS_ZEN4) 1644 1447 perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY; 1645 1448 1449 + if (ibs_caps & IBS_CAPS_DIS) 1450 + perf_ibs_fetch.disable_mask = IBS_FETCH_2_DIS; 1451 + 1646 1452 perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups; 1647 1453 perf_ibs_fetch.pmu.attr_update = fetch_attr_update; 1648 1454 ··· 1666 1466 1667 1467 if (ibs_caps & IBS_CAPS_ZEN4) 1668 1468 perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY; 1469 + 1470 + if (ibs_caps & IBS_CAPS_DIS) 1471 + perf_ibs_op.disable_mask = IBS_OP_2_DIS; 1669 1472 1670 1473 perf_ibs_op.pmu.attr_groups = op_attr_groups; 1671 1474 perf_ibs_op.pmu.attr_update = op_attr_update; ··· 1916 1713 static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu) 1917 1714 { 1918 1715 setup_APIC_ibs(); 1716 + 1717 + if (ibs_caps & IBS_CAPS_DIS) { 1718 + /* 1719 + * IBS enable sequence: 1720 + * CTL[En] = 1; 1721 + * CTL2[Dis] = 0; 1722 + * 1723 + * IBS disable sequence: 1724 + * CTL2[Dis] = 1; 1725 + * 1726 + * Set CTL2[Dis] when CPU comes up. This is needed to make 1727 + * enable sequence effective. 
1728 + */ 1729 + wrmsrq(MSR_AMD64_IBSFETCHCTL2, IBS_FETCH_2_DIS); 1730 + wrmsrq(MSR_AMD64_IBSOPCTL2, IBS_OP_2_DIS); 1731 + } 1732 + 1919 1733 return 0; 1920 1734 } 1921 1735 ··· 1990 1770 return -EINVAL; 1991 1771 1992 1772 perf_ibs_pm_init(); 1773 + 1774 + #ifdef CONFIG_X86_32 1775 + /* 1776 + * IBS_CAPS_BIT63_FILTER is used for exclude_kernel/user filtering, 1777 + * which obviously won't work for 32 bit kernel. 1778 + */ 1779 + caps &= ~IBS_CAPS_BIT63_FILTER; 1780 + #endif 1993 1781 1994 1782 ibs_caps = caps; 1995 1783 /* make ibs_caps visible to other cpus: */
+14 -8
arch/x86/events/intel/core.c
··· 5783 5783 } 5784 5784 5785 5785 if (check_fail) { 5786 - pr_info("The two events 0x%llx and 0x%llx may not be " 5786 + pr_warn("The two events 0x%llx and 0x%llx may not be " 5787 5787 "fully scheduled under some circumstances as " 5788 5788 "%s.\n", 5789 5789 c1->code, c2->code, dyn_constr_type_name[type]); ··· 5796 5796 struct event_constraint *constr, 5797 5797 u64 cntr_mask) 5798 5798 { 5799 + u64 gp_mask = GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); 5799 5800 enum dyn_constr_type i; 5800 5801 u64 mask; 5801 5802 ··· 5811 5810 mask = x86_pmu.lbr_counters; 5812 5811 break; 5813 5812 case DYN_CONSTR_ACR_CNTR: 5814 - mask = hybrid(pmu, acr_cntr_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); 5813 + mask = hybrid(pmu, acr_cntr_mask64) & gp_mask; 5815 5814 break; 5816 5815 case DYN_CONSTR_ACR_CAUSE: 5817 - if (hybrid(pmu, acr_cntr_mask64) == hybrid(pmu, acr_cause_mask64)) 5816 + if (hybrid(pmu, acr_cntr_mask64) == 5817 + hybrid(pmu, acr_cause_mask64)) 5818 5818 continue; 5819 - mask = hybrid(pmu, acr_cause_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); 5819 + mask = hybrid(pmu, acr_cause_mask64) & gp_mask; 5820 5820 break; 5821 5821 case DYN_CONSTR_PEBS: 5822 - if (x86_pmu.arch_pebs) 5823 - mask = hybrid(pmu, arch_pebs_cap).counters; 5822 + if (x86_pmu.arch_pebs) { 5823 + mask = hybrid(pmu, arch_pebs_cap).counters & 5824 + gp_mask; 5825 + } 5824 5826 break; 5825 5827 case DYN_CONSTR_PDIST: 5826 - if (x86_pmu.arch_pebs) 5827 - mask = hybrid(pmu, arch_pebs_cap).pdists; 5828 + if (x86_pmu.arch_pebs) { 5829 + mask = hybrid(pmu, arch_pebs_cap).pdists & 5830 + gp_mask; 5831 + } 5828 5832 break; 5829 5833 default: 5830 5834 pr_warn("Unsupported dynamic constraint type %d\n", i);
+3 -3
arch/x86/events/intel/p4.c
··· 1367 1367 1368 1368 __init int p4_pmu_init(void) 1369 1369 { 1370 - unsigned int low, high; 1370 + unsigned int misc; 1371 1371 int i, reg; 1372 1372 1373 1373 /* If we get stripped -- indexing fails */ 1374 1374 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); 1375 1375 1376 - rdmsr(MSR_IA32_MISC_ENABLE, low, high); 1377 - if (!(low & (1 << 7))) { 1376 + rdmsrq(MSR_IA32_MISC_ENABLE, misc); 1377 + if (!(misc & MSR_IA32_MISC_ENABLE_EMON)) { 1378 1378 pr_cont("unsupported Netburst CPU model %d ", 1379 1379 boot_cpu_data.x86_model); 1380 1380 return -ENODEV;
+3 -79
arch/x86/events/msr.c
··· 2 2 #include <linux/perf_event.h> 3 3 #include <linux/sysfs.h> 4 4 #include <linux/nospec.h> 5 - #include <asm/cpu_device_id.h> 6 5 #include <asm/msr.h> 7 6 8 7 #include "probe.h" ··· 40 41 41 42 static bool test_intel(int idx, void *data) 42 43 { 43 - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 44 - boot_cpu_data.x86 != 6) 44 + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 45 45 return false; 46 46 47 - switch (boot_cpu_data.x86_vfm) { 48 - case INTEL_NEHALEM: 49 - case INTEL_NEHALEM_G: 50 - case INTEL_NEHALEM_EP: 51 - case INTEL_NEHALEM_EX: 52 - 53 - case INTEL_WESTMERE: 54 - case INTEL_WESTMERE_EP: 55 - case INTEL_WESTMERE_EX: 56 - 57 - case INTEL_SANDYBRIDGE: 58 - case INTEL_SANDYBRIDGE_X: 59 - 60 - case INTEL_IVYBRIDGE: 61 - case INTEL_IVYBRIDGE_X: 62 - 63 - case INTEL_HASWELL: 64 - case INTEL_HASWELL_X: 65 - case INTEL_HASWELL_L: 66 - case INTEL_HASWELL_G: 67 - 68 - case INTEL_BROADWELL: 69 - case INTEL_BROADWELL_D: 70 - case INTEL_BROADWELL_G: 71 - case INTEL_BROADWELL_X: 72 - case INTEL_SAPPHIRERAPIDS_X: 73 - case INTEL_EMERALDRAPIDS_X: 74 - case INTEL_GRANITERAPIDS_X: 75 - case INTEL_GRANITERAPIDS_D: 76 - 77 - case INTEL_ATOM_SILVERMONT: 78 - case INTEL_ATOM_SILVERMONT_D: 79 - case INTEL_ATOM_AIRMONT: 80 - case INTEL_ATOM_AIRMONT_NP: 81 - 82 - case INTEL_ATOM_GOLDMONT: 83 - case INTEL_ATOM_GOLDMONT_D: 84 - case INTEL_ATOM_GOLDMONT_PLUS: 85 - case INTEL_ATOM_TREMONT_D: 86 - case INTEL_ATOM_TREMONT: 87 - case INTEL_ATOM_TREMONT_L: 88 - 89 - case INTEL_XEON_PHI_KNL: 90 - case INTEL_XEON_PHI_KNM: 91 - if (idx == PERF_MSR_SMI) 92 - return true; 93 - break; 94 - 95 - case INTEL_SKYLAKE_L: 96 - case INTEL_SKYLAKE: 97 - case INTEL_SKYLAKE_X: 98 - case INTEL_KABYLAKE_L: 99 - case INTEL_KABYLAKE: 100 - case INTEL_COMETLAKE_L: 101 - case INTEL_COMETLAKE: 102 - case INTEL_ICELAKE_L: 103 - case INTEL_ICELAKE: 104 - case INTEL_ICELAKE_X: 105 - case INTEL_ICELAKE_D: 106 - case INTEL_TIGERLAKE_L: 107 - case INTEL_TIGERLAKE: 108 - case INTEL_ROCKETLAKE: 109 
- case INTEL_ALDERLAKE: 110 - case INTEL_ALDERLAKE_L: 111 - case INTEL_ATOM_GRACEMONT: 112 - case INTEL_RAPTORLAKE: 113 - case INTEL_RAPTORLAKE_P: 114 - case INTEL_RAPTORLAKE_S: 115 - case INTEL_METEORLAKE: 116 - case INTEL_METEORLAKE_L: 117 - if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) 118 - return true; 119 - break; 120 - } 121 - 122 - return false; 47 + /* Rely on perf_msr_probe() to check the availability */ 48 + return true; 123 49 } 124 50 125 51 PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" );
+1
arch/x86/events/perf_event_flags.h
··· 23 23 PERF_ARCH(NEEDS_BRANCH_STACK, 0x0040000) /* require branch stack setup */ 24 24 PERF_ARCH(BRANCH_COUNTERS, 0x0080000) /* logs the counters in the extra space of each branch */ 25 25 PERF_ARCH(ACR, 0x0100000) /* Auto counter reload */ 26 + PERF_ARCH(UNPRIVILEGED, 0x0200000) /* Unprivileged event (wrt perf_allow_kernel()) */
+4 -2
arch/x86/include/asm/amd/ibs.h
··· 77 77 __u64 val; 78 78 struct { 79 79 __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ 80 - tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ 80 + tag_to_ret_ctr:16, /* 16-31: op tag to retire count */ 81 81 reserved1:2, /* 32-33: reserved */ 82 82 op_return:1, /* 34: return op */ 83 83 op_brn_taken:1, /* 35: taken branch op */ ··· 99 99 rmt_node:1, /* 4: destination node */ 100 100 cache_hit_st:1, /* 5: cache hit state */ 101 101 data_src_hi:2, /* 6-7: data source high */ 102 - reserved1:56; /* 8-63: reserved */ 102 + strm_st:1, /* 8: streaming store */ 103 + rmt_socket:1, /* 9: remote socket */ 104 + reserved1:54; /* 10-63: reserved */ 103 105 }; 104 106 }; 105 107
+2
arch/x86/include/asm/msr-index.h
··· 698 698 #define MSR_AMD64_IBSBRTARGET 0xc001103b 699 699 #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c 700 700 #define MSR_AMD64_IBSOPDATA4 0xc001103d 701 + #define MSR_AMD64_IBSOPCTL2 0xc001103e 702 + #define MSR_AMD64_IBSFETCHCTL2 0xc001103f 701 703 #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ 702 704 #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b 703 705 #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+36 -19
arch/x86/include/asm/perf_event.h
··· 643 643 #define IBS_CAPS_OPDATA4 (1U<<10) 644 644 #define IBS_CAPS_ZEN4 (1U<<11) 645 645 #define IBS_CAPS_OPLDLAT (1U<<12) 646 + #define IBS_CAPS_DIS (1U<<13) 647 + #define IBS_CAPS_FETCHLAT (1U<<14) 648 + #define IBS_CAPS_BIT63_FILTER (1U<<15) 649 + #define IBS_CAPS_STRMST_RMTSOCKET (1U<<16) 646 650 #define IBS_CAPS_OPDTLBPGSIZE (1U<<19) 647 651 648 652 #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ ··· 661 657 #define IBSCTL_LVT_OFFSET_MASK 0x0F 662 658 663 659 /* IBS fetch bits/masks */ 664 - #define IBS_FETCH_L3MISSONLY (1ULL<<59) 665 - #define IBS_FETCH_RAND_EN (1ULL<<57) 666 - #define IBS_FETCH_VAL (1ULL<<49) 667 - #define IBS_FETCH_ENABLE (1ULL<<48) 668 - #define IBS_FETCH_CNT 0xFFFF0000ULL 669 - #define IBS_FETCH_MAX_CNT 0x0000FFFFULL 660 + #define IBS_FETCH_L3MISSONLY (1ULL << 59) 661 + #define IBS_FETCH_RAND_EN (1ULL << 57) 662 + #define IBS_FETCH_VAL (1ULL << 49) 663 + #define IBS_FETCH_ENABLE (1ULL << 48) 664 + #define IBS_FETCH_CNT 0xFFFF0000ULL 665 + #define IBS_FETCH_MAX_CNT 0x0000FFFFULL 666 + 667 + #define IBS_FETCH_2_DIS (1ULL << 0) 668 + #define IBS_FETCH_2_FETCHLAT_FILTER (0xFULL << 1) 669 + #define IBS_FETCH_2_FETCHLAT_FILTER_SHIFT (1) 670 + #define IBS_FETCH_2_EXCL_RIP_63_EQ_1 (1ULL << 5) 671 + #define IBS_FETCH_2_EXCL_RIP_63_EQ_0 (1ULL << 6) 670 672 671 673 /* 672 674 * IBS op bits/masks 673 675 * The lower 7 bits of the current count are random bits 674 676 * preloaded by hardware and ignored in software 675 677 */ 676 - #define IBS_OP_LDLAT_EN (1ULL<<63) 677 - #define IBS_OP_LDLAT_THRSH (0xFULL<<59) 678 - #define IBS_OP_CUR_CNT (0xFFF80ULL<<32) 679 - #define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32) 680 - #define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52) 681 - #define IBS_OP_CNT_CTL (1ULL<<19) 682 - #define IBS_OP_VAL (1ULL<<18) 683 - #define IBS_OP_ENABLE (1ULL<<17) 684 - #define IBS_OP_L3MISSONLY (1ULL<<16) 685 - #define IBS_OP_MAX_CNT 0x0000FFFFULL 686 - #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 687 - #define 
IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */ 688 - #define IBS_RIP_INVALID (1ULL<<38) 678 + #define IBS_OP_LDLAT_EN (1ULL << 63) 679 + #define IBS_OP_LDLAT_THRSH (0xFULL << 59) 680 + #define IBS_OP_LDLAT_THRSH_SHIFT (59) 681 + #define IBS_OP_CUR_CNT (0xFFF80ULL << 32) 682 + #define IBS_OP_CUR_CNT_RAND (0x0007FULL << 32) 683 + #define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL << 52) 684 + #define IBS_OP_CNT_CTL (1ULL << 19) 685 + #define IBS_OP_VAL (1ULL << 18) 686 + #define IBS_OP_ENABLE (1ULL << 17) 687 + #define IBS_OP_L3MISSONLY (1ULL << 16) 688 + #define IBS_OP_MAX_CNT 0x0000FFFFULL 689 + #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 690 + #define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL << 20) /* separate upper 7 bits */ 691 + #define IBS_RIP_INVALID (1ULL << 38) 692 + 693 + #define IBS_OP_2_DIS (1ULL << 0) 694 + #define IBS_OP_2_EXCL_RIP_63_EQ_0 (1ULL << 1) 695 + #define IBS_OP_2_EXCL_RIP_63_EQ_1 (1ULL << 2) 696 + #define IBS_OP_2_STRM_ST_FILTER (1ULL << 3) 697 + #define IBS_OP_2_STRM_ST_FILTER_SHIFT (3) 689 698 690 699 #ifdef CONFIG_X86_LOCAL_APIC 691 700 extern u32 get_ibs_caps(void);
+21 -22
kernel/events/core.c
··· 5368 5368 5369 5369 5370 5370 static struct perf_ctx_data * 5371 - alloc_perf_ctx_data(struct kmem_cache *ctx_cache, bool global) 5371 + alloc_perf_ctx_data(struct kmem_cache *ctx_cache, bool global, gfp_t gfp_flags) 5372 5372 { 5373 5373 struct perf_ctx_data *cd; 5374 5374 5375 - cd = kzalloc_obj(*cd); 5375 + cd = kzalloc_obj(*cd, gfp_flags); 5376 5376 if (!cd) 5377 5377 return NULL; 5378 5378 5379 - cd->data = kmem_cache_zalloc(ctx_cache, GFP_KERNEL); 5379 + cd->data = kmem_cache_zalloc(ctx_cache, gfp_flags); 5380 5380 if (!cd->data) { 5381 5381 kfree(cd); 5382 5382 return NULL; ··· 5410 5410 5411 5411 static int 5412 5412 attach_task_ctx_data(struct task_struct *task, struct kmem_cache *ctx_cache, 5413 - bool global) 5413 + bool global, gfp_t gfp_flags) 5414 5414 { 5415 5415 struct perf_ctx_data *cd, *old = NULL; 5416 5416 5417 - cd = alloc_perf_ctx_data(ctx_cache, global); 5417 + cd = alloc_perf_ctx_data(ctx_cache, global, gfp_flags); 5418 5418 if (!cd) 5419 5419 return -ENOMEM; 5420 5420 ··· 5487 5487 cd = NULL; 5488 5488 } 5489 5489 if (!cd) { 5490 + /* 5491 + * Try to allocate context quickly before 5492 + * traversing the whole thread list again. 
5493 + */ 5494 + if (!attach_task_ctx_data(p, ctx_cache, true, GFP_NOWAIT)) 5495 + continue; 5490 5496 get_task_struct(p); 5491 5497 goto alloc; 5492 5498 } ··· 5503 5497 5504 5498 return 0; 5505 5499 alloc: 5506 - ret = attach_task_ctx_data(p, ctx_cache, true); 5500 + ret = attach_task_ctx_data(p, ctx_cache, true, GFP_KERNEL); 5507 5501 put_task_struct(p); 5508 5502 if (ret) { 5509 5503 __detach_global_ctx_data(); ··· 5523 5517 return -ENOMEM; 5524 5518 5525 5519 if (task) 5526 - return attach_task_ctx_data(task, ctx_cache, false); 5520 + return attach_task_ctx_data(task, ctx_cache, false, GFP_KERNEL); 5527 5521 5528 5522 ret = attach_global_ctx_data(ctx_cache); 5529 5523 if (ret) ··· 5558 5552 struct task_struct *g, *p; 5559 5553 struct perf_ctx_data *cd; 5560 5554 5561 - again: 5562 5555 scoped_guard (rcu) { 5563 5556 for_each_process_thread(g, p) { 5564 5557 cd = rcu_dereference(p->perf_ctx_data); 5565 - if (!cd || !cd->global) 5566 - continue; 5567 - cd->global = 0; 5568 - get_task_struct(p); 5569 - goto detach; 5558 + if (cd && cd->global) { 5559 + cd->global = 0; 5560 + detach_task_ctx_data(p); 5561 + } 5570 5562 } 5571 5563 } 5572 - return; 5573 - detach: 5574 - detach_task_ctx_data(p); 5575 - put_task_struct(p); 5576 - goto again; 5577 5564 } 5578 5565 5579 5566 static void detach_global_ctx_data(void) ··· 8419 8420 pte_t *ptep, pte; 8420 8421 8421 8422 pgdp = pgd_offset(mm, addr); 8422 - pgd = READ_ONCE(*pgdp); 8423 + pgd = pgdp_get(pgdp); 8423 8424 if (pgd_none(pgd)) 8424 8425 return 0; 8425 8426 ··· 8427 8428 return pgd_leaf_size(pgd); 8428 8429 8429 8430 p4dp = p4d_offset_lockless(pgdp, pgd, addr); 8430 - p4d = READ_ONCE(*p4dp); 8431 + p4d = p4dp_get(p4dp); 8431 8432 if (!p4d_present(p4d)) 8432 8433 return 0; 8433 8434 ··· 8435 8436 return p4d_leaf_size(p4d); 8436 8437 8437 8438 pudp = pud_offset_lockless(p4dp, p4d, addr); 8438 - pud = READ_ONCE(*pudp); 8439 + pud = pudp_get(pudp); 8439 8440 if (!pud_present(pud)) 8440 8441 return 0; 8441 8442 ··· 9237 
9238 9238 9239 return; 9239 9240 attach: 9240 - attach_task_ctx_data(child, ctx_cache, true); 9241 + attach_task_ctx_data(child, ctx_cache, true, GFP_KERNEL); 9241 9242 } 9242 9243 9243 9244 void perf_event_fork(struct task_struct *task)
+1 -1
tools/arch/x86/include/asm/amd/ibs.h
··· 77 77 __u64 val; 78 78 struct { 79 79 __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ 80 - tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ 80 + tag_to_ret_ctr:16, /* 16-31: op tag to retire count */ 81 81 reserved1:2, /* 32-33: reserved */ 82 82 op_return:1, /* 34: return op */ 83 83 op_brn_taken:1, /* 35: taken branch op */