Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf/x86/core: Register a new vector for handling mediated guest PMIs

Wire up system vector 0xf5 for handling PMIs (i.e. interrupts delivered
through the LVTPC) while running KVM guests with a mediated PMU. Perf
currently delivers all PMIs as NMIs, e.g. so that events that trigger while
IRQs are disabled aren't delayed (and thus don't generate useless records), but due to
the multiplexing of NMIs throughout the system, correctly identifying NMIs
for a mediated PMU is practically infeasible.

To (greatly) simplify identifying guest mediated PMU PMIs, perf will
switch the CPU's LVTPC between PERF_GUEST_MEDIATED_PMI_VECTOR and NMI when
guest PMU context is loaded/put. I.e. PMIs that are generated by the CPU
while the guest is active will be identified purely based on the IRQ
vector.

Route the vector through perf, e.g. as opposed to letting KVM attach a
handler directly a la posted interrupt notification vectors, as perf owns
the LVTPC and thus is the rightful owner of PERF_GUEST_MEDIATED_PMI_VECTOR.
Functionally, having KVM directly own the vector would be fine (both KVM
and perf will be completely aware of when a mediated PMU is active), but
would lead to an undesirable split in ownership: perf would be responsible
for installing the vector, but not handling the resulting IRQs.

Add a new perf_guest_info_callbacks hook (and static call) to allow KVM to
register its handler with perf when running guests with mediated PMUs.

Note, because KVM always runs guests with host IRQs enabled, there is no
danger of a PMI being delayed from the guest's perspective due to using a
regular IRQ instead of an NMI.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Link: https://patch.msgid.link/20251206001720.468579-9-seanjc@google.com

Authored by Sean Christopherson and committed by Peter Zijlstra
a05385d8 42457a7f

+55 -4
+1
arch/x86/entry/entry_fred.c
··· 114 114 115 115 SYSVEC(IRQ_WORK_VECTOR, irq_work), 116 116 117 + SYSVEC(PERF_GUEST_MEDIATED_PMI_VECTOR, perf_guest_mediated_pmi_handler), 117 118 SYSVEC(POSTED_INTR_VECTOR, kvm_posted_intr_ipi), 118 119 SYSVEC(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), 119 120 SYSVEC(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
+3
arch/x86/include/asm/hardirq.h
··· 19 19 unsigned int kvm_posted_intr_wakeup_ipis; 20 20 unsigned int kvm_posted_intr_nested_ipis; 21 21 #endif 22 + #ifdef CONFIG_GUEST_PERF_EVENTS 23 + unsigned int perf_guest_mediated_pmis; 24 + #endif 22 25 unsigned int x86_platform_ipis; /* arch dependent */ 23 26 unsigned int apic_perf_irqs; 24 27 unsigned int apic_irq_work_irqs;
+6
arch/x86/include/asm/idtentry.h
··· 746 746 # define fred_sysvec_kvm_posted_intr_nested_ipi NULL 747 747 #endif 748 748 749 + # ifdef CONFIG_GUEST_PERF_EVENTS 750 + DECLARE_IDTENTRY_SYSVEC(PERF_GUEST_MEDIATED_PMI_VECTOR, sysvec_perf_guest_mediated_pmi_handler); 751 + #else 752 + # define fred_sysvec_perf_guest_mediated_pmi_handler NULL 753 + #endif 754 + 749 755 # ifdef CONFIG_X86_POSTED_MSI 750 756 DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification); 751 757 #else
+3 -1
arch/x86/include/asm/irq_vectors.h
··· 77 77 */ 78 78 #define IRQ_WORK_VECTOR 0xf6 79 79 80 - /* 0xf5 - unused, was UV_BAU_MESSAGE */ 80 + /* IRQ vector for PMIs when running a guest with a mediated PMU. */ 81 + #define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5 82 + 81 83 #define DEFERRED_ERROR_VECTOR 0xf4 82 84 83 85 /* Vector on which hypervisor callbacks will be delivered */
+3
arch/x86/kernel/idt.c
··· 158 158 INTG(POSTED_INTR_WAKEUP_VECTOR, asm_sysvec_kvm_posted_intr_wakeup_ipi), 159 159 INTG(POSTED_INTR_NESTED_VECTOR, asm_sysvec_kvm_posted_intr_nested_ipi), 160 160 # endif 161 + #ifdef CONFIG_GUEST_PERF_EVENTS 162 + INTG(PERF_GUEST_MEDIATED_PMI_VECTOR, asm_sysvec_perf_guest_mediated_pmi_handler), 163 + #endif 161 164 # ifdef CONFIG_IRQ_WORK 162 165 INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work), 163 166 # endif
+19
arch/x86/kernel/irq.c
··· 192 192 irq_stats(j)->kvm_posted_intr_wakeup_ipis); 193 193 seq_puts(p, " Posted-interrupt wakeup event\n"); 194 194 #endif 195 + #ifdef CONFIG_GUEST_PERF_EVENTS 196 + seq_printf(p, "%*s: ", prec, "VPMI"); 197 + for_each_online_cpu(j) 198 + seq_printf(p, "%10u ", 199 + irq_stats(j)->perf_guest_mediated_pmis); 200 + seq_puts(p, " Perf Guest Mediated PMI\n"); 201 + #endif 195 202 #ifdef CONFIG_X86_POSTED_MSI 196 203 seq_printf(p, "%*s: ", prec, "PMN"); 197 204 for_each_online_cpu(j) ··· 353 346 x86_platform_ipi_callback(); 354 347 trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); 355 348 set_irq_regs(old_regs); 349 + } 350 + #endif 351 + 352 + #ifdef CONFIG_GUEST_PERF_EVENTS 353 + /* 354 + * Handler for PERF_GUEST_MEDIATED_PMI_VECTOR. 355 + */ 356 + DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler) 357 + { 358 + apic_eoi(); 359 + inc_irq_stat(perf_guest_mediated_pmis); 360 + perf_guest_handle_mediated_pmi(); 356 361 } 357 362 #endif 358 363
+8
include/linux/perf_event.h
··· 1677 1677 unsigned int (*state)(void); 1678 1678 unsigned long (*get_ip)(void); 1679 1679 unsigned int (*handle_intel_pt_intr)(void); 1680 + 1681 + void (*handle_mediated_pmi)(void); 1680 1682 }; 1681 1683 1682 1684 #ifdef CONFIG_GUEST_PERF_EVENTS ··· 1688 1686 DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); 1689 1687 DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); 1690 1688 DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); 1689 + DECLARE_STATIC_CALL(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi); 1691 1690 1692 1691 static inline unsigned int perf_guest_state(void) 1693 1692 { ··· 1703 1700 static inline unsigned int perf_guest_handle_intel_pt_intr(void) 1704 1701 { 1705 1702 return static_call(__perf_guest_handle_intel_pt_intr)(); 1703 + } 1704 + 1705 + static inline void perf_guest_handle_mediated_pmi(void) 1706 + { 1707 + static_call(__perf_guest_handle_mediated_pmi)(); 1706 1708 } 1707 1709 1708 1710 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
+7 -2
kernel/events/core.c
··· 7644 7644 DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state); 7645 7645 DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip); 7646 7646 DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); 7647 + DEFINE_STATIC_CALL_RET0(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi); 7647 7648 7648 7649 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) 7649 7650 { ··· 7659 7658 if (cbs->handle_intel_pt_intr) 7660 7659 static_call_update(__perf_guest_handle_intel_pt_intr, 7661 7660 cbs->handle_intel_pt_intr); 7661 + 7662 + if (cbs->handle_mediated_pmi) 7663 + static_call_update(__perf_guest_handle_mediated_pmi, 7664 + cbs->handle_mediated_pmi); 7662 7665 } 7663 7666 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); 7664 7667 ··· 7674 7669 rcu_assign_pointer(perf_guest_cbs, NULL); 7675 7670 static_call_update(__perf_guest_state, (void *)&__static_call_return0); 7676 7671 static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0); 7677 - static_call_update(__perf_guest_handle_intel_pt_intr, 7678 - (void *)&__static_call_return0); 7672 + static_call_update(__perf_guest_handle_intel_pt_intr, (void *)&__static_call_return0); 7673 + static_call_update(__perf_guest_handle_mediated_pmi, (void *)&__static_call_return0); 7679 7674 synchronize_rcu(); 7680 7675 } 7681 7676 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+2 -1
tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
··· 77 77 */ 78 78 #define IRQ_WORK_VECTOR 0xf6 79 79 80 - /* 0xf5 - unused, was UV_BAU_MESSAGE */ 80 + #define PERF_GUEST_MEDIATED_PMI_VECTOR 0xf5 81 + 81 82 #define DEFERRED_ERROR_VECTOR 0xf4 82 83 83 84 /* Vector on which hypervisor callbacks will be delivered */
+3
virt/kvm/kvm_main.c
··· 6467 6467 .state = kvm_guest_state, 6468 6468 .get_ip = kvm_guest_get_ip, 6469 6469 .handle_intel_pt_intr = NULL, 6470 + .handle_mediated_pmi = NULL, 6470 6471 }; 6471 6472 6472 6473 void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)) 6473 6474 { 6474 6475 kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler; 6476 + kvm_guest_cbs.handle_mediated_pmi = NULL; 6477 + 6475 6478 perf_register_guest_info_callbacks(&kvm_guest_cbs); 6476 6479 } 6477 6480 void kvm_unregister_perf_callbacks(void)