Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Borislav Petkov:
"Cleanup of the perf/kvm interaction."

* tag 'perf_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Drop guest callback (un)register stubs
  KVM: arm64: Drop perf.c and fold its tiny bits of code into arm.c
  KVM: arm64: Hide kvm_arm_pmu_available behind CONFIG_HW_PERF_EVENTS=y
  KVM: arm64: Convert to the generic perf callbacks
  KVM: x86: Move Intel Processor Trace interrupt handler to vmx.c
  KVM: Move x86's perf guest info callbacks to generic KVM
  KVM: x86: More precisely identify NMI from guest when handling PMI
  KVM: x86: Drop current_vcpu for kvm_running_vcpu + kvm_arch_vcpu variable
  perf/core: Use static_call to optimize perf_guest_info_callbacks
  perf: Force architectures to opt-in to guest callbacks
  perf: Add wrappers for invoking guest callbacks
  perf/core: Rework guest callbacks to prepare for static_call support
  perf: Drop dead and useless guest "support" from arm, csky, nds32 and riscv
  perf: Stop pretending that perf can handle multiple guest callbacks
  KVM: x86: Register Processor Trace interrupt hook iff PT enabled in guest
  KVM: x86: Register perf callbacks after calling vendor's hardware_setup()
  perf: Protect perf_guest_cbs with RCU

+248 -256
+4 -24
arch/arm/kernel/perf_callchain.c
··· 64 64 { 65 65 struct frame_tail __user *tail; 66 66 67 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 68 - /* We don't support guest os callchain now */ 69 - return; 70 - } 71 - 72 67 perf_callchain_store(entry, regs->ARM_pc); 73 68 74 69 if (!current->mm) ··· 95 100 { 96 101 struct stackframe fr; 97 102 98 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 99 - /* We don't support guest os callchain now */ 100 - return; 101 - } 102 - 103 103 arm_get_current_stackframe(regs, &fr); 104 104 walk_stackframe(&fr, callchain_trace, entry); 105 105 } 106 106 107 107 unsigned long perf_instruction_pointer(struct pt_regs *regs) 108 108 { 109 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) 110 - return perf_guest_cbs->get_guest_ip(); 111 - 112 109 return instruction_pointer(regs); 113 110 } 114 111 ··· 108 121 { 109 122 int misc = 0; 110 123 111 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 112 - if (perf_guest_cbs->is_user_mode()) 113 - misc |= PERF_RECORD_MISC_GUEST_USER; 114 - else 115 - misc |= PERF_RECORD_MISC_GUEST_KERNEL; 116 - } else { 117 - if (user_mode(regs)) 118 - misc |= PERF_RECORD_MISC_USER; 119 - else 120 - misc |= PERF_RECORD_MISC_KERNEL; 121 - } 124 + if (user_mode(regs)) 125 + misc |= PERF_RECORD_MISC_USER; 126 + else 127 + misc |= PERF_RECORD_MISC_KERNEL; 122 128 123 129 return misc; 124 130 }
+9 -2
arch/arm64/include/asm/kvm_host.h
··· 675 675 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu); 676 676 int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa); 677 677 678 - int kvm_perf_init(void); 679 - int kvm_perf_teardown(void); 678 + /* 679 + * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event, 680 + * arrived in guest context. For arm64, any event that arrives while a vCPU is 681 + * loaded is considered to be "in guest". 682 + */ 683 + static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) 684 + { 685 + return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; 686 + } 680 687 681 688 long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); 682 689 gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
+2
arch/arm64/kernel/image-vars.h
··· 102 102 KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); 103 103 104 104 /* PMU available static key */ 105 + #ifdef CONFIG_HW_PERF_EVENTS 105 106 KVM_NVHE_ALIAS(kvm_arm_pmu_available); 107 + #endif 106 108 107 109 /* Position-independent library routines */ 108 110 KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
+7 -6
arch/arm64/kernel/perf_callchain.c
··· 102 102 void perf_callchain_user(struct perf_callchain_entry_ctx *entry, 103 103 struct pt_regs *regs) 104 104 { 105 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 105 + if (perf_guest_state()) { 106 106 /* We don't support guest os callchain now */ 107 107 return; 108 108 } ··· 141 141 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 142 142 struct pt_regs *regs) 143 143 { 144 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 144 + if (perf_guest_state()) { 145 145 /* We don't support guest os callchain now */ 146 146 return; 147 147 } ··· 151 151 152 152 unsigned long perf_instruction_pointer(struct pt_regs *regs) 153 153 { 154 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) 155 - return perf_guest_cbs->get_guest_ip(); 154 + if (perf_guest_state()) 155 + return perf_guest_get_ip(); 156 156 157 157 return instruction_pointer(regs); 158 158 } 159 159 160 160 unsigned long perf_misc_flags(struct pt_regs *regs) 161 161 { 162 + unsigned int guest_state = perf_guest_state(); 162 163 int misc = 0; 163 164 164 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 165 - if (perf_guest_cbs->is_user_mode()) 165 + if (guest_state) { 166 + if (guest_state & PERF_GUEST_USER) 166 167 misc |= PERF_RECORD_MISC_GUEST_USER; 167 168 else 168 169 misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+1
arch/arm64/kvm/Kconfig
··· 39 39 select HAVE_KVM_IRQ_BYPASS 40 40 select HAVE_KVM_VCPU_RUN_PID_CHANGE 41 41 select SCHED_INFO 42 + select GUEST_PERF_EVENTS if PERF_EVENTS 42 43 help 43 44 Support hosting virtualized guest machines. 44 45
+1 -1
arch/arm64/kvm/Makefile
··· 12 12 13 13 kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ 14 14 $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \ 15 - arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ 15 + arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ 16 16 inject_fault.o va_layout.o handle_exit.o \ 17 17 guest.o debug.o reset.o sys_regs.o \ 18 18 vgic-sys-reg-v3.o fpsimd.o pmu.o \
+10 -2
arch/arm64/kvm/arm.c
··· 503 503 return vcpu_mode_priv(vcpu); 504 504 } 505 505 506 + #ifdef CONFIG_GUEST_PERF_EVENTS 507 + unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) 508 + { 509 + return *vcpu_pc(vcpu); 510 + } 511 + #endif 512 + 506 513 /* Just ensure a guest exit from a particular CPU */ 507 514 static void exit_vm_noop(void *info) 508 515 { ··· 1782 1775 if (err) 1783 1776 goto out; 1784 1777 1785 - kvm_perf_init(); 1778 + kvm_register_perf_callbacks(NULL); 1779 + 1786 1780 kvm_sys_reg_table_init(); 1787 1781 1788 1782 out: ··· 2171 2163 /* NOP: Compiling as a module not supported */ 2172 2164 void kvm_arch_exit(void) 2173 2165 { 2174 - kvm_perf_teardown(); 2166 + kvm_unregister_perf_callbacks(); 2175 2167 } 2176 2168 2177 2169 static int __init early_kvm_mode_cfg(char *arg)
-59
arch/arm64/kvm/perf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * Based on the x86 implementation. 4 - * 5 - * Copyright (C) 2012 ARM Ltd. 6 - * Author: Marc Zyngier <marc.zyngier@arm.com> 7 - */ 8 - 9 - #include <linux/perf_event.h> 10 - #include <linux/kvm_host.h> 11 - 12 - #include <asm/kvm_emulate.h> 13 - 14 - DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); 15 - 16 - static int kvm_is_in_guest(void) 17 - { 18 - return kvm_get_running_vcpu() != NULL; 19 - } 20 - 21 - static int kvm_is_user_mode(void) 22 - { 23 - struct kvm_vcpu *vcpu; 24 - 25 - vcpu = kvm_get_running_vcpu(); 26 - 27 - if (vcpu) 28 - return !vcpu_mode_priv(vcpu); 29 - 30 - return 0; 31 - } 32 - 33 - static unsigned long kvm_get_guest_ip(void) 34 - { 35 - struct kvm_vcpu *vcpu; 36 - 37 - vcpu = kvm_get_running_vcpu(); 38 - 39 - if (vcpu) 40 - return *vcpu_pc(vcpu); 41 - 42 - return 0; 43 - } 44 - 45 - static struct perf_guest_info_callbacks kvm_guest_cbs = { 46 - .is_in_guest = kvm_is_in_guest, 47 - .is_user_mode = kvm_is_user_mode, 48 - .get_guest_ip = kvm_get_guest_ip, 49 - }; 50 - 51 - int kvm_perf_init(void) 52 - { 53 - return perf_register_guest_info_callbacks(&kvm_guest_cbs); 54 - } 55 - 56 - int kvm_perf_teardown(void) 57 - { 58 - return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); 59 - }
+2
arch/arm64/kvm/pmu-emul.c
··· 14 14 #include <kvm/arm_pmu.h> 15 15 #include <kvm/arm_vgic.h> 16 16 17 + DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); 18 + 17 19 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); 18 20 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); 19 21 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
-10
arch/csky/kernel/perf_callchain.c
··· 88 88 { 89 89 unsigned long fp = 0; 90 90 91 - /* C-SKY does not support virtualization. */ 92 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) 93 - return; 94 - 95 91 fp = regs->regs[4]; 96 92 perf_callchain_store(entry, regs->pc); 97 93 ··· 107 111 struct pt_regs *regs) 108 112 { 109 113 struct stackframe fr; 110 - 111 - /* C-SKY does not support virtualization. */ 112 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 113 - pr_warn("C-SKY does not support perf in guest mode!"); 114 - return; 115 - } 116 114 117 115 fr.fp = regs->regs[4]; 118 116 fr.lr = regs->lr;
+4 -25
arch/nds32/kernel/perf_event_cpu.c
··· 1371 1371 1372 1372 leaf_fp = 0; 1373 1373 1374 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 1375 - /* We don't support guest os callchain now */ 1376 - return; 1377 - } 1378 - 1379 1374 perf_callchain_store(entry, regs->ipc); 1380 1375 fp = regs->fp; 1381 1376 gp = regs->gp; ··· 1476 1481 { 1477 1482 struct stackframe fr; 1478 1483 1479 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 1480 - /* We don't support guest os callchain now */ 1481 - return; 1482 - } 1483 1484 fr.fp = regs->fp; 1484 1485 fr.lp = regs->lp; 1485 1486 fr.sp = regs->sp; ··· 1484 1493 1485 1494 unsigned long perf_instruction_pointer(struct pt_regs *regs) 1486 1495 { 1487 - /* However, NDS32 does not support virtualization */ 1488 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) 1489 - return perf_guest_cbs->get_guest_ip(); 1490 - 1491 1496 return instruction_pointer(regs); 1492 1497 } 1493 1498 ··· 1491 1504 { 1492 1505 int misc = 0; 1493 1506 1494 - /* However, NDS32 does not support virtualization */ 1495 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 1496 - if (perf_guest_cbs->is_user_mode()) 1497 - misc |= PERF_RECORD_MISC_GUEST_USER; 1498 - else 1499 - misc |= PERF_RECORD_MISC_GUEST_KERNEL; 1500 - } else { 1501 - if (user_mode(regs)) 1502 - misc |= PERF_RECORD_MISC_USER; 1503 - else 1504 - misc |= PERF_RECORD_MISC_KERNEL; 1505 - } 1507 + if (user_mode(regs)) 1508 + misc |= PERF_RECORD_MISC_USER; 1509 + else 1510 + misc |= PERF_RECORD_MISC_KERNEL; 1506 1511 1507 1512 return misc; 1508 1513 }
-10
arch/riscv/kernel/perf_callchain.c
··· 58 58 { 59 59 unsigned long fp = 0; 60 60 61 - /* RISC-V does not support perf in guest mode. */ 62 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) 63 - return; 64 - 65 61 fp = regs->s0; 66 62 perf_callchain_store(entry, regs->epc); 67 63 ··· 74 78 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 75 79 struct pt_regs *regs) 76 80 { 77 - /* RISC-V does not support perf in guest mode. */ 78 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 79 - pr_warn("RISC-V does not support perf in guest mode!"); 80 - return; 81 - } 82 - 83 81 walk_stackframe(NULL, regs, fill_callchain, entry); 84 82 }
+7 -6
arch/x86/events/core.c
··· 2771 2771 struct unwind_state state; 2772 2772 unsigned long addr; 2773 2773 2774 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 2774 + if (perf_guest_state()) { 2775 2775 /* TODO: We don't support guest os callchain now */ 2776 2776 return; 2777 2777 } ··· 2874 2874 struct stack_frame frame; 2875 2875 const struct stack_frame __user *fp; 2876 2876 2877 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 2877 + if (perf_guest_state()) { 2878 2878 /* TODO: We don't support guest os callchain now */ 2879 2879 return; 2880 2880 } ··· 2951 2951 2952 2952 unsigned long perf_instruction_pointer(struct pt_regs *regs) 2953 2953 { 2954 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) 2955 - return perf_guest_cbs->get_guest_ip(); 2954 + if (perf_guest_state()) 2955 + return perf_guest_get_ip(); 2956 2956 2957 2957 return regs->ip + code_segment_base(regs); 2958 2958 } 2959 2959 2960 2960 unsigned long perf_misc_flags(struct pt_regs *regs) 2961 2961 { 2962 + unsigned int guest_state = perf_guest_state(); 2962 2963 int misc = 0; 2963 2964 2964 - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 2965 - if (perf_guest_cbs->is_user_mode()) 2965 + if (guest_state) { 2966 + if (guest_state & PERF_GUEST_USER) 2966 2967 misc |= PERF_RECORD_MISC_GUEST_USER; 2967 2968 else 2968 2969 misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+1 -4
arch/x86/events/intel/core.c
··· 2901 2901 */ 2902 2902 if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { 2903 2903 handled++; 2904 - if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && 2905 - perf_guest_cbs->handle_intel_pt_intr)) 2906 - perf_guest_cbs->handle_intel_pt_intr(); 2907 - else 2904 + if (!perf_guest_handle_intel_pt_intr()) 2908 2905 intel_pt_interrupt(); 2909 2906 } 2910 2907
+5 -2
arch/x86/include/asm/kvm_host.h
··· 774 774 unsigned nmi_pending; /* NMI queued after currently running handler */ 775 775 bool nmi_injected; /* Trying to inject an NMI this entry */ 776 776 bool smi_pending; /* SMI queued after currently running handler */ 777 + u8 handling_intr_from_guest; 777 778 778 779 struct kvm_mtrr mtrr_state; 779 780 u64 pat; ··· 1520 1519 int (*disabled_by_bios)(void); 1521 1520 int (*check_processor_compatibility)(void); 1522 1521 int (*hardware_setup)(void); 1522 + unsigned int (*handle_intel_pt_intr)(void); 1523 1523 1524 1524 struct kvm_x86_ops *runtime_ops; 1525 1525 }; ··· 1569 1567 else 1570 1568 return -ENOTSUPP; 1571 1569 } 1570 + 1571 + #define kvm_arch_pmi_in_guest(vcpu) \ 1572 + ((vcpu) && (vcpu)->arch.handling_intr_from_guest) 1572 1573 1573 1574 int kvm_mmu_module_init(void); 1574 1575 void kvm_mmu_module_exit(void); ··· 1901 1896 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); 1902 1897 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); 1903 1898 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); 1904 - 1905 - int kvm_is_in_guest(void); 1906 1899 1907 1900 void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, 1908 1901 u32 size);
+1
arch/x86/kvm/Kconfig
··· 36 36 select KVM_MMIO 37 37 select SCHED_INFO 38 38 select PERF_EVENTS 39 + select GUEST_PERF_EVENTS 39 40 select HAVE_KVM_MSI 40 41 select HAVE_KVM_CPU_RELAX_INTERCEPT 41 42 select HAVE_KVM_NO_POLL
+1 -1
arch/x86/kvm/pmu.c
··· 87 87 * woken up. So we should wake it, but this is impossible from 88 88 * NMI context. Do it from irq work instead. 89 89 */ 90 - if (!kvm_is_in_guest()) 90 + if (!kvm_handling_nmi_from_guest(pmc->vcpu)) 91 91 irq_work_queue(&pmc_to_pmu(pmc)->irq_work); 92 92 else 93 93 kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
+1 -1
arch/x86/kvm/svm/svm.c
··· 3933 3933 } 3934 3934 3935 3935 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) 3936 - kvm_before_interrupt(vcpu); 3936 + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); 3937 3937 3938 3938 kvm_load_host_xsave_state(vcpu); 3939 3939 stgi();
+24 -1
arch/x86/kvm/vmx/vmx.c
··· 6344 6344 static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, 6345 6345 unsigned long entry) 6346 6346 { 6347 - kvm_before_interrupt(vcpu); 6347 + bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist; 6348 + 6349 + kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ); 6348 6350 vmx_do_interrupt_nmi_irqoff(entry); 6349 6351 kvm_after_interrupt(vcpu); 6350 6352 } ··· 7695 7693 .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, 7696 7694 }; 7697 7695 7696 + static unsigned int vmx_handle_intel_pt_intr(void) 7697 + { 7698 + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); 7699 + 7700 + /* '0' on failure so that the !PT case can use a RET0 static call. */ 7701 + if (!kvm_arch_pmi_in_guest(vcpu)) 7702 + return 0; 7703 + 7704 + kvm_make_request(KVM_REQ_PMI, vcpu); 7705 + __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT, 7706 + (unsigned long *)&vcpu->arch.pmu.global_status); 7707 + return 1; 7708 + } 7709 + 7698 7710 static __init void vmx_setup_user_return_msrs(void) 7699 7711 { 7700 7712 ··· 7734 7718 for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) 7735 7719 kvm_add_user_return_msr(vmx_uret_msrs_list[i]); 7736 7720 } 7721 + 7722 + static struct kvm_x86_init_ops vmx_init_ops __initdata; 7737 7723 7738 7724 static __init int hardware_setup(void) 7739 7725 { ··· 7895 7877 return -EINVAL; 7896 7878 if (!enable_ept || !cpu_has_vmx_intel_pt()) 7897 7879 pt_mode = PT_MODE_SYSTEM; 7880 + if (pt_mode == PT_MODE_HOST_GUEST) 7881 + vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr; 7882 + else 7883 + vmx_init_ops.handle_intel_pt_intr = NULL; 7898 7884 7899 7885 setup_default_sgx_lepubkeyhash(); 7900 7886 ··· 7927 7905 .disabled_by_bios = vmx_disabled_by_bios, 7928 7906 .check_processor_compatibility = vmx_check_processor_compat, 7929 7907 .hardware_setup = hardware_setup, 7908 + .handle_intel_pt_intr = NULL, 7930 7909 7931 7910 .runtime_ops = &vmx_x86_ops, 7932 7911 };
+10 -48
arch/x86/kvm/x86.c
··· 8519 8519 kvmclock_cpu_online, kvmclock_cpu_down_prep); 8520 8520 } 8521 8521 8522 - DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); 8523 - EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu); 8524 - 8525 - int kvm_is_in_guest(void) 8526 - { 8527 - return __this_cpu_read(current_vcpu) != NULL; 8528 - } 8529 - 8530 - static int kvm_is_user_mode(void) 8531 - { 8532 - int user_mode = 3; 8533 - 8534 - if (__this_cpu_read(current_vcpu)) 8535 - user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu)); 8536 - 8537 - return user_mode != 0; 8538 - } 8539 - 8540 - static unsigned long kvm_get_guest_ip(void) 8541 - { 8542 - unsigned long ip = 0; 8543 - 8544 - if (__this_cpu_read(current_vcpu)) 8545 - ip = kvm_rip_read(__this_cpu_read(current_vcpu)); 8546 - 8547 - return ip; 8548 - } 8549 - 8550 - static void kvm_handle_intel_pt_intr(void) 8551 - { 8552 - struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu); 8553 - 8554 - kvm_make_request(KVM_REQ_PMI, vcpu); 8555 - __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT, 8556 - (unsigned long *)&vcpu->arch.pmu.global_status); 8557 - } 8558 - 8559 - static struct perf_guest_info_callbacks kvm_guest_cbs = { 8560 - .is_in_guest = kvm_is_in_guest, 8561 - .is_user_mode = kvm_is_user_mode, 8562 - .get_guest_ip = kvm_get_guest_ip, 8563 - .handle_intel_pt_intr = kvm_handle_intel_pt_intr, 8564 - }; 8565 - 8566 8522 #ifdef CONFIG_X86_64 8567 8523 static void pvclock_gtod_update_fn(struct work_struct *work) 8568 8524 { ··· 8632 8676 8633 8677 kvm_timer_init(); 8634 8678 8635 - perf_register_guest_info_callbacks(&kvm_guest_cbs); 8636 - 8637 8679 if (boot_cpu_has(X86_FEATURE_XSAVE)) { 8638 8680 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 8639 8681 supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; ··· 8663 8709 clear_hv_tscchange_cb(); 8664 8710 #endif 8665 8711 kvm_lapic_exit(); 8666 - perf_unregister_guest_info_callbacks(&kvm_guest_cbs); 8667 8712 8668 8713 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 8669 8714 
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, ··· 9889 9936 * interrupts on processors that implement an interrupt shadow, the 9890 9937 * stat.exits increment will do nicely. 9891 9938 */ 9892 - kvm_before_interrupt(vcpu); 9939 + kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); 9893 9940 local_irq_enable(); 9894 9941 ++vcpu->stat.exits; 9895 9942 local_irq_disable(); ··· 11222 11269 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); 11223 11270 kvm_ops_static_call_update(); 11224 11271 11272 + kvm_register_perf_callbacks(ops->handle_intel_pt_intr); 11273 + 11225 11274 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) 11226 11275 supported_xss = 0; 11227 11276 ··· 11251 11296 11252 11297 void kvm_arch_hardware_unsetup(void) 11253 11298 { 11299 + kvm_unregister_perf_callbacks(); 11300 + 11254 11301 static_call(kvm_x86_hardware_unsetup)(); 11255 11302 } 11256 11303 ··· 11840 11883 return true; 11841 11884 11842 11885 return vcpu->arch.preempted_in_kernel; 11886 + } 11887 + 11888 + unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) 11889 + { 11890 + return kvm_rip_read(vcpu); 11843 11891 } 11844 11892 11845 11893 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+13 -4
arch/x86/kvm/x86.h
··· 392 392 return kvm->arch.cstate_in_guest; 393 393 } 394 394 395 - DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); 395 + enum kvm_intr_type { 396 + /* Values are arbitrary, but must be non-zero. */ 397 + KVM_HANDLING_IRQ = 1, 398 + KVM_HANDLING_NMI, 399 + }; 396 400 397 - static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) 401 + static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu, 402 + enum kvm_intr_type intr) 398 403 { 399 - __this_cpu_write(current_vcpu, vcpu); 404 + WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr); 400 405 } 401 406 402 407 static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) 403 408 { 404 - __this_cpu_write(current_vcpu, NULL); 409 + WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0); 405 410 } 406 411 412 + static inline bool kvm_handling_nmi_from_guest(struct kvm_vcpu *vcpu) 413 + { 414 + return vcpu->arch.handling_intr_from_guest == KVM_HANDLING_NMI; 415 + } 407 416 408 417 static inline bool kvm_pat_valid(u64 data) 409 418 {
+1
arch/x86/xen/Kconfig
··· 23 23 select PARAVIRT_XXL 24 24 select XEN_HAVE_PVMMU 25 25 select XEN_HAVE_VPMU 26 + select GUEST_PERF_EVENTS 26 27 help 27 28 Support running as a Xen PV guest. 28 29
+13 -19
arch/x86/xen/pmu.c
··· 413 413 } 414 414 415 415 /* perf callbacks */ 416 - static int xen_is_in_guest(void) 416 + static unsigned int xen_guest_state(void) 417 417 { 418 418 const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); 419 + unsigned int state = 0; 419 420 420 421 if (!xenpmu_data) { 421 422 pr_warn_once("%s: pmudata not initialized\n", __func__); 422 - return 0; 423 + return state; 423 424 } 424 425 425 426 if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF)) 426 - return 0; 427 + return state; 427 428 428 - return 1; 429 - } 429 + state |= PERF_GUEST_ACTIVE; 430 430 431 - static int xen_is_user_mode(void) 432 - { 433 - const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); 434 - 435 - if (!xenpmu_data) { 436 - pr_warn_once("%s: pmudata not initialized\n", __func__); 437 - return 0; 431 + if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) { 432 + if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER) 433 + state |= PERF_GUEST_USER; 434 + } else if (xenpmu_data->pmu.r.regs.cpl & 3) { 435 + state |= PERF_GUEST_USER; 438 436 } 439 437 440 - if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) 441 - return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER); 442 - else 443 - return !!(xenpmu_data->pmu.r.regs.cpl & 3); 438 + return state; 444 439 } 445 440 446 441 static unsigned long xen_get_guest_ip(void) ··· 451 456 } 452 457 453 458 static struct perf_guest_info_callbacks xen_guest_cbs = { 454 - .is_in_guest = xen_is_in_guest, 455 - .is_user_mode = xen_is_user_mode, 456 - .get_guest_ip = xen_get_guest_ip, 459 + .state = xen_guest_state, 460 + .get_ip = xen_get_guest_ip, 457 461 }; 458 462 459 463 /* Convert registers from Xen's format to Linux' */
+12 -7
include/kvm/arm_pmu.h
··· 13 13 #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) 14 14 #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) 15 15 16 - DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); 17 - 18 - static __always_inline bool kvm_arm_support_pmu_v3(void) 19 - { 20 - return static_branch_likely(&kvm_arm_pmu_available); 21 - } 22 - 23 16 #ifdef CONFIG_HW_PERF_EVENTS 24 17 25 18 struct kvm_pmc { ··· 28 35 bool irq_level; 29 36 struct irq_work overflow_work; 30 37 }; 38 + 39 + DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); 40 + 41 + static __always_inline bool kvm_arm_support_pmu_v3(void) 42 + { 43 + return static_branch_likely(&kvm_arm_pmu_available); 44 + } 31 45 32 46 #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) 33 47 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); ··· 64 64 #else 65 65 struct kvm_pmu { 66 66 }; 67 + 68 + static inline bool kvm_arm_support_pmu_v3(void) 69 + { 70 + return false; 71 + } 67 72 68 73 #define kvm_arm_pmu_irq_initialized(v) (false) 69 74 static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
+10
include/linux/kvm_host.h
··· 1166 1166 } 1167 1167 #endif 1168 1168 1169 + #ifdef CONFIG_GUEST_PERF_EVENTS 1170 + unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu); 1171 + 1172 + void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)); 1173 + void kvm_unregister_perf_callbacks(void); 1174 + #else 1175 + static inline void kvm_register_perf_callbacks(void *ign) {} 1176 + static inline void kvm_unregister_perf_callbacks(void) {} 1177 + #endif /* CONFIG_GUEST_PERF_EVENTS */ 1178 + 1169 1179 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); 1170 1180 void kvm_arch_destroy_vm(struct kvm *kvm); 1171 1181 void kvm_arch_sync_events(struct kvm *kvm);
+32 -12
include/linux/perf_event.h
··· 26 26 # include <asm/local64.h> 27 27 #endif 28 28 29 + #define PERF_GUEST_ACTIVE 0x01 30 + #define PERF_GUEST_USER 0x02 31 + 29 32 struct perf_guest_info_callbacks { 30 - int (*is_in_guest)(void); 31 - int (*is_user_mode)(void); 32 - unsigned long (*get_guest_ip)(void); 33 - void (*handle_intel_pt_intr)(void); 33 + unsigned int (*state)(void); 34 + unsigned long (*get_ip)(void); 35 + unsigned int (*handle_intel_pt_intr)(void); 34 36 }; 35 37 36 38 #ifdef CONFIG_HAVE_HW_BREAKPOINT ··· 1253 1251 enum perf_bpf_event_type type, 1254 1252 u16 flags); 1255 1253 1256 - extern struct perf_guest_info_callbacks *perf_guest_cbs; 1257 - extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 1258 - extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 1254 + #ifdef CONFIG_GUEST_PERF_EVENTS 1255 + extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; 1256 + 1257 + DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); 1258 + DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); 1259 + DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); 1260 + 1261 + static inline unsigned int perf_guest_state(void) 1262 + { 1263 + return static_call(__perf_guest_state)(); 1264 + } 1265 + static inline unsigned long perf_guest_get_ip(void) 1266 + { 1267 + return static_call(__perf_guest_get_ip)(); 1268 + } 1269 + static inline unsigned int perf_guest_handle_intel_pt_intr(void) 1270 + { 1271 + return static_call(__perf_guest_handle_intel_pt_intr)(); 1272 + } 1273 + extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); 1274 + extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); 1275 + #else 1276 + static inline unsigned int perf_guest_state(void) { return 0; } 1277 + static inline unsigned long perf_guest_get_ip(void) { return 0; } 1278 + static inline unsigned int 
perf_guest_handle_intel_pt_intr(void) { return 0; } 1279 + #endif /* CONFIG_GUEST_PERF_EVENTS */ 1259 1280 1260 1281 extern void perf_event_exec(void); 1261 1282 extern void perf_event_comm(struct task_struct *tsk, bool exec); ··· 1521 1496 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } 1522 1497 static inline void 1523 1498 perf_bp_event(struct perf_event *event, void *data) { } 1524 - 1525 - static inline int perf_register_guest_info_callbacks 1526 - (struct perf_guest_info_callbacks *callbacks) { return 0; } 1527 - static inline int perf_unregister_guest_info_callbacks 1528 - (struct perf_guest_info_callbacks *callbacks) { return 0; } 1529 1499 1530 1500 static inline void perf_event_mmap(struct vm_area_struct *vma) { } 1531 1501
+4
init/Kconfig
··· 1797 1797 help 1798 1798 See tools/perf/design.txt for details. 1799 1799 1800 + config GUEST_PERF_EVENTS 1801 + bool 1802 + depends on HAVE_PERF_EVENTS 1803 + 1800 1804 config PERF_USE_VMALLOC 1801 1805 bool 1802 1806 help
+29 -12
kernel/events/core.c
··· 6525 6525 perf_swevent_put_recursion_context(rctx); 6526 6526 } 6527 6527 6528 - /* 6529 - * We assume there is only KVM supporting the callbacks. 6530 - * Later on, we might change it to a list if there is 6531 - * another virtualization implementation supporting the callbacks. 6532 - */ 6533 - struct perf_guest_info_callbacks *perf_guest_cbs; 6528 + #ifdef CONFIG_GUEST_PERF_EVENTS 6529 + struct perf_guest_info_callbacks __rcu *perf_guest_cbs; 6534 6530 6535 - int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) 6531 + DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state); 6532 + DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip); 6533 + DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); 6534 + 6535 + void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) 6536 6536 { 6537 - perf_guest_cbs = cbs; 6538 - return 0; 6537 + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) 6538 + return; 6539 + 6540 + rcu_assign_pointer(perf_guest_cbs, cbs); 6541 + static_call_update(__perf_guest_state, cbs->state); 6542 + static_call_update(__perf_guest_get_ip, cbs->get_ip); 6543 + 6544 + /* Implementing ->handle_intel_pt_intr is optional. 
*/ 6545 + if (cbs->handle_intel_pt_intr) 6546 + static_call_update(__perf_guest_handle_intel_pt_intr, 6547 + cbs->handle_intel_pt_intr); 6539 6548 } 6540 6549 EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); 6541 6550 6542 - int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) 6551 + void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) 6543 6552 { 6544 - perf_guest_cbs = NULL; 6545 - return 0; 6553 + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) 6554 + return; 6555 + 6556 + rcu_assign_pointer(perf_guest_cbs, NULL); 6557 + static_call_update(__perf_guest_state, (void *)&__static_call_return0); 6558 + static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0); 6559 + static_call_update(__perf_guest_handle_intel_pt_intr, 6560 + (void *)&__static_call_return0); 6561 + synchronize_rcu(); 6546 6562 } 6547 6563 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); 6564 + #endif 6548 6565 6549 6566 static void 6550 6567 perf_output_sample_regs(struct perf_output_handle *handle,
+44
virt/kvm/kvm_main.c
··· 5419 5419 return &kvm_running_vcpu; 5420 5420 } 5421 5421 5422 + #ifdef CONFIG_GUEST_PERF_EVENTS 5423 + static unsigned int kvm_guest_state(void) 5424 + { 5425 + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); 5426 + unsigned int state; 5427 + 5428 + if (!kvm_arch_pmi_in_guest(vcpu)) 5429 + return 0; 5430 + 5431 + state = PERF_GUEST_ACTIVE; 5432 + if (!kvm_arch_vcpu_in_kernel(vcpu)) 5433 + state |= PERF_GUEST_USER; 5434 + 5435 + return state; 5436 + } 5437 + 5438 + static unsigned long kvm_guest_get_ip(void) 5439 + { 5440 + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); 5441 + 5442 + /* Retrieving the IP must be guarded by a call to kvm_guest_state(). */ 5443 + if (WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu))) 5444 + return 0; 5445 + 5446 + return kvm_arch_vcpu_get_ip(vcpu); 5447 + } 5448 + 5449 + static struct perf_guest_info_callbacks kvm_guest_cbs = { 5450 + .state = kvm_guest_state, 5451 + .get_ip = kvm_guest_get_ip, 5452 + .handle_intel_pt_intr = NULL, 5453 + }; 5454 + 5455 + void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)) 5456 + { 5457 + kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler; 5458 + perf_register_guest_info_callbacks(&kvm_guest_cbs); 5459 + } 5460 + void kvm_unregister_perf_callbacks(void) 5461 + { 5462 + perf_unregister_guest_info_callbacks(&kvm_guest_cbs); 5463 + } 5464 + #endif 5465 + 5422 5466 struct kvm_cpu_compat_check { 5423 5467 void *opaque; 5424 5468 int *ret;