Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"ARM:

- Correctly save/restore PMUSERENR_EL0 when host userspace is using
PMU counters directly

- Fix GICv2 emulation on GICv3 after the locking rework

- Don't use smp_processor_id() in kvm_pmu_probe_armpmu(), and
document why

Generic:

- Avoid setting page table entries pointing to a deleted memslot if a
host page table entry is changed concurrently with the deletion"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: Avoid illegal stage2 mapping on invalid memory slot
KVM: arm64: Use raw_smp_processor_id() in kvm_pmu_probe_armpmu()
KVM: arm64: Restore GICv2-on-GICv3 functionality
KVM: arm64: PMU: Don't overwrite PMUSERENR with vcpu loaded
KVM: arm64: PMU: Restore the host's PMUSERENR_EL0

+129 -11
+5
arch/arm/include/asm/arm_pmuv3.h
··· 222 222 return false; 223 223 } 224 224 225 + static inline bool kvm_set_pmuserenr(u64 val) 226 + { 227 + return false; 228 + } 229 + 225 230 /* PMU Version in DFR Register */ 226 231 #define ARMV8_PMU_DFR_VER_NI 0 227 232 #define ARMV8_PMU_DFR_VER_V3P4 0x5
+7
arch/arm64/include/asm/kvm_host.h
··· 699 699 #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) 700 700 /* Software step state is Active-pending */ 701 701 #define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) 702 + /* PMUSERENR for the guest EL0 is on physical CPU */ 703 + #define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6)) 702 704 703 705 704 706 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ ··· 1067 1065 #ifdef CONFIG_KVM 1068 1066 void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); 1069 1067 void kvm_clr_pmu_events(u32 clr); 1068 + bool kvm_set_pmuserenr(u64 val); 1070 1069 #else 1071 1070 static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} 1072 1071 static inline void kvm_clr_pmu_events(u32 clr) {} 1072 + static inline bool kvm_set_pmuserenr(u64 val) 1073 + { 1074 + return false; 1075 + } 1073 1076 #endif 1074 1077 1075 1078 void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
+13 -2
arch/arm64/kvm/hyp/include/hyp/switch.h
··· 82 82 * EL1 instead of being trapped to EL2. 83 83 */ 84 84 if (kvm_arm_support_pmu_v3()) { 85 + struct kvm_cpu_context *hctxt; 86 + 85 87 write_sysreg(0, pmselr_el0); 88 + 89 + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; 90 + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); 86 91 write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); 92 + vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); 87 93 } 88 94 89 95 vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); ··· 112 106 write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); 113 107 114 108 write_sysreg(0, hstr_el2); 115 - if (kvm_arm_support_pmu_v3()) 116 - write_sysreg(0, pmuserenr_el0); 109 + if (kvm_arm_support_pmu_v3()) { 110 + struct kvm_cpu_context *hctxt; 111 + 112 + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; 113 + write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); 114 + vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); 115 + } 117 116 118 117 if (cpus_have_final_cap(ARM64_SME)) { 119 118 sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
+14
arch/arm64/kvm/hyp/vhe/switch.c
··· 92 92 } 93 93 NOKPROBE_SYMBOL(__deactivate_traps); 94 94 95 + /* 96 + * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to 97 + * prevent a race condition between context switching of PMUSERENR_EL0 98 + * in __{activate,deactivate}_traps_common() and IPIs that attempt to 99 + * update PMUSERENR_EL0. See also kvm_set_pmuserenr(). 100 + */ 95 101 void activate_traps_vhe_load(struct kvm_vcpu *vcpu) 96 102 { 103 + unsigned long flags; 104 + 105 + local_irq_save(flags); 97 106 __activate_traps_common(vcpu); 107 + local_irq_restore(flags); 98 108 } 99 109 100 110 void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) 101 111 { 112 + unsigned long flags; 113 + 114 + local_irq_save(flags); 102 115 __deactivate_traps_common(vcpu); 116 + local_irq_restore(flags); 103 117 } 104 118 105 119 static const exit_handler_fn hyp_exit_handlers[] = {
+19 -1
arch/arm64/kvm/pmu-emul.c
··· 700 700 701 701 mutex_lock(&arm_pmus_lock); 702 702 703 - cpu = smp_processor_id(); 703 + /* 704 + * It is safe to use a stale cpu to iterate the list of PMUs so long as 705 + * the same value is used for the entirety of the loop. Given this, and 706 + * the fact that no percpu data is used for the lookup there is no need 707 + * to disable preemption. 708 + * 709 + * It is still necessary to get a valid cpu, though, to probe for the 710 + * default PMU instance as userspace is not required to specify a PMU 711 + * type. In order to uphold the preexisting behavior KVM selects the 712 + * PMU instance for the core where the first call to the 713 + * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case 714 + * would be a user with disdain of all things big.LITTLE that affines 715 + * the VMM to a particular cluster of cores. 716 + * 717 + * In any case, userspace should just do the sane thing and use the UAPI 718 + * to select a PMU type directly. But, be wary of the baggage being 719 + * carried here. 720 + */ 721 + cpu = raw_smp_processor_id(); 704 722 list_for_each_entry(entry, &arm_pmus, entry) { 705 723 tmp = entry->arm_pmu; 706 724
+27
arch/arm64/kvm/pmu.c
··· 209 209 kvm_vcpu_pmu_enable_el0(events_host); 210 210 kvm_vcpu_pmu_disable_el0(events_guest); 211 211 } 212 + 213 + /* 214 + * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU 215 + * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched 216 + * to the value for the guest on vcpu_load(). The value for the host EL0 217 + * will be restored on vcpu_put(), before returning to userspace. 218 + * This isn't necessary for nVHE, as the register is context switched for 219 + * every guest enter/exit. 220 + * 221 + * Return true if KVM takes care of the register. Otherwise return false. 222 + */ 223 + bool kvm_set_pmuserenr(u64 val) 224 + { 225 + struct kvm_cpu_context *hctxt; 226 + struct kvm_vcpu *vcpu; 227 + 228 + if (!kvm_arm_support_pmu_v3() || !has_vhe()) 229 + return false; 230 + 231 + vcpu = kvm_get_running_vcpu(); 232 + if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) 233 + return false; 234 + 235 + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; 236 + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; 237 + return true; 238 + }
+7 -4
arch/arm64/kvm/vgic/vgic-init.c
··· 446 446 int kvm_vgic_map_resources(struct kvm *kvm) 447 447 { 448 448 struct vgic_dist *dist = &kvm->arch.vgic; 449 + enum vgic_type type; 449 450 gpa_t dist_base; 450 451 int ret = 0; 451 452 ··· 461 460 if (!irqchip_in_kernel(kvm)) 462 461 goto out; 463 462 464 - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) 463 + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { 465 464 ret = vgic_v2_map_resources(kvm); 466 - else 465 + type = VGIC_V2; 466 + } else { 467 467 ret = vgic_v3_map_resources(kvm); 468 + type = VGIC_V3; 469 + } 468 470 469 471 if (ret) { 470 472 __kvm_vgic_destroy(kvm); ··· 477 473 dist_base = dist->vgic_dist_base; 478 474 mutex_unlock(&kvm->arch.config_lock); 479 475 480 - ret = vgic_register_dist_iodev(kvm, dist_base, 481 - kvm_vgic_global_state.type); 476 + ret = vgic_register_dist_iodev(kvm, dist_base, type); 482 477 if (ret) { 483 478 kvm_err("Unable to register VGIC dist MMIO regions\n"); 484 479 kvm_vgic_destroy(kvm);
+18 -3
drivers/perf/arm_pmuv3.c
··· 677 677 return value; 678 678 } 679 679 680 + static void update_pmuserenr(u64 val) 681 + { 682 + lockdep_assert_irqs_disabled(); 683 + 684 + /* 685 + * The current PMUSERENR_EL0 value might be the value for the guest. 686 + * If that's the case, have KVM keep tracking of the register value 687 + * for the host EL0 so that KVM can restore it before returning to 688 + * the host EL0. Otherwise, update the register now. 689 + */ 690 + if (kvm_set_pmuserenr(val)) 691 + return; 692 + 693 + write_pmuserenr(val); 694 + } 695 + 680 696 static void armv8pmu_disable_user_access(void) 681 697 { 682 - write_pmuserenr(0); 698 + update_pmuserenr(0); 683 699 } 684 700 685 701 static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) ··· 711 695 armv8pmu_write_evcntr(i, 0); 712 696 } 713 697 714 - write_pmuserenr(0); 715 - write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); 698 + update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); 716 699 } 717 700 718 701 static void armv8pmu_enable_event(struct perf_event *event)
+19 -1
virt/kvm/kvm_main.c
··· 686 686 687 687 return __kvm_handle_hva_range(kvm, &range); 688 688 } 689 + 690 + static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) 691 + { 692 + /* 693 + * Skipping invalid memslots is correct if and only if change_pte() is 694 + * surrounded by invalidate_range_{start,end}(), which is currently 695 + * guaranteed by the primary MMU. If that ever changes, KVM needs to 696 + * unmap the memslot instead of skipping the memslot to ensure that KVM 697 + * doesn't hold references to the old PFN. 698 + */ 699 + WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); 700 + 701 + if (range->slot->flags & KVM_MEMSLOT_INVALID) 702 + return false; 703 + 704 + return kvm_set_spte_gfn(kvm, range); 705 + } 706 + 689 707 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, 690 708 struct mm_struct *mm, 691 709 unsigned long address, ··· 725 707 if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) 726 708 return; 727 709 728 - kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); 710 + kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn); 729 711 } 730 712 731 713 void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,