Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"ARM:
- selftest fix
- force PTE mapping on device pages provided via VFIO
- fix detection of cacheable mapping at S2
- fallback to PMD/PTE mappings for composite huge pages
- fix accounting of Stage-2 PGD allocation
- fix AArch32 handling of some of the debug registers
- simplify host HYP entry
- fix stray pointer conversion on nVHE TLB invalidation
- fix initialization of the nVHE code
- simplify handling of capabilities exposed to HYP
- nuke VCPUs caught using a forbidden AArch32 EL0

x86:
- new nested virtualization selftest
- miscellaneous fixes
- make W=1 fixes
- reserve new CPUID bit in the KVM leaves"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: vmx: remove unused variable
KVM: selftests: Don't require THP to run tests
KVM: VMX: eVMCS: make evmcs_sanitize_exec_ctrls() work again
KVM: selftests: test behavior of unmapped L2 APIC-access address
KVM: x86: Fix NULL dereference at kvm_msr_ignored_check()
KVM: x86: replace static const variables with macros
KVM: arm64: Handle Asymmetric AArch32 systems
arm64: cpufeature: upgrade hyp caps to final
arm64: cpufeature: reorder cpus_have_{const, final}_cap()
KVM: arm64: Factor out is_{vhe,nvhe}_hyp_code()
KVM: arm64: Force PTE mapping on fault resulting in a device mapping
KVM: arm64: Use fallback mapping sizes for contiguous huge page sizes
KVM: arm64: Fix masks in stage2_pte_cacheable()
KVM: arm64: Fix AArch32 handling of DBGD{CCINT,SCRext} and DBGVCR
KVM: arm64: Allocate stage-2 pgd pages with GFP_KERNEL_ACCOUNT
KVM: arm64: Drop useless PAN setting on host EL1 to EL2 transition
KVM: arm64: Remove leftover kern_hyp_va() in nVHE TLB invalidation
KVM: arm64: Don't corrupt tpidr_el2 on failed HVC call
x86/kvm: Reserve KVM_FEATURE_MSI_EXT_DEST_ID

+312 -82
+4
Documentation/virt/kvm/cpuid.rst
··· 92 92 async pf acknowledgment msr 93 93 0x4b564d07. 94 94 95 + KVM_FEATURE_MSI_EXT_DEST_ID 15 guest checks this feature bit 96 + before using extended destination 97 + ID bits in MSI address bits 11-5. 98 + 95 99 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 host will warn if no guest-side 96 100 per-cpu warps are expected in 97 101 kvmclock
+38 -16
arch/arm64/include/asm/cpufeature.h
··· 375 375 return false; 376 376 } 377 377 378 + static __always_inline bool is_vhe_hyp_code(void) 379 + { 380 + /* Only defined for code run in VHE hyp context */ 381 + return __is_defined(__KVM_VHE_HYPERVISOR__); 382 + } 383 + 384 + static __always_inline bool is_nvhe_hyp_code(void) 385 + { 386 + /* Only defined for code run in NVHE hyp context */ 387 + return __is_defined(__KVM_NVHE_HYPERVISOR__); 388 + } 389 + 390 + static __always_inline bool is_hyp_code(void) 391 + { 392 + return is_vhe_hyp_code() || is_nvhe_hyp_code(); 393 + } 394 + 378 395 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); 379 396 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; 380 397 extern struct static_key_false arm64_const_caps_ready; ··· 445 428 } 446 429 447 430 /* 448 - * Test for a capability, possibly with a runtime check. 449 - * 450 - * Before capabilities are finalized, this behaves as cpus_have_cap(). 451 - * After capabilities are finalized, this is patched to avoid a runtime check. 452 - * 453 - * @num must be a compile-time constant. 454 - */ 455 - static __always_inline bool cpus_have_const_cap(int num) 456 - { 457 - if (system_capabilities_finalized()) 458 - return __cpus_have_const_cap(num); 459 - else 460 - return cpus_have_cap(num); 461 - } 462 - 463 - /* 464 431 * Test for a capability without a runtime check. 465 432 * 466 433 * Before capabilities are finalized, this will BUG(). ··· 458 457 return __cpus_have_const_cap(num); 459 458 else 460 459 BUG(); 460 + } 461 + 462 + /* 463 + * Test for a capability, possibly with a runtime check for non-hyp code. 464 + * 465 + * For hyp code, this behaves the same as cpus_have_final_cap(). 466 + * 467 + * For non-hyp code: 468 + * Before capabilities are finalized, this behaves as cpus_have_cap(). 469 + * After capabilities are finalized, this is patched to avoid a runtime check. 470 + * 471 + * @num must be a compile-time constant. 472 + */ 473 + static __always_inline bool cpus_have_const_cap(int num) 474 + { 475 + if (is_hyp_code()) 476 + return cpus_have_final_cap(num); 477 + else if (system_capabilities_finalized()) 478 + return __cpus_have_const_cap(num); 479 + else 480 + return cpus_have_cap(num); 461 481 } 462 482 463 483 static inline void cpus_set_cap(unsigned int num)
+1
arch/arm64/include/asm/kvm_host.h
··· 239 239 #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) 240 240 #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) 241 241 #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) 242 + #define cp14_DBGVCR (DBGVCR32_EL2 * 2) 242 243 243 244 #define NR_COPRO_REGS (NR_SYS_REGS * 2) 244 245
+4 -5
arch/arm64/include/asm/virt.h
··· 86 86 static __always_inline bool has_vhe(void) 87 87 { 88 88 /* 89 - * The following macros are defined for code specic to VHE/nVHE. 90 - * If has_vhe() is inlined into those compilation units, it can 91 - * be determined statically. Otherwise fall back to caps. 89 + * Code only run in VHE/NVHE hyp context can assume VHE is present or 90 + * absent. Otherwise fall back to caps. 92 91 */ 93 - if (__is_defined(__KVM_VHE_HYPERVISOR__)) 92 + if (is_vhe_hyp_code()) 94 93 return true; 95 - else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) 94 + else if (is_nvhe_hyp_code()) 96 95 return false; 97 96 else 98 97 return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN);
-1
arch/arm64/kernel/image-vars.h
··· 87 87 /* Kernel symbols needed for cpus_have_final/const_caps checks. */ 88 88 KVM_NVHE_ALIAS(arm64_const_caps_ready); 89 89 KVM_NVHE_ALIAS(cpu_hwcap_keys); 90 - KVM_NVHE_ALIAS(cpu_hwcaps); 91 90 92 91 /* Static keys which are set if a vGIC trap should be handled in hyp. */ 93 92 KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
+19
arch/arm64/kvm/arm.c
··· 808 808 809 809 preempt_enable(); 810 810 811 + /* 812 + * The ARMv8 architecture doesn't give the hypervisor 813 + * a mechanism to prevent a guest from dropping to AArch32 EL0 814 + * if implemented by the CPU. If we spot the guest in such 815 + * state and that we decided it wasn't supposed to do so (like 816 + * with the asymmetric AArch32 case), return to userspace with 817 + * a fatal error. 818 + */ 819 + if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) { 820 + /* 821 + * As we have caught the guest red-handed, decide that 822 + * it isn't fit for purpose anymore by making the vcpu 823 + * invalid. The VMM can try and fix it by issuing a 824 + * KVM_ARM_VCPU_INIT if it really wants to. 825 + */ 826 + vcpu->arch.target = -1; 827 + ret = ARM_EXCEPTION_IL; 828 + } 829 + 811 830 ret = handle_exit(vcpu, ret); 812 831 } 813 832
-2
arch/arm64/kvm/hyp/nvhe/host.S
··· 17 17 18 18 get_host_ctxt x0, x1 19 19 20 - ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) 21 - 22 20 /* Store the host regs x2 and x3 */ 23 21 stp x2, x3, [x0, #CPU_XREG_OFFSET(2)] 24 22
+15 -6
arch/arm64/kvm/hyp/nvhe/hyp-init.S
··· 57 57 cmp x0, #HVC_STUB_HCALL_NR 58 58 b.lo __kvm_handle_stub_hvc 59 59 60 - /* Set tpidr_el2 for use by HYP to free a register */ 61 - msr tpidr_el2, x2 60 + // We only actively check bits [24:31], and everything 61 + // else has to be zero, which we check at build time. 62 + #if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF) 63 + #error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value 64 + #endif 62 65 63 - mov x2, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) 64 - cmp x0, x2 65 - b.eq 1f 66 + ror x0, x0, #24 67 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF) 68 + ror x0, x0, #4 69 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF) 70 + cbz x0, 1f 66 71 mov x0, #SMCCC_RET_NOT_SUPPORTED 67 72 eret 68 73 69 - 1: phys_to_ttbr x0, x1 74 + 1: 75 + /* Set tpidr_el2 for use by HYP to free a register */ 76 + msr tpidr_el2, x2 77 + 78 + phys_to_ttbr x0, x1 70 79 alternative_if ARM64_HAS_CNP 71 80 orr x0, x0, #TTBR_CNP_BIT 72 81 alternative_else_nop_endif
-1
arch/arm64/kvm/hyp/nvhe/tlb.c
··· 128 128 struct tlb_inv_context cxt; 129 129 130 130 /* Switch to requested VMID */ 131 - mmu = kern_hyp_va(mmu); 132 131 __tlb_switch_to_guest(mmu, &cxt); 133 132 134 133 __tlbi(vmalle1);
+2 -2
arch/arm64/kvm/hyp/pgtable.c
··· 635 635 636 636 static bool stage2_pte_cacheable(kvm_pte_t pte) 637 637 { 638 - u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte); 638 + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; 639 639 return memattr == PAGE_S2_MEMATTR(NORMAL); 640 640 } 641 641 ··· 846 846 u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; 847 847 848 848 pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; 849 - pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO); 849 + pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO); 850 850 if (!pgt->pgd) 851 851 return -ENOMEM; 852 852
+20 -7
arch/arm64/kvm/mmu.c
··· 787 787 vma_shift = PAGE_SHIFT; 788 788 } 789 789 790 - if (vma_shift == PUD_SHIFT && 791 - !fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) 792 - vma_shift = PMD_SHIFT; 793 - 794 - if (vma_shift == PMD_SHIFT && 795 - !fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { 796 - force_pte = true; 790 + switch (vma_shift) { 791 + case PUD_SHIFT: 792 + if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) 793 + break; 794 + fallthrough; 795 + case CONT_PMD_SHIFT: 796 + vma_shift = PMD_SHIFT; 797 + fallthrough; 798 + case PMD_SHIFT: 799 + if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) 800 + break; 801 + fallthrough; 802 + case CONT_PTE_SHIFT: 797 803 vma_shift = PAGE_SHIFT; 804 + force_pte = true; 805 + fallthrough; 806 + case PAGE_SHIFT: 807 + break; 808 + default: 809 + WARN_ONCE(1, "Unknown vma_shift %d", vma_shift); 798 810 } 799 811 800 812 vma_pagesize = 1UL << vma_shift; ··· 851 839 852 840 if (kvm_is_device_pfn(pfn)) { 853 841 device = true; 842 + force_pte = true; 854 843 } else if (logging_active && !write_fault) { 855 844 /* 856 845 * Only actually map the page as writable if this was a write
+3 -3
arch/arm64/kvm/sys_regs.c
··· 1897 1897 { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, 1898 1898 DBG_BCR_BVR_WCR_WVR(1), 1899 1899 /* DBGDCCINT */ 1900 - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, 1900 + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, 1901 1901 /* DBGDSCRext */ 1902 - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, 1902 + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, 1903 1903 DBG_BCR_BVR_WCR_WVR(2), 1904 1904 /* DBGDTR[RT]Xint */ 1905 1905 { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, ··· 1914 1914 { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, 1915 1915 DBG_BCR_BVR_WCR_WVR(6), 1916 1916 /* DBGVCR */ 1917 - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, 1917 + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, 1918 1918 DBG_BCR_BVR_WCR_WVR(7), 1919 1919 DBG_BCR_BVR_WCR_WVR(8), 1920 1920 DBG_BCR_BVR_WCR_WVR(9),
+1
arch/x86/include/uapi/asm/kvm_para.h
··· 32 32 #define KVM_FEATURE_POLL_CONTROL 12 33 33 #define KVM_FEATURE_PV_SCHED_YIELD 13 34 34 #define KVM_FEATURE_ASYNC_PF_INT 14 35 + #define KVM_FEATURE_MSI_EXT_DEST_ID 15 35 36 36 37 #define KVM_HINTS_REALTIME 0 37 38
+5 -5
arch/x86/kvm/mmu/mmu.c
··· 225 225 { 226 226 u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; 227 227 228 - gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) 228 + gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN) 229 229 & shadow_nonpresent_or_rsvd_mask; 230 230 231 231 return gpa >> PAGE_SHIFT; ··· 591 591 static u64 restore_acc_track_spte(u64 spte) 592 592 { 593 593 u64 new_spte = spte; 594 - u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) 595 - & shadow_acc_track_saved_bits_mask; 594 + u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) 595 + & SHADOW_ACC_TRACK_SAVED_BITS_MASK; 596 596 597 597 WARN_ON_ONCE(spte_ad_enabled(spte)); 598 598 WARN_ON_ONCE(!is_access_track_spte(spte)); 599 599 600 600 new_spte &= ~shadow_acc_track_mask; 601 - new_spte &= ~(shadow_acc_track_saved_bits_mask << 602 - shadow_acc_track_saved_bits_shift); 601 + new_spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK << 602 + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT); 603 603 new_spte |= saved_bits; 604 604 605 605 return new_spte;
+8 -8
arch/x86/kvm/mmu/spte.c
··· 55 55 mask |= shadow_mmio_value | access; 56 56 mask |= gpa | shadow_nonpresent_or_rsvd_mask; 57 57 mask |= (gpa & shadow_nonpresent_or_rsvd_mask) 58 - << shadow_nonpresent_or_rsvd_mask_len; 58 + << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; 59 59 60 60 return mask; 61 61 } ··· 231 231 !spte_can_locklessly_be_made_writable(spte), 232 232 "kvm: Writable SPTE is not locklessly dirty-trackable\n"); 233 233 234 - WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << 235 - shadow_acc_track_saved_bits_shift), 234 + WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK << 235 + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT), 236 236 "kvm: Access Tracking saved bit locations are not zero\n"); 237 237 238 - spte |= (spte & shadow_acc_track_saved_bits_mask) << 239 - shadow_acc_track_saved_bits_shift; 238 + spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) << 239 + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT; 240 240 spte &= ~shadow_acc_track_mask; 241 241 242 242 return spte; ··· 245 245 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask) 246 246 { 247 247 BUG_ON((u64)(unsigned)access_mask != access_mask); 248 - WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); 248 + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)); 249 249 WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); 250 250 shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; 251 251 shadow_mmio_access_mask = access_mask; ··· 306 306 low_phys_bits = boot_cpu_data.x86_phys_bits; 307 307 if (boot_cpu_has_bug(X86_BUG_L1TF) && 308 308 !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= 309 - 52 - shadow_nonpresent_or_rsvd_mask_len)) { 309 + 52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) { 310 310 low_phys_bits = boot_cpu_data.x86_cache_bits 311 - - shadow_nonpresent_or_rsvd_mask_len; 311 + - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; 312 312 shadow_nonpresent_or_rsvd_mask = 313 313 rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); 314 314 }
+8 -8
arch/x86/kvm/mmu/spte.h
··· 105 105 extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; 106 106 107 107 /* 108 + * The number of high-order 1 bits to use in the mask above. 109 + */ 110 + #define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5 111 + 112 + /* 108 113 * The mask/shift to use for saving the original R/X bits when marking the PTE 109 114 * as not-present for access tracking purposes. We do not save the W bit as the 110 115 * PTEs being access tracked also need to be dirty tracked, so the W bit will be 111 116 * restored only when a write is attempted to the page. 112 117 */ 113 - static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | 114 - PT64_EPT_EXECUTABLE_MASK; 115 - static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; 116 - 117 - /* 118 - * The number of high-order 1 bits to use in the mask above. 119 - */ 120 - static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; 118 + #define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \ 119 + PT64_EPT_EXECUTABLE_MASK) 120 + #define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT 121 121 122 122 /* 123 123 * In some cases, we need to preserve the GFN of a non-present or reserved
+1 -2
arch/x86/kvm/vmx/evmcs.c
··· 297 297 }; 298 298 const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); 299 299 300 - void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) 300 + __init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) 301 301 { 302 302 vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; 303 303 vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC; 304 304 305 305 vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; 306 306 vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; 307 - 308 307 } 309 308 #endif 310 309
+1 -2
arch/x86/kvm/vmx/evmcs.h
··· 185 185 vp_ap->enlighten_vmentry = 1; 186 186 } 187 187 188 - void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); 188 + __init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); 189 189 #else /* !IS_ENABLED(CONFIG_HYPERV) */ 190 190 static inline void evmcs_write64(unsigned long field, u64 value) {} 191 191 static inline void evmcs_write32(unsigned long field, u32 value) {} ··· 194 194 static inline u32 evmcs_read32(unsigned long field) { return 0; } 195 195 static inline u16 evmcs_read16(unsigned long field) { return 0; } 196 196 static inline void evmcs_load(u64 phys_addr) {} 197 - static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} 198 197 static inline void evmcs_touch_msr_bitmap(void) {} 199 198 #endif /* IS_ENABLED(CONFIG_HYPERV) */ 200 199
+3 -3
arch/x86/kvm/vmx/vmx.c
··· 2560 2560 vmcs_conf->vmexit_ctrl = _vmexit_control; 2561 2561 vmcs_conf->vmentry_ctrl = _vmentry_control; 2562 2562 2563 - if (static_branch_unlikely(&enable_evmcs)) 2563 + #if IS_ENABLED(CONFIG_HYPERV) 2564 + if (enlightened_vmcs) 2564 2565 evmcs_sanitize_exec_ctrls(vmcs_conf); 2566 + #endif 2565 2567 2566 2568 return 0; 2567 2569 } ··· 6836 6834 static int vmx_create_vcpu(struct kvm_vcpu *vcpu) 6837 6835 { 6838 6836 struct vcpu_vmx *vmx; 6839 - unsigned long *msr_bitmap; 6840 6837 int i, cpu, err; 6841 6838 6842 6839 BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); ··· 6895 6894 bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS); 6896 6895 bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); 6897 6896 6898 - msr_bitmap = vmx->vmcs01.msr_bitmap; 6899 6897 vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R); 6900 6898 vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW); 6901 6899 vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
+4 -4
arch/x86/kvm/x86.c
··· 265 265 266 266 if (ignore_msrs) { 267 267 if (report_ignored_msrs) 268 - vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n", 269 - op, msr, data); 268 + kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", 269 + op, msr, data); 270 270 /* Mask the error */ 271 271 return 0; 272 272 } else { 273 - vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n", 274 - op, msr, data); 273 + kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", 274 + op, msr, data); 275 275 return -ENOENT; 276 276 } 277 277 }
+1
tools/testing/selftests/kvm/.gitignore
··· 15 15 /x86_64/vmx_preemption_timer_test 16 16 /x86_64/svm_vmcall_test 17 17 /x86_64/sync_regs_test 18 + /x86_64/vmx_apic_access_test 18 19 /x86_64/vmx_close_while_nested_test 19 20 /x86_64/vmx_dirty_log_test 20 21 /x86_64/vmx_set_nested_state_test
+1
tools/testing/selftests/kvm/Makefile
··· 49 49 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test 50 50 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test 51 51 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test 52 + TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test 52 53 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test 53 54 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test 54 55 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
+6
tools/testing/selftests/kvm/include/x86_64/vmx.h
··· 573 573 void *eptp_hva; 574 574 uint64_t eptp_gpa; 575 575 void *eptp; 576 + 577 + void *apic_access_hva; 578 + uint64_t apic_access_gpa; 579 + void *apic_access; 576 580 }; 577 581 578 582 union vmx_basic { ··· 619 615 uint32_t memslot, uint32_t eptp_memslot); 620 616 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, 621 617 uint32_t eptp_memslot); 618 + void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, 619 + uint32_t eptp_memslot); 622 620 623 621 #endif /* SELFTEST_KVM_VMX_H */
+16 -7
tools/testing/selftests/kvm/lib/kvm_util.c
··· 14 14 #include <sys/mman.h> 15 15 #include <sys/types.h> 16 16 #include <sys/stat.h> 17 + #include <unistd.h> 17 18 #include <linux/kernel.h> 18 19 19 20 #define KVM_UTIL_PGS_PER_HUGEPG 512 ··· 665 664 666 665 /* As needed perform madvise */ 667 666 if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { 668 - ret = madvise(region->host_mem, npages * vm->page_size, 669 - src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); 670 - TEST_ASSERT(ret == 0, "madvise failed,\n" 671 - " addr: %p\n" 672 - " length: 0x%lx\n" 673 - " src_type: %x", 674 - region->host_mem, npages * vm->page_size, src_type); 667 + struct stat statbuf; 668 + 669 + ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); 670 + TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), 671 + "stat /sys/kernel/mm/transparent_hugepage"); 672 + 673 + TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP, 674 + "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel"); 675 + 676 + if (ret == 0) { 677 + ret = madvise(region->host_mem, npages * vm->page_size, 678 + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); 679 + TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x", 680 + region->host_mem, npages * vm->page_size, src_type); 681 + } 675 682 } 676 683 677 684 region->unused_phy_pages = sparsebit_alloc();
+9
tools/testing/selftests/kvm/lib/x86_64/vmx.c
··· 542 542 vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); 543 543 vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); 544 544 } 545 + 546 + void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, 547 + uint32_t eptp_memslot) 548 + { 549 + vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(), 550 + 0x10000, 0, 0); 551 + vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); 552 + vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); 553 + }
+142
tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * vmx_apic_access_test 4 + * 5 + * Copyright (C) 2020, Google LLC. 6 + * 7 + * This work is licensed under the terms of the GNU GPL, version 2. 8 + * 9 + * The first subtest simply checks to see that an L2 guest can be 10 + * launched with a valid APIC-access address that is backed by a 11 + * page of L1 physical memory. 12 + * 13 + * The second subtest sets the APIC-access address to a (valid) L1 14 + * physical address that is not backed by memory. KVM can't handle 15 + * this situation, so resuming L2 should result in a KVM exit for 16 + * internal error (emulation). This is not an architectural 17 + * requirement. It is just a shortcoming of KVM. The internal error 18 + * is unfortunate, but it's better than what used to happen! 19 + */ 20 + 21 + #include "test_util.h" 22 + #include "kvm_util.h" 23 + #include "processor.h" 24 + #include "vmx.h" 25 + 26 + #include <string.h> 27 + #include <sys/ioctl.h> 28 + 29 + #include "kselftest.h" 30 + 31 + #define VCPU_ID 0 32 + 33 + /* The virtual machine object. */ 34 + static struct kvm_vm *vm; 35 + 36 + static void l2_guest_code(void) 37 + { 38 + /* Exit to L1 */ 39 + __asm__ __volatile__("vmcall"); 40 + } 41 + 42 + static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) 43 + { 44 + #define L2_GUEST_STACK_SIZE 64 45 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 46 + uint32_t control; 47 + 48 + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); 49 + GUEST_ASSERT(load_vmcs(vmx_pages)); 50 + 51 + /* Prepare the VMCS for L2 execution. */ 52 + prepare_vmcs(vmx_pages, l2_guest_code, 53 + &l2_guest_stack[L2_GUEST_STACK_SIZE]); 54 + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); 55 + control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 56 + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); 57 + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); 58 + control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 59 + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); 60 + vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa); 61 + 62 + /* Try to launch L2 with the memory-backed APIC-access address. */ 63 + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); 64 + GUEST_ASSERT(!vmlaunch()); 65 + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 66 + 67 + vmwrite(APIC_ACCESS_ADDR, high_gpa); 68 + 69 + /* Try to resume L2 with the unbacked APIC-access address. */ 70 + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); 71 + GUEST_ASSERT(!vmresume()); 72 + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 73 + 74 + GUEST_DONE(); 75 + } 76 + 77 + int main(int argc, char *argv[]) 78 + { 79 + unsigned long apic_access_addr = ~0ul; 80 + unsigned int paddr_width; 81 + unsigned int vaddr_width; 82 + vm_vaddr_t vmx_pages_gva; 83 + unsigned long high_gpa; 84 + struct vmx_pages *vmx; 85 + bool done = false; 86 + 87 + nested_vmx_check_supported(); 88 + 89 + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); 90 + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); 91 + 92 + kvm_get_cpu_address_width(&paddr_width, &vaddr_width); 93 + high_gpa = (1ul << paddr_width) - getpagesize(); 94 + if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) { 95 + print_skip("No unbacked physical page available"); 96 + exit(KSFT_SKIP); 97 + } 98 + 99 + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); 100 + prepare_virtualize_apic_accesses(vmx, vm, 0); 101 + vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa); 102 + 103 + while (!done) { 104 + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); 105 + struct ucall uc; 106 + 107 + vcpu_run(vm, VCPU_ID); 108 + if (apic_access_addr == high_gpa) { 109 + TEST_ASSERT(run->exit_reason == 110 + KVM_EXIT_INTERNAL_ERROR, 111 + "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n", 112 + run->exit_reason, 113 + exit_reason_str(run->exit_reason)); 114 + TEST_ASSERT(run->internal.suberror == 115 + KVM_INTERNAL_ERROR_EMULATION, 116 + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", 117 + run->internal.suberror); 118 + break; 119 + } 120 + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 121 + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", 122 + run->exit_reason, 123 + exit_reason_str(run->exit_reason)); 124 + 125 + switch (get_ucall(vm, VCPU_ID, &uc)) { 126 + case UCALL_ABORT: 127 + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], 128 + __FILE__, uc.args[1]); 129 + /* NOT REACHED */ 130 + case UCALL_SYNC: 131 + apic_access_addr = uc.args[1]; 132 + break; 133 + case UCALL_DONE: 134 + done = true; 135 + break; 136 + default: 137 + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); 138 + } 139 + } 140 + kvm_vm_free(vm); 141 + return 0; 142 + }