Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"x86:

- Miscellaneous bugfixes

- A small cleanup for the new workqueue code

- Documentation syntax fix

RISC-V:

- Remove hgatp zeroing in kvm_arch_vcpu_put()

- Fix alignment of the guest_hang() in KVM selftest

- Fix PTE A and D bits in KVM selftest

- Missing #include in vcpu_fp.c

ARM:

- Some PSCI fixes after introducing PSCIv1.1 and SYSTEM_RESET2

- Fix the MMU write-lock not being taken on THP split

- Fix mixed-width VM handling

- Fix potential UAF when debugfs registration fails

- Various selftest updates for all of the above"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (24 commits)
KVM: x86: hyper-v: Avoid writing to TSC page without an active vCPU
KVM: SVM: Do not activate AVIC for SEV-enabled guest
Documentation: KVM: Add SPDX-License-Identifier tag
selftests: kvm: add tsc_scaling_sync to .gitignore
RISC-V: KVM: include missing hwcap.h into vcpu_fp
KVM: selftests: riscv: Fix alignment of the guest_hang() function
KVM: selftests: riscv: Set PTE A and D bits in VS-stage page table
RISC-V: KVM: Don't clear hgatp CSR in kvm_arch_vcpu_put()
selftests: KVM: Free the GIC FD when cleaning up in arch_timer
selftests: KVM: Don't leak GIC FD across dirty log test iterations
KVM: Don't create VM debugfs files outside of the VM directory
KVM: selftests: get-reg-list: Add KVM_REG_ARM_FW_REG(3)
KVM: avoid NULL pointer dereference in kvm_dirty_ring_push
KVM: arm64: selftests: Introduce vcpu_width_config
KVM: arm64: mixed-width check should be skipped for uninitialized vCPUs
KVM: arm64: vgic: Remove unnecessary type castings
KVM: arm64: Don't split hugepages outside of MMU write lock
KVM: arm64: Drop unneeded minor version check from PSCI v1.x handler
KVM: arm64: Actually prevent SMC64 SYSTEM_RESET2 from AArch32
KVM: arm64: Generally disallow SMC64 for AArch32 guests
...

+360 -127
+1
Documentation/virt/kvm/api.rst
··· 6190 6190 unsigned long args[6]; 6191 6191 unsigned long ret[2]; 6192 6192 } riscv_sbi; 6193 + 6193 6194 If exit reason is KVM_EXIT_RISCV_SBI then it indicates that the VCPU has 6194 6195 done a SBI call which is not handled by KVM RISC-V kernel module. The details 6195 6196 of the SBI call are available in 'riscv_sbi' member of kvm_run structure. The
+2
Documentation/virt/kvm/vcpu-requests.rst
··· 1 + .. SPDX-License-Identifier: GPL-2.0 2 + 1 3 ================= 2 4 KVM VCPU Requests 3 5 =================
+2
Documentation/virt/kvm/x86/amd-memory-encryption.rst
··· 1 + .. SPDX-License-Identifier: GPL-2.0 2 + 1 3 ====================================== 2 4 Secure Encrypted Virtualization (SEV) 3 5 ======================================
+1 -1
Documentation/virt/kvm/x86/errata.rst
··· 1 + .. SPDX-License-Identifier: GPL-2.0 1 2 2 3 ======================================= 3 4 Known limitations of CPU virtualization ··· 37 36 ------------------------------ 38 37 39 38 TBD 40 -
+2
Documentation/virt/kvm/x86/running-nested-guests.rst
··· 1 + .. SPDX-License-Identifier: GPL-2.0 2 + 1 3 ============================== 2 4 Running nested guests with KVM 3 5 ==============================
+19 -8
arch/arm64/include/asm/kvm_emulate.h
··· 43 43 44 44 void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); 45 45 46 + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) 46 47 static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) 47 48 { 48 49 return !(vcpu->arch.hcr_el2 & HCR_RW); 49 50 } 51 + #else 52 + static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) 53 + { 54 + struct kvm *kvm = vcpu->kvm; 55 + 56 + WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, 57 + &kvm->arch.flags)); 58 + 59 + return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); 60 + } 61 + #endif 50 62 51 63 static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) 52 64 { ··· 84 72 vcpu->arch.hcr_el2 |= HCR_TVM; 85 73 } 86 74 87 - if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) 75 + if (vcpu_el1_is_32bit(vcpu)) 88 76 vcpu->arch.hcr_el2 &= ~HCR_RW; 89 - 90 - /* 91 - * TID3: trap feature register accesses that we virtualise. 92 - * For now this is conditional, since no AArch32 feature regs 93 - * are currently virtualised. 94 - */ 95 - if (!vcpu_el1_is_32bit(vcpu)) 77 + else 78 + /* 79 + * TID3: trap feature register accesses that we virtualise. 80 + * For now this is conditional, since no AArch32 feature regs 81 + * are currently virtualised. 82 + */ 96 83 vcpu->arch.hcr_el2 |= HCR_TID3; 97 84 98 85 if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
+10
arch/arm64/include/asm/kvm_host.h
··· 127 127 #define KVM_ARCH_FLAG_MTE_ENABLED 1 128 128 /* At least one vCPU has run in the VM */ 129 129 #define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 130 + /* 131 + * The following two bits are used to indicate the guest's EL1 132 + * register width configuration. The value of the KVM_ARCH_FLAG_EL1_32BIT 133 + * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set. 134 + * Otherwise, the guest's EL1 register width has not been 135 + * determined yet. 136 + */ 137 + #define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 138 + #define KVM_ARCH_FLAG_EL1_32BIT 4 139 + 130 140 unsigned long flags; 131 141 132 142 /*
+7 -4
arch/arm64/kvm/mmu.c
··· 1079 1079 gfn_t gfn; 1080 1080 kvm_pfn_t pfn; 1081 1081 bool logging_active = memslot_is_logging(memslot); 1082 - bool logging_perm_fault = false; 1082 + bool use_read_lock = false; 1083 1083 unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); 1084 1084 unsigned long vma_pagesize, fault_granule; 1085 1085 enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; ··· 1114 1114 if (logging_active) { 1115 1115 force_pte = true; 1116 1116 vma_shift = PAGE_SHIFT; 1117 - logging_perm_fault = (fault_status == FSC_PERM && write_fault); 1117 + use_read_lock = (fault_status == FSC_PERM && write_fault && 1118 + fault_granule == PAGE_SIZE); 1118 1119 } else { 1119 1120 vma_shift = get_vma_page_shift(vma, hva); 1120 1121 } ··· 1219 1218 * logging dirty logging, only acquire read lock for permission 1220 1219 * relaxation. 1221 1220 */ 1222 - if (logging_perm_fault) 1221 + if (use_read_lock) 1223 1222 read_lock(&kvm->mmu_lock); 1224 1223 else 1225 1224 write_lock(&kvm->mmu_lock); ··· 1269 1268 if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { 1270 1269 ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); 1271 1270 } else { 1271 + WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n"); 1272 + 1272 1273 ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, 1273 1274 __pfn_to_phys(pfn), prot, 1274 1275 memcache); ··· 1283 1280 } 1284 1281 1285 1282 out_unlock: 1286 - if (logging_perm_fault) 1283 + if (use_read_lock) 1287 1284 read_unlock(&kvm->mmu_lock); 1288 1285 else 1289 1286 write_unlock(&kvm->mmu_lock);
+14 -17
arch/arm64/kvm/psci.c
··· 215 215 216 216 static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) 217 217 { 218 - switch(fn) { 219 - case PSCI_0_2_FN64_CPU_SUSPEND: 220 - case PSCI_0_2_FN64_CPU_ON: 221 - case PSCI_0_2_FN64_AFFINITY_INFO: 222 - /* Disallow these functions for 32bit guests */ 223 - if (vcpu_mode_is_32bit(vcpu)) 224 - return PSCI_RET_NOT_SUPPORTED; 225 - break; 226 - } 218 + /* 219 + * Prevent 32 bit guests from calling 64 bit PSCI functions. 220 + */ 221 + if ((fn & PSCI_0_2_64BIT) && vcpu_mode_is_32bit(vcpu)) 222 + return PSCI_RET_NOT_SUPPORTED; 227 223 228 224 return 0; 229 225 } ··· 230 234 u32 psci_fn = smccc_get_function(vcpu); 231 235 unsigned long val; 232 236 int ret = 1; 233 - 234 - val = kvm_psci_check_allowed_function(vcpu, psci_fn); 235 - if (val) 236 - goto out; 237 237 238 238 switch (psci_fn) { 239 239 case PSCI_0_2_FN_PSCI_VERSION: ··· 298 306 break; 299 307 } 300 308 301 - out: 302 309 smccc_set_retval(vcpu, val, 0, 0, 0); 303 310 return ret; 304 311 } ··· 308 317 u32 arg; 309 318 unsigned long val; 310 319 int ret = 1; 311 - 312 - if (minor > 1) 313 - return -EINVAL; 314 320 315 321 switch(psci_fn) { 316 322 case PSCI_0_2_FN_PSCI_VERSION: ··· 414 426 */ 415 427 int kvm_psci_call(struct kvm_vcpu *vcpu) 416 428 { 429 + u32 psci_fn = smccc_get_function(vcpu); 430 + unsigned long val; 431 + 432 + val = kvm_psci_check_allowed_function(vcpu, psci_fn); 433 + if (val) { 434 + smccc_set_retval(vcpu, val, 0, 0, 0); 435 + return 1; 436 + } 437 + 417 438 switch (kvm_psci_version(vcpu)) { 418 439 case KVM_ARM_PSCI_1_1: 419 440 return kvm_psci_1_x_call(vcpu, 1);
+46 -21
arch/arm64/kvm/reset.c
··· 181 181 return 0; 182 182 } 183 183 184 - static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) 184 + /** 185 + * kvm_set_vm_width() - set the register width for the guest 186 + * @vcpu: Pointer to the vcpu being configured 187 + * 188 + * Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 189 + * in the VM flags based on the vcpu's requested register width, the HW 190 + * capabilities and other options (such as MTE). 191 + * When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be 192 + * consistent with the value of the FLAG_EL1_32BIT bit in the flags. 193 + * 194 + * Return: 0 on success, negative error code on failure. 195 + */ 196 + static int kvm_set_vm_width(struct kvm_vcpu *vcpu) 185 197 { 186 - struct kvm_vcpu *tmp; 198 + struct kvm *kvm = vcpu->kvm; 187 199 bool is32bit; 188 - unsigned long i; 189 200 190 201 is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); 191 - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) 192 - return false; 193 202 194 - /* MTE is incompatible with AArch32 */ 195 - if (kvm_has_mte(vcpu->kvm) && is32bit) 196 - return false; 203 + lockdep_assert_held(&kvm->lock); 197 204 198 - /* Check that the vcpus are either all 32bit or all 64bit */ 199 - kvm_for_each_vcpu(i, tmp, vcpu->kvm) { 200 - if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit) 201 - return false; 205 + if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) { 206 + /* 207 + * The guest's register width is already configured. 208 + * Make sure that the vcpu is consistent with it. 
209 + */ 210 + if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags)) 211 + return 0; 212 + 213 + return -EINVAL; 202 214 } 203 215 204 - return true; 216 + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) 217 + return -EINVAL; 218 + 219 + /* MTE is incompatible with AArch32 */ 220 + if (kvm_has_mte(kvm) && is32bit) 221 + return -EINVAL; 222 + 223 + if (is32bit) 224 + set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); 225 + 226 + set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags); 227 + 228 + return 0; 205 229 } 206 230 207 231 /** ··· 254 230 u32 pstate; 255 231 256 232 mutex_lock(&vcpu->kvm->lock); 257 - reset_state = vcpu->arch.reset_state; 258 - WRITE_ONCE(vcpu->arch.reset_state.reset, false); 233 + ret = kvm_set_vm_width(vcpu); 234 + if (!ret) { 235 + reset_state = vcpu->arch.reset_state; 236 + WRITE_ONCE(vcpu->arch.reset_state.reset, false); 237 + } 259 238 mutex_unlock(&vcpu->kvm->lock); 239 + 240 + if (ret) 241 + return ret; 260 242 261 243 /* Reset PMU outside of the non-preemptible section */ 262 244 kvm_pmu_vcpu_reset(vcpu); ··· 290 260 } 291 261 } 292 262 293 - if (!vcpu_allowed_register_width(vcpu)) { 294 - ret = -EINVAL; 295 - goto out; 296 - } 297 - 298 263 switch (vcpu->arch.target) { 299 264 default: 300 - if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { 265 + if (vcpu_el1_is_32bit(vcpu)) { 301 266 pstate = VCPU_RESET_PSTATE_SVC; 302 267 } else { 303 268 pstate = VCPU_RESET_PSTATE_EL1;
+5 -5
arch/arm64/kvm/vgic/vgic-debug.c
··· 82 82 83 83 static void *vgic_debug_start(struct seq_file *s, loff_t *pos) 84 84 { 85 - struct kvm *kvm = (struct kvm *)s->private; 85 + struct kvm *kvm = s->private; 86 86 struct vgic_state_iter *iter; 87 87 88 88 mutex_lock(&kvm->lock); ··· 110 110 111 111 static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) 112 112 { 113 - struct kvm *kvm = (struct kvm *)s->private; 113 + struct kvm *kvm = s->private; 114 114 struct vgic_state_iter *iter = kvm->arch.vgic.iter; 115 115 116 116 ++*pos; ··· 122 122 123 123 static void vgic_debug_stop(struct seq_file *s, void *v) 124 124 { 125 - struct kvm *kvm = (struct kvm *)s->private; 125 + struct kvm *kvm = s->private; 126 126 struct vgic_state_iter *iter; 127 127 128 128 /* ··· 229 229 230 230 static int vgic_debug_show(struct seq_file *s, void *v) 231 231 { 232 - struct kvm *kvm = (struct kvm *)s->private; 233 - struct vgic_state_iter *iter = (struct vgic_state_iter *)v; 232 + struct kvm *kvm = s->private; 233 + struct vgic_state_iter *iter = v; 234 234 struct vgic_irq *irq; 235 235 struct kvm_vcpu *vcpu = NULL; 236 236 unsigned long flags;
+1 -1
arch/arm64/kvm/vgic/vgic-its.c
··· 2143 2143 static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, 2144 2144 void *ptr, void *opaque) 2145 2145 { 2146 - struct its_device *dev = (struct its_device *)opaque; 2146 + struct its_device *dev = opaque; 2147 2147 struct its_collection *collection; 2148 2148 struct kvm *kvm = its->dev->kvm; 2149 2149 struct kvm_vcpu *vcpu = NULL;
-2
arch/riscv/kvm/vcpu.c
··· 653 653 vcpu->arch.isa); 654 654 kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); 655 655 656 - csr_write(CSR_HGATP, 0); 657 - 658 656 csr->vsstatus = csr_read(CSR_VSSTATUS); 659 657 csr->vsie = csr_read(CSR_VSIE); 660 658 csr->vstvec = csr_read(CSR_VSTVEC);
+1
arch/riscv/kvm/vcpu_fp.c
··· 11 11 #include <linux/err.h> 12 12 #include <linux/kvm_host.h> 13 13 #include <linux/uaccess.h> 14 + #include <asm/hwcap.h> 14 15 15 16 #ifdef CONFIG_FPU 16 17 void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
+5 -5
arch/x86/include/asm/kvm_host.h
··· 974 974 HV_TSC_PAGE_UNSET = 0, 975 975 /* TSC page MSR was written by the guest, update pending */ 976 976 HV_TSC_PAGE_GUEST_CHANGED, 977 - /* TSC page MSR was written by KVM userspace, update pending */ 977 + /* TSC page update was triggered from the host side */ 978 978 HV_TSC_PAGE_HOST_CHANGED, 979 979 /* TSC page was properly set up and is currently active */ 980 980 HV_TSC_PAGE_SET, 981 - /* TSC page is currently being updated and therefore is inactive */ 982 - HV_TSC_PAGE_UPDATING, 983 981 /* TSC page was set up with an inaccessible GPA */ 984 982 HV_TSC_PAGE_BROKEN, 985 983 }; ··· 1050 1052 APICV_INHIBIT_REASON_X2APIC, 1051 1053 APICV_INHIBIT_REASON_BLOCKIRQ, 1052 1054 APICV_INHIBIT_REASON_ABSENT, 1055 + APICV_INHIBIT_REASON_SEV, 1053 1056 }; 1054 1057 1055 1058 struct kvm_arch { ··· 1584 1585 #define kvm_arch_pmi_in_guest(vcpu) \ 1585 1586 ((vcpu) && (vcpu)->arch.handling_intr_from_guest) 1586 1587 1587 - int kvm_mmu_module_init(void); 1588 - void kvm_mmu_module_exit(void); 1588 + void kvm_mmu_x86_module_init(void); 1589 + int kvm_mmu_vendor_module_init(void); 1590 + void kvm_mmu_vendor_module_exit(void); 1589 1591 1590 1592 void kvm_mmu_destroy(struct kvm_vcpu *vcpu); 1591 1593 int kvm_mmu_create(struct kvm_vcpu *vcpu);
+10 -34
arch/x86/kvm/hyperv.c
··· 1135 1135 BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); 1136 1136 BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); 1137 1137 1138 - if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || 1139 - hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) 1140 - return; 1141 - 1142 1138 mutex_lock(&hv->hv_lock); 1139 + 1140 + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || 1141 + hv->hv_tsc_page_status == HV_TSC_PAGE_SET || 1142 + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) 1143 + goto out_unlock; 1144 + 1143 1145 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1144 1146 goto out_unlock; 1145 1147 ··· 1203 1201 mutex_unlock(&hv->hv_lock); 1204 1202 } 1205 1203 1206 - void kvm_hv_invalidate_tsc_page(struct kvm *kvm) 1204 + void kvm_hv_request_tsc_page_update(struct kvm *kvm) 1207 1205 { 1208 1206 struct kvm_hv *hv = to_kvm_hv(kvm); 1209 - u64 gfn; 1210 - int idx; 1211 - 1212 - if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || 1213 - hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || 1214 - tsc_page_update_unsafe(hv)) 1215 - return; 1216 1207 1217 1208 mutex_lock(&hv->hv_lock); 1218 1209 1219 - if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1220 - goto out_unlock; 1210 + if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET && 1211 + !tsc_page_update_unsafe(hv)) 1212 + hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED; 1221 1213 1222 - /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */ 1223 - if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET) 1224 - hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING; 1225 - 1226 - gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 1227 - 1228 - hv->tsc_ref.tsc_sequence = 0; 1229 - 1230 - /* 1231 - * Take the srcu lock as memslots will be accessed to check the gfn 1232 - * cache generation against the memslots generation. 
1233 - */ 1234 - idx = srcu_read_lock(&kvm->srcu); 1235 - if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 1236 - &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 1237 - hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; 1238 - srcu_read_unlock(&kvm->srcu, idx); 1239 - 1240 - out_unlock: 1241 1214 mutex_unlock(&hv->hv_lock); 1242 1215 } 1243 - 1244 1216 1245 1217 static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr) 1246 1218 {
+1 -1
arch/x86/kvm/hyperv.h
··· 137 137 138 138 void kvm_hv_setup_tsc_page(struct kvm *kvm, 139 139 struct pvclock_vcpu_time_info *hv_clock); 140 - void kvm_hv_invalidate_tsc_page(struct kvm *kvm); 140 + void kvm_hv_request_tsc_page_update(struct kvm *kvm); 141 141 142 142 void kvm_hv_init_vm(struct kvm *kvm); 143 143 void kvm_hv_destroy_vm(struct kvm *kvm);
+16 -4
arch/x86/kvm/mmu/mmu.c
··· 6237 6237 return 0; 6238 6238 } 6239 6239 6240 - int kvm_mmu_module_init(void) 6240 + /* 6241 + * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as 6242 + * its default value of -1 is technically undefined behavior for a boolean. 6243 + */ 6244 + void kvm_mmu_x86_module_init(void) 6241 6245 { 6242 - int ret = -ENOMEM; 6243 - 6244 6246 if (nx_huge_pages == -1) 6245 6247 __set_nx_huge_pages(get_nx_auto_mode()); 6248 + } 6249 + 6250 + /* 6251 + * The bulk of the MMU initialization is deferred until the vendor module is 6252 + * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need 6253 + * to be reset when a potentially different vendor module is loaded. 6254 + */ 6255 + int kvm_mmu_vendor_module_init(void) 6256 + { 6257 + int ret = -ENOMEM; 6246 6258 6247 6259 /* 6248 6260 * MMU roles use union aliasing which is, generally speaking, an ··· 6302 6290 mmu_free_memory_caches(vcpu); 6303 6291 } 6304 6292 6305 - void kvm_mmu_module_exit(void) 6293 + void kvm_mmu_vendor_module_exit(void) 6306 6294 { 6307 6295 mmu_destroy_caches(); 6308 6296 percpu_counter_destroy(&kvm_total_used_mmu_pages);
+1 -1
arch/x86/kvm/mmu/tdp_mmu.c
··· 51 51 if (!kvm->arch.tdp_mmu_enabled) 52 52 return; 53 53 54 - flush_workqueue(kvm->arch.tdp_mmu_zap_wq); 54 + /* Also waits for any queued work items. */ 55 55 destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); 56 56 57 57 WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
+2 -1
arch/x86/kvm/svm/avic.c
··· 837 837 BIT(APICV_INHIBIT_REASON_IRQWIN) | 838 838 BIT(APICV_INHIBIT_REASON_PIT_REINJ) | 839 839 BIT(APICV_INHIBIT_REASON_X2APIC) | 840 - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); 840 + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | 841 + BIT(APICV_INHIBIT_REASON_SEV); 841 842 842 843 return supported & BIT(reason); 843 844 }
+3
arch/x86/kvm/svm/sev.c
··· 260 260 INIT_LIST_HEAD(&sev->regions_list); 261 261 INIT_LIST_HEAD(&sev->mirror_vms); 262 262 263 + kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); 264 + 263 265 return 0; 264 266 265 267 e_free: ··· 467 465 page_virtual = kmap_atomic(pages[i]); 468 466 clflush_cache_range(page_virtual, PAGE_SIZE); 469 467 kunmap_atomic(page_virtual); 468 + cond_resched(); 470 469 } 471 470 } 472 471
+21 -6
arch/x86/kvm/x86.c
··· 2901 2901 2902 2902 static void kvm_update_masterclock(struct kvm *kvm) 2903 2903 { 2904 - kvm_hv_invalidate_tsc_page(kvm); 2904 + kvm_hv_request_tsc_page_update(kvm); 2905 2905 kvm_start_pvclock_update(kvm); 2906 2906 pvclock_update_vm_gtod_copy(kvm); 2907 2907 kvm_end_pvclock_update(kvm); ··· 3113 3113 offsetof(struct compat_vcpu_info, time)); 3114 3114 if (vcpu->xen.vcpu_time_info_set) 3115 3115 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0); 3116 - if (!v->vcpu_idx) 3117 - kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); 3116 + kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); 3118 3117 return 0; 3119 3118 } 3120 3119 ··· 6240 6241 if (data.flags & ~KVM_CLOCK_VALID_FLAGS) 6241 6242 return -EINVAL; 6242 6243 6243 - kvm_hv_invalidate_tsc_page(kvm); 6244 + kvm_hv_request_tsc_page_update(kvm); 6244 6245 kvm_start_pvclock_update(kvm); 6245 6246 pvclock_update_vm_gtod_copy(kvm); 6246 6247 ··· 8925 8926 } 8926 8927 kvm_nr_uret_msrs = 0; 8927 8928 8928 - r = kvm_mmu_module_init(); 8929 + r = kvm_mmu_vendor_module_init(); 8929 8930 if (r) 8930 8931 goto out_free_percpu; 8931 8932 ··· 8973 8974 cancel_work_sync(&pvclock_gtod_work); 8974 8975 #endif 8975 8976 kvm_x86_ops.hardware_enable = NULL; 8976 - kvm_mmu_module_exit(); 8977 + kvm_mmu_vendor_module_exit(); 8977 8978 free_percpu(user_return_msrs); 8978 8979 kmem_cache_destroy(x86_emulator_cache); 8979 8980 #ifdef CONFIG_KVM_XEN ··· 12985 12986 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); 12986 12987 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); 12987 12988 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); 12989 + 12990 + static int __init kvm_x86_init(void) 12991 + { 12992 + kvm_mmu_x86_module_init(); 12993 + return 0; 12994 + } 12995 + module_init(kvm_x86_init); 12996 + 12997 + static void __exit kvm_x86_exit(void) 12998 + { 12999 + /* 13000 + * If module_init() is implemented, module_exit() must also be 13001 + * implemented to allow module unload. 
13002 + */ 13003 + } 13004 + module_exit(kvm_x86_exit);
+2
tools/testing/selftests/kvm/.gitignore
··· 3 3 /aarch64/debug-exceptions 4 4 /aarch64/get-reg-list 5 5 /aarch64/psci_cpu_on_test 6 + /aarch64/vcpu_width_config 6 7 /aarch64/vgic_init 7 8 /aarch64/vgic_irq 8 9 /s390x/memop ··· 34 33 /x86_64/state_test 35 34 /x86_64/svm_vmcall_test 36 35 /x86_64/svm_int_ctl_test 36 + /x86_64/tsc_scaling_sync 37 37 /x86_64/sync_regs_test 38 38 /x86_64/tsc_msrs_test 39 39 /x86_64/userspace_io_test
+1
tools/testing/selftests/kvm/Makefile
··· 106 106 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions 107 107 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list 108 108 TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test 109 + TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config 109 110 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init 110 111 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq 111 112 TEST_GEN_PROGS_aarch64 += demand_paging_test
+11 -4
tools/testing/selftests/kvm/aarch64/arch_timer.c
··· 362 362 pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); 363 363 } 364 364 365 + static int gic_fd; 366 + 365 367 static struct kvm_vm *test_vm_create(void) 366 368 { 367 369 struct kvm_vm *vm; 368 370 unsigned int i; 369 - int ret; 370 371 int nr_vcpus = test_args.nr_vcpus; 371 372 372 373 vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); ··· 384 383 385 384 ucall_init(vm, NULL); 386 385 test_init_timer_irq(vm); 387 - ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); 388 - if (ret < 0) { 386 + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); 387 + if (gic_fd < 0) { 389 388 print_skip("Failed to create vgic-v3"); 390 389 exit(KSFT_SKIP); 391 390 } ··· 394 393 sync_global_to_guest(vm, test_args); 395 394 396 395 return vm; 396 + } 397 + 398 + static void test_vm_cleanup(struct kvm_vm *vm) 399 + { 400 + close(gic_fd); 401 + kvm_vm_free(vm); 397 402 } 398 403 399 404 static void test_print_help(char *name) ··· 485 478 486 479 vm = test_vm_create(); 487 480 test_run(vm); 488 - kvm_vm_free(vm); 481 + test_vm_cleanup(vm); 489 482 490 483 return 0; 491 484 }
+10 -4
tools/testing/selftests/kvm/aarch64/get-reg-list.c
··· 503 503 ++missing_regs; 504 504 505 505 if (new_regs || missing_regs) { 506 + n = 0; 507 + for_each_reg_filtered(i) 508 + ++n; 509 + 506 510 printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); 507 - printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n); 511 + printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", 512 + config_name(c), reg_list->n, reg_list->n - n); 508 513 } 509 514 510 515 if (new_regs) { ··· 688 683 KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), 689 684 KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), 690 685 KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), 691 - KVM_REG_ARM_FW_REG(0), 692 - KVM_REG_ARM_FW_REG(1), 693 - KVM_REG_ARM_FW_REG(2), 686 + KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */ 687 + KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */ 688 + KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */ 689 + KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */ 694 690 ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ 695 691 ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ 696 692 ARM64_SYS_REG(3, 3, 14, 0, 2),
+122
tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT. 4 + * 5 + * Copyright (c) 2022 Google LLC. 6 + * 7 + * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs 8 + * or all 32bit vCPUs) can be configured and mixed-width vCPUs cannot be 9 + * configured. 10 + */ 11 + 12 + #include "kvm_util.h" 13 + #include "processor.h" 14 + #include "test_util.h" 15 + 16 + 17 + /* 18 + * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then 19 + * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2. 20 + */ 21 + static int add_init_2vcpus(struct kvm_vcpu_init *init1, 22 + struct kvm_vcpu_init *init2) 23 + { 24 + struct kvm_vm *vm; 25 + int ret; 26 + 27 + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); 28 + 29 + vm_vcpu_add(vm, 0); 30 + ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); 31 + if (ret) 32 + goto free_exit; 33 + 34 + vm_vcpu_add(vm, 1); 35 + ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); 36 + 37 + free_exit: 38 + kvm_vm_free(vm); 39 + return ret; 40 + } 41 + 42 + /* 43 + * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1, 44 + * and run KVM_ARM_VCPU_INIT for another vCPU with @init2. 45 + */ 46 + static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1, 47 + struct kvm_vcpu_init *init2) 48 + { 49 + struct kvm_vm *vm; 50 + int ret; 51 + 52 + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); 53 + 54 + vm_vcpu_add(vm, 0); 55 + vm_vcpu_add(vm, 1); 56 + 57 + ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); 58 + if (ret) 59 + goto free_exit; 60 + 61 + ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); 62 + 63 + free_exit: 64 + kvm_vm_free(vm); 65 + return ret; 66 + } 67 + 68 + /* 69 + * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be 70 + * configured, and two mixed-width vCPUs cannot be configured. 71 + * For each of those three cases, configure vCPUs in two different orders. 
72 + * The first order runs KVM_CREATE_VCPU for 2 vCPUs, and then runs 73 + * KVM_ARM_VCPU_INIT for them. 74 + * The other runs KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU, 75 + * and then runs those commands for another vCPU. 76 + */ 77 + int main(void) 78 + { 79 + struct kvm_vcpu_init init1, init2; 80 + struct kvm_vm *vm; 81 + int ret; 82 + 83 + if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) { 84 + print_skip("KVM_CAP_ARM_EL1_32BIT is not supported"); 85 + exit(KSFT_SKIP); 86 + } 87 + 88 + /* Get the preferred target type and copy that to init2 for later use */ 89 + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); 90 + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1); 91 + kvm_vm_free(vm); 92 + init2 = init1; 93 + 94 + /* Test with 64bit vCPUs */ 95 + ret = add_init_2vcpus(&init1, &init1); 96 + TEST_ASSERT(ret == 0, 97 + "Configuring 64bit EL1 vCPUs failed unexpectedly"); 98 + ret = add_2vcpus_init_2vcpus(&init1, &init1); 99 + TEST_ASSERT(ret == 0, 100 + "Configuring 64bit EL1 vCPUs failed unexpectedly"); 101 + 102 + /* Test with 32bit vCPUs */ 103 + init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); 104 + ret = add_init_2vcpus(&init1, &init1); 105 + TEST_ASSERT(ret == 0, 106 + "Configuring 32bit EL1 vCPUs failed unexpectedly"); 107 + ret = add_2vcpus_init_2vcpus(&init1, &init1); 108 + TEST_ASSERT(ret == 0, 109 + "Configuring 32bit EL1 vCPUs failed unexpectedly"); 110 + 111 + /* Test with mixed-width vCPUs */ 112 + init1.features[0] = 0; 113 + init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); 114 + ret = add_init_2vcpus(&init1, &init2); 115 + TEST_ASSERT(ret != 0, 116 + "Configuring mixed-width vCPUs worked unexpectedly"); 117 + ret = add_2vcpus_init_2vcpus(&init1, &init2); 118 + TEST_ASSERT(ret != 0, 119 + "Configuring mixed-width vCPUs worked unexpectedly"); 120 + 121 + return 0; 122 + }
+31 -3
tools/testing/selftests/kvm/dirty_log_perf_test.c
··· 18 18 #include "test_util.h" 19 19 #include "perf_test_util.h" 20 20 #include "guest_modes.h" 21 + 21 22 #ifdef __aarch64__ 22 23 #include "aarch64/vgic.h" 23 24 24 25 #define GICD_BASE_GPA 0x8000000ULL 25 26 #define GICR_BASE_GPA 0x80A0000ULL 27 + 28 + static int gic_fd; 29 + 30 + static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) 31 + { 32 + /* 33 + * The test can still run even if hardware does not support GICv3, as it 34 + * is only an optimization to reduce guest exits. 35 + */ 36 + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); 37 + } 38 + 39 + static void arch_cleanup_vm(struct kvm_vm *vm) 40 + { 41 + if (gic_fd > 0) 42 + close(gic_fd); 43 + } 44 + 45 + #else /* __aarch64__ */ 46 + 47 + static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) 48 + { 49 + } 50 + 51 + static void arch_cleanup_vm(struct kvm_vm *vm) 52 + { 53 + } 54 + 26 55 #endif 27 56 28 57 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ ··· 235 206 vm_enable_cap(vm, &cap); 236 207 } 237 208 238 - #ifdef __aarch64__ 239 - vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); 240 - #endif 209 + arch_setup_vm(vm, nr_vcpus); 241 210 242 211 /* Start the iterations */ 243 212 iteration = 0; ··· 329 302 } 330 303 331 304 free_bitmaps(bitmaps, p->slots); 305 + arch_cleanup_vm(vm); 332 306 perf_test_destroy_vm(vm); 333 307 } 334 308
+3 -1
tools/testing/selftests/kvm/include/riscv/processor.h
··· 101 101 #define PGTBL_PTE_WRITE_SHIFT 2 102 102 #define PGTBL_PTE_READ_MASK 0x0000000000000002ULL 103 103 #define PGTBL_PTE_READ_SHIFT 1 104 - #define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \ 104 + #define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \ 105 + PGTBL_PTE_DIRTY_MASK | \ 106 + PGTBL_PTE_EXECUTE_MASK | \ 105 107 PGTBL_PTE_WRITE_MASK | \ 106 108 PGTBL_PTE_READ_MASK) 107 109 #define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL
+1 -1
tools/testing/selftests/kvm/lib/riscv/processor.c
··· 268 268 core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); 269 269 } 270 270 271 - static void guest_hang(void) 271 + static void __aligned(16) guest_hang(void) 272 272 { 273 273 while (1) 274 274 ;
+9 -3
virt/kvm/kvm_main.c
··· 434 434 435 435 static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) 436 436 { 437 - kvm_dirty_ring_free(&vcpu->dirty_ring); 438 437 kvm_arch_vcpu_destroy(vcpu); 438 + kvm_dirty_ring_free(&vcpu->dirty_ring); 439 439 440 440 /* 441 441 * No need for rcu_read_lock as VCPU_RUN is the only place that changes ··· 932 932 int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + 933 933 kvm_vcpu_stats_header.num_desc; 934 934 935 - if (!kvm->debugfs_dentry) 935 + if (IS_ERR(kvm->debugfs_dentry)) 936 936 return; 937 937 938 938 debugfs_remove_recursive(kvm->debugfs_dentry); ··· 954 954 int i, ret; 955 955 int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + 956 956 kvm_vcpu_stats_header.num_desc; 957 + 958 + /* 959 + * Force subsequent debugfs file creations to fail if the VM directory 960 + * is not created. 961 + */ 962 + kvm->debugfs_dentry = ERR_PTR(-ENOENT); 957 963 958 964 if (!debugfs_initialized()) 959 965 return 0; ··· 5485 5479 } 5486 5480 add_uevent_var(env, "PID=%d", kvm->userspace_pid); 5487 5481 5488 - if (kvm->debugfs_dentry) { 5482 + if (!IS_ERR(kvm->debugfs_dentry)) { 5489 5483 char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); 5490 5484 5491 5485 if (p) {