Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"ARM:

- Avoid use of uninitialized memcache pointer in user_mem_abort()

- Always set HCR_EL2.xMO bits when running in VHE, allowing
interrupts to be taken while TGE=0 and fixing an ugly bug on
AmpereOne that occurs when taking an interrupt while clearing the
xMO bits (AC03_CPU_36)

- Prevent VMMs from hiding support for AArch64 at any EL virtualized
by KVM

- Save/restore the host value for HCRX_EL2 instead of restoring an
incorrect fixed value

- Make host_stage2_set_owner_locked() check that the entire requested
range is memory rather than just the first page

RISC-V:

- Add missing reset of smstateen CSRs

x86:

- Forcibly leave SMM on SHUTDOWN interception on AMD CPUs to avoid
causing problems due to KVM stuffing INIT on SHUTDOWN (KVM needs to
sanitize the VMCB as its state is undefined after SHUTDOWN;
emulating INIT is the least awful choice).

- Track the valid sync/dirty fields in kvm_run as a u64 to ensure KVM
doesn't goof a sanity check in the future.

- Free obsolete roots when (re)loading the MMU to fix a bug where
pre-faulting memory can get stuck due to always encountering a
stale root.

- When dumping GHCB state, use KVM's snapshot instead of the raw GHCB
page to print state, so that KVM doesn't print stale/wrong
information.

- When changing memory attributes (e.g. shared <=> private), add
potential hugepage ranges to the mmu_invalidate_range_{start,end}
set so that KVM doesn't create a shared/private hugepage when the
corresponding attributes will become mixed (the attributes are
committed *after* KVM finishes the invalidation).

- Rework the SRSO mitigation to enable BP_SPEC_REDUCE only when KVM
has at least one active VM. Unconditionally enabling BP_SPEC_REDUCE
when KVM is loaded led to very measurable performance regressions for
non-KVM workloads"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0 <=> 1 VM count transitions
KVM: arm64: Fix memory check in host_stage2_set_owner_locked()
KVM: arm64: Kill HCRX_HOST_FLAGS
KVM: arm64: Properly save/restore HCRX_EL2
KVM: arm64: selftest: Don't try to disable AArch64 support
KVM: arm64: Prevent userspace from disabling AArch64 support at any virtualisable EL
KVM: arm64: Force HCR_EL2.xMO to 1 at all times in VHE mode
KVM: arm64: Fix uninitialized memcache pointer in user_mem_abort()
KVM: x86/mmu: Prevent installing hugepages when mem attributes are changing
KVM: SVM: Update dump_ghcb() to use the GHCB snapshot fields
KVM: RISC-V: reset smstateen CSRs
KVM: x86/mmu: Check and free obsolete roots in kvm_mmu_reload()
KVM: x86: Check that the high 32bits are clear in kvm_arch_vcpu_ioctl_run()
KVM: SVM: Forcibly leave SMM mode on SHUTDOWN interception

16 files changed, +210 -82
+1 -1
arch/arm64/include/asm/el2_setup.h
···
 	mrs	x0, id_aa64mmfr1_el1
 	ubfx	x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
 	cbz	x0, .Lskip_hcrx_\@
-	mov_q	x0, HCRX_HOST_FLAGS
+	mov_q	x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
 
 	/* Enable GCS if supported */
 	mrs_s	x1, SYS_ID_AA64PFR1_EL1
+1 -2
arch/arm64/include/asm/kvm_arm.h
···
 			 HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
-#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO)
 
-#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
 #define MPAMHCR_HOST_FLAGS 0
 
 /* TCR_EL2 Registers bits */
+6 -7
arch/arm64/kvm/hyp/include/hyp/switch.h
···
 
 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
 {
+	struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+
 	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
 	write_sysreg(1 << 15, hstr_el2);
 
···
 	 * EL1 instead of being trapped to EL2.
 	 */
 	if (system_supports_pmuv3()) {
-		struct kvm_cpu_context *hctxt;
-
 		write_sysreg(0, pmselr_el0);
 
-		hctxt = host_data_ptr(host_ctxt);
 		ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
 		write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
 		vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
···
 			hcrx &= ~clr;
 		}
 
+		ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2);
 		write_sysreg_s(hcrx, SYS_HCRX_EL2);
 	}
 
···
 
 static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
 {
+	struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+
 	write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
 
 	write_sysreg(0, hstr_el2);
 	if (system_supports_pmuv3()) {
-		struct kvm_cpu_context *hctxt;
-
-		hctxt = host_data_ptr(host_ctxt);
 		write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
 		vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
 	}
 
 	if (cpus_have_final_cap(ARM64_HAS_HCX))
-		write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+		write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2);
 
 	__deactivate_traps_hfgxtr(vcpu);
 	__deactivate_traps_mpam();
+1 -1
arch/arm64/kvm/hyp/nvhe/mem_protect.c
···
 {
 	int ret;
 
-	if (!addr_is_memory(addr))
+	if (!range_is_memory(addr, addr + size))
 		return -EPERM;
 
 	ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
+21 -15
arch/arm64/kvm/hyp/vgic-v3-sr.c
···
 	/*
 	 * To check whether we have a MMIO-based (GICv2 compatible)
 	 * CPU interface, we need to disable the system register
-	 * view. To do that safely, we have to prevent any interrupt
-	 * from firing (which would be deadly).
+	 * view.
 	 *
-	 * Note that this only makes sense on VHE, as interrupts are
-	 * already masked for nVHE as part of the exception entry to
-	 * EL2.
-	 */
-	if (has_vhe())
-		flags = local_daif_save();
-
-	/*
 	 * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates
 	 * that to be able to set ICC_SRE_EL1.SRE to 0, all the
 	 * interrupt overrides must be set. You've got to love this.
+	 *
+	 * As we always run VHE with HCR_xMO set, no extra xMO
+	 * manipulation is required in that case.
+	 *
+	 * To safely disable SRE, we have to prevent any interrupt
+	 * from firing (which would be deadly). This only makes sense
+	 * on VHE, as interrupts are already masked for nVHE as part
+	 * of the exception entry to EL2.
 	 */
-	sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO);
-	isb();
+	if (has_vhe()) {
+		flags = local_daif_save();
+	} else {
+		sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO);
+		isb();
+	}
+
 	write_gicreg(0, ICC_SRE_EL1);
 	isb();
···
 
 	write_gicreg(sre, ICC_SRE_EL1);
 	isb();
-	sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
-	isb();
 
-	if (has_vhe())
+	if (has_vhe()) {
 		local_daif_restore(flags);
+	} else {
+		sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
+		isb();
+	}
 
 	val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
 	val |= read_gicreg(ICH_VTR_EL2);
+8 -5
arch/arm64/kvm/mmu.c
···
 		return -EFAULT;
 	}
 
+	if (!is_protected_kvm_enabled())
+		memcache = &vcpu->arch.mmu_page_cache;
+	else
+		memcache = &vcpu->arch.pkvm_memcache;
+
 	/*
 	 * Permission faults just need to update the existing leaf entry,
 	 * and so normally don't require allocations from the memcache. The
···
 	if (!fault_is_perm || (logging_active && write_fault)) {
 		int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
 
-		if (!is_protected_kvm_enabled()) {
-			memcache = &vcpu->arch.mmu_page_cache;
+		if (!is_protected_kvm_enabled())
 			ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
-		} else {
-			memcache = &vcpu->arch.pkvm_memcache;
+		else
 			ret = topup_hyp_memcache(memcache, min_pages);
-		}
+
 		if (ret)
 			return ret;
 	}
+6
arch/arm64/kvm/sys_regs.c
···
 	if ((hw_val & mpam_mask) == (user_val & mpam_mask))
 		user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
 
+	/* Fail the guest's request to disable the AA64 ISA at EL{0,1,2} */
+	if (!FIELD_GET(ID_AA64PFR0_EL1_EL0, user_val) ||
+	    !FIELD_GET(ID_AA64PFR0_EL1_EL1, user_val) ||
+	    (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val)))
+		return -EINVAL;
+
 	return set_id_reg(vcpu, rd, user_val);
 }
 
+2
arch/riscv/kvm/vcpu.c
···
 	memcpy(cntx, reset_cntx, sizeof(*cntx));
 	spin_unlock(&vcpu->arch.reset_cntx_lock);
 
+	memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr));
+
 	kvm_riscv_vcpu_fp_reset(vcpu);
 
 	kvm_riscv_vcpu_vector_reset(vcpu);
+3
arch/x86/kvm/mmu.h
···
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
+	if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
+		kvm_mmu_free_obsolete_roots(vcpu);
+
 	/*
 	 * Checking root.hpa is sufficient even when KVM has mirror root.
 	 * We can have either:
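
(Editorial aside, not part of the pull request: the "pre-faulting gets stuck" symptom described above comes from kvm_mmu_reload() previously trusting any valid root, even one that a fast zap had already marked obsolete. The toy program below is a hedged, self-contained illustration of that failure mode in plain C; the struct and helper names are invented for this sketch and are not kernel APIs.)

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the scenario above; names are illustrative, not kernel APIs. */
struct toy_mmu {
	bool has_root;		/* models "root.hpa is valid" */
	bool root_obsolete;	/* set when a fast zap invalidates all roots */
	bool free_req_pending;	/* models KVM_REQ_MMU_FREE_OBSOLETE_ROOTS */
};

static void toy_mmu_reload(struct toy_mmu *m, bool with_fix)
{
	/* The fix: act on the pending request before trusting the root. */
	if (with_fix && m->free_req_pending) {
		m->free_req_pending = false;
		if (m->root_obsolete)
			m->has_root = false;
	}

	/* Pre-existing logic: only (re)load when there is no root at all. */
	if (!m->has_root) {
		m->has_root = true;
		m->root_obsolete = false;
	}
}

int main(void)
{
	struct toy_mmu m = { true, true, true };

	toy_mmu_reload(&m, false);
	printf("without fix, root still obsolete: %d\n", m.root_obsolete);

	toy_mmu_reload(&m, true);
	printf("with fix, root still obsolete:    %d\n", m.root_obsolete);
	return 0;
}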
+64 -26
arch/x86/kvm/mmu/mmu.c
···
 	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu);
 	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_free_obsolete_roots);
 
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    int *bytes)
···
 }
 
 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
-bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
-					struct kvm_gfn_range *range)
-{
-	/*
-	 * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
-	 * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
-	 * can simply ignore such slots. But if userspace is making memory
-	 * PRIVATE, then KVM must prevent the guest from accessing the memory
-	 * as shared. And if userspace is making memory SHARED and this point
-	 * is reached, then at least one page within the range was previously
-	 * PRIVATE, i.e. the slot's possible hugepage ranges are changing.
-	 * Zapping SPTEs in this case ensures KVM will reassess whether or not
-	 * a hugepage can be used for affected ranges.
-	 */
-	if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
-		return false;
-
-	/* Unmap the old attribute page. */
-	if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE)
-		range->attr_filter = KVM_FILTER_SHARED;
-	else
-		range->attr_filter = KVM_FILTER_PRIVATE;
-
-	return kvm_unmap_gfn_range(kvm, range);
-}
-
 static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
 				int level)
 {
···
 {
 	lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
 }
+
+bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
+					struct kvm_gfn_range *range)
+{
+	struct kvm_memory_slot *slot = range->slot;
+	int level;
+
+	/*
+	 * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
+	 * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
+	 * can simply ignore such slots. But if userspace is making memory
+	 * PRIVATE, then KVM must prevent the guest from accessing the memory
+	 * as shared. And if userspace is making memory SHARED and this point
+	 * is reached, then at least one page within the range was previously
+	 * PRIVATE, i.e. the slot's possible hugepage ranges are changing.
+	 * Zapping SPTEs in this case ensures KVM will reassess whether or not
+	 * a hugepage can be used for affected ranges.
+	 */
+	if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
+		return false;
+
+	if (WARN_ON_ONCE(range->end <= range->start))
+		return false;
+
+	/*
+	 * If the head and tail pages of the range currently allow a hugepage,
+	 * i.e. reside fully in the slot and don't have mixed attributes, then
+	 * add each corresponding hugepage range to the ongoing invalidation,
+	 * e.g. to prevent KVM from creating a hugepage in response to a fault
+	 * for a gfn whose attributes aren't changing. Note, only the range
+	 * of gfns whose attributes are being modified needs to be explicitly
+	 * unmapped, as that will unmap any existing hugepages.
+	 */
+	for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+		gfn_t start = gfn_round_for_level(range->start, level);
+		gfn_t end = gfn_round_for_level(range->end - 1, level);
+		gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
+
+		if ((start != range->start || start + nr_pages > range->end) &&
+		    start >= slot->base_gfn &&
+		    start + nr_pages <= slot->base_gfn + slot->npages &&
+		    !hugepage_test_mixed(slot, start, level))
+			kvm_mmu_invalidate_range_add(kvm, start, start + nr_pages);
+
+		if (end == start)
+			continue;
+
+		if ((end + nr_pages) > range->end &&
+		    (end + nr_pages) <= (slot->base_gfn + slot->npages) &&
+		    !hugepage_test_mixed(slot, end, level))
+			kvm_mmu_invalidate_range_add(kvm, end, end + nr_pages);
+	}
+
+	/* Unmap the old attribute page. */
+	if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE)
+		range->attr_filter = KVM_FILTER_SHARED;
+	else
+		range->attr_filter = KVM_FILTER_PRIVATE;
+
+	return kvm_unmap_gfn_range(kvm, range);
+}
+
+
 
 static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
 				gfn_t gfn, int level, unsigned long attrs)
+1
arch/x86/kvm/smm.c
···
 
 	kvm_mmu_reset_context(vcpu);
 }
+EXPORT_SYMBOL_GPL(kvm_smm_changed);
 
 void process_smi(struct kvm_vcpu *vcpu)
 {
+19 -13
arch/x86/kvm/svm/sev.c
···
 	kvfree(svm->sev_es.ghcb_sa);
 }
 
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
 static void dump_ghcb(struct vcpu_svm *svm)
 {
-	struct ghcb *ghcb = svm->sev_es.ghcb;
+	struct vmcb_control_area *control = &svm->vmcb->control;
 	unsigned int nbits;
 
 	/* Re-use the dump_invalid_vmcb module parameter */
···
 		return;
 	}
 
-	nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+	nbits = sizeof(svm->sev_es.valid_bitmap) * 8;
 
-	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+	/*
+	 * Print KVM's snapshot of the GHCB values that were (unsuccessfully)
+	 * used to handle the exit. If the guest has since modified the GHCB
+	 * itself, dumping the raw GHCB won't help debug why KVM was unable to
+	 * handle the VMGEXIT that KVM observed.
+	 */
+	pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa);
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
-	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+	       kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
-	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+	       control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
-	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+	       control->exit_info_2, kvm_ghcb_sw_exit_info_2_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
-	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
-	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+	       svm->sev_es.sw_scratch, kvm_ghcb_sw_scratch_is_valid(svm));
+	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, svm->sev_es.valid_bitmap);
 }
 
 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
···
 
 	/* Clear the valid entries fields */
 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-}
-
-static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
-{
-	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
 }
 
 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+69 -6
arch/x86/kvm/svm/svm.c
···
 	kvm_cpu_svm_disable();
 
 	amd_pmu_disable_virt();
-
-	if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
-		msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
 }
 
 static int svm_enable_virtualization_cpu(void)
···
 
 		rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi);
 	}
-
-	if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
-		msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
 
 	return 0;
 }
···
 	__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
 }
 
+#ifdef CONFIG_CPU_MITIGATIONS
+static DEFINE_SPINLOCK(srso_lock);
+static atomic_t srso_nr_vms;
+
+static void svm_srso_clear_bp_spec_reduce(void *ign)
+{
+	struct svm_cpu_data *sd = this_cpu_ptr(&svm_data);
+
+	if (!sd->bp_spec_reduce_set)
+		return;
+
+	msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
+	sd->bp_spec_reduce_set = false;
+}
+
+static void svm_srso_vm_destroy(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
+		return;
+
+	if (atomic_dec_return(&srso_nr_vms))
+		return;
+
+	guard(spinlock)(&srso_lock);
+
+	/*
+	 * Verify a new VM didn't come along, acquire the lock, and increment
+	 * the count before this task acquired the lock.
+	 */
+	if (atomic_read(&srso_nr_vms))
+		return;
+
+	on_each_cpu(svm_srso_clear_bp_spec_reduce, NULL, 1);
+}
+
+static void svm_srso_vm_init(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
+		return;
+
+	/*
+	 * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0
+	 * transition, i.e. destroying the last VM, is fully complete, e.g. so
+	 * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs.
+	 */
+	if (atomic_inc_not_zero(&srso_nr_vms))
+		return;
+
+	guard(spinlock)(&srso_lock);
+
+	atomic_inc(&srso_nr_vms);
+}
+#else
+static void svm_srso_vm_init(void) { }
+static void svm_srso_vm_destroy(void) { }
+#endif
+
 static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
···
 	    (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
 		kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
 
+	if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) &&
+	    !sd->bp_spec_reduce_set) {
+		sd->bp_spec_reduce_set = true;
+		msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
+	}
 	svm->guest_state_loaded = true;
 }
···
 	 */
 	if (!sev_es_guest(vcpu->kvm)) {
 		clear_page(svm->vmcb);
+#ifdef CONFIG_KVM_SMM
+		if (is_smm(vcpu))
+			kvm_smm_changed(vcpu, false);
+#endif
 		kvm_vcpu_reset(vcpu, true);
 
···
 {
 	avic_vm_destroy(kvm);
 	sev_vm_destroy(kvm);
+
+	svm_srso_vm_destroy();
 }
 
 static int svm_vm_init(struct kvm *kvm)
···
 		return ret;
 	}
 
+	svm_srso_vm_init();
 	return 0;
 }
 
+2
arch/x86/kvm/svm/svm.h
···
 	u32 next_asid;
 	u32 min_asid;
 
+	bool bp_spec_reduce_set;
+
 	struct vmcb *save_area;
 	unsigned long save_area_pa;
 
+2 -2
arch/x86/kvm/x86.c
···
 	return type < 32 && (kvm_caps.supported_vm_types & BIT(type));
 }
 
-static inline u32 kvm_sync_valid_fields(struct kvm *kvm)
+static inline u64 kvm_sync_valid_fields(struct kvm *kvm)
 {
 	return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS;
 }
···
 {
 	struct kvm_queued_exception *ex = &vcpu->arch.exception;
 	struct kvm_run *kvm_run = vcpu->run;
-	u32 sync_valid_fields;
+	u64 sync_valid_fields;
 	int r;
 
 	r = kvm_mmu_post_init_vm(vcpu->kvm);
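
(Editorial aside, not part of the pull request: the width change above matters because kvm_run->kvm_valid_regs and kvm_run->kvm_dirty_regs are 64-bit, so masking them against a 32-bit set of valid fields silently ignores bits 32-63. The stand-alone sketch below illustrates that with made-up flag values; only the mask arithmetic mirrors the kernel.)

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative stand-ins; the real KVM_SYNC_X86_* values live in the UAPI
 * headers. Only the width of the mask arithmetic is the point here.
 */
#define SYNC_REGS	(1ULL << 0)
#define SYNC_SREGS	(1ULL << 1)
#define SYNC_EVENTS	(1ULL << 2)
#define SYNC_FUTURE	(1ULL << 32)	/* hypothetical future field */
#define SYNC_VALID	(SYNC_REGS | SYNC_SREGS | SYNC_EVENTS)

/* Modeled on the kvm_valid_regs/kvm_dirty_regs sanity check. */
static int check_sync_regs(uint64_t valid, uint64_t dirty, uint64_t allowed)
{
	if ((valid & ~allowed) || (dirty & ~allowed))
		return -1;	/* -EINVAL in the kernel */
	return 0;
}

int main(void)
{
	/*
	 * With a 32-bit 'allowed', ~allowed would zero-extend and drop bits
	 * 32-63, letting this bogus request sneak through; with a 64-bit
	 * mask it is rejected as intended.
	 */
	printf("u64 mask verdict: %d\n",
	       check_sync_regs(SYNC_FUTURE, 0, SYNC_VALID));
	return 0;
}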
+4 -4
tools/testing/selftests/kvm/arm64/set_id_regs.c
···
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
 	REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
-	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
-	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
-	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
-	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1),
 	REG_FTR_END,
 };
 